fork support

lidj 2023-09-09 22:11:43 +08:00
parent e408942bc0
commit 046023f89d
22 changed files with 488 additions and 281 deletions

View File

@ -39,7 +39,6 @@ static inline vaddr_t get_fault_addr()
*/
extern int query_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t *pa, pte_t **entry);
u64 tmp = 0;
/* Handle Page Fault Here */
void do_page_fault(u64 errorcode, u64 fault_ins_addr)
{
@ -48,7 +47,6 @@ void do_page_fault(u64 errorcode, u64 fault_ins_addr)
paddr_t pte_pa;
pte_t *pte;
tmp = fault_ins_addr;
/*
* errorcode
*

View File

@ -79,10 +79,6 @@ static int set_pte_flags(pte_t *entry, vmr_prop_t flags, int kind)
else
entry->pteval &= (~PAGE_PCD);
/* equals: entry->pte_4K.cache_disable = 0; */
if (flags & VMR_COW)
entry->pteval &= (~PAGE_RW);
/* equals: entry->pte_4K.writeable = 0; */
// TODO: set memory type
return 0;
@ -96,7 +92,6 @@ int set_pte_write_flag(pte_t *entry, bool flag)
else
entry->pteval &= (~PAGE_RW);
/* equals: entry->pte_4K.writeable = 0; */
// kinfo("set write %lx\n",entry);
return 0;
}
@ -395,7 +390,7 @@ void free_page_table(void *pgtbl)
#endif
int map_range_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t pa,
size_t len, vmr_prop_t flags, u64 **out_pte)
size_t len, vmr_prop_t flags)
{
s64 total_page_cnt;
ptp_t *l0_ptp, *l1_ptp, *l2_ptp, *l3_ptp;
@ -404,7 +399,6 @@ int map_range_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t pa,
/* the index of pte in the last level page table */
int pte_index;
int i;
bool set_pte = false;
/* root page table page must exist */
BUG_ON(pgtbl == NULL);
@ -446,11 +440,6 @@ int map_range_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t pa,
set_pte_flags(&new_pte_val, flags, USER_PTE);
l3_ptp->ent[i].pteval = new_pte_val.pteval;
if (!set_pte) {
*out_pte = (u64 *)&l3_ptp->ent[i];
set_pte = true;
}
va += PAGE_SIZE;
pa += PAGE_SIZE;
@ -818,7 +807,6 @@ int pgtbl_deep_copy(vaddr_t *src_pgtbl, vaddr_t *dst_pgtbl)
/* L0 page table / pml4 */
src_l0_ptp = (ptp_t *)remove_pcid(src_pgtbl);
dst_l0_ptp = (ptp_t *)remove_pcid(dst_pgtbl);
kinfo("before pgcp %lx\n", dst_l0_ptp);
ret = __pgtbl_deep_copy(src_l0_ptp,dst_l0_ptp,0);
return ret;
}
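With the VMR_COW bit gone from set_pte_flags, fork no longer tags individual PTEs while mapping; write protection is now applied afterwards over whole ranges (see set_write_in_pgtbl in the vmspace changes below). A minimal sketch of that pattern, reusing the x86-64 PAGE_RW layout from this file; walk_to_pte is a hypothetical helper, not part of this commit:

/* Sketch: strip the writable bit from every mapped PTE in [va, va+len),
 * so the next write traps into do_page_fault and can be handled as COW.
 * walk_to_pte() is assumed; the kernel itself goes through
 * set_pte_write_flag() shown above. */
static void write_protect_range(void *pgtbl, vaddr_t va, size_t len)
{
        vaddr_t cur;
        for (cur = va; cur < va + len; cur += PAGE_SIZE) {
                pte_t *pte = walk_to_pte(pgtbl, cur); /* hypothetical walker */
                if (pte)
                        pte->pteval &= (~PAGE_RW);
        }
        /* callers must flush the TLB for the range afterwards */
}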

View File

@ -15,7 +15,7 @@ unsigned long mem_checksum(unsigned char *start, int size)
}
u64 pmo_checksum(struct pmobject *pmo) {
if (use_continuous_pages(pmo->type)) {
if (use_continuous_pages(pmo)) {
if (pmo->dram_cache.array == NULL) {
return (u64)mem_checksum((unsigned char *)phys_to_virt(pmo->start), pmo->size);
} else {
@ -31,7 +31,7 @@ u64 pmo_checksum(struct pmobject *pmo) {
return checksum;
}
}
else if (use_radix(pmo->type))
else if (use_radix(pmo))
return (u64)radix_checksum(pmo->radix);
else
return 0;
@ -368,9 +368,7 @@ static int __radix_pmo_restore(struct pmobject *pmo, struct radix_node *page_nod
page_node->values[i] = (void*)virt_to_phys((void*)va);
struct page *page = virt_to_page((void*)va);
page->pmo = pmo;
page->index = prefix | i;
init_page_info(page, pmo, prefix | i);
}
}
}
@ -572,7 +570,7 @@ int init_ckpt_page_radix(struct ckpt_pmobject* ckpt_pmo, struct pmobject *pmo)
ckpt_page_radix = ckpt_pmo->radix;
/* copy all pages in pmo to ckpt pages in ckpt pmo */
if(use_radix(pmo->type)) {
if(use_radix(pmo)) {
// lock(&pmo_radix->radix_lock);
// lock(&ckpt_page_radix->radix_lock);
r = __init_ckpt_page_radix(pmo_radix->root,
@ -597,7 +595,7 @@ int pmo_ckpt(struct pmobject *pmo, struct ckpt_pmobject *ckpt_pmo)
current_ckpt_version = get_current_ckpt_version();
if (use_continuous_pages(pmo->type) || use_radix(pmo->type)) {
if (use_continuous_pages(pmo) || use_radix(pmo)) {
if (unlikely(!ckpt_pmo->radix)) {
/* If the radix tree is not created, try to reuse the
* previous version of the radix tree */
@ -622,7 +620,7 @@ int pmo_ckpt(struct pmobject *pmo, struct ckpt_pmobject *ckpt_pmo)
}
/* check ckpt_pmo (for debug) */
#ifdef PMO_CHECKSUM
if (use_radix(pmo->type)) {
if (use_radix(pmo)) {
if (!ckpt_pmo->radix_backup) {
ckpt_pmo->radix_backup = new_radix();
init_radix(ckpt_pmo->radix_backup);
@ -632,7 +630,7 @@ int pmo_ckpt(struct pmobject *pmo, struct ckpt_pmobject *ckpt_pmo)
ckpt_pmo->checksum = pmo_checksum(pmo);
}
if (use_radix(pmo->type)) {
if (use_radix(pmo)) {
u64 ckpt_checksum = ckpt_pmo_checksum(ckpt_pmo);
if (ckpt_checksum != ckpt_pmo->checksum) {
printk("type:%d, verison:%d, %lx pmo_ckpt erratic: %lx, %lx\n",
@ -673,7 +671,7 @@ int pmo_restore(struct object *pmo_obj, struct ckpt_object *ckpt_pmo_obj, struct
vaddr_t pmo_start_va;
struct page *sp;
if (use_continuous_pages(pmo->type)) {
if (use_continuous_pages(pmo)) {
continuous_pmo_restore(pmo, ckpt_pmo->radix);
pmo->dram_cache.array = NULL;
lock_init(&pmo->dram_cache.lock);
@ -694,7 +692,7 @@ int pmo_restore(struct object *pmo_obj, struct ckpt_object *ckpt_pmo_obj, struct
pmo, pmo->type, pmo_checksum(pmo), ckpt_pmo->checksum);
}
#endif
} else if (use_radix(pmo->type)) {
} else if (use_radix(pmo)) {
/* restore radix tree */
lock_init(&pmo->radix->radix_lock);
r = radix_pmo_restore(pmo, ckpt_pmo);
@ -711,3 +709,98 @@ int pmo_restore(struct object *pmo_obj, struct ckpt_object *ckpt_pmo_obj, struct
#endif
return r;
}
static int __radix_deep_copy_with_hybird_mem(struct radix_node *src, struct radix_node *dst, int node_level)
{
int err;
int i;
struct radix_node *new;
if (node_level == RADIX_LEVELS - 1) {
for (i = 0; i < RADIX_NODE_SIZE; i++) {
if (!src->values[i]) {
if (dst->values[i]) {
void *pa = dst->values[i];
dst->values[i] = NULL;
kfree((void *)phys_to_virt(pa));
}
continue;
}
if (dst->values[i]) {
pagecpy((void *)phys_to_virt(dst->values[i]),
(void *)phys_to_virt(src->values[i]));
} else {
void *src_pa = src->values[i];
void *src_va = (void*)phys_to_virt(src_pa);
struct page *page = virt_to_page(src_va);
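/* Hybrid-memory policy: NVM pages are shared with the child (refcount
 * bump, no copy), while DRAM pages are duplicated eagerly below. */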
if(get_page_type(page) == NVM_PAGE) {
lock(&page->lock);
if(page->track_info) {
if (page->track_info->active) {
delete_from_active_list(page->track_info);
}
}
atomic_fetch_add_64(&page->ref_cnt,1);
unlock(&page->lock);
dst->values[i] = src_pa;
}else {
void *newpage = get_pages(0);
BUG_ON(!newpage);
pagecpy(newpage,
(void *)phys_to_virt(src->values[i]));
dst->values[i] =
(void *)virt_to_phys(newpage);
}
}
}
return 0;
}
for (i = 0; i < RADIX_NODE_SIZE; i++) {
if (src->children[i]) {
new = kzalloc(sizeof(struct radix_node));
if (IS_ERR(new)) {
return -ENOMEM;
}
dst->children[i] = new;
err = __radix_deep_copy_with_hybird_mem(src->children[i],
dst->children[i],
node_level + 1);
if (err) {
return err;
}
}
}
return 0;
}
int radix_deep_copy_with_hybird_mem(struct radix *src,struct radix *dst)
{
int r;
struct radix_node *new;
BUG_ON(!(src && dst));
r = 0;
/* don't need to lock dst */
lock(&src->radix_lock);
if (!src->root) {
goto out;
}
if (!dst->root) {
new = kzalloc(sizeof(struct radix_node));
if (IS_ERR(new)) {
r = -ENOMEM;
}
dst->root = new;
}
r = __radix_deep_copy_with_hybird_mem(src->root, dst->root, 0);
out:
unlock(&src->radix_lock);
return r;
}
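The intended call pattern matches pmo_clone later in this commit: allocate a fresh radix, then deep-copy into it. A condensed caller-side sketch (error cleanup elided):

dst_pmo->radix = new_radix();
init_radix(dst_pmo->radix);
r = radix_deep_copy_with_hybird_mem(src_pmo->radix, dst_pmo->radix);
if (r)
        kinfo("radix_deep_copy_with_hybird_mem failed: %d\n", r);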

View File

@ -135,7 +135,7 @@ move_runtime_nvm_page_from_backup(struct pmobject *pmo, u64 index)
new_va = page_pair->pages[1].va;
page_pair->pages[1].version_number = 0;
if (use_continuous_pages(pmo->type)) {
if (use_continuous_pages(pmo)) {
BUG_ON(new_va != phys_to_virt(pmo->start + index * PAGE_SIZE));
}
@ -198,7 +198,7 @@ static int migrate_page(struct page *old_page, bool to_dram)
if (to_dram) {
commit_dram_cached_page(pmo, index, (paddr_t)new_pa);
} else {
if (use_radix(pmo->type)) {
if (use_radix(pmo)) {
commit_page_to_pmo(pmo, index, (paddr_t)new_pa);
} else {
clear_dram_cached_page(pmo, index);

View File

@ -11,11 +11,10 @@ typedef u64 vmr_prop_t;
#define VMR_EXEC (1 << 2)
#define VMR_DEVICE (1 << 3)
#define VMR_NOCACHE (1 << 4)
#define VMR_COW (1 << 5)
/* functions */
int map_range_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t pa, size_t len,
vmr_prop_t flags, u64 **out_pte);
vmr_prop_t flags);
int unmap_range_in_pgtbl(void *pgtbl, vaddr_t va, size_t len);
#ifndef KBASE

View File

@ -205,3 +205,10 @@ static inline u64 compound_head_offset(struct page *page, struct page *head)
BUG_ON(dis % sizeof(struct page));
return dis / sizeof(struct page);
}
/* TreeSLS */
static inline void init_page_info(struct page *page, struct pmobject *pmo, u64 index) {
page->index = index;
page->pmo = pmo;
page->page_pair = 0;
}
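Call sites that used to set the three fields by hand (see pmo.c below) now reduce to:

struct page *page = virt_to_page((void *)phys_to_virt(pa));
init_page_info(page, pmo, index); /* sets index and pmo, clears page_pair */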

View File

@ -213,11 +213,12 @@ void pmo_set_preserved(struct pmobject *pmo);
/* Fork */
int vmspace_clone(struct vmspace *dst_vmspace, struct vmspace *src_vmspace, struct cap_group *dst_cap_group);
int pmo_clone(struct pmobject *dst_pmo, struct pmobject *src_pmo);
int pmo_clone(struct pmobject *dst_pmo, struct pmobject *src_pmo, bool *is_cow);
bool use_radix(int type);
bool use_continuous_pages(int type);
bool use_radix(struct pmobject *pmo);
bool use_continuous_pages(struct pmobject *pmo);
bool is_external_sync_pmo(struct pmobject *pmo);
bool is_shared_pmo(struct pmobject *pmo);
/* init patch pool */
void *create_patch_pool(void);

View File

@ -40,42 +40,6 @@ extern void pagecpy_nt(void *dst, const void *src);
#if PGFAULT_POLICY == ONDEMAND
int pa_checksum(const char *page) {
int i, sum = 0;
for (i = 0; i < PAGE_SIZE; ++i)
sum += page[i];
return sum;
}
int pas_checksum(const char *page, int size) {
int i, sum = 0;
for (i = 0; i < size; ++i)
sum += page[i];
return sum;
}
static int size_to_page_order(unsigned long size)
{
unsigned long order;
unsigned long pg_num;
unsigned long tmp;
order = 0;
pg_num = ROUND_UP(size, BUDDY_PAGE_SIZE) / BUDDY_PAGE_SIZE;
tmp = pg_num;
while (tmp > 1) {
tmp >>= 1;
order += 1;
}
if (pg_num > (1 << order))
order += 1;
return (int)order;
}
extern u64 tmp;
/* add_pte_patch_to_pool: when a write fault triggers, track the page's pte and page struct
*/
@ -117,7 +81,7 @@ u64 patch_page_num = 0;
extern u64 pf_count;
extern u64 pf_tot_time;
#endif
int map_page_in_pgtbl(struct vmspace *vmspace, vaddr_t va, paddr_t pa,
int map_page_in_pgtbl(void *pgtbl, vaddr_t va, paddr_t pa,
vmr_prop_t flags, pte_t **out_pte);
int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present, int write)
{
@ -145,36 +109,12 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
lock(&vmspace->vmspace_lock);
vmr = find_vmr_for_va(vmspace, fault_addr);
if (vmr == NULL) {
kinfo("[dbg] PF: P %d W %d\n", present, write);
kinfo("handle_trans_fault: no vmr found for va 0x%lx!\n",
fault_addr);
kinfo("process: %p\n", current_cap_group);
print_thread(current_thread);
kinfo("faulting IP: 0x%lx, SP: 0x%lx\n",
arch_get_thread_next_ip(current_thread),
arch_get_thread_stack(current_thread));
extern u64 tmp;
kinfo("fault_ins_addr: 0x%lx\n",tmp);
kprint_vmr(vmspace);
// TODO: kill the process
kwarn("TODO: kill such faulting process.\n");
return -ENOMAPPING;
}
if (present && !(vmspace->flags & VM_FLAG_PRESERVE) && !(vmr->perm & VMR_COW)) {
printk("perm %u, pmo_flags %u, write %u, pmo_type %u\n",
vmr->perm,
vmspace->flags & VM_FLAG_PRESERVE,
write,
vmr->pmo->type);
/* The PTE is valid, it's a permission error */
kinfo("General Protection Fault\n");
if (write) {
kinfo("Cannot write at %p.\n", fault_addr);
} else {
kinfo("Cannot read at %p.\n", fault_addr);
}
while(1);
}
pmo = vmr->pmo;
switch (pmo->type) {
@ -185,8 +125,6 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
case PMO_ANONYM:
case PMO_SHM: {
BUG_ON(pmo->type == PMO_RING_BUFFER && present);
vmr_prop_t perm;
perm = vmr->perm;
@ -223,7 +161,6 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
fault_addr = ROUND_DOWN(fault_addr, PAGE_SIZE);
pa = get_page_from_pmo(pmo, index);
// printk("[debug] %s: paddr=%llx\n", __func__, pa);
/* PMO_FILE fault means user fault */
if (pmo->type == PMO_FILE && !pa) {
@ -275,73 +212,13 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
#ifdef PMO_CHECKSUM
page->ckpt_version_number = get_current_ckpt_version() + 1;
#endif
// int ckpt_ret =
#ifndef OMIT_BENCHMARK
ckpt_nvm_page(pmo, new_va, index);
#endif
add_pte_patch_to_pool(vmspace, (pte_t *)pte, page);
// if(ckpt_ret) {
// track_access(page);
// }
}
#endif
}
else if (vmr->perm & VMR_COW) {
/* write to a COWed page */
perm &= (~VMR_COW);
if (pmo->type == PMO_FILE) {
perm |= VMR_EXEC;
}
// BUG_ON(write == 0);
struct page *p_page;
if (pmo->type == PMO_DATA) {
p_page = virt_to_page((void*)phys_to_virt(pmo->start));
size_t size = pmo->size;
lock(&p_page->lock);
if (p_page->ref_cnt > 1) {
void *new_page = get_pages(size_to_page_order(size));
BUG_ON(new_page == 0);
memcpy(new_page, (void*)phys_to_virt(pmo->start), size);
p_page->ref_cnt--;
pmo->start = virt_to_phys(new_page);
}
unlock(&p_page->lock);
lock(&vmspace->pgtbl_lock);
map_range_in_pgtbl(vmspace->pgtbl, vmr->start, pmo->start,
size, perm, (u64 **)&pte);
unlock(&vmspace->pgtbl_lock);
}
else {
BUG_ON(/*pmo->type == PMO_FILE ||*/ pmo->type == PMO_SHM);
if (!write) {
perm |= VMR_COW;
lock(&vmspace->pgtbl_lock);
map_range_in_pgtbl(vmspace, fault_addr, pa,
PAGE_SIZE, perm, (u64 **)&pte);
unlock(&vmspace->pgtbl_lock);
} else {
p_page = virt_to_page((void*)phys_to_virt(pa));
lock(&p_page->lock);
if (p_page->ref_cnt > 1) {
void *new_page = get_pages(0);
BUG_ON(new_page == 0);
memcpy(new_page, (void*)phys_to_virt(pa), PAGE_SIZE);
p_page->ref_cnt--;
/* new pa */
pa = virt_to_phys(new_page);
commit_page_to_pmo(pmo, index, pa);
} else {
BUG_ON(p_page->ref_cnt != 1);
}
unlock(&p_page->lock);
lock(&vmspace->pgtbl_lock);
map_range_in_pgtbl(vmspace, fault_addr, pa,
PAGE_SIZE, perm, (u64 **)&pte);
unlock(&vmspace->pgtbl_lock);
}
}
}
else {
} else {
/*
* pa != 0: the faulting address already has a physical
* page committed.
@ -363,9 +240,9 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
* needs to add the mapping in the page table.
* Repeated mapping operations are harmless.
*/
if (pmo->type == PMO_FILE) {
perm |= VMR_EXEC;
/* For PMO_FILE, we simply grant all permissions for now. */
if (pmo->type == PMO_FILE) {
perm = VMR_READ | VMR_WRITE | VMR_EXEC;
}
#ifndef OMIT_PF
if ((vmspace->flags & VM_FLAG_PRESERVE) && !write && !is_external_sync_pmo(pmo)) {
@ -373,11 +250,77 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
perm &= ~VMR_WRITE;
}
#endif
/* handle COW */
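/* Two cases below: continuous-page PMOs copy and remap the whole
 * object while it is still shared (ref_cnt > 1); radix-backed PMOs
 * copy only the single faulting page. Shared PMOs (SHM and
 * external-sync) never take this path. */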
if (!is_shared_pmo(pmo)) {
if (use_continuous_pages(pmo)) {
page = virt_to_page((void*)phys_to_virt(pmo->start));
lock(&page->lock);
if (page->ref_cnt > 1) {
void *new_va = kmalloc(pmo->size);
if (new_va == NULL) {
ret = -ENOMEM;
unlock(&page->lock);
break;
}
memcpy(new_va, (void*)phys_to_virt(pmo->start), pmo->size);
pmo->start = virt_to_phys(new_va);
/* new pa */
pa = pmo->start + index * PAGE_SIZE;
lock(&vmspace->pgtbl_lock);
if ((vmspace->flags & VM_FLAG_PRESERVE)) {
map_range_in_pgtbl(vmspace->pgtbl, vmr->start, pmo->start,
pmo->size, perm & (~VMR_WRITE));
} else {
map_range_in_pgtbl(vmspace->pgtbl, vmr->start, pmo->start,
pmo->size, perm);
}
unlock(&vmspace->pgtbl_lock);
flush_tlbs(vmspace, vmr->start, vmr->size);
atomic_fetch_sub_64(&page->ref_cnt, 1);
}
unlock(&page->lock);
} else {
bool cow = false;
page = virt_to_page((void*)phys_to_virt(pa));
lock(&page->lock);
if (page->ref_cnt > 1) {
void *new_va = get_pages(0);
if (new_va == NULL) {
ret = -ENOMEM;
unlock(&page->lock);
break;
}
pagecpy_nt(new_va, (void*)phys_to_virt(pa));
/* new pa */
pa = virt_to_phys(new_va);
lock(&vmspace->pgtbl_lock);
map_page_in_pgtbl(vmspace->pgtbl, fault_addr, pa,
perm, &pte);
unlock(&vmspace->pgtbl_lock);
flush_tlbs(vmspace, fault_addr, PAGE_SIZE);
atomic_fetch_sub_64(&page->ref_cnt, 1);
cow = true;
}
unlock(&page->lock);
if (cow) {
commit_page_to_pmo(pmo, index, pa);
goto skip_add_mapping;
}
}
}
/* Add mapping in the page table */
pte_t *pte = NULL;
lock(&vmspace->pgtbl_lock);
map_page_in_pgtbl(vmspace->pgtbl, fault_addr, pa, perm, &pte);
unlock(&vmspace->pgtbl_lock);
skip_add_mapping:
/* do not persist pages belonging to an external sync pmo */
if (is_external_sync_pmo(pmo))
@ -388,13 +331,13 @@ int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr, int present,
if (need_omit(vmspace)) {
break;
}
#endif
page = virt_to_page((void*)phys_to_virt(pa));
#endif
if (write && (vmspace->flags & VM_FLAG_PRESERVE)) {
page = virt_to_page((void*)phys_to_virt(pa));
BUG_ON(unlikely(!page));
if (unlikely(get_page_type(page) != NVM_PAGE)) {
BUG("page(%p) is not NVM page, type=%d, flag=%d, pte=0x%llx, pmo_type=%d, %llx\n",
page, get_page_type(page), page->flags, pte, pmo->type, pmo->dram_cache.array);
/* DRAM pages will be marked as unwritable after fork */
break;
}
#ifndef OMIT_MEMCPY
/* copy page to ckpt_page */

View File

@ -197,7 +197,6 @@ static int fill_page_table(struct vmspace *vmspace, struct vmregion *vmr)
vaddr_t va;
vmr_prop_t perm;
int ret;
pte_t *pte;
pm_size = vmr->pmo->size;
pa = vmr->pmo->start;
@ -208,7 +207,7 @@ static int fill_page_table(struct vmspace *vmspace, struct vmregion *vmr)
perm &= ~(VMR_WRITE);
#endif
lock(&vmspace->pgtbl_lock);
ret = map_range_in_pgtbl(vmspace->pgtbl, va, pa, pm_size, perm, (u64 **)&pte);
ret = map_range_in_pgtbl(vmspace->pgtbl, va, pa, pm_size, perm);
unlock(&vmspace->pgtbl_lock);
return ret;
@ -735,13 +734,6 @@ void vmspace_deinit(void *ptr)
}
#endif
/* TreeSLS */
void page_refcnt_add(vaddr_t va, paddr_t pa)
{
struct page *p_page;
p_page = virt_to_page((void*)phys_to_virt(pa));
atomic_fetch_add_64(&p_page->ref_cnt, 1);
}
/*
* This function clones a vmspace. The new vmspace has the same layout and
@ -757,6 +749,7 @@ int vmspace_clone(struct vmspace *dst_vmspace, struct vmspace *src_vmspace,
struct pmobject *new_pmo;
int r;
int cap;
bool is_cow;
lock(&src_vmspace->vmspace_lock);
lock(&src_vmspace->pgtbl_lock);
@ -770,20 +763,12 @@ int vmspace_clone(struct vmspace *dst_vmspace, struct vmspace *src_vmspace,
r = -ENOMEM;
goto out_fail;
}
r = pmo_clone(new_pmo, vmr->pmo);
if(new_pmo->type == PMO_DATA || new_pmo->type == PMO_DATA_NOCACHE) {
struct page *p_page;
p_page = virt_to_page((void*)phys_to_virt(new_pmo->start));
atomic_fetch_add_64(&p_page->ref_cnt, 1);
}
else if(new_pmo->radix != NULL /*&& new_pmo->type != PMO_FILE*/ && new_pmo->type != PMO_SHM ){
radix_traverse(new_pmo->radix,page_refcnt_add);
}
r = pmo_clone(new_pmo, vmr->pmo, &is_cow);
if (r < 0) {
r = -ENOMEM;
goto out_fail;
}
cap = cap_alloc(dst_cap_group, new_pmo, 0);
if (cap < 0) {
r = cap;
@ -796,40 +781,30 @@ int vmspace_clone(struct vmspace *dst_vmspace, struct vmspace *src_vmspace,
kwarn("%s fails\n", __func__);
goto out_fail;
}
new_vmr->vmspace = (void *)dst_vmspace;
/* FIXME(FN): COWed page should first point to origin virt_vmr? */
new_vmr->start = vmr->start;
new_vmr->size = vmr->size;
if(/*new_pmo->type != PMO_FILE && */new_pmo->type != PMO_SHM)
new_vmr->perm = vmr->perm | VMR_COW;
else
new_vmr->perm = vmr->perm;
new_vmr->perm = vmr->perm;
new_vmr->pmo = new_pmo;
pmo_add_reverse_node(new_pmo, new_vmr);
add_vmr_to_vmspace(dst_vmspace, new_vmr);
/*
* For PMO_DATA & PMO_DATA_NOCACHE & PMO_DEVICE, we directly
* For PMO based on continuous physical pages, we directly
* map it in the page table. For PMO based on radix tree, it
* will be automatically mapped when the page fault occurs.
*/
if ((new_pmo->type == PMO_DATA) || (new_pmo->type == PMO_DATA_NOCACHE)
|| (new_pmo->type == PMO_DEVICE))
if (use_continuous_pages(new_pmo))
fill_page_table(dst_vmspace, new_vmr);
if (vmr == src_vmspace->heap_vmr)
dst_vmspace->heap_vmr = new_vmr;
}
for_each_in_list_safe(vmr, tmp, list_node, &(src_vmspace->vmr_list))
{
// if(vmr->pmo->type == PMO_DATA || vmr->pmo->type == PMO_DATA_NOCACHE) {
if (/*vmr->pmo->type != PMO_FILE &&*/ vmr->pmo->type != PMO_SHM){
if (is_cow && (vmr->perm & VMR_WRITE)) {
extern int set_write_in_pgtbl(struct vmspace *vmspace, vaddr_t va, size_t len, bool flag);
if(vmr->perm & VMR_WRITE) {
set_write_in_pgtbl(src_vmspace,vmr->start,vmr->size,false);
}
/* Copy On Write */
vmr->perm |= VMR_COW;
if (use_continuous_pages(new_pmo))
set_write_in_pgtbl(dst_vmspace, new_vmr->start, new_vmr->size, false);
set_write_in_pgtbl(src_vmspace, vmr->start, vmr->size, false);
}
}
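Condensed, the new fork-time contract between vmspace_clone and pmo_clone reads as follows; this restates the code above per VMR rather than adding logic:

/* pmo_clone reports through is_cow whether the pages are shared
 * copy-on-write; only then are the address spaces write-protected.
 * The child side is touched only for continuous PMOs, because
 * radix-backed PMOs are mapped lazily at the first fault anyway. */
bool is_cow;
if (pmo_clone(new_pmo, vmr->pmo, &is_cow) == 0
    && is_cow && (vmr->perm & VMR_WRITE)) {
        if (use_continuous_pages(new_pmo))
                set_write_in_pgtbl(dst_vmspace, new_vmr->start,
                                   new_vmr->size, false);
        set_write_in_pgtbl(src_vmspace, vmr->start, vmr->size, false);
}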

View File

@ -16,6 +16,7 @@
static int pmo_init(struct pmobject *pmo, pmo_type_t type,
size_t len, paddr_t paddr);
extern int radix_deep_copy_with_hybird_mem(struct radix *src,struct radix *dst);
int sys_create_device_pmo(u64 paddr, u64 size)
{
@ -270,54 +271,115 @@ out_fail:
return r;
}
/* For fork */
int pmo_clone(struct pmobject *dst_pmo, struct pmobject *src_pmo)
/* TreeSLS */
void page_refcnt_add(vaddr_t va, paddr_t pa)
{
int r;
struct page *p_page;
p_page = virt_to_page((void*)phys_to_virt(pa));
atomic_fetch_add_64(&p_page->ref_cnt, 1);
}
/* For fork */
int pmo_clone(struct pmobject *dst_pmo, struct pmobject *src_pmo, bool *is_cow)
{
int r = 0, i;
int page_num;
u64 *array;
struct page *page;
if (src_pmo == NULL || dst_pmo == NULL) {
return -EINVAL;
}
*is_cow = true;
dst_pmo->size = src_pmo->size;
dst_pmo->type = src_pmo->type;
if (dst_pmo->size == 0) {
return 0;
}
init_list_head(&dst_pmo->reverse_list);
lock_init(&dst_pmo->reverse_list_lock);
switch (src_pmo->type) {
case PMO_DATA:
case PMO_DATA_NOCACHE: {
/*
* For PMO_DATA, just copy!
*/
lock_init(&(dst_pmo->dram_cache.lock));
/* Copy on write */
dst_pmo->start = src_pmo->start;
if(src_pmo->dram_cache.array != NULL) {
/* Just copy */
*is_cow = false;
void *new_va = kmalloc(dst_pmo->size);
if(new_va == NULL) {
return -ENOMEM;
}
dst_pmo->start = (paddr_t)virt_to_phys(new_va);
page = virt_to_page(new_va);
init_page_info(page, dst_pmo, 0);
array = src_pmo->dram_cache.array;
page_num = DIV_ROUND_UP(src_pmo->size, PAGE_SIZE);
for(i = 0; i < page_num; i++) {
u64 src_pa, dst_pa = dst_pmo->start + i * PAGE_SIZE;
if(array[i] != 0) {
src_pa = array[i];
}else {
src_pa = src_pmo->start + i * PAGE_SIZE;
}
memcpy((void*)phys_to_virt(dst_pa), (void*)phys_to_virt(src_pa), PAGE_SIZE);
}
} else {
/* Copy on write */
dst_pmo->start = src_pmo->start;
page = virt_to_page((void*)phys_to_virt(src_pmo->start));
atomic_fetch_add_64(&page->ref_cnt, 1);
}
break;
}
case PMO_FILE: { /* PMO backed by a file. It also uses the radix. */
case PMO_FILE: {
#ifdef CHCORE_ENABLE_FMAP
/* PMO backed by a file. It also uses the radix. */
dst_pmo->private = src_pmo->private;
if (src_pmo->radix == NULL)
break;
#else
kwarn("fmap is not implemented, we should not use PMO_FILE\n");
r = -EINVAL;
break;
#endif
}
case PMO_ANONYM:
case PMO_SHM: {
case PMO_ANONYM: {
/*
* For radix tree based PMO, copy data in each existing
* physical page and rebuild the radix tree. The new radix
* tree should have the same structure.
* For radix tree based PMO, rebuild the radix tree.
* The new radix tree should have the same structure.
*/
dst_pmo->radix = new_radix();
init_radix(dst_pmo->radix);
r = radix_deep_copy_with_hybird_mem(src_pmo->radix,
dst_pmo->radix);
if (r) {
kinfo("radix_deep_copy_with_hybird_mem failed: %d\n", r);
break;
}
break;
}
case PMO_SHM:
case PMO_RING_BUFFER_RADIX: {
/*
* For radix tree based PMO, rebuild the radix tree.
* The new radix tree should have the same structure.
*/
dst_pmo->radix = new_radix();
init_radix(dst_pmo->radix);
r = radix_deep_copy(src_pmo->radix,
dst_pmo->radix,
src_pmo->type == PMO_SHM ? 1 : 0);
false);
if (r) {
kinfo("radix_deep_copy failed: %d\n", r);
return r;
break;
}
radix_traverse(dst_pmo->radix, page_refcnt_add);
break;
}
case PMO_RING_BUFFER:
case PMO_DEVICE: {
/* Device memory should be the same. */
dst_pmo->start = src_pmo->start;
@ -334,7 +396,11 @@ int pmo_clone(struct pmobject *dst_pmo, struct pmobject *src_pmo)
break;
}
}
return 0;
if (is_shared_pmo(src_pmo))
*is_cow = false;
return r;
}
/*
@ -632,9 +698,7 @@ static int pmo_init(struct pmobject *pmo, pmo_type_t type, size_t len, paddr_t p
lock_init(&(pmo->dram_cache.lock));
/* init first page's PMO info */
struct page *page = virt_to_page(new_va);
page->pmo = pmo;
page->index = 0;
page->page_pair = 0;
init_page_info(page, pmo, 0);
break;
}
case PMO_FILE: {
@ -703,7 +767,7 @@ static int pmo_init(struct pmobject *pmo, pmo_type_t type, size_t len, paddr_t p
void commit_dram_cached_page(struct pmobject *pmo, u64 index, paddr_t pa)
{
if (use_continuous_pages(pmo->type)) {
if (use_continuous_pages(pmo)) {
/* allocate the dram cache array if it is NULL */
lock(&(pmo->dram_cache.lock));
if (!pmo->dram_cache.array) {
@ -713,21 +777,19 @@ void commit_dram_cached_page(struct pmobject *pmo, u64 index, paddr_t pa)
unlock(&(pmo->dram_cache.lock));
BUG_ON(index >= DIV_ROUND_UP(pmo->size, PAGE_SIZE));
pmo->dram_cache.array[index] = pa;
} else if (use_radix(pmo->type)) {
} else if (use_radix(pmo)) {
BUG_ON(radix_add(pmo->radix, index, (void *)pa));
} else {
BUG("Unsupport pmo type\n");
}
struct page *page = virt_to_page((void *)phys_to_virt(pa));
page->pmo = pmo;
page->index = index;
page->page_pair = 0;
init_page_info(page, pmo, index);
}
void clear_dram_cached_page(struct pmobject *pmo, u64 index)
{
BUG_ON(!use_continuous_pages(pmo->type));
BUG_ON(!use_continuous_pages(pmo));
BUG_ON(index >= DIV_ROUND_UP(pmo->size, PAGE_SIZE));
BUG_ON(!pmo->dram_cache.array);
if (pmo->dram_cache.array[index] == 0)
@ -739,14 +801,12 @@ void clear_dram_cached_page(struct pmobject *pmo, u64 index)
void commit_page_to_pmo(struct pmobject *pmo, u64 index, paddr_t pa)
{
/* commit nvm/dram page to radix pmo */
BUG_ON(!use_radix(pmo->type));
BUG_ON(!use_radix(pmo));
/* The radix interfaces are thread-safe */
BUG_ON(radix_add(pmo->radix, index, (void *)pa));
struct page *page = virt_to_page((void *)phys_to_virt(pa));
page->pmo = pmo;
page->index = index;
page->page_pair = 0;
init_page_info(page, pmo, index);
}
/* Return 0 (NULL) when not found */
@ -754,10 +814,10 @@ paddr_t get_page_from_pmo(struct pmobject *pmo, u64 index)
{
paddr_t pa = 0;
if (use_radix(pmo->type)) {
if (use_radix(pmo)) {
/* The radix interfaces are thread-safe */
pa = (paddr_t)radix_get(pmo->radix, index);
} else if (use_continuous_pages(pmo->type)) {
} else if (use_continuous_pages(pmo)) {
if (pmo->dram_cache.array)
pa = pmo->dram_cache.array[index];
/* pa is not dram cached */
@ -1074,15 +1134,17 @@ u64 sys_get_free_mem_size(void)
return get_free_mem_size();
}
inline bool use_radix(int type)
inline bool use_radix(struct pmobject *pmo)
{
u64 type = pmo->type;
if (type == PMO_ANONYM || type == PMO_FILE || type == PMO_SHM || type == PMO_RING_BUFFER_RADIX)
return true;
return false;
}
inline bool use_continuous_pages(int type)
inline bool use_continuous_pages(struct pmobject *pmo)
{
u64 type = pmo->type;
if(type == PMO_DATA || type == PMO_DATA_NOCACHE || type == PMO_DEVICE
|| type == PMO_RING_BUFFER)
return true;
@ -1094,4 +1156,11 @@ inline bool is_external_sync_pmo(struct pmobject *pmo)
if (pmo->type == PMO_RING_BUFFER || pmo->type == PMO_RING_BUFFER_RADIX || pmo->type == PMO_DEVICE)
return true;
return false;
}
}
inline bool is_shared_pmo(struct pmobject *pmo)
{
if (pmo->type == PMO_SHM || is_external_sync_pmo(pmo))
return true;
return false;
}

View File

@ -291,12 +291,7 @@ static int create_thread(struct cap_group *cap_group,
goto out_obj_put;
}
/* Set redis-server to CPU2 and redis-benchmark to CPU3 */
if(tls == 2 || tls == 3) {
ret = thread_init(thread, cap_group, stack, pc, prio, type, tls);
} else {
ret = thread_init(thread, cap_group, stack, pc, prio, type, NO_AFF);
}
ret = thread_init(thread, cap_group, stack, pc, prio, type, NO_AFF);
if (ret != 0)
goto out_free_obj;

View File

@ -27,7 +27,7 @@ struct list_head fmap_fault_pool_list;
typedef u64 pte_t;
void add_pte_patch_to_pool(struct vmspace *vmspace, pte_t *pte, struct page *page);
int map_page_in_pgtbl(struct vmspace *vmspace, vaddr_t va, paddr_t pa,
int map_page_in_pgtbl(void* pgtbl, vaddr_t va, paddr_t pa,
vmr_prop_t flags, pte_t **out_pte);
int track_access(struct page *page);

View File

@ -1,12 +1,12 @@
# SLS Basic Configurations
set(SLS_RESTORE OFF)
set(SLS_RESTORE ON)
set(SLS_EXT_SYNC OFF)
set(SLS_HYBRID_MEM ON)
# SLS Report Details
set(SLS_REPORT_CKPT OFF)
set(SLS_REPORT_RESTORE OFF)
set(SLS_REPORT_HYBRID OFF)
set(SLS_REPORT_CKPT ON)
set(SLS_REPORT_RESTORE ON)
set(SLS_REPORT_HYBRID ON)
# SLS special tests: for tests of only-checkpoint, +pf, +memcpy
set(SLS_SPECIAL_OMIT_PF OFF)

View File

@ -21,9 +21,9 @@ struct clone_cap_group_args {
u64 parent_badge;
};
extern int chcore_pid;
int chcore_do_fork()
{
// printf("chcore do fork\n");
pid_t pid;
int ret;
ipc_msg_t *ipc_msg;
@ -41,7 +41,6 @@ int chcore_do_fork()
/* Get child badge and child pid */
child_badge = ipc_call(procmgr_ipc_struct, ipc_msg);
// printf("child badge=%lx\n",child_badge);
pid = ((struct proc_request *)ipc_get_msg_data(ipc_msg))->pid;
pcid = ((struct proc_request *)ipc_get_msg_data(ipc_msg))->pcid;
ipc_destroy_msg(ipc_msg);
@ -52,35 +51,31 @@ int chcore_do_fork()
args.lwip_server_cap = lwip_server_cap;
args.procmgr_server_cap = procmgr_server_cap;
if ((ret = usys_clone_cap_group((u64)&args)) < 0) {
// fork failed
// printf("fork failed\n");
// fork failed
return ret;
} else if (ret > 0) {
// parent process
// parent
return pid;
} else {
// child process
/* reinitialize stdio */
// child
/* reinitialize stdio */
chcore_reinitialize_stdio();
/* reinitialize the connections to the system servers */
// printf("reconnect system servers\n");
/* set pid */
chcore_pid = pid;
/* reconnect system server */
reconnect_to_system_servers(args.fs_server_cap,
args.lwip_server_cap,
args.procmgr_server_cap);
// printf("icb->conn_cap=%lx\n",procmgr_ipc_struct->conn_cap);
ipc_msg = ipc_create_msg(procmgr_ipc_struct,
sizeof(struct proc_request), 2);
pr.req = PROC_CHILD_FINISH_FORK;
memcpy(ipc_get_msg_data(ipc_msg), &pr, sizeof(struct proc_request));
ipc_set_msg_cap(ipc_msg, 0, SELF_CAP);
ipc_set_msg_cap(ipc_msg, 1, args.child_mt_cap);
// printf("child ipc call\n");
// printf("icb->conn_cap=%lx\n",procmgr_ipc_struct->conn_cap);
ipc_call(procmgr_ipc_struct, ipc_msg);
// printf("child ipc call done\n");
ipc_destroy_msg(ipc_msg);
{
/* reinitialize libc metadata */
/* reinitialize libc metadata */
pthread_t self = __pthread_self();
self->robust_list.off = 0;
self->robust_list.pending = 0;
@ -93,10 +88,7 @@ int chcore_do_fork()
fsmr.req = FSM_CHILD_FINISH_FORK;
fsmr.parentBagde = args.parent_badge;
memcpy(ipc_get_msg_data(ipc_msg), &fsmr, sizeof(struct fsm_request));
// printf("child ipc call\n");
// printf("icb->conn_cap=%lx\n",procmgr_ipc_struct->conn_cap);
ipc_call(fsm_ipc_struct, ipc_msg);
// printf("child ipc call done\n");
ipc_destroy_msg(ipc_msg);
return 0;

View File

@ -6,7 +6,7 @@
pid_t waitpid(pid_t pid, int *status, int options)
{
assert(0);
// assert(0);
return chcore_waitpid(pid, status, options, 0);
return syscall_cp(SYS_wait4, pid, status, options, 0);
}

View File

@ -20,3 +20,4 @@ add_executable(fib.bin fib.c)
add_executable(ustress.bin ustress.c)
add_executable(get_free_mem_size.bin get_free_mem_size.c)
add_executable(test_heap.bin test_heap.c)
add_executable(test_fork.bin test_fork.c)

View File

@ -0,0 +1,80 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#define PAGE_SIZE 4096
int main() {
pid_t child_pid;
int num = PAGE_SIZE / sizeof(int);
int stack_array1[PAGE_SIZE / sizeof(int)], stack_array2[PAGE_SIZE / sizeof(int)];
int *heap_array1, *heap_array2;
heap_array1 = (int*)malloc(PAGE_SIZE);
heap_array2 = (int*)malloc(PAGE_SIZE);
for (int i = 0;i < num; i++) {
stack_array1[i] = i; // 0-1023
stack_array2[i] = num + i; // 1024-2047
heap_array1[i] = num * 2 + i; // 2048-3071
heap_array2[i] = num * 3 + i; // 3072-4095
}
child_pid = fork();
if (child_pid == -1) {
perror("my_fork");
exit(EXIT_FAILURE);
} else if (child_pid == 0) {
// child
int pass = 1;
printf("This is the child process. PID: %d\n", getpid());
for (int i = 0;i < num; i++) {
stack_array1[i] = -i;
heap_array1[i] = -(num * 2 + i);
}
for (int i = 0; i < num; i++) {
if (stack_array1[i] != -i || stack_array2[i] != num + i ||
heap_array1[i] != -(num * 2 + i) || heap_array2[i] != (num * 3 + i)) {
printf("fork error: COW failed\n");
pass = 0;
}
}
if(pass)
printf("child: fork pass\n");
free(heap_array1);
free(heap_array2);
exit(EXIT_SUCCESS);
} else {
// parent
int pass = 1;
printf("This is the parent process. Child PID: %d\n", child_pid);
for (int i = 0;i < num; i++) {
stack_array2[i] = -(num + i);
heap_array2[i] = -(num * 3 + i);
}
for (int i = 0; i < num; i++) {
if (stack_array1[i] != i || stack_array2[i] != -(num + i) ||
heap_array1[i] != (num * 2 + i) || heap_array2[i] != -(num * 3 + i)) {
printf("fork error: COW failed\n");
pass = 0;
}
}
if(pass)
printf("parent: fork pass\n");
printf("parent sleep\n");
int status;
waitpid(child_pid, &status, 0);
if (WIFEXITED(status)) {
printf("Child process exited with status %d\n", WEXITSTATUS(status));
}
}
free(heap_array1);
free(heap_array2);
return 0;
}
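When COW isolation holds, a successful run prints something like the following; PIDs are illustrative and the parent/child interleaving depends on the scheduler:

This is the parent process. Child PID: 5
This is the child process. PID: 5
child: fork pass
parent: fork pass
parent waits for child
Child process exited with status 0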

View File

@ -331,6 +331,9 @@ void fs_server_dispatch(ipc_msg_t *ipc_msg, u64 client_badge)
case FS_REQ_TEST_PERF:
ret = fs_wrapper_count(ipc_msg, fr);
break;
case FS_CHILD_FINISH_FORK:
ret = fs_finish_fork(ipc_msg, fr->fork.childBadge, fr->fork.parentBagde);
break;
default:
printf("[Error] Strange FS Server request number %d\n", fr->req);
ret = -EINVAL;

View File

@ -126,6 +126,7 @@ int fs_wrapper_fallocate(ipc_msg_t *ipc_msg, struct fs_request *fr);
int fs_wrapper_fcntl(u64 client_badge, ipc_msg_t *ipc_msg, struct fs_request *fr);
int fs_wrapper_mount(ipc_msg_t *ipc_msg, struct fs_request *fr);
int fs_wrapper_umount(ipc_msg_t *ipc_msg, struct fs_request *fr);
int fs_finish_fork(ipc_msg_t *ipc_msg, u64 child_badge, u64 parent_badge);
void fs_server_dispatch(ipc_msg_t *ipc_msg, u64 client_badge);

View File

@ -923,3 +923,52 @@ int fs_wrapper_umount(ipc_msg_t *ipc_msg, struct fs_request *fr)
{
return server_ops.umount(ipc_msg, fr);
}
int fs_finish_fork(ipc_msg_t *ipc_msg, u64 child_badge, u64 parent_badge)
{
struct fmap_area_mapping *area_iter;
struct server_entry_node *private_iter;
int ret;
/* Find the parent's entry node and duplicate its fd table for the child */
for_each_in_list(private_iter, struct server_entry_node, node, &server_entry_mapping) {
if (private_iter->client_badge == parent_badge) {
/* New server_entry_node */
struct server_entry_node *n = (struct server_entry_node *)malloc(sizeof(*n));
n->client_badge = child_badge;
int i;
/* Insert node to server_entry_mapping */
for (i = 0; i < MAX_SERVER_ENTRY_PER_CLIENT; i++) {
int fid;
if ((fid = private_iter->fd_to_fid[i]) != -1) {
pthread_mutex_lock(&server_entrys[fid]->lock);
server_entrys[fid]->refcnt++;
n->fd_to_fid[i] = fid;
pthread_mutex_unlock(&server_entrys[fid]->lock);
} else {
/* unused slots stay explicitly invalid; n was malloc'ed above */
n->fd_to_fid[i] = -1;
}
}
list_append(&n->node, &server_entry_mapping);
break;
}
}
for_each_in_list (area_iter,
struct fmap_area_mapping,
node,
&fmap_area_mappings) {
if (area_iter->client_badge == parent_badge) {
pthread_rwlock_rdlock(&area_iter->vnode->rwlock);
ret = fmap_area_insert(child_badge,area_iter->client_va_start,
area_iter->length,area_iter->vnode,area_iter->file_offset,area_iter->flags);
if (ret < 0) {
pthread_rwlock_unlock(&area_iter->vnode->rwlock);
goto out_fail;
}
// area_iter->vnode->refcnt += 1;
pthread_rwlock_unlock(&area_iter->vnode->rwlock);
}
}
return 0;
out_fail:
return ret;
}
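What fs_finish_fork reproduces is POSIX-style descriptor inheritance: the child's fd table maps to the same server entries as the parent's, so both sides share the open file description, offset included. A small host-side illustration of the semantics (plain POSIX, not a ChCore-specific API):

#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int fd = open("demo.txt", O_CREAT | O_RDWR | O_TRUNC, 0644);
        if (fd < 0)
                return 1;
        pid_t pid = fork();
        if (pid == 0) {
                /* child inherits the descriptor and its file offset */
                write(fd, "child", 5);
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        write(fd, "+parent", 7); /* continues where the child stopped */
        printf("shared offset: %ld\n", (long)lseek(fd, 0, SEEK_CUR)); /* 12 */
        close(fd);
        return 0;
}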

View File

@ -263,6 +263,23 @@ void fsm_dispatch(ipc_msg_t *ipc_msg, u64 client_badge)
ret = fsm_sync_page_cache();
break;
}
case FSM_CHILD_FINISH_FORK: {
ipc_msg_t *finish_ipc_msg;
ipc_struct_t *ipc_struct;
struct fs_request *fr_ptr;
struct mount_point_info_node* iter;
for_each_in_list(iter, struct mount_point_info_node, node, &mount_point_infos) {
ipc_struct = iter->_fs_ipc_struct;
finish_ipc_msg = ipc_create_msg(ipc_struct, sizeof(struct fs_request), 0);
fr_ptr = (struct fs_request *)ipc_get_msg_data(finish_ipc_msg);
fr_ptr->req = FS_CHILD_FINISH_FORK;
fr_ptr->fork.parentBagde = fsm_req->parentBagde;
fr_ptr->fork.childBadge = client_badge;
ret = ipc_call(ipc_struct, finish_ipc_msg);
ipc_destroy_msg(finish_ipc_msg);
}
break;
}
default:
error("%s: %d Not impelemented yet\n", __func__,
((int *)(ipc_get_msg_data(ipc_msg)))[0]);

View File

@ -196,6 +196,7 @@ static void handle_fork(ipc_msg_t *ipc_msg, u64 client_badge)
struct proc_node *client_proc;
struct proc_node *child = NULL;
struct proc_request *pr;
char *name;
/* Get client_proc */
client_proc = get_proc_node(client_badge);
@ -207,18 +208,13 @@ static void handle_fork(ipc_msg_t *ipc_msg, u64 client_badge)
* to complete the missing info.
*
*/
child = new_proc_node(client_proc, NULL);
if (client_proc->name) {
child->name = malloc(strlen(client_proc->name));
strcpy(child->name, client_proc->name);
}
name = malloc(strlen(client_proc->name) + 1);
strcpy(name, client_proc->name);
child = new_proc_node(client_proc, name);
pr = (struct proc_request *)ipc_get_msg_data(ipc_msg);
pr->pid = child->pid;
pr->pcid = child->pcid;
// printf("In procmgr %s, pid: %d, badge: %08x, %d\n",
// __func__, pr->pid, child->badge, __LINE__);
ipc_return(ipc_msg, child->badge);
}