[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking task is in. If a task invoked a memory migration, by doing one of: 1) writing a different nodemask to a cpuset 'mems' file, or 2) writing a task's pid to a different cpuset's 'tasks' file, where the cpuset had its 'memory_migrate' option turned on, then the allocation of the new pages for the migrated task(s) was constrained by the invoking task's cpuset. If this task wasn't in a cpuset that allowed the requested memory nodes, the memory migration would happen to some other nodes that were in that invoking task's cpuset. This was usually surprising and puzzling behaviour: Why didn't the pages move? Why did the pages move -there-? To fix this, temporarily change the invoking task's 'mems_allowed' task_struct field to the nodes the migrating task is moving to, so that new pages can be allocated there. Signed-off-by: Paul Jackson <pj@sgi.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
2741a559a0
commit
e4e364e865
|
@ -833,6 +833,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpuset_migrate_mm
|
||||
*
|
||||
* Migrate memory region from one set of nodes to another.
|
||||
*
|
||||
* Temporarilly set tasks mems_allowed to target nodes of migration,
|
||||
* so that the migration code can allocate pages on these nodes.
|
||||
*
|
||||
* Call holding manage_mutex, so our current->cpuset won't change
|
||||
* during this call, as manage_mutex holds off any attach_task()
|
||||
* calls. Therefore we don't need to take task_lock around the
|
||||
* call to guarantee_online_mems(), as we know no one is changing
|
||||
* our tasks cpuset.
|
||||
*
|
||||
* Hold callback_mutex around the two modifications of our tasks
|
||||
* mems_allowed to synchronize with cpuset_mems_allowed().
|
||||
*
|
||||
* While the mm_struct we are migrating is typically from some
|
||||
* other task, the task_struct mems_allowed that we are hacking
|
||||
* is for our current task, which must allocate new pages for that
|
||||
* migrating memory region.
|
||||
*
|
||||
* We call cpuset_update_task_memory_state() before hacking
|
||||
* our tasks mems_allowed, so that we are assured of being in
|
||||
* sync with our tasks cpuset, and in particular, callbacks to
|
||||
* cpuset_update_task_memory_state() from nested page allocations
|
||||
* won't see any mismatch of our cpuset and task mems_generation
|
||||
* values, so won't overwrite our hacked tasks mems_allowed
|
||||
* nodemask.
|
||||
*/
|
||||
|
||||
static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||
const nodemask_t *to)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
cpuset_update_task_memory_state();
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
tsk->mems_allowed = *to;
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
|
||||
mutex_unlock(&callback_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle user request to change the 'mems' memory placement
|
||||
* of a cpuset. Needs to validate the request, update the
|
||||
|
@ -945,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
|
|||
struct mm_struct *mm = mmarray[i];
|
||||
|
||||
mpol_rebind_mm(mm, &cs->mems_allowed);
|
||||
if (migrate) {
|
||||
do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
|
||||
MPOL_MF_MOVE_ALL);
|
||||
}
|
||||
if (migrate)
|
||||
cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
|
||||
mmput(mm);
|
||||
}
|
||||
|
||||
|
@ -1184,7 +1231,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
|
|||
if (mm) {
|
||||
mpol_rebind_mm(mm, &to);
|
||||
if (is_memory_migrate(cs))
|
||||
do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL);
|
||||
cpuset_migrate_mm(mm, &from, &to);
|
||||
mmput(mm);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue