summary refs log tree commit diff
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2009-11-25 13:31:39 +0100
committerIngo Molnar <mingo@elte.hu>2009-12-06 21:10:56 +0100
commit6ad4c18884e864cf4c77f9074d3d1816063f99cd (patch)
treef09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/cpuset.c
parente1b8090bdf125f8b2e192149547fead7f302a89c (diff)
downloadlinux-6ad4c18884e864cf4c77f9074d3d1816063f99cd.tar.gz
sched: Fix balance vs hotplug race
Since (e761b77: cpu hotplug, sched: Introduce cpu_active_map and redo
sched domain managment) we have cpu_active_mask which is suppose to rule
scheduler migration and load-balancing, except it never (fully) did.

The particular problem being solved here is a crash in try_to_wake_up()
where select_task_rq() ends up selecting an offline cpu because
select_task_rq_fair() trusts the sched_domain tree to reflect the
current state of affairs, similarly select_task_rq_rt() trusts the
root_domain.

However, the sched_domains are updated from CPU_DEAD, which is after the
cpu is taken offline and after stop_machine is done. Therefore it can
race perfectly well with code assuming the domains are right.

Cure this by building the domains from cpu_active_mask on
CPU_DOWN_PREPARE.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c16
1 files changed, 9 insertions, 7 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 43fb7e800028..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 						node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);