Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo: - Fixes and a lot of cleanups. Locking cleanup is finally complete. cgroup_mutex is no longer exposed to individual controlelrs which used to cause nasty deadlock issues. Li fixed and cleaned up quite a bit including long standing ones like racy cgroup_path(). - device cgroup now supports proper hierarchy thanks to Aristeu. - perf_event cgroup now supports proper hierarchy. - A new mount option "__DEVEL__sane_behavior" is added. As indicated by the name, this option is to be used for development only at this point and generates a warning message when used. Unfortunately, cgroup interface currently has too many brekages and inconsistencies to implement a consistent and unified hierarchy on top. The new flag is used to collect the behavior changes which are necessary to implement consistent unified hierarchy. It's likely that this flag won't be used verbatim when it becomes ready but will be enabled implicitly along with unified hierarchy. The option currently disables some of broken behaviors in cgroup core and also .use_hierarchy switch in memcg (will be routed through -mm), which can be used to make very unusual hierarchy where nesting is partially honored. It will also be used to implement hierarchy support for blk-throttle which would be impossible otherwise without introducing a full separate set of control knobs. This is essentially versioning of interface which isn't very nice but at this point I can't see any other options which would allow keeping the interface the same while moving towards hierarchy behavior which is at least somewhat sane. The planned unified hierarchy is likely to require some level of adaptation from userland anyway, so I think it'd be best to take the chance and update the interface such that it's supportable in the long term. Maintaining the existing interface does complicate cgroup core but shouldn't put too much strain on individual controllers and I think it'd be manageable for the foreseeable future. Maybe we'll be able to drop it in a decade. Fix up conflicts (including a semantic one adding a new #include to ppc that was uncovered by header the file changes) as per Tejun. * 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits) cpuset: fix compile warning when CONFIG_SMP=n cpuset: fix cpu hotplug vs rebuild_sched_domains() race cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn() cgroup: restore the call to eventfd->poll() cgroup: fix use-after-free when umounting cgroupfs cgroup: fix broken file xattrs devcg: remove parent_cgroup. memcg: force use_hierarchy if sane_behavior cgroup: remove cgrp->top_cgroup cgroup: introduce sane_behavior mount option move cgroupfs_root to include/linux/cgroup.h cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix cgroup: make cgroup_path() not print double slashes Revert "cgroup: remove bind() method from cgroup_subsys." perf: make perf_event cgroup hierarchical cgroup: implement cgroup_is_descendant() cgroup: make sure parent won't be destroyed before its children cgroup: remove bind() method from cgroup_subsys. devcg: remove broken_hierarchy tag cgroup: remove cgroup_lock_is_held() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-04-29 19:14:20 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-04-29 19:14:20 -0700
commit: 191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree: 17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /mm
parent: 46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent: 2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
download: linux-191a712090bb8a10e6f129360eeed2d68f3d4c9a.tar.gz
1 files changed, 49 insertions, 31 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b8dc8e4cbf6a..0f1d92163f30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3321,52 +3321,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
 	schedule_work(&cachep->memcg_params->destroy);
 }
 
-static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
-{
-	char *name;
-	struct dentry *dentry;
-
-	rcu_read_lock();
-	dentry = rcu_dereference(memcg->css.cgroup->dentry);
-	rcu_read_unlock();
-
-	BUG_ON(dentry == NULL);
-
-	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
-			 memcg_cache_id(memcg), dentry->d_name.name);
-
-	return name;
-}
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
 
+/*
+ * Called with memcg_cache_mutex held
+ */
 static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 					 struct kmem_cache *s)
 {
-	char *name;
 	struct kmem_cache *new;
+	static char *tmp_name = NULL;
 
-	name = memcg_cache_name(memcg, s);
-	if (!name)
-		return NULL;
+	lockdep_assert_held(&memcg_cache_mutex);
+
+	/*
+	 * kmem_cache_create_memcg duplicates the given name and
+	 * cgroup_name for this name requires RCU context.
+	 * This static temporary buffer is used to prevent from
+	 * pointless shortliving allocation.
+	 */
+	if (!tmp_name) {
+		tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
+		if (!tmp_name)
+			return NULL;
+	}
+
+	rcu_read_lock();
+	snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
+	rcu_read_unlock();
 
-	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+	new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
 				      (s->flags & ~SLAB_PANIC), s->ctor, s);
 
 	if (new)
 		new->allocflags |= __GFP_KMEMCG;
 
-	kfree(name);
 	return new;
 }
 
-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
 static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 						  struct kmem_cache *cachep)
 {
@@ -5912,6 +5913,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "use_hierarchy",
+		.flags = CFTYPE_INSANE,
 		.write_u64 = mem_cgroup_hierarchy_write,
 		.read_u64 = mem_cgroup_hierarchy_read,
 	},
@@ -6907,6 +6909,21 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 }
 #endif
 
+/*
+ * Cgroup retains root cgroups across [un]mount cycles making it necessary
+ * to verify sane_behavior flag on each mount attempt.
+ */
+static void mem_cgroup_bind(struct cgroup *root)
+{
+	/*
+	 * use_hierarchy is forced with sane_behavior.  cgroup core
+	 * guarantees that @root doesn't have any children, so turning it
+	 * on for the root memcg is enough.
+	 */
+	if (cgroup_sane_behavior(root))
+		mem_cgroup_from_cont(root)->use_hierarchy = true;
+}
+
 struct cgroup_subsys mem_cgroup_subsys = {
 	.name = "memory",
 	.subsys_id = mem_cgroup_subsys_id,
@@ -6917,6 +6934,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.attach = mem_cgroup_move_task,
+	.bind = mem_cgroup_bind,
 	.base_cftypes = mem_cgroup_files,
 	.early_init = 0,
 	.use_id = 1,
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-29 19:14:20 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-29 19:14:20 -0700
commit	191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree	17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /mm
parent	46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent	2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
download	linux-191a712090bb8a10e6f129360eeed2d68f3d4c9a.tar.gz