[RFC PATCH 5/9] cgroup: add container support for cgroup
Gao feng
gaofeng at cn.fujitsu.com
Mon Dec 17 06:43:31 UTC 2012
With this patch, a cgroup hierarchy mounted inside a container will
have its own cgroupfs_root.
The css pointers of this hierarchy's top cgroup are the same as
those of the container's init task.
Signed-off-by: Gao feng <gaofeng at cn.fujitsu.com>
---
kernel/cgroup.c | 216 +++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 162 insertions(+), 54 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0195db1..ac61027 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1024,21 +1024,13 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
remove_dir(dentry);
}
-/*
- * Call with cgroup_mutex held. Drops reference counts on modules, including
- * any duplicate ones that parse_cgroupfs_options took. If this function
- * returns an error, no reference counts are touched.
- */
-static int rebind_subsystems(struct cgroupfs_root *root,
- unsigned long final_subsys_mask)
+static int __rebind_subsystems(struct cgroupfs_root *root,
+ unsigned long final_subsys_mask)
{
unsigned long added_mask, removed_mask;
struct cgroup *cgrp = &root->top_cgroup;
int i;
- BUG_ON(!mutex_is_locked(&cgroup_mutex));
- BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
-
removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
added_mask = final_subsys_mask & ~root->actual_subsys_mask;
/* Check that any added subsystems are currently free */
@@ -1059,13 +1051,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
}
}
- /* Currently we don't handle adding/removing subsystems when
- * any child cgroups exist. This is theoretically supportable
- * but involves complex error handling, so it's being left until
- * later */
- if (root->number_of_cgroups > 1)
- return -EBUSY;
-
/* Process each subsystem */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
@@ -1113,6 +1098,117 @@ static int rebind_subsystems(struct cgroupfs_root *root,
BUG_ON(cgrp->subsys[i]);
}
}
+
+ return 0;
+}
+
+static int __rebind_subsystems_ns(struct cgroupfs_root *root,
+ unsigned long final_subsys_mask)
+{
+ unsigned long added_mask, removed_mask;
+ struct cgroup *cgrp = &root->top_cgroup;
+ struct cgroup *parent = NULL;
+ struct cgroupfs_root *top_root = NULL;
+ unsigned long bit;
+ int i;
+
+ removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
+ added_mask = final_subsys_mask & ~root->actual_subsys_mask;
+
+ /* Get new top root and new parent */
+ if (final_subsys_mask) {
+ top_root = find_top_root(final_subsys_mask);
+ if (top_root == NULL)
+ return -EINVAL;
+
+ parent = task_cgroup_from_root(root->pid_ns->child_reaper,
+ top_root);
+ BUG_ON(parent == NULL);
+ }
+
+ /* Process each subsystem */
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ struct cgroup_subsys_state *css;
+ bit = 1UL << i;
+ if (bit & added_mask) {
+ BUG_ON(cgrp->subsys[i]);
+ BUG_ON(parent->subsys[ss->subsys_id] == NULL);
+
+ css = parent->subsys[ss->subsys_id];
+ if (!css_tryget(css))
+ goto out;
+ cgrp->subsys[ss->subsys_id] = css;
+
+ /* refcount was already taken, and we're keeping it */
+ } else if (bit & removed_mask) {
+ BUG_ON(cgrp->subsys[i] != cgrp->parent->subsys[i]);
+
+ css_put(cgrp->subsys[i]);
+ cgrp->subsys[i] = NULL;
+
+ /* subsystem is now free - drop reference on module */
+ module_put(ss->module);
+ } else if (bit & final_subsys_mask) {
+ /*
+ * a refcount was taken, but we already had one, so
+ * drop the extra reference.
+ */
+ module_put(ss->module);
+ }
+ }
+
+ root->top_root = top_root;
+ cgrp->parent = parent;
+
+ /* Link to new top_root or unlink when umounting */
+ if (top_root)
+ list_move_tail(&cgrp->allcg_node, &top_root->allcg_list);
+ else
+ list_del_init(&cgrp->allcg_node);
+
+ return 0;
+out:
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ bit = 1UL << i;
+ if ((bit & added_mask) && cgrp->subsys[i]) {
+ css_put(cgrp->subsys[i]);
+ cgrp->subsys[i] = NULL;
+ }
+ }
+ return -EINVAL;
+}
+
+
+/*
+ * Call with cgroup_mutex held. Drops reference counts on modules, including
+ * any duplicate ones that parse_cgroupfs_options took. If this function
+ * returns an error, no reference counts are touched.
+ */
+static int rebind_subsystems(struct cgroupfs_root *root,
+ unsigned long final_subsys_mask)
+{
+ int err = 0;
+
+ BUG_ON(!mutex_is_locked(&cgroup_mutex));
+ BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
+
+ /* Currently we don't handle adding/removing subsystems when
+ * any child cgroups exist. This is theoretically supportable
+ * but involves complex error handling, so it's being left until
+ * later */
+ if (root->number_of_cgroups > 1)
+ return -EBUSY;
+
+ if (test_bit(ROOT_NAMESPACE, &root->flags))
+ err = __rebind_subsystems_ns(root, final_subsys_mask);
+ else
+ err = __rebind_subsystems(root, final_subsys_mask);
+
+ if (err)
+ return err;
+
+
root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
synchronize_rcu();
@@ -1490,6 +1586,10 @@ static int cgroup_test_super(struct super_block *sb, void *data)
&& (opts->subsys_mask != root->subsys_mask))
return 0;
+ /* Pid namespace must match too */
+ if (root->pid_ns != task_active_pid_ns(current))
+ return 0;
+
return 1;
}
@@ -1656,52 +1756,60 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (!strcmp(existing_root->name, root->name))
goto unlock_drop;
- /*
- * We're accessing css_set_count without locking
- * css_set_lock here, but that's OK - it can only be
- * increased by someone holding cgroup_lock, and
- * that's us. The worst that can happen is that we
- * have some link structures left over
- */
- ret = allocate_cg_links(css_set_count, &tmp_cg_links);
- if (ret)
- goto unlock_drop;
+ if (!test_bit(ROOT_NAMESPACE, &root->flags)) {
+ /*
+ * We're accessing css_set_count without locking
+ * css_set_lock here, but that's OK - it can only be
+ * increased by someone holding cgroup_lock, and
+ * that's us. The worst that can happen is that we
+ * have some link structures left over
+ */
+ ret = allocate_cg_links(css_set_count, &tmp_cg_links);
+ if (ret)
+ goto unlock_drop;
+
+ ret = rebind_subsystems(root, root->subsys_mask);
+ if (ret == -EBUSY) {
+ free_cg_links(&tmp_cg_links);
+ goto unlock_drop;
+ }
+ /*
+ * There must be no failure case after here, since
+ * rebinding takes care of subsystems' refcounts,
+ * which are explicitly dropped in the failure exit
+ * path.
+ */
+
+ /* EBUSY should be the only error here */
+ BUG_ON(ret);
+ top_root_count++;
+
+ /* Link the top cgroup in this hierarchy into all
+ * the css_set objects */
+ write_lock(&css_set_lock);
+ for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
+ struct hlist_head *hhead = &css_set_table[i];
+ struct hlist_node *node;
+ struct css_set *cg;
+
+ hlist_for_each_entry(cg, node, hhead, hlist)
+ link_css_set(&tmp_cg_links, cg,
+ root_cgrp);
+ }
+ write_unlock(&css_set_lock);
- ret = rebind_subsystems(root, root->subsys_mask);
- if (ret == -EBUSY) {
free_cg_links(&tmp_cg_links);
- goto unlock_drop;
+ } else {
+ ret = rebind_subsystems(root, root->subsys_mask);
+ if (ret)
+ goto unlock_drop;
}
- /*
- * There must be no failure case after here, since rebinding
- * takes care of subsystems' refcounts, which are explicitly
- * dropped in the failure exit path.
- */
-
- /* EBUSY should be the only error here */
- BUG_ON(ret);
list_add(&root->root_list, &roots);
- top_root_count++;
sb->s_root->d_fsdata = root_cgrp;
root->top_cgroup.dentry = sb->s_root;
- /* Link the top cgroup in this hierarchy into all
- * the css_set objects */
- write_lock(&css_set_lock);
- for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
- struct hlist_head *hhead = &css_set_table[i];
- struct hlist_node *node;
- struct css_set *cg;
-
- hlist_for_each_entry(cg, node, hhead, hlist)
- link_css_set(&tmp_cg_links, cg, root_cgrp);
- }
- write_unlock(&css_set_lock);
-
- free_cg_links(&tmp_cg_links);
-
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
--
1.7.7.6
More information about the Containers
mailing list