[PATCH 09/10] Enabling checkpoint relink of unlinked files inside containers

Matt Helsley matthltc at us.ibm.com
Mon Feb 28 20:05:15 PST 2011


When we use relinking to checkpoint unlinked files, we're often working
in a different mount namespace than that of the opened file. This means
that we'll get -EXDEV because the mounts are different, and there is no
reasonable way to map between mounts in different namespaces.

Factor out the core of setns() and use it to quickly switch namespaces
so that we may relink files in different mount namespaces during
checkpoint.

Unlike the setns() syscall, checkpoint already has the mount namespace
pointer and knows the proc namespace ops to use -- so it can skip a few
steps and call the factored-out kern_setns() directly. kern_setns() is
called from both the setns() syscall and checkpoint.

Signed-off-by: Matt Helsley <matthltc at us.ibm.com>
---
 fs/namei.c              |   40 +++++++++++++++++++++++++++++++++-------
 include/linux/nsproxy.h |    3 +++
 kernel/nsproxy.c        |   33 +++++++++++++++++++--------------
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index fcf35b3..f6361f8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -33,6 +33,8 @@
 #include <linux/device_cgroup.h>
 #include <linux/fs_struct.h>
 #ifdef CONFIG_CHECKPOINT
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
 #include <linux/sys-wrapper.h>
 #include <linux/deferqueue.h>
 #include <linux/checkpoint.h>
@@ -2623,27 +2625,51 @@ static int checkpoint_file_relink(struct ckpt_ctx *ctx,
 				  struct file *file,
 				  char new_path[PATH_MAX])
 {
+	struct nsproxy *old_nsproxy;
 	int ret, len;
 
 	/* 
 	 * Relinking arbitrary files without searching a path
 	 * (which is non-existent if the file is unlinked) requires
-	 * special privileges.
+	 * special privileges. Also, since we need to set the mount
+	 * namespace we need CAP_SYS_ADMIN (see sys_setns).
 	 */
-	if (!capable(CAP_DAC_OVERRIDE|CAP_DAC_READ_SEARCH)) {
-		ckpt_err(ctx, -EPERM, "%(T)Relinking unlinked files requires CAP_DAC_{OVERRIDE,READ_SEARCH}\n");
+	if (!capable(CAP_DAC_OVERRIDE|CAP_DAC_READ_SEARCH) || !capable(CAP_SYS_ADMIN)) {
+		ckpt_err(ctx, -EPERM, "%(T)Relinking unlinked files requires CAP_DAC_{OVERRIDE,READ_SEARCH} and CAP_SYS_ADMIN\n");
 		return -EPERM;
 	}
-	ret = checkpoint_fill_relink_fname(ctx, file, new_path, &len);
-	if (ret)
+
+	/* Temporarily set mount namespace to be that of the file to 'relink' */
+	old_nsproxy = current->nsproxy;
+	get_nsproxy(old_nsproxy);
+	ret = kern_setns(&mntns_operations, file->f_path.mnt->mnt_ns);
+	if (ret) {
+		/* We never switched so we never dropped the new ref to old_nsproxy*/
+		put_nsproxy(old_nsproxy);
 		return ret;
+	}
+
+	/* The switch dropped the old ref to old_nsproxy */
+
+	/* Relink it */
+	ret = checkpoint_fill_relink_fname(ctx, file, new_path, &len);
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)%(P)%(V)Failed to fill relink name.\n", file, file->f_op);
+		goto switch_oldns;
+	}
 	ret = checkpoint_make_relink_collection(ctx, new_path, len);
-	if (ret)
-		return ret;
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)%(P)%(V)Failed to make relink collection dir for \"%s\".\n", file, file->f_op, new_path);
+		goto switch_oldns;
+	}
 	ret = do_kern_linkat(&file->f_path, file->f_dentry,
 			     AT_FDCWD, new_path, 0);
 	if (ret)
 		ckpt_err(ctx, ret, "%(T)%(P)%(V)Failed to relink unlinked file.\n", file, file->f_op);
+
+	/* Restore old mount namespace */
+switch_oldns:
+	switch_task_namespaces(current, old_nsproxy);
 	return ret;
 }
 
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 7b370c7..adcef23 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -69,6 +69,9 @@ void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
 	struct fs_struct *);
 
+struct proc_ns_operations;
+int kern_setns(const struct proc_ns_operations *ops, void *ns);
+
 static inline void put_nsproxy(struct nsproxy *ns)
 {
 	if (atomic_dec_and_test(&ns->count)) {
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 62ee344..5c7c59a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -454,11 +454,27 @@ static inline int checkpoint_register_nsproxy(void)
 }
 #endif /* CONFIG_CHECKPOINT */
 
+int kern_setns(const struct proc_ns_operations *ops, void *ns)
+{
+	struct nsproxy *new_nsproxy;
+	int err;
+
+	new_nsproxy = create_new_namespaces(0, current, current->fs);
+	if (IS_ERR(new_nsproxy))
+		return PTR_ERR(new_nsproxy);
+	err = ops->install(new_nsproxy, ns);
+	if (err) {
+		free_nsproxy(new_nsproxy);
+		goto out;
+	}
+	switch_task_namespaces(current, new_nsproxy);
+out:
+	return err;
+}
+
 SYSCALL_DEFINE2(setns, unsigned int, nstype, int, fd)
 {
 	const struct proc_ns_operations *ops;
-	struct task_struct *tsk = current;
-	struct nsproxy *new_nsproxy;
 	struct proc_inode *ei;
 	struct file *file;
 	int err;
@@ -478,18 +494,7 @@ SYSCALL_DEFINE2(setns, unsigned int, nstype, int, fd)
 	    memcmp(&nstype, ops->name.name, ops->name.len)))
 		goto out;
 
-	new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
-	if (IS_ERR(new_nsproxy)) {
-		err = PTR_ERR(new_nsproxy);
-		goto out;
-	}
-
-	err = ops->install(new_nsproxy, ei->ns);
-	if (err) {
-		free_nsproxy(new_nsproxy);
-		goto out;
-	}
-	switch_task_namespaces(tsk, new_nsproxy);
+	err = kern_setns(ops, ei->ns);
 out:
 	fput(file);
 	return err;
-- 
1.6.3.3



More information about the Containers mailing list