[RFC][PATCH 1/2] add user namespace [try #2]

Cedric Le Goater clg at fr.ibm.com
Mon Aug 28 07:56:44 PDT 2006


This patch adds the user namespace.

Basically, it allows a process to unshare its user_struct table,
resetting at the same time its own user_struct and all the associated
accounting.

A new root user (uid == 0) is added to the user namespace upon
creation.  Such root users have full privileges, and it seems that
these privileges should be controlled through some means (process
capabilities?)

Changes [try #2]

	- removed struct user_namespace* argument from find_user()
	- added a root_user per user namespace

Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>
Cc: Andrew Morton <akpm at osdl.org>
Cc: Kirill Korotaev <dev at openvz.org>
Cc: Eric W. Biederman <ebiederm at xmission.com>
Cc: Herbert Poetzl <herbert at 13thfloor.at>
Cc: Serge E. Hallyn <serue at us.ibm.com>
Cc: Dave Hansen <haveblue at us.ibm.com>

---
 include/linux/init_task.h |    2
 include/linux/nsproxy.h   |    2
 include/linux/sched.h     |    4 +
 include/linux/user.h      |   46 +++++++++++++++
 init/Kconfig              |    8 ++
 kernel/fork.c             |    2
 kernel/nsproxy.c          |   15 ++++-
 kernel/sys.c              |    5 +
 kernel/user.c             |  133 ++++++++++++++++++++++++++++++++++++++++++----
 9 files changed, 203 insertions(+), 14 deletions(-)

Index: 2.6.18-rc4-mm3/kernel/user.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/user.c
+++ 2.6.18-rc4-mm3/kernel/user.c
@@ -14,20 +14,29 @@
 #include <linux/bitops.h>
 #include <linux/key.h>
 #include <linux/interrupt.h>
+#include <linux/user.h>
+#include <linux/module.h>
+#include <linux/nsproxy.h>

 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
  * when changing user ID's (ie setuid() and friends).
  */

-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
-#define UIDHASH_SZ		(1 << UIDHASH_BITS)
 #define UIDHASH_MASK		(UIDHASH_SZ - 1)
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+#define uidhashentry(ns, uid)	((ns)->uidhash_table + __uidhashfn((uid)))

 static kmem_cache_t *uid_cachep;
-static struct list_head uidhash_table[UIDHASH_SZ];
+
+struct user_namespace init_user_ns = {	/* namespace used by init_nsproxy */
+	.kref = {
+		.refcount	= ATOMIC_INIT(2), /* NOTE(review): why 2? confirm */
+	},
+	.root_user = &root_user,	/* hashed into it by uid_cache_init() */
+};
+
+EXPORT_SYMBOL_GPL(init_user_ns);

 /*
  * The uidhash_lock is mostly taken from process context, but it is
@@ -84,6 +93,111 @@ static inline struct user_struct *uid_ha
 	return NULL;
 }

+
+#ifdef CONFIG_USER_NS
+
+/*
+ * Create a new user namespace holding one reference, give it a root user
+ * and switch current to a fresh user_struct in it.  @old_ns is not read.
+ * Return the new ns, or NULL after undoing any partial setup on failure.
+ */
+static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+{
+	struct user_namespace *ns;
+	struct user_struct *new_user;
+	int n;
+
+	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+
+	kref_init(&ns->kref);
+	for (n = 0; n < UIDHASH_SZ; ++n)
+		INIT_LIST_HEAD(ns->uidhash_table + n);
+
+	/* Insert new root user.  */
+	ns->root_user = alloc_uid(ns, 0);
+	if (!ns->root_user)
+		goto out_free_ns;
+
+	/* Reset current->user with a new one */
+	new_user = alloc_uid(ns, current->uid);
+	if (!new_user)
+		goto out_free_root;
+	switch_uid(new_user);
+	return ns;
+
+out_free_root:
+	free_uid(ns->root_user);	/* drop the ref alloc_uid() gave us */
+out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+/*
+ * With CLONE_NEWUSER set, store a newly cloned user ns in *new_user.
+ */
+int unshare_user_ns(unsigned long unshare_flags,
+		    struct user_namespace **new_user)
+{
+	struct user_namespace *ns;
+
+	if (!(unshare_flags & CLONE_NEWUSER))
+		return 0;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ns = clone_user_ns(current->nsproxy->user_ns);
+	*new_user = ns;
+	return ns ? 0 : -ENOMEM;
+}
+
+/*
+ * Without CLONE_NEWUSER, only take a reference on tsk's user namespace.
+ * With it (CAP_SYS_ADMIN required), point tsk->nsproxy at a new namespace.
+ * Returns 0, -EPERM if not capable, or -ENOMEM if the new ns can't be made.
+ */
+int copy_user_ns(int flags, struct task_struct *tsk)
+{
+	struct user_namespace *old_ns = tsk->nsproxy->user_ns;
+	struct user_namespace *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_user_ns(old_ns);	/* still held on the plain-copy return below */
+
+	if (!(flags & CLONE_NEWUSER))
+		return 0;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	new_ns = clone_user_ns(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+	tsk->nsproxy->user_ns = new_ns;
+
+out:
+	put_user_ns(old_ns);	/* reached on success and on error alike */
+	return err;
+}
+
+void free_user_ns(struct kref *kref)	/* release hook of put_user_ns() */
+{
+	struct user_namespace *ns;
+
+	ns = container_of(kref, struct user_namespace, kref);
+	kfree(ns);	/* NOTE(review): ns->root_user ref not dropped - confirm */
+}
+
+#endif /* CONFIG_USER_NS */
+
 /*
  * Locate the user_struct for the passed UID.  If found, take a ref on it.  The
  * caller must undo that ref with free_uid().
@@ -94,9 +208,10 @@ struct user_struct *find_user(uid_t uid)
 {
 	struct user_struct *ret;
 	unsigned long flags;
+	struct user_namespace *ns = current->nsproxy->user_ns;

 	spin_lock_irqsave(&uidhash_lock, flags);
-	ret = uid_hash_find(uid, uidhashentry(uid));
+	ret = uid_hash_find(uid, uidhashentry(ns, uid));
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 	return ret;
 }
@@ -120,9 +235,9 @@ void free_uid(struct user_struct *up)
 	}
 }

-struct user_struct * alloc_uid(uid_t uid)
+struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 {
-	struct list_head *hashent = uidhashentry(uid);
+	struct list_head *hashent = uidhashentry(ns, uid);
 	struct user_struct *up;

 	spin_lock_irq(&uidhash_lock);
@@ -200,11 +315,11 @@ static int __init uid_cache_init(void)
 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

 	for(n = 0; n < UIDHASH_SZ; ++n)
-		INIT_LIST_HEAD(uidhash_table + n);
+		INIT_LIST_HEAD(init_user_ns.uidhash_table + n);

 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
-	uid_hash_insert(&root_user, uidhashentry(0));
+	uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
 	spin_unlock_irq(&uidhash_lock);

 	return 0;
Index: 2.6.18-rc4-mm3/include/linux/nsproxy.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/nsproxy.h
+++ 2.6.18-rc4-mm3/include/linux/nsproxy.h
@@ -7,6 +7,7 @@
 struct namespace;
 struct uts_namespace;
 struct ipc_namespace;
+struct user_namespace;

 /*
  * A structure to contain pointers to all per-process
@@ -25,6 +26,7 @@ struct nsproxy {
 	spinlock_t nslock;
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
+	struct user_namespace *user_ns;
 	struct namespace *namespace;
 };
 extern struct nsproxy init_nsproxy;
Index: 2.6.18-rc4-mm3/include/linux/user.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/user.h
+++ 2.6.18-rc4-mm3/include/linux/user.h
@@ -1 +1,47 @@
+#ifndef _LINUX_USER_H
+#define _LINUX_USER_H
+
 #include <asm/user.h>
+
+#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
+#define UIDHASH_SZ		(1 << UIDHASH_BITS)
+
+struct user_namespace {
+	struct kref		kref;		/* see get_user_ns()/put_user_ns() */
+	struct list_head	uidhash_table[UIDHASH_SZ]; /* uid hash buckets */
+	struct user_struct	*root_user;	/* uid 0 user_struct of this ns */
+};
+
+extern struct user_namespace init_user_ns;
+
+static inline void get_user_ns(struct user_namespace *ns)
+{
+	kref_get(&ns->kref);	/* take one reference on ns */
+}
+
+#ifdef CONFIG_USER_NS
+extern int unshare_user_ns(unsigned long unshare_flags,
+			   struct user_namespace **new_user);
+extern int copy_user_ns(int flags, struct task_struct *tsk);
+extern void free_user_ns(struct kref *kref);
+
+static inline void put_user_ns(struct user_namespace *ns)
+{
+	kref_put(&ns->kref, free_user_ns);	/* free_user_ns() is the release hook */
+}
+#else
+static inline int unshare_user_ns(unsigned long unshare_flags,
+			struct user_namespace **new_user)
+{
+	return -EINVAL;	/* NOTE(review): even without CLONE_NEWUSER? confirm */
+}
+static inline int copy_user_ns(int flags, struct task_struct *tsk)
+{
+	return 0;	/* stub: tsk's namespace pointer is left untouched */
+}
+static inline void put_user_ns(struct user_namespace *ns)
+{
+}
+#endif /* CONFIG_USER_NS */
+
+#endif /* _LINUX_USER_H */
Index: 2.6.18-rc4-mm3/kernel/nsproxy.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/nsproxy.c
+++ 2.6.18-rc4-mm3/kernel/nsproxy.c
@@ -19,6 +19,7 @@
 #include <linux/init_task.h>
 #include <linux/namespace.h>
 #include <linux/utsname.h>
+#include <linux/user.h>

 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);

@@ -68,6 +69,8 @@ struct nsproxy *dup_namespaces(struct ns
 			get_uts_ns(ns->uts_ns);
 		if (ns->ipc_ns)
 			get_ipc_ns(ns->ipc_ns);
+		if (ns->user_ns)
+			get_user_ns(ns->user_ns);
 	}

 	return ns;
@@ -88,7 +91,8 @@ int copy_namespaces(int flags, struct ta

 	get_nsproxy(old_ns);

-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+			CLONE_NEWUSER)))
 		return 0;

 	new_ns = clone_namespaces(old_ns);
@@ -111,10 +115,17 @@ int copy_namespaces(int flags, struct ta
 	if (err)
 		goto out_ipc;

+	err = copy_user_ns(flags, tsk);
+	if (err)
+		goto out_user;
+
 out:
 	put_nsproxy(old_ns);
 	return err;

+out_user:
+	if (new_ns->ipc_ns)
+		put_ipc_ns(new_ns->ipc_ns);
 out_ipc:
 	if (new_ns->uts_ns)
 		put_uts_ns(new_ns->uts_ns);
@@ -135,5 +146,7 @@ void free_nsproxy(struct nsproxy *ns)
 			put_uts_ns(ns->uts_ns);
 		if (ns->ipc_ns)
 			put_ipc_ns(ns->ipc_ns);
+		if (ns->user_ns)
+			put_user_ns(ns->user_ns);
 		kfree(ns);
 }
Index: 2.6.18-rc4-mm3/include/linux/sched.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/sched.h
+++ 2.6.18-rc4-mm3/include/linux/sched.h
@@ -26,6 +26,7 @@
 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
+#define CLONE_NEWUSER		0x10000000	/* New user */

 /*
  * Scheduling policies
@@ -242,6 +243,7 @@ extern signed long schedule_timeout_unin
 asmlinkage void schedule(void);

 struct nsproxy;
+struct user_namespace;

 /* Maximum number of active map areas.. This is a random (large) number */
 #define DEFAULT_MAX_MAP_COUNT	65536
@@ -1249,7 +1251,7 @@ extern void set_special_pids(pid_t sessi
 extern void __set_special_pids(pid_t session, pid_t pgrp);

 /* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(struct user_namespace *, uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
 	atomic_inc(&u->__count);
Index: 2.6.18-rc4-mm3/init/Kconfig
===================================================================
--- 2.6.18-rc4-mm3.orig/init/Kconfig
+++ 2.6.18-rc4-mm3/init/Kconfig
@@ -250,6 +250,14 @@ config UTS_NS
 	  vservers, to use uts namespaces to provide different
 	  uts info for different servers.  If unsure, say N.

+config USER_NS
+	bool "User Namespaces"
+	default n
+	help
+	  Support user namespaces.  This allows containers, i.e.
+	  vservers, to use user namespaces to provide different
+	  user info for different servers.  If unsure, say N.
+
 config AUDIT
 	bool "Auditing support"
 	depends on NET
Index: 2.6.18-rc4-mm3/include/linux/init_task.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/init_task.h
+++ 2.6.18-rc4-mm3/include/linux/init_task.h
@@ -7,6 +7,7 @@
 #include <linux/utsname.h>
 #include <linux/lockdep.h>
 #include <linux/ipc.h>
+#include <linux/user.h>

 #define INIT_FDTABLE \
 {							\
@@ -77,6 +78,7 @@ extern struct nsproxy init_nsproxy;
 	.uts_ns		= &init_uts_ns,					\
 	.namespace	= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
+	.user_ns	= &init_user_ns,				\
 }

 #define INIT_SIGHAND(sighand) {						\
Index: 2.6.18-rc4-mm3/kernel/sys.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/sys.c
+++ 2.6.18-rc4-mm3/kernel/sys.c
@@ -33,6 +33,7 @@
 #include <linux/compat.h>
 #include <linux/syscalls.h>
 #include <linux/kprobes.h>
+#include <linux/user.h>

 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1010,13 +1011,13 @@ static int set_user(uid_t new_ruid, int
 {
 	struct user_struct *new_user;

-	new_user = alloc_uid(new_ruid);
+	new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
 	if (!new_user)
 		return -EAGAIN;

 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
-			new_user != &root_user) {
+			new_user != current->nsproxy->user_ns->root_user) {
 		free_uid(new_user);
 		return -EAGAIN;
 	}
Index: 2.6.18-rc4-mm3/kernel/fork.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/fork.c
+++ 2.6.18-rc4-mm3/kernel/fork.c
@@ -991,7 +991,7 @@ static struct task_struct *copy_process(
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-				p->user != &root_user)
+		    p->user != current->nsproxy->user_ns->root_user)
 			goto bad_fork_free;
 	}




More information about the Containers mailing list