[RFC PATCH 2/2] userns/nproc: Add hooks for userns nproc management

Nikolay Borisov kernel at kyup.com
Tue Sep 8 08:11:13 UTC 2015


From: Nikolay Borisov <n.borisov at siteground.com>

This patch introduces calls to the userns_nproc_* functions where
necessary so that processes are accounted for correctly.

Signed-off-by: Nikolay Borisov <kernel at kyup.com>
---
 kernel/cred.c | 36 ++++++++++++++++++++++++++++++++++--
 kernel/exit.c |  9 +++++++++
 kernel/fork.c | 33 +++++++++++++++++++++++++++------
 3 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/kernel/cred.c b/kernel/cred.c
index b7581dc..79565b8 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -320,6 +320,7 @@ struct cred *prepare_exec_creds(void)
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
 	struct cred *new;
+	struct user_namespace *ns;
 	int ret;
 
 	if (
@@ -331,10 +332,15 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		p->real_cred = get_cred(p->cred);
 		get_cred(p->cred);
 		alter_cred_subscribers(p->cred, 2);
+		ns = p->real_cred->user_ns;
 		kdebug("share_creds(%p{%d,%d})",
 		       p->cred, atomic_read(&p->cred->usage),
 		       read_cred_subscribers(p->cred));
 		atomic_inc(&p->cred->user->processes);
+		if (ns != &init_user_ns) {
+			pr_info ("%s: incrementing nproc from due copy_process (CLONE_THREAD)\n", __func__);
+			userns_nproc_inc(ns, from_kuid_munged(ns, p->real_cred->uid));
+		}
 		return 0;
 	}
 
@@ -343,6 +349,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		return -ENOMEM;
 
 	if (clone_flags & CLONE_NEWUSER) {
+		pr_debug("%s: Creating new usernamespace\n", __func__);
 		ret = create_user_ns(new);
 		if (ret < 0)
 			goto error_put;
@@ -369,6 +376,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 
 	atomic_inc(&new->user->processes);
 	p->cred = p->real_cred = get_cred(new);
+	ns = p->real_cred->user_ns;
+	if (ns != &init_user_ns) {
+		pr_info("%s: Incrementing due to not-being a thread\n",	__func__);
+		userns_nproc_inc(ns, from_kuid_munged(ns, p->real_cred->uid));
+	}
 	alter_cred_subscribers(new, 2);
 	validate_creds(new);
 	return 0;
@@ -454,17 +466,37 @@ int commit_creds(struct cred *new)
 	if (!gid_eq(new->fsgid, old->fsgid))
 		key_fsgid_changed(task);
 
+	/* Handle cases when a process is moving from one userns to another */
+	if (old->user_ns != new->user_ns) {
+		if (new->user_ns != &init_user_ns) {
+			pr_info ("\t%s: incrementing user count in %p\n", __func__, new->user_ns);
+			userns_nproc_inc(new->user_ns, from_kuid_munged(new->user_ns, new->uid));
+		}
+		if (old->user_ns != &init_user_ns) {
+			pr_info ("\t%s: decrementing user_count in %p\n", __func__, old->user_ns);
+			userns_nproc_dec(old->user_ns, from_kuid_munged(old->user_ns, old->uid));
+		}
+	}
+
 	/* do it
 	 * RLIMIT_NPROC limits on user->processes have already been checked
 	 * in set_user().
 	 */
 	alter_cred_subscribers(new, 2);
-	if (new->user != old->user)
+	if (new->user != old->user) {
 		atomic_inc(&new->user->processes);
+		if (new->user_ns != &init_user_ns)
+			userns_nproc_inc(new->user_ns,
+				 from_kuid_munged(new->user_ns, new->uid));
+	}
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
-	if (new->user != old->user)
+	if (new->user != old->user) {
 		atomic_dec(&old->user->processes);
+		if (old->user_ns != &init_user_ns)
+			userns_nproc_dec(old->user_ns,
+				 from_kuid_munged(old->user_ns, old->uid));
+	}
 	alter_cred_subscribers(old, -2);
 
 	/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 22fcc05..dde172b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -170,13 +170,22 @@ void release_task(struct task_struct *p)
 {
 	struct task_struct *leader;
 	int zap_leader;
+	struct user_namespace *ns;
+	kuid_t uid;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
 	atomic_dec(&__task_cred(p)->user->processes);
+	ns = get_user_ns(__task_cred(p)->user_ns);
+	uid = __task_cred(p)->uid;
 	rcu_read_unlock();
 
+	if (ns != &init_user_ns)
+		userns_nproc_dec(ns, from_kuid_munged(ns, uid));
+
+	put_user_ns(ns);
+
 	proc_flush_task(p);
 
 	write_lock_irq(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index f9826a3..c537b6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1308,18 +1308,34 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
 	retval = -EAGAIN;
-	if (atomic_read(&p->real_cred->user->processes) >=
-			task_rlimit(p, RLIMIT_NPROC)) {
-		if (p->real_cred->user != INIT_USER &&
-		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
-			goto bad_fork_free;
-	}
+	//If we are in the root namespace use this check
+	if (p->real_cred->user_ns == &init_user_ns) {
+		if (atomic_read(&p->real_cred->user->processes) >=
+				task_rlimit(p, RLIMIT_NPROC)) {
+			if (p->real_cred->user != INIT_USER &&
+			    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
+				goto bad_fork_free;
+		}
 	current->flags &= ~PF_NPROC_EXCEEDED;
+	}
 
 	retval = copy_creds(p, clone_flags);
 	if (retval < 0)
 		goto bad_fork_free;
 
+	//Otherwise perform the non-root userns check here
+	//since we want the stuff in copy_cred to have already happened
+	if (p->real_cred->user_ns != &init_user_ns) {
+		struct user_namespace *ns = p->real_cred->user_ns;
+		int32_t processes = get_userns_nproc(ns, from_kuid_munged(ns, p->real_cred->uid));
+		retval = -EAGAIN;
+
+		if (processes >= task_rlimit(p, RLIMIT_NPROC))
+			goto bad_fork_cleanup_userns_count;
+		else
+			current->flags &= ~PF_NPROC_EXCEEDED;
+	}
+
 	/*
 	 * If multiple threads are within copy_process(), then this check
 	 * triggers too late. This doesn't hurt, the check is only there
@@ -1652,6 +1668,9 @@ bad_fork_cleanup_threadgroup_lock:
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
+bad_fork_cleanup_userns_count:
+	if (p->cred->user_ns != &init_user_ns)
+		userns_nproc_dec(p->cred->user_ns, from_kuid_munged(p->cred->user_ns, p->cred->uid));
 	exit_creds(p);
 bad_fork_free:
 	free_task(p);
@@ -1936,6 +1955,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	int do_sysvsem = 0;
 	int err;
 
+	pr_info("%s begin\n", __func__);
 	/*
 	 * If unsharing a user namespace must also unshare the thread.
 	 */
@@ -2037,6 +2057,7 @@ bad_unshare_cleanup_fs:
 		free_fs_struct(new_fs);
 
 bad_unshare_out:
+	pr_info("%s end\n", __func__);
 	return err;
 }
 
-- 
2.5.0



More information about the Containers mailing list