[RFC][PATCH] userns: Limit process in a user namespace to what the creator is allowed

Eric W. Biederman ebiederm at xmission.com
Mon Oct 19 20:07:02 UTC 2020


Ordinary unix permissions and posix acls have the ability to
expression that processes show uid or gid match have fewer permissions
than processes without matches that use the other permissions.

The fact a root user in a user namespace can call setgroups and setuid
allows these restrictive permissions to be avoided.  To limit the problems
this can cause populationg the the set of uids that can be switched to,
and the set of gids that can be switched to is an operation that requires
priviliege outside of the user namespace.

This restriction is currently being reexamined as it appears that
there is a way to implement uids and gids that do not map outside of
the user namespace.  Such uids and gids would not require privilege
from outside of the usernamespace to use.  So it becomes important to
find a way to allow calling setuid and setgroups in a user namespace
without allowing processes in a user namespace to do more than the
creator of the user namespace.

To that end capture the groups set with setgroups of the creator of a
user_namespace.  Update the affected permission checks to notice when
something is being allowed with other permissions and only allow the
operation if the creator of the user namespace does not have a user or
group match that would disallow the operation.

The goal is to ensure that creating a user namespace and allowing
the user namespace root to setuid and setgroups does not result
in being permitted to do more than before the user namespace was
created, while still supporting explicitly specified users and
groups to have fewer permissions.

Signed-off-by: Eric W. Biederman <ebiederm at xmission.com>
---

So far only generic_permission is covered, but I think this demonstrates
that the goal is achievable.  Preserving negative acls while allowing
setuid and setgroups.

 fs/namei.c                     |  7 +++++
 fs/posix_acl.c                 | 51 ++++++++++++++++++++++++++++++++++
 include/linux/user_namespace.h | 16 +++++++++++
 kernel/user_namespace.c        | 29 +++++++++++++++++++
 4 files changed, 103 insertions(+)

diff --git a/fs/namei.c b/fs/namei.c
index e99e2a9da0f7..ca06bd81d4e4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -322,6 +322,13 @@ static int acl_permission_check(struct inode *inode, int mask)
 	if (mask & (mode ^ (mode >> 3))) {
 		if (in_group_p(inode->i_gid))
 			mode >>= 3;
+		/*
+		 * Only allow the intersection of what the creator of
+		 * the user namespace is allowed and what everyone is
+		 * allowed.
+		 */
+		else if (userns_in_group_p(inode->i_sb->s_user_ns, inode->i_gid))
+			mode &= (mode >> 3);
 	}
 
 	/* Bits in 'mode' clear that we require? */
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 95882b3f5f62..525803f8f70c 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -340,6 +340,53 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
 }
 EXPORT_SYMBOL(posix_acl_from_mode);
 
+static bool userns_creator_allowed(struct inode *inode,
+				   const struct posix_acl *acl, int want)
+{
+	/* Don't allow access the creator of the user namespace does not have */
+	struct user_namespace *ns = inode->i_sb->s_user_ns;
+	const struct posix_acl_entry *pa, *pe;
+	unsigned short min_perm;
+	bool found = false;
+
+	min_perm = MAY_READ | MAY_WRITE | MAY_EXEC;
+	FOREACH_ACL_ENTRY(pa, acl, pe) {
+                switch(pa->e_tag) {
+                        case ACL_USER_OBJ:
+				/* No need to limit the owner of a file */
+                                break;
+                        case ACL_USER:
+				if (is_userns_creator(ns, pa->e_uid)) {
+					found = true;
+					min_perm &= pa->e_perm;
+				}
+				break;
+                        case ACL_GROUP_OBJ:
+				if (userns_in_group_p(ns, inode->i_gid)) {
+					found = true;
+					min_perm &= pa->e_perm;
+				}
+				break;
+                        case ACL_GROUP:
+				if (userns_in_group_p(ns, pa->e_gid)) {
+					found = true;
+					min_perm &= pa->e_perm;
+				}
+                                break;
+                        case ACL_MASK:
+				if (found)
+					min_perm &= pa->e_perm;
+                                break;
+                        case ACL_OTHER:
+				if (found &&
+				    ((pa->e_perm & want & min_perm) != want))
+					return false;
+				return true;
+                }
+        }
+	return false;
+}
+
 /*
  * Return 0 if current is granted want access to the inode
  * by the acl. Returns -E... otherwise.
@@ -382,6 +429,10 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
                         case ACL_OTHER:
 				if (found)
 					return -EACCES;
+				else if ((current_user_ns() != inode->i_sb->s_user_ns) &&
+					 ((pa->e_perm & want) == want) &&
+					 userns_creator_allowed(inode, acl, want))
+					return 0;
 				else
 					goto check_perm;
 			default:
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6ef1c7109fc4..b4bcb49bed7a 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -62,6 +62,7 @@ struct user_namespace {
 	int			level;
 	kuid_t			owner;
 	kgid_t			group;
+	struct group_info 	*groups;
 	struct ns_common	ns;
 	unsigned long		flags;
 
@@ -137,6 +138,10 @@ extern bool in_userns(const struct user_namespace *ancestor,
 		       const struct user_namespace *child);
 extern bool current_in_userns(const struct user_namespace *target_ns);
 struct ns_common *ns_get_owner(struct ns_common *ns);
+
+bool is_userns_creator(struct user_namespace *ns, kuid_t uid);
+bool userns_in_group_p(struct user_namespace *ns, kgid_t group);
+
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -181,6 +186,17 @@ static inline struct ns_common *ns_get_owner(struct ns_common *ns)
 {
 	return ERR_PTR(-EPERM);
 }
+
+static inline bool is_userns_creator(struct user_namespace *ns, kuid_t uid)
+{
+	return false;
+}
+
+static inline bool userns_in_group_p(struct user_namespace *ns, kgid_t group)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 87804e0371fe..8a4949a32e36 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -117,6 +117,7 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
+	ns->groups = get_group_info(new->group_info);
 	INIT_WORK(&ns->work, free_user_ns);
 	for (i = 0; i < UCOUNT_COUNTS; i++) {
 		ns->ucount_max[i] = INT_MAX;
@@ -143,6 +144,7 @@ int create_user_ns(struct cred *new)
 	key_put(ns->persistent_keyring_register);
 #endif
 	ns_free_inum(&ns->ns);
+	put_group_info(ns->groups);
 fail_free:
 	kmem_cache_free(user_ns_cachep, ns);
 fail_dec:
@@ -194,6 +196,7 @@ static void free_user_ns(struct work_struct *work)
 		retire_userns_sysctls(ns);
 		key_free_user_ns(ns);
 		ns_free_inum(&ns->ns);
+		put_group_info(ns->groups);
 		kmem_cache_free(user_ns_cachep, ns);
 		dec_user_namespaces(ucounts);
 		ns = parent;
@@ -1317,6 +1320,32 @@ const struct proc_ns_operations userns_operations = {
 	.get_parent	= ns_get_owner,
 };
 
+bool is_userns_creator(struct user_namespace *target_ns, kuid_t uid)
+{
+	struct user_namespace *user_ns = current_user_ns();
+
+	for (user_ns = current_user_ns();
+	     (user_ns != target_ns) && user_ns;
+	     user_ns = user_ns->parent) {
+		if (uid_eq(uid, user_ns->owner))
+			return true;
+	}
+	return false;
+}
+
+bool userns_in_group_p(struct user_namespace *target_ns, kgid_t group)
+{
+	struct user_namespace *user_ns;
+
+	for (user_ns = current_user_ns();
+	     (user_ns != target_ns) && user_ns;
+	     user_ns = user_ns->parent) {
+		if (groups_search(user_ns->groups, group))
+			return true;
+	}
+	return false;
+}
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
-- 
2.20.1




More information about the Containers mailing list