[PATCH v3 2/3] fs: introduce uid/gid shifting bind mount
Christoph Hellwig
hch at infradead.org
Tue Feb 18 22:33:13 UTC 2020
On Mon, Feb 17, 2020 at 12:53:06PM -0800, James Bottomley wrote:
>
> v3: added a bind mount base shift at the request of Serge Hallyn
> ---
> fs/attr.c | 127 +++++++++++++++++++++++++++++++++++++++++---------
> fs/exec.c | 3 +-
> fs/inode.c | 10 ++--
> fs/internal.h | 2 +
> fs/mount.h | 1 +
> fs/namei.c | 112 +++++++++++++++++++++++++++++++++++++-------
> fs/namespace.c | 5 ++
> fs/open.c | 25 +++++++++-
> fs/posix_acl.c | 4 +-
> fs/stat.c | 32 +++++++++++--
> include/linux/cred.h | 12 +++++
> include/linux/mount.h | 4 +-
> include/linux/sched.h | 5 ++
> kernel/capability.c | 9 +++-
> kernel/cred.c | 20 ++++++++
> 15 files changed, 317 insertions(+), 54 deletions(-)
>
> diff --git a/fs/attr.c b/fs/attr.c
> index 11201ab7e3b1..d7c5883a4b4c 100644
> --- a/fs/attr.c
> +++ b/fs/attr.c
> @@ -18,14 +18,26 @@
> #include <linux/evm.h>
> #include <linux/ima.h>
>
> +#include "internal.h"
> +#include "mount.h"
> +
> static bool chown_ok(const struct inode *inode, kuid_t uid)
> {
> + kuid_t i_uid = inode->i_uid;
> +
> + if (cred_is_shifted()) {
> + struct mount *m = real_mount(current->mnt);
> +
> + i_uid = KUIDT_INIT(from_kuid(m->mnt_userns, i_uid));
> + i_uid = make_kuid(current_user_ns(), __kuid_val(i_uid));
> + }
> +
> if (uid_eq(current_fsuid(), inode->i_uid) &&
> - uid_eq(uid, inode->i_uid))
> + uid_eq(uid, i_uid))
> return true;
> if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
> return true;
> - if (uid_eq(inode->i_uid, INVALID_UID) &&
> + if (uid_eq(i_uid, INVALID_UID) &&
> ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
> return true;
> return false;
> @@ -33,17 +45,40 @@ static bool chown_ok(const struct inode *inode, kuid_t uid)
>
> static bool chgrp_ok(const struct inode *inode, kgid_t gid)
> {
> + kgid_t i_gid = inode->i_gid;
> + kuid_t i_uid = inode->i_uid;
> +
> + if (cred_is_shifted()) {
> + struct mount *m = real_mount(current->mnt);
> + struct user_namespace *ns = current_user_ns();
> +
> + i_uid = KUIDT_INIT(from_kuid(m->mnt_userns, i_uid));
> + i_uid = make_kuid(ns, __kuid_val(i_uid));
> + i_gid = KGIDT_INIT(from_kgid(m->mnt_userns, i_gid));
> + i_gid = make_kgid(ns, __kgid_val(i_gid));
> + }
> if (uid_eq(current_fsuid(), inode->i_uid) &&
> - (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
> + (in_group_p(gid) || gid_eq(gid, i_gid)))
> return true;
> if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
> return true;
> - if (gid_eq(inode->i_gid, INVALID_GID) &&
> + if (gid_eq(i_gid, INVALID_GID) &&
> ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
> return true;
> return false;
> }
>
> +int in_group_p_shifted(kgid_t grp)
> +{
> + if (cred_is_shifted()) {
> + struct mount *m = real_mount(current->mnt);
> +
> + grp = KGIDT_INIT(from_kgid(m->mnt_userns, grp));
> + grp = make_kgid(current_user_ns(), __kgid_val(grp));
> + }
> + return in_group_p(grp);
> +}
> +
> /**
> * setattr_prepare - check if attribute changes to a dentry are allowed
> * @dentry: dentry to check
> @@ -89,9 +124,10 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
> if (ia_valid & ATTR_MODE) {
> if (!inode_owner_or_capable(inode))
> return -EPERM;
> +
> /* Also check the setgid bit! */
> - if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
> - inode->i_gid) &&
> + if (!in_group_p_shifted((ia_valid & ATTR_GID) ? attr->ia_gid :
> + inode->i_gid) &&
> !capable_wrt_inode_uidgid(inode, CAP_FSETID))
> attr->ia_mode &= ~S_ISGID;
> }
> @@ -192,7 +228,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
> if (ia_valid & ATTR_MODE) {
> umode_t mode = attr->ia_mode;
>
> - if (!in_group_p(inode->i_gid) &&
> + if (!in_group_p_shifted(inode->i_gid) &&
> !capable_wrt_inode_uidgid(inode, CAP_FSETID))
> mode &= ~S_ISGID;
> inode->i_mode = mode;
> @@ -200,6 +236,23 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
> }
> EXPORT_SYMBOL(setattr_copy);
>
> +void cred_shift(kuid_t *uid, kgid_t *gid)
> +{
> + if (cred_is_shifted()) {
> + struct user_namespace *ns = current_user_ns();
> + struct mount *m = real_mount(current->mnt);
> +
> + if (uid) {
> + *uid = KUIDT_INIT(from_kuid(m->mnt_userns, *uid));
> + *uid = make_kuid(ns, __kuid_val(*uid));
> + }
> + if (gid) {
> + *gid = KGIDT_INIT(from_kgid(m->mnt_userns, *gid));
> + *gid = make_kgid(ns, __kgid_val(*gid));
> + }
> + }
> +}
> +
> /**
> * notify_change - modify attributes of a filesytem object
> * @dentry: object affected
> @@ -229,6 +282,9 @@ int notify_change(const struct path *path, struct iattr * attr,
> int error;
> struct timespec64 now;
> unsigned int ia_valid = attr->ia_valid;
> + const struct cred *cred;
> + kuid_t i_uid = inode->i_uid;
> + kgid_t i_gid = inode->i_gid;
>
> WARN_ON_ONCE(!inode_is_locked(inode));
>
> @@ -237,18 +293,30 @@ int notify_change(const struct path *path, struct iattr * attr,
> return -EPERM;
> }
>
> + cred = change_userns_creds(path);
> + if (cred) {
> + struct mount *m = real_mount(path->mnt);
> +
> + attr->ia_uid = KUIDT_INIT(from_kuid(m->mnt_ns->user_ns, attr->ia_uid));
> + attr->ia_uid = make_kuid(m->mnt_userns, __kuid_val(attr->ia_uid));
> + attr->ia_gid = KGIDT_INIT(from_kgid(m->mnt_ns->user_ns, attr->ia_gid));
> + attr->ia_gid = make_kgid(m->mnt_userns, __kgid_val(attr->ia_gid));
> + }
> +
> /*
> * If utimes(2) and friends are called with times == NULL (or both
> * times are UTIME_NOW), then we need to check for write permission
> */
> if (ia_valid & ATTR_TOUCH) {
> - if (IS_IMMUTABLE(inode))
> - return -EPERM;
> + if (IS_IMMUTABLE(inode)) {
> + error = -EPERM;
> + goto err;
> + }
>
> if (!inode_owner_or_capable(inode)) {
> error = inode_permission(inode, MAY_WRITE);
> if (error)
> - return error;
> + goto err;
> }
> }
>
> @@ -274,7 +342,7 @@ int notify_change(const struct path *path, struct iattr * attr,
> if (ia_valid & ATTR_KILL_PRIV) {
> error = security_inode_need_killpriv(dentry);
> if (error < 0)
> - return error;
> + goto err;
> if (error == 0)
> ia_valid = attr->ia_valid &= ~ATTR_KILL_PRIV;
> }
> @@ -305,34 +373,49 @@ int notify_change(const struct path *path, struct iattr * attr,
> attr->ia_mode &= ~S_ISGID;
> }
> }
> - if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
> - return 0;
> + if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) {
> + error = 0;
> + goto err;
> + }
>
> /*
> * Verify that uid/gid changes are valid in the target
> * namespace of the superblock.
> */
> + error = -EOVERFLOW;
> if (ia_valid & ATTR_UID &&
> !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
> - return -EOVERFLOW;
> + goto err;
> +
> if (ia_valid & ATTR_GID &&
> !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
> - return -EOVERFLOW;
> + goto err;
>
> /* Don't allow modifications of files with invalid uids or
> * gids unless those uids & gids are being made valid.
> */
> - if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
> - return -EOVERFLOW;
> - if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
> - return -EOVERFLOW;
> + if (cred_is_shifted()) {
> + struct user_namespace *ns = current_user_ns();
> + struct mount *m = real_mount(current->mnt);
> +
> + i_uid = KUIDT_INIT(from_kuid(m->mnt_userns, i_uid));
> + i_uid = make_kuid(ns, __kuid_val(i_uid));
> + i_gid = KGIDT_INIT(from_kgid(m->mnt_userns, i_gid));
> + i_gid = make_kgid(ns, __kgid_val(i_gid));
> + }
> +
> + if (!(ia_valid & ATTR_UID) && !uid_valid(i_uid))
> + goto err;
> +
> + if (!(ia_valid & ATTR_GID) && !gid_valid(i_gid))
> + goto err;
>
> error = security_inode_setattr(dentry, attr);
> if (error)
> - return error;
> + goto err;
> error = try_break_deleg(inode, delegated_inode);
> if (error)
> - return error;
> + goto err;
>
> if (inode->i_op->setattr)
> error = inode->i_op->setattr(dentry, attr);
> @@ -345,6 +428,8 @@ int notify_change(const struct path *path, struct iattr * attr,
> evm_inode_post_setattr(dentry, ia_valid);
> }
>
> + err:
> + revert_userns_creds(cred);
> return error;
> }
> EXPORT_SYMBOL(notify_change);
> diff --git a/fs/exec.c b/fs/exec.c
> index db17be51b112..926bab39ed45 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1543,13 +1543,14 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
>
> /* Be careful if suid/sgid is set */
> inode_lock(inode);
> -
> /* reload atomically mode/uid/gid now that lock held */
> mode = inode->i_mode;
> uid = inode->i_uid;
> gid = inode->i_gid;
> inode_unlock(inode);
>
> + cred_shift(&uid, &gid);
> +
> /* We ignore suid/sgid if there are no mappings for them in the ns */
> if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
> !kgid_has_mapping(bprm->cred->user_ns, gid))
> diff --git a/fs/inode.c b/fs/inode.c
> index be14d3fcbee1..ae75b6396786 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2064,7 +2064,7 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
> if (S_ISDIR(mode))
> mode |= S_ISGID;
> else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
> - !in_group_p(inode->i_gid) &&
> + !in_group_p_shifted(inode->i_gid) &&
> !capable_wrt_inode_uidgid(dir, CAP_FSETID))
> mode &= ~S_ISGID;
> } else
> @@ -2083,12 +2083,16 @@ EXPORT_SYMBOL(inode_init_owner);
> bool inode_owner_or_capable(const struct inode *inode)
> {
> struct user_namespace *ns;
> + kuid_t uid = inode->i_uid;
>
> - if (uid_eq(current_fsuid(), inode->i_uid))
> + if (uid_eq(current_fsuid(), uid))
> return true;
>
> ns = current_user_ns();
> - if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
> +
> + cred_shift(&uid, NULL);
> +
> + if (kuid_has_mapping(ns, uid) && ns_capable(ns, CAP_FOWNER))
> return true;
> return false;
> }
> diff --git a/fs/internal.h b/fs/internal.h
> index 80d89ddb9b28..d2adcdb3eb2e 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -73,6 +73,8 @@ long do_symlinkat(const char __user *oldname, int newdfd,
> const char __user *newname);
> int do_linkat(int olddfd, const char __user *oldname, int newdfd,
> const char __user *newname, int flags);
> +const struct cred *change_userns_creds(const struct path *p);
> +void revert_userns_creds(const struct cred *cred);
>
> /*
> * namespace.c
> diff --git a/fs/mount.h b/fs/mount.h
> index 711a4093e475..c3bfc6ced4c7 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -72,6 +72,7 @@ struct mount {
> int mnt_expiry_mark; /* true if marked for expiry */
> struct hlist_head mnt_pins;
> struct hlist_head mnt_stuck_children;
> + struct user_namespace *mnt_userns; /* mapping for underlying mount uid/gid */
> } __randomize_layout;
>
> #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
> diff --git a/fs/namei.c b/fs/namei.c
> index 531ac55c7e67..369bd18c7330 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -124,6 +124,42 @@
>
> #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
>
> +const struct cred *change_userns_creds(const struct path *p)
> +{
> + struct mount *m = real_mount(p->mnt);
> +
> + if ((p->mnt->mnt_flags & MNT_SHIFT) == 0)
> + return NULL;
> +
> + if (current->nsproxy->mnt_ns->user_ns != m->mnt_ns->user_ns)
> + return NULL;
> +
> + if (current->mnt != p->mnt) {
> + struct cred *cred;
> + struct user_namespace *user_ns = m->mnt_ns->user_ns;
> + kuid_t fsuid = current->cred->fsuid;
> + kgid_t fsgid = current->cred->fsgid;
> +
> + if (current->mnt_cred)
> + put_cred(current->mnt_cred);
> + cred = prepare_creds();
> + fsuid = KUIDT_INIT(from_kuid(user_ns, fsuid));
> + fsgid = KGIDT_INIT(from_kgid(user_ns, fsgid));
> + cred->fsuid = make_kuid(m->mnt_userns, __kuid_val(fsuid));
> + cred->fsgid = make_kgid(m->mnt_userns, __kgid_val(fsgid));
> + current->mnt = p->mnt; /* no reference needed */
> + current->mnt_cred = cred;
> + }
> + return override_creds(current->mnt_cred);
> +}
> +
> +void revert_userns_creds(const struct cred *cred)
> +{
> + if (!cred)
> + return;
> + revert_creds(cred);
> +}
> +
> struct filename *
> getname_flags(const char __user *filename, int flags, int *empty)
> {
> @@ -303,7 +339,7 @@ static int acl_permission_check(struct inode *inode, int mask)
> return error;
> }
>
> - if (in_group_p(inode->i_gid))
> + if (in_group_p_shifted(inode->i_gid))
> mode >>= 3;
> }
>
> @@ -366,7 +402,6 @@ int generic_permission(struct inode *inode, int mask)
> if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
> if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
> return 0;
> -
> return -EACCES;
> }
> EXPORT_SYMBOL(generic_permission);
> @@ -1897,6 +1932,7 @@ static int walk_component(struct nameidata *nd, int flags)
> struct inode *inode;
> unsigned seq;
> int err;
> + const struct cred *cred;
> /*
> * "." and ".." are special - ".." especially so because it has
> * to be able to know about the current root directory and
> @@ -1908,25 +1944,31 @@ static int walk_component(struct nameidata *nd, int flags)
> put_link(nd);
> return err;
> }
> + cred = change_userns_creds(&nd->path);
> err = lookup_fast(nd, &path, &inode, &seq);
> if (unlikely(err <= 0)) {
> if (err < 0)
> - return err;
> + goto out;
> path.dentry = lookup_slow(&nd->last, nd->path.dentry,
> nd->flags);
> - if (IS_ERR(path.dentry))
> - return PTR_ERR(path.dentry);
> + if (IS_ERR(path.dentry)) {
> + err = PTR_ERR(path.dentry);
> + goto out;
> + }
>
> path.mnt = nd->path.mnt;
> err = follow_managed(&path, nd);
> if (unlikely(err < 0))
> - return err;
> + goto out;
>
> seq = 0; /* we are already out of RCU mode */
> inode = d_backing_inode(path.dentry);
> }
>
> - return step_into(nd, &path, flags, inode, seq);
> + err = step_into(nd, &path, flags, inode, seq);
> + out:
> + revert_userns_creds(cred);
> + return err;
> }
>
> /*
> @@ -2180,8 +2222,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
> for(;;) {
> u64 hash_len;
> int type;
> + const struct cred *cred = change_userns_creds(&nd->path);
>
> err = may_lookup(nd);
> + revert_userns_creds(cred);
> if (err)
> return err;
>
> @@ -2373,12 +2417,17 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
> static const char *trailing_symlink(struct nameidata *nd)
> {
> const char *s;
> + const struct cred *cred = change_userns_creds(&nd->path);
> int error = may_follow_link(nd);
> - if (unlikely(error))
> - return ERR_PTR(error);
> + if (unlikely(error)) {
> + s = ERR_PTR(error);
> + goto out;
> + }
> nd->flags |= LOOKUP_PARENT;
> nd->stack[0].name = NULL;
> s = get_link(nd);
> + out:
> + revert_userns_creds(cred);
> return s ? s : "";
> }
>
> @@ -3343,6 +3392,7 @@ static int do_last(struct nameidata *nd,
> struct inode *inode;
> struct path path;
> int error;
> + const struct cred *cred = change_userns_creds(&nd->path);
>
> nd->flags &= ~LOOKUP_PARENT;
> nd->flags |= op->intent;
> @@ -3350,7 +3400,7 @@ static int do_last(struct nameidata *nd,
> if (nd->last_type != LAST_NORM) {
> error = handle_dots(nd, nd->last_type);
> if (unlikely(error))
> - return error;
> + goto err;
> goto finish_open;
> }
>
> @@ -3363,7 +3413,7 @@ static int do_last(struct nameidata *nd,
> goto finish_lookup;
>
> if (error < 0)
> - return error;
> + goto err;
>
> BUG_ON(nd->inode != dir->d_inode);
> BUG_ON(nd->flags & LOOKUP_RCU);
> @@ -3376,12 +3426,14 @@ static int do_last(struct nameidata *nd,
> */
> error = complete_walk(nd);
> if (error)
> - return error;
> + goto err;
>
> audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
> /* trailing slashes? */
> - if (unlikely(nd->last.name[nd->last.len]))
> - return -EISDIR;
> + if (unlikely(nd->last.name[nd->last.len])) {
> + error = -EISDIR;
> + goto err;
> + }
> }
>
> if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
> @@ -3437,7 +3489,7 @@ static int do_last(struct nameidata *nd,
>
> error = follow_managed(&path, nd);
> if (unlikely(error < 0))
> - return error;
> + goto err;
>
> /*
> * create/update audit record if it already exists.
> @@ -3446,7 +3498,8 @@ static int do_last(struct nameidata *nd,
>
> if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
> path_to_nameidata(&path, nd);
> - return -EEXIST;
> + error = -EEXIST;
> + goto err;
> }
>
> seq = 0; /* out of RCU mode, so the value doesn't matter */
> @@ -3454,12 +3507,12 @@ static int do_last(struct nameidata *nd,
> finish_lookup:
> error = step_into(nd, &path, 0, inode, seq);
> if (unlikely(error))
> - return error;
> + goto err;
> finish_open:
> /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
> error = complete_walk(nd);
> if (error)
> - return error;
> + goto err;
> audit_inode(nd->name, nd->path.dentry, 0);
> if (open_flag & O_CREAT) {
> error = -EISDIR;
> @@ -3501,6 +3554,8 @@ static int do_last(struct nameidata *nd,
> }
> if (got_write)
> mnt_drop_write(nd->path.mnt);
> + err:
> + revert_userns_creds(cred);
> return error;
> }
>
> @@ -3819,6 +3874,7 @@ long do_mknodat(int dfd, const char __user *filename, umode_t mode,
> struct path path;
> int error;
> unsigned int lookup_flags = 0;
> + const struct cred *cred;
>
> error = may_mknod(mode);
> if (error)
> @@ -3828,6 +3884,7 @@ long do_mknodat(int dfd, const char __user *filename, umode_t mode,
> if (IS_ERR(dentry))
> return PTR_ERR(dentry);
>
> + cred = change_userns_creds(&path);
> if (!IS_POSIXACL(path.dentry->d_inode))
> mode &= ~current_umask();
> error = security_path_mknod(&path, dentry, mode, dev);
> @@ -3849,6 +3906,7 @@ long do_mknodat(int dfd, const char __user *filename, umode_t mode,
> }
> out:
> done_path_create(&path, dentry);
> + revert_userns_creds(cred);
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> goto retry;
> @@ -3899,18 +3957,21 @@ long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
> struct path path;
> int error;
> unsigned int lookup_flags = LOOKUP_DIRECTORY;
> + const struct cred *cred;
>
> retry:
> dentry = user_path_create(dfd, pathname, &path, lookup_flags);
> if (IS_ERR(dentry))
> return PTR_ERR(dentry);
>
> + cred = change_userns_creds(&path);
> if (!IS_POSIXACL(path.dentry->d_inode))
> mode &= ~current_umask();
> error = security_path_mkdir(&path, dentry, mode);
> if (!error)
> error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
> done_path_create(&path, dentry);
> + revert_userns_creds(cred);
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> goto retry;
> @@ -3977,12 +4038,14 @@ long do_rmdir(int dfd, const char __user *pathname)
> struct qstr last;
> int type;
> unsigned int lookup_flags = 0;
> + const struct cred *cred;
> retry:
> name = filename_parentat(dfd, getname(pathname), lookup_flags,
> &path, &last, &type);
> if (IS_ERR(name))
> return PTR_ERR(name);
>
> + cred = change_userns_creds(&path);
> switch (type) {
> case LAST_DOTDOT:
> error = -ENOTEMPTY;
> @@ -4018,6 +4081,7 @@ long do_rmdir(int dfd, const char __user *pathname)
> inode_unlock(path.dentry->d_inode);
> mnt_drop_write(path.mnt);
> exit1:
> + revert_userns_creds(cred);
> path_put(&path);
> putname(name);
> if (retry_estale(error, lookup_flags)) {
> @@ -4107,11 +4171,13 @@ long do_unlinkat(int dfd, struct filename *name)
> struct inode *inode = NULL;
> struct inode *delegated_inode = NULL;
> unsigned int lookup_flags = 0;
> + const struct cred *cred;
> retry:
> name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
> if (IS_ERR(name))
> return PTR_ERR(name);
>
> + cred = change_userns_creds(&path);
> error = -EISDIR;
> if (type != LAST_NORM)
> goto exit1;
> @@ -4149,6 +4215,7 @@ long do_unlinkat(int dfd, struct filename *name)
> }
> mnt_drop_write(path.mnt);
> exit1:
> + revert_userns_creds(cred);
> path_put(&path);
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> @@ -4213,6 +4280,7 @@ long do_symlinkat(const char __user *oldname, int newdfd,
> struct dentry *dentry;
> struct path path;
> unsigned int lookup_flags = 0;
> + const struct cred *cred;
>
> from = getname(oldname);
> if (IS_ERR(from))
> @@ -4223,6 +4291,7 @@ long do_symlinkat(const char __user *oldname, int newdfd,
> if (IS_ERR(dentry))
> goto out_putname;
>
> + cred = change_userns_creds(&path);
> error = security_path_symlink(&path, dentry, from->name);
> if (!error)
> error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
> @@ -4231,6 +4300,7 @@ long do_symlinkat(const char __user *oldname, int newdfd,
> lookup_flags |= LOOKUP_REVAL;
> goto retry;
> }
> + revert_userns_creds(cred);
> out_putname:
> putname(from);
> return error;
> @@ -4344,6 +4414,7 @@ int do_linkat(int olddfd, const char __user *oldname, int newdfd,
> struct inode *delegated_inode = NULL;
> int how = 0;
> int error;
> + const struct cred *cred;
>
> if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
> return -EINVAL;
> @@ -4371,6 +4442,7 @@ int do_linkat(int olddfd, const char __user *oldname, int newdfd,
> if (IS_ERR(new_dentry))
> goto out;
>
> + cred = change_userns_creds(&new_path);
> error = -EXDEV;
> if (old_path.mnt != new_path.mnt)
> goto out_dput;
> @@ -4382,6 +4454,7 @@ int do_linkat(int olddfd, const char __user *oldname, int newdfd,
> goto out_dput;
> error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
> out_dput:
> + revert_userns_creds(cred);
> done_path_create(&new_path, new_dentry);
> if (delegated_inode) {
> error = break_deleg_wait(&delegated_inode);
> @@ -4601,6 +4674,7 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
> unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> bool should_retry = false;
> int error;
> + const struct cred *cred;
>
> if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
> return -EINVAL;
> @@ -4630,6 +4704,7 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
> goto exit1;
> }
>
> + cred = change_userns_creds(&new_path);
> error = -EXDEV;
> if (old_path.mnt != new_path.mnt)
> goto exit2;
> @@ -4714,6 +4789,7 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
> }
> mnt_drop_write(old_path.mnt);
> exit2:
> + revert_userns_creds(cred);
> if (retry_estale(error, lookup_flags))
> should_retry = true;
> path_put(&new_path);
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 69fb23ae3d8f..4720647588ab 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -200,6 +200,8 @@ static struct mount *alloc_vfsmnt(const char *name)
> mnt->mnt_writers = 0;
> #endif
>
> + mnt->mnt_userns = get_user_ns(&init_user_ns);
> +
> INIT_HLIST_NODE(&mnt->mnt_hash);
> INIT_LIST_HEAD(&mnt->mnt_child);
> INIT_LIST_HEAD(&mnt->mnt_mounts);
> @@ -1044,6 +1046,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
> mnt->mnt.mnt_root = dget(root);
> mnt->mnt_mountpoint = mnt->mnt.mnt_root;
> mnt->mnt_parent = mnt;
> + put_user_ns(mnt->mnt_userns);
> + mnt->mnt_userns = get_user_ns(old->mnt_userns);
> lock_mount_hash();
> list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
> unlock_mount_hash();
> @@ -1102,6 +1106,7 @@ static void cleanup_mnt(struct mount *mnt)
> dput(mnt->mnt.mnt_root);
> deactivate_super(mnt->mnt.mnt_sb);
> mnt_free_id(mnt);
> + put_user_ns(mnt->mnt_userns);
> call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
> }
>
> diff --git a/fs/open.c b/fs/open.c
> index db6758b9636a..d27b90dce64d 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -456,11 +456,13 @@ int ksys_chdir(const char __user *filename)
> struct path path;
> int error;
> unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
> + const struct cred *cred;
> retry:
> error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
> if (error)
> goto out;
>
> + cred = change_userns_creds(&path);
> error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
> if (error)
> goto dput_and_out;
> @@ -468,6 +470,7 @@ int ksys_chdir(const char __user *filename)
> set_fs_pwd(current->fs, &path);
>
> dput_and_out:
> + revert_userns_creds(cred);
> path_put(&path);
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> @@ -486,11 +489,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
> {
> struct fd f = fdget_raw(fd);
> int error;
> + const struct cred *cred;
>
> error = -EBADF;
> if (!f.file)
> goto out;
>
> + cred = change_userns_creds(&f.file->f_path);
> error = -ENOTDIR;
> if (!d_can_lookup(f.file->f_path.dentry))
> goto out_putf;
> @@ -499,6 +504,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
> if (!error)
> set_fs_pwd(current->fs, &f.file->f_path);
> out_putf:
> + revert_userns_creds(cred);
> fdput(f);
> out:
> return error;
> @@ -547,11 +553,13 @@ static int chmod_common(const struct path *path, umode_t mode)
> struct inode *inode = path->dentry->d_inode;
> struct inode *delegated_inode = NULL;
> struct iattr newattrs;
> + const struct cred *cred;
> int error;
>
> + cred = change_userns_creds(path);
> error = mnt_want_write(path->mnt);
> if (error)
> - return error;
> + goto out;
> retry_deleg:
> inode_lock(inode);
> error = security_path_chmod(path, mode);
> @@ -568,6 +576,8 @@ static int chmod_common(const struct path *path, umode_t mode)
> goto retry_deleg;
> }
> mnt_drop_write(path->mnt);
> + out:
> + revert_userns_creds(cred);
> return error;
> }
>
> @@ -666,6 +676,7 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
> struct path path;
> int error = -EINVAL;
> int lookup_flags;
> + const struct cred *cred;
>
> if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
> goto out;
> @@ -677,12 +688,14 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
> error = user_path_at(dfd, filename, lookup_flags, &path);
> if (error)
> goto out;
> + cred = change_userns_creds(&path);
> error = mnt_want_write(path.mnt);
> if (error)
> goto out_release;
> error = chown_common(&path, user, group);
> mnt_drop_write(path.mnt);
> out_release:
> + revert_userns_creds(cred);
> path_put(&path);
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> @@ -713,10 +726,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
> {
> struct fd f = fdget(fd);
> int error = -EBADF;
> + const struct cred *cred;
>
> if (!f.file)
> goto out;
>
> + cred = change_userns_creds(&f.file->f_path);
> error = mnt_want_write_file(f.file);
> if (error)
> goto out_fput;
> @@ -724,6 +739,7 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
> error = chown_common(&f.file->f_path, user, group);
> mnt_drop_write_file(f.file);
> out_fput:
> + revert_userns_creds(cred);
> fdput(f);
> out:
> return error;
> @@ -911,8 +927,13 @@ EXPORT_SYMBOL(file_path);
> */
> int vfs_open(const struct path *path, struct file *file)
> {
> + int ret;
> + const struct cred *cred = change_userns_creds(path);
> +
> file->f_path = *path;
> - return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
> + ret = do_dentry_open(file, d_backing_inode(path->dentry), NULL);
> + revert_userns_creds(cred);
> + return ret;
> }
>
> struct file *dentry_open(const struct path *path, int flags,
> diff --git a/fs/posix_acl.c b/fs/posix_acl.c
> index 249672bf54fe..ff777110f3da 100644
> --- a/fs/posix_acl.c
> +++ b/fs/posix_acl.c
> @@ -364,7 +364,7 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
> goto mask;
> break;
> case ACL_GROUP_OBJ:
> - if (in_group_p(inode->i_gid)) {
> + if (in_group_p_shifted(inode->i_gid)) {
> found = 1;
> if ((pa->e_perm & want) == want)
> goto mask;
> @@ -655,7 +655,7 @@ int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
> return error;
> if (error == 0)
> *acl = NULL;
> - if (!in_group_p(inode->i_gid) &&
> + if (!in_group_p_shifted(inode->i_gid) &&
> !capable_wrt_inode_uidgid(inode, CAP_FSETID))
> mode &= ~S_ISGID;
> *mode_p = mode;
> diff --git a/fs/stat.c b/fs/stat.c
> index 030008796479..634b8d13ed51 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -22,6 +22,7 @@
> #include <asm/unistd.h>
>
> #include "internal.h"
> +#include "mount.h"
>
> /**
> * generic_fillattr - Fill in the basic attributes from the inode struct
> @@ -50,6 +51,23 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
> }
> EXPORT_SYMBOL(generic_fillattr);
>
> +static void shift_check(struct vfsmount *mnt, struct kstat *stat)
> +{
> + struct mount *m = real_mount(mnt);
> + struct user_namespace *user_ns = m->mnt_ns->user_ns;
> +
> + if ((mnt->mnt_flags & MNT_SHIFT) == 0)
> + return;
> +
> + if (current->nsproxy->mnt_ns->user_ns != m->mnt_ns->user_ns)
> + return;
> +
> + stat->uid = KUIDT_INIT(from_kuid(m->mnt_userns, stat->uid));
> + stat->uid = make_kuid(user_ns, __kuid_val(stat->uid));
> + stat->gid = KGIDT_INIT(from_kgid(m->mnt_userns, stat->gid));
> + stat->gid = make_kgid(user_ns, __kgid_val(stat->gid));
> +}
> +
> /**
> * vfs_getattr_nosec - getattr without security checks
> * @path: file to get attributes from
> @@ -67,6 +85,7 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> u32 request_mask, unsigned int query_flags)
> {
> struct inode *inode = d_backing_inode(path->dentry);
> + int ret;
>
> memset(stat, 0, sizeof(*stat));
> stat->result_mask |= STATX_BASIC_STATS;
> @@ -79,12 +98,17 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> if (IS_AUTOMOUNT(inode))
> stat->attributes |= STATX_ATTR_AUTOMOUNT;
>
> + ret = 0;
> if (inode->i_op->getattr)
> - return inode->i_op->getattr(path, stat, request_mask,
> - query_flags);
> + ret = inode->i_op->getattr(path, stat, request_mask,
> + query_flags);
> + else
> + generic_fillattr(inode, stat);
>
> - generic_fillattr(inode, stat);
> - return 0;
> + if (!ret)
> + shift_check(path->mnt, stat);
> +
> + return ret;
> }
> EXPORT_SYMBOL(vfs_getattr_nosec);
>
> diff --git a/include/linux/cred.h b/include/linux/cred.h
> index 18639c069263..d29638617844 100644
> --- a/include/linux/cred.h
> +++ b/include/linux/cred.h
> @@ -59,6 +59,7 @@ extern struct group_info *groups_alloc(int);
> extern void groups_free(struct group_info *);
>
> extern int in_group_p(kgid_t);
> +extern int in_group_p_shifted(kgid_t);
How do I know when to use in_group_p_shifted() vs. in_group_p()?
What about the various other callers of in_group_p() in filesystem code?
More information about the Containers mailing list