[PATCH review 1/6] mnt: Track which mounts use a dentry as root.

Nikolay Borisov kernel at kyup.com
Fri Aug 7 10:46:44 UTC 2015



On 08/04/2015 12:26 AM, Eric W. Biederman wrote:
> 
> This is needed infrastructure for better handling of when files
> or directories are moved out from under the root of a bind mount.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
> ---
>  fs/mount.h             |   7 +++
>  fs/namespace.c         | 120 +++++++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/dcache.h |   7 +++
>  3 files changed, 130 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/mount.h b/fs/mount.h
> index 14db05d424f7..e8f22970fe59 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -27,6 +27,12 @@ struct mountpoint {
>  	int m_count;
>  };
>  
> +struct mountroot {
> +	struct hlist_node r_hash;
> +	struct dentry *r_dentry;
> +	struct hlist_head r_list;
> +};
> +
>  struct mount {
>  	struct hlist_node mnt_hash;
>  	struct mount *mnt_parent;
> @@ -55,6 +61,7 @@ struct mount {
>  	struct mnt_namespace *mnt_ns;	/* containing namespace */
>  	struct mountpoint *mnt_mp;	/* where is it mounted */
>  	struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
> +	struct hlist_node mnt_mr_list;	/* list mounts with the same mountroot */
>  #ifdef CONFIG_FSNOTIFY
>  	struct hlist_head mnt_fsnotify_marks;
>  	__u32 mnt_fsnotify_mask;
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 2b8aa15fd6df..2ce987af9afa 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -31,6 +31,8 @@ static unsigned int m_hash_mask __read_mostly;
>  static unsigned int m_hash_shift __read_mostly;
>  static unsigned int mp_hash_mask __read_mostly;
>  static unsigned int mp_hash_shift __read_mostly;
> +static unsigned int mr_hash_mask __read_mostly;
> +static unsigned int mr_hash_shift __read_mostly;
>  
>  static __initdata unsigned long mhash_entries;
>  static int __init set_mhash_entries(char *str)
> @@ -52,6 +54,16 @@ static int __init set_mphash_entries(char *str)
>  }
>  __setup("mphash_entries=", set_mphash_entries);
>  
> +static __initdata unsigned long mrhash_entries;
> +static int __init set_mrhash_entries(char *str)
> +{
> +	if (!str)
> +		return 0;
> +	mrhash_entries = simple_strtoul(str, &str, 0);

Nit: Is there any particular reason for using simple_strtoul() rather
than the kstrto*() family of functions?

> +	return 1;
> +}
> +__setup("mrhash_entries=", set_mrhash_entries);
> +
>  static u64 event;
>  static DEFINE_IDA(mnt_id_ida);
>  static DEFINE_IDA(mnt_group_ida);
> @@ -61,6 +73,7 @@ static int mnt_group_start = 1;
>  
>  static struct hlist_head *mount_hashtable __read_mostly;
>  static struct hlist_head *mountpoint_hashtable __read_mostly;
> +static struct hlist_head *mountroot_hashtable __read_mostly;
>  static struct kmem_cache *mnt_cache __read_mostly;
>  static DECLARE_RWSEM(namespace_sem);
>  
> @@ -93,6 +106,13 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
>  	return &mountpoint_hashtable[tmp & mp_hash_mask];
>  }
>  
> +static inline struct hlist_head *mr_hash(struct dentry *dentry)
> +{
> +	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
> +	tmp = tmp + (tmp >> mr_hash_shift);
> +	return &mountroot_hashtable[tmp & mr_hash_mask];
> +}
> +
>  /*
>   * allocation is serialized by namespace_sem, but we need the spinlock to
>   * serialize with freeing.
> @@ -234,6 +254,7 @@ static struct mount *alloc_vfsmnt(const char *name)
>  		INIT_LIST_HEAD(&mnt->mnt_slave_list);
>  		INIT_LIST_HEAD(&mnt->mnt_slave);
>  		INIT_HLIST_NODE(&mnt->mnt_mp_list);
> +		INIT_HLIST_NODE(&mnt->mnt_mr_list);
>  #ifdef CONFIG_FSNOTIFY
>  		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
>  #endif
> @@ -779,6 +800,77 @@ static void put_mountpoint(struct mountpoint *mp)
>  	}
>  }
>  
> +static struct mountroot *lookup_mountroot(struct dentry *dentry)
> +{
> +	struct hlist_head *chain = mr_hash(dentry);
> +	struct mountroot *mr;
> +
> +	hlist_for_each_entry(mr, chain, r_hash) {
> +		if (mr->r_dentry == dentry)
> +			return mr;
> +	}
> +	return NULL;
> +}
> +
> +static int mnt_set_root(struct mount *mnt, struct dentry *root)
> +{
> +	struct mountroot *mr = NULL;
> +
> +	read_seqlock_excl(&mount_lock);
> +	if (d_mountroot(root))
> +		mr = lookup_mountroot(root);
> +	if (!mr) {
> +		struct mountroot *new;
> +		read_sequnlock_excl(&mount_lock);
> +
> +		new = kmalloc(sizeof(struct mountroot), GFP_KERNEL);
> +		if (!new)
> +			return -ENOMEM;
> +
> +		read_seqlock_excl(&mount_lock);
> +		mr = lookup_mountroot(root);
> +		if (mr) {
> +			kfree(new);
> +		} else {
> +			struct hlist_head *chain = mr_hash(root);
> +
> +			mr = new;
> +			mr->r_dentry = root;
> +			INIT_HLIST_HEAD(&mr->r_list);
> +			hlist_add_head(&mr->r_hash, chain);
> +
> +			spin_lock(&root->d_lock);
> +			root->d_flags |= DCACHE_MOUNTROOT;
> +			spin_unlock(&root->d_lock);
> +		}
> +	}
> +	mnt->mnt.mnt_root = root;
> +	hlist_add_head(&mnt->mnt_mr_list, &mr->r_list);
> +	read_sequnlock_excl(&mount_lock);
> +
> +	return 0;
> +}
> +
> +static void mnt_put_root(struct mount *mnt)
> +{
> +	struct dentry *root = mnt->mnt.mnt_root;
> +	struct mountroot *mr;
> +
> +	read_seqlock_excl(&mount_lock);
> +	mr = lookup_mountroot(root);
> +	BUG_ON(!mr);
> +	hlist_del(&mnt->mnt_mr_list);
> +	if (hlist_empty(&mr->r_list)) {
> +		hlist_del(&mr->r_hash);
> +		spin_lock(&root->d_lock);
> +		root->d_flags &= ~DCACHE_MOUNTROOT;
> +		spin_unlock(&root->d_lock);
> +		kfree(mr);
> +	}
> +	read_sequnlock_excl(&mount_lock);
> +	dput(root);
> +}
> +
>  static inline int check_mnt(struct mount *mnt)
>  {
>  	return mnt->mnt_ns == current->nsproxy->mnt_ns;
> @@ -934,6 +1026,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
>  {
>  	struct mount *mnt;
>  	struct dentry *root;
> +	int err;
>  
>  	if (!type)
>  		return ERR_PTR(-ENODEV);
> @@ -952,8 +1045,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
>  		return ERR_CAST(root);
>  	}
>  
> -	mnt->mnt.mnt_root = root;
>  	mnt->mnt.mnt_sb = root->d_sb;
> +	err = mnt_set_root(mnt, root);
> +	if (err) {
> +		dput(root);
> +		deactivate_super(mnt->mnt.mnt_sb);
> +		mnt_free_id(mnt);
> +		free_vfsmnt(mnt);
> +		return ERR_PTR(err);
> +	}
> +
>  	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
>  	mnt->mnt_parent = mnt;
>  	lock_mount_hash();
> @@ -985,6 +1086,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
>  			goto out_free;
>  	}
>  
> +	err = mnt_set_root(mnt, root);
> +	if (err)
> +		goto out_free;
> +
>  	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
>  	/* Don't allow unprivileged users to change mount flags */
>  	if (flag & CL_UNPRIVILEGED) {
> @@ -1010,7 +1115,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
>  
>  	atomic_inc(&sb->s_active);
>  	mnt->mnt.mnt_sb = sb;
> -	mnt->mnt.mnt_root = dget(root);
> +	dget(root);
>  	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
>  	mnt->mnt_parent = mnt;
>  	lock_mount_hash();
> @@ -1063,7 +1168,7 @@ static void cleanup_mnt(struct mount *mnt)
>  	if (unlikely(mnt->mnt_pins.first))
>  		mnt_pin_kill(mnt);
>  	fsnotify_vfsmount_delete(&mnt->mnt);
> -	dput(mnt->mnt.mnt_root);
> +	mnt_put_root(mnt);
>  	deactivate_super(mnt->mnt.mnt_sb);
>  	mnt_free_id(mnt);
>  	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
> @@ -3120,14 +3225,21 @@ void __init mnt_init(void)
>  				mphash_entries, 19,
>  				0,
>  				&mp_hash_shift, &mp_hash_mask, 0, 0);
> +	mountroot_hashtable = alloc_large_system_hash("Mountroot-cache",
> +				sizeof(struct hlist_head),
> +				mrhash_entries, 19,
> +				0,
> +				&mr_hash_shift, &mr_hash_mask, 0, 0);
>  
> -	if (!mount_hashtable || !mountpoint_hashtable)
> +	if (!mount_hashtable || !mountpoint_hashtable || !mountroot_hashtable)
>  		panic("Failed to allocate mount hash table\n");
>  
>  	for (u = 0; u <= m_hash_mask; u++)
>  		INIT_HLIST_HEAD(&mount_hashtable[u]);
>  	for (u = 0; u <= mp_hash_mask; u++)
>  		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
> +	for (u = 0; u <= mr_hash_mask; u++)
> +		INIT_HLIST_HEAD(&mountroot_hashtable[u]);
>  
>  	kernfs_init();
>  
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index d67ae119cf4e..52a5e6915f58 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -228,6 +228,8 @@ struct dentry_operations {
>  #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
>  #define DCACHE_OP_SELECT_INODE		0x02000000 /* Unioned entry: dcache op selects inode */
>  
> +#define DCACHE_MOUNTROOT		0x04000000 /* Root of a vfsmount */
> +
>  extern seqlock_t rename_lock;
>  
>  /*
> @@ -404,6 +406,11 @@ static inline bool d_mountpoint(const struct dentry *dentry)
>  	return dentry->d_flags & DCACHE_MOUNTED;
>  }
>  
> +static inline bool d_mountroot(const struct dentry *dentry)
> +{
> +	return dentry->d_flags & DCACHE_MOUNTROOT;
> +}
> +
>  /*
>   * Directory cache entry type accessor functions.
>   */
> 


More information about the Containers mailing list