[PATCH 4/7] proc: Make /proc/net its own filesystem

Eric W. Biederman ebiederm at xmission.com
Thu Nov 6 02:53:08 PST 2008


Make the VFS happy with /proc/net by making it its own
filesystem, avoiding issues with hard links to directories
and other silliness that confuse the VFS today.

We preserve backwards compatibility by automatically
mounting /proc/self/net and marking it as a shrinkable
mount so userspace doesn't need to care about it.

Signed-off-by: Eric W. Biederman <ebiederm at xmission.com>
---
 fs/proc/base.c              |    6 +-
 fs/proc/proc_net.c          |  212 +++++++++++++++++++++++++++++++------------
 include/linux/magic.h       |    1 +
 include/net/net_namespace.h |    1 +
 security/selinux/hooks.c    |   28 +++++-
 5 files changed, 183 insertions(+), 65 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3f..9a68fa4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -128,6 +128,10 @@ struct pid_entry {
 	NOD(NAME, (S_IFREG|(MODE)), 			\
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = &proc_##OTYPE } )
+#define MNT(NAME, MODE, OTYPE) 				\
+	NOD(NAME, (S_IFDIR|(MODE)), 			\
+		&proc_##OTYPE##_inode_operations, NULL,	\
+		{} )
 
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
@@ -2453,7 +2457,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("fd",         S_IRUSR|S_IXUSR, fd),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
 #ifdef CONFIG_NET
-	DIR("net",        S_IRUGO|S_IXUGO, net),
+	MNT("net",        S_IRUGO|S_IXUGO, net),
 #endif
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 7bc296f..57e0f22 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -21,11 +21,13 @@
 #include <linux/smp_lock.h>
 #include <linux/mount.h>
 #include <linux/nsproxy.h>
+#include <linux/namei.h>
 #include <net/net_namespace.h>
 #include <linux/seq_file.h>
 
 #include "internal.h"
 
+static struct file_system_type proc_net_fs_type;
 
 static struct net *get_proc_net(const struct inode *inode)
 {
@@ -118,65 +120,60 @@ static struct net *get_proc_task_net(struct inode *dir)
 	return net;
 }
 
-static struct dentry *proc_tgid_net_lookup(struct inode *dir,
-		struct dentry *dentry, struct nameidata *nd)
+void *proc_net_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *de;
+	/* Follow to a mount point of the proper network namespace.
+	 */
+	struct vfsmount *mnt;
 	struct net *net;
-
-	de = ERR_PTR(-ENOENT);
-	net = get_proc_task_net(dir);
-	if (net != NULL) {
-		de = proc_lookup_de(net->proc_net, dir, dentry);
-		put_net(net);
+	int err = -ENOENT;
+
+	/* Which network namespace? */
+	net = get_proc_task_net(dentry->d_inode);
+	if (!net)
+		goto out_err;
+
+	/* Create a new mount. */
+	mnt = kern_mount_data(&proc_net_fs_type, net);
+	if (IS_ERR(mnt))
+		goto out_err;
+
+	dput(nd->path.dentry);
+	nd->path.dentry = dget(dentry);
+
+	/* Add mnt the mount namespace */
+	err = do_add_mount(mntget(mnt), &nd->path, MNT_SHRINKABLE,
+			   &proc_automounts);
+	if (err < 0) {
+		mntput(mnt);
+		if (err == -EBUSY)
+			goto out_follow;
+		goto out_err;
 	}
-	return de;
-}
-
-static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		struct kstat *stat)
-{
-	struct inode *inode = dentry->d_inode;
-	struct net *net;
-
-	net = get_proc_task_net(inode);
-
-	generic_fillattr(inode, stat);
-
-	if (net != NULL) {
-		stat->nlink = net->proc_net->nlink;
-		put_net(net);
-	}
-
-	return 0;
+	/* Place the mnt on path and return it to the caller */
+	err = 0;
+	path_put(&nd->path);
+	nd->path.mnt = mnt;
+	nd->path.dentry = dget(mnt->mnt_root);
+	put_net(net);
+out:
+	return ERR_PTR(err);
+out_err:
+	path_put(&nd->path);
+	goto out;
+out_follow:
+	/* We raced with ourselves so just walk the mounts */
+	while (d_mountpoint(nd->path.dentry) &&
+		follow_down(&nd->path.mnt, &nd->path.dentry))
+		;
+	err = 0;
+	goto out;
 }
 
 const struct inode_operations proc_net_inode_operations = {
-	.lookup		= proc_tgid_net_lookup,
-	.getattr	= proc_tgid_net_getattr,
-};
-
-static int proc_tgid_net_readdir(struct file *filp, void *dirent,
-		filldir_t filldir)
-{
-	int ret;
-	struct net *net;
-
-	ret = -EINVAL;
-	net = get_proc_task_net(filp->f_path.dentry->d_inode);
-	if (net != NULL) {
-		ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
-		put_net(net);
-	}
-	return ret;
-}
-
-const struct file_operations proc_net_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_tgid_net_readdir,
+	.follow_link	= proc_net_follow_link,
 };
 
-
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
 	const char *name, mode_t mode, const struct file_operations *fops)
 {
@@ -190,21 +187,95 @@ void proc_net_remove(struct net *net, const char *name)
 }
 EXPORT_SYMBOL_GPL(proc_net_remove);
 
+
+static int proc_net_fill_super(struct super_block *sb)
+{
+	struct net *net = sb->s_fs_info;
+	struct proc_dir_entry *netd = net->proc_net;
+	struct inode *root_inode = NULL;
+
+	sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = PROC_NET_SUPER_MAGIC;
+	sb->s_op = &proc_sops;
+	sb->s_time_gran = 1;
+
+	de_get(netd);
+	root_inode = proc_get_inode(sb, netd->low_ino, netd);
+	if (!root_inode)
+		goto out_no_root;
+	root_inode->i_uid = 0;
+	root_inode->i_gid = 0;
+	sb->s_root = d_alloc_root(root_inode);
+	if (!sb->s_root)
+		goto out_no_root;
+	return 0;
+
+out_no_root:
+	printk("%s: get root inode failed\n", __func__);
+	iput(root_inode);
+	de_put(netd);
+	return -ENOMEM;
+}
+
+static int proc_net_test_super(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
+static int proc_net_set_super(struct super_block *sb, void *data)
+{
+	sb->s_fs_info = data;
+	return set_anon_super(sb, NULL);
+}
+
+static int proc_net_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	struct super_block *sb;
+
+	if (!(flags & MS_KERNMOUNT))
+		data = current->nsproxy->net_ns;
+
+	sb = sget(fs_type, proc_net_test_super, proc_net_set_super, data);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (!sb->s_root) {
+		int err;
+		sb->s_flags = flags;
+		err = proc_net_fill_super(sb);
+		if (err) {
+			up_write(&sb->s_umount);
+			deactivate_super(sb);
+			return err;
+		}
+
+		sb->s_flags |= MS_ACTIVE;
+	}
+
+	return simple_set_mnt(mnt, sb);
+}
+
+static struct file_system_type proc_net_fs_type = {
+	.name		= "proc/net",
+	.get_sb		= proc_net_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
+	struct vfsmount *mnt;
 	int err;
 
 	err = -ENOMEM;
-	netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+	netd = proc_create_root();
 	if (!netd)
 		goto out;
 
 	netd->data = net;
-	netd->nlink = 2;
-	netd->name = "net";
-	netd->namelen = 3;
-	netd->parent = &proc_root;
 
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
@@ -213,8 +284,17 @@ static __net_init int proc_net_ns_init(struct net *net)
 
 	net->proc_net = netd;
 	net->proc_net_stat = net_statd;
+
+	mnt = kern_mount_data(&proc_net_fs_type, net);
+	if (IS_ERR(mnt))
+		goto free_stat;
+
+	net->proc_mnt = mnt;
+
 	return 0;
 
+free_stat:
+	remove_proc_entry("stat", netd);
 free_net:
 	kfree(netd);
 out:
@@ -224,7 +304,14 @@ out:
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
 	remove_proc_entry("stat", net->proc_net);
-	kfree(net->proc_net);
+	release_proc_entry(net->proc_net);
+	/* We won't be looking up this super block
+	 * any more so set s_fs_info to NULL to ensure
+	 * it doesn't conflict with network namespaces
+	 * allocated in the future at the same address.
+	 */
+	net->proc_mnt->mnt_sb->s_fs_info = NULL;
+	mntput(net->proc_mnt);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -234,7 +321,16 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
 
 int __init proc_net_init(void)
 {
-	proc_symlink("net", NULL, "self/net");
+	struct proc_dir_entry *ent;
+	int err;
+
+	ent = proc_symlink("net", NULL, "self/net");
+	if (!ent)
+		return -EEXIST;
+
+	err = register_filesystem(&proc_net_fs_type);
+	if (err)
+		return err;
 
 	return register_pernet_subsys(&proc_net_ns_ops);
 }
diff --git a/include/linux/magic.h b/include/linux/magic.h
index f7f3fdd..2b31c02 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -30,6 +30,7 @@
 #define NFS_SUPER_MAGIC		0x6969
 #define OPENPROM_SUPER_MAGIC	0x9fa1
 #define PROC_SUPER_MAGIC	0x9fa0
+#define PROC_NET_SUPER_MAGIC	0x706e6574
 #define QNX4_SUPER_MAGIC	0x002f		/* qnx4 fs detection */
 
 #define REISERFS_SUPER_MAGIC	0x52654973	/* used by gcc */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 700c53a..77aba2b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -40,6 +40,7 @@ struct net {
 
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
+	struct vfsmount		*proc_mnt;
 
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_set	sysctls;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f85597a..b38a2df 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -667,7 +667,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 		goto out;
 	}
 
-	if (strcmp(sb->s_type->name, "proc") == 0)
+	if (strncmp(sb->s_type->name, "proc", 4) == 0)
 		sbsec->proc = 1;
 
 	/* Determine the labeling behavior to use for this filesystem type. */
@@ -1116,16 +1116,18 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
 }
 
 #ifdef CONFIG_PROC_FS
-static int selinux_proc_get_sid(struct proc_dir_entry *de,
+static int selinux_proc_get_sid(struct super_block *sb,
+				struct proc_dir_entry *de,
 				u16 tclass,
 				u32 *sid)
 {
 	int buflen, rc;
 	char *buffer, *path, *end;
 
+	rc = -ENOMEM;
 	buffer = (char *)__get_free_page(GFP_KERNEL);
 	if (!buffer)
-		return -ENOMEM;
+		goto out;
 
 	buflen = PAGE_SIZE;
 	end = buffer+buflen;
@@ -1136,19 +1138,32 @@ static int selinux_proc_get_sid(struct proc_dir_entry *de,
 	while (de && de != de->parent) {
 		buflen -= de->namelen + 1;
 		if (buflen < 0)
-			break;
+			goto out_free;
 		end -= de->namelen;
 		memcpy(end, de->name, de->namelen);
 		*--end = '/';
 		path = end;
 		de = de->parent;
 	}
+	if (strcmp(sb->type->name, "proc") != 0) {
+		const char *name = sb->type->name + 4;
+		int namelen = strlen(name);
+		buflen -= namelen;
+		if (buflen < 0)
+			goto out_free;
+		end -= namelen;
+		memcpy(end, name);
+		path = end;
+	}
 	rc = security_genfs_sid("proc", path, tclass, sid);
+out_free:
 	free_page((unsigned long)buffer);
+out:
 	return rc;
 }
 #else
-static int selinux_proc_get_sid(struct proc_dir_entry *de,
+static int selinux_proc_get_sid(struct super_block *sb,
+				struct proc_dir_entry *de,
 				u16 tclass,
 				u32 *sid)
 {
@@ -1297,7 +1312,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
 			struct proc_inode *proci = PROC_I(inode);
 			if (proci->pde) {
 				isec->sclass = inode_mode_to_security_class(inode->i_mode);
-				rc = selinux_proc_get_sid(proci->pde,
+				rc = selinux_proc_get_sid(inode->i_sb,
+							  proci->pde,
 							  isec->sclass,
 							  &sid);
 				if (rc)
-- 
1.5.3.rc6.17.g1911



More information about the Containers mailing list