[resend][PATCH v9 1/3] procfs: show hierarchy of pid namespace

Chen, Hanxiao chenhanxiao at cn.fujitsu.com
Tue Dec 30 01:17:55 UTC 2014



> -----Original Message-----
> From: containers-bounces at lists.linux-foundation.org
> [mailto:containers-bounces at lists.linux-foundation.org] On Behalf Of Chen Hanxiao
> Sent: Tuesday, December 23, 2014 6:21 PM
> To: Eric W. Biederman; Serge Hallyn; Andrew Morton; Pavel Emelyanov
> Cc: Richard Weinberger; containers at lists.linux-foundation.org;
> linux-kernel at vger.kernel.org; Oleg Nesterov; David Howells; Mateusz Guzik
> Subject: [resend][PATCH v9 1/3] procfs: show hierarchy of pid namespace
> 
> We lack pid hierarchy information, and this leads to:
>   a) we don't know the relationship between pids (who is whose child):
>    /proc/PID/ns/pid only tells us whether two pids live in different ns
>   b) trouble with nested lxc container checkpoint/restore/migration
>   c) trouble with pid translation between containers;
> 
> This patch will show the hierarchy of pid namespace
> by pidns_hierarchy like:
> 
> <init_PID> <parent_of_init_PID> <relative PID level>
> 

Hi Eric, Pavel
 
Any comments?

Regards,
- Chen

> Ex:
> [root at localhost ~]#cat /proc/pidns_hierarchy
> 18060 1 1
> 18102 18060 2
> 1534  18102 3
> 1600  18102 3
> 1550  1 1
> *Note: numbers represent the pid 1 in different ns
> 
> It shows the pid hierarchy below:
> 
>                 init_pid_ns (1)
>          ┌────────────┴────────────┐
>         ns1                       ns2
>          │                         │
>         1550                     18060
>                                    │
>                                   ns3
>                                    │
>                                  18102
>                           ┌────────┴────────┐
>                          ns4               ns5
>                           │                 │
>                          1534              1600
> 
> Every pid printed in pidns_hierarchy
> is the init pid of that pid ns level.
> 
> Acked-by: Richard Weinberger <richard at nod.at>
> 
> Signed-off-by: Chen Hanxiao <chenhanxiao at cn.fujitsu.com>
> ---
> v9: fix code being included when CONFIG_PID_NS=n
> v8: use max() from kernel.h
>     fix some improper comments
> v7: change style to be consistent with current interface like
>     <init_PID> <parent_of_init_PID> <relative PID level>
>     remove EXPERT dependency in Kconfig
> v6: fix a get_pid leak and do some cleanups;
> v5: collect pid by find_ge_pid;
>     use local list inside nslist_proc_show;
>     use get_pid, remove mutex lock.
> v4: simplify pid collection and some performance optimization
>     fix another race issue.
> v3: fix a race issue and memory leak issue
> v2: use a procfs text file instead of dirs under /proc
> 
>  fs/proc/Kconfig           |   6 +
>  fs/proc/Makefile          |   1 +
>  fs/proc/internal.h        |   9 ++
>  fs/proc/pidns_hierarchy.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/proc/root.c            |   1 +
>  5 files changed, 297 insertions(+)
>  create mode 100644 fs/proc/pidns_hierarchy.c
> 
> diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
> index 2183fcf..82dda55 100644
> --- a/fs/proc/Kconfig
> +++ b/fs/proc/Kconfig
> @@ -71,3 +71,9 @@ config PROC_PAGE_MONITOR
>  	  /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
>  	  /proc/kpagecount, and /proc/kpageflags. Disabling these
>            interfaces will reduce the size of the kernel by approximately 4kb.
> +
> +config PROC_PID_HIERARCHY
> +	bool "Enable /proc/pidns_hierarchy support"
> +	depends on PROC_FS
> +	help
> +	  Show pid namespace hierarchy information
> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> index 7151ea4..33e384b 100644
> --- a/fs/proc/Makefile
> +++ b/fs/proc/Makefile
> @@ -30,3 +30,4 @@ proc-$(CONFIG_PROC_KCORE)	+= kcore.o
>  proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
>  proc-$(CONFIG_PRINTK)	+= kmsg.o
>  proc-$(CONFIG_PROC_PAGE_MONITOR)	+= page.o
> +proc-$(CONFIG_PROC_PID_HIERARCHY)	+= pidns_hierarchy.o
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 6fcdba5..18e0773 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -280,6 +280,15 @@ struct proc_maps_private {
>  #endif
>  };
> 
> +/*
> + * pidns_hierarchy.c
> + */
> +#ifdef CONFIG_PROC_PID_HIERARCHY
> +	extern void proc_pidns_hierarchy_init(void);
> +#else
> +	static inline void proc_pidns_hierarchy_init(void) {}
> +#endif
> +
>  struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
> 
>  extern const struct file_operations proc_pid_maps_operations;
> diff --git a/fs/proc/pidns_hierarchy.c b/fs/proc/pidns_hierarchy.c
> new file mode 100644
> index 0000000..ab1c665
> --- /dev/null
> +++ b/fs/proc/pidns_hierarchy.c
> @@ -0,0 +1,280 @@
> +#include <linux/init.h>
> +#include <linux/errno.h>
> +#include <linux/proc_fs.h>
> +#include <linux/module.h>
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/seq_file.h>
> +#include <linux/kernel.h>
> +
> +/*
> + *  /proc/pidns_hierarchy
> + *
> + *  show the hierarchy of pid namespace as:
> + *  <init_PID> <parent_of_init_PID> <relative PID level>
> + *
> + *  init_PID: child reaper in ns
> + *  parent_of_init_PID: init_PID's parent, child reaper too
> + *  relative PID level: pid level relative to caller's ns
> + */
> +
> +#define NS_HIERARCHY	"pidns_hierarchy"
> +
> +/* list for host pid collection */
> +struct pidns_list {
> +	struct list_head list;
> +	struct pid *pid;
> +	unsigned int level;
> +};
> +
> +static void free_pidns_list(struct list_head *head)
> +{
> +	struct pidns_list *tmp, *pos;
> +
> +	list_for_each_entry_safe(pos, tmp, head, list) {
> +		list_del(&pos->list);
> +		put_pid(pos->pid);
> +		kfree(pos);
> +	}
> +}
> +
> +static int
> +pidns_list_add(struct pid *pid, struct list_head *list_head,
> +		int level)
> +{
> +	struct pidns_list *ent;
> +
> +	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
> +	if (!ent)
> +		return -ENOMEM;
> +
> +	ent->pid = pid;
> +	ent->level = level;
> +	list_add_tail(&ent->list, list_head);
> +
> +	return 0;
> +}
> +
> +static int
> +pidns_list_filter(struct list_head *pidns_pid_list,
> +		struct list_head *pidns_pid_tree)
> +{
> +	struct pidns_list *pos, *pos_t;
> +	struct pid_namespace *ns0, *ns1;
> +	struct pid *pid0, *pid1;
> +	int rc, flag = 0;
> +
> +	/*
> +	 * screen pids with relationship
> +	 * in pidns_pid_list, we may add pids like:
> +	 * ns0   ns1   ns2
> +	 * pid1->pid2->pid3
> +	 * we should screen pid1, pid2 and keep pid3
> +	 */
> +	list_for_each_entry(pos, pidns_pid_list, list) {
> +		list_for_each_entry(pos_t, pidns_pid_list, list) {
> +			flag = 0;
> +			pid0 = pos->pid;
> +			pid1 = pos_t->pid;
> +			ns0 = pid0->numbers[pid0->level].ns;
> +			ns1 = pid1->numbers[pid1->level].ns;
> +			if (pos->pid->level < pos_t->pid->level)
> +				for (; ns1 != NULL; ns1 = ns1->parent)
> +					if (ns0 == ns1) {
> +						flag = 1;
> +						break;
> +					}
> +			/* a redundant pid found */
> +			if (flag == 1)
> +				break;
> +		}
> +
> +		if (flag == 0) {
> +			get_pid(pos->pid);
> +			rc = pidns_list_add(pos->pid, pidns_pid_tree, 0);
> +			if (rc) {
> +				put_pid(pos->pid);
> +				goto cleanup;
> +			}
> +		}
> +	}
> +
> +	/*
> +	 *  Now all useful stuffs are in pidns_pid_tree,
> +	 *  free pidns_pid_list
> +	 */
> +	free_pidns_list(pidns_pid_list);
> +
> +	return 0;
> +
> +cleanup:
> +	free_pidns_list(pidns_pid_tree);
> +	return rc;
> +}
> +
> +static void
> +pidns_list_set_level(struct list_head *pidns_list_in,
> +		struct pid_namespace *curr_ns)
> +{
> +	struct pidns_list *pos, *pos_t;
> +	struct pid *pid0, *pid1;
> +	int i;
> +
> +	/*
> +	 * From the pid hierarchy point of view,
> +	 * we already had a list of pids who are not
> +	 * the subsets of each other.
> +	 * But part of them may be same.
> +	 * We need to set the level of each pids:
> +	 * pid0:         A->B->C   pid1:       A->B->D
> +	 * level:           2                  0
> +	 * We use level to identify
> +	 * the public part of each pids.
> +	 */
> +	list_for_each_entry(pos, pidns_list_in, list) {
> +		list_for_each_entry(pos_t, pidns_list_in, list) {
> +			pid0 = pos->pid;
> +			pid1 = pos_t->pid;
> +			if (pid0 == pid1)
> +				continue;
> +			if (pos_t->level > 0)
> +				continue;
> +			for (i = curr_ns->level + 1; i <= pid0->level; i++) {
> +				/* skip the public parts */
> +				if (pid0->numbers[i].ns ==
> +						pid1->numbers[i].ns)
> +					continue;
> +				else
> +					break;
> +			}
> +			pos->level = i - 1;
> +		}
> +	}
> +}
> +
> +/*
> + * Finds all init pids, places them into
> + * pidns_pid_list and then stores the hierarchy
> + * into pidns_pid_tree.
> + */
> +static int proc_pidns_list_refresh(struct pid_namespace *curr_ns,
> +		struct list_head *pidns_pid_list,
> +		struct list_head *pidns_pid_tree)
> +{
> +	struct pid *pid;
> +	int new_nr, nr = 0;
> +	int rc;
> +
> +	/* collect pids in current namespace */
> +	while (nr < PID_MAX_LIMIT) {
> +		rcu_read_lock();
> +		pid = find_ge_pid(nr, curr_ns);
> +		if (!pid) {
> +			rcu_read_unlock();
> +			break;
> +		}
> +
> +		new_nr = pid_vnr(pid);
> +		if (!is_child_reaper(pid)) {
> +			nr = new_nr + 1;
> +			rcu_read_unlock();
> +			continue;
> +		}
> +		get_pid(pid);
> +		rcu_read_unlock();
> +		rc = pidns_list_add(pid, pidns_pid_list, 0);
> +		if (rc) {
> +			put_pid(pid);
> +			goto cleanup;
> +		}
> +		nr = new_nr + 1;
> +	}
> +
> +	/*
> +	 * Only one pid found as the child reaper,
> +	 * so current pid namespace do not have sub-namespace,
> +	 * return 0 directly.
> +	 */
> +	if (list_is_singular(pidns_pid_list)) {
> +		rc = 0;
> +		goto cleanup;
> +	}
> +
> +	/*
> +	 * screen duplicate pids from pidns_pid_list
> +	 * and form a new list pidns_pid_tree.
> +	 */
> +	rc = pidns_list_filter(pidns_pid_list, pidns_pid_tree);
> +	if (rc)
> +		goto cleanup;
> +
> +	return 0;
> +
> +cleanup:
> +	free_pidns_list(pidns_pid_list);
> +	return rc;
> +}
> +
> +static int nslist_proc_show(struct seq_file *m, void *v)
> +{
> +	struct pidns_list *pos;
> +	struct pid_namespace *ns, *curr_ns;
> +	struct pid *pid;
> +	char pid_buf[16], ppid_buf[16];
> +	int i, rc;
> +
> +	LIST_HEAD(pidns_pid_list);
> +	LIST_HEAD(pidns_pid_tree);
> +
> +	curr_ns = task_active_pid_ns(current);
> +
> +	rc = proc_pidns_list_refresh(curr_ns,
> +			&pidns_pid_list, &pidns_pid_tree);
> +	if (rc)
> +		return rc;
> +
> +	pidns_list_set_level(&pidns_pid_tree, curr_ns);
> +
> +	/* print pid namespace's hierarchy */
> +	list_for_each_entry(pos, &pidns_pid_tree, list) {
> +		pid = pos->pid;
> +		for (i = max(curr_ns->level, pos->level) + 1;
> +				i <= pid->level; i++) {
> +			ns = pid->numbers[i].ns;
> +			/* show PID '1' in specific pid ns */
> +			snprintf(pid_buf, 16, "%u",
> +				pid_vnr(find_pid_ns(1, ns)));
> +			ns = pid->numbers[i - 1].ns;
> +			snprintf(ppid_buf, 16, "%u",
> +					pid_vnr(find_pid_ns(1, ns)));
> +			seq_printf(m, "%s\t%s\t%d\n", pid_buf, ppid_buf,
> +					i - curr_ns->level);
> +		}
> +	}
> +
> +	free_pidns_list(&pidns_pid_tree);
> +
> +	return 0;
> +}
> +
> +static int nslist_proc_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, nslist_proc_show, NULL);
> +}
> +
> +static const struct file_operations proc_nspid_nslist_fops = {
> +	.open		= nslist_proc_open,
> +	.read		= seq_read,
> +	.llseek		= seq_lseek,
> +	.release	= single_release,
> +};
> +
> +/*
> + *  Called by proc_root_init() to initialize the /proc/pidns_hierarchy
> + */
> +void __init proc_pidns_hierarchy_init(void)
> +{
> +	proc_create(NS_HIERARCHY, S_IRUGO,
> +		NULL, &proc_nspid_nslist_fops);
> +}
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index e74ac9f..bcb55c7 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -190,6 +190,7 @@ void __init proc_root_init(void)
>  	proc_tty_init();
>  	proc_mkdir("bus", NULL);
>  	proc_sys_init();
> +	proc_pidns_hierarchy_init();
>  }
> 
>  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
> kstat *stat
> --
> 1.9.3
> 
> _______________________________________________
> Containers mailing list
> Containers at lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers


More information about the Containers mailing list