[PATCH v2] ns: do not block exit_task_namespaces() for a long time

Myklebust, Trond Trond.Myklebust at netapp.com
Mon Jul 16 15:39:36 UTC 2012


On Mon, 2012-07-16 at 18:09 +0300, Kirill A. Shutemov wrote:
> From: "Kirill A. Shutemov" <kirill.shutemov at linux.intel.com>
> 
> When the last task in a namespace exits, we need to trigger freeing of
> the namespace. Currently, we call synchronize_rcu() and free_nsproxy()
> directly on the do_exit() path.
> 
> On my machine synchronize_rcu() blocks for about 0.01 seconds. For
> comparison, a normal exit_group() syscall takes less than 0.0003 seconds.
> 
> Let's offload synchronize_rcu() and free_nsproxy() to a workqueue.
> 
> I also move synchronize_rcu() inside free_nsproxy(). This fixes the racy
> put_nsproxy(), which calls free_nsproxy() without synchronize_rcu().
> I guess it was missed during the switch to RCU (see cf7b708).
> 
> Microbenchmark:
> 
> : #define _GNU_SOURCE
> : #include <unistd.h>
> : #include <sched.h>
> : #include <stdlib.h>
> : #include <sys/wait.h>
> :
> : int
> : main(void)
> : {
> :       int i;
> :       for (i = 0; i < 1024; i++) {
> :               if (fork()) {
> :                       wait(NULL);
> :                       continue;
> :               }
> :               unshare(CLONE_NEWIPC);
> :               exit(0);
> :       }
> :       return 0;
> : }
> 
> Before the patch:
> 
> real    0m8.335s
> user    0m0.000s
> sys     0m0.265s
> 
> After:
> 
> real    0m0.569s
> user    0m0.001s
> sys     0m0.154s
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
> Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>
> ---
> 
> v2:
>  - Updated description.
> 
> ---
>  include/linux/nsproxy.h |    1 +
>  kernel/nsproxy.c        |   34 +++++++++++++++++++++++-----------
>  2 files changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
> index cc37a55..1d26be7 100644
> --- a/include/linux/nsproxy.h
> +++ b/include/linux/nsproxy.h
> @@ -24,6 +24,7 @@ struct fs_struct;
>   */
>  struct nsproxy {
>  	atomic_t count;
> +	struct work_struct free_nsproxy_work;
>  	struct uts_namespace *uts_ns;
>  	struct ipc_namespace *ipc_ns;
>  	struct mnt_namespace *mnt_ns;
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index b576f7f..ebc7d40 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -41,13 +41,17 @@ struct nsproxy init_nsproxy = {
>  #endif
>  };
>  
> +static void free_nsproxy_work(struct work_struct *work);
> +
>  static inline struct nsproxy *create_nsproxy(void)
>  {
>  	struct nsproxy *nsproxy;
>  
>  	nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
> -	if (nsproxy)
> +	if (nsproxy) {
>  		atomic_set(&nsproxy->count, 1);
> +		INIT_WORK(&nsproxy->free_nsproxy_work, free_nsproxy_work);
> +	}
>  	return nsproxy;
>  }
>  
> @@ -166,6 +170,14 @@ out:
>  
>  void free_nsproxy(struct nsproxy *ns)
>  {
> +	/*
> +	 * wait for others to get what they want from this nsproxy.
> +	 *
> +	 * cannot release this nsproxy via the call_rcu() since
> +	 * put_mnt_ns() will want to sleep
> +	 */
> +	synchronize_rcu();
> +
>  	if (ns->mnt_ns)
>  		put_mnt_ns(ns->mnt_ns);
>  	if (ns->uts_ns)
> @@ -178,6 +190,14 @@ void free_nsproxy(struct nsproxy *ns)
>  	kmem_cache_free(nsproxy_cachep, ns);
>  }
>  
> +static void free_nsproxy_work(struct work_struct *work)
> +{
> +	struct nsproxy *ns = container_of(work, struct nsproxy,
> +			free_nsproxy_work);
> +
> +	free_nsproxy(ns);
> +}
> +
>  /*
>   * Called from unshare. Unshare all the namespaces part of nsproxy.
>   * On success, returns the new nsproxy.
> @@ -215,16 +235,8 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
>  
>  	rcu_assign_pointer(p->nsproxy, new);
>  
> -	if (ns && atomic_dec_and_test(&ns->count)) {
> -		/*
> -		 * wait for others to get what they want from this nsproxy.
> -		 *
> -		 * cannot release this nsproxy via the call_rcu() since
> -		 * put_mnt_ns() will want to sleep
> -		 */
> -		synchronize_rcu();
> -		free_nsproxy(ns);
> -	}
> +	if (ns && atomic_dec_and_test(&ns->count))
> +		schedule_work(&ns->free_nsproxy_work);

What's wrong with using call_rcu()? The above will cause a workqueue
thread to block for no good reason.

>  }
>  
>  void exit_task_namespaces(struct task_struct *p)

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust at netapp.com
www.netapp.com



More information about the Containers mailing list