[PATCH] Save and restore the [compat_]robust_list member of the task struct.

Oren Laadan orenl at cs.columbia.edu
Mon Jul 13 15:59:16 PDT 2009


Pulled, thanks.

Matt Helsley wrote:
> These lists record which futexes the task holds. To keep the overhead of
> robust futexes low the list is kept in userspace. When the task exits the
> kernel carefully walks these lists to recover held futexes that
> other tasks may be attempting to acquire with FUTEX_WAIT.
> 
> Because they point to userspace memory that is saved/restored by
> checkpoint/restart saving the list pointers themselves is safe.
> 
> While saving the pointers is safe during checkpoint, restart is tricky
> because the robust futex ABI contains provisions for changes based on
> checking the size of the list head. So we need to save the length of
> the list head too in order to make sure that the kernel used during
> restart is capable of handling that ABI. Since there is only one ABI
> supported at the moment taking the list head's size is simple. Should
> the ABI change we will need to use the same size as specified during
> sys_set_robust_list() and hence some new means of determining the length
> of this userspace structure in sys_checkpoint would be required.
> 
> Rather than rewrite the logic that checks and handles the ABI we reuse
> sys_set_robust_list() by factoring out the body of the function and
> calling it during restart.
> 
> Signed-off-by: Matt Helsley <matthltc at us.ibm.com>
> ---
> v2:
> 	Move the save/restore into include/linux/futex.h as this would
> 		seem to address some of the comments from the previous
> 		posting.
> 	Make the compat __user* 32-bits since that's all we'll ever
> 		need for it.
> 	Use the compat_ptr() macros instead of brute casting when
> 		possible.
> 	Cast the regular __user* to unsigned long first.
> 	Tested on x86-32 applied to ckpt-v17-rc1
> 	NOTE: The futex tests need a fix to run.sh which utilizes
> 		pid namespaces to recreate the pids. This is necessary
> 		because robust and pi futexes store TIDs and the
> 		kernel uses these TIDs to implement robust futex
> 		support. See the patch to cr_tests that will follow..
> 
>  checkpoint/process.c           |    2 +
>  include/linux/checkpoint_hdr.h |    5 ++++
>  include/linux/compat.h         |    3 +-
>  include/linux/futex.h          |   46 ++++++++++++++++++++++++++++++++++++++++
>  kernel/futex.c                 |   19 ++++++++++------
>  kernel/futex_compat.c          |   13 ++++++++--
>  6 files changed, 77 insertions(+), 11 deletions(-)
> 
> diff --git a/checkpoint/process.c b/checkpoint/process.c
> index a93df3d..768f25e 100644
> --- a/checkpoint/process.c
> +++ b/checkpoint/process.c
> @@ -47,6 +47,7 @@ static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t)
>  		/* FIXME: save remaining relevant task_struct fields */
>  		h->exit_signal = t->exit_signal;
>  		h->pdeath_signal = t->pdeath_signal;
> +		save_task_robust_futex_list(h, t);
>  	}
>  
>  	ret = ckpt_write_obj(ctx, &h->h);
> @@ -389,6 +390,7 @@ static int restore_task_struct(struct ckpt_ctx *ctx)
>  		/* FIXME: restore remaining relevant task_struct fields */
>  		t->exit_signal = h->exit_signal;
>  		t->pdeath_signal = h->pdeath_signal;
> +		restore_task_robust_futex_list(h);
>  	}
>  
>  	memset(t->comm, 0, TASK_COMM_LEN);
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index b5243e1..2813a03 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -201,6 +201,11 @@ struct ckpt_hdr_task {
>  	/* would audit want to track the checkpointed ids,
>  	   or (more likely) who actually restarted? */
>  #endif
> +
> +	__u32 compat_robust_futex_head_len;
> +	__u32 compat_robust_futex_list; /* a compat __user ptr */
> +	__u32 robust_futex_head_len;
> +	__u64 robust_futex_list; /* a __user ptr */
>  } __attribute__((aligned(8)));
>  
>  /* Posix capabilities */
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index af931ee..f444cf0 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -165,7 +165,8 @@ struct compat_robust_list_head {
>  };
>  
>  extern void compat_exit_robust_list(struct task_struct *curr);
> -
> +extern long do_compat_set_robust_list(struct compat_robust_list_head __user *head,
> +				      compat_size_t len);
>  asmlinkage long
>  compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
>  			   compat_size_t len);
> diff --git a/include/linux/futex.h b/include/linux/futex.h
> index 4326f81..934cf98 100644
> --- a/include/linux/futex.h
> +++ b/include/linux/futex.h
> @@ -185,9 +185,46 @@ union futex_key {
>  #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
>  
>  #ifdef CONFIG_FUTEX
> +extern long do_set_robust_list(struct robust_list_head __user *head, size_t len);
>  extern void exit_robust_list(struct task_struct *curr);
>  extern void exit_pi_state_list(struct task_struct *curr);
>  extern int futex_cmpxchg_enabled;
> +
> +#ifdef CONFIG_CHECKPOINT
> +#include <linux/checkpoint_hdr.h>
> +
> +static inline void save_task_robust_futex_list(struct ckpt_hdr_task *h,
> +					       struct task_struct *t)
> +{
> +	/*
> +	 * These are __user pointers and thus can be saved without
> +	 * the objhash.
> +	 */
> +	h->robust_futex_list = (unsigned long)t->robust_list;
> +	h->robust_futex_head_len = sizeof(*t->robust_list);
> +#ifdef CONFIG_COMPAT
> +	h->compat_robust_futex_list = ptr_to_compat(t->compat_robust_list);
> +	h->compat_robust_futex_head_len = sizeof(*t->compat_robust_list);
> +#endif
> +}
> +
> +static inline void restore_task_robust_futex_list(struct ckpt_hdr_task *h)
> +{
> +	/* Since we restore the memory map the address remains the same and
> +	 * this is safe. This is the same as [compat_]sys_set_robust_list() */
> +	if (h->robust_futex_list) {
> +		struct robust_list_head __user *rfl = (void __user *)(unsigned long)h->robust_futex_list;
> +		do_set_robust_list(rfl, h->robust_futex_head_len);
> +	}
> +#ifdef CONFIG_COMPAT
> +	if (h->compat_robust_futex_list) {
> +		struct compat_robust_list_head __user *crfl = compat_ptr(h->compat_robust_futex_list);
> +		do_compat_set_robust_list(crfl, h->compat_robust_futex_head_len);
> +	}
> +#endif
> +}
> +#endif /* CONFIG_CHECKPOINT */
> +
>  #else
>  static inline void exit_robust_list(struct task_struct *curr)
>  {
> @@ -195,6 +232,15 @@ static inline void exit_robust_list(struct task_struct *curr)
>  static inline void exit_pi_state_list(struct task_struct *curr)
>  {
>  }
> +#ifdef CONFIG_CHECKPOINT
> +static inline void save_task_robust_futex_list(struct ckpt_hdr_task *h,
> +					      struct task_struct *t)
> +{
> +}
> +static inline void restore_task_robust_futex_list(struct ckpt_hdr_task *h)
> +{
> +}
> +#endif /* CONFIG_CHECKPOINT */
>  #endif
>  #endif /* __KERNEL__ */
>  
> diff --git a/kernel/futex.c b/kernel/futex.c
> index dfe246f..57a46c9 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -2261,13 +2261,7 @@ out:
>   * the list. There can only be one such pending lock.
>   */
>  
> -/**
> - * sys_set_robust_list - set the robust-futex list head of a task
> - * @head: pointer to the list-head
> - * @len: length of the list-head, as userspace expects
> - */
> -SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
> -		size_t, len)
> +long do_set_robust_list(struct robust_list_head __user *head, size_t len)
>  {
>  	if (!futex_cmpxchg_enabled)
>  		return -ENOSYS;
> @@ -2283,6 +2277,17 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
>  }
>  
>  /**
> + * sys_set_robust_list - set the robust-futex list head of a task
> + * @head: pointer to the list-head
> + * @len: length of the list-head, as userspace expects
> + */
> +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
> +		size_t, len)
> +{
> +	return do_set_robust_list(head, len);
> +}
> +
> +/**
>   * sys_get_robust_list - get the robust-futex list head of a task
>   * @pid: pid of the process [zero for current task]
>   * @head_ptr: pointer to a list-head pointer, the kernel fills it in
> diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
> index d607a5b..eac734c 100644
> --- a/kernel/futex_compat.c
> +++ b/kernel/futex_compat.c
> @@ -114,9 +114,9 @@ void compat_exit_robust_list(struct task_struct *curr)
>  	}
>  }
>  
> -asmlinkage long
> -compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
> -			   compat_size_t len)
> +long
> +do_compat_set_robust_list(struct compat_robust_list_head __user *head,
> +			  compat_size_t len)
>  {
>  	if (!futex_cmpxchg_enabled)
>  		return -ENOSYS;
> @@ -130,6 +130,13 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
>  }
>  
>  asmlinkage long
> +compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
> +			   compat_size_t len)
> +{
> +	return do_compat_set_robust_list(head, len);
> +}
> +
> +asmlinkage long
>  compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
>  			   compat_size_t __user *len_ptr)
>  {


More information about the Containers mailing list