[PATCH 06/17][cr][v4]: Checkpoint file-locks

Oren Laadan orenl at cs.columbia.edu
Thu Sep 16 17:03:06 PDT 2010



On 08/16/2010 03:43 PM, Sukadev Bhattiprolu wrote:
> While checkpointing each file-descriptor, find all the locks on the
> file and save information about the lock in the checkpoint-image.
> A follow-on patch will use this informaiton to restore the file-locks.
> 
> Changelog[v4]:
> 	[Oren Laadan]: For consistency with other such objects, replace
> 	the "marker lock" checkpoint with a checkpoint of a count of the
> 	file-locks before the first file-lock of each file.
> 
> Changelog[v3]:
> 	[Oren Laadan] Add a missing (loff_t) type cast and use a macro
> 		to set the marker/dummy file lock
> 
> Changelog[v2]:
> 	[Matt Helsley]: Use fixed sizes (__s64) instead of 'loff_t' in
> 		'struct ckpt_hdr_file_lock'.
> 	[Matt Helsley, Serge Hallyn]: Highlight new use of BKL (using
> 		lock_flocks() macros as suggested by Serge).
> 	[Matt Helsley]: Reorg code a bit to simplify error handling.
> 	[Matt Helsley]: Reorg code to initialize marker-lock (Pass a
> 		NULL lock to checkpoint_one_lock() to indicate marker).
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
> ---
>  fs/checkpoint.c                |  142 ++++++++++++++++++++++++++++++++++++----
>  include/linux/checkpoint_hdr.h |   17 +++++
>  2 files changed, 146 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/checkpoint.c b/fs/checkpoint.c
> index be9d39a..47aa802 100644
> --- a/fs/checkpoint.c
> +++ b/fs/checkpoint.c
> @@ -26,8 +26,19 @@
>  #include <linux/checkpoint.h>
>  #include <linux/eventpoll.h>
>  #include <linux/eventfd.h>
> +#include <linux/smp_lock.h>
>  #include <net/sock.h>
>  
> +/*
> + * TODO: This code uses the BKL for consistency with other uses of
> + * 	 'for_each_lock()'. But since the BKL may be replaced with another
> + * 	 lock in the future, use lock_flocks() macros instead. lock_flocks()
> + * 	 are currently used in BKL-fix sand boxes and when those changes
> + * 	 are merged, the following macros can be removed
> + */
> +#define lock_flocks()		lock_kernel()
> +#define unlock_flocks()	unlock_kernel()
> +
>  /**************************************************************************
>   * Checkpoint
>   */
> @@ -256,8 +267,120 @@ static int checkpoint_file(struct ckpt_ctx *ctx, void *ptr)
>  	return ret;
>  }
>  
> +static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, 
> +		struct file_lock *lock)
> +{
> +	int rc;
> +	struct ckpt_hdr_file_lock *h;
> +
> +	if (!IS_POSIX(lock)) {
> +		/* Hmm, we should have caught this while counting locks */
> +		return -EBADF;
> +	}
> +
> +	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	h->fl_start = lock->fl_start;
> +	h->fl_end = lock->fl_end;
> +	h->fl_type = lock->fl_type;
> +	h->fl_flags = lock->fl_flags;
> +
> +	rc = ckpt_write_obj(ctx, &h->h);
> +
> +	ckpt_hdr_put(ctx, h);
> +
> +	return rc;
> +}
> +
> +static int checkpoint_file_lock_count(struct ckpt_ctx *ctx, int num_locks)
> +{
> +	int rc;
> +	struct ckpt_hdr_file_lock_count *h;
> +
> +	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK_COUNT);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	h->nr_locks = num_locks;
> +
> +	rc = ckpt_write_obj(ctx, &h->h);
> +
> +	ckpt_hdr_put(ctx, h);
> +
> +	return rc;
> +}
> +
> +int
> +checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files,
> +		struct file *file)
> +{
> +	int n;
> +	int rc;
> +	struct inode *inode;
> +	struct file_lock **lockpp;
> +	struct file_lock *lockp;
> +
> +	lock_flocks();
> +
> +	/*
> +	 * First count the number of file-locks on this file
> +	 */
> +	n = 0;
> +	rc = -EBADF;
> +	inode = file->f_path.dentry->d_inode;
> +	for_each_lock(inode, lockpp) {
> +		lockp = *lockpp;
> +		if (lockp->fl_owner != files)
> +			continue;
> +
> +		ckpt_debug("Lock [%lld, %lld, %d, 0x%x]\n", lockp->fl_start,
> +				lockp->fl_end, lockp->fl_type, lockp->fl_flags);
> +
> +		if (lockp->fl_owner != files)
> +			continue;
> +
> +		if (IS_POSIX(lockp))
> +			n++;
> +		else {
> +			ckpt_err(ctx,  rc, "%(T), checkpoint of lock "
> +					"[%lld, %lld, %d, 0x%x] failed\n",
> +					lockp->fl_start, lockp->fl_end,
> +					lockp->fl_type, lockp->fl_flags);
> +			goto out;
> +		}
> +	}
> +
> +	/*
> +	 * Checkpoint the count of file-locks
> +	 */
> +	rc = checkpoint_file_lock_count(ctx, n);
> +	if (rc < 0) {
> +		ckpt_err(ctx,  rc, "%(T), checkpoint file-lock count failed\n");
> +		goto out;
> +	}
> +
> +	/*
> +	 * Make a second pass and checkpoint file-locks themselves.
> +	 */
> +	for_each_lock(inode, lockpp) {
> +		lockp = *lockpp;
> +		if (lockp->fl_owner != files)
> +			continue;
> +
> +		rc = checkpoint_one_file_lock(ctx, lockp);
> +		if (rc < 0)
> +			goto out;
> +	}
> +
> +out:
> +	unlock_flocks();
> +	return rc;
> +}
> +
>  /**
> - * ckpt_write_file_desc - dump the state of a given file descriptor
> + * checkpoint_file_desc - dump the state of a given file descriptor
>   * @ctx: checkpoint context
>   * @files: files_struct pointer
>   * @fd: file descriptor
> @@ -288,18 +411,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
>  	}
>  	rcu_read_unlock();
>  
> -	ret = find_locks_with_owner(file, files);
> -	/*
> -	 * find_locks_with_owner() returns an error when there
> -	 * are no locks found, so we *want* it to return an error
> -	 * code.  Its success means we have to fail the checkpoint.
> -	 */
> -	if (!ret) {
> -		ret = -EBADF;
> -		ckpt_err(ctx, ret, "%(T)fd %d has file lock or lease\n", fd);
> -		goto out;
> -	}
> -
>  	/* sanity check (although this shouldn't happen) */
>  	ret = -EBADF;
>  	if (!file) {
> @@ -323,6 +434,11 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
>  	h->fd_close_on_exec = coe;
>  
>  	ret = ckpt_write_obj(ctx, &h->h);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = checkpoint_file_locks(ctx, files, file);
> +
>  out:
>  	ckpt_hdr_put(ctx, h);
>  	if (file)
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 0381019..14b287f 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -144,6 +144,10 @@ enum {
>  #define CKPT_HDR_TTY_LDISC CKPT_HDR_TTY_LDISC
>  	CKPT_HDR_EPOLL_ITEMS,  /* must be after file-table */
>  #define CKPT_HDR_EPOLL_ITEMS CKPT_HDR_EPOLL_ITEMS
> +	CKPT_HDR_FILE_LOCK_COUNT,
> +#define CKPT_HDR_FILE_LOCK_COUNT CKPT_HDR_FILE_LOCK_COUNT
> +	CKPT_HDR_FILE_LOCK,
> +#define CKPT_HDR_FILE_LOCK CKPT_HDR_FILE_LOCK
>  
>  	CKPT_HDR_MM = 401,
>  #define CKPT_HDR_MM CKPT_HDR_MM
> @@ -586,6 +590,19 @@ struct ckpt_hdr_file_generic {
>  	struct ckpt_hdr_file common;
>  } __attribute__((aligned(8)));
>  
> +struct ckpt_hdr_file_lock_count {
> +       struct ckpt_hdr h;
> +       __u32 nr_locks;
> +};
> +
> +struct ckpt_hdr_file_lock {
> +       struct ckpt_hdr h;
> +       __s64 fl_start;
> +       __s64 fl_end;
> +       __u8 fl_type;
> +       __u8 fl_flags;
> +};
> +

You forgot to add "__attribute__((aligned(8)));".

[maybe picking.. but-] Giving a "whole" object per lock seems
wasteful; I was hoping for something similar to how we do the
'struct ckpt_hdr_sigpending', e.g.

struct ckpt_file_lock {
	__s64 fl_start;
	__s64 fl_end;
	__u8 fl_type;
	__u8 fl_flags;
} __attribute__((aligned(8)));

struct ckpt_hdr_file_locks {
	struct ckpt_hdr h;
	__u32 nr_locks;
	struct ckpt_file_lock locks[0];
} __attribute__((aligned(8)));

You could use ckpt_write_obj_type() with null pointer, and then
follow with the payload in chunks in the loop in checkpoint_file_locks()

Makes sense ?

Oren.


More information about the Containers mailing list