[RFC v14-rc][PATCH 17/23] Record 'struct file' object instead of the file name for VMAs

Oren Laadan orenl at cs.columbia.edu
Fri Mar 20 11:47:42 PDT 2009


The vma->vm_file can be an arbitrary file pointer, including one that
is in use by a process as well and provided originally via the mmap()
syscall.

Thus, when dumping the state of a VMA, save a file object instead
of only the file name. As with other file objects, if it's seen for
the first time it is dumped entirely, otherwise only the 'objref' is
saved. The restart logic updated accordingly.

Changelog[v14]:
  - Introduce patch

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 checkpoint/checkpoint.c        |    5 +++
 checkpoint/ckpt_file.c         |    2 +-
 checkpoint/ckpt_mem.c          |   49 ++++++++++++++++---------
 checkpoint/rstr_file.c         |    2 +-
 checkpoint/rstr_mem.c          |   79 +++++++++++++++++++++++++++-------------
 include/linux/checkpoint.h     |    2 +
 include/linux/checkpoint_hdr.h |    2 +-
 7 files changed, 94 insertions(+), 47 deletions(-)

diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 89d57ad..1bd2dce 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -118,6 +118,11 @@ int cr_write_fname(struct cr_ctx *ctx, struct path *path, struct path *root)
 	char *buf, *fname;
 	int ret, flen;
 
+	/*
+	 * FIXME: we can optimize and save memory (and storage) if we
+	 * share strings (through objhash) and reference them instead
+	 */
+
 	flen = PATH_MAX;
 	buf = kmalloc(flen, GFP_KERNEL);
 	if (!buf)
diff --git a/checkpoint/ckpt_file.c b/checkpoint/ckpt_file.c
index 7444402..2a83b95 100644
--- a/checkpoint/ckpt_file.c
+++ b/checkpoint/ckpt_file.c
@@ -201,7 +201,7 @@ cr_inode_to_objref(struct cr_ctx *ctx, struct inode *inode, int type, int *new)
 }
 
 /* cr_write_file - dump the state of a given file pointer */
-static int cr_write_file(struct cr_ctx *ctx, struct file *file)
+int cr_write_file(struct cr_ctx *ctx, struct file *file)
 {
 	struct cr_hdr h;
 	struct cr_hdr_file *hh;
diff --git a/checkpoint/ckpt_mem.c b/checkpoint/ckpt_mem.c
index e479b2f..6975c87 100644
--- a/checkpoint/ckpt_mem.c
+++ b/checkpoint/ckpt_mem.c
@@ -440,7 +440,10 @@ static int cr_write_vma(struct cr_ctx *ctx, struct vm_area_struct *vma)
 {
 	struct cr_hdr h;
 	struct cr_hdr_vma *hh;
-	int vma_type, ret;
+	int vma_type;
+	int objref = 0;
+	int new = 0;
+	int ret;
 
 	h.type = CR_HDR_VMA;
 	h.len = sizeof(*hh);
@@ -460,36 +463,46 @@ static int cr_write_vma(struct cr_ctx *ctx, struct vm_area_struct *vma)
 
 	if (vma->vm_flags & CR_BAD_VM_FLAGS) {
 		pr_warning("c/r: unsupported VMA %#lx\n", vma->vm_flags);
-		cr_hbuf_put(ctx, sizeof(*hh));
-		return -ENOSYS;
+		ret = -ENOSYS;
+		goto out;
 	}
 
-	/* by default assume anon memory */
-	vma_type = CR_VMA_ANON;
+	vma_type = CR_VMA_ANON;  /* by default assume anon memory */
 
-	/*
-	 * if there is a backing file, assume private-mapped
-	 * (FIXME: check if the file is unlinked)
-	 */
 	if (vma->vm_file)
-		vma_type = CR_VMA_FILE;
+		vma_type = CR_VMA_FILE;		/* assume private-mapped */
+
+	/* if file-backed, add 'file' to the hash (will keep a reference) */
+	if (vma->vm_file) {
+		new = cr_obj_add_ptr(ctx, vma->vm_file,
+				     &objref, CR_OBJ_FILE, 0);
+		cr_debug("vma %p objref %d file %p)\n",
+			 vma, objref, vma->vm_file);
+		if (new < 0) {
+			ret  = new;
+			goto out;
+		}
+	}
 
 	hh->vma_type = vma_type;
+	hh->vma_objref = objref;
 
 	ret = cr_write_obj(ctx, &h, hh);
-	cr_hbuf_put(ctx, sizeof(*hh));
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	/* save the file name */
-	/* FIXME: files should be deposited and sought in the objhash */
-	if (vma->vm_file) {
-		ret = cr_write_fname(ctx, &vma->vm_file->f_path, &ctx->fs_mnt);
+	/* new==1 if-and-only-if file was newly added to hash */
+	if (new) {
+		ret = cr_write_file(ctx, vma->vm_file);
 		if (ret < 0)
-			return ret;
+			goto out;
 	}
 
-	return cr_write_private_vma_contents(ctx, vma);
+	ret = cr_write_private_vma_contents(ctx, vma);
+
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
 }
 
 int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t)
diff --git a/checkpoint/rstr_file.c b/checkpoint/rstr_file.c
index 2cbc308..5d2f84c 100644
--- a/checkpoint/rstr_file.c
+++ b/checkpoint/rstr_file.c
@@ -232,7 +232,7 @@ static int cr_read_fd_file(struct cr_ctx *ctx, struct cr_hdr_file *hh)
 #define CR_SETFL_MASK (O_APPEND|O_NONBLOCK|O_NDELAY|FASYNC|O_DIRECT|O_NOATIME)
 
 /* cr_read_file - restore the state of a given file pointer */
-static int cr_read_file(struct cr_ctx *ctx, int objref)
+int cr_read_file(struct cr_ctx *ctx, int objref)
 {
 	struct cr_hdr_file *hh;
 	struct file *file;
diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c
index 50309b6..5f46f11 100644
--- a/checkpoint/rstr_mem.c
+++ b/checkpoint/rstr_mem.c
@@ -18,6 +18,7 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/err.h>
+#include <linux/syscalls.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
@@ -204,6 +205,40 @@ static unsigned long cr_calc_map_flags_bits(unsigned long orig_vm_flags)
 	return vm_flags;
 }
 
+/*
+ * cr_vma_read_file - prepare a file object required for a vma
+ * @ctx - restart context
+ * @objref - objref of file object
+ *
+ * If the file object is found, will grab a reference to the pointer
+ * that the caller will need release.
+ */
+static struct file *cr_vma_read_file(struct cr_ctx *ctx, int objref)
+{
+	struct file *file;
+	int fd;
+
+	file = cr_obj_get_by_ref(ctx, objref, CR_OBJ_FILE);
+	if (IS_ERR(file))
+		return file;
+
+	/* if object found in objhash - use it */
+	if (file) {
+		get_file(file);
+		return file;
+	}
+
+	/* get (or construct) the respective file object */
+	fd = cr_read_file(ctx, objref);
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	file = fget(fd);
+	sys_close(fd);
+
+	return file;
+}
+
 static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 {
 	struct cr_hdr_vma *hh;
@@ -218,14 +253,16 @@ static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 		return -ENOMEM;
 	ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_VMA);
 	if (ret < 0)
-		goto err;
+		goto out;
 
 	cr_debug("vma %#lx-%#lx type %d\n", (unsigned long) hh->vm_start,
 		 (unsigned long) hh->vm_end, (int) hh->vma_type);
 
 	ret = -EINVAL;
 	if (hh->vm_end < hh->vm_start)
-		goto err;
+		goto out;
+	if (hh->vma_objref <= 0)
+		goto out;
 
 	vm_start = hh->vm_start;
 	vm_pgoff = hh->vm_pgoff;
@@ -234,13 +271,11 @@ static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 	vm_flags = cr_calc_map_flags_bits(hh->vm_flags);
 	vma_type = hh->vma_type;
 
-	cr_hbuf_put(ctx, sizeof(*hh));
-
 	switch (vma_type) {
 
 	case CR_VMA_ANON:		/* anonymous private mapping */
 		if (vm_flags & VM_SHARED)
-			goto err;
+			goto out;
 		/*
 		 * vm_pgoff for anonymous mapping is the "global" page
 		 * offset (namely from addr 0x0), so we force a zero
@@ -250,23 +285,20 @@ static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 
 	case CR_VMA_FILE:		/* private mapping from a file */
 		if (vm_flags & VM_SHARED)
-			goto err;
-		/*
-		 * for private mapping using 'read-only' is sufficient
-		 */
-		file = cr_read_open_fname(ctx, O_RDONLY, 0);
+			goto out;
+		file = cr_vma_read_file(ctx, hh->vma_objref);
 		if (IS_ERR(file)) {
 			ret = PTR_ERR(file);
-			goto err;
+			file = NULL;
+			goto out;
 		}
 		break;
 
 	default:
-		goto err;
+		goto out;
 
 	}
 
-
 	down_write(&mm->mmap_sem);
 	addr = do_mmap_pgoff(file, vm_start, vm_size,
 			     vm_prot, vm_flags, vm_pgoff);
@@ -274,12 +306,10 @@ static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 	cr_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
 		 vm_size, vm_prot, vm_flags, vm_pgoff, addr);
 
-	/* the file (if opened) is now referenced by the vma */
-	if (file)
-		filp_close(file, NULL);
-
-	if (IS_ERR((void *) addr))
-		return PTR_ERR((void *) addr);
+	if (IS_ERR((void *) addr)) {
+		ret = PTR_ERR((void *) addr);
+		goto out;
+	}
 
 	/*
 	 * CR_VMA_ANON: read in memory as is
@@ -295,14 +325,11 @@ static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
 		break;
 	}
 
-	if (ret < 0)
-		return ret;
-
-	cr_debug("vma retval %d\n", ret);
-	return 0;
-
- err:
+ out:
+	if (file)
+		fput(file);
 	cr_hbuf_put(ctx, sizeof(*hh));
+	cr_debug("vma retval %d\n", ret);
 	return ret;
 }
 
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 3be3902..69d14c4 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -110,10 +110,12 @@ extern struct file *cr_read_open_fname(struct cr_ctx *ctx,
 extern int do_checkpoint(struct cr_ctx *ctx, pid_t pid);
 extern int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t);
 extern int cr_write_fd_table(struct cr_ctx *ctx, struct task_struct *t);
+extern int cr_write_file(struct cr_ctx *ctx, struct file *file);
 
 extern int do_restart(struct cr_ctx *ctx, pid_t pid);
 extern int cr_read_mm(struct cr_ctx *ctx);
 extern int cr_read_fd_table(struct cr_ctx *ctx);
+extern int cr_read_file(struct cr_ctx *ctx, int objref);
 
 #define cr_debug(fmt, args...)  \
 	pr_debug("[%d:c/r:%s] " fmt, task_pid_vnr(current), __func__, ## args)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 68c1f6b..bc054f2 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -123,7 +123,7 @@ enum vm_type {
 
 struct cr_hdr_vma {
 	__u32 vma_type;
-	__u32 _padding;
+	__u32 vma_objref;	/* for vma->vm_file */
 
 	__u64 vm_start;
 	__u64 vm_end;
-- 
1.5.4.3



More information about the Containers mailing list