[PATCH 3/3] restart: correctly handle pgid/ppid/sid = 0

Oren Laadan orenl at librato.com
Fri Sep 4 07:20:54 PDT 2009


If pgid, or sid, or ppid is zero, it means that they point to a task
from the ancestor pid-ns. Make sure 'mktree' handles these correctly.

If a 0 pid (pgid/sid/ppid) exists, then force the --pidns option to be
on - to ensure that we have a new pid-ns (and thus a parent pid-ns).

Ignore pid = 0 when setting up new tasks in ckpt_setup_task().

Improve sanity checks for pid 0 and tgid 0 (both are impossible).

Allow ppid = 0 only for root_task. For others, allow sid = 0.

Introduce dummy zero_task that represents the parent of root_task,
likely from ancestor pid-ns. Setting a creator to root_task simplifies
the logic.

Also, ensure that a the tgid of threads is valid (has an entry in the
pids array) even if the thread group leader died.

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 restart.c |  126 +++++++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/restart.c b/restart.c
index a7f9d22..3128d4e 100644
--- a/restart.c
+++ b/restart.c
@@ -63,8 +63,8 @@ static char usage_str[] =
  * the time of the checkpoint may be already in use then. Therefore,
  * by default, 'mktree' creates an equivalen tree without restoring
  * the original pids, assuming that the application can tolerate this.
- * For this, the 'ckpt_hdr_pids' array is transformed on-the-fly
- * before it is fed to the kernel.
+ * For this, the 'ckpt_pids' array is transformed on-the-fly before it
+ * is fed to the kernel.
  *
  * By default, "--pids" implied "--pidns" and vice-versa. The user can
  * use "--pids --no-pidns" for a restart in the currnet namespace -
@@ -144,6 +144,9 @@ struct task {
 	pid_t real_parent;	/* pid of task's real parent */
 };
 
+/* zero_task represents creator of root_task (all pids 0) */
+struct task zero_task;
+
 #define TASK_ROOT	0x1	/* root task */
 #define TASK_GHOST	0x2	/* dead task (pid used as sid/pgid) */
 #define TASK_THREAD	0x4	/* thread (non leader) */
@@ -153,7 +156,7 @@ struct task {
 #define TASK_DEAD	0x40	/* dead task (dummy) */
 
 struct ckpt_ctx {
-	pid_t init_pid;
+	pid_t root_pid;
 	int pipe_in;
 	int pipe_out;
 	int pids_nr;
@@ -196,6 +199,7 @@ int global_sent_sigint;
 
 static int ckpt_build_tree(struct ckpt_ctx *ctx);
 static int ckpt_init_tree(struct ckpt_ctx *ctx);
+static int ckpt_need_pidns(struct ckpt_ctx *ctx);
 static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task);
 static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task);
 static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *session);
@@ -545,6 +549,16 @@ int main(int argc, char *argv[])
 	if (ret < 0)
 		exit(1);
 
+	/*
+	 * For a pgid/sid == 0, the corresponding restarting task will
+	 * expect to reference the parent pid-ns (of entire restart).
+	 * We ensure that one does exist by setting ctx.args->pidns.
+	 */
+	if (!ctx.args->pidns && ckpt_need_pidns(&ctx)) {
+		ckpt_dbg("found pgid/sid 0, need pidns\n");
+		ctx.args->pidns = 1;
+	}
+
 	if (ctx.args->pidns && ctx.tasks_arr[0].pid != 1) {
 		ckpt_dbg("new pidns without init\n");
 		if (global_send_sigint == -1)
@@ -821,8 +835,6 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
 	/* assign a creator to each task */
 	for (i = 0; i < ctx->tasks_nr; i++) {
 		task = &ctx->tasks_arr[i];
-		if (task == ckpt_init_task(ctx))
-			continue;
 		if (task->creator)
 			continue;
 		if (ckpt_set_creator(ctx, task) < 0) {
@@ -837,7 +849,7 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
 		task = &ctx->tasks_arr[i];
 		ckpt_dbg("[%d] pid %d ppid %d sid %d creator %d",
 			 i, task->pid, task->ppid, task->sid,
-		       task->creator ? task->creator->pid : 0);
+			 task->creator->pid);
 		if (task->next_sib)
 			ckpt_dbg_cont(" next %d", task->next_sib->pid);
 		if (task->prev_sib)
@@ -862,7 +874,10 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
 {
 	struct task *task;
 
-	if (hash_lookup(ctx, pid))
+	if (pid == 0)  /* ignore if outside namespace */
+		return 0;
+
+	if (hash_lookup(ctx, pid))  /* already handled */
 		return 0;
 
 	task = &ctx->tasks_arr[ctx->tasks_nr++];
@@ -896,7 +911,7 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
 
 static int ckpt_init_tree(struct ckpt_ctx *ctx)
 {
-	struct ckpt_hdr_pids *pids_arr = ctx->pids_arr;
+	struct ckpt_pids *pids_arr = ctx->pids_arr;
 	int pids_nr = ctx->pids_nr;
 	struct task *task;
 	pid_t root_sid;
@@ -908,16 +923,6 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 	root_sid = pids_arr[0].vsid;
 	root_pgid = pids_arr[0].vpgid;
 
-	/* XXX for out-of-container subtrees */
-	for (i = 0; i < pids_nr; i++) {
-		if (pids_arr[i].vsid == root_sid)
-			pids_arr[i].vsid = root_pid;
-		if (pids_arr[i].vpgid == root_sid)
-			pids_arr[i].vpgid = root_pid;
-		if (pids_arr[i].vpgid == root_pgid)
-			pids_arr[i].vpgid = root_pid;
-	}
-
 	/* populate with known tasks */
 	for (i = 0; i < pids_nr; i++) {
 		task = &ctx->tasks_arr[i];
@@ -938,6 +943,14 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 		task->rpid = -1;
 		task->ctx = ctx;
 
+		if (task->pid == 0) {
+			ckpt_err("Invalid pid 0 for task#%d\n", i);
+			return -1;
+		} else if (task->tgid == 0) {
+			ckpt_err("Invalid tgid 0 for task#%d\n", i);
+			return -1;
+		}
+
 		if (hash_insert(ctx, task->pid, task) < 0)
 			return -1;
 	}
@@ -946,31 +959,60 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 
 	/* add pids unaccounted for (no tasks) */
 	for (i = 0; i < pids_nr; i++) {
-		/* session leader's parent is root task */
-		if (ckpt_setup_task(ctx, pids_arr->vsid, root_pid) < 0)
+		pid_t sid;
+
+		sid = pids_arr[i].vsid;
+
+		/*
+		 * An unaccounted-for sid belongs to a task that was a
+		 * session leader and died. We can safe set its parent
+		 * (and creator) to be the root task.
+		 */
+		if (ckpt_setup_task(ctx, sid, root_pid) < 0)
 			return -1;
 
 		/*
-		 * If pgrp != sid, pgrp owner's parent is sid. Other
-		 * tasks with same pgrp will need to have threir sid
-		 * matching, too, when the kernel restores their pgrp.
-		 * If pgrp == sid, then the call above would have
-		 * ensured that the pid is hashed: ckpt_setup_task()
-		 * will return promptly.
+		 * If a pid belongs to a dead thread group leader, we
+		 * need to add it with the same sid as current (and
+		 * other) threads.
 		 */
-		if (ckpt_setup_task(ctx, pids_arr->vpgid, pids_arr->vsid) < 0)
+		if (ckpt_setup_task(ctx, pids_arr[i].vtgid, sid) < 0)
 			return -1;
 
-		pids_arr++;
+		/*
+		 * If pgrp == sid, then the pgrp/sid will already have
+		 * been hashed by now (e.g. by the call above) and the
+		 * ckpt_setup_task() will return promptly.
+		 * If pgrp != sid, then the pgrp 'owner' must have the
+		 * same sid as us: all tasks with same pgrp must have
+		 * their sid matching.
+		 */
+		if (ckpt_setup_task(ctx, pids_arr[i].vpgid, sid) < 0)
+			return -1;
 	}
 
-	/* mark root task(s) */
-	ctx->tasks_arr[0].flags |= TASK_ROOT;
+	/* mark root task(s), and set its "creator" to be zero_task */
+	ckpt_init_task(ctx)->flags |= TASK_ROOT;
+	ckpt_init_task(ctx)->creator = &zero_task;
 
 	ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
 	return 0;
 }
 
+static int ckpt_need_pidns(struct ckpt_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < ctx->pids_nr; i++) {
+		if (ctx->pids_arr[i].vpid == 0 ||
+		    ctx->pids_arr[i].vpgid == 0 ||
+		    ctx->pids_arr[i].vsid == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Algorithm DumpForest
  * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
@@ -1043,10 +1085,20 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
 	struct task *creator;
 
 	if (task == ckpt_init_task(ctx)) {
-		ckpt_err("pid %d: no init creator\n", ckpt_init_task(ctx)->pid);
+		ckpt_err("pid %d: logical error\n", ckpt_init_task(ctx)->pid);
 		return -1;
 	}
 
+	/* only root_task can have ppid == 0, parent must always exist */
+	if (task->ppid == 0 || !parent) {
+		ckpt_err("pid %d: invalid ppid %d\n", task->pid, task->ppid);
+		return -1;
+	}
+
+	/* sid == 0 must have been inherited from outside the container */
+	if (task->sid == 0)
+		session = ckpt_init_task(ctx);
+
 	if (task->tgid != task->pid) {
 		/* thread: creator is thread-group-leader */
 		ckpt_dbg("pid %d: thread tgid %d\n", task->pid, task->tgid);
@@ -1073,7 +1125,7 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
 		creator = session->phantom;
 	} else {
 		/* first make sure we know the session's creator */
-		if (!session->creator && session != ckpt_init_task(ctx)) {
+		if (!session->creator) {
 			/* (non-session-leader) recursive: session's creator */
 			ckpt_dbg("pid %d: recursive session creator %d\n",
 			       task->pid, task->sid);
@@ -1206,7 +1258,7 @@ static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *task)
 		ckpt_dbg("pid %d: moving up to %d\n", task->pid, creator->pid);
 		task = creator;
 
-		if(!task->creator && task != ckpt_init_task(ctx)) {
+		if(!task->creator) {
 			if (ckpt_set_creator(ctx, task) < 0)
 				return -1;
 		}
@@ -1389,7 +1441,7 @@ int ckpt_fork_stub(void *data)
 
 	/* root has some extra work */
 	if (task->flags & TASK_ROOT) {
-		ctx->init_pid = _getpid();
+		ctx->root_pid = _getpid();
 		ckpt_dbg("root task pid %d\n", _getpid());
 	}
 
@@ -1513,13 +1565,13 @@ static int ckpt_fork_feeder(struct ckpt_ctx *ctx)
 static void ckpt_abort(struct ckpt_ctx *ctx, char *str)
 {
 	perror(str);
-	kill(-(ctx->init_pid), SIGKILL);
+	kill(-(ctx->root_pid), SIGKILL);
 	exit(1);
 }
 
 /*
  * feeder process: delegates checkpoint image stream to the kernel.
- * In '--no-pids' mode, transform the pids array (struct ckpt_hdr_pids)
+ * In '--no-pids' mode, transform the pids array (struct ckpt_pids)
  * on the fly and feed the result to the "init" task of the restart
  */
 static int ckpt_do_feeder(void *data)
@@ -1567,7 +1619,7 @@ static int ckpt_do_feeder(void *data)
 }
 
 /*
- * ckpt_adjust_pids: transform the pids array (struct ckpt_hdr_pids) by
+ * ckpt_adjust_pids: transform the pids array (struct ckpt_pids) by
  * substituing actual pid values for original pid values.
  *
  * Collect pids reported by the newly created tasks; each task sends
-- 
1.6.0.4



More information about the Containers mailing list