[PATCH 3/3] restart: allow subtree restart of image with sid/pgid zero

Oren Laadan orenl at librato.com
Tue Nov 10 14:04:52 PST 2009


In a checkpoint of a child namespace, it is possible that sid or pgid
will be 0 (meaning, they are from a process outside that namespace).

Restarting from such an image with --pidns is easy, and works well.
However, restarting with --no-pidns didn't work because we can't deal
with zero pids.

This patch fixes the problem for subtree restart (--no-pidns). If it
finds sid/pgid that are zero, it creates a special ghost task, and
substitutes its pid for all those 0's.

(Also get rid of un-squashing of sid/pgid == 0 in ckpt_adjust_pids()
because there should no longer be any 0's there).

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 restart.c |  127 +++++++++++++++++++++++++++++++++++++------------------------
 1 files changed, 77 insertions(+), 50 deletions(-)

diff --git a/restart.c b/restart.c
index 2efe879..0c438c2 100644
--- a/restart.c
+++ b/restart.c
@@ -250,9 +250,6 @@ struct task zero_task;
 #define TASK_NEWPID	0x20	/* starts a new pid namespace */
 #define TASK_DEAD	0x40	/* dead task (dummy) */
 
-#define TASK_ZERO_SID	0x100	/* sid was temporarily zeroed */
-#define TASK_ZERO_PGID	0x200	/* pgid was temporarily zeroed */
-
 struct ckpt_ctx {
 	pid_t root_pid;
 	int pipe_in;
@@ -1239,6 +1236,41 @@ static int ckpt_valid_pid(struct ckpt_ctx *ctx, pid_t pid, char *which, int i)
 	return 1;
 }
 
+static int ckpt_alloc_pid(struct ckpt_ctx *ctx)
+{
+	int n = 0;
+
+	/*
+	 * allocate an unused pid for the placeholder
+	 * (this will become inefficient if pid-space is exhausted)
+	 */
+	do {
+		if (ctx->tasks_pid == INT_MAX)
+			ctx->tasks_pid = 2;
+		else
+			ctx->tasks_pid++;
+
+		if (n++ == INT_MAX) {	/* ohhh... */
+			ckpt_err("pid namsepace exhausted");
+			return -1;
+		}
+	} while (hash_lookup(ctx, ctx->tasks_pid));
+
+	return ctx->tasks_pid;
+}
+
+static int ckpt_zero_pid(struct ckpt_ctx *ctx)
+{
+	pid_t pid;
+
+	pid = ckpt_alloc_pid(ctx);
+	if (pid < 0)
+		return -1;
+	if (ckpt_setup_task(ctx, pid, ctx->pids_arr[0].vpid) < 0)
+		return -1;
+	return pid;
+}
+
 static int ckpt_init_tree(struct ckpt_ctx *ctx)
 {
 	struct ckpt_pids *pids_arr = ctx->pids_arr;
@@ -1246,6 +1278,7 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 	struct task *task;
 	pid_t root_pid;
 	pid_t root_sid;
+	pid_t zero_pid = 0;
 	int i;
 
 	root_pid = pids_arr[0].vpid;
@@ -1257,16 +1290,20 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 	 * same as root_pid or 0), and root_sid was inherited from an
 	 * ancestor of that subtree.
 	 *
-	 * So we make the root-task also inherit sid from its ancestor
-	 * (== coordinator), whatever 'restart' task currently has.
-	 * For that, we force the root-task's sid and all references
-	 * to it from other tasks (via sid and pgid), to 0. Later, the
-	 * feeder will substitute the cooridnator's sid for them.
+	 * If we restart with --pidns, make the root-task also inherit
+	 * sid from its ancestor (== coordinator), whatever 'restart'
+	 * task currently has.  For that, we force the root-task's sid
+	 * and all references to it from other tasks (via sid and
+	 * pgid), to 0. Later, the feeder will substitute the
+	 * coordinator's sid for them.
 	 *
 	 * (Note that this still works even if the coordinator's sid
 	 * is "used" by a restarting task: a new-pidns restart will
 	 * fail because the pid is in use, and in an old-pidns restart
 	 * the task will be assigned a new pid anyway).
+	 *
+	 * If we restart with --no-pidns, we'll add a ghost task below
+	 * whose pid will be used instead of these zeroed entries.
 	 */
 
 	/* forcing root_sid to -1, will make comparisons below fail */
@@ -1288,15 +1325,10 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 		else if (!ckpt_valid_pid(ctx, pids_arr[i].vpgid, "pgid", i))
 			return -1;
 
-		/* zero references to root_sid (root_sid != root_pid) */
-		if (pids_arr[i].vsid == root_sid) {
-			task->flags |= TASK_ZERO_SID;
+		if (pids_arr[i].vsid == root_sid)
 			pids_arr[i].vsid = 0;
-		}
-		if (pids_arr[i].vpgid == root_sid) {
-			task->flags |= TASK_ZERO_PGID;
+		if (pids_arr[i].vpgid == root_sid)
 			pids_arr[i].vpgid = 0;
-		}
 
 		task->pid = pids_arr[i].vpid;
 		task->ppid = pids_arr[i].vppid;
@@ -1324,6 +1356,9 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 
 		sid = pids_arr[i].vsid;
 
+		/* Remember if we find any zero vsid/vpgid - see below */
+		if (pids_arr[i].vsid == 0 || pids_arr[i].vpgid == 0)
+			zero_pid = 1;
 		/*
 		 * An unaccounted-for sid belongs to a task that was a
 		 * session leader and died. We can safe set its parent
@@ -1360,6 +1395,27 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 			return -1;
 	}
 
+	/*
+	 * Zero sid/pgid is disallowed in --no-pidns mode. If there
+	 * were any, we invent a new ghost-zero task and substitute
+	 * its pid for any such sid/pgid.
+	 */
+	if (zero_pid && !ctx->args->pidns) {
+		zero_pid = ckpt_zero_pid(ctx);
+		if (zero_pid < 0)
+			return -1;
+		for (i = 0; i < pids_nr; i++) {
+			if (pids_arr[i].vsid == 0) {
+				pids_arr[i].vsid = zero_pid;
+				pids_arr[i].vppid = zero_pid;
+			}
+			if (pids_arr[i].vpgid == 0) {
+				pids_arr[i].vpgid = zero_pid;
+				pids_arr[i].vppid = zero_pid;
+			}
+		}
+	}
+
 	/* mark root task(s), and set its "creator" to be zero_task */
 	ckpt_init_task(ctx)->flags |= TASK_ROOT;
 	ckpt_init_task(ctx)->creator = &zero_task;
@@ -1534,7 +1590,7 @@ static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task)
 {
 	struct task *session = hash_lookup(ctx, task->sid);
 	struct task *holder = &ctx->tasks_arr[ctx->tasks_nr++];
-	int n = 0;
+	pid_t pid;
 
 	if (ctx->tasks_nr > ctx->tasks_max) {
 		/* shouldn't happen, beacuse we prepared enough */
@@ -1542,27 +1598,15 @@ static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task)
 		return -1;
 	}
 
-	/*
-	 * allocate an unused pid for the placeholder
-	 * (this will become inefficient if pid-space is exhausted)
-	 */
-	do {
-		if (ctx->tasks_pid == INT_MAX)
-			ctx->tasks_pid = 2;
-		else
-			ctx->tasks_pid++;
-
-		if (n++ == INT_MAX) {	/* ohhh... */
-			ckpt_err("pid namsepace exhausted");
-			return -1;
-		}
-	} while (hash_lookup(ctx, ctx->tasks_pid));
+	pid = ckpt_alloc_pid(ctx);
+	if (pid < 0)
+		return -1;
 
 	holder->flags = TASK_DEAD;
 
-	holder->pid = ctx->tasks_pid;
+	holder->pid = pid;
 	holder->ppid = ckpt_init_task(ctx)->pid;
-	holder->tgid = holder->pid;
+	holder->tgid = pid;
 	holder->sid = task->sid;
 
 	holder->children = NULL;
@@ -2097,23 +2141,6 @@ static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
 		}
 	}
 
-	if (!ctx->args->pidns) {
-		/*
-		 * If a task's {sid,pgid} was zeroed out (in ckpt_init_tree)
-		 * then substitute the coordinator's sid for it now. (This
-		 * should leave no more 0's in restart of subtree-checkpoint).
-		 *
-		 * NOTE: thanks to the construction of tasks_arr[], the first
-		 * ctx->pid_nr entries in both arrays match (the same pids).
-		 */
-		for (m = 0; m < ctx->pids_nr; m++) {
-			if (ctx->tasks_arr[m].flags & TASK_ZERO_SID)
-				ctx->copy_arr[m].vsid = coord_sid;
-			if (ctx->tasks_arr[m].flags & TASK_ZERO_PGID)
-				ctx->copy_arr[m].vpgid = coord_sid;
-		}
-	}
-
 	memcpy(ctx->pids_arr, ctx->copy_arr, len);
 
 #ifdef CHECKPOINT_DEBUG
-- 
1.6.0.4



More information about the Containers mailing list