[PATCH 4/5] c/r: fix restart failure due to a race with ghost/dead tasks

Oren Laadan orenl at librato.com
Wed Sep 30 18:47:13 PDT 2009


Ghost (and dead) tasks exit as soon as they find their checkpoint_ctx.
That may occur before other tasks even enter sys_restart(). If a ghost
child dies before its child enters the syscall, the child will receive
the death_signal which is set by userspace (in non-container restart).

This signal was supposedly skipped in reparent_thread() for restarting
task, but the PF_RESTARTING flag was tested on the child instead of
the parent; And the child was not in sys_restart, so .. bad luck.

This patch fixes reparent_threads() to test the parent's flag instead.

Also, if restart fails after some tasks have restore their state, then
the death_signal of such tasks is likely to have been reset. To ensure
proper cleanup in case of a failure, the coordinator tasks will force-
kill all those descendants whose checkpoint_ctx matches that of the
coordinator.

(To avoid a potential security issue here, prepare_descendants() was
modified to only allow setting the checkpoint_task of a descendant if
the coordinator has PTRACE_MODE_ATTACH permissions over it).

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 checkpoint/restart.c |   32 ++++++++++++++++++++++++++------
 kernel/exit.c        |    2 +-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index c021eaf..258e9eb 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -810,6 +810,11 @@ static int __prepare_descendants(struct task_struct *task, void *data)
 
 	ckpt_debug("consider task %d\n", task_pid_vnr(task));
 
+	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
+		ckpt_debug("stranger task %d\n", task_pid_vnr(task));
+		return -EPERM;
+	}
+
 	if (task_ptrace(task) & PT_PTRACED) {
 		ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
 		return -EBUSY;
@@ -935,6 +940,21 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
 	return 0;
 }
 
+static int __destroy_descendants(struct task_struct *task, void *data)
+{
+	struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
+
+	if (task->checkpoint_ctx == ctx)
+		force_sig(SIGKILL, task);
+
+	return 0;
+}
+
+static void destroy_descendants(struct ckpt_ctx *ctx)
+{
+	walk_task_subtree(ctx->root_task, __destroy_descendants, ctx);
+}
+
 static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 {
 	int ret;
@@ -972,8 +992,8 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 
 	/*
 	 * From now on we are committed to the restart. If anything
-	 * fails, we'll wipe out the entire subtree below us, to
-	 * ensure proper cleanup.
+	 * fails, we'll cleanup (that is, kill) those tasks in our
+	 * subtree that we marked for restart - see below.
 	 */
 
 	if (ctx->uflags & RESTART_TASKSELF) {
@@ -1009,13 +1029,13 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 		ckpt_debug("freezing restart tasks ... %d\n", ret);
 	}
  out:
-	if (ret < 0)
+	if (ret < 0) {
 		ckpt_set_ctx_error(ctx, ret);
-	else
+		destroy_descendants(ctx);
+	} else {
 		ckpt_set_ctx_success(ctx);
-
-	if (!(ctx->uflags & RESTART_TASKSELF))
 		wake_up_all(&ctx->waitq);
+	}
 
 	set_task_ctx(current, NULL);
 	return ret;
diff --git a/kernel/exit.c b/kernel/exit.c
index 41ac4cf..cfb0f34 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -744,7 +744,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
 	/* restarting zombie doesn't trigger signals */
-	if (p->pdeath_signal && !(p->flags & PF_RESTARTING))
+	if (p->pdeath_signal && !(father->flags & PF_RESTARTING))
 		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
 
 	list_move_tail(&p->sibling, &p->real_parent->children);
-- 
1.6.0.4



More information about the Containers mailing list