[PATCH 20/38] C/R: i386 support

Alexey Dobriyan adobriyan at gmail.com
Thu May 21 21:55:14 PDT 2009


Segment registers are abstracted to allow i386 => x86_64
migration. (BTW, I'm not sure whether just making the 32-bit
selectors the same would achieve the same effect.)

Signed-off-by: Alexey Dobriyan <adobriyan at gmail.com>
---
 arch/x86/include/asm/unistd_32.h   |    2 +
 arch/x86/kernel/syscall_table_32.S |    2 +
 include/linux/kstate-image.h       |   30 ++++
 include/linux/kstate.h             |    2 +-
 kernel/kstate/Makefile             |    1 +
 kernel/kstate/kstate-x86_32.c      |  294 ++++++++++++++++++++++++++++++++++++
 6 files changed, 330 insertions(+), 1 deletions(-)
 create mode 100644 kernel/kstate/kstate-x86_32.c

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..48557e1 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,8 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_checkpoint		335
+#define __NR_restart		336
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..70d5441 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,5 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_checkpoint		/* 335 */
+	.long sys_restart
diff --git a/include/linux/kstate-image.h b/include/linux/kstate-image.h
index 348f59f..8df5c4a 100644
--- a/include/linux/kstate-image.h
+++ b/include/linux/kstate-image.h
@@ -27,6 +27,7 @@ struct kstate_image_header {
 
 	/* Mutable part. */
 	/* Arch of the kernel which dumped the image. */
+#define KSTATE_ARCH_I386	1
 	__le32	kernel_arch;
 	/*
 	 * Distributions are expected to leave image version alone and
@@ -70,6 +71,35 @@ struct kstate_image_task_struct {
 	__u32		tsk_arch;
 } __packed;
 
+#define KSTATE_SEG_NULL		0	/* image encoding of the null selector (raw value 0) */
+#define KSTATE_SEG_USER32_CS	1	/* image encoding of __USER_CS */
+#define KSTATE_SEG_USER32_DS	2	/* image encoding of __USER_DS */
+#define KSTATE_SEG_TLS		0x4000	/* 0100 0000 0000 00xx; xx = slot index relative to GDT_ENTRY_TLS_MIN */
+#define KSTATE_SEG_LDT		0x8000	/* 100x xxxx xxxx xxxx; x bits = 13-bit LDT descriptor index */
+
+struct kstate_image_task_struct_i386 {	/* i386 register state; placed right after struct kstate_image_task_struct in the image */
+	__u32		ebx;	/* general registers, copied from/to the task's pt_regs */
+	__u32		ecx;
+	__u32		edx;
+	__u32		esi;
+	__u32		edi;
+	__u32		ebp;
+	__u32		eax;
+	__u32		orig_eax;
+	__u32		eip;
+	__u32		eflags;	/* validated by check_eflags() when an image is checked */
+	__u32		esp;
+
+	__u16		cs;	/* segment fields hold KSTATE_SEG_* encodings, not raw selectors */
+	__u16		ds;
+	__u16		es;
+	__u16		fs;
+	__u16		gs;	/* taken from thread.gs rather than pt_regs */
+	__u16		ss;
+
+	__u64		tls_array[3];	/* raw copy of the task's three 8-byte thread.tls_array descriptors */
+} __packed;
+
 struct kstate_image_mm_struct {
 	struct kstate_object_header hdr;
 
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index 3ae9e28..c4b55b6 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -67,7 +67,7 @@ int kstate_collect_all_file(struct kstate_context *ctx);
 int kstate_dump_all_file(struct kstate_context *ctx);
 int kstate_restore_file(struct kstate_context *ctx, kstate_ref_t *ref);
 
-#if 0
+#if defined(CONFIG_X86_32)
 extern const __u32 kstate_kernel_arch;
 int kstate_arch_check_image_header(struct kstate_image_header *i);
 
diff --git a/kernel/kstate/Makefile b/kernel/kstate/Makefile
index eacd3cf..ca19a22 100644
--- a/kernel/kstate/Makefile
+++ b/kernel/kstate/Makefile
@@ -6,3 +6,4 @@ kstate-y += kstate-image.o
 kstate-y += kstate-mm.o
 kstate-y += kstate-object.o
 kstate-y += kstate-task.o
+kstate-$(CONFIG_X86_32) += kstate-x86_32.o
diff --git a/kernel/kstate/kstate-x86_32.c b/kernel/kstate/kstate-x86_32.c
new file mode 100644
index 0000000..809242c
--- /dev/null
+++ b/kernel/kstate/kstate-x86_32.c
@@ -0,0 +1,294 @@
+/* Copyright (C) 2000-2009 Parallels Holdings, Ltd. */
+#include <linux/sched.h>
+
+#include <linux/kstate.h>
+#include <linux/kstate-image.h>
+
+const __u32 kstate_kernel_arch = KSTATE_ARCH_I386;	/* arch id of this kernel; presumably stored in the image header's kernel_arch - TODO confirm */
+
+int kstate_arch_check_image_header(struct kstate_image_header *i)	/* 0 if the image was dumped by an i386 kernel, else -EINVAL */
+{
+	if (i->kernel_arch == cpu_to_le32(KSTATE_ARCH_I386))	/* kernel_arch is little-endian (__le32) in the image */
+		return 0;
+	return -EINVAL;
+}
+
+__u32 kstate_task_struct_arch(struct task_struct *tsk)	/* arch tag for a task; always i386 in this file, tsk is not examined */
+{
+	return KSTATE_ARCH_I386;
+}
+
+static int check_eflags(__u32 eflags)	/* validate an eflags value from an image: 0 if acceptable, else -EINVAL */
+{
+	eflags &= ~X86_EFLAGS_CF;	/* clear every flag that is allowed to hold either value ... */
+	eflags &= ~X86_EFLAGS_PF;
+	eflags &= ~X86_EFLAGS_AF;
+	eflags &= ~X86_EFLAGS_ZF;
+	eflags &= ~X86_EFLAGS_SF;
+	eflags &= ~X86_EFLAGS_TF;
+	eflags &= ~X86_EFLAGS_DF;
+	eflags &= ~X86_EFLAGS_OF;
+	eflags &= ~X86_EFLAGS_NT;
+	eflags &= ~X86_EFLAGS_AC;
+	eflags &= ~X86_EFLAGS_ID;
+	if (eflags != (X86_EFLAGS_IF|0x2)) {	/* ... the rest must be exactly IF plus bit 1 (0x2, presumably the always-set reserved bit) */
+		pr_debug("%s: eflags %08x\n", __func__, eflags);	/* prints the already-masked value */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int check_segment(__u16 seg)	/* validate a KSTATE_SEG_* encoded selector from an image: 0 if well-formed, else -EINVAL */
+{
+	switch (seg) {	/* the fixed encodings need no range check */
+	case KSTATE_SEG_NULL:
+	case KSTATE_SEG_USER32_CS:
+	case KSTATE_SEG_USER32_DS:
+		return 0;
+	}
+	if (seg & KSTATE_SEG_TLS) {	/* TLS encoding: remaining bits are a slot index relative to GDT_ENTRY_TLS_MIN */
+		if ((seg & ~KSTATE_SEG_TLS) > GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN) {	/* index beyond the last TLS slot */
+			pr_debug("%s: seg %04x, GDT_ENTRY_TLS_MIN %u, GDT_ENTRY_TLS_MAX %u\n", __func__, seg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX);
+			return -EINVAL;
+		}
+		return 0;
+	}
+	if (seg & KSTATE_SEG_LDT) {	/* LDT encoding: remaining bits are the descriptor index */
+		if ((seg & ~KSTATE_SEG_LDT) > 0x1fff) {	/* index must fit in 13 bits */
+			pr_debug("%s: seg %04x\n", __func__, seg);
+			return -EINVAL;
+		}
+		return 0;
+	}
+	pr_debug("%s: seg %04x\n", __func__, seg);	/* neither a fixed value nor a TLS/LDT encoding */
+	return -EINVAL;
+}
+
+static int check_tls(struct desc_struct *desc)	/* validate one TLS descriptor from an image: 0 if acceptable, else -EINVAL */
+{
+	if (desc->l != 0 || desc->s != 1 || desc->dpl != 3)	/* presumably: not 64-bit, not a system descriptor, user privilege - confirm against desc_struct */
+		return -EINVAL;
+	return 0;
+}
+
+int kstate_arch_check_image_task_struct(struct kstate_image_task_struct *tsk_i)	/* validate the i386 part of a task image: 0 if OK, negative errno otherwise */
+{
+	struct kstate_image_task_struct_i386 *i = (void *)(tsk_i + 1);	/* arch part immediately follows the generic task image */
+	int rv;
+
+	if (tsk_i->tsk_arch != KSTATE_ARCH_I386)
+		return -EINVAL;
+	if (tsk_i->hdr.obj_len < sizeof(*tsk_i) + sizeof(*i))	/* object must hold both parts before i is dereferenced */
+		return -EINVAL;
+
+	rv = check_eflags(i->eflags);
+	if (rv < 0)
+		return rv;
+
+	if (i->cs == KSTATE_SEG_NULL)	/* check_segment() accepts NULL, but a null cs is rejected here */
+		return -EINVAL;
+	rv = check_segment(i->cs);
+	if (rv < 0)
+		return rv;
+	rv = check_segment(i->ds);
+	if (rv < 0)
+		return rv;
+	rv = check_segment(i->es);
+	if (rv < 0)
+		return rv;
+	rv = check_segment(i->fs);
+	if (rv < 0)
+		return rv;
+	rv = check_segment(i->gs);
+	if (rv < 0)
+		return rv;
+	rv = check_segment(i->ss);
+	if (rv < 0)
+		return rv;
+
+	if (i->tls_array[0]) {	/* an all-zero TLS entry is skipped without descriptor checks */
+		rv = check_tls((struct desc_struct *)&i->tls_array[0]);	/* the 8-byte image entry is viewed as a desc_struct */
+		if (rv < 0)
+			return rv;
+	}
+	if (i->tls_array[1]) {
+		rv = check_tls((struct desc_struct *)&i->tls_array[1]);
+		if (rv < 0)
+			return rv;
+	}
+	if (i->tls_array[2]) {
+		rv = check_tls((struct desc_struct *)&i->tls_array[2]);
+		if (rv < 0)
+			return rv;
+	}
+
+	return 0;
+}
+
+unsigned int kstate_arch_len_task_struct(struct task_struct *tsk)	/* byte size of the arch-specific part of a task image; tsk is not examined */
+{
+	return sizeof(struct kstate_image_task_struct_i386);
+}
+
+int kstate_arch_check_task_struct(struct task_struct *tsk)	/* 0 if tsk is in none of the states rejected below, else -EINVAL after a WARN */
+{
+	struct restart_block *rb;
+
+	if (tsk->thread.xstate) {	/* presumably extended FPU state; the i386 image struct has no field for it */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (test_tsk_thread_flag(tsk, TIF_DEBUG)) {	/* presumably hardware debug registers in use - TODO confirm */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	rb = &task_thread_info(tsk)->restart_block;
+	if (rb->fn != current_thread_info()->restart_block.fn) {	/* tsk's restart handler must equal the calling task's; presumably the default one - TODO confirm */
+		WARN(1, "rb->fn = %pF\n", rb->fn);
+		return -EINVAL;
+	}
+	if (tsk->thread.vm86_info) {	/* task with vm86_info set is rejected */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (tsk->thread.io_bitmap_ptr) {	/* task with an I/O bitmap pointer is rejected */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+#ifdef CONFIG_X86_DS
+	if (tsk->thread.ds_ctx) {	/* task with a ds_ctx is rejected */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+#endif
+	return 0;
+}
+
+static __u16 encode_segment(u16 seg)	/* convert a raw i386 selector into its KSTATE_SEG_* image encoding */
+{
+	if (seg == 0)
+		return KSTATE_SEG_NULL;
+	BUG_ON((seg & 3) != 3);	/* low two bits must both be set; NOTE(review): confirm userspace cannot load a selector that trips this */
+	if (seg & 4)	/* bit 2 set: selector refers to the LDT */
+		return KSTATE_SEG_LDT | (seg >> 3);	/* keep only the descriptor index */
+
+	if (seg == __USER_CS)
+		return KSTATE_SEG_USER32_CS;
+	if (seg == __USER_DS)
+		return KSTATE_SEG_USER32_DS;
+
+	if (GDT_ENTRY_TLS_MIN <= (seg >> 3) && (seg >> 3) <= GDT_ENTRY_TLS_MAX)	/* GDT index inside the TLS range */
+		return KSTATE_SEG_TLS | ((seg >> 3) - GDT_ENTRY_TLS_MIN);	/* stored relative to the first TLS slot */
+	BUG();	/* no encoding exists for any other selector; NOTE(review): e.g. raw value 3 reaches here */
+}
+
+static u16 decode_segment(__u16 seg)	/* inverse of encode_segment(): KSTATE_SEG_* image encoding back to a raw i386 selector */
+{
+	if (seg == KSTATE_SEG_NULL)
+		return 0;
+	if (seg == KSTATE_SEG_USER32_CS)
+		return __USER_CS;
+	if (seg == KSTATE_SEG_USER32_DS)
+		return __USER_DS;
+
+	if (seg & KSTATE_SEG_TLS) {
+		seg &= ~KSTATE_SEG_TLS;	/* what is left is the slot index relative to GDT_ENTRY_TLS_MIN */
+		return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;	/* GDT index in bits 3 and up, low two bits set */
+	}
+	if (seg & KSTATE_SEG_LDT) {
+		seg &= ~KSTATE_SEG_LDT;	/* what is left is the LDT descriptor index */
+		return (seg << 3) | 7;	/* bit 2 marks the LDT, low two bits set */
+	}
+	BUG();	/* not reached for values that check_segment() accepts */
+}
+
+int kstate_arch_dump_task_struct(struct kstate_context *ctx, struct task_struct *tsk, void *arch_i)	/* fill arch_i (a struct kstate_image_task_struct_i386) from tsk; always returns 0; ctx is unused */
+{
+	struct kstate_image_task_struct_i386 *i = arch_i;
+	struct pt_regs *regs = task_pt_regs(tsk);	/* register frame of tsk that gets saved */
+
+	i->ebx = regs->bx;
+	i->ecx = regs->cx;
+	i->edx = regs->dx;
+	i->esi = regs->si;
+	i->edi = regs->di;
+	i->ebp = regs->bp;
+	i->eax = regs->ax;
+	i->orig_eax = regs->orig_ax;
+	i->eip = regs->ip;
+	i->eflags = regs->flags;
+	i->esp = regs->sp;
+
+	i->cs = encode_segment(regs->cs);	/* selectors are stored in KSTATE_SEG_* form, not raw */
+	i->ds = encode_segment(regs->ds);
+	i->es = encode_segment(regs->es);
+	i->fs = encode_segment(regs->fs);
+	i->gs = encode_segment(tsk->thread.gs);	/* gs comes from thread.gs, not pt_regs; NOTE(review): confirm this matches the kernel's gs save scheme */
+	i->ss = encode_segment(regs->ss);
+
+	BUILD_BUG_ON(sizeof(tsk->thread.tls_array[0]) != 8);	/* image layout assumes 8-byte descriptors ... */
+	BUILD_BUG_ON(sizeof(tsk->thread.tls_array) != 3 * 8);	/* ... and exactly three of them */
+	memcpy(i->tls_array, tsk->thread.tls_array, sizeof(i->tls_array));	/* descriptors are copied raw */
+
+	return 0;
+}
+
+asmlinkage void ret_from_fork(void);	/* declaration only; not defined in this file */
+static int restore_task_struct_i386(struct task_struct *tsk, struct kstate_image_task_struct_i386 *i)	/* load register state from image i into tsk; always returns 0 */
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+
+	tsk->thread.sp = (unsigned long)regs;	/* saved kernel stack pointer is set to the start of the pt_regs frame */
+	tsk->thread.sp0 = (unsigned long)(regs + 1);	/* sp0 is set to the address just past the frame */
+	tsk->thread.ip = (unsigned long)ret_from_fork;	/* presumably tsk resumes in ret_from_fork and leaves the kernel with regs - TODO confirm */
+
+	regs->bx = i->ebx;
+	regs->cx = i->ecx;
+	regs->dx = i->edx;
+	regs->si = i->esi;
+	regs->di = i->edi;
+	regs->bp = i->ebp;
+	regs->ax = i->eax;
+	regs->orig_ax = i->orig_eax;
+	regs->ip = i->eip;
+	regs->flags = i->eflags;
+	regs->sp = i->esp;
+
+	regs->cs = decode_segment(i->cs);	/* image holds KSTATE_SEG_* encodings; convert back to raw selectors */
+	regs->ds = decode_segment(i->ds);
+	regs->es = decode_segment(i->es);
+	regs->fs = decode_segment(i->fs);
+	tsk->thread.gs = decode_segment(i->gs);	/* gs goes to thread.gs, mirroring the dump side */
+	regs->ss = decode_segment(i->ss);
+
+	memcpy(tsk->thread.tls_array, i->tls_array, 3 * 8);	/* 3 * 8 equals sizeof(i->tls_array): three __u64 entries */
+
+	return 0;
+}
+
+int kstate_arch_restore_task_struct(struct task_struct *tsk, struct kstate_image_task_struct *i)	/* restore tsk's i386 register state from a task image; returns 0 */
+{
+	return restore_task_struct_i386(tsk, (void *)(i + 1));	/* the arch part follows the generic task image, hence i + 1 */
+}
+
+int kstate_arch_check_mm_struct(struct mm_struct *mm)	/* 0 if mm has no LDT, else -EINVAL after a WARN */
+{
+	mutex_lock(&mm->context.lock);	/* context.lock is held while the LDT fields are read */
+	if (mm->context.ldt || mm->context.size != 0) {	/* any LDT pointer or non-zero size is rejected; no LDT data is saved in the image */
+		mutex_unlock(&mm->context.lock);
+		WARN_ON(1);	/* warn only after the lock has been dropped */
+		return -EINVAL;
+	}
+	mutex_unlock(&mm->context.lock);
+	return 0;
+}
+
+unsigned int kstate_arch_len_mm_struct(struct mm_struct *mm)	/* byte size of arch-specific mm data in the image: none on i386; mm is not examined */
+{
+	return 0;
+}
+
+int kstate_arch_dump_mm_struct(struct kstate_context *ctx, struct mm_struct *mm, void *arch_i)	/* nothing arch-specific is written for an mm; always returns 0, arguments unused */
+{
+	return 0;
+}
-- 
1.5.6.5



More information about the Containers mailing list