[PATCH 1/1] RFC: Containerized syslog (Take II)

Jean-Marc Pigeon jmp at safe.ca
Tue Feb 16 07:24:17 PST 2010


	Containerized syslog is now part of nsproxy.
	A new flag, CLONE_SYSLOG, allows a task to unshare
	its syslog area.
	The main purpose of containerized syslog is to keep
	a full container from leaking or compromising
	the host's syslog data.
---
 include/linux/init_task.h      |    2 +
 include/linux/nsproxy.h        |    2 +
 include/linux/sched.h          |    1 +
 include/linux/syslog.h         |    9 ++--
 include/linux/user_namespace.h |    1 -
 kernel/fork.c                  |    2 +-
 kernel/nsproxy.c               |   18 +++++++-
 kernel/printk.c                |   14 +++---
 kernel/syslog.c                |   84 ++++++++++++++++++++++++++++++++--------
 kernel/user.c                  |    3 -
 kernel/user_namespace.c        |    5 --
 11 files changed, 101 insertions(+), 40 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index abec69b..30b479e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -11,6 +11,7 @@
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
 #include <net/net_namespace.h>
+#include <linux/syslog.h>
 
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
@@ -37,6 +38,7 @@ extern struct nsproxy init_nsproxy;
 	.count		= ATOMIC_INIT(1),				\
 	.uts_ns		= &init_uts_ns,					\
 	.mnt_ns		= NULL,						\
+	.syslog_ns	= &init_kernel_syslog_ns,			\
 	INIT_NET_NS(net_ns)                                             \
 	INIT_IPC_NS(ipc_ns)						\
 }
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 7b370c7..852fed3 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -3,6 +3,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/sched.h>
+#include <linux/syslog.h>
 
 struct mnt_namespace;
 struct uts_namespace;
@@ -29,6 +30,7 @@ struct nsproxy {
 	struct mnt_namespace *mnt_ns;
 	struct pid_namespace *pid_ns;
 	struct net 	     *net_ns;
+	struct syslog_ns     *syslog_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c..659cc81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -9,6 +9,7 @@
 #define CLONE_FS	0x00000200	/* set if fs info shared between processes */
 #define CLONE_FILES	0x00000400	/* set if open files shared between processes */
 #define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
+#define CLONE_SYSLOG	0x00001000	/* set if we need private syslog (/proc/kmsg)	*/
 #define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
 #define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
 #define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 98c6898..cdbebee 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -3,6 +3,7 @@
 #include	<linux/spinlock_types.h>
 
 struct syslog_ns {
+	struct kref kref;	/*syslog_ns reference count & control			*/
         wait_queue_head_t wait;
 	spinlock_t logbuf_lock;	/* access conflict locker				*/
         unsigned log_start;	/* Index into log_buf: next char to be read by syslog() */
@@ -22,8 +23,8 @@ extern struct syslog_ns init_kernel_syslog_ns;
  * Syslog API
  *
  */
-extern struct syslog_ns *syslog_malloc(unsigned container_buf_len);
-extern struct syslog_ns *syslog_realloc(struct syslog_ns *syslog_ns, unsigned container_buf_len);
-extern struct syslog_ns *syslog_free(struct syslog_ns *syslog);
-extern struct syslog_ns *syslog_get_current(void);
+extern struct syslog_ns *realloc_syslog_ns(struct syslog_ns *syslog_ns, unsigned container_buf_len);
+extern struct syslog_ns *copy_syslog_ns(unsigned long flags,struct syslog_ns *current_syslog_ns);
+extern struct syslog_ns *release_syslog_ns(struct syslog_ns *current_syslog_ns);
+extern struct syslog_ns *get_current_syslog_ns(void);
 #endif /* _LINUX_SYSLOG_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 3d0c73e..cc4f453 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -14,7 +14,6 @@ struct user_namespace {
 	struct hlist_head	uidhash_table[UIDHASH_SZ];
 	struct user_struct	*creator;
 	struct work_struct	destroyer;
-	struct syslog_ns        *syslog;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd98..38c8d8c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1647,7 +1647,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|CLONE_SYSLOG))
 		goto bad_unshare_out;
 
 	/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 09b4ff9..ff968db 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -44,6 +44,8 @@ static inline struct nsproxy *create_nsproxy(void)
 static struct nsproxy *create_new_namespaces(unsigned long flags,
 			struct task_struct *tsk, struct fs_struct *new_fs)
 {
+#define	CONTAINER_BUF_LEN	4096	/*should be enough for container syslog	*/
+
 	struct nsproxy *new_nsp;
 	int err;
 
@@ -80,9 +82,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		err = PTR_ERR(new_nsp->net_ns);
 		goto out_net;
 	}
-
+	new_nsp->syslog_ns = copy_syslog_ns(flags, tsk->nsproxy->syslog_ns);
+	if (IS_ERR(new_nsp->syslog_ns)) {
+		err = PTR_ERR(new_nsp->syslog_ns);
+		goto out_syslog;
+	}
+	
 	return new_nsp;
 
+out_syslog:
+	if (new_nsp->net_ns) 
+		put_net(new_nsp->net_ns);
 out_net:
 	if (new_nsp->pid_ns)
 		put_pid_ns(new_nsp->pid_ns);
@@ -116,7 +126,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	get_nsproxy(old_ns);
 
 	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				CLONE_NEWPID | CLONE_NEWNET)))
+				CLONE_NEWPID | CLONE_NEWNET | CLONE_SYSLOG)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -151,6 +161,8 @@ out:
 
 void free_nsproxy(struct nsproxy *ns)
 {
+	if (ns->syslog_ns)
+		ns->syslog_ns=release_syslog_ns(ns->syslog_ns);
 	if (ns->mnt_ns)
 		put_mnt_ns(ns->mnt_ns);
 	if (ns->uts_ns)
@@ -173,7 +185,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET)))
+			       CLONE_NEWNET | CLONE_SYSLOG )))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/printk.c b/kernel/printk.c
index fd0a05c..3c7f213 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -148,7 +148,7 @@ static int saved_console_loglevel = -1;
  */
 void log_buf_kexec_setup(void)
 {
-	struct syslog_ns *syslog_ns = syslog_get_current();
+	struct syslog_ns *syslog_ns = get_current_syslog_ns();
 
 	VMCOREINFO_SYMBOL(sys_log_buf);
 	VMCOREINFO_SYMBOL(sys_log_end);
@@ -163,7 +163,7 @@ static int __init log_buf_len_setup(char *str)
 
 	if (size) {
 		size = roundup_pow_of_two(size);
-		(void) syslog_realloc(&init_kernel_syslog_ns,size);
+		(void) realloc_syslog_ns(&init_kernel_syslog_ns,size);
 	}
 	return 1;
 }
@@ -244,7 +244,7 @@ int do_syslog(int type, char __user *buf, int len)
 	int do_clear = 0;
 	char c;
 	int error = 0;
-	struct syslog_ns *syslog_ns = syslog_get_current();
+	struct syslog_ns *syslog_ns = get_current_syslog_ns();
 
 	error = security_syslog(type);
 	if (error)
@@ -638,7 +638,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 {
 	int printed_len = 0;
 	int current_log_level = default_message_loglevel;
-	struct syslog_ns *syslog_ns = syslog_get_current();
+	struct syslog_ns *syslog_ns = get_current_syslog_ns();
 	unsigned long flags;
 	int this_cpu;
 	char *p;
@@ -1012,7 +1012,7 @@ void release_console_sem(void)
 		unsigned long flags;
 		unsigned _con_start, _log_end;
 		unsigned wake_klogd = 0;
-		struct syslog_ns *syslog_ns = syslog_get_current();
+		struct syslog_ns *syslog_ns = get_current_syslog_ns();
 
 		for ( ; ; ) {
 			spin_lock_irqsave(&sys_log_lock, flags);
@@ -1252,7 +1252,7 @@ void register_console(struct console *newcon)
 		 * for us.
 		 */
 
-		struct syslog_ns *syslog_ns = syslog_get_current();
+		struct syslog_ns *syslog_ns = get_current_syslog_ns();
 
 		spin_lock_irqsave(&sys_log_lock, flags);
 		sys_log_con_start = sys_log_start;
@@ -1462,7 +1462,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 	const char *s1, *s2;
 	unsigned long l1, l2;
 	unsigned long flags;
-	struct syslog_ns *syslog_ns = syslog_get_current();
+	struct syslog_ns *syslog_ns = get_current_syslog_ns();
 
 	/* Theoretically, the log could move on after we do this, but
 	   there's not a lot we can do about that. The new messages
diff --git a/kernel/syslog.c b/kernel/syslog.c
index 69d30a9..0088a85 100644
--- a/kernel/syslog.c
+++ b/kernel/syslog.c
@@ -22,35 +22,66 @@
  *
  */
 
+#include <linux/module.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
 #include <linux/cred.h>
+#include <linux/kref.h>
 #include <linux/user_namespace.h>
 #include <linux/syslog.h>
 
+#ifdef CONFIG_PRINTK
 /*
  * Static memory definition, used to assign a syslog
  * to the kernel itself
  *
  */
-
-#ifdef CONFIG_PRINTK
 #define __LOG_BUF_LEN   (1 << CONFIG_LOG_BUF_SHIFT)
 
 static char __log_buf[__LOG_BUF_LEN];
 
 struct syslog_ns init_kernel_syslog_ns = {
+	.kref = {	/* NOTE(review): count starts at 2, as init_user_ns does; */
+		.refcount	= ATOMIC_INIT(2),	/* presumably keeps the static __log_buf from ever being freed - confirm */
+	},
         .wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_kernel_syslog_ns.wait),
         .buf_len = __LOG_BUF_LEN,
         .buf = __log_buf
 };
+EXPORT_SYMBOL_GPL(init_kernel_syslog_ns);
 #endif
+/*
+ * Release every resource tied to a syslog_ns: the log buffer first, then
+ * the descriptor itself.  A NULL @syslog is accepted.  Always returns
+ * NULL, which callers assign back to their own pointer.
+ */
+struct syslog_ns *syslog_free(struct syslog_ns *syslog)
+{
+	if (!syslog)
+		return NULL;
+
+	kfree(syslog->buf);
+	kfree(syslog);
+	return NULL;
+}
 
 /*
+ * kref release callback: recover the syslog_ns that embeds @kref and
+ * hand it to syslog_free().  Passed to kref_put() by release_syslog_ns().
+ */
+static void syslog_out(struct kref *kref)
+{
+	struct syslog_ns *ns;
+
+	ns = container_of(kref, struct syslog_ns, kref);
+	/* the returned NULL is not needed here, the local goes out of scope */
+	syslog_free(ns);
+}
+/*
  * Procedure to assign memory for syslog area
  *
  */
-struct syslog_ns * syslog_malloc(unsigned container_buf_len)
+static struct syslog_ns * malloc_syslog_ns(unsigned container_buf_len)
 {
 	struct syslog_ns *ns;
 
@@ -61,6 +92,8 @@ struct syslog_ns * syslog_malloc(unsigned container_buf_len)
 	if (!ns)
 		return ERR_PTR(-ENOMEM);
 
+	(void) kref_init(&(ns->kref));
+
 	ns->buf_len = container_buf_len;
 	ns->buf = kzalloc(container_buf_len, GFP_KERNEL);
 	if (!ns->buf) {
@@ -77,7 +110,7 @@ struct syslog_ns * syslog_malloc(unsigned container_buf_len)
  * If syslog_ns is NULL, assign a brand new syslog_ns
  *
  */
-struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned container_buf_len)
+struct syslog_ns * realloc_syslog_ns(struct syslog_ns *syslog_ns, unsigned container_buf_len)
 
 {
 	if ((syslog_ns == &init_kernel_syslog_ns ) && (container_buf_len > syslog_ns->buf_len)) {
@@ -102,7 +135,7 @@ struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned containe
 			(void) free_bootmem((unsigned long)old_buf, old_buf_len);
 		}
 	if (!syslog_ns)
-		return syslog_malloc(container_buf_len);
+		return malloc_syslog_ns(container_buf_len);
 	if (syslog_ns->buf_len > container_buf_len) {
 		(void) printk(KERN_WARNING "log_buf_len: Not allowed to decrease syslog buffer\n");
 		return ERR_PTR(-EINVAL);
@@ -126,32 +159,51 @@ struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned containe
 	(void) printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
 	return syslog_ns;
 }
+
 /*
- * Procedure to free all ressources tied to syslog
+ * Hand back the syslog area a new nsproxy should use: the caller's own
+ * area (one more reference) or, under CLONE_SYSLOG, a brand new one.
  *
  */
-struct syslog_ns *syslog_free(struct syslog_ns *syslog)
+struct syslog_ns *copy_syslog_ns(unsigned long flags, struct syslog_ns *current_syslog_ns)
 
 {
-	if (syslog != (struct syslog_ns *)0) {
-		(void) kfree(syslog->buf);
-		(void) kfree(syslog);
-		syslog = (struct syslog_ns *)0;
-		}
-	return syslog;
+#define	CONTAINER_BUF_LEN	4096	/* buffer size given to a private syslog */
+
+	BUG_ON(!current_syslog_ns);
+	/* CLONE_SYSLOG: private area; malloc_syslog_ns() may return an ERR_PTR */
+	if (flags & CLONE_SYSLOG)
+		return malloc_syslog_ns(CONTAINER_BUF_LEN);
+	/* no CLONE_SYSLOG: keep sharing the caller's area, one more user */
+	kref_get(&current_syslog_ns->kref);
+	return current_syslog_ns;
+}
+
+/*
+ * Drop the caller's reference on a syslog_ns; syslog_out() frees the area
+ * when that was the last one.  Returns NULL if the area was freed, else
+ * the pointer unchanged (the caller holds no reference either way).
+ */
+struct syslog_ns *release_syslog_ns(struct syslog_ns *current_syslog_ns)
+{
+	/* kref_put() returns 1 only when it ran the release callback */
+	if (kref_put(&current_syslog_ns->kref, syslog_out))
+		current_syslog_ns = NULL;
+	return current_syslog_ns;
 }
 
 /*
- * Procedure to get the current syslog area linked to a container (by CLONE_USER)
+ * Syslog area of the current task (unshared by CLONE_SYSLOG), or the kernel's own one when its nsproxy is gone;
  * if trouble, pin down the problem before it propagate.
  *
  */
-struct syslog_ns *syslog_get_current(void) 
+struct syslog_ns *get_current_syslog_ns(void)
 
 {
+	/* an exiting task has already dropped its nsproxy (NULL): use the kernel area */
 	struct syslog_ns *ns;
 
-	ns = current_user_ns()->syslog;
+	ns = current->nsproxy ? current->nsproxy->syslog_ns : &init_kernel_syslog_ns;
 	BUG_ON(!ns);
 	return ns;
 }
diff --git a/kernel/user.c b/kernel/user.c
index cb2d4ba..d9bea1f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -23,9 +23,6 @@ struct user_namespace init_user_ns = {
 	.kref = {
 		.refcount	= ATOMIC_INIT(2),
 	},
-#ifdef CONFIG_PRINTK
-	.syslog = &init_kernel_syslog_ns,
-#endif
 	.creator = &root_user
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9d8014f..db72d1b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -38,11 +38,6 @@ int create_user_ns(struct cred *new)
 		INIT_HLIST_HEAD(ns->uidhash_table + n);
 
 	
-	ns->syslog = syslog_malloc(CONTAINER_BUF_LEN);
-        if (!ns->syslog) {
-                kfree(ns);
-                return -ENOMEM;
-        }
 	/* Alloc new root user.  */
 	root_user = alloc_uid(ns, 0);
 	if (!root_user) {
-- 
1.6.6




More information about the Containers mailing list