[RFC PATCH 1/1] swap namespaces: introduce basic, simple swap namespaces (v3)
Serge E. Hallyn
serue at us.ibm.com
Fri Apr 18 13:46:10 PDT 2008
By default processes are in the "init" swapns, which is NULL.
By default, all swapfiles are in the init swapns.
When a task calls clone(CLONE_NEWSWAP), it must also have
no vm in common with any tasks in another swap_namespace.
Its nsproxy->swap_namespace is then a structure containing
an 'int type', which defaults to -1.
A task in a non-init swapns can only have one swapfile.
After swapon, nsproxy->swap_namespace->type will be the
index into the swaplist. The corresponding swap_info_struct
will have swap_namespace pointing to the new swap_namespace.
(For any swapfiles which were swapon()d in the init swapns,
it is NULL).
get_swap_page now takes an argument, the page for which it
is finding a swap page. We use the anonvma to find an mm
to which the page belongs, use the mm->owner to find the
owning task, and use that to find the swap_namespace. If
it exists, then we use its swapfile to find the swap page.
Note:
shmem is a problem at checkpoint - any swapped pages from
an mmap on a shmem file will be in the init swap ns. So
we'll need (1) to copy over any tmpfs files for which there
are open fds, and (2) find a way to detect mmaped tmpfs files
which no longer have an open fd.
Changelog:
Apr 24: small CONFIG_SWAP_NS=n fixes
Apr 23: fix wrongly inverted check for CLONE_VM
Apr 23: use u64 for clone flags (oops)
Apr 21: force CLONE_NEWIPC if clone(CLONE_NEWSWAP)
Apr 18: make sure to filter out private /proc/swaps
entries when viewing from the init namespace.
Signed-off-by: Serge E. Hallyn <serue at us.ibm.com>
---
include/linux/nsproxy.h | 2 +
include/linux/rmap.h | 6 ++
include/linux/sched.h | 1 +
include/linux/swap.h | 6 +-
include/linux/swap_namespace.h | 68 ++++++++++++++++
init/Kconfig | 8 ++
kernel/fork.c | 21 +++++
kernel/nsproxy.c | 14 +++-
mm/Makefile | 2 +-
mm/rmap.c | 4 +-
mm/shmem.c | 2 +-
mm/swap_namespace.c | 84 +++++++++++++++++++
mm/swap_state.c | 2 +-
mm/swapfile.c | 174 +++++++++++++++++++++++++++++++++++++++-
14 files changed, 385 insertions(+), 9 deletions(-)
create mode 100644 include/linux/swap_namespace.h
create mode 100644 mm/swap_namespace.c
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 5395e8c..8e2490a 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
+struct swap_namespace;
/*
* A structure to contain pointers to all per-process
@@ -29,6 +30,7 @@ struct nsproxy {
struct pid_namespace *pid_ns;
struct user_namespace *user_ns;
struct net *net_ns;
+ struct swap_namespace *swap_ns;
};
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692..48024db 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -31,6 +31,9 @@ struct anon_vma {
#ifdef CONFIG_MMU
+extern struct anon_vma *page_lock_anon_vma(struct page *page);
+extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
extern struct kmem_cache *anon_vma_cachep;
static inline struct anon_vma *anon_vma_alloc(void)
@@ -111,6 +114,9 @@ int page_mkclean(struct page *);
#else /* !CONFIG_MMU */
+#define page_lock_anon_vma(p) (NULL)
+#define page_unlock_anon_vma(p) do {} while (0)
+
#define anon_vma_init() do {} while (0)
#define anon_vma_prepare(vma) (0)
#define anon_vma_link(vma) do {} while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 90130b7..5d59c2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,6 +28,7 @@
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */
+#define CLONE_NEWSWAP 0x0000000100000000ULL
/*
* Scheduling policies
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0b33776..e0e8f3d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
+#include <linux/swap_namespace.h>
#include <asm/atomic.h>
#include <asm/page.h>
@@ -134,6 +135,7 @@ enum {
*/
struct swap_info_struct {
unsigned int flags;
+ struct swap_namespace *swap_namespace;
int prio; /* swap priority */
struct file *swap_file;
struct block_device *bdev;
@@ -239,7 +241,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
extern long total_swap_pages;
extern unsigned int nr_swapfiles;
extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct page *page);
extern swp_entry_t get_swap_page_of_type(int);
extern int swap_duplicate(swp_entry_t);
extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -342,7 +344,7 @@ static inline int remove_exclusive_swap_page(struct page *p)
return 0;
}
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct page *page)
{
swp_entry_t entry;
entry.val = 0;
diff --git a/include/linux/swap_namespace.h b/include/linux/swap_namespace.h
new file mode 100644
index 0000000..3839d58
--- /dev/null
+++ b/include/linux/swap_namespace.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_SWAP_NS_H
+#define _LINUX_SWAP_NS_H
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+/*
+ * Practically, we store both type and si because both are useful.
+ * During get_swap_page(), we use type==-1 to decide whether there
+ * is a swapfile.
+ * During swapon/swapoff, we use si==NULL. This is because there
+ * are certain times - especially during swapoff - where we drop the
+ * swap_lock after removing the swapfile, only to find we couldn't
+ * remove the swapfile and need to reactivate it. Using si==NULL
+ * for the check here allows us to prevent a racing swapon and
+ * swapoff during this window.
+ */
+struct swap_namespace {
+ struct kref kref; /* dropped via put_swap_ns(); release is free_swap_ns() */
+ int type; /* index into swap_info[]; -1 while there is no swapfile */
+ struct swap_info_struct *si; /* this namespace's swapfile, NULL if none */
+ /*
+ * ns->lock can only be held *under* swap_lock
+ * NOTE(review): the lock is initialised in clone_swapns() but is not
+ * taken anywhere in the hunks of this patch - confirm intended use.
+ */
+ spinlock_t lock;
+};
+
+#ifdef CONFIG_SWAP_NS
+extern void free_swap_ns(struct kref *kref);
+extern struct swap_namespace *copy_swap_ns(u64 flags,
+ struct swap_namespace *ns);
+extern struct swap_namespace *vma_page_to_swapns(struct page *page);
+#else
+/* !CONFIG_SWAP_NS: no swap_namespace is ever allocated, nothing to free. */
+static inline void free_swap_ns(struct kref *kref)
+{
+}
+
+/*
+ * !CONFIG_SWAP_NS: only the init swapns (NULL) exists.  There is nothing
+ * to copy or refcount, and asking for a new namespace is an error.
+ */
+static inline struct swap_namespace *copy_swap_ns(u64 flags,
+		struct swap_namespace *ns)
+{
+	return (flags & CLONE_NEWSWAP) ? ERR_PTR(-EINVAL) : NULL;
+}
+
+/* !CONFIG_SWAP_NS: every page belongs to the init swapns, which is NULL. */
+static inline struct swap_namespace *vma_page_to_swapns(struct page *page)
+{
+ return NULL;
+}
+#endif
+
+/*
+ * Take a reference on @swapns and hand the same pointer back.
+ * A NULL @swapns (the init swapns) is passed through untouched.
+ */
+static inline struct swap_namespace *get_swap_ns(struct swap_namespace *swapns)
+{
+	if (!swapns)
+		return NULL;
+
+	kref_get(&swapns->kref);
+	return swapns;
+}
+
+/*
+ * Drop a reference on @swapns; free_swap_ns() runs when the last one goes.
+ * A NULL @swapns (the init swapns) is ignored.
+ */
+static inline void put_swap_ns(struct swap_namespace *swapns)
+{
+	if (!swapns)
+		return;
+
+	kref_put(&swapns->kref, free_swap_ns);
+}
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 27b1660..06e3d1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -478,6 +478,14 @@ config PID_NS
Unless you want to work with an experimental feature
say N here.
+config SWAP_NS
+ bool "Swap Namespaces (EXPERIMENTAL)"
+ default n
+ depends on SWAP && NAMESPACES && EXPERIMENTAL && MMU
+ select MM_OWNER
+ help
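+ Support swap namespaces. A task created with
+ clone(CLONE_NEWSWAP) is placed in a new swap namespace, which
+ may swapon() at most one private swapfile. Anonymous pages
+ owned by tasks in that namespace are swapped only to that
+ swapfile, and swapfiles of other namespaces are not shown in
+ /proc/swaps.
+
+ Unless you want to work with an experimental feature
+ say N here.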
+
config BLK_DEV_INITRD
bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
depends on BROKEN || !FRV
diff --git a/kernel/fork.c b/kernel/fork.c
index 8f26d44..1ccf600 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -994,6 +994,20 @@ static void rt_mutex_init_task(struct task_struct *p)
#endif
}
+/*
+ * Validate the clone flags that accompany CLONE_NEWSWAP.
+ *
+ * Tasks in different swap namespaces cannot share any vm, so CLONE_VM is
+ * refused, and a new IPC namespace (CLONE_NEWIPC) must be requested as well.
+ * Returns 0 when the combination is acceptable, -EINVAL otherwise.
+ */
+static int check_swapns_clone(u64 flags)
+{
+	if ((flags & CLONE_NEWSWAP) &&
+	    ((flags & CLONE_VM) || !(flags & CLONE_NEWIPC)))
+		return -EINVAL;
+
+	return 0;
+}
+
#ifdef CONFIG_MM_OWNER
void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
@@ -1041,6 +1055,13 @@ static struct task_struct *copy_process(u64 clone_flags,
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);
+ /*
+ * Can't fork into a new swap namespace while sharing vm
+ */
+ retval = check_swapns_clone(clone_flags);
+ if (retval)
+ return ERR_PTR(retval);
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index ded928d..fab101e 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,7 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/swap_namespace.h>
static struct kmem_cache *nsproxy_cachep;
@@ -93,8 +94,17 @@ static struct nsproxy *create_new_namespaces(u64 flags,
goto out_net;
}
+ new_nsp->swap_ns = copy_swap_ns(flags, tsk->nsproxy->swap_ns);
+ if (IS_ERR(new_nsp->swap_ns)) {
+ err = PTR_ERR(new_nsp->swap_ns);
+ goto out_swap;
+ }
+
return new_nsp;
+out_swap:
+ if (new_nsp->net_ns)
+ put_net(new_nsp->net_ns);
out_net:
if (new_nsp->user_ns)
put_user_ns(new_nsp->user_ns);
@@ -131,7 +141,8 @@ int copy_namespaces(u64 flags, struct task_struct *tsk)
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
+ CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET |
+ CLONE_NEWSWAP)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -183,6 +194,7 @@ void free_nsproxy(struct nsproxy *ns)
if (ns->user_ns)
put_user_ns(ns->user_ns);
put_net(ns->net_ns);
+ put_swap_ns(ns->swap_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
diff --git a/mm/Makefile b/mm/Makefile
index 18c143b..40c7224 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o $(mmu-y)
+ page_isolation.o swap_namespace.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/rmap.c b/mm/rmap.c
index 6901c6d..f18528e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -156,7 +156,7 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is
* tricky: page_lock_anon_vma rely on RCU to guard against the races.
*/
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
{
struct anon_vma *anon_vma;
unsigned long anon_mapping;
@@ -176,7 +176,7 @@ out:
return NULL;
}
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
spin_unlock(&anon_vma->lock);
rcu_read_unlock();
diff --git a/mm/shmem.c b/mm/shmem.c
index e2a6ae1..8573094 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1021,7 +1021,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
* want to check if there's a redundant swappage to be discarded.
*/
if (wbc->for_reclaim)
- swap = get_swap_page();
+ swap = get_swap_page(page);
else
swap.val = 0;
diff --git a/mm/swap_namespace.c b/mm/swap_namespace.c
new file mode 100644
index 0000000..5f5ff3a
--- /dev/null
+++ b/mm/swap_namespace.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008 IBM Corporation
+ *
+ * Author: Serge Hallyn <serue at us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/module.h>
+#include <linux/swap_namespace.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/rmap.h>
+
+#ifdef CONFIG_SWAP_NS
+extern void free_swapinfo_fromnsput(struct swap_info_struct *si);
+
+/*
+ * kref release callback for a swap namespace: tear down the namespace's
+ * swapfile if it still has one, then free the namespace itself.
+ */
+void free_swap_ns(struct kref *kref)
+{
+	struct swap_namespace *ns =
+		container_of(kref, struct swap_namespace, kref);
+	struct swap_info_struct *si = ns->si;
+
+	if (si)
+		free_swapinfo_fromnsput(si);
+	kfree(ns);
+}
+
+/*
+ * Allocate a fresh swap namespace with an initialised kref and no
+ * swapfile (si == NULL, type == -1).  @old_ns is not consulted: nothing
+ * is inherited from the parent namespace.
+ * Returns the new namespace, or ERR_PTR(-ENOMEM) if allocation fails.
+ */
+struct swap_namespace *clone_swapns(struct swap_namespace *old_ns)
+{
+	struct swap_namespace *ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+
+	if (!ns)
+		return ERR_PTR(-ENOMEM);
+
+	ns->type = -1;
+	ns->si = NULL;
+	spin_lock_init(&ns->lock);
+	kref_init(&ns->kref);
+
+	return ns;
+}
+
+/*
+ * Produce the swap namespace for a new nsproxy.
+ *
+ * Without CLONE_NEWSWAP the caller shares @ns and receives an extra
+ * reference on it.  With CLONE_NEWSWAP a brand new, empty namespace is
+ * returned instead (or an ERR_PTR from clone_swapns()).
+ */
+struct swap_namespace *copy_swap_ns(u64 flags, struct swap_namespace *ns)
+{
+	struct swap_namespace *result;
+
+	/* hold @ns across the call */
+	get_swap_ns(ns);
+
+	if (flags & CLONE_NEWSWAP) {
+		result = clone_swapns(ns);
+		put_swap_ns(ns);
+	} else {
+		/* shared: the reference taken above now belongs to the caller */
+		result = ns;
+	}
+
+	return result;
+}
+
+/*
+ * Find the swap namespace a page should be swapped to.
+ *
+ * The page's anon_vma gives a vma, the vma gives an mm, and mm->owner
+ * gives the task whose nsproxy holds the swap namespace.
+ *
+ * Returns NULL (the init swapns) when the page has no lockable anon_vma,
+ * when no vma, owner task or nsproxy can be found, or when the owner is
+ * in the init swapns.  The owner may be exiting, in which case
+ * task_nsproxy() yields NULL, so every step is checked before use.
+ *
+ * NOTE(review): no reference is taken on the returned namespace, and the
+ * rcu read lock is dropped before returning - confirm that callers cannot
+ * race with the namespace being freed.
+ */
+struct swap_namespace *vma_page_to_swapns(struct page *page)
+{
+	struct swap_namespace *ns = NULL;
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct nsproxy *nsproxy;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return NULL;
+
+	/* we are under rcu_read_lock from page_lock_anon_vma */
+	if (list_empty(&anon_vma->head))
+		goto out;
+
+	vma = list_first_entry(&anon_vma->head, struct vm_area_struct,
+			       anon_vma_node);
+	task = rcu_dereference(vma->vm_mm->owner);
+	if (!task)
+		goto out;
+
+	/* an exiting task has already dropped its nsproxy */
+	nsproxy = task_nsproxy(task);
+	if (nsproxy)
+		ns = nsproxy->swap_ns;
+out:
+	page_unlock_anon_vma(anon_vma);
+	return ns;
+}
+#endif
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d8aadaf..182ef26 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -129,7 +129,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
BUG_ON(!PageUptodate(page));
for (;;) {
- entry = get_swap_page();
+ entry = get_swap_page(page);
if (!entry.val)
return 0;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bd1bb59..1c302b4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -28,6 +28,8 @@
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/memcontrol.h>
+#include <linux/nsproxy.h>
+#include <linux/swap_namespace.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -172,12 +174,46 @@ no_page:
return 0;
}
-swp_entry_t get_swap_page(void)
+/*
+ * Allocate one swap slot from the swapfile at index @type in swap_info[],
+ * and from no other swapfile.
+ *
+ * @type is a swap namespace's ->type, which is -1 while the namespace has
+ * no swapfile.  In that case we return before touching swap_lock; the
+ * lock must not be released on a path that never acquired it, and
+ * swap_info must not be indexed with a negative value.
+ *
+ * Returns the new swap entry, or an entry with val == 0 when the
+ * namespace has no swapfile, the swapfile is full or not writable, or
+ * scan_swap_map() finds no free slot.
+ */
+swp_entry_t swap_page_in_swapfile(int type)
+{
+	struct swap_info_struct *si;
+	pgoff_t offset;
+
+	if (type < 0)
+		return (swp_entry_t) {0};
+
+	si = swap_info + type;
+
+	spin_lock(&swap_lock);
+	if (si->pages - si->inuse_pages < 1)
+		goto noswap;
+	if (!si->highest_bit)
+		goto noswap;
+	if (!(si->flags & SWP_WRITEOK))
+		goto noswap;
+
+	nr_swap_pages--;
+	offset = scan_swap_map(si);
+	if (!offset) {
+		/* nothing found: undo the accounting done above */
+		nr_swap_pages++;
+		goto noswap;
+	}
+	spin_unlock(&swap_lock);
+	return swp_entry(type, offset);
+
+noswap:
+	spin_unlock(&swap_lock);
+	return (swp_entry_t) {0};
+}
+
+swp_entry_t get_swap_page(struct page *page)
{
struct swap_info_struct *si;
pgoff_t offset;
int type, next;
int wrapped = 0;
+ struct swap_namespace *swap_namespace;
+
+ swap_namespace = vma_page_to_swapns(page);
+ if (swap_namespace) {
+ return swap_page_in_swapfile(swap_namespace->type);
+ }
spin_lock(&swap_lock);
if (nr_swap_pages <= 0)
@@ -193,6 +229,8 @@ swp_entry_t get_swap_page(void)
wrapped++;
}
+ if (si->swap_namespace)
+ continue;
if (!si->highest_bit)
continue;
if (!(si->flags & SWP_WRITEOK))
@@ -1203,6 +1241,102 @@ int page_queue_congested(struct page *page)
}
#endif
+#ifdef CONFIG_SWAP_NS
+/*
+ * Take @si (slot @type) off the active swap list, drop its pages from the
+ * global counters and stop further allocation from it.  @prev is the slot
+ * linked in front of it, or negative when @si is the list head.
+ * The caller holds swap_lock.
+ */
+static void unlink_si_locked(struct swap_info_struct *si, int type, int prev)
+{
+	if (prev >= 0)
+		swap_info[prev].next = si->next;
+	else
+		swap_list.head = si->next;
+
+	/* just pick something that's safe... */
+	if (swap_list.next == type)
+		swap_list.next = swap_list.head;
+
+	si->flags &= ~SWP_WRITEOK;
+	total_swap_pages -= si->pages;
+	nr_swap_pages -= si->pages;
+}
+
+/*
+ * Walk the active swap list looking for @si and unlink it if present.
+ * Returns 1 when @si was found and unlinked, 0 when it is not on the list.
+ * The caller holds swap_lock.
+ */
+static int find_and_unlink_si_locked(struct swap_info_struct *si)
+{
+	int prev = -1;
+	int type = swap_list.head;
+
+	while (type >= 0 && swap_info + type != si) {
+		prev = type;
+		type = swap_info[type].next;
+	}
+	if (type < 0)
+		return 0;
+
+	unlink_si_locked(si, type, prev);
+	return 1;
+}
+
+/*
+ * Unlink @si from the active swap list and reset its slot.
+ * Returns the struct file that backed it so the caller can close it once
+ * the locks are dropped, or NULL if @si was not on the list (the slot is
+ * then left untouched).  The caller holds swap_lock.
+ */
+static struct file *find_and_zero_si_locked(struct swap_info_struct *si)
+{
+	struct file *file = NULL;
+
+	if (find_and_unlink_si_locked(si)) {
+		file = si->swap_file;
+		si->swap_file = NULL;
+		si->swap_map = NULL;
+		si->max = 0;
+		si->flags = 0;
+	}
+
+	return file;
+}
+
+/*
+ * Release the file that backed a swap area.  A block device gets its
+ * previous block size back and is released; a regular file loses its
+ * S_SWAPFILE flag.  The file is closed in both cases.
+ */
+static void clear_swap_file(struct swap_info_struct *si, struct file *swap_file)
+{
+	struct inode *inode = swap_file->f_mapping->host;
+
+	if (!S_ISBLK(inode->i_mode)) {
+		mutex_lock(&inode->i_mutex);
+		inode->i_flags &= ~S_SWAPFILE;
+		mutex_unlock(&inode->i_mutex);
+	} else {
+		struct block_device *bdev = I_BDEV(inode);
+
+		set_blocksize(bdev, si->old_block_size);
+		bd_release(bdev);
+	}
+	filp_close(swap_file, NULL);
+}
+/*
+ * free_swapinfo_fromnsput
+ * Tear down the swapfile of a swap namespace that is being freed, i.e.
+ * after the last task in the namespace has exited and dropped its
+ * reference.
+ * Because the swap_ns itself is going away, ns->lock is not held here,
+ * so taking swap_lock is safe.
+ */
+void free_swapinfo_fromnsput(struct swap_info_struct *si)
+{
+	struct file *file;
+	unsigned short *map;
+
+	mutex_lock(&swapon_mutex);
+	spin_lock(&swap_lock);
+	/* save the map first: find_and_zero_si_locked() clears si->swap_map */
+	map = si->swap_map;
+	file = find_and_zero_si_locked(si);
+	spin_unlock(&swap_lock);
+	mutex_unlock(&swapon_mutex);
+
+	/* si was not on the swap list: nothing to release */
+	if (file) {
+		vfree(map);
+		clear_swap_file(si, file);
+	}
+}
+#endif
+
asmlinkage long sys_swapoff(const char __user * specialfile)
{
struct swap_info_struct * p = NULL;
@@ -1213,6 +1347,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
char * pathname;
int i, type, prev;
int err;
+ struct swap_namespace *swap_namespace = current->nsproxy->swap_ns;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1244,6 +1379,11 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
spin_unlock(&swap_lock);
goto out_dput;
}
+ if (swap_namespace && type != swap_namespace->type) {
+ err = -EPERM;
+ spin_unlock(&swap_lock);
+ goto out_dput;
+ }
if (!security_vm_enough_memory(p->pages))
vm_unacct_memory(p->pages);
else {
@@ -1263,6 +1403,10 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
nr_swap_pages -= p->pages;
total_swap_pages -= p->pages;
p->flags &= ~SWP_WRITEOK;
+ if (swap_namespace) {
+ swap_namespace->si = NULL;
+ swap_namespace->type = -1;
+ }
spin_unlock(&swap_lock);
current->flags |= PF_SWAPOFF;
@@ -1282,6 +1426,10 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
swap_info[prev].next = p - swap_info;
nr_swap_pages += p->pages;
total_swap_pages += p->pages;
+ if (swap_namespace) {
+ swap_namespace->si = p;
+ swap_namespace->type = type;
+ }
p->flags |= SWP_WRITEOK;
spin_unlock(&swap_lock);
goto out_dput;
@@ -1339,6 +1487,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
struct swap_info_struct *ptr = swap_info;
int i;
loff_t l = *pos;
+ struct swap_namespace *swap_namespace = current->nsproxy->swap_ns;
mutex_lock(&swapon_mutex);
@@ -1348,6 +1497,10 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
for (i = 0; i < nr_swapfiles; i++, ptr++) {
if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
continue;
+ if (swap_namespace && ptr->swap_namespace != swap_namespace)
+ continue;
+ if (!swap_namespace && ptr->swap_namespace)
+ continue;
if (!--l)
return ptr;
}
@@ -1359,6 +1512,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
{
struct swap_info_struct *ptr;
struct swap_info_struct *endptr = swap_info + nr_swapfiles;
+ struct swap_namespace *swap_namespace = current->nsproxy->swap_ns;
if (v == SEQ_START_TOKEN)
ptr = swap_info;
@@ -1370,6 +1524,10 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
for (; ptr < endptr; ptr++) {
if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
continue;
+ if (swap_namespace && ptr->swap_namespace != swap_namespace)
+ continue;
+ if (!swap_namespace && ptr->swap_namespace)
+ continue;
++*pos;
return ptr;
}
@@ -1459,10 +1617,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
struct page *page = NULL;
struct inode *inode = NULL;
int did_down = 0;
+ struct swap_namespace *swap_namespace = current->nsproxy->swap_ns;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
spin_lock(&swap_lock);
+ if (swap_namespace && swap_namespace->si) {
+ /* only one swapfile for non-init swap_namespaces */
+ error = -EPERM;
+ spin_unlock(&swap_lock);
+ goto out;
+ }
p = swap_info;
for (type = 0 ; type < nr_swapfiles ; type++,p++)
if (!(p->flags & SWP_USED))
@@ -1490,6 +1655,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
} else {
p->prio = --least_priority;
}
+ if (swap_namespace)
+ swap_namespace->si = p;
spin_unlock(&swap_lock);
name = getname(specialfile);
error = PTR_ERR(name);
@@ -1672,6 +1839,9 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
mutex_lock(&swapon_mutex);
spin_lock(&swap_lock);
+ p->swap_namespace = swap_namespace;
+ if (swap_namespace)
+ swap_namespace->type = type;
p->flags = SWP_ACTIVE;
nr_swap_pages += nr_good_pages;
total_swap_pages += nr_good_pages;
@@ -1709,6 +1879,8 @@ bad_swap_2:
spin_lock(&swap_lock);
swap_map = p->swap_map;
p->swap_file = NULL;
+ if (swap_namespace)
+ swap_namespace->si = NULL;
p->swap_map = NULL;
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
--
1.5.3.6
More information about the Containers
mailing list