[PATCH 2/5] /dev/vring: simple userspace-kernel ringbuffer interface.

Andrew Morton akpm at linux-foundation.org
Fri Apr 18 04:18:46 PDT 2008


On Fri, 18 Apr 2008 14:39:48 +1000 Rusty Russell <rusty at rustcorp.com.au> wrote:

> virtio introduced a ring structure ABI for guest-host communications
> (currently used by lguest and kvm).  Using this same ABI, we can
> create a nice fd version.
> 
> This is useful for efficiently passing packets to and from the tun,
> for example.
> 
> ...
>
> +static int vring_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	unsigned long size, num_descs;
> +	struct vring_info *vr = filp->private_data;
> +	int err;
> +
> +	/* We overload mmap's offset to hold the number of ring descriptors. */
> +	num_descs = vma->vm_pgoff;
> +
> +	/* Must be a power of two, and limit indices to a u16. */
> +	if (!num_descs || (num_descs & (num_descs-1)) || num_descs > 65536)

We have an is_power_of_2().

> +		return -EINVAL;
> +
> +	/* mmap size must be what we expect for such a ring. */
> +	size = vma->vm_end - vma->vm_start;
> +	if (size != ALIGN(vring_size(num_descs, PAGE_SIZE), PAGE_SIZE))
> +		return -EINVAL;
> +
> +	/* We only let them map this in one place. */
> +	mutex_lock(&vr->lock);
> +	if (vr->ring.num != 0) {
> +		err = -EBUSY;
> +		goto unlock;
> +	}
> +
> +	vring_init(&vr->ring, num_descs, (void *)vma->vm_start, PAGE_SIZE);
> +
> +	vr->mask = num_descs - 1;
> +	err = 0;
> +
> +unlock:
> +	mutex_unlock(&vr->lock);
> +	return err;
> +}
>
> ...
>
> +/**
> + * vring_get - check out a vring file descriptor
> + * @filp: the file structure to attach to (eg. from fget()).
> + *
> + * Userspace opens /dev/vring and mmaps it, then hands that fd to the
> + * kernel subsystem it wants to communicate with.  That subsystem uses
> + * this routine and vring_set_ops() to attach to it.
> + *
> + * This simply checks that it really is a vring fd (otherwise it
> + * returns NULL); the other routine checks that it's not already
> + * attached.
> + */

hm, I don't understand the big picture here yet.

Isn't this kinda-sorta like what a relayfs file does?  The oprofile
buffers?  etc?  Nothing in common at all, no hope?

> +struct vring_info *vring_get(struct file *filp)
> +{
> +	/* Must be one of ours. */
> +	if (filp->f_op != &vring_fops)
> +		return NULL;
> +
> +	return filp->private_data;
> +}
> +EXPORT_SYMBOL_GPL(vring_get);
> +
> +/**
> + * vring_set_ops - attach operations to a vring file descriptor.
> + * @vr: the vring_info returned from vring_get.
> + * @ops: the operations to attach.
> + * @ops_data: the argument to the ops callbacks.
> + *
> + * This is called after vring_get(): the reason for the two-part
> + * process is that the ops can be called before vring_set_ops returns
> + * (we don't do locking), so you really need to set things up before
> + * this call.
> + *
> + * This simply checks that the ring is not already attached to something,
> + * then sets the ops.
> + */
> +int vring_set_ops(struct vring_info *vr,
> +		  const struct vring_ops *ops, void *ops_data)
> +{
> +	int err;
> +
> +	mutex_lock(&vr->lock);
> +	if (vr->ops) {
> +		err = -EBUSY;
> +		goto unlock;
> +	}
> +
> +	/* We don't lock, so make sure we get this in the right order. */
> +	vr->ops_data = ops_data;
> +	wmb();
> +	vr->ops = ops;
> +
> +	err = 0;
> +unlock:
> +	mutex_unlock(&vr->lock);
> +	local_irq_enable();

what's this doing here?

> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(vring_set_ops);
> +
> +/**
> + * vring_unset_ops - remove operations from a vring file descriptor.
> + * @vr: the vring_info previously successfully vring_set_ops'd
> + */
> +void vring_unset_ops(struct vring_info *vr)
> +{
> +	BUG_ON(!vr->ops);
> +	mutex_lock(&vr->lock);
> +	vr->ops = NULL;
> +	mutex_unlock(&vr->lock);
> +}
> +EXPORT_SYMBOL_GPL(vring_unset_ops);

Isn't this just vring_set_ops(vr, NULL, NULL)?

> +static struct miscdevice vring_dev = {
> +	.minor = MISC_DYNAMIC_MINOR,
> +	.name = KBUILD_MODNAME,
> +	.fops = &vring_fops,
> +};
> +
> +static int __init init(void)
> +{
> +	return misc_register(&vring_dev);
> +}
> +
> +static void __exit fini(void)
> +{
> +	misc_deregister(&vring_dev);
> +}
> +
> +module_init(init);
> +module_exit(fini);
> diff -r b2d9869d338f include/linux/vring.h
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/include/linux/vring.h	Fri Apr 18 13:35:16 2008 +1000
> @@ -0,0 +1,58 @@
> +/* Ring-buffer file descriptor implementation.
> + *
> + *  Copyright 2008 Rusty Russell IBM Corporation
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, write to the Free Software
> + *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> + */

ponders #include <copyright.h>

> +#ifndef _LINUX_VRING_H
> +#define _LINUX_VRING_H
> +
> +/**
> + * struct vring_ops - operations for a vring fd.
> + * @needs_pull: more data is pending, need to call pull.
> + * @pull: callback when read() is called to report used buffers.
> + * @push: callback when write() is called to notify of added buffers.
> + *
> + * Any of these callbacks can be NULL, if you don't need them.
> + */
> +struct vring_ops {
> +	bool (*needs_pull)(void *ops_data);
> +
> +	/* Returns 0 or negative errno. */
> +	int (*pull)(void *ops_data);
> +
> +	/* Returns 0 or negative errno. */
> +	int (*push)(void *ops_data);
> +};
> +
> +struct file;
> +
> +struct vring_info *vring_get(struct file *filp);
> +int vring_set_ops(struct vring_info *,
> +		  const struct vring_ops *ops, void *ops_data);

the first arg to vring_set_ops() lost its name.

> +void vring_unset_ops(struct vring_info *vr);
> +struct iovec;
> +
> +/* Returns an error, or 0 (no buffers), or an id for vring_used_buffer() */
> +int vring_get_buffer(struct vring_info *vr,
> +		     struct iovec *in_iov,
> +		     unsigned int *num_in, unsigned long *in_len,
> +		     struct iovec *out_iov,
> +		     unsigned int *num_out, unsigned long *out_len);
> +
> +void vring_used_buffer(struct vring_info *vr, int id, u32 len);
> +
> +void vring_wake(struct vring_info *vr);
> +#endif /* _LINUX_VRING_H */




More information about the Virtualization mailing list