[PATCH] SCSI driver for VMware's virtual HBA - V4.

Anthony Liguori anthony at codemonkey.ws
Wed Sep 9 14:00:42 PDT 2009


Hi Alok,

Joining this a bit late as this was just brought to my attention.  It 
would have been nice to CC the virtualization mailing list.  Please do 
in future submissions.

Alok Kataria wrote:
> VMware PVSCSI driver - v4.
> 
/
>  
> diff --git a/drivers/scsi/pvscsi.h b/drivers/scsi/pvscsi.h
> new file mode 100644
> index 0000000..96bb655
> --- /dev/null
> +++ b/drivers/scsi/pvscsi.h
> @@ -0,0 +1,395 @@
> +/*
> + * VMware PVSCSI header file
> + *
> + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation; version 2 of the License and no later version.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> + * NON INFRINGEMENT.  See the GNU General Public License for more
> + * details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
> + *
> + * Maintained by: Alok N Kataria <akataria at vmware.com>
> + *
> + */
> +
> +#ifndef _PVSCSI_H_
> +#define _PVSCSI_H_
> +
> +#include <linux/types.h>
> +
> +#define PVSCSI_DRIVER_VERSION_STRING   "1.0.0.0"
> +
> +#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
> +
> +#define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
> +
> +#define PCI_VENDOR_ID_VMWARE		0x15AD
> +#define PCI_DEVICE_ID_VMWARE_PVSCSI	0x07C0
> +
> +/*
> + * host adapter status/error codes
> + */
> +typedef enum {
> +   BTSTAT_SUCCESS       = 0x00,  /* CCB complete normally with no errors */
> +   BTSTAT_LINKED_COMMAND_COMPLETED           = 0x0a,
> +   BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
> +   BTSTAT_DATA_UNDERRUN = 0x0c,
> +   BTSTAT_SELTIMEO      = 0x11,  /* SCSI selection timeout */
> +   BTSTAT_DATARUN       = 0x12,  /* data overrun/underrun */
> +   BTSTAT_BUSFREE       = 0x13,  /* unexpected bus free */
> +   BTSTAT_INVPHASE      = 0x14,  /* invalid bus phase or sequence requested by target */
> +   BTSTAT_LUNMISMATCH   = 0x17,  /* linked CCB has different LUN from first CCB */
> +   BTSTAT_SENSFAILED    = 0x1b,  /* auto request sense failed */
> +   BTSTAT_TAGREJECT     = 0x1c,  /* SCSI II tagged queueing message rejected by target */
> +   BTSTAT_BADMSG        = 0x1d,  /* unsupported message received by the host adapter */
> +   BTSTAT_HAHARDWARE    = 0x20,  /* host adapter hardware failed */
> +   BTSTAT_NORESPONSE    = 0x21,  /* target did not respond to SCSI ATN, sent a SCSI RST */
> +   BTSTAT_SENTRST       = 0x22,  /* host adapter asserted a SCSI RST */
> +   BTSTAT_RECVRST       = 0x23,  /* other SCSI devices asserted a SCSI RST */
> +   BTSTAT_DISCONNECT    = 0x24,  /* target device reconnected improperly (w/o tag) */
> +   BTSTAT_BUSRESET      = 0x25,  /* host adapter issued BUS device reset */
> +   BTSTAT_ABORTQUEUE    = 0x26,  /* abort queue generated */
> +   BTSTAT_HASOFTWARE    = 0x27,  /* host adapter software error */
> +   BTSTAT_HATIMEOUT     = 0x30,  /* host adapter hardware timeout error */
> +   BTSTAT_SCSIPARITY    = 0x34,  /* SCSI parity error detected */
> +} HostBusAdapterStatus;
> +
> +/*
> + * Register offsets.
> + *
> + * These registers are accessible both via i/o space and mm i/o.
> + */
> +
> +enum PVSCSIRegOffset {
> +	PVSCSI_REG_OFFSET_COMMAND        =    0x0,
> +	PVSCSI_REG_OFFSET_COMMAND_DATA   =    0x4,
> +	PVSCSI_REG_OFFSET_COMMAND_STATUS =    0x8,
> +	PVSCSI_REG_OFFSET_LAST_STS_0     =  0x100,
> +	PVSCSI_REG_OFFSET_LAST_STS_1     =  0x104,
> +	PVSCSI_REG_OFFSET_LAST_STS_2     =  0x108,
> +	PVSCSI_REG_OFFSET_LAST_STS_3     =  0x10c,
> +	PVSCSI_REG_OFFSET_INTR_STATUS    = 0x100c,
> +	PVSCSI_REG_OFFSET_INTR_MASK      = 0x2010,
> +	PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
> +	PVSCSI_REG_OFFSET_DEBUG          = 0x3018,
> +	PVSCSI_REG_OFFSET_KICK_RW_IO     = 0x4018,
> +};
> +
> +/*
> + * Virtual h/w commands.
> + */
> +
> +enum PVSCSICommands {
> +	PVSCSI_CMD_FIRST             = 0, /* has to be first */
> +
> +	PVSCSI_CMD_ADAPTER_RESET     = 1,
> +	PVSCSI_CMD_ISSUE_SCSI        = 2,
> +	PVSCSI_CMD_SETUP_RINGS       = 3,
> +	PVSCSI_CMD_RESET_BUS         = 4,
> +	PVSCSI_CMD_RESET_DEVICE      = 5,
> +	PVSCSI_CMD_ABORT_CMD         = 6,
> +	PVSCSI_CMD_CONFIG            = 7,
> +	PVSCSI_CMD_SETUP_MSG_RING    = 8,
> +	PVSCSI_CMD_DEVICE_UNPLUG     = 9,
> +
> +	PVSCSI_CMD_LAST              = 10  /* has to be last */
> +};
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
> + */
> +
> +typedef struct PVSCSICmdDescResetDevice {
> +	u32	target;
> +	u8	lun[8];
> +} __packed PVSCSICmdDescResetDevice;
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_ABORT_CMD --
> + *
> + * - currently does not support specifying the LUN.
> + * - _pad should be 0.
> + */
> +
> +typedef struct PVSCSICmdDescAbortCmd {
> +	u64	context;
> +	u32	target;
> +	u32	_pad;
> +} __packed PVSCSICmdDescAbortCmd;
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
> + *
> + * Notes:
> + * - reqRingNumPages and cmpRingNumPages need to be power of two.
> + * - reqRingNumPages and cmpRingNumPages need to be different from 0,
> + * - reqRingNumPages and cmpRingNumPages need to be inferior to
> + *   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
> + */
> +
> +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES        32
> +typedef struct PVSCSICmdDescSetupRings {
> +	u32	reqRingNumPages;
> +	u32	cmpRingNumPages;
> +	u64	ringsStatePPN;
> +	u64	reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +	u64	cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +} __packed PVSCSICmdDescSetupRings;
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
> + *
> + * Notes:
> + * - this command was not supported in the initial revision of the h/w
> + *   interface. Before using it, you need to check that it is supported by
> + *   writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
> + *   immediately after read the 'command status' register:
> + *       * a value of -1 means that the cmd is NOT supported,
> + *       * a value != -1 means that the cmd IS supported.
> + *   If it's supported the 'command status' register should return:
> + *      sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(u32).
> + * - this command should be issued _after_ the usual SETUP_RINGS so that the
> + *   RingsState page is already setup. If not, the command is a nop.
> + * - numPages needs to be a power of two,
> + * - numPages needs to be different from 0,
> + * - _pad should be zero.
> + */
> +
> +#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES  16
> +
> +typedef struct PVSCSICmdDescSetupMsgRing {
> +	u32	numPages;
> +	u32	_pad;
> +	u64	ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
> +} __packed PVSCSICmdDescSetupMsgRing;
> +
> +enum PVSCSIMsgType {
> +	PVSCSI_MSG_DEV_ADDED          = 0,
> +	PVSCSI_MSG_DEV_REMOVED        = 1,
> +	PVSCSI_MSG_LAST               = 2,
> +};
> +
> +/*
> + * Msg descriptor.
> + *
> + * sizeof(struct PVSCSIRingMsgDesc) == 128.
> + *
> + * - type is of type enum PVSCSIMsgType.
> + * - the content of args depend on the type of event being delivered.
> + */
> +
> +typedef struct PVSCSIRingMsgDesc {
> +	u32	type;
> +	u32	args[31];
> +} __packed PVSCSIRingMsgDesc;
> +
> +typedef struct PVSCSIMsgDescDevStatusChanged {
> +	u32	type;  /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
> +	u32	bus;
> +	u32	target;
> +	u8	lun[8];
> +	u32	pad[27];
> +} __packed PVSCSIMsgDescDevStatusChanged;
> +
> +/*
> + * Rings state.
> + *
> + * - the fields:
> + *    . msgProdIdx,
> + *    . msgConsIdx,
> + *    . msgNumEntriesLog2,
> + *   .. are only used once the SETUP_MSG_RING cmd has been issued.
> + * - '_pad' helps to ensure that the msg related fields are on their own
> + *   cache-line.
> + */
> +
> +typedef struct PVSCSIRingsState {
> +	u32	reqProdIdx;
> +	u32	reqConsIdx;
> +	u32	reqNumEntriesLog2;
> +
> +	u32	cmpProdIdx;
> +	u32	cmpConsIdx;
> +	u32	cmpNumEntriesLog2;
> +
> +	u8	_pad[104];
> +
> +	u32	msgProdIdx;
> +	u32	msgConsIdx;
> +	u32	msgNumEntriesLog2;
> +} __packed PVSCSIRingsState;

All of this can be hidden behind a struct virtqueue.  You could then 
introduce a virtio-vmwring that implemented this ABI.

You could then separate out the actual scsi logic into a separate 
virtio-scsi.c driver.

There are numerous advantages to this layering.  You get to remain ABI 
compatible with yourself.  You can potentially reuse the ring logic in 
other drivers.  Other VMMs can use different ring transports without 
introducing new drivers.  For instance, in KVM, we would use virtio-pci 
+ virtio-scsi instead of using virtio-vmwring + virtio-scsi.

The virtio infrastructure has been backported to various old kernels too 
so there really is no reason not to use it.

We have the interfaces in virtio to do notification suppression and MSI 
so I don't think there's any real limitation that it would impose on you.

Regards,

Anthony Liguori


More information about the Virtualization mailing list