[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/4] (Refactored) provide vhd support to blktap



[PATCH 2/4] (Refactored) provide vhd support to blktap
tapdisk-vhd-support.patch
Provides integration of vdisk library, and implementation of block-vhd
into blktap
Signed-off-by: Boris Ostrovsky <bostrovsky@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>

diff -r dff7e92bf3e9 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/Makefile     Thu Jun 21 13:04:41 2007 -0400
@@ -7,12 +7,14 @@ QCOW_UTIL    = img2qcow qcow2raw qcow-cr
 QCOW_UTIL    = img2qcow qcow2raw qcow-create
 INST_DIR     = /usr/sbin
 LIBAIO_DIR   = ../../libaio/src
+LIBVDISK_DIR = ../../vdisk
+LIBSDIR      = lib64
 
 CFLAGS   += -Werror
 CFLAGS   += -Wno-unused
 CFLAGS   += -fno-strict-aliasing
 CFLAGS   += -I $(XEN_LIBXC) -I $(LIBAIO_DIR)
-CFLAGS   += $(INCLUDES) -I. -I../../xenstore 
+CFLAGS   += $(INCLUDES) -I. -I../../xenstore -I$(LIBVDISK_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
 # Get gcc to generate the dependencies for us.
@@ -21,10 +23,10 @@ DEPS      = .*.d
 
 THREADLIB := -lpthread -lz
 LIBS      := -L. -L.. -L../lib
-LIBS      += -L$(XEN_LIBXC)
+LIBS      += -L$(XEN_LIBXC) -L$(LIBVDISK_DIR) -L$(LIBAIO_DIR)
 LIBS      += -lblktap -lxenctrl
 LIBS      += -lcrypto
-LIBS      += -lz
+LIBS      += -lz -lvdisk -laio
 LIBS      += -L$(XEN_XENSTORE) -lxenstore
 
 AIOLIBS   := $(LIBAIO_DIR)/libaio.a
@@ -34,15 +36,16 @@ BLK-OBJS  += block-vmdk.o
 BLK-OBJS  += block-vmdk.o
 BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-qcow.o
+BLK-OBJS  += block-vhd.o
 BLK-OBJS  += aes.o
 BLK-OBJS  += tapaio.o
 
 all: $(IBIN) qcow-util
 
-blktapctrl: blktapctrl.c
+blktapctrl: blktapctrl.c tapdisk.h
        $(CC) $(CFLAGS) -o blktapctrl $(LIBS) blktapctrl.c
 
-tapdisk: $(BLK-OBJS) tapdisk.c
+tapdisk: $(BLK-OBJS) tapdisk.c tapdisk.h
        $(CC) $(CFLAGS) -o tapdisk $(BLK-OBJS) tapdisk.c \
                $(AIOLIBS) $(LIBS)
 
diff -r dff7e92bf3e9 tools/blktap/drivers/block-vhd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/block-vhd.c  Thu Jun 21 13:04:51 2007 -0400
@@ -0,0 +1,344 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <dlfcn.h>
+#include "tapdisk.h"
+#include <vdisk.h>
+
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
+
+struct tdvhd_state {   /* per-disk driver state; dd->private is cast to this */
+       vdisk_dev_t *vdisk; /* allocated in tdvhd_open(), freed in tdvhd_close() */
+       int poll_pipe[2]; /* pipe() pair from tdvhd_open(); dummy fd for polling on */
+       int fd; /* not referenced anywhere in this file */
+};
+
+typedef struct td_cbinfo {     /* per-request cookie handed to vdisk_rw() */
+       struct td_state *s;     /* dd->td_state of the driver that queued it */
+       td_callback_t cb;       /* tapdisk callback invoked on completion */
+       struct vdisk_dev *vdisk; /* disk the request was queued on */
+       void *private;          /* caller's opaque pointer, passed back to cb */
+       int id;                 /* request id, passed back to cb */
+       int cnt;                /* AIOs still outstanding; cookie freed at 0 */
+} td_cbinfo_t;
+
+/*
+ * Queue one read or write (selected by op) through vdisk_rw().  A td_cbinfo_t
+ * cookie is allocated per request; tdvhd_do_callbacks() frees it once every
+ * AIO created for the request has completed.  Returns vdisk_rw()'s result
+ * (> 0 when the data was transferred without async IO) or -ENOMEM.
+ */
+static int
+tdvhd_queue_rw(struct disk_driver *dd, uint64_t sector, int nb_sectors,
+              char *buf, td_callback_t cb, int id, void *private, int op)
+{
+       struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+       int aio_cnt = prv->vdisk->aio_cnt;
+       td_cbinfo_t *cbi;
+       int res;
+
+       cbi = malloc(sizeof(*cbi));
+       if (cbi == NULL) {
+               VIDDBG(0, "Can't allocate callback info\n");
+               return (-ENOMEM);
+       }
+       cbi->s = dd->td_state;
+       cbi->cb = cb;
+       cbi->id = id;
+       cbi->vdisk = prv->vdisk;
+       cbi->private = private;
+
+       res = vdisk_rw(prv->vdisk, sector, (uint8_t *)buf,
+                      nb_sectors, op, (void *)cbi);
+       if (res == -EBUSY) {
+               /*
+                * This really should be done in tapdisk.c.  However, since
+                * we don't want to touch it, we do it here.
+                */
+               cbi->s->blkif->pending_list[id].secs_pending -= nb_sectors;
+               /* Return now: cbi must not be touched once it is freed. */
+               free(cbi);
+               return (res);
+       }
+
+       /* How many AIOs have been created for this request */
+       cbi->cnt = prv->vdisk->aio_cnt - aio_cnt;
+       /* Didn't use async IO: complete the request right away */
+       if (res > 0) {
+               cb(dd, 0/*XXX: pass error*/, sector, res>>9, id, private);
+               if (cbi->cnt == 0)      /* no AIO left to free it later */
+                       free(cbi);
+       }
+
+       return (res);
+}
+
+/* Queue a read: tdvhd_queue_rw() with op fixed to VDISK_READ. */
+int tdvhd_queue_read(struct disk_driver *dd, uint64_t sector, int nb_sectors,
+                    char *buf, td_callback_t cb, int id, void *private)
+{
+       return tdvhd_queue_rw(dd, sector, nb_sectors, buf, cb, id, private,
+                             VDISK_READ);
+}
+
+/* Queue a write: tdvhd_queue_rw() with op fixed to VDISK_WRITE. */
+int tdvhd_queue_write(struct disk_driver *dd, uint64_t sector, int nb_sectors,
+                     char *buf, td_callback_t cb, int id, void *private)
+{
+       return tdvhd_queue_rw(dd, sector, nb_sectors, buf, cb, id, private,
+                             VDISK_WRITE);
+}
+
+/*
+ * Tear down a disk: vdisk_fini(), free the vdisk, close both pipe ends.
+ * Always returns 0; clearing state->vdisk makes a repeated call a no-op.
+ */
+int tdvhd_close(struct disk_driver *dd)
+{
+       struct tdvhd_state *state = NULL;
+       int end;
+
+       // We can be called more than once
+       if (dd != NULL)
+               state = (struct tdvhd_state *)dd->private;
+       if (state == NULL || state->vdisk == NULL)
+               return 0; // XXX: Or error?
+
+       vdisk_fini(state->vdisk);
+       free(state->vdisk);
+       state->vdisk = NULL;
+       for (end = 0; end < 2; end++)
+               close(state->poll_pipe[end]);
+       return 0;
+}
+
+/* Publish the vdisk AIO fd in dd->io_fd[0]; every other slot is set to 0. */
+static void tdvhd_get_fd(struct disk_driver *dd)
+{
+       struct tdvhd_state *state = (struct tdvhd_state *)dd->private;
+       int slot = MAX_IOFD;
+
+       /* clear the whole array first, then fill in the first entry */
+       while (slot-- > 0)
+               dd->io_fd[slot] = 0;
+
+       dd->io_fd[0] = state->vdisk->aio_fd;
+}
+
+/*
+ * Open the disk file and initialize aio state.  Returns 0 on success or a
+ * negative errno; on failure prv->vdisk is released and reset to NULL so
+ * that a later tdvhd_close() is a no-op.
+ */
+int tdvhd_open (struct disk_driver *dd, const char *filename, td_flag_t flags)
+{
+       struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+       struct program_props props;
+       int i, ret;
+
+       /* zero-filled so that no field starts out as garbage */
+       prv->vdisk = calloc(1, sizeof(*prv->vdisk));
+       if (prv->vdisk == NULL) {
+               VIDDBG(0, "Can't allocate memory for vdisk\n");
+               return (-ENOMEM);
+       }
+       prv->vdisk->use_aio = 1;
+       props.alloc_func = NULL;
+       props.free_func = NULL;
+       props.out_target = VDISK_OUT_SYSLOG;
+       ret = vdisk_init(prv->vdisk, (char *)filename, &props, 0);
+       if (ret) {
+               VIDDBG(0, "Can't initialize vdisk for %s\n", filename);
+               free(prv->vdisk);
+               prv->vdisk = NULL;      /* keep tdvhd_close() off freed memory */
+               return (ret>0?(-1*ret):ret);
+       }
+
+       /* aio is only used in blktap, init here instead of in common */
+       for (i = 0; i < VDISK_HASH_SZ; i++)
+               prv->vdisk->hash[i].key = VDISK_INVALID_HASH;
+       prv->vdisk->aio_cnt = 0;
+
+       ret = tap_aio_setup(&prv->vdisk->aio_ctx, prv->vdisk->aio_events,
+                           MAX_AIO_REQS);
+       if (ret < 0) {
+               if (ret == -EAGAIN) {
+                       DPRINTF("Couldn't setup AIO context.  If you are "
+                               "trying to concurrently use a large number "
+                               "of blktap-based disks, you may need to "
+                               "increase the system-wide aio request limit. "
+                               "(e.g. 'echo 1048576 > /proc/sys/fs/"
+                               "aio-max-nr')\n");
+               } else {
+                       DPRINTF("Couldn't setup AIO context.\n");
+               }
+               prv->vdisk->use_aio = 0;        /* not fatal: go on without aio */
+       }
+
+       /* set up a pipe so that we can hand back a poll fd that won't fire.*/
+       if (pipe(prv->poll_pipe) != 0) {
+               ret = -errno;           /* save before cleanup can clobber it */
+               vdisk_fini(prv->vdisk);
+               free(prv->vdisk);
+               prv->vdisk = NULL;
+               return ret;
+       }
+
+       // VHD format limits geometry to roughly 136GB (0xffff cylinders,
+       // 0x10 heads and 0xff sectors per cylinder). We'll report "original
+       // size" (as specified by the header), not CHS product
+       dd->td_state->size = prv->vdisk->sz >> 9;
+       dd->td_state->sector_size = DEFAULT_SECTOR_SIZE;
+       tdvhd_get_fd(dd);
+       return 0;
+}
+
+/*
+ * Pass all queued iocbs to io_submit() and reset the queue counter.  A
+ * short or failed submit is only logged; the return value is always 0.
+ */
+int tdvhd_submit(struct disk_driver *dd)
+{
+       struct tdvhd_state *state = (struct tdvhd_state *)dd->private;
+       vdisk_dev_t *disk = (vdisk_dev_t *)state->vdisk;
+       int done;
+
+       /* nothing to do without aio or with an empty queue */
+       if (!disk->use_aio || !disk->aio_cnt)
+               return 0;
+
+       VIDDBG(50, "Submitting %d requests\n", disk->aio_cnt);
+       done = io_submit(disk->aio_ctx.aio_ctx, disk->aio_cnt,
+                        disk->aio_submit);
+       if (done != disk->aio_cnt)
+               VIDDBG(0, "Can't submit %d AIO requests (submitted %d)\n",
+                      disk->aio_cnt, done);
+       disk->aio_cnt = 0;
+       return 0;
+}
+
+/*
+ * Drain completed AIO events.  Each event is reported to vdisk_xfer_cb() and
+ * then to the tapdisk callback saved in its td_cbinfo_t, which is freed when
+ * its cnt reaches 0.  sid is not used.  Returns the sum of the callback
+ * return values, or -EIO as soon as an event has no pending_aio attached.
+ */
+int tdvhd_do_callbacks(struct disk_driver *dd, int sid)
+{
+       struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+       vdisk_dev_t *vdisk = (vdisk_dev_t *)prv->vdisk;
+        int ret, i, *ptr; /* ret is never used */
+        struct io_event *ep;
+       td_cbinfo_t *cbi;
+       uint32_t blk; /* never used */
+       int nr_events, rsp = 0;
+
+       nr_events = tap_aio_get_events(&vdisk->aio_ctx);
+repeat:
+       for (ep = vdisk->aio_events, i = nr_events; i-- > 0; ep++) {
+                struct iocb        *io  = ep->obj;
+                struct pending_aio *pio;
+               int err;
+               err = 0;
+                pio = (struct pending_aio *)io->data;
+               if (pio == NULL) {
+                       VIDDBG(0, "Can't find pending AIO data\n");
+                       return (-EIO); /* NOTE(review): skips tap_aio_continue() */
+               }
+
+               if ((signed long)ep->res < 0) {
+                       VIDDBG(0, "AIO to block %u for %u blocks reported "
+                              "error %ld (%ld)\n", pio->block, pio->num_blocks,
+                              ep->res, ep->res2);
+                       err = ep->res;
+               } else if (ep->res != io->u.c.nbytes) {
+                        /* TODO: handle this case better. */
+                       ptr = (int *)&ep->res;
+                        VIDDBG(0, "AIO did less than I asked it to "
+                               "[%lu,%lu,%d]\n", 
+                               ep->res, io->u.c.nbytes, *ptr);
+                       err = -EIO;
+                }
+
+               cbi = (td_cbinfo_t *)pio->aiocb;
+               if (cbi == NULL) {
+                       VIDDBG(0, "callback info is missing\n");
+                       //XXX: This is pretty bad. Maybe we should die?
+                       continue;
+               }
+               if (cbi->vdisk == NULL) {
+                       VIDDBG(0, "Can't find vdisk for pending AIO\n");
+                       err = -EIO;
+               } else {
+                       int vdisk_err;
+                       // Let vdisk know that a pending IO has completed
+                       pio->res = err;
+                       vdisk_err = vdisk_xfer_cb(cbi->vdisk, pio);
+                       if (vdisk_err != 0) {
+                               VIDDBG(0, "vdisk callback failed\n");
+                               //XXX: return (error) ???
+                               if (err == 0) // Report the earliest error
+                                       err = vdisk_err;
+                       }
+               }
+               cbi->cnt--; /* one fewer AIO outstanding for this request */
+               // blktap's callback (usually to kick the driver)
+               rsp += cbi->cb(dd, err, 
+                              pio->block,            /* sector */
+                              pio->num_blocks,       /* nb_sectors */
+                              cbi->id, cbi->private);
+               // the cookie is only released once its cnt has dropped to 0
+               if (cbi->cnt == 0)
+                       free(cbi);
+        }
+       // a non-empty batch may have more behind it: fetch and process again
+       if (nr_events) {
+               nr_events = tap_aio_more_events(&vdisk->aio_ctx);
+               goto repeat;
+       }
+
+       tap_aio_continue(&vdisk->aio_ctx);
+
+        // XXX: What do we return on errors?
+        return rsp;
+}
+int tdvhd_get_parent_id (struct disk_driver *dd, struct disk_id *id)
+{
+    return TD_NO_PARENT;       /* unconditional: dd and id are not examined */
+}
+
+int tdvhd_validate_parent (struct disk_driver *dd, 
+                                 struct disk_driver *p, td_flag_t flags)
+{
+       return -EINVAL;         /* unconditional: arguments are not examined */
+}
+
+
+
+struct tap_disk tapdisk_vhd = {        /* positional: order follows struct tap_disk */
+       "tapdisk_vhd",
+       sizeof(struct tdvhd_state),     /* presumably the dd->private size -- TODO confirm */
+       tdvhd_open,
+       tdvhd_queue_read,
+       tdvhd_queue_write,
+       tdvhd_submit,
+       tdvhd_close,
+       tdvhd_do_callbacks,
+        tdvhd_get_parent_id,           /* always reports TD_NO_PARENT */
+        tdvhd_validate_parent          /* always returns -EINVAL */
+};
diff -r dff7e92bf3e9 tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c    Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/tapdisk.c    Thu Jun 21 13:04:41 2007 -0400
@@ -556,15 +556,22 @@ int send_responses(struct disk_driver *d
        preq = &blkif->pending_list[idx];
        req  = &preq->req;
 
-       if (res == BLK_NOT_ALLOCATED) {
-               res = do_cow_read(dd, req, sidx, sector, nr_secs);
+        if (res == BLK_NOT_ALLOCATED) {
+#if 1 
+        /* VHD - do not support */
+            DPRINTF("invalid for VHD's\n");
+            return 0;
+#else
+        /* Original xen code */
+                res = do_cow_read(dd, req, sidx, sector, nr_secs);
                if (res >= 0) {
                        secs_done = res;
                        res = 0;
                } else
                        secs_done = 0;
-       }
-
+#endif
+       }
+        
        preq->secs_pending -= secs_done;
 
        if (res == -EBUSY && preq->submitting) 
diff -r dff7e92bf3e9 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h    Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/tapdisk.h    Thu Jun 21 13:04:41 2007 -0400
@@ -156,6 +156,7 @@ extern struct tap_disk tapdisk_vmdk;
 extern struct tap_disk tapdisk_vmdk;
 extern struct tap_disk tapdisk_ram;
 extern struct tap_disk tapdisk_qcow;
+extern struct tap_disk tapdisk_vhd;
 
 #define MAX_DISK_TYPES     20
 
@@ -164,6 +165,7 @@ extern struct tap_disk tapdisk_qcow;
 #define DISK_TYPE_VMDK     2
 #define DISK_TYPE_RAM      3
 #define DISK_TYPE_QCOW     4
+#define DISK_TYPE_VHD      5
 
 
 /*Define Individual Disk Parameters here */
@@ -214,6 +216,16 @@ static disk_info_t qcow_disk = {
        0,
 #ifdef TAPDISK
        &tapdisk_qcow,
+#endif
+};
+
+static disk_info_t vhd_disk = {
+       DISK_TYPE_VHD,
+       "VHD disk (vhd)",
+       "vhd",
+       1,
+#ifdef TAPDISK
+       &tapdisk_vhd,
 #endif
 };
 
@@ -224,6 +236,7 @@ static disk_info_t *dtypes[] = {
        &vmdk_disk,
        &ram_disk,
        &qcow_disk,
+       &vhd_disk,
 };
 
 typedef struct driver_list_entry {

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.