[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC][PATCH] Use ioemu block drivers through blktap



> +static int connect_qemu(blkif_t *blkif)
> +{
> +     char *rdctldev, *wrctldev;
> +     
> +     if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", 
> +                     blkif->domid) < 0)
> +             return -1;
> +
> +     if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", 
> +                     blkif->domid) < 0) {
> +             free(rdctldev);
> +             return -1;
> +     }
> +
> +     DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
> +     
> +     blkif->fds[READ] = open_ctrl_socket(wrctldev);
> +     blkif->fds[WRITE] = open_ctrl_socket(rdctldev);

How about freeing rdctldev and wrctldev here, once?

> +     
> +     if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) {
> +             free(rdctldev);
> +             free(wrctldev);

And then this is not needed.

> +             return -1;
> +     }
> +
> +     DPRINTF("Attached to qemu blktap pipes\n");
> +     free(rdctldev);
> +     free(wrctldev);

Nor these two lines above.

Though, looking at 'connect_tapdisk', you are using 'goto'
there; why not emulate the same behavior in this function?

> +     return 0;
> +}
> +
> +/* Launch tapdisk instance */
> +static int connect_tapdisk(blkif_t *blkif, int minor)
> +{
> +     char *rdctldev = NULL, *wrctldev = NULL;
> +     int ret = -1;
> +
> +     DPRINTF("tapdisk process does not exist:\n");
> +
> +     if (asprintf(&rdctldev,
> +                  "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1)
> +             goto fail;
> +
> +     if (asprintf(&wrctldev,
> +                  "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1)
> +             goto fail;
> +     
> +     blkif->fds[READ] = open_ctrl_socket(rdctldev);
> +     blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
> +     
> +     if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
> +             goto fail;
> +
> +     /*launch the new process*/
> +     DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",
> +                     wrctldev, rdctldev);
> +
> +     if (launch_tapdisk(wrctldev, rdctldev) == -1) {
> +             DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",
> +                             wrctldev, rdctldev);
> +             goto fail;
> +     }
> +
> +     ret = 0;
> +     
> +fail:
> +     if (rdctldev)
> +             free(rdctldev);
> +
> +     if (wrctldev)
> +             free(wrctldev);
> +
> +     return ret;
> +}
> +
>  int blktapctrl_new_blkif(blkif_t *blkif)
>  {
>       blkif_info_t *blk;
> @@ -524,30 +600,14 @@ int blktapctrl_new_blkif(blkif_t *blkif)
>               blkif->cookie = next_cookie++;
>  
>               if (!exist) {
> -                     DPRINTF("Process does not exist:\n");
> -                     if (asprintf(&rdctldev,
> -                                  "%s/tapctrlread%d", BLKTAP_CTRL_DIR, 
> minor) == -1)
> -                             goto fail;
> -                     if (asprintf(&wrctldev,
> -                                  "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, 
> minor) == -1) {
> -                             free(rdctldev);
> -                             goto fail;
> +                     if (type == DISK_TYPE_IOEMU) {
> +                             if (connect_qemu(blkif))
> +                                     goto fail;
> +                     } else {
> +                             if (connect_tapdisk(blkif, minor))
> +                                     goto fail;
>                       }
> -                     blkif->fds[READ] = open_ctrl_socket(rdctldev);
> -                     blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
> -                     
> -                     if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) 
> -                             goto fail;
> -
> -                     /*launch the new process*/
> -                     DPRINTF("Launching process, CMDLINE [tapdisk %s 
> %s]\n",wrctldev, rdctldev);
> -                     if (launch_tapdisk(wrctldev, rdctldev) == -1) {
> -                             DPRINTF("Unable to fork, cmdline: [tapdisk %s 
> %s]\n",wrctldev, rdctldev);
> -                             goto fail;
> -                     }
> -
> -                     free(rdctldev);
> -                     free(wrctldev);
> +
>               } else {
>                       DPRINTF("Process exists!\n");
>                       blkif->fds[READ] = exist->fds[READ];
> diff -r f33328217eee tools/blktap/drivers/tapdisk.h
> --- a/tools/blktap/drivers/tapdisk.h  Mon Mar 10 22:51:57 2008 +0000
> +++ b/tools/blktap/drivers/tapdisk.h  Thu Mar 13 13:00:18 2008 +0100
> @@ -167,6 +167,7 @@ extern struct tap_disk tapdisk_qcow2;
>  #define DISK_TYPE_RAM      3
>  #define DISK_TYPE_QCOW     4
>  #define DISK_TYPE_QCOW2    5
> +#define DISK_TYPE_IOEMU    6
>  
>  
>  /*Define Individual Disk Parameters here */
> @@ -227,6 +228,16 @@ static disk_info_t qcow2_disk = {
>       0,
>  #ifdef TAPDISK
>       &tapdisk_qcow2,
> +#endif
> +};
> +
> +static disk_info_t ioemu_disk = {
> +     DISK_TYPE_IOEMU,
> +     "ioemu disk",
> +     "ioemu",
> +     0,
> +#ifdef TAPDISK
> +     NULL
>  #endif
>  };
>  
> @@ -238,6 +249,7 @@ static disk_info_t *dtypes[] = {
>       &ram_disk,
>       &qcow_disk,
>       &qcow2_disk,
> +     &ioemu_disk,
>  };
>  
>  typedef struct driver_list_entry {
> diff -r f33328217eee tools/blktap/lib/blktaplib.h
> --- a/tools/blktap/lib/blktaplib.h    Mon Mar 10 22:51:57 2008 +0000
> +++ b/tools/blktap/lib/blktaplib.h    Thu Mar 13 13:00:18 2008 +0100
> @@ -221,15 +221,5 @@ int xs_fire_next_watch(struct xs_handle 
>       ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) +    \
>       ((_seg) * getpagesize()))
>  
> -/* Defines that are only used by library clients */
> -
> -#ifndef __COMPILING_BLKTAP_LIB
> -
> -static char *blkif_op_name[] = {
> -     [BLKIF_OP_READ]       = "READ",
> -     [BLKIF_OP_WRITE]      = "WRITE",
> -};
> -
> -#endif /* __COMPILING_BLKTAP_LIB */
>  
>  #endif /* __BLKTAPLIB_H__ */
> diff -r f33328217eee tools/ioemu/Makefile.target
> --- a/tools/ioemu/Makefile.target     Mon Mar 10 22:51:57 2008 +0000
> +++ b/tools/ioemu/Makefile.target     Thu Mar 13 13:00:18 2008 +0100
> @@ -17,6 +17,7 @@ VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_P
>  VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio
>  CPPFLAGS+=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH)
>  CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
> +CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib
>  CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore
>  CPPFLAGS+= -I$(XEN_ROOT)/tools/include
>  ifdef CONFIG_DARWIN_USER
> @@ -429,6 +430,7 @@ VL_OBJS+= usb-uhci.o smbus_eeprom.o
>  VL_OBJS+= usb-uhci.o smbus_eeprom.o
>  VL_OBJS+= piix4acpi.o
>  VL_OBJS+= xenstore.o
> +VL_OBJS+= xen_blktap.o
>  VL_OBJS+= xen_platform.o
>  VL_OBJS+= xen_machine_fv.o
>  VL_OBJS+= xen_machine_pv.o
> diff -r f33328217eee tools/ioemu/hw/xen_machine_pv.c
> --- a/tools/ioemu/hw/xen_machine_pv.c Mon Mar 10 22:51:57 2008 +0000
> +++ b/tools/ioemu/hw/xen_machine_pv.c Thu Mar 13 13:00:18 2008 +0100
> @@ -26,6 +26,9 @@
>  #include "xen_console.h"
>  #include "xenfb.h"
>  
> +extern void init_blktap(void);
> +
> +
>  /* The Xen PV machine currently provides
>   *   - a virtual framebuffer
>   *   - ....
> @@ -40,6 +43,10 @@ static void xen_init_pv(uint64_t ram_siz
>  {
>      struct xenfb *xenfb;
>      extern int domid;
> +
> +
> +    /* Initialize tapdisk client */
> +    init_blktap();
>  
>      /* Connect to text console */
>      if (serial_hds[0]) {
> diff -r f33328217eee tools/ioemu/vl.c
> --- a/tools/ioemu/vl.c        Mon Mar 10 22:51:57 2008 +0000
> +++ b/tools/ioemu/vl.c        Thu Mar 13 13:00:18 2008 +0100
> @@ -6270,6 +6270,12 @@ void qemu_system_powerdown_request(void)
>      powerdown_requested = 1;
>      if (cpu_single_env)
>          cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
> +}
> +
> +static void qemu_sighup_handler(int signal)
> +{
> +    fprintf(stderr, "Received SIGHUP, terminating.\n");
> +    exit(0);
>  }
>  
>  void main_loop_wait(int timeout)
> @@ -7980,7 +7986,7 @@ int main(int argc, char **argv)
>  
>  #ifndef CONFIG_STUBDOM
>      /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller 
> */
> -    signal(SIGHUP, SIG_DFL);
> +    signal(SIGHUP, qemu_sighup_handler);
>      sigemptyset(&set);
>      sigaddset(&set, SIGTERM);
>      sigaddset(&set, SIGHUP);
> diff -r f33328217eee tools/python/xen/xend/server/BlktapController.py
> --- a/tools/python/xen/xend/server/BlktapController.py        Mon Mar 10 
> 22:51:57 2008 +0000
> +++ b/tools/python/xen/xend/server/BlktapController.py        Thu Mar 13 
> 13:00:18 2008 +0100
> @@ -13,7 +13,9 @@ blktap_disk_types = [
>      'vmdk',
>      'ram',
>      'qcow',
> -    'qcow2'
> +    'qcow2',
> +
> +    'ioemu'

Why add the extra blank line?

>      ]
>  
>  class BlktapController(BlkifController):
> diff -r f33328217eee tools/ioemu/hw/xen_blktap.c
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/ioemu/hw/xen_blktap.c     Thu Mar 13 13:00:18 2008 +0100
> @@ -0,0 +1,688 @@
> +/* xen_blktap.c
> + *
> + * Interface to blktapctrl to allow use of qemu block drivers with blktap.
> + * This file is based on tools/blktap/drivers/tapdisk.c
> + * 
> + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
> + * Copyright (c) 2008 Kevin Wolf
> + */
> +
> +/*
> + * There are several communication channels which are used by this interface:
> + *
> + *   - A pair of pipes for receiving and sending general control messages
> + *     (qemu-read-N and qemu-writeN in /var/run/tap, where N is the domain 
> ID).
> + *     These control messages are handled by handle_blktap_ctrlmsg().
> + *
> + *   - One file descriptor per attached disk (/dev/xen/blktapN) for disk
> + *     specific control messages. A callback is triggered on this fd if there
> + *     is a new IO request. The callback function is handle_blktap_iomsg().
> + *
> + *   - A shared ring for each attached disk containing the actual IO 
> requests 
> + *     and responses. Whenever handle_blktap_iomsg() is triggered it 
> processes
> + *     the requests on this ring.
> + */
> +
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#include <sys/ioctl.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <errno.h>
> +#include <stdlib.h>
> +
> +#include "vl.h"
> +#include "blktaplib.h"
> +#include "xen_blktap.h"
> +#include "block_int.h"
> +
> +#define MSG_SIZE 4096
> +
> +#define BLKTAP_CTRL_DIR "/var/run/tap"
> +
> +/* If enabled, print debug messages to stderr */
> +#if 1
> +#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, 
> ##_a)
> +#else
> +#define DPRINTF(_f, _a...) ((void)0)
> +#endif
> +
> +#if 1                                                                        
> +#define ASSERT(_p) \
> +    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \

You probably want a \n at the end of the format string.

> +        __LINE__, __FILE__); *(int*)0=0; }
> +#else
> +#define ASSERT(_p) ((void)0)
> +#endif 
> +
> +
> +extern int domid;
> +
> +int read_fd;
> +int write_fd;
> +
> +static pid_t process;
> +fd_list_entry_t *fd_start = NULL;
> +
> +static void handle_blktap_iomsg(void* private);
> +
> +struct aiocb_info {
> +     struct td_state *s;
> +     uint64_t sector;
> +     int nr_secs;
> +     int idx;
> +     long i;
> +};
> +
> +static void unmap_disk(struct td_state *s)
> +{
> +     tapdev_info_t *info = s->ring_info;
> +     fd_list_entry_t *entry;
> +     
> +     bdrv_close(s->bs);
> +
> +     if (info != NULL && info->mem > 0)
> +             munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
> +
> +     entry = s->fd_entry;
> +     *entry->pprev = entry->next;
> +     if (entry->next)
> +             entry->next->pprev = entry->pprev;
> +
> +     qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL);
> +     close(info->fd);
> +
> +     free(s->fd_entry);
> +     free(s->blkif);
> +     free(s->ring_info);
> +     free(s);
> +
> +     return;
> +}
> +
> +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
> +{
> +     fd_list_entry_t **pprev, *entry;
> +
> +     DPRINTF("Adding fd_list_entry\n");
> +
> +     /*Add to linked list*/
> +     s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));
> +     entry->tap_fd = tap_fd;
> +     entry->s      = s;
> +     entry->next   = NULL;
> +
> +     pprev = &fd_start;
> +     while (*pprev != NULL)
> +             pprev = &(*pprev)->next;
> +
> +     *pprev = entry;
> +     entry->pprev = pprev;
> +
> +     return entry;
> +}
> +
> +static inline struct td_state *get_state(int cookie)
> +{
> +     fd_list_entry_t *ptr;
> +
> +     ptr = fd_start;
> +     while (ptr != NULL) {
> +             if (ptr->cookie == cookie) return ptr->s;
> +             ptr = ptr->next;
> +     }
> +     return NULL;
> +}
> +
> +static struct td_state *state_init(void)
> +{
> +     int i;
> +     struct td_state *s;
> +     blkif_t *blkif;
> +
> +     s = malloc(sizeof(struct td_state));

Would it make sense to zero out the allocated memory?

> +     blkif = s->blkif = malloc(sizeof(blkif_t));
> +     s->ring_info = calloc(1, sizeof(tapdev_info_t));
> +
> +     for (i = 0; i < MAX_REQUESTS; i++) {
> +             blkif->pending_list[i].secs_pending = 0;
> +             blkif->pending_list[i].submitting = 0;
> +     }
> +
> +     return s;
> +}
> +
> +static int map_new_dev(struct td_state *s, int minor)
> +{
> +     int tap_fd;
> +     tapdev_info_t *info = s->ring_info;
> +     char *devname;
> +     fd_list_entry_t *ptr;
> +     int page_size;
> +
> +     if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, 
> minor) == -1)
> +             return -1;
> +     tap_fd = open(devname, O_RDWR);
> +     if (tap_fd == -1) 
> +     {
> +             DPRINTF("open failed on dev %s!",devname);

You forgot to include \n above.

> +             goto fail;
> +     } 
> +     info->fd = tap_fd;
> +
> +     /*Map the shared memory*/
> +     page_size = getpagesize();
> +     info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, 
> +                       PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
> +     if ((long int)info->mem == -1) 
> +     {
> +             DPRINTF("mmap failed on dev %s!\n",devname);
> +             goto fail;
> +     }
> +
> +     /* assign the rings to the mapped memory */ 
> +     info->sring = (blkif_sring_t *)((unsigned long)info->mem);
> +     BACK_RING_INIT(&info->fe_ring, info->sring, page_size);
> +     
> +     info->vstart = 
> +             (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size);
> +
> +     ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
> +     ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
> +     free(devname);
> +
> +     /*Update the fd entry*/
> +     ptr = fd_start;
> +     while (ptr != NULL) {
> +             if (s == ptr->s) {
> +                     ptr->tap_fd = tap_fd;
> +
> +                     /* Setup fd_handler for qemu main loop */
> +                     DPRINTF("set tap_fd = %d\n", tap_fd);
> +                     qemu_set_fd_handler2(tap_fd, NULL, 
> &handle_blktap_iomsg, NULL, s);
> +
> +                     break;
> +             }
> +             ptr = ptr->next;
> +     }       
> +
> +
> +     DPRINTF("map_new_dev = %d\n", minor);
> +     return minor;
> +
> + fail:
> +     free(devname);
> +     return -1;
> +}
> +
> +static int open_disk(struct td_state *s, char *path, int readonly)
> +{
> +     struct disk_id id;
> +     BlockDriverState* bs;
> +
> +     DPRINTF("Opening %s\n", path);
> +     bs = calloc(sizeof(*bs), 1);

Are the arguments swapped? calloc() takes (nmemb, size).

> +
> +     memset(&id, 0, sizeof(struct disk_id));
> +
> +     if (bdrv_open(bs, path, 0) != 0) {
> +             fprintf(stderr, "Could not open image file %s\n", path);
> +             return -ENOMEM;
> +     }
> +
> +     s->bs = bs;
> +     s->flags = readonly ? TD_RDONLY : 0;
> +     s->size = bs->total_sectors;
> +     s->sector_size = 512;
> +
> +     s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
> +
> +     return 0;
> +}
> +
> +static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t 
> *rsp)

Why bother with a return of int when you always return 0?

> +{
> +     tapdev_info_t *info = s->ring_info;
> +     blkif_response_t *rsp_d;
> +     
> +     rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
> +     memcpy(rsp_d, rsp, sizeof(blkif_response_t));
> +     info->fe_ring.rsp_prod_pvt++;
> +     
> +     return 0;
> +}
> +
> +static inline void kick_responses(struct td_state *s)
> +{
> +     tapdev_info_t *info = s->ring_info;
> +
> +     if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) 
> +     {
> +             RING_PUSH_RESPONSES(&info->fe_ring);
> +             ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);
> +     }
> +}
> +
> +static int send_responses(struct td_state *s, int res, 
> +                uint64_t sector, int nr_secs, int idx, void *private)
> +{
> +     pending_req_t   *preq;
> +     blkif_request_t *req;
> +     int responses_queued = 0;
> +     blkif_t *blkif = s->blkif;
> +     int secs_done = nr_secs;
> +
> +     if ( (idx > MAX_REQUESTS-1) )
> +     {
> +             DPRINTF("invalid index returned(%u)!\n", idx);
> +             return 0;
> +     }
> +     preq = &blkif->pending_list[idx];
> +     req  = &preq->req;
> +
> +     preq->secs_pending -= secs_done;
> +
> +     if (res == -EBUSY && preq->submitting) 
> +             return -EBUSY;  /* propagate -EBUSY back to higher layers */
> +     if (res) 
> +             preq->status = BLKIF_RSP_ERROR;
> +     
> +     if (!preq->submitting && preq->secs_pending == 0) 
> +     {
> +             blkif_request_t tmp;
> +             blkif_response_t *rsp;
> +
> +             tmp = preq->req;
> +             rsp = (blkif_response_t *)req;
> +             
> +             rsp->id = tmp.id;
> +             rsp->operation = tmp.operation;
> +             rsp->status = preq->status;
> +             
> +             write_rsp_to_ring(s, rsp);
> +             responses_queued++;
> +
> +             kick_responses(s);
> +     }
> +     
> +     return responses_queued;
> +}
> +
> +static void qemu_send_responses(void* opaque, int ret)
> +{
> +     struct aiocb_info* info = opaque;
> +
> +     if (ret != 0) {
> +             DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret));
> +     }
> +
> +     send_responses(info->s, ret, info->sector, info->nr_secs, 
> +             info->idx, (void*) info->i);
> +     free(info);
> +}
> +
> +/**
> + * Callback function for the IO message pipe. Reads requests from the ring
> + * and processes them (call qemu read/write functions).
> + *
> + * The private parameter points to the struct td_state representing the
> + * disk the request is targeted at.
> + */
> +static void handle_blktap_iomsg(void* private)
> +{
> +     struct td_state* s = private;
> +
> +     RING_IDX          rp, j, i;
> +     blkif_request_t  *req;
> +     int idx, nsects, ret;
> +     uint64_t sector_nr;
> +     uint8_t *page;
> +     blkif_t *blkif = s->blkif;
> +     tapdev_info_t *info = s->ring_info;
> +     int page_size = getpagesize();
> +
> +     struct aiocb_info *aiocb_info;
> +
> +     if (info->fe_ring.sring == NULL) {
> +             DPRINTF("  sring == NULL, ignoring IO request\n");
> +             return;
> +     }
> +
> +     rp = info->fe_ring.sring->req_prod; 
> +     xen_rmb();
> +
> +     for (j = info->fe_ring.req_cons; j != rp; j++)
> +     {
> +             int start_seg = 0; 
> +
> +             req = NULL;
> +             req = RING_GET_REQUEST(&info->fe_ring, j);
> +             ++info->fe_ring.req_cons;
> +             
> +             if (req == NULL)
> +                     continue;
> +
> +             idx = req->id;
> +
> +             ASSERT(blkif->pending_list[idx].secs_pending == 0);
> +             memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
> +             blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
> +             blkif->pending_list[idx].submitting = 1;
> +             sector_nr = req->sector_number;
> +
> +             /* Don't allow writes on readonly devices */
> +             if ((s->flags & TD_RDONLY) && 
> +                 (req->operation == BLKIF_OP_WRITE)) {
> +                     blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
> +                     goto send_response;
> +             }
> +
> +             for (i = start_seg; i < req->nr_segments; i++) {
> +                     nsects = req->seg[i].last_sect - 
> +                              req->seg[i].first_sect + 1;
> +     
> +                     if ((req->seg[i].last_sect >= page_size >> 9) ||
> +                                     (nsects <= 0))
> +                             continue;
> +
> +                     page  = (uint8_t*) MMAP_VADDR(info->vstart, 
> +                                                (unsigned long)req->id, i);
> +                     page += (req->seg[i].first_sect << SECTOR_SHIFT);
> +
> +                     if (sector_nr >= s->size) {
> +                             DPRINTF("Sector request failed:\n");
> +                             DPRINTF("%s request, idx [%d,%d] size [%llu], "
> +                                     "sector [%llu,%llu]\n",
> +                                     (req->operation == BLKIF_OP_WRITE ? 
> +                                      "WRITE" : "READ"),
> +                                     idx,i,
> +                                     (long long unsigned) 
> +                                             nsects<<SECTOR_SHIFT,
> +                                     (long long unsigned) 
> +                                             sector_nr<<SECTOR_SHIFT,
> +                                     (long long unsigned) sector_nr);
> +                             continue;
> +                     }
> +
> +                     blkif->pending_list[idx].secs_pending += nsects;
> +
> +                     switch (req->operation) 
> +                     {
> +                     case BLKIF_OP_WRITE:
> +                             aiocb_info = malloc(sizeof(*aiocb_info));
> +
> +                             aiocb_info->s = s;
> +                             aiocb_info->sector = sector_nr;
> +                             aiocb_info->nr_secs = nsects;
> +                             aiocb_info->idx = idx;
> +                             aiocb_info->i = i;
> +
> +                             ret = (NULL == bdrv_aio_write(s->bs, sector_nr,
> +                                                       page, nsects,
> +                                                       qemu_send_responses,
> +                                                       aiocb_info));

Who de-allocates aiocb_info?

> +
> +                             if (ret) {
> +                                     blkif->pending_list[idx].status = 
> BLKIF_RSP_ERROR;
> +                                     DPRINTF("ERROR: bdrv_write() == 
> NULL\n");
> +                                     goto send_response;
> +                             }
> +                             break;
> +
> +                     case BLKIF_OP_READ:
> +                             aiocb_info = malloc(sizeof(*aiocb_info));
> +
> +                             aiocb_info->s = s;
> +                             aiocb_info->sector = sector_nr;
> +                             aiocb_info->nr_secs = nsects;
> +                             aiocb_info->idx = idx;
> +                             aiocb_info->i = i;
> +
> +                             ret = (NULL == bdrv_aio_read(s->bs, sector_nr,
> +                                                      page, nsects,
> +                                                      qemu_send_responses,
> +                                                      aiocb_info));

Ditto.

> +
> +                             if (ret) {
> +                                     blkif->pending_list[idx].status = 
> BLKIF_RSP_ERROR;
> +                                     DPRINTF("ERROR: bdrv_read() == NULL\n");
> +                                     goto send_response;
> +                             }
> +                             break;
> +
> +                     default:
> +                             DPRINTF("Unknown block operation\n");
> +                             break;
> +                     }
> +                     sector_nr += nsects;
> +             }
> +     send_response:
> +             blkif->pending_list[idx].submitting = 0;
> +
> +             /* force write_rsp_to_ring for synchronous case */
> +             if (blkif->pending_list[idx].secs_pending == 0)
> +                     send_responses(s, 0, 0, 0, idx, (void *)(long)0);
> +     }
> +}
> +
> +/**
> + * Callback function for the qemu-read pipe. Reads and processes control 
> + * message from the pipe.
> + *
> + * The parameter private is unused.
> + */
> +static void handle_blktap_ctrlmsg(void* private)
> +{
> +     int length, len, msglen;
> +     char *ptr, *path;
> +     image_t *img;
> +     msg_hdr_t *msg;
> +     msg_newdev_t *msg_dev;
> +     msg_pid_t *msg_pid;
> +     int ret = -1;
> +     struct td_state *s = NULL;
> +     fd_list_entry_t *entry;
> +
> +     char buf[MSG_SIZE];
> +
> +     length = read(read_fd, buf, MSG_SIZE);
> +
> +     if (length > 0 && length >= sizeof(msg_hdr_t)) 
> +     {
> +             msg = (msg_hdr_t *)buf;
> +             DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n",
> +                     length,msg->type,msg->cookie);
> +
> +             switch (msg->type) {
> +             case CTLMSG_PARAMS:                     
> +                     ptr = buf + sizeof(msg_hdr_t);
> +                     len = (length - sizeof(msg_hdr_t));
> +                     path = calloc(1, len + 1);
> +                     
> +                     memcpy(path, ptr, len); 
> +                     DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
> +
> +                     /* Allocate the disk structs */
> +                     s = state_init();
> +
> +                     /*Open file*/
> +                     if (s == NULL || open_disk(s, path, msg->readonly)) {
> +                             msglen = sizeof(msg_hdr_t);
> +                             msg->type = CTLMSG_IMG_FAIL;
> +                             msg->len = msglen;
> +                     } else {
> +                             entry = add_fd_entry(0, s);
> +                             entry->cookie = msg->cookie;
> +                             DPRINTF("Entered cookie %d\n", entry->cookie);
> +                             
> +                             memset(buf, 0x00, MSG_SIZE); 
> +                     
> +                             msglen = sizeof(msg_hdr_t) + sizeof(image_t);
> +                             msg->type = CTLMSG_IMG;
> +                             img = (image_t *)(buf + sizeof(msg_hdr_t));
> +                             img->size = s->size;
> +                             img->secsize = s->sector_size;
> +                             img->info = s->info;
> +                             DPRINTF("Writing (size, secsize, info) = "
> +                                     "(%#" PRIx64 ", %#" PRIx64 ", %d)\n",
> +                                     s->size, s->sector_size, s->info);
> +                     }
> +                     len = write(write_fd, buf, msglen);
> +                     free(path);
> +                     break;
> +                     
> +             case CTLMSG_NEWDEV:
> +                     msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
> +
> +                     s = get_state(msg->cookie);
> +                     DPRINTF("Retrieving state, cookie %d.....[%s]\n",
> +                             msg->cookie, (s == NULL ? "FAIL":"OK"));
> +                     if (s != NULL) {
> +                             ret = ((map_new_dev(s, msg_dev->devnum) 
> +                                     == msg_dev->devnum ? 0: -1));
> +                     }       
> +
> +                     memset(buf, 0x00, MSG_SIZE); 
> +                     msglen = sizeof(msg_hdr_t);
> +                     msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP 
> +                                           : CTLMSG_NEWDEV_FAIL);
> +                     msg->len = msglen;
> +
> +                     len = write(write_fd, buf, msglen);
> +                     break;
> +
> +             case CTLMSG_CLOSE:
> +                     s = get_state(msg->cookie);
> +                     if (s) unmap_disk(s);
> +                     break;                  
> +
> +             case CTLMSG_PID:
> +                     memset(buf, 0x00, MSG_SIZE);
> +                     msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
> +                     msg->type = CTLMSG_PID_RSP;
> +                     msg->len = msglen;
> +
> +                     msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
> +                     process = getpid();
> +                     msg_pid->pid = process;
> +
> +                     len = write(write_fd, buf, msglen);
> +                     break;
> +
> +             default:
> +                     break;
> +             }
> +     }
> +}
> +
> +/**
> + * Opens a control socket, i.e. a pipe to communicate with blktapctrl.
> + *
> + * Returns the file descriptor number for the pipe; -1 in error case
> + */
> +static int open_ctrl_socket(char *devname)
> +{
> +     int ret;
> +     int ipc_fd;
> +
> +     if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
> +             DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
> +
> +     ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
> +     if ( (ret != 0) && (errno != EEXIST) ) {
> +             DPRINTF("ERROR: pipe failed (%d)\n", errno);
> +             return -1;
> +     }
> +
> +     ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
> +
> +     if (ipc_fd < 0) {
> +             DPRINTF("FD open failed\n");
> +             return -1;
> +     }
> +
> +     return ipc_fd;
> +}
> +
> +/**
> + * Unmaps all disks and closes their pipes
> + */
> +void shutdown_blktap(void)
> +{
> +     fd_list_entry_t *ptr;
> +     struct td_state *s;
> +     char *devname;
> +
> +     DPRINTF("Shutdown blktap\n");
> +
> +     /* Unmap all disks */
> +     ptr = fd_start;
> +     while (ptr != NULL) {
> +             s = ptr->s;
> +             unmap_disk(s);
> +             close(ptr->tap_fd);
> +             ptr = ptr->next;
> +     }
> +
> +     /* Delete control pipes */
> +     if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
> +             DPRINTF("Delete %s\n", devname);
> +             if (unlink(devname))
> +                     DPRINTF("Could not delete: %s\n", strerror(errno));
> +             free(devname);
> +     }
> +     
> +     if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { 
> +             DPRINTF("Delete %s\n", devname);
> +             if (unlink(devname))
> +                     DPRINTF("Could not delete: %s\n", strerror(errno));
> +             free(devname);
> +     }
> +}
> +
> +/**
> + * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap
> + * and register a fd handler.
> + *
> + * Returns 0 on success.
> + */
> +int init_blktap(void)
> +{
> +     char* devname;  
> +
> +     DPRINTF("Init blktap pipes\n");
> +
> +     /* Open the read pipe */
> +     if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {  
> +             read_fd = open_ctrl_socket(devname);            
> +             free(devname);
> +             
> +             if (read_fd == -1) {
> +                     fprintf(stderr, "Could not open %s/qemu-read-%d\n",
> +                             BLKTAP_CTRL_DIR, domid);
> +                     return -1;
> +             }
> +     }
> +     
> +     /* Open the write pipe */
> +     if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {
> +             write_fd = open_ctrl_socket(devname);
> +             free(devname);
> +             
> +             if (write_fd == -1) {
> +                     fprintf(stderr, "Could not open %s/qemu-write-%d\n",
> +                             BLKTAP_CTRL_DIR, domid);
> +                     close(read_fd);
> +                     return -1;
> +             }
> +     }
> +
> +     /* Attach a handler to the read pipe (called from qemu main loop) */
> +     qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
> +
> +     /* Register handler to clean up when the domain is destroyed */
> +     atexit(&shutdown_blktap);
> +
> +     return 0;
> +}
> diff -r f33328217eee tools/ioemu/hw/xen_blktap.h
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/ioemu/hw/xen_blktap.h     Thu Mar 13 13:00:18 2008 +0100
> @@ -0,0 +1,57 @@
> +/* xen_blktap.h
> + *
> + * Generic disk interface for blktap-based image adapters.
> + *
> + * (c) 2006 Andrew Warfield and Julian Chesterfield
> + */
> +
> +#ifndef XEN_BLKTAP_H_ 
> +#define XEN_BLKTAP_H_
> +
> +#include <stdint.h>
> +#include <syslog.h>
> +#include <stdio.h>
> +
> +#include "block_int.h"
> +
> +/* Things disks need to know about, these should probably be in a 
> higher-level
> + * header. */
> +#define MAX_SEGMENTS_PER_REQ    11
> +#define SECTOR_SHIFT             9
> +#define DEFAULT_SECTOR_SIZE    512
> +
> +#define MAX_IOFD                 2
> +
> +#define BLK_NOT_ALLOCATED       99
> +#define TD_NO_PARENT             1
> +
> +typedef uint32_t td_flag_t;
> +
> +#define TD_RDONLY                1
> +
> +struct disk_id {
> +     char *name;
> +     int drivertype;
> +};
> +
> +/* This structure represents the state of an active virtual disk.           
> */
> +struct td_state {
> +     BlockDriverState* bs;
> +     td_flag_t flags;
> +     void *blkif;
> +     void *image;
> +     void *ring_info;
> +     void *fd_entry;
> +     uint64_t sector_size;
> +     uint64_t size;
> +     unsigned int       info;
> +};
> +
> +typedef struct fd_list_entry {
> +     int cookie;
> +     int  tap_fd;
> +     struct td_state *s;
> +     struct fd_list_entry **pprev, *next;
> +} fd_list_entry_t;
> +
> +#endif /*XEN_BLKTAP_H_*/

> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.