[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC][PATCH] Use ioemu block drivers through blktap
> +static int connect_qemu(blkif_t *blkif) > +{ > + char *rdctldev, *wrctldev; > + > + if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", > + blkif->domid) < 0) > + return -1; > + > + if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", > + blkif->domid) < 0) { > + free(rdctldev); > + return -1; > + } > + > + DPRINTF("Using qemu blktap pipe: %s\n", rdctldev); > + > + blkif->fds[READ] = open_ctrl_socket(wrctldev); > + blkif->fds[WRITE] = open_ctrl_socket(rdctldev); How about freeing the data here once? > + > + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) { > + free(rdctldev); > + free(wrctldev); And then this is not needed. > + return -1; > + } > + > + DPRINTF("Attached to qemu blktap pipes\n"); > + free(rdctldev); > + free(wrctldev); Nor these two lines above. Though looking at the 'connect_tapdisk' you are using 'goto' there, why not emulate the same behavior in this function? > + return 0; > +} > + > +/* Launch tapdisk instance */ > +static int connect_tapdisk(blkif_t *blkif, int minor) > +{ > + char *rdctldev = NULL, *wrctldev = NULL; > + int ret = -1; > + > + DPRINTF("tapdisk process does not exist:\n"); > + > + if (asprintf(&rdctldev, > + "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) > + goto fail; > + > + if (asprintf(&wrctldev, > + "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) > + goto fail; > + > + blkif->fds[READ] = open_ctrl_socket(rdctldev); > + blkif->fds[WRITE] = open_ctrl_socket(wrctldev); > + > + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) > + goto fail; > + > + /*launch the new process*/ > + DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n", > + wrctldev, rdctldev); > + > + if (launch_tapdisk(wrctldev, rdctldev) == -1) { > + DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n", > + wrctldev, rdctldev); > + goto fail; > + } > + > + ret = 0; > + > +fail: > + if (rdctldev) > + free(rdctldev); > + > + if (wrctldev) > + free(wrctldev); > + > + return ret; > +} > + > int blktapctrl_new_blkif(blkif_t *blkif) 
> { > blkif_info_t *blk; > @@ -524,30 +600,14 @@ int blktapctrl_new_blkif(blkif_t *blkif) > blkif->cookie = next_cookie++; > > if (!exist) { > - DPRINTF("Process does not exist:\n"); > - if (asprintf(&rdctldev, > - "%s/tapctrlread%d", BLKTAP_CTRL_DIR, > minor) == -1) > - goto fail; > - if (asprintf(&wrctldev, > - "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, > minor) == -1) { > - free(rdctldev); > - goto fail; > + if (type == DISK_TYPE_IOEMU) { > + if (connect_qemu(blkif)) > + goto fail; > + } else { > + if (connect_tapdisk(blkif, minor)) > + goto fail; > } > - blkif->fds[READ] = open_ctrl_socket(rdctldev); > - blkif->fds[WRITE] = open_ctrl_socket(wrctldev); > - > - if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) > - goto fail; > - > - /*launch the new process*/ > - DPRINTF("Launching process, CMDLINE [tapdisk %s > %s]\n",wrctldev, rdctldev); > - if (launch_tapdisk(wrctldev, rdctldev) == -1) { > - DPRINTF("Unable to fork, cmdline: [tapdisk %s > %s]\n",wrctldev, rdctldev); > - goto fail; > - } > - > - free(rdctldev); > - free(wrctldev); > + > } else { > DPRINTF("Process exists!\n"); > blkif->fds[READ] = exist->fds[READ]; > diff -r f33328217eee tools/blktap/drivers/tapdisk.h > --- a/tools/blktap/drivers/tapdisk.h Mon Mar 10 22:51:57 2008 +0000 > +++ b/tools/blktap/drivers/tapdisk.h Thu Mar 13 13:00:18 2008 +0100 > @@ -167,6 +167,7 @@ extern struct tap_disk tapdisk_qcow2; > #define DISK_TYPE_RAM 3 > #define DISK_TYPE_QCOW 4 > #define DISK_TYPE_QCOW2 5 > +#define DISK_TYPE_IOEMU 6 > > > /*Define Individual Disk Parameters here */ > @@ -227,6 +228,16 @@ static disk_info_t qcow2_disk = { > 0, > #ifdef TAPDISK > &tapdisk_qcow2, > +#endif > +}; > + > +static disk_info_t ioemu_disk = { > + DISK_TYPE_IOEMU, > + "ioemu disk", > + "ioemu", > + 0, > +#ifdef TAPDISK > + NULL > #endif > }; > > @@ -238,6 +249,7 @@ static disk_info_t *dtypes[] = { > &ram_disk, > &qcow_disk, > &qcow2_disk, > + &ioemu_disk, > }; > > typedef struct driver_list_entry { > diff -r f33328217eee 
tools/blktap/lib/blktaplib.h > --- a/tools/blktap/lib/blktaplib.h Mon Mar 10 22:51:57 2008 +0000 > +++ b/tools/blktap/lib/blktaplib.h Thu Mar 13 13:00:18 2008 +0100 > @@ -221,15 +221,5 @@ int xs_fire_next_watch(struct xs_handle > ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) + \ > ((_seg) * getpagesize())) > > -/* Defines that are only used by library clients */ > - > -#ifndef __COMPILING_BLKTAP_LIB > - > -static char *blkif_op_name[] = { > - [BLKIF_OP_READ] = "READ", > - [BLKIF_OP_WRITE] = "WRITE", > -}; > - > -#endif /* __COMPILING_BLKTAP_LIB */ > > #endif /* __BLKTAPLIB_H__ */ > diff -r f33328217eee tools/ioemu/Makefile.target > --- a/tools/ioemu/Makefile.target Mon Mar 10 22:51:57 2008 +0000 > +++ b/tools/ioemu/Makefile.target Thu Mar 13 13:00:18 2008 +0100 > @@ -17,6 +17,7 @@ VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_P > VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio > CPPFLAGS+=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH) > CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc > +CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib > CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore > CPPFLAGS+= -I$(XEN_ROOT)/tools/include > ifdef CONFIG_DARWIN_USER > @@ -429,6 +430,7 @@ VL_OBJS+= usb-uhci.o smbus_eeprom.o > VL_OBJS+= usb-uhci.o smbus_eeprom.o > VL_OBJS+= piix4acpi.o > VL_OBJS+= xenstore.o > +VL_OBJS+= xen_blktap.o > VL_OBJS+= xen_platform.o > VL_OBJS+= xen_machine_fv.o > VL_OBJS+= xen_machine_pv.o > diff -r f33328217eee tools/ioemu/hw/xen_machine_pv.c > --- a/tools/ioemu/hw/xen_machine_pv.c Mon Mar 10 22:51:57 2008 +0000 > +++ b/tools/ioemu/hw/xen_machine_pv.c Thu Mar 13 13:00:18 2008 +0100 > @@ -26,6 +26,9 @@ > #include "xen_console.h" > #include "xenfb.h" > > +extern void init_blktap(void); > + > + > /* The Xen PV machine currently provides > * - a virtual framebuffer > * - .... 
> @@ -40,6 +43,10 @@ static void xen_init_pv(uint64_t ram_siz > { > struct xenfb *xenfb; > extern int domid; > + > + > + /* Initialize tapdisk client */ > + init_blktap(); > > /* Connect to text console */ > if (serial_hds[0]) { > diff -r f33328217eee tools/ioemu/vl.c > --- a/tools/ioemu/vl.c Mon Mar 10 22:51:57 2008 +0000 > +++ b/tools/ioemu/vl.c Thu Mar 13 13:00:18 2008 +0100 > @@ -6270,6 +6270,12 @@ void qemu_system_powerdown_request(void) > powerdown_requested = 1; > if (cpu_single_env) > cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT); > +} > + > +static void qemu_sighup_handler(int signal) > +{ > + fprintf(stderr, "Received SIGHUP, terminating.\n"); > + exit(0); > } > > void main_loop_wait(int timeout) > @@ -7980,7 +7986,7 @@ int main(int argc, char **argv) > > #ifndef CONFIG_STUBDOM > /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller > */ > - signal(SIGHUP, SIG_DFL); > + signal(SIGHUP, qemu_sighup_handler); > sigemptyset(&set); > sigaddset(&set, SIGTERM); > sigaddset(&set, SIGHUP); > diff -r f33328217eee tools/python/xen/xend/server/BlktapController.py > --- a/tools/python/xen/xend/server/BlktapController.py Mon Mar 10 > 22:51:57 2008 +0000 > +++ b/tools/python/xen/xend/server/BlktapController.py Thu Mar 13 > 13:00:18 2008 +0100 > @@ -13,7 +13,9 @@ blktap_disk_types = [ > 'vmdk', > 'ram', > 'qcow', > - 'qcow2' > + 'qcow2', > + > + 'ioemu' Why add the extra \n ? > ] > > class BlktapController(BlkifController): > diff -r f33328217eee tools/ioemu/hw/xen_blktap.c > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/tools/ioemu/hw/xen_blktap.c Thu Mar 13 13:00:18 2008 +0100 > @@ -0,0 +1,688 @@ > +/* xen_blktap.c > + * > + * Interface to blktapctrl to allow use of qemu block drivers with blktap. > + * This file is based on tools/blktap/drivers/tapdisk.c > + * > + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. 
> + * Copyright (c) 2008 Kevin Wolf > + */ > + > +/* > + * There are several communication channels which are used by this interface: > + * > + * - A pair of pipes for receiving and sending general control messages > + * (qemu-read-N and qemu-writeN in /var/run/tap, where N is the domain > ID). > + * These control messages are handled by handle_blktap_ctrlmsg(). > + * > + * - One file descriptor per attached disk (/dev/xen/blktapN) for disk > + * specific control messages. A callback is triggered on this fd if there > + * is a new IO request. The callback function is handle_blktap_iomsg(). > + * > + * - A shared ring for each attached disk containing the actual IO > requests > + * and responses. Whenever handle_blktap_iomsg() is triggered it > processes > + * the requests on this ring. > + */ > + > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <sys/mman.h> > +#include <sys/ioctl.h> > +#include <fcntl.h> > +#include <stdio.h> > +#include <errno.h> > +#include <stdlib.h> > + > +#include "vl.h" > +#include "blktaplib.h" > +#include "xen_blktap.h" > +#include "block_int.h" > + > +#define MSG_SIZE 4096 > + > +#define BLKTAP_CTRL_DIR "/var/run/tap" > + > +/* If enabled, print debug messages to stderr */ > +#if 1 > +#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, > ##_a) > +#else > +#define DPRINTF(_f, _a...) ((void)0) > +#endif > + > +#if 1 > +#define ASSERT(_p) \ > + if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \ You probably want \n at the end. 
> + __LINE__, __FILE__); *(int*)0=0; } > +#else > +#define ASSERT(_p) ((void)0) > +#endif > + > + > +extern int domid; > + > +int read_fd; > +int write_fd; > + > +static pid_t process; > +fd_list_entry_t *fd_start = NULL; > + > +static void handle_blktap_iomsg(void* private); > + > +struct aiocb_info { > + struct td_state *s; > + uint64_t sector; > + int nr_secs; > + int idx; > + long i; > +}; > + > +static void unmap_disk(struct td_state *s) > +{ > + tapdev_info_t *info = s->ring_info; > + fd_list_entry_t *entry; > + > + bdrv_close(s->bs); > + > + if (info != NULL && info->mem > 0) > + munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); > + > + entry = s->fd_entry; > + *entry->pprev = entry->next; > + if (entry->next) > + entry->next->pprev = entry->pprev; > + > + qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL); > + close(info->fd); > + > + free(s->fd_entry); > + free(s->blkif); > + free(s->ring_info); > + free(s); > + > + return; > +} > + > +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s) > +{ > + fd_list_entry_t **pprev, *entry; > + > + DPRINTF("Adding fd_list_entry\n"); > + > + /*Add to linked list*/ > + s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); > + entry->tap_fd = tap_fd; > + entry->s = s; > + entry->next = NULL; > + > + pprev = &fd_start; > + while (*pprev != NULL) > + pprev = &(*pprev)->next; > + > + *pprev = entry; > + entry->pprev = pprev; > + > + return entry; > +} > + > +static inline struct td_state *get_state(int cookie) > +{ > + fd_list_entry_t *ptr; > + > + ptr = fd_start; > + while (ptr != NULL) { > + if (ptr->cookie == cookie) return ptr->s; > + ptr = ptr->next; > + } > + return NULL; > +} > + > +static struct td_state *state_init(void) > +{ > + int i; > + struct td_state *s; > + blkif_t *blkif; > + > + s = malloc(sizeof(struct td_state)); Would it make sense to zero out the allocated memory? 
> + blkif = s->blkif = malloc(sizeof(blkif_t)); > + s->ring_info = calloc(1, sizeof(tapdev_info_t)); > + > + for (i = 0; i < MAX_REQUESTS; i++) { > + blkif->pending_list[i].secs_pending = 0; > + blkif->pending_list[i].submitting = 0; > + } > + > + return s; > +} > + > +static int map_new_dev(struct td_state *s, int minor) > +{ > + int tap_fd; > + tapdev_info_t *info = s->ring_info; > + char *devname; > + fd_list_entry_t *ptr; > + int page_size; > + > + if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, > minor) == -1) > + return -1; > + tap_fd = open(devname, O_RDWR); > + if (tap_fd == -1) > + { > + DPRINTF("open failed on dev %s!",devname); You forgot to include \n above. > + goto fail; > + } > + info->fd = tap_fd; > + > + /*Map the shared memory*/ > + page_size = getpagesize(); > + info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, > + PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0); > + if ((long int)info->mem == -1) > + { > + DPRINTF("mmap failed on dev %s!\n",devname); > + goto fail; > + } > + > + /* assign the rings to the mapped memory */ > + info->sring = (blkif_sring_t *)((unsigned long)info->mem); > + BACK_RING_INIT(&info->fe_ring, info->sring, page_size); > + > + info->vstart = > + (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size); > + > + ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process ); > + ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); > + free(devname); > + > + /*Update the fd entry*/ > + ptr = fd_start; > + while (ptr != NULL) { > + if (s == ptr->s) { > + ptr->tap_fd = tap_fd; > + > + /* Setup fd_handler for qemu main loop */ > + DPRINTF("set tap_fd = %d\n", tap_fd); > + qemu_set_fd_handler2(tap_fd, NULL, > &handle_blktap_iomsg, NULL, s); > + > + break; > + } > + ptr = ptr->next; > + } > + > + > + DPRINTF("map_new_dev = %d\n", minor); > + return minor; > + > + fail: > + free(devname); > + return -1; > +} > + > +static int open_disk(struct td_state *s, char *path, int readonly) > +{ > + struct disk_id id; 
> + BlockDriverState* bs; > + > + DPRINTF("Opening %s\n", path); > + bs = calloc(sizeof(*bs), 1); Are the arguments swapped? > + > + memset(&id, 0, sizeof(struct disk_id)); > + > + if (bdrv_open(bs, path, 0) != 0) { > + fprintf(stderr, "Could not open image file %s\n", path); > + return -ENOMEM; > + } > + > + s->bs = bs; > + s->flags = readonly ? TD_RDONLY : 0; > + s->size = bs->total_sectors; > + s->sector_size = 512; > + > + s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0); > + > + return 0; > +} > + > +static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t > *rsp) Why bother with a return of int when you always return 0? > +{ > + tapdev_info_t *info = s->ring_info; > + blkif_response_t *rsp_d; > + > + rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt); > + memcpy(rsp_d, rsp, sizeof(blkif_response_t)); > + info->fe_ring.rsp_prod_pvt++; > + > + return 0; > +} > + > +static inline void kick_responses(struct td_state *s) > +{ > + tapdev_info_t *info = s->ring_info; > + > + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) > + { > + RING_PUSH_RESPONSES(&info->fe_ring); > + ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); > + } > +} > + > +static int send_responses(struct td_state *s, int res, > + uint64_t sector, int nr_secs, int idx, void *private) > +{ > + pending_req_t *preq; > + blkif_request_t *req; > + int responses_queued = 0; > + blkif_t *blkif = s->blkif; > + int secs_done = nr_secs; > + > + if ( (idx > MAX_REQUESTS-1) ) > + { > + DPRINTF("invalid index returned(%u)!\n", idx); > + return 0; > + } > + preq = &blkif->pending_list[idx]; > + req = &preq->req; > + > + preq->secs_pending -= secs_done; > + > + if (res == -EBUSY && preq->submitting) > + return -EBUSY; /* propagate -EBUSY back to higher layers */ > + if (res) > + preq->status = BLKIF_RSP_ERROR; > + > + if (!preq->submitting && preq->secs_pending == 0) > + { > + blkif_request_t tmp; > + blkif_response_t *rsp; > + > + tmp = preq->req; > + rsp = 
(blkif_response_t *)req; > + > + rsp->id = tmp.id; > + rsp->operation = tmp.operation; > + rsp->status = preq->status; > + > + write_rsp_to_ring(s, rsp); > + responses_queued++; > + > + kick_responses(s); > + } > + > + return responses_queued; > +} > + > +static void qemu_send_responses(void* opaque, int ret) > +{ > + struct aiocb_info* info = opaque; > + > + if (ret != 0) { > + DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret)); > + } > + > + send_responses(info->s, ret, info->sector, info->nr_secs, > + info->idx, (void*) info->i); > + free(info); > +} > + > +/** > + * Callback function for the IO message pipe. Reads requests from the ring > + * and processes them (call qemu read/write functions). > + * > + * The private parameter points to the struct td_state representing the > + * disk the request is targeted at. > + */ > +static void handle_blktap_iomsg(void* private) > +{ > + struct td_state* s = private; > + > + RING_IDX rp, j, i; > + blkif_request_t *req; > + int idx, nsects, ret; > + uint64_t sector_nr; > + uint8_t *page; > + blkif_t *blkif = s->blkif; > + tapdev_info_t *info = s->ring_info; > + int page_size = getpagesize(); > + > + struct aiocb_info *aiocb_info; > + > + if (info->fe_ring.sring == NULL) { > + DPRINTF(" sring == NULL, ignoring IO request\n"); > + return; > + } > + > + rp = info->fe_ring.sring->req_prod; > + xen_rmb(); > + > + for (j = info->fe_ring.req_cons; j != rp; j++) > + { > + int start_seg = 0; > + > + req = NULL; > + req = RING_GET_REQUEST(&info->fe_ring, j); > + ++info->fe_ring.req_cons; > + > + if (req == NULL) > + continue; > + > + idx = req->id; > + > + ASSERT(blkif->pending_list[idx].secs_pending == 0); > + memcpy(&blkif->pending_list[idx].req, req, sizeof(*req)); > + blkif->pending_list[idx].status = BLKIF_RSP_OKAY; > + blkif->pending_list[idx].submitting = 1; > + sector_nr = req->sector_number; > + > + /* Don't allow writes on readonly devices */ > + if ((s->flags & TD_RDONLY) && > + (req->operation == BLKIF_OP_WRITE)) { > 
+ blkif->pending_list[idx].status = BLKIF_RSP_ERROR; > + goto send_response; > + } > + > + for (i = start_seg; i < req->nr_segments; i++) { > + nsects = req->seg[i].last_sect - > + req->seg[i].first_sect + 1; > + > + if ((req->seg[i].last_sect >= page_size >> 9) || > + (nsects <= 0)) > + continue; > + > + page = (uint8_t*) MMAP_VADDR(info->vstart, > + (unsigned long)req->id, i); > + page += (req->seg[i].first_sect << SECTOR_SHIFT); > + > + if (sector_nr >= s->size) { > + DPRINTF("Sector request failed:\n"); > + DPRINTF("%s request, idx [%d,%d] size [%llu], " > + "sector [%llu,%llu]\n", > + (req->operation == BLKIF_OP_WRITE ? > + "WRITE" : "READ"), > + idx,i, > + (long long unsigned) > + nsects<<SECTOR_SHIFT, > + (long long unsigned) > + sector_nr<<SECTOR_SHIFT, > + (long long unsigned) sector_nr); > + continue; > + } > + > + blkif->pending_list[idx].secs_pending += nsects; > + > + switch (req->operation) > + { > + case BLKIF_OP_WRITE: > + aiocb_info = malloc(sizeof(*aiocb_info)); > + > + aiocb_info->s = s; > + aiocb_info->sector = sector_nr; > + aiocb_info->nr_secs = nsects; > + aiocb_info->idx = idx; > + aiocb_info->i = i; > + > + ret = (NULL == bdrv_aio_write(s->bs, sector_nr, > + page, nsects, > + qemu_send_responses, > + aiocb_info)); Who de-allocates aiocb_info? > + > + if (ret) { > + blkif->pending_list[idx].status = > BLKIF_RSP_ERROR; > + DPRINTF("ERROR: bdrv_write() == > NULL\n"); > + goto send_response; > + } > + break; > + > + case BLKIF_OP_READ: > + aiocb_info = malloc(sizeof(*aiocb_info)); > + > + aiocb_info->s = s; > + aiocb_info->sector = sector_nr; > + aiocb_info->nr_secs = nsects; > + aiocb_info->idx = idx; > + aiocb_info->i = i; > + > + ret = (NULL == bdrv_aio_read(s->bs, sector_nr, > + page, nsects, > + qemu_send_responses, > + aiocb_info)); Ditto. 
> + > + if (ret) { > + blkif->pending_list[idx].status = > BLKIF_RSP_ERROR; > + DPRINTF("ERROR: bdrv_read() == NULL\n"); > + goto send_response; > + } > + break; > + > + default: > + DPRINTF("Unknown block operation\n"); > + break; > + } > + sector_nr += nsects; > + } > + send_response: > + blkif->pending_list[idx].submitting = 0; > + > + /* force write_rsp_to_ring for synchronous case */ > + if (blkif->pending_list[idx].secs_pending == 0) > + send_responses(s, 0, 0, 0, idx, (void *)(long)0); > + } > +} > + > +/** > + * Callback function for the qemu-read pipe. Reads and processes control > + * message from the pipe. > + * > + * The parameter private is unused. > + */ > +static void handle_blktap_ctrlmsg(void* private) > +{ > + int length, len, msglen; > + char *ptr, *path; > + image_t *img; > + msg_hdr_t *msg; > + msg_newdev_t *msg_dev; > + msg_pid_t *msg_pid; > + int ret = -1; > + struct td_state *s = NULL; > + fd_list_entry_t *entry; > + > + char buf[MSG_SIZE]; > + > + length = read(read_fd, buf, MSG_SIZE); > + > + if (length > 0 && length >= sizeof(msg_hdr_t)) > + { > + msg = (msg_hdr_t *)buf; > + DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n", > + length,msg->type,msg->cookie); > + > + switch (msg->type) { > + case CTLMSG_PARAMS: > + ptr = buf + sizeof(msg_hdr_t); > + len = (length - sizeof(msg_hdr_t)); > + path = calloc(1, len + 1); > + > + memcpy(path, ptr, len); > + DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path); > + > + /* Allocate the disk structs */ > + s = state_init(); > + > + /*Open file*/ > + if (s == NULL || open_disk(s, path, msg->readonly)) { > + msglen = sizeof(msg_hdr_t); > + msg->type = CTLMSG_IMG_FAIL; > + msg->len = msglen; > + } else { > + entry = add_fd_entry(0, s); > + entry->cookie = msg->cookie; > + DPRINTF("Entered cookie %d\n", entry->cookie); > + > + memset(buf, 0x00, MSG_SIZE); > + > + msglen = sizeof(msg_hdr_t) + sizeof(image_t); > + msg->type = CTLMSG_IMG; > + img = (image_t *)(buf + sizeof(msg_hdr_t)); > + img->size = 
s->size; > + img->secsize = s->sector_size; > + img->info = s->info; > + DPRINTF("Writing (size, secsize, info) = " > + "(%#" PRIx64 ", %#" PRIx64 ", %d)\n", > + s->size, s->sector_size, s->info); > + } > + len = write(write_fd, buf, msglen); > + free(path); > + break; > + > + case CTLMSG_NEWDEV: > + msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); > + > + s = get_state(msg->cookie); > + DPRINTF("Retrieving state, cookie %d.....[%s]\n", > + msg->cookie, (s == NULL ? "FAIL":"OK")); > + if (s != NULL) { > + ret = ((map_new_dev(s, msg_dev->devnum) > + == msg_dev->devnum ? 0: -1)); > + } > + > + memset(buf, 0x00, MSG_SIZE); > + msglen = sizeof(msg_hdr_t); > + msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP > + : CTLMSG_NEWDEV_FAIL); > + msg->len = msglen; > + > + len = write(write_fd, buf, msglen); > + break; > + > + case CTLMSG_CLOSE: > + s = get_state(msg->cookie); > + if (s) unmap_disk(s); > + break; > + > + case CTLMSG_PID: > + memset(buf, 0x00, MSG_SIZE); > + msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t); > + msg->type = CTLMSG_PID_RSP; > + msg->len = msglen; > + > + msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t)); > + process = getpid(); > + msg_pid->pid = process; > + > + len = write(write_fd, buf, msglen); > + break; > + > + default: > + break; > + } > + } > +} > + > +/** > + * Opens a control socket, i.e. a pipe to communicate with blktapctrl. 
> + * > + * Returns the file descriptor number for the pipe; -1 in error case > + */ > +static int open_ctrl_socket(char *devname) > +{ > + int ret; > + int ipc_fd; > + > + if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0) > + DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR); > + > + ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO); > + if ( (ret != 0) && (errno != EEXIST) ) { > + DPRINTF("ERROR: pipe failed (%d)\n", errno); > + return -1; > + } > + > + ipc_fd = open(devname,O_RDWR|O_NONBLOCK); > + > + if (ipc_fd < 0) { > + DPRINTF("FD open failed\n"); > + return -1; > + } > + > + return ipc_fd; > +} > + > +/** > + * Unmaps all disks and closes their pipes > + */ > +void shutdown_blktap(void) > +{ > + fd_list_entry_t *ptr; > + struct td_state *s; > + char *devname; > + > + DPRINTF("Shutdown blktap\n"); > + > + /* Unmap all disks */ > + ptr = fd_start; > + while (ptr != NULL) { > + s = ptr->s; > + unmap_disk(s); > + close(ptr->tap_fd); > + ptr = ptr->next; > + } > + > + /* Delete control pipes */ > + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { > + DPRINTF("Delete %s\n", devname); > + if (unlink(devname)) > + DPRINTF("Could not delete: %s\n", strerror(errno)); > + free(devname); > + } > + > + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { > + DPRINTF("Delete %s\n", devname); > + if (unlink(devname)) > + DPRINTF("Could not delete: %s\n", strerror(errno)); > + free(devname); > + } > +} > + > +/** > + * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap > + * and register a fd handler. > + * > + * Returns 0 on success. 
> + */ > +int init_blktap(void) > +{ > + char* devname; > + > + DPRINTF("Init blktap pipes\n"); > + > + /* Open the read pipe */ > + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { > + read_fd = open_ctrl_socket(devname); > + free(devname); > + > + if (read_fd == -1) { > + fprintf(stderr, "Could not open %s/qemu-read-%d\n", > + BLKTAP_CTRL_DIR, domid); > + return -1; > + } > + } > + > + /* Open the write pipe */ > + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { > + write_fd = open_ctrl_socket(devname); > + free(devname); > + > + if (write_fd == -1) { > + fprintf(stderr, "Could not open %s/qemu-write-%d\n", > + BLKTAP_CTRL_DIR, domid); > + close(read_fd); > + return -1; > + } > + } > + > + /* Attach a handler to the read pipe (called from qemu main loop) */ > + qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL); > + > + /* Register handler to clean up when the domain is destroyed */ > + atexit(&shutdown_blktap); > + > + return 0; > +} > diff -r f33328217eee tools/ioemu/hw/xen_blktap.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/tools/ioemu/hw/xen_blktap.h Thu Mar 13 13:00:18 2008 +0100 > @@ -0,0 +1,57 @@ > +/* xen_blktap.h > + * > + * Generic disk interface for blktap-based image adapters. > + * > + * (c) 2006 Andrew Warfield and Julian Chesterfield > + */ > + > +#ifndef XEN_BLKTAP_H_ > +#define XEN_BLKTAP_H_ > + > +#include <stdint.h> > +#include <syslog.h> > +#include <stdio.h> > + > +#include "block_int.h" > + > +/* Things disks need to know about, these should probably be in a > higher-level > + * header. 
*/ > +#define MAX_SEGMENTS_PER_REQ 11 > +#define SECTOR_SHIFT 9 > +#define DEFAULT_SECTOR_SIZE 512 > + > +#define MAX_IOFD 2 > + > +#define BLK_NOT_ALLOCATED 99 > +#define TD_NO_PARENT 1 > + > +typedef uint32_t td_flag_t; > + > +#define TD_RDONLY 1 > + > +struct disk_id { > + char *name; > + int drivertype; > +}; > + > +/* This structure represents the state of an active virtual disk. > */ > +struct td_state { > + BlockDriverState* bs; > + td_flag_t flags; > + void *blkif; > + void *image; > + void *ring_info; > + void *fd_entry; > + uint64_t sector_size; > + uint64_t size; > + unsigned int info; > +}; > + > +typedef struct fd_list_entry { > + int cookie; > + int tap_fd; > + struct td_state *s; > + struct fd_list_entry **pprev, *next; > +} fd_list_entry_t; > + > +#endif /*XEN_BLKTAP_H_*/ > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |