[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
From: Daniel Stodden <dns@xxxxxxxxxxxx> Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx> --- drivers/block/Kconfig | 9 + drivers/block/Makefile | 1 + drivers/block/blktap/Makefile | 3 + drivers/block/blktap/blktap.h | 161 +++++++++++ drivers/block/blktap/control.c | 315 +++++++++++++++++++++ drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ drivers/xen/Kconfig | 11 - drivers/xen/Makefile | 1 - drivers/xen/blktap/Makefile | 3 - drivers/xen/blktap/blktap.h | 161 ----------- drivers/xen/blktap/control.c | 315 --------------------- drivers/xen/blktap/device.c | 551 ------------------------------------- drivers/xen/blktap/request.c | 418 ---------------------------- drivers/xen/blktap/ring.c | 595 ---------------------------------------- drivers/xen/blktap/sysfs.c | 288 ------------------- 18 files changed, 2341 insertions(+), 2343 deletions(-) create mode 100644 drivers/block/blktap/Makefile create mode 100644 drivers/block/blktap/blktap.h create mode 100644 drivers/block/blktap/control.c create mode 100644 drivers/block/blktap/device.c create mode 100644 drivers/block/blktap/request.c create mode 100644 drivers/block/blktap/ring.c create mode 100644 drivers/block/blktap/sysfs.c delete mode 100644 drivers/xen/blktap/Makefile delete mode 100644 drivers/xen/blktap/blktap.h delete mode 100644 drivers/xen/blktap/control.c delete mode 100644 drivers/xen/blktap/device.c delete mode 100644 drivers/xen/blktap/request.c delete mode 100644 drivers/xen/blktap/ring.c delete mode 100644 drivers/xen/blktap/sysfs.c diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index bea8ae7..c4a55a3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -471,4 +471,13 @@ config BLK_DEV_HD If unsure, say N. 
+config BLK_DEV_TAP + tristate "Blktap userspace devices" + help + The block tap driver allows block device requests to be + redirected to processes, through a device interface. + Doing so allows user-space development of high-performance + block storage backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e..8389917 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o +obj-$(CONFIG_BLK_DEV_TAP) += blktap/ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o diff --git a/drivers/block/blktap/Makefile b/drivers/block/blktap/Makefile new file mode 100644 index 0000000..923a7c5 --- /dev/null +++ b/drivers/block/blktap/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_BLK_DEV_TAP) := blktap.o + +blktap-objs := control.o ring.o device.o request.o sysfs.o diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h new file mode 100644 index 0000000..1318cad --- /dev/null +++ b/drivers/block/blktap/blktap.h @@ -0,0 +1,161 @@ +#ifndef _BLKTAP_H_ +#define _BLKTAP_H_ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/init.h> +#include <linux/scatterlist.h> +#include <linux/blktap.h> + +extern int blktap_debug_level; +extern int blktap_ring_major; +extern int blktap_device_major; + +#define BTPRINTK(level, tag, force, _f, _a...) \ + do { \ + if (blktap_debug_level > level && \ + (force || printk_ratelimit())) \ + printk(tag "%s: " _f, __func__, ##_a); \ + } while (0) + +#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) +#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) +#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) +#define BTERR(_f, _a...) 
BTPRINTK(0, KERN_ERR, 0, _f, ##_a) + +#define MAX_BLKTAP_DEVICE 1024 + +#define BLKTAP_DEVICE 4 +#define BLKTAP_DEVICE_CLOSED 5 +#define BLKTAP_SHUTDOWN_REQUESTED 8 + +#define BLKTAP_REQUEST_FREE 0 +#define BLKTAP_REQUEST_PENDING 1 + +struct blktap_device { + spinlock_t lock; + struct gendisk *gd; +}; + +struct blktap_request; + +struct blktap_ring { + struct task_struct *task; + + struct vm_area_struct *vma; + blktap_front_ring_t ring; + unsigned long ring_vstart; + unsigned long user_vstart; + + int n_pending; + struct blktap_request *pending[BLKTAP_RING_SIZE]; + + wait_queue_head_t poll_wait; + + dev_t devno; + struct device *dev; +}; + +struct blktap_statistics { + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_rd_sect; + int st_wr_sect; + s64 st_rd_cnt; + s64 st_rd_sum_usecs; + s64 st_rd_max_usecs; + s64 st_wr_cnt; + s64 st_wr_sum_usecs; + s64 st_wr_max_usecs; +}; + +struct blktap_request { + struct blktap *tap; + struct request *rq; + int usr_idx; + + int operation; + struct timeval time; + + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; + struct page *pages[BLKTAP_SEGMENT_MAX]; + int nr_pages; +}; + +#define blktap_for_each_sg(_sg, _req, _i) \ + for (_sg = (_req)->sg_table, _i = 0; \ + _i < (_req)->nr_pages; \ + (_sg)++, (_i)++) + +struct blktap { + int minor; + unsigned long dev_inuse; + + struct blktap_ring ring; + struct blktap_device device; + struct blktap_page_pool *pool; + + wait_queue_head_t remove_wait; + struct work_struct remove_work; + char name[BLKTAP_NAME_MAX]; + + struct blktap_statistics stats; +}; + +struct blktap_page_pool { + struct mempool_s *bufs; + spinlock_t lock; + struct kobject kobj; + wait_queue_head_t wait; +}; + +extern struct mutex blktap_lock; +extern struct blktap **blktaps; +extern int blktap_max_minor; + +int blktap_control_destroy_tap(struct blktap *); +size_t blktap_control_debug(struct blktap *, char *, size_t); + +int blktap_ring_init(void); +void blktap_ring_exit(void); +size_t 
blktap_ring_debug(struct blktap *, char *, size_t); +int blktap_ring_create(struct blktap *); +int blktap_ring_destroy(struct blktap *); +struct blktap_request *blktap_ring_make_request(struct blktap *); +void blktap_ring_free_request(struct blktap *,struct blktap_request *); +void blktap_ring_submit_request(struct blktap *, struct blktap_request *); +int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int); +int blktap_ring_map_request(struct blktap *, struct blktap_request *); +void blktap_ring_unmap_request(struct blktap *, struct blktap_request *); +void blktap_ring_set_message(struct blktap *, int); +void blktap_ring_kick_user(struct blktap *); + +int blktap_sysfs_init(void); +void blktap_sysfs_exit(void); +int blktap_sysfs_create(struct blktap *); +void blktap_sysfs_destroy(struct blktap *); + +int blktap_device_init(void); +void blktap_device_exit(void); +size_t blktap_device_debug(struct blktap *, char *, size_t); +int blktap_device_create(struct blktap *, struct blktap_device_info *); +int blktap_device_destroy(struct blktap *); +void blktap_device_destroy_sync(struct blktap *); +void blktap_device_run_queue(struct blktap *); +void blktap_device_end_request(struct blktap *, struct blktap_request *, int); + +int blktap_page_pool_init(struct kobject *); +void blktap_page_pool_exit(void); +struct blktap_page_pool *blktap_page_pool_get(const char *); + +size_t blktap_request_debug(struct blktap *, char *, size_t); +struct blktap_request *blktap_request_alloc(struct blktap *); +int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); +void blktap_request_free(struct blktap *, struct blktap_request *); +void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); + + +#endif diff --git a/drivers/block/blktap/control.c b/drivers/block/blktap/control.c new file mode 100644 index 0000000..57b1a10 --- /dev/null +++ b/drivers/block/blktap/control.c @@ -0,0 +1,315 @@ +#include <linux/module.h> +#include 
<linux/sched.h> +#include <linux/miscdevice.h> +#include <linux/device.h> +#include <asm/uaccess.h> + +#include "blktap.h" + +DEFINE_MUTEX(blktap_lock); + +struct blktap **blktaps; +int blktap_max_minor; +static struct blktap_page_pool *default_pool; + +static struct blktap * +blktap_control_get_minor(void) +{ + int minor; + struct blktap *tap; + + tap = kzalloc(sizeof(*tap), GFP_KERNEL); + if (unlikely(!tap)) + return NULL; + + mutex_lock(&blktap_lock); + + for (minor = 0; minor < blktap_max_minor; minor++) + if (!blktaps[minor]) + break; + + if (minor == MAX_BLKTAP_DEVICE) + goto fail; + + if (minor == blktap_max_minor) { + void *p; + int n; + + n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE); + p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL); + if (!p) + goto fail; + + blktaps = p; + minor = blktap_max_minor; + blktap_max_minor = n; + + memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0])); + } + + tap->minor = minor; + blktaps[minor] = tap; + + __module_get(THIS_MODULE); +out: + mutex_unlock(&blktap_lock); + return tap; + +fail: + mutex_unlock(&blktap_lock); + kfree(tap); + tap = NULL; + goto out; +} + +static void +blktap_control_put_minor(struct blktap* tap) +{ + blktaps[tap->minor] = NULL; + kfree(tap); + + module_put(THIS_MODULE); +} + +static struct blktap* +blktap_control_create_tap(void) +{ + struct blktap *tap; + int err; + + tap = blktap_control_get_minor(); + if (!tap) + return NULL; + + kobject_get(&default_pool->kobj); + tap->pool = default_pool; + + err = blktap_ring_create(tap); + if (err) + goto fail_tap; + + err = blktap_sysfs_create(tap); + if (err) + goto fail_ring; + + return tap; + +fail_ring: + blktap_ring_destroy(tap); +fail_tap: + blktap_control_put_minor(tap); + + return NULL; +} + +int +blktap_control_destroy_tap(struct blktap *tap) +{ + int err; + + err = blktap_ring_destroy(tap); + if (err) + return err; + + kobject_put(&tap->pool->kobj); + + blktap_sysfs_destroy(tap); + + blktap_control_put_minor(tap); + + return 0; 
+} + +static int +blktap_control_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct blktap *tap; + + switch (cmd) { + case BLKTAP_IOCTL_ALLOC_TAP: { + struct blktap_info info; + void __user *ptr = (void __user*)arg; + + tap = blktap_control_create_tap(); + if (!tap) + return -ENOMEM; + + info.ring_major = blktap_ring_major; + info.bdev_major = blktap_device_major; + info.ring_minor = tap->minor; + + if (copy_to_user(ptr, &info, sizeof(info))) { + blktap_control_destroy_tap(tap); + return -EFAULT; + } + + return 0; + } + + case BLKTAP_IOCTL_FREE_TAP: { + int minor = arg; + + if (minor > MAX_BLKTAP_DEVICE) + return -EINVAL; + + tap = blktaps[minor]; + if (!tap) + return -ENODEV; + + return blktap_control_destroy_tap(tap); + } + } + + return -ENOIOCTLCMD; +} + +static struct file_operations blktap_control_file_operations = { + .owner = THIS_MODULE, + .ioctl = blktap_control_ioctl, +}; + +static struct miscdevice blktap_control = { + .minor = MISC_DYNAMIC_MINOR, + .name = "blktap-control", + .fops = &blktap_control_file_operations, +}; + +static struct device *control_device; + +static ssize_t +blktap_control_show_default_pool(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s", kobject_name(&default_pool->kobj)); +} + +static ssize_t +blktap_control_store_default_pool(struct device *device, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap_page_pool *pool, *tmp = default_pool; + + pool = blktap_page_pool_get(buf); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + default_pool = pool; + kobject_put(&tmp->kobj); + + return size; +} + +static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, + blktap_control_show_default_pool, + blktap_control_store_default_pool); + +size_t +blktap_control_debug(struct blktap *tap, char *buf, size_t size) +{ + char *s = buf, *end = buf + size; + + s += snprintf(s, end - s, + "tap %u:%u name:'%s' 
flags:%#08lx\n", + MAJOR(tap->ring.devno), MINOR(tap->ring.devno), + tap->name, tap->dev_inuse); + + return s - buf; +} + +static int __init +blktap_control_init(void) +{ + int err; + + err = misc_register(&blktap_control); + if (err) + return err; + + control_device = blktap_control.this_device; + + blktap_max_minor = min(64, MAX_BLKTAP_DEVICE); + blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL); + if (!blktaps) { + BTERR("failed to allocate blktap minor map"); + return -ENOMEM; + } + + err = blktap_page_pool_init(&control_device->kobj); + if (err) + return err; + + default_pool = blktap_page_pool_get("default"); + if (!default_pool) + return -ENOMEM; + + err = device_create_file(control_device, &dev_attr_default_pool); + if (err) + return err; + + return 0; +} + +static void +blktap_control_exit(void) +{ + if (default_pool) { + kobject_put(&default_pool->kobj); + default_pool = NULL; + } + + blktap_page_pool_exit(); + + if (blktaps) { + kfree(blktaps); + blktaps = NULL; + } + + if (control_device) { + misc_deregister(&blktap_control); + control_device = NULL; + } +} + +static void +blktap_exit(void) +{ + blktap_control_exit(); + blktap_ring_exit(); + blktap_sysfs_exit(); + blktap_device_exit(); +} + +static int __init +blktap_init(void) +{ + int err; + + err = blktap_device_init(); + if (err) + goto fail; + + err = blktap_ring_init(); + if (err) + goto fail; + + err = blktap_sysfs_init(); + if (err) + goto fail; + + err = blktap_control_init(); + if (err) + goto fail; + + return 0; + +fail: + blktap_exit(); + return err; +} + +module_init(blktap_init); +module_exit(blktap_exit); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c new file mode 100644 index 0000000..9a09457 --- /dev/null +++ b/drivers/block/blktap/device.c @@ -0,0 +1,551 @@ +#include <linux/fs.h> +#include <linux/blkdev.h> +#include <linux/cdrom.h> +#include <linux/hdreg.h> +#include <scsi/scsi.h> +#include 
<scsi/scsi_ioctl.h> + +#include "blktap.h" + +int blktap_device_major; + +#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device) + +static int +blktap_device_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct blktap_device *tapdev = disk->private_data; + + if (!tapdev) + return -ENXIO; + + /* NB. we might have bounced a bd trylock by tapdisk. when + * failing for reasons not !tapdev, make sure to kick tapdisk + * out of destroy wait state again. */ + + return 0; +} + +static int +blktap_device_release(struct gendisk *disk, fmode_t mode) +{ + struct blktap_device *tapdev = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + struct blktap *tap = dev_to_blktap(tapdev); + + bdput(bdev); + + if (!bdev->bd_openers) { + set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse); + blktap_ring_kick_user(tap); + } + + return 0; +} + +static int +blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) +{ + /* We don't have real geometry info, but let's at least return + values consistent with the size of the device */ + sector_t nsect = get_capacity(bd->bd_disk); + sector_t cylinders = nsect; + + hg->heads = 0xff; + hg->sectors = 0x3f; + sector_div(cylinders, hg->heads * hg->sectors); + hg->cylinders = cylinders; + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) + hg->cylinders = 0xffff; + return 0; +} + +static int +blktap_device_ioctl(struct block_device *bd, fmode_t mode, + unsigned command, unsigned long argument) +{ + int i; + + switch (command) { + case CDROMMULTISESSION: + BTDBG("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char __user *)(argument + i))) + return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_IDLUN: + if (!access_ok(VERIFY_WRITE, argument, + sizeof(struct scsi_idlun))) + return -EFAULT; + + /* return 0 for now. 
*/ + __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); + __put_user(0, + &((struct scsi_idlun __user *)argument)->host_unique_id); + return 0; + + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; +} + +static struct block_device_operations blktap_device_file_operations = { + .owner = THIS_MODULE, + .open = blktap_device_open, + .release = blktap_device_release, + .ioctl = blktap_device_ioctl, + .getgeo = blktap_device_getgeo +}; + +/* NB. __blktap holding the queue lock; blktap where unlocked */ + +static inline struct request* +__blktap_next_queued_rq(struct request_queue *q) +{ + return blk_peek_request(q); +} + +static inline void +__blktap_dequeue_rq(struct request *rq) +{ + blk_start_request(rq); +} + +/* NB. err == 0 indicates success, failures < 0 */ + +static inline void +__blktap_end_queued_rq(struct request *rq, int err) +{ + blk_start_request(rq); + __blk_end_request(rq, err, blk_rq_bytes(rq)); +} + +static inline void +__blktap_end_rq(struct request *rq, int err) +{ + __blk_end_request(rq, err, blk_rq_bytes(rq)); +} + +static inline void +blktap_end_rq(struct request *rq, int err) +{ + spin_lock_irq(rq->q->queue_lock); + __blktap_end_rq(rq, err); + spin_unlock_irq(rq->q->queue_lock); +} + +void +blktap_device_end_request(struct blktap *tap, + struct blktap_request *request, + int error) +{ + struct blktap_device *tapdev = &tap->device; + struct request *rq = request->rq; + + blktap_ring_unmap_request(tap, request); + + blktap_ring_free_request(tap, request); + + dev_dbg(disk_to_dev(tapdev->gd), + "end_request: op=%d error=%d bytes=%d\n", + rq_data_dir(rq), error, blk_rq_bytes(rq)); + + blktap_end_rq(rq, error); +} + +int +blktap_device_make_request(struct blktap *tap, struct request *rq) +{ + struct blktap_device *tapdev = &tap->device; + struct blktap_request *request; + int write, nsegs; + int err; + + request = 
blktap_ring_make_request(tap); + if (IS_ERR(request)) { + err = PTR_ERR(request); + request = NULL; + + if (err == -ENOSPC || err == -ENOMEM) + goto stop; + + goto fail; + } + + write = rq_data_dir(rq) == WRITE; + nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); + + dev_dbg(disk_to_dev(tapdev->gd), + "make_request: op=%c bytes=%d nsegs=%d\n", + write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); + + request->rq = rq; + request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ; + + err = blktap_request_get_pages(tap, request, nsegs); + if (err) + goto stop; + + err = blktap_ring_map_request(tap, request); + if (err) + goto fail; + + blktap_ring_submit_request(tap, request); + + return 0; + +stop: + tap->stats.st_oo_req++; + err = -EBUSY; + +_out: + if (request) + blktap_ring_free_request(tap, request); + + return err; +fail: + if (printk_ratelimit()) + dev_warn(disk_to_dev(tapdev->gd), + "make request: %d, failing\n", err); + goto _out; +} + +/* + * called from tapdisk context + */ +void +blktap_device_run_queue(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct request_queue *q; + struct request *rq; + int err; + + if (!tapdev->gd) + return; + + q = tapdev->gd->queue; + + spin_lock_irq(&tapdev->lock); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + + do { + rq = __blktap_next_queued_rq(q); + if (!rq) + break; + + if (!blk_fs_request(rq)) { + __blktap_end_queued_rq(rq, -EOPNOTSUPP); + continue; + } + + spin_unlock_irq(&tapdev->lock); + + err = blktap_device_make_request(tap, rq); + + spin_lock_irq(&tapdev->lock); + + if (err == -EBUSY) { + blk_stop_queue(q); + break; + } + + __blktap_dequeue_rq(rq); + + if (unlikely(err)) + __blktap_end_rq(rq, err); + } while (1); + + spin_unlock_irq(&tapdev->lock); +} + +static void +blktap_device_do_request(struct request_queue *rq) +{ + struct blktap_device *tapdev = rq->queuedata; + struct blktap *tap = dev_to_blktap(tapdev); + + blktap_ring_kick_user(tap); +} + +static void 
+blktap_device_configure(struct blktap *tap, + struct blktap_device_info *info) +{ + struct blktap_device *tapdev = &tap->device; + struct gendisk *gd = tapdev->gd; + struct request_queue *rq = gd->queue; + + set_capacity(gd, info->capacity); + set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_logical_block_size(rq, info->sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + /* We are reordering, but cacheless. */ + blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); +} + +static int +blktap_device_validate_info(struct blktap *tap, + struct blktap_device_info *info) +{ + struct device *dev = tap->ring.dev; + int sector_order; + + sector_order = ffs(info->sector_size) - 1; + if (sector_order < 9 || + sector_order > 12 || + info->sector_size != 1U<<sector_order) + goto fail; + + if (!info->capacity || + (info->capacity > ULLONG_MAX >> sector_order)) + goto fail; + + return 0; + +fail: + dev_err(dev, "capacity: %llu, sector-size: %u\n", + info->capacity, info->sector_size); + return -EINVAL; +} + +int +blktap_device_destroy(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct block_device *bdev; + struct gendisk *gd; + int err; + + gd = tapdev->gd; + if (!gd) + return 0; + + bdev = bdget_disk(gd, 0); + + err = !mutex_trylock(&bdev->bd_mutex); + if (err) { + /* NB. avoid a deadlock. the last opener syncs the + * bdev holding bd_mutex. 
*/ + err = -EBUSY; + goto out_nolock; + } + + if (bdev->bd_openers) { + err = -EBUSY; + goto out; + } + + del_gendisk(gd); + gd->private_data = NULL; + + blk_cleanup_queue(gd->queue); + + put_disk(gd); + tapdev->gd = NULL; + + clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); + err = 0; +out: + mutex_unlock(&bdev->bd_mutex); +out_nolock: + bdput(bdev); + + return err; +} + +static void +blktap_device_fail_queue(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct request_queue *q = tapdev->gd->queue; + + spin_lock_irq(&tapdev->lock); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + + do { + struct request *rq = __blktap_next_queued_rq(q); + if (!rq) + break; + + __blktap_end_queued_rq(rq, -EIO); + } while (1); + + spin_unlock_irq(&tapdev->lock); +} + +static int +blktap_device_try_destroy(struct blktap *tap) +{ + int err; + + err = blktap_device_destroy(tap); + if (err) + blktap_device_fail_queue(tap); + + return err; +} + +void +blktap_device_destroy_sync(struct blktap *tap) +{ + wait_event(tap->ring.poll_wait, + !blktap_device_try_destroy(tap)); +} + +int +blktap_device_create(struct blktap *tap, struct blktap_device_info *info) +{ + int minor, err; + struct gendisk *gd; + struct request_queue *rq; + struct blktap_device *tapdev; + + gd = NULL; + rq = NULL; + tapdev = &tap->device; + minor = tap->minor; + + if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + return -EEXIST; + + if (blktap_device_validate_info(tap, info)) + return -EINVAL; + + gd = alloc_disk(1); + if (!gd) { + err = -ENOMEM; + goto fail; + } + + if (minor < 26) { + sprintf(gd->disk_name, "td%c", 'a' + minor % 26); + } else if (minor < (26 + 1) * 26) { + sprintf(gd->disk_name, "td%c%c", + 'a' + minor / 26 - 1,'a' + minor % 26); + } else { + const unsigned int m1 = (minor / 26 - 1) / 26 - 1; + const unsigned int m2 = (minor / 26 - 1) % 26; + const unsigned int m3 = minor % 26; + sprintf(gd->disk_name, "td%c%c%c", + 'a' + m1, 'a' + m2, 'a' + m3); + } + + gd->major = blktap_device_major; + 
gd->first_minor = minor; + gd->fops = &blktap_device_file_operations; + gd->private_data = tapdev; + + spin_lock_init(&tapdev->lock); + rq = blk_init_queue(blktap_device_do_request, &tapdev->lock); + if (!rq) { + err = -ENOMEM; + goto fail; + } + elevator_init(rq, "noop"); + + gd->queue = rq; + rq->queuedata = tapdev; + tapdev->gd = gd; + + blktap_device_configure(tap, info); + add_disk(gd); + + set_bit(BLKTAP_DEVICE, &tap->dev_inuse); + + dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", + queue_logical_block_size(rq), + queue_physical_block_size(rq), + (unsigned long long)get_capacity(gd)); + + return 0; + +fail: + if (gd) + del_gendisk(gd); + if (rq) + blk_cleanup_queue(rq); + + return err; +} + +size_t +blktap_device_debug(struct blktap *tap, char *buf, size_t size) +{ + struct gendisk *disk = tap->device.gd; + struct request_queue *q; + struct block_device *bdev; + char *s = buf, *end = buf + size; + + if (!disk) + return 0; + + q = disk->queue; + + s += snprintf(s, end - s, + "disk capacity:%llu sector size:%u\n", + (unsigned long long)get_capacity(disk), + queue_logical_block_size(q)); + + s += snprintf(s, end - s, + "queue flags:%#lx plugged:%d stopped:%d empty:%d\n", + q->queue_flags, + blk_queue_plugged(q), blk_queue_stopped(q), + elv_queue_empty(q)); + + bdev = bdget_disk(disk, 0); + if (bdev) { + s += snprintf(s, end - s, + "bdev openers:%d closed:%d\n", + bdev->bd_openers, + test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)); + bdput(bdev); + } + + return s - buf; +} + +int __init +blktap_device_init() +{ + int major; + + /* Dynamically allocate a major for this device */ + major = register_blkdev(0, "tapdev"); + if (major < 0) { + BTERR("Couldn't register blktap device\n"); + return -ENOMEM; + } + + blktap_device_major = major; + BTINFO("blktap device major %d\n", major); + + return 0; +} + +void +blktap_device_exit(void) +{ + if (blktap_device_major) + unregister_blkdev(blktap_device_major, "tapdev"); +} diff --git 
a/drivers/block/blktap/request.c b/drivers/block/blktap/request.c new file mode 100644 index 0000000..8cfd6c9 --- /dev/null +++ b/drivers/block/blktap/request.c @@ -0,0 +1,418 @@ +#include <linux/mempool.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/device.h> + +#include "blktap.h" + +/* max pages per shared pool. just to prevent accidental dos. */ +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) + +/* default page pool size. when considering to shrink a shared pool, + * note that paused tapdisks may grab a whole lot of pages for a long + * time. */ +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) + +/* max number of pages allocatable per request. */ +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX + +/* min request structs per pool. These grow dynamically. */ +#define POOL_MIN_REQS BLKTAP_RING_SIZE + +static struct kset *pool_set; + +#define kobj_to_pool(_kobj) \ + container_of(_kobj, struct blktap_page_pool, kobj) + +static struct kmem_cache *request_cache; +static mempool_t *request_pool; + +static void +__page_pool_wake(struct blktap_page_pool *pool) +{ + mempool_t *mem = pool->bufs; + + /* + NB. slightly wasteful to always wait for a full segment + set. but this ensures the next disk makes + progress. presently, the repeated request struct + alloc/release cycles would otherwise keep everyone spinning. + */ + + if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) + wake_up(&pool->wait); +} + +int +blktap_request_get_pages(struct blktap *tap, + struct blktap_request *request, int nr_pages) +{ + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + struct page *page; + + BUG_ON(request->nr_pages != 0); + BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); + + if (mem->curr_nr < nr_pages) + return -ENOMEM; + + /* NB. avoid thundering herds of tapdisks colliding. 
*/ + spin_lock(&pool->lock); + + if (mem->curr_nr < nr_pages) { + spin_unlock(&pool->lock); + return -ENOMEM; + } + + while (request->nr_pages < nr_pages) { + page = mempool_alloc(mem, GFP_NOWAIT); + BUG_ON(!page); + request->pages[request->nr_pages++] = page; + } + + spin_unlock(&pool->lock); + + return 0; +} + +static void +blktap_request_put_pages(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_page_pool *pool = tap->pool; + struct page *page; + + while (request->nr_pages) { + page = request->pages[--request->nr_pages]; + mempool_free(page, pool->bufs); + } +} + +size_t +blktap_request_debug(struct blktap *tap, char *buf, size_t size) +{ + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + char *s = buf, *end = buf + size; + + s += snprintf(buf, end - s, + "pool:%s pages:%d free:%d\n", + kobject_name(&pool->kobj), + mem->min_nr, mem->curr_nr); + + return s - buf; +} + +struct blktap_request* +blktap_request_alloc(struct blktap *tap) +{ + struct blktap_request *request; + + request = mempool_alloc(request_pool, GFP_NOWAIT); + if (request) + request->tap = tap; + + return request; +} + +void +blktap_request_free(struct blktap *tap, + struct blktap_request *request) +{ + blktap_request_put_pages(tap, request); + + mempool_free(request, request_pool); + + __page_pool_wake(tap->pool); +} + +void +blktap_request_bounce(struct blktap *tap, + struct blktap_request *request, + int seg, int write) +{ + struct scatterlist *sg = &request->sg_table[seg]; + void *s, *p; + + BUG_ON(seg >= request->nr_pages); + + s = sg_virt(sg); + p = page_address(request->pages[seg]) + sg->offset; + + if (write) + memcpy(p, s, sg->length); + else + memcpy(s, p, sg->length); +} + +static void +blktap_request_ctor(void *obj) +{ + struct blktap_request *request = obj; + + memset(request, 0, sizeof(*request)); + sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); +} + +static int +blktap_page_pool_resize(struct blktap_page_pool *pool, 
int target) +{ + mempool_t *bufs = pool->bufs; + int err; + + /* NB. mempool asserts min_nr >= 1 */ + target = max(1, target); + + err = mempool_resize(bufs, target, GFP_KERNEL); + if (err) + return err; + + __page_pool_wake(pool); + + return 0; +} + +struct pool_attribute { + struct attribute attr; + + ssize_t (*show)(struct blktap_page_pool *pool, + char *buf); + + ssize_t (*store)(struct blktap_page_pool *pool, + const char *buf, size_t count); +}; + +#define kattr_to_pool_attr(_kattr) \ + container_of(_kattr, struct pool_attribute, attr) + +static ssize_t +blktap_page_pool_show_size(struct blktap_page_pool *pool, + char *buf) +{ + mempool_t *mem = pool->bufs; + return sprintf(buf, "%d", mem->min_nr); +} + +static ssize_t +blktap_page_pool_store_size(struct blktap_page_pool *pool, + const char *buf, size_t size) +{ + int target; + + /* + * NB. target fixup to avoid undesired results. less than a + * full segment set can wedge the disk. much more than a + * couple times the physical queue depth is rarely useful. + */ + + target = simple_strtoul(buf, NULL, 0); + target = max(POOL_MAX_REQUEST_PAGES, target); + target = min(target, POOL_MAX_PAGES); + + return blktap_page_pool_resize(pool, target) ? 
: size; +} + +static struct pool_attribute blktap_page_pool_attr_size = + __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, + blktap_page_pool_show_size, + blktap_page_pool_store_size); + +static ssize_t +blktap_page_pool_show_free(struct blktap_page_pool *pool, + char *buf) +{ + mempool_t *mem = pool->bufs; + return sprintf(buf, "%d", mem->curr_nr); +} + +static struct pool_attribute blktap_page_pool_attr_free = + __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH, + blktap_page_pool_show_free, + NULL); + +static struct attribute *blktap_page_pool_attrs[] = { + &blktap_page_pool_attr_size.attr, + &blktap_page_pool_attr_free.attr, + NULL, +}; + +static inline struct kobject* +__blktap_kset_find_obj(struct kset *kset, const char *name) +{ + struct kobject *k; + struct kobject *ret = NULL; + + spin_lock(&kset->list_lock); + list_for_each_entry(k, &kset->list, entry) { + if (kobject_name(k) && !strcmp(kobject_name(k), name)) { + ret = kobject_get(k); + break; + } + } + spin_unlock(&kset->list_lock); + return ret; +} + +static ssize_t +blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr, + char *buf) +{ + struct blktap_page_pool *pool = kobj_to_pool(kobj); + struct pool_attribute *attr = kattr_to_pool_attr(kattr); + + if (attr->show) + return attr->show(pool, buf); + + return -EIO; +} + +static ssize_t +blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr, + const char *buf, size_t size) +{ + struct blktap_page_pool *pool = kobj_to_pool(kobj); + struct pool_attribute *attr = kattr_to_pool_attr(kattr); + + if (attr->store) + return attr->store(pool, buf, size); + + return -EIO; +} + +static struct sysfs_ops blktap_page_pool_sysfs_ops = { + .show = blktap_page_pool_show_attr, + .store = blktap_page_pool_store_attr, +}; + +static void +blktap_page_pool_release(struct kobject *kobj) +{ + struct blktap_page_pool *pool = kobj_to_pool(kobj); + mempool_destroy(pool->bufs); + kfree(pool); +} + +struct kobj_type blktap_page_pool_ktype = { + .release = 
blktap_page_pool_release, + .sysfs_ops = &blktap_page_pool_sysfs_ops, + .default_attrs = blktap_page_pool_attrs, +}; + +static void* +__mempool_page_alloc(gfp_t gfp_mask, void *pool_data) +{ + struct page *page; + + if (!(gfp_mask & __GFP_WAIT)) + return NULL; + + page = alloc_page(gfp_mask); + if (page) + SetPageReserved(page); + + return page; +} + +static void +__mempool_page_free(void *element, void *pool_data) +{ + struct page *page = element; + + ClearPageReserved(page); + put_page(page); +} + +static struct kobject* +blktap_page_pool_create(const char *name, int nr_pages) +{ + struct blktap_page_pool *pool; + int err; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + goto fail; + + spin_lock_init(&pool->lock); + init_waitqueue_head(&pool->wait); + + pool->bufs = mempool_create(nr_pages, + __mempool_page_alloc, __mempool_page_free, + pool); + if (!pool->bufs) + goto fail_pool; + + kobject_init(&pool->kobj, &blktap_page_pool_ktype); + pool->kobj.kset = pool_set; + err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name); + if (err) + goto fail_bufs; + + return &pool->kobj; + + kobject_del(&pool->kobj); +fail_bufs: + mempool_destroy(pool->bufs); +fail_pool: + kfree(pool); +fail: + return NULL; +} + +struct blktap_page_pool* +blktap_page_pool_get(const char *name) +{ + struct kobject *kobj; + + kobj = __blktap_kset_find_obj(pool_set, name); + if (!kobj) + kobj = blktap_page_pool_create(name, + POOL_DEFAULT_PAGES); + if (!kobj) + return ERR_PTR(-ENOMEM); + + return kobj_to_pool(kobj); +} + +int __init +blktap_page_pool_init(struct kobject *parent) +{ + request_cache = + kmem_cache_create("blktap-request", + sizeof(struct blktap_request), 0, + 0, blktap_request_ctor); + if (!request_cache) + return -ENOMEM; + + request_pool = + mempool_create_slab_pool(POOL_MIN_REQS, request_cache); + if (!request_pool) + return -ENOMEM; + + pool_set = kset_create_and_add("pools", NULL, parent); + if (!pool_set) + return -ENOMEM; + + return 0; +} + +void 
+blktap_page_pool_exit(void) +{ + if (pool_set) { + BUG_ON(!list_empty(&pool_set->list)); + kset_unregister(pool_set); + pool_set = NULL; + } + + if (request_pool) { + mempool_destroy(request_pool); + request_pool = NULL; + } + + if (request_cache) { + kmem_cache_destroy(request_cache); + request_cache = NULL; + } +} diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c new file mode 100644 index 0000000..635f1fd --- /dev/null +++ b/drivers/block/blktap/ring.c @@ -0,0 +1,595 @@ + +#include <linux/device.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/blkdev.h> + +#include "blktap.h" + +int blktap_ring_major; +static struct cdev blktap_ring_cdev; + + /* + * BLKTAP - immediately before the mmap area, + * we have a bunch of pages reserved for shared memory rings. + */ +#define RING_PAGES 1 + +#define BLKTAP_INFO_SIZE_AT(_memb) \ + offsetof(struct blktap_device_info, _memb) + \ + sizeof(((struct blktap_device_info*)0)->_memb) + +static void +blktap_ring_read_response(struct blktap *tap, + const blktap_ring_rsp_t *rsp) +{ + struct blktap_ring *ring = &tap->ring; + struct blktap_request *request; + int usr_idx, err; + + request = NULL; + + usr_idx = rsp->id; + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { + err = -ERANGE; + goto invalid; + } + + request = ring->pending[usr_idx]; + + if (!request) { + err = -ESRCH; + goto invalid; + } + + if (rsp->operation != request->operation) { + err = -EINVAL; + goto invalid; + } + + dev_dbg(ring->dev, + "request %d [%p] response: %d\n", + request->usr_idx, request, rsp->status); + + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; +end_request: + blktap_device_end_request(tap, request, err); + return; + +invalid: + dev_warn(ring->dev, + "invalid response, idx:%d status:%d op:%d/%d: err %d\n", + usr_idx, rsp->status, + rsp->operation, request->operation, + err); + if (request) + goto end_request; +} + +static void +blktap_read_ring(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + blktap_ring_rsp_t rsp; + RING_IDX rc, rp; + + down_read(¤t->mm->mmap_sem); + if (!ring->vma) { + up_read(¤t->mm->mmap_sem); + return; + } + + /* for each outstanding message on the ring */ + rp = ring->ring.sring->rsp_prod; + rmb(); + + for (rc = ring->ring.rsp_cons; rc != rp; rc++) { + memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp)); + blktap_ring_read_response(tap, &rsp); + } + + ring->ring.rsp_cons = rc; + + up_read(¤t->mm->mmap_sem); +} + +#define MMAP_VADDR(_start, _req, _seg) \ + ((_start) + \ + ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ + ((_seg) * BLKTAP_PAGE_SIZE)) + +static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static void +blktap_ring_fail_pending(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct blktap_request *request; + int usr_idx; + + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { + request = ring->pending[usr_idx]; + if (!request) + continue; + + blktap_device_end_request(tap, request, -EIO); + } +} + +static void +blktap_ring_vm_close(struct vm_area_struct *vma) +{ + struct blktap *tap = vma->vm_private_data; + struct blktap_ring *ring = &tap->ring; + struct page *page = virt_to_page(ring->ring.sring); + + blktap_ring_fail_pending(tap); + + zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); + ClearPageReserved(page); + __free_page(page); + + ring->vma = NULL; + + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + blktap_control_destroy_tap(tap); +} + +static struct vm_operations_struct blktap_ring_vm_operations = { + .close = 
blktap_ring_vm_close, + .fault = blktap_ring_fault, +}; + +int +blktap_ring_map_segment(struct blktap *tap, + struct blktap_request *request, + int seg) +{ + struct blktap_ring *ring = &tap->ring; + unsigned long uaddr; + + uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); + return vm_insert_page(ring->vma, uaddr, request->pages[seg]); +} + +int +blktap_ring_map_request(struct blktap *tap, + struct blktap_request *request) +{ + int seg, err = 0; + int write; + + write = request->operation == BLKTAP_OP_WRITE; + + for (seg = 0; seg < request->nr_pages; seg++) { + if (write) + blktap_request_bounce(tap, request, seg, write); + + err = blktap_ring_map_segment(tap, request, seg); + if (err) + break; + } + + if (err) + blktap_ring_unmap_request(tap, request); + + return err; +} + +void +blktap_ring_unmap_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + unsigned long uaddr; + unsigned size; + int seg, read; + + uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); + size = request->nr_pages << PAGE_SHIFT; + read = request->operation == BLKTAP_OP_READ; + + if (read) + for (seg = 0; seg < request->nr_pages; seg++) + blktap_request_bounce(tap, request, seg, !read); + + zap_page_range(ring->vma, uaddr, size, NULL); +} + +void +blktap_ring_free_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + + ring->pending[request->usr_idx] = NULL; + ring->n_pending--; + + blktap_request_free(tap, request); +} + +struct blktap_request* +blktap_ring_make_request(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct blktap_request *request; + int usr_idx; + + if (RING_FULL(&ring->ring)) + return ERR_PTR(-ENOSPC); + + request = blktap_request_alloc(tap); + if (!request) + return ERR_PTR(-ENOMEM); + + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) + if (!ring->pending[usr_idx]) + break; + + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); + + 
request->tap = tap; + request->usr_idx = usr_idx; + + ring->pending[usr_idx] = request; + ring->n_pending++; + + return request; +} + +void +blktap_ring_submit_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + blktap_ring_req_t *breq; + struct scatterlist *sg; + int i, nsecs = 0; + + dev_dbg(ring->dev, + "request %d [%p] submit\n", request->usr_idx, request); + + breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); + + breq->id = request->usr_idx; + breq->sector_number = blk_rq_pos(request->rq); + breq->__pad = 0; + breq->operation = request->operation; + breq->nr_segments = request->nr_pages; + + blktap_for_each_sg(sg, request, i) { + struct blktap_segment *seg = &breq->seg[i]; + int first, count; + + count = sg->length >> 9; + first = sg->offset >> 9; + + seg->first_sect = first; + seg->last_sect = first + count - 1; + + nsecs += count; + } + + ring->ring.req_prod_pvt++; + + do_gettimeofday(&request->time); + + + if (request->operation == BLKTAP_OP_WRITE) { + tap->stats.st_wr_sect += nsecs; + tap->stats.st_wr_req++; + } + + if (request->operation == BLKTAP_OP_READ) { + tap->stats.st_rd_sect += nsecs; + tap->stats.st_rd_req++; + } +} + +static int +blktap_ring_open(struct inode *inode, struct file *filp) +{ + struct blktap *tap = NULL; + int minor; + + minor = iminor(inode); + + if (minor < blktap_max_minor) + tap = blktaps[minor]; + + if (!tap) + return -ENXIO; + + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + return -ENXIO; + + if (tap->ring.task) + return -EBUSY; + + filp->private_data = tap; + tap->ring.task = current; + + return 0; +} + +static int +blktap_ring_release(struct inode *inode, struct file *filp) +{ + struct blktap *tap = filp->private_data; + + blktap_device_destroy_sync(tap); + + tap->ring.task = NULL; + + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + blktap_control_destroy_tap(tap); + + return 0; +} + +static int +blktap_ring_mmap(struct file *filp, 
struct vm_area_struct *vma) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + blktap_sring_t *sring; + struct page *page = NULL; + int err; + + if (ring->vma) + return -EBUSY; + + page = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!page) + return -ENOMEM; + + SetPageReserved(page); + + err = vm_insert_page(vma, vma->vm_start, page); + if (err) + goto fail; + + sring = page_address(page); + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); + + ring->ring_vstart = vma->vm_start; + ring->user_vstart = ring->ring_vstart + PAGE_SIZE; + + vma->vm_private_data = tap; + + vma->vm_flags |= VM_DONTCOPY; + vma->vm_flags |= VM_RESERVED; + + vma->vm_ops = &blktap_ring_vm_operations; + + ring->vma = vma; + return 0; + +fail: + if (page) { + zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); + ClearPageReserved(page); + __free_page(page); + } + + return err; +} + +static int +blktap_ring_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + void __user *ptr = (void *)arg; + int err; + + BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); + + if (!ring->vma || ring->vma->vm_mm != current->mm) + return -EACCES; + + switch(cmd) { + case BLKTAP_IOCTL_RESPOND: + + blktap_read_ring(tap); + return 0; + + case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: { + struct blktap_device_info info; + struct blktap2_params params; + + if (copy_from_user(¶ms, ptr, sizeof(params))) + return -EFAULT; + + info.capacity = params.capacity; + info.sector_size = params.sector_size; + info.flags = 0; + + err = blktap_device_create(tap, &info); + if (err) + return err; + + if (params.name[0]) { + strncpy(tap->name, params.name, sizeof(params.name)); + tap->name[sizeof(tap->name)-1] = 0; + } + + return 0; + } + + case BLKTAP_IOCTL_CREATE_DEVICE: { + struct blktap_device_info __user *ptr = (void *)arg; + struct blktap_device_info info; 
+ unsigned long mask; + size_t base_sz, sz; + + mask = BLKTAP_DEVICE_FLAG_RO; + + memset(&info, 0, sizeof(info)); + sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); + + if (copy_from_user(&info, ptr, sz)) + return -EFAULT; + + if (sz > base_sz) + if (copy_from_user(&info, ptr, sz)) + return -EFAULT; + + if (put_user(info.flags & mask, &ptr->flags)) + return -EFAULT; + + return blktap_device_create(tap, &info); + } + + case BLKTAP_IOCTL_REMOVE_DEVICE: + + return blktap_device_destroy(tap); + } + + return -ENOIOCTLCMD; +} + +static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + int work; + + poll_wait(filp, &tap->pool->wait, wait); + poll_wait(filp, &ring->poll_wait, wait); + + down_read(¤t->mm->mmap_sem); + if (ring->vma && tap->device.gd) + blktap_device_run_queue(tap); + up_read(¤t->mm->mmap_sem); + + work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod; + RING_PUSH_REQUESTS(&ring->ring); + + if (work || + ring->ring.sring->private.tapif_user.msg || + test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static struct file_operations blktap_ring_file_operations = { + .owner = THIS_MODULE, + .open = blktap_ring_open, + .release = blktap_ring_release, + .ioctl = blktap_ring_ioctl, + .mmap = blktap_ring_mmap, + .poll = blktap_ring_poll, +}; + +void +blktap_ring_kick_user(struct blktap *tap) +{ + wake_up(&tap->ring.poll_wait); +} + +int +blktap_ring_destroy(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + + if (ring->task || ring->vma) + return -EBUSY; + + return 0; +} + +int +blktap_ring_create(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + + init_waitqueue_head(&ring->poll_wait); + ring->devno = MKDEV(blktap_ring_major, tap->minor); + + return 0; +} + +size_t +blktap_ring_debug(struct blktap *tap, char *buf, size_t size) +{ + struct blktap_ring *ring = &tap->ring; + 
char *s = buf, *end = buf + size; + int usr_idx; + + s += snprintf(s, end - s, + "begin pending:%d\n", ring->n_pending); + + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { + struct blktap_request *request; + struct timeval *time; + int write; + + request = ring->pending[usr_idx]; + if (!request) + continue; + + write = request->operation == BLKTAP_OP_WRITE; + time = &request->time; + + s += snprintf(s, end - s, + "%02d: usr_idx:%02d " + "op:%c nr_pages:%02d time:%lu.%09lu\n", + usr_idx, request->usr_idx, + write ? 'W' : 'R', request->nr_pages, + time->tv_sec, time->tv_usec); + } + + s += snprintf(s, end - s, "end pending\n"); + + return s - buf; +} + + +int __init +blktap_ring_init(void) +{ + dev_t dev = 0; + int err; + + cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations); + blktap_ring_cdev.owner = THIS_MODULE; + + err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2"); + if (err < 0) { + BTERR("error registering ring devices: %d\n", err); + return err; + } + + err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE); + if (err) { + BTERR("error adding ring device: %d\n", err); + unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE); + return err; + } + + blktap_ring_major = MAJOR(dev); + BTINFO("blktap ring major: %d\n", blktap_ring_major); + + return 0; +} + +void +blktap_ring_exit(void) +{ + if (!blktap_ring_major) + return; + + cdev_del(&blktap_ring_cdev); + unregister_chrdev_region(MKDEV(blktap_ring_major, 0), + MAX_BLKTAP_DEVICE); + + blktap_ring_major = 0; +} diff --git a/drivers/block/blktap/sysfs.c b/drivers/block/blktap/sysfs.c new file mode 100644 index 0000000..182de9a --- /dev/null +++ b/drivers/block/blktap/sysfs.c @@ -0,0 +1,288 @@ +#include <linux/types.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> + +#include "blktap.h" + +int blktap_debug_level = 1; + +static struct class *class; + +static ssize_t +blktap_sysfs_set_name(struct 
device *dev, struct device_attribute *attr, const char *buf, size_t size) +{ + struct blktap *tap; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (size >= BLKTAP_NAME_MAX) + return -ENAMETOOLONG; + + if (strnlen(buf, size) != size) + return -EINVAL; + + strcpy(tap->name, buf); + + return size; +} + +static ssize_t +blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + ssize_t size; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (tap->name[0]) + size = sprintf(buf, "%s\n", tap->name); + else + size = sprintf(buf, "%d\n", tap->minor); + + return size; +} +static DEVICE_ATTR(name, S_IRUGO|S_IWUSR, + blktap_sysfs_get_name, blktap_sysfs_set_name); + +static void +blktap_sysfs_remove_work(struct work_struct *work) +{ + struct blktap *tap + = container_of(work, struct blktap, remove_work); + blktap_control_destroy_tap(tap); +} + +static ssize_t +blktap_sysfs_remove_device(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap *tap; + int err; + + tap = dev_get_drvdata(dev); + if (!tap) + return size; + + if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + goto wait; + + if (tap->ring.vma) { + blktap_sring_t *sring = tap->ring.ring.sring; + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; + blktap_ring_kick_user(tap); + } else { + INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); + schedule_work(&tap->remove_work); + } +wait: + err = wait_event_interruptible(tap->remove_wait, + !dev_get_drvdata(dev)); + if (err) + return err; + + return size; +} +static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); + +static ssize_t +blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + char *s = buf, *end = buf + PAGE_SIZE; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + s += blktap_control_debug(tap, s, end - s); + + s += 
blktap_request_debug(tap, s, end - s); + + s += blktap_device_debug(tap, s, end - s); + + s += blktap_ring_debug(tap, s, end - s); + + return s - buf; +} +static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL); + +static ssize_t +blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + ssize_t rv = 0; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (tap->ring.task) + rv = sprintf(buf, "%d\n", tap->ring.task->pid); + + return rv; +} +static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); + +static ssize_t +blktap_sysfs_show_pool(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct blktap *tap = dev_get_drvdata(dev); + return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); +} + +static ssize_t +blktap_sysfs_store_pool(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap *tap = dev_get_drvdata(dev); + struct blktap_page_pool *pool, *tmp = tap->pool; + + if (tap->device.gd) + return -EBUSY; + + pool = blktap_page_pool_get(buf); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + tap->pool = pool; + kobject_put(&tmp->kobj); + + return size; +} +DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, + blktap_sysfs_show_pool, blktap_sysfs_store_pool); + +int +blktap_sysfs_create(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct device *dev; + int err = 0; + + init_waitqueue_head(&tap->remove_wait); + + dev = device_create(class, NULL, ring->devno, + tap, "blktap%d", tap->minor); + if (IS_ERR(dev)) + err = PTR_ERR(dev); + if (!err) + err = device_create_file(dev, &dev_attr_name); + if (!err) + err = device_create_file(dev, &dev_attr_remove); + if (!err) + err = device_create_file(dev, &dev_attr_debug); + if (!err) + err = device_create_file(dev, &dev_attr_task); + if (!err) + err = device_create_file(dev, &dev_attr_pool); + if (!err) + ring->dev = dev; + else + device_unregister(dev); + + return err; +} 
+ +void +blktap_sysfs_destroy(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct device *dev; + + dev = ring->dev; + + if (!dev) + return; + + dev_set_drvdata(dev, NULL); + wake_up(&tap->remove_wait); + + device_unregister(dev); + ring->dev = NULL; +} + +static ssize_t +blktap_sysfs_show_verbosity(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", blktap_debug_level); +} + +static ssize_t +blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) +{ + int level; + + if (sscanf(buf, "%d", &level) == 1) { + blktap_debug_level = level; + return size; + } + + return -EINVAL; +} +static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR, + blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); + +static ssize_t +blktap_sysfs_show_devices(struct class *class, char *buf) +{ + int i, ret; + struct blktap *tap; + + mutex_lock(&blktap_lock); + + ret = 0; + for (i = 0; i < blktap_max_minor; i++) { + tap = blktaps[i]; + if (!tap) + continue; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + continue; + + ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name); + } + + mutex_unlock(&blktap_lock); + + return ret; +} +static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL); + +void +blktap_sysfs_exit(void) +{ + if (class) + class_destroy(class); +} + +int __init +blktap_sysfs_init(void) +{ + struct class *cls; + int err = 0; + + cls = class_create(THIS_MODULE, "blktap2"); + if (IS_ERR(cls)) + err = PTR_ERR(cls); + if (!err) + err = class_create_file(cls, &class_attr_verbosity); + if (!err) + err = class_create_file(cls, &class_attr_devices); + if (!err) + class = cls; + else + class_destroy(cls); + + return err; +} diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index c34e71c..b951b83 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -45,17 +45,6 @@ config XEN_BLKDEV_BACKEND interface. 
-config XEN_BLKDEV_TAP - tristate "Block-device tap backend driver" - depends on XEN_BACKEND && BLOCK - help - The block tap driver is an alternative to the block back driver - and allows VM block requests to be redirected to userspace through - a device interface. The tap allows user-space development of - high-performance block backends, where disk images may be implemented - as files, in memory, or on other hosts across the network. This - driver can safely coexist with the existing blockback driver. - config XEN_BLKBACK_PAGEMAP tristate depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa4d6e2..44f835e 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -6,6 +6,5 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile deleted file mode 100644 index 822b4e4..0000000 --- a/drivers/xen/blktap/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o - -blktap-objs := control.o ring.o device.o request.o sysfs.o diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h deleted file mode 100644 index 1318cad..0000000 --- a/drivers/xen/blktap/blktap.h +++ /dev/null @@ -1,161 +0,0 @@ -#ifndef _BLKTAP_H_ -#define _BLKTAP_H_ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/cdev.h> -#include <linux/init.h> -#include <linux/scatterlist.h> -#include <linux/blktap.h> - -extern int blktap_debug_level; -extern int blktap_ring_major; -extern int blktap_device_major; - -#define BTPRINTK(level, tag, force, _f, _a...) 
\ - do { \ - if (blktap_debug_level > level && \ - (force || printk_ratelimit())) \ - printk(tag "%s: " _f, __func__, ##_a); \ - } while (0) - -#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) -#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) -#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) -#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a) - -#define MAX_BLKTAP_DEVICE 1024 - -#define BLKTAP_DEVICE 4 -#define BLKTAP_DEVICE_CLOSED 5 -#define BLKTAP_SHUTDOWN_REQUESTED 8 - -#define BLKTAP_REQUEST_FREE 0 -#define BLKTAP_REQUEST_PENDING 1 - -struct blktap_device { - spinlock_t lock; - struct gendisk *gd; -}; - -struct blktap_request; - -struct blktap_ring { - struct task_struct *task; - - struct vm_area_struct *vma; - blktap_front_ring_t ring; - unsigned long ring_vstart; - unsigned long user_vstart; - - int n_pending; - struct blktap_request *pending[BLKTAP_RING_SIZE]; - - wait_queue_head_t poll_wait; - - dev_t devno; - struct device *dev; -}; - -struct blktap_statistics { - unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_rd_sect; - int st_wr_sect; - s64 st_rd_cnt; - s64 st_rd_sum_usecs; - s64 st_rd_max_usecs; - s64 st_wr_cnt; - s64 st_wr_sum_usecs; - s64 st_wr_max_usecs; -}; - -struct blktap_request { - struct blktap *tap; - struct request *rq; - int usr_idx; - - int operation; - struct timeval time; - - struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; - struct page *pages[BLKTAP_SEGMENT_MAX]; - int nr_pages; -}; - -#define blktap_for_each_sg(_sg, _req, _i) \ - for (_sg = (_req)->sg_table, _i = 0; \ - _i < (_req)->nr_pages; \ - (_sg)++, (_i)++) - -struct blktap { - int minor; - unsigned long dev_inuse; - - struct blktap_ring ring; - struct blktap_device device; - struct blktap_page_pool *pool; - - wait_queue_head_t remove_wait; - struct work_struct remove_work; - char name[BLKTAP_NAME_MAX]; - - struct blktap_statistics stats; -}; - -struct blktap_page_pool { - struct mempool_s 
*bufs; - spinlock_t lock; - struct kobject kobj; - wait_queue_head_t wait; -}; - -extern struct mutex blktap_lock; -extern struct blktap **blktaps; -extern int blktap_max_minor; - -int blktap_control_destroy_tap(struct blktap *); -size_t blktap_control_debug(struct blktap *, char *, size_t); - -int blktap_ring_init(void); -void blktap_ring_exit(void); -size_t blktap_ring_debug(struct blktap *, char *, size_t); -int blktap_ring_create(struct blktap *); -int blktap_ring_destroy(struct blktap *); -struct blktap_request *blktap_ring_make_request(struct blktap *); -void blktap_ring_free_request(struct blktap *,struct blktap_request *); -void blktap_ring_submit_request(struct blktap *, struct blktap_request *); -int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int); -int blktap_ring_map_request(struct blktap *, struct blktap_request *); -void blktap_ring_unmap_request(struct blktap *, struct blktap_request *); -void blktap_ring_set_message(struct blktap *, int); -void blktap_ring_kick_user(struct blktap *); - -int blktap_sysfs_init(void); -void blktap_sysfs_exit(void); -int blktap_sysfs_create(struct blktap *); -void blktap_sysfs_destroy(struct blktap *); - -int blktap_device_init(void); -void blktap_device_exit(void); -size_t blktap_device_debug(struct blktap *, char *, size_t); -int blktap_device_create(struct blktap *, struct blktap_device_info *); -int blktap_device_destroy(struct blktap *); -void blktap_device_destroy_sync(struct blktap *); -void blktap_device_run_queue(struct blktap *); -void blktap_device_end_request(struct blktap *, struct blktap_request *, int); - -int blktap_page_pool_init(struct kobject *); -void blktap_page_pool_exit(void); -struct blktap_page_pool *blktap_page_pool_get(const char *); - -size_t blktap_request_debug(struct blktap *, char *, size_t); -struct blktap_request *blktap_request_alloc(struct blktap *); -int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); -void 
blktap_request_free(struct blktap *, struct blktap_request *); -void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); - - -#endif diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c deleted file mode 100644 index 57b1a10..0000000 --- a/drivers/xen/blktap/control.c +++ /dev/null @@ -1,315 +0,0 @@ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/miscdevice.h> -#include <linux/device.h> -#include <asm/uaccess.h> - -#include "blktap.h" - -DEFINE_MUTEX(blktap_lock); - -struct blktap **blktaps; -int blktap_max_minor; -static struct blktap_page_pool *default_pool; - -static struct blktap * -blktap_control_get_minor(void) -{ - int minor; - struct blktap *tap; - - tap = kzalloc(sizeof(*tap), GFP_KERNEL); - if (unlikely(!tap)) - return NULL; - - mutex_lock(&blktap_lock); - - for (minor = 0; minor < blktap_max_minor; minor++) - if (!blktaps[minor]) - break; - - if (minor == MAX_BLKTAP_DEVICE) - goto fail; - - if (minor == blktap_max_minor) { - void *p; - int n; - - n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE); - p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL); - if (!p) - goto fail; - - blktaps = p; - minor = blktap_max_minor; - blktap_max_minor = n; - - memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0])); - } - - tap->minor = minor; - blktaps[minor] = tap; - - __module_get(THIS_MODULE); -out: - mutex_unlock(&blktap_lock); - return tap; - -fail: - mutex_unlock(&blktap_lock); - kfree(tap); - tap = NULL; - goto out; -} - -static void -blktap_control_put_minor(struct blktap* tap) -{ - blktaps[tap->minor] = NULL; - kfree(tap); - - module_put(THIS_MODULE); -} - -static struct blktap* -blktap_control_create_tap(void) -{ - struct blktap *tap; - int err; - - tap = blktap_control_get_minor(); - if (!tap) - return NULL; - - kobject_get(&default_pool->kobj); - tap->pool = default_pool; - - err = blktap_ring_create(tap); - if (err) - goto fail_tap; - - err = blktap_sysfs_create(tap); - if (err) - 
goto fail_ring; - - return tap; - -fail_ring: - blktap_ring_destroy(tap); -fail_tap: - blktap_control_put_minor(tap); - - return NULL; -} - -int -blktap_control_destroy_tap(struct blktap *tap) -{ - int err; - - err = blktap_ring_destroy(tap); - if (err) - return err; - - kobject_put(&tap->pool->kobj); - - blktap_sysfs_destroy(tap); - - blktap_control_put_minor(tap); - - return 0; -} - -static int -blktap_control_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct blktap *tap; - - switch (cmd) { - case BLKTAP_IOCTL_ALLOC_TAP: { - struct blktap_info info; - void __user *ptr = (void __user*)arg; - - tap = blktap_control_create_tap(); - if (!tap) - return -ENOMEM; - - info.ring_major = blktap_ring_major; - info.bdev_major = blktap_device_major; - info.ring_minor = tap->minor; - - if (copy_to_user(ptr, &info, sizeof(info))) { - blktap_control_destroy_tap(tap); - return -EFAULT; - } - - return 0; - } - - case BLKTAP_IOCTL_FREE_TAP: { - int minor = arg; - - if (minor > MAX_BLKTAP_DEVICE) - return -EINVAL; - - tap = blktaps[minor]; - if (!tap) - return -ENODEV; - - return blktap_control_destroy_tap(tap); - } - } - - return -ENOIOCTLCMD; -} - -static struct file_operations blktap_control_file_operations = { - .owner = THIS_MODULE, - .ioctl = blktap_control_ioctl, -}; - -static struct miscdevice blktap_control = { - .minor = MISC_DYNAMIC_MINOR, - .name = "blktap-control", - .fops = &blktap_control_file_operations, -}; - -static struct device *control_device; - -static ssize_t -blktap_control_show_default_pool(struct device *device, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%s", kobject_name(&default_pool->kobj)); -} - -static ssize_t -blktap_control_store_default_pool(struct device *device, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap_page_pool *pool, *tmp = default_pool; - - pool = blktap_page_pool_get(buf); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - 
default_pool = pool; - kobject_put(&tmp->kobj); - - return size; -} - -static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, - blktap_control_show_default_pool, - blktap_control_store_default_pool); - -size_t -blktap_control_debug(struct blktap *tap, char *buf, size_t size) -{ - char *s = buf, *end = buf + size; - - s += snprintf(s, end - s, - "tap %u:%u name:'%s' flags:%#08lx\n", - MAJOR(tap->ring.devno), MINOR(tap->ring.devno), - tap->name, tap->dev_inuse); - - return s - buf; -} - -static int __init -blktap_control_init(void) -{ - int err; - - err = misc_register(&blktap_control); - if (err) - return err; - - control_device = blktap_control.this_device; - - blktap_max_minor = min(64, MAX_BLKTAP_DEVICE); - blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL); - if (!blktaps) { - BTERR("failed to allocate blktap minor map"); - return -ENOMEM; - } - - err = blktap_page_pool_init(&control_device->kobj); - if (err) - return err; - - default_pool = blktap_page_pool_get("default"); - if (!default_pool) - return -ENOMEM; - - err = device_create_file(control_device, &dev_attr_default_pool); - if (err) - return err; - - return 0; -} - -static void -blktap_control_exit(void) -{ - if (default_pool) { - kobject_put(&default_pool->kobj); - default_pool = NULL; - } - - blktap_page_pool_exit(); - - if (blktaps) { - kfree(blktaps); - blktaps = NULL; - } - - if (control_device) { - misc_deregister(&blktap_control); - control_device = NULL; - } -} - -static void -blktap_exit(void) -{ - blktap_control_exit(); - blktap_ring_exit(); - blktap_sysfs_exit(); - blktap_device_exit(); -} - -static int __init -blktap_init(void) -{ - int err; - - err = blktap_device_init(); - if (err) - goto fail; - - err = blktap_ring_init(); - if (err) - goto fail; - - err = blktap_sysfs_init(); - if (err) - goto fail; - - err = blktap_control_init(); - if (err) - goto fail; - - return 0; - -fail: - blktap_exit(); - return err; -} - -module_init(blktap_init); 
-module_exit(blktap_exit); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c deleted file mode 100644 index 9a09457..0000000 --- a/drivers/xen/blktap/device.c +++ /dev/null @@ -1,551 +0,0 @@ -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/cdrom.h> -#include <linux/hdreg.h> -#include <scsi/scsi.h> -#include <scsi/scsi_ioctl.h> - -#include "blktap.h" - -int blktap_device_major; - -#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device) - -static int -blktap_device_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct blktap_device *tapdev = disk->private_data; - - if (!tapdev) - return -ENXIO; - - /* NB. we might have bounced a bd trylock by tapdisk. when - * failing for reasons not !tapdev, make sure to kick tapdisk - * out of destroy wait state again. */ - - return 0; -} - -static int -blktap_device_release(struct gendisk *disk, fmode_t mode) -{ - struct blktap_device *tapdev = disk->private_data; - struct block_device *bdev = bdget_disk(disk, 0); - struct blktap *tap = dev_to_blktap(tapdev); - - bdput(bdev); - - if (!bdev->bd_openers) { - set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse); - blktap_ring_kick_user(tap); - } - - return 0; -} - -static int -blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) -{ - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); - sector_t cylinders = nsect; - - hg->heads = 0xff; - hg->sectors = 0x3f; - sector_div(cylinders, hg->heads * hg->sectors); - hg->cylinders = cylinders; - if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) - hg->cylinders = 0xffff; - return 0; -} - -static int -blktap_device_ioctl(struct block_device *bd, fmode_t mode, - unsigned command, unsigned long argument) -{ - int i; - - switch (command) { - case CDROMMULTISESSION: - BTDBG("FIXME: 
support multisession CDs later\n"); - for (i = 0; i < sizeof(struct cdrom_multisession); i++) - if (put_user(0, (char __user *)(argument + i))) - return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_IDLUN: - if (!access_ok(VERIFY_WRITE, argument, - sizeof(struct scsi_idlun))) - return -EFAULT; - - /* return 0 for now. */ - __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); - __put_user(0, - &((struct scsi_idlun __user *)argument)->host_unique_id); - return 0; - - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ - } - - return 0; -} - -static struct block_device_operations blktap_device_file_operations = { - .owner = THIS_MODULE, - .open = blktap_device_open, - .release = blktap_device_release, - .ioctl = blktap_device_ioctl, - .getgeo = blktap_device_getgeo -}; - -/* NB. __blktap holding the queue lock; blktap where unlocked */ - -static inline struct request* -__blktap_next_queued_rq(struct request_queue *q) -{ - return blk_peek_request(q); -} - -static inline void -__blktap_dequeue_rq(struct request *rq) -{ - blk_start_request(rq); -} - -/* NB. 
err == 0 indicates success, failures < 0 */ - -static inline void -__blktap_end_queued_rq(struct request *rq, int err) -{ - blk_start_request(rq); - __blk_end_request(rq, err, blk_rq_bytes(rq)); -} - -static inline void -__blktap_end_rq(struct request *rq, int err) -{ - __blk_end_request(rq, err, blk_rq_bytes(rq)); -} - -static inline void -blktap_end_rq(struct request *rq, int err) -{ - spin_lock_irq(rq->q->queue_lock); - __blktap_end_rq(rq, err); - spin_unlock_irq(rq->q->queue_lock); -} - -void -blktap_device_end_request(struct blktap *tap, - struct blktap_request *request, - int error) -{ - struct blktap_device *tapdev = &tap->device; - struct request *rq = request->rq; - - blktap_ring_unmap_request(tap, request); - - blktap_ring_free_request(tap, request); - - dev_dbg(disk_to_dev(tapdev->gd), - "end_request: op=%d error=%d bytes=%d\n", - rq_data_dir(rq), error, blk_rq_bytes(rq)); - - blktap_end_rq(rq, error); -} - -int -blktap_device_make_request(struct blktap *tap, struct request *rq) -{ - struct blktap_device *tapdev = &tap->device; - struct blktap_request *request; - int write, nsegs; - int err; - - request = blktap_ring_make_request(tap); - if (IS_ERR(request)) { - err = PTR_ERR(request); - request = NULL; - - if (err == -ENOSPC || err == -ENOMEM) - goto stop; - - goto fail; - } - - write = rq_data_dir(rq) == WRITE; - nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); - - dev_dbg(disk_to_dev(tapdev->gd), - "make_request: op=%c bytes=%d nsegs=%d\n", - write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); - - request->rq = rq; - request->operation = write ? 
BLKTAP_OP_WRITE : BLKTAP_OP_READ; - - err = blktap_request_get_pages(tap, request, nsegs); - if (err) - goto stop; - - err = blktap_ring_map_request(tap, request); - if (err) - goto fail; - - blktap_ring_submit_request(tap, request); - - return 0; - -stop: - tap->stats.st_oo_req++; - err = -EBUSY; - -_out: - if (request) - blktap_ring_free_request(tap, request); - - return err; -fail: - if (printk_ratelimit()) - dev_warn(disk_to_dev(tapdev->gd), - "make request: %d, failing\n", err); - goto _out; -} - -/* - * called from tapdisk context - */ -void -blktap_device_run_queue(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct request_queue *q; - struct request *rq; - int err; - - if (!tapdev->gd) - return; - - q = tapdev->gd->queue; - - spin_lock_irq(&tapdev->lock); - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - do { - rq = __blktap_next_queued_rq(q); - if (!rq) - break; - - if (!blk_fs_request(rq)) { - __blktap_end_queued_rq(rq, -EOPNOTSUPP); - continue; - } - - spin_unlock_irq(&tapdev->lock); - - err = blktap_device_make_request(tap, rq); - - spin_lock_irq(&tapdev->lock); - - if (err == -EBUSY) { - blk_stop_queue(q); - break; - } - - __blktap_dequeue_rq(rq); - - if (unlikely(err)) - __blktap_end_rq(rq, err); - } while (1); - - spin_unlock_irq(&tapdev->lock); -} - -static void -blktap_device_do_request(struct request_queue *rq) -{ - struct blktap_device *tapdev = rq->queuedata; - struct blktap *tap = dev_to_blktap(tapdev); - - blktap_ring_kick_user(tap); -} - -static void -blktap_device_configure(struct blktap *tap, - struct blktap_device_info *info) -{ - struct blktap_device *tapdev = &tap->device; - struct gendisk *gd = tapdev->gd; - struct request_queue *rq = gd->queue; - - set_capacity(gd, info->capacity); - set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); - - /* Hard sector size and max sectors impersonate the equiv. hardware. 
*/ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_max_sectors(rq, 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); - blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - /* We are reordering, but cacheless. */ - blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); -} - -static int -blktap_device_validate_info(struct blktap *tap, - struct blktap_device_info *info) -{ - struct device *dev = tap->ring.dev; - int sector_order; - - sector_order = ffs(info->sector_size) - 1; - if (sector_order < 9 || - sector_order > 12 || - info->sector_size != 1U<<sector_order) - goto fail; - - if (!info->capacity || - (info->capacity > ULLONG_MAX >> sector_order)) - goto fail; - - return 0; - -fail: - dev_err(dev, "capacity: %llu, sector-size: %u\n", - info->capacity, info->sector_size); - return -EINVAL; -} - -int -blktap_device_destroy(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct block_device *bdev; - struct gendisk *gd; - int err; - - gd = tapdev->gd; - if (!gd) - return 0; - - bdev = bdget_disk(gd, 0); - - err = !mutex_trylock(&bdev->bd_mutex); - if (err) { - /* NB. avoid a deadlock. the last opener syncs the - * bdev holding bd_mutex. 
*/ - err = -EBUSY; - goto out_nolock; - } - - if (bdev->bd_openers) { - err = -EBUSY; - goto out; - } - - del_gendisk(gd); - gd->private_data = NULL; - - blk_cleanup_queue(gd->queue); - - put_disk(gd); - tapdev->gd = NULL; - - clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); - err = 0; -out: - mutex_unlock(&bdev->bd_mutex); -out_nolock: - bdput(bdev); - - return err; -} - -static void -blktap_device_fail_queue(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct request_queue *q = tapdev->gd->queue; - - spin_lock_irq(&tapdev->lock); - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - do { - struct request *rq = __blktap_next_queued_rq(q); - if (!rq) - break; - - __blktap_end_queued_rq(rq, -EIO); - } while (1); - - spin_unlock_irq(&tapdev->lock); -} - -static int -blktap_device_try_destroy(struct blktap *tap) -{ - int err; - - err = blktap_device_destroy(tap); - if (err) - blktap_device_fail_queue(tap); - - return err; -} - -void -blktap_device_destroy_sync(struct blktap *tap) -{ - wait_event(tap->ring.poll_wait, - !blktap_device_try_destroy(tap)); -} - -int -blktap_device_create(struct blktap *tap, struct blktap_device_info *info) -{ - int minor, err; - struct gendisk *gd; - struct request_queue *rq; - struct blktap_device *tapdev; - - gd = NULL; - rq = NULL; - tapdev = &tap->device; - minor = tap->minor; - - if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) - return -EEXIST; - - if (blktap_device_validate_info(tap, info)) - return -EINVAL; - - gd = alloc_disk(1); - if (!gd) { - err = -ENOMEM; - goto fail; - } - - if (minor < 26) { - sprintf(gd->disk_name, "td%c", 'a' + minor % 26); - } else if (minor < (26 + 1) * 26) { - sprintf(gd->disk_name, "td%c%c", - 'a' + minor / 26 - 1,'a' + minor % 26); - } else { - const unsigned int m1 = (minor / 26 - 1) / 26 - 1; - const unsigned int m2 = (minor / 26 - 1) % 26; - const unsigned int m3 = minor % 26; - sprintf(gd->disk_name, "td%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); - } - - gd->major = blktap_device_major; - 
gd->first_minor = minor; - gd->fops = &blktap_device_file_operations; - gd->private_data = tapdev; - - spin_lock_init(&tapdev->lock); - rq = blk_init_queue(blktap_device_do_request, &tapdev->lock); - if (!rq) { - err = -ENOMEM; - goto fail; - } - elevator_init(rq, "noop"); - - gd->queue = rq; - rq->queuedata = tapdev; - tapdev->gd = gd; - - blktap_device_configure(tap, info); - add_disk(gd); - - set_bit(BLKTAP_DEVICE, &tap->dev_inuse); - - dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", - queue_logical_block_size(rq), - queue_physical_block_size(rq), - (unsigned long long)get_capacity(gd)); - - return 0; - -fail: - if (gd) - del_gendisk(gd); - if (rq) - blk_cleanup_queue(rq); - - return err; -} - -size_t -blktap_device_debug(struct blktap *tap, char *buf, size_t size) -{ - struct gendisk *disk = tap->device.gd; - struct request_queue *q; - struct block_device *bdev; - char *s = buf, *end = buf + size; - - if (!disk) - return 0; - - q = disk->queue; - - s += snprintf(s, end - s, - "disk capacity:%llu sector size:%u\n", - (unsigned long long)get_capacity(disk), - queue_logical_block_size(q)); - - s += snprintf(s, end - s, - "queue flags:%#lx plugged:%d stopped:%d empty:%d\n", - q->queue_flags, - blk_queue_plugged(q), blk_queue_stopped(q), - elv_queue_empty(q)); - - bdev = bdget_disk(disk, 0); - if (bdev) { - s += snprintf(s, end - s, - "bdev openers:%d closed:%d\n", - bdev->bd_openers, - test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)); - bdput(bdev); - } - - return s - buf; -} - -int __init -blktap_device_init() -{ - int major; - - /* Dynamically allocate a major for this device */ - major = register_blkdev(0, "tapdev"); - if (major < 0) { - BTERR("Couldn't register blktap device\n"); - return -ENOMEM; - } - - blktap_device_major = major; - BTINFO("blktap device major %d\n", major); - - return 0; -} - -void -blktap_device_exit(void) -{ - if (blktap_device_major) - unregister_blkdev(blktap_device_major, "tapdev"); -} diff --git 
a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c deleted file mode 100644 index 8cfd6c9..0000000 --- a/drivers/xen/blktap/request.c +++ /dev/null @@ -1,418 +0,0 @@ -#include <linux/mempool.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/device.h> - -#include "blktap.h" - -/* max pages per shared pool. just to prevent accidental dos. */ -#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) - -/* default page pool size. when considering to shrink a shared pool, - * note that paused tapdisks may grab a whole lot of pages for a long - * time. */ -#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) - -/* max number of pages allocatable per request. */ -#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX - -/* min request structs per pool. These grow dynamically. */ -#define POOL_MIN_REQS BLKTAP_RING_SIZE - -static struct kset *pool_set; - -#define kobj_to_pool(_kobj) \ - container_of(_kobj, struct blktap_page_pool, kobj) - -static struct kmem_cache *request_cache; -static mempool_t *request_pool; - -static void -__page_pool_wake(struct blktap_page_pool *pool) -{ - mempool_t *mem = pool->bufs; - - /* - NB. slightly wasteful to always wait for a full segment - set. but this ensures the next disk makes - progress. presently, the repeated request struct - alloc/release cycles would otherwise keep everyone spinning. - */ - - if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) - wake_up(&pool->wait); -} - -int -blktap_request_get_pages(struct blktap *tap, - struct blktap_request *request, int nr_pages) -{ - struct blktap_page_pool *pool = tap->pool; - mempool_t *mem = pool->bufs; - struct page *page; - - BUG_ON(request->nr_pages != 0); - BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); - - if (mem->curr_nr < nr_pages) - return -ENOMEM; - - /* NB. avoid thundering herds of tapdisks colliding. 
*/ - spin_lock(&pool->lock); - - if (mem->curr_nr < nr_pages) { - spin_unlock(&pool->lock); - return -ENOMEM; - } - - while (request->nr_pages < nr_pages) { - page = mempool_alloc(mem, GFP_NOWAIT); - BUG_ON(!page); - request->pages[request->nr_pages++] = page; - } - - spin_unlock(&pool->lock); - - return 0; -} - -static void -blktap_request_put_pages(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_page_pool *pool = tap->pool; - struct page *page; - - while (request->nr_pages) { - page = request->pages[--request->nr_pages]; - mempool_free(page, pool->bufs); - } -} - -size_t -blktap_request_debug(struct blktap *tap, char *buf, size_t size) -{ - struct blktap_page_pool *pool = tap->pool; - mempool_t *mem = pool->bufs; - char *s = buf, *end = buf + size; - - s += snprintf(buf, end - s, - "pool:%s pages:%d free:%d\n", - kobject_name(&pool->kobj), - mem->min_nr, mem->curr_nr); - - return s - buf; -} - -struct blktap_request* -blktap_request_alloc(struct blktap *tap) -{ - struct blktap_request *request; - - request = mempool_alloc(request_pool, GFP_NOWAIT); - if (request) - request->tap = tap; - - return request; -} - -void -blktap_request_free(struct blktap *tap, - struct blktap_request *request) -{ - blktap_request_put_pages(tap, request); - - mempool_free(request, request_pool); - - __page_pool_wake(tap->pool); -} - -void -blktap_request_bounce(struct blktap *tap, - struct blktap_request *request, - int seg, int write) -{ - struct scatterlist *sg = &request->sg_table[seg]; - void *s, *p; - - BUG_ON(seg >= request->nr_pages); - - s = sg_virt(sg); - p = page_address(request->pages[seg]) + sg->offset; - - if (write) - memcpy(p, s, sg->length); - else - memcpy(s, p, sg->length); -} - -static void -blktap_request_ctor(void *obj) -{ - struct blktap_request *request = obj; - - memset(request, 0, sizeof(*request)); - sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); -} - -static int -blktap_page_pool_resize(struct blktap_page_pool *pool, 
int target) -{ - mempool_t *bufs = pool->bufs; - int err; - - /* NB. mempool asserts min_nr >= 1 */ - target = max(1, target); - - err = mempool_resize(bufs, target, GFP_KERNEL); - if (err) - return err; - - __page_pool_wake(pool); - - return 0; -} - -struct pool_attribute { - struct attribute attr; - - ssize_t (*show)(struct blktap_page_pool *pool, - char *buf); - - ssize_t (*store)(struct blktap_page_pool *pool, - const char *buf, size_t count); -}; - -#define kattr_to_pool_attr(_kattr) \ - container_of(_kattr, struct pool_attribute, attr) - -static ssize_t -blktap_page_pool_show_size(struct blktap_page_pool *pool, - char *buf) -{ - mempool_t *mem = pool->bufs; - return sprintf(buf, "%d", mem->min_nr); -} - -static ssize_t -blktap_page_pool_store_size(struct blktap_page_pool *pool, - const char *buf, size_t size) -{ - int target; - - /* - * NB. target fixup to avoid undesired results. less than a - * full segment set can wedge the disk. much more than a - * couple times the physical queue depth is rarely useful. - */ - - target = simple_strtoul(buf, NULL, 0); - target = max(POOL_MAX_REQUEST_PAGES, target); - target = min(target, POOL_MAX_PAGES); - - return blktap_page_pool_resize(pool, target) ? 
: size; -} - -static struct pool_attribute blktap_page_pool_attr_size = - __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, - blktap_page_pool_show_size, - blktap_page_pool_store_size); - -static ssize_t -blktap_page_pool_show_free(struct blktap_page_pool *pool, - char *buf) -{ - mempool_t *mem = pool->bufs; - return sprintf(buf, "%d", mem->curr_nr); -} - -static struct pool_attribute blktap_page_pool_attr_free = - __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH, - blktap_page_pool_show_free, - NULL); - -static struct attribute *blktap_page_pool_attrs[] = { - &blktap_page_pool_attr_size.attr, - &blktap_page_pool_attr_free.attr, - NULL, -}; - -static inline struct kobject* -__blktap_kset_find_obj(struct kset *kset, const char *name) -{ - struct kobject *k; - struct kobject *ret = NULL; - - spin_lock(&kset->list_lock); - list_for_each_entry(k, &kset->list, entry) { - if (kobject_name(k) && !strcmp(kobject_name(k), name)) { - ret = kobject_get(k); - break; - } - } - spin_unlock(&kset->list_lock); - return ret; -} - -static ssize_t -blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr, - char *buf) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - struct pool_attribute *attr = kattr_to_pool_attr(kattr); - - if (attr->show) - return attr->show(pool, buf); - - return -EIO; -} - -static ssize_t -blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr, - const char *buf, size_t size) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - struct pool_attribute *attr = kattr_to_pool_attr(kattr); - - if (attr->show) - return attr->store(pool, buf, size); - - return -EIO; -} - -static struct sysfs_ops blktap_page_pool_sysfs_ops = { - .show = blktap_page_pool_show_attr, - .store = blktap_page_pool_store_attr, -}; - -static void -blktap_page_pool_release(struct kobject *kobj) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - mempool_destroy(pool->bufs); - kfree(pool); -} - -struct kobj_type blktap_page_pool_ktype = { - .release = 
blktap_page_pool_release, - .sysfs_ops = &blktap_page_pool_sysfs_ops, - .default_attrs = blktap_page_pool_attrs, -}; - -static void* -__mempool_page_alloc(gfp_t gfp_mask, void *pool_data) -{ - struct page *page; - - if (!(gfp_mask & __GFP_WAIT)) - return NULL; - - page = alloc_page(gfp_mask); - if (page) - SetPageReserved(page); - - return page; -} - -static void -__mempool_page_free(void *element, void *pool_data) -{ - struct page *page = element; - - ClearPageReserved(page); - put_page(page); -} - -static struct kobject* -blktap_page_pool_create(const char *name, int nr_pages) -{ - struct blktap_page_pool *pool; - int err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) - goto fail; - - spin_lock_init(&pool->lock); - init_waitqueue_head(&pool->wait); - - pool->bufs = mempool_create(nr_pages, - __mempool_page_alloc, __mempool_page_free, - pool); - if (!pool->bufs) - goto fail_pool; - - kobject_init(&pool->kobj, &blktap_page_pool_ktype); - pool->kobj.kset = pool_set; - err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name); - if (err) - goto fail_bufs; - - return &pool->kobj; - - kobject_del(&pool->kobj); -fail_bufs: - mempool_destroy(pool->bufs); -fail_pool: - kfree(pool); -fail: - return NULL; -} - -struct blktap_page_pool* -blktap_page_pool_get(const char *name) -{ - struct kobject *kobj; - - kobj = __blktap_kset_find_obj(pool_set, name); - if (!kobj) - kobj = blktap_page_pool_create(name, - POOL_DEFAULT_PAGES); - if (!kobj) - return ERR_PTR(-ENOMEM); - - return kobj_to_pool(kobj); -} - -int __init -blktap_page_pool_init(struct kobject *parent) -{ - request_cache = - kmem_cache_create("blktap-request", - sizeof(struct blktap_request), 0, - 0, blktap_request_ctor); - if (!request_cache) - return -ENOMEM; - - request_pool = - mempool_create_slab_pool(POOL_MIN_REQS, request_cache); - if (!request_pool) - return -ENOMEM; - - pool_set = kset_create_and_add("pools", NULL, parent); - if (!pool_set) - return -ENOMEM; - - return 0; -} - -void 
-blktap_page_pool_exit(void) -{ - if (pool_set) { - BUG_ON(!list_empty(&pool_set->list)); - kset_unregister(pool_set); - pool_set = NULL; - } - - if (request_pool) { - mempool_destroy(request_pool); - request_pool = NULL; - } - - if (request_cache) { - kmem_cache_destroy(request_cache); - request_cache = NULL; - } -} diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c deleted file mode 100644 index 635f1fd..0000000 --- a/drivers/xen/blktap/ring.c +++ /dev/null @@ -1,595 +0,0 @@ - -#include <linux/device.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/poll.h> -#include <linux/blkdev.h> - -#include "blktap.h" - -int blktap_ring_major; -static struct cdev blktap_ring_cdev; - - /* - * BLKTAP - immediately before the mmap area, - * we have a bunch of pages reserved for shared memory rings. - */ -#define RING_PAGES 1 - -#define BLKTAP_INFO_SIZE_AT(_memb) \ - offsetof(struct blktap_device_info, _memb) + \ - sizeof(((struct blktap_device_info*)0)->_memb) - -static void -blktap_ring_read_response(struct blktap *tap, - const blktap_ring_rsp_t *rsp) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx, err; - - request = NULL; - - usr_idx = rsp->id; - if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { - err = -ERANGE; - goto invalid; - } - - request = ring->pending[usr_idx]; - - if (!request) { - err = -ESRCH; - goto invalid; - } - - if (rsp->operation != request->operation) { - err = -EINVAL; - goto invalid; - } - - dev_dbg(ring->dev, - "request %d [%p] response: %d\n", - request->usr_idx, request, rsp->status); - - err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; -end_request: - blktap_device_end_request(tap, request, err); - return; - -invalid: - dev_warn(ring->dev, - "invalid response, idx:%d status:%d op:%d/%d: err %d\n", - usr_idx, rsp->status, - rsp->operation, request->operation, - err); - if (request) - goto end_request; -} - -static void -blktap_read_ring(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - blktap_ring_rsp_t rsp; - RING_IDX rc, rp; - - down_read(¤t->mm->mmap_sem); - if (!ring->vma) { - up_read(¤t->mm->mmap_sem); - return; - } - - /* for each outstanding message on the ring */ - rp = ring->ring.sring->rsp_prod; - rmb(); - - for (rc = ring->ring.rsp_cons; rc != rp; rc++) { - memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp)); - blktap_ring_read_response(tap, &rsp); - } - - ring->ring.rsp_cons = rc; - - up_read(¤t->mm->mmap_sem); -} - -#define MMAP_VADDR(_start, _req, _seg) \ - ((_start) + \ - ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ - ((_seg) * BLKTAP_PAGE_SIZE)) - -static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} - -static void -blktap_ring_fail_pending(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx; - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { - request = ring->pending[usr_idx]; - if (!request) - continue; - - blktap_device_end_request(tap, request, -EIO); - } -} - -static void -blktap_ring_vm_close(struct vm_area_struct *vma) -{ - struct blktap *tap = vma->vm_private_data; - struct blktap_ring *ring = &tap->ring; - struct page *page = virt_to_page(ring->ring.sring); - - blktap_ring_fail_pending(tap); - - zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); - ClearPageReserved(page); - __free_page(page); - - ring->vma = NULL; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - blktap_control_destroy_tap(tap); -} - -static struct vm_operations_struct blktap_ring_vm_operations = { - .close = 
blktap_ring_vm_close, - .fault = blktap_ring_fault, -}; - -int -blktap_ring_map_segment(struct blktap *tap, - struct blktap_request *request, - int seg) -{ - struct blktap_ring *ring = &tap->ring; - unsigned long uaddr; - - uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); - return vm_insert_page(ring->vma, uaddr, request->pages[seg]); -} - -int -blktap_ring_map_request(struct blktap *tap, - struct blktap_request *request) -{ - int seg, err = 0; - int write; - - write = request->operation == BLKTAP_OP_WRITE; - - for (seg = 0; seg < request->nr_pages; seg++) { - if (write) - blktap_request_bounce(tap, request, seg, write); - - err = blktap_ring_map_segment(tap, request, seg); - if (err) - break; - } - - if (err) - blktap_ring_unmap_request(tap, request); - - return err; -} - -void -blktap_ring_unmap_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - unsigned long uaddr; - unsigned size; - int seg, read; - - uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); - size = request->nr_pages << PAGE_SHIFT; - read = request->operation == BLKTAP_OP_READ; - - if (read) - for (seg = 0; seg < request->nr_pages; seg++) - blktap_request_bounce(tap, request, seg, !read); - - zap_page_range(ring->vma, uaddr, size, NULL); -} - -void -blktap_ring_free_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - - ring->pending[request->usr_idx] = NULL; - ring->n_pending--; - - blktap_request_free(tap, request); -} - -struct blktap_request* -blktap_ring_make_request(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx; - - if (RING_FULL(&ring->ring)) - return ERR_PTR(-ENOSPC); - - request = blktap_request_alloc(tap); - if (!request) - return ERR_PTR(-ENOMEM); - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) - if (!ring->pending[usr_idx]) - break; - - BUG_ON(usr_idx >= BLKTAP_RING_SIZE); - - 
request->tap = tap; - request->usr_idx = usr_idx; - - ring->pending[usr_idx] = request; - ring->n_pending++; - - return request; -} - -void -blktap_ring_submit_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - blktap_ring_req_t *breq; - struct scatterlist *sg; - int i, nsecs = 0; - - dev_dbg(ring->dev, - "request %d [%p] submit\n", request->usr_idx, request); - - breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); - - breq->id = request->usr_idx; - breq->sector_number = blk_rq_pos(request->rq); - breq->__pad = 0; - breq->operation = request->operation; - breq->nr_segments = request->nr_pages; - - blktap_for_each_sg(sg, request, i) { - struct blktap_segment *seg = &breq->seg[i]; - int first, count; - - count = sg->length >> 9; - first = sg->offset >> 9; - - seg->first_sect = first; - seg->last_sect = first + count - 1; - - nsecs += count; - } - - ring->ring.req_prod_pvt++; - - do_gettimeofday(&request->time); - - - if (request->operation == BLKTAP_OP_WRITE) { - tap->stats.st_wr_sect += nsecs; - tap->stats.st_wr_req++; - } - - if (request->operation == BLKTAP_OP_READ) { - tap->stats.st_rd_sect += nsecs; - tap->stats.st_rd_req++; - } -} - -static int -blktap_ring_open(struct inode *inode, struct file *filp) -{ - struct blktap *tap = NULL; - int minor; - - minor = iminor(inode); - - if (minor < blktap_max_minor) - tap = blktaps[minor]; - - if (!tap) - return -ENXIO; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - return -ENXIO; - - if (tap->ring.task) - return -EBUSY; - - filp->private_data = tap; - tap->ring.task = current; - - return 0; -} - -static int -blktap_ring_release(struct inode *inode, struct file *filp) -{ - struct blktap *tap = filp->private_data; - - blktap_device_destroy_sync(tap); - - tap->ring.task = NULL; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - blktap_control_destroy_tap(tap); - - return 0; -} - -static int -blktap_ring_mmap(struct file *filp, 
struct vm_area_struct *vma) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - blktap_sring_t *sring; - struct page *page = NULL; - int err; - - if (ring->vma) - return -EBUSY; - - page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!page) - return -ENOMEM; - - SetPageReserved(page); - - err = vm_insert_page(vma, vma->vm_start, page); - if (err) - goto fail; - - sring = page_address(page); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); - - ring->ring_vstart = vma->vm_start; - ring->user_vstart = ring->ring_vstart + PAGE_SIZE; - - vma->vm_private_data = tap; - - vma->vm_flags |= VM_DONTCOPY; - vma->vm_flags |= VM_RESERVED; - - vma->vm_ops = &blktap_ring_vm_operations; - - ring->vma = vma; - return 0; - -fail: - if (page) { - zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); - ClearPageReserved(page); - __free_page(page); - } - - return err; -} - -static int -blktap_ring_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - void __user *ptr = (void *)arg; - int err; - - BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); - - if (!ring->vma || ring->vma->vm_mm != current->mm) - return -EACCES; - - switch(cmd) { - case BLKTAP_IOCTL_RESPOND: - - blktap_read_ring(tap); - return 0; - - case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: { - struct blktap_device_info info; - struct blktap2_params params; - - if (copy_from_user(&params, ptr, sizeof(params))) - return -EFAULT; - - info.capacity = params.capacity; - info.sector_size = params.sector_size; - info.flags = 0; - - err = blktap_device_create(tap, &info); - if (err) - return err; - - if (params.name[0]) { - strncpy(tap->name, params.name, sizeof(params.name)); - tap->name[sizeof(tap->name)-1] = 0; - } - - return 0; - } - - case BLKTAP_IOCTL_CREATE_DEVICE: { - struct blktap_device_info __user *ptr = (void *)arg; - struct blktap_device_info info; 
- unsigned long mask; - size_t base_sz, sz; - - mask = BLKTAP_DEVICE_FLAG_RO; - - memset(&info, 0, sizeof(info)); - sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); - - if (copy_from_user(&info, ptr, sz)) - return -EFAULT; - - if (sz > base_sz) - if (copy_from_user(&info, ptr, sz)) - return -EFAULT; - - if (put_user(info.flags & mask, &ptr->flags)) - return -EFAULT; - - return blktap_device_create(tap, &info); - } - - case BLKTAP_IOCTL_REMOVE_DEVICE: - - return blktap_device_destroy(tap); - } - - return -ENOIOCTLCMD; -} - -static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - int work; - - poll_wait(filp, &tap->pool->wait, wait); - poll_wait(filp, &ring->poll_wait, wait); - - down_read(&current->mm->mmap_sem); - if (ring->vma && tap->device.gd) - blktap_device_run_queue(tap); - up_read(&current->mm->mmap_sem); - - work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod; - RING_PUSH_REQUESTS(&ring->ring); - - if (work || - ring->ring.sring->private.tapif_user.msg || - test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)) - return POLLIN | POLLRDNORM; - - return 0; -} - -static struct file_operations blktap_ring_file_operations = { - .owner = THIS_MODULE, - .open = blktap_ring_open, - .release = blktap_ring_release, - .ioctl = blktap_ring_ioctl, - .mmap = blktap_ring_mmap, - .poll = blktap_ring_poll, -}; - -void -blktap_ring_kick_user(struct blktap *tap) -{ - wake_up(&tap->ring.poll_wait); -} - -int -blktap_ring_destroy(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - - if (ring->task || ring->vma) - return -EBUSY; - - return 0; -} - -int -blktap_ring_create(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - - init_waitqueue_head(&ring->poll_wait); - ring->devno = MKDEV(blktap_ring_major, tap->minor); - - return 0; -} - -size_t -blktap_ring_debug(struct blktap *tap, char *buf, size_t size) -{ - struct blktap_ring *ring = &tap->ring; - 
char *s = buf, *end = buf + size; - int usr_idx; - - s += snprintf(s, end - s, - "begin pending:%d\n", ring->n_pending); - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { - struct blktap_request *request; - struct timeval *time; - int write; - - request = ring->pending[usr_idx]; - if (!request) - continue; - - write = request->operation == BLKTAP_OP_WRITE; - time = &request->time; - - s += snprintf(s, end - s, - "%02d: usr_idx:%02d " - "op:%c nr_pages:%02d time:%lu.%09lu\n", - usr_idx, request->usr_idx, - write ? 'W' : 'R', request->nr_pages, - time->tv_sec, time->tv_usec); - } - - s += snprintf(s, end - s, "end pending\n"); - - return s - buf; -} - - -int __init -blktap_ring_init(void) -{ - dev_t dev = 0; - int err; - - cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations); - blktap_ring_cdev.owner = THIS_MODULE; - - err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2"); - if (err < 0) { - BTERR("error registering ring devices: %d\n", err); - return err; - } - - err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE); - if (err) { - BTERR("error adding ring device: %d\n", err); - unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE); - return err; - } - - blktap_ring_major = MAJOR(dev); - BTINFO("blktap ring major: %d\n", blktap_ring_major); - - return 0; -} - -void -blktap_ring_exit(void) -{ - if (!blktap_ring_major) - return; - - cdev_del(&blktap_ring_cdev); - unregister_chrdev_region(MKDEV(blktap_ring_major, 0), - MAX_BLKTAP_DEVICE); - - blktap_ring_major = 0; -} diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c deleted file mode 100644 index 182de9a..0000000 --- a/drivers/xen/blktap/sysfs.c +++ /dev/null @@ -1,288 +0,0 @@ -#include <linux/types.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/genhd.h> -#include <linux/blkdev.h> - -#include "blktap.h" - -int blktap_debug_level = 1; - -static struct class *class; - -static ssize_t -blktap_sysfs_set_name(struct device 
*dev, struct device_attribute *attr, const char *buf, size_t size) -{ - struct blktap *tap; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (size >= BLKTAP_NAME_MAX) - return -ENAMETOOLONG; - - if (strnlen(buf, size) != size) - return -EINVAL; - - strcpy(tap->name, buf); - - return size; -} - -static ssize_t -blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - ssize_t size; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (tap->name[0]) - size = sprintf(buf, "%s\n", tap->name); - else - size = sprintf(buf, "%d\n", tap->minor); - - return size; -} -static DEVICE_ATTR(name, S_IRUGO|S_IWUSR, - blktap_sysfs_get_name, blktap_sysfs_set_name); - -static void -blktap_sysfs_remove_work(struct work_struct *work) -{ - struct blktap *tap - = container_of(work, struct blktap, remove_work); - blktap_control_destroy_tap(tap); -} - -static ssize_t -blktap_sysfs_remove_device(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap *tap; - int err; - - tap = dev_get_drvdata(dev); - if (!tap) - return size; - - if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - goto wait; - - if (tap->ring.vma) { - blktap_sring_t *sring = tap->ring.ring.sring; - sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; - blktap_ring_kick_user(tap); - } else { - INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); - schedule_work(&tap->remove_work); - } -wait: - err = wait_event_interruptible(tap->remove_wait, - !dev_get_drvdata(dev)); - if (err) - return err; - - return size; -} -static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); - -static ssize_t -blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - char *s = buf, *end = buf + PAGE_SIZE; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - s += blktap_control_debug(tap, s, end - s); - - s += 
blktap_request_debug(tap, s, end - s); - - s += blktap_device_debug(tap, s, end - s); - - s += blktap_ring_debug(tap, s, end - s); - - return s - buf; -} -static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL); - -static ssize_t -blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - ssize_t rv = 0; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (tap->ring.task) - rv = sprintf(buf, "%d\n", tap->ring.task->pid); - - return rv; -} -static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); - -static ssize_t -blktap_sysfs_show_pool(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct blktap *tap = dev_get_drvdata(dev); - return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); -} - -static ssize_t -blktap_sysfs_store_pool(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap *tap = dev_get_drvdata(dev); - struct blktap_page_pool *pool, *tmp = tap->pool; - - if (tap->device.gd) - return -EBUSY; - - pool = blktap_page_pool_get(buf); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - tap->pool = pool; - kobject_put(&tmp->kobj); - - return size; -} -DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, - blktap_sysfs_show_pool, blktap_sysfs_store_pool); - -int -blktap_sysfs_create(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct device *dev; - int err = 0; - - init_waitqueue_head(&tap->remove_wait); - - dev = device_create(class, NULL, ring->devno, - tap, "blktap%d", tap->minor); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - if (!err) - err = device_create_file(dev, &dev_attr_name); - if (!err) - err = device_create_file(dev, &dev_attr_remove); - if (!err) - err = device_create_file(dev, &dev_attr_debug); - if (!err) - err = device_create_file(dev, &dev_attr_task); - if (!err) - err = device_create_file(dev, &dev_attr_pool); - if (!err) - ring->dev = dev; - else - device_unregister(dev); - - return err; -} 
- -void -blktap_sysfs_destroy(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct device *dev; - - dev = ring->dev; - - if (!dev) - return; - - dev_set_drvdata(dev, NULL); - wake_up(&tap->remove_wait); - - device_unregister(dev); - ring->dev = NULL; -} - -static ssize_t -blktap_sysfs_show_verbosity(struct class *class, char *buf) -{ - return sprintf(buf, "%d\n", blktap_debug_level); -} - -static ssize_t -blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) -{ - int level; - - if (sscanf(buf, "%d", &level) == 1) { - blktap_debug_level = level; - return size; - } - - return -EINVAL; -} -static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR, - blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); - -static ssize_t -blktap_sysfs_show_devices(struct class *class, char *buf) -{ - int i, ret; - struct blktap *tap; - - mutex_lock(&blktap_lock); - - ret = 0; - for (i = 0; i < blktap_max_minor; i++) { - tap = blktaps[i]; - if (!tap) - continue; - - if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) - continue; - - ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name); - } - - mutex_unlock(&blktap_lock); - - return ret; -} -static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL); - -void -blktap_sysfs_exit(void) -{ - if (class) - class_destroy(class); -} - -int __init -blktap_sysfs_init(void) -{ - struct class *cls; - int err = 0; - - cls = class_create(THIS_MODULE, "blktap2"); - if (IS_ERR(cls)) - err = PTR_ERR(cls); - if (!err) - err = class_create_file(cls, &class_attr_verbosity); - if (!err) - err = class_create_file(cls, &class_attr_devices); - if (!err) - class = cls; - else - class_destroy(cls); - - return err; -} -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |