[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC v1 2/5] VBD: enlarge max segment per request in blkfront
add segring support in blkfront Signed-off-by: Ronghui Duan <ronghui.duan@xxxxxxxxx> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a263faf..b9f383d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -76,10 +76,23 @@ struct blk_shadow { unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; +struct blk_req_shadow { + struct blkif_request_header req; + struct request *request; +}; + +struct blk_seg_shadow { + uint64_t id; + struct blkif_request_segment req; + unsigned long frame; +}; + static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +#define BLK_REQ_RING_SIZE __CONST_RING_SIZE(blkif_request, PAGE_SIZE) +#define BLK_SEG_RING_SIZE __CONST_RING_SIZE(blkif_segment, PAGE_SIZE) /* * We have one of these per vbd, whether ide, scsi or 'other'. They @@ -96,22 +109,30 @@ struct blkfront_info blkif_vdev_t handle; enum blkif_state connected; int ring_ref; + int reqring_ref; + int segring_ref; struct blkif_front_ring ring; + struct blkif_request_front_ring reqring; + struct blkif_segment_front_ring segring; struct scatterlist *sg; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow *shadow; + struct blk_req_shadow *req_shadow; + struct blk_seg_shadow *seg_shadow; struct blk_front_operations *ops; enum blkif_ring_type ring_type; unsigned long shadow_free; + unsigned long seg_shadow_free; unsigned int feature_flush; unsigned int flush_op; unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int discard_granularity; unsigned int discard_alignment; + unsigned long last_id; int is_ready; }; @@ -124,7 +145,7 @@ static struct blk_front_operations { unsigned long (*get_id) (struct blkfront_info *info); void (*add_id) (struct blkfront_info *info, unsigned long id); void (*save_seg_shadow) (struct blkfront_info *info, 
unsigned long mfn, - unsigned long id, int i); + unsigned long id, int i, struct blkif_request_segment *ring_seg); void (*save_req_shadow) (struct blkfront_info *info, struct request *req, unsigned long id); struct request *(*get_req_from_shadow)(struct blkfront_info *info, @@ -136,14 +157,16 @@ static struct blk_front_operations { void (*update_rsp_event) (struct blkfront_info *info, int i); void (*update_rsp_cons) (struct blkfront_info *info); void (*update_req_prod_pvt) (struct blkfront_info *info); + void (*update_segment_rsp_cons) (struct blkfront_info *info, unsigned long id); void (*ring_push) (struct blkfront_info *info, int *notify); int (*recover) (struct blkfront_info *info); int (*ring_full) (struct blkfront_info *info); + int (*segring_full) (struct blkfront_info *info, unsigned int nr_segments); int (*setup_blkring) (struct xenbus_device *dev, struct blkfront_info *info); void (*free_blkring) (struct blkfront_info *info, int suspend); void (*blkif_completion) (struct blkfront_info *info, unsigned long id); unsigned int max_seg; -} blk_front_ops; +} blk_front_ops, blk_front_ops_v2; static unsigned int nr_minors; static unsigned long *minors; @@ -179,6 +202,24 @@ static unsigned long get_id_from_freelist(struct blkfront_info *info) return free; } +static unsigned long get_id_from_freelist_v2(struct blkfront_info *info) +{ + unsigned long free = info->shadow_free; + BUG_ON(free >= BLK_REQ_RING_SIZE); + info->shadow_free = info->req_shadow[free].req.u.rw.id; + info->req_shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ + return free; +} + +static unsigned long get_seg_shadow_id(struct blkfront_info *info) +{ + unsigned long free = info->seg_shadow_free; + BUG_ON(free >= BLK_SEG_RING_SIZE); + info->seg_shadow_free = info->seg_shadow[free].id; + info->seg_shadow[free].id = 0x0fffffee; /* debug */ + return free; +} + void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { @@ -187,6 +228,21 @@ void add_id_to_freelist(struct blkfront_info 
*info, info->shadow_free = id; } +static void add_id_to_freelist_v2(struct blkfront_info *info, + unsigned long id) +{ + info->req_shadow[id].req.u.rw.id = info->shadow_free; + info->req_shadow[id].request = NULL; + info->shadow_free = id; +} + +static void free_seg_shadow_id(struct blkfront_info *info, + unsigned long id) +{ + info->seg_shadow[id].id = info->seg_shadow_free; + info->seg_shadow_free = id; +} + static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) { unsigned int end = minor + nr; @@ -299,6 +355,14 @@ void *ring_get_request(struct blkfront_info *info) return (void *)RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); } +void *ring_get_request_v2(struct blkfront_info *info) +{ + struct blkif_request_header *ring_req; + ring_req = RING_GET_REQUEST(&info->reqring, + info->reqring.req_prod_pvt); + return (void *)ring_req; +} + struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i) { struct blkif_request *ring_req = @@ -306,12 +370,34 @@ struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i return &ring_req->u.rw.seg[i]; } -void save_seg_shadow(struct blkfront_info *info, - unsigned long mfn, unsigned long id, int i) +struct blkif_request_segment *ring_get_segment_v2(struct blkfront_info *info, int i) +{ + return RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt++); +} + +void save_seg_shadow(struct blkfront_info *info, unsigned long mfn, + unsigned long id, int i, struct blkif_request_segment *ring_seg) { info->shadow[id].frame[i] = mfn_to_pfn(mfn); } +void save_seg_shadow_v2(struct blkfront_info *info, unsigned long mfn, + unsigned long id, int i, struct blkif_request_segment *ring_seg) +{ + struct blkif_request_header *ring_req; + unsigned long seg_id = get_seg_shadow_id(info); + + ring_req = (struct blkif_request_header *)info->ops->ring_get_request(info); + if (i == 0) + ring_req->u.rw.seg_id = seg_id; + else + info->seg_shadow[info->last_id].id = seg_id; + 
info->seg_shadow[seg_id].frame = mfn_to_pfn(mfn); + memcpy(&(info->seg_shadow[seg_id].req), ring_seg, + sizeof(struct blkif_request_segment)); + info->last_id = seg_id; +} + void save_req_shadow(struct blkfront_info *info, struct request *req, unsigned long id) { @@ -321,10 +407,34 @@ void save_req_shadow(struct blkfront_info *info, info->shadow[id].request = req; } +void save_req_shadow_v2(struct blkfront_info *info, + struct request *req, unsigned long id) +{ + struct blkif_request_header *ring_req = + (struct blkif_request_header *)info->ops->ring_get_request(info); + info->req_shadow[id].req = *ring_req; + info->req_shadow[id].request = req; +} + void update_req_prod_pvt(struct blkfront_info *info) { info->ring.req_prod_pvt++; } + +void update_req_prod_pvt_v2(struct blkfront_info *info) +{ + info->reqring.req_prod_pvt++; +} + +int segring_full(struct blkfront_info *info, unsigned int nr_segments) +{ + return 0; +} + +int segring_full_v2(struct blkfront_info *info, unsigned int nr_segments) +{ + return nr_segments > RING_FREE_REQUESTS(&info->segring); +} /* * Generate a Xen blkfront IO request from a blk layer request. Reads * and writes are handled as expected. @@ -347,19 +457,18 @@ static int blkif_queue_request(struct request *req) return 1; if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + info->ops->max_seg, &gref_head) < 0) { gnttab_request_free_callback( &info->callback, blkif_restart_queue_callback, info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); + info->ops->max_seg); return 1; } /* Fill out a communications ring structure. 
*/ ring_req = (struct blkif_request *)info->ops->ring_get_request(info); id = info->ops->get_id(info); - //info->shadow[id].request = req; ring_req->u.rw.id = id; ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); @@ -392,6 +501,9 @@ static int blkif_queue_request(struct request *req) info->sg); BUG_ON(ring_req->u.rw.nr_segments > info->ops->max_seg); + if (info->ops->segring_full(info, ring_req->u.rw.nr_segments)) + goto wait; + for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); fsect = sg->offset >> 9; @@ -411,7 +523,7 @@ static int blkif_queue_request(struct request *req) .gref = ref, .first_sect = fsect, .last_sect = lsect }; - info->ops->save_seg_shadow(info, buffer_mfn, id, i); + info->ops->save_seg_shadow(info, buffer_mfn, id, i, ring_seg); } } @@ -423,6 +535,11 @@ static int blkif_queue_request(struct request *req) gnttab_free_grant_references(gref_head); return 0; +wait: + gnttab_free_grant_references(gref_head); + pr_debug("No enough segment!\n"); + info->ops->add_id(info, id); + return 1; } void ring_push(struct blkfront_info *info, int *notify) @@ -430,6 +547,13 @@ void ring_push(struct blkfront_info *info, int *notify) RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, *notify); } +void ring_push_v2(struct blkfront_info *info, int *notify) +{ + RING_PUSH_REQUESTS(&info->segring); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->reqring, *notify); +} + + static inline void flush_requests(struct blkfront_info *info) { int notify; @@ -440,6 +564,16 @@ static inline void flush_requests(struct blkfront_info *info) notify_remote_via_irq(info->irq); } +static int ring_free_v2(struct blkfront_info *info) +{ + return (!RING_FULL(&info->reqring) && + RING_FREE_REQUESTS(&info->segring) > RING_SIZE(&info->segring)/3); +} +static int ring_full_v2(struct blkfront_info *info) +{ + return (RING_FULL(&info->reqring) || RING_FULL(&info->segring)); +} + /* * do_blkif_request * read a block; request is in a 
request queue @@ -490,6 +624,17 @@ wait: flush_requests(info); } +static void update_blk_queue(struct blkfront_info *info) +{ + struct request_queue *q = info->rq; + + blk_queue_max_segments(q, info->ops->max_seg); + blk_queue_max_hw_sectors(q, queue_max_segments(q) * + queue_max_segment_size(q) / + queue_logical_block_size(q)); + return; +} + static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; @@ -740,7 +885,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) static void kick_pending_request_queues(struct blkfront_info *info) { - if (!ring_full(info)) { + if (!info->ops->ring_full(info)) { /* Re-enable calldowns. */ blk_start_queue(info->rq); /* Kick things off immediately. */ @@ -793,39 +938,115 @@ static void blkif_completion(struct blkfront_info *info, unsigned long id) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } +static void blkif_completion_v2(struct blkfront_info *info, unsigned long id) +{ + int i; + /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place + * flag. 
*/ + unsigned short nr = info->req_shadow[id].req.u.rw.nr_segments; + unsigned long shadow_id, free_id; + + shadow_id = info->req_shadow[id].req.u.rw.seg_id; + for (i = 0; i < nr; i++) { + gnttab_end_foreign_access(info->seg_shadow[shadow_id].req.gref, 0, 0UL); + free_id = shadow_id; + shadow_id = info->seg_shadow[shadow_id].id; + free_seg_shadow_id(info, free_id); + } +} + struct blkif_response *ring_get_response(struct blkfront_info *info) { return RING_GET_RESPONSE(&info->ring, info->ring.rsp_cons); } + +struct blkif_response *ring_get_response_v2(struct blkfront_info *info) +{ + return RING_GET_RESPONSE(&info->reqring, info->reqring.rsp_cons); +} + RING_IDX get_rsp_prod(struct blkfront_info *info) { return info->ring.sring->rsp_prod; } + +RING_IDX get_rsp_prod_v2(struct blkfront_info *info) +{ + return info->reqring.sring->rsp_prod; +} + RING_IDX get_rsp_cons(struct blkfront_info *info) { return info->ring.rsp_cons; } + +RING_IDX get_rsp_cons_v2(struct blkfront_info *info) +{ + return info->reqring.rsp_cons; +} + struct request *get_req_from_shadow(struct blkfront_info *info, unsigned long id) { return info->shadow[id].request; } + +struct request *get_req_from_shadow_v2(struct blkfront_info *info, + unsigned long id) +{ + return info->req_shadow[id].request; +} + void update_rsp_cons(struct blkfront_info *info) { info->ring.rsp_cons++; } + +void update_rsp_cons_v2(struct blkfront_info *info) +{ + info->reqring.rsp_cons++; +} + RING_IDX get_req_prod_pvt(struct blkfront_info *info) { return info->ring.req_prod_pvt; } + +RING_IDX get_req_prod_pvt_v2(struct blkfront_info *info) +{ + return info->reqring.req_prod_pvt; +} + void check_left_response(struct blkfront_info *info, int *more_to_do) { RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, *more_to_do); } + +void check_left_response_v2(struct blkfront_info *info, int *more_to_do) +{ + RING_FINAL_CHECK_FOR_RESPONSES(&info->reqring, *more_to_do); +} + void update_rsp_event(struct blkfront_info *info, int i) { 
info->ring.sring->rsp_event = i + 1; } + +void update_rsp_event_v2(struct blkfront_info *info, int i) +{ + info->reqring.sring->rsp_event = i + 1; +} + +void update_segment_rsp_cons(struct blkfront_info *info, unsigned long id) +{ + return; +} + +void update_segment_rsp_cons_v2(struct blkfront_info *info, unsigned long id) +{ + info->segring.rsp_cons += info->req_shadow[id].req.u.rw.nr_segments; + return; +} static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; @@ -903,8 +1124,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) if (unlikely(bret->status != BLKIF_RSP_OKAY)) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - __blk_end_request_all(req, error); + info->ops->update_segment_rsp_cons(info, id); break; default: BUG(); @@ -949,6 +1170,43 @@ static int init_shadow(struct blkfront_info *info) return 0; } +static int init_shadow_v2(struct blkfront_info *info) +{ + unsigned int ring_size; + int i; + + if (info->ring_type != RING_TYPE_UNDEFINED) + return 0; + + info->ring_type = RING_TYPE_2; + + ring_size = BLK_REQ_RING_SIZE; + info->req_shadow = kzalloc(sizeof(struct blk_req_shadow) * ring_size, + GFP_KERNEL); + if (!info->req_shadow) + return -ENOMEM; + + for (i = 0; i < ring_size; i++) + info->req_shadow[i].req.u.rw.id = i+1; + info->req_shadow[ring_size - 1].req.u.rw.id = 0x0fffffff; + + ring_size = BLK_SEG_RING_SIZE; + + info->seg_shadow = kzalloc(sizeof(struct blk_seg_shadow) * ring_size, + GFP_KERNEL); + if (!info->seg_shadow) { + kfree(info->req_shadow); + return -ENOMEM; + } + + for (i = 0; i < ring_size; i++) { + info->seg_shadow[i].id = i+1; + } + info->seg_shadow[ring_size - 1].id = 0x0fffffff; + + return 0; +} + static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { @@ -1003,6 +1261,84 @@ fail: return err; } +static int setup_blkring_v2(struct xenbus_device *dev, + struct blkfront_info *info) +{ + struct blkif_request_sring *sring; + struct 
blkif_segment_sring *seg_sring; + int err; + + info->reqring_ref = GRANT_INVALID_REF; + + sring = (struct blkif_request_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + if (!sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->reqring, sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(info->reqring.sring)); + if (err < 0) { + free_page((unsigned long)sring); + info->reqring.sring = NULL; + goto fail; + } + + info->reqring_ref = err; + + info->segring_ref = GRANT_INVALID_REF; + + seg_sring = (struct blkif_segment_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + if (!seg_sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + err = -ENOMEM; + goto fail; + } + SHARED_RING_INIT(seg_sring); + FRONT_RING_INIT(&info->segring, seg_sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(info->segring.sring)); + if (err < 0) { + free_page((unsigned long)seg_sring); + info->segring.sring = NULL; + goto fail; + } + + info->segring_ref = err; + + info->sg = kzalloc(sizeof(struct scatterlist) * info->ops->max_seg, + GFP_KERNEL); + if (!info->sg) { + err = -ENOMEM; + goto fail; + } + sg_init_table(info->sg, info->ops->max_seg); + + err = init_shadow_v2(info); + if (err) + goto fail; + + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; + + err = bind_evtchn_to_irqhandler(info->evtchn, + blkif_interrupt, + IRQF_SAMPLE_RANDOM, "blkif", info); + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; + + return 0; +fail: + blkif_free(info, 0); + return err; +} + static void free_blkring(struct blkfront_info *info, int suspend) { if (info->ring_ref != GRANT_INVALID_REF) { @@ -1018,6 +1354,32 @@ static void free_blkring(struct blkfront_info *info, int suspend) kfree(info->shadow); } +static void free_blkring_v2(struct blkfront_info *info, int suspend) +{ + if 
(info->reqring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->reqring_ref, 0, + (unsigned long)info->reqring.sring); + info->reqring_ref = GRANT_INVALID_REF; + info->reqring.sring = NULL; + } + + if (info->segring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->segring_ref, 0, + (unsigned long)info->segring.sring); + info->segring_ref = GRANT_INVALID_REF; + info->segring.sring = NULL; + } + + kfree(info->sg); + + if(!suspend) { + kfree(info->req_shadow); + kfree(info->seg_shadow); + } + +} + + /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1025,9 +1387,17 @@ static int talk_to_blkback(struct xenbus_device *dev, const char *message = NULL; struct xenbus_transaction xbt; int err; + unsigned int type; /* register ring ops */ - info->ops = &blk_front_ops; + err = xenbus_scanf(XBT_NIL, dev->otherend, "blkback-ring-type", "%u", + &type); + if (err != 1) + type = 1; + if (type == 2) + info->ops = &blk_front_ops_v2; + else + info->ops = &blk_front_ops; /* Create shared ring, alloc event channel. 
*/ err = info->ops->setup_blkring(dev, info); @@ -1040,13 +1410,6 @@ again: xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_blkring; } - - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; - } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); if (err) { @@ -1059,7 +1422,40 @@ again: message = "writing protocol"; goto abort_transaction; } - + if (type == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type", + "%u", type); + if (err) { + message = "writing blkfront ring type"; + goto abort_transaction; + } + } + if (type == 2) { + err = xenbus_printf(xbt, dev->nodename, + "reqring-ref", "%u", info->reqring_ref); + if (err) { + message = "writing reqring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "segring-ref", "%u", info->segring_ref); + if (err) { + message = "writing segring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type", + "%u", type); + if (err) { + message = "writing blkfront ring type"; + goto abort_transaction; + } + } err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) @@ -1164,7 +1560,7 @@ static int blkfront_probe(struct xenbus_device *dev, } -static int blkif_recover(struct blkfront_info *info) +static int recover_from_v1_to_v1(struct blkfront_info *info) { int i; struct blkif_request *req; @@ -1233,6 +1629,372 @@ static int blkif_recover(struct blkfront_info *info) return 0; } +/* migrate from V2 type ring to V1 type*/ +static int recover_from_v2_to_v1(struct blkfront_info *info) +{ + struct blk_req_shadow *copy; + struct blk_seg_shadow *seg_copy; + struct request *req; + struct blkif_request *new_req; + int i, j, err; + unsigned int req_rs; + 
struct bio *biolist = NULL, *biotail = NULL, *bio; + unsigned long index; + unsigned long flags; + + pr_info("Warning, migrate to older backend, some io may fail\n"); + + /* Stage 1: Init the new shadow state. */ + info->ring_type = RING_TYPE_UNDEFINED; + err = init_shadow(info); + if (err) + return err; + + req_rs = BLK_REQ_RING_SIZE; + + /* Stage 2: Set up free list. */ + info->shadow_free = info->ring.req_prod_pvt; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < req_rs; i++) { + req = info->req_shadow[i].request; + /* Not in use? */ + if (!req) + continue; + + if (ring_full(info)) + goto out; + + copy = &info->req_shadow[i]; + + /* We get a new request, reset the blkif request and shadow state. */ + new_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + + if (copy->req.operation == BLKIF_OP_DISCARD) { + new_req->operation = BLKIF_OP_DISCARD; + new_req->u.discard = copy->req.u.discard; + new_req->u.discard.id = get_id_from_freelist(info); + info->shadow[new_req->u.discard.id].request = req; + } + else { + if (copy->req.u.rw.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) + continue; + + new_req->u.rw.id = get_id_from_freelist(info); + info->shadow[new_req->u.rw.id].request = req; + new_req->operation = copy->req.operation; + new_req->u.rw.nr_segments = copy->req.u.rw.nr_segments; + new_req->u.rw.handle = copy->req.u.rw.handle; + new_req->u.rw.sector_number = copy->req.u.rw.sector_number; + index = copy->req.u.rw.seg_id; + for (j = 0; j < new_req->u.rw.nr_segments; j++) { + seg_copy = &info->seg_shadow[index]; + new_req->u.rw.seg[j].gref = seg_copy->req.gref; + new_req->u.rw.seg[j].first_sect = seg_copy->req.first_sect; + new_req->u.rw.seg[j].last_sect = seg_copy->req.last_sect; + info->shadow[new_req->u.rw.id].frame[j] = seg_copy->frame; + gnttab_grant_foreign_access_ref( + new_req->u.rw.seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[new_req->u.rw.id].frame[j]), + 
rq_data_dir(info->shadow[new_req->u.rw.id].request)); + index = info->seg_shadow[index].id; + } + } + info->shadow[new_req->u.rw.id].req = *new_req; + info->ring.req_prod_pvt++; + info->req_shadow[i].request = NULL; + + } +out: + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + spin_lock_irqsave(&info->io_lock, flags); + + /* cancel the request and resubmit the bio */ + for (i = 0; i < req_rs; i++) { + req = info->req_shadow[i].request; + if (!req) + continue; + + blkif_completion_v2(info, i); + + if (biolist == NULL) + biolist = req->bio; + else + biotail->bi_next = req->bio; + biotail = req->biotail; + req->bio = NULL; + __blk_put_request(info->rq, req); + } + + while ((req = blk_peek_request(info->rq)) != NULL) { + + blk_start_request(req); + + if (biolist == NULL) + biolist = req->bio; + else + biotail->bi_next = req->bio; + biotail = req->biotail; + req->bio = NULL; + __blk_put_request(info->rq, req); + } + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; + + /* need update the queue limit setting */ + update_blk_queue(info); + + /* Send off requeued requests */ + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&info->io_lock, flags); + + /* free original shadow*/ + kfree(info->seg_shadow); + kfree(info->req_shadow); + + while(biolist) { + bio = biolist; + biolist = biolist->bi_next; + bio->bi_next = NULL; + submit_bio(bio->bi_rw, bio); + } + + return 0; +} + +static int blkif_recover(struct blkfront_info *info) +{ + int rc; + + if (info->ring_type == RING_TYPE_1) + rc = recover_from_v1_to_v1(info); + else if (info->ring_type == RING_TYPE_2) + rc = recover_from_v2_to_v1(info); + else + rc = -EPERM; + return rc; +} + +static int recover_from_v1_to_v2(struct blkfront_info *info) +{ + int i,err; + struct blkif_request_header *req; + struct blkif_request_segment *segring_req; + struct blk_shadow *copy; + int j; + 
unsigned long seg_id, last_id = 0x0fffffff; + + /* Stage 1: Init the new shadow. */ + info->ring_type = RING_TYPE_UNDEFINED; + err = init_shadow_v2(info); + if (err) + return err; + + /* Stage 2: Set up free list. */ + info->shadow_free = info->reqring.req_prod_pvt; + info->seg_shadow_free = info->segring.req_prod_pvt; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + copy = &info->shadow[i]; + /* Not in use? */ + if (!copy->request) + continue; + + /* We get a new request, reset the blkif request and shadow state. */ + req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt); + + if (copy->req.operation == BLKIF_OP_DISCARD) { + req->operation = BLKIF_OP_DISCARD; + req->u.discard = copy->req.u.discard; + req->u.discard.id = get_id_from_freelist_v2(info); + info->req_shadow[req->u.discard.id].request = copy->request; + info->req_shadow[req->u.discard.id].req = *req; + } + else { + req->u.rw.id = get_id_from_freelist_v2(info); + req->operation = copy->req.operation; + req->u.rw.nr_segments = copy->req.u.rw.nr_segments; + req->u.rw.handle = copy->req.u.rw.handle; + req->u.rw.sector_number = copy->req.u.rw.sector_number; + for (j = 0; j < req->u.rw.nr_segments; j++) { + seg_id = get_seg_shadow_id(info); + if (j == 0) + req->u.rw.seg_id = seg_id; + else + info->seg_shadow[last_id].id = seg_id; + segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt); + segring_req->gref = copy->req.u.rw.seg[j].gref; + segring_req->first_sect = copy->req.u.rw.seg[j].first_sect; + segring_req->last_sect = copy->req.u.rw.seg[j].last_sect; + info->seg_shadow[seg_id].req = *segring_req; + info->seg_shadow[seg_id].frame = copy->frame[j]; + info->segring.req_prod_pvt++; + gnttab_grant_foreign_access_ref( + segring_req->gref, + info->xbdev->otherend_id, + pfn_to_mfn(copy->frame[j]), + rq_data_dir(copy->request)); + last_id = seg_id; + } + info->req_shadow[req->u.rw.id].req = *req; + 
info->req_shadow[req->u.rw.id].request = copy->request; + } + + info->reqring.req_prod_pvt++; + } + + /* need update the queue limit setting */ + update_blk_queue(info); + + /* free original shadow*/ + kfree(info->shadow); + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + spin_lock_irq(&info->io_lock); + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; + + /* Send off requeued requests */ + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + + spin_unlock_irq(&info->io_lock); + + return 0; +} + +static int recover_from_v2_to_v2(struct blkfront_info *info) +{ + int i; + struct blkif_request_header *req; + struct blkif_request_segment *segring_req; + struct blk_req_shadow *copy; + struct blk_seg_shadow *seg_copy; + unsigned long index = 0x0fffffff, seg_id, last_id = 0x0fffffff; + int j; + unsigned int req_rs, seg_rs; + unsigned long flags; + + req_rs = BLK_REQ_RING_SIZE; + seg_rs = BLK_SEG_RING_SIZE; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = kmalloc(sizeof(struct blk_req_shadow) * req_rs, + GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); + if (!copy) + return -ENOMEM; + + seg_copy = kmalloc(sizeof(struct blk_seg_shadow) * seg_rs, + GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); + if (!seg_copy ) { + kfree(copy); + return -ENOMEM; + } + + memcpy(copy, info->req_shadow, sizeof(struct blk_req_shadow) * req_rs); + memcpy(seg_copy, info->seg_shadow, + sizeof(struct blk_seg_shadow) * seg_rs); + + /* Stage 2: Set up free list. */ + for (i = 0; i < req_rs; i++) + info->req_shadow[i].req.u.rw.id = i+1; + info->req_shadow[req_rs - 1].req.u.rw.id = 0x0fffffff; + + for (i = 0; i < seg_rs; i++) + info->seg_shadow[i].id = i+1; + info->seg_shadow[seg_rs - 1].id = 0x0fffffff; + + info->shadow_free = info->reqring.req_prod_pvt; + info->seg_shadow_free = info->segring.req_prod_pvt; + + /* Stage 3: Find pending requests and requeue them. 
*/ + for (i = 0; i < req_rs; i++) { + /* Not in use? */ + if (!copy[i].request) + continue; + + req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt); + *req = copy[i].req; + + req->u.rw.id = get_id_from_freelist_v2(info); + memcpy(&info->req_shadow[req->u.rw.id], ©[i], sizeof(copy[i])); + + if (req->operation != BLKIF_OP_DISCARD) { + for (j = 0; j < req->u.rw.nr_segments; j++) { + seg_id = get_seg_shadow_id(info); + if (j == 0) + index = req->u.rw.seg_id; + else + index = seg_copy[index].id ; + gnttab_grant_foreign_access_ref( + seg_copy[index].req.gref, + info->xbdev->otherend_id, + pfn_to_mfn(seg_copy[index].frame), + rq_data_dir(info->req_shadow[req->u.rw.id].request)); + segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt); + memcpy(segring_req, &(seg_copy[index].req), + sizeof(struct blkif_request_segment)); + if (j == 0) + req->u.rw.seg_id = seg_id; + else + info->seg_shadow[last_id].id = seg_id; + + memcpy(&info->seg_shadow[seg_id], + &seg_copy[index], sizeof(struct blk_seg_shadow)); + info->segring.req_prod_pvt++; + last_id = seg_id; + } + } + info->req_shadow[req->u.rw.id].req = *req; + + info->reqring.req_prod_pvt++; + } + + kfree(seg_copy); + kfree(copy); + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + spin_lock_irqsave(&info->io_lock, flags); + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; + + /* Send off requeued requests */ + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&info->io_lock, flags); + + return 0; +} + +static int blkif_recover_v2(struct blkfront_info *info) +{ + int rc; + + if (info->ring_type == RING_TYPE_1) + rc = recover_from_v1_to_v2(info); + else if (info->ring_type == RING_TYPE_2) + rc = recover_from_v2_to_v2(info); + else + rc = -EPERM; + return rc; +} /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * 
driver restart. We tear down our blkif structure and recreate it, but @@ -1609,15 +2371,44 @@ static struct blk_front_operations blk_front_ops = { .update_rsp_event = update_rsp_event, .update_rsp_cons = update_rsp_cons, .update_req_prod_pvt = update_req_prod_pvt, + .update_segment_rsp_cons = update_segment_rsp_cons, .ring_push = ring_push, .recover = blkif_recover, .ring_full = ring_full, + .segring_full = segring_full, .setup_blkring = setup_blkring, .free_blkring = free_blkring, .blkif_completion = blkif_completion, .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST, }; +static struct blk_front_operations blk_front_ops_v2 = { + .ring_get_request = ring_get_request_v2, + .ring_get_response = ring_get_response_v2, + .ring_get_segment = ring_get_segment_v2, + .get_id = get_id_from_freelist_v2, + .add_id = add_id_to_freelist_v2, + .save_seg_shadow = save_seg_shadow_v2, + .save_req_shadow = save_req_shadow_v2, + .get_req_from_shadow = get_req_from_shadow_v2, + .get_rsp_prod = get_rsp_prod_v2, + .get_rsp_cons = get_rsp_cons_v2, + .get_req_prod_pvt = get_req_prod_pvt_v2, + .check_left_response = check_left_response_v2, + .update_rsp_event = update_rsp_event_v2, + .update_rsp_cons = update_rsp_cons_v2, + .update_req_prod_pvt = update_req_prod_pvt_v2, + .update_segment_rsp_cons = update_segment_rsp_cons_v2, + .ring_push = ring_push_v2, + .recover = blkif_recover_v2, + .ring_full = ring_full_v2, + .segring_full = segring_full_v2, + .setup_blkring = setup_blkring_v2, + .free_blkring = free_blkring_v2, + .blkif_completion = blkif_completion_v2, + .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST_V2, +}; + static const struct block_device_operations xlvbd_block_fops = { .owner = THIS_MODULE, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index f100ce2..a5a98b0 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -475,7 +475,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, /* XXX This needs to be fixed so that the ref and page are 
placed on a list to be freed up later. */ printk(KERN_WARNING - "WARNING: leaking g.e. and page still in use!\n"); + "WARNING: ref %u leaking g.e. and page still in use!\n", ref); } } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ee338bf..763489a 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -108,6 +108,7 @@ typedef uint64_t blkif_sector_t; * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +#define BLKIF_MAX_SEGMENTS_PER_REQUEST_V2 128 struct blkif_request_rw { uint8_t nr_segments; /* number of segments */ @@ -125,6 +126,17 @@ struct blkif_request_rw { } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } __attribute__((__packed__)); +struct blkif_request_rw_header { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ +#endif + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + uint64_t seg_id; /* segment id in the segment shadow */ +} __attribute__((__packed__)); + struct blkif_request_discard { uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ @@ -135,7 +147,6 @@ struct blkif_request_discard { uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number; uint64_t nr_sectors; - uint8_t _pad3; } __attribute__((__packed__)); struct blkif_request { @@ -146,12 +157,24 @@ struct blkif_request { } u; } __attribute__((__packed__)); +struct blkif_request_header { + uint8_t operation; /* BLKIF_OP_??? 
*/ + union { + struct blkif_request_rw_header rw; + struct blkif_request_discard discard; + } u; +} __attribute__((__packed__)); + struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; +struct blkif_response_segment { + char dummy; +} __attribute__((__packed__)); + /* * STATUS RETURN CODES. */ @@ -167,6 +190,8 @@ struct blkif_response { */ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); +DEFINE_RING_TYPES(blkif_request, struct blkif_request_header, struct blkif_response); +DEFINE_RING_TYPES(blkif_segment, struct blkif_request_segment, struct blkif_response_segment); #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 -ronghui Attachment:
vbd_enlarge_segments_02.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support.