
[Xen-devel] [RFC v1 2/5] VBD: enlarge max segment per request in blkfront



Add segment ring (segring) support to blkfront.

When the backend advertises blkback-ring-type = 2, the frontend sets up
two shared rings instead of one: a request ring carrying struct
blkif_request_header entries and a separate segment ring carrying
struct blkif_request_segment entries.  Each request stores only the
shadow index of its first segment (seg_id); the remaining segments are
chained through the segment shadow array and freed on completion.  This
lifts the per-request segment limit from BLKIF_MAX_SEGMENTS_PER_REQUEST
(11) to BLKIF_MAX_SEGMENTS_PER_REQUEST_V2 (128).  The ring type is
negotiated over xenstore, the existing single-ring path remains the
default for old backends, and suspend/resume can migrate in-flight
requests between the two ring types.
Signed-off-by: Ronghui Duan <ronghui.duan@xxxxxxxxx>
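
To make the new bookkeeping easier to follow, here is a small,
self-contained sketch (illustration only, not part of the patch; the
names, sizes and frame values are simplified stand-ins for the driver's
structures) of how the V2 path chains a request's segments through the
segment shadow array: get_seg_shadow_id()/free_seg_shadow_id() treat
seg_shadow[].id as a free list, while save_seg_shadow_v2() and
blkif_completion_v2() reuse the same field to link the segments that
belong to one request, with the head index kept in req.u.rw.seg_id.

#include <stdio.h>

#define SEG_SHADOW_SIZE 16
#define LIST_END        0x0fffffff

struct seg_shadow {
        unsigned long id;     /* next free slot, or next segment of a request */
        unsigned long frame;  /* pfn backing this segment (placeholder here)  */
};

static struct seg_shadow seg_shadow[SEG_SHADOW_SIZE];
static unsigned long seg_shadow_free;

static void seg_shadow_init(void)
{
        int i;

        for (i = 0; i < SEG_SHADOW_SIZE; i++)
                seg_shadow[i].id = i + 1;
        seg_shadow[SEG_SHADOW_SIZE - 1].id = LIST_END;
        seg_shadow_free = 0;
}

/* pop a slot off the free list (mirrors get_seg_shadow_id()) */
static unsigned long seg_shadow_get(void)
{
        unsigned long free = seg_shadow_free;

        seg_shadow_free = seg_shadow[free].id;
        return free;
}

/* push a slot back onto the free list (mirrors free_seg_shadow_id()) */
static void seg_shadow_put(unsigned long id)
{
        seg_shadow[id].id = seg_shadow_free;
        seg_shadow_free = id;
}

int main(void)
{
        unsigned long head = LIST_END, last = LIST_END, cur;
        int i;

        seg_shadow_init();

        /* Queue one request with 3 segments, chaining them the way
         * save_seg_shadow_v2() does. */
        for (i = 0; i < 3; i++) {
                unsigned long id = seg_shadow_get();

                if (i == 0)
                        head = id;                  /* stored in req.u.rw.seg_id */
                else
                        seg_shadow[last].id = id;
                seg_shadow[id].frame = 0x1000 + i;  /* placeholder pfn */
                last = id;
        }

        /* Complete the request: walk the chain and free each slot,
         * as blkif_completion_v2() does. */
        cur = head;
        for (i = 0; i < 3; i++) {
                unsigned long next = seg_shadow[cur].id;

                printf("segment %d in shadow slot %lu (frame %#lx)\n",
                       i, cur, seg_shadow[cur].frame);
                seg_shadow_put(cur);
                cur = next;
        }
        return 0;
}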

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a263faf..b9f383d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -76,10 +76,23 @@ struct blk_shadow {
        unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
+struct blk_req_shadow {
+       struct blkif_request_header req;
+       struct request *request;
+};
+
+struct blk_seg_shadow {
+       uint64_t id;
+       struct blkif_request_segment req;
+       unsigned long frame;
+};
+
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_REQ_RING_SIZE __CONST_RING_SIZE(blkif_request, PAGE_SIZE)
+#define BLK_SEG_RING_SIZE __CONST_RING_SIZE(blkif_segment, PAGE_SIZE)
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -96,22 +109,30 @@ struct blkfront_info
        blkif_vdev_t handle;
        enum blkif_state connected;
        int ring_ref;
+       int reqring_ref;
+       int segring_ref;
        struct blkif_front_ring ring;
+       struct blkif_request_front_ring reqring;
+       struct blkif_segment_front_ring segring;
        struct scatterlist *sg;
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
        struct blk_shadow *shadow;
+       struct blk_req_shadow *req_shadow;
+       struct blk_seg_shadow *seg_shadow;
        struct blk_front_operations *ops;
        enum blkif_ring_type ring_type;
        unsigned long shadow_free;
+       unsigned long seg_shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
        unsigned int feature_discard:1;
        unsigned int feature_secdiscard:1;
        unsigned int discard_granularity;
        unsigned int discard_alignment;
+       unsigned long last_id;
        int is_ready;
 };
 
@@ -124,7 +145,7 @@ static struct blk_front_operations {
        unsigned long (*get_id) (struct blkfront_info *info);
        void (*add_id) (struct blkfront_info *info, unsigned long id);
        void (*save_seg_shadow) (struct blkfront_info *info, unsigned long mfn,
-                                unsigned long id, int i);
+                                unsigned long id, int i, struct blkif_request_segment *ring_seg);
        void (*save_req_shadow) (struct blkfront_info *info,
                                 struct request *req, unsigned long id);
        struct request *(*get_req_from_shadow)(struct blkfront_info *info,
@@ -136,14 +157,16 @@ static struct blk_front_operations {
        void (*update_rsp_event) (struct blkfront_info *info, int i);
        void (*update_rsp_cons) (struct blkfront_info *info);
        void (*update_req_prod_pvt) (struct blkfront_info *info);
+       void (*update_segment_rsp_cons) (struct blkfront_info *info, unsigned long id);
        void (*ring_push) (struct blkfront_info *info, int *notify);
        int (*recover) (struct blkfront_info *info);
        int (*ring_full) (struct blkfront_info *info);
+       int (*segring_full) (struct blkfront_info *info, unsigned int nr_segments);
        int (*setup_blkring) (struct xenbus_device *dev, struct blkfront_info *info);
        void (*free_blkring) (struct blkfront_info *info, int suspend);
        void (*blkif_completion) (struct blkfront_info *info, unsigned long id);
        unsigned int max_seg;
-} blk_front_ops; 
+} blk_front_ops, blk_front_ops_v2; 
 
 static unsigned int nr_minors;
 static unsigned long *minors;
@@ -179,6 +202,24 @@ static unsigned long get_id_from_freelist(struct blkfront_info *info)
        return free;
 }
 
+static unsigned long get_id_from_freelist_v2(struct blkfront_info *info)
+{
+       unsigned long free = info->shadow_free;
+       BUG_ON(free >= BLK_REQ_RING_SIZE);
+       info->shadow_free = info->req_shadow[free].req.u.rw.id;
+       info->req_shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
+       return free;
+}
+
+static unsigned long get_seg_shadow_id(struct blkfront_info *info)
+{
+       unsigned long free = info->seg_shadow_free;
+       BUG_ON(free >= BLK_SEG_RING_SIZE);
+       info->seg_shadow_free = info->seg_shadow[free].id;
+       info->seg_shadow[free].id = 0x0fffffee; /* debug */
+       return free;
+}
+
 void add_id_to_freelist(struct blkfront_info *info,
                               unsigned long id)
 {
@@ -187,6 +228,21 @@ void add_id_to_freelist(struct blkfront_info *info,
        info->shadow_free = id;
 }
 
+static void add_id_to_freelist_v2(struct blkfront_info *info,
+                                 unsigned long id)
+{
+       info->req_shadow[id].req.u.rw.id  = info->shadow_free;
+       info->req_shadow[id].request = NULL;
+       info->shadow_free = id;
+}
+
+static void free_seg_shadow_id(struct blkfront_info *info,
+                                 unsigned long id)
+{
+       info->seg_shadow[id].id  = info->seg_shadow_free;
+       info->seg_shadow_free = id;
+}
+
 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
 {
        unsigned int end = minor + nr;
@@ -299,6 +355,14 @@ void *ring_get_request(struct blkfront_info *info)
        return (void *)RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 }
 
+void *ring_get_request_v2(struct blkfront_info *info)
+{
+       struct blkif_request_header *ring_req;
+       ring_req = RING_GET_REQUEST(&info->reqring,
+                               info->reqring.req_prod_pvt);
+       return (void *)ring_req;
+}
+
 struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i)
 {
        struct blkif_request *ring_req =
@@ -306,12 +370,34 @@ struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i
        return &ring_req->u.rw.seg[i];
 }
 
-void save_seg_shadow(struct blkfront_info *info,
-                     unsigned long mfn, unsigned long id, int i)
+struct blkif_request_segment *ring_get_segment_v2(struct blkfront_info *info, int i)
+{
+       return RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt++);
+}
+
+void save_seg_shadow(struct blkfront_info *info, unsigned long mfn,
+                    unsigned long id, int i, struct blkif_request_segment *ring_seg)
 {
        info->shadow[id].frame[i] = mfn_to_pfn(mfn);
 }
 
+void save_seg_shadow_v2(struct blkfront_info *info, unsigned long mfn,
+                       unsigned long id, int i, struct blkif_request_segment *ring_seg)
+{
+       struct blkif_request_header *ring_req;
+       unsigned long seg_id = get_seg_shadow_id(info);
+
+       ring_req = (struct blkif_request_header *)info->ops->ring_get_request(info);
+       if (i == 0)
+               ring_req->u.rw.seg_id = seg_id;
+       else
+               info->seg_shadow[info->last_id].id = seg_id;
+       info->seg_shadow[seg_id].frame = mfn_to_pfn(mfn);
+       memcpy(&(info->seg_shadow[seg_id].req), ring_seg,
+              sizeof(struct blkif_request_segment));
+       info->last_id = seg_id;
+}
+
 void save_req_shadow(struct blkfront_info *info,
                      struct request *req, unsigned long id)
 {
@@ -321,10 +407,34 @@ void save_req_shadow(struct blkfront_info *info,
        info->shadow[id].request = req;
 }
 
+void save_req_shadow_v2(struct blkfront_info *info,
+                     struct request *req, unsigned long id)
+{
+       struct blkif_request_header *ring_req =
+                       (struct blkif_request_header *)info->ops->ring_get_request(info);
+       info->req_shadow[id].req = *ring_req;
+       info->req_shadow[id].request = req;
+}
+
 void update_req_prod_pvt(struct blkfront_info *info)
 {
        info->ring.req_prod_pvt++;
 }
+
+void update_req_prod_pvt_v2(struct blkfront_info *info)
+{
+       info->reqring.req_prod_pvt++;
+}
+
+int segring_full(struct blkfront_info *info, unsigned int nr_segments)
+{
+       return 0;
+}
+
+int segring_full_v2(struct blkfront_info *info, unsigned int nr_segments)
+{
+       return nr_segments > RING_FREE_REQUESTS(&info->segring);
+}
 /*
  * Generate a Xen blkfront IO request from a blk layer request.  Reads
  * and writes are handled as expected.
@@ -347,19 +457,18 @@ static int blkif_queue_request(struct request *req)
                return 1;
 
        if (gnttab_alloc_grant_references(
-               BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+               info->ops->max_seg, &gref_head) < 0) {
                gnttab_request_free_callback(
                        &info->callback,
                        blkif_restart_queue_callback,
                        info,
-                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                       info->ops->max_seg);
                return 1;
        }
 
        /* Fill out a communications ring structure. */
        ring_req = (struct blkif_request *)info->ops->ring_get_request(info);
        id = info->ops->get_id(info);
-       //info->shadow[id].request = req;
 
        ring_req->u.rw.id = id;
        ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -392,6 +501,9 @@ static int blkif_queue_request(struct request *req)
                                                           info->sg);
                BUG_ON(ring_req->u.rw.nr_segments > info->ops->max_seg);
 
+               if (info->ops->segring_full(info, ring_req->u.rw.nr_segments))
+                       goto wait;
+
                for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
                        buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
                        fsect = sg->offset >> 9;
@@ -411,7 +523,7 @@ static int blkif_queue_request(struct request *req)
                                        .gref       = ref,
                                        .first_sect = fsect,
                                        .last_sect  = lsect };
-                       info->ops->save_seg_shadow(info, buffer_mfn, id, i);
+                       info->ops->save_seg_shadow(info, buffer_mfn, id, i, ring_seg);
                }
        }
 
@@ -423,6 +535,11 @@ static int blkif_queue_request(struct request *req)
        gnttab_free_grant_references(gref_head);
 
        return 0;
+wait:
+       gnttab_free_grant_references(gref_head);
+       pr_debug("Not enough free segment ring slots\n");
+       info->ops->add_id(info, id);
+       return 1;
 }
 
 void ring_push(struct blkfront_info *info, int *notify)
@@ -430,6 +547,13 @@ void ring_push(struct blkfront_info *info, int *notify)
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, *notify);
 }
 
+void ring_push_v2(struct blkfront_info *info, int *notify)
+{
+       RING_PUSH_REQUESTS(&info->segring);
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->reqring, *notify);
+}
+
+
 static inline void flush_requests(struct blkfront_info *info)
 {
        int notify;
@@ -440,6 +564,16 @@ static inline void flush_requests(struct blkfront_info *info)
                notify_remote_via_irq(info->irq);
 }
 
+static int ring_free_v2(struct blkfront_info *info)
+{
+       return (!RING_FULL(&info->reqring) &&
+               RING_FREE_REQUESTS(&info->segring) > RING_SIZE(&info->segring)/3);
+}
+static int ring_full_v2(struct blkfront_info *info)
+{
+       return (RING_FULL(&info->reqring) || RING_FULL(&info->segring));
+}
+
 /*
  * do_blkif_request
  *  read a block; request is in a request queue
@@ -490,6 +624,17 @@ wait:
                flush_requests(info);
 }
 
+static void update_blk_queue(struct blkfront_info *info)
+{
+       struct request_queue *q = info->rq;
+
+       blk_queue_max_segments(q, info->ops->max_seg);
+       blk_queue_max_hw_sectors(q, queue_max_segments(q) *
+                                queue_max_segment_size(q) /
+                                queue_logical_block_size(q));
+       return;
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
        struct request_queue *rq;
@@ -740,7 +885,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
-       if (!ring_full(info)) {
+       if (!info->ops->ring_full(info)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
@@ -793,39 +938,115 @@ static void blkif_completion(struct blkfront_info *info, unsigned long id)
                gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
 }
 
+static void blkif_completion_v2(struct blkfront_info *info, unsigned long id)
+{
+       int i;
+       /* Do not do this for BLKIF_OP_DISCARD: nr_segments shares its
+        * location with the discard flag. */
+       unsigned short nr = info->req_shadow[id].req.u.rw.nr_segments;
+       unsigned long shadow_id, free_id;
+
+       shadow_id = info->req_shadow[id].req.u.rw.seg_id;
+       for (i = 0; i < nr; i++) {
+               gnttab_end_foreign_access(info->seg_shadow[shadow_id].req.gref, 0, 0UL);
+               free_id = shadow_id;
+               shadow_id = info->seg_shadow[shadow_id].id;
+               free_seg_shadow_id(info, free_id);
+       }
+}
+
 struct blkif_response *ring_get_response(struct blkfront_info *info)
 {
        return RING_GET_RESPONSE(&info->ring, info->ring.rsp_cons);
 }
+
+struct blkif_response *ring_get_response_v2(struct blkfront_info *info)
+{
+       return RING_GET_RESPONSE(&info->reqring, info->reqring.rsp_cons);
+}
+
 RING_IDX get_rsp_prod(struct blkfront_info *info)
 {
        return info->ring.sring->rsp_prod;
 }
+
+RING_IDX get_rsp_prod_v2(struct blkfront_info *info)
+{
+       return info->reqring.sring->rsp_prod;
+}
+
 RING_IDX get_rsp_cons(struct blkfront_info *info)
 {
        return info->ring.rsp_cons;
 }
+
+RING_IDX get_rsp_cons_v2(struct blkfront_info *info)
+{
+       return info->reqring.rsp_cons;
+}
+
 struct request *get_req_from_shadow(struct blkfront_info *info,
                                    unsigned long id)
 {
        return info->shadow[id].request;
 }
+
+struct request *get_req_from_shadow_v2(struct blkfront_info *info,
+                                   unsigned long id)
+{
+       return info->req_shadow[id].request;
+}
+
 void update_rsp_cons(struct blkfront_info *info)
 {
        info->ring.rsp_cons++;
 }
+
+void update_rsp_cons_v2(struct blkfront_info *info)
+{
+       info->reqring.rsp_cons++;
+}
+
 RING_IDX get_req_prod_pvt(struct blkfront_info *info)
 {
        return info->ring.req_prod_pvt;
 }
+
+RING_IDX get_req_prod_pvt_v2(struct blkfront_info *info)
+{
+       return info->reqring.req_prod_pvt;
+}
+
 void check_left_response(struct blkfront_info *info, int *more_to_do)
 {
        RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, *more_to_do);
 }
+
+void check_left_response_v2(struct blkfront_info *info, int *more_to_do)
+{
+       RING_FINAL_CHECK_FOR_RESPONSES(&info->reqring, *more_to_do);
+}
+
 void update_rsp_event(struct blkfront_info *info, int i)
 {
        info->ring.sring->rsp_event = i + 1;
 }
+
+void update_rsp_event_v2(struct blkfront_info *info, int i)
+{
+       info->reqring.sring->rsp_event = i + 1;
+}
+
+void update_segment_rsp_cons(struct blkfront_info *info, unsigned long id)
+{
+       return;
+}
+
+void update_segment_rsp_cons_v2(struct blkfront_info *info, unsigned long id)
+{
+       info->segring.rsp_cons += info->req_shadow[id].req.u.rw.nr_segments;
+       return;
+}
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
        struct request *req;
@@ -903,8 +1124,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                                        "request: %x\n", bret->status);
-
                        __blk_end_request_all(req, error);
+                       info->ops->update_segment_rsp_cons(info, id);
                        break;
                default:
                        BUG();
@@ -949,6 +1170,43 @@ static int init_shadow(struct blkfront_info *info)
        return 0;
 }
 
+static int init_shadow_v2(struct blkfront_info *info)
+{
+       unsigned int ring_size;
+       int i;
+
+       if (info->ring_type != RING_TYPE_UNDEFINED)
+               return 0;
+
+       info->ring_type = RING_TYPE_2;
+
+       ring_size = BLK_REQ_RING_SIZE;
+       info->req_shadow = kzalloc(sizeof(struct blk_req_shadow) * ring_size,
+                                  GFP_KERNEL);
+       if (!info->req_shadow)
+               return -ENOMEM;
+
+       for (i = 0; i < ring_size; i++)
+               info->req_shadow[i].req.u.rw.id = i+1;
+       info->req_shadow[ring_size - 1].req.u.rw.id = 0x0fffffff;
+
+       ring_size = BLK_SEG_RING_SIZE;
+
+       info->seg_shadow = kzalloc(sizeof(struct blk_seg_shadow) * ring_size,
+                                  GFP_KERNEL);         
+       if (!info->seg_shadow) {
+               kfree(info->req_shadow);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < ring_size; i++) {
+               info->seg_shadow[i].id = i+1;
+       }
+       info->seg_shadow[ring_size - 1].id = 0x0fffffff;
+
+       return 0;
+}
+
 static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
 {
@@ -1003,6 +1261,84 @@ fail:
        return err;
 }
 
+static int setup_blkring_v2(struct xenbus_device *dev,
+                           struct blkfront_info *info)
+{
+       struct blkif_request_sring *sring;
+       struct blkif_segment_sring *seg_sring;
+       int err;
+
+       info->reqring_ref = GRANT_INVALID_REF;
+
+       sring = (struct blkif_request_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       if (!sring) {
+               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+               return -ENOMEM;
+       }
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&info->reqring, sring, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(info->reqring.sring));
+       if (err < 0) {
+               free_page((unsigned long)sring);
+               info->reqring.sring = NULL;
+               goto fail;
+       }
+
+       info->reqring_ref = err;
+
+       info->segring_ref = GRANT_INVALID_REF;
+
+       seg_sring = (struct blkif_segment_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       if (!seg_sring) {
+               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+               err = -ENOMEM;
+               goto fail;
+       }
+       SHARED_RING_INIT(seg_sring);
+       FRONT_RING_INIT(&info->segring, seg_sring, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(info->segring.sring));
+       if (err < 0) {
+               free_page((unsigned long)seg_sring);
+               info->segring.sring = NULL;
+               goto fail;
+       }
+
+       info->segring_ref = err;
+
+       info->sg = kzalloc(sizeof(struct scatterlist) * info->ops->max_seg,
+                          GFP_KERNEL);
+       if (!info->sg) {
+               err = -ENOMEM;
+               goto fail;
+       }
+       sg_init_table(info->sg, info->ops->max_seg);
+
+       err = init_shadow_v2(info);
+       if (err)
+               goto fail;
+
+       err = xenbus_alloc_evtchn(dev, &info->evtchn);
+       if (err)
+               goto fail;
+
+       err = bind_evtchn_to_irqhandler(info->evtchn,
+                                       blkif_interrupt,
+                                       IRQF_SAMPLE_RANDOM, "blkif", info);
+       if (err <= 0) {
+               xenbus_dev_fatal(dev, err,
+                                "bind_evtchn_to_irqhandler failed");
+               goto fail;
+       }
+       info->irq = err;
+
+       return 0;
+fail:
+       blkif_free(info, 0);
+       return err;
+}
+
 static void free_blkring(struct blkfront_info *info, int suspend)
 {
        if (info->ring_ref != GRANT_INVALID_REF) {
@@ -1018,6 +1354,32 @@ static void free_blkring(struct blkfront_info *info, int suspend)
                kfree(info->shadow);
 }
 
+static void free_blkring_v2(struct blkfront_info *info, int suspend)
+{
+       if (info->reqring_ref != GRANT_INVALID_REF) {
+               gnttab_end_foreign_access(info->reqring_ref, 0,
+                                         (unsigned long)info->reqring.sring);
+               info->reqring_ref = GRANT_INVALID_REF;
+               info->reqring.sring = NULL;
+       }
+
+       if (info->segring_ref != GRANT_INVALID_REF) {
+               gnttab_end_foreign_access(info->segring_ref, 0,
+                                         (unsigned long)info->segring.sring);
+               info->segring_ref = GRANT_INVALID_REF;
+               info->segring.sring = NULL;
+       }
+
+       kfree(info->sg);
+
+       if (!suspend) {
+               kfree(info->req_shadow);
+               kfree(info->seg_shadow);
+       }
+
+}
+
+
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_blkback(struct xenbus_device *dev,
                           struct blkfront_info *info)
@@ -1025,9 +1387,17 @@ static int talk_to_blkback(struct xenbus_device *dev,
        const char *message = NULL;
        struct xenbus_transaction xbt;
        int err;
+       unsigned int type;
 
        /* register ring ops */
-       info->ops = &blk_front_ops;
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "blkback-ring-type", "%u",
+                          &type);
+       if (err != 1)
+               type = 1;
+       if (type == 2)
+               info->ops = &blk_front_ops_v2;
+       else
+               info->ops = &blk_front_ops;
 
        /* Create shared ring, alloc event channel. */
        err = info->ops->setup_blkring(dev, info);
@@ -1040,13 +1410,6 @@ again:
                xenbus_dev_fatal(dev, err, "starting transaction");
                goto destroy_blkring;
        }
-
-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
-       }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -1059,7 +1422,40 @@ again:
                message = "writing protocol";
                goto abort_transaction;
        }
-
+       if (type == 1) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%u", info->ring_ref);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+               err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type",
+                                   "%u", type);
+               if (err) {
+                       message = "writing blkfront ring type";
+                       goto abort_transaction;
+               }       
+       }
+       if (type == 2) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "reqring-ref", "%u", info->reqring_ref);
+               if (err) {
+                       message = "writing reqring-ref";
+                       goto abort_transaction;
+               }
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "segring-ref", "%u", info->segring_ref);
+               if (err) {
+                       message = "writing segring-ref";
+                       goto abort_transaction;
+               }
+               err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type",
+                                   "%u", type);
+               if (err) {
+                       message = "writing blkfront ring type";
+                       goto abort_transaction;
+               }       
+       }
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
@@ -1164,7 +1560,7 @@ static int blkfront_probe(struct xenbus_device *dev,
 }
 
 
-static int blkif_recover(struct blkfront_info *info)
+static int recover_from_v1_to_v1(struct blkfront_info *info)
 {
        int i;
        struct blkif_request *req;
@@ -1233,6 +1629,372 @@ static int blkif_recover(struct blkfront_info *info)
        return 0;
 }
 
+/* migrate from V2 type ring to V1 type*/
+static int recover_from_v2_to_v1(struct blkfront_info *info)
+{      
+       struct blk_req_shadow *copy;
+       struct blk_seg_shadow *seg_copy;
+       struct request *req;
+       struct blkif_request *new_req;
+       int i, j, err;
+       unsigned int req_rs;
+       struct bio *biolist = NULL, *biotail = NULL, *bio;
+       unsigned long index;
+       unsigned long flags;
+
+       pr_info("Warning: migrating to an older backend; some I/O may fail\n");
+
+       /* Stage 1: Init the new shadow state. */
+       info->ring_type = RING_TYPE_UNDEFINED;
+       err = init_shadow(info);
+       if (err)
+               return err;
+
+       req_rs = BLK_REQ_RING_SIZE;
+
+       /* Stage 2: Set up free list. */
+       info->shadow_free = info->ring.req_prod_pvt;
+
+       /* Stage 3: Find pending requests and requeue them. */
+       for (i = 0; i < req_rs; i++) {
+               req = info->req_shadow[i].request;
+               /* Not in use? */
+               if (!req)
+                       continue;
+
+               if (ring_full(info)) 
+                       goto out;
+
+               copy = &info->req_shadow[i];
+
+               /* We get a new request, reset the blkif request and shadow state. */
+               new_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+
+               if (copy->req.operation == BLKIF_OP_DISCARD) {
+                       new_req->operation = BLKIF_OP_DISCARD;
+                       new_req->u.discard = copy->req.u.discard;
+                       new_req->u.discard.id = get_id_from_freelist(info);
+                       info->shadow[new_req->u.discard.id].request = req;
+               }
+               else {
+                       if (copy->req.u.rw.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                               continue;
+
+                       new_req->u.rw.id = get_id_from_freelist(info);
+                       info->shadow[new_req->u.rw.id].request = req;
+                       new_req->operation = copy->req.operation;
+                       new_req->u.rw.nr_segments = copy->req.u.rw.nr_segments;
+                       new_req->u.rw.handle = copy->req.u.rw.handle;
+                       new_req->u.rw.sector_number = copy->req.u.rw.sector_number;
+                       index = copy->req.u.rw.seg_id;
+                       for (j = 0; j < new_req->u.rw.nr_segments; j++) {
+                               seg_copy = &info->seg_shadow[index];
+                               new_req->u.rw.seg[j].gref = seg_copy->req.gref;
+                               new_req->u.rw.seg[j].first_sect = seg_copy->req.first_sect;
+                               new_req->u.rw.seg[j].last_sect = seg_copy->req.last_sect;
+                               info->shadow[new_req->u.rw.id].frame[j] = seg_copy->frame;
+                               gnttab_grant_foreign_access_ref(
+                                       new_req->u.rw.seg[j].gref,
+                                       info->xbdev->otherend_id,
+                                       pfn_to_mfn(info->shadow[new_req->u.rw.id].frame[j]),
+                                       rq_data_dir(info->shadow[new_req->u.rw.id].request));
+                               index = info->seg_shadow[index].id;
+                       }
+               }
+               info->shadow[new_req->u.rw.id].req = *new_req;
+               info->ring.req_prod_pvt++;
+               info->req_shadow[i].request = NULL;
+               
+       }
+out:
+       xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+       spin_lock_irqsave(&info->io_lock, flags);
+
+       /* cancel the request and resubmit the bio */
+       for (i = 0; i < req_rs; i++) {
+               req = info->req_shadow[i].request;
+               if (!req)
+                       continue;
+
+               blkif_completion_v2(info, i);
+
+               if (biolist == NULL)    
+                       biolist = req->bio;
+               else
+                       biotail->bi_next = req->bio;
+               biotail = req->biotail;
+               req->bio = NULL;
+               __blk_put_request(info->rq, req);
+       }
+
+       while ((req = blk_peek_request(info->rq)) != NULL) {
+
+               blk_start_request(req);
+
+               if (biolist == NULL)
+                       biolist = req->bio;
+               else
+                       biotail->bi_next = req->bio;
+               biotail = req->biotail;
+               req->bio = NULL;
+               __blk_put_request(info->rq, req);
+       }
+
+       /* Now safe for us to use the shared ring */
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       /* need update the queue limit setting */
+       update_blk_queue(info);
+
+       /* Send off requeued requests */
+       flush_requests(info);
+
+       /* Kick any other new requests queued since we resumed */
+       kick_pending_request_queues(info);
+
+       spin_unlock_irqrestore(&info->io_lock, flags);
+
+       /* free original shadow*/
+       kfree(info->seg_shadow);
+       kfree(info->req_shadow);
+
+       while(biolist) {
+               bio = biolist;
+               biolist = biolist->bi_next;
+               bio->bi_next = NULL;
+               submit_bio(bio->bi_rw, bio);
+       }
+
+       return 0;
+}
+
+static int blkif_recover(struct blkfront_info *info)
+{
+       int rc;
+
+       if (info->ring_type == RING_TYPE_1)
+               rc = recover_from_v1_to_v1(info);
+       else if (info->ring_type == RING_TYPE_2)
+               rc = recover_from_v2_to_v1(info);
+       else
+               rc = -EPERM;
+       return rc;
+}
+
+static int recover_from_v1_to_v2(struct blkfront_info *info)
+{
+       int i,err;
+       struct blkif_request_header *req;
+       struct blkif_request_segment *segring_req;
+       struct blk_shadow *copy;
+       int j;
+       unsigned long seg_id, last_id = 0x0fffffff;
+
+       /* Stage 1: Init the new shadow. */
+       info->ring_type = RING_TYPE_UNDEFINED;
+       err = init_shadow_v2(info);
+       if (err)
+               return err;
+
+       /* Stage 2: Set up free list. */
+       info->shadow_free = info->reqring.req_prod_pvt;
+       info->seg_shadow_free = info->segring.req_prod_pvt;
+
+       /* Stage 3: Find pending requests and requeue them. */
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               copy = &info->shadow[i];
+               /* Not in use? */
+               if (!copy->request)
+                       continue;
+
+               /* We get a new request, reset the blkif request and shadow state. */
+               req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt);
+
+               if (copy->req.operation == BLKIF_OP_DISCARD) {
+                       req->operation = BLKIF_OP_DISCARD;
+                       req->u.discard = copy->req.u.discard;
+                       req->u.discard.id = get_id_from_freelist_v2(info);
+                       info->req_shadow[req->u.discard.id].request = copy->request;
+                       info->req_shadow[req->u.discard.id].req = *req;
+               }
+               else {
+                       req->u.rw.id = get_id_from_freelist_v2(info);
+                       req->operation = copy->req.operation;
+                       req->u.rw.nr_segments = copy->req.u.rw.nr_segments;
+                       req->u.rw.handle = copy->req.u.rw.handle;
+                       req->u.rw.sector_number = copy->req.u.rw.sector_number;
+                       for (j = 0; j < req->u.rw.nr_segments; j++) {
+                               seg_id = get_seg_shadow_id(info);
+                               if (j == 0)
+                                       req->u.rw.seg_id = seg_id;
+                               else
+                                       info->seg_shadow[last_id].id = seg_id;
+                               segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt);
+                               segring_req->gref = copy->req.u.rw.seg[j].gref;
+                               segring_req->first_sect = copy->req.u.rw.seg[j].first_sect;
+                               segring_req->last_sect = copy->req.u.rw.seg[j].last_sect;
+                               info->seg_shadow[seg_id].req = *segring_req;
+                               info->seg_shadow[seg_id].frame = copy->frame[j];
+                               info->segring.req_prod_pvt++;
+                               gnttab_grant_foreign_access_ref(
+                                       segring_req->gref,
+                                       info->xbdev->otherend_id,
+                                       pfn_to_mfn(copy->frame[j]),
+                                       rq_data_dir(copy->request));
+                               last_id = seg_id;
+                       }
+                       info->req_shadow[req->u.rw.id].req = *req;
+                       info->req_shadow[req->u.rw.id].request = copy->request;
+               }
+
+               info->reqring.req_prod_pvt++;
+       }
+
+       /* need update the queue limit setting */
+       update_blk_queue(info);
+
+       /* free original shadow*/
+       kfree(info->shadow);
+
+       xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+       spin_lock_irq(&info->io_lock);
+
+       /* Now safe for us to use the shared ring */
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       /* Send off requeued requests */
+       flush_requests(info);
+
+       /* Kick any other new requests queued since we resumed */
+       kick_pending_request_queues(info);
+
+       spin_unlock_irq(&info->io_lock);
+
+       return 0;
+}
+
+static int recover_from_v2_to_v2(struct blkfront_info *info)
+{
+       int i;
+       struct blkif_request_header *req;
+       struct blkif_request_segment *segring_req;
+       struct blk_req_shadow *copy;
+       struct blk_seg_shadow *seg_copy;
+       unsigned long index = 0x0fffffff, seg_id, last_id = 0x0fffffff;
+       int j;
+       unsigned int req_rs, seg_rs;
+       unsigned long flags;
+
+       req_rs = BLK_REQ_RING_SIZE;
+       seg_rs = BLK_SEG_RING_SIZE;
+
+       /* Stage 1: Make a safe copy of the shadow state. */
+       copy = kmalloc(sizeof(struct blk_req_shadow) * req_rs,
+                      GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+       if (!copy)
+               return -ENOMEM;
+
+       seg_copy = kmalloc(sizeof(struct blk_seg_shadow) * seg_rs,
+                          GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+       if (!seg_copy ) {
+               kfree(copy);
+               return -ENOMEM;
+       }
+
+       memcpy(copy, info->req_shadow, sizeof(struct blk_req_shadow) * req_rs);
+       memcpy(seg_copy, info->seg_shadow,
+              sizeof(struct blk_seg_shadow) * seg_rs);
+
+       /* Stage 2: Set up free list. */
+        for (i = 0; i < req_rs; i++)
+                info->req_shadow[i].req.u.rw.id = i+1;
+        info->req_shadow[req_rs - 1].req.u.rw.id = 0x0fffffff;
+
+       for (i = 0; i < seg_rs; i++)
+               info->seg_shadow[i].id = i+1;
+       info->seg_shadow[seg_rs - 1].id = 0x0fffffff;
+
+       info->shadow_free = info->reqring.req_prod_pvt;
+       info->seg_shadow_free = info->segring.req_prod_pvt;
+
+       /* Stage 3: Find pending requests and requeue them. */
+       for (i = 0; i < req_rs; i++) {
+               /* Not in use? */
+               if (!copy[i].request)
+                       continue;
+
+               req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt);
+               *req = copy[i].req;
+
+               req->u.rw.id = get_id_from_freelist_v2(info);
+               memcpy(&info->req_shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
+
+               if (req->operation != BLKIF_OP_DISCARD) {
+                       for (j = 0; j < req->u.rw.nr_segments; j++) {
+                               seg_id = get_seg_shadow_id(info);
+                               if (j == 0)
+                                       index = req->u.rw.seg_id;
+                               else
+                                       index = seg_copy[index].id;
+                               gnttab_grant_foreign_access_ref(
+                                       seg_copy[index].req.gref,
+                                       info->xbdev->otherend_id,
+                                       pfn_to_mfn(seg_copy[index].frame),
+                                       rq_data_dir(info->req_shadow[req->u.rw.id].request));
+                               segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt);
+                               memcpy(segring_req, &(seg_copy[index].req),
+                                      sizeof(struct blkif_request_segment));
+                               if (j == 0)
+                                       req->u.rw.seg_id = seg_id;
+                               else
+                                       info->seg_shadow[last_id].id = seg_id;
+
+                               memcpy(&info->seg_shadow[seg_id],
+                                      &seg_copy[index], sizeof(struct blk_seg_shadow));
+                               info->segring.req_prod_pvt++;
+                               last_id = seg_id;
+                       }
+               }
+               info->req_shadow[req->u.rw.id].req = *req;
+
+               info->reqring.req_prod_pvt++;
+       }
+
+       kfree(seg_copy);
+       kfree(copy);
+
+       xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+       spin_lock_irqsave(&info->io_lock, flags);
+
+       /* Now safe for us to use the shared ring */
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       /* Send off requeued requests */
+       flush_requests(info);
+
+       /* Kick any other new requests queued since we resumed */
+       kick_pending_request_queues(info);
+
+       spin_unlock_irqrestore(&info->io_lock, flags);
+
+       return 0;
+}
+
+static int blkif_recover_v2(struct blkfront_info *info)
+{
+       int rc;
+
+       if (info->ring_type == RING_TYPE_1)
+               rc = recover_from_v1_to_v2(info);
+       else if (info->ring_type == RING_TYPE_2)
+               rc = recover_from_v2_to_v2(info);
+       else
+               rc = -EPERM;
+       return rc;
+}
 /**
  * We are reconnecting to the backend, due to a suspend/resume, or a backend
  * driver restart.  We tear down our blkif structure and recreate it, but
@@ -1609,15 +2371,44 @@ static struct blk_front_operations blk_front_ops = {
        .update_rsp_event = update_rsp_event,
        .update_rsp_cons = update_rsp_cons,
        .update_req_prod_pvt = update_req_prod_pvt,
+       .update_segment_rsp_cons = update_segment_rsp_cons,
        .ring_push = ring_push,
        .recover = blkif_recover,
        .ring_full = ring_full,
+       .segring_full = segring_full,
        .setup_blkring = setup_blkring,
        .free_blkring = free_blkring,
        .blkif_completion = blkif_completion,
        .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST,
 };
 
+static struct blk_front_operations blk_front_ops_v2 = {
+       .ring_get_request = ring_get_request_v2,
+       .ring_get_response = ring_get_response_v2,
+       .ring_get_segment = ring_get_segment_v2,
+       .get_id = get_id_from_freelist_v2,
+       .add_id = add_id_to_freelist_v2,
+       .save_seg_shadow = save_seg_shadow_v2,
+       .save_req_shadow = save_req_shadow_v2,
+       .get_req_from_shadow = get_req_from_shadow_v2,
+       .get_rsp_prod = get_rsp_prod_v2,
+       .get_rsp_cons = get_rsp_cons_v2,
+       .get_req_prod_pvt = get_req_prod_pvt_v2,
+       .check_left_response = check_left_response_v2,
+       .update_rsp_event = update_rsp_event_v2,
+       .update_rsp_cons = update_rsp_cons_v2,
+       .update_req_prod_pvt = update_req_prod_pvt_v2,
+       .update_segment_rsp_cons = update_segment_rsp_cons_v2,
+       .ring_push = ring_push_v2,
+       .recover = blkif_recover_v2,
+       .ring_full = ring_full_v2,
+       .segring_full = segring_full_v2,
+       .setup_blkring = setup_blkring_v2,
+       .free_blkring = free_blkring_v2,
+       .blkif_completion = blkif_completion_v2,
+       .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST_V2,
+};
+
 static const struct block_device_operations xlvbd_block_fops =
 {
        .owner = THIS_MODULE,
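
(Aside, for reviewers: the xenstore handshake added in talk_to_blkback()
above ends up looking roughly like this for a type-2 connection.  The key
names blkback-ring-type, blkfront-ring-type, reqring-ref, segring-ref,
event-channel and protocol are taken from the patch; the paths and values
below are illustrative examples only.

# advertised by blkback before the frontend connects
/local/domain/0/backend/vbd/<domid>/<devid>/blkback-ring-type = "2"

# written by blkfront in talk_to_blkback()
/local/domain/<domid>/device/vbd/<devid>/blkfront-ring-type = "2"
/local/domain/<domid>/device/vbd/<devid>/reqring-ref        = "<grant ref>"
/local/domain/<domid>/device/vbd/<devid>/segring-ref        = "<grant ref>"
/local/domain/<domid>/device/vbd/<devid>/event-channel      = "<port>"
/local/domain/<domid>/device/vbd/<devid>/protocol           = "x86_64-abi"

If blkback-ring-type is missing or has any value other than 2, the
xenbus_scanf() fallback selects blk_front_ops and the frontend writes the
existing ring-ref key instead, so old backends keep working unchanged.)
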
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f100ce2..a5a98b0 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -475,7 +475,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
                /* XXX This needs to be fixed so that the ref and page are
                   placed on a list to be freed up later. */
                printk(KERN_WARNING
-                      "WARNING: leaking g.e. and page still in use!\n");
+                      "WARNING: ref %u leaking g.e. and page still in use!\n", ref);
        }
 }
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ee338bf..763489a 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -108,6 +108,7 @@ typedef uint64_t blkif_sector_t;
  * NB. This could be 12 if the ring indexes weren't stored in the same page.
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST_V2 128
 
 struct blkif_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
@@ -125,6 +126,17 @@ struct blkif_request_rw {
        } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } __attribute__((__packed__));
 
+struct blkif_request_rw_header {
+       uint8_t        nr_segments;  /* number of segments                   */
+       blkif_vdev_t   handle;       /* only for read/write requests         */
+#ifdef CONFIG_X86_64
+       uint32_t       _pad1;        /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+       uint64_t       id;           /* private guest value, echoed in resp  */
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t       seg_id;       /* segment id in the segment shadow     */ 
+} __attribute__((__packed__));
+
 struct blkif_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
 #define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */
@@ -135,7 +147,6 @@ struct blkif_request_discard {
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;
        uint64_t       nr_sectors;
-       uint8_t        _pad3;
 } __attribute__((__packed__));
 
 struct blkif_request {
@@ -146,12 +157,24 @@ struct blkif_request {
        } u;
 } __attribute__((__packed__));
 
+struct blkif_request_header {
+       uint8_t        operation;    /* BLKIF_OP_???                         */
+       union {
+               struct blkif_request_rw_header rw;
+               struct blkif_request_discard discard;
+       } u;
+} __attribute__((__packed__));
+
 struct blkif_response {
        uint64_t        id;              /* copied from request */
        uint8_t         operation;       /* copied from request */
        int16_t         status;          /* BLKIF_RSP_???       */
 };
 
+struct blkif_response_segment {
+       char            dummy;
+} __attribute__((__packed__));
+
 /*
  * STATUS RETURN CODES.
  */
@@ -167,6 +190,8 @@ struct blkif_response {
  */
 
 DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+DEFINE_RING_TYPES(blkif_request, struct blkif_request_header, struct blkif_response);
+DEFINE_RING_TYPES(blkif_segment, struct blkif_request_segment, struct blkif_response_segment);
 
 #define VDISK_CDROM        0x1
 #define VDISK_REMOVABLE    0x2
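
(For readers without ring.h at hand: each DEFINE_RING_TYPES() invocation
above generates an independent family of ring types, which is what lets
blkfront hold a blkif_request_front_ring and a blkif_segment_front_ring
side by side.  The following is a simplified sketch of roughly what the
first invocation provides -- the real macro lives in
include/xen/interface/io/ring.h and also emits a back-ring type plus the
usual helpers such as FRONT_RING_INIT() and RING_GET_REQUEST(); it is
shown only to illustrate the shape, not as the literal expansion.

typedef uint32_t RING_IDX;                       /* from ring.h */

union blkif_request_sring_entry {                /* a slot holds a request or a response */
        struct blkif_request_header req;         /* as defined in the hunk above */
        struct blkif_response rsp;
};

struct blkif_request_sring {                     /* layout of the shared page */
        RING_IDX req_prod, req_event;
        RING_IDX rsp_prod, rsp_event;
        uint8_t  pad[48];
        union blkif_request_sring_entry ring[1]; /* variable length */
};

struct blkif_request_front_ring {                /* frontend-private bookkeeping */
        RING_IDX req_prod_pvt;                   /* next slot the frontend will fill */
        RING_IDX rsp_cons;                       /* next response to consume */
        unsigned int nr_ents;
        struct blkif_request_sring *sring;
};

Because segments now live on their own ring rather than inline in each
request, a single request is no longer limited by what fits in one ring
slot, which is what allows BLKIF_MAX_SEGMENTS_PER_REQUEST_V2 to be 128.)
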
-ronghui




 

