Re: [Xen-devel] [PATCH] vscsiif: allow larger segments-per-request values
On Tue, Nov 27, 2012 at 11:37:31AM +0000, Jan Beulich wrote:
> At least certain tape devices require fixed size blocks to be operated
> upon, i.e. breaking up of I/O requests is not permitted. Consequently
> we need an interface extension that (leaving aside implementation
> limitations) doesn't impose a limit on the number of segments that can
> be associated with an individual request.
>
> This, in turn, excludes the blkif extension FreeBSD folks implemented,
> as that still imposes an upper limit (the actual I/O request still
> specifies the full number of segments - as an 8-bit quantity -, and
> subsequent ring slots get used to carry the excess segment
> descriptors).
>
> The alternative therefore is to allow the frontend to pre-set segment
> descriptors _before_ actually issuing the I/O request. I/O will then
> be done by the backend for the accumulated set of segments.

How do you deal with migration to older backends?

> To properly associate segment preset operations with the main request,
> the rqid-s between them should match (originally I had hoped to use
> this to avoid producing individual responses for the pre-set
> operations, but that turned out to violate the underlying shared ring
> implementation).

Right. If we could separate those two, that would solve it. So: separate
'request' and 'response' rings.

> Negotiation of the maximum number of segments a particular backend
> implementation supports happens through a new "segs-per-req" xenstore
> node.

No 'feature-segs-per-req'?

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> As I have no plans to backport this to the 2.6.18 tree, I'm attaching
> for reference the full kernel side patch we're intending to use.
>
> --- a/xen/include/public/io/vscsiif.h
> +++ b/xen/include/public/io/vscsiif.h
> @@ -34,6 +34,7 @@
>  #define VSCSIIF_ACT_SCSI_CDB         1 /* SCSI CDB command */
>  #define VSCSIIF_ACT_SCSI_ABORT       2 /* SCSI Device(Lun) Abort*/
>  #define VSCSIIF_ACT_SCSI_RESET       3 /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET   4 /* Preset SG elements */
>
>  /*
>   * Maximum scatter/gather segments per request.
> @@ -50,6 +51,12 @@
>  #define VSCSIIF_MAX_COMMAND_SIZE     16
>  #define VSCSIIF_SENSE_BUFFERSIZE     96
>
> +struct scsiif_request_segment {
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>
>  struct vscsiif_request {
>      uint16_t rqid;          /* private guest value, echoed in resp */
> @@ -66,18 +73,26 @@ struct vscsiif_request {
>                                DMA_NONE(3) requests */
>      uint8_t nr_segments;    /* Number of pieces of scatter-gather */
>
> -    struct scsiif_request_segment {
> -        grant_ref_t gref;
> -        uint16_t offset;
> -        uint16_t length;
> -    } seg[VSCSIIF_SG_TABLESIZE];
> +    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
>      uint32_t reserved[3];
>  };
>  typedef struct vscsiif_request vscsiif_request_t;
>
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> +                              / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> +    /* First two fields must match struct vscsiif_request! */
> +    uint16_t rqid;          /* private guest value, must match main req */
> +    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
> +    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
> +    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
>  struct vscsiif_response {
>      uint16_t rqid;
> -    uint8_t padding;
> +    uint8_t act;            /* valid only when backend supports SG_PRESET */
>      uint8_t sense_len;
>      uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
>      int32_t rslt;
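[Illustration only, not part of Jan's patch - the helper below is
hypothetical, the constants come from the header above. It mirrors the
splitting the frontend's push_cmd_to_ring() does further down: SG_PRESET
slots are emitted while more than VSCSIIF_SG_TABLESIZE segments remain,
each carrying at most VSCSIIF_SG_LIST_SIZE entries, and whatever is left
(possibly zero) rides in the final CDB slot.]

/*
 * Hypothetical helper, sketch only: how many ring slots does a command
 * with nr_segs data segments occupy under the SG_PRESET extension?
 */
static unsigned int slots_for_command(unsigned int nr_segs)
{
    unsigned int slots = 1;     /* the concluding VSCSIIF_ACT_SCSI_CDB slot */

    while (nr_segs > VSCSIIF_SG_TABLESIZE) {
        unsigned int chunk = nr_segs > VSCSIIF_SG_LIST_SIZE
                             ? VSCSIIF_SG_LIST_SIZE : nr_segs;

        nr_segs -= chunk;       /* one VSCSIIF_ACT_SCSI_SG_PRESET slot */
        ++slots;
    }
    return slots;
}

[Assuming the tree's usual VSCSIIF_SG_TABLESIZE of 26 and a request
structure padded to 252 bytes, VSCSIIF_SG_LIST_SIZE works out to
(252 - 4) / 8 = 31, so e.g. a 100-segment command would occupy three
preset slots (31 + 31 + 31 segments) plus the CDB slot (7 segments).]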
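[Also illustration, not in the patch: the two layout rules the comments
state - a vscsiif_sg_list_t must fit in a ring slot, and its leading
fields must line up with struct vscsiif_request - could be pinned down at
compile time, e.g. with Linux's BUILD_BUG_ON:]

/* Sketch only: checks a kernel-side user of this header might add. */
static inline void vscsiif_check_layout(void)
{
    BUILD_BUG_ON(sizeof(vscsiif_sg_list_t) > sizeof(vscsiif_request_t));
    BUILD_BUG_ON(offsetof(vscsiif_sg_list_t, rqid) !=
                 offsetof(vscsiif_request_t, rqid));
    BUILD_BUG_ON(offsetof(vscsiif_sg_list_t, act) !=
                 offsetof(vscsiif_request_t, act));
}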
> --- sle11sp3.orig/drivers/xen/scsiback/common.h     2012-06-06 13:53:26.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/common.h  2012-11-22 14:55:58.000000000 +0100
> @@ -94,10 +94,15 @@ struct vscsibk_info {
>      unsigned int waiting_reqs;
>      struct page **mmap_pages;
>
> +    struct pending_req *preq;
> +
> +    union {
> +        struct gnttab_map_grant_ref *gmap;
> +        struct gnttab_unmap_grant_ref *gunmap;
> +    };
>  };
>
> -typedef struct {
> -    unsigned char act;
> +typedef struct pending_req {
>      struct vscsibk_info *info;
>      struct scsi_device *sdev;
>
> @@ -114,7 +119,8 @@ typedef struct {
>
>      uint32_t request_bufflen;
>      struct scatterlist *sgl;
> -    grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
> +    grant_ref_t *gref;
> +    vscsiif_segment_t *segs;
>
>      int32_t rslt;
>      uint32_t resid;
> @@ -123,7 +129,7 @@ typedef struct {
>      struct list_head free_list;
>  } pending_req_t;
>
> -
> +extern unsigned int vscsiif_segs;
>
>  #define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
>  #define scsiback_put(_b) \
> @@ -163,7 +169,7 @@ void scsiback_release_translation_entry(
>
>  void scsiback_cmd_exec(pending_req_t *pending_req);
>  void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> -        uint32_t resid, pending_req_t *pending_req);
> +        uint32_t resid, pending_req_t *, uint8_t act);
>  void scsiback_fast_flush_area(pending_req_t *req);
>
>  void scsiback_rsp_emulation(pending_req_t *pending_req);
> --- sle11sp3.orig/drivers/xen/scsiback/emulate.c    2012-01-11 12:14:54.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/emulate.c 2012-11-22 14:29:27.000000000 +0100
> @@ -352,7 +352,9 @@ void scsiback_req_emulation_or_cmdexec(p
>      else {
>          scsiback_fast_flush_area(pending_req);
>          scsiback_do_resp_with_sense(pending_req->sense_buffer,
> -            pending_req->rslt, pending_req->resid, pending_req);
> +            pending_req->rslt,
> +            pending_req->resid, pending_req,
> +            VSCSIIF_ACT_SCSI_CDB);
>      }
>  }
>
> --- sle11sp3.orig/drivers/xen/scsiback/interface.c  2011-10-10 11:58:37.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/interface.c       2012-11-13 13:21:10.000000000 +0100
> @@ -51,6 +51,13 @@ struct vscsibk_info *vscsibk_info_alloc(
>      if (!info)
>          return ERR_PTR(-ENOMEM);
>
> +    info->gmap = kcalloc(max(sizeof(*info->gmap), sizeof(*info->gunmap)),
> +                         vscsiif_segs, GFP_KERNEL);
> +    if (!info->gmap) {
> +        kfree(info);
> +        return ERR_PTR(-ENOMEM);
> +    }
> +
>      info->domid = domid;
>      spin_lock_init(&info->ring_lock);
>      atomic_set(&info->nr_unreplied_reqs, 0);
> @@ -120,6 +127,7 @@ void scsiback_disconnect(struct vscsibk_
>
>  void scsiback_free(struct vscsibk_info *info)
>  {
> +    kfree(info->gmap);
>      kmem_cache_free(scsiback_cachep, info);
>  }
>
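[Side note, sketch only - not patch code: the gmap/gunmap union works
because map and unmap batches are never in flight at the same time for
one backend instance, so a single scratch allocation sized for the larger
element type can back both views of the union, one slot per possible
segment:]

size_t elem = max(sizeof(struct gnttab_map_grant_ref),
                  sizeof(struct gnttab_unmap_grant_ref));

info->gmap = kcalloc(vscsiif_segs, elem, GFP_KERNEL);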
> --- sle11sp3.orig/drivers/xen/scsiback/scsiback.c   2012-11-22 15:36:11.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/scsiback.c        2012-11-22 15:36:16.000000000 +0100
> @@ -56,6 +56,10 @@ int vscsiif_reqs = VSCSIIF_BACK_MAX_PEND
>  module_param_named(reqs, vscsiif_reqs, int, 0);
>  MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
>
> +unsigned int vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(segs, vscsiif_segs, uint, 0);
> +MODULE_PARM_DESC(segs, "Number of segments to allow per request");
> +
>  static unsigned int log_print_stat = 0;
>  module_param(log_print_stat, int, 0644);
>
> @@ -67,7 +71,7 @@ static grant_handle_t *pending_grant_han
>
>  static int vaddr_pagenr(pending_req_t *req, int seg)
>  {
> -    return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
> +    return (req - pending_reqs) * vscsiif_segs + seg;
>  }
>
>  static unsigned long vaddr(pending_req_t *req, int seg)
> @@ -82,7 +86,7 @@ static unsigned long vaddr(pending_req_t
>
>  void scsiback_fast_flush_area(pending_req_t *req)
>  {
> -    struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
> +    struct gnttab_unmap_grant_ref *unmap = req->info->gunmap;
>      unsigned int i, invcount = 0;
>      grant_handle_t handle;
>      int err;
> @@ -117,6 +121,7 @@ static pending_req_t * alloc_req(struct
>      if (!list_empty(&pending_free)) {
>          req = list_entry(pending_free.next, pending_req_t, free_list);
>          list_del(&req->free_list);
> +        req->nr_segments = 0;
>      }
>      spin_unlock_irqrestore(&pending_free_lock, flags);
>      return req;
> @@ -144,7 +149,8 @@ static void scsiback_notify_work(struct
>  }
>
>  void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> -        uint32_t resid, pending_req_t *pending_req)
> +        uint32_t resid, pending_req_t *pending_req,
> +        uint8_t act)
>  {
>      vscsiif_response_t *ring_res;
>      struct vscsibk_info *info = pending_req->info;
> @@ -159,6 +165,7 @@ void scsiback_do_resp_with_sense(char *s
>      ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
>      info->ring.rsp_prod_pvt++;
>
> +    ring_res->act = act;
>      ring_res->rslt = result;
>      ring_res->rqid = pending_req->rqid;
>
> @@ -186,7 +193,8 @@ void scsiback_do_resp_with_sense(char *s
>      if (notify)
>          notify_remote_via_irq(info->irq);
>
> -    free_req(pending_req);
> +    if (act != VSCSIIF_ACT_SCSI_SG_PRESET)
> +        free_req(pending_req);
>  }
>
>  static void scsiback_print_status(char *sense_buffer, int errors,
> @@ -225,25 +233,25 @@ static void scsiback_cmd_done(struct req
>      scsiback_rsp_emulation(pending_req);
>
>      scsiback_fast_flush_area(pending_req);
> -    scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
> +    scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req,
> +                                VSCSIIF_ACT_SCSI_CDB);
>      scsiback_put(pending_req->info);
>
>      __blk_put_request(req->q, req);
>  }
>
>
> -static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
> -        pending_req_t *pending_req)
> +static int scsiback_gnttab_data_map(const vscsiif_segment_t *segs,
> +        unsigned int nr_segs,
> +        pending_req_t *pending_req)
>  {
>      u32 flags;
> -    int write;
> -    int i, err = 0;
> -    unsigned int data_len = 0;
> -    struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
> +    int write, err = 0;
> +    unsigned int i, j, data_len = 0;
>      struct vscsibk_info *info = pending_req->info;
> -
> +    struct gnttab_map_grant_ref *map = info->gmap;
>      int data_dir = (int)pending_req->sc_data_direction;
> -    unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
> +    unsigned int nr_segments = pending_req->nr_segments + nr_segs;
>
>      write = (data_dir == DMA_TO_DEVICE);
>
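[Scale note, my arithmetic, not from the patch: the page pool behind
vaddr_pagenr() stays one flat array, with request slot r owning pages
[r * vscsiif_segs, (r + 1) * vscsiif_segs). Assuming VSCSIIF_SG_TABLESIZE
is 26 and the default of 128 pending requests, today's pool is
128 * 26 = 3328 pages; raising segs to its 255 maximum (the limit imposed
by the 8-bit nr_segments fields) grows it to 128 * 255 = 32640 pages of
backend address space.]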
> @@ -264,14 +272,20 @@ static int scsiback_gnttab_data_map(vscs
>          if (write)
>              flags |= GNTMAP_readonly;
>
> -        for (i = 0; i < nr_segments; i++)
> +        for (i = 0; i < pending_req->nr_segments; i++)
>              gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> -                    ring_req->seg[i].gref,
> +                    pending_req->segs[i].gref,
> +                    info->domid);
> +        for (j = 0; i < nr_segments; i++, j++)
> +            gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> +                    segs[j].gref,
>                      info->domid);
>
>          err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map,
>                          nr_segments);
>          BUG_ON(err);
>
> +        j = 0;
>          for_each_sg (pending_req->sgl, sg, nr_segments, i) {
>              struct page *pg;
>
> @@ -294,8 +308,15 @@ static int scsiback_gnttab_data_map(vscs
>              set_phys_to_machine(page_to_pfn(pg),
>                      FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
>
> -            sg_set_page(sg, pg, ring_req->seg[i].length,
> -                    ring_req->seg[i].offset);
> +            if (i < pending_req->nr_segments)
> +                sg_set_page(sg, pg,
> +                        pending_req->segs[i].length,
> +                        pending_req->segs[i].offset);
> +            else {
> +                sg_set_page(sg, pg, segs[j].length,
> +                        segs[j].offset);
> +                ++j;
> +            }
>              data_len += sg->length;
>
>              barrier();
> @@ -306,6 +327,8 @@ static int scsiback_gnttab_data_map(vscs
>
>          }
>
> +        pending_req->nr_segments = nr_segments;
> +
>          if (err)
>              goto fail_flush;
>      }
> @@ -471,7 +494,8 @@ static void scsiback_device_reset_exec(p
>      scsiback_get(info);
>      err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
>
> -    scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
> +    scsiback_do_resp_with_sense(NULL, err, 0, pending_req,
> +                                VSCSIIF_ACT_SCSI_RESET);
>      scsiback_put(info);
>
>      return;
> @@ -489,13 +513,11 @@ static int prepare_pending_reqs(struct v
>  {
>      struct scsi_device *sdev;
>      struct ids_tuple vir;
> +    unsigned int nr_segs;
>      int err = -EINVAL;
>
>      DPRINTK("%s\n",__FUNCTION__);
>
> -    pending_req->rqid = ring_req->rqid;
> -    pending_req->act = ring_req->act;
> -
>      pending_req->info = info;
>
>      pending_req->v_chn = vir.chn = ring_req->channel;
> @@ -525,11 +547,10 @@ static int prepare_pending_reqs(struct v
>          goto invalid_value;
>      }
>
> -    pending_req->nr_segments = ring_req->nr_segments;
> +    nr_segs = ring_req->nr_segments;
>      barrier();
> -    if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
> -        DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
> -            pending_req->nr_segments);
> +    if (pending_req->nr_segments + nr_segs > vscsiif_segs) {
> +        DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
>          err = -EINVAL;
>          goto invalid_value;
>      }
> @@ -546,7 +567,7 @@ static int prepare_pending_reqs(struct v
>
>      pending_req->timeout_per_command = ring_req->timeout_per_command;
>
> -    if(scsiback_gnttab_data_map(ring_req, pending_req)) {
> +    if (scsiback_gnttab_data_map(ring_req->seg, nr_segs, pending_req)) {
>          DPRINTK("scsiback: invalid buffer\n");
>          err = -EINVAL;
>          goto invalid_value;
> @@ -558,6 +579,20 @@ invalid_value:
>      return err;
>  }
>
> +static void latch_segments(pending_req_t *pending_req,
> +                           const struct vscsiif_sg_list *sgl)
> +{
> +    unsigned int nr_segs = sgl->nr_segments;
> +
> +    barrier();
> +    if (pending_req->nr_segments + nr_segs <= vscsiif_segs) {
> +        memcpy(pending_req->segs + pending_req->nr_segments,
> +               sgl->seg, nr_segs * sizeof(*sgl->seg));
> +        pending_req->nr_segments += nr_segs;
> +    }
> +    else
> +        DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
> +}
>
>  static int _scsiback_do_cmd_fn(struct vscsibk_info *info)
>  {
> @@ -575,9 +610,11 @@ static int _scsiback_do_cmd_fn(struct vs
>      rmb();
>
>      while ((rc != rp)) {
> +        int act, rqid;
> +
>          if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
>              break;
> -        pending_req = alloc_req(info);
> +        pending_req = info->preq ?: alloc_req(info);
>          if (NULL == pending_req) {
>              more_to_do = 1;
>              break;
>          }
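[Illustration, not patch code, names hypothetical: both
prepare_pending_reqs() and latch_segments() above follow the usual
shared-ring discipline for guest-supplied counts - snapshot the field
once, compiler-barrier, then bounds-check and use only the snapshot, so
the frontend cannot change the value between the check and the copy:]

static int latch_checked(pending_req_t *pending,
                         const struct vscsiif_sg_list *sgl)
{
    unsigned int nr = sgl->nr_segments;   /* single read of shared memory */

    barrier();                            /* compiler must not re-read */
    if (pending->nr_segments + nr > vscsiif_segs)
        return -EINVAL;                   /* bounds-check the snapshot */
    memcpy(pending->segs + pending->nr_segments,
           sgl->seg, nr * sizeof(*sgl->seg)); /* ...and use only the snapshot */
    pending->nr_segments += nr;
    return 0;
}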
> @@ -586,32 +623,55 @@ static int _scsiback_do_cmd_fn(struct vs
>          ring_req = RING_GET_REQUEST(ring, rc);
>          ring->req_cons = ++rc;
>
> +        act = ring_req->act;
> +        rqid = ring_req->rqid;
> +        barrier();
> +        if (!pending_req->nr_segments)
> +            pending_req->rqid = rqid;
> +        else if (pending_req->rqid != rqid)
> +            DPRINTK("scsiback: invalid rqid %04x, expected %04x\n",
> +                    rqid, pending_req->rqid);
> +
> +        info->preq = NULL;
> +        if (pending_req->rqid != rqid) {
> +            scsiback_do_resp_with_sense(NULL, DRIVER_INVALID << 24,
> +                                        0, pending_req, act);
> +            continue;
> +        }
> +
> +        if (act == VSCSIIF_ACT_SCSI_SG_PRESET) {
> +            latch_segments(pending_req, (void *)ring_req);
> +            info->preq = pending_req;
> +            scsiback_do_resp_with_sense(NULL, 0, 0,
> +                                        pending_req, act);
> +            continue;
> +        }
> +
>          err = prepare_pending_reqs(info, ring_req, pending_req);
>          if (err == -EINVAL) {
>              scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> -                0, pending_req);
> +                0, pending_req, act);
>              continue;
>          } else if (err == -ENODEV) {
>              scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
> -                0, pending_req);
> +                0, pending_req, act);
>              continue;
>          }
>
> -        if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
> -
> +        if (act == VSCSIIF_ACT_SCSI_CDB) {
>              /* The Host mode is through as for Emulation. */
>              if (info->feature == VSCSI_TYPE_HOST)
>                  scsiback_cmd_exec(pending_req);
>              else
>                  scsiback_req_emulation_or_cmdexec(pending_req);
>
> -        } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
> +        } else if (act == VSCSIIF_ACT_SCSI_RESET) {
>              scsiback_device_reset_exec(pending_req);
>          } else {
>              pr_err("scsiback: invalid parameter for request\n");
>              scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> -                0, pending_req);
> +                0, pending_req, act);
>              continue;
>          }
>      }
> @@ -673,17 +733,32 @@ static int __init scsiback_init(void)
>      if (!is_running_on_xen())
>          return -ENODEV;
>
> -    mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
> +    if (vscsiif_segs < VSCSIIF_SG_TABLESIZE)
> +        vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> +    if (vscsiif_segs != (uint8_t)vscsiif_segs)
> +        return -EINVAL;
> +    mmap_pages = vscsiif_reqs * vscsiif_segs;
>
>      pending_reqs = kzalloc(sizeof(pending_reqs[0]) * vscsiif_reqs,
>                             GFP_KERNEL);
> +    if (!pending_reqs)
> +        return -ENOMEM;
>      pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
>                                      mmap_pages, GFP_KERNEL);
>      pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
>
> -    if (!pending_reqs || !pending_grant_handles || !pending_pages)
> +    if (!pending_grant_handles || !pending_pages)
>          goto out_of_memory;
>
> +    for (i = 0; i < vscsiif_reqs; ++i) {
> +        pending_reqs[i].gref = kcalloc(sizeof(*pending_reqs->gref),
> +                                       vscsiif_segs, GFP_KERNEL);
> +        pending_reqs[i].segs = kcalloc(sizeof(*pending_reqs->segs),
> +                                       vscsiif_segs, GFP_KERNEL);
> +        if (!pending_reqs[i].gref || !pending_reqs[i].segs)
> +            goto out_of_memory;
> +    }
> +
>      for (i = 0; i < mmap_pages; i++)
>          pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
>
> @@ -705,6 +780,10 @@ static int __init scsiback_init(void)
>  out_interface:
>      scsiback_interface_exit();
>  out_of_memory:
> +    for (i = 0; i < vscsiif_reqs; ++i) {
> +        kfree(pending_reqs[i].gref);
> +        kfree(pending_reqs[i].segs);
> +    }
>      kfree(pending_reqs);
>      kfree(pending_grant_handles);
>      free_empty_pages_and_pagevec(pending_pages, mmap_pages);
> @@ -715,12 +794,17 @@ out_of_memory:
>  #if 0
>  static void __exit scsiback_exit(void)
>  {
> +    unsigned int i;
> +
>      scsiback_xenbus_unregister();
>      scsiback_interface_exit();
> +    for (i = 0; i < vscsiif_reqs; ++i) {
> +        kfree(pending_reqs[i].gref);
> +        kfree(pending_reqs[i].segs);
> +    }
>      kfree(pending_reqs);
>      kfree(pending_grant_handles);
> -    free_empty_pages_and_pagevec(pending_pages,
> -                                 (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
> -
> +    free_empty_pages_and_pagevec(pending_pages, vscsiif_reqs * vscsiif_segs);
>  }
>  #endif
>
> --- sle11sp3.orig/drivers/xen/scsiback/xenbus.c     2011-06-30 17:04:59.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/xenbus.c  2012-11-13 14:36:16.000000000 +0100
> @@ -339,6 +339,13 @@ static int scsiback_probe(struct xenbus_
>      if (val)
>          be->info->feature = VSCSI_TYPE_HOST;
>
> +    if (vscsiif_segs > VSCSIIF_SG_TABLESIZE) {
> +        err = xenbus_printf(XBT_NIL, dev->nodename, "segs-per-req",
> +                            "%u", vscsiif_segs);
> +        if (err)
> +            xenbus_dev_error(dev, err, "writing segs-per-req");
> +    }
> +
>      err = xenbus_switch_state(dev, XenbusStateInitWait);
>      if (err)
>          goto fail;
> --- sle11sp3.orig/drivers/xen/scsifront/common.h    2011-01-31 17:29:16.000000000 +0100
> +++ sle11sp3/drivers/xen/scsifront/common.h 2012-11-22 13:45:50.000000000 +0100
> @@ -95,7 +95,7 @@ struct vscsifrnt_shadow {
>
>      /* requested struct scsi_cmnd is stored from kernel */
>      unsigned long req_scsi_cmnd;
> -    int gref[VSCSIIF_SG_TABLESIZE];
> +    int gref[SG_ALL];
>  };
>
>  struct vscsifrnt_info {
> @@ -110,7 +110,6 @@ struct vscsifrnt_info {
>
>      grant_ref_t ring_ref;
>      struct vscsiif_front_ring ring;
> -    struct vscsiif_response ring_res;
>
>      struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
>      uint32_t shadow_free;
> @@ -119,6 +118,12 @@ struct vscsifrnt_info {
>      wait_queue_head_t wq;
>      unsigned int waiting_resp;
>
> +    struct {
> +        struct scsi_cmnd *sc;
> +        unsigned int rqid;
> +        unsigned int done;
> +        vscsiif_segment_t segs[];
> +    } active;
>  };
>
>  #define DPRINTK(_f, _a...) \
> --- sle11sp3.orig/drivers/xen/scsifront/scsifront.c 2011-06-28 18:57:14.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/scsifront.c      2012-11-22 16:37:35.000000000 +0100
> @@ -106,6 +106,66 @@ irqreturn_t scsifront_intr(int irq, void
>      return IRQ_HANDLED;
>  }
>
> +static bool push_cmd_to_ring(struct vscsifrnt_info *info,
> +                             vscsiif_request_t *ring_req)
> +{
> +    unsigned int left, rqid = info->active.rqid;
> +    struct scsi_cmnd *sc;
> +
> +    for (; ; ring_req = NULL) {
> +        struct vscsiif_sg_list *sgl;
> +
> +        if (!ring_req) {
> +            struct vscsiif_front_ring *ring = &info->ring;
> +
> +            ring_req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
> +            ring->req_prod_pvt++;
> +            ring_req->rqid = rqid;
> +        }
> +
> +        left = info->shadow[rqid].nr_segments - info->active.done;
> +        if (left <= VSCSIIF_SG_TABLESIZE)
> +            break;
> +
> +        sgl = (void *)ring_req;
> +        sgl->act = VSCSIIF_ACT_SCSI_SG_PRESET;
> +
> +        if (left > VSCSIIF_SG_LIST_SIZE)
> +            left = VSCSIIF_SG_LIST_SIZE;
> +        memcpy(sgl->seg, info->active.segs + info->active.done,
> +               left * sizeof(*sgl->seg));
> +
> +        sgl->nr_segments = left;
> +        info->active.done += left;
> +
> +        if (RING_FULL(&info->ring))
> +            return false;
> +    }
> +
> +    sc = info->active.sc;
> +
> +    ring_req->act = VSCSIIF_ACT_SCSI_CDB;
> +    ring_req->id = sc->device->id;
> +    ring_req->lun = sc->device->lun;
> +    ring_req->channel = sc->device->channel;
> +    ring_req->cmd_len = sc->cmd_len;
> +
> +    if ( sc->cmd_len )
> +        memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> +    else
> +        memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> +
> +    ring_req->sc_data_direction = sc->sc_data_direction;
> +    ring_req->timeout_per_command = sc->request->timeout / HZ;
> +    ring_req->nr_segments = left;
> +
> +    memcpy(ring_req->seg, info->active.segs + info->active.done,
> +           left * sizeof(*ring_req->seg));
> +
> +    info->active.sc = NULL;
> +
> +    return !RING_FULL(&info->ring);
> +}
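[On the migration question raised above - illustration, not patch code:
with this scheme, compatibility with an older backend falls out of the
xenstore negotiation. Such a backend never writes "segs-per-req", so on
the frontend side (condensed from scsifront_read_backend_params() further
down, error handling elided):]

unsigned int nr_segs;

if (xenbus_scanf(XBT_NIL, dev->otherend, "segs-per-req",
                 "%u", &nr_segs) != 1)
    nr_segs = VSCSIIF_SG_TABLESIZE; /* old backend: baseline table size,
                                       so push_cmd_to_ring() never emits
                                       an SG_PRESET slot for it */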
>
>  static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
>  {
> @@ -194,6 +254,16 @@ int scsifront_cmd_done(struct vscsifrnt_
>
>          ring_res = RING_GET_RESPONSE(&info->ring, i);
>
> +        if (info->host->sg_tablesize > VSCSIIF_SG_TABLESIZE) {
> +            u8 act = ring_res->act;
> +
> +            if (act == VSCSIIF_ACT_SCSI_SG_PRESET)
> +                continue;
> +            if (act != info->shadow[ring_res->rqid].act)
> +                DPRINTK("Bogus backend response (%02x vs %02x)\n",
> +                        act, info->shadow[ring_res->rqid].act);
> +        }
> +
>          if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
>              scsifront_cdb_cmd_done(info, ring_res);
>          else
> @@ -208,8 +278,16 @@ int scsifront_cmd_done(struct vscsifrnt_
>          info->ring.sring->rsp_event = i + 1;
>      }
>
> -    spin_unlock_irqrestore(&info->io_lock, flags);
> +    spin_unlock(&info->io_lock);
> +
> +    spin_lock(info->host->host_lock);
> +
> +    if (info->active.sc && !RING_FULL(&info->ring)) {
> +        push_cmd_to_ring(info, NULL);
> +        scsifront_do_request(info);
> +    }
>
> +    spin_unlock_irqrestore(info->host->host_lock, flags);
>
>      /* Yield point for this unbounded loop. */
>      cond_resched();
> @@ -242,7 +320,8 @@ int scsifront_schedule(void *data)
>
>
>  static int map_data_for_request(struct vscsifrnt_info *info,
> -        struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
> +        struct scsi_cmnd *sc,
> +        struct vscsifrnt_shadow *shadow)
>  {
>      grant_ref_t gref_head;
>      struct page *page;
> @@ -254,7 +333,7 @@ static int map_data_for_request(struct v
>      if (sc->sc_data_direction == DMA_NONE)
>          return 0;
>
> -    err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
> +    err = gnttab_alloc_grant_references(info->host->sg_tablesize, &gref_head);
>      if (err) {
>          pr_err("scsifront: gnttab_alloc_grant_references() error\n");
>          return -ENOMEM;
> @@ -266,7 +345,7 @@ static int map_data_for_request(struct v
>          unsigned int data_len = scsi_bufflen(sc);
>
>          nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> -        if (nr_pages > VSCSIIF_SG_TABLESIZE) {
> +        if (nr_pages > info->host->sg_tablesize) {
>              pr_err("scsifront: Unable to map request_buffer for command!\n");
>              ref_cnt = (-E2BIG);
>              goto big_to_sg;
> @@ -294,10 +373,10 @@ static int map_data_for_request(struct v
>              gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
>                      buffer_pfn, write);
>
> -            info->shadow[id].gref[ref_cnt] = ref;
> -            ring_req->seg[ref_cnt].gref = ref;
> -            ring_req->seg[ref_cnt].offset = (uint16_t)off;
> -            ring_req->seg[ref_cnt].length = (uint16_t)bytes;
> +            shadow->gref[ref_cnt] = ref;
> +            info->active.segs[ref_cnt].gref = ref;
> +            info->active.segs[ref_cnt].offset = off;
> +            info->active.segs[ref_cnt].length = bytes;
>
>              buffer_pfn++;
>              len -= bytes;
> @@ -336,34 +415,27 @@ static int scsifront_queuecommand(struct
>          return SCSI_MLQUEUE_HOST_BUSY;
>      }
>
> +    if (info->active.sc && !push_cmd_to_ring(info, NULL)) {
> +        scsifront_do_request(info);
> +        spin_unlock_irqrestore(shost->host_lock, flags);
> +        return SCSI_MLQUEUE_HOST_BUSY;
> +    }
> +
>      sc->result = 0;
>
>      ring_req = scsifront_pre_request(info);
>      rqid = ring_req->rqid;
> -    ring_req->act = VSCSIIF_ACT_SCSI_CDB;
> -
> -    ring_req->id = sc->device->id;
> -    ring_req->lun = sc->device->lun;
> -    ring_req->channel = sc->device->channel;
> -    ring_req->cmd_len = sc->cmd_len;
>
>      BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
>
> -    if ( sc->cmd_len )
> -        memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> -    else
> -        memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> -
> -    ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
> -    ring_req->timeout_per_command = (sc->request->timeout / HZ);
> -
>      info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
>      info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
> -    info->shadow[rqid].act = ring_req->act;
> +    info->shadow[rqid].act = VSCSIIF_ACT_SCSI_CDB;
>
> -    ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
> +    ref_cnt = map_data_for_request(info, sc, &info->shadow[rqid]);
>      if (ref_cnt < 0) {
>          add_id_to_freelist(info, rqid);
> +        scsifront_do_request(info);
>          spin_unlock_irqrestore(shost->host_lock, flags);
>          if (ref_cnt == (-ENOMEM))
>              return SCSI_MLQUEUE_HOST_BUSY;
> @@ -372,9 +444,13 @@ static int scsifront_queuecommand(struct
>          return 0;
>      }
>
> -    ring_req->nr_segments = (uint8_t)ref_cnt;
>      info->shadow[rqid].nr_segments = ref_cnt;
>
> +    info->active.sc = sc;
> +    info->active.rqid = rqid;
> +    info->active.done = 0;
> +    push_cmd_to_ring(info, ring_req);
> +
>      scsifront_do_request(info);
>      spin_unlock_irqrestore(shost->host_lock, flags);
>
> --- sle11sp3.orig/drivers/xen/scsifront/xenbus.c    2012-10-02 14:32:45.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/xenbus.c 2012-11-21 13:35:47.000000000 +0100
> @@ -43,6 +43,10 @@
>  #define DEFAULT_TASK_COMM_LEN   TASK_COMM_LEN
>  #endif
>
> +static unsigned int max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(max_segs, max_nr_segs, uint, 0);
> +MODULE_PARM_DESC(max_segs, "Maximum number of segments per request");
> +
>  extern struct scsi_host_template scsifront_sht;
>
>  static void scsifront_free(struct vscsifrnt_info *info)
> @@ -181,7 +185,9 @@ static int scsifront_probe(struct xenbus
>      int i, err = -ENOMEM;
>      char name[DEFAULT_TASK_COMM_LEN];
>
> -    host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
> +    host = scsi_host_alloc(&scsifront_sht,
> +                           offsetof(struct vscsifrnt_info,
> +                                    active.segs[max_nr_segs]));
>      if (!host) {
>          xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
>          return err;
>      }
> @@ -223,7 +229,7 @@ static int scsifront_probe(struct xenbus
>      host->max_id = VSCSIIF_MAX_TARGET;
>      host->max_channel = 0;
>      host->max_lun = VSCSIIF_MAX_LUN;
> -    host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
> +    host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
>      host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
>
>      err = scsi_add_host(host, &dev->dev);
> @@ -278,6 +284,23 @@ static int scsifront_disconnect(struct v
>      return 0;
>  }
>
> +static void scsifront_read_backend_params(struct xenbus_device *dev,
> +                                          struct vscsifrnt_info *info)
> +{
> +    unsigned int nr_segs;
> +    int ret;
> +    struct Scsi_Host *host = info->host;
> +
> +    ret = xenbus_scanf(XBT_NIL, dev->otherend, "segs-per-req", "%u",
> +                       &nr_segs);
> +    if (ret == 1 && nr_segs > host->sg_tablesize) {
> +        host->sg_tablesize = min(nr_segs, max_nr_segs);
> +        dev_info(&dev->dev, "using up to %d SG entries\n",
> +                 host->sg_tablesize);
> +        host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
> +    }
> +}
> +
>  #define VSCSIFRONT_OP_ADD_LUN   1
>  #define VSCSIFRONT_OP_DEL_LUN   2
>
> @@ -368,6 +391,7 @@ static void scsifront_backend_changed(st
>          break;
>
>      case XenbusStateConnected:
> +        scsifront_read_backend_params(dev, info);
>          if (xenbus_read_driver_state(dev->nodename) ==
>              XenbusStateInitialised) {
>              scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
> @@ -413,8 +437,13 @@ static DEFINE_XENBUS_DRIVER(scsifront, ,
>      .otherend_changed = scsifront_backend_changed,
>  );
>
> -int scsifront_xenbus_init(void)
> +int __init scsifront_xenbus_init(void)
>  {
> +    if (max_nr_segs > SG_ALL)
> +        max_nr_segs = SG_ALL;
> +    if (max_nr_segs < VSCSIIF_SG_TABLESIZE)
> +        max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +
>      return xenbus_register_frontend(&scsifront_driver);
>  }
>
> --- sle11sp3.orig/include/xen/interface/io/vscsiif.h        2008-07-21 11:00:33.000000000 +0200
> +++ sle11sp3/include/xen/interface/io/vscsiif.h     2012-11-22 14:32:31.000000000 +0100
> @@ -34,6 +34,7 @@
>  #define VSCSIIF_ACT_SCSI_CDB         1 /* SCSI CDB command */
>  #define VSCSIIF_ACT_SCSI_ABORT       2 /* SCSI Device(Lun) Abort*/
>  #define VSCSIIF_ACT_SCSI_RESET       3 /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET   4 /* Preset SG elements */
>
>
>  #define VSCSIIF_BACK_MAX_PENDING_REQS    128
> @@ -53,6 +54,12 @@
>  #define VSCSIIF_MAX_COMMAND_SIZE     16
>  #define VSCSIIF_SENSE_BUFFERSIZE     96
>
> +struct scsiif_request_segment {
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>
>  struct vscsiif_request {
>      uint16_t rqid;          /* private guest value, echoed in resp */
> @@ -69,18 +76,26 @@ struct vscsiif_request {
>                                DMA_NONE(3) requests */
>      uint8_t nr_segments;    /* Number of pieces of scatter-gather */
>
> -    struct scsiif_request_segment {
> -        grant_ref_t gref;
> -        uint16_t offset;
> -        uint16_t length;
> -    } seg[VSCSIIF_SG_TABLESIZE];
> +    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
>      uint32_t reserved[3];
>  };
>  typedef struct vscsiif_request vscsiif_request_t;
>
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> +                              / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> +    /* First two fields must match struct vscsiif_request! */
> +    uint16_t rqid;          /* private guest value, must match main req */
> +    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
> +    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
> +    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
>  struct vscsiif_response {
>      uint16_t rqid;
> -    uint8_t padding;
> +    uint8_t act;            /* valid only when backend supports SG_PRESET */
>      uint8_t sense_len;
>      uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
>      int32_t rslt;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel