[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] vscsiif: allow larger segments-per-request values



On Tue, Nov 27, 2012 at 11:37:31AM +0000, Jan Beulich wrote:
> At least certain tape devices require fixed size blocks to be operated
> upon, i.e. breaking up of I/O requests is not permitted. Consequently
> we need an interface extension that (leaving aside implementation
> limitations) doesn't impose a limit on the number of segments that can
> be associated with an individual request.
> 
> This, in turn, excludes the blkif extension FreeBSD folks implemented,
> as that still imposes an upper limit (the actual I/O request still
> specifies the full number of segments - as an 8-bit quantity -, and
> subsequent ring slots get used to carry the excess segment
> descriptors).
> 
> The alternative therefore is to allow the frontend to pre-set segment
> descriptors _before_ actually issuing the I/O request. I/O will then
> be done by the backend for the accumulated set of segments.

How do you deal with migration to older backends?
> 
> To properly associate segment preset operations with the main request,
> the rqid-s between them should match (originally I had hoped to use
> this to avoid producing individual responses for the pre-set
> operations, but that turned out to violate the underlying shared ring
> implementation).

Right. If we could separate those two, it would solve that.
So separate 'request' and 'response' rings.


> 
> Negotiation of the maximum number of segments a particular backend
> implementation supports happens through a new "segs-per-req" xenstore
> node.

No 'feature-segs-per-req'?

> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> As I have no plans to backport this to the 2.6.18 tree, I'm attaching
> for reference the full kernel side patch we're intending to use.

> 
> --- a/xen/include/public/io/vscsiif.h
> +++ b/xen/include/public/io/vscsiif.h
> @@ -34,6 +34,7 @@
>  #define VSCSIIF_ACT_SCSI_CDB         1    /* SCSI CDB command */
>  #define VSCSIIF_ACT_SCSI_ABORT       2    /* SCSI Device(Lun) Abort*/
>  #define VSCSIIF_ACT_SCSI_RESET       3    /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET   4    /* Preset SG elements */
>  
>  /*
>   * Maximum scatter/gather segments per request.
> @@ -50,6 +51,12 @@
>  #define VSCSIIF_MAX_COMMAND_SIZE         16
>  #define VSCSIIF_SENSE_BUFFERSIZE         96
>  
> +struct scsiif_request_segment {
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>  
>  struct vscsiif_request {
>      uint16_t rqid;          /* private guest value, echoed in resp  */
> @@ -66,18 +73,26 @@ struct vscsiif_request {
>                                           DMA_NONE(3) requests  */
>      uint8_t nr_segments;              /* Number of pieces of scatter-gather 
> */
>  
> -    struct scsiif_request_segment {
> -        grant_ref_t gref;
> -        uint16_t offset;
> -        uint16_t length;
> -    } seg[VSCSIIF_SG_TABLESIZE];
> +    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
>      uint32_t reserved[3];
>  };
>  typedef struct vscsiif_request vscsiif_request_t;
>  
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> +                              / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> +    /* First two fields must match struct vscsiif_request! */
> +    uint16_t rqid;          /* private guest value, must match main req */
> +    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
> +    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
> +    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
>  struct vscsiif_response {
>      uint16_t rqid;
> -    uint8_t padding;
> +    uint8_t act;               /* valid only when backend supports SG_PRESET 
> */
>      uint8_t sense_len;
>      uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
>      int32_t rslt;
> 
> 
> 

> vscsiif: allow larger segments-per-request values
> 
> At least certain tape devices require fixed size blocks to be operated
> upon, i.e. breaking up of I/O requests is not permitted. Consequently
> we need an interface extension that (leaving aside implementation
> limitations) doesn't impose a limit on the number of segments that can
> be associated with an individual request.
> 
> This, in turn, excludes the blkif extension FreeBSD folks implemented,
> as that still imposes an upper limit (the actual I/O request still
> specifies the full number of segments - as an 8-bit quantity -, and
> subsequent ring slots get used to carry the excess segment
> descriptors).
> 
> The alternative therefore is to allow the frontend to pre-set segment
> descriptors _before_ actually issuing the I/O request. I/O will then
> be done by the backend for the accumulated set of segments.
> 
> To properly associate segment preset operations with the main request,
> the rqid-s between them should match (originally I had hoped to use
> this to avoid producing individual responses for the pre-set
> operations, but that turned out to violate the underlying shared ring
> implementation).
> 
> Negotiation of the maximum number of segments a particular backend
> implementation supports happens through a new "segs-per-req" xenstore
> node.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> As I have no plans to backport this to the 2.6.18 tree, I'm attaching
> for reference the full kernel side patch we're intending to use.
> 
> --- a/xen/include/public/io/vscsiif.h
> +++ b/xen/include/public/io/vscsiif.h
> @@ -34,6 +34,7 @@
>  #define VSCSIIF_ACT_SCSI_CDB         1    /* SCSI CDB command */
>  #define VSCSIIF_ACT_SCSI_ABORT       2    /* SCSI Device(Lun) Abort*/
>  #define VSCSIIF_ACT_SCSI_RESET       3    /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET   4    /* Preset SG elements */
>  
>  /*
>   * Maximum scatter/gather segments per request.
> @@ -50,6 +51,12 @@
>  #define VSCSIIF_MAX_COMMAND_SIZE         16
>  #define VSCSIIF_SENSE_BUFFERSIZE         96
>  
> +struct scsiif_request_segment {
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>  
>  struct vscsiif_request {
>      uint16_t rqid;          /* private guest value, echoed in resp  */
> @@ -66,18 +73,26 @@ struct vscsiif_request {
>                                           DMA_NONE(3) requests  */
>      uint8_t nr_segments;              /* Number of pieces of scatter-gather 
> */
>  
> -    struct scsiif_request_segment {
> -        grant_ref_t gref;
> -        uint16_t offset;
> -        uint16_t length;
> -    } seg[VSCSIIF_SG_TABLESIZE];
> +    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
>      uint32_t reserved[3];
>  };
>  typedef struct vscsiif_request vscsiif_request_t;
>  
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> +                              / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> +    /* First two fields must match struct vscsiif_request! */
> +    uint16_t rqid;          /* private guest value, must match main req */
> +    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
> +    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
> +    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
>  struct vscsiif_response {
>      uint16_t rqid;
> -    uint8_t padding;
> +    uint8_t act;               /* valid only when backend supports SG_PRESET 
> */
>      uint8_t sense_len;
>      uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
>      int32_t rslt;

> --- sle11sp3.orig/drivers/xen/scsiback/common.h       2012-06-06 
> 13:53:26.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/common.h    2012-11-22 14:55:58.000000000 
> +0100
> @@ -94,10 +94,15 @@ struct vscsibk_info {
>       unsigned int waiting_reqs;
>       struct page **mmap_pages;
>  
> +     struct pending_req *preq;
> +
> +     union {
> +             struct gnttab_map_grant_ref   *gmap;
> +             struct gnttab_unmap_grant_ref *gunmap;
> +     };
>  };
>  
> -typedef struct {
> -     unsigned char act;
> +typedef struct pending_req {
>       struct vscsibk_info *info;
>       struct scsi_device *sdev;
>  
> @@ -114,7 +119,8 @@ typedef struct {
>       
>       uint32_t request_bufflen;
>       struct scatterlist *sgl;
> -     grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
> +     grant_ref_t *gref;
> +     vscsiif_segment_t *segs;
>  
>       int32_t rslt;
>       uint32_t resid;
> @@ -123,7 +129,7 @@ typedef struct {
>       struct list_head free_list;
>  } pending_req_t;
>  
> -
> +extern unsigned int vscsiif_segs;
>  
>  #define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
>  #define scsiback_put(_b)                             \
> @@ -163,7 +169,7 @@ void scsiback_release_translation_entry(
>  
>  void scsiback_cmd_exec(pending_req_t *pending_req);
>  void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> -                     uint32_t resid, pending_req_t *pending_req);
> +                     uint32_t resid, pending_req_t *, uint8_t act);
>  void scsiback_fast_flush_area(pending_req_t *req);
>  
>  void scsiback_rsp_emulation(pending_req_t *pending_req);
> --- sle11sp3.orig/drivers/xen/scsiback/emulate.c      2012-01-11 
> 12:14:54.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/emulate.c   2012-11-22 14:29:27.000000000 
> +0100
> @@ -352,7 +352,9 @@ void scsiback_req_emulation_or_cmdexec(p
>       else {
>               scsiback_fast_flush_area(pending_req);
>               scsiback_do_resp_with_sense(pending_req->sense_buffer,
> -               pending_req->rslt, pending_req->resid, pending_req);
> +                                         pending_req->rslt,
> +                                         pending_req->resid, pending_req,
> +                                         VSCSIIF_ACT_SCSI_CDB);
>       }
>  }
>  
> --- sle11sp3.orig/drivers/xen/scsiback/interface.c    2011-10-10 
> 11:58:37.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/interface.c 2012-11-13 13:21:10.000000000 
> +0100
> @@ -51,6 +51,13 @@ struct vscsibk_info *vscsibk_info_alloc(
>       if (!info)
>               return ERR_PTR(-ENOMEM);
>  
> +     info->gmap = kcalloc(max(sizeof(*info->gmap), sizeof(*info->gunmap)),
> +                          vscsiif_segs, GFP_KERNEL);
> +     if (!info->gmap) {
> +             kfree(info);
> +             return ERR_PTR(-ENOMEM);
> +     }
> +
>       info->domid = domid;
>       spin_lock_init(&info->ring_lock);
>       atomic_set(&info->nr_unreplied_reqs, 0);
> @@ -120,6 +127,7 @@ void scsiback_disconnect(struct vscsibk_
>  
>  void scsiback_free(struct vscsibk_info *info)
>  {
> +     kfree(info->gmap);
>       kmem_cache_free(scsiback_cachep, info);
>  }
>  
> --- sle11sp3.orig/drivers/xen/scsiback/scsiback.c     2012-11-22 
> 15:36:11.000000000 +0100
> +++ sle11sp3/drivers/xen/scsiback/scsiback.c  2012-11-22 15:36:16.000000000 
> +0100
> @@ -56,6 +56,10 @@ int vscsiif_reqs = VSCSIIF_BACK_MAX_PEND
>  module_param_named(reqs, vscsiif_reqs, int, 0);
>  MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
>  
> +unsigned int vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(segs, vscsiif_segs, uint, 0);
> +MODULE_PARM_DESC(segs, "Number of segments to allow per request");
> +
>  static unsigned int log_print_stat = 0;
>  module_param(log_print_stat, int, 0644);
>  
> @@ -67,7 +71,7 @@ static grant_handle_t *pending_grant_han
>  
>  static int vaddr_pagenr(pending_req_t *req, int seg)
>  {
> -     return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
> +     return (req - pending_reqs) * vscsiif_segs + seg;
>  }
>  
>  static unsigned long vaddr(pending_req_t *req, int seg)
> @@ -82,7 +86,7 @@ static unsigned long vaddr(pending_req_t
>  
>  void scsiback_fast_flush_area(pending_req_t *req)
>  {
> -     struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
> +     struct gnttab_unmap_grant_ref *unmap = req->info->gunmap;
>       unsigned int i, invcount = 0;
>       grant_handle_t handle;
>       int err;
> @@ -117,6 +121,7 @@ static pending_req_t * alloc_req(struct 
>       if (!list_empty(&pending_free)) {
>               req = list_entry(pending_free.next, pending_req_t, free_list);
>               list_del(&req->free_list);
> +             req->nr_segments = 0;
>       }
>       spin_unlock_irqrestore(&pending_free_lock, flags);
>       return req;
> @@ -144,7 +149,8 @@ static void scsiback_notify_work(struct 
>  }
>  
>  void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> -                     uint32_t resid, pending_req_t *pending_req)
> +                              uint32_t resid, pending_req_t *pending_req,
> +                              uint8_t act)
>  {
>       vscsiif_response_t *ring_res;
>       struct vscsibk_info *info = pending_req->info;
> @@ -159,6 +165,7 @@ void scsiback_do_resp_with_sense(char *s
>       ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
>       info->ring.rsp_prod_pvt++;
>  
> +     ring_res->act    = act;
>       ring_res->rslt   = result;
>       ring_res->rqid   = pending_req->rqid;
>  
> @@ -186,7 +193,8 @@ void scsiback_do_resp_with_sense(char *s
>       if (notify)
>               notify_remote_via_irq(info->irq);
>  
> -     free_req(pending_req);
> +     if (act != VSCSIIF_ACT_SCSI_SG_PRESET)
> +             free_req(pending_req);
>  }
>  
>  static void scsiback_print_status(char *sense_buffer, int errors,
> @@ -225,25 +233,25 @@ static void scsiback_cmd_done(struct req
>               scsiback_rsp_emulation(pending_req);
>  
>       scsiback_fast_flush_area(pending_req);
> -     scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
> +     scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req,
> +                                 VSCSIIF_ACT_SCSI_CDB);
>       scsiback_put(pending_req->info);
>  
>       __blk_put_request(req->q, req);
>  }
>  
>  
> -static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
> -                                     pending_req_t *pending_req)
> +static int scsiback_gnttab_data_map(const vscsiif_segment_t *segs,
> +                                 unsigned int nr_segs,
> +                                 pending_req_t *pending_req)
>  {
>       u32 flags;
> -     int write;
> -     int i, err = 0;
> -     unsigned int data_len = 0;
> -     struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
> +     int write, err = 0;
> +     unsigned int i, j, data_len = 0;
>       struct vscsibk_info *info   = pending_req->info;
> -
> +     struct gnttab_map_grant_ref *map = info->gmap;
>       int data_dir = (int)pending_req->sc_data_direction;
> -     unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
> +     unsigned int nr_segments = pending_req->nr_segments + nr_segs;
>  
>       write = (data_dir == DMA_TO_DEVICE);
>  
> @@ -264,14 +272,20 @@ static int scsiback_gnttab_data_map(vscs
>               if (write)
>                       flags |= GNTMAP_readonly;
>  
> -             for (i = 0; i < nr_segments; i++)
> +             for (i = 0; i < pending_req->nr_segments; i++)
>                       gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> -                                             ring_req->seg[i].gref,
> +                                             pending_req->segs[i].gref,
> +                                             info->domid);
> +             for (j = 0; i < nr_segments; i++, j++)
> +                     gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
> +                                             segs[j].gref,
>                                               info->domid);
>  
> +
>               err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, 
> nr_segments);
>               BUG_ON(err);
>  
> +             j = 0;
>               for_each_sg (pending_req->sgl, sg, nr_segments, i) {
>                       struct page *pg;
>  
> @@ -294,8 +308,15 @@ static int scsiback_gnttab_data_map(vscs
>                       set_phys_to_machine(page_to_pfn(pg),
>                               FOREIGN_FRAME(map[i].dev_bus_addr >> 
> PAGE_SHIFT));
>  
> -                     sg_set_page(sg, pg, ring_req->seg[i].length,
> -                                 ring_req->seg[i].offset);
> +                     if (i < pending_req->nr_segments)
> +                             sg_set_page(sg, pg,
> +                                         pending_req->segs[i].length,
> +                                         pending_req->segs[i].offset);
> +                     else {
> +                             sg_set_page(sg, pg, segs[j].length,
> +                                         segs[j].offset);
> +                             ++j;
> +                     }
>                       data_len += sg->length;
>  
>                       barrier();
> @@ -306,6 +327,8 @@ static int scsiback_gnttab_data_map(vscs
>  
>               }
>  
> +             pending_req->nr_segments = nr_segments;
> +
>               if (err)
>                       goto fail_flush;
>       }
> @@ -471,7 +494,8 @@ static void scsiback_device_reset_exec(p
>       scsiback_get(info);
>       err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
>  
> -     scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
> +     scsiback_do_resp_with_sense(NULL, err, 0, pending_req,
> +                                 VSCSIIF_ACT_SCSI_RESET);
>       scsiback_put(info);
>  
>       return;
> @@ -489,13 +513,11 @@ static int prepare_pending_reqs(struct v
>  {
>       struct scsi_device *sdev;
>       struct ids_tuple vir;
> +     unsigned int nr_segs;
>       int err = -EINVAL;
>  
>       DPRINTK("%s\n",__FUNCTION__);
>  
> -     pending_req->rqid       = ring_req->rqid;
> -     pending_req->act        = ring_req->act;
> -
>       pending_req->info       = info;
>  
>       pending_req->v_chn = vir.chn = ring_req->channel;
> @@ -525,11 +547,10 @@ static int prepare_pending_reqs(struct v
>               goto invalid_value;
>       }
>  
> -     pending_req->nr_segments = ring_req->nr_segments;
> +     nr_segs = ring_req->nr_segments;
>       barrier();
> -     if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
> -             DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
> -                     pending_req->nr_segments);
> +     if (pending_req->nr_segments + nr_segs > vscsiif_segs) {
> +             DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
>               err = -EINVAL;
>               goto invalid_value;
>       }
> @@ -546,7 +567,7 @@ static int prepare_pending_reqs(struct v
>       
>       pending_req->timeout_per_command = ring_req->timeout_per_command;
>  
> -     if(scsiback_gnttab_data_map(ring_req, pending_req)) {
> +     if (scsiback_gnttab_data_map(ring_req->seg, nr_segs, pending_req)) {
>               DPRINTK("scsiback: invalid buffer\n");
>               err = -EINVAL;
>               goto invalid_value;
> @@ -558,6 +579,20 @@ invalid_value:
>       return err;
>  }
>  
> +static void latch_segments(pending_req_t *pending_req,
> +                        const struct vscsiif_sg_list *sgl)
> +{
> +     unsigned int nr_segs = sgl->nr_segments;
> +
> +     barrier();
> +     if (pending_req->nr_segments + nr_segs <= vscsiif_segs) {
> +             memcpy(pending_req->segs + pending_req->nr_segments,
> +                    sgl->seg, nr_segs * sizeof(*sgl->seg));
> +             pending_req->nr_segments += nr_segs;
> +     }
> +     else
> +             DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
> +}
>  
>  static int _scsiback_do_cmd_fn(struct vscsibk_info *info)
>  {
> @@ -575,9 +610,11 @@ static int _scsiback_do_cmd_fn(struct vs
>       rmb();
>  
>       while ((rc != rp)) {
> +             int act, rqid;
> +
>               if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
>                       break;
> -             pending_req = alloc_req(info);
> +             pending_req = info->preq ?: alloc_req(info);
>               if (NULL == pending_req) {
>                       more_to_do = 1;
>                       break;
> @@ -586,32 +623,55 @@ static int _scsiback_do_cmd_fn(struct vs
>               ring_req = RING_GET_REQUEST(ring, rc);
>               ring->req_cons = ++rc;
>  
> +             act = ring_req->act;
> +             rqid = ring_req->rqid;
> +             barrier();
> +             if (!pending_req->nr_segments)
> +                     pending_req->rqid = rqid;
> +             else if (pending_req->rqid != rqid)
> +                     DPRINTK("scsiback: invalid rqid %04x, expected %04x\n",
> +                             rqid, pending_req->rqid);
> +
> +             info->preq = NULL;
> +             if (pending_req->rqid != rqid) {
> +                     scsiback_do_resp_with_sense(NULL, DRIVER_INVALID << 24,
> +                                                 0, pending_req, act);
> +                     continue;
> +             }
> +
> +             if (act == VSCSIIF_ACT_SCSI_SG_PRESET) {
> +                     latch_segments(pending_req, (void *)ring_req);
> +                     info->preq = pending_req;
> +                     scsiback_do_resp_with_sense(NULL, 0, 0,
> +                                                 pending_req, act);
> +                     continue;
> +             }
> +
>               err = prepare_pending_reqs(info, ring_req,
>                                               pending_req);
>               if (err == -EINVAL) {
>                       scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> -                             0, pending_req);
> +                                                 0, pending_req, act);
>                       continue;
>               } else if (err == -ENODEV) {
>                       scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 
> 16),
> -                             0, pending_req);
> +                                                 0, pending_req, act);
>                       continue;
>               }
>  
> -             if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
> -
> +             if (act == VSCSIIF_ACT_SCSI_CDB) {
>                       /* The Host mode is through as for Emulation. */
>                       if (info->feature == VSCSI_TYPE_HOST)
>                               scsiback_cmd_exec(pending_req);
>                       else
>                               scsiback_req_emulation_or_cmdexec(pending_req);
>  
> -             } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
> +             } else if (act == VSCSIIF_ACT_SCSI_RESET) {
>                       scsiback_device_reset_exec(pending_req);
>               } else {
>                       pr_err("scsiback: invalid parameter for request\n");
>                       scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
> -                             0, pending_req);
> +                                                 0, pending_req, act);
>                       continue;
>               }
>       }
> @@ -673,17 +733,32 @@ static int __init scsiback_init(void)
>       if (!is_running_on_xen())
>               return -ENODEV;
>  
> -     mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
> +     if (vscsiif_segs < VSCSIIF_SG_TABLESIZE)
> +             vscsiif_segs = VSCSIIF_SG_TABLESIZE;
> +     if (vscsiif_segs != (uint8_t)vscsiif_segs)
> +             return -EINVAL;
> +     mmap_pages = vscsiif_reqs * vscsiif_segs;
>  
>       pending_reqs          = kzalloc(sizeof(pending_reqs[0]) *
>                                       vscsiif_reqs, GFP_KERNEL);
> +     if (!pending_reqs)
> +             return -ENOMEM;
>       pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
>                                       mmap_pages, GFP_KERNEL);
>       pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
>  
> -     if (!pending_reqs || !pending_grant_handles || !pending_pages)
> +     if (!pending_grant_handles || !pending_pages)
>               goto out_of_memory;
>  
> +     for (i = 0; i < vscsiif_reqs; ++i) {
> +             pending_reqs[i].gref = kcalloc(sizeof(*pending_reqs->gref),
> +                                            vscsiif_segs, GFP_KERNEL);
> +             pending_reqs[i].segs = kcalloc(sizeof(*pending_reqs->segs),
> +                                            vscsiif_segs, GFP_KERNEL);
> +             if (!pending_reqs[i].gref || !pending_reqs[i].segs)
> +                     goto out_of_memory;
> +     }
> +
>       for (i = 0; i < mmap_pages; i++)
>               pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
>  
> @@ -705,6 +780,10 @@ static int __init scsiback_init(void)
>  out_interface:
>       scsiback_interface_exit();
>  out_of_memory:
> +     for (i = 0; i < vscsiif_reqs; ++i) {
> +             kfree(pending_reqs[i].gref);
> +             kfree(pending_reqs[i].segs);
> +     }
>       kfree(pending_reqs);
>       kfree(pending_grant_handles);
>       free_empty_pages_and_pagevec(pending_pages, mmap_pages);
> @@ -715,12 +794,17 @@ out_of_memory:
>  #if 0
>  static void __exit scsiback_exit(void)
>  {
> +     unsigned int i;
> +
>       scsiback_xenbus_unregister();
>       scsiback_interface_exit();
> +     for (i = 0; i < vscsiif_reqs; ++i) {
> +             kfree(pending_reqs[i].gref);
> +             kfree(pending_reqs[i].segs);
> +     }
>       kfree(pending_reqs);
>       kfree(pending_grant_handles);
> -     free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * 
> VSCSIIF_SG_TABLESIZE));
> -
> +     free_empty_pages_and_pagevec(pending_pages, vscsiif_reqs * 
> vscsiif_segs);
>  }
>  #endif
>  
> --- sle11sp3.orig/drivers/xen/scsiback/xenbus.c       2011-06-30 
> 17:04:59.000000000 +0200
> +++ sle11sp3/drivers/xen/scsiback/xenbus.c    2012-11-13 14:36:16.000000000 
> +0100
> @@ -339,6 +339,13 @@ static int scsiback_probe(struct xenbus_
>       if (val)
>               be->info->feature = VSCSI_TYPE_HOST;
>  
> +     if (vscsiif_segs > VSCSIIF_SG_TABLESIZE) {
> +             err = xenbus_printf(XBT_NIL, dev->nodename, "segs-per-req",
> +                                 "%u", vscsiif_segs);
> +             if (err)
> +                     xenbus_dev_error(dev, err, "writing segs-per-req");
> +     }
> +
>       err = xenbus_switch_state(dev, XenbusStateInitWait);
>       if (err)
>               goto fail;
> --- sle11sp3.orig/drivers/xen/scsifront/common.h      2011-01-31 
> 17:29:16.000000000 +0100
> +++ sle11sp3/drivers/xen/scsifront/common.h   2012-11-22 13:45:50.000000000 
> +0100
> @@ -95,7 +95,7 @@ struct vscsifrnt_shadow {
>  
>       /* requested struct scsi_cmnd is stored from kernel */
>       unsigned long req_scsi_cmnd;
> -     int gref[VSCSIIF_SG_TABLESIZE];
> +     int gref[SG_ALL];
>  };
>  
>  struct vscsifrnt_info {
> @@ -110,7 +110,6 @@ struct vscsifrnt_info {
>  
>       grant_ref_t ring_ref;
>       struct vscsiif_front_ring ring;
> -     struct vscsiif_response ring_res;
>  
>       struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
>       uint32_t shadow_free;
> @@ -119,6 +118,12 @@ struct vscsifrnt_info {
>       wait_queue_head_t wq;
>       unsigned int waiting_resp;
>  
> +     struct {
> +             struct scsi_cmnd *sc;
> +             unsigned int rqid;
> +             unsigned int done;
> +             vscsiif_segment_t segs[];
> +     } active;
>  };
>  
>  #define DPRINTK(_f, _a...)                           \
> --- sle11sp3.orig/drivers/xen/scsifront/scsifront.c   2011-06-28 
> 18:57:14.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/scsifront.c        2012-11-22 
> 16:37:35.000000000 +0100
> @@ -106,6 +106,66 @@ irqreturn_t scsifront_intr(int irq, void
>       return IRQ_HANDLED;
>  }
>  
> +static bool push_cmd_to_ring(struct vscsifrnt_info *info,
> +                          vscsiif_request_t *ring_req)
> +{
> +     unsigned int left, rqid = info->active.rqid;
> +     struct scsi_cmnd *sc;
> +
> +     for (; ; ring_req = NULL) {
> +             struct vscsiif_sg_list *sgl;
> +
> +             if (!ring_req) {
> +                     struct vscsiif_front_ring *ring = &info->ring;
> +
> +                     ring_req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
> +                     ring->req_prod_pvt++;
> +                     ring_req->rqid = rqid;
> +             }
> +
> +             left = info->shadow[rqid].nr_segments - info->active.done;
> +             if (left <= VSCSIIF_SG_TABLESIZE)
> +                     break;
> +
> +             sgl = (void *)ring_req;
> +             sgl->act = VSCSIIF_ACT_SCSI_SG_PRESET;
> +
> +             if (left > VSCSIIF_SG_LIST_SIZE)
> +                     left = VSCSIIF_SG_LIST_SIZE;
> +             memcpy(sgl->seg, info->active.segs + info->active.done,
> +                    left * sizeof(*sgl->seg));
> +
> +             sgl->nr_segments = left;
> +             info->active.done += left;
> +
> +             if (RING_FULL(&info->ring))
> +                     return false;
> +     }
> +
> +     sc = info->active.sc;
> +
> +     ring_req->act     = VSCSIIF_ACT_SCSI_CDB;
> +     ring_req->id      = sc->device->id;
> +     ring_req->lun     = sc->device->lun;
> +     ring_req->channel = sc->device->channel;
> +     ring_req->cmd_len = sc->cmd_len;
> +
> +     if ( sc->cmd_len )
> +             memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> +     else
> +             memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> +
> +     ring_req->sc_data_direction   = sc->sc_data_direction;
> +     ring_req->timeout_per_command = sc->request->timeout / HZ;
> +     ring_req->nr_segments         = left;
> +
> +     memcpy(ring_req->seg, info->active.segs + info->active.done,
> +               left * sizeof(*ring_req->seg));
> +
> +     info->active.sc = NULL;
> +
> +     return !RING_FULL(&info->ring);
> +}
>  
>  static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
>  {
> @@ -194,6 +254,16 @@ int scsifront_cmd_done(struct vscsifrnt_
>               
>               ring_res = RING_GET_RESPONSE(&info->ring, i);
>  
> +             if (info->host->sg_tablesize > VSCSIIF_SG_TABLESIZE) {
> +                     u8 act = ring_res->act;
> +
> +                     if (act == VSCSIIF_ACT_SCSI_SG_PRESET)
> +                             continue;
> +                     if (act != info->shadow[ring_res->rqid].act)
> +                             DPRINTK("Bogus backend response (%02x vs 
> %02x)\n",
> +                                     act, info->shadow[ring_res->rqid].act);
> +             }
> +
>               if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
>                       scsifront_cdb_cmd_done(info, ring_res);
>               else
> @@ -208,8 +278,16 @@ int scsifront_cmd_done(struct vscsifrnt_
>               info->ring.sring->rsp_event = i + 1;
>       }
>  
> -     spin_unlock_irqrestore(&info->io_lock, flags);
> +     spin_unlock(&info->io_lock);
> +
> +     spin_lock(info->host->host_lock);
> +
> +     if (info->active.sc && !RING_FULL(&info->ring)) {
> +             push_cmd_to_ring(info, NULL);
> +             scsifront_do_request(info);
> +     }
>  
> +     spin_unlock_irqrestore(info->host->host_lock, flags);
>  
>       /* Yield point for this unbounded loop. */
>       cond_resched();
> @@ -242,7 +320,8 @@ int scsifront_schedule(void *data)
>  
>  
>  static int map_data_for_request(struct vscsifrnt_info *info,
> -             struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
> +                             struct scsi_cmnd *sc,
> +                             struct vscsifrnt_shadow *shadow)
>  {
>       grant_ref_t gref_head;
>       struct page *page;
> @@ -254,7 +333,7 @@ static int map_data_for_request(struct v
>       if (sc->sc_data_direction == DMA_NONE)
>               return 0;
>  
> -     err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
> +     err = gnttab_alloc_grant_references(info->host->sg_tablesize, 
> &gref_head);
>       if (err) {
>               pr_err("scsifront: gnttab_alloc_grant_references() error\n");
>               return -ENOMEM;
> @@ -266,7 +345,7 @@ static int map_data_for_request(struct v
>               unsigned int data_len = scsi_bufflen(sc);
>  
>               nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> 
> PAGE_SHIFT;
> -             if (nr_pages > VSCSIIF_SG_TABLESIZE) {
> +             if (nr_pages > info->host->sg_tablesize) {
>                       pr_err("scsifront: Unable to map request_buffer for 
> command!\n");
>                       ref_cnt = (-E2BIG);
>                       goto big_to_sg;
> @@ -294,10 +373,10 @@ static int map_data_for_request(struct v
>                               gnttab_grant_foreign_access_ref(ref, 
> info->dev->otherend_id,
>                                       buffer_pfn, write);
>  
> -                             info->shadow[id].gref[ref_cnt]  = ref;
> -                             ring_req->seg[ref_cnt].gref     = ref;
> -                             ring_req->seg[ref_cnt].offset   = (uint16_t)off;
> -                             ring_req->seg[ref_cnt].length   = 
> (uint16_t)bytes;
> +                             shadow->gref[ref_cnt] = ref;
> +                             info->active.segs[ref_cnt].gref   = ref;
> +                             info->active.segs[ref_cnt].offset = off;
> +                             info->active.segs[ref_cnt].length = bytes;
>  
>                               buffer_pfn++;
>                               len -= bytes;
> @@ -336,34 +415,27 @@ static int scsifront_queuecommand(struct
>               return SCSI_MLQUEUE_HOST_BUSY;
>       }
>  
> +     if (info->active.sc && !push_cmd_to_ring(info, NULL)) {
> +             scsifront_do_request(info);
> +             spin_unlock_irqrestore(shost->host_lock, flags);
> +             return SCSI_MLQUEUE_HOST_BUSY;
> +     }
> +
>       sc->result    = 0;
>  
>       ring_req          = scsifront_pre_request(info);
>       rqid              = ring_req->rqid;
> -     ring_req->act     = VSCSIIF_ACT_SCSI_CDB;
> -
> -     ring_req->id      = sc->device->id;
> -     ring_req->lun     = sc->device->lun;
> -     ring_req->channel = sc->device->channel;
> -     ring_req->cmd_len = sc->cmd_len;
>  
>       BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
>  
> -     if ( sc->cmd_len )
> -             memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> -     else
> -             memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> -
> -     ring_req->sc_data_direction   = (uint8_t)sc->sc_data_direction;
> -     ring_req->timeout_per_command = (sc->request->timeout / HZ);
> -
>       info->shadow[rqid].req_scsi_cmnd     = (unsigned long)sc;
>       info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
> -     info->shadow[rqid].act               = ring_req->act;
> +     info->shadow[rqid].act               = VSCSIIF_ACT_SCSI_CDB;
>  
> -     ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
> +     ref_cnt = map_data_for_request(info, sc, &info->shadow[rqid]);
>       if (ref_cnt < 0) {
>               add_id_to_freelist(info, rqid);
> +             scsifront_do_request(info);
>               spin_unlock_irqrestore(shost->host_lock, flags);
>               if (ref_cnt == (-ENOMEM))
>                       return SCSI_MLQUEUE_HOST_BUSY;
> @@ -372,9 +444,13 @@ static int scsifront_queuecommand(struct
>               return 0;
>       }
>  
> -     ring_req->nr_segments          = (uint8_t)ref_cnt;
>       info->shadow[rqid].nr_segments = ref_cnt;
>  
> +     info->active.sc  = sc;
> +     info->active.rqid = rqid;
> +     info->active.done = 0;
> +     push_cmd_to_ring(info, ring_req);
> +
>       scsifront_do_request(info);
>       spin_unlock_irqrestore(shost->host_lock, flags);
>  
> --- sle11sp3.orig/drivers/xen/scsifront/xenbus.c      2012-10-02 
> 14:32:45.000000000 +0200
> +++ sle11sp3/drivers/xen/scsifront/xenbus.c   2012-11-21 13:35:47.000000000 
> +0100
> @@ -43,6 +43,10 @@
>    #define DEFAULT_TASK_COMM_LEN      TASK_COMM_LEN
>  #endif
>  
> +static unsigned int max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +module_param_named(max_segs, max_nr_segs, uint, 0);
> +MODULE_PARM_DESC(max_segs, "Maximum number of segments per request");
> +
>  extern struct scsi_host_template scsifront_sht;
>  
>  static void scsifront_free(struct vscsifrnt_info *info)
> @@ -181,7 +185,9 @@ static int scsifront_probe(struct xenbus
>       int i, err = -ENOMEM;
>       char name[DEFAULT_TASK_COMM_LEN];
>  
> -     host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
> +     host = scsi_host_alloc(&scsifront_sht,
> +                            offsetof(struct vscsifrnt_info,
> +                                     active.segs[max_nr_segs]));
>       if (!host) {
>               xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
>               return err;
> @@ -223,7 +229,7 @@ static int scsifront_probe(struct xenbus
>       host->max_id      = VSCSIIF_MAX_TARGET;
>       host->max_channel = 0;
>       host->max_lun     = VSCSIIF_MAX_LUN;
> -     host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
> +     host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
>       host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
>  
>       err = scsi_add_host(host, &dev->dev);
> @@ -278,6 +284,23 @@ static int scsifront_disconnect(struct v
>       return 0;
>  }
>  
> +static void scsifront_read_backend_params(struct xenbus_device *dev,
> +                                       struct vscsifrnt_info *info)
> +{
> +     unsigned int nr_segs;
> +     int ret;
> +     struct Scsi_Host *host = info->host;
> +
> +     ret = xenbus_scanf(XBT_NIL, dev->otherend, "segs-per-req", "%u",
> +                        &nr_segs);
> +     if (ret == 1 && nr_segs > host->sg_tablesize) {
> +             host->sg_tablesize = min(nr_segs, max_nr_segs);
> +             dev_info(&dev->dev, "using up to %d SG entries\n",
> +                      host->sg_tablesize);
> +             host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
> +     }
> +}
> +
>  #define VSCSIFRONT_OP_ADD_LUN        1
>  #define VSCSIFRONT_OP_DEL_LUN        2
>  
> @@ -368,6 +391,7 @@ static void scsifront_backend_changed(st
>               break;
>  
>       case XenbusStateConnected:
> +             scsifront_read_backend_params(dev, info);
>               if (xenbus_read_driver_state(dev->nodename) ==
>                       XenbusStateInitialised) {
>                       scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
> @@ -413,8 +437,13 @@ static DEFINE_XENBUS_DRIVER(scsifront, ,
>       .otherend_changed       = scsifront_backend_changed,
>  );
>  
> -int scsifront_xenbus_init(void)
> +int __init scsifront_xenbus_init(void)
>  {
> +     if (max_nr_segs > SG_ALL)
> +             max_nr_segs = SG_ALL;
> +     if (max_nr_segs < VSCSIIF_SG_TABLESIZE)
> +             max_nr_segs = VSCSIIF_SG_TABLESIZE;
> +
>       return xenbus_register_frontend(&scsifront_driver);
>  }
>  
> --- sle11sp3.orig/include/xen/interface/io/vscsiif.h  2008-07-21 
> 11:00:33.000000000 +0200
> +++ sle11sp3/include/xen/interface/io/vscsiif.h       2012-11-22 
> 14:32:31.000000000 +0100
> @@ -34,6 +34,7 @@
>  #define VSCSIIF_ACT_SCSI_CDB         1    /* SCSI CDB command */
>  #define VSCSIIF_ACT_SCSI_ABORT       2    /* SCSI Device(Lun) Abort*/
>  #define VSCSIIF_ACT_SCSI_RESET       3    /* SCSI Device(Lun) Reset*/
> +#define VSCSIIF_ACT_SCSI_SG_PRESET   4    /* Preset SG elements */
>  
>  
>  #define VSCSIIF_BACK_MAX_PENDING_REQS    128
> @@ -53,6 +54,12 @@
>  #define VSCSIIF_MAX_COMMAND_SIZE         16
>  #define VSCSIIF_SENSE_BUFFERSIZE         96
>  
> +struct scsiif_request_segment {
> +    grant_ref_t gref;
> +    uint16_t offset;
> +    uint16_t length;
> +};
> +typedef struct scsiif_request_segment vscsiif_segment_t;
>  
>  struct vscsiif_request {
>      uint16_t rqid;          /* private guest value, echoed in resp  */
> @@ -69,18 +76,26 @@ struct vscsiif_request {
>                                           DMA_NONE(3) requests  */
>      uint8_t nr_segments;              /* Number of pieces of scatter-gather 
> */
>  
> -    struct scsiif_request_segment {
> -        grant_ref_t gref;
> -        uint16_t offset;
> -        uint16_t length;
> -    } seg[VSCSIIF_SG_TABLESIZE];
> +    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
>      uint32_t reserved[3];
>  };
>  typedef struct vscsiif_request vscsiif_request_t;
>  
> +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
> +                              / sizeof(vscsiif_segment_t))
> +
> +struct vscsiif_sg_list {
> +    /* First two fields must match struct vscsiif_request! */
> +    uint16_t rqid;          /* private guest value, must match main req */
> +    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
> +    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
> +    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
> +};
> +typedef struct vscsiif_sg_list vscsiif_sg_list_t;
> +
>  struct vscsiif_response {
>      uint16_t rqid;
> -    uint8_t padding;
> +    uint8_t act;               /* valid only when backend supports SG_PRESET 
> */
>      uint8_t sense_len;
>      uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
>      int32_t rslt;

> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.