|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH V4 3/4] Introduce XEN scsiback module
Hi Juergen & Co,
Finally had a chance to review this code. Comments are inline below..
On Fri, 2014-08-08 at 09:49 +0200, jgross@xxxxxxxx wrote:
> From: Juergen Gross <jgross@xxxxxxxx>
>
> Introduces the XEN pvSCSI backend. With pvSCSI it is possible for a XEN domU
> to issue SCSI commands to a SCSI LUN assigned to that domU. The SCSI commands
> are passed to the pvSCSI backend in a driver domain (usually Dom0) which is
> owner of the physical device. This allows e.g. to use SCSI tape drives in a
> XEN domU.
>
> The code is taken from the pvSCSI implementation in XEN done by Fujitsu based
> on Linux kernel 2.6.18.
>
> Changes from the original version are:
> - port to upstream kernel
> - put all code in just one source file
> - adapt to Linux style guide
> - use target core infrastructure instead doing pure pass-through
> - enable module unloading
> - support SG-list in grant page(s)
> - support task abort
> - remove redundant struct backend
> - allocate resources dynamically
> - correct minor error in scsiback_fast_flush_area
> - free allocated resources in case of error during I/O preparation
> - remove CDB emulation, now handled by target core infrastructure
>
> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
>
> Xen related parts
> Acked-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
<SNIP>
> diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
> new file mode 100644
> index 0000000..4a0d6e3
> --- /dev/null
> +++ b/drivers/xen/xen-scsiback.c
<SNIP>
> +struct scsiback_nacl {
> + /* Binary World Wide unique Port Name for pvscsi Initiator port */
> + u64 iport_wwpn;
> + /* ASCII formatted WWPN for Sas Initiator port */
> + char iport_name[VSCSI_NAMELEN];
> + /* Returned by scsiback_make_nodeacl() */
> + struct se_node_acl se_node_acl;
> +};
> +
Given that this code is similar to how loopback + vhost-scsi function,
and uses a (locally) generated nexus for each WWPN endpoint, the
scsiback_nacl and associated code will be unused should be be dropped.
<SNIP>
> +static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
> + uint32_t resid, struct vscsibk_pend *pending_req)
> +{
> + struct vscsiif_response *ring_res;
> + struct vscsibk_info *info = pending_req->info;
> + int notify;
> + struct scsi_sense_hdr sshdr;
> + unsigned long flags;
> + unsigned len;
> +
> + spin_lock_irqsave(&info->ring_lock, flags);
> +
> + ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
> + info->ring.rsp_prod_pvt++;
> +
> + ring_res->rslt = result;
> + ring_res->rqid = pending_req->rqid;
> +
> + if (sense_buffer != NULL &&
> + scsi_normalize_sense(sense_buffer, VSCSIIF_SENSE_BUFFERSIZE,
> + &sshdr)) {
> + len = min_t(unsigned, 8 + sense_buffer[7],
> + VSCSIIF_SENSE_BUFFERSIZE);
> + memcpy(ring_res->sense_buffer, sense_buffer, len);
> + ring_res->sense_len = len;
> + } else {
> + ring_res->sense_len = 0;
> + }
> +
> + ring_res->residual_len = resid;
> +
> + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
> + spin_unlock_irqrestore(&info->ring_lock, flags);
> +
> + if (notify)
> + notify_remote_via_irq(info->irq);
> +
> + if (pending_req->v2p)
> + kref_put(&pending_req->v2p->kref,
> + scsiback_free_translation_entry);
> +
> + kmem_cache_free(scsiback_cachep, pending_req);
> +}
> +
> +static void scsiback_cmd_done(struct vscsibk_pend *pending_req)
> +{
> + struct vscsibk_info *info = pending_req->info;
> + unsigned char *sense_buffer;
> + unsigned int resid;
> + int errors;
> +
> + sense_buffer = pending_req->sense_buffer;
> + resid = pending_req->se_cmd.residual_count;
> + errors = pending_req->result;
> +
> + if (errors && log_print_stat)
> + scsiback_print_status(sense_buffer, errors, pending_req);
> +
> + scsiback_fast_flush_area(pending_req);
> + scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
> + scsiback_put(info);
> +
> + transport_generic_free_cmd(&pending_req->se_cmd, 0);
> +}
> +
The usage here of scsiback_do_resp_with_sense() -> kmem_cache_free() for
*pending_req, and then invoking transport_generic_free_cmd() with
&pending_req->se_cmd is an free after use bug..
So the way this should work is similar to how loopback currently does
things:
- Move the kmem_cache_free() for pending_req from
scsiback_do_resp_with_sense() to scsiback_release_cmd()
- Remove the transport_generic_free_cmd() from scsiback_cmd_done()
- Copy what tcm_loop_check_stop_free() does into
scsiback_check_stop_free(), and remove target_put_sess_cmd()
> +static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
> +{
> + struct se_cmd *se_cmd = &pending_req->se_cmd;
> + struct se_session *sess = pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
> + int rc;
> +
> + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
> +
> + memset(se_cmd, 0, sizeof(*se_cmd));
> + se_cmd->prot_pto = true;
> +
No need to set prot_pto = true, as T10_PI support is not enabled..
> + scsiback_get(pending_req->info);
> + rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd,
> + pending_req->sense_buffer, pending_req->v2p->lun,
> + pending_req->data_len, 0,
> + pending_req->sc_data_direction, 0,
> + pending_req->sgl, pending_req->n_sg,
> + NULL, 0, NULL, 0);
> + if (rc < 0) {
> + transport_send_check_condition_and_sense(se_cmd,
> + TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
> + transport_generic_free_cmd(se_cmd, 0);
> + }
> +}
> +
<SNIP>
> +
> +static void scsiback_device_action(struct vscsibk_pend *pending_req,
> + enum tcm_tmreq_table act, int tag)
> +{
> + int rc, err = FAILED;
> + struct scsiback_tpg *tpg = pending_req->v2p->tpg;
> + struct se_cmd *se_cmd = &pending_req->se_cmd;
> + struct scsiback_tmr *tmr;
> +
> + tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
> + if (!tmr) {
> + pr_err("xen-pvscsi: %s: kmalloc() error\n", __func__);
> + goto out;
> + }
> + init_waitqueue_head(&tmr->tmr_wait);
> +
> + transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
> + tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
> + &pending_req->sense_buffer[0]);
> +
> + rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
> + if (rc < 0)
> + goto out;
> +
> + se_cmd->se_tmr_req->ref_task_tag = tag;
> +
> + if (transport_lookup_tmr_lun(se_cmd, pending_req->v2p->lun) < 0)
> + goto out;
> +
> + transport_generic_handle_tmr(se_cmd);
> + wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
> +
> + err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
> + SUCCESS : FAILED;
> +
> +out:
> + if (tmr) {
> + transport_generic_free_cmd(&pending_req->se_cmd, 1);
> + kfree(tmr);
> + }
> +
> + scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
> +}
With scsiback_do_resp_with_sense() no longer freeing pending_req
directly, this special case will need a kmem_cache_free()
> +static int _scsiback_do_cmd_fn(struct vscsibk_info *info)
> +{
> + struct vscsiif_back_ring *ring = &info->ring;
> + struct vscsiif_request *ring_req;
> + struct vscsibk_pend *pending_req;
> + RING_IDX rc, rp;
> + int err, more_to_do = 0;
> + uint32_t result;
> + uint8_t act;
> +
> + rc = ring->req_cons;
> + rp = ring->sring->req_prod;
> + rmb(); /* guest system is accessing ring, too */
> +
> + if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) {
> + rc = ring->rsp_prod_pvt;
> + pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x -
> %#x = %u). Halting ring processing\n",
> + info->domid, rp, rc, rp - rc);
> + return -EACCES;
> + }
> +
> + while ((rc != rp)) {
> + if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
> + break;
> + pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
> + if (NULL == pending_req) {
> + more_to_do = 1;
> + break;
> + }
> +
Ideally this will end up using percpu_ida descriptor pre-allocation via
se_sess->sess_tag_poll + se_sess->sess_cmd_map in order to avoid the
fast-path memory allocation.
See commit 4824d3bfb909 for an example of how this was done for
vhost-scsi, but should be considered a post-merge optimization..
> + ring_req = RING_GET_REQUEST(ring, rc);
> + ring->req_cons = ++rc;
> +
> + act = ring_req->act;
> + err = prepare_pending_reqs(info, ring_req, pending_req);
> + if (err) {
> + switch (err) {
> + case -ENODEV:
> + result = DID_NO_CONNECT;
> + break;
> + default:
> + result = DRIVER_ERROR;
> + break;
> + }
> + scsiback_do_resp_with_sense(NULL, result << 24, 0,
> + pending_req);
> + more_to_do = 1;
> + break;
> + }
> +
> + switch (act) {
> + case VSCSIIF_ACT_SCSI_CDB:
> + if (scsiback_gnttab_data_map(ring_req, pending_req)) {
> + scsiback_fast_flush_area(pending_req);
> + scsiback_do_resp_with_sense(NULL,
> + DRIVER_ERROR << 24, 0, pending_req);
Another special case for scsiback_do_resp_with_sense() that with the
recommended changes require a kmem_cache_free() call.
> + } else {
> + scsiback_cmd_exec(pending_req);
> + }
> + break;
> + case VSCSIIF_ACT_SCSI_ABORT:
> + scsiback_device_action(pending_req, TMR_ABORT_TASK,
> + ring_req->ref_rqid);
> + break;
> + case VSCSIIF_ACT_SCSI_RESET:
> + scsiback_device_action(pending_req, TMR_LUN_RESET, 0);
> + break;
> + default:
> + pr_err_ratelimited("xen-pvscsi: invalid request\n");
> + scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
> + 0, pending_req);
Ditto here.
> + break;
> + }
> +
> + /* Yield point for this unbounded loop. */
> + cond_resched();
> + }
> +
> + if (RING_HAS_UNCONSUMED_REQUESTS(ring))
> + more_to_do = 1;
> +
> + return more_to_do;
> +}
> +
<SNIP>
> +static struct se_node_acl *
> +scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg)
> +{
> + struct scsiback_nacl *nacl;
> +
> + nacl = kzalloc(sizeof(struct scsiback_nacl), GFP_KERNEL);
> + if (!nacl) {
> + pr_err("Unable to allocate struct scsiback_nacl\n");
> + return NULL;
> + }
> +
> + return &nacl->se_node_acl;
> +}
> +
> +static void
> +scsiback_release_fabric_acl(struct se_portal_group *se_tpg,
> + struct se_node_acl *se_nacl)
> +{
> + struct scsiback_nacl *nacl = container_of(se_nacl,
> + struct scsiback_nacl, se_node_acl);
> + kfree(nacl);
> +}
> +
> +static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg)
> +{
> + return 1;
> +}
> +
> +static struct se_node_acl *
> +scsiback_make_nodeacl(struct se_portal_group *se_tpg,
> + struct config_group *group,
> + const char *name)
> +{
> + struct se_node_acl *se_nacl, *se_nacl_new;
> + struct scsiback_nacl *nacl;
> + u64 wwpn = 0;
> + u32 nexus_depth;
> +
> + se_nacl_new = scsiback_alloc_fabric_acl(se_tpg);
> + if (!se_nacl_new)
> + return ERR_PTR(-ENOMEM);
> +
> + nexus_depth = 1;
> + /*
> + * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
> + * when converting a NodeACL from demo mode -> explict
> + */
> + se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,
> + name, nexus_depth);
> + if (IS_ERR(se_nacl)) {
> + scsiback_release_fabric_acl(se_tpg, se_nacl_new);
> + return se_nacl;
> + }
> + /*
> + * Locate our struct scsiback_nacl and set the FC Nport WWPN
> + */
> + nacl = container_of(se_nacl, struct scsiback_nacl, se_node_acl);
> + nacl->iport_wwpn = wwpn;
> +
> + return se_nacl;
> +}
> +
> +static void scsiback_drop_nodeacl(struct se_node_acl *se_acl)
> +{
> + struct scsiback_nacl *nacl = container_of(se_acl,
> + struct scsiback_nacl, se_node_acl);
> + core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);
> + kfree(nacl);
> +}
> +
As mentioned above, the NodeACL use is unnecessary for this driver so
you can safely drop scsiback_make_node_acl() +
scsiback_alloc_fabric_acl() + scsiback_drop_nodeacl() +
scsiback_release_fabric_acl().
> +static int scsiback_check_stop_free(struct se_cmd *se_cmd)
> +{
> + return target_put_sess_cmd(se_cmd->se_sess, se_cmd);
> +}
> +
As mentioned above, scsiback_check_stop_free() should follow what
tcm_loop_check_stop_free() does here..
> +static void scsiback_release_cmd(struct se_cmd *se_cmd)
> +{
> +}
> +
Move the kmem_cache_free() for pending_req here..
> +static int scsiback_shutdown_session(struct se_session *se_sess)
> +{
> + return 0;
> +}
> +
> +static void scsiback_close_session(struct se_session *se_sess)
> +{
> +}
> +
> +static u32 scsiback_sess_get_index(struct se_session *se_sess)
> +{
> + return 0;
> +}
> +
> +static int scsiback_write_pending(struct se_cmd *se_cmd)
> +{
> + /* Go ahead and process the write immediately */
> + target_execute_cmd(se_cmd);
> +
> + return 0;
> +}
> +
> +static int scsiback_write_pending_status(struct se_cmd *se_cmd)
> +{
> + return 0;
> +}
> +
> +static void scsiback_set_default_node_attrs(struct se_node_acl *nacl)
> +{
> +}
> +
Safe to drop this no-op too.
> +static u32 scsiback_get_task_tag(struct se_cmd *se_cmd)
> +{
> + struct vscsibk_pend *pending_req = container_of(se_cmd,
> + struct vscsibk_pend, se_cmd);
> +
> + return pending_req->rqid;
> +}
> +
> +static int scsiback_get_cmd_state(struct se_cmd *se_cmd)
> +{
> + return 0;
> +}
> +
> +static int scsiback_queue_data_in(struct se_cmd *se_cmd)
> +{
> + struct vscsibk_pend *pending_req = container_of(se_cmd,
> + struct vscsibk_pend, se_cmd);
> +
> + pending_req->result = SAM_STAT_GOOD;
> + scsiback_cmd_done(pending_req);
> + return 0;
> +}
> +
> +static int scsiback_queue_status(struct se_cmd *se_cmd)
> +{
> + struct vscsibk_pend *pending_req = container_of(se_cmd,
> + struct vscsibk_pend, se_cmd);
> +
> + if (se_cmd->sense_buffer &&
> + ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
> + (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE)))
> + pending_req->result = (DRIVER_SENSE << 24) |
> + SAM_STAT_CHECK_CONDITION;
> + else
> + pending_req->result = se_cmd->scsi_status;
> +
> + scsiback_cmd_done(pending_req);
> + return 0;
> +}
> +
> +static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
> +{
> + struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
> + struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
> +
> + atomic_set(&tmr->tmr_complete, 1);
> + wake_up(&tmr->tmr_wait);
> +}
> +
> +static void scsiback_aborted_task(struct se_cmd *se_cmd)
> +{
> + struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
> + struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
> +
> + atomic_set(&tmr->tmr_complete, 1);
> + wake_up(&tmr->tmr_wait);
> +}
> +
scsiback_aborted_task() should just return here, instead of the
atomic_set + wake_up(). Eg: ABORT_TASK is already calling ->
queue_tm_rsp(), and the ->aborted_task() callback is specifically for
cleaning up any fabric specific descriptor resources, before
->release_cmd() gets called.
> +static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg,
> + char *page)
> +{
> + struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
> + se_tpg);
> + ssize_t rb;
> +
> + mutex_lock(&tpg->tv_tpg_mutex);
> + rb = snprintf(page, PAGE_SIZE, "%s\n", tpg->param_alias);
> + mutex_unlock(&tpg->tv_tpg_mutex);
> +
> + return rb;
> +}
> +
> +static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg,
> + const char *page, size_t count)
> +{
> + struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
> + se_tpg);
> + int len;
> +
> + if (strlen(page) >= VSCSI_NAMELEN) {
> + pr_err("param alias: %s, exceeds max: %d\n", page,
> + VSCSI_NAMELEN);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&tpg->tv_tpg_mutex);
> + len = snprintf(tpg->param_alias, VSCSI_NAMELEN, "%s", page);
> + if (tpg->param_alias[len - 1] == '\n')
> + tpg->param_alias[len - 1] = '\0';
> + mutex_unlock(&tpg->tv_tpg_mutex);
> +
> + return count;
> +}
> +
> +TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR);
> +
> +static struct configfs_attribute *scsiback_param_attrs[] = {
> + &scsiback_tpg_param_alias.attr,
> + NULL,
> +};
> +
> +static int scsiback_make_nexus(struct scsiback_tpg *tpg,
> + const char *name)
> +{
> + struct se_portal_group *se_tpg;
> + struct se_session *se_sess;
> + struct scsiback_nexus *tv_nexus;
> +
> + mutex_lock(&tpg->tv_tpg_mutex);
> + if (tpg->tpg_nexus) {
> + mutex_unlock(&tpg->tv_tpg_mutex);
> + pr_debug("tpg->tpg_nexus already exists\n");
> + return -EEXIST;
> + }
> + se_tpg = &tpg->se_tpg;
> +
> + tv_nexus = kzalloc(sizeof(struct scsiback_nexus), GFP_KERNEL);
> + if (!tv_nexus) {
> + mutex_unlock(&tpg->tv_tpg_mutex);
> + pr_err("Unable to allocate struct scsiback_nexus\n");
> + return -ENOMEM;
> + }
> + /*
> + * Initialize the struct se_session pointer
> + */
> + tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_DIN_PASS |
> + TARGET_PROT_DOUT_PASS);
TARGET_PROT_D*_PASS incorrectly declares T10 PI support here, which
should be TARGET_PROT_NORMAL instead.
> + if (IS_ERR(tv_nexus->tvn_se_sess)) {
> + mutex_unlock(&tpg->tv_tpg_mutex);
> + kfree(tv_nexus);
> + return -ENOMEM;
> + }
> + se_sess = tv_nexus->tvn_se_sess;
> + /*
> + * Since we are running in 'demo mode' this call with generate a
> + * struct se_node_acl for the scsiback struct se_portal_group with
> + * the SCSI Initiator port name of the passed configfs group 'name'.
> + */
> + tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
> + se_tpg, (unsigned char *)name);
> + if (!tv_nexus->tvn_se_sess->se_node_acl) {
> + mutex_unlock(&tpg->tv_tpg_mutex);
> + pr_debug("core_tpg_check_initiator_node_acl() failed for %s\n",
> + name);
> + goto out;
> + }
> + /*
> + * Now register the TCM pvscsi virtual I_T Nexus as active with the
> + * call to __transport_register_session()
> + */
> + __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
> + tv_nexus->tvn_se_sess, tv_nexus);
> + tpg->tpg_nexus = tv_nexus;
> +
> + mutex_unlock(&tpg->tv_tpg_mutex);
> + return 0;
> +
> +out:
> + transport_free_session(se_sess);
> + kfree(tv_nexus);
> + return -ENOMEM;
> +}
> +
<SNIP>
> +
> +static int scsiback_port_link(struct se_portal_group *se_tpg,
> + struct se_lun *lun)
> +{
> + struct scsiback_tpg *tpg = container_of(se_tpg,
> + struct scsiback_tpg, se_tpg);
> +
> + mutex_lock(&scsiback_mutex);
> +
> + mutex_lock(&tpg->tv_tpg_mutex);
> + tpg->tv_tpg_port_count++;
> + mutex_unlock(&tpg->tv_tpg_mutex);
> +
> + mutex_unlock(&scsiback_mutex);
> +
> + return 0;
> +}
> +
AFAICT, no need to hold scsiback_mutex while incrementing
tpg->tv_tpg_port_count.
> +static void scsiback_port_unlink(struct se_portal_group *se_tpg,
> + struct se_lun *lun)
> +{
> + struct scsiback_tpg *tpg = container_of(se_tpg,
> + struct scsiback_tpg, se_tpg);
> +
> + mutex_lock(&scsiback_mutex);
> +
> + mutex_lock(&tpg->tv_tpg_mutex);
> + tpg->tv_tpg_port_count--;
> + mutex_unlock(&tpg->tv_tpg_mutex);
> +
> + mutex_unlock(&scsiback_mutex);
> +}
> +
Ditto here.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |