[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3 8/9] vpci/msi: add MSI handlers



>>> On 27.04.17 at 16:35, <roger.pau@xxxxxxxxxx> wrote:
> Add handlers for the MSI control, address, data and mask fields in order to
> detect accesses to them and setup the interrupts as requested by the guest.
> 
> Note that the pending register is not trapped, and the guest can freely
> read/write to it.
> 
> Whether Xen is going to provide this functionality to Dom0 (MSI emulation) is
> controlled by the "msi" option in the dom0 field. When disabling this option
> Xen will hide the MSI capability structure from Dom0.

Unless there's an actual reason behind this, I'd view this as a
development only thing, which shouldn't be in a non-RFC patch.

> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -622,3 +622,144 @@ void msix_write_completion(struct vcpu *v)
>      if ( msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
>          gdprintk(XENLOG_WARNING, "MSI-X write completion failure\n");
>  }
> +
> +static unsigned int msi_vector(uint16_t data)
> +{
> +    return (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;

I know code elsewhere does it this way, but I'm intending to switch
that to use MASK_EXTR(), and I'd like to ask to use that construct
right away in new code.

> +static unsigned int msi_flags(uint16_t data, uint64_t addr)
> +{
> +    unsigned int rh, dm, dest_id, deliv_mode, trig_mode;
> +
> +    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
> +    dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
> +    dest_id = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> +    deliv_mode = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
> +    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;

I'm sure you've simply copied code from elsewhere, which I agree
should generally be fine. However, on top of what I did say above
I'd also like to request to drop such stray 0x prefixes, plus I think
the 7 wants a #define.

> +    return dest_id | (rh << GFLAGS_SHIFT_RH) | (dm << GFLAGS_SHIFT_DM) |
> +           (deliv_mode << GFLAGS_SHIFT_DELIV_MODE) |
> +           (trig_mode << GFLAGS_SHIFT_TRG_MODE);

How come dest_id has no shift? Please let's not assume the shift
is zero now and forever.

> +void vpci_msi_mask(struct vpci_arch_msi *arch, unsigned int entry, bool mask)
> +{
> +    struct pirq *pinfo;
> +    struct irq_desc *desc;
> +    unsigned long flags;
> +    int irq;
> +
> +    ASSERT(arch->pirq != -1);

Perhaps better ">= 0"?

> +    pinfo = pirq_info(current->domain, arch->pirq + entry);
> +    ASSERT(pinfo);
> +
> +    irq = pinfo->arch.irq;
> +    ASSERT(irq < nr_irqs);
> +
> +    desc = irq_to_desc(irq);
> +    ASSERT(desc);

It's not entirely clear to me where all the checks are which allow
the checks here to be ASSERT()s.

> +int vpci_msi_enable(struct vpci_arch_msi *arch, struct pci_dev *pdev,
> +                    uint64_t address, uint32_t data, unsigned int vectors)
> +{
> +    struct msi_info msi_info = {
> +        .seg = pdev->seg,
> +        .bus = pdev->bus,
> +        .devfn = pdev->devfn,
> +        .entry_nr = vectors,
> +    };
> +    int index = -1, rc;
> +    unsigned int i;
> +
> +    ASSERT(arch->pirq == -1);
> +
> +    /* Get a PIRQ. */
> +    rc = allocate_and_map_msi_pirq(pdev->domain, &index, &arch->pirq,
> +                                   &msi_info);
> +    if ( rc )
> +    {
> +        dprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
> +                pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                PCI_FUNC(pdev->devfn), rc);
> +        return rc;
> +    }
> +
> +    for ( i = 0; i < vectors; i++ )
> +    {
> +        xen_domctl_bind_pt_irq_t bind = {
> +            .hvm_domid = DOMID_SELF,
> +            .machine_irq = arch->pirq + i,
> +            .irq_type = PT_IRQ_TYPE_MSI,
> +            .u.msi.gvec = msi_vector(data) + i,
> +            .u.msi.gflags = msi_flags(data, address),
> +        };
> +
> +        pcidevs_lock();
> +        rc = pt_irq_create_bind(pdev->domain, &bind);
> +        if ( rc )

I don't think you need to hold the lock for the if() and its body. While
I see unmap_domain_pirq(), I don't really see why it does, so perhaps
there's some cleanup potential up front.

> --- a/xen/drivers/vpci/capabilities.c
> +++ b/xen/drivers/vpci/capabilities.c
> @@ -109,7 +109,7 @@ static int vpci_index_capabilities(struct pci_dev *pdev)
>      return 0;
>  }
>  
> -static void vpci_mask_capability(struct pci_dev *pdev, uint8_t cap_id)
> +void xen_vpci_mask_capability(struct pci_dev *pdev, uint8_t cap_id)

What's the xen_ prefix good for?

> +static int vpci_msi_control_read(struct pci_dev *pdev, unsigned int reg,
> +                                 union vpci_val *val, void *data)
> +{
> +    struct vpci_msi *msi = data;

const

> +    if ( msi->enabled )
> +        val->word |= PCI_MSI_FLAGS_ENABLE;
> +    if ( msi->masking )
> +        val->word |= PCI_MSI_FLAGS_MASKBIT;
> +    if ( msi->address64 )
> +        val->word |= PCI_MSI_FLAGS_64BIT;
> +
> +    /* Set multiple message capable. */
> +    val->word |= ((fls(msi->max_vectors) - 1) << 1) & PCI_MSI_FLAGS_QMASK;
> +
> +    /* Set current number of configured vectors. */
> +    val->word |= ((fls(msi->guest_vectors) - 1) << 4) & PCI_MSI_FLAGS_QSIZE;

The 1 and 4 here clearly need #define-s or the use of MASK_INSR().

> +static int vpci_msi_control_write(struct pci_dev *pdev, unsigned int reg,
> +                                  union vpci_val val, void *data)
> +{
> +    struct vpci_msi *msi = data;
> +    unsigned int vectors = 1 << ((val.word & PCI_MSI_FLAGS_QSIZE) >> 4);
> +    int rc;
> +
> +    if ( vectors > msi->max_vectors )
> +        return -EINVAL;
> +
> +    msi->guest_vectors = vectors;
> +
> +    if ( !!(val.word & PCI_MSI_FLAGS_ENABLE) == msi->enabled )
> +        return 0;
> +
> +    if ( val.word & PCI_MSI_FLAGS_ENABLE )
> +    {
> +        ASSERT(!msi->enabled && !msi->vectors);

I can see the value of the right side, but the left (with the imediately
prior if())?

> +        rc = vpci_msi_enable(&msi->arch, pdev, msi->address, msi->data,
> +                             vectors);
> +        if ( rc )
> +            return rc;
> +
> +        /* Apply the mask bits. */
> +        if ( msi->masking )
> +        {
> +            uint32_t mask = msi->mask;
> +
> +            while ( mask )
> +            {
> +                unsigned int i = ffs(mask);

ffs(), just like fls(), returns 1-based values, so this looks to be off by
one.

> +                vpci_msi_mask(&msi->arch, i, true);
> +                __clear_bit(i, &mask);
> +            }
> +        }
> +
> +        __msi_set_enable(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                         PCI_FUNC(pdev->devfn), reg - PCI_MSI_FLAGS, 1);

Seems like you'll never come through msi_capability_init(); I can't
see how it can be a good idea to bypass lots of stuff.

> +static int vpci_msi_address_upper_write(struct pci_dev *pdev, unsigned int 
> reg,
> +                                        union vpci_val val, void *data)
> +{
> +    struct vpci_msi *msi = data;
> +
> +    /* Clear high part. */
> +    msi->address &= ~GENMASK(63, 32);
> +    msi->address |= (uint64_t)val.double_word << 32;

Is the value written here actually being used for anything other than
for reading back (also applicable to the high bits of the low half of the
address)?

> +static int vpci_msi_mask_read(struct pci_dev *pdev, unsigned int reg,
> +                              union vpci_val *val, void *data)
> +{
> +    struct vpci_msi *msi = data;
> +
> +    val->double_word = msi->mask;
> +
> +    return 0;
> +}
> +
> +static int vpci_msi_mask_write(struct pci_dev *pdev, unsigned int reg,
> +                               union vpci_val val, void *data)
> +{
> +    struct vpci_msi *msi = data;
> +    uint32_t dmask;
> +
> +    dmask = msi->mask ^ val.double_word;
> +
> +    if ( !dmask )
> +        return 0;
> +
> +    while ( dmask && msi->enabled )
> +    {
> +        unsigned int i = ffs(dmask);
> +
> +        vpci_msi_mask(&msi->arch, i, !test_bit(i, &msi->mask));
> +        __clear_bit(i, &dmask);
> +    }

I think this loop should be limited to the number of enabled vectors
(and the same likely applies then to vpci_msi_control_write()).

> +static int vpci_init_msi(struct pci_dev *pdev)
> +{
> +    uint8_t seg = pdev->seg, bus = pdev->bus;
> +    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> +    struct vpci_msi *msi = NULL;

Pointless initializer.

> +    unsigned int msi_offset;
> +    uint16_t control;
> +    int rc;
> +
> +    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> +    if ( !msi_offset )
> +        return 0;
> +
> +    if ( !vpci_msi_enabled(pdev->domain) )
> +    {
> +        xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSI);
> +        return 0;
> +    }
> +
> +    msi = xzalloc(struct vpci_msi);
> +    if ( !msi )
> +        return -ENOMEM;
> +
> +    control = pci_conf_read16(seg, bus, slot, func,
> +                              msi_control_reg(msi_offset));
> +
> +    rc = xen_vpci_add_register(pdev, vpci_msi_control_read,
> +                               vpci_msi_control_write,
> +                               msi_control_reg(msi_offset), 2, msi);
> +    if ( rc )
> +    {
> +        dprintk(XENLOG_ERR,
> +                "%04x:%02x:%02x.%u: failed to add handler for MSI control: 
> %d\n",
> +                seg, bus, slot, func, rc);
> +        goto error;
> +    }
> +
> +    /* Get the maximum number of vectors the device supports. */
> +    msi->max_vectors = multi_msi_capable(control);
> +    ASSERT(msi->max_vectors <= 32);
> +
> +    /* Initial value after reset. */
> +    msi->guest_vectors = 1;
> +
> +    /* No PIRQ bind yet. */
> +    vpci_msi_arch_init(&msi->arch);
> +
> +    if ( is_64bit_address(control) )
> +        msi->address64 = true;
> +    if ( is_mask_bit_support(control) )
> +        msi->masking = true;
> +
> +    rc = xen_vpci_add_register(pdev, vpci_msi_address_read,
> +                               vpci_msi_address_write,
> +                               msi_lower_address_reg(msi_offset), 4, msi);
> +    if ( rc )
> +    {
> +        dprintk(XENLOG_ERR,
> +                "%04x:%02x:%02x.%u: failed to add handler for MSI address: 
> %d\n",
> +                seg, bus, slot, func, rc);
> +        goto error;
> +    }
> +
> +    rc = xen_vpci_add_register(pdev, vpci_msi_data_read, vpci_msi_data_write,
> +                               msi_data_reg(msi_offset, msi->address64), 2,
> +                               msi);
> +    if ( rc )
> +    {
> +        dprintk(XENLOG_ERR,
> +                "%04x:%02x:%02x.%u: failed to add handler for MSI address: 
> %d\n",
> +                seg, bus, slot, func, rc);

Twice the same message text is unhelpful (and actually there's a third
one below). But iirc I did indicate anyway that I think most of them
should go away. Note also how much thy contribute to the function's
size.

> +static int __init vpci_msi_setup_keyhandler(void)
> +{
> +    register_keyhandler('Z', vpci_dump_msi, "dump guest MSI state", 1);

Please let's avoid using new (and even non-intuitive) keys if at all
possible. This is Dom0 only, so can easily be chained onto e.g. the
'M' handler.

> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -89,9 +89,35 @@ struct vpci {
>  
>      /* List of capabilities supported by the device. */
>      struct list_head cap_list;
> +
> +    /* MSI data. */
> +    struct vpci_msi {
> +        /* Maximum number of vectors supported by the device. */
> +        unsigned int max_vectors;
> +        /* Current guest-written number of vectors. */
> +        unsigned int guest_vectors;
> +        /* Number of vectors configured. */
> +        unsigned int vectors;

So coming here I still don't really see what the difference between
these last two fields is (and hence why you need two).

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.