[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 9/9] vpci/msix: add MSI-X handlers



Add handlers for accesses to the MSI-X message control field on the
PCI configuration space, and traps for accesses to the memory region
that contains the MSI-X table and PBA. This traps detect attempts from
the guest to configure MSI-X interrupts and properly sets them up.

Note that accesses to the Table Offset, Table BIR, PBA Offset and PBA
BIR are not trapped by Xen at the moment.

Finally, turn the panic in the Dom0 PVH builder into a warning.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
Changes since v3:
 - Propagate changes from previous versions: remove xen_ prefix, use
   the new fields in vpci_val and remove the return value from
   handlers.
 - Remove the usage of GENMASK.
 - Mave the arch-specific parts of the dump routine to the
   x86/hvm/vmsi.c dump handler.
 - Chain the MSI-X dump handler to the 'M' debug key.
 - Fix the header BAR mappings so that the MSI-X regions inside of
   BARs are unmapped from the domain p2m in order for the handlers to
   work properly.
 - Unconditionally trap and forward accesses to the PBA MSI-X area.
 - Simplify the conditionals in vpci_msix_control_write.
 - Fix vpci_msix_accept to use a bool type.
 - Allow all supported accesses as described in the spec to the MSI-X
   table.
 - Truncate the returned address when the access is a 32b read.
 - Always return X86EMUL_OKAY from the handlers, returning ~0 in the
   read case if the access is not supported, or ignoring writes.
 - Do not check that max_entries is != 0 in the init handler.
 - Use trylock in the dump handler.

Changes since v2:
 - Split out arch-specific code.

This patch has been tested with devices using both a single MSI-X
entry and multiple ones.
---
 xen/arch/x86/hvm/dom0_build.c    |   2 +-
 xen/arch/x86/hvm/hvm.c           |   1 +
 xen/arch/x86/hvm/vmsi.c          | 128 +++++++++-
 xen/arch/x86/msi.c               |   1 +
 xen/drivers/vpci/Makefile        |   2 +-
 xen/drivers/vpci/header.c        |  85 ++++++-
 xen/drivers/vpci/msix.c          | 503 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/domain.h |   3 +
 xen/include/asm-x86/hvm/io.h     |  18 ++
 xen/include/xen/vpci.h           |  39 +++
 10 files changed, 774 insertions(+), 8 deletions(-)
 create mode 100644 xen/drivers/vpci/msix.c

diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index 6b9f76ec36..c060eb85eb 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -1091,7 +1091,7 @@ int __init dom0_construct_pvh(struct domain *d, const 
module_t *image,
         return rc;
     }
 
-    panic("Building a PVHv2 Dom0 is not yet supported.");
+    printk("WARNING: PVH is an experimental mode with limited 
functionality\n");
     return 0;
 }
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index f45e2bd23d..9277e84150 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -585,6 +585,7 @@ int hvm_domain_initialise(struct domain *d, unsigned long 
domcr_flags,
     INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.mmcfg_regions);
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables);
 
     rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);
     if ( rc )
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 5732c70b5c..f1c72f23d9 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -643,17 +643,15 @@ static unsigned int msi_flags(uint16_t data, uint64_t 
addr)
            (trig_mode << GFLAGS_SHIFT_TRG_MODE);
 }
 
-void vpci_msi_arch_mask(struct vpci_arch_msi *arch, struct pci_dev *pdev,
-                        unsigned int entry, bool mask)
+static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
 {
-    struct domain *d = pdev->domain;
     const struct pirq *pinfo;
     struct irq_desc *desc;
     unsigned long flags;
     int irq;
 
-    ASSERT(arch->pirq >= 0);
-    pinfo = pirq_info(d, arch->pirq + entry);
+    ASSERT(pirq >= 0);
+    pinfo = pirq_info(d, pirq);
     ASSERT(pinfo);
 
     irq = pinfo->arch.irq;
@@ -667,6 +665,12 @@ void vpci_msi_arch_mask(struct vpci_arch_msi *arch, struct 
pci_dev *pdev,
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
+void vpci_msi_arch_mask(struct vpci_arch_msi *arch, struct pci_dev *pdev,
+                        unsigned int entry, bool mask)
+{
+    vpci_mask_pirq(pdev->domain, arch->pirq + entry, mask);
+}
+
 int vpci_msi_arch_enable(struct vpci_arch_msi *arch, struct pci_dev *pdev,
                          uint64_t address, uint32_t data, unsigned int vectors)
 {
@@ -771,3 +775,117 @@ void vpci_msi_arch_print(struct vpci_arch_msi *arch, 
uint16_t data,
            MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK),
            arch->pirq);
 }
+
+void vpci_msix_arch_mask(struct vpci_arch_msix_entry *arch,
+                         struct pci_dev *pdev, bool mask)
+{
+    vpci_mask_pirq(pdev->domain, arch->pirq, mask);
+}
+
+int vpci_msix_arch_enable(struct vpci_arch_msix_entry *arch,
+                          struct pci_dev *pdev, uint64_t address,
+                          uint32_t data, unsigned int entry_nr,
+                          paddr_t table_base)
+{
+    struct domain *d = pdev->domain;
+    xen_domctl_bind_pt_irq_t bind = {
+        .irq_type = PT_IRQ_TYPE_MSI,
+        .u.msi.gvec = msi_vector(data),
+        .u.msi.gflags = msi_flags(data, address),
+    };
+    int rc;
+
+    if ( arch->pirq == -1 )
+    {
+        struct msi_info msi_info = {
+            .seg = pdev->seg,
+            .bus = pdev->bus,
+            .devfn = pdev->devfn,
+            .table_base = table_base,
+            .entry_nr = entry_nr,
+        };
+
+        /* Map PIRQ. */
+        rc = allocate_and_map_msi_pirq(d, -1, &arch->pirq,
+                                       MAP_PIRQ_TYPE_MSI, &msi_info);
+        if ( rc )
+        {
+            dprintk(XENLOG_ERR,
+                    "%04x:%02x:%02x.%u: unable to map MSI-X PIRQ entry %u: 
%d\n",
+                    pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                    PCI_FUNC(pdev->devfn), entry_nr, rc);
+            return rc;
+        }
+    }
+
+    bind.machine_irq = arch->pirq;
+    pcidevs_lock();
+    rc = pt_irq_create_bind(d, &bind);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: unable to create MSI-X bind %u: %d\n",
+                pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                PCI_FUNC(pdev->devfn), entry_nr, rc);
+        spin_lock(&d->event_lock);
+        unmap_domain_pirq(d, arch->pirq);
+        spin_unlock(&d->event_lock);
+        pcidevs_unlock();
+        arch->pirq = -1;
+        return rc;
+    }
+    pcidevs_unlock();
+
+    return 0;
+}
+
+int vpci_msix_arch_disable(struct vpci_arch_msix_entry *arch,
+                           struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    xen_domctl_bind_pt_irq_t bind = {
+        .irq_type = PT_IRQ_TYPE_MSI,
+        .machine_irq = arch->pirq,
+    };
+    int rc;
+
+    if ( arch->pirq == -1 )
+        return 0;
+
+    pcidevs_lock();
+    rc = pt_irq_destroy_bind(d, &bind);
+    if ( rc )
+    {
+        pcidevs_unlock();
+        return rc;
+    }
+
+    spin_lock(&d->event_lock);
+    unmap_domain_pirq(d, arch->pirq);
+    spin_unlock(&d->event_lock);
+    pcidevs_unlock();
+
+    arch->pirq = -1;
+
+    return 0;
+}
+
+int vpci_msix_arch_init(struct vpci_arch_msix_entry *arch)
+{
+    arch->pirq = -1;
+    return 0;
+}
+
+void vpci_msix_arch_print(struct vpci_arch_msix_entry *entry, uint32_t data,
+                          uint64_t addr, bool masked, unsigned int pos)
+{
+    printk("%4u vec=%#02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: %d\n",
+           pos, MASK_EXTR(data, MSI_DATA_VECTOR_MASK),
+           data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+           data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+           data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+           addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+           addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
+           MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK),
+           masked, entry->pirq);
+}
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 573378d6c3..ad5c27df18 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1539,6 +1539,7 @@ static void dump_msi(unsigned char key)
     }
 
     vpci_dump_msi();
+    vpci_dump_msix();
 }
 
 static int __init msi_setup_keyhandler(void)
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index 62cec9e82b..55d1bdfda0 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@
-obj-y += vpci.o header.o msi.o
+obj-y += vpci.o header.o msi.o msix.o
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index ae5719ab1a..07056350d6 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -20,6 +20,7 @@
 #include <xen/sched.h>
 #include <xen/vpci.h>
 #include <xen/p2m-common.h>
+#include <asm/p2m.h>
 
 #define MAPPABLE_BAR(x)                                                 \
     (((x)->type == VPCI_BAR_MEM32 || (x)->type == VPCI_BAR_MEM64_LO ||  \
@@ -100,11 +101,45 @@ static int vpci_map_range(unsigned long s, unsigned long 
e, void *data)
     return modify_mmio(map->d, _gfn(s), _mfn(s), e - s + 1, map->map);
 }
 
+static int vpci_unmap_msix(struct domain *d, struct vpci_msix_mem *msix)
+{
+    unsigned long gfn;
+
+    for ( gfn = PFN_DOWN(msix->addr); gfn <= PFN_UP(msix->addr + msix->size);
+          gfn++ )
+    {
+        p2m_type_t t;
+        mfn_t mfn = get_gfn(d, gfn, &t);
+        int rc;
+
+        if ( mfn_eq(mfn, INVALID_MFN) )
+        {
+            /* Nothing to do, this is already a hole. */
+            put_gfn(d, gfn);
+            continue;
+        }
+
+        if ( !p2m_is_mmio(t) )
+        {
+            put_gfn(d, gfn);
+            return -EINVAL;
+        }
+
+        rc = modify_mmio(d, _gfn(gfn), mfn, 1, false);
+        put_gfn(d, gfn);
+        if ( rc )
+            return rc;
+    }
+
+    return 0;
+}
+
 static int vpci_modify_bar(struct domain *d, const struct vpci_bar *bar,
                            const bool map)
 {
     struct rangeset *mem;
     struct map_data data = { .d = d, .map = map };
+    unsigned int i;
     int rc;
 
     ASSERT(MAPPABLE_BAR(bar));
@@ -113,6 +148,35 @@ static int vpci_modify_bar(struct domain *d, const struct 
vpci_bar *bar,
     if ( IS_ERR(mem) )
         return -PTR_ERR(mem);
 
+    /*
+     * Make sure the MSI-X regions of the BAR are not mapped into the domain
+     * p2m, or else the MSI-X handlers are useless. Only do this when mapping,
+     * since that's when the memory decoding on the device is enabled.
+     */
+    for ( i = 0; map && i < ARRAY_SIZE(bar->msix); i++ )
+    {
+        struct vpci_msix_mem *msix = bar->msix[i];
+
+        if ( !msix || msix->addr == INVALID_PADDR )
+            continue;
+
+        rc = vpci_unmap_msix(d, msix);
+        if ( rc )
+        {
+            rangeset_destroy(mem);
+            return rc;
+        }
+
+        rc = rangeset_remove_range(mem, PFN_DOWN(msix->addr),
+                                   PFN_DOWN(msix->addr + msix->size));
+        if ( rc )
+        {
+            rangeset_destroy(mem);
+            return rc;
+        }
+
+    }
+
     rc = rangeset_report_ranges(mem, 0, ~0ul, vpci_map_range, &data);
     rangeset_destroy(mem);
     if ( rc )
@@ -221,6 +285,7 @@ static void vpci_bar_write(struct pci_dev *pdev, unsigned 
int reg,
     uint8_t seg = pdev->seg, bus = pdev->bus;
     uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
     uint32_t wdata = val.u32, size_mask;
+    unsigned int i;
     bool hi = false;
 
     switch ( bar->type )
@@ -269,6 +334,11 @@ static void vpci_bar_write(struct pci_dev *pdev, unsigned 
int reg,
     bar->addr &= ~((uint64_t)0xffffffff << (hi ? 32 : 0));
     bar->addr |= (uint64_t)wdata << (hi ? 32 : 0);
 
+    /* Update any MSI-X areas contained in this BAR. */
+    for ( i = 0; i < ARRAY_SIZE(bar->msix); i++ )
+        if ( bar->msix[i] )
+            bar->msix[i]->addr = bar->addr + bar->msix[i]->offset;
+
     /* Make sure Xen writes back the same value for the BAR RO bits. */
     if ( !hi )
         wdata |= pci_conf_read32(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
@@ -405,7 +475,20 @@ static int vpci_init_bars(struct pci_dev *pdev)
             continue;
         }
 
-        bars[i].addr = (cmd & PCI_COMMAND_MEMORY) ? addr : INVALID_PADDR;
+        if ( cmd & PCI_COMMAND_MEMORY )
+        {
+            unsigned int j;
+
+            bars[i].addr = addr;
+
+            for ( j = 0; j < ARRAY_SIZE(bars[i].msix); j++ )
+                if ( bars[i].msix[j] )
+                    bars[i].msix[j]->addr = bars[i].addr +
+                                            bars[i].msix[j]->offset;
+        }
+        else
+            bars[i].addr = INVALID_PADDR;
+
         bars[i].size = size;
         bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH;
 
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
new file mode 100644
index 0000000000..dc0e7070e4
--- /dev/null
+++ b/xen/drivers/vpci/msix.c
@@ -0,0 +1,503 @@
+/*
+ * Handlers for accesses to the MSI-X capability structure and the memory
+ * region.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+#include <asm/msi.h>
+#include <xen/p2m-common.h>
+#include <xen/keyhandler.h>
+
+#define MSIX_SIZE(num) (offsetof(struct vpci_msix, entries[num]))
+#define MSIX_ADDR_IN_RANGE(a, table)                                    \
+    ((table)->addr != INVALID_PADDR && (a) >= (table)->addr &&          \
+     (a) < (table)->addr + (table)->size)
+
+static void vpci_msix_control_read(struct pci_dev *pdev, unsigned int reg,
+                                   union vpci_val *val, void *data)
+{
+    const struct vpci_msix *msix = data;
+
+    val->u16 = (msix->max_entries - 1) & PCI_MSIX_FLAGS_QSIZE;
+    val->u16 |= msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0;
+    val->u16 |= msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0;
+}
+
+static void vpci_msix_control_write(struct pci_dev *pdev, unsigned int reg,
+                                    union vpci_val val, void *data)
+{
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msix *msix = data;
+    paddr_t table_base = pdev->vpci->header.bars[msix->table.bir].addr;
+    bool new_masked, new_enabled;
+    unsigned int i;
+    int rc;
+
+    new_masked = val.u16 & PCI_MSIX_FLAGS_MASKALL;
+    new_enabled = val.u16 & PCI_MSIX_FLAGS_ENABLE;
+
+    if ( !msix->enabled && new_enabled )
+    {
+        /* MSI-X enabled. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            if ( msix->entries[i].masked )
+                continue;
+
+            rc = vpci_msix_arch_enable(&msix->entries[i].arch, pdev,
+                                       msix->entries[i].addr,
+                                       msix->entries[i].data,
+                                       msix->entries[i].nr, table_base);
+            if ( rc )
+            {
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to update entry %u: %d\n",
+                         seg, bus, slot, func, i, rc);
+                return;
+            }
+
+            vpci_msix_arch_mask(&msix->entries[i].arch, pdev, false);
+        }
+    }
+    else if ( msix->enabled && !new_enabled )
+    {
+        /* MSI-X disabled. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            rc = vpci_msix_arch_disable(&msix->entries[i].arch, pdev);
+            if ( rc )
+            {
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
+                         seg, bus, slot, func, i, rc);
+                return;
+            }
+        }
+    }
+
+    if ( (new_enabled != msix->enabled || new_masked != msix->masked) &&
+         pci_msi_conf_write_intercept(pdev, reg, 2, &val.u32) >= 0 )
+        pci_conf_write16(seg, bus, slot, func, reg, val.u32);
+
+    msix->masked = new_masked;
+    msix->enabled = new_enabled;
+}
+
+static struct vpci_msix *vpci_msix_find(struct domain *d, unsigned long addr)
+{
+    struct vpci_msix *msix;
+
+    ASSERT(vpci_locked(d));
+    list_for_each_entry ( msix,  &d->arch.hvm_domain.msix_tables, next )
+    {
+        uint8_t seg = msix->pdev->seg, bus = msix->pdev->bus;
+        uint8_t slot = PCI_SLOT(msix->pdev->devfn);
+        uint8_t func = PCI_FUNC(msix->pdev->devfn);
+        uint16_t cmd = pci_conf_read16(seg, bus, slot, func, PCI_COMMAND);
+
+        if ( (cmd & PCI_COMMAND_MEMORY) &&
+             (MSIX_ADDR_IN_RANGE(addr, &msix->table) ||
+             MSIX_ADDR_IN_RANGE(addr, &msix->pba)) )
+            return msix;
+    }
+
+    return NULL;
+}
+
+static int vpci_msix_accept(struct vcpu *v, unsigned long addr)
+{
+    bool found;
+
+    vpci_lock(v->domain);
+    found = vpci_msix_find(v->domain, addr);
+    vpci_unlock(v->domain);
+
+    return found;
+}
+
+static int vpci_msix_access_check(struct pci_dev *pdev, unsigned long addr,
+                                  unsigned int len)
+{
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+
+    /* Only allow 32/64b accesses. */
+    if ( len != 4 && len != 8 )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: invalid MSI-X table access size: %u\n",
+                 seg, bus, slot, func, len);
+        return -EINVAL;
+    }
+
+    /* Only allow aligned accesses. */
+    if ( (addr & (len - 1)) != 0 )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: MSI-X only allows aligned accesses\n",
+                 seg, bus, slot, func);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static struct vpci_msix_entry *vpci_msix_get_entry(struct vpci_msix *msix,
+                                                   unsigned long addr)
+{
+    return &msix->entries[(addr - msix->table.addr) / PCI_MSIX_ENTRY_SIZE];
+}
+
+static int vpci_msix_read(struct vcpu *v, unsigned long addr,
+                          unsigned int len, unsigned long *data)
+{
+    struct domain *d = v->domain;
+    struct vpci_msix *msix;
+    const struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    vpci_lock(d);
+
+    msix = vpci_msix_find(d, addr);
+    if ( !msix )
+    {
+        vpci_unlock(d);
+        *data = ~0ul;
+        return X86EMUL_OKAY;
+    }
+
+    if ( vpci_msix_access_check(msix->pdev, addr, len) )
+    {
+        vpci_unlock(d);
+        *data = ~0ul;
+        return X86EMUL_OKAY;
+    }
+
+    if ( MSIX_ADDR_IN_RANGE(addr, &msix->pba) )
+    {
+        /* Access to PBA. */
+        switch ( len )
+        {
+        case 4:
+            *data = readl(addr);
+            break;
+        case 8:
+            *data = readq(addr);
+            break;
+        default:
+            ASSERT_UNREACHABLE();
+            break;
+        }
+
+        vpci_unlock(d);
+        return X86EMUL_OKAY;
+    }
+
+    /* Get the table entry and offset. */
+    entry = vpci_msix_get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        /*
+         * NB: do explicit truncation to the size of the access. This shouldn't
+         * be required here, since the caller of the handler should already
+         * take the appropriate measures to truncate the value before returning
+         * to the guest, but better be safe than sorry.
+         */
+        *data = len == 8 ? entry->addr : (uint32_t)entry->addr;
+        break;
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        *data = entry->addr >> 32;
+        break;
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        *data = entry->data;
+        if ( len == 8 )
+            *data |=
+                (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32;
+        break;
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+        *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
+        break;
+    default:
+        BUG();
+    }
+    vpci_unlock(d);
+
+    return X86EMUL_OKAY;
+}
+
+static int vpci_msix_write(struct vcpu *v, unsigned long addr,
+                                 unsigned int len, unsigned long data)
+{
+    struct domain *d = v->domain;
+    struct vpci_msix *msix;
+    struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    vpci_lock(d);
+    msix = vpci_msix_find(d, addr);
+    if ( !msix )
+    {
+        vpci_unlock(d);
+        return X86EMUL_OKAY;
+    }
+
+    if ( MSIX_ADDR_IN_RANGE(addr, &msix->pba) )
+    {
+        /* Ignore writes to PBA, it's behavior is undefined. */
+        vpci_unlock(d);
+        return X86EMUL_OKAY;
+    }
+
+    if ( vpci_msix_access_check(msix->pdev, addr, len) )
+    {
+        vpci_unlock(d);
+        return X86EMUL_OKAY;
+    }
+
+    /* Get the table entry and offset. */
+    entry = vpci_msix_get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        if ( len == 8 )
+        {
+            entry->addr = data;
+            break;
+        }
+        entry->addr &= ~0xffffffff;
+        entry->addr |= data;
+        break;
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        entry->addr &= ~((uint64_t)0xffffffff << 32);
+        entry->addr |= data << 32;
+        break;
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        /*
+         * 8 byte writes to the msg data and vector control fields are
+         * only allowed if the entry is masked.
+         */
+        if ( len == 8 && !entry->masked && !msix->masked && msix->enabled )
+        {
+            vpci_unlock(d);
+            return X86EMUL_OKAY;
+        }
+
+        entry->data = data;
+
+        if ( len == 4 )
+            break;
+
+        data >>= 32;
+        /* fallthrough */
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+    {
+        bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
+        struct pci_dev *pdev = msix->pdev;
+        paddr_t table_base = pdev->vpci->header.bars[msix->table.bir].addr;
+        int rc;
+
+        if ( !msix->enabled )
+        {
+            entry->masked = new_masked;
+            break;
+        }
+
+        if ( new_masked != entry->masked && !new_masked )
+        {
+            /* Unmasking an entry, update it. */
+            rc = vpci_msix_arch_enable(&entry->arch, pdev, entry->addr,
+                                       entry->data, entry->nr, table_base);
+            if ( rc )
+            {
+                vpci_unlock(d);
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to update entry %u: %d\n",
+                         pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), entry->nr, rc);
+                return X86EMUL_OKAY;
+            }
+        }
+
+        vpci_msix_arch_mask(&entry->arch, pdev, new_masked);
+        entry->masked = new_masked;
+
+        break;
+    }
+    default:
+        BUG();
+    }
+    vpci_unlock(d);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+    .check = vpci_msix_accept,
+    .read = vpci_msix_read,
+    .write = vpci_msix_write,
+};
+
+static int vpci_init_msix(struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msix *msix;
+    unsigned int msix_offset, i, max_entries;
+    struct vpci_bar *table_bar, *pba_bar;
+    uint16_t control;
+    int rc;
+
+    msix_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !msix_offset )
+        return 0;
+
+    control = pci_conf_read16(seg, bus, slot, func,
+                              msix_control_reg(msix_offset));
+
+    /* Get the maximum number of vectors the device supports. */
+    max_entries = msix_table_size(control);
+
+    msix = xzalloc_bytes(MSIX_SIZE(max_entries));
+    if ( !msix )
+        return -ENOMEM;
+
+    msix->max_entries = max_entries;
+    msix->pdev = pdev;
+
+    /* Find the MSI-X table address. */
+    msix->table.offset = pci_conf_read32(seg, bus, slot, func,
+                                         msix_table_offset_reg(msix_offset));
+    msix->table.bir = msix->table.offset & PCI_MSIX_BIRMASK;
+    msix->table.offset &= ~PCI_MSIX_BIRMASK;
+    msix->table.size = msix->max_entries * PCI_MSIX_ENTRY_SIZE;
+    msix->table.addr = INVALID_PADDR;
+
+    /* Find the MSI-X pba address. */
+    msix->pba.offset = pci_conf_read32(seg, bus, slot, func,
+                                       msix_pba_offset_reg(msix_offset));
+    msix->pba.bir = msix->pba.offset & PCI_MSIX_BIRMASK;
+    msix->pba.offset &= ~PCI_MSIX_BIRMASK;
+    msix->pba.size = DIV_ROUND_UP(msix->max_entries, 8);
+    msix->pba.addr = INVALID_PADDR;
+
+    for ( i = 0; i < msix->max_entries; i++)
+    {
+        msix->entries[i].masked = true;
+        msix->entries[i].nr = i;
+        vpci_msix_arch_init(&msix->entries[i].arch);
+    }
+
+    if ( list_empty(&d->arch.hvm_domain.msix_tables) )
+        register_mmio_handler(d, &vpci_msix_table_ops);
+
+    list_add(&msix->next, &d->arch.hvm_domain.msix_tables);
+
+    rc = vpci_add_register(pdev, vpci_msix_control_read,
+                           vpci_msix_control_write,
+                           msix_control_reg(msix_offset), 2, msix);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: failed to add handler for MSI-X control: 
%d\n",
+                seg, bus, slot, func, rc);
+        goto error;
+    }
+
+    table_bar = &pdev->vpci->header.bars[msix->table.bir];
+    pba_bar = &pdev->vpci->header.bars[msix->pba.bir];
+
+    /*
+     * The header handlers will take care of leaving a hole for the MSI-X
+     * related areas, that's why MSI-X needs to be initialized before the
+     * header.
+     */
+    table_bar->msix[VPCI_BAR_MSIX_TABLE] = &msix->table;
+    pba_bar->msix[VPCI_BAR_MSIX_PBA] = &msix->pba;
+    pdev->vpci->msix = msix;
+
+    return 0;
+
+ error:
+    ASSERT(rc);
+    xfree(msix);
+    return rc;
+}
+
+REGISTER_VPCI_INIT(vpci_init_msix, VPCI_PRIORITY_HIGH);
+
+void vpci_dump_msix(void)
+{
+    struct domain *d;
+    struct pci_dev *pdev;
+
+    for_each_domain ( d )
+    {
+        if ( !has_vpci(d) )
+            continue;
+
+        printk("vPCI MSI-X information for guest %u\n", d->domain_id);
+
+        if ( !vpci_trylock(d) )
+        {
+            printk("Unable to get vPCI lock, skipping\n");
+            continue;
+        }
+
+        list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list)
+        {
+            uint8_t seg = pdev->seg, bus = pdev->bus;
+            uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+            struct vpci_msix *msix = pdev->vpci->msix;
+            unsigned int i;
+
+            if ( !msix )
+                continue;
+
+            printk("Device %04x:%02x:%02x.%u\n", seg, bus, slot, func);
+
+            printk("Max entries: %u maskall: %u enabled: %u\n",
+                   msix->max_entries, msix->masked, msix->enabled);
+
+            printk("Guest table entries:\n");
+            for ( i = 0; i < msix->max_entries; i++ )
+                vpci_msix_arch_print(&msix->entries[i].arch,
+                                     msix->entries[i].data,
+                                     msix->entries[i].addr,
+                                     msix->entries[i].masked, i);
+        }
+        vpci_unlock(d);
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 7028f93861..980d718327 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -190,6 +190,9 @@ struct hvm_domain {
     /* List of ECAM (MMCFG) regions trapped by Xen. */
     struct list_head mmcfg_regions;
 
+    /* List of MSI-X tables. */
+    struct list_head msix_tables;
+
     /* List of permanently write-mapped pages. */
     struct {
         spinlock_t lock;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 55ed094734..739eefe541 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -144,6 +144,24 @@ int vpci_msi_arch_init(struct vpci_arch_msi *arch);
 void vpci_msi_arch_print(struct vpci_arch_msi *arch, uint16_t data,
                          uint64_t addr);
 
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+    int pirq;
+};
+
+/* Arch-specific vPCI MSI-X helpers. */
+void vpci_msix_arch_mask(struct vpci_arch_msix_entry *arch,
+                         struct pci_dev *pdev, bool mask);
+int vpci_msix_arch_enable(struct vpci_arch_msix_entry *arch,
+                          struct pci_dev *pdev, uint64_t address,
+                          uint32_t data, unsigned int entry_nr,
+                          paddr_t table_base);
+int vpci_msix_arch_disable(struct vpci_arch_msix_entry *arch,
+                           struct pci_dev *pdev);
+int vpci_msix_arch_init(struct vpci_arch_msix_entry *arch);
+void vpci_msix_arch_print(struct vpci_arch_msix_entry *entry, uint32_t data,
+                          uint64_t addr, bool masked, unsigned int pos);
+
 enum stdvga_cache_state {
     STDVGA_CACHE_UNINITIALIZED,
     STDVGA_CACHE_ENABLED,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index ca693f3667..5bc6380531 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -83,6 +83,10 @@ struct vpci {
             } type;
             paddr_t addr;
             uint64_t size;
+#define VPCI_BAR_MSIX_TABLE     0
+#define VPCI_BAR_MSIX_PBA       1
+#define VPCI_BAR_MSIX_NUM       2
+            struct vpci_msix_mem *msix[VPCI_BAR_MSIX_NUM];
             bool prefetchable;
             bool sizing;
             bool enabled;
@@ -112,10 +116,45 @@ struct vpci {
         /* Arch-specific data. */
         struct vpci_arch_msi arch;
     } *msi;
+
+    /* MSI-X data. */
+    struct vpci_msix {
+        struct pci_dev *pdev;
+        /* Maximum number of vectors supported by the device. */
+        unsigned int max_entries;
+        /* MSI-X enabled? */
+        bool enabled;
+        /* Masked? */
+        bool masked;
+        /* List link. */
+        struct list_head next;
+        /* Table information. */
+        struct vpci_msix_mem {
+            /* MSI-X table offset. */
+            unsigned int offset;
+            /* MSI-X table BIR. */
+            unsigned int bir;
+            /* Table addr. */
+            paddr_t addr;
+            /* Table size. */
+            unsigned int size;
+        } table;
+        /* PBA */
+        struct vpci_msix_mem pba;
+        /* Entries. */
+        struct vpci_msix_entry {
+            unsigned int nr;
+            uint64_t addr;
+            uint32_t data;
+            bool masked;
+            struct vpci_arch_msix_entry arch;
+        } entries[];
+    } *msix;
 #endif
 };
 
 void vpci_dump_msi(void);
+void vpci_dump_msix(void);
 
 #endif
 
-- 
2.11.0 (Apple Git-81)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.