[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 7/7] x86/PCI: intercept all PV Dom0 MMCFG writes



... to hook up pci_conf_write_intercept() even for Dom0 not using
method 1 accesses for the base part of PCI device config space.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Not entirely sure whether the complicated logging logic in x86/mm.c is
actually worth it.

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -734,6 +734,46 @@ static int update_xen_mappings(unsigned 
     return err;
 }
 
+#ifndef NDEBUG
+struct mmio_emul_range_ctxt {
+    const struct domain *d;
+    unsigned long mfn;
+};
+
+static int print_mmio_emul_range(unsigned long s, unsigned long e, void *arg)
+{
+    const struct mmio_emul_range_ctxt *ctxt = arg;
+
+    if ( ctxt->mfn > e )
+        return 0;
+
+    if ( ctxt->mfn >= s )
+    {
+        static DEFINE_SPINLOCK(last_lock);
+        static const struct domain *last_d;
+        static unsigned long last_s = ~0UL, last_e;
+        bool_t print = 0;
+
+        spin_lock(&last_lock);
+        if ( last_d != ctxt->d || last_s != s || last_e != e )
+        {
+            last_d = ctxt->d;
+            last_s = s;
+            last_e = e;
+            print = 1;
+        }
+        spin_unlock(&last_lock);
+
+        if ( print )
+            printk(XENLOG_G_INFO
+                   "d%d: Forcing write emulation on MFNs %lx-%lx\n",
+                   ctxt->d->domain_id, s, e);
+    }
+
+    return 1;
+}
+#endif
+
 int
 get_page_from_l1e(
     l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner)
@@ -757,6 +797,11 @@ get_page_from_l1e(
     if ( !mfn_valid(mfn) ||
          (real_pg_owner = page_get_owner_and_reference(page)) == dom_io )
     {
+#ifndef NDEBUG
+        const unsigned long *ro_map;
+        unsigned int seg, bdf;
+#endif
+
         /* Only needed the reference to confirm dom_io ownership. */
         if ( mfn_valid(mfn) )
             put_page(page);
@@ -792,9 +837,20 @@ get_page_from_l1e(
         if ( !(l1f & _PAGE_RW) ||
              !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
             return 0;
-        dprintk(XENLOG_G_WARNING,
-                "d%d: Forcing read-only access to MFN %lx\n",
-                l1e_owner->domain_id, mfn);
+#ifndef NDEBUG
+        if ( !pci_mmcfg_decode(mfn, &seg, &bdf) ||
+             ((ro_map = pci_get_ro_map(seg)) != NULL &&
+              test_bit(bdf, ro_map)) )
+            printk(XENLOG_G_WARNING
+                   "d%d: Forcing read-only access to MFN %lx\n",
+                   l1e_owner->domain_id, mfn);
+        else
+            rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL,
+                                   print_mmio_emul_range,
+                                   &(struct mmio_emul_range_ctxt){
+                                      .d = l1e_owner,
+                                      .mfn = mfn });
+#endif
         return 1;
     }
 
@@ -5145,6 +5201,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
+         rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte)) ||
          !get_page_from_pagenr(l1e_get_pfn(pte), d) )
         goto bail;
 
@@ -5192,6 +5249,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
 struct mmio_ro_emulate_ctxt {
     struct x86_emulate_ctxt ctxt;
     unsigned long cr2;
+    unsigned int seg, bdf;
 };
 
 static int mmio_ro_emulated_read(
@@ -5231,6 +5289,44 @@ static const struct x86_emulate_ops mmio
     .write      = mmio_ro_emulated_write,
 };
 
+static int mmio_intercept_write(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct mmio_ro_emulate_ctxt *mmio_ctxt =
+        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+
+    /*
+     * Only allow naturally-aligned stores no wider than 4 bytes to the
+     * original %cr2 address.
+     */
+    if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 ||
+         offset != mmio_ctxt->cr2 )
+    {
+        MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)",
+                mmio_ctxt->cr2, offset, bytes);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    offset &= 0xfff;
+    pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, offset, bytes,
+                             p_data);
+    pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf),
+                    PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes,
+                    *(uint32_t *)p_data);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct x86_emulate_ops mmio_intercept_ops = {
+    .read       = mmio_ro_emulated_read,
+    .insn_fetch = ptwr_emulated_read,
+    .write      = mmio_intercept_write,
+};
+
 /* Check if guest is trying to modify a r/o MMIO page. */
 int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
                           struct cpu_user_regs *regs)
@@ -5245,6 +5341,7 @@ int mmio_ro_do_page_fault(struct vcpu *v
         .ctxt.swint_emulate = x86_swint_emulate_none,
         .cr2 = addr
     };
+    const unsigned long *ro_map;
     int rc;
 
     /* Attempt to read the PTE that maps the VA being accessed. */
@@ -5269,7 +5366,12 @@ int mmio_ro_do_page_fault(struct vcpu *v
     if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
         return 0;
 
-    rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+    if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) &&
+         ((ro_map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL ||
+          !test_bit(mmio_ro_ctxt.bdf, ro_map)) )
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops);
+    else
+        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
 
     return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
 }
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -134,30 +134,10 @@ static void __iomem *mcfg_ioremap(const 
     return (void __iomem *) virt;
 }
 
-void arch_pci_ro_device(int seg, int bdf)
-{
-    unsigned int idx, bus = PCI_BUS(bdf);
-
-    for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
-        const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
-        unsigned long mfn = (cfg->address >> PAGE_SHIFT) + bdf;
-
-        if (!pci_mmcfg_virt[idx].virt || cfg->pci_segment != seg ||
-            cfg->start_bus_number > bus || cfg->end_bus_number < bus)
-            continue;
-
-        if (rangeset_add_singleton(mmio_ro_ranges, mfn))
-            printk(XENLOG_ERR
-                   "%04x:%02x:%02x.%u: could not mark MCFG (mfn %#lx) read-only\n",
-                   cfg->pci_segment, bus, PCI_SLOT(bdf), PCI_FUNC(bdf),
-                   mfn);
-    }
-}
-
 int pci_mmcfg_arch_enable(unsigned int idx)
 {
     const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
-    const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment);
+    unsigned long start_mfn, end_mfn;
 
     if (pci_mmcfg_virt[idx].virt)
         return 0;
@@ -169,16 +149,15 @@ int pci_mmcfg_arch_enable(unsigned int i
     }
     printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
-    if (ro_map) {
-        unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
-        unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
-
-        while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) {
-            arch_pci_ro_device(cfg->pci_segment, bdf);
-            if (bdf++ == end)
-                break;
-        }
-    }
+
+    start_mfn = PFN_DOWN(cfg->address) + PCI_BDF(cfg->start_bus_number, 0, 0);
+    end_mfn = PFN_DOWN(cfg->address) + PCI_BDF(cfg->end_bus_number, ~0, ~0);
+    if ( rangeset_add_range(mmio_ro_ranges, start_mfn, end_mfn) )
+        printk(XENLOG_ERR
+               "%04x:%02x-%02x: could not mark MCFG (mfns %lx-%lx) read-only\n",
+               cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number,
+               start_mfn, end_mfn);
+
     return 0;
 }
 
@@ -197,6 +176,28 @@ void pci_mmcfg_arch_disable(unsigned int
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
 }
 
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf)
+{
+    unsigned int idx;
+
+    for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
+        const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
+
+        if (pci_mmcfg_virt[idx].virt &&
+            mfn >= PFN_DOWN(cfg->address) + PCI_BDF(cfg->start_bus_number,
+                                                    0, 0) &&
+            mfn <= PFN_DOWN(cfg->address) + PCI_BDF(cfg->end_bus_number,
+                                                    ~0, ~0)) {
+            *seg = cfg->pci_segment;
+            *bdf = mfn - PFN_DOWN(cfg->address);
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
 int __init pci_mmcfg_arch_init(void)
 {
     int i;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -447,7 +447,6 @@ int __init pci_ro_device(int seg, int bu
     }
 
     __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
-    arch_pci_ro_device(seg, PCI_BDF2(bus, devfn));
     _pci_hide_device(pdev);
 
     return 0;
--- a/xen/include/asm-x86/pci.h
+++ b/xen/include/asm-x86/pci.h
@@ -20,5 +20,7 @@ int pci_conf_write_intercept(unsigned in
                              uint32_t *data);
 int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
                                  unsigned int size, uint32_t *data);
+bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                        unsigned int *bdf);
 
 #endif /* __X86_PCI_H__ */
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -110,7 +110,6 @@ int pci_add_device(u16 seg, u8 bus, u8 d
                    const struct pci_dev_info *, nodeid_t node);
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);
 int pci_ro_device(int seg, int bus, int devfn);
-void arch_pci_ro_device(int seg, int bdf);
 int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);


Attachment: x86-PCI-MMCFG-intercept.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.