[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] dom0 linux: Reassign memory resources to device for pci passthrough.



Yuji & Keir,

I guess the comments I gave yesterday is obscure, so I wrote up more details this time. Please take a look.

Thanks,
Yu

> To reassign resources, please add boot parameters of dom0 linux as follows.
>
>     reassign_resources reassigndev=00:1d.7,01:00.0
>
>         reassign_resources
>                         Enables reassigning resources.
>
>         reassigndev=    Specifies devices include I/O device and PCI-PCI
>                         bridge to reassign resources.  PCI-PCI bridge
>                         can be specified, if resource windows need to
>                         be expanded.

Generally, it doesn't work as claimed from my observation.

 /* This quirk function enables us to force all memory resources which are
  * assigned to PCI devices, to be page-aligned.
  */
@@ -41,6 +54,42 @@
        int i;
        struct resource *r;
        resource_size_t old_start;
+
+       if (reassign_resources) {
+               if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+                   (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+                       /* PCI Host Bridge isn't a target device */

The comments is wrong -- we get invalid device type here, not the host bridge. Host bridge doesn't have a 'dev' at all.

+                       return;
+               }
+               if (is_reassigndev(dev)) {
+                       printk(KERN_INFO
+                               "PCI: Disable device and release resources"
+                               " [%s].\n", pci_name(dev));
+                       pci_disable_device(dev);
+
+                       for (i=0; i < PCI_NUM_RESOURCES; i++) {
+                               r = &dev->resource[i];
+                               if ((r == NULL) ||

How can 'r' be NULL?

+                                  !(r->flags & IORESOURCE_MEM))
+                                       continue;
+
+                               r->end = r->end - r->start;
+                               r->start = 0;
+
+                               if (i < PCI_BRIDGE_RESOURCES) {
+                                       pci_update_resource(dev, r, i);
+                               } else if (i == 8 || i == 9) {

The bridge even hasn't been scanned yet so this will *never* execute as expected.

+                                       /* need to update(clear) the Base/Limit
+                                        * register also, because PCI bridge is
+                                        * disabled and the resource is
+                                        * released.
+                                        */
+                                       pci_update_bridge(dev, i);
+                               }
+                       }
+               }
+               return;
+       }

        if (!pci_mem_align)
                return;

...

diff -r b54652ee29ef drivers/pci/setup-bus.c
--- a/drivers/pci/setup-bus.c   Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-bus.c   Wed Oct 08 12:12:27 2008 +0900
@@ -26,6 +26,7 @@
 #include <linux/cache.h>
 #include <linux/slab.h>

+#include "pci.h"

 #define DEBUG_CONFIG 1
 #if DEBUG_CONFIG
@@ -344,7 +345,8 @@

        list_for_each_entry(dev, &bus->devices, bus_list) {
                int i;

It most likely couldn't get here because the x86 PCI low level code has allocated resources for the bridge according to the BIOS and the function returns early.

-
+               int reassign = reassign_resources ? is_reassigndev(dev) : 0;
+
                for (i = 0; i < PCI_NUM_RESOURCES; i++) {
                        struct resource *r = &dev->resource[i];
                        unsigned long r_size;
@@ -352,6 +354,11 @@
                        if (r->parent || (r->flags & mask) != type)
                                continue;
                        r_size = r->end - r->start + 1;
+
+                       if (reassign) {
+                               r_size = ROUND_UP_TO_PAGESIZE(r_size);
+                       }
+
                        /* For bridges size != alignment */
                        align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
                        order = __ffs(align) - 20;
diff -r b54652ee29ef drivers/pci/setup-res.c
--- a/drivers/pci/setup-res.c   Thu Oct 02 11:29:02 2008 +0100
+++ b/drivers/pci/setup-res.c   Wed Oct 08 12:12:27 2008 +0900
@@ -117,19 +117,96 @@
 }
 EXPORT_SYMBOL_GPL(pci_claim_resource);

+void
+pci_update_bridge(struct pci_dev *dev, int resno)
+{
+       struct resource *res = &dev->resource[resno];
+       struct pci_bus_region region;
+       u32 l, dw, base_up32, limit_up32;
+
+       if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE ||
+           (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) {
+               return;
+       }
+
+       if (!res->flags)
+               return;
+
+       switch (resno) {
+       case 8 :        /* MMIO Base/Limit */
+               pcibios_resource_to_bus(dev, &region, res);
+               if (res->flags & IORESOURCE_MEM &&
+                   !(res->flags & IORESOURCE_PREFETCH)) {
+                       l = (region.start >> 16) & 0xfff0;
+                       l |= region.end & 0xfff00000;
+               } else {
+                       l = 0x0000fff0;
+               }
+               pci_write_config_dword(dev, PCI_MEMORY_BASE, l);
+
+               break;
+
+       case 9 :        /* Prefetchable MMIO Base/Limit */
+               /* Clear out the upper 32 bits of PREF limit.
+                * If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
+                * disables PREF range, which is ok.
+                */
+               pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+
+               /* Get PREF 32/64 bits Addressing mode */
+               pci_read_config_dword(dev, PCI_PREF_MEMORY_BASE, &dw);
+
+               pcibios_resource_to_bus(dev, &region, res);
+               if (res->flags & IORESOURCE_MEM &&
+                   res->flags & IORESOURCE_PREFETCH) {
+                       l = (region.start >> 16) & 0xfff0;
+                       l |= region.end & 0xfff00000;
+
+                       if (dw & PCI_PREF_RANGE_TYPE_64) {
+                               base_up32 = (region.start >> 32) & 0xffffffff;
+                               limit_up32 = (region.end >> 32) & 0xffffffff;
+                       } else {
+                               base_up32 = 0;
+                               limit_up32 = 0;
+                       }
+               } else {
+                       l = 0x0000fff0;
+                       base_up32 = 0xffffffff;
+                       limit_up32 = 0;
+               }
+               pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, l);
+               /* Set up the upper 32 bits of PREF base/limit. */
+               pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, base_up32);
+               pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, limit_up32);
+               break;
+       default :
+               BUG();
+               break;
+       }
+}
+

I don't see how this function 'expend' the resource window (even it was invoked). Actually it has many problems: 1, the dev->resource is not updated hence software doesn't know the new resource window size that can be granted to subordinate devices. 2, the values might be overwritten later by pci_setup_bridge. 3, the registers are changed without checking if resource range are available, which means it may conflict with other bridge.

 int pci_assign_resource(struct pci_dev *dev, int resno)
 {
        struct pci_bus *bus = dev->bus;
        struct resource *res = dev->resource + resno;
        resource_size_t size, min, align;
        int ret;
+       int reassigndev = reassign_resources ? is_reassigndev(dev) : 0;

        size = res->end - res->start + 1;
        min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
        /* The bridge resources are special, as their
           size != alignment. Sizing routines return
           required alignment in the "start" field. */
-       align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
+       if (resno < PCI_BRIDGE_RESOURCES) {
+               align = size;
+               if ((reassigndev) &&
+                   (res->flags & IORESOURCE_MEM)) {
+                       align = ROUND_UP_TO_PAGESIZE(align);

Different MMIO resrouces of a device may require different alignments. Using page size for all may cause problem. The alignment should be a bigger value between page size and resource size. Or the best way is keep aligned resources of a device and just reassign the unaligned resources in the quirk_align_mem_resources.

+               }
+       } else {
+               align = res->start;
+       }

        /* First, try exact prefetching match.. */
        ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -154,6 +231,9 @@
                        resno, (unsigned long long)size,
                        (unsigned long long)res->start, pci_name(dev));
        } else if (resno < PCI_BRIDGE_RESOURCES) {
+               printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
+                       "%016llx - %016llx\n", resno, pci_name(dev),
+                       (u64)res->start, (u64)res->end);
                pci_update_resource(dev, res, resno);
        }




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.