From bee45c2613b1f827e2610d7f8d06989f3cd76907 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Wed, 5 Feb 2014 14:26:56 -0500
Subject: [PATCH] DRAFT xen/pci: Re-add all PCI devices if pci=assign-busses is used.

That parameter wreaks havoc with the Xen hypervisor. Its internal
structures end up being confused such that 'upstream bridge'
information is lost.

As such, this patch re-programs the Xen hypervisor's PCI devices.
It does it in three steps:

 1). Before 'acpi_init' (which parses the ACPI DSDT for PCI devices),
     in register_xen_pci_notifier we collect the BDFs of all of the
     PCI devices that are active.

 2). When 'acpi_init' has finished and has reprogrammed the bus
     numbers, we intersect the list of all of the PCI devices that
     Linux knows with the list we created in step 1). The result is
     an array of BDFs which are orphaned - meaning they are not
     present on the machine any more - but the Xen hypervisor is still
     holding on to them - because Linux has not made the
     'xen_remove_device' call on them. The reason for that is
     explained later in this description[*1].

     With the list of orphaned PCI devices and the ones we have
     added - we make the hypercall to remove all the orphaned ones
     and all the ones that were added. At this stage Xen has no
     knowledge of any PCI devices.

 3). We add all of the PCI devices that Linux knows about. This way
     the view from Linux and Xen is synced when it comes to the PCI
     devices.

[*1]. Linux separates the PCI devices from PCI bridges in two
structures. That means that PCI devices know their slot and function
number, while the bus structure keeps track of the bus number. This
separation allows Linux to expand the bridge to span more bus numbers
and the changes are only updated in the PCI bus structures. The PCI
devices are oblivious to this.
Also the notifier call chain is only executed when a PCI device is
added - and since this is during early bootup - the notifier is not
used to 'delete' the devices that might have existed with the old bus
numbers - because Linux hasn't gotten to enumerate them.

With this patch, pci=assign-busses works with Xen hypervisors.

Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/xen/pci.c |  167 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 166 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index dd9c249..178de97 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -186,12 +186,138 @@ static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
 
+/*
+ * NOTE(review): the header names of the two #include lines added by this
+ * patch were lost when the patch text was extracted. <linux/rwsem.h> is
+ * assumed here for down_read()/up_read() - confirm against the original.
+ */
+#include <linux/rwsem.h>
+
+/*
+ * Call @fnc on the PCI devices of @bus and, recursively, of every child bus.
+ * Devices that have a subordinate bus are skipped in the first loop and
+ * handled in the second one through child->self. The return value of @fnc
+ * is ignored.
+ */
+static void __init walk_bus(struct pci_bus *bus, int (*fnc)(struct device *dev))
+{
+	struct pci_dev *dev;
+	struct pci_bus *child;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (dev->subordinate)
+			continue; /* Scan bridges in the next loop */
+		(void)fnc(&dev->dev);
+	}
+	list_for_each_entry(child, &bus->children, node) {
+		dev = child->self;
+		if (dev)
+			(void)fnc(&dev->dev);
+		walk_bus(child, fnc);
+	}
+}
+
+/* Run walk_bus() on every root bus while holding pci_bus_sem for reading. */
+static void __init walk_tree(int (*fnc)(struct device *dev))
+{
+	struct pci_bus *bus;
+
+	down_read(&pci_bus_sem);
+	list_for_each_entry(bus, &pci_root_buses, node)
+		walk_bus(bus, fnc);
+	up_read(&pci_bus_sem);
+}
+
+/*
+ * NOTE(review): header name lost in extraction; <asm/pci-direct.h> is assumed
+ * for read_pci_config() and early_pci_allowed() - confirm.
+ */
+#include <asm/pci-direct.h>
+
+#define PCI_BUS(bdf)		(((bdf) >> 8) & 0xff)
+#define PCI_BDF(b,d,f)		((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
+#define PCI_DEVFN2(bdf)		((bdf) & 0xff)
+#define PCI_BDF2(b,df)		((((b) & 0xff) << 8) | ((df) & 0xff))
+
+/* Bitmap with one bit per bus/devfn, i.e. PCI_BDF(-1, -1, -1) + 1 bits. */
+static unsigned long __initdata *pci_devs;
+
+/*
+ * Set the bit for bus/slot/func in pci_devs unless the config space read
+ * returns all ones.
+ */
+static void __init check_device(int bus, int slot, int func)
+{
+	u32 reg;
+
+	/*
+	 * read_pci_config() returns the full 32-bit register. Compare all of
+	 * it: storing it in a u16 would keep only the low 16 bits, which are
+	 * not the class code.
+	 */
+	reg = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
+	if (reg == 0xffffffff)
+		return;
+
+	set_bit(PCI_BDF(bus, slot, func), pci_devs);
+}
+
+/* walk_tree() callback: clear the pci_devs bit of a device Linux still has. */
+static int __init xen_prune_pci_devs(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	u16 busdevfn;
+
+	busdevfn = PCI_BDF2(pci_dev->bus->number, pci_dev->devfn);
+	if (test_bit(busdevfn, pci_devs)) /* If present it is not orphaned */
+		clear_bit(busdevfn, pci_devs);
+	return 0;
+}
+
+/* Ask the hypervisor to remove every bus/devfn whose bit is still set. */
+static void __init xen_delete_orphaned_pci_devs(void)
+{
+	struct physdev_manage_pci manage_pci;
+	unsigned int i;
+
+	for_each_set_bit(i, pci_devs, PCI_BDF(-1, -1, -1) + 1) {
+		manage_pci.bus = PCI_BUS(i);
+		manage_pci.devfn = PCI_DEVFN2(i);
+		(void)HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					    &manage_pci);
+	}
+}
+
 static int __init register_xen_pci_notifier(void)
 {
+	int bus, slot, func, rc = 0;
+
 	if (!xen_initial_domain())
 		return 0;
 
-	return bus_register_notifier(&pci_bus_type, &device_nb);
+	rc = bus_register_notifier(&pci_bus_type, &device_nb);
+
+	if (!pcibios_assign_all_busses())
+		return rc;
+
+	if (!early_pci_allowed())
+		return rc;
+
+	/* 64K bits needed - we will revisit it in xen_pci_refresh */
+	pci_devs = kcalloc(BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1),
+			   sizeof(unsigned long), GFP_KERNEL);
+	if (!pci_devs)
+		return rc;
+
+	/* Poor man's PCI discovery */
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				check_device(bus, slot, func);
+			}
+		}
+	}
+	return rc;
 }
 
 arch_initcall(register_xen_pci_notifier);
@@ -241,3 +367,42 @@ static int __init xen_mcfg_late(void)
  */
 subsys_initcall_sync(xen_mcfg_late);
 #endif
+
+/*
+ * Runs as a subsys_initcall_sync when pcibios_assign_all_busses() is true:
+ * removes the orphaned and the known PCI devices from the hypervisor and
+ * then adds back every device Linux knows about.
+ */
+static int __init xen_pci_refresh(void)
+{
+	if (!xen_initial_domain())
+		return 0;
+
+	if (!pcibios_assign_all_busses())
+		return 0;
+
+	/*
+	 * register_xen_pci_notifier() leaves pci_devs NULL when early PCI
+	 * access is not allowed or the allocation failed. Without the bitmap
+	 * there is nothing to prune, and test_bit() on it would dereference
+	 * a NULL pointer.
+	 */
+	if (!pci_devs)
+		return 0;
+
+	/* Update the list - so that we only have orphaned devices. */
+	walk_tree(&xen_prune_pci_devs);
+
+	/* Remove orphaned devices. */
+	xen_delete_orphaned_pci_devs();
+	/* Remove all existing ones */
+	walk_tree(&xen_remove_device);
+
+	/* Now the hypervisor has no PCI devices, so lets add them in */
+	walk_tree(&xen_add_device);
+
+	kfree(pci_devs);
+	pci_devs = NULL;
+	return 0;
+}
+subsys_initcall_sync(xen_pci_refresh);
-- 
1.7.7.6