From abf8a206a73bb037788b31b868102023c081d079 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Wed, 19 Feb 2014 17:16:01 -0500
Subject: [PATCH 5/6] xen/pci=assign-buses: Renumber the bus if there is a need to.

Xen can re-number the PCI buses if there are SR-IOV devices there
and the BIOS hadn't done its job.

Signed-off-by: Konrad Rzeszutek Wilk
---
 xen/arch/x86/setup.c          |   2 +
 xen/drivers/passthrough/pci.c | 807 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/pci.h         |   1 +
 3 files changed, 810 insertions(+)

diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index b49256d..0c2f9ba 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1294,6 +1294,8 @@ void __init __start_xen(unsigned long mbi_p)
 
     acpi_mmcfg_init();
 
+    early_pci_reassign_busses();
+
     early_msi_init();
 
     iommu_setup();    /* setup iommu if available */
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 0e59216..ba852bd 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -999,6 +999,813 @@ static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
     return 0;
 }
 
+/* TODO: Move this to its own file. */
+#define DEBUG 1
+static void parse_pci_param(char *s);
+custom_param("pci", parse_pci_param);
+
+struct early_pci_bus;
+
+/* A PCI function found by the early scan (segment 0 only). */
+struct early_pci_dev {
+    struct list_head bus_list;    /* Entry in our bus' 'devices' list. */
+    unsigned int is_serial:1;
+    unsigned int is_ehci:1;
+    unsigned int is_sriov:1;
+    unsigned int is_bridge:1;
+    u16 vendor;
+    u16 device;
+    u8 devfn;
+    u16 total_vfs;
+    u16 revision;
+    u32 class;                    /* 24-bit class code; does not fit a u16. */
+    struct early_pci_bus *bus;    /* On what bus we are. */
+    struct early_pci_bus *bridge; /* Bus behind us if we are a bridge. */
+};
+
+/* A bus found by the early scan, with its current and planned numbering. */
+struct early_pci_bus {
+    struct list_head next;        /* In parent's 'children' or the root list. */
+    struct list_head devices;     /* Devices on this bus. */
+    struct list_head children;    /* Buses behind bridges on this bus. */
+    struct early_pci_bus *parent; /* Bus upstream of us. */
+    struct early_pci_dev *self;   /* The PCI device that controls this bus. */
+    u8 primary;                   /* The (parent) bus number. */
+    u8 number;
+    u8 start;
+    u8 end;
+    u8 new_end;                   /* To be updated too. */
+    u8 new_start;
+    u8 new_primary;
+    u8 old_number;
+};
+
+static unsigned int __initdata assign_busses;
+static struct list_head __initdata early_buses_list;
+static int __initdata verbose;
+
+#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320
+#if 0
+static __init void print_pci_dev(const char *prefix, u8 bus, u8 devfn)
+{
+    u32 class, id;
+
+    class = pci_conf_read32(0, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                            PCI_CLASS_REVISION);
+    id = pci_conf_read32(0, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                         PCI_VENDOR_ID);
+    printk("%04x:%02x.%u [%04x:%04x] class %06x [%s]\n", bus, PCI_SLOT(devfn),
+           PCI_FUNC(devfn), id & 0xffff, (id >> 16) & 0xffff, class, prefix);
+}
+#endif
+
+/*
+ * Allocate an early_pci_dev for @devfn on @bus and classify it from its
+ * config space: EHCI or serial controller, bridge, or SR-IOV capable.
+ * Returns NULL if @bus is NULL or the allocation fails.
+ */
+static __init struct early_pci_dev *early_alloc_pci_dev(struct early_pci_bus *bus,
+                                                        u8 devfn)
+{
+    struct early_pci_dev *dev;
+    u8 type;
+    u16 class_dev, total;
+    u32 class, id;
+    unsigned int pos;
+
+    if ( !bus )
+        return NULL;
+
+    dev = xzalloc(struct early_pci_dev);
+    if ( !dev )
+        return NULL;
+
+    INIT_LIST_HEAD(&dev->bus_list);
+    dev->devfn = devfn;
+    dev->bus = bus;
+    class = pci_conf_read32(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                            PCI_CLASS_REVISION);
+
+    dev->revision = class & 0xff;
+    dev->class = class >> 8;
+    if ( dev->class == PCI_CLASS_SERIAL_USB_EHCI )
+        dev->is_ehci = 1;
+
+    class_dev = pci_conf_read16(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                                PCI_CLASS_DEVICE);
+    switch ( class_dev )
+    {
+    case 0x0700: /* single port serial */
+    case 0x0702: /* multi port serial */
+    case 0x0780: /* other (e.g serial+parallel) */
+        dev->is_serial = 1;
+        break;
+    default:
+        break;
+    }
+    type = pci_conf_read8(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                          PCI_HEADER_TYPE);
+    switch ( type & 0x7f )
+    {
+    case PCI_HEADER_TYPE_BRIDGE:
+    case PCI_HEADER_TYPE_CARDBUS:
+        dev->is_bridge = 1;
+        break;
+    case PCI_HEADER_TYPE_NORMAL:
+        pos = pci_find_cap_offset(0, bus->number, PCI_SLOT(devfn),
+                                  PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+        if ( !pos ) /* Not PCIe */
+            break;
+        pos = pci_find_ext_capability(0, bus->number, devfn,
+                                      PCI_EXT_CAP_ID_SRIOV);
+        if ( !pos ) /* Not SR-IOV */
+            break;
+        total = pci_conf_read16(0, bus->number, PCI_SLOT(devfn),
+                                PCI_FUNC(devfn), pos + PCI_SRIOV_TOTAL_VF);
+        if ( !total )
+            break;
+        dev->is_sriov = 1;
+        dev->total_vfs = total;
+        break;
+    default:
+        break;
+    }
+    id = pci_conf_read32(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                         PCI_VENDOR_ID);
+    dev->vendor = id & 0xffff;
+    dev->device = (id >> 16) & 0xffff;
+    /* In case MCFG is not configured we have our blacklist */
+    switch ( dev->vendor )
+    {
+    case 0x8086: /* Intel */
+        switch ( dev->device )
+        {
+        case 0x10c9: /* Intel Corporation 82576 Gigabit Network Connection (rev 01) */
+            if ( dev->is_sriov )
+                break;
+            dev->is_sriov = 1;
+            dev->total_vfs = 8;
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+    return dev;
+}
+
+/* Depth-first search of the tree rooted at @parent for bus number @nr. */
+static __init struct early_pci_bus *__find_bus(struct early_pci_bus *parent,
+                                               u8 nr)
+{
+    struct early_pci_bus *child, *bus;
+
+    if ( parent->number == nr )
+        return parent;
+
+    list_for_each_entry ( child, &parent->children, next )
+    {
+        bus = __find_bus(child, nr);
+        if ( bus )
+            return bus;
+    }
+    return NULL;
+}
+
+/* Look up bus number @nr in every tree on early_buses_list. */
+static __init struct early_pci_bus *find_bus(u8 nr)
+{
+    struct early_pci_bus *bus, *child;
+
+    list_for_each_entry ( bus, &early_buses_list, next )
+    {
+        child = __find_bus(bus, nr);
+        if ( child )
+            return child;
+    }
+    return NULL;
+}
+
+/* Look up the already recorded device @devfn on bus number @nr. */
+static __init struct early_pci_dev *find_dev(u8 nr, u8 devfn)
+{
+    struct early_pci_bus *bus = find_bus(nr);
+    struct early_pci_dev *dev;
+
+    if ( !bus )
+        return NULL;
+
+    list_for_each_entry ( dev, &bus->devices, bus_list )
+        if ( dev->devfn == devfn )
+            return dev;
+
+    return NULL;
+}
+
+/*
+ * Allocate a bus numbered @nr.  @dev is the bridge leading to it (NULL for
+ * a root bus); the bridge is linked to the bus unless it already has one.
+ */
+static __init struct early_pci_bus *early_alloc_pci_bus(struct early_pci_dev *dev,
+                                                        u8 nr)
+{
+    struct early_pci_bus *bus;
+
+    bus = xzalloc(struct early_pci_bus);
+    if ( !bus )
+        return NULL;
+
+    INIT_LIST_HEAD(&bus->next);
+    INIT_LIST_HEAD(&bus->devices);
+    INIT_LIST_HEAD(&bus->children);
+    bus->number = nr;
+    bus->old_number = nr;
+    bus->self = dev;
+    if ( dev && !dev->bridge )
+        dev->bridge = bus;
+    return bus;
+}
+
+/*
+ * Free everything hanging off @bus: all descendant buses and all devices.
+ * @bus itself is left for the caller to unlink and free.
+ */
+static void __init early_free_pci_bus(struct early_pci_bus *bus)
+{
+    struct early_pci_dev *dev, *d_tmp;
+    struct early_pci_bus *b, *b_tmp;
+
+    list_for_each_entry_safe ( b, b_tmp, &bus->children, next )
+    {
+        early_free_pci_bus(b);
+        list_del(&b->next);
+        xfree(b); /* Nobody else frees child buses. */
+    }
+    list_for_each_entry_safe ( dev, d_tmp, &bus->devices, bus_list )
+    {
+        list_del(&dev->bus_list);
+        xfree(dev);
+    }
+}
+
+/* Tear down every tree built by the early scan. */
+static void __init early_free_all(void)
+{
+    struct early_pci_bus *bus, *tmp;
+
+    list_for_each_entry_safe ( bus, tmp, &early_buses_list, next )
+    {
+        early_free_pci_bus(bus);
+        list_del(&bus->next);
+        xfree(bus);
+    }
+}
+
+/*
+ * Work out how many extra bus numbers the SR-IOV devices on @bus ask for
+ * (total_vfs / 8 of the most demanding device).  Returns 0 if the bus'
+ * current start..end range already has that much room.
+ */
+unsigned int __init pci_iov_scan(struct early_pci_bus *bus)
+{
+    struct early_pci_dev *dev;
+    unsigned int max = 0;
+
+    list_for_each_entry ( dev, &bus->devices, bus_list )
+    {
+        /* Not a u8: total_vfs / 8 can exceed 255 and must not wrap. */
+        unsigned int busnr;
+
+        if ( !dev->is_sriov || !dev->total_vfs )
+            continue;
+        busnr = dev->total_vfs / 8; /* How many buses we will need */
+        if ( busnr > max )
+            max = busnr;
+    }
+    /* Do we have enough space for them ? */
+    if ( (bus->end - bus->start) >= max )
+        return 0;
+    return max;
+}
+
+#ifdef DEBUG
+/* Prefix used by print_devs() for nesting level @lvl. */
+static __init const char *spaces(unsigned int lvl)
+{
+    if ( lvl == 0 )
+        return " ";
+    if ( lvl == 1 )
+        return " +--+";
+    if ( lvl == 2 )
+        return " +-+";
+    if ( lvl == 3 )
+        return " +-+";
+    return " +...+";
+}
+
+/* Dump the devices on @parent, then recurse into its child buses. */
+static void __init print_devs(struct early_pci_bus *parent, int lvl)
+{
+    struct early_pci_dev *dev;
+    struct early_pci_bus *bus;
+
+    list_for_each_entry ( dev, &parent->devices, bus_list )
+    {
+        printk("%s%04x:%02x:%u [%04x:%04x] class %06x", spaces(lvl),
+               parent->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+               dev->vendor, dev->device, dev->class);
+        if ( dev->is_bridge )
+        {
+            printk(" BRIDGE");
+            if ( dev->bridge )
+            {
+                struct early_pci_bus *bridge = dev->bridge;
+
+                printk(" to BUS %x [spans %x->%x] up BUS %x",
+                       bridge->number, bridge->start, bridge->end,
+                       bridge->primary);
+                printk(" (up: %x spans %x->%x)", bridge->new_primary,
+                       bridge->new_start, bridge->new_end);
+            }
+        }
+        if ( dev->is_sriov )
+            printk(" sriov: %d", dev->total_vfs);
+        if ( dev->is_ehci )
+            printk(" EHCI DEBUG ");
+        if ( dev->is_serial )
+            printk(" SERIAL ");
+        printk("\n");
+    }
+    list_for_each_entry ( bus, &parent->children, next )
+        print_devs(bus, lvl + 1);
+}
+#endif
+
+/* Dump all scanned trees if "pci=verbose" was given (DEBUG builds only). */
+static void __init print_devices(void)
+{
+#ifdef DEBUG
+    struct early_pci_bus *bus;
+
+    if ( !verbose )
+        return;
+
+    list_for_each_entry ( bus, &early_buses_list, next )
+        print_devs(bus, 0);
+#endif
+}
+
+unsigned int pci_scan_bus(struct early_pci_bus *bus);
+
+/*
+ * Probe @devfn on @bus and record the device if one answers and it is not
+ * known yet.  Returns 0, or -ENODEV (converted to unsigned) if the device
+ * could not be allocated.
+ */
+unsigned int __init pci_scan_slot(struct early_pci_bus *bus, unsigned int devfn)
+{
+    struct early_pci_dev *dev;
+
+    if ( find_dev(bus->number, devfn) )
+        return 0;
+
+    if ( !pci_device_detect(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)) )
+        return 0;
+
+    dev = early_alloc_pci_dev(bus, devfn);
+    if ( !dev )
+        return -ENODEV;
+
+    list_add_tail(&dev->bus_list, &bus->devices);
+    return 0;
+}
+
+/*
+ * Read the bus numbers programmed into bridge @dev, which sits on @bus,
+ * create the bus behind it if it is not known yet, and scan that bus.
+ * Returns the highest bus number seen so far (never less than @max).
+ */
+static int __init pci_scan_bridge(struct early_pci_bus *bus,
+                                  struct early_pci_dev *dev,
+                                  unsigned int max)
+{
+    struct early_pci_bus *child;
+    u32 buses;
+    u8 primary, secondary, subordinate;
+    unsigned int cmax;
+
+    buses = pci_conf_read32(0, bus->number, PCI_SLOT(dev->devfn),
+                            PCI_FUNC(dev->devfn), PCI_PRIMARY_BUS);
+
+    primary = buses & 0xFF;
+    secondary = (buses >> 8) & 0xFF;
+    subordinate = (buses >> 16) & 0xFF;
+
+    if ( !primary && (primary != bus->number) && secondary && subordinate )
+    {
+        printk("Primary bus is hard wired to 0\n");
+        primary = bus->number;
+    }
+
+    /*
+     * A secondary bus number that is not above the bus the bridge sits on
+     * means the bridge is unconfigured (or bogus).  Descending into it would
+     * rescan a bus we are already scanning and recurse without bound.
+     */
+    if ( secondary <= bus->number )
+        goto out;
+
+    child = find_bus(secondary);
+    if ( !child )
+    {
+        child = early_alloc_pci_bus(dev, secondary);
+        if ( !child )
+            goto out;
+        /* Add to the parent's bus list */
+        list_add_tail(&child->next, &bus->children);
+        /* The primary is the upstream bus number. */
+        child->primary = primary;
+        child->start = secondary;
+        child->end = subordinate;
+        child->parent = bus;
+    }
+    cmax = pci_scan_bus(child);
+    if ( cmax > max )
+        max = cmax;
+
+    if ( child->end > max )
+        max = child->end;
+ out:
+    return max;
+}
+
+/*
+ * Probe all 256 devfns on @bus, then descend through every bridge found.
+ * Raises bus->end if a bridge below reaches further and returns the highest
+ * bus number reported by those bridges (0 if there are none).
+ */
+unsigned int __init pci_scan_bus(struct early_pci_bus *bus)
+{
+    unsigned int max = 0, devfn;
+    struct early_pci_dev *dev;
+
+    for ( devfn = 0; devfn < 0x100; devfn++ )
+        pci_scan_slot(bus, devfn);
+
+    /* Walk all devices and create the bus structs */
+    list_for_each_entry ( dev, &bus->devices, bus_list )
+    {
+        if ( !dev->is_bridge )
+            continue;
+        if ( verbose )
+            printk("Scanning bridge %04x:%02x.%u [%04x:%04x] class %06x\n",
+                   bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+                   dev->vendor, dev->device, dev->class);
+        max = pci_scan_bridge(bus, dev, max);
+    }
+    if ( max > bus->end )
+        bus->end = max;
+    return max;
+}
+
+/*
+ * Compute new_start/new_end for @bus and everything below it.  @offset is
+ * the number of extra bus numbers accumulated so far; what the SR-IOV
+ * devices on @bus need on top (see pci_iov_scan()) is added to it.
+ * @adjust_start tells whether new_start of non-root buses has to move up
+ * by the offset as well.  Returns the offset accumulated over the subtree.
+ */
+static __init unsigned int adjust_span(struct early_pci_bus *bus,
+                                       unsigned int offset,
+                                       unsigned int adjust_start)
+{
+    struct early_pci_bus *child;
+    unsigned int scan;
+
+    scan = pci_iov_scan(bus);
+    offset += scan;
+
+    list_for_each_entry ( child, &bus->children, next )
+    {
+        unsigned int new_offset;
+
+        new_offset = adjust_span(child, offset, adjust_start);
+
+        if ( new_offset > offset )
+        {
+            /* A new contender ! */
+            offset = new_offset;
+            /* If we didn't find any IOV devices then we must adjust the
+             * start for all our children from this point on? */
+            adjust_start = 1;
+        }
+    }
+    bus->new_start = bus->start;
+    bus->new_end = bus->end + offset;
+
+    /* Do not update our new_start if we were the one that discovered it. */
+    if ( scan )
+        adjust_start = 0;
+
+    /* We can't check against scan as the loop might have altered it. */
+    /* N.B. Ignore host bridges. */
+    if ( adjust_start && bus->parent )
+        bus->new_start += offset;
+
+    return offset;
+}
+
+/*
+ * Set new_primary of every bus below @bus to the new_start of its parent.
+ * @offset and @adjust_start are passed down but not otherwise used.
+ */
+static __init void adjust_primary(struct early_pci_bus *bus,
+                                  unsigned int offset,
+                                  unsigned int adjust_start)
+{
+    struct early_pci_bus *child;
+
+    list_for_each_entry ( child, &bus->children, next )
+    {
+        child->new_primary = bus->new_start;
+        adjust_primary(child, offset, adjust_start);
+    }
+}
+
+/*
+ * Clear the primary/secondary/subordinate bus numbers of every bridge that
+ * sits directly on @parent, so that nothing behind them is forwarded while
+ * the hierarchy is being renumbered.
+ */
+static void __init pci_disable_forwarding(struct early_pci_bus *parent)
+{
+    struct early_pci_dev *dev;
+
+    list_for_each_entry ( dev, &parent->devices, bus_list )
+    {
+        unsigned int slot, func;
+        u8 bus;
+        u16 bctl;
+        u32 buses;
+
+        if ( !dev->is_bridge )
+            continue;
+
+        bus = dev->bus->number;
+        slot = PCI_SLOT(dev->devfn);
+        func = PCI_FUNC(dev->devfn);
+
+        buses = pci_conf_read32(0, bus, slot, func, PCI_PRIMARY_BUS);
+        if ( verbose )
+            printk("%04x:%02x.%u PCI_PRIMARY_BUS read %x\n", bus, slot, func,
+                   buses);
+
+        /*
+         * Lifted from Linux but not sure if this MasterAbort masking is
+         * still needed.  PCI_BRIDGE_CONTROL is a 16-bit register: a 32-bit
+         * access would also hit the registers next to it.
+         */
+        bctl = pci_conf_read16(0, bus, slot, func, PCI_BRIDGE_CONTROL);
+        pci_conf_write16(0, bus, slot, func, PCI_BRIDGE_CONTROL,
+                         bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
+
+        /* Disable forwarding */
+        pci_conf_write32(0, bus, slot, func, PCI_PRIMARY_BUS,
+                         buses & ~0xffffff);
+
+        pci_conf_write16(0, bus, slot, func, PCI_BRIDGE_CONTROL, bctl);
+    }
+}
+
+/*
+ * Program bridge @dev, which sits on bus @parent, with the primary,
+ * secondary and subordinate numbers planned for the bus behind it, commit
+ * them to our bookkeeping and recurse into the bridges on that bus.
+ */
+static void __init __pci_program_bridge(struct early_pci_dev *dev,
+                                        struct early_pci_bus *parent)
+{
+    unsigned int slot, func;
+    u16 bctl;
+    u32 buses;
+    u8 bus;
+    struct early_pci_bus *child, *bridges;
+    u8 primary, secondary, subordinate;
+
+    bus = parent->number; /* Upstream number. */
+    child = dev->bridge;  /* The bus we are serving. */
+
+    /*
+     * A bridge without a bus of its own (skipped or failed during the scan)
+     * has nothing to program; do not dereference a NULL bus.
+     */
+    if ( !child )
+        return;
+
+    slot = PCI_SLOT(dev->devfn);
+    func = PCI_FUNC(dev->devfn);
+
+    buses = pci_conf_read32(0, bus, slot, func, PCI_PRIMARY_BUS);
+    if ( verbose )
+        printk("%04x:%02x.%u PCI_PRIMARY_BUS read %x\n", bus, slot, func,
+               buses);
+
+    /*
+     * Lifted from Linux but not sure if this MasterAbort masking is
+     * still needed.  PCI_BRIDGE_CONTROL and PCI_STATUS are 16-bit
+     * registers: 32-bit accesses would also hit the registers next to them.
+     */
+    bctl = pci_conf_read16(0, bus, slot, func, PCI_BRIDGE_CONTROL);
+    pci_conf_write16(0, bus, slot, func, PCI_BRIDGE_CONTROL,
+                     bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
+
+    pci_conf_write16(0, bus, slot, func, PCI_STATUS, 0xffff);
+
+    buses = (buses & 0xff000000)
+            | ((unsigned int)(child->new_primary) << 0)
+            | ((unsigned int)(child->new_start) << 8)
+            | ((unsigned int)(child->new_end) << 16);
+    if ( verbose )
+        printk("%04x:%02x.%u wrote to PCI_PRIMARY_BUS %x\n", bus, slot, func,
+               buses);
+
+    pci_conf_write32(0, bus, slot, func, PCI_PRIMARY_BUS, buses);
+
+    /* Double check that it is correct. */
+    buses = pci_conf_read32(0, bus, slot, func, PCI_PRIMARY_BUS);
+    if ( verbose )
+        printk("%04x:%02x.%u PCI_PRIMARY_BUS read %x\n", bus, slot, func,
+               buses);
+
+    primary = buses & 0xFF;
+    secondary = (buses >> 8) & 0xFF;
+    subordinate = (buses >> 16) & 0xFF;
+
+    ASSERT(primary == child->new_primary);
+    ASSERT(secondary == child->new_start);
+    ASSERT(subordinate == child->new_end);
+
+    pci_conf_write16(0, bus, slot, func, PCI_BRIDGE_CONTROL, bctl);
+
+    child->number = child->new_start;
+    child->primary = child->new_primary;
+    child->start = child->new_start;
+    child->end = child->new_end;
+
+    /*
+     * The nested bridges sit on 'child', so that is the bus their config
+     * space is reached through - not the bus behind each of them.
+     */
+    list_for_each_entry ( bridges, &child->children, next )
+        if ( bridges->self )
+            __pci_program_bridge(bridges->self, child);
+}
+
+/* Reprogram every bridge on @bus and, through them, the bridges below. */
+static void __init pci_program_bridge(struct early_pci_bus *bus)
+{
+    struct early_pci_dev *dev;
+
+    list_for_each_entry ( dev, &bus->devices, bus_list )
+    {
+        if ( !dev->is_bridge )
+            continue;
+        __pci_program_bridge(dev, bus);
+    }
+}
+
+/*
+ * Placeholder: walks the tree for the EHCI, serial and bridge devices whose
+ * bus number may have changed, but does not act on them yet.
+ */
+static void __init update_console_devices(struct early_pci_bus *parent)
+{
+    struct early_pci_dev *dev;
+    struct early_pci_bus *bus;
+
+    list_for_each_entry ( dev, &parent->devices, bus_list )
+    {
+        if ( dev->is_ehci || dev->is_serial || dev->is_bridge )
+        {
+            ;/* TODO */
+        }
+    }
+    list_for_each_entry ( bus, &parent->children, next )
+        update_console_devices(bus);
+}
+
+/*
+ * Parse the comma separated "pci=" command line option.  Recognised
+ * tokens are "assign-buses" and "verbose"; anything else is ignored.
+ */
+static void __init parse_pci_param(char *s)
+{
+    char *ss;
+
+    do {
+        ss = strchr(s, ',');
+        if ( ss )
+            *ss = '\0';
+
+        if ( !strcmp(s, "assign-buses") )
+            assign_busses = 1;
+        else if ( !strcmp(s, "verbose") )
+            verbose = 1;
+
+        /* Only step past a separator we actually found. */
+        if ( ss )
+            s = ss + 1;
+    } while ( ss );
+}
+
+/*
+ * Entry point, called from __start_xen().  With "pci=assign-buses" scan
+ * segment 0, work out whether SR-IOV devices need more bus numbers than
+ * are currently assigned, and if so reprogram the bridges accordingly.
+ */
+void __init early_pci_reassign_busses(void)
+{
+    unsigned int nr;
+    struct early_pci_bus *bus;
+    unsigned int adjust = 0, last_end;
+
+    if ( !assign_busses )
+        return;
+
+    INIT_LIST_HEAD(&early_buses_list);
+    for ( nr = 0; nr < 256; nr++ )
+    {
+        if ( !pci_device_detect(0, nr, 0, 0) )
+            continue;
+        if ( find_bus(nr) )
+            continue;
+        /* Host bridges do not have any parent devices ! */
+        bus = early_alloc_pci_bus(NULL, nr);
+        if ( !bus )
+            goto out;
+        bus->start = nr;
+        bus->primary = 0; /* Points to host, which is zero */
+        pci_scan_bus(bus);
+        list_add_tail(&bus->next, &early_buses_list);
+    }
+    /* Walk all the devices, figure out what will be the _new_
+     * max if any. */
+    last_end = 0;
+    list_for_each_entry ( bus, &early_buses_list, next )
+    {
+        unsigned int offset;
+
+        /*
+         * Oh now, the previous end bus number overlaps!
+         * NOTE(review): adjust_span() below assigns new_start and new_end
+         * unconditionally, so this adjustment does not survive.
+         */
+        if ( last_end > bus->start )
+        {
+            bus->new_start = last_end;
+            bus->new_end = bus->new_end + last_end;
+        }
+        last_end = bus->end;
+        offset = adjust_span(bus, 0 /* no offset ! */, adjust);
+        if ( offset > adjust )
+        {
+            adjust = offset;
+            last_end = bus->new_end;
+        }
+        adjust_primary(bus, 0, 0);
+    }
+
+    print_devices();
+    if ( !adjust )
+    {
+        printk("No need to reassign busses.\n");
+        goto out;
+    }
+    printk("Re-assigning busses to make space for %u bus numbers.\n", adjust);
+
+    /* Walk all the devices, disable serial and ehci */
+    if ( !verbose )
+        serial_suspend();
+
+    /* Walk all the bridges, disable forwarding */
+    list_for_each_entry ( bus, &early_buses_list, next )
+        pci_disable_forwarding(bus);
+
+    /* Walk all bridges, reprogram with max (so new primary, secondary and such). */
+    list_for_each_entry ( bus, &early_buses_list, next )
+        pci_program_bridge(bus);
+
+    /* Walk all devices, re-enable serial, ehci with new bus number */
+    list_for_each_entry ( bus, &early_buses_list, next )
+        update_console_devices(bus);
+
+    if ( !verbose )
+        serial_resume();
+    print_devices();
+ out:
+    early_free_all();
+}
+
 void __init setup_dom0_pci_devices(
     struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
 {
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index b883c28..1750196 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -102,6 +102,7 @@ struct pci_dev *pci_lock_domain_pdev(
 
 void setup_dom0_pci_devices(struct domain *,
                             int (*)(u8 devfn, struct pci_dev *));
+void early_pci_reassign_busses(void);
 void pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
-- 
1.8.3.1