[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] Xen 3.4 code freeze
Hi Keir, There are some native kernel SR-IOV patches that would be accepted by the maintainer soon, and I planned to backport them to Xen/Dom0 then. However, it looks like they can't be in the native kernel tree before the code freeze. I attached the backported patches. If it's possible, can you please take them for 3.4? Thanks, Yu Keir Fraser wrote: Folks, My plan is to take no further large feature patchsets into xen-unstable until 3.4 is branched, the only exception possibly being Dan's tmem patches. Furthermore, after Friday I'm away for a week and when I get back I intend to shake the trees into better shape and accept bug-fix patches only. -- Keir # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237366099 14400 # Node ID cdf29bb6c74d5e075099855e9ddffb27e633079a # Parent 9fc957e63f8dc0fdb2400eb424da4c1122b7ac65 Xen: use proper device ID to search VT-d unit for ARI and SR-IOV device PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended Function -- a function whose function number is greater than 7 within an ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended Function is under the scope of the same remapping unit as the traditional function. The hypervisor needs to know if a function is Extended Function so it can find proper DMAR for it. And section 8.3.3 specifies that the SR-IOV Virtual Function is under the scope of the same remapping unit as the Physical Function. The hypervisor also needs to know if a function is the Virtual Function and which Physical Function it's associated with for same reason. 
diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/arch/ia64/xen/hypercall.c Wed Mar 18 04:48:19 2009 -0400 @@ -650,6 +650,7 @@ case PHYSDEVOP_manage_pci_add: { struct physdev_manage_pci manage_pci; + struct pci_dev dev; ret = -EPERM; if ( !IS_PRIV(current->domain) ) break; @@ -657,7 +658,13 @@ if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn); + dev.bus = manage_pci.bus; + dev.devfn = manage_pci.devfn; + dev.is_extfn = manage_pci.is_extfn; + dev.is_virtfn = manage_pci.is_virtfn; + dev.physfn.bus = manage_pci.physfn.bus; + dev.physfn.devfn = manage_pci.physfn.devfn; + ret = pci_add_device(&dev); break; } diff -r 9fc957e63f8d -r cdf29bb6c74d xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/arch/x86/physdev.c Wed Mar 18 04:48:19 2009 -0400 @@ -397,6 +397,7 @@ case PHYSDEVOP_manage_pci_add: { struct physdev_manage_pci manage_pci; + struct pci_dev dev; ret = -EPERM; if ( !IS_PRIV(v->domain) ) break; @@ -404,7 +405,13 @@ if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn); + dev.bus = manage_pci.bus; + dev.devfn = manage_pci.devfn; + dev.is_extfn = manage_pci.is_extfn; + dev.is_virtfn = manage_pci.is_virtfn; + dev.physfn.bus = manage_pci.physfn.bus; + dev.physfn.devfn = manage_pci.physfn.devfn; + ret = pci_add_device(&dev); break; } diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/pci.c --- a/xen/drivers/passthrough/pci.c Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/drivers/passthrough/pci.c Wed Mar 18 04:48:19 2009 -0400 @@ -43,8 +43,8 @@ return NULL; memset(pdev, 0, sizeof(struct pci_dev)); - *((u8*) &pdev->bus) = bus; - *((u8*) &pdev->devfn) = devfn; + pdev->bus = bus; + pdev->devfn = devfn; pdev->domain = NULL; INIT_LIST_HEAD(&pdev->msi_list); list_add(&pdev->alldevs_list, &alldevs_list); @@ -92,15 
+92,20 @@ return NULL; } -int pci_add_device(u8 bus, u8 devfn) +int pci_add_device(struct pci_dev *dev) { struct pci_dev *pdev; int ret = -ENOMEM; spin_lock(&pcidevs_lock); - pdev = alloc_pdev(bus, devfn); + pdev = alloc_pdev(dev->bus, dev->devfn); if ( !pdev ) goto out; + + pdev->is_extfn = dev->is_extfn; + pdev->is_virtfn = dev->is_virtfn; + pdev->physfn.bus = dev->physfn.bus; + pdev->physfn.devfn = dev->physfn.devfn; ret = 0; if ( !pdev->domain ) @@ -115,8 +120,8 @@ out: spin_unlock(&pcidevs_lock); - printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", dev->bus, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); return ret; } diff -r 9fc957e63f8d -r cdf29bb6c74d xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/drivers/passthrough/vtd/dmar.c Wed Mar 18 04:48:19 2009 -0400 @@ -157,6 +157,17 @@ struct acpi_drhd_unit *drhd; struct acpi_drhd_unit *found = NULL, *include_all = NULL; int i; + struct pci_dev *dev; + + dev = pci_get_pdev(bus, devfn); + BUG_ON(!dev); + + if (dev->is_extfn) { + devfn = 0; + } else if (dev->is_virtfn) { + bus = dev->physfn.bus; + devfn = PCI_SLOT(dev->physfn.devfn) ? 
0 : dev->physfn.devfn; + } list_for_each_entry ( drhd, &acpi_drhd_units, list ) { diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/public/physdev.h --- a/xen/include/public/physdev.h Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/include/public/physdev.h Wed Mar 18 04:48:19 2009 -0400 @@ -178,6 +178,12 @@ /* IN */ uint8_t bus; uint8_t devfn; + unsigned is_extfn:1; + unsigned is_virtfn:1; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; }; typedef struct physdev_manage_pci physdev_manage_pci_t; diff -r 9fc957e63f8d -r cdf29bb6c74d xen/include/xen/pci.h --- a/xen/include/xen/pci.h Tue Mar 17 15:40:25 2009 +0000 +++ b/xen/include/xen/pci.h Wed Mar 18 04:48:19 2009 -0400 @@ -41,8 +41,14 @@ spinlock_t msix_table_lock; struct domain *domain; - const u8 bus; - const u8 devfn; + u8 bus; + u8 devfn; + unsigned is_extfn:1; + unsigned is_virtfn:1; + struct { + u8 bus; + u8 devfn; + } physfn; }; #define for_each_pdev(domain, pdev) \ @@ -62,7 +68,7 @@ struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn); void pci_release_devices(struct domain *d); -int pci_add_device(u8 bus, u8 devfn); +int pci_add_device(struct pci_dev *dev); int pci_remove_device(u8 bus, u8 devfn); struct pci_dev *pci_get_pdev(int bus, int devfn); struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn); # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237268585 14400 # Node ID 92730fa710446b2502809faa72bb29fda95ba878 # Parent e8a9f8910a3f113759906e493eaa211e2c43cd85 PCI: initialize and release SR-IOV capability If a device has the SR-IOV capability, initialize it (set the ARI Capable Hierarchy in the lowest numbered PF if necessary; calculate the System Page Size for the VF MMIO, probe the VF Offset, Stride and BARs). A lock for the VF bus allocation is also initialized if a PF is the lowest numbered PF. 
Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx> diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Kconfig --- a/drivers/pci/Kconfig Fri Mar 13 10:08:22 2009 +0000 +++ b/drivers/pci/Kconfig Tue Mar 17 01:43:05 2009 -0400 @@ -37,3 +37,12 @@ help Say Y here if you want to reserve PCI device for passthrough. +config PCI_IOV + bool "PCI IOV support" + depends on PCI + help + PCI-SIG I/O Virtualization (IOV) Specifications support. + Single Root IOV: allows the creation of virtual PCI devices + that share the physical resources from a real device. + + When in doubt, say N. diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/Makefile --- a/drivers/pci/Makefile Fri Mar 13 10:08:22 2009 +0000 +++ b/drivers/pci/Makefile Tue Mar 17 01:43:05 2009 -0400 @@ -15,6 +15,8 @@ # Build the PCI Hotplug drivers if we were asked to obj-$(CONFIG_HOTPLUG_PCI) += hotplug/ + +obj-$(CONFIG_PCI_IOV) += iov.o # # Some architectures use the generic PCI setup functions diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/iov.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/pci/iov.c Tue Mar 17 01:43:05 2009 -0400 @@ -0,0 +1,175 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@xxxxxxxxx> + * + * PCI Express I/O Virtualization (IOV) support. 
+ * Single Root IOV 1.0 + */ + +#include <linux/pci.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/delay.h> +#include "pci.h" + + +static int sriov_init(struct pci_dev *dev, int pos) +{ + int i; + int rc; + int nres; + u32 pgsz; + u16 ctrl, total, offset, stride; + struct pci_sriov *iov; + struct resource *res; + struct pci_dev *pdev; + + pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) { + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); + ssleep(1); + } + + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev->is_physfn) + break; + if (list_empty(&dev->bus->devices) || !pdev->is_physfn) + pdev = NULL; + + ctrl = 0; + if (!pdev && pci_ari_enabled(dev->bus)) + ctrl |= PCI_SRIOV_CTRL_ARI; + + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); + pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (total > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? 
PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + i += __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); + if (!res->flags) + continue; + if ((res->end - res->start + 1) & (PAGE_SIZE - 1)) { + rc = -EIO; + goto failed; + } + res->end = res->start + (res->end - res->start + 1) * total - 1; + nres++; + } + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) { + rc = -ENOMEM; + goto failed; + } + + iov->pos = pos; + iov->nres = nres; + iov->ctrl = ctrl; + iov->total = total; + iov->offset = offset; + iov->stride = stride; + iov->pgsz = pgsz; + iov->self = dev; + pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); + pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + + if (pdev) + iov->dev = pci_dev_get(pdev); + else { + iov->dev = dev; + mutex_init(&iov->lock); + } + + dev->sriov = iov; + dev->is_physfn = 1; + + return 0; + +failed: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + res->flags = 0; + } + + return rc; +} + +static void sriov_release(struct pci_dev *dev) +{ + if (dev == dev->sriov->dev) + mutex_destroy(&dev->sriov->lock); + else + pci_dev_put(dev->sriov->dev); + + kfree(dev->sriov); + dev->sriov = NULL; +} + +/** + * pci_iov_init - initialize the IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. 
+ */ +int pci_iov_init(struct pci_dev *dev) +{ + int pos; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (pos) + return sriov_init(dev, pos); + + return -ENODEV; +} + +/** + * pci_iov_release - release resources used by the IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_release(dev); +} + +/** + * pci_iov_resource_bar - get position of the SR-IOV BAR + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns position of the BAR encapsulated in the SR-IOV capability. + */ +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) + return 0; + + BUG_ON(!dev->is_physfn); + + *type = pci_bar_unknown; + + return dev->sriov->pos + PCI_SRIOV_BAR + + 4 * (resno - PCI_IOV_RESOURCES); +} diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.c --- a/drivers/pci/pci.c Fri Mar 13 10:08:22 2009 +0000 +++ b/drivers/pci/pci.c Tue Mar 17 01:43:05 2009 -0400 @@ -1048,12 +1048,19 @@ */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_mem32; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/pci.h --- a/drivers/pci/pci.h Fri Mar 13 10:08:22 2009 +0000 +++ b/drivers/pci/pci.h Tue Mar 17 01:43:05 2009 -0400 @@ -132,3 +132,40 @@ { return bus->self && bus->self->ari_enabled; } + +/* Single Root I/O Virtualization */ +struct pci_sriov { + int pos; /* capability position */ + int nres; /* number of resources */ + u32 cap; /* 
SR-IOV Capabilities */ + u16 ctrl; /* SR-IOV Control */ + u16 total; /* total VFs associated with the PF */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + u32 pgsz; /* page size for BAR alignment */ + u8 link; /* Function Dependency Link */ + struct pci_dev *dev; /* lowest numbered PF */ + struct pci_dev *self; /* this PF */ + struct mutex lock; /* lock for VF bus */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline void pci_iov_release(struct pci_dev *dev) + +{ +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ diff -r e8a9f8910a3f -r 92730fa71044 drivers/pci/probe.c --- a/drivers/pci/probe.c Fri Mar 13 10:08:22 2009 +0000 +++ b/drivers/pci/probe.c Tue Mar 17 01:43:05 2009 -0400 @@ -765,6 +765,9 @@ struct pci_dev *pci_dev; pci_dev = to_pci_dev(dev); + + pci_iov_release(pci_dev); + kfree(pci_dev); } @@ -891,6 +894,9 @@ /* Alternative Routing-ID Forwarding */ pci_enable_ari(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); /* * Add the device to our list of discovered devices diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci.h --- a/include/linux/pci.h Fri Mar 13 10:08:22 2009 +0000 +++ b/include/linux/pci.h Tue Mar 17 01:43:05 2009 -0400 @@ -77,6 +77,12 @@ /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, +#endif + /* resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4 @@ -127,6 +133,8 @@ char cap_nr; u32 data[0]; }; + +struct pci_sriov; /* * The pci_dev structure 
is used to describe PCI devices. @@ -189,13 +197,17 @@ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; + unsigned int ari_enabled:1; /* ARI forwarding */ + unsigned int is_physfn:1; u32 saved_config_space[16]; /* config space saved at suspend time */ struct hlist_head saved_cap_space; struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ - unsigned int ari_enabled:1; /* ARI forwarding */ +#ifdef CONFIG_PCI_IOV + struct pci_sriov *sriov; /* SR-IOV capability related */ +#endif }; #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list) diff -r e8a9f8910a3f -r 92730fa71044 include/linux/pci_regs.h --- a/include/linux/pci_regs.h Fri Mar 13 10:08:22 2009 +0000 +++ b/include/linux/pci_regs.h Tue Mar 17 01:43:05 2009 -0400 @@ -332,6 +332,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -393,6 +394,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -478,4 +480,35 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define 
PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237268742 14400 # Node ID 2629935bf356bb7118f8691a46e90daed77c3b48 # Parent 92730fa710446b2502809faa72bb29fda95ba878 PCI: restore saved SR-IOV state Restore the volatile registers in the SR-IOV capability after the D3->D0 transition. 
Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx> diff -r 92730fa71044 -r 2629935bf356 drivers/pci/iov.c --- a/drivers/pci/iov.c Tue Mar 17 01:43:05 2009 -0400 +++ b/drivers/pci/iov.c Tue Mar 17 01:45:42 2009 -0400 @@ -125,6 +125,25 @@ dev->sriov = NULL; } +static void sriov_restore_state(struct pci_dev *dev) +{ + int i; + u16 ctrl; + struct pci_sriov *iov = dev->sriov; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) + return; + + for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) + pci_update_resource(dev, i); + + pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + if (iov->ctrl & PCI_SRIOV_CTRL_VFE) + msleep(100); +} + /** * pci_iov_init - initialize the IOV capability * @dev: the PCI device @@ -173,3 +192,13 @@ return dev->sriov->pos + PCI_SRIOV_BAR + 4 * (resno - PCI_IOV_RESOURCES); } + +/** + * pci_restore_iov_state - restore the state of the IOV capability + * @dev: the PCI device + */ +void pci_restore_iov_state(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_restore_state(dev); +} diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.c --- a/drivers/pci/pci.c Tue Mar 17 01:43:05 2009 -0400 +++ b/drivers/pci/pci.c Tue Mar 17 01:45:42 2009 -0400 @@ -562,6 +562,8 @@ pci_restore_pcix_state(dev); pci_restore_msi_state(dev); pci_restore_msix_state(dev); + pci_restore_iov_state(dev); + return 0; } diff -r 92730fa71044 -r 2629935bf356 drivers/pci/pci.h --- a/drivers/pci/pci.h Tue Mar 17 01:43:05 2009 -0400 +++ b/drivers/pci/pci.h Tue Mar 17 01:45:42 2009 -0400 @@ -154,6 +154,7 @@ extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); +extern void pci_restore_iov_state(struct pci_dev *dev); #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -168,4 +169,7 @@ { return 0; } +static inline void pci_restore_iov_state(struct 
pci_dev *dev) +{ +} #endif /* CONFIG_PCI_IOV */ # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237268873 14400 # Node ID 6b776c705e444562dda66dc0b33fd80eaceb1bfb # Parent 2629935bf356bb7118f8691a46e90daed77c3b48 PCI: reserve bus range for SR-IOV device Reserve the bus number range used by the Virtual Function when pcibios_assign_all_busses() returns true. Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx> diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/iov.c --- a/drivers/pci/iov.c Tue Mar 17 01:45:42 2009 -0400 +++ b/drivers/pci/iov.c Tue Mar 17 01:47:53 2009 -0400 @@ -13,6 +13,18 @@ #include <linux/delay.h> #include "pci.h" + +static inline u8 virtfn_bus(struct pci_dev *dev, int id) +{ + return dev->bus->number + ((dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) >> 8); +} + +static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +{ + return (dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) & 0xff; +} static int sriov_init(struct pci_dev *dev, int pos) { @@ -202,3 +214,27 @@ if (dev->is_physfn) sriov_restore_state(dev); } + +/** + * pci_iov_bus_range - find bus range used by Virtual Function + * @bus: the PCI bus + * + * Returns max number of buses (exclude current one) used by Virtual + * Functions. + */ +int pci_iov_bus_range(struct pci_bus *bus) +{ + int max = 0; + u8 busnr; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_physfn) + continue; + busnr = virtfn_bus(dev, dev->sriov->total - 1); + if (busnr > max) + max = busnr; + } + + return max ? 
max - bus->number : 0; +} diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/pci.h --- a/drivers/pci/pci.h Tue Mar 17 01:45:42 2009 -0400 +++ b/drivers/pci/pci.h Tue Mar 17 01:47:53 2009 -0400 @@ -155,6 +155,7 @@ extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); extern void pci_restore_iov_state(struct pci_dev *dev); +extern int pci_iov_bus_range(struct pci_bus *bus); #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -172,4 +173,8 @@ static inline void pci_restore_iov_state(struct pci_dev *dev) { } +static inline int pci_iov_bus_range(struct pci_bus *bus) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ diff -r 2629935bf356 -r 6b776c705e44 drivers/pci/probe.c --- a/drivers/pci/probe.c Tue Mar 17 01:45:42 2009 -0400 +++ b/drivers/pci/probe.c Tue Mar 17 01:47:53 2009 -0400 @@ -976,6 +976,9 @@ for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn); + /* Reserve buses for SR-IOV capability. */ + max += pci_iov_bus_range(bus); + /* * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237270918 14400 # Node ID 3a2d0f486f533f0ef21267b9a1682997e0caf463 # Parent 6b776c705e444562dda66dc0b33fd80eaceb1bfb PCI: centralize device setup code Move the device setup stuff into pci_setup_device() which will be used to setup the Virtual Function later. 
Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx> diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/pci.h --- a/drivers/pci/pci.h Tue Mar 17 01:47:53 2009 -0400 +++ b/drivers/pci/pci.h Tue Mar 17 02:21:58 2009 -0400 @@ -117,6 +117,7 @@ pci_bar_mem64, /* A 64-bit memory BAR */ }; +extern int pci_setup_device(struct pci_dev *dev); extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); extern int pci_resource_bar(struct pci_dev *dev, int resno, diff -r 6b776c705e44 -r 3a2d0f486f53 drivers/pci/probe.c --- a/drivers/pci/probe.c Tue Mar 17 01:47:53 2009 -0400 +++ b/drivers/pci/probe.c Tue Mar 17 02:21:58 2009 -0400 @@ -683,13 +683,28 @@ * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. - * Returns 0 on success and -1 if unknown type of device (not normal, bridge - * or CardBus). + * Returns 0 on success and negative if unknown type of device (not normal, + * bridge or CardBus). */ -static int pci_setup_device(struct pci_dev * dev) +int pci_setup_device(struct pci_dev *dev) { u32 class; + u8 hdr_type; + if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) + return -EIO; + + dev->sysdata = dev->bus->sysdata; + dev->dev.parent = dev->bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->hdr_type = hdr_type & 0x7f; + dev->multifunction = !!(hdr_type & 0x80); + dev->cfg_size = pci_cfg_space_size(dev); + dev->error_state = pci_channel_io_normal; + + /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) + set this higher, assuming the system even supports it. 
*/ + dev->dma_mask = 0xffffffff; sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); @@ -706,7 +721,6 @@ /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); - class = dev->class >> 8; switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ @@ -741,7 +755,7 @@ default: /* unknown header */ printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n", pci_name(dev), dev->hdr_type); - return -1; + return -EIO; bad: printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n", @@ -823,7 +837,6 @@ { struct pci_dev *dev; u32 l; - u8 hdr_type; int delay = 1; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) @@ -850,29 +863,16 @@ } } - if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type)) - return NULL; - dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); if (!dev) return NULL; dev->bus = bus; - dev->sysdata = bus->sysdata; - dev->dev.parent = bus->bridge; - dev->dev.bus = &pci_bus_type; dev->devfn = devfn; - dev->hdr_type = hdr_type & 0x7f; - dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; - dev->cfg_size = pci_cfg_space_size(dev); - dev->error_state = pci_channel_io_normal; - /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) - set this higher, assuming the system even supports it. */ - dev->dma_mask = 0xffffffff; - if (pci_setup_device(dev) < 0) { + if (pci_setup_device(dev)) { kfree(dev); return NULL; } # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237270982 14400 # Node ID 577169901110eb89ff36f1460e152a5c96297bde # Parent 3a2d0f486f533f0ef21267b9a1682997e0caf463 PCI: add SR-IOV API for Physical Function driver Add or remove the Virtual Function when the SR-IOV is enabled or disabled by the device driver. This can happen anytime rather than only at the device probe stage. 
Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx> diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/iov.c --- a/drivers/pci/iov.c Tue Mar 17 02:21:58 2009 -0400 +++ b/drivers/pci/iov.c Tue Mar 17 02:23:02 2009 -0400 @@ -13,6 +13,7 @@ #include <linux/delay.h> #include "pci.h" +#define VIRTFN_ID_LEN 16 static inline u8 virtfn_bus(struct pci_dev *dev, int id) { @@ -24,6 +25,267 @@ { return (dev->devfn + dev->sriov->offset + dev->sriov->stride * id) & 0xff; +} + +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = 
res->flags; + size = res->end - res->start + 1; + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + pci_bus_add_device(virtfn); + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + int nres; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + 
pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (res->parent) + nres++; + } + if (nres != iov->nres) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + return -ENOMEM; + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i); + if (rc) + goto failed; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed: + for (j = 0; j < i; j++) + virtfn_remove(dev, j); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, 
iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + iov->nr_virtfn = 0; } static int sriov_init(struct pci_dev *dev, int pos) @@ -128,6 +390,8 @@ static void sriov_release(struct pci_dev *dev) { + BUG_ON(dev->sriov->nr_virtfn); + if (dev == dev->sriov->dev) mutex_destroy(&dev->sriov->lock); else @@ -151,6 +415,7 @@ pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -238,3 +503,35 @@ return max ? max - bus->number : 0; } + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. 
+ */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); diff -r 3a2d0f486f53 -r 577169901110 drivers/pci/pci.h --- a/drivers/pci/pci.h Tue Mar 17 02:21:58 2009 -0400 +++ b/drivers/pci/pci.h Tue Mar 17 02:23:02 2009 -0400 @@ -141,6 +141,8 @@ u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ u16 offset; /* first VF Routing ID offset */ u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ diff -r 3a2d0f486f53 -r 577169901110 include/linux/pci.h --- a/include/linux/pci.h Tue Mar 17 02:21:58 2009 -0400 +++ b/include/linux/pci.h Tue Mar 17 02:23:02 2009 -0400 @@ -199,6 +199,7 @@ unsigned int msix_enabled:1; unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_physfn:1; + unsigned int is_virtfn:1; u32 saved_config_space[16]; /* config space saved at suspend time */ struct hlist_head saved_cap_space; @@ -206,7 +207,10 @@ int rom_attr_enabled; /* has display of the rom attribute been enabled? 
*/ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ #ifdef CONFIG_PCI_IOV - struct pci_sriov *sriov; /* SR-IOV capability related */ + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; #endif }; @@ -829,5 +833,18 @@ int pci_is_guestdev(struct pci_dev *dev); #endif /* CONFIG_PCI_GUESTDEV */ +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ # HG changeset patch # User Yu Zhao <yu.zhao@xxxxxxxxx> # Date 1237353055 14400 # Node ID 582ec8e86ffff64834e8c77ef6790774352ddc7a # Parent 577169901110eb89ff36f1460e152a5c96297bde PCI: pass ARI and SR-IOV device information to the hypervisor PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended Function -- a function whose function number is greater than 7 within an ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended Function is under the scope of the same remapping unit as the traditional function. The hypervisor needs to know if a function is an Extended Function so it can find the proper DMAR for it. And section 8.3.3 specifies that the SR-IOV Virtual Function is under the scope of the same remapping unit as the Physical Function. The hypervisor also needs to know if a function is a Virtual Function and which Physical Function it's associated with for the same reason. 
diff -r 577169901110 -r 582ec8e86fff drivers/xen/core/pci.c --- a/drivers/xen/core/pci.c Tue Mar 17 02:23:02 2009 -0400 +++ b/drivers/xen/core/pci.c Wed Mar 18 01:10:55 2009 -0400 @@ -6,6 +6,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <xen/interface/physdev.h> +#include "../../pci/pci.h" static int (*pci_bus_probe)(struct device *dev); static int (*pci_bus_remove)(struct device *dev); @@ -15,8 +16,16 @@ int r; struct pci_dev *pci_dev = to_pci_dev(dev); struct physdev_manage_pci manage_pci; + + memset(&manage_pci, 0, sizeof(manage_pci)); manage_pci.bus = pci_dev->bus->number; manage_pci.devfn = pci_dev->devfn; + if (pci_dev->is_virtfn) { + manage_pci.is_virtfn = 1; + manage_pci.physfn.bus = pci_dev->physfn->bus->number; + manage_pci.physfn.devfn = pci_dev->physfn->devfn; + } else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) + manage_pci.is_extfn = 1; r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); if (r && r != -ENOSYS) diff -r 577169901110 -r 582ec8e86fff include/xen/interface/physdev.h --- a/include/xen/interface/physdev.h Tue Mar 17 02:23:02 2009 -0400 +++ b/include/xen/interface/physdev.h Wed Mar 18 01:10:55 2009 -0400 @@ -178,6 +178,12 @@ /* IN */ uint8_t bus; uint8_t devfn; + unsigned is_extfn:1; + unsigned is_virtfn:1; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; }; typedef struct physdev_manage_pci physdev_manage_pci_t; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |