[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] x86/PCI-MSI: properly determine VF BAR values




On 08/08/11 10:22, Jan Beulich wrote:
> As was discussed a couple of times on this list, SR-IOV virtual
> functions have their BARs read as zero - the physical function's SR-IOV
> capability structure must be consulted instead. The bogus warnings
> people complained about are being eliminated with this change.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Tested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -522,12 +522,48 @@ static int msi_capability_init(struct pc
>      return 0;
>  }
>
> -static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
> +static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
>  {
>      u8 limit;
> -    u32 addr;
> +    u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
>
> -    switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
> +    if ( vf >= 0 )
> +    {
> +        struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
> +        unsigned int pos = pci_find_ext_capability(0, bus,
> +                                                   PCI_DEVFN(slot, func),
> +                                                   PCI_EXT_CAP_ID_SRIOV);
> +        u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
> +        u16 num_vf = pci_conf_read16(bus, slot, func, pos + 
> PCI_SRIOV_NUM_VF);
> +        u16 offset = pci_conf_read16(bus, slot, func,
> +                                     pos + PCI_SRIOV_VF_OFFSET);
> +        u16 stride = pci_conf_read16(bus, slot, func,
> +                                     pos + PCI_SRIOV_VF_STRIDE);
> +
> +        if ( !pdev || !pos ||
> +             !(ctrl & PCI_SRIOV_CTRL_VFE) ||
> +             !(ctrl & PCI_SRIOV_CTRL_MSE) ||
> +             !num_vf || !offset || (num_vf > 1 && !stride) ||
> +             bir >= PCI_SRIOV_NUM_BARS ||
> +             !pdev->vf_rlen[bir] )
> +            return 0;
> +        base = pos + PCI_SRIOV_BAR;
> +        vf -= PCI_BDF(bus, slot, func) + offset;
> +        if ( vf < 0 || (vf && vf % stride) )
> +            return 0;
> +        if ( stride )
> +        {
> +            if ( vf % stride )
> +                return 0;
> +            vf /= stride;
> +        }
> +        if ( vf >= num_vf )
> +            return 0;
> +        BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
> +        disp = vf * pdev->vf_rlen[bir];
> +        limit = PCI_SRIOV_NUM_BARS;
> +    }
> +    else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
>      {
>      case PCI_HEADER_TYPE_NORMAL:
>          limit = 6;
> @@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
>
>      if ( bir >= limit )
>          return 0;
> -    addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
> +    addr = pci_conf_read32(bus, slot, func, base + bir * 4);
>      if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
>          return 0;
>      if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == 
> PCI_BASE_ADDRESS_MEM_TYPE_64 )
> @@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
>          addr &= PCI_BASE_ADDRESS_MEM_MASK;
>          if ( ++bir >= limit )
>              return 0;
> -        return addr |
> -               ((u64)pci_conf_read32(bus, slot, func,
> -                                     PCI_BASE_ADDRESS_0 + bir * 4) << 32);
> +        return addr + disp +
> +               ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
>      }
> -    return addr & PCI_BASE_ADDRESS_MEM_MASK;
> +    return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
>  }
>
>  /**
> @@ -629,11 +664,29 @@ static int msix_capability_init(struct p
>
>      if ( !dev->msix_nr_entries )
>      {
> +        u8 pbus, pslot, pfunc;
> +        int vf;
>          u64 pba_paddr;
>          u32 pba_offset;
>
> +        if ( !dev->info.is_virtfn )
> +        {
> +            pbus = bus;
> +            pslot = slot;
> +            pfunc = func;
> +            vf = -1;
> +        }
> +        else
> +        {
> +            pbus = dev->info.physfn.bus;
> +            pslot = PCI_SLOT(dev->info.physfn.devfn);
> +            pfunc = PCI_FUNC(dev->info.physfn.devfn);
> +            vf = PCI_BDF2(dev->bus, dev->devfn);
> +        }
> +
>          ASSERT(!dev->msix_used_entries);
> -        WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
> +        WARN_ON(msi->table_base !=
> +                read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
>
>          dev->msix_nr_entries = nr_entries;
>          dev->msix_table.first = PFN_DOWN(table_paddr);
> @@ -645,7 +698,7 @@ static int msix_capability_init(struct p
>          pba_offset = pci_conf_read32(bus, slot, func,
>                                       msix_pba_offset_reg(pos));
>          bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
> -        pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
> +        pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
>          WARN_ON(!pba_paddr);
>          pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
>
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -146,6 +146,7 @@ void pci_enable_acs(struct pci_dev *pdev
>  int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
>  {
>      struct pci_dev *pdev;
> +    unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
>      const char *pdev_type;
>      int ret = -ENOMEM;
>
> @@ -154,7 +155,14 @@ int pci_add_device(u8 bus, u8 devfn, con
>      else if (info->is_extfn)
>          pdev_type = "extended function";
>      else if (info->is_virtfn)
> +    {
> +        spin_lock(&pcidevs_lock);
> +        pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
> +        spin_unlock(&pcidevs_lock);
> +        if ( !pdev )
> +            pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
>          pdev_type = "virtual function";
> +    }
>      else
>          return -EINVAL;
>
> @@ -165,6 +173,70 @@ int pci_add_device(u8 bus, u8 devfn, con
>
>      if ( info )
>          pdev->info = *info;
> +    else if ( !pdev->vf_rlen[0] )
> +    {
> +        unsigned int pos = pci_find_ext_capability(0, bus, devfn,
> +                                                   PCI_EXT_CAP_ID_SRIOV);
> +        u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
> +
> +        if ( !pos )
> +            /* Nothing */;
> +        else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
> +        {
> +            unsigned int i;
> +
> +            BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
> +            for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
> +            {
> +                unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
> +                u32 bar = pci_conf_read32(bus, slot, func, idx);
> +                u32 hi = 0;
> +
> +                if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
> +                     PCI_BASE_ADDRESS_SPACE_IO )
> +                {
> +                    printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with 
> vf"
> +                                          " BAR%u in IO space\n",
> +                           bus, slot, func, i);
> +                    continue;
> +                }
> +                pci_conf_write32(bus, slot, func, idx, ~0);
> +                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> +                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
> +                {
> +                    if ( i >= PCI_SRIOV_NUM_BARS )
> +                    {
> +                        printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x 
> with"
> +                                              " 64-bit vf BAR in last 
> slot\n",
> +                               bus, slot, func);
> +                        break;
> +                    }
> +                    hi = pci_conf_read32(bus, slot, func, idx + 4);
> +                    pci_conf_write32(bus, slot, func, idx + 4, ~0);
> +                }
> +                pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
> +                                   PCI_BASE_ADDRESS_MEM_MASK;
> +                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> +                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
> +                {
> +                    pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
> +                                                             idx + 4) << 32;
> +                    pci_conf_write32(bus, slot, func, idx + 4, hi);
> +                }
> +                else if ( pdev->vf_rlen[i] )
> +                    pdev->vf_rlen[i] |= (u64)~0 << 32;
> +                pci_conf_write32(bus, slot, func, idx, bar);
> +                pdev->vf_rlen[i] = -pdev->vf_rlen[i];
> +                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> +                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
> +                    ++i;
> +            }
> +        }
> +        else
> +            printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its 
> virtual"
> +                                  " functions already enabled (%04x)\n",
> +                   bus, slot, func, ctrl);
> +    }
>
>      ret = 0;
>      if ( !pdev->domain )
> @@ -184,7 +256,7 @@ int pci_add_device(u8 bus, u8 devfn, con
>  out:
>      spin_unlock(&pcidevs_lock);
>      printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
> -           bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
> +           bus, slot, func);
>      return ret;
>  }
>
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -59,6 +59,7 @@ struct pci_dev {
>      const u8 bus;
>      const u8 devfn;
>      struct pci_dev_info info;
> +    u64 vf_rlen[6];
>  };
>
>  #define for_each_pdev(domain, pdev) \
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -425,7 +425,7 @@
>  #define PCI_EXT_CAP_ID_ACS     13
>  #define PCI_EXT_CAP_ID_ARI     14
>  #define PCI_EXT_CAP_ID_ATS     15
> -#define PCI_EXT_CAP_ID_IOV     16
> +#define PCI_EXT_CAP_ID_SRIOV   16
>
>  /* Advanced Error Reporting */
>  #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
> @@ -545,4 +545,35 @@
>  #define PCI_ACS_CTRL           0x06    /* ACS Control Register */
>  #define PCI_ACS_EGRESS_CTL_V   0x08    /* ACS Egress Control Vector */
>
> +/* Single Root I/O Virtualization */
> +#define PCI_SRIOV_CAP          0x04    /* SR-IOV Capabilities */
> +#define  PCI_SRIOV_CAP_VFM     0x01    /* VF Migration Capable */
> +#define  PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
> +#define PCI_SRIOV_CTRL         0x08    /* SR-IOV Control */
> +#define  PCI_SRIOV_CTRL_VFE    0x01    /* VF Enable */
> +#define  PCI_SRIOV_CTRL_VFM    0x02    /* VF Migration Enable */
> +#define  PCI_SRIOV_CTRL_INTR   0x04    /* VF Migration Interrupt Enable */
> +#define  PCI_SRIOV_CTRL_MSE    0x08    /* VF Memory Space Enable */
> +#define  PCI_SRIOV_CTRL_ARI    0x10    /* ARI Capable Hierarchy */
> +#define PCI_SRIOV_STATUS       0x0a    /* SR-IOV Status */
> +#define  PCI_SRIOV_STATUS_VFM  0x01    /* VF Migration Status */
> +#define PCI_SRIOV_INITIAL_VF   0x0c    /* Initial VFs */
> +#define PCI_SRIOV_TOTAL_VF     0x0e    /* Total VFs */
> +#define PCI_SRIOV_NUM_VF       0x10    /* Number of VFs */
> +#define PCI_SRIOV_FUNC_LINK    0x12    /* Function Dependency Link */
> +#define PCI_SRIOV_VF_OFFSET    0x14    /* First VF Offset */
> +#define PCI_SRIOV_VF_STRIDE    0x16    /* Following VF Stride */
> +#define PCI_SRIOV_VF_DID       0x1a    /* VF Device ID */
> +#define PCI_SRIOV_SUP_PGSIZE   0x1c    /* Supported Page Sizes */
> +#define PCI_SRIOV_SYS_PGSIZE   0x20    /* System Page Size */
> +#define PCI_SRIOV_BAR          0x24    /* VF BAR0 */
> +#define  PCI_SRIOV_NUM_BARS    6       /* Number of VF BARs */
> +#define PCI_SRIOV_VFM          0x3c    /* VF Migration State Array Offset*/
> +#define  PCI_SRIOV_VFM_BIR(x)  ((x) & 7)       /* State BIR */
> +#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)    /* State Offset */
> +#define  PCI_SRIOV_VFM_UA      0x0     /* Inactive.Unavailable */
> +#define  PCI_SRIOV_VFM_MI      0x1     /* Dormant.MigrateIn */
> +#define  PCI_SRIOV_VFM_MO      0x2     /* Active.MigrateOut */
> +#define  PCI_SRIOV_VFM_AV      0x3     /* Active.Available */
> +
>  #endif /* LINUX_PCI_REGS_H */
>
>

-- 
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.