[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] x86/vMSI: miscellaneous fixes



>>> On 20.01.12 at 17:33, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
> This addresses a number of problems in msixtbl_{read,write}():
> - address alignment was not checked, allowing for memory corruption in
>   the hypervisor (write case) or returning of hypervisor private data
>   to the guest (read case)
> - the interrupt mask bit was permitted to be written by the guest
>   (while Xen's interrupt flow control routines need to control it)
> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
>   numbers (making it unobvious why they have these values, and making
>   the latter non-portable)
> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
>   non-zero table offset); this was also affecting host MSI-X code
> - struct msixtbl_entry's table_flags[] was one element larger than
>   necessary due to improper open-coding of BITS_TO_LONGS()
> - msixtbl_read() unconditionally accessed the physical table, even
>   though the data was only needed in a quarter of all cases
> - various calculations were done unnecessarily for both of the rather
>   distinct code paths in msixtbl_read()

I should note (as done in the earlier response to the 4.2 todo thread)
that I have no way of actually testing this, so I'm relying on others
here. (Intel? Or, Ian, would the stage tester find issues with passed
through MSI-X capable devices?)

Jan

> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
> chosen to be 3.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -165,7 +165,7 @@ struct msixtbl_entry
>      struct pci_dev *pdev;
>      unsigned long gtable;       /* gpa of msix table */
>      unsigned long table_len;
> -    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
> +    unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
>  #define MAX_MSIX_ACC_ENTRIES 3
>      struct { 
>          uint32_t msi_ad[3];  /* Shadow of address low, high and data */
> @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
>  static void __iomem *msixtbl_addr_to_virt(
>      struct msixtbl_entry *entry, unsigned long addr)
>  {
> -    int idx, nr_page;
> +    unsigned int idx, nr_page;
>  
> -    if ( !entry )
> +    if ( !entry || !entry->pdev )
>          return NULL;
>  
>      nr_page = (addr >> PAGE_SHIFT) -
>                (entry->gtable >> PAGE_SHIFT);
>  
> -    if ( !entry->pdev )
> -        return NULL;
> -
>      idx = entry->pdev->msix_table_idx[nr_page];
>      if ( !idx )
>          return NULL;
> @@ -215,37 +212,34 @@ static int msixtbl_read(
>      struct vcpu *v, unsigned long address,
>      unsigned long len, unsigned long *pval)
>  {
> -    unsigned long offset, val;
> +    unsigned long offset;
>      struct msixtbl_entry *entry;
>      void *virt;
> -    int nr_entry, index;
> +    unsigned int nr_entry, index;
>      int r = X86EMUL_UNHANDLEABLE;
>  
> -    rcu_read_lock(&msixtbl_rcu_lock);
> +    if ( len != 4 || (address & 3) )
> +        return r;
>  
> -    if ( len != 4 )
> -        goto out;
> +    rcu_read_lock(&msixtbl_rcu_lock);
>  
>      entry = msixtbl_find_entry(v, address);
> -    virt = msixtbl_addr_to_virt(entry, address);
> -    if ( !virt )
> -        goto out;
> -
> -    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
>      offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
> -    if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && 
> -         offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> -        goto out;
>  
> -    val = readl(virt);
>      if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
>      {
> +        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> +        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
> +            goto out;
>          index = offset / sizeof(uint32_t);
>          *pval = entry->gentries[nr_entry].msi_ad[index];
>      }
>      else 
>      {
> -        *pval = val;
> +        virt = msixtbl_addr_to_virt(entry, address);
> +        if ( !virt )
> +            goto out;
> +        *pval = readl(virt);
>      }
>      
>      r = X86EMUL_OKAY;
> @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
>      unsigned long offset;
>      struct msixtbl_entry *entry;
>      void *virt;
> -    int nr_entry, index;
> +    unsigned int nr_entry, index;
>      int r = X86EMUL_UNHANDLEABLE;
>  
> -    rcu_read_lock(&msixtbl_rcu_lock);
> +    if ( len != 4 || (address & 3) )
> +        return r;
>  
> -    if ( len != 4 )
> -        goto out;
> +    rcu_read_lock(&msixtbl_rcu_lock);
>  
>      entry = msixtbl_find_entry(v, address);
>      nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
>      if ( !virt )
>          goto out;
>  
> +    /* Do not allow the mask bit to be changed. */
> +#if 0 /* XXX
> +       * As the mask bit is the only defined bit in the word, and as the
> +       * host MSI-X code doesn't preserve the other bits anyway, doing
> +       * this is pointless. So for now just discard the write (also
> +       * saving us from having to determine the matching irq_desc).
> +       */
> +    spin_lock_irqsave(&desc->lock, flags);
> +    orig = readl(virt);
> +    val &= ~PCI_MSIX_VECTOR_BITMASK;
> +    val |= orig & PCI_MSIX_VECTOR_BITMASK;
>      writel(val, virt);
> -    r = X86EMUL_OKAY;
> +    spin_unlock_irqrestore(&desc->lock, flags);
> +#endif
>  
> +    r = X86EMUL_OKAY;
>  out:
>      rcu_read_unlock(&msixtbl_rcu_lock);
>      return r;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
>   */
>  #define NR_HP_RESERVED_VECTORS       20
>  
> -#define PCI_MSIX_ENTRY_SIZE                  16
> -#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET    0
> -#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET    4
> -#define  PCI_MSIX_ENTRY_DATA_OFFSET          8
> -#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET   12
> -
>  #define msi_control_reg(base)                (base + PCI_MSI_FLAGS)
>  #define msi_lower_address_reg(base)  (base + PCI_MSI_ADDRESS_LO)
>  #define msi_upper_address_reg(base)  (base + PCI_MSI_ADDRESS_HI)
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -11,6 +11,8 @@
>  #include <xen/list.h>
>  #include <xen/spinlock.h>
>  #include <xen/irq.h>
> +#include <xen/pci_regs.h>
> +#include <xen/pfn.h>
>  #include <asm/pci.h>
>  
>  /*
> @@ -30,8 +32,10 @@
>  #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
>  #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
>  
> -#define MAX_MSIX_TABLE_ENTRIES  2048
> -#define MAX_MSIX_TABLE_PAGES    8
> +#define MAX_MSIX_TABLE_ENTRIES  (PCI_MSIX_FLAGS_QSIZE + 1)
> +#define MAX_MSIX_TABLE_PAGES    PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
> +                                       PCI_MSIX_ENTRY_SIZE + \
> +                                       (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
>  struct pci_dev_info {
>      bool_t is_extfn;
>      bool_t is_virtfn;
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -305,6 +305,12 @@
>  #define PCI_MSIX_PBA         8
>  #define  PCI_MSIX_BIRMASK    (7 << 0)
>  
> +#define PCI_MSIX_ENTRY_SIZE                  16
> +#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET    0
> +#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET    4
> +#define  PCI_MSIX_ENTRY_DATA_OFFSET          8
> +#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET   12
> +
>  #define PCI_MSIX_VECTOR_BITMASK      (1 << 0)
>  
>  /* CompactPCI Hotswap Register */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.