x86/vMSI: miscellaneous fixes This addresses a number of problems in msixtbl_{read,write}(): - address alignment was not checked, allowing for memory corruption in the hypervisor (write case) or returning of hypervisor private data to the guest (read case) - the interrupt mask bit was permitted to be written by the guest (while Xen's interrupt flow control routines need to control it) - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain numbers (making it unobvious why they have these values, and making the latter non-portable) - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a non-zero table offset); this was also affecting host MSI-X code - struct msixtbl_entry's table_flags[] was one element larger than necessary due to improper open-coding of BITS_TO_LONGS() - msixtbl_read() unconditionally accessed the physical table, even though the data was only needed in a quarter of all cases - various calculations were done unnecessarily for both of the rather distinct code paths in msixtbl_read() Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was chosen to be 3. Signed-off-by: Jan Beulich --- a/xen/arch/x86/hvm/vmsi.c +++ b/xen/arch/x86/hvm/vmsi.c @@ -165,7 +165,7 @@ struct msixtbl_entry struct pci_dev *pdev; unsigned long gtable; /* gpa of msix table */ unsigned long table_len; - unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1]; + unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)]; #define MAX_MSIX_ACC_ENTRIES 3 struct { uint32_t msi_ad[3]; /* Shadow of address low, high and data */ @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin static void __iomem *msixtbl_addr_to_virt( struct msixtbl_entry *entry, unsigned long addr) { - int idx, nr_page; + unsigned int idx, nr_page; - if ( !entry ) + if ( !entry || !entry->pdev ) return NULL; nr_page = (addr >> PAGE_SHIFT) - (entry->gtable >> PAGE_SHIFT); - if ( !entry->pdev ) - return NULL; - idx = entry->pdev->msix_table_idx[nr_page]; if ( !idx ) return NULL; @@ -215,37 +212,34 @@ static int msixtbl_read( struct vcpu *v, unsigned long address, unsigned long len, unsigned long *pval) { - unsigned long offset, val; + unsigned long offset; struct msixtbl_entry *entry; void *virt; - int nr_entry, index; + unsigned int nr_entry, index; int r = X86EMUL_UNHANDLEABLE; - rcu_read_lock(&msixtbl_rcu_lock); + if ( len != 4 || (address & 3) ) + return r; - if ( len != 4 ) - goto out; + rcu_read_lock(&msixtbl_rcu_lock); entry = msixtbl_find_entry(v, address); - virt = msixtbl_addr_to_virt(entry, address); - if ( !virt ) - goto out; - - nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; offset = address & (PCI_MSIX_ENTRY_SIZE - 1); - if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && - offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) - goto out; - val = readl(virt); if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) { + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; + if ( nr_entry >= MAX_MSIX_ACC_ENTRIES ) + goto out; index = offset / sizeof(uint32_t); *pval = entry->gentries[nr_entry].msi_ad[index]; } else { - *pval = val; + virt = msixtbl_addr_to_virt(entry, address); + if ( !virt ) + goto out; + *pval = readl(virt); } r = X86EMUL_OKAY; @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v, unsigned long offset; struct msixtbl_entry *entry; void *virt; - int nr_entry, index; + unsigned int nr_entry, index; int r = X86EMUL_UNHANDLEABLE; - rcu_read_lock(&msixtbl_rcu_lock); + if ( len != 4 || (address & 3) ) + return r; - if ( len != 4 ) - goto out; + rcu_read_lock(&msixtbl_rcu_lock); entry = msixtbl_find_entry(v, address); nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v, if ( !virt ) goto out; + /* Do not allow the mask bit to be changed. */ +#if 0 /* XXX + * As the mask bit is the only defined bit in the word, and as the + * host MSI-X code doesn't preserve the other bits anyway, doing + * this is pointless. So for now just discard the write (also + * saving us from having to determine the matching irq_desc). + */ + spin_lock_irqsave(&desc->lock, flags); + orig = readl(virt); + val &= ~PCI_MSIX_VECTOR_BITMASK; + val |= orig & PCI_MSIX_VECTOR_BITMASK; writel(val, virt); - r = X86EMUL_OKAY; + spin_unlock_irqrestore(&desc->lock, flags); +#endif + r = X86EMUL_OKAY; out: rcu_read_unlock(&msixtbl_rcu_lock); return r; --- a/xen/include/asm-x86/msi.h +++ b/xen/include/asm-x86/msi.h @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry) */ #define NR_HP_RESERVED_VECTORS 20 -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 -#define PCI_MSIX_ENTRY_DATA_OFFSET 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 - #define msi_control_reg(base) (base + PCI_MSI_FLAGS) #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) #define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI) --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include /* @@ -30,8 +32,10 @@ #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f)) #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff)) -#define MAX_MSIX_TABLE_ENTRIES 2048 -#define MAX_MSIX_TABLE_PAGES 8 +#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1) +#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \ + PCI_MSIX_ENTRY_SIZE + \ + (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1))) struct pci_dev_info { bool_t is_extfn; bool_t is_virtfn; --- a/xen/include/xen/pci_regs.h +++ b/xen/include/xen/pci_regs.h @@ -305,6 +305,12 @@ #define PCI_MSIX_PBA 8 #define PCI_MSIX_BIRMASK (7 << 0) +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 +#define PCI_MSIX_ENTRY_DATA_OFFSET 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 + #define PCI_MSIX_VECTOR_BITMASK (1 << 0) /* CompactPCI Hotswap Register */