|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v3 3/4] x86/hvm: Allow writes to registers on the same page as MSI-X table
On 06.04.2023 05:57, Marek Marczykowski-Górecki wrote:
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -181,15 +181,21 @@ static bool msixtbl_initialised(const struct domain *d)
> }
>
> static struct msixtbl_entry *msixtbl_find_entry(
> - struct vcpu *v, unsigned long addr)
> + struct vcpu *v, unsigned long addr, bool same_page)
> {
> struct msixtbl_entry *entry;
> struct domain *d = v->domain;
>
> list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list )
> + {
> + if ( same_page &&
> + PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) &&
> + PFN_DOWN(addr) <= PFN_DOWN(entry->gtable + entry->table_len) )
This will allow access to the subsequent page in case entry->gtable +
entry->table_len falls on a page boundary. I think you mean "< PFN_UP()"
or "<= PFN_DOWN(... - 1)".
But I anyway don't understand why you need both this and ...
> + return entry;
> if ( addr >= entry->gtable &&
> addr < entry->gtable + entry->table_len )
> return entry;
... this (and the new function parameter). Again like in the recently
added vPCI code, all you should need ought to be the new code you add
(see msix.c:msix_find()), with the caller then separating "in table" vs
"other space". Only one of the four callers really cares about _just_
the table range. In no event do you need to do both pairs of checks:
if ( same_page
? PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) &&
...
: ...
(i.e. in case you disagree with moving the original check to the sole
place where it's needed).
> @@ -213,6 +219,144 @@ static struct msi_desc *msixtbl_addr_to_desc(
> return NULL;
> }
>
> +/*
> + * Returns:
> + * - UINT_MAX if no handling should be done
> + * - UINT_MAX-1 if write should be discarded
> + * - a fixmap idx to use for handling
> + */
> +#define ADJACENT_DONT_HANDLE UINT_MAX
> +#define ADJACENT_DISCARD_WRITE (UINT_MAX - 1)
> +static unsigned int adjacent_handle(
> + const struct msixtbl_entry *entry, unsigned long addr, bool write)
> +{
> + unsigned int adj_type;
> +
> + if ( !entry || !entry->pdev )
> + return ADJACENT_DONT_HANDLE;
> +
> + if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable) && addr < entry->gtable )
> + adj_type = ADJ_IDX_FIRST;
> + else if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable + entry->table_len -
> 1) &&
> + addr >= entry->gtable + entry->table_len )
> + adj_type = ADJ_IDX_LAST;
> + else
> + return ADJACENT_DONT_HANDLE;
> +
> + ASSERT(entry->pdev->msix);
Considering the code below, you probably want to latch this pointer into
a local variable.
> + if ( !entry->pdev->msix->adj_access_table_idx[adj_type] )
> + {
> + gprintk(XENLOG_WARNING,
> + "Page for adjacent(%d) MSI-X table access not initialized
> for %pp (addr %#lx, gtable %#lx\n",
> + adj_type, &entry->pdev->sbdf, addr, entry->gtable);
> +
> + return ADJACENT_DONT_HANDLE;
> + }
> +
> + /* If PBA lives on the same page too, discard writes. */
> + if ( write && (
Nit: Style (no standalone opening parenthesis ought to be last on a line).
> + (adj_type == ADJ_IDX_LAST &&
> + entry->pdev->msix->table.last == entry->pdev->msix->pba.first) ||
> + (adj_type == ADJ_IDX_FIRST &&
> + entry->pdev->msix->table.first == entry->pdev->msix->pba.last)) )
And these all need indenting by one more blank.
> + {
> + gprintk(XENLOG_WARNING,
> + "MSI-X table and PBA of %pp live on the same page, "
> + "writing to other registers there is not implemented\n",
> + &entry->pdev->sbdf);
Whereas here a blank needs dropping. But - don't you discard writes to
more than just the PBA here?
> + return ADJACENT_DISCARD_WRITE;
> + }
> +
> + return entry->pdev->msix->adj_access_table_idx[adj_type];
> +}
> +
> +static int adjacent_read(
> + unsigned int fixmap_idx,
> + uint64_t address, uint32_t len, uint64_t *pval)
Nit: Style (too deep indentation). Also while I'm fine with the two
uint64_t (arguably the former may want to be paddr_t), the uint32_t
clearly isn't needed here, and wants to be unsigned int.
> +{
> + const void __iomem *hwaddr;
> +
> + *pval = ~0UL;
> +
> + if ( !IS_ALIGNED(address, len) )
> + {
> + gdprintk(XENLOG_WARNING,
> + "Dropping unaligned read from MSI-X table page at %" PRIx64
> "\n",
> + address);
Here indentation is one too little again.
> + return X86EMUL_OKAY;
> + }
> +
> + ASSERT(fixmap_idx != ADJACENT_DISCARD_WRITE);
> +
> + hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
> +
> + switch ( len )
> + {
> + case 1:
> + *pval = readb(hwaddr);
> + break;
> +
> + case 2:
> + *pval = readw(hwaddr);
> + break;
> +
> + case 4:
> + *pval = readl(hwaddr);
> + break;
> +
> + case 8:
> + *pval = readq(hwaddr);
> + break;
> +
> + default:
> + ASSERT_UNREACHABLE();
> + }
> + return X86EMUL_OKAY;
> +}
> +
> +static int adjacent_write(
> + unsigned int fixmap_idx,
> + uint64_t address, uint32_t len, uint64_t val)
> +{
> + void __iomem *hwaddr;
> +
> + if ( !IS_ALIGNED(address, len) )
> + {
> + gdprintk(XENLOG_WARNING,
> + "Dropping unaligned write to MSI-X table page at %" PRIx64
> "\n",
> + address);
> + return X86EMUL_OKAY;
> + }
> +
> + if ( fixmap_idx == ADJACENT_DISCARD_WRITE )
> + return X86EMUL_OKAY;
> +
> + hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
> +
> + switch ( len ) {
Nit: Style (brace placement).
> @@ -220,16 +364,27 @@ static int cf_check msixtbl_read(
> unsigned long offset;
> struct msixtbl_entry *entry;
> unsigned int nr_entry, index;
> + unsigned int adjacent_fixmap;
> int r = X86EMUL_UNHANDLEABLE;
>
> - if ( (len != 4 && len != 8) || (address & (len - 1)) )
> + if ( !IS_ALIGNED(address, len) )
> return r;
>
> rcu_read_lock(&msixtbl_rcu_lock);
> -
> - entry = msixtbl_find_entry(current, address);
> + entry = msixtbl_find_entry(current, address, true);
> if ( !entry )
> goto out;
> +
> + adjacent_fixmap = adjacent_handle(entry, address, false);
> + if ( adjacent_fixmap != ADJACENT_DONT_HANDLE )
> + {
> + r = adjacent_read(adjacent_fixmap, address, len, pval);
> + goto out;
> + }
adjacent_handle() may return ADJACENT_DONT_HANDLE for reasons other than
the address not being in the expected ranges, so you can't blindly fall
through to the original code just yet (same in functions further down).
> @@ -375,14 +543,23 @@ static bool cf_check msixtbl_range(
> {
> struct vcpu *curr = current;
> unsigned long addr = r->addr;
> - const struct msi_desc *desc;
> + struct msixtbl_entry *entry;
const?
> + const struct msi_desc *desc = NULL;
> + unsigned int adjacent_fixmap;
> +
>
Nit: Please don't introduce double blank lines.
> --- a/xen/arch/x86/include/asm/msi.h
> +++ b/xen/arch/x86/include/asm/msi.h
> @@ -207,6 +207,10 @@ struct msg_address {
> PCI_MSIX_ENTRY_SIZE + \
> (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
>
> +/* indexes in adj_access_table_idx[] below */
> +#define ADJ_IDX_FIRST 0
> +#define ADJ_IDX_LAST 1
> +
> struct arch_msix {
> unsigned int nr_entries, used_entries;
> struct {
> @@ -214,6 +218,7 @@ struct arch_msix {
> } table, pba;
> int table_refcnt[MAX_MSIX_TABLE_PAGES];
> int table_idx[MAX_MSIX_TABLE_PAGES];
> + int adj_access_table_idx[2];
adjacent_handle() returns unsigned int - why is it plain int here?
Also, to keep variable name length somewhat limited, I don't think "table"
adds much value to the name here.
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -961,6 +961,34 @@ static int msix_capability_init(struct pci_dev *dev,
> domain_crash(d);
> /* XXX How to deal with existing mappings? */
> }
> +
> + /*
> + * If the MSI-X table doesn't start at the page boundary, map the
> first page for
> + * passthrough accesses.
> + */
> + if ( PAGE_OFFSET(table_paddr) )
> + {
> + int idx = msix_get_fixmap(msix, table_paddr, table_paddr);
> +
> + if ( idx > 0 )
> + msix->adj_access_table_idx[ADJ_IDX_FIRST] = idx;
> + else
> + gprintk(XENLOG_ERR, "Failed to map first MSI-X table page:
> %d\n", idx);
> + }
> + /*
> + * If the MSI-X table doesn't span full page(s), map the last page
> for
> + * passthrough accesses.
> + */
> + if ( PAGE_OFFSET(table_paddr + msix->nr_entries *
> PCI_MSIX_ENTRY_SIZE) )
> + {
> + uint64_t entry_paddr = table_paddr + msix->nr_entries *
> PCI_MSIX_ENTRY_SIZE;
> + int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
> +
> + if ( idx > 0 )
> + msix->adj_access_table_idx[ADJ_IDX_LAST] = idx;
> + else
> + gprintk(XENLOG_ERR, "Failed to map last MSI-X table page:
> %d\n", idx);
> + }
Wouldn't this better be delayed until / restricted to the device actually
being prepared for guest use? Which may be as simple as making all of this
an "else" to the earlier if()?
Also I think the 2nd comment is somewhat misleading - you don't really mean
"spans full pages" but, along the lines of the first one, "ends on a page
boundary".
Jan
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |