|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v3 3/7] vpci: Use pervcpu ranges for BAR mapping
On Thu, Apr 09, 2026 at 02:01:33PM +0000, Mykyta Poturai wrote:
> From: Mykyta Poturai <Mykyta_Poturai@xxxxxxxx>
>
> There is no need to store ranges for each PCI device, as they are only
> used during the mapping/unmapping process and can be reused for each
> device. This also allows to avoid the need to allocate and destroy
> rangesets for each device.
>
> Move the rangesets from struct vpci_bar to struct vpci_vcpu and perform
> (de-)allocation with vcpu (de-)allocation. Introduce RANGESET_DESTROY()
> macro to free a rangeset and set the pointer to NULL.
While this obviously has advantatges when multiple PCI devices are
assigned to a single domain (ie: for the hardware domain), it's
actually consuming more memory in the domU case if a single device is
assigned to the domain, and it has more than one vCPU.
I'm not saying it's bad per-se, but it's not obviously such an
improvement IMO. With the current approach we know Xen will allocate
an amount of rangesets that's bound to the amount of PCI devices
(nr_rangesets = nr_pci_devs * NR_BARS), but with this approach the
amount of allocated rangesets depends on the number of vCPUs, and
hence it's no longer bound by the hardware anymore.
I assume that this will change further to support SR-IOV devices that
might have even more VF BARs per PF, so I have to look at further
patches.
> Amends: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> Signed-off-by: Mykyta Poturai <mykyta_poturai@xxxxxxxx>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
> ---
> v2->v3:
> * synced with BAR write with memory decoding enabled series[1]
> * add Amends tag
> * remove unused variable i due to rebasing over 998060dd9101 ("vPCI:
> move vpci_init_capabilities() to a separate file")
> * enclose entire struct vpci_vcpu inside #ifdef __XEN__
> * s/bar_mem/mem/
> * use ARRAY_SIZE
> * put init/destroy in functions
> * only allocate for domains with vPCI and idle domain
> * replace 'if ( !mem ) continue;' with ASSERT
>
> v1->v2
> * new patch
>
> [1]:
> https://patchew.org/Xen/20260406191203.97662-1-stewart.hildebrand@xxxxxxx/
> ---
> xen/common/domain.c | 5 +++
> xen/drivers/vpci/header.c | 67 ++++++++++++++------------------------
> xen/drivers/vpci/vpci.c | 34 ++++++++++++++++---
> xen/include/xen/rangeset.h | 7 ++++
> xen/include/xen/vpci.h | 10 ++++--
> 5 files changed, 73 insertions(+), 50 deletions(-)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index bb9e210c28..5ef7db8f09 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -455,6 +455,8 @@ static int vcpu_teardown(struct vcpu *v)
> */
> static void vcpu_destroy(struct vcpu *v)
> {
> + vpci_vcpu_destroy(v);
> +
> free_vcpu_struct(v);
> }
>
> @@ -512,6 +514,9 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int
> vcpu_id)
> if ( arch_vcpu_create(v) != 0 )
> goto fail_sched;
>
> + if ( vpci_vcpu_init(v) )
> + goto fail_sched;
> +
> d->vcpu[vcpu_id] = v;
> if ( vcpu_id != 0 )
> {
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 5d5ba5c016..5bfb541b6a 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -196,6 +196,7 @@ bool vpci_process_pending(struct vcpu *v)
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = v->vpci.mem[i];
> struct map_data data = {
> .d = v->domain,
> .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> @@ -203,10 +204,10 @@ bool vpci_process_pending(struct vcpu *v)
> };
> int rc;
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> + rc = rangeset_consume_ranges(mem, map_range, &data);
>
> if ( rc == -ERESTART )
> {
> @@ -224,8 +225,8 @@ bool vpci_process_pending(struct vcpu *v)
>
> /* Clean all the rangesets */
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> - if ( !rangeset_is_empty(header->bars[i].mem) )
> - rangeset_purge(header->bars[i].mem);
> + if ( !rangeset_is_empty(v->vpci.mem[i]) )
> + rangeset_purge(v->vpci.mem[i]);
>
> v->vpci.pdev = NULL;
>
> @@ -260,13 +261,14 @@ static int __init apply_map(struct domain *d, const
> struct pci_dev *pdev,
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = current->vpci.mem[i];
> struct map_data data = { .d = d, .map = true, .bar = bar };
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
> - &data)) == -ERESTART )
> + while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) ==
> + -ERESTART )
> {
> /*
> * It's safe to drop and reacquire the lock in this context
> @@ -331,13 +333,13 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = current->vpci.mem[i];
> unsigned long start = PFN_DOWN(bar->addr);
> unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
> unsigned long start_guest = PFN_DOWN(bar->guest_addr);
> unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
>
> - if ( !bar->mem )
> - continue;
> + ASSERT(mem);
>
> if ( !MAPPABLE_BAR(bar) ||
> (rom_only ? bar->type != VPCI_BAR_ROM
> @@ -354,7 +356,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> continue;
> }
>
> - ASSERT(rangeset_is_empty(bar->mem));
> + ASSERT(rangeset_is_empty(mem));
>
> /*
> * Make sure that the guest set address has the same page offset
> @@ -369,7 +371,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> return -EINVAL;
> }
>
> - rc = rangeset_add_range(bar->mem, start_guest, end_guest);
> + rc = rangeset_add_range(mem, start_guest, end_guest);
> if ( rc )
> {
> printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
> @@ -380,12 +382,12 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> /* Check for overlap with the already setup BAR ranges. */
> for ( j = 0; j < i; j++ )
> {
> - struct vpci_bar *prev_bar = &header->bars[j];
> + struct rangeset *prev_mem = current->vpci.mem[j];
>
> - if ( rangeset_is_empty(prev_bar->mem) )
> + if ( rangeset_is_empty(prev_mem) )
> continue;
>
> - rc = rangeset_remove_range(prev_bar->mem, start_guest,
> end_guest);
> + rc = rangeset_remove_range(prev_mem, start_guest, end_guest);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -395,7 +397,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> }
> }
>
> - rc = pci_sanitize_bar_memory(bar->mem);
> + rc = pci_sanitize_bar_memory(mem);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -412,14 +414,14 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
> vmsix_table_size(pdev->vpci, i) - 1);
>
> - for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
> + for ( j = 0; j < ARRAY_SIZE(current->vpci.mem); j++ )
You make a non-trivial use of current in vpci_modify_bars(), maybe you
should consider introducing a local variable for it:
struct *vcpu curr = current;
current expands to a call to get_cpu_info(9, which is better to avoid
doing repeatedly, specially in the context above which is used as a
loop upper bound.
> {
> - const struct vpci_bar *bar = &header->bars[j];
> + struct rangeset *mem = current->vpci.mem[j];
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - rc = rangeset_remove_range(bar->mem, start, end);
> + rc = rangeset_remove_range(mem, start, end);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -469,8 +471,9 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
> {
> const struct vpci_bar *bar = &header->bars[j];
> + struct rangeset *mem = current->vpci.mem[j];
>
> - if ( !rangeset_overlaps_range(bar->mem, start, end) ||
> + if ( !rangeset_overlaps_range(mem, start, end) ||
> /*
> * If only the ROM enable bit is toggled check
> against
> * other BARs in the same device for overlaps, but
> not
> @@ -481,7 +484,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> bar->type == VPCI_BAR_ROM) )
> continue;
>
> - rc = rangeset_remove_range(bar->mem, start, end);
> + rc = rangeset_remove_range(mem, start, end);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -734,18 +737,6 @@ static void cf_check rom_write(
> }
> }
>
> -static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar,
> - unsigned int i)
> -{
> - char str[32];
> -
> - snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> -
> - bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
> -
> - return !bar->mem ? -ENOMEM : 0;
> -}
> -
> int vpci_init_header(struct pci_dev *pdev)
> {
> uint16_t cmd;
> @@ -856,10 +847,6 @@ int vpci_init_header(struct pci_dev *pdev)
> else
> bars[i].type = VPCI_BAR_MEM32;
>
> - rc = bar_add_rangeset(pdev, &bars[i], i);
> - if ( rc )
> - goto fail;
> -
> rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
> (i == num_bars - 1) ? PCI_BAR_LAST : 0);
> if ( rc < 0 )
> @@ -913,12 +900,6 @@ int vpci_init_header(struct pci_dev *pdev)
> 4, rom);
> if ( rc )
> rom->type = VPCI_BAR_EMPTY;
> - else
> - {
> - rc = bar_add_rangeset(pdev, rom, num_bars);
> - if ( rc )
> - goto fail;
> - }
> }
> else if ( !is_hwdom )
> {
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 0ac9ec8b04..d069ca6d9c 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -24,6 +24,35 @@
>
> #ifdef __XEN__
>
> +void vpci_vcpu_destroy(struct vcpu *v)
> +{
> + if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
> + return;
> +
> + for ( unsigned int i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
> + RANGESET_DESTROY(v->vpci.mem[i]);
> +}
> +
> +int vpci_vcpu_init(struct vcpu *v)
> +{
> + unsigned int i;
> +
> + if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
> + return 0;
> +
> + for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
> + {
> + char str[32];
> +
> + snprintf(str, sizeof(str), "%pv:BAR%u", v, i);
> + v->vpci.mem[i] = rangeset_new(v->domain, str, RANGESETF_no_print);
> + if ( !v->vpci.mem[i] )
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> static int assign_virtual_sbdf(struct pci_dev *pdev)
> {
> @@ -89,8 +118,6 @@ struct vpci_register *vpci_get_register(const struct vpci
> *vpci,
>
> void vpci_deassign_device(struct pci_dev *pdev)
> {
> - unsigned int i;
> -
> ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
>
> if ( !has_vpci(pdev->domain) || !pdev->vpci )
> @@ -116,9 +143,6 @@ void vpci_deassign_device(struct pci_dev *pdev)
> }
> spin_unlock(&pdev->vpci->lock);
>
> - for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
> - rangeset_destroy(pdev->vpci->header.bars[i].mem);
> -
> xfree(pdev->vpci);
> pdev->vpci = NULL;
> }
> diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
> index 817505badf..f01e00ec92 100644
> --- a/xen/include/xen/rangeset.h
> +++ b/xen/include/xen/rangeset.h
> @@ -40,6 +40,13 @@ struct rangeset *rangeset_new(
> void rangeset_destroy(
> struct rangeset *r);
>
> +/* Destroy a rangeset, and zero the pointer to it. */
> +#define RANGESET_DESTROY(r) \
> + ({ \
> + rangeset_destroy(r); \
> + (r) = NULL; \
You want to use a construct similar to what XFREE() does:
#define RANGESET_DESTROY(r) do { \
struct rangeset *_r_ = (r); \
(r) = NULL; \
rangeset_destroy(_r_); \
} while ( false )
After the GhostRace vulnerability we NULL the variable before freeing
the data associated with it.
Thanks, Roger.
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |