
Re: [PATCH v5 04/14] vpci: cancel pending map/unmap on vpci removal



I am going to postpone this patch, as it needs a major re-think of the
approach to take. This is acceptable because it fixes a really rare
use-case only seen during the development phase, so postponing it
shouldn't hurt run-time behaviour.

Thank you,
Oleksandr

On 25.11.21 13:02, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>
> When a vPCI is removed for a PCI device it is possible that we have
> already scheduled delayed work for map/unmap operations on that device.
> For example, the following scenario illustrates the problem:
>
> pci_physdev_op
>     pci_add_device
>         init_bars -> modify_bars -> defer_map -> raise_softirq(SCHEDULE_SOFTIRQ)
>     iommu_add_device <- FAILS
>     vpci_remove_device -> xfree(pdev->vpci)
>
> leave_hypervisor_to_guest
>     vpci_process_pending: v->vpci.mem != NULL; v->vpci.pdev->vpci == NULL
>
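> A simplified, illustrative outline of the pre-patch path that makes the
> problem visible (not the exact code; the real one is in the first hunk
> below):
>
>     bool vpci_process_pending(struct vcpu *v)
>     {
>         if ( v->vpci.mem )          /* deferred work queued before removal */
>         {
>             struct map_data data = { .d = v->domain };
>
>             /*
>              * Mappings are still applied although pdev->vpci has already
>              * been freed by vpci_remove_device(), so the device's p2m
>              * state is left stale/unknown.
>              */
>             rangeset_consume_ranges(v->vpci.mem, map_range, &data);
>
>             if ( v->vpci.pdev->vpci )   /* NULL in the scenario above */
>                 modify_decoding(v->vpci.pdev, v->vpci.cmd, v->vpci.rom_only);
>
>             rangeset_destroy(v->vpci.mem);
>             v->vpci.mem = NULL;
>         }
>
>         return false;
>     }
>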
> For the hardware domain we continue execution, as the worst that
> could happen is that MMIO mappings are left in place when the
> device has been deassigned.
>
> For unprivileged domains that get a failure in the middle of a vPCI
> {un}map operation we need to destroy them, as we don't know what
> state the p2m is in. This can only happen in vpci_process_pending for
> DomUs, as they won't be allowed to call pci_add_device.
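>
> In outline, the error path in vpci_process_pending() then becomes
> (condensed from the hunk below):
>
>     if ( rc )
>     {
>         if ( is_hardware_domain(v->domain) )
>             vpci_remove_device_locked(pdev);  /* Dom0: drop vPCI state only */
>         else
>             domain_crash(v->domain);          /* DomU: p2m state is unknown */
>     }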
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>
> ---
> Cc: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> Since v4:
>   - crash guest domain if map/unmap operation didn't succeed
>   - re-work vpci cancel work to cancel work on all vCPUs
>   - use new locking scheme with pdev->vpci_lock
> New in v4
>
> Fixes: 86dbcf6e30cb ("vpci: cancel pending map/unmap on vpci removal")
>
> ---
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> ---
>   xen/drivers/vpci/header.c | 49 ++++++++++++++++++++++++++++++---------
>   xen/drivers/vpci/vpci.c   |  2 ++
>   xen/include/xen/pci.h     |  5 ++++
>   xen/include/xen/vpci.h    |  6 +++++
>   4 files changed, 51 insertions(+), 11 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index bd23c0274d48..ba333fb2f9b0 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -131,7 +131,13 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
>   
>   bool vpci_process_pending(struct vcpu *v)
>   {
> -    if ( v->vpci.mem )
> +    struct pci_dev *pdev = v->vpci.pdev;
> +
> +    if ( !pdev )
> +        return false;
> +
> +    spin_lock(&pdev->vpci_lock);
> +    if ( !pdev->vpci_cancel_pending && v->vpci.mem )
>       {
>           struct map_data data = {
>               .d = v->domain,
> @@ -140,32 +146,53 @@ bool vpci_process_pending(struct vcpu *v)
>           int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
>   
>           if ( rc == -ERESTART )
> +        {
> +            spin_unlock(&pdev->vpci_lock);
>               return true;
> +        }
>   
> -        spin_lock(&v->vpci.pdev->vpci_lock);
> -        if ( v->vpci.pdev->vpci )
> +        if ( pdev->vpci )
>               /* Disable memory decoding unconditionally on failure. */
> -            modify_decoding(v->vpci.pdev,
> +            modify_decoding(pdev,
>                             rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
>                               !rc && v->vpci.rom_only);
> -        spin_unlock(&v->vpci.pdev->vpci_lock);
>   
> -        rangeset_destroy(v->vpci.mem);
> -        v->vpci.mem = NULL;
>           if ( rc )
> +        {
>               /*
>                * FIXME: in case of failure remove the device from the domain.
>                * Note that there might still be leftover mappings. While this is
> -             * safe for Dom0, for DomUs the domain will likely need to be
> -             * killed in order to avoid leaking stale p2m mappings on
> -             * failure.
> +             * safe for Dom0, for DomUs the domain needs to be killed in order
> +             * to avoid leaking stale p2m mappings on failure.
>                */
> -            vpci_remove_device(v->vpci.pdev);
> +            if ( is_hardware_domain(v->domain) )
> +                vpci_remove_device_locked(pdev);
> +            else
> +                domain_crash(v->domain);
> +        }
>       }
> +    spin_unlock(&pdev->vpci_lock);
>   
>       return false;
>   }
>   
> +void vpci_cancel_pending_locked(struct pci_dev *pdev)
> +{
> +    struct vcpu *v;
> +
> +    ASSERT(spin_is_locked(&pdev->vpci_lock));
> +
> +    /* Cancel any pending work now on all vCPUs. */
> +    for_each_vcpu( pdev->domain, v )
> +    {
> +        if ( v->vpci.mem && (v->vpci.pdev == pdev) )
> +        {
> +            rangeset_destroy(v->vpci.mem);
> +            v->vpci.mem = NULL;
> +        }
> +    }
> +}
> +
>   static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
>                               struct rangeset *mem, uint16_t cmd)
>   {
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index ceaac4516ff8..37103e207635 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -54,7 +54,9 @@ void vpci_remove_device_locked(struct pci_dev *pdev)
>   {
>       ASSERT(spin_is_locked(&pdev->vpci_lock));
>   
> +    pdev->vpci_cancel_pending = true;
>       vpci_remove_device_handlers_locked(pdev);
> +    vpci_cancel_pending_locked(pdev);
>       xfree(pdev->vpci->msix);
>       xfree(pdev->vpci->msi);
>       xfree(pdev->vpci);
> diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
> index 3f60d6c6c6dd..52d302ac5f35 100644
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -135,6 +135,11 @@ struct pci_dev {
>   
>       /* Data for vPCI. */
>       spinlock_t vpci_lock;
> +    /*
> +     * Set if PCI device is being removed now and we need to cancel any
> +     * pending map/unmap operations.
> +     */
> +    bool vpci_cancel_pending;
>       struct vpci *vpci;
>   };
>   
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index 8b22bdef11d0..cfff87e5801e 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -57,6 +57,7 @@ uint32_t vpci_hw_read32(const struct pci_dev *pdev, unsigned int reg,
>    * should not run.
>    */
>   bool __must_check vpci_process_pending(struct vcpu *v);
> +void vpci_cancel_pending_locked(struct pci_dev *pdev);
>   
>   struct vpci {
>       /* List of vPCI handlers for a device. */
> @@ -253,6 +254,11 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v)
>       ASSERT_UNREACHABLE();
>       return false;
>   }
> +
> +static inline void vpci_cancel_pending_locked(struct pci_dev *pdev)
> +{
> +    ASSERT_UNREACHABLE();
> +}
>   #endif
>   
>   #endif

 

