[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 5/9] x86/passthrough: Introduce pt_irq_bind_msi() as canonical MSI bind path



Le 27/04/2026 à 15:57, Julian Vetter a écrit :
> Change pt_irq_bind_msi() to accept raw MSI address and data values instead
> of pre-decoded gvec/gflags. Add msi_addr_to_gflags() to decode the
> destination ID and delivery attributes, including the Extended Destination
> ID bits from address[11:5] per Intel convention.
> 
> Update pt_irq_create_bind() to call pt_irq_bind_msi() via the existing
> gvec/gflags interface so domctl-based callers continue to work.
> 
> Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
> ---
> Changes in v4:
> - As suggested by Roger replace the v3 approach (v3 patches 2+4) of
>    extending the gflags ABI with XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK and
>    XEN_DOMCTL_VMSI_X86_FULL_DEST() so callers could pass extended bits
>    through XEN_DOMCTL_bind_pt_irq. pt_irq_bind_msi() now accepts raw MSI
>    address + data and decodes the destination internally via
>    msi_addr_to_gflags()
> - Replace the gmsi.gvec + gmsi.gflags fields in struct hvm_pirq_dpci
>    with gmsi.addr + gmsi.data
> - Replace msi_gflags() (v3 vmsi.c helper that packed the extended
>    destination bits into gflags) with msi_addr_to_gflags() which decodes
>    the raw MSI address directly
> - pt_irq_create_bind() now rejects PT_IRQ_TYPE_MSI with -EOPNOTSUPP and
>    all callers are redirected through the DM op path in patch 7
> - As suggested by Roger adapt the comment in msi.h in regards to the
>    extended destination encoding since it's not part of any specification
> ---
>   xen/arch/x86/hvm/vmsi.c            | 50 ++++++------------
>   xen/arch/x86/include/asm/hvm/irq.h |  4 +-
>   xen/arch/x86/include/asm/msi.h     | 18 ++++++-
>   xen/drivers/passthrough/x86/hvm.c  | 83 ++++++++++++++++++------------
>   xen/include/xen/iommu.h            |  3 ++
>   5 files changed, 86 insertions(+), 72 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
> index 27b1f089e2..2a4b97e2e1 100644
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -43,6 +43,7 @@
>   #include <asm/current.h>
>   #include <asm/event.h>
>   #include <asm/io_apic.h>
> +#include <asm/msi.h>
>   
>   static void vmsi_inj_irq(
>       struct vlapic *target,
> @@ -107,12 +108,12 @@ int vmsi_deliver(
>   
>   void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci 
> *pirq_dpci)
>   {
> -    uint32_t flags = pirq_dpci->gmsi.gflags;
> -    int vector = pirq_dpci->gmsi.gvec;
> -    uint8_t dest = (uint8_t)flags;
> -    bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
> -    uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> -    bool trig_mode = flags & XEN_DOMCTL_VMSI_X86_TRIG_MASK;
> +    uint32_t dest = MSI_ADDR_DEST(pirq_dpci->gmsi.addr);
> +    bool dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
> +    uint8_t delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
> +                                      MSI_DATA_DELIVERY_MODE_MASK);
> +    bool trig_mode = pirq_dpci->gmsi.data & MSI_DATA_TRIGGER_MASK;
> +    int vector = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
>   
>       HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
>                   "msi: dest=%x dest_mode=%x delivery_mode=%x "
> @@ -793,27 +794,6 @@ void msix_write_completion(struct vcpu *v)
>   }
>   
>   #ifdef CONFIG_HAS_VPCI
> -static unsigned int msi_gflags(uint16_t data, uint64_t addr, bool masked)
> -{
> -    /*
> -     * We need to use the DOMCTL constants here because the output of this
> -     * function is used as input to pt_irq_create_bind, which also takes the
> -     * input from the DOMCTL itself.
> -     */
> -    return MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK),
> -                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
> -           MASK_INSR(MASK_EXTR(addr, MSI_ADDR_REDIRECTION_MASK),
> -                     XEN_DOMCTL_VMSI_X86_RH_MASK) |
> -           MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DESTMODE_MASK),
> -                     XEN_DOMCTL_VMSI_X86_DM_MASK) |
> -           MASK_INSR(MASK_EXTR(data, MSI_DATA_DELIVERY_MODE_MASK),
> -                     XEN_DOMCTL_VMSI_X86_DELIV_MASK) |
> -           MASK_INSR(MASK_EXTR(data, MSI_DATA_TRIGGER_MASK),
> -                     XEN_DOMCTL_VMSI_X86_TRIG_MASK) |
> -           /* NB: by default MSI vectors are bound masked. */
> -           (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
> -}
> -
>   static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
>   {
>       unsigned long flags;
> @@ -850,17 +830,17 @@ static int vpci_msi_update(const struct pci_dev *pdev, 
> uint32_t data,
>       {
>           uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
>           uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
> -        struct xen_domctl_bind_pt_irq bind = {
> -            .machine_irq = pirq + i,
> -            .irq_type = PT_IRQ_TYPE_MSI,
> -            .u.msi.gvec = (vector & ~vector_mask) |
> -                          ((vector + i) & vector_mask),
> -            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
> -        };
> -        int rc = pt_irq_create_bind(pdev->domain, &bind);
> +        uint8_t gvec = (vector & ~vector_mask) | ((vector + i) & 
> vector_mask);
> +        uint32_t msi_data = (data & ~MSI_DATA_VECTOR_MASK) | gvec;
> +        int rc = pt_irq_bind_msi(pdev->domain, pirq + i,
> +                                 address, msi_data, 0, !((mask >> i) & 1));
>   
>           if ( rc )
>           {
> +            struct xen_domctl_bind_pt_irq bind = {
> +                .irq_type = PT_IRQ_TYPE_MSI,
> +                .machine_irq = pirq + i,
> +            };
>               gdprintk(XENLOG_ERR, "%pp: failed to bind PIRQ %u: %d\n",
>                        &pdev->sbdf, pirq + i, rc);
>               while ( bind.machine_irq-- > pirq )
> diff --git a/xen/arch/x86/include/asm/hvm/irq.h 
> b/xen/arch/x86/include/asm/hvm/irq.h
> index 77595fb3f4..c50eee9996 100644
> --- a/xen/arch/x86/include/asm/hvm/irq.h
> +++ b/xen/arch/x86/include/asm/hvm/irq.h
> @@ -120,8 +120,8 @@ struct dev_intx_gsi_link {
>   #define HVM_IRQ_DPCI_TRANSLATE       (1u << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
>   
>   struct hvm_gmsi_info {
> -    uint32_t gvec;
> -    uint32_t gflags;
> +    uint64_t addr;    /* raw MSI address (0xfeexxxxx, includes ext dest ID) 
> */
> +    uint32_t data;    /* raw MSI data (vector, delivery mode, trigger mode) 
> */
>       int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
>       bool posted; /* directly deliver to guest via VT-d PI? */
>   };
> diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
> index 00059d4a3a..93aaf20e27 100644
> --- a/xen/arch/x86/include/asm/msi.h
> +++ b/xen/arch/x86/include/asm/msi.h
> @@ -51,8 +51,22 @@
>   #define MSI_ADDR_REDIRECTION_MASK   (1 << MSI_ADDR_REDIRECTION_SHIFT)
>   
>   #define MSI_ADDR_DEST_ID_SHIFT              12
> -#define       MSI_ADDR_DEST_ID_MASK          0x00ff000
> -#define  MSI_ADDR_DEST_ID(dest)              (((dest) << 
> MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
> +#define MSI_ADDR_DEST_ID_UPPER_BITS  8
> +#define MSI_ADDR_DEST_ID_MASK                0x00ff000
> +#define MSI_ADDR_DEST_ID(dest)               (((dest) << 
> MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
> +
> +/*
> + * Intel convention: in physical destination mode bits 11:5 of the MSI
> + * address carry APIC ID bits [14:8] (the "Extended Destination ID"),
> + * extending the addressable range from 8 to 15 bits.
> + */
> +#define MSI_ADDR_EXT_DEST_ID_MASK    0x0000fe0
> +
> +/* Extract the combined 15-bit destination ID from an MSI address. */
> +#define MSI_ADDR_DEST(addr) \
> +    (MASK_EXTR((addr), MSI_ADDR_DEST_ID_MASK) | \
> +     (MASK_EXTR((addr), MSI_ADDR_EXT_DEST_ID_MASK) << \
> +      MSI_ADDR_DEST_ID_UPPER_BITS))
>   
>   /* MAX fixed pages reserved for mapping MSIX tables. */
>   #define FIX_MSIX_MAX_PAGES              512
> diff --git a/xen/drivers/passthrough/x86/hvm.c 
> b/xen/drivers/passthrough/x86/hvm.c
> index eff1e8a79e..026534530f 100644
> --- a/xen/drivers/passthrough/x86/hvm.c
> +++ b/xen/drivers/passthrough/x86/hvm.c
> @@ -21,6 +21,7 @@
>   #include <xen/event.h>
>   #include <xen/iommu.h>
>   #include <xen/cpu.h>
> +#include <xen/ioreq.h>
>   #include <xen/irq.h>
>   #include <asm/hvm/irq.h>
>   #include <asm/io_apic.h>
> @@ -290,14 +291,15 @@ static int pt_irq_dpci_setup(struct domain *d, unsigned 
> int pirq,
>       } while ( true );
>   }
>   
> -static int pt_irq_bind_msi(struct domain *d, uint32_t machine_irq,
> -                            uint8_t gvec, uint32_t gflags, uint64_t gtable,
> -                            bool unmasked)
> +int pt_irq_bind_msi(struct domain *d, uint32_t machine_irq,
> +                    uint64_t msi_addr, uint32_t msi_data,
> +                    uint64_t gtable, bool unmasked)
>   {
>       struct hvm_irq_dpci *hvm_irq_dpci;
>       struct hvm_pirq_dpci *pirq_dpci;
>       struct pirq *info;
> -    uint8_t dest, delivery_mode;
> +    uint8_t gvec, delivery_mode;
> +    uint32_t dest;
>       bool dest_mode;
>       int dest_vcpu_id, rc;
>       const struct vcpu *vcpu;
> @@ -313,8 +315,8 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>       {
>           pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
>                              HVM_IRQ_DPCI_GUEST_MSI;
> -        pirq_dpci->gmsi.gvec = gvec;
> -        pirq_dpci->gmsi.gflags = gflags;
> +        pirq_dpci->gmsi.addr = msi_addr;
> +        pirq_dpci->gmsi.data = msi_data;
>           /*
>            * 'pt_irq_bind_msi' can be called after 'pt_irq_destroy_bind'.
>            * The 'pirq_cleanup_check' which would free the structure is only
> @@ -346,8 +348,8 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>           }
>           if ( unlikely(rc) )
>           {
> -            pirq_dpci->gmsi.gflags = 0;
> -            pirq_dpci->gmsi.gvec = 0;
> +            pirq_dpci->gmsi.addr = 0;
> +            pirq_dpci->gmsi.data = 0;
>               pirq_dpci->dom = NULL;
>               pirq_dpci->flags = 0;
>               if ( !info->evtchn )
> @@ -367,20 +369,22 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>           }
>   
>           /* If pirq is already mapped as vmsi, update guest data/addr. */
> -        if ( pirq_dpci->gmsi.gvec != gvec || pirq_dpci->gmsi.gflags != 
> gflags )
> +        if ( pirq_dpci->gmsi.addr != msi_addr ||
> +             pirq_dpci->gmsi.data != msi_data )
>           {
>               /* Directly clear pending EOIs before enabling new MSI info. */
>               pirq_guest_eoi(info);
>   
> -            pirq_dpci->gmsi.gvec = gvec;
> -            pirq_dpci->gmsi.gflags = gflags;
> +            pirq_dpci->gmsi.addr = msi_addr;
> +            pirq_dpci->gmsi.data = msi_data;
>           }
>       }
> +
>       /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> -    dest = MASK_EXTR(pirq_dpci->gmsi.gflags, 
> XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> -    dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
> -    delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
> -                               XEN_DOMCTL_VMSI_X86_DELIV_MASK);
> +    gvec = msi_data & MSI_DATA_VECTOR_MASK;
> +    dest = MSI_ADDR_DEST(msi_addr);
> +    dest_mode = msi_addr & MSI_ADDR_DESTMODE_MASK;
> +    delivery_mode = MASK_EXTR(msi_data, MSI_DATA_DELIVERY_MODE_MASK);
>   
>       dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
>       pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
> @@ -391,8 +395,7 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>       if ( iommu_intpost )
>       {
>           if ( delivery_mode == dest_LowestPrio )
> -            vcpu = vector_hashing_dest(d, dest, dest_mode,
> -                                       pirq_dpci->gmsi.gvec);
> +            vcpu = vector_hashing_dest(d, dest, dest_mode, gvec);
>           if ( vcpu )
>               pirq_dpci->gmsi.posted = true;
>       }
> @@ -407,7 +410,7 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>               .irq_type = PT_IRQ_TYPE_MSI,
>           };
>   
> -        rc = hvm_pi_update_irte(vcpu, info, pirq_dpci->gmsi.gvec);
> +        rc = hvm_pi_update_irte(vcpu, info, gvec);
>           if ( rc )
>           {
>               pt_irq_destroy_bind(d, &bind);
> @@ -417,15 +420,15 @@ static int pt_irq_bind_msi(struct domain *d, uint32_t 
> machine_irq,
>   
>       if ( unmasked )
>       {
> -        struct xen_domctl_bind_pt_irq bind = {
> -            .machine_irq = machine_irq,
> -            .irq_type = PT_IRQ_TYPE_MSI,
> -        };
>           unsigned long flags;
>           struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags);
>   
>           if ( !desc )
>           {
> +            struct xen_domctl_bind_pt_irq bind = {
> +                .machine_irq = machine_irq,
> +                .irq_type = PT_IRQ_TYPE_MSI,
> +            };
>               pt_irq_destroy_bind(d, &bind);
>               return -EINVAL;
>           }
> @@ -448,13 +451,29 @@ int pt_irq_create_bind(
>       switch ( pt_irq_bind->irq_type )
>       {
>       case PT_IRQ_TYPE_MSI:
> -        return pt_irq_bind_msi(d, pirq,
> -                               pt_irq_bind->u.msi.gvec,
> -                               pt_irq_bind->u.msi.gflags &
> -                                   ~XEN_DOMCTL_VMSI_X86_UNMASKED,
> +    {
> +        uint32_t gflags = pt_irq_bind->u.msi.gflags;
> +        uint64_t msi_addr;
> +        uint32_t msi_data;
> +
> +        msi_addr = MSI_ADDR_HEADER |
> +                   MASK_INSR(MASK_EXTR(gflags, 
> XEN_DOMCTL_VMSI_X86_DEST_ID_MASK),
> +                             MSI_ADDR_DEST_ID_MASK) |
> +                   (gflags & XEN_DOMCTL_VMSI_X86_RH_MASK ?
> +                    MSI_ADDR_REDIRECTION_LOWPRI : MSI_ADDR_REDIRECTION_CPU) |
> +                   (gflags & XEN_DOMCTL_VMSI_X86_DM_MASK ?
> +                    MSI_ADDR_DESTMODE_LOGIC : MSI_ADDR_DESTMODE_PHYS);
> +        msi_data = pt_irq_bind->u.msi.gvec |
> +                   MASK_INSR(MASK_EXTR(gflags, 
> XEN_DOMCTL_VMSI_X86_DELIV_MASK),
> +                             MSI_DATA_DELIVERY_MODE_MASK) |
> +                   (gflags & XEN_DOMCTL_VMSI_X86_TRIG_MASK ?
> +                    MSI_DATA_TRIGGER_LEVEL : 0);
> +
> +        return pt_irq_bind_msi(d, pt_irq_bind->machine_irq,
> +                               msi_addr, msi_data,
>                                  pt_irq_bind->u.msi.gtable,
> -                               !!(pt_irq_bind->u.msi.gflags &
> -                                  XEN_DOMCTL_VMSI_X86_UNMASKED));
> +                               !!(gflags & XEN_DOMCTL_VMSI_X86_UNMASKED));
> +    }
>   
>       case PT_IRQ_TYPE_PCI:
>       case PT_IRQ_TYPE_MSI_TRANSLATE:
> @@ -617,7 +636,6 @@ int pt_irq_create_bind(
>       }
>   
>       default:
> -        write_unlock(&d->event_lock);
>           return -EOPNOTSUPP;
>       }
>   
> @@ -858,11 +876,10 @@ static int cf_check _hvm_dpci_msi_eoi(
>       int vector = (long)arg;
>   
>       if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
> -         (pirq_dpci->gmsi.gvec == vector) )
> +         ((pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK) == vector) )
>       {
> -        unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
> -                                      XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
> -        bool dest_mode = pirq_dpci->gmsi.gflags & 
> XEN_DOMCTL_VMSI_X86_DM_MASK;
> +        unsigned int dest = MSI_ADDR_DEST(pirq_dpci->gmsi.addr);
> +        bool dest_mode = pirq_dpci->gmsi.addr & XEN_DOMCTL_VMSI_X86_DM_MASK;
>   
>           if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
>                                  dest_mode) )
> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
> index 37c4a1dc82..4672d114e3 100644
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -222,6 +222,9 @@ int pt_irq_create_bind(struct domain *d,
>                          const struct xen_domctl_bind_pt_irq *pt_irq_bind);
>   int pt_irq_destroy_bind(struct domain *d,
>                           const struct xen_domctl_bind_pt_irq *pt_irq_bind);
> +int pt_irq_bind_msi(struct domain *d, uint32_t machine_irq,
> +                    uint64_t msi_addr, uint32_t msi_data,
> +                    uint64_t gtable, bool unmasked);
>   
>   struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d);
>   void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci);

There is a lot of bitwise manipulations there, and I wonder if using 
bitfields could help here ?

Teddy


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.