|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v4 7/9] x86/dmop: Add XEN_DMOP_{bind,unbind}_pt_msi_irq DM ops
Le 27/04/2026 à 15:57, Julian Vetter a écrit :
> Add two DM ops for MSI passthrough IRQs. These new DM ops take the raw
> MSI address and data fields rather than pre-decoded gflags values. Xen
> decodes the destination ID via msi_addr_to_gflags(), including any
> extended destination bits in address[11:5]. This means the device model
> does not need to understand the extended destination ID encoding, and
> simply forwards the MSI address it observes from the guest.
>
> With these DM ops in place, redirect xc_domain_update_msi_irq() and
> xc_domain_unbind_msi_irq() in libxenctrl to use
> xendevicemodel_bind_pt_msi_irq() / xendevicemodel_unbind_pt_msi_irq()
> via xch->dmod. The gflags/gvec arguments are translated to the raw MSI
> address and data words at the libxc level using the standard x86 MSI
> address format.
>
> Reject the PT_IRQ_TYPE_MSI sub-case in XEN_DOMCTL_bind_pt_irq and
> XEN_DOMCTL_unbind_pt_irq: all callers now go through the DM op path, so
> the domctl sub-case is fully obsolete.
>
We probably want to reflect that in the XEN_DOMCTL_{un}bind_pt_irq interface
in domctl.h (e.g. through a note saying that the PT_IRQ_TYPE_MSI type is now
deprecated and unsupported).
> Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
> ---
> Changes in v4:
> - Corresponds to v3 patch 5, but with feedback from Jan
> - Redirect xc_domain_{update,unbind}_msi_irq() in libxenctrl to call
> xendevicemodel_{un}bind_pt_msi_irq() via xch->dmod, translating the
> existing gflags/gvec arguments to raw MSI address and data in libxc
> - As suggested by Jan, reject the PT_IRQ_TYPE_MSI sub-case in
> XEN_DOMCTL_{bind,unbind}_pt_irq with -EOPNOTSUPP -> All callers now go
> through the DM op path, making the domctl sub-case fully obsolete
> - Added the xlat.lst entry for dm_op_unbind_pt_msi_irq (v3 added only the
> bind entry)
> ---
> tools/include/xendevicemodel.h | 31 ++++++++++++++
> tools/libs/ctrl/xc_domain.c | 52 +++++++++++------------
> tools/libs/devicemodel/core.c | 38 +++++++++++++++++
> xen/arch/x86/domctl.c | 10 ++++-
> xen/arch/x86/hvm/dm.c | 68 +++++++++++++++++++++++++++++++
> xen/drivers/passthrough/x86/hvm.c | 1 -
> xen/include/public/hvm/dm_op.h | 37 +++++++++++++++++
> xen/include/xlat.lst | 2 +
> 8 files changed, 208 insertions(+), 31 deletions(-)
>
> diff --git a/tools/include/xendevicemodel.h b/tools/include/xendevicemodel.h
> index 227e7fd810..f15b35fa33 100644
> --- a/tools/include/xendevicemodel.h
> +++ b/tools/include/xendevicemodel.h
> @@ -375,6 +375,37 @@ int xendevicemodel_nr_vcpus(
> */
> int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid);
>
> +/**
> + * This function binds a passthrough physical IRQ to a guest MSI vector
> + * using raw MSI address/data fields. Unlike XEN_DOMCTL_bind_pt_irq,
> + * this interface supports extended (15-bit) destination IDs by having
> + * Xen decode the MSI address internally.
> + *
"Unlike XEN_DOMCTL_bind_pt_irq" feels a bit odd, since that is no longer a
supported interface for MSI.
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @parm msi_addr the MSI address (includes ext. dest. ID bits [11:5]).
> + * @parm msi_data the MSI data word (bits [7:0] are the guest vector).
> + * @parm gtable the MSI-X table base GFN, or 0 for plain MSI.
> + * @parm unmasked if non-zero, leave the IRQ unmasked after binding.
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_bind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint64_t msi_addr, uint32_t msi_data, uint64_t gtable, int unmasked);
> +
> +/**
> + * This function unbinds a passthrough physical IRQ previously bound
> + * with xendevicemodel_bind_pt_msi_irq.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_unbind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq);
> +
> #endif /* XENDEVICEMODEL_H */
>
> /*
> diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
> index 01c0669c88..7e3b7a0dc6 100644
> --- a/tools/libs/ctrl/xc_domain.c
> +++ b/tools/libs/ctrl/xc_domain.c
> @@ -1677,6 +1677,21 @@ int xc_deassign_dt_device(
>
>
>
> +static void xc_msi_gflags_to_addr_data(uint32_t gvec, uint32_t gflags,
> + uint64_t *msi_addr, uint32_t *msi_data)
> +{
> + *msi_addr = 0xfee00000U |
> + ((uint64_t)((gflags & XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) << 12)) |
> + (gflags & XEN_DOMCTL_VMSI_X86_RH_MASK ? (1U << 3) : 0) |
> + (gflags & XEN_DOMCTL_VMSI_X86_DM_MASK ? (1U << 2) : 0);
> +
> + *msi_data = (gvec & 0xff) |
> + (uint32_t)(((gflags & XEN_DOMCTL_VMSI_X86_DELIV_MASK) >>
> + (/* shift of XEN_DOMCTL_VMSI_X86_DELIV_MASK */ 12 -
> + /* MSI data delivery shift */ 8))) |
> + (gflags & XEN_DOMCTL_VMSI_X86_TRIG_MASK ? (1U << 15) : 0);
> +}
> +
> int xc_domain_update_msi_irq(
> xc_interface *xch,
> uint32_t domid,
> @@ -1685,22 +1700,15 @@ int xc_domain_update_msi_irq(
> uint32_t gflags,
> uint64_t gtable)
> {
> - int rc;
> - struct xen_domctl_bind_pt_irq *bind;
> - struct xen_domctl domctl = {};
> -
> - domctl.cmd = XEN_DOMCTL_bind_pt_irq;
> - domctl.domain = domid;
> + uint64_t msi_addr;
> + uint32_t msi_data;
> + int unmasked = !!(gflags & XEN_DOMCTL_VMSI_X86_UNMASKED);
>
> - bind = &(domctl.u.bind_pt_irq);
> - bind->irq_type = PT_IRQ_TYPE_MSI;
> - bind->machine_irq = pirq;
> - bind->u.msi.gvec = gvec;
> - bind->u.msi.gflags = gflags;
> - bind->u.msi.gtable = gtable;
> + xc_msi_gflags_to_addr_data(gvec, gflags, &msi_addr, &msi_data);
>
> - rc = do_domctl(xch, &domctl);
> - return rc;
> + return xendevicemodel_bind_pt_msi_irq(xch->dmod, domid, pirq,
> + msi_addr, msi_data, gtable,
> + unmasked);
> }
>
> int xc_domain_unbind_msi_irq(
> @@ -1710,21 +1718,7 @@ int xc_domain_unbind_msi_irq(
> uint32_t pirq,
> uint32_t gflags)
> {
> - int rc;
> - struct xen_domctl_bind_pt_irq *bind;
> - struct xen_domctl domctl = {};
> -
> - domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
> - domctl.domain = domid;
> -
> - bind = &(domctl.u.bind_pt_irq);
> - bind->irq_type = PT_IRQ_TYPE_MSI;
> - bind->machine_irq = pirq;
> - bind->u.msi.gvec = gvec;
> - bind->u.msi.gflags = gflags;
> -
> - rc = do_domctl(xch, &domctl);
> - return rc;
> + return xendevicemodel_unbind_pt_msi_irq(xch->dmod, domid, pirq);
> }
>
> /* Pass-through: binds machine irq to guests irq */
> diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
> index 8e619eeb0a..adf2c41a96 100644
> --- a/tools/libs/devicemodel/core.c
> +++ b/tools/libs/devicemodel/core.c
> @@ -645,6 +645,44 @@ int xendevicemodel_nr_vcpus(
> return 0;
> }
>
> +int xendevicemodel_bind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint64_t msi_addr, uint32_t msi_data, uint64_t gtable, int unmasked)
> +{
> + struct xen_dm_op op;
> + struct xen_dm_op_bind_pt_msi_irq *data;
> +
> + memset(&op, 0, sizeof(op));
> +
> + op.op = XEN_DMOP_bind_pt_msi_irq;
> + data = &op.u.bind_pt_msi_irq;
> +
> + data->machine_irq = machine_irq;
> + data->data = msi_data;
> + data->addr = msi_addr;
> + data->gtable = gtable;
> + if ( unmasked )
> + data->flags |= XEN_DMOP_MSI_FLAG_UNMASKED;
> +
> + return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
> +int xendevicemodel_unbind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq)
> +{
> + struct xen_dm_op op;
> + struct xen_dm_op_unbind_pt_msi_irq *data;
> +
> + memset(&op, 0, sizeof(op));
> +
> + op.op = XEN_DMOP_unbind_pt_msi_irq;
> + data = &op.u.unbind_pt_msi_irq;
> +
> + data->machine_irq = machine_irq;
> +
> + return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
I think we want to mark
xc_domain_update_msi_irq/xc_domain_unbind_msi_irq as deprecated, since we
have implemented a newer (better) version of them in xendevicemodel and the
old ones are now just wrappers.
> int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid)
> {
> return osdep_xendevicemodel_restrict(dmod, domid);
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index bfbc35c08b..d80a33fe40 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -574,6 +574,14 @@ long arch_do_domctl(
> if ( !is_hvm_domain(d) )
> break;
>
> + /*
> + * PT_IRQ_TYPE_MSI is obsoleted by XEN_DMOP_bind_pt_msi_irq, which
> + * passes raw MSI address/data so Xen can decode extended destination
> + * ID bits. Device models must use the DM op path instead.
> + */
> + if ( bind->irq_type == PT_IRQ_TYPE_MSI )
> + break;
> +
> ret = xsm_bind_pt_irq(XSM_HOOK, d, bind);
> if ( ret )
> break;
> @@ -602,7 +610,7 @@ long arch_do_domctl(
> int irq = domain_pirq_to_irq(d, bind->machine_irq);
>
> ret = -EINVAL;
> - if ( !is_hvm_domain(d) )
> + if ( !is_hvm_domain(d) || bind->irq_type == PT_IRQ_TYPE_MSI )
> break;
>
> ret = -EPERM;
> diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
> index 3b53471af0..ac70cb6405 100644
> --- a/xen/arch/x86/hvm/dm.c
> +++ b/xen/arch/x86/hvm/dm.c
> @@ -7,6 +7,8 @@
> #include <xen/guest_access.h>
> #include <xen/dm.h>
> #include <xen/hypercall.h>
> +#include <xen/iocap.h>
> +#include <xen/iommu.h>
> #include <xen/ioreq.h>
> #include <xen/nospec.h>
> #include <xen/sched.h>
> @@ -350,6 +352,8 @@ int dm_op(const struct dmop_args *op_args)
> [XEN_DMOP_relocate_memory] = sizeof(struct xen_dm_op_relocate_memory),
> [XEN_DMOP_pin_memory_cacheattr] = sizeof(struct xen_dm_op_pin_memory_cacheattr),
> [XEN_DMOP_nr_vcpus] = sizeof(struct xen_dm_op_nr_vcpus),
> + [XEN_DMOP_bind_pt_msi_irq] = sizeof(struct xen_dm_op_bind_pt_msi_irq),
> + [XEN_DMOP_unbind_pt_msi_irq] = sizeof(struct xen_dm_op_unbind_pt_msi_irq),
> };
>
> rc = rcu_lock_remote_domain_by_id(op_args->domid, &d);
> @@ -607,6 +611,68 @@ int dm_op(const struct dmop_args *op_args)
> break;
> }
>
> + case XEN_DMOP_bind_pt_msi_irq:
> + {
> + const struct xen_dm_op_bind_pt_msi_irq *data =
> + &op.u.bind_pt_msi_irq;
> + int irq;
> +
> + rc = -EINVAL;
> + if ( data->pad || (data->flags & ~XEN_DMOP_MSI_FLAG_UNMASKED) )
> + break;
> +
> + irq = domain_pirq_to_irq(d, data->machine_irq);
> +
> + rc = -EPERM;
> + if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> + break;
> +
> + rc = -ESRCH;
> + if ( is_iommu_enabled(d) )
> + {
> + read_lock(&d->pci_lock);
> + rc = pt_irq_bind_msi(d, data->machine_irq, data->addr, data->data,
> + data->gtable,
> + !!(data->flags & XEN_DMOP_MSI_FLAG_UNMASKED));
> + read_unlock(&d->pci_lock);
> + }
> + if ( rc < 0 )
> + printk(XENLOG_G_ERR
> + "XEN_DMOP_bind_pt_msi_irq: pt_irq_bind_msi failed (%ld) for %pd\n",
> + rc, d);
> + break;
> + }
> +
> + case XEN_DMOP_unbind_pt_msi_irq:
> + {
> + const struct xen_dm_op_unbind_pt_msi_irq *data =
> + &op.u.unbind_pt_msi_irq;
> + struct xen_domctl_bind_pt_irq bind = {
> + .machine_irq = data->machine_irq,
> + .irq_type = PT_IRQ_TYPE_MSI,
> + };
> + int irq;
> +
> + irq = domain_pirq_to_irq(d, bind.machine_irq);
> +
> + rc = -EPERM;
> + if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> + break;
> +
> + rc = -ESRCH;
> + if ( is_iommu_enabled(d) )
> + {
> + read_lock(&d->pci_lock);
> + rc = pt_irq_destroy_bind(d, &bind);
> + read_unlock(&d->pci_lock);
> + }
> + if ( rc < 0 )
> + printk(XENLOG_G_ERR
> + "XEN_DMOP_unbind_pt_msi_irq: pt_irq_destroy_bind failed (%ld) for %pd\n",
> + rc, d);
> + break;
> + }
> +
> default:
> rc = ioreq_server_dm_op(&op, d, &const_op);
> break;
> @@ -643,6 +709,8 @@ CHECK_dm_op_remote_shutdown;
> CHECK_dm_op_relocate_memory;
> CHECK_dm_op_pin_memory_cacheattr;
> CHECK_dm_op_nr_vcpus;
> +CHECK_dm_op_bind_pt_msi_irq;
> +CHECK_dm_op_unbind_pt_msi_irq;
>
> int compat_dm_op(
> domid_t domid, unsigned int nr_bufs, XEN_GUEST_HANDLE_PARAM(void) bufs)
> diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c
> index 026534530f..6fb4f8b7dc 100644
> --- a/xen/drivers/passthrough/x86/hvm.c
> +++ b/xen/drivers/passthrough/x86/hvm.c
> @@ -21,7 +21,6 @@
> #include <xen/event.h>
> #include <xen/iommu.h>
> #include <xen/cpu.h>
> -#include <xen/ioreq.h>
> #include <xen/irq.h>
> #include <asm/hvm/irq.h>
> #include <asm/io_apic.h>
> diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
> index 2bf0fdc1ae..43571b7713 100644
> --- a/xen/include/public/hvm/dm_op.h
> +++ b/xen/include/public/hvm/dm_op.h
> @@ -444,6 +444,41 @@ struct xen_dm_op_nr_vcpus {
> };
> typedef struct xen_dm_op_nr_vcpus xen_dm_op_nr_vcpus_t;
>
> +#define XEN_DMOP_bind_pt_msi_irq 21
> +#define XEN_DMOP_unbind_pt_msi_irq 22
> +
> +struct xen_dm_op_bind_pt_msi_irq {
> + /* IN - physical IRQ (pirq) */
> + uint32_t machine_irq;
> + /* IN - MSI data word (bits [7:0] are the guest vector) */
> + uint32_t data;
> + /* IN - flags */
> + uint32_t flags;
> +#define XEN_DMOP_MSI_FLAG_UNMASKED (1u << 0)
> + uint32_t pad;
> + /* IN - MSI address (includes extended destination ID in bits [11:5]) */
> + uint64_aligned_t addr;
> + /* IN - MSI-X table base GFN, 0 for plain MSI */
> + uint64_aligned_t gtable;
> +};
> +
> +typedef struct xen_dm_op_bind_pt_msi_irq xen_dm_op_bind_pt_msi_irq_t;
> +
> +struct xen_dm_op_unbind_pt_msi_irq {
> + /* IN - physical IRQ (pirq) */
> + uint32_t machine_irq;
> +};
> +typedef struct xen_dm_op_unbind_pt_msi_irq xen_dm_op_unbind_pt_msi_irq_t;
> +
> +/*
> + * XEN_DMOP_enable_ext_dest_id: Signal to Xen that this device model will use
> + * XEN_DMOP_bind_pt_msi_irq for all passthrough MSI bindings, passing raw MSI
> + * address/data fields. Once called, Xen will advertise
> + * XEN_HVM_CPUID_EXT_DEST_ID to the guest. Must be called before the guest
> + * starts.
> + */
> +#define XEN_DMOP_enable_ext_dest_id 23
> +
> struct xen_dm_op {
> uint32_t op;
> uint32_t pad;
> @@ -468,6 +503,8 @@ struct xen_dm_op {
> xen_dm_op_relocate_memory_t relocate_memory;
> xen_dm_op_pin_memory_cacheattr_t pin_memory_cacheattr;
> xen_dm_op_nr_vcpus_t nr_vcpus;
> + xen_dm_op_bind_pt_msi_irq_t bind_pt_msi_irq;
> + xen_dm_op_unbind_pt_msi_irq_t unbind_pt_msi_irq;
> } u;
> };
>
> diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
> index 9d08dcc4bb..6dc5f5796a 100644
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -98,6 +98,7 @@
> ? grant_entry_v2 grant_table.h
>
> ! dm_op_buf hvm/dm_op.h
> +? dm_op_bind_pt_msi_irq hvm/dm_op.h
> ? dm_op_create_ioreq_server hvm/dm_op.h
> ? dm_op_destroy_ioreq_server hvm/dm_op.h
> ? dm_op_get_ioreq_server_info hvm/dm_op.h
> @@ -116,6 +117,7 @@
> ? dm_op_set_pci_intx_level hvm/dm_op.h
> ? dm_op_set_pci_link_route hvm/dm_op.h
> ? dm_op_track_dirty_vram hvm/dm_op.h
> +? dm_op_unbind_pt_msi_irq hvm/dm_op.h
>
> ! hvm_altp2m_set_mem_access_multi hvm/hvm_op.h
>
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |