|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [RFC PATCH 4/4] xen/arm: Implement standard PV time interface as per ARM DEN 0057A
On Sun, 22 Jun 2025, Koichiro Den wrote:
> The VCPUOP_register_runstate_memory_area hypercall is still actively
> used, e.g., in the Linux arm64 codebase. When KPTI is enabled, the area
> was not registered from the beginning due to the VA not always being
> valid. In such cases, Linux falls back to using the standard PV time
> interface (ARM DEN 0057A), but this interface has not been implemented
> in Xen for ARM64 (until this commit).
>
> The VCPUOP_register_runstate_phys_area was introduced, though it's
> unclear whether this would be used in Linux arm64 and when it will be
> prevalent amongst every possible downstream domain Linux variant. And of
> course Linux is not an only option for the Xen arm64 domains.
>
> Therefore, implementing the standard way of sharing PV time is
> generically beneficial, avoiding reliance on specially crafted
> hypercalls, the usage of which by guest VMs is not always guaranteed.
> Note that the PV_TIME_ST interface communicates with IPA (GPA), not GVA.
>
> Add the PV time interface according to ARM DEN 0057A.
>
> Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
> ---
> xen/arch/arm/domain.c | 30 +++++++++
> xen/arch/arm/domain_build.c | 87 ++++++++++++++++++++++++-
> xen/arch/arm/include/asm/domain.h | 17 +++++
> xen/arch/arm/include/asm/smccc.h | 12 ++++
> xen/arch/arm/vsmc.c | 38 +++++++++++
> xen/common/device-tree/dom0less-build.c | 2 +-
> xen/include/xen/fdt-domain-build.h | 2 +-
> 7 files changed, 183 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index be58a23dd725..e895e4111f1b 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -277,6 +277,18 @@ static void ctxt_switch_to(struct vcpu *n)
> WRITE_SYSREG(n->arch.mdcr_el2, MDCR_EL2);
> }
>
> +static void update_stolen_time(struct vcpu *n)
> +{
> + uint64_t tot_stolen;
> +
> + if ( is_idle_vcpu(current) )
> + return;
> +
> + tot_stolen = n->runstate.time[RUNSTATE_runnable] +
> + n->runstate.time[RUNSTATE_offline];
> + write_atomic(&n->arch.pv_time_region->stolen_time, tot_stolen);
> +}
> +
> static void schedule_tail(struct vcpu *prev)
> {
> ASSERT(prev != current);
> @@ -291,6 +303,8 @@ static void schedule_tail(struct vcpu *prev)
>
> update_runstate_area(current);
>
> + update_stolen_time(current);
> +
> /* Ensure that the vcpu has an up-to-date time base. */
> update_vcpu_system_time(current);
> }
> @@ -586,6 +600,8 @@ int arch_vcpu_create(struct vcpu *v)
> if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
> v->arch.cpu_info->flags |= CPUINFO_WORKAROUND_2_FLAG;
>
> + v->arch.pv_time_region = &v->domain->arch.pv_time_regions[v->vcpu_id];
> +
> return rc;
>
> fail:
> @@ -707,6 +723,7 @@ int arch_domain_create(struct domain *d,
> unsigned int flags)
> {
> unsigned int count = 0;
> + int order;
unsigned int
> int rc;
>
> BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS);
> @@ -791,6 +808,19 @@ int arch_domain_create(struct domain *d,
> d->arch.sve_vl = config->arch.sve_vl;
> #endif
>
> + /*
> + * Preallocate the stolen time shared memory regions for all the
> + * possible vCPUs.
> + */
> + order = get_order_from_bytes(d->max_vcpus * sizeof(struct
> pv_time_region));
> + d->arch.pv_time_regions_gfn = INVALID_GFN;
> + d->arch.pv_time_regions = alloc_xenheap_pages(order, 0);
> + if ( !d->arch.pv_time_regions ) {
> + rc = -ENOMEM;
> + goto fail;
> + }
> + memset(d->arch.pv_time_regions, 0, PAGE_SIZE << order);
> +
> return 0;
>
> fail:
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 85b6909e2b0e..1c51b53d9c6b 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -1561,8 +1561,80 @@ int __init make_chosen_node(const struct kernel_info
> *kinfo)
> return res;
> }
>
> -int __init make_resv_memory_node(const struct kernel_info *kinfo, int
> addrcells,
> - int sizecells)
> +int __init make_pv_time_resv_memory_node(struct domain *d,
> + const struct kernel_info *kinfo,
> + int addrcells, int sizecells)
> +{
> + __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> + unsigned int len = (addrcells + sizecells) * sizeof(__be32);
> + struct p2m_domain *p2m = p2m_get_hostp2m(d);
> + struct membanks *unused_banks = NULL;
> + void *fdt = kinfo->fdt;
> + unsigned regions_len;
> + gfn_t pv_time_gfn;
> + mfn_t pv_time_mfn;
> + paddr_t aligned;
> + paddr_t avail;
> + __be32 *cells;
> + int res;
> + int i;
unsigned int i
> + /* Find unused regions */
> + regions_len = PAGE_ALIGN(d->max_vcpus * 64);
> + unused_banks = membanks_xzalloc(NR_MEM_BANKS, MEMORY);
> + if ( !unused_banks )
> + return -ENOMEM;
> +
> + res = find_unused_regions(d, kinfo, unused_banks);
> + if ( res ) {
> + printk(XENLOG_WARNING "%pd: failed to find unused regions\n", d);
> + goto fail;
> + }
> + for ( i = 0; i < unused_banks->nr_banks; i++ ) {
> + const struct membank *bank = &unused_banks->bank[i];
> + aligned = PAGE_ALIGN(bank->start);
> + avail = bank->size - (aligned - bank->start);
> + if ( avail >= regions_len )
> + break;
> + }
> + if ( i == unused_banks->nr_banks ) {
> + res = -ENOSPC;
> + goto fail;
> + }
> +
> + /* Insert P2M entry */
> + pv_time_mfn = virt_to_mfn(d->arch.pv_time_regions);
> + pv_time_gfn = gaddr_to_gfn(aligned);
> + p2m_write_lock(p2m);
> + res = p2m_set_entry(p2m, pv_time_gfn, regions_len / PAGE_SIZE,
PFN_DOW
> + pv_time_mfn, p2m_ram_rw, p2m_access_r);
> + p2m_write_unlock(p2m);
> + if ( res ) {
> + printk(XENLOG_WARNING "%pd: failed to set P2M entry for PV_TIME\n",
> d);
> + goto fail;
> + }
> + d->arch.pv_time_regions_gfn = pv_time_gfn;
> +
> + /* Reserve the selected GFN */
> + res = domain_fdt_begin_node(fdt, "pv-time", gfn_x(pv_time_gfn));
> + if ( res )
> + goto fail;
> +
> + cells = reg;
> + dt_child_set_range(&cells, addrcells, sizecells, gfn_x(pv_time_gfn),
> regions_len);
> + res = fdt_property(fdt, "reg", reg, len);
> + if ( res )
> + goto fail;
> +
> + res = fdt_end_node(fdt);
> +
> + fail:
> + xfree(unused_banks);
> + return res;
> +}
> +
> +int __init make_resv_memory_node(struct domain *d, const struct kernel_info
> *kinfo,
> + int addrcells, int sizecells)
> {
> const struct membanks *mem = kernel_info_get_shm_mem_const(kinfo);
> void *fdt = kinfo->fdt;
> @@ -1596,6 +1668,10 @@ int __init make_resv_memory_node(const struct
> kernel_info *kinfo, int addrcells,
> if ( res )
> return res;
>
> + res = make_pv_time_resv_memory_node(d, kinfo, addrcells, sizecells);
> + if ( res )
> + return res;
> +
> res = fdt_end_node(fdt);
>
> return res;
> @@ -1744,6 +1820,11 @@ static int __init handle_node(struct domain *d, struct
> kernel_info *kinfo,
> dt_n_size_cells(node));
> if ( res )
> return res;
> +
> + res = make_pv_time_resv_memory_node(d, kinfo, dt_n_addr_cells(node),
> + dt_n_size_cells(node));
> + if ( res )
> + return res;
> }
>
> for ( child = node->child; child != NULL; child = child->sibling )
> @@ -1791,7 +1872,7 @@ static int __init handle_node(struct domain *d, struct
> kernel_info *kinfo,
>
> if ( !res_mem_node_found )
> {
> - res = make_resv_memory_node(kinfo, addrcells, sizecells);
> + res = make_resv_memory_node(d, kinfo, addrcells, sizecells);
> if ( res )
> return res;
> }
> diff --git a/xen/arch/arm/include/asm/domain.h
> b/xen/arch/arm/include/asm/domain.h
> index a3487ca71303..c231c45fe40f 100644
> --- a/xen/arch/arm/include/asm/domain.h
> +++ b/xen/arch/arm/include/asm/domain.h
> @@ -59,6 +59,18 @@ struct paging_domain {
> unsigned long p2m_total_pages;
> };
>
> +/* Stolen time shared memory region (ARM DEN 0057A.b) */
> +struct pv_time_region {
> + /* This field must be 0 as per ARM DEN 0057A.b */
> + uint32_t revision;
> +
> + /* This field must be 0 as per ARM DEN 0057A.b */
> + uint32_t attribute;
> +
> + /* Total stolen time in nanoseconds */
> + uint64_t stolen_time;
> +} __aligned(64);
> +
> struct arch_domain
> {
> #ifdef CONFIG_ARM_64
> @@ -121,6 +133,9 @@ struct arch_domain
> void *tee;
> #endif
>
> + struct pv_time_region *pv_time_regions;
> + gfn_t pv_time_regions_gfn;
> +
> } __cacheline_aligned;
>
> struct arch_vcpu
> @@ -243,6 +258,8 @@ struct arch_vcpu
> */
> bool need_flush_to_ram;
>
> + struct pv_time_region *pv_time_region;
> +
> } __cacheline_aligned;
>
> void vcpu_show_registers(struct vcpu *v);
> diff --git a/xen/arch/arm/include/asm/smccc.h
> b/xen/arch/arm/include/asm/smccc.h
> index a289c48b7ffd..6207ac74b715 100644
> --- a/xen/arch/arm/include/asm/smccc.h
> +++ b/xen/arch/arm/include/asm/smccc.h
> @@ -380,6 +380,18 @@ void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs
> *args,
> ARM_SMCCC_OWNER_ARCH, \
> 0x3FFF)
>
> +#define ARM_SMCCC_HYP_PV_TIME_FEATURES \
> + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
> + ARM_SMCCC_CONV_64, \
> + ARM_SMCCC_OWNER_HYPERVISOR, \
> + 0x20)
> +
> +#define ARM_SMCCC_HYP_PV_TIME_ST \
> + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
> + ARM_SMCCC_CONV_64, \
> + ARM_SMCCC_OWNER_HYPERVISOR, \
> + 0x21)
> +
> /* SMCCC error codes */
> #define ARM_SMCCC_NOT_REQUIRED (-2)
> #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION (-1)
> diff --git a/xen/arch/arm/vsmc.c b/xen/arch/arm/vsmc.c
> index 6081f14ed0c1..1e2fbc1a62b4 100644
> --- a/xen/arch/arm/vsmc.c
> +++ b/xen/arch/arm/vsmc.c
> @@ -12,6 +12,7 @@
> #include <public/arch-arm/smccc.h>
> #include <asm/cpuerrata.h>
> #include <asm/cpufeature.h>
> +#include <asm/domain.h>
> #include <asm/monitor.h>
> #include <asm/regs.h>
> #include <asm/smccc.h>
> @@ -127,6 +128,10 @@ static bool handle_arch(struct cpu_user_regs *regs)
> if ( cpus_have_cap(ARM_WORKAROUND_BHB_SMCC_3) )
> ret = ARM_SMCCC_SUCCESS;
> break;
> + case ARM_SMCCC_HYP_PV_TIME_FEATURES:
> + if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn,
> INVALID_GFN) )
> + ret = ARM_SMCCC_SUCCESS;
> + break;
> }
>
> set_user_reg(regs, 0, ret);
> @@ -162,6 +167,35 @@ static bool handle_arch(struct cpu_user_regs *regs)
> return false;
> }
>
> +static bool fill_pv_time_features(struct cpu_user_regs *regs)
> +{
> + uint32_t arch_func_id = get_user_reg(regs, 1);
> + int ret = ARM_SMCCC_NOT_SUPPORTED;
> +
> + if ( arch_func_id == ARM_SMCCC_HYP_PV_TIME_ST &&
> + !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) )
> + ret = ARM_SMCCC_SUCCESS;
> +
> + set_user_reg(regs, 0, ret);
> +
> + return true;
> +}
> +
> +static bool fill_pv_time_st(struct cpu_user_regs *regs)
> +{
> + register_t ret = ARM_SMCCC_NOT_SUPPORTED;
> + paddr_t gaddr;
> +
> + if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) )
> + {
> + gaddr = gfn_to_gaddr(current->domain->arch.pv_time_regions_gfn);
> + ret = gaddr + current->vcpu_id * sizeof(struct pv_time_region);
> + }
> +
> + set_user_reg(regs, 0, ret);
> + return true;
> +}
> +
> /* SMCCC interface for hypervisor. Tell about itself. */
> static bool handle_hypervisor(struct cpu_user_regs *regs)
> {
> @@ -176,6 +210,10 @@ static bool handle_hypervisor(struct cpu_user_regs *regs)
> case ARM_SMCCC_REVISION_FID(HYPERVISOR):
> return fill_revision(regs, XEN_SMCCC_MAJOR_REVISION,
> XEN_SMCCC_MINOR_REVISION);
> + case ARM_SMCCC_HYP_PV_TIME_FEATURES:
> + return fill_pv_time_features(regs);
> + case ARM_SMCCC_HYP_PV_TIME_ST:
> + return fill_pv_time_st(regs);
This file is common across 32-bit and 64-bit guests. Looking at the
implementation of fill_pv_time_st, which returns a 64-bit address on a
potentially 32-bit register, it looks like this interface is only meant
for 64-bit guests?
If so, then we should have a check here and return "not supported" for
32-bit callers.
> default:
> return false;
> }
> diff --git a/xen/common/device-tree/dom0less-build.c
> b/xen/common/device-tree/dom0less-build.c
> index 3d503c697337..fa31b1733388 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -502,7 +502,7 @@ static int __init prepare_dtb_domU(struct domain *d,
> struct kernel_info *kinfo)
> if ( ret )
> goto err;
>
> - ret = make_resv_memory_node(kinfo, addrcells, sizecells);
> + ret = make_resv_memory_node(d, kinfo, addrcells, sizecells);
> if ( ret )
> goto err;
>
> diff --git a/xen/include/xen/fdt-domain-build.h
> b/xen/include/xen/fdt-domain-build.h
> index e9418857e386..645e7d0a54aa 100644
> --- a/xen/include/xen/fdt-domain-build.h
> +++ b/xen/include/xen/fdt-domain-build.h
> @@ -25,7 +25,7 @@ int construct_domain(struct domain *d, struct kernel_info
> *kinfo);
> int construct_hwdom(struct kernel_info *kinfo,
> const struct dt_device_node *node);
> int make_chosen_node(const struct kernel_info *kinfo);
> -int make_resv_memory_node(const struct kernel_info *kinfo,
> +int make_resv_memory_node(struct domain *d, const struct kernel_info *kinfo,
> int addrcells, int sizecells);
> int make_cpus_node(const struct domain *d, void *fdt);
> int make_hypervisor_node(struct domain *d, const struct kernel_info *kinfo,
> --
> 2.48.1
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |