[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH 4/4] xen/arm: Implement standard PV time interface as per ARM DEN 0057A



On Sun, 22 Jun 2025, Koichiro Den wrote:
> The VCPUOP_register_runstate_memory_area hypercall is still actively
> used, e.g., in the Linux arm64 codebase. When KPTI is enabled, the area
> was not registered from the beginning due to the VA not always being
> valid. In such cases, Linux falls back to using the standard PV time
> interface (ARM DEN 0057A), but this interface has not been implemented
> in Xen for ARM64 (until this commit).
> 
> The VCPUOP_register_runstate_phys_area was introduced, though it's
> unclear whether this would be used in Linux arm64 and when it will be
> prevalent amongst every possible downstream domain Linux variant. And of
> course Linux is not an only option for the Xen arm64 domains.
> 
> Therefore, implementing the standard way of sharing PV time is
> generically beneficial, avoiding reliance on specially crafted
> hypercalls, the usage of which by guest VMs is not always guaranteed.
> Note that the PV_TIME_ST interface communicates with IPA (GPA), not GVA.
> 
> Add the PV time interface according to ARM DEN 0057A.
> 
> Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
> ---
>  xen/arch/arm/domain.c                   | 30 +++++++++
>  xen/arch/arm/domain_build.c             | 87 ++++++++++++++++++++++++-
>  xen/arch/arm/include/asm/domain.h       | 17 +++++
>  xen/arch/arm/include/asm/smccc.h        | 12 ++++
>  xen/arch/arm/vsmc.c                     | 38 +++++++++++
>  xen/common/device-tree/dom0less-build.c |  2 +-
>  xen/include/xen/fdt-domain-build.h      |  2 +-
>  7 files changed, 183 insertions(+), 5 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index be58a23dd725..e895e4111f1b 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -277,6 +277,18 @@ static void ctxt_switch_to(struct vcpu *n)
>      WRITE_SYSREG(n->arch.mdcr_el2, MDCR_EL2);
>  }
>  
> +static void update_stolen_time(struct vcpu *n)
> +{
> +    uint64_t tot_stolen;
> +
> +    if ( is_idle_vcpu(current) )
> +        return;
> +
> +    tot_stolen = n->runstate.time[RUNSTATE_runnable] +
> +                 n->runstate.time[RUNSTATE_offline];
> +    write_atomic(&n->arch.pv_time_region->stolen_time, tot_stolen);
> +}
> +
>  static void schedule_tail(struct vcpu *prev)
>  {
>      ASSERT(prev != current);
> @@ -291,6 +303,8 @@ static void schedule_tail(struct vcpu *prev)
>  
>      update_runstate_area(current);
>  
> +    update_stolen_time(current);
> +
>      /* Ensure that the vcpu has an up-to-date time base. */
>      update_vcpu_system_time(current);
>  }
> @@ -586,6 +600,8 @@ int arch_vcpu_create(struct vcpu *v)
>      if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
>          v->arch.cpu_info->flags |= CPUINFO_WORKAROUND_2_FLAG;
>  
> +    v->arch.pv_time_region = &v->domain->arch.pv_time_regions[v->vcpu_id];
> +
>      return rc;
>  
>  fail:
> @@ -707,6 +723,7 @@ int arch_domain_create(struct domain *d,
>                         unsigned int flags)
>  {
>      unsigned int count = 0;
> +    int order;

unsigned int


>      int rc;
>  
>      BUILD_BUG_ON(GUEST_MAX_VCPUS < MAX_VIRT_CPUS);
> @@ -791,6 +808,19 @@ int arch_domain_create(struct domain *d,
>      d->arch.sve_vl = config->arch.sve_vl;
>  #endif
>  
> +    /*
> +     * Preallocate the stolen time shared memory regions for all the
> +     * possible vCPUs.
> +     */
> +    order = get_order_from_bytes(d->max_vcpus * sizeof(struct 
> pv_time_region));
> +    d->arch.pv_time_regions_gfn = INVALID_GFN;
> +    d->arch.pv_time_regions = alloc_xenheap_pages(order, 0);
> +    if ( !d->arch.pv_time_regions ) {
> +        rc = -ENOMEM;
> +        goto fail;
> +    }
> +    memset(d->arch.pv_time_regions, 0, PAGE_SIZE << order);
> +
>      return 0;
>  
>  fail:
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 85b6909e2b0e..1c51b53d9c6b 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -1561,8 +1561,80 @@ int __init make_chosen_node(const struct kernel_info 
> *kinfo)
>      return res;
>  }
>  
> -int __init make_resv_memory_node(const struct kernel_info *kinfo, int 
> addrcells,
> -                                 int sizecells)
> +int __init make_pv_time_resv_memory_node(struct domain *d,
> +                                         const struct kernel_info *kinfo,
> +                                         int addrcells, int sizecells)
> +{
> +    __be32 reg[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> +    unsigned int len = (addrcells + sizecells) * sizeof(__be32);
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    struct membanks *unused_banks = NULL;
> +    void *fdt = kinfo->fdt;
> +    unsigned regions_len;
> +    gfn_t pv_time_gfn;
> +    mfn_t pv_time_mfn;
> +    paddr_t aligned;
> +    paddr_t avail;
> +    __be32 *cells;
> +    int res;
> +    int i;

unsigned int i


> +    /* Find unused regions */
> +    regions_len = PAGE_ALIGN(d->max_vcpus * 64);
> +    unused_banks = membanks_xzalloc(NR_MEM_BANKS, MEMORY);
> +    if ( !unused_banks )
> +        return -ENOMEM;
> +
> +    res = find_unused_regions(d, kinfo, unused_banks);
> +    if ( res ) {
> +        printk(XENLOG_WARNING "%pd: failed to find unused regions\n", d);
> +        goto fail;
> +    }
> +    for ( i = 0; i < unused_banks->nr_banks; i++ ) {
> +        const struct membank *bank = &unused_banks->bank[i];
> +        aligned = PAGE_ALIGN(bank->start);
> +        avail = bank->size - (aligned - bank->start);
> +        if ( avail >= regions_len )
> +            break;
> +    }
> +    if ( i == unused_banks->nr_banks ) {
> +        res = -ENOSPC;
> +        goto fail;
> +    }
> +
> +    /* Insert P2M entry */
> +    pv_time_mfn = virt_to_mfn(d->arch.pv_time_regions);
> +    pv_time_gfn = gaddr_to_gfn(aligned);
> +    p2m_write_lock(p2m);
> +    res = p2m_set_entry(p2m, pv_time_gfn, regions_len / PAGE_SIZE,

PFN_DOW


> +                        pv_time_mfn, p2m_ram_rw, p2m_access_r);
> +    p2m_write_unlock(p2m);
> +    if ( res ) {
> +        printk(XENLOG_WARNING "%pd: failed to set P2M entry for PV_TIME\n", 
> d);
> +        goto fail;
> +    }
> +    d->arch.pv_time_regions_gfn = pv_time_gfn;
> +
> +    /* Reserve the selected GFN */
> +    res = domain_fdt_begin_node(fdt, "pv-time", gfn_x(pv_time_gfn));
> +    if ( res )
> +        goto fail;
> +
> +    cells = reg;
> +    dt_child_set_range(&cells, addrcells, sizecells, gfn_x(pv_time_gfn), 
> regions_len);
> +    res = fdt_property(fdt, "reg", reg, len);
> +    if ( res )
> +        goto fail;
> +
> +    res = fdt_end_node(fdt);
> +
> +  fail:
> +    xfree(unused_banks);
> +    return res;
> +}
> +
> +int __init make_resv_memory_node(struct domain *d, const struct kernel_info 
> *kinfo,
> +                                 int addrcells, int sizecells)
>  {
>      const struct membanks *mem = kernel_info_get_shm_mem_const(kinfo);
>      void *fdt = kinfo->fdt;
> @@ -1596,6 +1668,10 @@ int __init make_resv_memory_node(const struct 
> kernel_info *kinfo, int addrcells,
>      if ( res )
>          return res;
>  
> +    res = make_pv_time_resv_memory_node(d, kinfo, addrcells, sizecells);
> +    if ( res )
> +        return res;
> +
>      res = fdt_end_node(fdt);
>  
>      return res;
> @@ -1744,6 +1820,11 @@ static int __init handle_node(struct domain *d, struct 
> kernel_info *kinfo,
>                                          dt_n_size_cells(node));
>          if ( res )
>              return res;
> +
> +        res = make_pv_time_resv_memory_node(d, kinfo, dt_n_addr_cells(node),
> +                                            dt_n_size_cells(node));
> +        if ( res )
> +            return res;
>      }
>  
>      for ( child = node->child; child != NULL; child = child->sibling )
> @@ -1791,7 +1872,7 @@ static int __init handle_node(struct domain *d, struct 
> kernel_info *kinfo,
>  
>          if ( !res_mem_node_found )
>          {
> -            res = make_resv_memory_node(kinfo, addrcells, sizecells);
> +            res = make_resv_memory_node(d, kinfo, addrcells, sizecells);
>              if ( res )
>                  return res;
>          }
> diff --git a/xen/arch/arm/include/asm/domain.h 
> b/xen/arch/arm/include/asm/domain.h
> index a3487ca71303..c231c45fe40f 100644
> --- a/xen/arch/arm/include/asm/domain.h
> +++ b/xen/arch/arm/include/asm/domain.h
> @@ -59,6 +59,18 @@ struct paging_domain {
>      unsigned long p2m_total_pages;
>  };
>  
> +/* Stolen time shared memory region (ARM DEN 0057A.b) */
> +struct pv_time_region {
> +    /* This field must be 0 as per ARM DEN 0057A.b */
> +    uint32_t revision;
> +
> +    /* This field must be 0 as per ARM DEN 0057A.b */
> +    uint32_t attribute;
> +
> +    /* Total stolen time in nanoseconds */
> +    uint64_t stolen_time;
> +} __aligned(64);
> +
>  struct arch_domain
>  {
>  #ifdef CONFIG_ARM_64
> @@ -121,6 +133,9 @@ struct arch_domain
>      void *tee;
>  #endif
>  
> +    struct pv_time_region *pv_time_regions;
> +    gfn_t pv_time_regions_gfn;
> +
>  }  __cacheline_aligned;
>  
>  struct arch_vcpu
> @@ -243,6 +258,8 @@ struct arch_vcpu
>       */
>      bool need_flush_to_ram;
>  
> +    struct pv_time_region *pv_time_region;
> +
>  }  __cacheline_aligned;
>  
>  void vcpu_show_registers(struct vcpu *v);
> diff --git a/xen/arch/arm/include/asm/smccc.h 
> b/xen/arch/arm/include/asm/smccc.h
> index a289c48b7ffd..6207ac74b715 100644
> --- a/xen/arch/arm/include/asm/smccc.h
> +++ b/xen/arch/arm/include/asm/smccc.h
> @@ -380,6 +380,18 @@ void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs 
> *args,
>                         ARM_SMCCC_OWNER_ARCH,        \
>                         0x3FFF)
>  
> +#define ARM_SMCCC_HYP_PV_TIME_FEATURES              \
> +    ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,         \
> +                       ARM_SMCCC_CONV_64,           \
> +                       ARM_SMCCC_OWNER_HYPERVISOR,  \
> +                       0x20)
> +
> +#define ARM_SMCCC_HYP_PV_TIME_ST                    \
> +    ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,         \
> +                       ARM_SMCCC_CONV_64,           \
> +                       ARM_SMCCC_OWNER_HYPERVISOR,  \
> +                       0x21)
> +
>  /* SMCCC error codes */
>  #define ARM_SMCCC_NOT_REQUIRED          (-2)
>  #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
> diff --git a/xen/arch/arm/vsmc.c b/xen/arch/arm/vsmc.c
> index 6081f14ed0c1..1e2fbc1a62b4 100644
> --- a/xen/arch/arm/vsmc.c
> +++ b/xen/arch/arm/vsmc.c
> @@ -12,6 +12,7 @@
>  #include <public/arch-arm/smccc.h>
>  #include <asm/cpuerrata.h>
>  #include <asm/cpufeature.h>
> +#include <asm/domain.h>
>  #include <asm/monitor.h>
>  #include <asm/regs.h>
>  #include <asm/smccc.h>
> @@ -127,6 +128,10 @@ static bool handle_arch(struct cpu_user_regs *regs)
>              if ( cpus_have_cap(ARM_WORKAROUND_BHB_SMCC_3) )
>                  ret = ARM_SMCCC_SUCCESS;
>              break;
> +        case ARM_SMCCC_HYP_PV_TIME_FEATURES:
> +            if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn, 
> INVALID_GFN) )
> +                ret = ARM_SMCCC_SUCCESS;
> +            break;
>          }
>  
>          set_user_reg(regs, 0, ret);
> @@ -162,6 +167,35 @@ static bool handle_arch(struct cpu_user_regs *regs)
>      return false;
>  }
>  
> +static bool fill_pv_time_features(struct cpu_user_regs *regs)
> +{
> +    uint32_t arch_func_id = get_user_reg(regs, 1);
> +    int ret = ARM_SMCCC_NOT_SUPPORTED;
> +
> +    if ( arch_func_id == ARM_SMCCC_HYP_PV_TIME_ST &&
> +         !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) )
> +        ret = ARM_SMCCC_SUCCESS;
> +
> +    set_user_reg(regs, 0, ret);
> +
> +    return true;
> +}
> +
> +static bool fill_pv_time_st(struct cpu_user_regs *regs)
> +{
> +    register_t ret = ARM_SMCCC_NOT_SUPPORTED;
> +    paddr_t gaddr;
> +
> +    if ( !gfn_eq(current->domain->arch.pv_time_regions_gfn, INVALID_GFN) )
> +    {
> +        gaddr = gfn_to_gaddr(current->domain->arch.pv_time_regions_gfn);
> +        ret = gaddr + current->vcpu_id * sizeof(struct pv_time_region);
> +    }
> +
> +    set_user_reg(regs, 0, ret);
> +    return true;
> +}
> +
>  /* SMCCC interface for hypervisor. Tell about itself. */
>  static bool handle_hypervisor(struct cpu_user_regs *regs)
>  {
> @@ -176,6 +210,10 @@ static bool handle_hypervisor(struct cpu_user_regs *regs)
>      case ARM_SMCCC_REVISION_FID(HYPERVISOR):
>          return fill_revision(regs, XEN_SMCCC_MAJOR_REVISION,
>                               XEN_SMCCC_MINOR_REVISION);
> +    case ARM_SMCCC_HYP_PV_TIME_FEATURES:
> +        return fill_pv_time_features(regs);
> +    case ARM_SMCCC_HYP_PV_TIME_ST:
> +        return fill_pv_time_st(regs);

This file is common across 32-bit and 64-bit guests. Looking at the
implementation of fill_pv_time_st, which returns a 64-bit address on a
potentially 32-bit register, it looks like this interface is only meant
for 64-bit guests?

If so, then we should have a check here and return "not supported" for
32-bit callers.


>      default:
>          return false;
>      }
> diff --git a/xen/common/device-tree/dom0less-build.c 
> b/xen/common/device-tree/dom0less-build.c
> index 3d503c697337..fa31b1733388 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -502,7 +502,7 @@ static int __init prepare_dtb_domU(struct domain *d, 
> struct kernel_info *kinfo)
>      if ( ret )
>          goto err;
>  
> -    ret = make_resv_memory_node(kinfo, addrcells, sizecells);
> +    ret = make_resv_memory_node(d, kinfo, addrcells, sizecells);
>      if ( ret )
>          goto err;
>  
> diff --git a/xen/include/xen/fdt-domain-build.h 
> b/xen/include/xen/fdt-domain-build.h
> index e9418857e386..645e7d0a54aa 100644
> --- a/xen/include/xen/fdt-domain-build.h
> +++ b/xen/include/xen/fdt-domain-build.h
> @@ -25,7 +25,7 @@ int construct_domain(struct domain *d, struct kernel_info 
> *kinfo);
>  int construct_hwdom(struct kernel_info *kinfo,
>                      const struct dt_device_node *node);
>  int make_chosen_node(const struct kernel_info *kinfo);
> -int make_resv_memory_node(const struct kernel_info *kinfo,
> +int make_resv_memory_node(struct domain *d, const struct kernel_info *kinfo,
>                            int addrcells, int sizecells);
>  int make_cpus_node(const struct domain *d, void *fdt);
>  int make_hypervisor_node(struct domain *d, const struct kernel_info *kinfo,
> -- 
> 2.48.1
> 



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.