 
	
| [Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v5 01/12] xen/arm: enable SVE extension for Xen
 Hi Luca,
> On 12 Apr 2023, at 11:49, Luca Fancellu <Luca.Fancellu@xxxxxxx> wrote:
> 
> Enable Xen to handle the SVE extension, add code in cpufeature module
> to handle ZCR SVE register, disable trapping SVE feature on system
> boot only when SVE resources are accessed.
> While there, correct coding style for the comment on coprocessor
> trapping.
> 
> Now cptr_el2 is part of the domain context and it will be restored
> on context switch, this is a preparation for saving the SVE context
> which will be part of VFP operations, so restore it before the call
> to save VFP registers.
> To save an additional isb barrier, restore cptr_el2 before an
> existing isb barrier and move the call for saving VFP context after
> that barrier.
> 
> Change the KConfig entry to make ARM64_SVE symbol selectable, by
> default it will be not selected.
> 
> Create sve module and sve_asm.S that contains assembly routines for
> the SVE feature, this code is inspired from linux and it uses
> instruction encoding to be compatible with compilers that does not
> support SVE.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@xxxxxxx>
> ---
> Changes from v4:
> - don't use fixed types in vl_to_zcr, forgot to address that in
>   v3, by mistake I changed that in patch 2, fixing now (Jan)
> Changes from v3:
> - no changes
> Changes from v2:
> - renamed sve_asm.S in sve-asm.S, new files should not contain
>   underscore in the name (Jan)
> Changes from v1:
> - Add assert to vl_to_zcr, it is never called with vl==0, but just
>   to be sure it won't in the future.
> Changes from RFC:
> - Moved restoring of cptr before an existing barrier (Julien)
> - Marked the feature as unsupported for now (Julien)
> - Trap and un-trap only when using SVE resources in
>   compute_max_zcr() (Julien)
> ---
> xen/arch/arm/Kconfig                     | 10 +++--
> xen/arch/arm/arm64/Makefile              |  1 +
> xen/arch/arm/arm64/cpufeature.c          |  7 ++--
> xen/arch/arm/arm64/sve-asm.S             | 48 +++++++++++++++++++++++
> xen/arch/arm/arm64/sve.c                 | 50 ++++++++++++++++++++++++
> xen/arch/arm/cpufeature.c                |  6 ++-
> xen/arch/arm/domain.c                    |  9 +++--
> xen/arch/arm/include/asm/arm64/sve.h     | 43 ++++++++++++++++++++
> xen/arch/arm/include/asm/arm64/sysregs.h |  1 +
> xen/arch/arm/include/asm/cpufeature.h    | 14 +++++++
> xen/arch/arm/include/asm/domain.h        |  1 +
> xen/arch/arm/include/asm/processor.h     |  2 +
> xen/arch/arm/setup.c                     |  5 ++-
> xen/arch/arm/traps.c                     | 28 +++++++------
> 14 files changed, 201 insertions(+), 24 deletions(-)
> create mode 100644 xen/arch/arm/arm64/sve-asm.S
> create mode 100644 xen/arch/arm/arm64/sve.c
> create mode 100644 xen/arch/arm/include/asm/arm64/sve.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 239d3aed3c7f..41f45d8d1203 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -112,11 +112,15 @@ config ARM64_PTR_AUTH
>  This feature is not supported in Xen.
> 
> config ARM64_SVE
> - def_bool n
> + bool "Enable Scalar Vector Extension support (UNSUPPORTED)" if UNSUPPORTED
> depends on ARM_64
> help
> -  Scalar Vector Extension support.
> -  This feature is not supported in Xen.
> +  Scalar Vector Extension (SVE/SVE2) support for guests.
I would prevent to mention SVE2 here unless both versions of SVE are supported 
with this config.
Is it the case ?
Cheers
Bertrand
> +
> +  Please be aware that currently, enabling this feature will add latency on
> +  VM context switch between SVE enabled guests, between not-enabled SVE
> +  guests and SVE enabled guests and viceversa, compared to the time
> +  required to switch between not-enabled SVE guests.
> 
> config ARM64_MTE
> def_bool n
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index 6d507da0d44d..24e08fd42596 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -12,6 +12,7 @@ obj-y += insn.o
> obj-$(CONFIG_LIVEPATCH) += livepatch.o
> obj-y += smc.o
> obj-y += smpboot.o
> +obj-$(CONFIG_ARM64_SVE) += sve.o sve-asm.o
> obj-y += traps.o
> obj-y += vfp.o
> obj-y += vsysreg.o
> diff --git a/xen/arch/arm/arm64/cpufeature.c b/xen/arch/arm/arm64/cpufeature.c
> index d9039d37b2d1..b4656ff4d80f 100644
> --- a/xen/arch/arm/arm64/cpufeature.c
> +++ b/xen/arch/arm/arm64/cpufeature.c
> @@ -455,15 +455,11 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
> ARM64_FTR_END,
> };
> 
> -#if 0
> -/* TODO: use this to sanitize SVE once we support it */
> -
> static const struct arm64_ftr_bits ftr_zcr[] = {
> ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
> ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
> ARM64_FTR_END,
> };
> -#endif
> 
> /*
>  * Common ftr bits for a 32bit register with all hidden, strict
> @@ -603,6 +599,9 @@ void update_system_features(const struct cpuinfo_arm *new)
> 
> SANITIZE_ID_REG(zfr64, 0, aa64zfr0);
> 
> + if ( cpu_has_sve )
> + SANITIZE_REG(zcr64, 0, zcr);
> +
> /*
> * Comment from Linux:
> * Userspace may perform DC ZVA instructions. Mismatched block sizes
> diff --git a/xen/arch/arm/arm64/sve-asm.S b/xen/arch/arm/arm64/sve-asm.S
> new file mode 100644
> index 000000000000..4d1549344733
> --- /dev/null
> +++ b/xen/arch/arm/arm64/sve-asm.S
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Arm SVE assembly routines
> + *
> + * Copyright (C) 2022 ARM Ltd.
> + *
> + * Some macros and instruction encoding in this file are taken from linux 
> 6.1.1,
> + * file arch/arm64/include/asm/fpsimdmacros.h, some of them are a modified
> + * version.
> + */
> +
> +/* Sanity-check macros to help avoid encoding garbage instructions */
> +
> +.macro _check_general_reg nr
> +    .if (\nr) < 0 || (\nr) > 30
> +        .error "Bad register number \nr."
> +    .endif
> +.endm
> +
> +.macro _check_num n, min, max
> +    .if (\n) < (\min) || (\n) > (\max)
> +        .error "Number \n out of range [\min,\max]"
> +    .endif
> +.endm
> +
> +/* SVE instruction encodings for non-SVE-capable assemblers */
> +/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
> +
> +/* RDVL X\nx, #\imm */
> +.macro _sve_rdvl nx, imm
> +    _check_general_reg \nx
> +    _check_num (\imm), -0x20, 0x1f
> +    .inst 0x04bf5000                \
> +        | (\nx)                     \
> +        | (((\imm) & 0x3f) << 5)
> +.endm
> +
> +/* Gets the current vector register size in bytes */
> +GLOBAL(sve_get_hw_vl)
> +    _sve_rdvl 0, 1
> +    ret
> +
> +/*
> + * Local variables:
> + * mode: ASM
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/arm64/sve.c b/xen/arch/arm/arm64/sve.c
> new file mode 100644
> index 000000000000..6f3fb368c59b
> --- /dev/null
> +++ b/xen/arch/arm/arm64/sve.c
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Arm SVE feature code
> + *
> + * Copyright (C) 2022 ARM Ltd.
> + */
> +
> +#include <xen/types.h>
> +#include <asm/arm64/sve.h>
> +#include <asm/arm64/sysregs.h>
> +#include <asm/processor.h>
> +#include <asm/system.h>
> +
> +extern unsigned int sve_get_hw_vl(void);
> +
> +register_t compute_max_zcr(void)
> +{
> +    register_t cptr_bits = get_default_cptr_flags();
> +    register_t zcr = vl_to_zcr(SVE_VL_MAX_BITS);
> +    unsigned int hw_vl;
> +
> +    /* Remove trap for SVE resources */
> +    WRITE_SYSREG(cptr_bits & ~HCPTR_CP(8), CPTR_EL2);
> +    isb();
> +
> +    /*
> +     * Set the maximum SVE vector length, doing that we will know the VL
> +     * supported by the platform, calling sve_get_hw_vl()
> +     */
> +    WRITE_SYSREG(zcr, ZCR_EL2);
> +
> +    /*
> +     * Read the maximum VL, which could be lower than what we imposed before,
> +     * hw_vl contains VL in bytes, multiply it by 8 to use vl_to_zcr() later
> +     */
> +    hw_vl = sve_get_hw_vl() * 8U;
> +
> +    /* Restore CPTR_EL2 */
> +    WRITE_SYSREG(cptr_bits, CPTR_EL2);
> +    isb();
> +
> +    return vl_to_zcr(hw_vl);
> +}
> +
> +/* Takes a vector length in bits and returns the ZCR_ELx encoding */
> +register_t vl_to_zcr(unsigned int vl)
> +{
> +    ASSERT(vl > 0);
> +    return ((vl / SVE_VL_MULTIPLE_VAL) - 1U) & ZCR_ELx_LEN_MASK;
> +}
> diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c
> index c4ec38bb2554..83b84368f6d5 100644
> --- a/xen/arch/arm/cpufeature.c
> +++ b/xen/arch/arm/cpufeature.c
> @@ -9,6 +9,7 @@
> #include <xen/init.h>
> #include <xen/smp.h>
> #include <xen/stop_machine.h>
> +#include <asm/arm64/sve.h>
> #include <asm/cpufeature.h>
> 
> DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS);
> @@ -143,6 +144,9 @@ void identify_cpu(struct cpuinfo_arm *c)
> 
>     c->zfr64.bits[0] = READ_SYSREG(ID_AA64ZFR0_EL1);
> 
> +    if ( cpu_has_sve )
> +        c->zcr64.bits[0] = compute_max_zcr();
> +
>     c->dczid.bits[0] = READ_SYSREG(DCZID_EL0);
> 
>     c->ctr.bits[0] = READ_SYSREG(CTR_EL0);
> @@ -199,7 +203,7 @@ static int __init create_guest_cpuinfo(void)
>     guest_cpuinfo.pfr64.mpam = 0;
>     guest_cpuinfo.pfr64.mpam_frac = 0;
> 
> -    /* Hide SVE as Xen does not support it */
> +    /* Hide SVE by default to the guests */
>     guest_cpuinfo.pfr64.sve = 0;
>     guest_cpuinfo.zfr64.bits[0] = 0;
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 99577adb6c69..adb6ace2e24d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -181,9 +181,6 @@ static void ctxt_switch_to(struct vcpu *n)
>     /* VGIC */
>     gic_restore_state(n);
> 
> -    /* VFP */
> -    vfp_restore_state(n);
> -
>     /* XXX MPU */
> 
>     /* Fault Status */
> @@ -234,6 +231,7 @@ static void ctxt_switch_to(struct vcpu *n)
>     p2m_restore_state(n);
> 
>     /* Control Registers */
> +    WRITE_SYSREG(n->arch.cptr_el2, CPTR_EL2);
>     WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
> 
>     /*
> @@ -258,6 +256,9 @@ static void ctxt_switch_to(struct vcpu *n)
> #endif
>     isb();
> 
> +    /* VFP */
> +    vfp_restore_state(n);
> +
>     /* CP 15 */
>     WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);
> 
> @@ -548,6 +549,8 @@ int arch_vcpu_create(struct vcpu *v)
> 
>     v->arch.vmpidr = MPIDR_SMP | vcpuid_to_vaffinity(v->vcpu_id);
> 
> +    v->arch.cptr_el2 = get_default_cptr_flags();
> +
>     v->arch.hcr_el2 = get_default_hcr_flags();
> 
>     v->arch.mdcr_el2 = HDCR_TDRA | HDCR_TDOSA | HDCR_TDA;
> diff --git a/xen/arch/arm/include/asm/arm64/sve.h 
> b/xen/arch/arm/include/asm/arm64/sve.h
> new file mode 100644
> index 000000000000..144d2b1cc485
> --- /dev/null
> +++ b/xen/arch/arm/include/asm/arm64/sve.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Arm SVE feature code
> + *
> + * Copyright (C) 2022 ARM Ltd.
> + */
> +
> +#ifndef _ARM_ARM64_SVE_H
> +#define _ARM_ARM64_SVE_H
> +
> +#define SVE_VL_MAX_BITS (2048U)
> +
> +/* Vector length must be multiple of 128 */
> +#define SVE_VL_MULTIPLE_VAL (128U)
> +
> +#ifdef CONFIG_ARM64_SVE
> +
> +register_t compute_max_zcr(void);
> +register_t vl_to_zcr(unsigned int vl);
> +
> +#else /* !CONFIG_ARM64_SVE */
> +
> +static inline register_t compute_max_zcr(void)
> +{
> +    return 0;
> +}
> +
> +static inline register_t vl_to_zcr(unsigned int vl)
> +{
> +    return 0;
> +}
> +
> +#endif /* CONFIG_ARM64_SVE */
> +
> +#endif /* _ARM_ARM64_SVE_H */
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h 
> b/xen/arch/arm/include/asm/arm64/sysregs.h
> index 463899951414..4cabb9eb4d5e 100644
> --- a/xen/arch/arm/include/asm/arm64/sysregs.h
> +++ b/xen/arch/arm/include/asm/arm64/sysregs.h
> @@ -24,6 +24,7 @@
> #define ICH_EISR_EL2              S3_4_C12_C11_3
> #define ICH_ELSR_EL2              S3_4_C12_C11_5
> #define ICH_VMCR_EL2              S3_4_C12_C11_7
> +#define ZCR_EL2                   S3_4_C1_C2_0
> 
> #define __LR0_EL2(x)              S3_4_C12_C12_ ## x
> #define __LR8_EL2(x)              S3_4_C12_C13_ ## x
> diff --git a/xen/arch/arm/include/asm/cpufeature.h 
> b/xen/arch/arm/include/asm/cpufeature.h
> index c62cf6293fd6..6d703e051906 100644
> --- a/xen/arch/arm/include/asm/cpufeature.h
> +++ b/xen/arch/arm/include/asm/cpufeature.h
> @@ -32,6 +32,12 @@
> #define cpu_has_thumbee   (boot_cpu_feature32(thumbee) == 1)
> #define cpu_has_aarch32   (cpu_has_arm || cpu_has_thumb)
> 
> +#ifdef CONFIG_ARM64_SVE
> +#define cpu_has_sve       (boot_cpu_feature64(sve) == 1)
> +#else
> +#define cpu_has_sve       (0)
> +#endif
> +
> #ifdef CONFIG_ARM_32
> #define cpu_has_gicv3     (boot_cpu_feature32(gic) >= 1)
> #define cpu_has_gentimer  (boot_cpu_feature32(gentimer) == 1)
> @@ -323,6 +329,14 @@ struct cpuinfo_arm {
>         };
>     } isa64;
> 
> +    union {
> +        register_t bits[1];
> +        struct {
> +            unsigned long len:4;
> +            unsigned long __res0:60;
> +        };
> +    } zcr64;
> +
>     struct {
>         register_t bits[1];
>     } zfr64;
> diff --git a/xen/arch/arm/include/asm/domain.h 
> b/xen/arch/arm/include/asm/domain.h
> index 2a51f0ca688e..e776ee704b7d 100644
> --- a/xen/arch/arm/include/asm/domain.h
> +++ b/xen/arch/arm/include/asm/domain.h
> @@ -190,6 +190,7 @@ struct arch_vcpu
>     register_t tpidrro_el0;
> 
>     /* HYP configuration */
> +    register_t cptr_el2;
>     register_t hcr_el2;
>     register_t mdcr_el2;
> 
> diff --git a/xen/arch/arm/include/asm/processor.h 
> b/xen/arch/arm/include/asm/processor.h
> index 54f253087718..bc683334125c 100644
> --- a/xen/arch/arm/include/asm/processor.h
> +++ b/xen/arch/arm/include/asm/processor.h
> @@ -582,6 +582,8 @@ void do_trap_guest_serror(struct cpu_user_regs *regs);
> 
> register_t get_default_hcr_flags(void);
> 
> +register_t get_default_cptr_flags(void);
> +
> /*
>  * Synchronize SError unless the feature is selected.
>  * This is relying on the SErrors are currently unmasked.
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 1f26f67b90e3..5459cc4f5e62 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -135,10 +135,11 @@ static void __init processor_id(void)
>            cpu_has_el2_32 ? "64+32" : cpu_has_el2_64 ? "64" : "No",
>            cpu_has_el1_32 ? "64+32" : cpu_has_el1_64 ? "64" : "No",
>            cpu_has_el0_32 ? "64+32" : cpu_has_el0_64 ? "64" : "No");
> -    printk("    Extensions:%s%s%s\n",
> +    printk("    Extensions:%s%s%s%s\n",
>            cpu_has_fp ? " FloatingPoint" : "",
>            cpu_has_simd ? " AdvancedSIMD" : "",
> -           cpu_has_gicv3 ? " GICv3-SysReg" : "");
> +           cpu_has_gicv3 ? " GICv3-SysReg" : "",
> +           cpu_has_sve ? " SVE" : "");
> 
>     /* Warn user if we find unknown floating-point features */
>     if ( cpu_has_fp && (boot_cpu_feature64(fp) >= 2) )
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index 061c92acbd68..a78a99ddadd0 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -93,6 +93,21 @@ register_t get_default_hcr_flags(void)
>              HCR_TID3|HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB|HCR_TSW);
> }
> 
> +register_t get_default_cptr_flags(void)
> +{
> +    /*
> +     * Trap all coprocessor registers (0-13) except cp10 and
> +     * cp11 for VFP.
> +     *
> +     * /!\ All coprocessors except cp10 and cp11 cannot be used in Xen.
> +     *
> +     * On ARM64 the TCPx bits which we set here (0..9,12,13) are all
> +     * RES1, i.e. they would trap whether we did this write or not.
> +     */
> +    return  ((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) |
> +             HCPTR_TTA | HCPTR_TAM);
> +}
> +
> static enum {
>     SERRORS_DIVERSE,
>     SERRORS_PANIC,
> @@ -122,6 +137,7 @@ __initcall(update_serrors_cpu_caps);
> 
> void init_traps(void)
> {
> +    register_t cptr_bits = get_default_cptr_flags();
>     /*
>      * Setup Hyp vector base. Note they might get updated with the
>      * branch predictor hardening.
> @@ -135,17 +151,7 @@ void init_traps(void)
>     /* Trap CP15 c15 used for implementation defined registers */
>     WRITE_SYSREG(HSTR_T(15), HSTR_EL2);
> 
> -    /* Trap all coprocessor registers (0-13) except cp10 and
> -     * cp11 for VFP.
> -     *
> -     * /!\ All coprocessors except cp10 and cp11 cannot be used in Xen.
> -     *
> -     * On ARM64 the TCPx bits which we set here (0..9,12,13) are all
> -     * RES1, i.e. they would trap whether we did this write or not.
> -     */
> -    WRITE_SYSREG((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) |
> -                 HCPTR_TTA | HCPTR_TAM,
> -                 CPTR_EL2);
> +    WRITE_SYSREG(cptr_bits, CPTR_EL2);
> 
>     /*
>      * Configure HCR_EL2 with the bare minimum to run Xen until a guest
> -- 
> 2.34.1
> 
 
 
 | 
|  | Lists.xenproject.org is hosted with RackSpace, monitoring our |