Re: [PATCH v5 05/12] arm/sve: save/restore SVE context switch
Hi Luca,

> On 12 Apr 2023, at 11:49, Luca Fancellu <Luca.Fancellu@xxxxxxx> wrote:
>
> Save/restore context switch for SVE: allocate memory to contain
> the Z0-31 registers, whose length is at most 2048 bits each, and
> FFR, which can be at most 256 bits; the amount of memory allocated
> depends on the vector length configured for the domain and on how
> many bits are supported by the platform.
>
> Save P0-15, whose length is at most 256 bits each; in this case the
> memory used is the fpregs field in struct vfp_state, because V0-31
> are part of Z0-31 and this space would otherwise be unused for an
> SVE domain.
>
> Create zcr_el{1,2} fields in arch_vcpu, initialise zcr_el2 on vcpu
> creation given the requested vector length and restore it on
> context switch; save/restore the ZCR_EL1 value as well.
>
> Remove headers from sve.c that are already included via
> xen/sched.h.
>
> Signed-off-by: Luca Fancellu <luca.fancellu@xxxxxxx>
> ---
> Changes from v4:
>  - No changes
> Changes from v3:
>  - don't use fixed len types when not needed (Jan)
>  - now VL is an encoded value, decode it before using.
> Changes from v2:
>  - No changes
> Changes from v1:
>  - No changes
> Changes from RFC:
>  - Moved zcr_el2 field introduction in this patch, restore its
>    content inside sve_restore_state function. (Julien)
> ---
> xen/arch/arm/arm64/sve-asm.S             | 141 +++++++++++++++++++++++
> xen/arch/arm/arm64/sve.c                 |  68 ++++++++++-
> xen/arch/arm/arm64/vfp.c                 |  79 +++++++------
> xen/arch/arm/domain.c                    |   7 ++
> xen/arch/arm/include/asm/arm64/sve.h     |  13 +++
> xen/arch/arm/include/asm/arm64/sysregs.h |   3 +
> xen/arch/arm/include/asm/arm64/vfp.h     |  10 ++
> xen/arch/arm/include/asm/domain.h        |   2 +
> 8 files changed, 284 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/arm/arm64/sve-asm.S b/xen/arch/arm/arm64/sve-asm.S
> index 4d1549344733..8c37d7bc95d5 100644
> --- a/xen/arch/arm/arm64/sve-asm.S
> +++ b/xen/arch/arm/arm64/sve-asm.S
> @@ -17,6 +17,18 @@
>     .endif
> .endm
>
> +.macro _sve_check_zreg znr
> +    .if (\znr) < 0 || (\znr) > 31
> +        .error "Bad Scalable Vector Extension vector register number \znr."
> +    .endif
> +.endm
> +
> +.macro _sve_check_preg pnr
> +    .if (\pnr) < 0 || (\pnr) > 15
> +        .error "Bad Scalable Vector Extension predicate register number \pnr."
> +    .endif
> +.endm
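
These check/encoding helpers look like they were taken from Linux's
arch/arm64/include/asm/fpsimdmacros.h. If that is the case, maybe add a
short note about the origin near the top of the hunk, so the two copies
can be kept in sync more easily. Something like this (wording is just a
suggestion):

    /*
     * Based on Linux arch/arm64/include/asm/fpsimdmacros.h: SVE
     * instruction encodings and operand checks for assemblers that
     * do not support SVE.
     */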
>
> .macro _check_num n, min, max
>     .if (\n) < (\min) || (\n) > (\max)
>         .error "Number \n out of range [\min,\max]"
> @@ -26,6 +38,54 @@
>
> /* SVE instruction encodings for non-SVE-capable assemblers */
> /* (pre binutils 2.28, all kernel capable clang versions support SVE) */
>
> +/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
> +.macro _sve_str_v nz, nxbase, offset=0
> +    _sve_check_zreg \nz
> +    _check_general_reg \nxbase
> +    _check_num (\offset), -0x100, 0xff
> +    .inst 0xe5804000 \
> +        | (\nz) \
> +        | ((\nxbase) << 5) \
> +        | (((\offset) & 7) << 10) \
> +        | (((\offset) & 0x1f8) << 13)
> +.endm
> +
> +/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
> +.macro _sve_ldr_v nz, nxbase, offset=0
> +    _sve_check_zreg \nz
> +    _check_general_reg \nxbase
> +    _check_num (\offset), -0x100, 0xff
> +    .inst 0x85804000 \
> +        | (\nz) \
> +        | ((\nxbase) << 5) \
> +        | (((\offset) & 7) << 10) \
> +        | (((\offset) & 0x1f8) << 13)
> +.endm
> +
> +/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
> +.macro _sve_str_p np, nxbase, offset=0
> +    _sve_check_preg \np
> +    _check_general_reg \nxbase
> +    _check_num (\offset), -0x100, 0xff
> +    .inst 0xe5800000 \
> +        | (\np) \
> +        | ((\nxbase) << 5) \
> +        | (((\offset) & 7) << 10) \
> +        | (((\offset) & 0x1f8) << 13)
> +.endm
> +
> +/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
> +.macro _sve_ldr_p np, nxbase, offset=0
> +    _sve_check_preg \np
> +    _check_general_reg \nxbase
> +    _check_num (\offset), -0x100, 0xff
> +    .inst 0x85800000 \
> +        | (\np) \
> +        | ((\nxbase) << 5) \
> +        | (((\offset) & 7) << 10) \
> +        | (((\offset) & 0x1f8) << 13)
> +.endm
> +
> /* RDVL X\nx, #\imm */
> .macro _sve_rdvl nx, imm
>     _check_general_reg \nx
> @@ -35,11 +95,92 @@
>         | (((\imm) & 0x3f) << 5)
> .endm
>
> +/* RDFFR (unpredicated): RDFFR P\np.B */
> +.macro _sve_rdffr np
> +    _sve_check_preg \np
> +    .inst 0x2519f000 \
> +        | (\np)
> +.endm
> +
> +/* WRFFR P\np.B */
> +.macro _sve_wrffr np
> +    _sve_check_preg \np
> +    .inst 0x25289000 \
> +        | ((\np) << 5)
> +.endm
> +
> +.macro __for from:req, to:req
> +    .if (\from) == (\to)
> +        _for__body %\from
> +    .else
> +        __for %\from, %((\from) + ((\to) - (\from)) / 2)
> +        __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
> +    .endif
> +.endm
> +
> +.macro _for var:req, from:req, to:req, insn:vararg
> +    .macro _for__body \var:req
> +        .noaltmacro
> +        \insn
> +        .altmacro
> +    .endm
> +
> +    .altmacro
> +    __for \from, \to
> +    .noaltmacro
> +
> +    .purgem _for__body
> +.endm
> +
> +.macro sve_save nxzffrctx, nxpctx, save_ffr
> +    _for n, 0, 31, _sve_str_v \n, \nxzffrctx, \n - 32
> +    _for n, 0, 15, _sve_str_p \n, \nxpctx, \n
> +    cbz \save_ffr, 1f
> +    _sve_rdffr 0
> +    _sve_str_p 0, \nxzffrctx
> +    _sve_ldr_p 0, \nxpctx
> +    b 2f
> +1:
> +    str xzr, [x\nxzffrctx] // Zero out FFR
> +2:
> +.endm
> +
> +.macro sve_load nxzffrctx, nxpctx, restore_ffr
> +    _for n, 0, 31, _sve_ldr_v \n, \nxzffrctx, \n - 32
> +    cbz \restore_ffr, 1f
> +    _sve_ldr_p 0, \nxzffrctx
> +    _sve_wrffr 0
> +1:
> +    _for n, 0, 15, _sve_ldr_p \n, \nxpctx, \n
> +.endm
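
Not a blocker, but it took me a moment to see that \nxzffrctx must
point at the FFR slot, i.e. one full Z area (32 vector lengths) past
the start of the buffer, because the Z accesses use negative MUL VL
offsets (\n - 32). A comment on sve_save/sve_load would help, maybe
something along these lines (wording is only a suggestion):

    /*
     * Layout of the Z0-31 + FFR buffer, with \nxzffrctx pointing at
     * the FFR slot (base + 32 * VL):
     *
     *   | Z0 | Z1 | ... | Z31 | FFR |
     *   ^base                 ^\nxzffrctx
     *
     * Zn is thus addressed as [x\nxzffrctx, #(n - 32), MUL VL].
     */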
>
> /* Gets the current vector register size in bytes */
> GLOBAL(sve_get_hw_vl)
>     _sve_rdvl 0, 1
>     ret
>
> +/*
> + * Save the SVE context
> + *
> + * x0 - pointer to buffer for Z0-31 + FFR
> + * x1 - pointer to buffer for P0-15
> + * x2 - Save FFR if non-zero
> + */
> +GLOBAL(sve_save_ctx)
> +    sve_save 0, 1, x2
> +    ret
> +
> +/*
> + * Load the SVE context
> + *
> + * x0 - pointer to buffer for Z0-31 + FFR
> + * x1 - pointer to buffer for P0-15
> + * x2 - Restore FFR if non-zero
> + */
> +GLOBAL(sve_load_ctx)
> +    sve_load 0, 1, x2
> +    ret
> +
> /*
>  * Local variables:
>  * mode: ASM
> diff --git a/xen/arch/arm/arm64/sve.c b/xen/arch/arm/arm64/sve.c
> index 78f7482619da..5485648850a0 100644
> --- a/xen/arch/arm/arm64/sve.c
> +++ b/xen/arch/arm/arm64/sve.c
> @@ -5,14 +5,29 @@
>  * Copyright (C) 2022 ARM Ltd.
>  */
>
> -#include <xen/types.h>
> -#include <asm/cpufeature.h>
> +#include <xen/sched.h>
> +#include <xen/sizes.h>
> #include <asm/arm64/sve.h>
> -#include <asm/arm64/sysregs.h>
> -#include <asm/processor.h>
> -#include <asm/system.h>
>
> extern unsigned int sve_get_hw_vl(void);
> +extern void sve_save_ctx(uint64_t *sve_ctx, uint64_t *pregs, int save_ffr);
> +extern void sve_load_ctx(uint64_t const *sve_ctx, uint64_t const *pregs,
> +                         int restore_ffr);
> +
> +static inline unsigned int sve_zreg_ctx_size(unsigned int vl)
> +{
> +    /*
> +     * Z0-31 registers size in bytes is computed from VL that is in bits, so
> +     * VL in bytes is VL/8.
> +     */
> +    return (vl / 8U) * 32U;
> +}
> +
> +static inline unsigned int sve_ffrreg_ctx_size(unsigned int vl)
> +{
> +    /* FFR register size is VL/8, which is in bytes (VL/8)/8 */
> +    return (vl / 64U);
> +}
>
> register_t compute_max_zcr(void)
> {
> @@ -60,3 +75,46 @@ unsigned int get_sys_vl_len(void)
>     return ((system_cpuinfo.zcr64.bits[0] & ZCR_ELx_LEN_MASK) + 1U) *
>            SVE_VL_MULTIPLE_VAL;
> }
> +
> +int sve_context_init(struct vcpu *v)
> +{
> +    unsigned int sve_vl_bits = sve_decode_vl(v->domain->arch.sve_vl);
> +    uint64_t *ctx = _xzalloc(sve_zreg_ctx_size(sve_vl_bits) +
> +                             sve_ffrreg_ctx_size(sve_vl_bits),
> +                             L1_CACHE_BYTES);
> +
> +    if ( !ctx )
> +        return -ENOMEM;
> +
> +    v->arch.vfp.sve_context = ctx;
> +
> +    return 0;
> +}
> +
> +void sve_context_free(struct vcpu *v)
> +{
> +    xfree(v->arch.vfp.sve_context);
> +}
> +
> +void sve_save_state(struct vcpu *v)
> +{
> +    unsigned int sve_vl_bits = sve_decode_vl(v->domain->arch.sve_vl);
> +    uint64_t *sve_ctx_zreg_end = v->arch.vfp.sve_context +
> +        (sve_zreg_ctx_size(sve_vl_bits) / sizeof(uint64_t));

You do quite some computation here for something which does not change
during the life of the VM. Could we save the context_end in the vcpu
instead and just do this computation on init and free only?

> +
> +    v->arch.zcr_el1 = READ_SYSREG(ZCR_EL1);
> +
> +    sve_save_ctx(sve_ctx_zreg_end, v->arch.vfp.fpregs, 1);
> +}
> +
> +void sve_restore_state(struct vcpu *v)
> +{
> +    unsigned int sve_vl_bits = sve_decode_vl(v->domain->arch.sve_vl);
> +    uint64_t *sve_ctx_zreg_end = v->arch.vfp.sve_context +
> +        (sve_zreg_ctx_size(sve_vl_bits) / sizeof(uint64_t));

Same as before.
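
Something along these lines maybe (only an untested sketch, and the
sve_zreg_ctx_end field name is just a suggestion). For a 512-bit VL,
for instance, the Z area is (512/8) * 32 = 2048 bytes followed by
512/64 = 8 bytes of FFR, and that end pointer never moves after the
allocation, so it could be computed once in sve_context_init():

    /* In struct vfp_state: points at the FFR slot, i.e. the end of
       the Z0-31 area, computed once at allocation time. */
    uint64_t *sve_zreg_ctx_end;

    /* In sve_context_init(), after the allocation succeeded: */
    v->arch.vfp.sve_context = ctx;
    v->arch.vfp.sve_zreg_ctx_end = ctx +
        (sve_zreg_ctx_size(sve_vl_bits) / sizeof(uint64_t));

    /* sve_save_state()/sve_restore_state() then reduce to e.g.: */
    sve_save_ctx(v->arch.vfp.sve_zreg_ctx_end, v->arch.vfp.fpregs, 1);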
> +
> +    WRITE_SYSREG(v->arch.zcr_el1, ZCR_EL1);
> +    WRITE_SYSREG(v->arch.zcr_el2, ZCR_EL2);
> +
> +    sve_load_ctx(sve_ctx_zreg_end, v->arch.vfp.fpregs, 1);
> +}
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index 47885e76baae..2d0d7c2e6ddb 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -2,29 +2,35 @@
> #include <asm/processor.h>
> #include <asm/cpufeature.h>
> #include <asm/vfp.h>
> +#include <asm/arm64/sve.h>
>
> void vfp_save_state(struct vcpu *v)
> {
>     if ( !cpu_has_fp )
>         return;
>
> -    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> -                 "stp q2, q3, [%1, #16 * 2]\n\t"
> -                 "stp q4, q5, [%1, #16 * 4]\n\t"
> -                 "stp q6, q7, [%1, #16 * 6]\n\t"
> -                 "stp q8, q9, [%1, #16 * 8]\n\t"
> -                 "stp q10, q11, [%1, #16 * 10]\n\t"
> -                 "stp q12, q13, [%1, #16 * 12]\n\t"
> -                 "stp q14, q15, [%1, #16 * 14]\n\t"
> -                 "stp q16, q17, [%1, #16 * 16]\n\t"
> -                 "stp q18, q19, [%1, #16 * 18]\n\t"
> -                 "stp q20, q21, [%1, #16 * 20]\n\t"
> -                 "stp q22, q23, [%1, #16 * 22]\n\t"
> -                 "stp q24, q25, [%1, #16 * 24]\n\t"
> -                 "stp q26, q27, [%1, #16 * 26]\n\t"
> -                 "stp q28, q29, [%1, #16 * 28]\n\t"
> -                 "stp q30, q31, [%1, #16 * 30]\n\t"
> -                 : "=Q" (*v->arch.vfp.fpregs) : "r" (v->arch.vfp.fpregs));
> +    if ( is_sve_domain(v->domain) )
> +        sve_save_state(v);
> +    else
> +    {
> +        asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> +                     "stp q2, q3, [%1, #16 * 2]\n\t"
> +                     "stp q4, q5, [%1, #16 * 4]\n\t"
> +                     "stp q6, q7, [%1, #16 * 6]\n\t"
> +                     "stp q8, q9, [%1, #16 * 8]\n\t"
> +                     "stp q10, q11, [%1, #16 * 10]\n\t"
> +                     "stp q12, q13, [%1, #16 * 12]\n\t"
> +                     "stp q14, q15, [%1, #16 * 14]\n\t"
> +                     "stp q16, q17, [%1, #16 * 16]\n\t"
> +                     "stp q18, q19, [%1, #16 * 18]\n\t"
> +                     "stp q20, q21, [%1, #16 * 20]\n\t"
> +                     "stp q22, q23, [%1, #16 * 22]\n\t"
> +                     "stp q24, q25, [%1, #16 * 24]\n\t"
> +                     "stp q26, q27, [%1, #16 * 26]\n\t"
> +                     "stp q28, q29, [%1, #16 * 28]\n\t"
> +                     "stp q30, q31, [%1, #16 * 30]\n\t"
> +                     : "=Q" (*v->arch.vfp.fpregs) : "r" (v->arch.vfp.fpregs));
> +    }
>
>     v->arch.vfp.fpsr = READ_SYSREG(FPSR);
>     v->arch.vfp.fpcr = READ_SYSREG(FPCR);
> @@ -37,23 +43,28 @@ void vfp_restore_state(struct vcpu *v)
>     if ( !cpu_has_fp )
>         return;
>
> -    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
> -                 "ldp q2, q3, [%1, #16 * 2]\n\t"
> -                 "ldp q4, q5, [%1, #16 * 4]\n\t"
> -                 "ldp q6, q7, [%1, #16 * 6]\n\t"
> -                 "ldp q8, q9, [%1, #16 * 8]\n\t"
> -                 "ldp q10, q11, [%1, #16 * 10]\n\t"
> -                 "ldp q12, q13, [%1, #16 * 12]\n\t"
> -                 "ldp q14, q15, [%1, #16 * 14]\n\t"
> -                 "ldp q16, q17, [%1, #16 * 16]\n\t"
> -                 "ldp q18, q19, [%1, #16 * 18]\n\t"
> -                 "ldp q20, q21, [%1, #16 * 20]\n\t"
> -                 "ldp q22, q23, [%1, #16 * 22]\n\t"
> -                 "ldp q24, q25, [%1, #16 * 24]\n\t"
> -                 "ldp q26, q27, [%1, #16 * 26]\n\t"
> -                 "ldp q28, q29, [%1, #16 * 28]\n\t"
> -                 "ldp q30, q31, [%1, #16 * 30]\n\t"
> -                 : : "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs));
> +    if ( is_sve_domain(v->domain) )
> +        sve_restore_state(v);
> +    else
> +    {
> +        asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
> +                     "ldp q2, q3, [%1, #16 * 2]\n\t"
> +                     "ldp q4, q5, [%1, #16 * 4]\n\t"
> +                     "ldp q6, q7, [%1, #16 * 6]\n\t"
> +                     "ldp q8, q9, [%1, #16 * 8]\n\t"
> +                     "ldp q10, q11, [%1, #16 * 10]\n\t"
> +                     "ldp q12, q13, [%1, #16 * 12]\n\t"
> +                     "ldp q14, q15, [%1, #16 * 14]\n\t"
> +                     "ldp q16, q17, [%1, #16 * 16]\n\t"
> +                     "ldp q18, q19, [%1, #16 * 18]\n\t"
> +                     "ldp q20, q21, [%1, #16 * 20]\n\t"
> +                     "ldp q22, q23, [%1, #16 * 22]\n\t"
> +                     "ldp q24, q25, [%1, #16 * 24]\n\t"
> +                     "ldp q26, q27, [%1, #16 * 26]\n\t"
> +                     "ldp q28, q29, [%1, #16 * 28]\n\t"
> +                     "ldp q30, q31, [%1, #16 * 30]\n\t"
> +                     : : "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs));
> +    }
>
>     WRITE_SYSREG(v->arch.vfp.fpsr, FPSR);
>     WRITE_SYSREG(v->arch.vfp.fpcr, FPCR);
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 769fae8fe25e..060fc30bbb5d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -552,7 +552,12 @@ int arch_vcpu_create(struct vcpu *v)
>
>     v->arch.cptr_el2 = get_default_cptr_flags();
>     if ( is_sve_domain(v->domain) )
> +    {
> +        if ( (rc = sve_context_init(v)) != 0 )
> +            goto fail;
>         v->arch.cptr_el2 &= ~HCPTR_CP(8);
> +        v->arch.zcr_el2 = vl_to_zcr(sve_decode_vl(v->domain->arch.sve_vl));
> +    }
>
>     v->arch.hcr_el2 = get_default_hcr_flags();
>
> @@ -582,6 +587,8 @@ fail:
>
> void arch_vcpu_destroy(struct vcpu *v)
> {
> +    if ( is_sve_domain(v->domain) )
> +        sve_context_free(v);
>     vcpu_timer_destroy(v);
>     vcpu_vgic_free(v);
>     free_xenheap_pages(v->arch.stack, STACK_ORDER);
> diff --git a/xen/arch/arm/include/asm/arm64/sve.h b/xen/arch/arm/include/asm/arm64/sve.h
> index a4c53e3e8e2e..fc162c9d2cf7 100644
> --- a/xen/arch/arm/include/asm/arm64/sve.h
> +++ b/xen/arch/arm/include/asm/arm64/sve.h
> @@ -24,6 +24,10 @@ static inline unsigned int sve_decode_vl(unsigned int sve_vl)
> register_t compute_max_zcr(void);
> register_t vl_to_zcr(unsigned int vl);
> unsigned int get_sys_vl_len(void);
> +int sve_context_init(struct vcpu *v);
> +void sve_context_free(struct vcpu *v);
> +void sve_save_state(struct vcpu *v);
> +void sve_restore_state(struct vcpu *v);
>
> #else /* !CONFIG_ARM64_SVE */
>
> @@ -42,6 +46,15 @@ static inline unsigned int get_sys_vl_len(void)
>     return 0;
> }
>
> +static inline int sve_context_init(struct vcpu *v)
> +{
> +    return 0;
> +}
> +
> +static inline void sve_context_free(struct vcpu *v) {}
> +static inline void sve_save_state(struct vcpu *v) {}
> +static inline void sve_restore_state(struct vcpu *v) {}
> +
> #endif /* CONFIG_ARM64_SVE */
>
> #endif /* _ARM_ARM64_SVE_H */
> diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h b/xen/arch/arm/include/asm/arm64/sysregs.h
> index 4cabb9eb4d5e..3fdeb9d8cdef 100644
> --- a/xen/arch/arm/include/asm/arm64/sysregs.h
> +++ b/xen/arch/arm/include/asm/arm64/sysregs.h
> @@ -88,6 +88,9 @@
> #ifndef ID_AA64ISAR2_EL1
> #define ID_AA64ISAR2_EL1 S3_0_C0_C6_2
> #endif
> +#ifndef ZCR_EL1
> +#define ZCR_EL1 S3_0_C1_C2_0
> +#endif

What about ZCR_EL2?
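
If it is not already provided by an earlier patch of the series, I
would expect the matching define right next to ZCR_EL1, along these
lines (encoding taken from the Arm ARM, please double-check):

    #ifndef ZCR_EL2
    #define ZCR_EL2 S3_4_C1_C2_0
    #endif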
+ : : "Q" (*v->arch.vfp.fpregs), "r" > (v->arch.vfp.fpregs)); > + } > > WRITE_SYSREG(v->arch.vfp.fpsr, FPSR); > WRITE_SYSREG(v->arch.vfp.fpcr, FPCR); > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index 769fae8fe25e..060fc30bbb5d 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -552,7 +552,12 @@ int arch_vcpu_create(struct vcpu *v) > > v->arch.cptr_el2 = get_default_cptr_flags(); > if ( is_sve_domain(v->domain) ) > + { > + if ( (rc = sve_context_init(v)) != 0 ) > + goto fail; > v->arch.cptr_el2 &= ~HCPTR_CP(8); > + v->arch.zcr_el2 = vl_to_zcr(sve_decode_vl(v->domain->arch.sve_vl)); > + } > > v->arch.hcr_el2 = get_default_hcr_flags(); > > @@ -582,6 +587,8 @@ fail: > > void arch_vcpu_destroy(struct vcpu *v) > { > + if ( is_sve_domain(v->domain) ) > + sve_context_free(v); > vcpu_timer_destroy(v); > vcpu_vgic_free(v); > free_xenheap_pages(v->arch.stack, STACK_ORDER); > diff --git a/xen/arch/arm/include/asm/arm64/sve.h > b/xen/arch/arm/include/asm/arm64/sve.h > index a4c53e3e8e2e..fc162c9d2cf7 100644 > --- a/xen/arch/arm/include/asm/arm64/sve.h > +++ b/xen/arch/arm/include/asm/arm64/sve.h > @@ -24,6 +24,10 @@ static inline unsigned int sve_decode_vl(unsigned int > sve_vl) > register_t compute_max_zcr(void); > register_t vl_to_zcr(unsigned int vl); > unsigned int get_sys_vl_len(void); > +int sve_context_init(struct vcpu *v); > +void sve_context_free(struct vcpu *v); > +void sve_save_state(struct vcpu *v); > +void sve_restore_state(struct vcpu *v); > > #else /* !CONFIG_ARM64_SVE */ > > @@ -42,6 +46,15 @@ static inline unsigned int get_sys_vl_len(void) > return 0; > } > > +static inline int sve_context_init(struct vcpu *v) > +{ > + return 0; > +} > + > +static inline void sve_context_free(struct vcpu *v) {} > +static inline void sve_save_state(struct vcpu *v) {} > +static inline void sve_restore_state(struct vcpu *v) {} > + > #endif /* CONFIG_ARM64_SVE */ > > #endif /* _ARM_ARM64_SVE_H */ > diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h > b/xen/arch/arm/include/asm/arm64/sysregs.h > index 4cabb9eb4d5e..3fdeb9d8cdef 100644 > --- a/xen/arch/arm/include/asm/arm64/sysregs.h > +++ b/xen/arch/arm/include/asm/arm64/sysregs.h > @@ -88,6 +88,9 @@ > #ifndef ID_AA64ISAR2_EL1 > #define ID_AA64ISAR2_EL1 S3_0_C0_C6_2 > #endif > +#ifndef ZCR_EL1 > +#define ZCR_EL1 S3_0_C1_C2_0 > +#endif > What about ZCR_EL2 ? > /* ID registers (imported from arm64/include/asm/sysreg.h in Linux) */ > > diff --git a/xen/arch/arm/include/asm/arm64/vfp.h > b/xen/arch/arm/include/asm/arm64/vfp.h > index e6e8c363bc16..8af714cb8ecc 100644 > --- a/xen/arch/arm/include/asm/arm64/vfp.h > +++ b/xen/arch/arm/include/asm/arm64/vfp.h > @@ -6,7 +6,17 @@ > > struct vfp_state > { > + /* > + * When SVE is enabled for the guest, fpregs memory will be used to > + * save/restore P0-P15 registers, otherwise it will be used for the > V0-V31 > + * registers. > + */ > uint64_t fpregs[64] __vfp_aligned; > + /* > + * When SVE is enabled for the guest, sve_context contains memory to > + * save/restore Z0-Z31 registers and FFR. 
> diff --git a/xen/arch/arm/include/asm/domain.h b/xen/arch/arm/include/asm/domain.h
> index 78cc2da3d4e5..6b5ec3bd0680 100644
> --- a/xen/arch/arm/include/asm/domain.h
> +++ b/xen/arch/arm/include/asm/domain.h
> @@ -195,6 +195,8 @@ struct arch_vcpu
>     register_t tpidrro_el0;
>
>     /* HYP configuration */
> +    register_t zcr_el1;
> +    register_t zcr_el2;
>     register_t cptr_el2;
>     register_t hcr_el2;
>     register_t mdcr_el2;
> --
> 2.34.1
>

Cheers
Bertrand