Xen project Mailing List

[PATCH v4 02/18] xen/riscv: introduce VMID allocation and manegement

Current implementation is based on x86's way to allocate VMIDs: VMIDs partition the physical TLB. In the current implementation VMIDs are introduced to reduce the number of TLB flushes. Each time a guest-physical address space changes, instead of flushing the TLB, a new VMID is assigned. This reduces the number of TLB flushes to at most 1/#VMIDs. The biggest advantage is that hot parts of the hypervisor's code and data retain in the TLB. VMIDs are a hart-local resource. As preemption of VMIDs is not possible, VMIDs are assigned in a round-robin scheme. To minimize the overhead of VMID invalidation, at the time of a TLB flush, VMIDs are tagged with a 64-bit generation. Only on a generation overflow the code needs to invalidate all VMID information stored at the VCPUs with are run on the specific physical processor. When this overflow appears VMID usage is disabled to retain correctness. Only minor changes are made compared to the x86 implementation. These include using RISC-V-specific terminology, adding a check to ensure the type used for storing the VMID has enough bits to hold VMIDLEN, and introducing a new function vmidlen_detect() to clarify the VMIDLEN value, rename stuff connected to VMID enable/disable to "VMID use enable/disable". Signed-off-by: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx> --- Changes in V4: - s/guest's virtual/guest-physical in the comment inside vmid.c and in commit message. - Drop x86-related numbers in the comment about "Sketch of the Implementation". - s/__read_only/__ro_after_init in declaration of opt_vmid_enabled. - s/hart_vmid_generation/generation. - Update vmidlen_detect() to work with unsigned int type for vmid_bits variable. - Drop old variable im vmdidlen_detetct, it seems like there is no any reason to restore old value of hgatp with no guest running on a hart yet. - Update the comment above local_hfence_gvma_all() in vmidlen_detect(). - s/max_availalbe_bits/max_available_bits. - use BITS_PER_BYTE, instead of "<< 3". - Add BUILD_BUILD_BUG_ON() instead run-time check that an amount of set bits can be held in vmid_data->max_vmid. - Apply changes from the patch "x86/HVM: polish hvm_asid_init() a little" here (changes connected to g_disabled) with the following minor changes: Update the printk() message to "VMIDs use is...". Rename g_disabled to g_vmid_used. - Rename member 'disabled' of vmid_data structure to used. - Use gstage_mode to properly detect VMIDLEN. --- Changes in V3: - Reimplemnt VMID allocation similar to what x86 has implemented. --- Changes in V2: - New patch. --- xen/arch/riscv/Makefile | 1 + xen/arch/riscv/include/asm/domain.h | 6 + xen/arch/riscv/include/asm/vmid.h | 8 ++ xen/arch/riscv/setup.c | 3 + xen/arch/riscv/vmid.c | 193 ++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 xen/arch/riscv/include/asm/vmid.h create mode 100644 xen/arch/riscv/vmid.c diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile index 264e265699..e2499210c8 100644 --- a/xen/arch/riscv/Makefile +++ b/xen/arch/riscv/Makefile @@ -17,6 +17,7 @@ obj-y += smpboot.o obj-y += stubs.o obj-y += time.o obj-y += traps.o +obj-y += vmid.o obj-y += vm_event.o $(TARGET): $(TARGET)-syms diff --git a/xen/arch/riscv/include/asm/domain.h b/xen/arch/riscv/include/asm/domain.h index c3d965a559..aac1040658 100644 --- a/xen/arch/riscv/include/asm/domain.h +++ b/xen/arch/riscv/include/asm/domain.h @@ -5,6 +5,11 @@ #include <xen/xmalloc.h> #include <public/hvm/params.h> +struct vcpu_vmid { + uint64_t generation; + uint16_t vmid; +}; + struct hvm_domain { uint64_t params[HVM_NR_PARAMS]; @@ -14,6 +19,7 @@ struct arch_vcpu_io { }; struct arch_vcpu { + struct vcpu_vmid vmid; }; struct arch_domain { diff --git a/xen/arch/riscv/include/asm/vmid.h b/xen/arch/riscv/include/asm/vmid.h new file mode 100644 index 0000000000..2f1f7ec9a2 --- /dev/null +++ b/xen/arch/riscv/include/asm/vmid.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef ASM_RISCV_VMID_H +#define ASM_RISCV_VMID_H + +void vmid_init(void); + +#endif /* ASM_RISCV_VMID_H */ diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c index 87ee96bdb3..3c9e6a9ee3 100644 --- a/xen/arch/riscv/setup.c +++ b/xen/arch/riscv/setup.c @@ -26,6 +26,7 @@ #include <asm/sbi.h> #include <asm/setup.h> #include <asm/traps.h> +#include <asm/vmid.h> /* Xen stack for bringing up the first CPU. */ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] @@ -151,6 +152,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id, gstage_mode_detect(); + vmid_init(); + printk("All set up\n"); machine_halt(); diff --git a/xen/arch/riscv/vmid.c b/xen/arch/riscv/vmid.c new file mode 100644 index 0000000000..b94d082c82 --- /dev/null +++ b/xen/arch/riscv/vmid.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <xen/domain.h> +#include <xen/init.h> +#include <xen/sections.h> +#include <xen/lib.h> +#include <xen/param.h> +#include <xen/percpu.h> + +#include <asm/atomic.h> +#include <asm/csr.h> +#include <asm/flushtlb.h> +#include <asm/p2m.h> + +/* Xen command-line option to enable VMIDs */ +static bool __ro_after_init opt_vmid_use_enabled = true; +boolean_param("vmid", opt_vmid_use_enabled); + +/* + * VMIDs partition the physical TLB. In the current implementation VMIDs are + * introduced to reduce the number of TLB flushes. Each time a guest-physical + * address space changes, instead of flushing the TLB, a new VMID is + * assigned. This reduces the number of TLB flushes to at most 1/#VMIDs. + * The biggest advantage is that hot parts of the hypervisor's code and data + * retain in the TLB. + * + * Sketch of the Implementation: + * + * VMIDs are a hart-local resource. As preemption of VMIDs is not possible, + * VMIDs are assigned in a round-robin scheme. To minimize the overhead of + * VMID invalidation, at the time of a TLB flush, VMIDs are tagged with a + * 64-bit generation. Only on a generation overflow the code needs to + * invalidate all VMID information stored at the VCPUs with are run on the + * specific physical processor. When this overflow appears VMID usage is + * disabled to retain correctness. + */ + +/* Per-Hart VMID management. */ +struct vmid_data { + uint64_t generation; + uint16_t next_vmid; + uint16_t max_vmid; + bool used; +}; + +static DEFINE_PER_CPU(struct vmid_data, vmid_data); + +static unsigned int vmidlen_detect(void) +{ + unsigned int vmid_bits; + + /* + * According to the RISC-V Privileged Architecture Spec: + * When MODE=Bare, guest physical addresses are equal to supervisor + * physical addresses, and there is no further memory protection + * for a guest virtual machine beyond the physical memory protection + * scheme described in Section 3.7. + * In this case, the remaining fields in hgatp must be set to zeros. + * Thereby it is necessary to set gstage_mode not equal to Bare. + */ + ASSERT(gstage_mode != HGATP_MODE_OFF); + csr_write(CSR_HGATP, + MASK_INSR(gstage_mode, HGATP_MODE_MASK) | HGATP_VMID_MASK); + vmid_bits = MASK_EXTR(csr_read(CSR_HGATP), HGATP_VMID_MASK); + vmid_bits = flsl(vmid_bits); + csr_write(CSR_HGATP, _AC(0, UL)); + + /* + * From RISC-V spec: + * Speculative executions of the address-translation algorithm behave as + * non-speculative executions of the algorithm do, except that they must + * not set the dirty bit for a PTE, they must not trigger an exception, + * and they must not create address-translation cache entries if those + * entries would have been invalidated by any SFENCE.VMA instruction + * executed by the hart since the speculative execution of the algorithm + * began. + * + * Also, despite of the fact here it is mentioned that when V=0 two-stage + * address translation is inactivated: + * The current virtualization mode, denoted V, indicates whether the hart + * is currently executing in a guest. When V=1, the hart is either in + * virtual S-mode (VS-mode), or in virtual U-mode (VU-mode) atop a guest + * OS running in VS-mode. When V=0, the hart is either in M-mode, in + * HS-mode, or in U-mode atop an OS running in HS-mode. The + * virtualization mode also indicates whether two-stage address + * translation is active (V=1) or inactive (V=0). + * But on the same side, writing to hgatp register activates it: + * The hgatp register is considered active for the purposes of + * the address-translation algorithm unless the effective privilege mode + * is U and hstatus.HU=0. + * + * Thereby it leaves some room for speculation even in this stage of boot, + * so it could be that we polluted local TLB so flush all guest TLB. + */ + local_hfence_gvma_all(); + + return vmid_bits; +} + +void vmid_init(void) +{ + static int8_t g_vmid_used = -1; + + unsigned int vmid_len = vmidlen_detect(); + struct vmid_data *data = &this_cpu(vmid_data); + + BUILD_BUG_ON((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) > + (BIT((sizeof(data->max_vmid) * BITS_PER_BYTE), UL) - 1)); + + data->max_vmid = BIT(vmid_len, U) - 1; + data->used = !opt_vmid_use_enabled || (vmid_len <= 1); + + if ( g_vmid_used < 0 ) + { + g_vmid_used = data->used; + printk("VMIDs use is %sabled\n", data->used ? "dis" : "en"); + } + else if ( g_vmid_used != data->used ) + printk("CPU%u: VMIDs use is %sabled\n", smp_processor_id(), + data->used ? "dis" : "en"); + + /* Zero indicates 'invalid generation', so we start the count at one. */ + data->generation = 1; + + /* Zero indicates 'VMIDs use disabled', so we start the count at one. */ + data->next_vmid = 1; +} + +void vcpu_vmid_flush_vcpu(struct vcpu *v) +{ + write_atomic(&v->arch.vmid.generation, 0); +} + +void vmid_flush_hart(void) +{ + struct vmid_data *data = &this_cpu(vmid_data); + + if ( data->used ) + return; + + if ( likely(++data->generation != 0) ) + return; + + /* + * VMID generations are 64 bit. Overflow of generations never happens. + * For safety, we simply disable ASIDs, so correctness is established; it + * only runs a bit slower. + */ + printk("%s: VMID generation overrun. Disabling VMIDs.\n", __func__); + data->used = 1; +} + +bool vmid_handle_vmenter(struct vcpu_vmid *vmid) +{ + struct vmid_data *data = &this_cpu(vmid_data); + + /* Test if VCPU has valid VMID. */ + if ( read_atomic(&vmid->generation) == data->generation ) + return 0; + + /* If there are no free VMIDs, need to go to a new generation. */ + if ( unlikely(data->next_vmid > data->max_vmid) ) + { + vmid_flush_hart(); + data->next_vmid = 1; + if ( data->used ) + goto disabled; + } + + /* Now guaranteed to be a free VMID. */ + vmid->vmid = data->next_vmid++; + write_atomic(&vmid->generation, data->generation); + + /* + * When we assign VMID 1, flush all TLB entries as we are starting a new + * generation, and all old VMID allocations are now stale. + */ + return (vmid->vmid == 1); + + disabled: + vmid->vmid = 0; + return 0; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ -- 2.51.0

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.