|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 4/4] x86/hyperv: L0 assisted TLB flush
On Wed, Feb 12, 2020 at 04:09:18PM +0000, Wei Liu wrote:
> Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
> of several hypercalls:
>
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
> * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX
>
> Pick the most efficient hypercalls available.
>
> Signed-off-by: Wei Liu <liuwe@xxxxxxxxxxxxx>
> ---
> xen/arch/x86/guest/hyperv/Makefile | 1 +
> xen/arch/x86/guest/hyperv/private.h | 9 ++
> xen/arch/x86/guest/hyperv/tlb.c | 172 +++++++++++++++++++++++++++-
> xen/arch/x86/guest/hyperv/util.c | 72 ++++++++++++
> 4 files changed, 253 insertions(+), 1 deletion(-)
> create mode 100644 xen/arch/x86/guest/hyperv/util.c
>
> diff --git a/xen/arch/x86/guest/hyperv/Makefile
> b/xen/arch/x86/guest/hyperv/Makefile
> index 18902c33e9..0e39410968 100644
> --- a/xen/arch/x86/guest/hyperv/Makefile
> +++ b/xen/arch/x86/guest/hyperv/Makefile
> @@ -1,2 +1,3 @@
> obj-y += hyperv.o
> obj-y += tlb.o
> +obj-y += util.o
> diff --git a/xen/arch/x86/guest/hyperv/private.h
> b/xen/arch/x86/guest/hyperv/private.h
> index 78e52f74ce..311f060495 100644
> --- a/xen/arch/x86/guest/hyperv/private.h
> +++ b/xen/arch/x86/guest/hyperv/private.h
> @@ -24,12 +24,21 @@
>
> #include <xen/cpumask.h>
> #include <xen/percpu.h>
> +#include <xen/types.h>
>
> DECLARE_PER_CPU(void *, hv_input_page);
> DECLARE_PER_CPU(void *, hv_vp_assist);
> DECLARE_PER_CPU(uint32_t, hv_vp_index);
>
> +static inline uint32_t hv_vp_index(int cpu)
unsigned int for cpu.
> +{
> + return per_cpu(hv_vp_index, cpu);
> +}
> +
> int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> unsigned int flags);
>
> +/* Returns number of banks, -ev if error */
> +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
> +
> #endif /* __XEN_HYPERV_PRIVIATE_H__ */
> diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
> index 48f527229e..99b789d9e9 100644
> --- a/xen/arch/x86/guest/hyperv/tlb.c
> +++ b/xen/arch/x86/guest/hyperv/tlb.c
> @@ -19,15 +19,185 @@
> * Copyright (c) 2020 Microsoft.
> */
>
> +#include <xen/cpu.h>
> #include <xen/cpumask.h>
> #include <xen/errno.h>
>
> +#include <asm/guest/hyperv.h>
> +#include <asm/guest/hyperv-hcall.h>
> +#include <asm/guest/hyperv-tlfs.h>
> +
> #include "private.h"
>
> +/*
> + * It is possible to encode up to 4096 pages using the lower 12 bits
> + * in an element of gva_list
> + */
> +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
> +#define ORDER_TO_BYTES(order) ((1ul << (order)) * PAGE_SIZE)
There are already some conversion functions in xen/mm.h
(get_order_from_{bytes/pages}), maybe you could add a
get_bytes_from_order helper there?
> +
> +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
> + unsigned int order)
> +{
> + unsigned long start = (unsigned long)va;
> + unsigned long end = start + ORDER_TO_BYTES(order) - 1;
> + unsigned int n = 0;
> +
> + do {
> + unsigned long remain = end > start ? end - start : 0;
I don't think you can get here with end == start?
As that's the condition of the loop, and order 0 is going to set
end = start + 4096 - 1.
> +
> + gva_list[n] = start & PAGE_MASK;
> +
> + /*
> + * Use lower 12 bits to encode the number of additional pages
> + * to flush
> + */
> + if ( remain >= HV_TLB_FLUSH_UNIT )
> + {
> + gva_list[n] |= ~PAGE_MASK;
> + start += HV_TLB_FLUSH_UNIT;
> + }
> + else if ( remain )
> + {
> + gva_list[n] |= (remain - 1) >> PAGE_SHIFT;
> + start = end;
> + }
> +
> + n++;
> + } while ( start < end );
> +
> + return n;
> +}
> +
> +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
> + unsigned int flags)
> +{
> + struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
> + int nr_banks;
> + unsigned int max_gvas;
> + unsigned int order = flags & FLUSH_ORDER_MASK;
> + uint64_t ret;
> +
> + ASSERT(flush);
> + ASSERT(!local_irq_is_enabled());
Can you turn this into an if condition with ASSERT_UNREACHABLE and
return ~0ULL? (as I think that signals an error).
> +
> + if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
> + return ~0ULL;
> +
> + flush->address_space = 0;
> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> + if ( !(flags & FLUSH_TLB_GLOBAL) )
> + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +
> + flush->hv_vp_set.valid_bank_mask = 0;
> + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> +
> + nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
> + if ( nr_banks < 0 )
> + return ~0ULL;
> +
> + max_gvas =
> + (PAGE_SIZE - sizeof(*flush) - nr_banks *
> + sizeof(flush->hv_vp_set.bank_contents[0])) /
> + sizeof(uint64_t); /* gva is represented as uint64_t */
> +
> + /*
> + * Flush the entire address space if va is NULL or if there is not
> + * enough space for gva_list.
> + */
> + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas )
> + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
> + nr_banks, virt_to_maddr(flush), 0);
> + else
> + {
> + uint64_t *gva_list = (uint64_t *)flush + sizeof(*flush) + nr_banks;
Don't you need nr_banks * sizeof(flush->hv_vp_set.bank_contents) in
order to calculate the position of the gva_list?
> + unsigned int gvas = fill_gva_list(gva_list, va, order);
> +
> + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
> + gvas, nr_banks, virt_to_maddr(flush), 0);
> + }
> +
> + return ret;
> +}
> +
> int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> unsigned int flags)
> {
> - return -EOPNOTSUPP;
> + unsigned long irq_flags;
> + struct hv_tlb_flush *flush = this_cpu(hv_input_page);
> + uint64_t ret;
> + unsigned int order = flags & FLUSH_ORDER_MASK;
> + unsigned int max_gvas;
> +
> + ASSERT(flush);
> + ASSERT(!cpumask_empty(mask));
> +
> + local_irq_save(irq_flags);
> +
> + flush->address_space = 0;
> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> + flush->processor_mask = 0;
> + if ( !(flags & FLUSH_TLB_GLOBAL) )
> + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +
> + if ( cpumask_equal(mask, &cpu_online_map) )
> + flush->flags |= HV_FLUSH_ALL_PROCESSORS;
> + else
> + {
> + int cpu;
unsigned int.
> +
> + /*
> + * Normally VP indices are in ascending order and match Xen's
> + * idea of CPU ids. Check the last index to see if VP index is
> + * >= 64. If so, we can skip setting up parameters for
> + * non-applicable hypercalls without looking further.
> + */
> + if ( hv_vp_index(cpumask_last(mask)) >= 64 )
> + goto do_ex_hypercall;
> +
> + for_each_cpu ( cpu, mask )
> + {
> + uint32_t vpid = hv_vp_index(cpu);
> +
> + if ( vpid > ms_hyperv.max_vp_index )
> + {
> + local_irq_restore(irq_flags);
> + return -ENXIO;
> + }
> +
> + if ( vpid >= 64 )
> + goto do_ex_hypercall;
> +
> + __set_bit(vpid, &flush->processor_mask);
> + }
> + }
> +
> + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
> +
> + /*
> + * Flush the entire address space if va is NULL or if there is not
> + * enough space for gva_list.
> + */
> + if ( !va || (ORDER_TO_BYTES(order) / HV_TLB_FLUSH_UNIT) > max_gvas )
> + ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
> + virt_to_maddr(flush), 0);
> + else
> + {
> + unsigned int gvas = fill_gva_list(flush->gva_list, va, order);
> +
> + ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0,
> + virt_to_maddr(flush), 0);
> + }
> +
> + goto done;
> +
> + do_ex_hypercall:
> + ret = flush_tlb_ex(mask, va, flags);
> +
> + done:
> + local_irq_restore(irq_flags);
> +
> + return ret & HV_HYPERCALL_RESULT_MASK;
Will this return an error code that uses the same space as Xen's errno
values?
> }
>
> /*
> diff --git a/xen/arch/x86/guest/hyperv/util.c
> b/xen/arch/x86/guest/hyperv/util.c
> new file mode 100644
> index 0000000000..9d0b5f4a46
> --- /dev/null
> +++ b/xen/arch/x86/guest/hyperv/util.c
> @@ -0,0 +1,72 @@
> +/******************************************************************************
> + * arch/x86/guest/hyperv/util.c
> + *
> + * Hyper-V utility functions
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Copyright (c) 2020 Microsoft.
> + */
> +
> +#include <xen/cpu.h>
> +#include <xen/cpumask.h>
> +#include <xen/errno.h>
> +
> +#include <asm/guest/hyperv.h>
> +#include <asm/guest/hyperv-tlfs.h>
> +
> +#include "private.h"
> +
> +int cpumask_to_vpset(struct hv_vpset *vpset,
> + const cpumask_t *mask)
> +{
> + int nr = 1, cpu, vcpu_bank, vcpu_offset;
> + int max_banks = ms_hyperv.max_vp_index / 64;
I think nr wants to be int (to match the function return type), but
the rest should be unsigned ints, specially because they are used as
array indexes.
> +
> + /* Up to 64 banks can be represented by valid_bank_mask */
> + if ( max_banks >= 64 )
> + return -1;
E2BIG or some such?
> +
> + /* Clear all banks to avoid flushing unwanted CPUs */
> + for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ )
> + vpset->bank_contents[vcpu_bank] = 0;
> +
> + vpset->valid_bank_mask = 0;
> +
> + for_each_cpu ( cpu, mask )
> + {
> + int vcpu = hv_vp_index(cpu);
unsigned int or uint32_t (which is the type that hv_vp_index
returns).
Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |