[Xen-devel] [RFC PATCH KERNEL 1/4] x86/xen: start untangling PV and PVHVM guest support code
Introduce the CONFIG_XEN_PV config option and split enlighten.c into
three files. Temporarily add #ifdef CONFIG_XEN_PV to smp.c and mmu.c to
avoid breaking the build and to keep this patch from growing even
bigger. The xen_cpu_up_prepare*/xen_cpu_die hooks require separation to
support the future xen_smp_intr_init() split.

Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
---
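As a review aid (not part of the patch itself): the key interface change
below is that xen_cpuhp_setup() now takes the prepare/dead CPU-hotplug
callbacks as arguments instead of hardcoding them, so the PV and PVHVM
paths can each register their own. A minimal usage sketch follows; the
example_* names are invented placeholders, and only xen_cpuhp_setup()
itself comes from this series.

#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/init.h>

#include <xen/xen-ops.h>

/* Placeholder, standing in for xen_cpu_up_prepare_{pv,hvm}. */
static int example_cpu_up_prepare(unsigned int cpu)
{
	/* Per-CPU bringup: timers, IPI interrupts, spinlock state, ... */
	return 0;
}

/* Placeholder, standing in for xen_cpu_dead_{pv,hvm}. */
static int example_cpu_dead(unsigned int cpu)
{
	/* Per-CPU teardown, mirroring the prepare step. */
	return 0;
}

static void __init example_guest_init(void)
{
	/*
	 * xen_cpuhp_setup() registers the two callbacks on
	 * CPUHP_XEN_PREPARE and hooks the shared xen_cpu_up_online()
	 * onto CPUHP_AP_ONLINE_DYN itself; it returns 0 on success or
	 * a negative errno, which both real callers just WARN_ON().
	 */
	WARN_ON(xen_cpuhp_setup(example_cpu_up_prepare, example_cpu_dead));
}

Passing the callbacks in keeps the shared hotplug plumbing in
enlighten_common.c while letting each guest type keep its own per-CPU
setup, which is what the hook separation in the changelog is for.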
 arch/x86/include/asm/hypervisor.h |   3 +-
 arch/x86/kernel/cpu/hypervisor.c  |   7 +-
 arch/x86/kernel/process_64.c      |   2 +-
 arch/x86/xen/Kconfig              |  25 ++-
 arch/x86/xen/Makefile             |   7 +-
 arch/x86/xen/enlighten.c          | 388 ++------------------------------------
 arch/x86/xen/enlighten_common.c   | 216 +++++++++++++++++++++
 arch/x86/xen/enlighten_hvm.c      | 202 ++++++++++++++++++++
 arch/x86/xen/mmu.c                |   2 +
 arch/x86/xen/smp.c                |  40 ++--
 arch/x86/xen/suspend.c            |   4 +
 arch/x86/xen/xen-head.S           |   4 +
 arch/x86/xen/xen-ops.h            |   2 +
 include/xen/xen-ops.h             |   7 +
 14 files changed, 510 insertions(+), 399 deletions(-)
 create mode 100644 arch/x86/xen/enlighten_common.c
 create mode 100644 arch/x86/xen/enlighten_hvm.c

diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 67942b6..4faa12d 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -53,7 +53,8 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_pvhvm;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
 extern void init_hypervisor(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 35691a6..dd68b03 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -28,8 +28,11 @@
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN
-	&x86_hyper_xen,
+#ifdef CONFIG_XEN_PV
+	&x86_hyper_xen_pv,
+#endif
+#ifdef CONFIG_XEN_PVHVM
+	&x86_hyper_xen_pvhvm,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3..4e91a02 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -434,7 +434,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 	/*
 	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
 	 * current_pt_regs()->flags may not match the current task's
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c7b15f3..8298378 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,6 @@ config XEN
 	bool "Xen guest support"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
-	select XEN_HAVE_PVMMU
 	select XEN_HAVE_VPMU
 	depends on X86_64 || (X86_32 && X86_PAE)
 	depends on X86_LOCAL_APIC && X86_TSC
@@ -15,18 +14,32 @@ config XEN
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
 
+config XEN_PV
+	bool "Xen PV guest support"
+	default y
+	depends on XEN
+	help
+	  Support running as a Xen PV guest.
+
 config XEN_DOM0
-	def_bool y
-	depends on XEN && PCI_XEN && SWIOTLB_XEN
+	bool "Xen PV Dom0 support"
+	default y
+	depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
 	depends on X86_IO_APIC && ACPI && PCI
+	select XEN_HAVE_PVMMU
+	help
+	  Support running as a Xen PV Dom0 guest.
 
 config XEN_PVHVM
-	def_bool y
+	bool "Xen PVHVM guest support"
+	default y
 	depends on XEN && PCI && X86_LOCAL_APIC
+	help
+	  Support running as a Xen PVHVM guest.
 
 config XEN_512GB
 	bool "Limit Xen pv-domain memory to 512GB"
-	depends on XEN && X86_64
+	depends on XEN_PV && X86_64
 	default y
 	help
 	  Limit paravirtualized user domains to 512GB of RAM.
@@ -53,5 +66,5 @@ config XEN_DEBUG_FS
 
 config XEN_PVH
 	bool "Support for running as a PVH guest"
-	depends on X86_64 && XEN && XEN_PVHVM
+	depends on X86_64 && XEN_PV && XEN_PVHVM
 	def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e527..e60fc93 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -10,11 +10,14 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_enlighten.o		:= $(nostackp)
 CFLAGS_mmu.o			:= $(nostackp)
 
-obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
-			time.o xen-asm.o xen-asm_$(BITS).o \
+obj-y		:= enlighten_common.o setup.o multicalls.o \
+			mmu.o irq.o time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o platform-pci-unplug.o \
 			p2m.o apic.o pmu.o
 
+obj-$(CONFIG_XEN_PV)		+= enlighten.o
+obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o
+
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bdd8556..086c339 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -90,78 +90,13 @@
 #include "multicalls.h"
 #include "pmu.h"
 
-EXPORT_SYMBOL_GPL(hypercall_page);
-
-/*
- * Pointer to the xen_vcpu_info structure or
- * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
- * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
- * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
- * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
- * acknowledge pending events.
- * Also more subtly it is used by the patched version of irq enable/disable
- * e.g. xen_irq_enable_direct and xen_iret in PV mode.
- *
- * The desire to be able to do those mask/unmask operations as a single
- * instruction by using the per-cpu offset held in %gs is the real reason
- * vcpu info is in a per-cpu pointer and the original reason for this
- * hypercall.
- *
- */
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-/*
- * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
- * hypercall. This can be used both in PV and PVHVM mode. The structure
- * overrides the default per_cpu(xen_vcpu, cpu) value.
- */
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-
-/* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
-EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
-
-enum xen_domain_type xen_domain_type = XEN_NATIVE;
-EXPORT_SYMBOL_GPL(xen_domain_type);
-
-unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
-EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned long machine_to_phys_nr;
-EXPORT_SYMBOL(machine_to_phys_nr);
-
-struct start_info *xen_start_info;
-EXPORT_SYMBOL_GPL(xen_start_info);
-
-struct shared_info xen_dummy_shared_info;
 void *xen_initial_gdt;
 
 RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
 
-static int xen_cpu_up_prepare(unsigned int cpu);
-static int xen_cpu_up_online(unsigned int cpu);
-static int xen_cpu_dead(unsigned int cpu);
-
-/*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
-
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs. We assume it is to start with, and then set it to zero on
- * the first failure. This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if the all are.
- *
- * 0: not available, 1: available
- */
-static int have_vcpu_info_placement = 1;
+static int xen_cpu_up_prepare_pv(unsigned int cpu);
+static int xen_cpu_dead_pv(unsigned int cpu);
 
 struct tls_descs {
 	struct desc_struct desc[3];
@@ -176,73 +111,6 @@ struct tls_descs {
  */
 static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
-static void clamp_max_cpus(void)
-{
-#ifdef CONFIG_SMP
-	if (setup_max_cpus > MAX_VIRT_CPUS)
-		setup_max_cpus = MAX_VIRT_CPUS;
-#endif
-}
-
-void xen_vcpu_setup(int cpu)
-{
-	struct vcpu_register_vcpu_info info;
-	int err;
-	struct vcpu_info *vcpup;
-
-	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
-	/*
-	 * This path is called twice on PVHVM - first during bootup via
-	 * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
-	 * hotplugged: cpu_up -> xen_hvm_cpu_notify.
-	 * As we can only do the VCPUOP_register_vcpu_info once lets
-	 * not over-write its result.
-	 *
-	 * For PV it is called during restore (xen_vcpu_restore) and bootup
-	 * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
-	 * use this function.
-	 */
-	if (xen_hvm_domain()) {
-		if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
-			return;
-	}
-	if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
-		per_cpu(xen_vcpu, cpu) =
-			&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
-
-	if (!have_vcpu_info_placement) {
-		if (cpu >= MAX_VIRT_CPUS)
-			clamp_max_cpus();
-		return;
-	}
-
-	vcpup = &per_cpu(xen_vcpu_info, cpu);
-	info.mfn = arbitrary_virt_to_mfn(vcpup);
-	info.offset = offset_in_page(vcpup);
-
-	/* Check to see if the hypervisor will put the vcpu_info
-	   structure where we want it, which allows direct access via
-	   a percpu-variable.
-	   N.B. This hypercall can _only_ be called once per CPU. Subsequent
-	   calls will error out with -EINVAL. This is due to the fact that
-	   hypervisor has no unregister variant and this hypercall does not
-	   allow to over-write info.mfn and info.offset.
-	 */
-	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
-				 &info);
-
-	if (err) {
-		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
-		have_vcpu_info_placement = 0;
-		clamp_max_cpus();
-	} else {
-		/* This cpu is using the registered vcpu info, even if
-		   later ones fail to. */
-		per_cpu(xen_vcpu, cpu) = vcpup;
-	}
-}
-
 /*
  * On restore, set the vcpu placement up again.
  * If it fails, then we're in a bad state, since
@@ -1291,18 +1159,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.end_context_switch = xen_end_context_switch,
 };
 
-static void xen_reboot(int reason)
-{
-	struct sched_shutdown r = { .reason = reason };
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		xen_pmu_finish(cpu);
-
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
-		BUG();
-}
-
 static void xen_restart(char *msg)
 {
 	xen_reboot(SHUTDOWN_reboot);
@@ -1330,25 +1186,6 @@ static void xen_crash_shutdown(struct pt_regs *regs)
 	xen_reboot(SHUTDOWN_crash);
 }
 
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	if (!kexec_crash_loaded())
-		xen_reboot(SHUTDOWN_crash);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block xen_panic_block = {
-	.notifier_call= xen_panic_event,
-	.priority = INT_MIN
-};
-
-int xen_panic_handler_init(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
-	return 0;
-}
-
 static const struct machine_ops xen_machine_ops __initconst = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
@@ -1537,24 +1374,6 @@ static void __init xen_dom0_set_legacy_features(void)
 	x86_platform.legacy.rtc = 1;
 }
 
-static int xen_cpuhp_setup(void)
-{
-	int rc;
-
-	rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
-				       "XEN_HVM_GUEST_PREPARE",
-				       xen_cpu_up_prepare, xen_cpu_dead);
-	if (rc >= 0) {
-		rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-					       "XEN_HVM_GUEST_ONLINE",
-					       xen_cpu_up_online, NULL);
-		if (rc < 0)
-			cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
-	}
-
-	return rc >= 0 ? 0 : rc;
-}
-
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
@@ -1656,7 +1475,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
-	WARN_ON(xen_cpuhp_setup());
+	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
 	early_boot_irqs_disabled = true;
@@ -1771,95 +1590,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 }
 
-void __ref xen_hvm_init_shared_info(void)
-{
-	int cpu;
-	struct xen_add_to_physmap xatp;
-	static struct shared_info *shared_info_page = 0;
-
-	if (!shared_info_page)
-		shared_info_page = (struct shared_info *)
-			extend_brk(PAGE_SIZE, PAGE_SIZE);
-	xatp.domid = DOMID_SELF;
-	xatp.idx = 0;
-	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
-	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
-		BUG();
-
-	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
-	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
-	 * page, we use it in the event channel upcall and in some pvclock
-	 * related functions. We don't need the vcpu_info placement
-	 * optimizations because we don't use any pv_mmu or pv_irq op on
-	 * HVM.
-	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
-	 * online but xen_hvm_init_shared_info is run at resume time too and
-	 * in that case multiple vcpus might be online. */
-	for_each_online_cpu(cpu) {
-		/* Leave it to be NULL. */
-		if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
-			continue;
-		per_cpu(xen_vcpu, cpu) =
-			&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
-	}
-}
-
-#ifdef CONFIG_XEN_PVHVM
-static void __init init_hvm_pv_info(void)
-{
-	int major, minor;
-	uint32_t eax, ebx, ecx, edx, pages, msr, base;
-	u64 pfn;
-
-	base = xen_cpuid_base();
-	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
-	major = eax >> 16;
-	minor = eax & 0xffff;
-	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
-
-	cpuid(base + 2, &pages, &msr, &ecx, &edx);
-
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
-	xen_setup_features();
-
-	cpuid(base + 4, &eax, &ebx, &ecx, &edx);
-	if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
-		this_cpu_write(xen_vcpu_id, ebx);
-	else
-		this_cpu_write(xen_vcpu_id, smp_processor_id());
-
-	pv_info.name = "Xen HVM";
-
-	xen_domain_type = XEN_HVM_DOMAIN;
-}
-#endif
-
-static int xen_cpu_up_prepare(unsigned int cpu)
+static int xen_cpu_up_prepare_pv(unsigned int cpu)
 {
 	int rc;
 
-	if (xen_hvm_domain()) {
-		/*
-		 * This can happen if CPU was offlined earlier and
-		 * offlining timed out in common_cpu_die().
-		 */
-		if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
-			xen_smp_intr_free(cpu);
-			xen_uninit_lock_cpu(cpu);
-		}
-
-		if (cpu_acpi_id(cpu) != U32_MAX)
-			per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
-		else
-			per_cpu(xen_vcpu_id, cpu) = cpu;
-		xen_vcpu_setup(cpu);
-	}
-
 	if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
 		xen_setup_timer(cpu);
@@ -1872,7 +1606,7 @@ static int xen_cpu_up_prepare(unsigned int cpu)
 	return 0;
 }
 
-static int xen_cpu_dead(unsigned int cpu)
+static int xen_cpu_dead_pv(unsigned int cpu)
 {
 	xen_smp_intr_free(cpu);
@@ -1882,84 +1616,6 @@ static int xen_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
-static int xen_cpu_up_online(unsigned int cpu)
-{
-	xen_init_lock_cpu(cpu);
-	return 0;
-}
-
-#ifdef CONFIG_XEN_PVHVM
-#ifdef CONFIG_KEXEC_CORE
-static void xen_hvm_shutdown(void)
-{
-	native_machine_shutdown();
-	if (kexec_in_progress)
-		xen_reboot(SHUTDOWN_soft_reset);
-}
-
-static void xen_hvm_crash_shutdown(struct pt_regs *regs)
-{
-	native_machine_crash_shutdown(regs);
-	xen_reboot(SHUTDOWN_soft_reset);
-}
-#endif
-
-static void __init xen_hvm_guest_init(void)
-{
-	if (xen_pv_domain())
-		return;
-
-	init_hvm_pv_info();
-
-	xen_hvm_init_shared_info();
-
-	xen_panic_handler_init();
-
-	BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
-
-	xen_hvm_smp_init();
-	WARN_ON(xen_cpuhp_setup());
-	xen_unplug_emulated_devices();
-	x86_init.irqs.intr_init = xen_init_IRQ;
-	xen_hvm_init_time_ops();
-	xen_hvm_init_mmu_ops();
-#ifdef CONFIG_KEXEC_CORE
-	machine_ops.shutdown = xen_hvm_shutdown;
-	machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
-#endif
-}
-#endif
-
-static bool xen_nopv = false;
-static __init int xen_parse_nopv(char *arg)
-{
-	xen_nopv = true;
-	return 0;
-}
-early_param("xen_nopv", xen_parse_nopv);
-
-static uint32_t __init xen_platform(void)
-{
-	if (xen_nopv)
-		return 0;
-
-	return xen_cpuid_base();
-}
-
-bool xen_hvm_need_lapic(void)
-{
-	if (xen_nopv)
-		return false;
-	if (xen_pv_domain())
-		return false;
-	if (!xen_hvm_domain())
-		return false;
-	if (xen_feature(XENFEAT_hvm_pirqs))
-		return false;
-	return true;
-}
-EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-
 static void xen_set_cpu_features(struct cpuinfo_x86 *c)
 {
 	if (xen_pv_domain()) {
@@ -2007,28 +1663,18 @@ static void xen_pin_vcpu(int cpu)
 	}
 }
 
-const struct hypervisor_x86 x86_hyper_xen = {
-	.name = "Xen",
-	.detect = xen_platform,
-#ifdef CONFIG_XEN_PVHVM
-	.init_platform = xen_hvm_guest_init,
-#endif
-	.x2apic_available = xen_x2apic_para_available,
-	.set_cpu_features = xen_set_cpu_features,
-	.pin_vcpu = xen_pin_vcpu,
-};
-EXPORT_SYMBOL(x86_hyper_xen);
-
-#ifdef CONFIG_HOTPLUG_CPU
-void xen_arch_register_cpu(int num)
+uint32_t __init xen_platform_pv(void)
 {
-	arch_register_cpu(num);
-}
-EXPORT_SYMBOL(xen_arch_register_cpu);
+	if (xen_pv_domain())
+		return xen_cpuid_base();
 
-void xen_arch_unregister_cpu(int num)
-{
-	arch_unregister_cpu(num);
+	return 0;
 }
-EXPORT_SYMBOL(xen_arch_unregister_cpu);
-#endif
+
+const struct hypervisor_x86 x86_hyper_xen_pv = {
+	.name = "Xen PV",
+	.detect = xen_platform_pv,
+	.set_cpu_features = xen_set_cpu_features,
+	.pin_vcpu = xen_pin_vcpu,
+};
+EXPORT_SYMBOL(x86_hyper_xen_pv);
diff --git a/arch/x86/xen/enlighten_common.c b/arch/x86/xen/enlighten_common.c
new file mode 100644
index 0000000..b8a7be2
--- /dev/null
+++ b/arch/x86/xen/enlighten_common.c
@@ -0,0 +1,216 @@
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+#include <linux/interrupt.h>
+
+#include <xen/features.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+#include <asm/cpu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+#include "smp.h"
+
+EXPORT_SYMBOL_GPL(hypercall_page);
+
+/*
+ * Pointer to the xen_vcpu_info structure or
+ * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
+ * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
+ * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
+ * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
+ * acknowledge pending events.
+ * Also more subtly it is used by the patched version of irq enable/disable
+ * e.g. xen_irq_enable_direct and xen_iret in PV mode.
+ *
+ * The desire to be able to do those mask/unmask operations as a single
+ * instruction by using the per-cpu offset held in %gs is the real reason
+ * vcpu info is in a per-cpu pointer and the original reason for this
+ * hypercall.
+ *
+ */
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+/*
+ * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
+ * hypercall. This can be used both in PV and PVHVM mode. The structure
+ * overrides the default per_cpu(xen_vcpu, cpu) value.
+ */
+DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
+
+struct start_info *xen_start_info;
+EXPORT_SYMBOL_GPL(xen_start_info);
+
+struct shared_info xen_dummy_shared_info;
+
+/*
+ * Point at some empty memory to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
+
+/*
+ * Flag to determine whether vcpu info placement is available on all
+ * VCPUs. We assume it is to start with, and then set it to zero on
+ * the first failure. This is because it can succeed on some VCPUs
+ * and not others, since it can involve hypervisor memory allocation,
+ * or because the guest failed to guarantee all the appropriate
+ * constraints on all VCPUs (ie buffer can't cross a page boundary).
+ *
+ * Note that any particular CPU may be using a placed vcpu structure,
+ * but we can only optimise if the all are.
+ *
+ * 0: not available, 1: available
+ */
+int have_vcpu_info_placement = 1;
+
+static int xen_cpu_up_online(unsigned int cpu)
+{
+	xen_init_lock_cpu(cpu);
+	return 0;
+}
+
+int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
+		    int (*cpu_dead_cb)(unsigned int))
+{
+	int rc;
+
+	rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
+				       "XEN_HVM_GUEST_PREPARE",
+				       cpu_up_prepare_cb, cpu_dead_cb);
+	if (rc >= 0) {
+		rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					       "XEN_HVM_GUEST_ONLINE",
+					       xen_cpu_up_online, NULL);
+		if (rc < 0)
+			cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
+	}
+
+	return rc >= 0 ? 0 : rc;
+}
+
+static void clamp_max_cpus(void)
+{
+#ifdef CONFIG_SMP
+	if (setup_max_cpus > MAX_VIRT_CPUS)
+		setup_max_cpus = MAX_VIRT_CPUS;
+#endif
+}
+
+void xen_vcpu_setup(int cpu)
+{
+	struct vcpu_register_vcpu_info info;
+	int err;
+	struct vcpu_info *vcpup;
+
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	/*
+	 * This path is called twice on PVHVM - first during bootup via
+	 * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+	 * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+	 * As we can only do the VCPUOP_register_vcpu_info once lets
+	 * not over-write its result.
+	 *
+	 * For PV it is called during restore (xen_vcpu_restore) and bootup
+	 * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+	 * use this function.
+	 */
+	if (xen_hvm_domain()) {
+		if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+			return;
+	}
+	if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+		per_cpu(xen_vcpu, cpu) =
+			&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+
+	if (!have_vcpu_info_placement) {
+		if (cpu >= MAX_VIRT_CPUS)
+			clamp_max_cpus();
+		return;
+	}
+
+	vcpup = &per_cpu(xen_vcpu_info, cpu);
+	info.mfn = arbitrary_virt_to_mfn(vcpup);
+	info.offset = offset_in_page(vcpup);
+
+	/* Check to see if the hypervisor will put the vcpu_info
+	   structure where we want it, which allows direct access via
+	   a percpu-variable.
+	   N.B. This hypercall can _only_ be called once per CPU. Subsequent
+	   calls will error out with -EINVAL. This is due to the fact that
+	   hypervisor has no unregister variant and this hypercall does not
+	   allow to over-write info.mfn and info.offset.
+	 */
+	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+				 &info);
+
+	if (err) {
+		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
+		have_vcpu_info_placement = 0;
+		clamp_max_cpus();
+	} else {
+		/* This cpu is using the registered vcpu info, even if
+		   later ones fail to. */
+		per_cpu(xen_vcpu, cpu) = vcpup;
+	}
+}
+
+void xen_reboot(int reason)
+{
+	struct sched_shutdown r = { .reason = reason };
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
+
+	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
+		BUG();
+}
+
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	if (!kexec_crash_loaded())
+		xen_reboot(SHUTDOWN_crash);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+	.notifier_call = xen_panic_event,
+	.priority = INT_MIN
+};
+
+int xen_panic_handler_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num)
+{
+	arch_register_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_register_cpu);
+
+void xen_arch_unregister_cpu(int num)
+{
+	arch_unregister_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_unregister_cpu);
+#endif
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
new file mode 100644
index 0000000..58b9e44
--- /dev/null
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -0,0 +1,202 @@
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+
+#include <xen/features.h>
+#include <xen/events.h>
+#include <xen/interface/memory.h>
+
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/hypervisor.h>
+
+#include <asm/xen/cpuid.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+#include "smp.h"
+
+void __ref xen_hvm_init_shared_info(void)
+{
+	int cpu;
+	struct xen_add_to_physmap xatp;
+	static struct shared_info *shared_info_page;
+
+	if (!shared_info_page)
+		shared_info_page = (struct shared_info *)
+			extend_brk(PAGE_SIZE, PAGE_SIZE);
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+		BUG();
+
+	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+
+	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+	 * page, we use it in the event channel upcall and in some pvclock
+	 * related functions. We don't need the vcpu_info placement
+	 * optimizations because we don't use any pv_mmu or pv_irq op on
+	 * HVM.
+	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_init_shared_info is run at resume time too and
+	 * in that case multiple vcpus might be online. */
+	for_each_online_cpu(cpu) {
+		/* Leave it to be NULL. */
+		if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
+			continue;
+		per_cpu(xen_vcpu, cpu) =
+			&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+	}
+}
+
+static void __init init_hvm_pv_info(void)
+{
+	int major, minor;
+	uint32_t eax, ebx, ecx, edx, pages, msr, base;
+	u64 pfn;
+
+	base = xen_cpuid_base();
+	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+	major = eax >> 16;
+	minor = eax & 0xffff;
+	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
+
+	cpuid(base + 2, &pages, &msr, &ecx, &edx);
+
+	pfn = __pa(hypercall_page);
+	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	xen_setup_features();
+
+	cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+	if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+		this_cpu_write(xen_vcpu_id, ebx);
+	else
+		this_cpu_write(xen_vcpu_id, smp_processor_id());
+
+	pv_info.name = "Xen HVM";
+
+	xen_domain_type = XEN_HVM_DOMAIN;
+}
+
+#ifdef CONFIG_KEXEC_CORE
+static void xen_hvm_shutdown(void)
+{
+	native_machine_shutdown();
+	if (kexec_in_progress)
+		xen_reboot(SHUTDOWN_soft_reset);
+}
+
+static void xen_hvm_crash_shutdown(struct pt_regs *regs)
+{
+	native_machine_crash_shutdown(regs);
+	xen_reboot(SHUTDOWN_soft_reset);
+}
+#endif
+
+static int xen_cpu_up_prepare_hvm(unsigned int cpu)
+{
+	int rc;
+
+	/*
+	 * This can happen if CPU was offlined earlier and
+	 * offlining timed out in common_cpu_die().
+	 */
+	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+	}
+
+	if (cpu_acpi_id(cpu) != U32_MAX)
+		per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+	else
+		per_cpu(xen_vcpu_id, cpu) = cpu;
+	xen_vcpu_setup(cpu);
+
+	if (xen_feature(XENFEAT_hvm_safe_pvclock))
+		xen_setup_timer(cpu);
+
+	rc = xen_smp_intr_init(cpu);
+	if (rc) {
+		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
+		     cpu, rc);
+		return rc;
+	}
+	return 0;
+}
+
+static int xen_cpu_dead_hvm(unsigned int cpu)
+{
+	xen_smp_intr_free(cpu);
+
+	if (xen_feature(XENFEAT_hvm_safe_pvclock))
+		xen_teardown_timer(cpu);
+
+	return 0;
+}
+
+void __init xen_hvm_guest_init(void)
+{
+	if (xen_pv_domain())
+		return;
+
+	init_hvm_pv_info();
+
+	xen_hvm_init_shared_info();
+
+	xen_panic_handler_init();
+
+	BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
+
+	xen_hvm_smp_init();
+	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
+	xen_unplug_emulated_devices();
+	x86_init.irqs.intr_init = xen_init_IRQ;
+	xen_hvm_init_time_ops();
+	xen_hvm_init_mmu_ops();
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.shutdown = xen_hvm_shutdown;
+	machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
+#endif
+}
+
+bool xen_hvm_need_lapic(void)
+{
+	if (xen_nopv)
+		return false;
+	if (xen_pv_domain())
+		return false;
+	if (!xen_hvm_domain())
+		return false;
+	if (xen_feature(XENFEAT_hvm_pirqs))
+		return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
+
+bool xen_nopv;
+static __init int xen_parse_nopv(char *arg)
+{
+	xen_nopv = true;
+	return 0;
+}
+early_param("xen_nopv", xen_parse_nopv);
+
+uint32_t __init xen_platform_hvm(void)
+{
+	if (xen_pv_domain() || xen_nopv)
+		return 0;
+
+	return xen_cpuid_base();
+}
+
+const struct hypervisor_x86 x86_hyper_xen_pvhvm = {
+	.name = "Xen",
+	.detect = xen_platform_hvm,
+	.init_platform = xen_hvm_guest_init,
+	.x2apic_available = xen_x2apic_para_available,
+};
+EXPORT_SYMBOL(x86_hyper_xen_pvhvm);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7d5afdb..65e184b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1295,7 +1295,9 @@ static void __init xen_pagetable_init(void)
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		xen_remap_memory();
 
+#ifdef CONFIG_XEN_PV
 	xen_setup_shared_info();
+#endif
 }
 
 static void xen_write_cr2(unsigned long cr2)
 {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 9fa27ce..bdb0d9c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -253,6 +253,7 @@ int xen_smp_intr_init(unsigned int cpu)
 	return rc;
 }
 
+#ifdef CONFIG_XEN_PV
 static void __init xen_fill_possible_map(void)
 {
 	int i, rc;
@@ -304,6 +305,7 @@ static void __init xen_filter_cpu_maps(void)
 #endif
 }
+#endif
 
 static void __init xen_smp_prepare_boot_cpu(void)
 {
@@ -311,6 +313,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
 	native_smp_prepare_boot_cpu();
 
 	if (xen_pv_domain()) {
+#ifdef CONFIG_XEN_PV
 		if (!xen_feature(XENFEAT_writable_page_tables))
 			/* We've switched to the "real" per-cpu gdt, so make
 			 * sure the old memory can be recycled. */
@@ -327,6 +330,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
 
 		xen_filter_cpu_maps();
 		xen_setup_vcpu_info_placement();
+#endif
 	}
 
 	/*
@@ -344,6 +348,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
 	xen_init_spinlocks();
 }
 
+#ifdef CONFIG_XEN_PV
 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
@@ -525,22 +530,6 @@ static int xen_cpu_disable(void)
 	return 0;
 }
 
-static void xen_cpu_die(unsigned int cpu)
-{
-	while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
-						     xen_vcpu_nr(cpu), NULL)) {
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ/10);
-	}
-
-	if (common_cpu_die(cpu) == 0) {
-		xen_smp_intr_free(cpu);
-		xen_uninit_lock_cpu(cpu);
-		xen_teardown_timer(cpu);
-		xen_pmu_finish(cpu);
-	}
-}
-
 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
 {
 	play_dead_common();
@@ -592,6 +581,23 @@ static void xen_stop_other_cpus(int wait)
 {
 	smp_call_function(stop_self, NULL, wait);
 }
+#endif /* CONFIG_XEN_PV */
+
+static void xen_cpu_die(unsigned int cpu)
+{
+	while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
+						     xen_vcpu_nr(cpu), NULL)) {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ/10);
+	}
+
+	if (common_cpu_die(cpu) == 0) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
+		xen_pmu_finish(cpu);
+	}
+}
 
 static void xen_smp_send_reschedule(int cpu)
 {
@@ -738,6 +744,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_XEN_PV
 static const struct smp_ops xen_smp_ops __initconst = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -760,6 +767,7 @@ void __init xen_smp_init(void)
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
 }
+#endif
 
 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 {
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 7f664c4..37f634f 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -16,6 +16,7 @@
 
 static void xen_pv_pre_suspend(void)
 {
+#ifdef CONFIG_XEN_PV
 	xen_mm_pin_all();
 
 	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
@@ -28,6 +29,7 @@ static void xen_pv_pre_suspend(void)
 	if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
 					 __pte_ma(0), 0))
 		BUG();
+#endif
 }
 
 static void xen_hvm_post_suspend(int suspend_cancelled)
@@ -48,6 +50,7 @@ static void xen_hvm_post_suspend(int suspend_cancelled)
 
 static void xen_pv_post_suspend(int suspend_cancelled)
 {
+#ifdef CONFIG_XEN_PV
 	xen_build_mfn_list_list();
 
 	xen_setup_shared_info();
@@ -66,6 +69,7 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 	}
 
 	xen_mm_unpin_all();
+#endif
 }
 
 void xen_arch_pre_suspend(void)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7f8d8ab..b0016b3 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -35,6 +35,7 @@
 #define PVH_FEATURES (0)
 #endif
 
+#ifdef CONFIG_XEN_PV
 	__INIT
 ENTRY(startup_xen)
 	cld
@@ -53,6 +54,7 @@ ENTRY(startup_xen)
 	jmp xen_start_kernel
 
 	__FINIT
+#endif
 
 #ifdef CONFIG_XEN_PVH
 /*
@@ -112,7 +114,9 @@ ENTRY(hypercall_page)
 	/* Map the p2m table to a 512GB-aligned user address. */
 	ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M,   .quad PGDIR_SIZE)
 #endif
+#ifdef CONFIG_XEN_PV
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
+#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3cbce3b..b4e1d35 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -76,6 +76,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
 bool xen_vcpu_stolen(int vcpu);
 
+extern int have_vcpu_info_placement;
+
 void xen_vcpu_setup(int cpu);
 void xen_setup_vcpu_info_placement(void);
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index b5486e6..bcf90ed 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -15,6 +15,8 @@ static inline uint32_t xen_vcpu_nr(int cpu)
 	return per_cpu(xen_vcpu_id, cpu);
 }
 
+extern bool xen_nopv;
+
 void xen_arch_pre_suspend(void);
 void xen_arch_post_suspend(int suspend_cancelled);
@@ -33,6 +35,11 @@ u64 xen_steal_clock(int cpu);
 
 int xen_setup_shutdown_event(void);
 
+int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
+		    int (*cpu_dead_cb)(unsigned int));
+
+void xen_reboot(int reason);
+
 extern unsigned long *xen_contiguous_bitmap;
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				 unsigned int address_bits,
-- 
2.7.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel