[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 4/6] x86: bring up all CPUs even if not all are supposed to be used
On Wed, Jul 18, 2018 at 02:21:53AM -0600, Jan Beulich wrote: > Reportedly Intel CPUs which can't broadcast #MC to all targeted > cores/threads because some have CR4.MCE clear will shut down. Therefore > we want to keep CR4.MCE enabled when offlining a CPU, and we need to > bring up all CPUs in order to be able to set CR4.MCE in the first place. > > The use of clear_in_cr4() in cpu_mcheck_disable() was ill-advised > anyway, and to avoid future similar mistakes I'm removing clear_in_cr4() > altogether right here. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > --- > v2: Use ROUNDUP(). > --- > Instead of fully bringing up CPUs and then calling cpu_down(), another > option would be to suppress/cancel full bringup in smp_callin(). But I > guess we should try to keep things simple for now, and see later whether > this can be "optimized". > --- > Note: The parked CPUs can be brought online (i.e. the meaning of > "maxcpus=" isn't as strict anymore as it was before), but won't > immediately be used for scheduling pre-existing Dom0 CPUs. That's > because dom0_setup_vcpu() artificially restricts the affinity. For > DomU-s whose affinity was not artificially restricted, no such > limitation exists, albeit the shown "soft" affinity appears to > suffer a similar issue. As that's not a goal of this patch, I've > put the issues on the side for now, perhaps for someone else to > take care of. > Note: On one of my test systems the parked CPUs get _PSD data reported > by Dom0 that is different from the non-parked ones (coord_type is > 0xFC instead of 0xFE). Giving Dom0 enough vCPU-s eliminates this From drivers/xen/xen-acpi-processor.c: 181 /* 'acpi_processor_preregister_performance' does not parse if the 182 * num_processors <= 1, but Xen still requires it. Do it manually here. 
183 */ 184 if (pdomain->num_processors <= 1) { 185 if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) 186 dst->shared_type = CPUFREQ_SHARED_TYPE_ALL; 187 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) 188 dst->shared_type = CPUFREQ_SHARED_TYPE_HW; 189 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) 190 dst->shared_type = CPUFREQ_SHARED_TYPE_ANY; 191 192 } ? > problem, so there is apparently something amiss in the processor > driver. I've tried to figure out what, but I couldn't, despite the > AML suggesting that this might be some _OSC invocation (but if it > is, I can't find it - acpi_run_osc() clearly does not anywhere get > invoked in a per-CPU fashion). > > --- a/xen/arch/x86/cpu/common.c > +++ b/xen/arch/x86/cpu/common.c > @@ -13,6 +13,7 @@ > #include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */ > > #include "cpu.h" > +#include "mcheck/x86_mca.h" > > bool_t opt_arat = 1; > boolean_param("arat", opt_arat); > @@ -343,6 +344,9 @@ static void __init early_cpu_detect(void > hap_paddr_bits = PADDR_BITS; > } > > + if (c->x86_vendor != X86_VENDOR_AMD) > + park_offline_cpus = opt_mce; > + > initialize_cpu_data(0); > } > > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c > +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c > @@ -636,8 +636,6 @@ static void clear_cmci(void) > > static void cpu_mcheck_disable(void) > { > - clear_in_cr4(X86_CR4_MCE); > - > if ( cmci_support && opt_mce ) > clear_cmci(); > } > --- a/xen/arch/x86/mpparse.c > +++ b/xen/arch/x86/mpparse.c > @@ -68,19 +68,26 @@ physid_mask_t phys_cpu_present_map; > > void __init set_nr_cpu_ids(unsigned int max_cpus) > { > + unsigned int tot_cpus = num_processors + disabled_cpus; > + > if (!max_cpus) > - max_cpus = num_processors + disabled_cpus; > + max_cpus = tot_cpus; > if (max_cpus > NR_CPUS) > max_cpus = NR_CPUS; > else if (!max_cpus) > max_cpus = 1; > printk(XENLOG_INFO "SMP: Allowing %u CPUs (%d hotplug CPUs)\n", > max_cpus, max_t(int, max_cpus - num_processors, 0)); > - nr_cpu_ids = 
max_cpus; > + > + if (!park_offline_cpus) > + tot_cpus = max_cpus; > + nr_cpu_ids = min(tot_cpus, NR_CPUS + 0u); > + if (park_offline_cpus && nr_cpu_ids < num_processors) > + printk(XENLOG_WARNING "SMP: Cannot bring up %u further CPUs\n", > + num_processors - nr_cpu_ids); > > #ifndef nr_cpumask_bits > - nr_cpumask_bits = (max_cpus + (BITS_PER_LONG - 1)) & > - ~(BITS_PER_LONG - 1); > + nr_cpumask_bits = ROUNDUP(nr_cpu_ids, BITS_PER_LONG); > printk(XENLOG_DEBUG "NR_CPUS:%u nr_cpumask_bits:%u\n", > NR_CPUS, nr_cpumask_bits); > #endif > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -665,7 +665,7 @@ void __init noreturn __start_xen(unsigne > { > char *memmap_type = NULL; > char *cmdline, *kextra, *loader; > - unsigned int initrdidx; > + unsigned int initrdidx, num_parked = 0; > multiboot_info_t *mbi; > module_t *mod; > unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; > @@ -1512,7 +1512,8 @@ void __init noreturn __start_xen(unsigne > else > { > set_nr_cpu_ids(max_cpus); > - max_cpus = nr_cpu_ids; > + if ( !max_cpus ) > + max_cpus = nr_cpu_ids; > } > > if ( xen_guest ) > @@ -1635,16 +1636,27 @@ void __init noreturn __start_xen(unsigne > /* Set up node_to_cpumask based on cpu_to_node[]. 
*/ > numa_add_cpu(i); > > - if ( (num_online_cpus() < max_cpus) && !cpu_online(i) ) > + if ( (park_offline_cpus || num_online_cpus() < max_cpus) && > + !cpu_online(i) ) > { > int ret = cpu_up(i); > if ( ret != 0 ) > printk("Failed to bring up CPU %u (error %d)\n", i, ret); > + else if ( num_online_cpus() > max_cpus ) > + { > + ret = cpu_down(i); > + if ( !ret ) > + ++num_parked; > + else > + printk("Could not re-offline CPU%u (%d)\n", i, ret); > + } > } > } > } > > printk("Brought up %ld CPUs\n", (long)num_online_cpus()); > + if ( num_parked ) > + printk(XENLOG_INFO "Parked %u CPUs\n", num_parked); > smp_cpus_done(); > > do_initcalls(); > --- a/xen/include/asm-x86/processor.h > +++ b/xen/include/asm-x86/processor.h > @@ -342,12 +342,6 @@ static always_inline void set_in_cr4 (un > write_cr4(read_cr4() | mask); > } > > -static always_inline void clear_in_cr4 (unsigned long mask) > -{ > - mmu_cr4_features &= ~mask; > - write_cr4(read_cr4() & ~mask); > -} > - > static inline unsigned int read_pkru(void) > { > unsigned int pkru; > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxxx > https://lists.xenproject.org/mailman/listinfo/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |