[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] Crash in set_cpu_sibling_map() booting Xen 4.6.0 on Fusion
RFC. Boot tested on VMware Fusion, and on a 2-socket Xeon server. diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index ea07888..a41ce2d 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -67,7 +67,7 @@ extern unsigned int nr_sockets; void set_nr_sockets(void); /* Representing HT and core siblings in each socket. */ -extern cpumask_t **socket_cpumask; +cpumask_t *socket_cpumask(unsigned int socket); #endif /* !__ASSEMBLY__ */ diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 0946992..6aadaac 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -60,7 +60,7 @@ cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); unsigned int __read_mostly nr_sockets; -cpumask_t **__read_mostly socket_cpumask; +static struct radix_tree_root socket_cpumask_tree; static cpumask_t *secondary_socket_cpumask; struct cpuinfo_x86 cpu_data[NR_CPUS]; @@ -81,6 +81,11 @@ static enum cpu_state { void *stack_base[NR_CPUS]; +cpumask_t *socket_cpumask(unsigned int socket) +{ + return radix_tree_lookup(&socket_cpumask_tree, socket); +} + static void smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -92,9 +97,9 @@ static void smp_store_cpu_info(int id) identify_cpu(c); socket = cpu_to_socket(id); - if ( !socket_cpumask[socket] ) + if ( radix_tree_insert(&socket_cpumask_tree, socket, + secondary_socket_cpumask) == 0 ) { - socket_cpumask[socket] = secondary_socket_cpumask; secondary_socket_cpumask = NULL; } } @@ -258,7 +263,7 @@ static void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]); + cpumask_set_cpu(cpu, socket_cpumask(cpu_to_socket(cpu))); if ( c[cpu].x86_num_siblings > 1 ) { @@ -666,11 +671,12 @@ static void cpu_smpboot_free(unsigned int cpu) { unsigned int order, socket = cpu_to_socket(cpu); struct cpuinfo_x86 *c = cpu_data; + cpumask_t *m = socket_cpumask(socket); - if ( cpumask_empty(socket_cpumask[socket]) ) + if ( m && cpumask_empty(m) ) { - xfree(socket_cpumask[socket]); - socket_cpumask[socket] = NULL; + radix_tree_delete(&socket_cpumask_tree, socket); + xfree(m); } c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID; @@ -804,6 +810,8 @@ static struct notifier_block cpu_smpboot_nfb = { void __init smp_prepare_cpus(unsigned int max_cpus) { + cpumask_t *m; + register_cpu_notifier(&cpu_smpboot_nfb); mtrr_aps_sync_begin(); @@ -819,9 +827,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) set_nr_sockets(); - socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); - if ( socket_cpumask == NULL || - (socket_cpumask[cpu_to_socket(0)] = xzalloc(cpumask_t)) == NULL ) + radix_tree_init(&socket_cpumask_tree); + if ( (m = xzalloc(cpumask_t)) == NULL || + radix_tree_insert(&socket_cpumask_tree, cpu_to_socket(0), m) != 0 ) panic("No memory for socket CPU siblings map"); if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) || @@ -888,7 +896,7 @@ remove_siblinginfo(int cpu) { int sibling; - cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]); + cpumask_clear_cpu(cpu, socket_cpumask(cpu_to_socket(cpu))); for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) ) { diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c index c0daa2e..7acb3d9 100644 --- a/xen/arch/x86/psr.c +++ b/xen/arch/x86/psr.c @@ -52,14 +52,6 @@ static DEFINE_PER_CPU(struct psr_assoc, psr_assoc); static struct psr_cat_cbm *temp_cos_to_cbm; -static unsigned int get_socket_cpu(unsigned int socket) -{ - if ( likely(socket < nr_sockets) ) - return cpumask_any(socket_cpumask[socket]); - - return nr_cpu_ids; -} - static void __init parse_psr_bool(char *s, char *value, char *feature, unsigned int mask) { @@ -331,7 +323,8 @@ static int write_l3_cbm(unsigned int socket, unsigned int cos, uint64_t cbm) do_write_l3_cbm(&info); else { - unsigned int cpu = get_socket_cpu(socket); + cpumask_t *m = socket_cpumask(socket); + unsigned int cpu = m ? cpumask_any(m) : nr_cpu_ids; if ( cpu >= nr_cpu_ids ) return -ENOTSOCK; @@ -503,8 +496,9 @@ static void cat_cpu_init(void) static void cat_cpu_fini(unsigned int cpu) { unsigned int socket = cpu_to_socket(cpu); + cpumask_t *m = socket_cpumask(socket); - if ( !socket_cpumask[socket] || cpumask_empty(socket_cpumask[socket]) ) + if ( !m || cpumask_empty(m) ) { struct psr_cat_socket_info *info = cat_socket_info + socket; On Tue, Nov 24, 2015 at 7:20 AM, Jan Beulich <JBeulich@xxxxxxxx> wrote: >>>> On 24.11.15 at 15:13, <eswierk@xxxxxxxxxxxxxxxxxx> wrote: >> On Tue, Nov 24, 2015 at 2:34 AM, Jan Beulich <JBeulich@xxxxxxxx> wrote: >>> Bottom line - for the moment I do not see a reasonable way of >>> dealing with that situation. The closest I could see would be what >>> we iirc had temporarily during the review cycles of the initial CAT >>> series: A command line option to specify the number of sockets. Or >>> make all accesses to socket_cpumask[] conditional upon PSR being >>> enabled (which would have the bad side effect of making future >>> uses for other purposes more cumbersome), or go through and >>> range check the socket number on all of those accesses. >> >> Could we avoid the issue by replacing socket_cpumask array with a list >> or hashtable, indexed by socket ID? > > Yes, a radix tree would work. But it would also seem like overkill if > all we need it for is some strange virtualization of CPUID. The more > I think about it, the better I like the option below. > > Jan > >>> Chao, could you - inside Intel - please check whether there are >>> any assumptions on the respective CPUID leaf output that aren't >>> explicitly stated in the SDM right now (like resulting in contiguous >>> socket numbers), and ask for them getting made explicit (if there >>> are any), or it being made explicit that no assumptions at all are >>> to be made at all on the presented values (in which case we'd >>> have to consume MADT parsing data in set_nr_sockets(), e.g. >>> by replacing num_processors there with one more than the >>> maximum APIC ID of any non-disabled CPU)? >> >> I suppose the key is whether Intel has encoded such assumptions in the >> BIOS reference code, or has otherwise communicated them to AMI et al. >> >> --Ed > > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |