[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] Crash in set_cpu_sibling_map() booting Xen 4.6.0 on Fusion



RFC. Boot tested on VMware Fusion, and on a 2-socket Xeon server.

diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index ea07888..a41ce2d 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -67,7 +67,7 @@ extern unsigned int nr_sockets;
 void set_nr_sockets(void);

 /* Representing HT and core siblings in each socket. */
-extern cpumask_t **socket_cpumask;
+cpumask_t *socket_cpumask(unsigned int socket);

 #endif /* !__ASSEMBLY__ */

diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 0946992..6aadaac 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -60,7 +60,7 @@ cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);

 unsigned int __read_mostly nr_sockets;
-cpumask_t **__read_mostly socket_cpumask;
+static struct radix_tree_root socket_cpumask_tree;
 static cpumask_t *secondary_socket_cpumask;

 struct cpuinfo_x86 cpu_data[NR_CPUS];
@@ -81,6 +81,11 @@ static enum cpu_state {

 void *stack_base[NR_CPUS];

+cpumask_t *socket_cpumask(unsigned int socket)
+{
+    return radix_tree_lookup(&socket_cpumask_tree, socket);
+}
+
 static void smp_store_cpu_info(int id)
 {
     struct cpuinfo_x86 *c = cpu_data + id;
@@ -92,9 +97,9 @@ static void smp_store_cpu_info(int id)
         identify_cpu(c);

         socket = cpu_to_socket(id);
-        if ( !socket_cpumask[socket] )
+        if ( radix_tree_insert(&socket_cpumask_tree, socket,
+                               secondary_socket_cpumask) == 0 )
         {
-            socket_cpumask[socket] = secondary_socket_cpumask;
             secondary_socket_cpumask = NULL;
         }
     }
@@ -258,7 +263,7 @@ static void set_cpu_sibling_map(int cpu)

     cpumask_set_cpu(cpu, &cpu_sibling_setup_map);

-    cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
+    cpumask_set_cpu(cpu, socket_cpumask(cpu_to_socket(cpu)));

     if ( c[cpu].x86_num_siblings > 1 )
     {
@@ -666,11 +671,12 @@ static void cpu_smpboot_free(unsigned int cpu)
 {
     unsigned int order, socket = cpu_to_socket(cpu);
     struct cpuinfo_x86 *c = cpu_data;
+    cpumask_t *m = socket_cpumask(socket);

-    if ( cpumask_empty(socket_cpumask[socket]) )
+    if ( m && cpumask_empty(m) )
     {
-        xfree(socket_cpumask[socket]);
-        socket_cpumask[socket] = NULL;
+        radix_tree_delete(&socket_cpumask_tree, socket);
+        xfree(m);
     }

     c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
@@ -804,6 +810,8 @@ static struct notifier_block cpu_smpboot_nfb = {

 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+    cpumask_t *m;
+
     register_cpu_notifier(&cpu_smpboot_nfb);

     mtrr_aps_sync_begin();
@@ -819,9 +827,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)

     set_nr_sockets();

-    socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
-    if ( socket_cpumask == NULL ||
-         (socket_cpumask[cpu_to_socket(0)] = xzalloc(cpumask_t)) == NULL )
+    radix_tree_init(&socket_cpumask_tree);
+    if ( (m = xzalloc(cpumask_t)) == NULL ||
+         radix_tree_insert(&socket_cpumask_tree, cpu_to_socket(0), m) != 0 )
         panic("No memory for socket CPU siblings map");

     if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
@@ -888,7 +896,7 @@ remove_siblinginfo(int cpu)
 {
     int sibling;

-    cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
+    cpumask_clear_cpu(cpu, socket_cpumask(cpu_to_socket(cpu)));

     for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) )
     {
diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index c0daa2e..7acb3d9 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -52,14 +52,6 @@ static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);

 static struct psr_cat_cbm *temp_cos_to_cbm;

-static unsigned int get_socket_cpu(unsigned int socket)
-{
-    if ( likely(socket < nr_sockets) )
-        return cpumask_any(socket_cpumask[socket]);
-
-    return nr_cpu_ids;
-}
-
 static void __init parse_psr_bool(char *s, char *value, char *feature,
                                   unsigned int mask)
 {
@@ -331,7 +323,8 @@ static int write_l3_cbm(unsigned int socket,
unsigned int cos, uint64_t cbm)
         do_write_l3_cbm(&info);
     else
     {
-        unsigned int cpu = get_socket_cpu(socket);
+        cpumask_t *m = socket_cpumask(socket);
+        unsigned int cpu = m ? cpumask_any(m) : nr_cpu_ids;

         if ( cpu >= nr_cpu_ids )
             return -ENOTSOCK;
@@ -503,8 +496,9 @@ static void cat_cpu_init(void)
 static void cat_cpu_fini(unsigned int cpu)
 {
     unsigned int socket = cpu_to_socket(cpu);
+    cpumask_t *m = socket_cpumask(socket);

-    if ( !socket_cpumask[socket] || cpumask_empty(socket_cpumask[socket]) )
+    if ( !m || cpumask_empty(m) )
     {
         struct psr_cat_socket_info *info = cat_socket_info + socket;



On Tue, Nov 24, 2015 at 7:20 AM, Jan Beulich <JBeulich@xxxxxxxx> wrote:
>>>> On 24.11.15 at 15:13, <eswierk@xxxxxxxxxxxxxxxxxx> wrote:
>> On Tue, Nov 24, 2015 at 2:34 AM, Jan Beulich <JBeulich@xxxxxxxx> wrote:
>>> Bottom line - for the moment I do not see a reasonable way of
>>> dealing with that situation. The closest I could see would be what
>>> we iirc had temporarily during the review cycles of the initial CAT
>>> series: A command line option to specify the number of sockets. Or
>>> make all accesses to socket_cpumask[] conditional upon PSR being
>>> enabled (which would have the bad side effect of making future
>>> uses for other purposes more cumbersome), or go through and
>>> range check the socket number on all of those accesses.
>>
>> Could we avoid the issue by replacing socket_cpumask array with a list
>> or hashtable, indexed by socket ID?
>
> Yes, a radix tree would work. But it would also seem like overkill if
> all we need it for is some strange virtualization of CPUID. The more
> I think about it, the better I like the option below.
>
> Jan
>
>>> Chao, could you - inside Intel - please check whether there are
>>> any assumptions on the respective CPUID leaf output that aren't
>>> explicitly stated in the SDM right now (like resulting in contiguous
>>> socket numbers), and ask for them getting made explicit (if there
>>> are any), or it being made explicit that no assumptions at all are
>>> to be made at all on the presented values (in which case we'd
>>> have to consume MADT parsing data in set_nr_sockets(), e.g.
>>> by replacing num_processors there with one more than the
>>> maximum APIC ID of any non-disabled CPU)?
>>
>> I suppose the key is whether Intel has encoded such assumptions in the
>> BIOS reference code, or has otherwise communicated them to AMI et al.
>>
>> --Ed
>
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.