
Re: [Xen-devel] [RFC PATCH v3 21/24] ARM: NUMA: ACPI: Extract proximity from SRAT table



On Tue, 18 Jul 2017, vijay.kilari@xxxxxxxxx wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxx>
> 
> Register SRAT entry handler for type
> ACPI_SRAT_TYPE_GICC_AFFINITY to parse SRAT table
> and extract proximity for all CPU IDs.
> 
> Signed-off-by: Vijaya Kumar <Vijaya.Kumar@xxxxxxxxxx>
> ---
>  xen/arch/arm/acpi/boot.c      |   2 +
>  xen/arch/arm/numa/acpi_numa.c | 124 +++++++++++++++++++++++++++++++++++++++++-
>  xen/drivers/acpi/numa.c       |  15 +++++
>  xen/include/acpi/actbl1.h     |  17 +++++-
>  xen/include/asm-arm/numa.h    |   9 +++
>  5 files changed, 165 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/acpi/boot.c b/xen/arch/arm/acpi/boot.c
> index 889208a..4e28b16 100644
> --- a/xen/arch/arm/acpi/boot.c
> +++ b/xen/arch/arm/acpi/boot.c
> @@ -31,6 +31,7 @@
>  #include <acpi/actables.h>
>  #include <xen/mm.h>
>  #include <xen/device_tree.h>
> +#include <xen/numa.h>
>  
>  #include <asm/acpi.h>
>  #include <asm/smp.h>
> @@ -117,6 +118,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
>          return;
>      }
>  
> +    numa_set_cpu_node(enabled_cpus, acpi_get_nodeid(mpidr));
>      /* map the logical cpu id to cpu MPIDR */
>      cpu_logical_map(enabled_cpus) = mpidr;
>  
> diff --git a/xen/arch/arm/numa/acpi_numa.c b/xen/arch/arm/numa/acpi_numa.c
> index 341e20b7..95617f9 100644
> --- a/xen/arch/arm/numa/acpi_numa.c
> +++ b/xen/arch/arm/numa/acpi_numa.c
> @@ -34,13 +34,63 @@ struct cpuid_to_hwid {
>      uint64_t hwid;
>  };
>  
> +/* Holds NODE to MPIDR mapping. */
> +struct node_to_hwid {
> +    nodeid_t nodeid;
> +    uint64_t hwid;
> +};
> +
>  #define PHYS_CPUID_INVALID 0xff
>  
>  /* Holds mapping of CPU id to MPIDR read from MADT */
>  static struct cpuid_to_hwid __read_mostly cpuid_to_hwid_map[NR_CPUS] =
>      { [0 ... NR_CPUS - 1] = {PHYS_CPUID_INVALID, MPIDR_INVALID} };
> +static struct node_to_hwid __read_mostly node_to_hwid_map[NR_CPUS] =
> +    { [0 ... NR_CPUS - 1] = {NUMA_NO_NODE, MPIDR_INVALID} };
> +static unsigned int cpus_in_srat;
>  static unsigned int num_cpuid_to_hwid;
>  
> +nodeid_t __init acpi_get_nodeid(uint64_t hwid)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < cpus_in_srat; i++ )
> +    {
> +        if ( node_to_hwid_map[i].hwid == hwid )
> +            return node_to_hwid_map[i].nodeid;
> +    }
> +
> +    return NUMA_NO_NODE;
> +}
> +
> +static uint64_t acpi_get_cpu_hwid(int cid)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < num_cpuid_to_hwid; i++ )
> +    {
> +        if ( cpuid_to_hwid_map[i].cpuid == cid )
> +            return cpuid_to_hwid_map[i].hwid;
> +    }
> +
> +    return MPIDR_INVALID;
> +}
> +
> +static void __init acpi_map_node_to_hwid(nodeid_t nodeid, uint64_t hwid)
> +{
> +    if ( nodeid >= MAX_NUMNODES )
> +    {
> +        printk(XENLOG_WARNING
> +               "ACPI: NUMA: nodeid out of range %d with MPIDR 0x%lx\n",
> +               nodeid, hwid);
> +        numa_failed();
> +        return;
> +    }
> +
> +    node_to_hwid_map[cpus_in_srat].nodeid = nodeid;
> +    node_to_hwid_map[cpus_in_srat].hwid = hwid;
> +}
> +
>  static void __init acpi_map_cpu_to_hwid(uint32_t cpuid, uint64_t mpidr)
>  {
>      if ( mpidr == MPIDR_INVALID )
> @@ -76,15 +126,87 @@ static int __init acpi_parse_madt_handler(struct acpi_subtable_header *header,
>      return 0;
>  }
>  
> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
> +static void __init
> +acpi_numa_gicc_affinity_init(const struct acpi_srat_gicc_affinity *pa)
> +{
> +    int pxm, node;
> +    uint64_t mpidr;
> +
> +    if ( srat_disabled() )
> +        return;
> +
> +    if ( pa->header.length < sizeof(struct acpi_srat_gicc_affinity) )
> +    {
> +        printk(XENLOG_WARNING "SRAT: Invalid SRAT header length: %d\n",
> +               pa->header.length);
> +        numa_failed();
> +        return;
> +    }
> +
> +    if ( !(pa->flags & ACPI_SRAT_GICC_ENABLED) )
> +        return;
> +
> +    if ( cpus_in_srat >= NR_CPUS )
> +    {
> +        printk(XENLOG_ERR
> +               "SRAT: cpu_to_node_map[%d] is too small to fit all cpus\n",
> +               NR_CPUS);
> +        return;
> +    }
> +
> +    pxm = pa->proximity_domain;
> +    node = acpi_setup_node(pxm);
> +    if ( node == NUMA_NO_NODE )
> +    {
> +        numa_failed();
> +        return;
> +    }
> +
> +    mpidr = acpi_get_cpu_hwid(pa->acpi_processor_uid);
> +    if ( mpidr == MPIDR_INVALID )
> +    {
> +        printk(XENLOG_ERR
> +               "SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
> +               pxm, pa->acpi_processor_uid);
> +        numa_failed();
> +        return;
> +    }
> +
> +    acpi_map_node_to_hwid(node, mpidr);

Given that we don't really care about the mapping of nodes to mpidrs,
except for the sake of later setting up the mapping between cpu ids and
mpidr ids, can't we just skip this step and set up the cpu id to node
id mapping directly?

We have the mpidr here; do we already have a cpu id to mpidr mapping at
this point in the boot process? I think not, because that mapping is
set up by acpi_smp_init_cpus, which is called after numa_init. Would it
be possible to change the acpi table parsing sequence so that we create
the cpu id to mpidr mappings first (acpi_map_gic_cpu_interface called
before acpi_numa_gicc_affinity_init)? That way, we wouldn't have to
introduce the node_to_hwid_map array.
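
Something along these lines in acpi_numa_gicc_affinity_init is what I
have in mind -- just a rough, untested sketch, and it assumes that
cpu_logical_map() has already been populated from the MADT by the time
the SRAT is parsed (i.e. the reordering suggested above):

    unsigned int cpu;

    /* Find the logical cpu id for this MPIDR (only works if the MADT
     * GICC entries were parsed before the SRAT). */
    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
        if ( cpu_logical_map(cpu) == mpidr )
            break;

    if ( cpu == NR_CPUS )
    {
        printk(XENLOG_ERR "SRAT: MPIDR 0x%"PRIx64" not found in MADT\n",
               mpidr);
        numa_failed();
        return;
    }

    numa_set_cpu_node(cpu, node);

With that, node_to_hwid_map (and acpi_get_nodeid) would not be needed
at all.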


> +    node_set(node, processor_nodes_parsed);
> +    cpus_in_srat++;

I think it would be more consistent with the rest of the code if we
moved "cpus_in_srat++" into acpi_map_node_to_hwid.


> +    acpi_numa = 1;

Wait, isn't this function called once per cpu? We should be setting
"acpi_numa = 1" once at boot, not once per cpu.


> +    printk(XENLOG_INFO "SRAT: PXM %d -> MPIDR 0x%lx -> Node %d\n",
> +           pxm, mpidr, node);
> +}
> +
>  void __init acpi_map_uid_to_mpidr(void)
>  {
>      acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
>                      acpi_parse_madt_handler, NR_CPUS);
>  }
>  
> +static int __init
> +acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
> +                         const unsigned long end)
> +{
> +   const struct acpi_srat_gicc_affinity *processor_affinity
> +                = (struct acpi_srat_gicc_affinity *)header;
> +
> +   if (!processor_affinity)
> +       return -EINVAL;

I don't think it is possible for processor_affinity to be NULL here. I
noticed that the existing checks in the srat parsing code on x86 check
for NULL, like you do here, but I don't think NULL can actually happen.
However, the length of the header might be smaller than required, so a
check along the lines of BAD_MADT_ENTRY would be more appropriate, but
that is already done by acpi_numa_gicc_affinity_init.
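
For reference, what I have in mind is something like the BAD_MADT_ENTRY
macro used by the MADT parsers, adapted to SRAT entries (a hypothetical
BAD_SRAT_ENTRY, written from memory, so only a sketch):

    #define BAD_SRAT_ENTRY(entry, end) (                                   \
        !(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||   \
        ((struct acpi_subtable_header *)(entry))->length < sizeof(*(entry)))

But since acpi_numa_gicc_affinity_init already validates header.length,
there is no need to duplicate that here; simply dropping the NULL check
would do.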


> +   acpi_table_print_srat_entry(header);
> +   acpi_numa_gicc_affinity_init(processor_affinity);
> +
> +   return 0;
> +}
> +
>  void __init arch_table_parse_srat(void)
>  {
> -    return;
> +    acpi_table_parse_srat(ACPI_SRAT_TYPE_GICC_AFFINITY,
> +                          acpi_parse_gicc_affinity, NR_CPUS);
>  }
>  
>  void __init acpi_numa_arch_fixup(void) {}
> diff --git a/xen/drivers/acpi/numa.c b/xen/drivers/acpi/numa.c
> index 0adc32c..b48d91d 100644
> --- a/xen/drivers/acpi/numa.c
> +++ b/xen/drivers/acpi/numa.c
> @@ -104,6 +104,21 @@ void __init acpi_table_print_srat_entry(struct acpi_subtable_header * header)
>               }
>  #endif                               /* ACPI_DEBUG_OUTPUT */
>               break;
> +       case ACPI_SRAT_TYPE_GICC_AFFINITY:
> +#ifdef ACPI_DEBUG_OUTPUT
> +             {
> +                     struct acpi_srat_gicc_affinity *p =
> +                         (struct acpi_srat_gicc_affinity *)header;
> +                     ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> +                                       "SRAT Processor (acpi id[0x%04x]) in"
> +                                       " proximity domain %d %s\n",
> +                                       p->acpi_processor_uid,
> +                                       p->proximity_domain,
> +                                       (p->flags & ACPI_SRAT_GICC_ENABLED) ?
> +                                       "enabled" : "disabled"));
> +             }
> +#endif                         /* ACPI_DEBUG_OUTPUT */
> +               break;
>       default:
>               printk(KERN_WARNING PREFIX
>                      "Found unsupported SRAT entry (type = %#x)\n",
> diff --git a/xen/include/acpi/actbl1.h b/xen/include/acpi/actbl1.h
> index e199136..b84bfba 100644
> --- a/xen/include/acpi/actbl1.h
> +++ b/xen/include/acpi/actbl1.h
> @@ -949,7 +949,8 @@ enum acpi_srat_type {
>       ACPI_SRAT_TYPE_CPU_AFFINITY = 0,
>       ACPI_SRAT_TYPE_MEMORY_AFFINITY = 1,
>       ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY = 2,
> -     ACPI_SRAT_TYPE_RESERVED = 3     /* 3 and greater are reserved */
> +     ACPI_SRAT_TYPE_GICC_AFFINITY = 3,
> +     ACPI_SRAT_TYPE_RESERVED = 4     /* 4 and greater are reserved */
>  };
>  
>  /*
> @@ -1007,6 +1008,20 @@ struct acpi_srat_x2apic_cpu_affinity {
>  
>  #define ACPI_SRAT_CPU_ENABLED       (1)      /* 00: Use affinity structure */
>  
> +/* 3: GICC Affinity (ACPI 5.1) */
> +
> +struct acpi_srat_gicc_affinity {
> +     struct acpi_subtable_header header;
> +     u32 proximity_domain;
> +     u32 acpi_processor_uid;
> +     u32 flags;
> +     u32 clock_domain;
> +};
> +
> +/* Flags for struct acpi_srat_gicc_affinity */
> +
> +#define ACPI_SRAT_GICC_ENABLED     (1)  /* 00: Use affinity structure */
> +
>  /* Reset to default packing */
>  
>  #pragma pack()
> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> index 0d3146c..f0a50bd 100644
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -7,6 +7,15 @@ void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);
>  void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b));
>  void init_dt_numa_distance(void);
>  
> +#ifdef CONFIG_ACPI_NUMA
> +nodeid_t acpi_get_nodeid(uint64_t hwid);
> +#else
> +static inline nodeid_t acpi_get_nodeid(uint64_t hwid)
> +{
> +    return 0;
> +}
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #ifdef CONFIG_NUMA
>  void numa_init(void);
>  int dt_numa_init(void);
> -- 
> 2.7.4
> 
