
Re: [Xen-devel] [PATCH 2/2] x86/x2apic: properly implement cluster mode


  • To: Jan Beulich <JBeulich@xxxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Thu, 08 Nov 2012 15:17:45 +0000
  • Delivery-date: Thu, 08 Nov 2012 15:18:20 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>
  • Thread-index: Ac29xDQp3gwAiOemPEStfcA1GgAuHQ==
  • Thread-topic: [Xen-devel] [PATCH 2/2] x86/x2apic: properly implement cluster mode

On 08/11/2012 15:03, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:

> So far, cluster mode was just an alternative implementation of
> physical mode: allowing only single-CPU interrupt targets, and sending
> IPIs to each target CPU separately. Take advantage of what cluster
> mode really can do in that regard.

What does it allow? Multicast within certain constraints? I know it's not
part of our coding style, but some comments would be nice. ;)
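
For reference, my rough understanding of the encoding -- which seems to
match what x2apic_cluster() and cpu_mask_to_apicid_x2apic_cluster() below
are doing, though worth double-checking against the SDM -- is that the
logical ID carries the cluster in bits 31:16 and a one-bit-per-CPU mask in
bits 15:0, so IDs within one cluster can simply be ORed into a single
logical destination. A quick stand-alone sketch (plain C, not part of the
patch; make_ldr() and the APIC IDs are made up purely for illustration):

#include <stdint.h>
#include <stdio.h>

/* Derive the logical x2APIC ID from the physical one: cluster in bits
 * 31:16, one-hot member bit in bits 15:0 (my reading of the layout). */
static uint32_t make_ldr(uint32_t x2apic_id)
{
    return ((x2apic_id >> 4) << 16) | (1u << (x2apic_id & 0xf));
}

int main(void)
{
    /* Three CPUs sharing cluster 1 (x2APIC IDs 17, 18 and 20, made up). */
    uint32_t dest = make_ldr(17) | make_ldr(18) | make_ldr(20);

    /* Prints 0x00010016: cluster 1, member bits 1, 2 and 4.  One ICR
     * write in logical destination mode reaches all three CPUs. */
    printf("combined destination: %#010x\n", dest);
    return 0;
}

If that reading is right, a single IPI can multicast to any subset of the
(up to 16) CPUs sharing a cluster, which I assume is the gain the
description alludes to.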

 -- Keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/xen/arch/x86/genapic/x2apic.c
> +++ b/xen/arch/x86/genapic/x2apic.c
> @@ -19,6 +19,7 @@
>  
>  #include <xen/config.h>
>  #include <xen/init.h>
> +#include <xen/cpu.h>
>  #include <xen/cpumask.h>
>  #include <asm/apicdef.h>
>  #include <asm/genapic.h>
> @@ -33,6 +34,14 @@ static bool_t __initdata x2apic_phys; /*
>  boolean_param("x2apic_phys", x2apic_phys);
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(u32, cpu_2_logical_apicid);
> +static DEFINE_PER_CPU_READ_MOSTLY(cpumask_t *, cluster_cpus);
> +static cpumask_t *cluster_cpus_spare;
> +static DEFINE_PER_CPU(cpumask_var_t, scratch_mask);
> +
> +static inline u32 x2apic_cluster(unsigned int cpu)
> +{
> +    return per_cpu(cpu_2_logical_apicid, cpu) >> 16;
> +}
>  
>  static void init_apic_ldr_x2apic_phys(void)
>  {
> @@ -40,20 +49,53 @@ static void init_apic_ldr_x2apic_phys(vo
>  
>  static void init_apic_ldr_x2apic_cluster(void)
>  {
> -    this_cpu(cpu_2_logical_apicid) = apic_read(APIC_LDR);
> +    unsigned int cpu, this_cpu = smp_processor_id();
> +
> +    per_cpu(cpu_2_logical_apicid, this_cpu) = apic_read(APIC_LDR);
> +
> +    if ( per_cpu(cluster_cpus, this_cpu) )
> +    {
> +        ASSERT(cpumask_test_cpu(this_cpu, per_cpu(cluster_cpus, this_cpu)));
> +        return;
> +    }
> +
> +    per_cpu(cluster_cpus, this_cpu) = cluster_cpus_spare;
> +    for_each_online_cpu ( cpu )
> +    {
> +        if (this_cpu == cpu || x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
> +            continue;
> +        per_cpu(cluster_cpus, this_cpu) = per_cpu(cluster_cpus, cpu);
> +        break;
> +    }
> +    if ( per_cpu(cluster_cpus, this_cpu) == cluster_cpus_spare )
> +        cluster_cpus_spare = NULL;
> +
> +    cpumask_set_cpu(this_cpu, per_cpu(cluster_cpus, this_cpu));
>  }
>  
>  static void __init clustered_apic_check_x2apic(void)
>  {
>  }
>  
> +static const cpumask_t *vector_allocation_cpumask_x2apic_cluster(int cpu)
> +{
> +    return per_cpu(cluster_cpus, cpu);
> +}
> +
>  static unsigned int cpu_mask_to_apicid_x2apic_cluster(const cpumask_t *cpumask)
>  {
> -    return per_cpu(cpu_2_logical_apicid, cpumask_first(cpumask));
> +    unsigned int cpu = cpumask_first(cpumask);
> +    unsigned int dest = per_cpu(cpu_2_logical_apicid, cpu);
> +    const cpumask_t *cluster_cpus = per_cpu(cluster_cpus, cpu);
> +
> +    for_each_cpu ( cpu, cluster_cpus )
> +        if ( cpumask_test_cpu(cpu, cpumask) )
> +            dest |= per_cpu(cpu_2_logical_apicid, cpu);
> +
> +    return dest;
>  }
>  
> -static void __send_IPI_mask_x2apic(
> -    const cpumask_t *cpumask, int vector, unsigned int dest_mode)
> +static void send_IPI_mask_x2apic_phys(const cpumask_t *cpumask, int vector)
>  {
>      unsigned int cpu;
>      unsigned long flags;
> @@ -77,23 +119,48 @@ static void __send_IPI_mask_x2apic(
>      {
>          if ( !cpu_online(cpu) || (cpu == smp_processor_id()) )
>              continue;
> -        msr_content = (dest_mode == APIC_DEST_PHYSICAL)
> -            ? cpu_physical_id(cpu) : per_cpu(cpu_2_logical_apicid, cpu);
> -        msr_content = (msr_content << 32) | APIC_DM_FIXED | dest_mode | vector;
> +        msr_content = cpu_physical_id(cpu);
> +        msr_content = (msr_content << 32) | APIC_DM_FIXED |
> +                      APIC_DEST_PHYSICAL | vector;
>          apic_wrmsr(APIC_ICR, msr_content);
>      }
>  
>      local_irq_restore(flags);
>  }
>  
> -static void send_IPI_mask_x2apic_phys(const cpumask_t *cpumask, int vector)
> -{
> -    __send_IPI_mask_x2apic(cpumask, vector, APIC_DEST_PHYSICAL);
> -}
> -
>  static void send_IPI_mask_x2apic_cluster(const cpumask_t *cpumask, int vector)
>  {
> -    __send_IPI_mask_x2apic(cpumask, vector, APIC_DEST_LOGICAL);
> +    unsigned int cpu = smp_processor_id();
> +    cpumask_t *ipimask = per_cpu(scratch_mask, cpu);
> +    const cpumask_t *cluster_cpus;
> +    unsigned long flags;
> +
> +    mb(); /* See above for an explanation. */
> +
> +    local_irq_save(flags);
> +
> +    cpumask_andnot(ipimask, &cpu_online_map, cpumask_of(cpu));
> +
> +    for ( cpumask_and(ipimask, cpumask, ipimask); !cpumask_empty(ipimask);
> +          cpumask_andnot(ipimask, ipimask, cluster_cpus) )
> +    {
> +        uint64_t msr_content = 0;
> +
> +        cluster_cpus = per_cpu(cluster_cpus, cpumask_first(ipimask));
> +        for_each_cpu ( cpu, cluster_cpus )
> +        {
> +            if ( !cpumask_test_cpu(cpu, ipimask) )
> +                continue;
> +            msr_content |= per_cpu(cpu_2_logical_apicid, cpu);
> +        }
> +
> +        BUG_ON(!msr_content);
> +        msr_content = (msr_content << 32) | APIC_DM_FIXED |
> +                      APIC_DEST_LOGICAL | vector;
> +        apic_wrmsr(APIC_ICR, msr_content);
> +    }
> +
> +    local_irq_restore(flags);
>  }
>  
>  static const struct genapic apic_x2apic_phys = {
> @@ -116,15 +183,60 @@ static const struct genapic apic_x2apic_
>      .init_apic_ldr = init_apic_ldr_x2apic_cluster,
>      .clustered_apic_check = clustered_apic_check_x2apic,
>      .target_cpus = target_cpus_all,
> -    .vector_allocation_cpumask = vector_allocation_cpumask_phys,
> +    .vector_allocation_cpumask = vector_allocation_cpumask_x2apic_cluster,
>      .cpu_mask_to_apicid = cpu_mask_to_apicid_x2apic_cluster,
>      .send_IPI_mask = send_IPI_mask_x2apic_cluster,
>      .send_IPI_self = send_IPI_self_x2apic
>  };
>  
> +static int update_clusterinfo(
> +    struct notifier_block *nfb, unsigned long action, void *hcpu)
> +{
> +    unsigned int cpu = (unsigned long)hcpu;
> +    int err = 0;
> +
> +    switch (action) {
> +    case CPU_UP_PREPARE:
> +        per_cpu(cpu_2_logical_apicid, cpu) = BAD_APICID;
> +        if ( !cluster_cpus_spare )
> +            cluster_cpus_spare = xzalloc(cpumask_t);
> +        if ( !cluster_cpus_spare ||
> +             !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
> +            err = -ENOMEM;
> +        break;
> +    case CPU_UP_CANCELED:
> +    case CPU_DEAD:
> +        if ( per_cpu(cluster_cpus, cpu) )
> +        {
> +            cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu));
> +            if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) )
> +                xfree(per_cpu(cluster_cpus, cpu));
> +        }
> +        free_cpumask_var(per_cpu(scratch_mask, cpu));
> +        break;
> +    }
> +
> +    return !err ? NOTIFY_DONE : notifier_from_errno(err);
> +}
> +
> +static struct notifier_block x2apic_cpu_nfb = {
> +   .notifier_call = update_clusterinfo
> +};
> +
>  const struct genapic *__init apic_x2apic_probe(void)
>  {
> -    return x2apic_phys ? &apic_x2apic_phys : &apic_x2apic_cluster;
> +    if ( x2apic_phys )
> +        return &apic_x2apic_phys;
> +
> +    if ( !this_cpu(cluster_cpus) )
> +    {
> +        update_clusterinfo(NULL, CPU_UP_PREPARE,
> +                           (void *)(long)smp_processor_id());
> +        init_apic_ldr_x2apic_cluster();
> +        register_cpu_notifier(&x2apic_cpu_nfb);
> +    }
> +
> +    return &apic_x2apic_cluster;
>  }
>  
>  void __init check_x2apic_preenabled(void)
> 
> 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel