[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] IRQ: manually EOI migrating line interrupts




On 05/09/11 11:43, Jan Beulich wrote:
>>>> On 30.08.11 at 16:17, Andrew Cooper <andrew.cooper3@xxxxxxxxxx> wrote:
>> When migrating IO-APIC line level interrupts between PCPUs, the
>> migration code rewrites the IO-APIC entry to point to the new
>> CPU/Vector before EOI'ing it.
>>
>> The EOI process says that EOI'ing the Local APIC will cause a
>> broadcast with the vector number, which the IO-APIC must listen to
>> in order to clear the IRR and Status bits.
>>
>> In the case of migrating, the IO-APIC has already been
>> reprogrammed so the EOI broadcast with the old vector fails to match
>> the new vector, leaving the IO-APIC with an outstanding vector,
>> preventing any more use of that line interrupt.  This causes a lockup
>> especially when your root device is using PCI INTA (megaraid_sas
>> driver *ehem*)
>>
>> However, the problem is mostly hidden because send_cleanup_vector()
>> causes a cleanup of all moving vectors on the current PCPU in such a
>> way that does not cause the problem, and if the problem has occurred,
>> the writes it makes to the IO-APIC clear the IRR and Status bits,
>> which unlocks the problem.
>>
>>
>> This fix is distinctly a temporary hack, waiting on a cleanup of the
>> irq code.  It checks for the edge case where we have moved the irq,
>> and manually EOI's the old vector with the IO-APIC which correctly
>> clears the IRR and Status bits.  Also, it protects the code which
>> updates irq_cfg by disabling interrupts.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
>>
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
>> --- a/xen/arch/x86/hpet.c    Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/hpet.c    Tue Aug 30 15:15:56 2011 +0100
>> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
>>      ack_APIC_irq();
>>  }
>>  
>> -static void hpet_msi_end(unsigned int irq)
>> +static void hpet_msi_end(unsigned int irq, u8 vector)
>>  {
>>  }
>>  
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
>> --- a/xen/arch/x86/i8259.c   Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/i8259.c   Tue Aug 30 15:15:56 2011 +0100
>> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
>>      return 0; /* never anything pending */
>>  }
>>  
>> -static void end_8259A_irq(unsigned int irq)
>> +static void end_8259A_irq(unsigned int irq, u8 vector)
>>  {
>>      if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
>>          enable_8259A_irq(irq);
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
>> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
>>      }
>>  }
>>  
>> -static void end_level_ioapic_irq (unsigned int irq)
>> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
>>  {
>>      unsigned long v;
>>      int i;
>> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
>>   */
>>      i = IO_APIC_VECTOR(irq);
>>  
>> +    /* Manually EOI the old vector if we are moving to the new */
>> +    if ( vector && i != vector )
>> +    {
>> +        int ioapic;
>> +        for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
>> +            io_apic_eoi(ioapic, i);
> While I realize that the patch already went in, this still will need
> adjustment for dealing with old IO-APICs that don't have an EOI
> register (or if we want to stop supporting such, a clear panic()
> rather than subtle and hard to debug failure).
>
> Jan
>

This is a good point.  However, due to the use of io_apic_eoi elsewhere
in the code, I don't think this is the only area susceptible to this issue.

I will add it to my todo list for the irq cleanup.

~Andrew

>> +    }
>> +
>>      v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
>>  
>>      ack_APIC_irq();
>> @@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
>>  {
>>  }
>>  
>> -#define end_edge_ioapic_irq disable_edge_ioapic_irq
>> +static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
>> +{
>> +}
>> +
>>  
>>  /*
>>   * Level and edge triggered IO-APIC interrupts need different handling,
>> @@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
>>          ack_APIC_irq(); /* ACKTYPE_NONE */
>>  }
>>  
>> -static void end_msi_irq(unsigned int irq)
>> +static void end_msi_irq(unsigned int irq, u8 vector)
>>  {
>>      if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
>>          ack_APIC_irq(); /* ACKTYPE_EOI */
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
>> --- a/xen/arch/x86/irq.c     Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/irq.c     Tue Aug 30 15:15:56 2011 +0100
>> @@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
>>  void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
>>  
>>  static void enable_none(unsigned int vector) { }
>> +static void end_none(unsigned int irq, u8 vector) { }
>>  static unsigned int startup_none(unsigned int vector) { return 0; }
>>  static void disable_none(unsigned int vector) { }
>>  static void ack_none(unsigned int irq)
>> @@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
>>  }
>>  
>>  #define shutdown_none   disable_none
>> -#define end_none        enable_none
>>  
>>  hw_irq_controller no_irq_type = {
>>      "none",
>> @@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct 
>>      static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
>>      unsigned int old_vector;
>>      int cpu, err;
>> +    unsigned long flags;
>>      cpumask_t tmp_mask;
>>  
>>      old_vector = irq_to_vector(irq);
>> @@ -431,6 +432,7 @@ next:
>>          /* Found one! */
>>          current_vector = vector;
>>          current_offset = offset;
>> +        local_irq_save(flags);
>>          if (old_vector) {
>>              cfg->move_in_progress = 1;
>>              cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
>> @@ -450,6 +452,7 @@ next:
>>              if (IO_APIC_IRQ(irq))
>>                      irq_vector[irq] = vector;
>>          err = 0;
>> +        local_irq_restore(flags);
>>          break;
>>      }
>>      return err;
>> @@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
>>      desc->status &= ~IRQ_INPROGRESS;
>>  
>>   out:
>> -    desc->handler->end(irq);
>> +    desc->handler->end(irq, regs->entry_vector);
>>   out_no_end:
>>      spin_unlock(&desc->lock);
>>      irq_exit();
>> @@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void 
>>      switch ( action->ack_type )
>>      {
>>      case ACKTYPE_UNMASK:
>> -        desc->handler->end(irq);
>> +        desc->handler->end(irq, 0);
>>          break;
>>      case ACKTYPE_EOI:
>>          cpu_eoi_map = action->cpu_eoi_map;
>> @@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
>>          /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. 
>> */
>>          ASSERT(action->ack_type == ACKTYPE_EOI);
>>          ASSERT(desc->status & IRQ_DISABLED);
>> -        desc->handler->end(irq);
>> +        desc->handler->end(irq, vector);
>>          return;
>>      }
>>  
>> @@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
>>          ASSERT(irq > 0);
>>          desc = irq_to_desc(irq);
>>          spin_lock(&desc->lock);
>> -        desc->handler->end(irq);
>> +        desc->handler->end(irq, peoi[sp].vector);
>>          spin_unlock(&desc->lock);
>>      }
>>  
>> @@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
>>      if ( action->ack_type == ACKTYPE_UNMASK )
>>      {
>>          ASSERT(cpus_empty(action->cpu_eoi_map));
>> -        desc->handler->end(irq);
>> +        desc->handler->end(irq, 0);
>>          spin_unlock_irq(&desc->lock);
>>          return;
>>      }
>> @@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
>>      case ACKTYPE_UNMASK:
>>          if ( test_and_clear_bool(pirq->masked) &&
>>               (--action->in_flight == 0) )
>> -            desc->handler->end(irq);
>> +            desc->handler->end(irq, 0);
>>          break;
>>      case ACKTYPE_EOI:
>>          /* NB. If #guests == 0 then we clear the eoi_map later on. */
>> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
>> --- a/xen/drivers/passthrough/amd/iommu_init.c       Thu Aug 25 12:03:14 
>> 2011 +0100
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c       Tue Aug 30 15:15:56 
>> 2011 +0100
>> @@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
>>      return 0;
>>  }
>>  
>> -static void iommu_msi_end(unsigned int irq)
>> +static void iommu_msi_end(unsigned int irq, u8 vector)
>>  {
>>      iommu_msi_unmask(irq);
>>      ack_APIC_irq();
>> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
>> --- a/xen/drivers/passthrough/vtd/iommu.c    Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/drivers/passthrough/vtd/iommu.c    Tue Aug 30 15:15:56 2011 +0100
>> @@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
>>      return 0;
>>  }
>>  
>> -static void dma_msi_end(unsigned int irq)
>> +static void dma_msi_end(unsigned int irq, u8 vector)
>>  {
>>      dma_msi_unmask(irq);
>>      ack_APIC_irq();
>> diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
>> --- a/xen/include/xen/irq.h  Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/include/xen/irq.h  Tue Aug 30 15:15:56 2011 +0100
>> @@ -44,7 +44,7 @@ struct hw_interrupt_type {
>>      void (*enable)(unsigned int irq);
>>      void (*disable)(unsigned int irq);
>>      void (*ack)(unsigned int irq);
>> -    void (*end)(unsigned int irq);
>> +    void (*end)(unsigned int irq, u8 vector);
>>      void (*set_affinity)(unsigned int irq, cpumask_t mask);
>>  };
>>  
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@xxxxxxxxxxxxxxxxxxx 
>> http://lists.xensource.com/xen-devel 
>

-- 
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.