[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v5 2/5] x86/hpet: Use singe apic vector rather than irq_descs for HPET interrupts



>>> On 05.03.14 at 16:43, Andrew Cooper <andrew.cooper3@xxxxxxxxxx> wrote:
> +/* Wake up all cpus in the channel mask.  Lock should be held. */
> +static void hpet_wake_cpus(struct hpet_event_channel *ch)
>  {
> -    unsigned int cpu = smp_processor_id();
> -
> -    if ( cpumask_test_and_clear_cpu(cpu, mask) )
> -        raise_softirq(TIMER_SOFTIRQ);
> -
> -    cpuidle_wakeup_mwait(mask);
> -
> -    if ( !cpumask_empty(mask) )
> -       cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
> +    cpuidle_wakeup_mwait(ch->cpumask);
> +    cpumask_raise_softirq(ch->cpumask, TIMER_SOFTIRQ);
>  }
>  
> -static void handle_hpet_broadcast(struct hpet_event_channel *ch)
> +/* HPET interrupt handler.  Wake all requested cpus.  Lock should be held. 
> */
> +static void hpet_interrupt_handler(struct hpet_event_channel *ch)
>  {
> -    cpumask_t mask;
> -    s_time_t now, next_event;
> -    unsigned int cpu;
> -    unsigned long flags;
> -
> -    spin_lock_irqsave(&ch->lock, flags);
> -
> -again:
> -    ch->next_event = STIME_MAX;
> -
> -    spin_unlock_irqrestore(&ch->lock, flags);
> -
> -    next_event = STIME_MAX;
> -    cpumask_clear(&mask);
> -    now = NOW();
> -
> -    /* find all expired events */
> -    for_each_cpu(cpu, ch->cpumask)
> -    {
> -        s_time_t deadline;
> -
> -        rmb();
> -        deadline = per_cpu(timer_deadline, cpu);
> -        rmb();
> -        if ( !cpumask_test_cpu(cpu, ch->cpumask) )
> -            continue;
> -
> -        if ( deadline <= now )
> -            cpumask_set_cpu(cpu, &mask);
> -        else if ( deadline < next_event )
> -            next_event = deadline;
> -    }
> -
> -    /* wakeup the cpus which have an expired event. */
> -    evt_do_broadcast(&mask);
> -
> -    if ( next_event != STIME_MAX )
> -    {
> -        spin_lock_irqsave(&ch->lock, flags);
> -
> -        if ( next_event < ch->next_event &&
> -             hpet_program_time(ch, next_event, now, 0) )
> -            goto again;
> -
> -        spin_unlock_irqrestore(&ch->lock, flags);
> -    }
> +    hpet_wake_cpus(ch);
> +    raise_softirq(TIMER_SOFTIRQ);

hpet_wake_cpus() just did a cpumask_raise_softirq()?

> +static void __init hpet_init_channel(struct hpet_event_channel *ch)
>  {
> -    int irq;
> -
> -    if ( (irq = create_irq(NUMA_NO_NODE)) < 0 )
> -        return irq;
> +    u64 hpet_rate = hpet_setup();
>  
> -    ch->msi.irq = irq;
> -    if ( hpet_setup_msi_irq(ch) )
> -    {
> -        destroy_irq(irq);
> -        return -EINVAL;
> -    }
> +    /*
> +     * The period is a femto seconds value. We need to calculate the scaled
> +     * math multiplication factor for nanosecond to hpet tick conversion.
> +     */
> +    ch->mult = div_sc((unsigned long)hpet_rate,
> +                                     1000000000ul, 32);
> +    ch->shift = 32;
> +    ch->next_event = STIME_MAX;
> +    spin_lock_init(&ch->lock);
>  
> -    return 0;
> +    ch->msi.irq = -1;
> +    ch->msi.msi_attrib.maskbit = 1;
> +    ch->msi.msi_attrib.pos = MSI_TYPE_HPET;

I agree that this should be kept for consistency, but it reminds me
to ask whether you have anything in mind to make the MSI related
information dumpable again now that it's disconnected from the
IRQ system (and hence invisible to dump_msi()). Not even the
subsequent optional debugging patches appear to be doing that.

And if it's not easily integrateable, I wonder whether the msi field
of struct hpet_event_channel is really still needed. (If not,
replacing with the still used fields should probably be a follow-up
patch unless the one here is touching [almost] all relevant places
already anyway.)

>  void hpet_broadcast_enter(void)
>  {
>      unsigned int cpu = smp_processor_id();
> -    struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
> +    struct hpet_event_channel *ch = this_cpu(hpet_channel);
> +    s_time_t deadline = this_cpu(timer_deadline);
> +
> +    ASSERT(!local_irq_is_enabled());
> +    ASSERT(ch == NULL);
>  
> -    if ( per_cpu(timer_deadline, cpu) == 0 )
> +    if ( deadline == 0 )
>          return;
>  
> -    if ( !ch )
> -        ch = hpet_get_channel(cpu);
> +    /* If using HPET in legacy timer mode */
> +    if ( num_hpets_used == 0 )
> +    {
> +        spin_lock(&hpet_events->lock);
>  
> -    ASSERT(!local_irq_is_enabled());
> +        cpumask_set_cpu(cpu, hpet_events->cpumask);
> +        if ( deadline < hpet_events->next_event )
> +            hpet_program_time(hpet_events, deadline, NOW(), 1);
> +
> +        spin_unlock(&hpet_events->lock);
> +        return;
> +    }
> +
> +retry_free_channel:
> +    ch = hpet_get_free_channel();
> +
> +    if ( ch )
> +    {
> +        spin_lock(&ch->lock);
> +
> +        /* This really should be an MSI channel by this point */
> +        ASSERT(!(ch->flags & HPET_EVT_LEGACY));
> +
> +        hpet_msi_mask(ch);
> +
> +        ch->cpu = cpu;
> +        this_cpu(hpet_channel) = ch;
> +        cpumask_set_cpu(cpu, ch->cpumask);
> +
> +        hpet_setup_msi(ch);
> +        hpet_program_time(ch, deadline, NOW(), 1);
> +        hpet_msi_unmask(ch);
> +
> +        spin_unlock(&ch->lock);
> +    }
> +    else
> +    {
> +        s_time_t best_early_deadline = 0, best_late_deadline = STIME_MAX;
> +        unsigned int i, best_early_idx = -1, best_late_idx = -1;
> +
> +        for ( i = 0; i < num_hpets_used; ++i )
> +        {
> +            ch = &hpet_events[i];
> +            spin_lock(&ch->lock);
> +
> +            if ( ch->cpu == -1 )
> +                goto continue_search;
> +
> +            /* This channel is going to expire early */
> +            if ( ch->next_event < deadline )
> +            {
> +                if ( ch->next_event > best_early_deadline )
> +                {
> +                    best_early_idx = i;
> +                    best_early_deadline = ch->next_event;
> +                }
> +                goto continue_search;
> +            }
> +
> +            /* We can deal with being woken up 20us late */
> +            if ( ch->next_event <= deadline + MICROSECS(20) )
> +                break;

Hmm, no - the loop only has a handful of iterations, so I don't see
a need to try to bail early. If there is a better one, we should use
it.

Furthermore, if you initialized best_late_deadline to deadline +
MICROSECS(20) right away, you wouldn't need an extra
conditional here at all.

>  
> -    if ( !(ch->flags & HPET_EVT_LEGACY) )
> -        hpet_attach_channel(cpu, ch);
> +            /* Otherwise record the best late channel to program forwards */
> +            if ( ch->next_event <= best_late_deadline )
> +            {
> +                best_late_idx = i;
> +                best_late_deadline = ch->next_event;
> +            }
> +
> +        continue_search:
> +            spin_unlock(&ch->lock);
> +            ch = NULL;
> +        }
> +
> +        if ( ch )
> +        {
> +            /* Found HPET with an appropriate time.  Request to be woken up 
> */
> +            cpumask_set_cpu(cpu, ch->cpumask);
> +            this_cpu(hpet_channel) = ch;
> +            spin_unlock(&ch->lock);
> +            goto done_searching;
> +        }
> +
> +        /* Try and program the best late channel forwards a bit */
> +        if ( best_late_deadline < STIME_MAX && best_late_idx != -1 )

If you follow the above, this become moot anyway, but if not -
why the double condition?

> +        {
> +            ch = &hpet_events[best_late_idx];
> +            spin_lock(&ch->lock);
> +
> +            /* If this is still the same channel, good */
> +            if ( ch->next_event == best_late_deadline )

I think I commented on this the first time through already: There's
no reason to not use this channel even if the above condition
doesn't hold - as long as the condition we require holds. In
particular, if another CPU already moved the channel to a slightly
earlier wakeup, this might even be beneficial to us (up to saving
us from having to reprogram the channel).

> +            {
> +                cpumask_set_cpu(cpu, ch->cpumask);
> +                hpet_program_time(ch, deadline, NOW(), 1);

hpet_program_time()'s force parameter, btw., only ever gets 1
passed as argument - did you consider removing the pointless
parameter?

> +                spin_unlock(&ch->lock);
> +                goto done_searching;
> +            }
> +            /* else it has fired and changed ownership. */
> +            else
> +            {
> +                spin_unlock(&ch->lock);
> +                goto retry_free_channel;
> +            }
> +        }
> +
> +        /* Try to piggyback on an early channel in the hope that when we
> +           wake back up, our fortunes will improve. */
> +        if ( best_early_deadline > 0 && best_early_idx != -1 )

Same question as above regarding the double condition.

> +        {
> +            ch = &hpet_events[best_early_idx];
> +            spin_lock(&ch->lock);
> +
> +            /* If this is still the same channel, good */
> +            if ( ch->next_event == best_early_deadline )

Similar comment as earlier on.

> +            {
> +                cpumask_set_cpu(cpu, ch->cpumask);
> +                spin_unlock(&ch->lock);
> +                goto done_searching;
> +            }
> +            /* else it has fired and changed ownership. */
> +            else
> +            {
> +                spin_unlock(&ch->lock);
> +                goto retry_free_channel;
> +            }
> +        }
> +
> +        /* All else has failed, and we have wasted some time searching.
> +         * See whether another channel has become free. */
> +        goto retry_free_channel;
> +    }
> +
> +done_searching:
>  
>      /* Disable LAPIC timer interrupts. */
>      disable_APIC_timer();
> -    cpumask_set_cpu(cpu, ch->cpumask);
> -
> -    spin_lock(&ch->lock);
> -    /* reprogram if current cpu expire time is nearer */
> -    if ( per_cpu(timer_deadline, cpu) < ch->next_event )
> -        hpet_program_time(ch, per_cpu(timer_deadline, cpu), NOW(), 1);
> -    spin_unlock(&ch->lock);
>  }
>  
>  void hpet_broadcast_exit(void)
>  {
>      unsigned int cpu = smp_processor_id();
> -    struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
> +    struct hpet_event_channel *ch = this_cpu(hpet_channel);

In cases where you have latched smp_processor_id() already
anyway, using per_cpu() is actually cheaper than this_cpu().

> +
> +    ASSERT(local_irq_is_enabled());
> +
> +    if ( this_cpu(timer_deadline) == 0 )
> +        return;
>  
> -    if ( per_cpu(timer_deadline, cpu) == 0 )
> +    /* If using HPET in legacy timer mode */
> +    if ( num_hpets_used == 0 )
> +    {
> +        /* This is safe without the spinlock, and will reduce contention. */
> +        cpumask_clear_cpu(cpu, hpet_events->cpumask);
>          return;
> +    }
>  
>      if ( !ch )
> -        ch = hpet_get_channel(cpu);
> +        return;
>  
> -    /* Reprogram the deadline; trigger timer work now if it has passed. */
> -    enable_APIC_timer();
> -    if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
> -        raise_softirq(TIMER_SOFTIRQ);
> +    spin_lock_irq(&ch->lock);
>  
>      cpumask_clear_cpu(cpu, ch->cpumask);
>  
> -    if ( !(ch->flags & HPET_EVT_LEGACY) )
> -        hpet_detach_channel(cpu, ch);
> +    /* If we own the channel, detach it */
> +    if ( ch->cpu == cpu )
> +    {
> +        hpet_msi_mask(ch);

There's an imbalance of mask/unmask operations here. While it
looks like this is correct, it is certainly not efficient - the other call
site of hpet_msi_mask() is then likely to find the channel already
masked, and considering the relatively long time MMIO accesses
take I would think it would be beneficial to at least avoid the
pointless write there is the channel is already masked (for
symmetry the same might then be worthwhile doing also in
hpet_msi_unmask()).

> +        hpet_wake_cpus(ch);
> +        ch->cpu = -1;
> +        set_bit(ch->idx, &free_channels);

Shouldn't you wake others _after_ having detached, so they have
a chance of becoming the owner of the now unused channel?

Also I think you need smp_wmb() between the writing of ch->cpu
and set_bit() - while x86's set_bit() currently implies a barrier(),
this isn't so by definition. Or at least you should add a comment
to explain why no barrier is currently needed.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.