[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 8/8] evtchn: add FIFO-based event channel hypercalls and port ops



On Fri, Aug 09, 2013 at 07:08:40PM +0100, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
> 
> Add the implementation for the FIFO-based event channel ABI.  The new
> hypercall sub-ops (EVTCHNOP_init_control, EVTCHNOP_expand_array,
> EVTCHNOP_set_priority) and the required evtchn_ops (set_pending,
> unmask, etc.).
> 
> This current implementation has three main limitations:
> 
> - EVTCHNOP_set_limit is not yet implemented so any guest will be able
>   to use up to 2^17 (requiring 128 global mapping pages for a fully
>   populated event array).
> 
> - The control block frames are not required to be shared with the
>   vcpu_info structure.  This requires an additional global mapping
>   page per-VCPU.  This does makes the guest implementation cleaner
>   though so perhaps we do not need to fix this?
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  xen/common/Makefile          |    1 +
>  xen/common/event_channel.c   |   37 ++++
>  xen/common/event_fifo.c      |  491 
> ++++++++++++++++++++++++++++++++++++++++++
>  xen/include/xen/event.h      |    6 +-
>  xen/include/xen/event_fifo.h |   56 +++++
>  xen/include/xen/sched.h      |    4 +
>  6 files changed, 593 insertions(+), 2 deletions(-)
>  create mode 100644 xen/common/event_fifo.c
>  create mode 100644 xen/include/xen/event_fifo.h
> 
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index ef03eac..afd7d40 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -7,6 +7,7 @@ obj-y += domctl.o
>  obj-y += domain.o
>  obj-y += event_2l.o
>  obj-y += event_channel.o
> +obj-y += event_fifo.o
>  obj-y += grant_table.o
>  obj-y += irq.o
>  obj-y += kernel.o
> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
> index 67dcdbc..a048107 100644
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -26,6 +26,7 @@
>  #include <xen/compat.h>
>  #include <xen/guest_access.h>
>  #include <xen/keyhandler.h>
> +#include <xen/event_fifo.h>
>  #include <asm/current.h>
>  
>  #include <public/xen.h>
> @@ -1053,6 +1054,40 @@ long do_event_channel_op(int cmd, 
> XEN_GUEST_HANDLE_PARAM(void) arg)
>          break;
>      }
>  
> +    case EVTCHNOP_init_control: {
> +        struct evtchn_init_control init_control;
> +        if ( copy_from_guest(&init_control, arg, 1) != 0 )
> +            return -EFAULT;
> +        rc = evtchn_fifo_init_control(&init_control);
> +        if ( !rc && __copy_to_guest(arg, &init_control, 1) )
> +            rc = -EFAULT;
> +        break;
> +    }
> +
> +    case EVTCHNOP_expand_array: {
> +        struct evtchn_expand_array expand_array;
> +        if ( copy_from_guest(&expand_array, arg, 1) != 0 )
> +            return -EFAULT;
> +        rc = evtchn_fifo_expand_array(&expand_array);
> +        break;
> +    }
> +
> +    case EVTCHNOP_set_priority: {
> +        struct evtchn_set_priority set_priority;
> +        if ( copy_from_guest(&set_priority, arg, 1) != 0 )
> +            return -EFAULT;
> +        rc = evtchn_fifo_set_priority(&set_priority);
> +        break;
> +    }
> +
> +    case EVTCHNOP_set_limit: {
> +        struct evtchn_set_limit set_limit;
> +        if ( copy_from_guest(&set_limit, arg, 1) != 0 )
> +            return -EFAULT;
> +        rc = evtchn_fifo_set_limit(&set_limit);
> +        break;
> +    }
> +
>      default:
>          rc = -ENOSYS;
>          break;
> @@ -1214,6 +1249,8 @@ void evtchn_destroy(struct domain *d)
>          (void)__evtchn_close(d, i);
>      }
>  
> +    evtchn_fifo_destroy(d);
> +

Doesn't this need to be inside the event_lock critical region?

>      /* Free all event-channel buckets. */
>      spin_lock(&d->event_lock);
>      for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
> diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c
> new file mode 100644
> index 0000000..a7b5dc9
> --- /dev/null
> +++ b/xen/common/event_fifo.c
> @@ -0,0 +1,491 @@
> +/*
> + * FIFO event channel management.
> + *
> + * Copyright (C) 2013 Citrix Systems R&D Ltd.
> + * 
> + * This source code is licensed under the GNU General Public License,
> + * Version 2 or later.  See the file COPYING for more details.
> + */
> +
> +#include <xen/config.h>
> +#include <xen/init.h>
> +#include <xen/lib.h>
> +#include <xen/errno.h>
> +#include <xen/sched.h>
> +#include <xen/event.h>
> +#include <xen/event_fifo.h>
> +#include <xen/paging.h>
> +#include <xen/mm.h>
> +
> +#include <public/event_channel.h>
> +
> +static inline event_word_t *evtchn_fifo_word_from_port(struct domain *d,
> +                                                       unsigned port)
> +{
> +    unsigned p, w;
> +
> +    if ( unlikely(port >= d->evtchn_fifo->num_evtchns) )
> +        return NULL;
> +
> +    p = port / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
> +    w = port % EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
> +
> +    return d->evtchn_fifo->event_array[p].virt + w;
> +}
> +
> +static bool_t evtchn_fifo_set_link(event_word_t *word, uint32_t link)
> +{
> +    event_word_t n, o, w;
> +
> +    w = *word;
> +
> +    do {
> +        if ( !(w & (1 << EVTCHN_FIFO_LINKED)) )
> +            return 0;
> +        o = w;
> +        n = (w & ~EVTCHN_FIFO_LINK_MASK) | link;
> +    } while ( (w = cmpxchg(word, o, n)) != o );
> +
> +    return 1;
> +}
> +
> +static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
> +{
> +    struct domain *d = v->domain;
> +    unsigned port;
> +    event_word_t *word;
> +    struct evtchn_fifo_queue *q;
> +    unsigned long flags;
> +    bool_t was_pending;
> +
> +    port = evtchn->port;
> +    word = evtchn_fifo_word_from_port(d, port);
> +    if ( unlikely(!word) )
> +        return;
> +
> +    /*
> +     * No locking around getting the queue. This may race with
> +     * changing the priority but we are allowed to signal the event
> +     * once on the old priority.
> +     */
> +    q = evtchn->queue;
> +
> +    was_pending = test_and_set_bit(EVTCHN_FIFO_PENDING, word);
> +
> +    /*
> +     * Link the event if it unmasked and not already linked.
> +     */
> +    if ( !test_bit(EVTCHN_FIFO_MASKED, word)
> +         && !test_and_set_bit(EVTCHN_FIFO_LINKED, word) )
> +    {
> +        event_word_t *tail_word;
> +        bool_t linked = 0;
> +
> +        spin_lock_irqsave(&q->lock, flags);
> +
> +        /*
> +         * Atomically link the tail to port iff the tail is linked.
> +         * If the tail is unlinked the queue is empty.
> +         *
> +         * If port is the same as tail, the queue is empty but q->tail
> +         * will appear linked as we just set LINKED above.
> +         *
> +         * If the queue is empty (i.e., we haven't linked to the new
> +         * event), head must be updated.
> +         */
> +        if ( port != q->tail )
> +        {
> +            tail_word = evtchn_fifo_word_from_port(d, q->tail);
> +            linked = evtchn_fifo_set_link(tail_word, port);
> +        }
> +        if ( !linked )
> +            write_atomic(q->head, port);
> +        q->tail = port;
> +
> +        spin_unlock_irqrestore(&q->lock, flags);
> +
> +        if ( !test_and_set_bit(q->priority, 
> &v->evtchn_fifo->control_block->ready) )

Line too long.

> +            vcpu_mark_events_pending(v);
> +    }
> +
> +    if ( !was_pending )
> +        evtchn_check_pollers(d, port);
> +}
> +
> +static void evtchn_fifo_clear_pending(struct domain *d, struct evtchn 
> *evtchn)
> +{
> +    event_word_t *word;
> +
> +    word = evtchn_fifo_word_from_port(d, evtchn->port);
> +    if ( unlikely(!word) )
> +        return;
> +
> +    /*
> +     * Just clear the P bit.
> +     *
> +     * No need to unlink as the guest will unlink and ignore
> +     * non-pending events.
> +     */
> +    clear_bit(EVTCHN_FIFO_PENDING, word);
> +}
> +
> +static void evtchn_fifo_unmask(struct domain *d, struct evtchn *evtchn)
> +{
> +    struct vcpu *v = d->vcpu[evtchn->notify_vcpu_id];
> +    event_word_t *word;
> +
> +    word = evtchn_fifo_word_from_port(d, evtchn->port);
> +    if ( unlikely(!word) )
> +        return;
> +
> +    clear_bit(EVTCHN_FIFO_MASKED, word);
> +
> +    /* Relink if pending. */
> +    if ( test_bit(EVTCHN_FIFO_PENDING, word ) )

Extra space after "word".

> +        evtchn_fifo_set_pending(v, evtchn);
> +}
> +
> +static bool_t evtchn_fifo_is_pending(struct domain *d,
> +                                     const struct evtchn *evtchn)
> +{
> +    event_word_t *word;
> +
> +    word = evtchn_fifo_word_from_port(d, evtchn->port);
> +    if ( unlikely(!word) )
> +        return 0;
> +
> +    return test_bit(EVTCHN_FIFO_PENDING, word);
> +}
> +
> +static bool_t evtchn_fifo_is_masked(struct domain *d,
> +                                    const struct evtchn *evtchn)
> +{
> +    event_word_t *word;
> +
> +    word = evtchn_fifo_word_from_port(d, evtchn->port);
> +    if ( unlikely(!word) )
> +        return 1;
> +
> +    return test_bit(EVTCHN_FIFO_MASKED, word);
> +}
> +
> +static void evtchn_fifo_bind_to_vcpu(struct domain *d, struct evtchn *evtchn,
> +                                     struct vcpu *v)
> +{
> +    unsigned priority;
> +
> +    /* Keep the same priority if possible, otherwise use the
> +       default. */

Comment style.

> +    if ( evtchn->queue )
> +        priority = evtchn->queue->priority;
> +    else
> +        priority = EVTCHN_FIFO_PRIORITY_DEFAULT;
> +
> +    evtchn->queue = &v->evtchn_fifo->queue[priority];
> +}
> +
> +static const struct evtchn_port_ops evtchn_port_ops_fifo =
> +{
> +    .set_pending   = evtchn_fifo_set_pending,
> +    .clear_pending = evtchn_fifo_clear_pending,
> +    .unmask        = evtchn_fifo_unmask,
> +    .is_pending    = evtchn_fifo_is_pending,
> +    .is_masked     = evtchn_fifo_is_masked,
> +    .bind_to_vcpu  = evtchn_fifo_bind_to_vcpu,
> +};
> +
> +static int map_guest_page(struct domain *d, uint64_t gfn,
> +                          struct page_info **page, void **virt)
> +{
> +    struct page_info *p;
> +
> +    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
> +    if ( !p )
> +        return -EINVAL;
> +
> +    if ( !get_page_type(p, PGT_writable_page) )
> +    {
> +        put_page(p);
> +        return -EINVAL;
> +    }
> +
> +    *virt = map_domain_page_global(gfn);
> +    if ( *virt == NULL )
> +    {
> +        put_page_and_type(p);
> +        return -ENOMEM;
> +    }
> +    *page = p;
> +    return 0;
> +}
> +
> +static void unmap_guest_page(struct page_info *page, void *virt)
> +{
> +    if ( page == NULL )
> +        return;
> +
> +    unmap_domain_page_global(virt);
> +    put_page_and_type(page);
> +}
> +
> +static void cleanup_control_block(struct vcpu *v)
> +{
> +    if ( v->evtchn_fifo )
> +    {
> +        unmap_guest_page(v->evtchn_fifo->cb_page, 
> v->evtchn_fifo->control_block);
> +        xfree(v->evtchn_fifo);
> +        v->evtchn_fifo = NULL;
> +    }
> +}
> +
> +static void init_queue(struct vcpu *v, struct evtchn_fifo_queue *q, unsigned 
> i)
> +{
> +    spin_lock_init(&q->lock);
> +    q->priority = i;
> +    q->head = &v->evtchn_fifo->control_block->head[i];
> +}
> +
> +static int setup_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
> +{
> +    struct domain *d = v->domain;
> +    struct evtchn_fifo_vcpu *efv;
> +    struct page_info *page;
> +    void *virt;
> +    unsigned i;
> +    int rc;
> +
> +    if ( v->evtchn_fifo )
> +        return -EINVAL;
> +
> +    efv = xzalloc(struct evtchn_fifo_vcpu);
> +    if ( efv == NULL )
> +        return -ENOMEM;
> +
> +    rc = map_guest_page(d, gfn, &page, &virt);
> +    if ( rc < 0 )
> +    {
> +        xfree(efv);
> +        return rc;
> +    }
> +
> +    v->evtchn_fifo = efv;
> +
> +    v->evtchn_fifo->cb_page       = page;
> +    v->evtchn_fifo->control_block = virt + offset;
> +
> +    for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
> +        init_queue(v, &v->evtchn_fifo->queue[i], i);
> + 
> +    return 0;
> +}
> +
> +/*
> + * Setup an event array with no pages.
> + */
> +static int setup_event_array(struct domain *d)
> +{
> +    if ( d->evtchn_fifo )
> +        return 0;
> +
> +    d->evtchn_fifo = xzalloc(struct evtchn_fifo_domain);

struct evtchn_fifo_domain is very big because it contains event_array,
which has 1024 elements; however, most domains will not use all 2^17
ports. Could you make the allocation of event_array dynamic?

> +    if ( d->evtchn_fifo == NULL )
> +        return -ENOMEM;
> +
> +    d->evtchn_fifo->num_evtchns = 0;
> +
> +    return 0;
> +}
> +
> +/*
> + * Some ports may already be bound, bind them to the correct VCPU so
> + * they have a valid queue.
> + *
> + * Note: any events that are currently pending will not be resent and
> + * will be lost.
> + */

Can this be fixed? I presume you can check the state of the port and set
it to pending in the new ABI when necessary? As you've got hold of the
event lock at this stage, it should be easy to synchronize the state.

> +static void rebind_all_ports(struct domain *d)
> +{
> +    unsigned port;
> +
> +    for ( port = 1; port < d->max_evtchns; port++ )
> +    {
> +        struct evtchn *evtchn;
> +
> +        if ( !port_is_valid(d, port) )
> +            break;
> +
> +        evtchn = evtchn_from_port(d, port);
> +        switch ( evtchn->state )
> +        {
> +        case ECS_INTERDOMAIN:
> +        case ECS_PIRQ:
> +        case ECS_VIRQ:
> +            evtchn_port_bind_to_vcpu(d, evtchn, 
> d->vcpu[evtchn->notify_vcpu_id]);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +}
> +
> +static void cleanup_event_array(struct domain *d)
> +{
> +    unsigned i;
> +
> +    if ( d->evtchn_fifo == NULL )
> +        return;
> +
> +    for ( i = 0; i < EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES; i++ )
> +    {
> +        unmap_guest_page(d->evtchn_fifo->event_array[i].page,
> +                         d->evtchn_fifo->event_array[i].virt);

Suggest resetting page and virt to NULL / 0, just as you did in
cleanup_control_block.

> +    }
> +    xfree(d->evtchn_fifo);

Same for evtchn_fifo.

> +}
> +
> +int evtchn_fifo_init_control(struct evtchn_init_control *init_control)
> +{
> +    struct domain *d = current->domain;
> +    uint32_t vcpu_id;
> +    uint64_t gfn;
> +    uint32_t offset;
> +    struct vcpu *v;
> +    int rc;
> +
> +    init_control->link_bits = EVTCHN_FIFO_LINK_BITS;
> +
> +    vcpu_id = init_control->vcpu;
> +    gfn     = init_control->control_mfn;
> +    offset  = init_control->offset;
> +
> +    if ( (vcpu_id >= d->max_vcpus) || (d->vcpu[vcpu_id] == NULL) )
> +        return -ENOENT;
> +    v = d->vcpu[vcpu_id];
> +
> +    /* Must not cross page boundary. */
> +    if ( offset > (PAGE_SIZE - sizeof(evtchn_fifo_control_block_t)) )
> +        return -EINVAL;
> +
> +    /* Must be 8-bytes aligned. */
> +    if ( offset & (8 - 1) )
> +        return -EINVAL;
> +
> +    spin_lock(&d->event_lock);
> +
> +    rc = setup_control_block(v, gfn, offset);
> +
> +    /* If this is the first control block, setup an empty event array
> +       and switch to the fifo port ops. */

Comment style.

> +    if ( d->evtchn_fifo == NULL )
> +    {
> +        rc = setup_event_array(d);
> +        if ( rc < 0 )
> +            cleanup_control_block(v);
> +        else
> +        {
> +            d->evtchn_port_ops = &evtchn_port_ops_fifo;
> +            d->max_evtchns = 1 << EVTCHN_FIFO_LINK_BITS;
> +            rebind_all_ports(d);
> +        }
> +    }
> +
> +    spin_unlock(&d->event_lock);
> +
> +    return rc;
> +}
> +
> +static int add_page_to_event_array(struct domain *d, unsigned long gfn)
> +{
> +    struct page_info *page = NULL;
> +    void *virt;
> +    unsigned slot;
> +    int rc;
> +
> +    slot = d->evtchn_fifo->num_evtchns / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
> +    if ( slot >= EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES )
> +        return -ENOSPC;
> +
> +    rc = map_guest_page(d, gfn, &page, &virt);
> +    if ( rc < 0 )
> +        return rc;
> +
> +    d->evtchn_fifo->event_array[slot].page = page;
> +    d->evtchn_fifo->event_array[slot].virt = virt;
> +
> +    d->evtchn_fifo->num_evtchns += EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
> +
> +    return 0;
> +}
> +
> +int evtchn_fifo_expand_array(struct evtchn_expand_array *expand_array)
> +{
> +    struct domain *d = current->domain;
> +    int rc;
> +
> +    spin_lock(&d->event_lock);
> +    rc = add_page_to_event_array(d, expand_array->array_mfn);
> +    spin_unlock(&d->event_lock);
> +
> +    return rc;
> +}
> +
> +int evtchn_fifo_set_priority(struct evtchn_set_priority *set_priority)
> +{
> +    struct domain *d = current->domain;
> +    struct vcpu *v;
> +    uint32_t priority;
> +    unsigned port;
> +    struct evtchn *evtchn;
> +
> +    priority = set_priority->priority;
> +    port     = set_priority->port;
> +    
> +    if ( priority > EVTCHN_FIFO_PRIORITY_MIN )
> +        return -EINVAL;
> +
> +    spin_lock(&d->event_lock);
> +
> +    if ( !port_is_valid(d, port) )
> +    {
> +        spin_unlock(&d->event_lock);
> +        return -EINVAL;
> +    }
> +
> +    /*
> +     * Switch to the new queue for future events. If the event is
> +     * already pending or in the process of being linked it will be on
> +     * the old queue -- this is fine.
> +     */
> +    evtchn = evtchn_from_port(d, port);
> +    v = d->vcpu[evtchn->notify_vcpu_id];
> +    evtchn->queue = &v->evtchn_fifo->queue[priority];
> +
> +    spin_unlock(&d->event_lock);
> +
> +    return 0;
> +}
> +
> +int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit)
> +{
> +    /* FIXME: not supported yet. */
> +    return -ENOSYS;
> +}
> +
> +void evtchn_fifo_destroy(struct domain *d)
> +{
> +    struct vcpu *v;
> +
> +    for_each_vcpu( d, v )
> +        cleanup_control_block(v);
> +    cleanup_event_array(d);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
> index a795ae6..3a71956 100644
> --- a/xen/include/xen/event.h
> +++ b/xen/include/xen/event.h
> @@ -127,12 +127,14 @@ struct evtchn_port_ops {
>                           struct vcpu *vcpu);
>  };
>  
> -static inline void evtchn_port_set_pending(struct vcpu *v, struct evtchn 
> *evtchn)
> +static inline void evtchn_port_set_pending(struct vcpu *v,
> +                                           struct evtchn *evtchn)

This kind of adjustment should not happen here, as the styling problems
are introduced in patch 2. Could you fix them there?

>  {
>      v->domain->evtchn_port_ops->set_pending(v, evtchn);
>  }
>  
> -static inline void evtchn_port_clear_pending(struct domain *d, struct evtchn 
> *evtchn)
> +static inline void evtchn_port_clear_pending(struct domain *d,
> +                                             struct evtchn *evtchn)

Ditto.

>  {
>      d->evtchn_port_ops->clear_pending(d, evtchn);
>  }
> diff --git a/xen/include/xen/event_fifo.h b/xen/include/xen/event_fifo.h
> new file mode 100644
> index 0000000..2d7b7d0
> --- /dev/null
> +++ b/xen/include/xen/event_fifo.h
> @@ -0,0 +1,56 @@
> +/*
> + * FIFO-based event channel ABI.
> + *
> + * Copyright (C) 2013 Citrix Systems R&D Ltd.
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2 or later.  See the file COPYING for more details.
> + */
> +#ifndef __XEN_EVENT_FIFO_H__
> +#define __XEN_EVENT_FIFO_H__
> +
> +struct evtchn_fifo_queue {
> +    uint32_t *head; /* points into control block */
> +    uint32_t tail;
> +    spinlock_t lock;
> +    uint8_t priority;
> +};
> +
> +struct evtchn_fifo_vcpu {
> +    struct page_info *cb_page;
> +    struct evtchn_fifo_control_block *control_block;
> +    struct evtchn_fifo_queue queue[EVTCHN_FIFO_MAX_QUEUES];
> +};
> +
> +#define EVTCHN_FIFO_EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
> +#define EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES \
> +    ((1 << EVTCHN_FIFO_LINK_BITS) / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE)

You should probably define a macro for (1 << EVTCHN_FIFO_LINK_BITS) — I
see this snippet repeated in a couple of other places.


Wei.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.