[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 3/3] Implement 3-level event channel routines.



On Mon, 2012-12-31 at 18:22 +0000, Wei Liu wrote:
> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
> index 9898f8e..fb3a7b4 100644
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -27,6 +27,7 @@
>  #include <xen/guest_access.h>
>  #include <xen/keyhandler.h>
>  #include <asm/current.h>
> +#include <xen/paging.h>
> 
>  #include <public/xen.h>
>  #include <public/event_channel.h>
> @@ -413,6 +414,85 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
>      return rc;
>  }
> 
> +static inline int __evtchn_is_masked_l2(struct domain *d, int port)
> +{
> +    return test_bit(port, &shared_info(d, evtchn_mask));
> +}
> +
> +static inline int __evtchn_is_masked_l3(struct domain *d, int port)
> +{
> +    return test_bit(port % EVTCHNS_PER_PAGE,
> +                    d->evtchn_mask[port / EVTCHNS_PER_PAGE]);
> +}
> +
> +int evtchn_is_masked(struct domain *d, int port)
> +{
> +    switch ( d->evtchn_level )
> +    {
> +    case 2:
> +        return __evtchn_is_masked_l2(d, port);
> +    case 3:
> +        return __evtchn_is_masked_l3(d, port);
> +    default:
> +        printk(" %s: unknown event channel level %d for domain %d \n",
> +               __FUNCTION__, d->evtchn_level, d->domain_id);
> +        return 1;
> +    }
> +}

How much of this sort of thing goes away if we arrange for
d->evtchn_mask to point to &shared_info(d, evtchn_mask) when
evtchn_level == 2? (Likewise for pending etc)

> @@ -633,7 +712,50 @@ static void evtchn_set_pending(struct vcpu *v, int port)
>      {
>          vcpu_mark_events_pending(v);
>      }
> -
> +}
> +
> +static void __evtchn_set_pending_l3(struct vcpu *v, int port)
> +{
> +    struct domain *d = v->domain;
> +
> +    int page_no = port / EVTCHNS_PER_PAGE;
> +    int offset = port % EVTCHNS_PER_PAGE;
> +    int l1cb = BITS_PER_EVTCHN_WORD(d)*BITS_PER_EVTCHN_WORD(d);
> +    int l2cb = BITS_PER_EVTCHN_WORD(d);

What does "cb" in these variables stand for?

> +
> +    if ( test_and_set_bit(offset, d->evtchn_pending[page_no]) )
> +        return;
> +
> +    if ( !test_bit(offset, d->evtchn_mask[page_no]) &&
> +         !test_and_set_bit(port / l2cb,
> +                           v->evtchn_pending_sel_l2) &&
> +         !test_and_set_bit(port / l1cb,
> +                           &vcpu_info(v, evtchn_pending_sel)) )
> +    {
> +        vcpu_mark_events_pending(v);
> +    }
> +}

It doesn't seem like it would be too hard to merge this with the l2
version. Perhaps using a scheme similar to Linux's page table where a
per-level macro collapses into a nop (with the appropriate return).

If you do end up duplicating the function then you should duplicate the
comment about the importance of the ordering and the barriers etc too.

You should write down the datastructure somewhere. In particular I'm not
sure if l1 is at the top or the bottom. I think it's the top. I think it
would also be useful to be explicit about what the l3 things
corresponding to the fields in the shared info and vcpu info are and
which is the new level.

> +static int __map_l2_sel(struct vcpu *v, unsigned long gfn, unsigned long off)
> +{
> +    void *mapping;
> +    int rc = -EINVAL;
> +    struct page_info *page;
> +    struct domain *d = v->domain;
> +
> +    if ( off >= PAGE_SIZE )
> +        return rc;
> +
> +    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
> +    if ( !page )
> +        goto out;
> +
> +    if ( !get_page_type(page, PGT_writable_page) )
> +    {
> +        put_page(page);
> +        goto out;
> +    }
> +
> +    mapping = __map_domain_page_global(page);

I don't think you can validly use map_domain_page for a long lived
mapping. domain_page.h says "Allow temporary mapping of domain page
frames into Xen space.".

Are these array per-vcpu or shared? Because if they are per-vcpu you
don't need the global variant.

> +static int __map_l3_arrays(struct domain *d, unsigned long *pending,
> +                           unsigned long *mask)
> +{
> +    int i;
> +    void *pending_mapping, *mask_mapping;
> +    struct page_info *pending_page, *mask_page;
> +    unsigned long pending_gfn, mask_gfn;
> +    int rc = -EINVAL;
> +
> +    for ( i = 0;
> +          i < MAX_L3_PAGES && pending[i] != 0 && mask[i] != 0;
> +          i++ )
> +    {
> +        pending_gfn = pending[i];
> +        mask_gfn = mask[i];
> +
> +        pending_page = get_page_from_gfn(d, pending_gfn, NULL, P2M_ALLOC);
> +        if ( !pending_page )

I think you need to initialise rc here and for each subsequent goto err.
The initial EINVAL is overwritten to ENOMEM below on the first iteration
and isn't reset back to EINVAL at the top of the loop.

> +            goto err;
> +
> +        mask_page = get_page_from_gfn(d, mask_gfn, NULL, P2M_ALLOC);
> +        if ( !mask_page )
> +        {
> +            put_page(pending_page);
> +            goto err;
> +        }
> +
> +        if ( !get_page_type(pending_page, PGT_writable_page) )
> +        {
> +            put_page(pending_page);
> +            put_page(mask_page);
> +            goto err;
> +        }
> +
> +        if ( !get_page_type(mask_page, PGT_writable_page) )
> +        {
> +            put_page_and_type(pending_page);
> +            put_page(mask_page);
> +            goto err;
> +        }
> +
> +        pending_mapping = __map_domain_page_global(pending_page);
> +        if ( !pending_mapping )
> +        {
> +            put_page_and_type(pending_page);
> +            put_page_and_type(mask_page);
> +            rc = -ENOMEM;
> +            goto err;
> +        }
> +
> +        mask_mapping = __map_domain_page_global(mask_page);
> +        if ( !mask_mapping )
> +        {
> +            unmap_domain_page_global(pending_mapping);
> +            put_page_and_type(pending_page);
> +            put_page_and_type(mask_page);
> +            rc = -ENOMEM;
> +            goto err;
> +        }
> +
> +        d->evtchn_pending[i] = pending_mapping;
> +        d->evtchn_mask[i] = mask_mapping;
> +    }
> +
> +    rc = 0;
> +
> +err:
> +    return rc;
> +}

> diff --git a/xen/include/public/event_channel.h 
> b/xen/include/public/event_channel.h
> index 07ff321..29cd6e9 100644
> --- a/xen/include/public/event_channel.h
> +++ b/xen/include/public/event_channel.h
> @@ -71,6 +71,7 @@
>  #define EVTCHNOP_bind_vcpu        8
>  #define EVTCHNOP_unmask           9
>  #define EVTCHNOP_reset           10
> +#define EVTCHNOP_register_nlevel 11
>  /* ` } */
> 
>  typedef uint32_t evtchn_port_t;
> @@ -258,6 +259,29 @@ struct evtchn_reset {
>  typedef struct evtchn_reset evtchn_reset_t;
> 
>  /*
> + * EVTCHNOP_register_nlevel: Register N level event channels.
> + * NOTES:
> + *   1. currently only 3-level is supported.
> + *   2. should fall back to basic 2-level if this call fails.
> + */
> +#define MAX_L3_PAGES 8
> +struct evtchn_register_3level {
> +    unsigned long evtchn_pending[MAX_L3_PAGES];
> +    unsigned long evtchn_mask[MAX_L3_PAGES];
> +    unsigned long *l2sel_mfn;
> +    unsigned long *l2sel_offset;

You can't use bare pointers in a hypercall argument like this, you have
to use the GUEST_HANDLE stuff.

Please also document which are IN and OUT parameters (look at other
struct definitions for examples).

The evtchn_{pending,mask} can't be the actual pages, are they mfns?

> +    uint32_t nr_vcpus;
> +};
> +
> +struct evtchn_register_nlevel {
> +    uint32_t level;
> +    union {
> +        struct evtchn_register_3level l3;

Might be more extensible to N>3 if evtchn_{pending,mask} were also guest
handles rather than fixed size arrays?

> +    } u;
> +};
> +typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
> +
> +/*
>   * ` enum neg_errnoval
>   * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
>   * `
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 5593066..1d4ef2d 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -554,9 +554,24 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
> 
>  /*
>   * Event channel endpoints per domain:
> + * 2-level:
>   *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
> + * 3-level:
> + *  32k if a long is 32 bits; 256k if a long is 64 bits;
>   */
> -#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 
> 64)
> +#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) 
> * 64)
> +#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64)
> +#define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0;   \
> +            switch (x) {                                \
> +            case 2:                                     \
> +                __v = NR_EVENT_CHANNELS_L2; break;      \
> +            case 3:                                     \
> +                __v = NR_EVENT_CHANNELS_L3; break;      \
> +            default:                                    \
> +                BUG();                                  \
> +            }                                           \
> +            __v;})

xen/include/public is supposed to be ANSI-C and I think the #define ({...})
syntax is a gcc-ism.

I think the ..._L{N} defines are sufficient.

> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 5f23213..ae78549 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -64,7 +64,8 @@ extern struct domain *dom0;
>              __v;})
> 
>  #define EVTCHNS_PER_BUCKET 512
> -#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
> +#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS_L3 / EVTCHNS_PER_BUCKET)
> +#define EVTCHNS_PER_PAGE   (PAGE_SIZE * 8)
> 
>  struct evtchn
>  {
> @@ -104,7 +105,7 @@ void evtchn_destroy_final(struct domain *d); /* from 
> complete_domain_destroy */
> 
>  struct waitqueue_vcpu;
> 
> -struct vcpu
> +struct vcpu
>  {
>      int              vcpu_id;
> 
> @@ -112,6 +113,9 @@ struct vcpu
> 
>      vcpu_info_t     *vcpu_info;
> 
> +    /* For 3-level event channel */
> +    unsigned long   *evtchn_pending_sel_l2;
> +
>      struct domain   *domain;
> 
>      struct vcpu     *next_in_list;
> @@ -275,6 +279,10 @@ struct domain
>      struct evtchn  **evtchn;
>      spinlock_t       event_lock;
>      unsigned int     evtchn_level;
> +#define L3_PAGES (NR_EVENT_CHANNELS_L3/BITS_PER_LONG*sizeof(unsigned 
> long)/PAGE_SIZE)
> +    unsigned long   *evtchn_pending[L3_PAGES];
> +    unsigned long   *evtchn_mask[L3_PAGES];
> +#undef  L3_PAGES
> 
>      struct grant_table *grant_table;
> 
> --
> 1.7.10.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.