Re: [RFC PATCH] xen/common: cache colored buddy allocator for domains



On Sat, Oct 22, 2022 at 6:08 PM Carlo Nonato
<carlo.nonato@xxxxxxxxxxxxxxx> wrote:
>
> This commit replaces the colored allocator for domains with a simple buddy
> allocator that is also indexed by color, so that it can allocate pages based
> on a given coloring configuration.
>
> It applies on top of Arm cache coloring (v3) as sent to the mailing list.
>
> This has two benefits:
>  - order can now be greater than 0 if the color config contains a
>    sufficient number of adjacent colors starting from an order-aligned
>    one;
>  - the same benefits as the normal buddy: constant-time alloc and free
>    (constant with respect to the number of pages, not the number of
>    colors);
>
> But also one "big" con:
>  - given the way Xen queries the allocator, it can only serve larger pages
>    first, and only when a domain runs out of those can it fall back to the
>    smaller ones. Let's say that domain 0 has 31 colors out of 32 total (0-30
>    out of 0-31). The order-4 pages (0-15) are allocated first, then the
>    order-3 ones (16-23, since 0-7 and 8-15 are already all allocated), then
>    order-2 and so on. The result is... the domain effectively uses only
>    about half of the colors it should.

In case it isn't clear, fewer cache colors means less cache given to the
domain, which results in lower performance. The sketch below makes the
arithmetic concrete.
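
Here is a small standalone sketch (not part of the patch; dump_color_runs is
a hypothetical helper) that enumerates the maximal order-aligned color runs a
configuration can provide. For 31 colors out of 32 it reproduces exactly the
16/8/4/2/1 split described above:

    #include <stdio.h>

    /* Enumerate the maximal order-aligned color runs in [0, num_colors). */
    static void dump_color_runs(unsigned int num_colors, unsigned int max_order)
    {
        unsigned int color = 0;

        while ( color < num_colors )
        {
            unsigned int order = max_order;

            /* Largest order whose run is both aligned and fits. */
            while ( order && ((color & ((1U << order) - 1)) ||
                              color + (1U << order) > num_colors) )
                order--;

            printf("colors %u-%u: order %u\n",
                   color, color + (1U << order) - 1, order);
            color += 1U << order;
        }
    }

    int main(void)
    {
        dump_color_runs(31, 4); /* domain 0 with colors 0-30 out of 0-31 */
        return 0;
    }

which prints:

    colors 0-15: order 4
    colors 16-23: order 3
    colors 24-27: order 2
    colors 28-29: order 1
    colors 30-30: order 0

Since larger orders are served first, the order-4 run (colors 0-15) ends up
backing most of the domain's memory.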

> Signed-off-by: Carlo Nonato <carlo.nonato@xxxxxxxxxxxxxxx>
> ---
>  xen/common/page_alloc.c | 202 ++++++++++++++++++++++++++++------------
>  1 file changed, 141 insertions(+), 61 deletions(-)
>
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index fe214cd6ac..f20de1c891 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -462,67 +462,133 @@ static void init_free_page_fields(struct page_info *pg)
>
>  #ifdef CONFIG_CACHE_COLORING
>  /*************************
> - * COLORED SIDE-ALLOCATOR
> + * COLORED BUDDY ALLOCATOR
>   *
> - * Pages are stored by their color in separate lists. Each list defines a color
> - * and it is initialized during end_boot_allocator, where each page's color
> - * is calculated and the page itself is put in the correct list.
> - * After initialization there will be N lists where N is the number of
> - * available colors on the platform.
> - * The {free|alloc}_color_heap_page overwrite pg->count_info, but they do it in
> - * the same way as the buddy allocator corresponding functions do:
> - * protecting the access with a critical section using heap_lock.
> + * A simplified version of the buddy allocator with the addition of colors to
> + * serve pages based on coloring configuration.
> + * Pages are divided in lists by color and order, so the total space needed
> + * for the heap is greater than the normal one. The matrix of colors x orders
> + * page lists is linearized since not every color can use any order.
>   */
>  typedef struct page_list_head colored_pages_t;
>  static colored_pages_t *__ro_after_init _color_heap;
>  static unsigned long *__ro_after_init free_colored_pages;
> +static unsigned int max_color_order;
>
> -#define color_heap(color) (&_color_heap[color])
> +#define order_mask(order) ((1UL << (order)) - 1)
> +#define color_index_of_order(color, order) ((color) & order_mask(order))
> +#define color_align_to_order(color, order) ((color) & ~order_mask(order))
> +#define color_heap_pos(color, order) \
> +    ((((1 << max_color_order) + (color)) >> (order)) - 1)
> +#define color_heap(color, order) (&_color_heap[color_heap_pos(color, order)])
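
As a reading aid: color_heap_pos() linearizes a full binary tree of color
runs in the same way as an array-backed binary heap, with the single
order-max_color_order run at index 0 and the order-0 (single-color) lists as
leaves. A worked example, assuming max_color_order == 5 (32 colors, hence
2 * 32 - 1 == 63 list heads):

    color_heap_pos( 0, 5) == ((32 +  0) >> 5) - 1 ==  0  /* root: colors 0-31 */
    color_heap_pos(16, 4) == ((32 + 16) >> 4) - 1 ==  2  /* colors 16-31 */
    color_heap_pos( 7, 0) == ((32 +  7) >> 0) - 1 == 38  /* color 7 alone */

so the order-0 lists occupy indexes 31..62.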
>
> -static void free_color_heap_page(struct page_info *pg)
> +static void free_color_heap_pages(struct page_info *pg, unsigned int order)
>  {
> -    struct page_info *pos;
> -    unsigned int color = page_to_color(pg);
> -    colored_pages_t *head = color_heap(color);
> +    unsigned int i, color = page_to_color(pg);
> +
> +    ASSERT(color_index_of_order(color, order) == 0);
>
>      spin_lock(&heap_lock);
>
> -    pg->count_info = PGC_state_free | PGC_colored;
> -    page_set_owner(pg, NULL);
> -    free_colored_pages[color]++;
> +    for ( i = 0; i < (1 << order); i++ )
> +    {
> +        pg[i].count_info = PGC_state_free | PGC_colored;
> +        page_set_owner(&pg[i], NULL);
> +        free_colored_pages[page_to_color(&pg[i])]++;
> +    }
>
> -    page_list_for_each( pos, head )
> +    for ( ; order < max_color_order; order++ )
>      {
> -        if ( page_to_maddr(pos) < page_to_maddr(pg) )
> +        unsigned long mask = 1UL << order;
> +        struct page_info *merge = pg + ((color & mask) ? -mask : mask);
> +
> +        if ( !page_state_is(merge, free) || (PFN_ORDER(merge) != order) )
>              break;
> +
> +        if ( color & mask )
> +        {
> +            pg = merge;
> +            color -= mask;
> +        }
> +
> +        page_list_del(merge, color_heap(color, order));
>      }
>
> -    page_list_add_next(pg, pos, head);
> +    PFN_ORDER(pg) = order;
> +    page_list_add(pg, color_heap(color, order));
>
>      spin_unlock(&heap_lock);
>  }
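
The loop above is the classic buddy coalescing walk, keyed by color instead
of by address. Note that it assumes pages of adjacent colors are adjacent in
memory within the freed region (i.e. the color follows the low bits of the
frame number), so the buddy of pg can be found at pg +/- mask. Tracing a
free of the order-0 page with color 5:

    order 0: mask = 1, bit 0 of 5 is set   -> buddy is color 4, at pg - 1;
             if it is free, merge downwards: pg = buddy, color = 4
    order 1: mask = 2, bit 1 of 4 is clear -> buddy is color 6, at pg + 2;
             if it is free, merge upwards, keeping pg and color = 4
    ...

The walk stops at the first buddy that is not free, or that sits at a
different order, and the merged run is then queued at the order reached.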
>
> -static struct page_info *alloc_color_heap_page(unsigned int memflags,
> -                                               const unsigned int *colors,
> -                                               unsigned int num_colors)
> +static struct page_info *find_free_color_heap_page(unsigned int color,
> +                                                   unsigned int order)
> +{
> +    struct page_info *pg = NULL;
> +    unsigned int buddy_order, buddy_color;
> +
> +    ASSERT(color_index_of_order(color, order) == 0);
> +
> +    for ( buddy_order = order; buddy_order <= max_color_order; buddy_order++ )
> +    {
> +        pg = page_list_remove_head(color_heap(
> +            color_align_to_order(color, buddy_order), buddy_order));
> +        if ( pg )
> +            break;
> +    }
> +
> +    if ( !pg )
> +        return NULL;
> +
> +    buddy_color = page_to_color(pg);
> +
> +    while ( buddy_order != order )
> +    {
> +        unsigned long mask = (1U << --buddy_order);
> +        struct page_info *half = pg + mask;
> +
> +        if ( color & mask )
> +        {
> +            page_list_add(pg, color_heap(buddy_color, buddy_order));
> +            pg = half;
> +            buddy_color += mask;
> +        }
> +        else
> +            page_list_add(half, color_heap(buddy_color + mask, buddy_order));
> +    }
> +
> +    return pg;
> +}
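
This is the usual buddy split, with the color bits selecting which half to
keep at each step. For example, requesting color 5 at order 0 when only the
order-3 run (colors 0-7) is free:

    buddy_order 2: 5 & 4 != 0 -> keep colors 4-7, give colors 0-3 back
    buddy_order 1: 5 & 2 == 0 -> keep colors 4-5, give colors 6-7 back
    buddy_order 0: 5 & 1 != 0 -> keep color 5,    give color  4   back

leaving pg pointing at the single page of color 5.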
> +
> +static struct page_info *alloc_color_heap_pages(unsigned int order,
> +                                                unsigned int memflags,
> +                                                unsigned int *colors,
> +                                                unsigned int num_colors)
>  {
>      struct page_info *pg = NULL;
> -    unsigned int i, color;
> +    unsigned int i, color = 0;
> +    unsigned long mask = order_mask(order);
>      bool need_tlbflush = false;
>      uint32_t tlbflush_timestamp = 0;
> +    mfn_t mfn;
>
>      spin_lock(&heap_lock);
>
> +    /* Search for 2^order contiguous colors */
>      for ( i = 0; i < num_colors; i++ )
>      {
> -        struct page_info *tmp;
> +        unsigned int index = colors[i] & mask;
> +
> +        if ( index == 0 )
> +            color = colors[i];
>
> -        if ( page_list_empty(color_heap(colors[i])) )
> +        if ( colors[i] != color )
>              continue;
>
> -        tmp = page_list_first(color_heap(colors[i]));
> -        if ( !pg || page_to_maddr(tmp) > page_to_maddr(pg) )
> -            pg = tmp;
> +        if ( index == mask &&
> +             (pg = find_free_color_heap_page(colors[i] & ~mask, order)) )
> +            break;
> +
> +        color++;
>      }
>
>      if ( !pg )
> @@ -531,59 +597,77 @@ static struct page_info *alloc_color_heap_page(unsigned int memflags,
>          return NULL;
>      }
>
> -    pg->count_info = PGC_state_inuse | PGC_colored;
> -
> -    if ( !(memflags & MEMF_no_tlbflush) )
> -        accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp);
> +    for ( i = 0; i < (1 << order); i++ )
> +    {
> +        pg[i].count_info = PGC_state_inuse | PGC_colored;
>
> -    init_free_page_fields(pg);
> -    flush_page_to_ram(mfn_x(page_to_mfn(pg)),
> -                      !(memflags & MEMF_no_icache_flush));
> +        if ( !(memflags & MEMF_no_tlbflush) )
> +            accumulate_tlbflush(&need_tlbflush, &pg[i], &tlbflush_timestamp);
>
> -    color = page_to_color(pg);
> -    free_colored_pages[color]--;
> -    page_list_del(pg, color_heap(color));
> +        init_free_page_fields(&pg[i]);
> +        free_colored_pages[page_to_color(&pg[i])]--;
> +    }
>
>      spin_unlock(&heap_lock);
>
>      if ( need_tlbflush )
>          filtered_flush_tlb_mask(tlbflush_timestamp);
>
> +    mfn = page_to_mfn(pg);
> +    for ( i = 0; i < (1U << order); i++ )
> +        flush_page_to_ram(mfn_x(mfn) + i, !(memflags & MEMF_no_icache_flush));
> +
>      return pg;
>  }
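
The color scan also deserves a worked example. Assuming colors[] is sorted
in ascending order, with colors = {2, 3, 4, 5, 6, 7} and order = 2
(mask = 3):

    colors[0] = 2: index 2, no aligned run started yet -> skip
    colors[1] = 3: index 3, still no run               -> skip
    colors[2] = 4: index 0, aligned -> start a run, color = 4
    colors[3] = 5: index 1, matches the expected color -> continue
    colors[4] = 6: index 2, matches                    -> continue
    colors[5] = 7: index 3 == mask -> run 4-7 complete, try the heap

Any gap in colors[] restarts the search at the next order-aligned color.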
>
>  static void __init init_color_heap_pages(struct page_info *pg,
>                                           unsigned long nr_pages)
>  {
> -    unsigned int i;
> +    unsigned long s, e;
>
>      if ( !_color_heap )
>      {
>          unsigned int max_colors = get_max_colors();
> +        unsigned int color_heap_num = (2 * max_colors) - 1;
> +        unsigned int i;
>
> -        _color_heap = xmalloc_array(colored_pages_t, max_colors);
> +        max_color_order = flsl(max_colors) - 1;
> +
> +        _color_heap = xmalloc_array(colored_pages_t, color_heap_num);
>          BUG_ON(!_color_heap);
>          free_colored_pages = xzalloc_array(unsigned long, max_colors);
>          BUG_ON(!free_colored_pages);
> -
> -        for ( i = 0; i < max_colors; i++ )
> -            INIT_PAGE_LIST_HEAD(color_heap(i));
> +        for ( i = 0; i < color_heap_num; i++ )
> +            INIT_PAGE_LIST_HEAD(&_color_heap[i]);
>      }
>
>      printk(XENLOG_DEBUG
>             "Init color heap with %lu pages starting from: %#"PRIx64"\n",
>             nr_pages, page_to_maddr(pg));
>
> -    for ( i = 0; i < nr_pages; i++ )
> -        free_color_heap_page(&pg[i]);
> +    s = mfn_x(page_to_mfn(pg));
> +    e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
> +
> +    while ( s < e )
> +    {
> +        unsigned int inc_order = min_t(unsigned int, max_color_order,
> +                                       flsl(e - s) - 1);
> +
> +        if ( s )
> +            inc_order = min(inc_order, ffsl(s) - 1U);
> +        free_color_heap_pages(mfn_to_page(_mfn(s)), inc_order);
> +        s += (1UL << inc_order);
> +    }
>  }
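
The chunking loop frees each region using the largest order that is both
mfn-aligned and fits in what remains, capped at max_color_order. For
example, with max_color_order == 4 and a region spanning mfns 12-31
(s = 12, e = 32):

    s = 12: flsl(20) - 1 == 4, but ffsl(12) - 1 == 2 (alignment)
            -> free an order-2 chunk at mfn 12, s = 16
    s = 16: flsl(16) - 1 == 4 and ffsl(16) - 1 == 4
            -> free an order-4 chunk at mfn 16, s = 32 == e, done

so the region is handed to free_color_heap_pages() in two chunks.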
>
> -static struct page_info *alloc_color_domheap_page(struct domain *d,
> -                                                  unsigned int memflags)
> +static struct page_info *alloc_color_domheap_pages(struct domain *d,
> +                                                   unsigned int order,
> +                                                   unsigned int memflags)
>  {
>      struct page_info *pg;
>
> -    pg = alloc_color_heap_page(memflags, d->arch.colors, d->arch.num_colors);
> +    pg = alloc_color_heap_pages(order, memflags, d->arch.colors,
> +                                d->arch.num_colors);
>      if ( !pg )
>          return NULL;
>
> @@ -591,9 +675,9 @@ static struct page_info *alloc_color_domheap_page(struct domain *d,
>      {
>          if ( memflags & MEMF_no_refcount )
>              pg->count_info |= PGC_extra;
> -        if ( assign_page(pg, 0, d, memflags) )
> +        if ( assign_page(pg, order, d, memflags) )
>          {
> -            free_color_heap_page(pg);
> +            free_color_heap_pages(pg, order);
>              return NULL;
>          }
>      }
> @@ -616,12 +700,13 @@ integer_param("buddy-alloc-size", buddy_alloc_size);
>
>  static void __init init_color_heap_pages(struct page_info *pg,
>                                           unsigned long nr_pages) {}
> -static struct page_info *alloc_color_domheap_page(struct domain *d,
> -                                                  unsigned int memflags)
> +static struct page_info *alloc_color_domheap_pages(struct domain *d,
> +                                                   unsigned int order,
> +                                                   unsigned int memflags)
>  {
>      return NULL;
>  }
> -static void free_color_heap_page(struct page_info *pg) {}
> +static void free_color_heap_pages(struct page_info *pg, unsigned int order) {}
>  static void dump_color_heap(void) {}
>
>  #endif /* CONFIG_CACHE_COLORING */
> @@ -2637,12 +2722,7 @@ struct page_info *alloc_domheap_pages(
>
>      /* Only domains are supported for coloring */
>      if ( IS_ENABLED(CONFIG_CACHE_COLORING) && d )
> -    {
> -        /* Colored allocation must be done on 0 order */
> -        if ( order )
> -            return NULL;
> -        return alloc_color_domheap_page(d, memflags);
> -    }
> +        return alloc_color_domheap_pages(d, order, memflags);
>
>      bits = domain_clamp_alloc_bitsize(memflags & MEMF_no_owner ? NULL : d,
>                                        bits ? : (BITS_PER_LONG+PAGE_SHIFT));
> @@ -2762,7 +2842,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
>          }
>
>          if ( pg->count_info & PGC_colored )
> -            free_color_heap_page(pg);
> +            free_color_heap_pages(pg, order);
>          else
>              free_heap_pages(pg, order, scrub);
>      }
> --
> 2.34.1
>
