Re: [Xen-devel] [PATCH v5 02/24] xen: make two memory hypercalls vNUMA-aware



On 12/02/15 19:44, Wei Liu wrote:
> Make XENMEM_increase_reservation and XENMEM_populate_physmap
> vNUMA-aware.
>
> That is, if a guest requests Xen to allocate memory for a specific
> vnode, Xen can translate the vnode to a pnode using that guest's
> vNUMA information.
>
> XENMEMF_vnode is introduced for the guest to indicate that the node
> number is in fact a virtual node number and should be translated by
> Xen.
>
> XENFEAT_memory_op_vnode_supported is introduced to indicate that Xen
> is able to translate a virtual node to a physical node.
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> Cc: Jan Beulich <JBeulich@xxxxxxxx>
> ---
> Changes in v5:
> 1. New logic in translation function.
>
> Changes in v3:
> 1. Coding style fix.
> 2. Remove redundant assignment.
>
> Changes in v2:
> 1. Return start_extent when vnode translation fails.
> 2. Expose new feature bit to guest.
> 3. Fix typo in comment.
> ---
>  xen/common/kernel.c           |  2 +-
>  xen/common/memory.c           | 51 +++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/features.h |  3 +++
>  xen/include/public/memory.h   |  2 ++
>  4 files changed, 53 insertions(+), 5 deletions(-)
>
> diff --git a/xen/common/kernel.c b/xen/common/kernel.c
> index 0d9e519..e5e0050 100644
> --- a/xen/common/kernel.c
> +++ b/xen/common/kernel.c
> @@ -301,7 +301,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          switch ( fi.submap_idx )
>          {
>          case 0:
> -            fi.submap = 0;
> +            fi.submap = (1U << XENFEAT_memory_op_vnode_supported);
>              if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
>                  fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
>              if ( paging_mode_translate(current->domain) )
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index e84ace9..fa3729b 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -692,6 +692,43 @@ out:
>      return rc;
>  }
>  
> +static int translate_vnode_to_pnode(struct domain *d,
> +                                    struct xen_memory_reservation *r,

const struct xen_memory_reservation *r

> +                                    struct memop_args *a)
> +{
> +    int rc = 0;
> +    unsigned int vnode, pnode;
> +
> +    if ( r->mem_flags & XENMEMF_vnode )
> +    {
> +        a->memflags &= ~MEMF_node(XENMEMF_get_node(r->mem_flags));
> +        a->memflags &= ~MEMF_exact_node;

This interface feels semantically wrong, especially as the caller sets
these fields first, just to have them dropped at this point.

A rather more appropriate function would be something along the lines of
"construct_memop_from_reservation()" (name subject to improvement), which
takes care of completely filling 'a' from 'r', doing a v->p translation if
needed.
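
As a rough, untested sketch of the shape I have in mind (illustrative
only; the populate-on-demand handling would stay in the caller, and the
address_bits logic is simply lifted from do_memory_op()):

static int construct_memop_from_reservation(
    struct domain *d,
    const struct xen_memory_reservation *r,
    struct memop_args *a)
{
    unsigned int address_bits, vnode, pnode;
    int rc = 0;

    /* Fill 'a' entirely from 'r', rather than patching it up later. */
    a->extent_list  = r->extent_start;
    a->nr_extents   = r->nr_extents;
    a->extent_order = r->extent_order;
    a->memflags     = 0;

    address_bits = XENMEMF_get_address_bits(r->mem_flags);
    if ( (address_bits != 0) &&
         (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( address_bits <= PAGE_SHIFT )
            return -EINVAL;
        a->memflags = MEMF_bits(address_bits);
    }

    if ( !(r->mem_flags & XENMEMF_vnode) )
    {
        /* Plain physical node request: behaviour unchanged. */
        a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags));
        if ( r->mem_flags & XENMEMF_exact_node_request )
            a->memflags |= MEMF_exact_node;
        return 0;
    }

    /* Virtual node request: translate vnode to pnode under the lock. */
    read_lock(&d->vnuma_rwlock);
    if ( d->vnuma )
    {
        vnode = XENMEMF_get_node(r->mem_flags);

        if ( vnode < d->vnuma->nr_vnodes )
        {
            pnode = d->vnuma->vnode_to_pnode[vnode];

            if ( pnode != NUMA_NO_NODE )
            {
                a->memflags |= MEMF_node(pnode);
                if ( r->mem_flags & XENMEMF_exact_node_request )
                    a->memflags |= MEMF_exact_node;
            }
        }
        else
            rc = -EINVAL;
    }
    read_unlock(&d->vnuma_rwlock);

    return rc;
}

do_memory_op() would then call this once after copying the reservation
in, and bail out to start_extent on a non-zero return.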

~Andrew

> +
> +        read_lock(&d->vnuma_rwlock);
> +        if ( d->vnuma )
> +        {
> +            vnode = XENMEMF_get_node(r->mem_flags);
> +
> +            if ( vnode < d->vnuma->nr_vnodes )
> +            {
> +                pnode = d->vnuma->vnode_to_pnode[vnode];
> +
> +                if ( pnode != NUMA_NO_NODE )
> +                {
> +                    a->memflags |= MEMF_node(pnode);
> +                    if ( r->mem_flags & XENMEMF_exact_node_request )
> +                        a->memflags |= MEMF_exact_node;
> +                }
> +            }
> +            else
> +                rc = -EINVAL;
> +        }
> +        read_unlock(&d->vnuma_rwlock);
> +    }
> +
> +    return rc;
> +}
> +
>  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  {
>      struct domain *d;
> @@ -734,10 +771,6 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>              args.memflags = MEMF_bits(address_bits);
>          }
>  
> -        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
> -        if ( reservation.mem_flags & XENMEMF_exact_node_request )
> -            args.memflags |= MEMF_exact_node;
> -
>          if ( op == XENMEM_populate_physmap
>               && (reservation.mem_flags & XENMEMF_populate_on_demand) )
>              args.memflags |= MEMF_populate_on_demand;
> @@ -747,6 +780,16 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>              return start_extent;
>          args.domain = d;
>  
> +        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
> +        if ( reservation.mem_flags & XENMEMF_exact_node_request )
> +            args.memflags |= MEMF_exact_node;
> +
> +        if ( translate_vnode_to_pnode(d, &reservation, &args) )
> +        {
> +            rcu_unlock_domain(d);
> +            return start_extent;
> +        }
> +
>          if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) )
>          {
>              rcu_unlock_domain(d);
> diff --git a/xen/include/public/features.h b/xen/include/public/features.h
> index 16d92aa..2110b04 100644
> --- a/xen/include/public/features.h
> +++ b/xen/include/public/features.h
> @@ -99,6 +99,9 @@
>  #define XENFEAT_grant_map_identity        12
>   */
>  
> +/* Guest can use XENMEMF_vnode to specify a virtual node for memory ops. */
> +#define XENFEAT_memory_op_vnode_supported 13
> +
>  #define XENFEAT_NR_SUBMAPS 1
>  
>  #endif /* __XEN_PUBLIC_FEATURES_H__ */
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 595f953..2b5206b 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -55,6 +55,8 @@
>  /* Flag to request allocation only from the node specified */
>  #define XENMEMF_exact_node_request  (1<<17)
>  #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
> +/* Flag to indicate that the node specified is a virtual node */
> +#define XENMEMF_vnode  (1<<18)
>  #endif
>  
>  struct xen_memory_reservation {
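
For reference, my reading of the intended guest-side usage, again as an
illustrative sketch only (xen_feature(), set_xen_guest_handle() and
HYPERVISOR_memory_op() as in the Linux guest helpers; nr_frames,
frame_list and vnode are made-up names):

/* Only use the new flag if Xen advertises the translation. */
if ( xen_feature(XENFEAT_memory_op_vnode_supported) )
{
    struct xen_memory_reservation res = {
        .nr_extents   = nr_frames,
        .extent_order = 0,
        .domid        = DOMID_SELF,
        /*
         * The node number is a *virtual* node; Xen translates it to a
         * physical node via the guest's vnode_to_pnode map.
         */
        .mem_flags    = XENMEMF_vnode | XENMEMF_node(vnode),
    };
    long rc;

    set_xen_guest_handle(res.extent_start, frame_list);
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);
    /* rc is the number of extents actually populated. */
}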


