[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 07/15] x86: implement set value flow for MBA



On Thu, Aug 24, 2017 at 09:14:41AM +0800, Yi Sun wrote:
> This patch implements set value flow for MBA including its callback
> function and domctl interface.
> 
> It also changes the memebers in 'cos_write_info' to transfer the
> feature array, feature properties array and value array. Then, we
> can write all features values on the cos id into MSRs.
> 
> Because multiple features may co-exist, we need handle all features to write
> values of them into a COS register with new COS ID. E.g:
> 1. L3 CAT and MBA co-exist.
> 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff,
>    the MBA Thrtle of Dom1 is 0xa.
> 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is
>    used by Dom2 too, we have to pick a new COS ID 3. The original values of
>    Dom1 on COS ID 3 may be below:

What original values? You said you pick COS ID 3, because I think it's
assumed to be empty? In which case there are no original values in COS
ID 3.

>            ---------
>            | COS 3 |
>            ---------
>    L3 CAT  | 0x7ff |
>            ---------
>    MBA     | 0x0   |
>            ---------
> 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA
>    Thrtl is set. So, the values on COS ID 3 should be below.
>            ---------
>            | COS 3 |
>            ---------
>    L3 CAT  | 0x1ff |
>            ---------
>    MBA     | 0x14  |
>            ---------
> 
> So, we should write all features values into their MSRs. That requires the
> feature array, feature properties array and value array are input.
                                                          ^ as
> 
> Signed-off-by: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx>
> ---
> v2:
>     - remove linear mode 'thrtl_max' check in 'mba_check_thrtl' because it has
>       been checked in 'mba_init_feature'.
>       (suggested by Chao Peng)
>     - for non-linear mode, check if '*thrtl' is not 0 in 'mba_check_thrtl'. If
>       it is 0, we do not need to change it.
>       (suggested by Chao Peng)
>     - move comments to explain changes of 'cos_write_info' from psr.c to 
> commit
>       message.
>       (suggested by Chao Peng)
> ---
>  xen/arch/x86/domctl.c       |   6 ++
>  xen/arch/x86/psr.c          | 150 
> ++++++++++++++++++++++++++++++--------------
>  xen/include/public/domctl.h |   1 +
>  3 files changed, 109 insertions(+), 48 deletions(-)
> 
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 4936bcb..0ae4799 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1468,6 +1468,12 @@ long arch_do_domctl(
>                                PSR_VAL_TYPE_L2_CBM);
>              break;
>  
> +        case XEN_DOMCTL_PSR_MBA_OP_SET_THRTL:
> +            ret = psr_set_val(d, domctl->u.psr_alloc_op.target,
> +                              domctl->u.psr_alloc_op.data,
> +                              PSR_VAL_TYPE_MBA);
> +            break;
> +
>          case XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM:
>              ret = psr_get_val(d, domctl->u.psr_alloc_op.target,
>                                &val32, PSR_VAL_TYPE_L3_CBM);
> diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
> index 4a0c982..ce82975 100644
> --- a/xen/arch/x86/psr.c
> +++ b/xen/arch/x86/psr.c
> @@ -138,6 +138,12 @@ static const struct feat_props {
>  
>      /* write_msr is used to write out feature MSR register. */
>      void (*write_msr)(unsigned int cos, uint32_t val, enum psr_val_type 
> type);
> +
> +    /*
> +     * check_val is used to check if input val fulfills SDM requirement.
> +     * Change it to valid value if SDM allows.

I'm not really sure it's a good idea to change the value to a valid
one, IMHO you should just check and print an error if the value is
invalid (and return false of course).

> +     */
> +    bool (*check_val)(const struct feat_node *feat, unsigned long *val);
>  } *feat_props[FEAT_TYPE_NUM];
>  
>  /*
> @@ -275,29 +281,6 @@ static enum psr_feat_type psr_val_type_to_feat_type(enum 
> psr_val_type type)
>      return feat_type;
>  }
>  
> -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm)
> -{
> -    unsigned int first_bit, zero_bit;
> -
> -    /* Set bits should only in the range of [0, cbm_len]. */
> -    if ( cbm & (~0ul << cbm_len) )
> -        return false;
> -
> -    /* At least one bit need to be set. */
> -    if ( cbm == 0 )
> -        return false;
> -
> -    first_bit = find_first_bit(&cbm, cbm_len);
> -    zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit);
> -
> -    /* Set bits should be contiguous. */
> -    if ( zero_bit < cbm_len &&
> -         find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len )
> -        return false;
> -
> -    return true;
> -}
> -
>  /* Implementation of allocation features' functions. */
>  static int cat_init_feature(const struct cpuid_leaf *regs,
>                              struct feat_node *feat,
> @@ -433,6 +416,30 @@ static bool cat_get_feat_info(const struct feat_node 
> *feat,
>      return true;
>  }
>  
> +static bool cat_check_cbm(const struct feat_node *feat, unsigned long *cbm)
> +{
> +    unsigned int first_bit, zero_bit;
> +    unsigned int cbm_len = feat->cat_info.cbm_len;
> +
> +    /* Set bits should only in the range of [0, cbm_len]. */
> +    if ( *cbm & (~0ul << cbm_len) )
> +        return false;
> +
> +    /* At least one bit need to be set. */
> +    if ( *cbm == 0 )
> +        return false;
> +
> +    first_bit = find_first_bit(cbm, cbm_len);
> +    zero_bit = find_next_zero_bit(cbm, cbm_len, first_bit);
> +
> +    /* Set bits should be contiguous. */
> +    if ( zero_bit < cbm_len &&
> +         find_next_bit(cbm, cbm_len, zero_bit) < cbm_len )
> +        return false;
> +
> +    return true;
> +}
> +
>  /* L3 CAT props */
>  static void l3_cat_write_msr(unsigned int cos, uint32_t val,
>                               enum psr_val_type type)
> @@ -446,6 +453,7 @@ static const struct feat_props l3_cat_props = {
>      .alt_type = PSR_VAL_TYPE_UNKNOWN,
>      .get_feat_info = cat_get_feat_info,
>      .write_msr = l3_cat_write_msr,
> +    .check_val = cat_check_cbm,
>  };

Maybe the introduction of check_val should be a separate patch? It's
mostly code movement and some fixup.

>  /* L3 CDP props */
> @@ -476,6 +484,7 @@ static const struct feat_props l3_cdp_props = {
>      .alt_type = PSR_VAL_TYPE_L3_CBM,
>      .get_feat_info = l3_cdp_get_feat_info,
>      .write_msr = l3_cdp_write_msr,
> +    .check_val = cat_check_cbm,
>  };
>  
>  /* L2 CAT props */
> @@ -491,6 +500,7 @@ static const struct feat_props l2_cat_props = {
>      .alt_type = PSR_VAL_TYPE_UNKNOWN,
>      .get_feat_info = cat_get_feat_info,
>      .write_msr = l2_cat_write_msr,
> +    .check_val = cat_check_cbm,
>  };
>  
>  /* MBA props */
> @@ -514,6 +524,40 @@ static bool mba_get_feat_info(const struct feat_node 
> *feat,
>  static void mba_write_msr(unsigned int cos, uint32_t val,
>                            enum psr_val_type type)
>  {
> +    wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val);
> +}
> +
> +static bool mba_check_thrtl(const struct feat_node *feat, unsigned long 
> *thrtl)
> +{
> +    if ( *thrtl > feat->mba_info.thrtl_max )
> +        return false;
> +
> +    /*
> +     * Per SDM (chapter "Memory Bandwidth Allocation Configuration"):
> +     * 1. Linear mode: In the linear mode the input precision is defined
> +     *    as 100-(MBA_MAX). For instance, if the MBA_MAX value is 90, the
> +     *    input precision is 10%. Values not an even multiple of the
> +     *    precision (e.g., 12%) will be rounded down (e.g., to 10% delay
> +     *    applied).
> +     * 2. Non-linear mode: Input delay values are powers-of-two from zero
> +     *    to the MBA_MAX value from CPUID. In this case any values not a
> +     *    power of two will be rounded down the next nearest power of two.
> +     */
> +    if ( feat->mba_info.linear )
> +    {
> +        unsigned int mod;
> +
> +        mod = *thrtl % (100 - feat->mba_info.thrtl_max);
> +        *thrtl -= mod;
> +    }
> +    else
> +    {
> +        /* Not power of 2. */
> +        if ( *thrtl && (*thrtl & (*thrtl - 1)) )

This can be joined with the else to avoid another indentation level:

else if ( *thrtl && (*thrtl & (*thrtl - 1)) )
...

> +            *thrtl = *thrtl & (1 << (flsl(*thrtl) - 1));
> +    }
> +
> +    return true;
>  }
>  
>  static const struct feat_props mba_props = {
> @@ -522,6 +566,7 @@ static const struct feat_props mba_props = {
>      .alt_type = PSR_VAL_TYPE_UNKNOWN,
>      .get_feat_info = mba_get_feat_info,
>      .write_msr = mba_write_msr,
> +    .check_val = mba_check_thrtl,
>  };
>  
>  static void __init parse_psr_bool(char *s, char *value, char *feature,
> @@ -942,6 +987,7 @@ static int insert_val_into_array(uint32_t val[],
>      const struct feat_node *feat;
>      const struct feat_props *props;
>      unsigned int i;
> +    unsigned long check_val = new_val;
>      int ret;
>  
>      ASSERT(feat_type < FEAT_TYPE_NUM);
> @@ -966,9 +1012,11 @@ static int insert_val_into_array(uint32_t val[],
>      if ( array_len < props->cos_num )
>          return -ENOSPC;
>  
> -    if ( !psr_check_cbm(feat->cat_info.cbm_len, new_val) )
> +    if ( !props->check_val(feat, &check_val) )
>          return -EINVAL;
>  
> +    new_val = check_val;
> +
>      /*
>       * Value setting position is same as feature array.
>       * For CDP, user may set both DATA and CODE to same value. For such case,
> @@ -1198,25 +1246,42 @@ static unsigned int get_socket_cpu(unsigned int 
> socket)
>  struct cos_write_info
>  {
>      unsigned int cos;
> -    struct feat_node *feature;
> +    struct feat_node **features;
>      const uint32_t *val;
> -    const struct feat_props *props;
> +    unsigned int array_len;
> +    const struct feat_props **props;
>  };
>  
>  static void do_write_psr_msrs(void *data)
>  {
>      const struct cos_write_info *info = data;
> -    struct feat_node *feat = info->feature;
> -    const struct feat_props *props = info->props;
> -    unsigned int i, cos = info->cos, cos_num = props->cos_num;
> +    unsigned int i, j, index = 0, array_len = info->array_len, cos = 
> info->cos;
> +    const uint32_t *val_array = info->val;
>  
> -    for ( i = 0; i < cos_num; i++ )
> +    for ( i = 0; i < ARRAY_SIZE(feat_props); i++ )
>      {

index and j can be defined here, they are only used inside of this for
loop AFAICT.

> -        if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] )
> +        struct feat_node *feat = info->features[i];
> +        const struct feat_props *props = info->props[i];
> +        unsigned int cos_num;
> +
> +        if ( !feat || !props )
> +            continue;
> +
> +        cos_num = props->cos_num;
> +        if ( array_len < cos_num )
> +            return;
> +
> +        for ( j = 0; j < cos_num; j++ )
>          {
> -            feat->cos_reg_val[cos * cos_num + i] = info->val[i];
> -            props->write_msr(cos, info->val[i], props->type[i]);
> +            if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + 
> j] )
> +            {
> +                feat->cos_reg_val[cos * cos_num + j] = val_array[index + j];
> +                props->write_msr(cos, val_array[index + j], props->type[j]);
> +            }
>          }
> +
> +        array_len -= cos_num;
> +        index += cos_num;
>      }
>  }
>  
> @@ -1224,30 +1289,19 @@ static int write_psr_msrs(unsigned int socket, 
> unsigned int cos,
>                            const uint32_t val[], unsigned int array_len,
>                            enum psr_feat_type feat_type)
>  {
> -    int ret;
>      struct psr_socket_info *info = get_socket_info(socket);

info should probably be const here.

>      struct cos_write_info data =
>      {
>          .cos = cos,
> -        .feature = info->features[feat_type],
> -        .props = feat_props[feat_type],
> +        .features = info->features,
> +        .val = val,
> +        .array_len = array_len,
> +        .props = feat_props,
>      };

AFAICT data should also be const, but I guess this is not going to
work because on_selected_cpus expects a non-const payload?

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.