
Re: [Xen-devel] [PATCH RESEND v7] libxl: vnuma topology configuration parser and doc



Konrad

Thank you for reviewing.
I will read your comments on all patches more carefully and will
reply/fix tonight.

Elena

On Thu, Aug 21, 2014 at 10:06 PM, Konrad Rzeszutek Wilk
<konrad.wilk@xxxxxxxxxx> wrote:
> On Thu, Aug 21, 2014 at 01:10:29AM -0400, Elena Ufimtseva wrote:
>> Parses the vnuma topology: number of nodes and memory
>> ranges. If not defined, initializes vnuma with
>> only one node and default topology. This one node covers
>> all domain memory and has all vcpus assigned to it.
>
> .. snip..
>> +/*
>> + * init vNUMA to "zero config" with one node and all other
>> + * topology parameters set to default.
>> + */
>> +static int vnuma_default_config(libxl_domain_build_info *b_info)
>> +{
>> +    b_info->vnodes = 1;
>> +    /* all memory goes to this one vnode, as well as vcpus. */
>> +    if (!(b_info->vnuma_mem = (uint64_t *)calloc(b_info->vnodes,
>> +                                sizeof(*b_info->vnuma_mem))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vnuma_vcpumap = (unsigned int *)calloc(b_info->max_vcpus,
>> +                                sizeof(*b_info->vnuma_vcpumap))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vdistance = (unsigned int *)calloc(b_info->vnodes *
>> +                                b_info->vnodes, sizeof(*b_info->vdistance))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    if (!(b_info->vnuma_vnodemap = (unsigned int *)calloc(b_info->vnodes,
>> +                                sizeof(*b_info->vnuma_vnodemap))))
>> +        goto bad_vnumazerocfg;
>> +
>> +    b_info->vnuma_mem[0] = b_info->max_memkb >> 10;
>> +
>> +    /* all vcpus assigned to this vnode. */
>> +    vcputovnode_default(b_info->vnuma_vcpumap, b_info->vnodes,
>> +                        b_info->max_vcpus);
>> +
>> +    /* default vdistance is 10. */
>> +    vdistance_set(b_info->vdistance, b_info->vnodes, 10, 10);
>> +
>> +    /* VNUMA_NO_NODE for vnode_to_pnode. */
>> +    vnuma_vnodemap_default(b_info->vnuma_vnodemap, b_info->vnodes);
>> +
>> +    /*
>> +     * Vnodes will be placed on physical nodes chosen by automatic
>> +     * NUMA placement; with VNUMA_NO_NODE no exact node is requested.
>> +     */
>> +    libxl_defbool_set(&b_info->vnuma_autoplacement, true);
>> +    return 0;
>> +
>> + bad_vnumazerocfg:
>
> I know that the caller ends up calling free_vnuma_info if this fails,
> but my impression is that we should do it here, as in:
>
>         free_vnuma_info(b_info);
>
> However it is up to the maintainer (Ian) to weigh in which way
> he would like it done.
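>
> E.g. something along these lines (rough sketch, untested):
>
>  bad_vnumazerocfg:
>     free_vnuma_info(b_info);
>     return -1;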
>
>> +    return -1;
>> +}
>> +
>> +static void free_vnuma_info(libxl_domain_build_info *b_info)
>> +{
>> +    free(b_info->vnuma_mem);
>> +    free(b_info->vdistance);
>> +    free(b_info->vnuma_vcpumap);
>> +    free(b_info->vnuma_vnodemap);
>
> I think you need to set
>         b_info->vnuma_mem = NULL;
>         b_info->vdistance = NULL;
>         ..
>
> and so on. This is more to future-proof the code in case somebody
> later inadvertently adds this:
>
>  if (b_info->vnuma_mem) .. blah
>
> without first checking for 'if (b_info->vnodes)'.
>
> It has tripped me up in the Linux kernel with 'unbind' and 'bind' on
> SysFS, where drivers forgot to reset their internal state to NULL
> and we crashed.
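>
> Something like this, reusing the function below (rough sketch):
>
> static void free_vnuma_info(libxl_domain_build_info *b_info)
> {
>     free(b_info->vnuma_mem);
>     free(b_info->vdistance);
>     free(b_info->vnuma_vcpumap);
>     free(b_info->vnuma_vnodemap);
>     /* Reset the pointers so a later check can't see a stale value. */
>     b_info->vnuma_mem = NULL;
>     b_info->vdistance = NULL;
>     b_info->vnuma_vcpumap = NULL;
>     b_info->vnuma_vnodemap = NULL;
>     b_info->vnodes = 0;
> }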
>
>> +    b_info->vnodes = 0;
>> +}
>> +
>> +static int parse_vnuma_mem(XLU_Config *config,
>> +                            libxl_domain_build_info **b_info)
>> +{
>> +    libxl_domain_build_info *dst;
>> +    XLU_ConfigList *vnumamemcfg;
>> +    int nr_vnuma_regions, i;
>> +    unsigned long long vnuma_memparsed = 0;
>> +    unsigned long ul;
>> +    const char *buf;
>> +
>> +    dst = *b_info;
>> +    if (!xlu_cfg_get_list(config, "vnuma_mem",
>> +                          &vnumamemcfg, &nr_vnuma_regions, 0)) {
>> +
>> +        if (nr_vnuma_regions != dst->vnodes) {
>> +            fprintf(stderr, "Number of numa regions (vnumamem = %d) is \
>> +                    incorrect (should be %d).\n", nr_vnuma_regions,
>> +                    dst->vnodes);
>> +            goto bad_vnuma_mem;
>> +        }
>> +
>> +        dst->vnuma_mem = calloc(dst->vnodes,
>> +                                 sizeof(*dst->vnuma_mem));
>> +        if (dst->vnuma_mem == NULL) {
>> +            fprintf(stderr, "Unable to allocate memory for vnuma ranges.\n");
>> +            goto bad_vnuma_mem;
>> +        }
>> +
>> +        char *ep;
>
> I am surprised the compiler didn't throw a fit!
>
> This is usually done at the start of the function or the '{ }' block.
> Please move it there.
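>
> I.e., with the declarations the function already has (sketch):
>
> static int parse_vnuma_mem(XLU_Config *config,
>                            libxl_domain_build_info **b_info)
> {
>     libxl_domain_build_info *dst;
>     XLU_ConfigList *vnumamemcfg;
>     int nr_vnuma_regions, i;
>     unsigned long long vnuma_memparsed = 0;
>     unsigned long ul;
>     const char *buf;
>     char *ep;    /* moved up from the middle of the block */
>     ...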
>> +        /*
>> +         * Will parse only nr_vnodes times, even if we have more/less regions.
>> +         * Take care of it later if less or discard if too many regions.
>> +         */
>> +        for (i = 0; i < dst->vnodes; i++) {
>> +            buf = xlu_cfg_get_listitem(vnumamemcfg, i);
>> +            if (!buf) {
>> +                fprintf(stderr,
>> +                        "xl: Unable to get element %d in vnuma memory 
>> list.\n", i);
>
> You also need to free dst->vnuma_mem before you do the 'goto'.
>
>> +                goto bad_vnuma_mem;
>> +            }
>> +
>> +            ul = strtoul(buf, &ep, 10);
>> +            if (ep == buf) {
>> +                fprintf(stderr, "xl: Invalid argument parsing vnumamem: 
>> %s.\n", buf);
>
> Ditto.
>> +                goto bad_vnuma_mem;
>> +            }
>> +
>> +            /* 32Mb is a min size for a node, taken from Linux */
>
> I thought it was 64? Or perhaps I forgot it? Could you give a pointer to
> where this was defined in Linux? Say (taken from Linux - see 
> arch/x86/blahbla/bla.c).
>
>> +            if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
>> +                fprintf(stderr, "xl: vnuma memory %lu is not within %u - %u 
>> range.\n",
>> +                        ul, MIN_VNODE_SIZE, UINT32_MAX);
>
>         free(dst->vnuma_mem);
>
> before you jump. Or you can have in the bad_vnuma_mem:
>         free(dst->vnuma_mem);
>         dst->vnuma_mem = NULL;
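>
> I.e. (sketch of the label approach):
>
>  bad_vnuma_mem:
>     free(dst->vnuma_mem);
>     dst->vnuma_mem = NULL;
>     return -1;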
>
>> +                goto bad_vnuma_mem;
>> +            }
>> +
>> +            /* memory in MBytes */
>> +            dst->vnuma_mem[i] = ul;
>> +        }
>> +
>> +        /* Total memory for vNUMA parsed to verify */
>> +        for (i = 0; i < nr_vnuma_regions; i++)
>> +            vnuma_memparsed = vnuma_memparsed + (dst->vnuma_mem[i]);
>> +
>> +        /* Amount of memory for vnodes same as total? */
>> +        if ((vnuma_memparsed << 10) != (dst->max_memkb)) {
>> +            fprintf(stderr, "xl: vnuma memory is not the same as domain \
>> +                    memory size.\n");
>> +            goto bad_vnuma_mem;
>
> Again, either free it or the label is responsible for it.
>> +        }
>> +    } else {
>> +        dst->vnuma_mem = calloc(dst->vnodes,
>> +                                      sizeof(*dst->vnuma_mem));
>> +        if (dst->vnuma_mem == NULL) {
>> +            fprintf(stderr, "Unable to allocate memory for vnuma ranges.\n");
>> +            goto bad_vnuma_mem;
>> +        }
>> +
>> +        fprintf(stderr, "WARNING: vNUMA memory ranges were not 
>> specified.\n");
>> +        fprintf(stderr, "Using default equal vnode memory size %lu Kbytes \
>> +                to cover %lu Kbytes.\n",
>> +                dst->max_memkb / dst->vnodes, dst->max_memkb);
>> +
>> +        if (split_vnumamem(dst) < 0) {
>> +            fprintf(stderr, "Could not split vnuma memory into equal 
>> chunks.\n");
>
> Guess what I am going to say :-)
>
>> +            goto bad_vnuma_mem;
>> +        }
>> +    }
>> +    return 0;
>> +
>> + bad_vnuma_mem:
>> +    return -1;
>> +}
>> +
>> +static int parse_vnuma_distance(XLU_Config *config,
>> +                                libxl_domain_build_info **b_info)
>> +{
>> +    libxl_domain_build_info *dst;
>> +    XLU_ConfigList *vdistancecfg;
>> +    int nr_vdist;
>> +
>> +    dst = *b_info;
>> +    dst->vdistance = calloc(dst->vnodes * dst->vnodes,
>> +                               sizeof(*dst->vdistance));
>> +    if (dst->vdistance == NULL)
>> +        goto bad_distance;
>> +
>> +    if (!xlu_cfg_get_list(config, "vdistance", &vdistancecfg, &nr_vdist, 0)) {
>> +        int d1, d2, i;
>> +        /*
>> +         * First value is the same node distance, the second as the
>> +         * rest of distances. The following is required right now to
>> +         * avoid non-symmetrical distance table as it may break latest kernel.
>> +         * TODO: Better way to analyze extended distance table, possibly
>> +         * OS specific.
>> +         */
>> +
>> +        for (i = 0; i < nr_vdist; i++) {
>> +            d1 = get_list_item_uint(vdistancecfg, i);
>> +        }
>> +
>> +        d1 = get_list_item_uint(vdistancecfg, 0);
>> +        if (dst->vnodes > 1)
>> +           d2 = get_list_item_uint(vdistancecfg, 1);
>> +        else
>> +           d2 = d1;
>> +
>> +        if (d1 >= 0 && d2 >= 0) {
>> +            if (d1 < d2)
>> +                fprintf(stderr, "WARNING: vnuma distance d1 < d2, %u < 
>> %u\n", d1, d2);
>> +            vdistance_set(dst->vdistance, dst->vnodes, d1, d2);
>> +        } else {
>> +            fprintf(stderr, "WARNING: vnuma distance values are 
>> incorrect.\n");
>> +            goto bad_distance;
>
> free dst->vdistance first.
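>
> E.g. (sketch):
>
>  bad_distance:
>     free(dst->vdistance);
>     dst->vdistance = NULL;
>     return -1;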
>
>> +        }
>> +    } else {
>> +        fprintf(stderr, "Could not parse vnuma distances.\n");
>> +        vdistance_set(dst->vdistance, dst->vnodes, 10, 20);
>> +    }
>> +    return 0;
>> +
>> + bad_distance:
>> +    return -1;
>> +}
>> +
>> +static int parse_vnuma_vcpumap(XLU_Config *config,
>> +                                libxl_domain_build_info **b_info)
>> +{
>> +    libxl_domain_build_info *dst;
>> +    XLU_ConfigList *vcpumap;
>> +    int nr_vcpumap, i;
>> +
>> +    dst = *b_info;
>> +    dst->vnuma_vcpumap = (unsigned int *)calloc(dst->max_vcpus,
>> +                                     sizeof(*dst->vnuma_vcpumap));
>> +    if (dst->vnuma_vcpumap == NULL)
>> +        goto bad_vcpumap;
>> +
>> +    if (!xlu_cfg_get_list(config, "vnuma_vcpumap",
>> +                          &vcpumap, &nr_vcpumap, 0)) {
>> +        if (nr_vcpumap == dst->max_vcpus) {
>> +            unsigned int  vnode, vcpumask = 0, vmask;
>
> A newline here, please.
>
>> +            vmask = ~(~0 << nr_vcpumap);
>> +            for (i = 0; i < nr_vcpumap; i++) {
>> +                vnode = get_list_item_uint(vcpumap, i);
>> +                if (vnode >= 0 && vnode < dst->vnodes) {
>> +                    vcpumask  |= (1 << i);
>
> There is an extra space there.
>
>> +                    dst->vnuma_vcpumap[i] = vnode;
>> +                }
>> +            }
>> +
>> +            /* Did it cover all vnodes in the vcpu mask? */
>> +            if ( !(((vmask & vcpumask) + 1) == (1 << nr_vcpumap)) ) {
>> +                fprintf(stderr, "WARNING: Not all vnodes were covered \
>> +                        in numa_cpumask.\n");
>
> Either free it here or the label has to clear it.
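>
> E.g. (sketch):
>
>  bad_vcpumap:
>     free(dst->vnuma_vcpumap);
>     dst->vnuma_vcpumap = NULL;
>     return -1;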
>
>> +                goto bad_vcpumap;
>> +            }
>> +        } else {
>> +            fprintf(stderr, "WARNING:  Bad vnuma_vcpumap.\n");
>> +            goto bad_vcpumap;
>> +        }
>> +    }
>> +    else
>> +        vcputovnode_default(dst->vnuma_vcpumap,
>> +                            dst->vnodes,
>> +                            dst->max_vcpus);
>> +    return 0;
>> +
>> + bad_vcpumap:
>> +    return -1;
>> +}
>> +
>> +static int parse_vnuma_vnodemap(XLU_Config *config,
>> +                                libxl_domain_build_info **b_info)
>> +{
>> +    libxl_domain_build_info *dst;
>> +    XLU_ConfigList *vnodemap;
>> +    int nr_vnodemap, i;
>> +
>> +    dst = *b_info;
>> +
>> +    /* Is there a mapping to physical NUMA nodes? */
>> +    dst->vnuma_vnodemap = (unsigned int *)calloc(dst->vnodes,
>> +                                    sizeof(*dst->vnuma_vnodemap));
>> +    if (dst->vnuma_vnodemap == NULL)
>> +        goto bad_vnodemap;
>> +    if (!xlu_cfg_get_list(config, "vnuma_vnodemap",&vnodemap,
>> +                                            &nr_vnodemap, 0)) {
>
> The indentation is off here. It doesn't need to be so far right; the
> &nr_vnodemap can be aligned with the first parameter.
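>
> I.e. (aligned):
>
>     if (!xlu_cfg_get_list(config, "vnuma_vnodemap", &vnodemap,
>                           &nr_vnodemap, 0)) {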
>> +        /*
>> +        * If not specified or incorred, will be defined
>
> s/incorred/incorrect/



-- 
Elena
