|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 6/8] xen/lib: Add topology generator for x86
On Wed, May 08, 2024 at 01:39:25PM +0100, Alejandro Vallejo wrote:
> Add a helper to populate topology leaves in the cpu policy from
> threads/core and cores/package counts.
>
> No functional change, as it's not connected to anything yet.
There is a functional change in test-cpu-policy.c.
Maybe the commit message should be updated to mention the tests added
to test-cpu-policy.c, which use the newly introduced helper to
generate topologies?
>
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
> ---
> v2:
> * New patch. Extracted from v1/patch6
> ---
> tools/tests/cpu-policy/test-cpu-policy.c | 128 +++++++++++++++++++++++
> xen/include/xen/lib/x86/cpu-policy.h | 16 +++
> xen/lib/x86/policy.c | 86 +++++++++++++++
> 3 files changed, 230 insertions(+)
>
> diff --git a/tools/tests/cpu-policy/test-cpu-policy.c
> b/tools/tests/cpu-policy/test-cpu-policy.c
> index 301df2c00285..0ba8c418b1b3 100644
> --- a/tools/tests/cpu-policy/test-cpu-policy.c
> +++ b/tools/tests/cpu-policy/test-cpu-policy.c
> @@ -650,6 +650,132 @@ static void test_is_compatible_failure(void)
> }
> }
>
> +static void test_topo_from_parts(void)
> +{
> + static const struct test {
> + unsigned int threads_per_core;
> + unsigned int cores_per_pkg;
> + struct cpu_policy policy;
> + } tests[] = {
> + {
> + .threads_per_core = 3, .cores_per_pkg = 1,
> + .policy = {
> + .x86_vendor = X86_VENDOR_AMD,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 3, .level = 0, .type = 1,
> .id_shift = 2, },
> + [1] = { .nr_logical = 1, .level = 1, .type = 2,
> .id_shift = 2, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 1, .cores_per_pkg = 3,
> + .policy = {
> + .x86_vendor = X86_VENDOR_AMD,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 1, .level = 0, .type = 1,
> .id_shift = 0, },
> + [1] = { .nr_logical = 3, .level = 1, .type = 2,
> .id_shift = 2, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 7, .cores_per_pkg = 5,
> + .policy = {
> + .x86_vendor = X86_VENDOR_AMD,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 7, .level = 0, .type = 1,
> .id_shift = 3, },
> + [1] = { .nr_logical = 5, .level = 1, .type = 2,
> .id_shift = 6, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 2, .cores_per_pkg = 128,
> + .policy = {
> + .x86_vendor = X86_VENDOR_AMD,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 2, .level = 0, .type = 1,
> .id_shift = 1, },
> + [1] = { .nr_logical = 128, .level = 1, .type = 2,
> .id_shift = 8, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 3, .cores_per_pkg = 1,
> + .policy = {
> + .x86_vendor = X86_VENDOR_INTEL,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 3, .level = 0, .type = 1,
> .id_shift = 2, },
> + [1] = { .nr_logical = 3, .level = 1, .type = 2,
> .id_shift = 2, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 1, .cores_per_pkg = 3,
> + .policy = {
> + .x86_vendor = X86_VENDOR_INTEL,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 1, .level = 0, .type = 1,
> .id_shift = 0, },
> + [1] = { .nr_logical = 3, .level = 1, .type = 2,
> .id_shift = 2, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 7, .cores_per_pkg = 5,
> + .policy = {
> + .x86_vendor = X86_VENDOR_INTEL,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 7, .level = 0, .type = 1,
> .id_shift = 3, },
> + [1] = { .nr_logical = 35, .level = 1, .type = 2,
> .id_shift = 6, },
> + },
> + },
> + },
> + {
> + .threads_per_core = 2, .cores_per_pkg = 128,
> + .policy = {
> + .x86_vendor = X86_VENDOR_INTEL,
> + .topo.subleaf = {
> + [0] = { .nr_logical = 2, .level = 0, .type = 1,
> .id_shift = 1, },
> + [1] = { .nr_logical = 256, .level = 1, .type = 2,
> .id_shift = 8, },
You don't need the array index in the initialization:
.topo.subleaf = {
{ .nr_logical = 2, .level = 0, .type = 1, .id_shift = 1, },
{ .nr_logical = 256, .level = 1, .type = 2,
.id_shift = 8, },
}
And lines should be limited to 80 columns if possible.
> + },
> + },
> + },
> + };
> +
> + printf("Testing topology synthesis from parts:\n");
> +
> + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i )
> + {
> + const struct test *t = &tests[i];
> + struct cpu_policy actual = { .x86_vendor = t->policy.x86_vendor };
> + int rc = x86_topo_from_parts(&actual, t->threads_per_core,
> t->cores_per_pkg);
> +
> + if ( rc || memcmp(&actual.topo, &t->policy.topo,
> sizeof(actual.topo)) )
> + {
> +#define TOPO(n) topo.subleaf[(n)]
> + fail("FAIL[%d] - '%s %u t/c, %u c/p'\n",
> + rc,
> + x86_cpuid_vendor_to_str(t->policy.x86_vendor),
> + t->threads_per_core, t->cores_per_pkg);
> + printf(" subleaf=%u expected_n=%u actual_n=%u\n"
> + " expected_lvl=%u actual_lvl=%u\n"
> + " expected_type=%u actual_type=%u\n"
> + " expected_shift=%u actual_shift=%u\n",
> + 0, t->policy.TOPO(0).nr_logical,
> actual.TOPO(0).nr_logical,
> + t->policy.TOPO(0).level, actual.TOPO(0).level,
> + t->policy.TOPO(0).type, actual.TOPO(0).type,
> + t->policy.TOPO(0).id_shift, actual.TOPO(0).id_shift);
> +
> + printf(" subleaf=%u expected_n=%u actual_n=%u\n"
> + " expected_lvl=%u actual_lvl=%u\n"
> + " expected_type=%u actual_type=%u\n"
> + " expected_shift=%u actual_shift=%u\n",
> + 1, t->policy.TOPO(1).nr_logical,
> actual.TOPO(1).nr_logical,
> + t->policy.TOPO(1).level, actual.TOPO(1).level,
> + t->policy.TOPO(1).type, actual.TOPO(1).type,
> + t->policy.TOPO(1).id_shift, actual.TOPO(1).id_shift);
> +#undef TOPO
Seeing the usage of the macro, maybe you could even do something like:
TOPO(n, f) t->policy.topo.subleaf[(n)].f, actual.topo.subleaf[(n)].f
This would cut down a bit on the repetition of the
"t->policy..., actual..." pairs.
> + }
> + }
> +}
> +
> int main(int argc, char **argv)
> {
> printf("CPU Policy unit tests\n");
> @@ -667,6 +793,8 @@ int main(int argc, char **argv)
> test_is_compatible_success();
> test_is_compatible_failure();
>
> + test_topo_from_parts();
> +
> if ( nr_failures )
> printf("Done: %u failures\n", nr_failures);
> else
> diff --git a/xen/include/xen/lib/x86/cpu-policy.h
> b/xen/include/xen/lib/x86/cpu-policy.h
> index 392320b9adbe..f5df18e9f77c 100644
> --- a/xen/include/xen/lib/x86/cpu-policy.h
> +++ b/xen/include/xen/lib/x86/cpu-policy.h
> @@ -551,6 +551,22 @@ int x86_cpu_policies_are_compatible(const struct
> cpu_policy *host,
> */
> uint32_t x86_x2apic_id_from_vcpu_id(const struct cpu_policy *p, uint32_t id);
>
> +/**
> + * Synthesise topology information in `p` given high-level constraints
> + *
> + * Topology is given in various fields accross several leaves, some of
> + * which are vendor-specific. This function uses the policy itself to
> + * derive such leaves from threads/core and cores/package.
> + *
> + * @param p CPU policy of the domain.
> + * @param threads_per_core threads/core. Doesn't need to be a power of 2.
> + * @param cores_per_package cores/package. Doesn't need to be a power of 2.
> + * @return 0 on success; -errno on failure
> + */
> +int x86_topo_from_parts(struct cpu_policy *p,
> + unsigned int threads_per_core,
> + unsigned int cores_per_pkg);
> +
> #endif /* !XEN_LIB_X86_POLICIES_H */
>
> /*
> diff --git a/xen/lib/x86/policy.c b/xen/lib/x86/policy.c
> index 4cef658feeb8..d033ee5398dd 100644
> --- a/xen/lib/x86/policy.c
> +++ b/xen/lib/x86/policy.c
> @@ -13,6 +13,92 @@ uint32_t x86_x2apic_id_from_vcpu_id(const struct
> cpu_policy *p, uint32_t id)
> return vcpu_id * 2;
> }
>
> +static unsigned int order(unsigned int n)
> +{
> + return 8 * sizeof(n) - __builtin_clz(n);
Do we need to assert that n is not 0? Otherwise the return of
__builtin_clz() is undefined.
I think the usage below doesn't pass 0 to __builtin_clz() in any case,
but it's better to add the check IMO.
Is __builtin_clz() also available in all versions of GCC and Clang
that we support? I have no idea when it was introduced.
> +}
> +
> +int x86_topo_from_parts(struct cpu_policy *p,
> + unsigned int threads_per_core,
> + unsigned int cores_per_pkg)
> +{
> + unsigned int threads_per_pkg = threads_per_core * cores_per_pkg;
> + unsigned int apic_id_size;
> +
> + if ( !p || !threads_per_core || !cores_per_pkg )
> + return -EINVAL;
> +
> + p->basic.max_leaf = MAX(0xb, p->basic.max_leaf);
> +
> + memset(p->topo.raw, 0, sizeof(p->topo.raw));
> +
> + /* thread level */
> + p->topo.subleaf[0].nr_logical = threads_per_core;
> + p->topo.subleaf[0].id_shift = 0;
> + p->topo.subleaf[0].level = 0;
> + p->topo.subleaf[0].type = 1;
> + if ( threads_per_core > 1 )
> + p->topo.subleaf[0].id_shift = order(threads_per_core - 1);
> +
> + /* core level */
> + p->topo.subleaf[1].nr_logical = cores_per_pkg;
> + if ( p->x86_vendor == X86_VENDOR_INTEL )
> + p->topo.subleaf[1].nr_logical = threads_per_pkg;
> + p->topo.subleaf[1].id_shift = p->topo.subleaf[0].id_shift;
> + p->topo.subleaf[1].level = 1;
> + p->topo.subleaf[1].type = 2;
> + if ( cores_per_pkg > 1 )
> + p->topo.subleaf[1].id_shift += order(cores_per_pkg - 1);
> +
> + apic_id_size = p->topo.subleaf[1].id_shift;
> +
> + /*
> + * Contrary to what the name might seem to imply. HTT is an enabler for
> + * SMP and there's no harm in setting it even with a single vCPU.
> + */
> + p->basic.htt = true;
> + p->basic.lppp = MIN(0xff, p->basic.lppp);
> +
> + switch ( p->x86_vendor )
> + {
> + case X86_VENDOR_INTEL: {
> + struct cpuid_cache_leaf *sl = p->cache.subleaf;
Newline please.
> + for ( size_t i = 0; sl->type &&
> + i < ARRAY_SIZE(p->cache.raw); i++, sl++ )
> + {
> + sl->cores_per_package = cores_per_pkg - 1;
> + sl->threads_per_cache = threads_per_core - 1;
> + if ( sl->type == 3 /* unified cache */ )
> + sl->threads_per_cache = threads_per_pkg - 1;
> + }
> + break;
> + }
Newline here also.
> + case X86_VENDOR_AMD:
> + case X86_VENDOR_HYGON:
> + /* Expose p->basic.lppp */
> + p->extd.cmp_legacy = true;
> +
> + /* Clip NC to the maximum value it can hold */
> + p->extd.nc = 0xff;
> + if ( threads_per_pkg <= 0xff )
> + p->extd.nc = threads_per_pkg - 1;
> +
> + /* TODO: Expose leaf e1E */
> + p->extd.topoext = false;
> +
> + /*
> + * Clip APIC ID to 8 bits, as that's what high core-count
> machines do
Overly long line? And missing full stop.
> + *
> + * That what AMD EPYC 9654 does with >256 CPUs
^ That's
Thanks, Roger.
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |