[Xen-devel] [RFC PATCH 3/7] xen: psr: reserve an RMID for each core
This allows for a new item to be passed as part of the psr= boot
option: "percpu_cmt" (e.g., booting with something like
"psr=cmt,percpu_cmt"). If that is specified, Xen tries, at boot time,
to associate an RMID to each core.

XXX This all looks rather straightforward, if it weren't for the fact
    that it is, apparently, more common than I thought to run out of
    RMIDs. For example, on a dev box we have in Cambridge, there are
    144 pCPUs and only 71 RMIDs.

    In this preliminary version, nothing particularly smart happens if
    we run out of RMIDs: we just fail to attach the remaining cores
    and that's it. In future, I'd probably like to:
    + check up front whether the operation has any chance of
      succeeding (by comparing the number of pCPUs with the available
      RMIDs);
    + on unexpected failure, roll back everything... that seems to
      make more sense to me than just leaving the system half
      configured for per-cpu CMT.
    A sketch of such a check-and-rollback approach follows the
    message. Thoughts?

XXX Another idea I had is to allow the user to somehow specify a
    different 'granularity'. Something like allowing
    'percpu_cmt'|'percore_cmt'|'persocket_cmt', with the following
    meaning:
    + 'percpu_cmt': as in this patch;
    + 'percore_cmt': same RMID for the hthreads of the same core;
    + 'persocket_cmt': same RMID for all the cores of the same socket.
    'percore_cmt' would only allow gathering info on a per-core
    basis... still better than nothing if we do not have enough RMIDs
    for every pCPU. 'persocket_cmt' would basically only allow
    tracking the amount of free L3 on each socket (by subtracting the
    monitored value from the total). Again, still better than nothing,
    it would use very few RMIDs, and I could think of ways of using
    this information in a few places in the scheduler... Sketches of
    both ideas also follow the message. Again, thoughts?

XXX Finally, when a domain with its own RMID executes on a core that
    also has its own RMID, domain monitoring just overrides per-CPU
    monitoring. That means the cache occupancy reported for that pCPU
    is not accurate. For the reasons why this situation is difficult
    to deal with properly, see the document in the cover letter.

    Ideas on how to deal with this, either about how to make it work
    or about how to handle the thing from a 'policy' perspective
    (i.e., which of the two mechanisms should be disabled or
    penalized?), are very welcome.
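For reference, here is a minimal sketch of what the up-front check and
the rollback could look like. This is not part of the patch:
psr_alloc_all_pcpu_rmids() is a made-up name, and the sketch only
relies on symbols the patch itself introduces (pcpu_rmid,
psr_alloc_pcpu_rmid() and psr_free_pcpu_rmid()) plus the usual
num_online_cpus() / for_each_online_cpu() helpers:

static int psr_alloc_all_pcpu_rmids(void)
{
    unsigned int cpu, rmid, nr_free = 0;

    /* RMID 0 is reserved for unmonitored CPUs, so count from 1. */
    for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
        if ( psr_cmt->rmids[rmid] == DOMID_INVALID )
            nr_free++;

    /* Fail up front if we cannot possibly cover every online pCPU. */
    if ( nr_free < num_online_cpus() )
        return -EUSERS;

    for_each_online_cpu ( cpu )
        if ( psr_alloc_pcpu_rmid(cpu) )
            goto rollback;

    return 0;

 rollback:
    /*
     * Unexpected failure: undo all the allocations done so far,
     * rather than leaving the system half configured for per-cpu CMT.
     */
    for_each_online_cpu ( cpu )
        if ( per_cpu(pcpu_rmid, cpu) )
            psr_free_pcpu_rmid(cpu);

    return -EUSERS;
}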
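And here is a rough sketch of how the 'granularity' choice could be
plugged into the allocation path. opt_cmt_gran and the PSR_GRAN_*
constants are made-up names; cpu_sibling_mask and cpu_core_mask are
the topology masks Xen already maintains (hthreads sharing a core, and
cores sharing a socket, respectively). Note that sharing one RMID
among peers would also require some reference counting on the freeing
path, which is omitted here:

static int psr_alloc_pcpu_rmid_gran(unsigned int cpu)
{
    const cpumask_t *peers;
    unsigned int peer;

    switch ( opt_cmt_gran )
    {
    case PSR_GRAN_CPU:      /* 'percpu_cmt': one RMID per pCPU */
        return psr_alloc_pcpu_rmid(cpu);
    case PSR_GRAN_CORE:     /* 'percore_cmt': share among hthreads */
        peers = per_cpu(cpu_sibling_mask, cpu);
        break;
    case PSR_GRAN_SOCKET:   /* 'persocket_cmt': share within socket */
        peers = per_cpu(cpu_core_mask, cpu);
        break;
    default:
        return -EINVAL;
    }

    /* If a peer was already assigned an RMID, just reuse it. */
    for_each_cpu ( peer, peers )
        if ( per_cpu(pcpu_rmid, peer) )
        {
            per_cpu(pcpu_rmid, cpu) = per_cpu(pcpu_rmid, peer);
            return 0;
        }

    /* First CPU of its core/socket: allocate a fresh RMID. */
    return psr_alloc_pcpu_rmid(cpu);
}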
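Finally, a sketch of the kind of computation 'persocket_cmt' would
enable. Both helpers are hypothetical: reading the occupancy would
boil down to an IA32_QM_EVTSEL/IA32_QM_CTR read for the socket's RMID
(scaled by the CPUID-reported upscaling factor), and the cache size
would come from CPUID as well:

/* Estimate the free L3 on a socket: total size minus occupancy. */
static uint64_t socket_free_l3(unsigned int socket)
{
    uint64_t occupancy = read_socket_l3_occupancy(socket); /* hypothetical */
    uint64_t size = socket_l3_size(socket);                /* hypothetical */

    /* The upscaled counter can slightly over-report: clamp at zero. */
    return occupancy >= size ? 0 : size - occupancy;
}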
Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
 xen/arch/x86/psr.c        | 72 ++++++++++++++++++++++++++++++++++++---------
 xen/include/asm-x86/psr.h | 11 ++++++-
 2 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index 0f2a6ce..a71391c 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -26,10 +26,13 @@ struct psr_assoc {
 struct psr_cmt *__read_mostly psr_cmt;

 static bool_t __initdata opt_psr;
+static bool_t __initdata opt_cpu_cmt;
 static unsigned int __initdata opt_rmid_max = 255;
 static uint64_t rmid_mask;
 static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);

+DEFINE_PER_CPU(unsigned int, pcpu_rmid);
+
 static void __init parse_psr_param(char *s)
 {
     char *ss, *val_str;
@@ -57,6 +60,8 @@ static void __init parse_psr_param(char *s)
                            val_str);
         }
     }
+    else if ( !strcmp(s, "percpu_cmt") )
+        opt_cpu_cmt = 1;
     else if ( val_str && !strcmp(s, "rmid_max") )
         opt_rmid_max = simple_strtoul(val_str, NULL, 0);

@@ -94,8 +99,8 @@ static void __init init_psr_cmt(unsigned int rmid_max)
     }

     psr_cmt->rmid_max = min(psr_cmt->rmid_max, psr_cmt->l3.rmid_max);
-    psr_cmt->rmid_to_dom = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
-    if ( !psr_cmt->rmid_to_dom )
+    psr_cmt->rmids = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
+    if ( !psr_cmt->rmids )
     {
         xfree(psr_cmt);
         psr_cmt = NULL;
@@ -107,56 +112,86 @@ static void __init init_psr_cmt(unsigned int rmid_max)
      * with it. To reduce the waste of RMID, reserve RMID 0 for all CPUs that
      * have no domain being monitored.
      */
-    psr_cmt->rmid_to_dom[0] = DOMID_XEN;
+    psr_cmt->rmids[0] = DOMID_XEN;
     for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
-        psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
+        psr_cmt->rmids[rmid] = DOMID_INVALID;

     printk(XENLOG_INFO "Cache Monitoring Technology enabled, RMIDs: %u\n",
            psr_cmt->rmid_max);
 }

-/* Called with domain lock held, no psr specific lock needed */
-int psr_alloc_rmid(struct domain *d)
+static int _psr_alloc_rmid(unsigned int *trmid, unsigned int id)
 {
     unsigned int rmid;

     ASSERT(psr_cmt_enabled());

-    if ( d->arch.psr_rmid > 0 )
+    if ( *trmid > 0 )
         return -EEXIST;

     for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
     {
-        if ( psr_cmt->rmid_to_dom[rmid] != DOMID_INVALID )
+        if ( psr_cmt->rmids[rmid] != DOMID_INVALID )
             continue;

-        psr_cmt->rmid_to_dom[rmid] = d->domain_id;
+        psr_cmt->rmids[rmid] = id;
         break;
     }

     /* No RMID available, assign RMID=0 by default. */
     if ( rmid > psr_cmt->rmid_max )
     {
-        d->arch.psr_rmid = 0;
+        *trmid = 0;
         return -EUSERS;
     }

-    d->arch.psr_rmid = rmid;
+    *trmid = rmid;

     return 0;
 }

+int psr_alloc_pcpu_rmid(unsigned int cpu)
+{
+    int ret;
+
+    /* XXX Any locking required? */
+    ret = _psr_alloc_rmid(&per_cpu(pcpu_rmid, cpu), DOMID_XEN);
+    if ( !ret )
+        printk(XENLOG_DEBUG "using RMID %u for CPU %u\n",
+               per_cpu(pcpu_rmid, cpu), cpu);
+
+    return ret;
+}
+
 /* Called with domain lock held, no psr specific lock needed */
-void psr_free_rmid(struct domain *d)
+int psr_alloc_rmid(struct domain *d)
 {
-    unsigned int rmid;
+    return _psr_alloc_rmid(&d->arch.psr_rmid, d->domain_id);
+}

-    rmid = d->arch.psr_rmid;
+static void _psr_free_rmid(unsigned int rmid)
+{
     /* We do not free system reserved "RMID=0". */
     if ( rmid == 0 )
         return;

-    psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
+    psr_cmt->rmids[rmid] = DOMID_INVALID;
+}
+
+void psr_free_pcpu_rmid(unsigned int cpu)
+{
+    printk(XENLOG_DEBUG "Freeing RMID %u. CPU %u no longer monitored\n",
+           per_cpu(pcpu_rmid, cpu), cpu);
+
+    /* XXX Any locking required? */
+    _psr_free_rmid(per_cpu(pcpu_rmid, cpu));
+    per_cpu(pcpu_rmid, cpu) = 0;
+}
+
+/* Called with domain lock held, no psr specific lock needed */
+void psr_free_rmid(struct domain *d)
+{
+    _psr_free_rmid(d->arch.psr_rmid);
     d->arch.psr_rmid = 0;
 }

@@ -184,6 +219,10 @@ static inline void psr_assoc_reg_write(struct psr_assoc *psra, uint64_t reg)

 static inline void psr_assoc_rmid(uint64_t *reg, unsigned int rmid)
 {
+    /* Domain not monitored: switch to the RMID of the pcpu (if any) */
+    if ( rmid == 0 )
+        rmid = this_cpu(pcpu_rmid);
+
     *reg = (*reg & ~rmid_mask) | (rmid & rmid_mask);
 }

@@ -202,6 +241,9 @@ void psr_ctxt_switch_to(struct domain *d)

 static void psr_cpu_init(unsigned int cpu)
 {
+    if ( opt_cpu_cmt && !psr_alloc_pcpu_rmid(cpu) )
+        printk(XENLOG_INFO "pcpu %u: using RMID %u\n",
+               cpu, per_cpu(pcpu_rmid, cpu));
     psr_assoc_init();
 }

diff --git a/xen/include/asm-x86/psr.h b/xen/include/asm-x86/psr.h
index 585350c..b70f605 100644
--- a/xen/include/asm-x86/psr.h
+++ b/xen/include/asm-x86/psr.h
@@ -33,17 +33,26 @@ struct psr_cmt_l3 {
 struct psr_cmt {
     unsigned int rmid_max;
     unsigned int features;
-    domid_t *rmid_to_dom;
+    domid_t *rmids;
     struct psr_cmt_l3 l3;
 };

 extern struct psr_cmt *psr_cmt;

+/*
+ * RMID associated to each core, to track the cache
+ * occupancy contribution of the core itself.
+ */
+DECLARE_PER_CPU(unsigned int, pcpu_rmid);
+
 static inline bool_t psr_cmt_enabled(void)
 {
     return !!psr_cmt;
 }

+int psr_alloc_pcpu_rmid(unsigned int cpu);
+void psr_free_pcpu_rmid(unsigned int cpu);
+
 int psr_alloc_rmid(struct domain *d);
 void psr_free_rmid(struct domain *d);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel