[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 4/7] x86: add support for COS/CBM manangement



CAT introduces a mechanism for software to enable cache allocation based
on application priority or Class of Service(COS). Each COS can be
configured using capacity bitmasks(CBM) to represent cache capacity
and indicate the degree of overlap and isolation between COSs.

In XEN implementation, the cache allocation granularity is domain. All
VCPUs of a domain have the same COS, and therefore, correspond to the
same CBM. COS is maintained in hypervisor only while CBM is exposed to
user space directly to allow getting/setting domain's cache capacity.

Both CBM/COS may be socket-different for the same domain.

General Cache Allocation Technology(CAT) information such as maximum COS
and CBM length are exposed to user space by a SYSCTRL hypercall, to help
to construct the CBM from user side.

Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
---
Changes in v2:
* set d->arch.psr_cos_ids when free it.
* check zero_bit < cbm_len when use it.
* change cpumask_check assertion to return error code.
---
 xen/arch/x86/domain.c           |   6 +-
 xen/arch/x86/domctl.c           |  18 ++++
 xen/arch/x86/psr.c              | 222 +++++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/sysctl.c           |  20 ++++
 xen/include/asm-x86/domain.h    |   5 +-
 xen/include/asm-x86/msr-index.h |   1 +
 xen/include/asm-x86/psr.h       |   8 ++
 xen/include/public/domctl.h     |  12 +++
 xen/include/public/sysctl.h     |  16 +++
 9 files changed, 305 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 0b5374a..c9be83b 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -615,6 +615,9 @@ int arch_domain_create(struct domain *d, unsigned int 
domcr_flags)
         /* 64-bit PV guest by default. */
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
 
+    if ( (rc = psr_domain_init(d)) != 0 )
+        goto fail;
+
     /* initialize default tsc behavior in case tools don't */
     tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
     spin_lock_init(&d->arch.vtsc_lock);
@@ -633,6 +636,7 @@ int arch_domain_create(struct domain *d, unsigned int 
domcr_flags)
     free_perdomain_mappings(d);
     if ( is_pv_domain(d) )
         free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
+    psr_domain_free(d);
     return rc;
 }
 
@@ -656,7 +660,7 @@ void arch_domain_destroy(struct domain *d)
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
 
-    psr_free_rmid(d);
+    psr_domain_free(d);
 }
 
 void arch_domain_shutdown(struct domain *d)
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index a1c5db0..c9f0f4f 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1324,6 +1324,24 @@ long arch_do_domctl(
         }
         break;
 
+    case XEN_DOMCTL_psr_cat_op:
+        switch ( domctl->u.psr_cat_op.cmd )
+        {
+        case XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM:
+            ret = psr_set_l3_cbm(d, domctl->u.psr_cat_op.target,
+                                 domctl->u.psr_cat_op.data);
+            break;
+        case XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM:
+            ret = psr_get_l3_cbm(d, domctl->u.psr_cat_op.target,
+                                 &domctl->u.psr_cat_op.data);
+            copyback = 1;
+            break;
+        default:
+            ret = -EOPNOTSUPP;
+            break;
+        }
+        break;
+
     default:
         ret = iommu_do_domctl(domctl, d, u_domctl);
         break;
diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index 5946122..af54aeb 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -21,11 +21,17 @@
 #define PSR_CMT        (1<<0)
 #define PSR_CAT        (1<<1)
 
+struct psr_cat_cbm {
+    unsigned int ref;
+    uint64_t cbm;
+};
+
 struct psr_cat_socket_info {
     bool_t initialized;
     bool_t enabled;
     unsigned int cbm_len;
     unsigned int cos_max;
+    struct psr_cat_cbm *cos_cbm_map;
 };
 
 struct psr_assoc {
@@ -52,6 +58,16 @@ static unsigned int get_max_socket(void)
     return socket;
 }
 
+static unsigned int get_socket_cpu(unsigned int socket)
+{
+    unsigned int cpu;
+
+    for_each_online_cpu ( cpu )
+       if ( cpu_to_socket(cpu) == socket )
+           return cpu;
+    return nr_cpu_ids;
+}
+
 static void __init parse_psr_bool(char* s, char* value, char* feature, int bit)
 {
     if ( !strcmp(s, feature) )
@@ -233,11 +249,204 @@ void psr_ctxt_switch_to(struct domain *d)
     psr_assoc_reg_write(psra, reg);
 }
 
+static int get_cat_socket_info(unsigned int socket,
+                               struct psr_cat_socket_info **info)
+{
+    if ( !cat_socket_info )
+        return -ENODEV;
+
+    if ( socket >= opt_num_sockets )
+        return -EBADSLT;
+
+    if ( !cat_socket_info[socket].enabled )
+        return -ENOENT;
+
+    *info = cat_socket_info + socket;
+    return 0;
+}
+
+int psr_get_cat_l3_info(unsigned int socket, uint32_t *cbm_len,
+                        uint32_t *cos_max)
+{
+    struct psr_cat_socket_info *info;
+    int ret = get_cat_socket_info(socket, &info);
+
+    if ( ret )
+        return ret;
+
+    *cbm_len = info->cbm_len;
+    *cos_max = info->cos_max;
+
+    return 0;
+}
+
+int psr_get_l3_cbm(struct domain *d, unsigned int socket, uint64_t *cbm)
+{
+    unsigned int cos;
+    struct psr_cat_socket_info *info;
+    int ret = get_cat_socket_info(socket, &info);
+
+    if ( ret )
+        return ret;
+
+    cos = d->arch.psr_cos_ids[socket];
+    *cbm = info->cos_cbm_map[cos].cbm;
+    return 0;
+}
+
+static bool_t psr_check_cbm(unsigned int cbm_len, uint64_t cbm)
+{
+    unsigned int first_bit, zero_bit;
+
+    /* Set bits should only in the range of [0, cbm_len). */
+    if ( cbm & (~0ull << cbm_len) )
+        return 0;
+
+    /* At least two contiguous bits need to be set. */
+    if ( hweight_long(cbm) < 2 )
+        return 0;
+
+    first_bit = find_first_bit(&cbm, cbm_len);
+    zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit);
+
+    /* Set bits should be contiguous. */
+    if ( zero_bit < cbm_len &&
+         find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len )
+        return 0;
+
+    return 1;
+}
+
+struct cos_cbm_info
+{
+    unsigned int cos;
+    uint64_t cbm;
+};
+
+static void do_write_l3_cbm(void *data)
+{
+    struct cos_cbm_info *info = data;
+    wrmsrl(MSR_IA32_L3_MASK(info->cos), info->cbm);
+}
+
+static int write_l3_cbm(unsigned int socket, unsigned int cos, uint64_t cbm)
+{
+    struct cos_cbm_info info = {.cos = cos, .cbm = cbm };
+
+    if ( socket == cpu_to_socket(smp_processor_id()) )
+        do_write_l3_cbm(&info);
+    else
+    {
+        unsigned int cpu = get_socket_cpu(socket);
+
+        if ( cpu == nr_cpu_ids )
+            return -EBADSLT;
+        on_selected_cpus(cpumask_of(cpu), do_write_l3_cbm, &info, 1);
+    }
+
+    return 0;
+}
+
+int psr_set_l3_cbm(struct domain *d, unsigned int socket, uint64_t cbm)
+{
+    unsigned int old_cos, cos;
+    struct psr_cat_cbm *map, *find;
+    struct psr_cat_socket_info *info;
+    int ret = get_cat_socket_info(socket, &info);
+
+    if ( ret )
+        return ret;
+
+    if ( !psr_check_cbm(info->cbm_len, cbm) )
+        return -EINVAL;
+
+    old_cos = d->arch.psr_cos_ids[socket];
+    map = info->cos_cbm_map;
+    find = NULL;
+
+    for ( cos = 0; cos <= info->cos_max; cos++ )
+    {
+        /* If still not found, then keep unused one. */
+        if ( !find && cos != 0 && map[cos].ref == 0 )
+            find = map + cos;
+        else if ( map[cos].cbm == cbm )
+        {
+            if ( unlikely(cos == old_cos) )
+                return -EEXIST;
+            find = map + cos;
+            break;
+        }
+    }
+
+    /* If old cos is referred only by the domain, then use it. */
+    if ( !find && map[old_cos].ref == 1 )
+        find = map + old_cos;
+
+    if ( !find )
+        return -EUSERS;
+
+    cos = find - map;
+    if ( find->cbm != cbm )
+    {
+        ret = write_l3_cbm(socket, cos, cbm);
+        if ( ret )
+            return ret;
+        find->cbm = cbm;
+    }
+    find->ref++;
+    map[old_cos].ref--;
+    d->arch.psr_cos_ids[socket] = cos;
+    return 0;
+}
+
+/* Called with domain lock held, no psr specific lock needed */
+static void psr_free_cos(struct domain *d)
+{
+    unsigned int socket;
+    unsigned int cos;
+    struct psr_cat_cbm *map;
+
+    if( !d->arch.psr_cos_ids )
+        return;
+
+    for ( socket = 0; socket < opt_num_sockets; socket++)
+    {
+        cos = d->arch.psr_cos_ids[socket];
+        if ( cos == 0 )
+            continue;
+
+        map = cat_socket_info[socket].cos_cbm_map;
+        if ( map )
+            map[cos].ref--;
+    }
+
+    xfree(d->arch.psr_cos_ids);
+    d->arch.psr_cos_ids = NULL;
+}
+
+int psr_domain_init(struct domain *d)
+{
+    if ( cat_socket_info )
+    {
+        d->arch.psr_cos_ids = xzalloc_array(unsigned int, opt_num_sockets);
+        if ( !d->arch.psr_cos_ids )
+            return -ENOMEM;
+    }
+
+    return 0;
+}
+
+void psr_domain_free(struct domain *d)
+{
+    psr_free_rmid(d);
+    psr_free_cos(d);
+}
+
 static void cat_cpu_init(unsigned int cpu)
 {
     unsigned int eax, ebx, ecx, edx;
     struct psr_cat_socket_info *info;
-    unsigned int socket;
+    unsigned int socket, cos;
     const struct cpuinfo_x86 *c;
 
     socket = cpu_to_socket(cpu);
@@ -265,6 +474,17 @@ static void cat_cpu_init(unsigned int cpu)
         info->cbm_len = (eax & 0x1f) + 1;
         info->cos_max = (edx & 0xffff);
 
+        info->cos_cbm_map = xmalloc_array(struct psr_cat_cbm,
+                                          info->cos_max + 1UL);
+        if ( !info->cos_cbm_map )
+            return;
+
+        for ( cos = 0; cos <= info->cos_max; cos++ )
+            info->cos_cbm_map[cos].ref = 0;
+
+        /* cos=0 is reserved as default cbm(all ones). */
+        info->cos_cbm_map[0].cbm =  (1ull << info->cbm_len) - 1;
+
         info->enabled = 1;
         printk(XENLOG_DEBUG "CAT: enabled on socket %u, cos_max:%u, 
cbm_len:%u\n",
                socket, info->cos_max, info->cbm_len);
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 611a291..096dc31 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -171,6 +171,26 @@ long arch_do_sysctl(
 
         break;
 
+    case XEN_SYSCTL_psr_cat_op:
+        switch ( sysctl->u.psr_cat_op.cmd )
+        {
+        case XEN_SYSCTL_PSR_CAT_get_l3_info:
+            ret = psr_get_cat_l3_info(sysctl->u.psr_cat_op.target,
+                                      &sysctl->u.psr_cat_op.u.l3_info.cbm_len,
+                                      &sysctl->u.psr_cat_op.u.l3_info.cos_max);
+            if ( ret )
+                break;
+
+            if ( __copy_to_guest(u_sysctl, sysctl, 1) )
+                ret = -EFAULT;
+
+            break;
+        default:
+            ret = -ENOSYS;
+            break;
+        }
+        break;
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 9cdffa8..9c4d0e6 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -333,7 +333,10 @@ struct arch_domain
     struct e820entry *e820;
     unsigned int nr_e820;
 
-    unsigned int psr_rmid; /* RMID assigned to the domain for CMT */
+    /* RMID assigned to the domain for CMT */
+    unsigned int psr_rmid;
+    /* COS assigned to the domain for each socket */
+    unsigned int *psr_cos_ids;
 
     /* Shared page for notifying that explicit PIRQ EOI is required. */
     unsigned long *pirq_eoi_map;
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 83f2f70..b96f0f6 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -327,6 +327,7 @@
 #define MSR_IA32_CMT_EVTSEL            0x00000c8d
 #define MSR_IA32_CMT_CTR               0x00000c8e
 #define MSR_IA32_PSR_ASSOC             0x00000c8f
+#define MSR_IA32_L3_MASK(n)            (0x00000c90 + (n))
 
 /* Intel Model 6 */
 #define MSR_P6_PERFCTR(n)              (0x000000c1 + (n))
diff --git a/xen/include/asm-x86/psr.h b/xen/include/asm-x86/psr.h
index 585350c..74f150b 100644
--- a/xen/include/asm-x86/psr.h
+++ b/xen/include/asm-x86/psr.h
@@ -49,6 +49,14 @@ void psr_free_rmid(struct domain *d);
 
 void psr_ctxt_switch_to(struct domain *d);
 
+int psr_get_cat_l3_info(unsigned int socket, uint32_t *cbm_len,
+                        uint32_t *cos_max);
+int psr_get_l3_cbm(struct domain *d, unsigned int socket, uint64_t *cbm);
+int psr_set_l3_cbm(struct domain *d, unsigned int socket, uint64_t cbm);
+
+int psr_domain_init(struct domain *d);
+void psr_domain_free(struct domain *d);
+
 #endif /* __ASM_PSR_H__ */
 
 /*
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index ca0e51e..7c7baed 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1005,6 +1005,16 @@ struct xen_domctl_psr_cmt_op {
 typedef struct xen_domctl_psr_cmt_op xen_domctl_psr_cmt_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
 
+struct xen_domctl_psr_cat_op {
+#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM     0
+#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM     1
+    uint32_t cmd;       /* IN: XEN_DOMCTL_PSR_CAT_OP_* */
+    uint32_t target;    /* IN: socket or cpu to be operated on */
+    uint64_t data;      /* IN/OUT */
+};
+typedef struct xen_domctl_psr_cat_op xen_domctl_psr_cat_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cat_op_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -1080,6 +1090,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_psr_cmt_op                    75
 #define XEN_DOMCTL_arm_configure_domain          76
+#define XEN_DOMCTL_psr_cat_op                    77
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1145,6 +1156,7 @@ struct xen_domctl {
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         struct xen_domctl_vnuma             vnuma;
         struct xen_domctl_psr_cmt_op        psr_cmt_op;
+        struct xen_domctl_psr_cat_op        psr_cat_op;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 8552dc6..9d8ed10 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -656,6 +656,20 @@ struct xen_sysctl_psr_cmt_op {
 typedef struct xen_sysctl_psr_cmt_op xen_sysctl_psr_cmt_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cmt_op_t);
 
+#define XEN_SYSCTL_PSR_CAT_get_l3_info               0
+struct xen_sysctl_psr_cat_op {
+    uint32_t cmd;       /* IN: XEN_SYSCTL_PSR_CAT_* */
+    uint32_t target;    /* IN: socket or cpu to be operated on */
+    union {
+        struct {
+            uint32_t cbm_len;   /* OUT: CBM length */
+            uint32_t cos_max;   /* OUT: Maximum COS */
+        } l3_info;
+    } u;
+};
+typedef struct xen_sysctl_psr_cat_op xen_sysctl_psr_cat_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cat_op_t);
+
 struct xen_sysctl {
     uint32_t cmd;
 #define XEN_SYSCTL_readconsole                    1
@@ -678,6 +692,7 @@ struct xen_sysctl {
 #define XEN_SYSCTL_scheduler_op                  19
 #define XEN_SYSCTL_coverage_op                   20
 #define XEN_SYSCTL_psr_cmt_op                    21
+#define XEN_SYSCTL_psr_cat_op                    22
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
     union {
         struct xen_sysctl_readconsole       readconsole;
@@ -700,6 +715,7 @@ struct xen_sysctl {
         struct xen_sysctl_scheduler_op      scheduler_op;
         struct xen_sysctl_coverage_op       coverage_op;
         struct xen_sysctl_psr_cmt_op        psr_cmt_op;
+        struct xen_sysctl_psr_cat_op        psr_cat_op;
         uint8_t                             pad[128];
     } u;
 };
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.