[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH v2 1/2] xen/memory : Add a stats_table resource type



This commit proposes a new mechanism to query the RUNSTATE_running counter for
a given vcpu from a dom0 userspace application. This commit proposes to expose
that counter by using the acquire_resource interface. The current mechanism
relies on the XEN_DOMCTL_getvcpuinfo and holds a single global domctl_lock for
the entire hypercall; and iterate over every vcpu in the system for every
update thus impacting operations that share that lock.

This commit proposes to expose vcpu RUNSTATE_running via the
xenforeignmemory interface thus preventing to issue the hypercall and holding
the lock. For that purpose, a new resource type named stats_table is added. The
first frame of this resource stores per-vcpu counters. The frame has one entry
of type struct vcpu_stats per vcpu. The allocation of this frame only happens
if the resource is requested. The frame is released after the domain is
destroyed.

Note that the updating of this counter is in a hot path, thus, in this commit,
copying only happens if it is specifically required.

Note that the exposed structure is extensible in two ways. First, the structure
vcpu_stats can be extended with new per-vcpu counters while it fits in a frame.
Second, new frames can be added in case new counters are required.

Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@xxxxxxxx>
---
Changes in v2:
- rework to ensure that guest reads a coherent value by using a version
  number in the vcpu_stats structure
- add version to the vcpu_stats structure

Changes in v1:
- rework the allocation and releasing of the frames
- use the zero frame for per-vcpu counters that are listed as an array
- allocate vcpu stats frames only when the resource is requested
- rewrite commit message
- add the vcpu_stats structure to keep per-vcpu counters
- add the shared_vcpustatspage to keep an array of per-vcpu counters for a
  given domain
- declare the structures in a public header 
- define the vcpustats_page in the domain structure
---
 xen/arch/x86/hvm/hvm.c      |  2 +
 xen/common/memory.c         | 94 +++++++++++++++++++++++++++++++++++++
 xen/common/sched/core.c     | 16 +++++++
 xen/include/public/memory.h |  3 ++
 xen/include/public/vcpu.h   | 16 +++++++
 xen/include/xen/mm.h        |  2 +
 xen/include/xen/sched.h     |  5 ++
 7 files changed, 138 insertions(+)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ddd001a6ad..1ef6cb5ff0 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -741,6 +741,8 @@ void hvm_domain_relinquish_resources(struct domain *d)
 
     ioreq_server_destroy_all(d);
 
+    stats_free_vcpu_mfn(d);
+
     msixtbl_pt_cleanup(d);
 
     /* Stop all asynchronous timer actions. */
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 297b98a562..749486d5d4 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1078,6 +1078,12 @@ unsigned int ioreq_server_max_frames(const struct domain 
*d)
     return nr;
 }
 
+unsigned int stats_table_max_frames(const struct domain *d)
+{
+    /* One frame per 512 vcpus. */
+    return 1;
+}
+
 /*
  * Return 0 on any kind of error.  Caller converts to -EINVAL.
  *
@@ -1099,6 +1105,9 @@ static unsigned int resource_max_frames(const struct 
domain *d,
     case XENMEM_resource_vmtrace_buf:
         return d->vmtrace_size >> PAGE_SHIFT;
 
+    case XENMEM_resource_stats_table:
+        return stats_table_max_frames(d);
+
     default:
         return -EOPNOTSUPP;
     }
@@ -1162,6 +1171,88 @@ static int acquire_vmtrace_buf(
     return nr_frames;
 }
 
+void stats_free_vcpu_mfn(struct domain * d)
+{
+    struct page_info *pg = d->vcpustats_page.pg;
+
+    if ( !pg )
+        return;
+
+    d->vcpustats_page.pg = NULL;
+
+    if ( d->vcpustats_page.va )
+        unmap_domain_page_global(d->vcpustats_page.va);
+
+    d->vcpustats_page.va = NULL;
+
+    put_page_alloc_ref(pg);
+    put_page_and_type(pg);
+}
+
+static int stats_vcpu_alloc_mfn(struct domain *d)
+{
+    struct page_info *pg;
+
+    pg = alloc_domheap_page(d, MEMF_no_refcount);
+
+    if ( !pg )
+        return -ENOMEM;
+
+    if ( !get_page_and_type(pg, d, PGT_writable_page) ) {
+        put_page_alloc_ref(pg);
+        return -ENODATA;
+    }
+
+    d->vcpustats_page.va = __map_domain_page_global(pg);
+    if ( !d->vcpustats_page.va )
+        goto fail;
+
+    d->vcpustats_page.pg = pg;
+    clear_page(d->vcpustats_page.va);
+    return 1;
+
+fail:
+    put_page_alloc_ref(pg);
+    put_page_and_type(pg);
+
+    return -ENOMEM;
+}
+
+static int acquire_stats_table(struct domain *d,
+                                unsigned int id,
+                                unsigned int frame,
+                                unsigned int nr_frames,
+                                xen_pfn_t mfn_list[])
+{
+    mfn_t mfn;
+    int rc;
+    unsigned int i;
+
+    if ( !d )
+        return -ENOENT;
+
+    for ( i = 0; i < nr_frames; i++ )
+    {
+        switch ( i )
+        {
+        case XENMEM_resource_stats_frame_vcpustats:
+            if ( !d->vcpustats_page.pg ) {
+                rc = stats_vcpu_alloc_mfn(d);
+                if ( rc < 1 )
+                    return rc;
+            }
+            mfn = page_to_mfn(d->vcpustats_page.pg);
+            mfn_list[i] = mfn_x(mfn);
+            break;
+
+        default:
+            return -EINVAL;
+        }
+    }
+
+    return nr_frames;
+}
+
 /*
  * Returns -errno on error, or positive in the range [1, nr_frames] on
  * success.  Returning less than nr_frames contitutes a request for a
@@ -1182,6 +1273,9 @@ static int _acquire_resource(
     case XENMEM_resource_vmtrace_buf:
         return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
 
+    case XENMEM_resource_stats_table:
+        return acquire_stats_table(d, id, frame, nr_frames, mfn_list);
+
     default:
         return -EOPNOTSUPP;
     }
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 8f4b1ca10d..3543a531a1 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -264,6 +264,8 @@ static inline void vcpu_runstate_change(
 {
     s_time_t delta;
     struct sched_unit *unit = v->sched_unit;
+    shared_vcpustatspage_t * vcpustats_va;
+    struct domain *d = v->domain;
 
     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
     if ( v->runstate.state == new_state )
@@ -287,6 +289,20 @@ static inline void vcpu_runstate_change(
     }
 
     v->runstate.state = new_state;
+
+    vcpustats_va = (shared_vcpustatspage_t*)d->vcpustats_page.va;
+    if ( vcpustats_va )
+    {
+       vcpustats_va->vcpu_info[v->vcpu_id].version =
+           version_update_begin(vcpustats_va->vcpu_info[v->vcpu_id].version);
+        smp_wmb();
+        memcpy(&vcpustats_va->vcpu_info[v->vcpu_id].runstate_running_time,
+               &v->runstate.time[RUNSTATE_running],
+               sizeof(v->runstate.time[RUNSTATE_running]));
+        smp_wmb();
+        vcpustats_va->vcpu_info[v->vcpu_id].version =
+            version_update_end(vcpustats_va->vcpu_info[v->vcpu_id].version);
+    }
 }
 
 void sched_guest_idle(void (*idle) (void), unsigned int cpu)
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 50e73eef98..e1a10b8b97 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -626,6 +626,7 @@ struct xen_mem_acquire_resource {
 #define XENMEM_resource_ioreq_server 0
 #define XENMEM_resource_grant_table 1
 #define XENMEM_resource_vmtrace_buf 2
+#define XENMEM_resource_stats_table 3
 
     /*
      * IN - a type-specific resource identifier, which must be zero
@@ -683,6 +684,8 @@ struct xen_mem_acquire_resource {
 typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
 DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
 
+#define XENMEM_resource_stats_frame_vcpustats 0
+
 /*
  * XENMEM_get_vnumainfo used by guest to get
  * vNUMA topology from hypervisor.
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af932f..5c1812dfd2 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,22 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+struct vcpu_stats{
+    /* If the least-significant bit of the version number is set then an update
+     * is in progress and the guest must wait to read a consistent set of 
values
+     * This mechanism is similar to Linux's seqlock.
+     */
+    uint32_t version;
+    uint32_t pad0;
+    uint64_t runstate_running_time;
+};
+
+struct shared_vcpustatspage {
+    struct vcpu_stats vcpu_info[1];
+};
+
+typedef struct shared_vcpustatspage shared_vcpustatspage_t;
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 667f9dac83..d1ca8b9aa8 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -134,6 +134,8 @@ int assign_pages(
 /* Dump info to serial console */
 void arch_dump_shared_mem_info(void);
 
+void stats_free_vcpu_mfn(struct domain * d);
+
 /*
  * Extra fault info types which are used to further describe
  * the source of an access violation.
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5485d08afb..d9551ce35f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -577,6 +577,11 @@ struct domain
         struct ioreq_server     *server[MAX_NR_IOREQ_SERVERS];
     } ioreq_server;
 #endif
+    /* Page that hosts vcpu stats */
+    struct {
+        struct page_info *pg;
+        void *va;
+    } vcpustats_page;
 };
 
 static inline struct page_list_head *page_to_list(
-- 
2.25.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.