[Xen-devel] [RFC XEN PATCH 03/16] xen/x86: add a hypercall XENPF_pmem_add to report host pmem regions
The Xen hypervisor does not include a pmem driver. Instead, it relies on
the pmem driver in Dom0 to report, via XENPF_pmem_add, the PFN ranges of
the entire pmem region, of its reserved area, and of its data area. The
reserved area is used by the Xen hypervisor to place the frame table and
M2P table for the region, and Dom0 is denied access to it once it has
been reported.
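For illustration only (not part of this patch): a minimal sketch of how
the Dom0 pmem driver could invoke the new hypercall, assuming the Linux
copy of xen/interface/platform.h is updated with XENPF_pmem_add and
struct xenpf_pmem_add from this patch; the helper name is hypothetical:

    #include <linux/types.h>
    #include <xen/interface/platform.h>
    #include <asm/xen/hypercall.h>

    /* Hypothetical helper; all six arguments are PFNs, end PFNs exclusive. */
    static int xen_report_pmem(u64 spfn, u64 epfn,
                               u64 rsv_spfn, u64 rsv_epfn,
                               u64 data_spfn, u64 data_epfn)
    {
        struct xen_platform_op op = {
            .cmd = XENPF_pmem_add,
            .interface_version = XENPF_INTERFACE_VERSION,
        };

        op.u.pmem_add.spfn = spfn;
        op.u.pmem_add.epfn = epfn;
        op.u.pmem_add.rsv_spfn = rsv_spfn;
        op.u.pmem_add.rsv_epfn = rsv_epfn;
        op.u.pmem_add.data_spfn = data_spfn;
        op.u.pmem_add.data_epfn = data_epfn;

        return HYPERVISOR_platform_op(&op);
    }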
Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
---
xen/arch/x86/Makefile | 1 +
xen/arch/x86/platform_hypercall.c | 7 ++
xen/arch/x86/pmem.c | 161 ++++++++++++++++++++++++++++++++++++++
xen/arch/x86/x86_64/mm.c | 54 +++++++++++++
xen/include/asm-x86/mm.h | 4 +
xen/include/public/platform.h | 14 ++++
xen/include/xen/pmem.h | 31 ++++++++
xen/xsm/flask/hooks.c | 1 +
8 files changed, 273 insertions(+)
create mode 100644 xen/arch/x86/pmem.c
create mode 100644 xen/include/xen/pmem.h
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 931917d..9cf2da1 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_TBOOT) += tboot.o
obj-y += hpet.o
obj-y += vm_event.o
obj-y += xstate.o
+obj-y += pmem.o
x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 0879e19..c47eea4 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -24,6 +24,7 @@
#include <xen/pmstat.h>
#include <xen/irq.h>
#include <xen/symbols.h>
+#include <xen/pmem.h>
#include <asm/current.h>
#include <public/platform.h>
#include <acpi/cpufreq/processor_perf.h>
@@ -822,6 +823,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
}
break;
+ case XENPF_pmem_add:
+ ret = pmem_add(op->u.pmem_add.spfn, op->u.pmem_add.epfn,
+ op->u.pmem_add.rsv_spfn, op->u.pmem_add.rsv_epfn,
+ op->u.pmem_add.data_spfn, op->u.pmem_add.data_epfn);
+ break;
+
default:
ret = -ENOSYS;
break;
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
new file mode 100644
index 0000000..70358ed
--- /dev/null
+++ b/xen/arch/x86/pmem.c
@@ -0,0 +1,161 @@
+/******************************************************************************
+ * arch/x86/pmem.c
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
+ */
+
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <xen/iocap.h>
+#include <xen/lib.h>
+#include <xen/list.h>
+#include <xen/pmem.h>
+#include <xen/sched.h>
+#include <xen/spinlock.h>
+#include <xen/xmalloc.h>
+#include <asm/mm.h>
+
+/*
+ * All pmem regions reported from Dom0 are linked in pmem_list, which
+ * is protected by pmem_list_lock. Its entries are of type struct pmem
+ * and sorted in ascending order of spfn.
+ */
+static DEFINE_SPINLOCK(pmem_list_lock);
+static LIST_HEAD(pmem_list);
+
+struct pmem {
+ struct list_head link; /* link to pmem_list */
+ unsigned long spfn; /* start PFN of the whole pmem region */
+ unsigned long epfn; /* end PFN of the whole pmem region */
+ unsigned long rsv_spfn; /* start PFN of the reserved area */
+ unsigned long rsv_epfn; /* end PFN of the reserved area */
+ unsigned long data_spfn; /* start PFN of the data area */
+ unsigned long data_epfn; /* end PFN of the data area */
+};
+
+static int is_included(unsigned long s1, unsigned long e1,
+ unsigned long s2, unsigned long e2)
+{
+ return s1 <= s2 && s2 < e2 && e2 <= e1;
+}
+
+static int is_overlapped(unsigned long s1, unsigned long e1,
+ unsigned long s2, unsigned long e2)
+{
+ return (s1 <= s2 && s2 < e1) || (s2 < s1 && s1 < e2);
+}
+
+static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
+{
+ return rsv_mfns >=
+ ((sizeof(struct page_info) * total_mfns + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+ ((sizeof(*machine_to_phys_mapping) * total_mfns + PAGE_SIZE - 1) >> PAGE_SHIFT);
+}
+
+static int pmem_add_check(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn)
+{
+ if ( spfn >= epfn || rsv_spfn >= rsv_epfn || data_spfn >= data_epfn )
+ return 0;
+
+ if ( !is_included(spfn, epfn, rsv_spfn, rsv_epfn) ||
+ !is_included(spfn, epfn, data_spfn, data_epfn) )
+ return 0;
+
+ if ( is_overlapped(rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+ return 0;
+
+ if ( !check_reserved_size(rsv_epfn - rsv_spfn, epfn - spfn) )
+ return 0;
+
+ return 1;
+}
+
+static int pmem_list_add(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn)
+{
+ struct list_head *cur;
+ struct pmem *new_pmem;
+ int ret = 0;
+
+ spin_lock(&pmem_list_lock);
+
+ list_for_each_prev(cur, &pmem_list)
+ {
+ struct pmem *cur_pmem = list_entry(cur, struct pmem, link);
+ unsigned long cur_spfn = cur_pmem->spfn;
+ unsigned long cur_epfn = cur_pmem->epfn;
+
+ if ( (cur_spfn <= spfn && spfn < cur_epfn) ||
+ (spfn <= cur_spfn && cur_spfn < epfn) )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ( cur_spfn < spfn )
+ break;
+ }
+
+ new_pmem = xmalloc(struct pmem);
+ if ( !new_pmem )
+ {
+ ret = -ENOMEM;
+ goto out;
+ }
+ new_pmem->spfn = spfn;
+ new_pmem->epfn = epfn;
+ new_pmem->rsv_spfn = rsv_spfn;
+ new_pmem->rsv_epfn = rsv_epfn;
+ new_pmem->data_spfn = data_spfn;
+ new_pmem->data_epfn = data_epfn;
+ list_add(&new_pmem->link, cur);
+
+ out:
+ spin_unlock(&pmem_list_lock);
+ return ret;
+}
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn)
+{
+ int ret;
+
+ if ( !pmem_add_check(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+ return -EINVAL;
+
+ ret = pmem_setup(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+ if ( ret )
+ goto out;
+
+ ret = iomem_deny_access(current->domain, rsv_spfn, rsv_epfn - 1);
+ if ( ret )
+ goto out;
+
+ ret = pmem_list_add(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+ if ( ret )
+ goto out;
+
+ printk(XENLOG_INFO
+ "pmem: pfns 0x%lx - 0x%lx\n"
+ " reserved 0x%lx - 0x%lx\n"
+ " data 0x%lx - 0x%lx\n",
+ spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+
+ out:
+ return ret;
+}
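For illustration only (not part of the patch, and assuming 32-byte
struct page_info and 8-byte M2P entries on x86-64): a 128 GiB pmem
region spans 33554432 pages of 4 KiB, so check_reserved_size() demands
a reserved area of at least

    frame table: 32 B * 33554432 = 1 GiB    => 262144 pages
    M2P entries:  8 B * 33554432 = 256 MiB  =>  65536 pages
    total reserved area            1.25 GiB => 327680 pages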
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 5c0f527..b1f92f6 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1474,6 +1474,60 @@ destroy_frametable:
return ret;
}
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn)
+{
+ unsigned long old_max = max_page, old_total = total_pages;
+ struct mem_hotadd_info info =
+ { .spfn = spfn, .epfn = epfn, .cur = spfn };
+ struct mem_hotadd_info rsv_info =
+ { .spfn = rsv_spfn, .epfn = rsv_epfn, .cur = rsv_spfn };
+ int ret;
+ unsigned long i;
+ struct page_info *pg;
+
+ if ( !mem_hotadd_check(spfn, epfn) )
+ return -EINVAL;
+
+ ret = extend_frame_table(&info, &rsv_info);
+ if ( ret )
+ goto destroy_frametable;
+
+ if ( max_page < epfn )
+ {
+ max_page = epfn;
+ max_pdx = pfn_to_pdx(max_page - 1) + 1;
+ }
+ total_pages += epfn - spfn;
+
+ set_pdx_range(spfn, epfn);
+ ret = setup_m2p_table(&info, &rsv_info);
+ if ( ret )
+ goto destroy_m2p;
+
+ share_hotadd_m2p_table(&info);
+
+ for ( i = spfn; i < epfn; i++ )
+ {
+ pg = mfn_to_page(i);
+ pg->count_info = (rsv_spfn <= i && i < rsv_info.cur) ?
+ PGC_state_inuse : PGC_state_free;
+ }
+
+ return 0;
+
+destroy_m2p:
+ destroy_m2p_mapping(&info);
+ max_page = old_max;
+ total_pages = old_total;
+ max_pdx = pfn_to_pdx(max_page - 1) + 1;
+destroy_frametable:
+ cleanup_frame_table(&info);
+
+ return ret;
+}
+
#include "compat/mm.c"
/*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b781495..e31f1c8 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -597,4 +597,8 @@ typedef struct mm_rwlock {
extern const char zero_page[];
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn);
+
#endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 1e6a6ce..c7e7cce 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -608,6 +608,19 @@ struct xenpf_symdata {
typedef struct xenpf_symdata xenpf_symdata_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
+#define XENPF_pmem_add 64
+struct xenpf_pmem_add {
+ /* IN variables */
+ uint64_t spfn; /* start PFN of the whole pmem region */
+ uint64_t epfn; /* end PFN of the whole pmem region */
+ uint64_t rsv_spfn; /* start PFN of the reserved area within the region */
+ uint64_t rsv_epfn; /* end PFN of the reserved area within the region */
+ uint64_t data_spfn; /* start PFN of the data area within the region */
+ uint64_t data_epfn; /* end PFN of the data area within the region */
+};
+typedef struct xenpf_pmem_add xenpf_pmem_add_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pmem_add_t);
+
/*
* ` enum neg_errnoval
* ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -638,6 +651,7 @@ struct xen_platform_op {
struct xenpf_core_parking core_parking;
struct xenpf_resource_op resource_op;
struct xenpf_symdata symdata;
+ struct xenpf_pmem_add pmem_add;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
new file mode 100644
index 0000000..a670ab8
--- /dev/null
+++ b/xen/include/xen/pmem.h
@@ -0,0 +1,31 @@
+/*
+ * xen/include/xen/pmem.h
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
+ */
+
+#ifndef __XEN_PMEM_H__
+#define __XEN_PMEM_H__
+
+#include <xen/types.h>
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+ unsigned long rsv_spfn, unsigned long rsv_epfn,
+ unsigned long data_spfn, unsigned long data_epfn);
+
+#endif /* __XEN_PMEM_H__ */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 177c11f..948a161 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1360,6 +1360,7 @@ static int flask_platform_op(uint32_t op)
case XENPF_cpu_offline:
case XENPF_cpu_hotadd:
case XENPF_mem_hotadd:
+ case XENPF_pmem_add:
return 0;
#endif
--
2.10.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel