|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory
This patch implements restore callbacks for colo:
1. flush_memory():
We update the memory as the following:
a. pin non-dirty L1 pagetables
b. unpin pagetables except non-dirty L1
c. update the memory
d. pin page tables
e. unpin non-dirty L1 pagetables
Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
tools/libxc/xc_domain_restore_colo.c | 372 ++++++++++++++++++++++++++++++++++
tools/libxc/xc_save_restore_colo.h | 1 +
2 files changed, 373 insertions(+), 0 deletions(-)
diff --git a/tools/libxc/xc_domain_restore_colo.c
b/tools/libxc/xc_domain_restore_colo.c
index 77b63b6..50009fa 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -152,3 +152,375 @@ char* colo_get_page(struct restore_data *comm_data, void
*data,
set_bit(pfn, colo_data->dirty_pages);
return colo_data->pagebase + pfn * PAGE_SIZE;
}
+
+/* Step 1:
+ *
+ * pin non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ * mL1: L1 pages on master side
+ * sL1: L1 pages on slaver side
+ */
+static int pin_l1(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ if (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+ /* skip tables the slaver side already has pinned */
+ continue;
+
+ if (test_bit(i, dirty_pages))
+ /* skip dirty L1s: they are re-pinned in step 4, after update */
+ continue;
+
+ /* here, it must also be L1 in slaver, otherwise it is dirty.
+ * (add test code ?)
+ */
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 2:
+ *
+ * unpin pagetables except non-dirty L1: sL2 + sL3 + sL4 + (dirty_pages & sL1)
+ * sL1: L1 pages on slaver side
+ * sL2: L2 pages on slaver side
+ * sL3: L3 pages on slaver side
+ * sL4: L4 pages on slaver side
+ */
+static int unpin_pagetable(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ if ( (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ if (!test_bit(i, dirty_pages))
+ /* it is in (~dirty_pages & mL1): step 1 wants it pinned, keep it */
+ continue;
+ /* fallthrough */
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to unpin batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to unpin batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 3: we have unpinned all pagetables except non-dirty L1, so it is OK
+ * to map the dirty memory writably and update it.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int update_memory(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned long pfn;
+ unsigned long max_mem_pfn = colo_data->max_mem_pfn;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ unsigned long pagetype;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ struct xc_mmu *mmu = comm_data->mmu;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+ char *pagebase = colo_data->pagebase;
+ int pfn_err = 0;
+ char *region_base_slaver;
+ xen_pfn_t region_mfn_slaver;
+ unsigned long mfn;
+ char *pagebuff;
+
+ for (pfn = 0; pfn < max_mem_pfn; pfn++) {
+ if (!test_bit(pfn, dirty_pages))
+ continue;
+
+ pagetype = pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+ if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
+ /* a bogus/unmapped page: skip it */
+ continue;
+
+ mfn = comm_data->p2m[pfn];
+ region_mfn_slaver = mfn;
+ region_base_slaver = xc_map_foreign_bulk(xch, dom,
+ PROT_WRITE,
+ &region_mfn_slaver,
+ &pfn_err, 1);
+ if (!region_base_slaver || pfn_err) {
+ PERROR("update_memory: xc_map_foreign_bulk failed");
+ return 1;
+ }
+
+ /* copy the master's version of the page over the slaver's copy */
+ pagebuff = (char *)(pagebase + pfn * PAGE_SIZE);
+ memcpy(region_base_slaver, pagebuff, PAGE_SIZE);
+ munmap(region_base_slaver, PAGE_SIZE);
+
+ if (xc_add_mmu_update(xch, mmu, (((uint64_t)mfn) << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE, pfn) )
+ {
+ PERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
+ return 1;
+ }
+ }
+
+ /*
+ * Ensure we flush all machphys updates before potential PAE-specific
+ * reallocations below.
+ */
+ if (xc_flush_mmu_updates(xch, mmu))
+ {
+ PERROR("Error doing flush_mmu_updates()");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 4: pin master pt
+ * Pin page tables. Do this after writing to them as otherwise Xen
+ * will barf when doing the type-checking.
+ */
+static int pin_pagetable(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ {
+ if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ if (!test_bit(i, dirty_pages))
+ /* it is in (~dirty_pages & mL1)(=~dirty_pages & sL1),
+ * already pinned by step 1
+ */
+ continue;
+
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 5:
+ * unpin unneeded non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int unpin_l1(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) // still needed
+ continue;
+ if (test_bit(i, dirty_pages)) // not pinned by step 1
+ continue;
+
+ pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* restore callback: bring the slaver's memory into sync with the master.
+ *
+ * Sequence (one helper per step, above):
+ *   1. pin non-dirty L1 pagetables
+ *   2. unpin pagetables except non-dirty L1
+ *   3. update the dirty memory
+ *   4. pin page tables
+ *   5. unpin unneeded non-dirty L1 pagetables
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int colo_flush_memory(struct restore_data *comm_data, void *data)
+{
+ struct restore_colo_data *colo_data = data;
+ xc_interface *xch = comm_data->xch;
+ uint32_t dom = comm_data->dom;
+ DECLARE_HYPERCALL;
+
+ if (!colo_data->first_time)
+ {
+ /* reset cpu: drop the stale vcpu state before rewriting memory */
+ hypercall.op = __HYPERVISOR_reset_vcpu_op;
+ hypercall.arg[0] = (unsigned long)dom;
+ if (do_xen_hypercall(xch, &hypercall) < 0)
+ {
+ PERROR("Failed to reset vcpu");
+ return -1;
+ }
+ }
+
+ if (pin_l1(comm_data, colo_data) != 0)
+ return -1;
+ if (unpin_pagetable(comm_data, colo_data) != 0)
+ return -1;
+
+ if (update_memory(comm_data, colo_data) != 0)
+ return -1;
+
+ if (pin_pagetable(comm_data, colo_data) != 0)
+ return -1;
+ if (unpin_l1(comm_data, colo_data) != 0)
+ return -1;
+
+ /* the slaver's view of the pagetable types now matches the master's */
+ memcpy(colo_data->pfn_type_slaver, comm_data->pfn_type,
+ comm_data->dinfo->p2m_size * sizeof(xen_pfn_t));
+
+ return 0;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h
b/tools/libxc/xc_save_restore_colo.h
index 67c567c..8af75b4 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -7,5 +7,6 @@
extern int colo_init(struct restore_data *, void **);
extern void colo_free(struct restore_data *, void *);
extern char *colo_get_page(struct restore_data *, void *, unsigned long);
+extern int colo_flush_memory(struct restore_data *, void *);
#endif
--
1.7.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |