[Xen-devel] [PATCH v6 4/5] xen/arm: Implement hypercall for dirty page tracing
Add a hypercall (shadow op: enable/disable and clean/peek the dirtied-page bitmap).
It consists of two parts: dirty-page detection and saving.
For detection, we set up the guest p2m's leaf PTEs as read-only; whenever the guest
tries to write, a permission fault is raised and traps into Xen.
The permission-faulted GPA must be saved for the toolstack (when it wants to see
which pages were dirtied). For this purpose, we temporarily record the GPAs in a
bitmap.
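
As a side note for reviewers, the bitmap encoding is one bit per guest page,
indexed from the start of guest RAM. A minimal stand-alone sketch of that mapping
(mirroring mark_dirty_bitmap() in mm.c; the SKETCH_* constants are placeholders,
and the patch itself uses Xen's set_bit() rather than a byte array):

    #include <stdint.h>

    #define SKETCH_PAGE_SHIFT      12             /* 4K pages */
    #define SKETCH_GUEST_RAM_BASE  0x80000000ULL  /* placeholder RAM base */

    /* Set the bit for the guest page containing 'gpa' in a byte-array bitmap. */
    static inline void sketch_mark_dirty(uint8_t *bitmap, uint64_t gpa)
    {
        uint64_t bit = (gpa - SKETCH_GUEST_RAM_BASE) >> SKETCH_PAGE_SHIFT;
        bitmap[bit / 8] |= (uint8_t)(1u << (bit % 8));
    }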
Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>
Signed-off-by: Junghyun Yoo <yjhyun.yoo@xxxxxxxxxxx>
---
xen/arch/arm/domain.c | 3 +
xen/arch/arm/domctl.c | 9 ++
xen/arch/arm/mm.c | 90 +++++++++++++++++++-
xen/arch/arm/p2m.c | 195 +++++++++++++++++++++++++++++++++++++++++++
xen/arch/arm/traps.c | 18 ++++
xen/include/asm-arm/domain.h | 3 +
xen/include/asm-arm/mm.h | 6 ++
xen/include/asm-arm/p2m.h | 7 +-
8 files changed, 329 insertions(+), 2 deletions(-)
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 4978765..6a0b36d 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -512,6 +512,9 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
/* init for dirty-page tracing */
d->arch.dirty.mode = 0;
+ spin_lock_init(&d->arch.dirty.lock);
+ d->arch.dirty.bitmap = NULL;
+ d->arch.dirty.bitmap_nr_bytes = 0;
d->arch.dirty.p2m_start_idx = 0;
d->arch.dirty.p2m_end_idx = 0;
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 9f65442..054de16 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -107,6 +107,15 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
xfree(c.data);
}
break;
+ case XEN_DOMCTL_shadow_op:
+ {
+ domain_pause(d);
+ ret = dirty_mode_op(d, &domctl->u.shadow_op);
+ domain_unpause(d);
+
+ copyback = 1;
+ }
+ break;
default:
return subarch_do_domctl(domctl, d, u_domctl);
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 0fc9d9a..238a15e 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -865,7 +865,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long e)
create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
}
-enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
{
lpae_t pte;
@@ -1328,6 +1327,95 @@ void cleanup_vlpt(struct domain *d)
unmap_domain_page_global(d->arch.dirty.p2m_first[1]);
}
+static inline void mark_dirty_bitmap(struct domain *d, paddr_t addr)
+{
+ paddr_t ram_base = (paddr_t) GUEST_RAM_BASE;
+ int bit_index = PFN_DOWN(addr - ram_base);
+
+ set_bit(bit_index, d->arch.dirty.bitmap);
+}
+
+/* routine for dirty-page tracing
+ *
+ * On first write, it page faults, its entry is changed to read-write,
+ * and on retry the write succeeds.
+ *
+ * For locating the p2m entry of the faulting address, we use the virtual-linear page table.
+ * Returns zero if addr is not valid or dirty-page tracing is not enabled.
+ */
+int handle_page_fault(struct domain *d, paddr_t addr)
+{
+ struct p2m_domain *p2m = &d->arch.p2m;
+ lpae_t *vlp2m_pte = 0;
+ paddr_t gma_start = GUEST_RAM_BASE;
+ paddr_t gma_end = 0;
+
+ if ( !d->arch.dirty.mode ) return 0;
+ gma_end = get_gma_end(d);
+
+ /* Ensure that addr is inside guest's RAM */
+ if ( addr < gma_start ||
+ addr > gma_end ) return 0;
+
+ spin_lock(&p2m->lock);
+ vlp2m_pte = get_vlpt_3lvl_pte(addr);
+ if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 &&
+ vlp2m_pte->p2m.type == p2m_ram_logdirty )
+ {
+ lpae_t pte = *vlp2m_pte;
+ pte.p2m.write = 1;
+ write_pte(vlp2m_pte, pte);
+ flush_tlb();
+ spin_unlock(&p2m->lock);
+
+ /* only necessary to lock between get-dirty bitmap and mark dirty
+ * bitmap. If get-dirty bitmap happens immediately before this
+ * lock, the corresponding dirty-page would be marked at the next
+ * round of get-dirty bitmap */
+ spin_lock(&d->arch.dirty.lock);
+ mark_dirty_bitmap(d, addr);
+ spin_unlock(&d->arch.dirty.lock);
+ }
+ else
+ spin_unlock(&p2m->lock);
+
+ return 1;
+}
+
+int prepare_bitmap(struct domain *d)
+{
+ paddr_t gma_start = GUEST_RAM_BASE;
+ paddr_t gma_end = 0;
+ uint32_t nr_bytes, nr_pages, order;
+
+ gma_end = get_gma_end(d);
+
+ nr_bytes = (PFN_DOWN(gma_end - gma_start) + 7) / 8;
+ nr_pages = (nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+ order = get_order_from_pages(nr_pages);
+
+ d->arch.dirty.bitmap = alloc_xenheap_pages(order, 0);
+ if ( d->arch.dirty.bitmap == NULL )
+ return -ENOMEM;
+
+ memset(d->arch.dirty.bitmap, 0, nr_bytes);
+
+ d->arch.dirty.bitmap_nr_bytes = nr_bytes;
+ return 0;
+}
+
+void cleanup_bitmap(struct domain *d)
+{
+ uint32_t nr_pages, order;
+
+ nr_pages = (d->arch.dirty.bitmap_nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+ order = get_order_from_pages(nr_pages);
+
+ free_xenheap_pages(d->arch.dirty.bitmap, order);
+ d->arch.dirty.bitmap = NULL;
+ d->arch.dirty.bitmap_nr_bytes = 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 96bc0ef..b111452 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -4,6 +4,8 @@
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <xen/bitops.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
#include <asm/flushtlb.h>
#include <asm/gic.h>
#include <asm/event.h>
@@ -223,6 +225,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
break;
case p2m_ram_ro:
+ case p2m_ram_logdirty:
e.p2m.xn = 0;
e.p2m.write = 0;
break;
@@ -284,6 +287,10 @@ static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache)
pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
+ /* Clear the ro bit (a table entry's write-protect bit) so the entry
+ * remains writable when accessed through the VLPT */
+ pte.pt.ro = 0;
+
p2m_write_pte(entry, pte, flush_cache);
return 0;
@@ -715,6 +722,194 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
return p >> PAGE_SHIFT;
}
+/* Change types across all p2m entries in a domain */
+int p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{
+ struct p2m_domain *p2m = &d->arch.p2m;
+ paddr_t ram_base = GUEST_RAM_BASE;
+ paddr_t ram_end;
+ paddr_t paddr;
+ int nr_pages;
+ int rc = -EFAULT, i;
+ unsigned long cur_first_offset = ~0, cur_second_offset = ~0;
+ lpae_t *first = NULL, *second = NULL, *third = NULL;
+ lpae_t pte;
+
+ ram_end = get_gma_end(d);
+ paddr = ram_base;
+ nr_pages = (ram_end - ram_base) >> PAGE_SHIFT;
+
+ spin_lock(&p2m->lock);
+
+ first = __map_domain_page(p2m->first_level);
+ if ( !first ||
+ !first[first_table_offset(paddr)].p2m.valid ||
+ !first[first_table_offset(paddr)].p2m.table )
+ goto err;
+
+ for ( i = 0; i < nr_pages; ++i )
+ {
+ if ( cur_first_offset != first_table_offset(paddr) )
+ {
+ if ( second ) unmap_domain_page(second);
+ second = map_domain_page(first[first_table_offset(paddr)].p2m.base);
+ cur_first_offset = first_table_offset(paddr);
+ }
+ if ( !second ||
+ !second[second_table_offset(paddr)].p2m.valid ||
+ !second[second_table_offset(paddr)].p2m.table )
+ goto err;
+ if ( cur_second_offset != second_table_offset(paddr) )
+ {
+ if ( third ) unmap_domain_page(third);
+ third = map_domain_page(second[second_table_offset(paddr)].p2m.base);
+ cur_second_offset = second_table_offset(paddr);
+ }
+ if ( !third ||
+ !third[third_table_offset(paddr)].p2m.valid )
+ goto err;
+
+ pte = third[third_table_offset(paddr)];
+
+ if ( nt == mg_ro )
+ {
+ /* use the type field (kept in the PTE's avail bits) as a backup for the write bit */
+ if ( pte.p2m.write == 1 )
+ {
+ pte.p2m.write = 0;
+ pte.p2m.type = p2m_ram_logdirty;
+ }
+ else
+ {
+ pte.p2m.type = p2m_ram_rw;
+ }
+ }
+ else if ( nt == mg_rw )
+ {
+ /* restore the write bit */
+ if ( pte.p2m.write == 0 && pte.p2m.type == p2m_ram_logdirty )
+ {
+ pte.p2m.write = 1;
+ }
+ }
+
+ write_pte(&third[third_table_offset(paddr)], pte);
+ paddr += PAGE_SIZE;
+ }
+
+ rc = 0;
+err:
+ flush_tlb_all_local();
+ if ( third ) unmap_domain_page(third);
+ if ( second ) unmap_domain_page(second);
+ if ( first ) unmap_domain_page(first);
+ spin_unlock(&p2m->lock);
+ return rc;
+}
+
+/* Read a domain's log-dirty bitmap and stats.
+ * If the operation is a CLEAN, clear the bitmap and stats. */
+int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+ int bitmap_size;
+ paddr_t gma_start = GUEST_RAM_BASE, gma_end;
+
+ /* This hypercall is issued from domain 0, and we don't know which guest's
+ * VLPT is currently mapped in xen_second, so restore this domain's VLPT here */
+ restore_vlpt(d);
+
+ gma_end = get_gma_end(d);
+ bitmap_size = (gma_end - gma_start) / 8;
+
+ if ( guest_handle_is_null(sc->dirty_bitmap) )
+ {
+ return -EINVAL;
+ }
+ else
+ {
+ uint32_t j = 0;
+ uint8_t *bitmap = d->arch.dirty.bitmap;
+ uint32_t nr_bytes = d->arch.dirty.bitmap_nr_bytes;
+ spin_lock(&d->arch.dirty.lock);
+
+ if ( copy_to_guest_offset(sc->dirty_bitmap, 0, bitmap, nr_bytes) )
+ {
+ spin_unlock(&d->arch.dirty.lock);
+ return -EINVAL;
+ }
+
+ dsb(sy);
+ while ((j = find_next_bit((const long unsigned int *)bitmap, nr_bytes * 8, j)) < nr_bytes * 8)
+ {
+ lpae_t *vlpt, new_vlpt;
+ paddr_t addr = gma_start + (j << PAGE_SHIFT);
+ vlpt = get_vlpt_3lvl_pte(addr);
+ new_vlpt = *vlpt;
+ new_vlpt.p2m.write = 0;
+ __write_pte(vlpt, new_vlpt);
+ j++;
+ }
+ dsb(sy);
+
+ if ( sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN )
+ memset(bitmap, 0, nr_bytes);
+
+ spin_unlock(&d->arch.dirty.lock);
+ flush_tlb_local();
+ }
+
+ sc->stats.dirty_count = 0;
+
+ return 0;
+}
+
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+ long ret = 0;
+ switch (sc->op)
+ {
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ {
+ enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
+
+ d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
+ if ( (ret = p2m_change_entry_type_global(d, nt)) )
+ return ret;
+
+ if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
+ {
+ cleanup_vlpt(d);
+ cleanup_bitmap(d);
+ }
+ else
+ {
+ if ( (ret = prepare_vlpt(d)) )
+ return ret;
+
+ if ( (ret = prepare_bitmap(d)) )
+ {
+ /* in case of failure, we have to cleanup vlpt */
+ cleanup_vlpt(d);
+ return ret;
+ }
+ }
+ }
+ break;
+
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+ {
+ ret = log_dirty_op(d, sc);
+ }
+ break;
+
+ default:
+ return -ENOSYS;
+ }
+ return ret;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 03a3da6..9b6d746 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1603,6 +1603,13 @@ static void do_trap_instr_abort_guest(struct cpu_user_regs *regs,
inject_iabt_exception(regs, addr, hsr.len);
}
+static inline int dabt_is_page_fault(struct hsr_dabt dabt)
+{
+ /* dabt.valid can be 0 here */
+ return (dabt.dfsc & FSC_TYPE_MASK) == FSC_TYPE_FAULT &&
+ (dabt.dfsc & FSC_LL_MASK) == 0x3 /* third level */;
+}
+
static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
union hsr hsr)
{
@@ -1630,6 +1637,17 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
if ( rc == -EFAULT )
goto bad_data_abort;
+ /* domU page fault handling for guest live migration */
+ if ( dabt_is_page_fault(dabt) )
+ {
+ /* Do not advance the PC here, so the guest repeats the memory operation */
+ if ( handle_page_fault(current->domain, info.gpa) ) return;
+
+ /* handle_page_fault returning 0 means either dirty-page tracing has not
+ * been started yet or a 'real' permission fault happened;
+ * in that case just fall through */
+ }
+
/* XXX: Decode the instruction if ISS is not valid */
if ( !dabt.valid )
goto bad_data_abort;
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 9674175..75f3a57 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -171,6 +171,9 @@ struct arch_domain
lpae_t *p2m_first[2]; /* copy of guest p2m's first */
int p2m_start_idx; /* start index of p2m_first */
int p2m_end_idx; /* end index of p2m_first */
+ uint8_t *bitmap; /* dirty bitmap */
+ uint32_t bitmap_nr_bytes; /* number of bytes for dirty bitmap */
+ spinlock_t lock; /* protect the dirty bitmap */
} dirty;
} __cacheline_aligned;
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 7ceb568..90ece9a 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -344,6 +344,8 @@ static inline void put_page_and_type(struct page_info *page)
void clear_and_clean_page(struct page_info *page);
+enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
+
/* routine for dirty-page tracing */
#define VLPT_SIZE (1 << SECOND_SHIFT)
#define VLPT_VA_TO_IDX(va) ((va - DOMHEAP_VIRT_START) >> SECOND_SHIFT)
@@ -356,6 +358,10 @@ int prepare_vlpt(struct domain *d);
void cleanup_vlpt(struct domain *d);
void restore_vlpt(struct domain *d);
+int handle_page_fault(struct domain *d, paddr_t addr);
+int prepare_bitmap(struct domain *d);
+void cleanup_bitmap(struct domain *d);
+
/* calculate the xen's virtual address for accessing the leaf PTE of
* a given address (GPA) */
static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr)
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index bd71abe..cc17ec3 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -2,6 +2,7 @@
#define _XEN_P2M_H
#include <xen/mm.h>
+#include <public/domctl.h>
struct domain;
@@ -41,6 +42,7 @@ typedef enum {
p2m_invalid = 0, /* Nothing mapped here */
p2m_ram_rw, /* Normal read/write guest RAM */
p2m_ram_ro, /* Read-only; writes are silently dropped */
+ p2m_ram_logdirty, /* Read-only; special mode for log-dirty tracking */
p2m_mmio_direct, /* Read/write mapping of genuine MMIO area */
p2m_map_foreign, /* Ram pages from foreign domain */
p2m_grant_map_rw, /* Read/write grant mapping */
@@ -49,7 +51,7 @@ typedef enum {
} p2m_type_t;
#define p2m_is_foreign(_t) ((_t) == p2m_map_foreign)
-#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro)
+#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro || (_t) == p2m_ram_logdirty)
/* Initialise vmid allocator */
void p2m_vmid_allocator_init(void);
@@ -178,6 +180,9 @@ static inline int get_page_and_type(struct page_info *page,
return rc;
}
+int p2m_change_entry_type_global(struct domain *d, enum mg nt);
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
+
#endif /* _XEN_P2M_H */
/*
--
1.8.1.2