[PATCH v1 6/6] xen/riscv: implement p2m mapping functionality
Introduce the utilities needed to build and manage RISC-V guest page
tables and MMIO mappings via the functions map_regions_p2mt() and
guest_physmap_add_entry().
To implement p2m mapping functionality the following is introduced:
- Define P2M root level/order and entry count.
- Introduce a radix tree for p2m types, as there aren't enough free bits in
  the PTE, and the helpers (p2m_type_radix_{get,set}()) to deal with them.
- Introduce p2m_is_*() helpers, as the pte_is_*() helpers check the valid
  bit in the PTE while the p2m type has to be checked instead
  (see the comment above p2m_is_valid() for details).
- Introduce a helper to set a p2m PTE's permissions: p2m_set_permission().
- Introduce a helper to create a p2m entry based on mfn, p2m_type_t and
  p2m_access_t: p2m_entry_from_mfn().
- Introduce helper to generate table entry with correct attributes:
page_to_p2m_table().
- Introduce p2m page allocation function: p2m_alloc_page().
- Introduce functions to write/remove p2m's entries: p2m_{write,remove}_pte().
- Introduce function to allocate p2m table: p2m_create_table().
- Introduce functions used to free a p2m entry.
- Introduce function for table walking: p2m_next_level().
- Introduce function to insert an entry in the p2m (p2m_set_entry()).
- Introduce superpage splitting: p2m_split_superpage().
- Introduce page table type defines (PGT_{none,writable_page}, etc).
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
---
xen/arch/riscv/include/asm/mm.h | 32 +-
xen/arch/riscv/include/asm/p2m.h | 17 +-
xen/arch/riscv/include/asm/page.h | 11 +
xen/arch/riscv/p2m.c | 780 ++++++++++++++++++++++++++++++
4 files changed, 829 insertions(+), 11 deletions(-)
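
As a quick orientation, here is a minimal sketch of how the two entry
points are expected to be called. It is illustrative only (not part of the
patch) and assumes the helpers that exist on other architectures
(gaddr_to_gfn(), maddr_to_mfn(), PFN_UP()):

    /* Map a device's MMIO range 1:1 into the guest. */
    rc = map_regions_p2mt(d, gaddr_to_gfn(addr), PFN_UP(size),
                          maddr_to_mfn(addr), p2m_mmio_direct_dev);
    if ( rc )
        return rc;

    /* Add a 2M chunk of guest RAM (page_order 9 under Sv39). */
    rc = guest_physmap_add_entry(d, gfn, mfn, 9, p2m_ram_rw);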
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 972ec45448..c1e4519839 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -12,6 +12,7 @@
#include <xen/sections.h>
#include <xen/types.h>
+#include <asm/cmpxchg.h>
#include <asm/page-bits.h>
extern vaddr_t directmap_virt_start;
@@ -229,9 +230,21 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
#define PGT_writable_page PG_mask(1, 1) /* has writable mappings? */
#define PGT_type_mask PG_mask(1, 1) /* Bits 31 or 63. */
-/* Count of uses of this frame as its current type. */
-#define PGT_count_width PG_shift(2)
-#define PGT_count_mask ((1UL << PGT_count_width) - 1)
+/* 10-bit count of uses of this frame as its current type. */
+#define PGT_count_mask PG_mask(0x3FF, 10)
+
+/*
+ * Sv32 has a 22-bit GFN, while Sv{39, 48, 57} have a 44-bit GFN.
+ * Thereby 10 bits can be used for `type_info` in all MMU modes. Using the
+ * same number of `type_info` bits for all modes avoids introducing an
+ * extra #ifdef to this header: if we went with the maximum possible
+ * number of count bits for each configuration, we would need separate
+ * sets of PGT_count_* and PGT_gfn_* definitions.
+ */
+#define PGT_gfn_width PG_shift(10)
+#define PGT_gfn_mask (BIT(PGT_gfn_width, UL) - 1)
+
+#define PGT_INVALID_XENHEAP_GFN _gfn(PGT_gfn_mask)
/*
* Page needs to be scrubbed. Since this bit can only be set on a page that is
@@ -283,6 +296,19 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
#define PFN_ORDER(pg) ((pg)->v.free.order)
+static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
+{
+ gfn_t gfn_ = gfn_eq(gfn, INVALID_GFN) ? PGT_INVALID_XENHEAP_GFN : gfn;
+ unsigned long x, nx, y = p->u.inuse.type_info;
+
+ ASSERT(is_xen_heap_page(p));
+
+ do {
+ x = y;
+ nx = (x & ~PGT_gfn_mask) | gfn_x(gfn_);
+ } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
+}
+
extern unsigned char cpu0_boot_stack[];
void setup_initial_pagetables(void);
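
As an aside, the type_info packing that page_set_xenheap_gfn() relies on
can be modelled in isolation. A standalone sketch (not part of the patch)
mirroring the PG_shift()/PG_mask() arithmetic on a 64-bit build:

    #include <stdio.h>

    #define BITS_PER_LONG   64
    #define PG_shift(idx)   (BITS_PER_LONG - (idx))
    #define PG_mask(x, idx) ((unsigned long)(x) << PG_shift(idx))

    #define PGT_count_mask  PG_mask(0x3FF, 10)           /* top 10 bits */
    #define PGT_gfn_width   PG_shift(10)                 /* 54 */
    #define PGT_gfn_mask    ((1UL << PGT_gfn_width) - 1) /* low 54 bits */

    int main(void)
    {
        unsigned long type_info = PGT_count_mask; /* pretend count is maxed */

        /* Update the stored GFN the way page_set_xenheap_gfn() does. */
        type_info = (type_info & ~PGT_gfn_mask) | 0x12345UL;
        printf("gfn=%#lx count bits=%#lx\n",
               type_info & PGT_gfn_mask, type_info & PGT_count_mask);
        return 0;
    }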
diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
index 0fcfec7f03..9f4633501f 100644
--- a/xen/arch/riscv/include/asm/p2m.h
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -9,8 +9,13 @@
#include <xen/rwlock.h>
#include <xen/types.h>
+#include <asm/page.h>
#include <asm/page-bits.h>
+#define P2M_ROOT_LEVEL HYP_PT_ROOT_LEVEL
+#define P2M_ROOT_ORDER XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL)
+#define P2M_ROOT_PAGES (1U << P2M_ROOT_ORDER)
+
#define paddr_bits PADDR_BITS
/* Get host p2m table */
@@ -145,14 +150,10 @@ static inline int guest_physmap_mark_populate_on_demand(struct domain *d,
return -EOPNOTSUPP;
}
-static inline int guest_physmap_add_entry(struct domain *d,
- gfn_t gfn, mfn_t mfn,
- unsigned long page_order,
- p2m_type_t t)
-{
- BUG_ON("unimplemented");
- return -EINVAL;
-}
+int guest_physmap_add_entry(struct domain *d,
+ gfn_t gfn, mfn_t mfn,
+ unsigned long page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility */
static inline int __must_check
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index cb3dea309c..a5c6f5140d 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -22,6 +22,7 @@
#define XEN_PT_LEVEL_SIZE(lvl) (_AT(paddr_t, 1) << XEN_PT_LEVEL_SHIFT(lvl))
#define XEN_PT_LEVEL_MAP_MASK(lvl) (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
#define XEN_PT_LEVEL_MASK(lvl) (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
+#define XEN_PT_ENTRIES (_AT(unsigned int, 1) << PAGETABLE_ORDER)
/*
* PTE format:
@@ -69,10 +70,20 @@
#define PTE_PMBT_NOCACHE BIT(61, UL)
#define PTE_PMBT_IO BIT(62, UL)
+enum pbmt_type_t {
+ pbmt_pma,
+ pbmt_nc,
+ pbmt_io,
+ pbmt_rsvd,
+ pbmt_max,
+};
+
#define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
#define PTE_PBMT_MASK (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
+#define P2M_CLEAR_PERM(p2m_pte) ((p2m_pte).pte & ~PTE_ACCESS_MASK)
+
/* Calculate the offsets into the pagetables for a given VA */
#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
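
For context, the Svpbmt field occupies PTE bits 62:61, which is why
pbmt_nc (1) corresponds to bit 61 and pbmt_io (2) to bit 62 in the masks
above. A standalone sketch (not part of the patch) of extracting the field:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the enumerators above: values are the 2-bit field encoding. */
    enum pbmt_type { pbmt_pma = 0, pbmt_nc = 1, pbmt_io = 2, pbmt_rsvd = 3 };

    /* The Svpbmt field lives in PTE bits 62:61. */
    static enum pbmt_type pte_pbmt(uint64_t pte)
    {
        return (enum pbmt_type)((pte >> 61) & 0x3);
    }

    int main(void)
    {
        uint64_t pte_io = (uint64_t)pbmt_io << 61; /* equals BIT(62, UL) */
        uint64_t pte_nc = (uint64_t)pbmt_nc << 61; /* equals BIT(61, UL) */

        printf("io=%d nc=%d pma=%d\n",
               pte_pbmt(pte_io), pte_pbmt(pte_nc), pte_pbmt(0));
        return 0;
    }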
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
index a890870391..84cb3d28af 100644
--- a/xen/arch/riscv/p2m.c
+++ b/xen/arch/riscv/p2m.c
@@ -135,6 +135,37 @@ static int p2m_alloc_table(struct domain *d)
return 0;
}
+static p2m_type_t p2m_type_radix_get(struct p2m_domain *p2m, pte_t pte)
+{
+ void *ptr;
+
+ ptr = radix_tree_lookup(&p2m->p2m_type, pte.pte);
+
+ if ( !ptr )
+ return p2m_invalid;
+
+ return radix_tree_ptr_to_int(ptr);
+}
+
+static int p2m_type_radix_set(struct p2m_domain *p2m, pte_t pte, p2m_type_t t)
+{
+ int rc;
+
+ rc = radix_tree_insert(&p2m->p2m_type, pte.pte,
+ radix_tree_int_to_ptr(t));
+ if ( rc == -EEXIST )
+ {
+ /* If a setting already exists, change it to the new one */
+ radix_tree_replace_slot(
+ radix_tree_lookup_slot(
+ &p2m->p2m_type, pte.pte),
+ radix_tree_int_to_ptr(t));
+ rc = 0;
+ }
+
+ return rc;
+}
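
The radix tree stores a small integer (the p2m type) where a pointer is
expected; radix_tree_int_to_ptr()/radix_tree_ptr_to_int() achieve this by
shifting the value and tagging the low bits so it cannot be mistaken for a
real pointer. A standalone sketch of the idea (the tag bits below are
illustrative, not Xen's exact encoding):

    #include <assert.h>
    #include <stdio.h>

    /* Shift the integer up and set a low tag bit: allocator-returned
     * pointers are at least 4-byte aligned, so a tagged value can never
     * collide with a real pointer. */
    static void *int_to_ptr(int val) { return (void *)(((long)val << 2) | 0x2); }
    static int ptr_to_int(void *p)   { return (int)((long)p >> 2); }

    int main(void)
    {
        void *p = int_to_ptr(5 /* e.g. a p2m_type_t value */);

        assert(ptr_to_int(p) == 5);
        printf("stored type %d without any allocation\n", ptr_to_int(p));
        return 0;
    }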
+
int p2m_init(struct domain *d)
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
@@ -233,3 +264,752 @@ int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
return 0;
}
+
+/*
+ * Find and map the root page table. The caller is responsible for
+ * unmapping the table.
+ *
+ * The function will return NULL if the offset of the root table is
+ * invalid.
+ */
+static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
+{
+ unsigned long root_table;
+
+ root_table = gfn_x(gfn) >> XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL);
+ if ( root_table >= P2M_ROOT_PAGES )
+ return NULL;
+
+ return __map_domain_page(p2m->root + root_table);
+}
+
+/*
+ * In the case of the P2M, the valid bit is used for another purpose. Use
+ * the type to check whether an entry is valid.
+ */
+static inline bool p2m_is_valid(struct p2m_domain *p2m, pte_t pte)
+{
+ return p2m_type_radix_get(p2m, pte) != p2m_invalid;
+}
+
+/*
+ * The pte_is_*() helpers check the valid bit in the PTE, but for the P2M
+ * the p2m type has to be checked instead (see the comment above
+ * p2m_is_valid()). Provide our own helpers to do so.
+ */
+static inline bool p2m_is_mapping(struct p2m_domain *p2m, pte_t pte)
+{
+ return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK);
+}
+
+static inline bool p2m_is_superpage(struct p2m_domain *p2m, pte_t pte,
+ unsigned int level)
+{
+ return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK) &&
+ (level > 0);
+}
+
+static void p2m_set_permission(pte_t *e, p2m_type_t t, p2m_access_t a)
+{
+ /* First apply type permissions */
+ switch ( t )
+ {
+ case p2m_ram_rw:
+ e->bits.r = 1;
+ e->bits.w = 1;
+ e->bits.x = 1;
+
+ break;
+
+ case p2m_mmio_direct_dev:
+ e->bits.r = 1;
+ e->bits.w = 1;
+ e->bits.x = 0;
+ break;
+
+ case p2m_invalid:
+ e->bits.r = 0;
+ e->bits.w = 0;
+ e->bits.x = 0;
+ break;
+
+ default:
+ BUG();
+ break;
+ }
+
+ /* Then restrict with access permissions */
+ switch ( a )
+ {
+ case p2m_access_rwx:
+ break;
+ case p2m_access_wx:
+ e->bits.r = 0;
+ break;
+ case p2m_access_rw:
+ e->bits.x = 0;
+ break;
+ case p2m_access_w:
+ e->bits.r = 0;
+ e->bits.x = 0;
+ break;
+ case p2m_access_rx:
+ case p2m_access_rx2rw:
+ e->bits.w = 0;
+ break;
+ case p2m_access_x:
+ e->bits.r = 0;
+ e->bits.w = 0;
+ break;
+ case p2m_access_r:
+ e->bits.w = 0;
+ e->bits.x = 0;
+ break;
+ case p2m_access_n:
+ case p2m_access_n2rwx:
+ e->bits.r = 0;
+ e->bits.w = 0;
+ e->bits.x = 0;
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
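
The resulting r/w/x bits are the type's permissions further restricted by
the access type. A standalone sketch (not part of the patch) of the same
two-step computation, using hypothetical reduced enums:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical reduced stand-ins for p2m_type_t / p2m_access_t. */
    enum type   { ram_rw, mmio };
    enum access { rwx, rx, r_only };

    struct perms { bool r, w, x; };

    static struct perms compute(enum type t, enum access a)
    {
        /* First apply the type's permissions... */
        struct perms p = (t == ram_rw) ? (struct perms){ 1, 1, 1 }
                                       : (struct perms){ 1, 1, 0 };

        /* ...then restrict them with the access permissions. */
        if ( a == rx )
            p.w = 0;
        if ( a == r_only )
            p.w = p.x = 0;

        return p;
    }

    int main(void)
    {
        struct perms p = compute(ram_rw, rx); /* expect r=1 w=0 x=1 */

        printf("r=%d w=%d x=%d\n", p.r, p.w, p.x);
        return 0;
    }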
+
+static pte_t p2m_entry_from_mfn(struct p2m_domain *p2m, mfn_t mfn,
+ p2m_type_t t, p2m_access_t a)
+{
+ pte_t e = (pte_t) {
+ .bits.v = 1,
+ };
+
+ switch ( t )
+ {
+ case p2m_mmio_direct_dev:
+ e.bits.pbmt = pbmt_io;
+ break;
+
+ default:
+ e.bits.pbmt = pbmt_pma;
+ break;
+ }
+
+ p2m_set_permission(&e, t, a);
+
+ ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
+
+ pte_set_mfn(&e, mfn);
+
+ BUG_ON(p2m_type_radix_set(p2m, e, t));
+
+ return e;
+}
+
+/* Generate table entry with correct attributes. */
+static pte_t page_to_p2m_table(struct p2m_domain *p2m, struct page_info *page)
+{
+ /*
+ * Since this function generates a table entry, according to "Encoding
+ * of PTE R/W/X fields," the entry's r, w, and x fields must be set to 0
+ * to point to the next level of the page table.
+ * Therefore, to ensure that an entry is a page table entry,
+ * `p2m_access_n2rwx` is passed to `p2m_entry_from_mfn()` as the access
+ * value, which overrides whatever was passed as `p2m_type_t` and
+ * guarantees that
+ * the entry is a page table entry by setting r = w = x = 0.
+ */
+ return p2m_entry_from_mfn(p2m, page_to_mfn(page), p2m_ram_rw,
+ p2m_access_n2rwx);
+}
+
+static struct page_info *p2m_alloc_page(struct domain *d)
+{
+ struct page_info *pg;
+
+ /*
+ * For the hardware domain, there should be no limit on the number of
+ * pages that can be allocated, so that the kernel may take advantage of
+ * the extended regions. Hence, allocate p2m pages for hardware domains
+ * from the heap.
+ */
+ if ( is_hardware_domain(d) )
+ {
+ pg = alloc_domheap_page(d, MEMF_no_owner);
+ if ( pg == NULL )
+ printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n");
+ }
+ else
+ {
+ spin_lock(&d->arch.paging.lock);
+ pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
+ spin_unlock(&d->arch.paging.lock);
+ }
+
+ return pg;
+}
+
+static inline void p2m_write_pte(pte_t *p, pte_t pte, bool clean_pte)
+{
+ write_pte(p, pte);
+ if ( clean_pte )
+ clean_dcache_va_range(p, sizeof(*p));
+}
+
+static inline void p2m_remove_pte(pte_t *p, bool clean_pte)
+{
+ pte_t pte;
+
+ memset(&pte, 0x00, sizeof(pte));
+ p2m_write_pte(p, pte, clean_pte);
+}
+
+/* Allocate a new page table page and hook it in via the given entry. */
+static int p2m_create_table(struct p2m_domain *p2m, pte_t *entry)
+{
+ struct page_info *page;
+ pte_t *p;
+
+ ASSERT(!p2m_is_valid(p2m, *entry));
+
+ page = p2m_alloc_page(p2m->domain);
+ if ( page == NULL )
+ return -ENOMEM;
+
+ page_list_add(page, &p2m->pages);
+
+ p = __map_domain_page(page);
+ clear_page(p);
+
+ unmap_domain_page(p);
+
+ p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte);
+
+ return 0;
+}
+
+#define GUEST_TABLE_MAP_NONE 0
+#define GUEST_TABLE_MAP_NOMEM 1
+#define GUEST_TABLE_SUPER_PAGE 2
+#define GUEST_TABLE_NORMAL 3
+
+/*
+ * Take the currently mapped table, find the corresponding GFN entry,
+ * and map the next table, if available. The previous table will be
+ * unmapped if the next level was mapped (e.g. GUEST_TABLE_NORMAL
+ * returned).
+ *
+ * `alloc_tbl` parameter indicates whether intermediate tables should
+ * be allocated when not present.
+ *
+ * Return values:
+ * GUEST_TABLE_MAP_NONE: the entry is absent and a table allocation isn't
+ * permitted.
+ * GUEST_TABLE_MAP_NOMEM: allocating a new page table failed.
+ * GUEST_TABLE_NORMAL: the next level was mapped normally.
+ * GUEST_TABLE_SUPER_PAGE: the entry points to a superpage.
+ */
+static int p2m_next_level(struct p2m_domain *p2m, bool alloc_tbl,
+ unsigned int level, pte_t **table,
+ unsigned int offset)
+{
+ pte_t *entry;
+ int ret;
+ mfn_t mfn;
+
+ entry = *table + offset;
+
+ if ( !p2m_is_valid(p2m, *entry) )
+ {
+ if ( !alloc_tbl )
+ return GUEST_TABLE_MAP_NONE;
+
+ ret = p2m_create_table(p2m, entry);
+ if ( ret )
+ return GUEST_TABLE_MAP_NOMEM;
+ }
+
+ /* The function p2m_next_level() is never called at the last level */
+ ASSERT(level != 0);
+ if ( p2m_is_mapping(p2m, *entry) )
+ return GUEST_TABLE_SUPER_PAGE;
+
+ mfn = mfn_from_pte(*entry);
+
+ unmap_domain_page(*table);
+ *table = map_domain_page(mfn);
+
+ return GUEST_TABLE_NORMAL;
+}
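
The contract above can be exercised against a toy two-level table. A
standalone sketch (not part of the patch) of a walker with the same four
outcomes:

    #include <stdio.h>
    #include <stdlib.h>

    enum { MAP_NONE, MAP_NOMEM, SUPER_PAGE, NORMAL };

    struct ent { int valid, leaf; struct ent *next; };

    static int next_level(struct ent *e, int alloc_tbl)
    {
        if ( !e->valid )
        {
            if ( !alloc_tbl )
                return MAP_NONE;   /* absent, allocation not permitted */

            e->next = calloc(512, sizeof(*e->next));
            if ( !e->next )
                return MAP_NOMEM;  /* allocating a new table failed */
            e->valid = 1;
        }

        return e->leaf ? SUPER_PAGE : NORMAL;
    }

    int main(void)
    {
        struct ent root = { 0 };

        printf("%d\n", next_level(&root, 0)); /* MAP_NONE: absent, no alloc */
        printf("%d\n", next_level(&root, 1)); /* NORMAL: table was created */
        free(root.next);
        return 0;
    }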
+
+static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
+ unsigned int level, unsigned int target,
+ const unsigned int *offsets)
+{
+ struct page_info *page;
+ unsigned int i;
+ pte_t pte, *table;
+ bool rv = true;
+
+ /* Convenience aliases */
+ mfn_t mfn = pte_get_mfn(*entry);
+ unsigned int next_level = level - 1;
+ unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level);
+
+ /*
+ * This should only be called with target != level and the entry is
+ * a superpage.
+ */
+ ASSERT(level > target);
+ ASSERT(p2m_is_superpage(p2m, *entry, level));
+
+ page = p2m_alloc_page(p2m->domain);
+ if ( !page )
+ return false;
+
+ page_list_add(page, &p2m->pages);
+ table = __map_domain_page(page);
+
+ /*
+ * We are either splitting a level-2 1G page into 512 level-1 2M
+ * pages, or a level-1 2M page into 512 level-0 4K pages.
+ */
+ for ( i = 0; i < XEN_PT_ENTRIES; i++ )
+ {
+ pte_t *new_entry = table + i;
+
+ /*
+ * Use the content of the superpage entry and override
+ * the necessary fields, so that the correct permissions are kept.
+ */
+ pte = *entry;
+ pte_set_mfn(&pte, mfn_add(mfn, i << level_order));
+
+ write_pte(new_entry, pte);
+ }
+
+ /*
+ * Shatter the superpage down to the level at which we want to make
+ * the changes. This is done outside the loop to avoid checking the
+ * offset for every entry to know whether it should be shattered.
+ */
+ if ( next_level != target )
+ rv = p2m_split_superpage(p2m, table + offsets[next_level],
+ level - 1, target, offsets);
+
+ /* TODO: why is it necessary to clean here rather than in the caller? */
+ if ( p2m->clean_pte )
+ clean_dcache_va_range(table, PAGE_SIZE);
+
+ unmap_domain_page(table);
+
+ /*
+ * Even if we failed, we should install the newly allocated PTE
+ * entry. The caller will be in charge of freeing the sub-tree.
+ */
+ p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte);
+
+ return rv;
+}
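
The MFN arithmetic in the split loop is worth spelling out: entry i of the
new table covers mfn + (i << level_order) of the old superpage. A
standalone sketch (not part of the patch), assuming Sv39's 9-bit level
order:

    #include <stdio.h>

    #define PAGETABLE_ORDER 9 /* Sv39: 512 entries per level */

    int main(void)
    {
        unsigned long mfn = 0x80000;  /* 2M-aligned superpage base */
        unsigned int level_order = 0; /* next level is 4K => order 0 */

        /* Print the first and last of the 512 child frames. */
        for ( unsigned int i = 0; i < (1U << PAGETABLE_ORDER); i += 511 )
            printf("entry %3u -> mfn %#lx\n",
                   i, mfn + ((unsigned long)i << level_order));

        return 0;
    }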
+
+static void p2m_put_foreign_page(struct page_info *pg)
+{
+ /*
+ * It's safe to do the put_page here because page_alloc will
+ * flush the TLBs if the page is reallocated before the removal
+ * operation completes.
+ */
+ put_page(pg);
+}
+
+/* Put any references on the single 4K page referenced by mfn. */
+static void p2m_put_4k_page(mfn_t mfn, p2m_type_t type)
+{
+ /* TODO: Handle other p2m types */
+ if ( p2m_is_foreign(type) )
+ {
+ ASSERT(mfn_valid(mfn));
+ p2m_put_foreign_page(mfn_to_page(mfn));
+ }
+ /* Detect the xenheap page and mark the stored GFN as invalid. */
+ else if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) )
+ page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN);
+}
+
+/* Put any references on the superpage referenced by mfn. */
+static void p2m_put_2m_superpage(mfn_t mfn, p2m_type_t type)
+{
+ struct page_info *pg;
+ unsigned int i;
+
+ /*
+ * TODO: Handle other p2m types, but be aware that any changes to
+ * handle different types would require an update of the relinquish
+ * code to handle preemption.
+ */
+ if ( !p2m_is_foreign(type) )
+ return;
+
+ ASSERT(mfn_valid(mfn));
+
+ pg = mfn_to_page(mfn);
+
+ for ( i = 0; i < XEN_PT_ENTRIES; i++, pg++ )
+ p2m_put_foreign_page(pg);
+}
+
+/* Put any references on the page referenced by pte. */
+static void p2m_put_page(struct p2m_domain *p2m, const pte_t pte,
+ unsigned int level)
+{
+ mfn_t mfn = pte_get_mfn(pte);
+ p2m_type_t p2m_type = p2m_type_radix_get(p2m, pte);
+
+ ASSERT(p2m_is_valid(p2m, pte));
+
+ /*
+ * TODO: Currently we don't handle level-2 superpages. Xen is not
+ * preemptible, so some work is needed to handle such superpages: at
+ * some point Xen might end up freeing the memory behind them, and for
+ * such a big mapping that could turn into a very long operation.
+ */
+ if ( level == 1 )
+ return p2m_put_2m_superpage(mfn, p2m_type);
+ else if ( level == 0 )
+ return p2m_put_4k_page(mfn, p2m_type);
+}
+
+static void p2m_free_page(struct domain *d, struct page_info *pg)
+{
+ if ( is_hardware_domain(d) )
+ free_domheap_page(pg);
+ else
+ {
+ spin_lock(&d->arch.paging.lock);
+ page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
+ spin_unlock(&d->arch.paging.lock);
+ }
+}
+
+/* Free pte sub-tree behind an entry */
+static void p2m_free_entry(struct p2m_domain *p2m,
+ pte_t entry, unsigned int level)
+{
+ unsigned int i;
+ pte_t *table;
+ mfn_t mfn;
+ struct page_info *pg;
+
+ /* Nothing to do if the entry is invalid. */
+ if ( !p2m_is_valid(p2m, entry) )
+ return;
+
+ if ( p2m_is_superpage(p2m, entry, level) || (level == 0) )
+ {
+#ifdef CONFIG_IOREQ_SERVER
+ /*
+ * If this gets called then either the entry was replaced by an entry
+ * with a different base (valid case) or the shattering of a superpage
+ * has failed (error case).
+ * So, at worst, a spurious mapcache invalidation might be sent.
+ */
+ if ( p2m_is_ram(p2m_type_radix_get(p2m, entry)) &&
+ domain_has_ioreq_server(p2m->domain) )
+ ioreq_request_mapcache_invalidate(p2m->domain);
+#endif
+
+ p2m_put_page(p2m, entry, level);
+
+ return;
+ }
+
+ table = map_domain_page(pte_get_mfn(entry));
+ for ( i = 0; i < XEN_PT_ENTRIES; i++ )
+ p2m_free_entry(p2m, *(table + i), level - 1);
+
+ unmap_domain_page(table);
+
+ /*
+ * Make sure all the references in the TLB have been removed before
+ * freeing the intermediate page table.
+ * XXX: Should we defer the free of the page table to avoid the
+ * flush?
+ */
+ p2m_tlb_flush_sync(p2m);
+
+ mfn = pte_get_mfn(entry);
+ ASSERT(mfn_valid(mfn));
+
+ pg = mfn_to_page(mfn);
+
+ page_list_del(pg, &p2m->pages);
+ p2m_free_page(p2m->domain, pg);
+}
+
+/*
+ * Insert an entry in the p2m. This should be called with a mapping
+ * equal to a page/superpage.
+ */
+static int __p2m_set_entry(struct p2m_domain *p2m,
+ gfn_t sgfn,
+ unsigned int page_order,
+ mfn_t smfn,
+ p2m_type_t t,
+ p2m_access_t a)
+{
+ unsigned int level;
+ unsigned int target = page_order / PAGETABLE_ORDER;
+ pte_t *entry, *table, orig_pte;
+ int rc;
+ /* A mapping is removed if the MFN is invalid. */
+ bool removing_mapping = mfn_eq(smfn, INVALID_MFN);
+ DECLARE_OFFSETS(offsets, gfn_to_gaddr(sgfn));
+
+ ASSERT(p2m_is_write_locked(p2m));
+
+ /*
+ * Check if the level target is valid: we only support
+ * 4K, 2M and 1G mappings.
+ */
+ ASSERT(target <= 2);
+
+ table = p2m_get_root_pointer(p2m, sgfn);
+ if ( !table )
+ return -EINVAL;
+
+ for ( level = P2M_ROOT_LEVEL; level > target; level-- )
+ {
+ /*
+ * Don't try to allocate intermediate page table if the mapping
+ * is about to be removed.
+ */
+ rc = p2m_next_level(p2m, !removing_mapping,
+ level, &table, offsets[level]);
+ if ( (rc == GUEST_TABLE_MAP_NONE) || (rc == GUEST_TABLE_MAP_NOMEM) )
+ {
+ /*
+ * We are here because p2m_next_level has failed to map
+ * the intermediate page table (e.g. the table does not exist
+ * and the p2m tree is read-only). It is a valid case
+ * when removing a mapping as it may not exist in the
+ * page table. In this case, just ignore it.
+ */
+ rc = removing_mapping ? 0 : -ENOENT;
+ goto out;
+ }
+ else if ( rc != GUEST_TABLE_NORMAL )
+ break;
+ }
+
+ entry = table + offsets[level];
+
+ /*
+ * If we are here with level > target, we must be at a leaf node,
+ * and we need to break up the superpage.
+ */
+ if ( level > target )
+ {
+ /* We need to split the original page. */
+ pte_t split_pte = *entry;
+
+ ASSERT(p2m_is_superpage(p2m, *entry, level));
+
+ if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
+ {
+ /* Free the allocated sub-tree */
+ p2m_free_entry(p2m, split_pte, level);
+
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Follow the break-before-make sequence to update the entry. */
+ p2m_remove_pte(entry, p2m->clean_pte);
+ p2m_force_tlb_flush_sync(p2m);
+
+ p2m_write_pte(entry, split_pte, p2m->clean_pte);
+
+ /* Then descend to the level where we want to make the real changes. */
+ for ( ; level > target; level-- )
+ {
+ rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
+
+ /*
+ * The entry should be found and either be a table
+ * or a superpage if level 0 is not targeted
+ */
+ ASSERT(rc == GUEST_TABLE_NORMAL ||
+ (rc == GUEST_TABLE_SUPER_PAGE && target > 0));
+ }
+
+ entry = table + offsets[level];
+ }
+
+ /*
+ * We should always arrive here at the correct level, because
+ * all the intermediate tables have been installed if necessary.
+ */
+ ASSERT(level == target);
+
+ orig_pte = *entry;
+
+ /*
+ * The access type should always be p2m_access_rwx when the mapping
+ * is removed.
+ */
+ ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
+
+ /*
+ * Always remove the entry in order to follow the break-before-make
+ * sequence when updating the translation table.
+ */
+ if ( pte_is_valid(orig_pte) || removing_mapping )
+ p2m_remove_pte(entry, p2m->clean_pte);
+
+ if ( removing_mapping )
+ /* Flush can be deferred if the entry is removed */
+ p2m->need_flush |= !!pte_is_valid(orig_pte);
+ else
+ {
+ pte_t pte = p2m_entry_from_mfn(p2m, smfn, t, a);
+
+ /*
+ * It is necessary to flush the TLB before writing the new entry
+ * to keep coherency when the previous entry was valid.
+ *
+ * However, it could be deferred when only the permissions are
+ * changed (e.g. in the case of memaccess).
+ */
+ if ( pte_is_valid(orig_pte) )
+ {
+ if ( P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
+ p2m_force_tlb_flush_sync(p2m);
+ else
+ p2m->need_flush = true;
+ }
+
+ p2m_write_pte(entry, pte, p2m->clean_pte);
+
+ p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
+ gfn_add(sgfn, (1UL << page_order) - 1));
+ p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
+ }
+
+#ifdef CONFIG_HAS_PASSTHROUGH
+ if ( is_iommu_enabled(p2m->domain) &&
+ (pte_is_valid(orig_pte) || pte_is_valid(*entry)) )
+ {
+ unsigned int flush_flags = 0;
+
+ if ( pte_is_valid(orig_pte) )
+ flush_flags |= IOMMU_FLUSHF_modified;
+ if ( pte_is_valid(*entry) )
+ flush_flags |= IOMMU_FLUSHF_added;
+
+ rc = iommu_iotlb_flush(p2m->domain, _dfn(gfn_x(sgfn)),
+ 1UL << page_order, flush_flags);
+ }
+ else
+#endif
+ rc = 0;
+
+ /*
+ * Free the entry only if the original pte was valid and the base
+ * is different (to avoid freeing when permission is changed).
+ */
+ if ( p2m_is_valid(p2m, orig_pte) &&
+ !mfn_eq(pte_get_mfn(*entry), pte_get_mfn(orig_pte)) )
+ p2m_free_entry(p2m, orig_pte, level);
+
+out:
+ unmap_domain_page(table);
+
+ return rc;
+}
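
The break-before-make ordering that __p2m_set_entry() follows when a valid
entry is replaced can be shown in miniature. A standalone sketch (not part
of the patch) with stubs standing in for the real helpers:

    #include <stdio.h>

    typedef struct { unsigned long pte; } pte_t;

    static void write_pte(pte_t *e, pte_t v) { *e = v; }
    static void tlb_flush(void) { puts("flush stale translations"); }

    static void replace_entry(pte_t *e, pte_t new_pte)
    {
        pte_t empty = { 0 };

        write_pte(e, empty);   /* 1. break: invalidate the old entry */
        tlb_flush();           /* 2. ensure no CPU holds a stale TLB entry */
        write_pte(e, new_pte); /* 3. make: install the new translation */
    }

    int main(void)
    {
        pte_t slot = { 0x200000 | 1 }, repl = { 0x400000 | 1 };

        replace_entry(&slot, repl);
        printf("final pte=%#lx\n", slot.pte);
        return 0;
    }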
+
+int p2m_set_entry(struct p2m_domain *p2m,
+ gfn_t sgfn,
+ unsigned long nr,
+ mfn_t smfn,
+ p2m_type_t t,
+ p2m_access_t a)
+{
+ int rc = 0;
+
+ /*
+ * Any reference taken by the P2M mappings (e.g. foreign mapping) will
+ * be dropped in relinquish_p2m_mapping(). As the P2M will still
+ * be accessible afterwards, we need to prevent mappings from being
+ * added while the domain is dying.
+ */
+ if ( unlikely(p2m->domain->is_dying) )
+ return -ENOMEM;
+
+ while ( nr )
+ {
+ unsigned long mask;
+ unsigned long order = 0;
+ /* 1GB, 2MB and 4KB mappings are supported */
+ unsigned int i = ( P2M_ROOT_LEVEL > 2 ) ? 2 : P2M_ROOT_LEVEL;
+
+ /*
+ * Don't take the MFN into account when removing a mapping (i.e. when
+ * it is INVALID_MFN), so that the correct target order is calculated.
+ *
+ * XXX: Support superpage mappings if nr is not aligned to a
+ * superpage size.
+ */
+ mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
+ mask |= gfn_x(sgfn) | nr;
+
+ for ( ; i != 0; i-- )
+ {
+ if ( !(mask & (BIT(XEN_PT_LEVEL_ORDER(i), UL) - 1)) )
+ {
+ order = XEN_PT_LEVEL_ORDER(i);
+ break;
+ }
+ }
+
+ rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
+ if ( rc )
+ break;
+
+ sgfn = gfn_add(sgfn, (1 << order));
+ if ( !mfn_eq(smfn, INVALID_MFN) )
+ smfn = mfn_add(smfn, (1 << order));
+
+ nr -= (1 << order);
+ }
+
+ return rc;
+}
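
The order-selection loop above can be exercised on its own: the chosen
order is the largest level order whose alignment the GFN, the MFN and the
remaining count all satisfy. A standalone sketch (not part of the patch),
assuming Sv39 level orders 0/9/18:

    #include <stdio.h>

    /* Sv39-style level orders: level 0 = 4K, 1 = 2M, 2 = 1G. */
    #define LEVEL_ORDER(lvl) ((lvl) * 9)

    static unsigned int pick_order(unsigned long gfn, unsigned long mfn,
                                   unsigned long nr)
    {
        unsigned long mask = gfn | mfn | nr;

        for ( unsigned int i = 2; i != 0; i-- )
            if ( !(mask & ((1UL << LEVEL_ORDER(i)) - 1)) )
                return LEVEL_ORDER(i);

        return 0;
    }

    int main(void)
    {
        /* 2M-aligned gfn/mfn with 512 frames left => order 9 (one 2M entry). */
        printf("order=%u\n", pick_order(0x200, 0x80200, 512));
        /* A misaligned mfn forces 4K mappings => order 0. */
        printf("order=%u\n", pick_order(0x200, 0x80201, 512));
        return 0;
    }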
+
+static int p2m_insert_mapping(struct domain *d, gfn_t start_gfn,
+ unsigned long nr, mfn_t mfn, p2m_type_t t)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ int rc;
+
+ p2m_write_lock(p2m);
+ rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
+ p2m_write_unlock(p2m);
+
+ return rc;
+}
+
+int map_regions_p2mt(struct domain *d,
+ gfn_t gfn,
+ unsigned long nr,
+ mfn_t mfn,
+ p2m_type_t p2mt)
+{
+ return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
+}
+
+int guest_physmap_add_entry(struct domain *d,
+ gfn_t gfn,
+ mfn_t mfn,
+ unsigned long page_order,
+ p2m_type_t t)
+{
+ return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
+}
--
2.49.0