[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 11/16] xen/arm: Data abort exception (R/W) mem_events.



This patch enables to store, set, check and deliver LPAE R/W mem_events.
As the LPAE PTE's lack enough available software programmable bits,
we store the permissions in a Radix tree, where the key is the pfn
of a 4k page. Only settings other than p2m_access_rwx are saved
in the Radix tree.

Signed-off-by: Tamas K Lengyel <tklengyel@xxxxxxxxxxxxx>
---
v4: - Add p2m_mem_access_radix_set function to be called
      when inserting new PTE's and when updating existing entries.
    - Switch p2m_mem_access_check to return bool_t.
    - Use new struct npfec to pass violation info.
v3: - Add new function for updating the PTE entries, p2m_set_entry.
    - Use the new struct npfec to pass violation information.
    - Implement n2rwx, rx2rw and listener required routines.
v2: - Patch been split to ease the review process.
    - Add definitions of data abort data fetch status codes (enum dabt_dfsc)
      and only call p2m_mem_access_check for traps caused by permission 
violations.
    - Only call p2m_write_pte in p2m_lookup if the PTE permission actually 
changed.
    - Properly save settings in the Radix tree and pause the VCPU with
      mem_event_vcpu_pause.
---
 xen/arch/arm/p2m.c              | 443 +++++++++++++++++++++++++++++++++++++---
 xen/arch/arm/traps.c            |  31 ++-
 xen/include/asm-arm/p2m.h       |  17 ++
 xen/include/asm-arm/processor.h |  30 +++
 4 files changed, 487 insertions(+), 34 deletions(-)

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 29e03b6..c723f9a 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -10,6 +10,7 @@
 #include <asm/event.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
+#include <xen/radix-tree.h>
 #include <xen/mem_event.h>
 #include <public/mem_event.h>
 #include <xen/mem_access.h>
@@ -148,6 +149,74 @@ static lpae_t *p2m_map_first(struct p2m_domain *p2m, 
paddr_t addr)
     return __map_domain_page(page);
 }
 
+static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
+{
+    /* First apply type permissions */
+    switch ( t )
+    {
+    case p2m_ram_rw:
+        e->p2m.xn = 0;
+        e->p2m.write = 1;
+        break;
+
+    case p2m_ram_ro:
+        e->p2m.xn = 0;
+        e->p2m.write = 0;
+        break;
+
+    case p2m_iommu_map_rw:
+    case p2m_map_foreign:
+    case p2m_grant_map_rw:
+    case p2m_mmio_direct:
+        e->p2m.xn = 1;
+        e->p2m.write = 1;
+        break;
+
+    case p2m_iommu_map_ro:
+    case p2m_grant_map_ro:
+    case p2m_invalid:
+        e->p2m.xn = 1;
+        e->p2m.write = 0;
+        break;
+
+    case p2m_max_real_type:
+        BUG();
+        break;
+    }
+
+    /* Then restrict with access permissions */
+    switch( a )
+    {
+    case p2m_access_n:
+        e->p2m.read = e->p2m.write = 0;
+        e->p2m.xn = 1;
+        break;
+    case p2m_access_r:
+        e->p2m.write = 0;
+        e->p2m.xn = 1;
+        break;
+    case p2m_access_x:
+        e->p2m.write = 0;
+        e->p2m.read = 0;
+        break;
+    case p2m_access_rx:
+        e->p2m.write = 0;
+        break;
+    case p2m_access_w:
+        e->p2m.read = 0;
+        e->p2m.xn = 1;
+        break;
+    case p2m_access_rw:
+        e->p2m.xn = 1;
+        break;
+    case p2m_access_wx:
+        e->p2m.read = 0;
+        break;
+    case p2m_access_rwx:
+        break;
+    }
+}
+
 /*
  * Lookup the MFN corresponding to a domain's PFN.
  *
@@ -258,37 +327,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned 
int mattr,
         break;
     }
 
-    switch (t)
-    {
-    case p2m_ram_rw:
-        e.p2m.xn = 0;
-        e.p2m.write = 1;
-        break;
-
-    case p2m_ram_ro:
-        e.p2m.xn = 0;
-        e.p2m.write = 0;
-        break;
-
-    case p2m_iommu_map_rw:
-    case p2m_map_foreign:
-    case p2m_grant_map_rw:
-    case p2m_mmio_direct:
-        e.p2m.xn = 1;
-        e.p2m.write = 1;
-        break;
-
-    case p2m_iommu_map_ro:
-    case p2m_grant_map_ro:
-    case p2m_invalid:
-        e.p2m.xn = 1;
-        e.p2m.write = 0;
-        break;
-
-    case p2m_max_real_type:
-        BUG();
-        break;
-    }
+    p2m_set_permission(&e, t, a);
 
     ASSERT(!(pa & ~PAGE_MASK));
     ASSERT(!(pa & ~PADDR_MASK));
@@ -440,6 +479,33 @@ static bool_t is_mapping_aligned(const paddr_t 
start_gpaddr,
     return true;
 }
 
+static long p2m_mem_access_radix_set(struct p2m_domain *p2m, unsigned long pfn,
+                                     p2m_access_t a)
+{
+    long rc;
+
+    if ( p2m_access_rwx == a )
+    {
+        radix_tree_delete(&p2m->mem_access_settings, pfn);
+        return 0;
+    }
+
+    rc = radix_tree_insert(&p2m->mem_access_settings, pfn,
+                           radix_tree_int_to_ptr(a));
+
+    if ( -EEXIST == rc )
+    {
+        /* If a setting existed already, change it to the new one */
+        radix_tree_replace_slot(
+            radix_tree_lookup_slot(
+                &p2m->mem_access_settings, pfn),
+            radix_tree_int_to_ptr(a));
+        rc = 0;
+    }
+
+    return rc;
+}
+
 #define P2M_ONE_DESCEND        0
 #define P2M_ONE_PROGRESS_NOP   0x1
 #define P2M_ONE_PROGRESS       0x10
@@ -502,6 +568,14 @@ static int apply_one_level(struct domain *d,
                 pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t, a);
                 if ( level < 3 )
                     pte.p2m.table = 0;
+
+                if ( p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a) < 0 
)
+                {
+                    /* If we fail to save the setting in the Radix tree, we
+                     * need to clear the PTE mem_access permissions. */
+                    p2m_set_permission(&pte, pte.p2m.type, p2m_access_rwx);
+                }
+
                 p2m_write_pte(entry, pte, flush_cache);
                 p2m->stats.mappings[level]++;
 
@@ -538,6 +612,13 @@ static int apply_one_level(struct domain *d,
             if ( level < 3 )
                 pte.p2m.table = 0; /* Superpage entry */
 
+            if ( p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a) < 0 )
+            {
+                /* If we fail to save the setting in the Radix tree, we
+                 * need to clear the PTE mem_access permissions. */
+                p2m_set_permission(&pte, pte.p2m.type, p2m_access_rwx);
+            }
+
             p2m_write_pte(entry, pte, flush_cache);
 
             *flush |= p2m_valid(orig_pte);
@@ -641,6 +722,7 @@ static int apply_one_level(struct domain *d,
 
         memset(&pte, 0x00, sizeof(pte));
         p2m_write_pte(entry, pte, flush_cache);
+        radix_tree_delete(&p2m->mem_access_settings, paddr_to_pfn(*addr));
 
         *addr += level_size;
 
@@ -1081,6 +1163,305 @@ err:
     return page;
 }
 
+static int p2m_set_entry(struct domain *d, paddr_t paddr, p2m_access_t p2ma)
+{
+    int rc = 0;
+    struct p2m_domain *p2m = &d->arch.p2m;
+    lpae_t *pte, *first = NULL, *second = NULL, *third = NULL;
+
+    first = p2m_map_first(p2m, paddr);
+    if ( !first )
+        goto out;
+
+    pte = &first[first_table_offset(paddr)];
+    if( !p2m_valid(*pte) )
+        goto out;
+
+    if ( !p2m_table(*pte) )
+        goto done;
+
+    second = map_domain_page(pte->p2m.base);
+    if ( !second )
+        goto out;
+
+    pte = &second[second_table_offset(paddr)];
+    if( !p2m_valid(*pte) )
+        goto out;
+
+    if ( !p2m_table(*pte) )
+        goto done;
+
+    third = map_domain_page(pte->p2m.base);
+    if ( !third )
+        goto out;
+
+    pte = &third[third_table_offset(paddr)];
+
+    BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);
+
+    /* This bit must be one in the level 3 entry */
+    if ( !p2m_table(*pte) )
+        pte->bits = 0;
+
+done:
+    if ( p2m_valid(*pte) )
+    {
+        ASSERT(pte->p2m.type != p2m_invalid);
+
+        p2m_set_permission(pte, pte->p2m.type, p2ma);
+        p2m_write_pte(pte, *pte, 1);
+        rc = 1;
+    }
+
+out:
+    if (third) unmap_domain_page(third);
+    if (second) unmap_domain_page(second);
+    if (first) unmap_domain_page(first);
+
+    return rc;
+}
+
+bool_t p2m_mem_access_check(paddr_t gpa, vaddr_t gla, struct npfec npfec)
+{
+    struct vcpu *v = current;
+    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
+    mem_event_request_t *req = NULL;
+    xenmem_access_t xma;
+    bool_t violation;
+    int rc;
+
+    rc = p2m_get_mem_access(v->domain, paddr_to_pfn(gpa), &xma);
+    if ( rc )
+    {
+        /* No setting was found, reinject */
+        return 1;
+    }
+    else
+    {
+        /* First, handle rx2rw and n2rwx conversion automatically. */
+        if ( npfec.write_access && xma == XENMEM_access_rx2rw )
+        {
+            rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+                                    0, ~0, XENMEM_access_rw);
+            return 0;
+        }
+        else if ( xma == XENMEM_access_n2rwx )
+        {
+            rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+                                    0, ~0, XENMEM_access_rwx);
+        }
+    }
+
+    /* Otherwise, check if there is a memory event listener, and send the 
message along */
+    if ( !mem_event_check_ring( &v->domain->mem_event->access ) )
+    {
+        /* No listener */
+        if ( p2m->access_required )
+        {
+            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+                                  "no mem_event listener VCPU %d, dom %d\n",
+                                  v->vcpu_id, v->domain->domain_id);
+            domain_crash(v->domain);
+        }
+        else
+        {
+            /* n2rwx was already handled */
+            if ( xma != XENMEM_access_n2rwx)
+            {
+                /* A listener is not required, so clear the access
+                 * restrictions. */
+                rc = p2m_set_mem_access(v->domain, paddr_to_pfn(gpa), 1,
+                                        0, ~0, XENMEM_access_rwx);
+            }
+        }
+
+        /* No need to reinject */
+        return 0;
+    }
+
+    switch ( xma )
+    {
+    default:
+    case XENMEM_access_n:
+        violation = npfec.read_access || npfec.write_access || 
npfec.insn_fetch;
+        break;
+    case XENMEM_access_r:
+        violation = npfec.write_access || npfec.insn_fetch;
+        break;
+    case XENMEM_access_w:
+        violation = npfec.read_access || npfec.insn_fetch;
+        break;
+    case XENMEM_access_x:
+        violation = npfec.read_access || npfec.write_access;
+        break;
+    case XENMEM_access_rx:
+        violation = npfec.write_access;
+        break;
+    case XENMEM_access_wx:
+        violation = npfec.read_access;
+        break;
+    case XENMEM_access_rw:
+        violation = npfec.insn_fetch;
+        break;
+    case XENMEM_access_rwx:
+        violation = 0;
+        break;
+    }
+
+    if ( !violation )
+        return 1;
+
+    req = xzalloc(mem_event_request_t);
+    if ( req )
+    {
+        req->reason = MEM_EVENT_REASON_VIOLATION;
+        if ( xma != XENMEM_access_n2rwx )
+            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+        req->gfn = gpa >> PAGE_SHIFT;
+        req->offset =  gpa & ((1 << PAGE_SHIFT) - 1);
+        req->gla = gla;
+        req->gla_valid = npfec.gla_valid;
+        req->access_r = npfec.read_access;
+        req->access_w = npfec.write_access;
+        req->access_x = npfec.insn_fetch;
+        if ( npfec_kind_in_gpt == npfec.kind )
+            req->fault_in_gpt = 1;
+        if ( npfec_kind_with_gla == npfec.kind )
+            req->fault_with_gla = 1;
+        req->vcpu_id = v->vcpu_id;
+
+        mem_access_send_req(v->domain, req);
+        xfree(req);
+    }
+
+    /* Pause the current VCPU */
+    if ( xma != XENMEM_access_n2rwx )
+        mem_event_vcpu_pause(v);
+
+    return 0;
+}
+
+/* Set access type for a region of pfns.
+ * If start_pfn == -1ul, sets the default access type */
+long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
+                        uint32_t start, uint32_t mask, xenmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_access_t a;
+    long rc = 0;
+
+    static const p2m_access_t memaccess[] = {
+#define ACCESS(ac) [XENMEM_access_##ac] = p2m_access_##ac
+        ACCESS(n),
+        ACCESS(r),
+        ACCESS(w),
+        ACCESS(rw),
+        ACCESS(x),
+        ACCESS(rx),
+        ACCESS(wx),
+        ACCESS(rwx),
+#undef ACCESS
+    };
+
+    switch ( access )
+    {
+    case 0 ... ARRAY_SIZE(memaccess) - 1:
+        a = memaccess[access];
+        break;
+    case XENMEM_access_default:
+        a = p2m->default_access;
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    /* If request to set default access */
+    if ( pfn == ~0ul )
+    {
+        p2m->default_access = a;
+        return 0;
+    }
+
+    spin_lock(&p2m->lock);
+    for ( pfn += start; nr > start; ++pfn )
+    {
+
+        rc = p2m_set_entry(d, pfn_to_paddr(pfn), a);
+        if ( rc < 0 )
+            break;
+
+        if ( rc )
+        {
+            rc = p2m_mem_access_radix_set(p2m, pfn, a);
+            if ( rc < 0 )
+            {
+                /* If we fail to save the setting in the Radix tree, we
+                 * need to clear the PTE mem_access permissions. */
+                p2m_set_entry(d, pfn_to_paddr(pfn), p2m_access_rwx);
+                break;
+            }
+        }
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( nr > ++start && !(start & mask) && hypercall_preempt_check() )
+        {
+            rc = start;
+            break;
+        }
+    }
+
+    /* Flush the TLB of the domain to ensure consistency */
+    flush_tlb_domain(d);
+
+    spin_unlock(&p2m->lock);
+    return rc;
+}
+
+int p2m_get_mem_access(struct domain *d, unsigned long gpfn,
+                       xenmem_access_t *access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    void *i;
+    unsigned int index;
+
+    static const xenmem_access_t memaccess[] = {
+#define ACCESS(ac) [XENMEM_access_##ac] = XENMEM_access_##ac
+            ACCESS(n),
+            ACCESS(r),
+            ACCESS(w),
+            ACCESS(rw),
+            ACCESS(x),
+            ACCESS(rx),
+            ACCESS(wx),
+            ACCESS(rwx),
+#undef ACCESS
+    };
+
+    /* If request to get default access */
+    if ( gpfn == ~0ull )
+    {
+        *access = memaccess[p2m->default_access];
+        return 0;
+    }
+
+    spin_lock(&p2m->lock);
+
+    i = radix_tree_lookup(&p2m->mem_access_settings, gpfn);
+
+    spin_unlock(&p2m->lock);
+
+    if ( !i )
+        return -ESRCH;
+
+    index = radix_tree_ptr_to_int(i);
+
+    if ( index >= ARRAY_SIZE(memaccess) )
+        return -ERANGE;
+
+    *access =  memaccess[index];
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 019991f..7eb875a 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1852,13 +1852,38 @@ static void do_trap_data_abort_guest(struct 
cpu_user_regs *regs,
     info.gva = READ_SYSREG64(FAR_EL2);
 #endif
 
-    if (dabt.s1ptw)
-        goto bad_data_abort;
-
     rc = gva_to_ipa(info.gva, &info.gpa);
     if ( rc == -EFAULT )
         goto bad_data_abort;
 
+    switch ( dabt.dfsc )
+    {
+    case DABT_DFSC_PERMISSION_1:
+    case DABT_DFSC_PERMISSION_2:
+    case DABT_DFSC_PERMISSION_3:
+    {
+        struct npfec npfec = {
+            .read_access = 1,
+            .write_access = dabt.write,
+            .gla_valid = 1,
+            .kind = dabt.s1ptw ? npfec_kind_in_gpt : npfec_kind_with_gla
+        };
+
+        rc = p2m_mem_access_check(info.gpa, info.gva, npfec);
+
+        /* Trap was triggered by mem_access, work here is done */
+        if ( !rc )
+            return;
+    }
+    break;
+
+    default:
+        break;
+    }
+
+    if ( dabt.s1ptw )
+        goto bad_data_abort;
+
     /* XXX: Decode the instruction if ISS is not valid */
     if ( !dabt.valid )
         goto bad_data_abort;
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index b2009ee..35466dd 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -247,6 +247,23 @@ static inline bool_t p2m_mem_event_sanity_check(struct 
domain *d)
     return 1;
 }
 
+/* Send mem event based on the access (gla is -1ull if not available). Boolean
+ * return value indicates if trap needs to be injected into guest. */
+bool_t p2m_mem_access_check(paddr_t gpa, vaddr_t gla, struct npfec npfec);
+
+/* Resumes the running of the VCPU, restarting the last instruction */
+void p2m_mem_access_resume(struct domain *d);
+
+/* Set access type for a region of pfns.
+ * If start_pfn == -1ul, sets the default access type */
+long p2m_set_mem_access(struct domain *d, unsigned long start_pfn, uint32_t nr,
+                        uint32_t start, uint32_t mask, xenmem_access_t access);
+
+/* Get access type for a pfn
+ * If pfn == -1ul, gets the default access type */
+int p2m_get_mem_access(struct domain *d, unsigned long pfn,
+                       xenmem_access_t *access);
+
 #endif /* _XEN_P2M_H */
 
 /*
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 0cc5b6d..b844f1d 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -262,6 +262,36 @@ enum dabt_size {
     DABT_DOUBLE_WORD = 3,
 };
 
+/* Data abort data fetch status codes */
+enum dabt_dfsc {
+    DABT_DFSC_ADDR_SIZE_0       = 0b000000,
+    DABT_DFSC_ADDR_SIZE_1       = 0b000001,
+    DABT_DFSC_ADDR_SIZE_2       = 0b000010,
+    DABT_DFSC_ADDR_SIZE_3       = 0b000011,
+    DABT_DFSC_TRANSLATION_0     = 0b000100,
+    DABT_DFSC_TRANSLATION_1     = 0b000101,
+    DABT_DFSC_TRANSLATION_2     = 0b000110,
+    DABT_DFSC_TRANSLATION_3     = 0b000111,
+    DABT_DFSC_ACCESS_1          = 0b001001,
+    DABT_DFSC_ACCESS_2          = 0b001010,
+    DABT_DFSC_ACCESS_3          = 0b001011,
+    DABT_DFSC_PERMISSION_1      = 0b001101,
+    DABT_DFSC_PERMISSION_2      = 0b001110,
+    DABT_DFSC_PERMISSION_3      = 0b001111,
+    DABT_DFSC_SYNC_EXT          = 0b010000,
+    DABT_DFSC_SYNC_PARITY       = 0b011000,
+    DABT_DFSC_SYNC_EXT_TTW_0    = 0b010100,
+    DABT_DFSC_SYNC_EXT_TTW_1    = 0b010101,
+    DABT_DFSC_SYNC_EXT_TTW_2    = 0b010110,
+    DABT_DFSC_SYNC_EXT_TTW_3    = 0b010111,
+    DABT_DFSC_SYNC_PARITY_TTW_0 = 0b011100,
+    DABT_DFSC_SYNC_PARITY_TTW_1 = 0b011101,
+    DABT_DFSC_SYNC_PARITY_TTW_2 = 0b011110,
+    DABT_DFSC_SYNC_PARITY_TTW_3 = 0b011111,
+    DABT_DFSC_ALIGNMENT         = 0b100001,
+    DABT_DFSC_TLB_CONFLICT      = 0b110000,
+};
+
 union hsr {
     uint32_t bits;
     struct {
-- 
2.1.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.