[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 04/17] Introduce support for version 2 grant tables. Use them by default when available.



This doesn't include any of the new features, like copy grants or
transitive grants, but it does include most of the V2 infrastructure.

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
 arch/x86/xen/grant-table.c          |   38 +++++++-
 drivers/xen/grant-table.c           |  177 +++++++++++++++++++++++++++++------
 include/xen/grant_table.h           |    8 +-
 include/xen/interface/grant_table.h |  125 +++++++++++++++++++++++-
 4 files changed, 308 insertions(+), 40 deletions(-)

diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5..77af9e9 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -54,6 +54,16 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
        return 0;
 }
 
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+                            unsigned long addr, void *data)
+{
+       uint64_t **frames = (uint64_t **)data;
+
+       set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+       (*frames)++;
+       return 0;
+}
+
 static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
                        unsigned long addr, void *data)
 {
@@ -64,10 +74,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
                           unsigned long max_nr_gframes,
-                          struct grant_entry **__shared)
+                          void **__shared)
 {
        int rc;
-       struct grant_entry *shared = *__shared;
+       void *shared = *__shared;
 
        if (shared == NULL) {
                struct vm_struct *area =
@@ -83,8 +93,28 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
        return rc;
 }
 
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-                             unsigned long nr_gframes)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+                          unsigned long max_nr_gframes,
+                          grant_status_t **__shared)
+{
+       int rc;
+       grant_status_t *shared = *__shared;
+
+       if (shared == NULL) {
+               struct vm_struct *area =
+                       xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+               BUG_ON(area == NULL);
+               shared = area->addr;
+               *__shared = shared;
+       }
+
+       rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+                                PAGE_SIZE * nr_gframes,
+                                map_pte_fn_status, &frames);
+       return rc;
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
        apply_to_page_range(&init_mm, (unsigned long)shared,
                            PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 52183aa..3ac29e3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -49,7 +49,10 @@
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
 #define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ?                     \
+ (PAGE_SIZE / sizeof(struct grant_entry_v1)) :   \
+ (PAGE_SIZE / sizeof(struct grant_entry_v2)))
 
 static void pending_free_timer(unsigned long ignore);
 
@@ -64,13 +67,22 @@ static DEFINE_TIMER(gnttab_delayed_free_timer, pending_free_timer, 0, 0);
 static DEFINE_SPINLOCK(gnttab_pending_free_lock);
 static DEFINE_SPINLOCK(gnttab_list_lock);
 
-static struct grant_entry *shared;
+static union {
+       struct grant_entry_v1 *v1;
+       struct grant_entry_v2 *v2;
+       void *raw;
+} shared;
+
+static grant_status_t *grstatus;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
 
+static int grant_table_version;
+
 static int gnttab_expand(unsigned int req_entries);
 
 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
 
 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
 {
@@ -150,15 +162,22 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid,
         *  1. Write ent->domid.
         *  2. Write ent->frame:
         *      GTF_permit_access:   Frame to which access is permitted.
-        *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
-        *                           frame, or zero if none.
+        *      GTF_accept_transfer: Pseudo-phys frame slot being filled by
+        *                           new frame, or zero if none.
         *  3. Write memory barrier (WMB).
         *  4. Write ent->flags, inc. valid type.
         */
-       shared[ref].frame = frame;
-       shared[ref].domid = domid;
-       wmb();
-       shared[ref].flags = flags;
+       if (grant_table_version == 1) {
+               shared.v1[ref].frame = frame;
+               shared.v1[ref].domid = domid;
+               wmb();
+               shared.v1[ref].flags = flags;
+       } else {
+               shared.v2[ref].frame = frame;
+               shared.v2[ref].hdr.domid = domid;
+               wmb();
+               shared.v2[ref].hdr.flags = GTF_permit_access | flags;
+       }
 }
 
 /*
@@ -191,7 +210,10 @@ int gnttab_query_foreign_access(grant_ref_t ref)
 {
        u16 nflags;
 
-       nflags = shared[ref].flags;
+       if (grant_table_version == 1)
+               nflags = shared.v1[ref].flags;
+       else
+               nflags = grstatus[ref];
 
        return (nflags & (GTF_reading|GTF_writing));
 }
@@ -200,13 +222,37 @@ EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
 static int _gnttab_end_foreign_access_ref(grant_ref_t ref)
 {
        u16 flags, nflags;
-
-       nflags = shared[ref].flags;
-       do {
-               flags = nflags;
-               if (flags & (GTF_reading|GTF_writing))
+       u16 *pflags;
+
+       if (grant_table_version == 1) {
+               pflags = &shared.v1[ref].flags;
+               nflags = *pflags;
+               do {
+                       flags = nflags;
+                       if (flags & (GTF_reading|GTF_writing))
+                               return 0;
+                       nflags = sync_cmpxchg(&shared.v1[ref].flags, flags,
+                                             0);
+               } while (nflags != flags);
+       } else {
+               shared.v2[ref].hdr.flags = 0;
+               mb();
+               if (grstatus[ref] & (GTF_reading|GTF_writing)) {
                        return 0;
-       } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+               } else {
+                       /* The read of grstatus needs to have acquire
+                          semantics.  On x86, reads already have
+                          that, and we just need to protect against
+                          compiler reorderings.  On other
+                          architectures we may need a full
+                          barrier. */
+#ifdef CONFIG_X86
+                       barrier();
+#else
+                       mb();
+#endif
+               }
+       }
 
        return 1;
 }
@@ -333,25 +379,34 @@ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
 {
        unsigned long frame;
        u16           flags;
+       u16          *pflags;
+
+       if (grant_table_version == 1)
+               pflags = &shared.v1[ref].flags;
+       else
+               pflags = &shared.v2[ref].hdr.flags;
 
        /*
         * If a transfer is not even yet started, try to reclaim the grant
         * reference and return failure (== 0).
         */
-       while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
-               if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+       while (!((flags = *pflags) & GTF_transfer_committed)) {
+               if (sync_cmpxchg(pflags, flags, 0) == flags)
                        return 0;
                cpu_relax();
        }
 
        /* If a transfer is in progress then wait until it is completed. */
        while (!(flags & GTF_transfer_completed)) {
-               flags = shared[ref].flags;
+               flags = *pflags;
                cpu_relax();
        }
 
        rmb();  /* Read the frame number /after/ reading completion status. */
-       frame = shared[ref].frame;
+       if (grant_table_version == 1)
+               frame = shared.v1[ref].frame;
+       else
+               frame = shared.v2[ref].frame;
        BUG_ON(frame == 0);
 
        return frame;
@@ -525,34 +580,98 @@ static inline unsigned int max_nr_grant_frames(void)
        return xen_max;
 }
 
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+       return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
+}
+
+static void gnttab_request_version(void)
+{
+       int rc;
+       struct gnttab_set_version gsv;
+
+       gsv.version = 2;
+       rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+       if (rc == 0) {
+               grant_table_version = 2;
+               printk(KERN_NOTICE "Using V2 grant tables.\n");
+       } else {
+               if (grant_table_version == 2) {
+                       /* If we've already used version 2 features,
+                          but then suddenly discover that they're not
+                          available (e.g. migrating to an older
+                          version of Xen), almost unbounded badness
+                          can happen. */
+                       panic("we need grant tables version 2, but only version 1 is available");
+               }
+               grant_table_version = 1;
+               printk(KERN_WARNING "Using legacy V1 grant tables; upgrade to a newer version of Xen.\n");
+       }
+}
+
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
        struct gnttab_setup_table setup;
-       unsigned long *frames;
+       unsigned long *gframes;
+       uint64_t *sframes;
        unsigned int nr_gframes = end_idx + 1;
+       unsigned int nr_sframes;
        int rc;
 
-       frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
-       if (!frames)
+       gframes = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+       if (!gframes)
                return -ENOMEM;
 
        setup.dom        = DOMID_SELF;
        setup.nr_frames  = nr_gframes;
-       set_xen_guest_handle(setup.frame_list, frames);
+       set_xen_guest_handle(setup.frame_list, gframes);
 
        rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
        if (rc == -ENOSYS) {
-               kfree(frames);
+               kfree(gframes);
                return -ENOSYS;
        }
 
        BUG_ON(rc || setup.status);
 
-       rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
-                                   &shared);
+       if (grant_table_version > 1) {
+               struct gnttab_get_status_frames getframes;
+
+               nr_sframes = nr_status_frames(nr_gframes);
+
+               sframes = kmalloc(nr_sframes  * sizeof(uint64_t),
+                                 GFP_ATOMIC);
+               if (!sframes) {
+                       kfree(gframes);
+                       return -ENOMEM;
+               }
+               getframes.dom        = DOMID_SELF;
+               getframes.nr_frames  = nr_sframes;
+               getframes.frame_list = (unsigned long)sframes;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+                                              &getframes, 1);
+               if (rc == -ENOSYS) {
+                       kfree(gframes);
+                       kfree(sframes);
+                       return -ENOSYS;
+               }
+
+               BUG_ON(rc || getframes.status);
+
+               rc = arch_gnttab_map_status(
+                       sframes, nr_sframes,
+                       nr_status_frames(max_nr_grant_frames()),
+                       &grstatus);
+               BUG_ON(rc);
+               kfree(sframes);
+       }
+
+       rc = arch_gnttab_map_shared(gframes, nr_gframes, max_nr_grant_frames(),
+                                   &shared.raw);
        BUG_ON(rc);
 
-       kfree(frames);
+       kfree(gframes);
 
        return 0;
 }
@@ -663,6 +782,7 @@ EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
 
 int gnttab_resume(void)
 {
+       gnttab_request_version();
        if (max_nr_grant_frames() < nr_grant_frames)
                return -ENOSYS;
        return gnttab_map(0, nr_grant_frames - 1);
@@ -670,7 +790,8 @@ int gnttab_resume(void)
 
 int gnttab_suspend(void)
 {
-       arch_gnttab_unmap_shared(shared, nr_grant_frames);
+       arch_gnttab_unmap(shared.raw, nr_grant_frames);
+       arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
        return 0;
 }
 
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index b89ee8a..1ebfbd9 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -149,9 +149,11 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, unsigned long addr,
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
                           unsigned long max_nr_gframes,
-                          struct grant_entry **__shared);
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-                             unsigned long nr_gframes);
+                          void **__shared);
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+                          unsigned long max_nr_gframes,
+                          grant_status_t **__shared);
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
 
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 8211af8..653f8c7 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -84,12 +84,22 @@
  */
 
 /*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
  * A grant table comprises a packed array of grant entries in one or more
  * page frames shared between Xen and a guest.
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+struct grant_entry_v1 {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
     uint16_t flags;
     /* The domain being granted foreign privileges. [GST] */
@@ -107,10 +117,13 @@ struct grant_entry {
  *  GTF_permit_access: Allow @domid to map/access @frame.
  *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
  *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
  */
 #define GTF_invalid         (0U<<0)
 #define GTF_permit_access   (1U<<0)
 #define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
 #define GTF_type_mask       (3U<<0)
 
 /*
@@ -118,6 +131,9 @@ struct grant_entry {
  *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
  *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
  *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
  */
 #define _GTF_readonly       (2)
 #define GTF_readonly        (1U<<_GTF_readonly)
@@ -125,6 +141,8 @@ struct grant_entry {
 #define GTF_reading         (1U<<_GTF_reading)
 #define _GTF_writing        (4)
 #define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)
 
 /*
  * Subflags for GTF_accept_transfer:
@@ -141,15 +159,75 @@ struct grant_entry {
 #define _GTF_transfer_completed (3)
 #define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
 
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
 
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
  */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
 
 /*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+struct grant_entry_v2 {
+    grant_entry_header_t hdr;
+    union {
+        /*
+         * The frame to which we are granting access.  This field has
+         * the same meaning as the grant_entry_v1 field of the same
+         * name.
+         */
+        uint32_t frame;
+
+        /*
+         * If the grant type is GTF_permit_access and GTF_sub_page is
+         * set, @domid is allowed to access bytes
+         * [@page_off,@page_off+@length) in frame @frame.
+         */
+        struct {
+            uint32_t frame;
+            uint16_t page_off;
+            uint16_t length;
+        } sub_page;
+
+        /*
+         * If the grant is GTF_transitive, @domid is allowed to use
+         * the grant @gref in domain @trans_domid, as if it was the
+         * local domain.  Obviously, the transitive access must be
+         * compatible with the original grant.
+         *
+         * The current version of Xen does not allow transitive grants
+         * to be mapped.
+         */
+        struct {
+            domid_t trans_domid;
+            uint16_t pad0;
+            grant_ref_t gref;
+        } transitive;
+
+        uint32_t __spacer[3]; /* Pad to a power of two */
+    };
+};
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
  */
-typedef uint32_t grant_ref_t;
 
 /*
  * Handle to track a mapping created via a grant reference.
@@ -343,6 +421,43 @@ struct gnttab_unmap_and_replace {
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
 
 /*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version          8
+struct gnttab_set_version {
+    /* IN parameters */
+    uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpxchg operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames     9
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    uint64_t frame_list;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
  * Bitfield values for update_pin_status.flags.
  */
  /* Map the grant entry for access by I/O devices. */
-- 
1.6.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.