>From linux-kernel-owner@xxxxxxxxxxxxxxx Tue Jul 26 12:50:37 2011 Return-Path: Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by andromeda.dapyr.net (8.13.4/8.13.4/Debian-3sarge3) with ESMTP id p6QGnwvD013486 for ; Tue, 26 Jul 2011 12:50:34 -0400 Received: (majordomo@xxxxxxxxxxxxxxx) by vger.kernel.org via listexpand id S1753921Ab1GZQt4 (ORCPT ); Tue, 26 Jul 2011 12:49:56 -0400 Received: from smtp02.citrix.com ([66.165.176.63]:8832 "EHLO SMTP02.CITRIX.COM" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753546Ab1GZQtv (ORCPT ); Tue, 26 Jul 2011 12:49:51 -0400 X-IronPort-AV: E=Sophos;i="4.67,270,1309752000"; d="scan'208";a="156708068" Received: from ftlpmailmx02.citrite.net ([10.13.107.66]) by FTLPIPO02.CITRIX.COM with ESMTP/TLS/RC4-MD5; 26 Jul 2011 12:49:50 -0400 Received: from smtp01.ad.xensource.com (10.219.128.104) by smtprelay.citrix.com (10.13.107.66) with Microsoft SMTP Server id 8.3.137.0; Tue, 26 Jul 2011 12:49:50 -0400 Received: from localhost.localdomain (kaball.uk.xensource.com [10.80.2.59]) by smtp01.ad.xensource.com (8.13.1/8.13.1) with ESMTP id p6QGngDE004417; Tue, 26 Jul 2011 09:49:42 -0700 From: To: konrad.wilk@xxxxxxxxxx CC: xen-devel@xxxxxxxxxxxxxxxxxxx, Jeremy.Fitzhardinge@xxxxxxxxxx, linux-kernel@xxxxxxxxxxxxxxx, Stefano.Stabellini@xxxxxxxxxxxxx, Ian.Campbell@xxxxxxxxxxxxx, Stefano Stabellini Subject: [PATCH] xen: modify kernel mappings corresponding to granted pages Date: Tue, 26 Jul 2011 17:55:45 +0100 Message-ID: <1311699345-32579-1-git-send-email-stefano.stabellini@xxxxxxxxxxxxx> X-Mailer: git-send-email 1.7.0.4 MIME-Version: 1.0 Content-Type: text/plain Sender: linux-kernel-owner@xxxxxxxxxxxxxxx Precedence: bulk List-ID: X-Mailing-List: linux-kernel@xxxxxxxxxxxxxxx X-Spam-Status: No, score=0.2 required=7.0 tests=FORGED_RCVD_HELO,NO_REAL_NAME autolearn=no version=3.0.4 X-Spam-Checker-Version: SpamAssassin 3.0.4 (2005-06-05) on andromeda.dapyr.net Content-Length: 12135 Lines: 347 From: Stefano Stabellini If we want to use granted pages for AIO, changing the mappings of a user vma and the corresponding p2m is not enough, we also need to update the kernel mappings accordingly. On x86_64 it is easy, we can issue another HYPERVISOR_grant_table_op right away in m2p_add_override. We can remove the mappings using another HYPERVISOR_grant_table_op in m2p_remove_override. On x86_32 it is more difficult because the pages are highmem pages and therefore we need to catch the set_pte that tries to map a granted page and issue an HYPERVISOR_grant_table_op instead. Same thing for unmapping them: we need to catch the pte clear or the set_pte that try to unmap a granted page and issue an HYPERVISOR_grant_table_op. Signed-off-by: Stefano Stabellini --- arch/x86/include/asm/xen/page.h | 5 ++- arch/x86/xen/mmu.c | 68 +++++++++++++++++++++++++++++++++++++++ arch/x86/xen/p2m.c | 47 ++++++++++++++++++++------ drivers/xen/gntdev.c | 27 +++++++++++++++- drivers/xen/grant-table.c | 4 +- include/xen/grant_table.h | 1 + 6 files changed, 137 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 64a619d..ec7bbfb 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -12,6 +12,7 @@ #include #include +#include #include /* Xen machine address */ @@ -31,8 +32,10 @@ typedef struct xpaddr { #define INVALID_P2M_ENTRY (~0UL) #define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) #define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) +#define GRANT_FRAME_BIT (1UL<<(BITS_PER_LONG-3)) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) +#define GRANT_FRAME(m) ((m) | GRANT_FRAME_BIT) /* Maximum amount of memory we can handle in a domain in pages */ #define MAX_DOMAIN_PAGES \ @@ -48,7 +51,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); extern int m2p_add_override(unsigned long mfn, struct page *page, - bool clear_pte); + struct gnttab_map_grant_ref *kmap_op); extern int m2p_remove_override(struct page *page, bool clear_pte); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4e37a7c0..13f20d8 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -282,8 +282,70 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) return true; } +#ifdef CONFIG_HIGHMEM +static int xen_unmap_granted_page(pte_t *ptep) +{ + unsigned long mfn; + struct page *page; + + if (pte_flags(*ptep) & _PAGE_USER) + return 1; + mfn = (ptep->pte & PTE_PFN_MASK) >> PAGE_SHIFT; + page = m2p_find_override(mfn); + if (page != NULL && (page->private & GRANT_FRAME_BIT)) { + int ret; + struct gnttab_unmap_grant_ref kunmap_op; + struct gnttab_map_grant_ref *kmap_op = + (struct gnttab_map_grant_ref *) page->index; + kunmap_op.host_addr = kmap_op->host_addr; + kunmap_op.handle = kmap_op->handle; + kunmap_op.dev_bus_addr = 0; + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, &kunmap_op, 1); + WARN(ret, "m2p_remove_override: pfn %lx mfn %lx, failed to " + "modify kernel mappings", page_to_pfn(page), mfn); + return ret; + } + return 1; +} +#endif + static void xen_set_pte(pte_t *ptep, pte_t pteval) { +#ifdef CONFIG_HIGHMEM + if (!(pte_flags(pteval) & _PAGE_USER)) { + int ret; + struct page *page; + unsigned long mfn = (pteval.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + page = m2p_find_override(mfn); + /* + * if this is a granted page we need to use a grant table + * hypercall to map it instead + */ + if (page != NULL && (page->private & GRANT_FRAME_BIT)) { + struct gnttab_map_grant_ref *kmap_op = + (struct gnttab_map_grant_ref *) page->index; + unsigned long old_mfn = kmap_op->dev_bus_addr; + kmap_op->host_addr = + arbitrary_virt_to_machine(ptep).maddr; + kmap_op->dev_bus_addr = 0; + ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, kmap_op, 1); + WARN(ret, "xen_set_pte: pfn %lx mfn %lx, failed to " + "modify kernel mappings", + page_to_pfn(page), mfn); + kmap_op->dev_bus_addr = old_mfn; + return; + } + /* + * even if it is not a granted page, the old page we are about + * to overwrite could be a granted page and in that case we need + * to unmap it using a grant table hypercall + */ + xen_unmap_granted_page(ptep); + } +#endif + if (!xen_batched_set_pte(ptep, pteval)) native_set_pte(ptep, pteval); } @@ -548,6 +610,12 @@ static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + /* + * check if this is a granted page and unmap it using a grant table + * hypercall in that case + */ + if (!xen_unmap_granted_page(ptep)) + return; if (!xen_batched_set_pte(ptep, native_make_pte(0))) native_pte_clear(mm, addr, ptep); } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58efeb9..1c4d2b5 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -161,6 +161,7 @@ #include #include #include +#include #include "xen-ops.h" @@ -676,7 +677,8 @@ static unsigned long mfn_hash(unsigned long mfn) } /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) +int m2p_add_override(unsigned long mfn, struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long pfn; @@ -699,9 +701,18 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; - if (clear_pte && !PageHighMem(page)) - /* Just zap old mapping for now */ - pte_clear(&init_mm, address, ptep); + if (kmap_op != NULL) { + if (!PageHighMem(page)) { + int ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, kmap_op, 1); + WARN(ret, "m2p_add_override: pfn %lx mfn %lx, " + "failed to modify kernel mappings", pfn, mfn); + } + page->private |= GRANT_FRAME_BIT; + /* let's use dev_bus_addr to record the old mfn instead */ + kmap_op->dev_bus_addr = page->index; + page->index = (unsigned long) kmap_op; + } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); @@ -735,13 +746,27 @@ int m2p_remove_override(struct page *page, bool clear_pte) spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - set_phys_to_machine(pfn, page->index); - if (clear_pte && !PageHighMem(page)) - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - /* No tlb flush necessary because the caller already - * left the pte unmapped. */ + if (clear_pte) { + struct gnttab_map_grant_ref *map_op = + (struct gnttab_map_grant_ref *) page->index; + set_phys_to_machine(pfn, map_op->dev_bus_addr); + if (!PageHighMem(page)) { + int ret; + struct gnttab_unmap_grant_ref unmap_op; + unmap_op.host_addr = map_op->host_addr; + unmap_op.handle = map_op->handle; + unmap_op.dev_bus_addr = 0; + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, &unmap_op, 1); + WARN(ret, "m2p_remove_override: pfn %lx mfn %lx, " + "failed to modify kernel mappings", pfn, mfn); + set_pte_at(&init_mm, address, ptep, + pfn_pte(pfn, PAGE_KERNEL)); + __flush_tlb_single(address); + } + } else + set_phys_to_machine(pfn, page->index); return 0; } @@ -758,7 +783,7 @@ struct page *m2p_find_override(unsigned long mfn) spin_lock_irqsave(&m2p_override_lock, flags); list_for_each_entry(p, bucket, lru) { - if (p->private == mfn) { + if ((p->private & (~GRANT_FRAME_BIT)) == mfn) { ret = p; break; } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f914b26..ca41772 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -83,6 +83,7 @@ struct grant_map { struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; + struct gnttab_map_grant_ref *kmap_ops; struct page **pages; }; @@ -116,10 +117,12 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); + add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL); add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || + NULL == add->kmap_ops || NULL == add->pages) goto err; @@ -129,6 +132,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) for (i = 0; i < count; i++) { add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; + add->kmap_ops[i].handle = -1; } add->index = 0; @@ -142,6 +146,7 @@ err: kfree(add->grants); kfree(add->map_ops); kfree(add->unmap_ops); + kfree(add->kmap_ops); kfree(add); return NULL; } @@ -243,10 +248,30 @@ static int map_grant_pages(struct grant_map *map) gnttab_set_unmap_op(&map->unmap_ops[i], addr, map->flags, -1 /* handle */); } + } else { + for (i = 0; i < map->count; i++) { + unsigned level; + unsigned long address = (unsigned long) + pfn_to_kaddr(page_to_pfn(map->pages[i])); + pte_t *ptep; + u64 pte_maddr = 0; + if (!PageHighMem(map->pages[i])) { + ptep = lookup_address(address, &level); + pte_maddr = + arbitrary_virt_to_machine(ptep).maddr; + } + gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, + map->flags | + GNTMAP_host_map | + GNTMAP_contains_pte, + map->grants[i].ref, + map->grants[i].domid); + } } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs(map->map_ops, map->pages, map->count); + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, + map->pages, map->count); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index fd725cd..1e8669a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -448,6 +448,7 @@ unsigned int gnttab_max_grant_frames(void) EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) { int i, ret; @@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, */ return -EOPNOTSUPP; } - ret = m2p_add_override(mfn, pages[i], - map_ops[i].flags & GNTMAP_contains_pte); + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); if (ret) return ret; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index b1fab6b..6b99bfb 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct page **pages, unsigned int count); -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/