Xen project Mailing List

[Xen-ia64-devel] [RFC] paravirtualize stock swiotlb

To: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>

From: Alex Williamson <alex.williamson@xxxxxx>

Date: Fri, 27 Apr 2007 18:13:07 -0600

Delivery-date: Fri, 27 Apr 2007 17:11:39 -0700

List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>

We need to get to a CONFIG_IA64_GENERIC kernel working, and the first step to doing that seems to be fixing our usage of the i386 swiotlb and pci-dma-xen. The patch below paravirtualizes lib/swiotlb.c and rips out a lot of the code we had around for supporting the i386 versions. It should be a little more straight-forward to switch to the generic kernel flavor after this. This hasn't had much testing, so please don't run this on a machine with data you care about. I'm running it with swiotlb=force to try to exercise it, and it's holding up well. Comments welcome. Thanks, Alex Signed-off-by: Alex Williamson <alex.williamson@xxxxxx> --- diff -r 9313d0ce09f8 linux-2.6-xen-sparse/arch/ia64/xen/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Tue Apr 24 09:26:32 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Fri Apr 27 10:05:04 2007 -0600 @@ -3,7 +3,5 @@ # obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ - hypervisor.o pci-dma-xen.o util.o xencomm.o xcom_hcall.o \ - xcom_mini.o xcom_privcmd.o mem.o - -pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o + hypervisor.o util.o xencomm.o xcom_hcall.o xcom_mini.o \ + xcom_privcmd.o mem.o diff -r 9313d0ce09f8 linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Tue Apr 24 09:26:32 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Fri Apr 27 10:13:57 2007 -0600 @@ -13,7 +13,6 @@ #include <asm/swiotlb.h> #endif -#ifndef CONFIG_XEN #define dma_alloc_coherent platform_dma_alloc_coherent #define dma_alloc_noncoherent platform_dma_alloc_coherent /* coherent mem. is cheap */ #define dma_free_coherent platform_dma_free_coherent @@ -27,47 +26,6 @@ #define dma_sync_single_for_device platform_dma_sync_single_for_device #define dma_sync_sg_for_device platform_dma_sync_sg_for_device #define dma_mapping_error platform_dma_mapping_error -#else -int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction direction); -void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction direction); -int dma_supported(struct device *dev, u64 mask); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle); -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction); -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction); -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction); -int dma_mapping_error(dma_addr_t dma_addr); - -#define flush_write_buffers() do { } while (0) -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - if (swiotlb) - swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction); - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - if (swiotlb) - swiotlb_sync_sg_for_device(dev,sg,nelems,direction); - flush_write_buffers(); -} -#endif - #define dma_map_page(dev, pg, off, size, dir) \ dma_map_single(dev, page_address(pg) + (off), (size), (dir)) #define dma_unmap_page(dev, dma_addr, size, dir) \ @@ -83,9 +41,7 @@ dma_sync_sg_for_device(struct device *de #define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ dma_sync_single_for_device(dev, dma_handle, size, dir) -#ifndef CONFIG_XEN #define dma_supported platform_dma_supported -#endif static inline int dma_set_mask (struct device *dev, u64 mask) @@ -111,19 +67,6 @@ dma_cache_sync (void *vaddr, size_t size #define dma_is_consistent(dma_handle) (1) /* all we do is coherent memory... */ #ifdef CONFIG_XEN -/* arch/i386/kernel/swiotlb.o requires */ -void contiguous_bitmap_init(unsigned long end_pfn); - -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr) -{ - dma_addr_t mask = DMA_64BIT_MASK; - /* If the device has a mask, use it, otherwise default to 64 bits */ - if (hwdev && hwdev->dma_mask) - mask = *hwdev->dma_mask; - return (addr & ~mask) != 0; -} - static inline int range_straddles_page_boundary(void *p, size_t size) { diff -r 9313d0ce09f8 linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h --- a/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h Tue Apr 24 09:26:32 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec_dig.h Fri Apr 27 10:04:20 2007 -0600 @@ -13,19 +13,4 @@ extern ia64_mv_setup_t dig_setup; #define platform_name "dig" #define platform_setup dig_setup -#ifdef CONFIG_XEN -# define platform_dma_map_sg dma_map_sg -# define platform_dma_unmap_sg dma_unmap_sg -# define platform_dma_mapping_error dma_mapping_error -# define platform_dma_supported dma_supported -# define platform_dma_alloc_coherent dma_alloc_coherent -# define platform_dma_free_coherent dma_free_coherent -# define platform_dma_map_single dma_map_single -# define platform_dma_unmap_single dma_unmap_single -# define platform_dma_sync_single_for_cpu \ - dma_sync_single_for_cpu -# define platform_dma_sync_single_for_device \ - dma_sync_single_for_device -#endif - #endif /* _ASM_IA64_MACHVEC_DIG_h */ diff -r 9313d0ce09f8 linux-2.6-xen-sparse/lib/Makefile --- a/linux-2.6-xen-sparse/lib/Makefile Tue Apr 24 09:26:32 2007 -0600 +++ b/linux-2.6-xen-sparse/lib/Makefile Fri Apr 27 10:03:11 2007 -0600 @@ -52,7 +52,9 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o +ifeq ($(CONFIG_X86),y) swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o +endif hostprogs-y := gen_crc32table clean-files := crc32table.h diff -r 9313d0ce09f8 linux-2.6-xen-sparse/lib/swiotlb.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/lib/swiotlb.c Fri Apr 27 17:06:43 2007 -0600 @@ -0,0 +1,860 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is for IA-64 and EM64T platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx> + * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx> + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. + * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid + * unnecessary i-cache flushing. + * 04/07/.. ak Better overflow handling. Assorted fixes. + * 05/09/10 linville Add support for syncing ranges, support syncing for + * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. + */ + +#include <linux/cache.h> +#include <linux/dma-mapping.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ctype.h> + +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/scatterlist.h> + +#include <linux/init.h> +#include <linux/bootmem.h> + +#ifdef CONFIG_XEN +#define VIRT_TO_PHYS(x) virt_to_bus(x) +#define PHYS_TO_VIRT(x) bus_to_virt(x) +#else +#define VIRT_TO_PHYS(x) virt_to_phys(x) +#define PHYS_TO_VIRT(x) phys_to_virt(x) +#define range_straddles_page_boundary(x, y) (0) +#endif + +#define OFFSET(val,align) ((unsigned long) \ + ( (val) & ( (align) - 1))) + +#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +#define SG_ENT_PHYS_ADDRESS(SG) VIRT_TO_PHYS(SG_ENT_VIRT_ADDRESS(SG)) + +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) + +/* + * Minimum IO TLB size to bother booting with. Systems with mainly + * 64bit capable cards will only lightly use the swiotlb. If we can't + * allocate a contiguous 1MB, we're probably in trouble anyway. + */ +#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) + +/* + * Enumeration for sync targets + */ +enum dma_sync_target { + SYNC_FOR_CPU = 0, + SYNC_FOR_DEVICE = 1, +}; + +int swiotlb_force; + +/* + * Used to do a quick range check in swiotlb_unmap_single and + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. + */ +static unsigned long io_tlb_nslabs; + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ +static unsigned long io_tlb_overflow = 32*1024; + +void *io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +static unsigned char **io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static int __init +setup_io_tlb_npages(char *str) +{ + if (isdigit(*str)) { + io_tlb_nslabs = simple_strtoul(str, &str, 0); + /* avoid tail segment of size < IO_TLB_SEGSIZE */ + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; + if (!strcmp(str, "force")) + swiotlb_force = 1; + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void +swiotlb_init_with_default_size (size_t default_size) +{ + unsigned long i; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + /* Round up to a power of two */ + while (io_tlb_nslabs & (io_tlb_nslabs - 1)) + io_tlb_nslabs += io_tlb_nslabs & ~(io_tlb_nslabs - 1); + } +#endif + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + +#ifdef CONFIG_XEN + /* + * Map these into 32bit space + * TODO: add logic for trying to get even lower pages + */ + for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) { + if (xen_create_contiguous_region((unsigned long)io_tlb_start + (i << IO_TLB_SHIFT), get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), 30)) + panic("Failed to setup Xen contiguous region"); + } +#endif + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); +#ifdef CONFIG_XEN + if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow), 32)) + panic("Failed to setup Xen contiguous region for overflow"); +#endif + printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", + virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); +} + +void +swiotlb_init (void) +{ + swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ +} + +/* + * Systems with larger DMA zones (those that don't support ISA) can + * initialize the swiotlb later using the slab allocator if needed. + * This should be just like above, but with some error catching. + */ +int +swiotlb_late_init_with_default_size (size_t default_size) +{ + unsigned long i, req_nslabs = io_tlb_nslabs; + unsigned int order; + +#ifdef CONFIG_XEN + if (is_running_on_xen()) + panic("TODO\n"); +#endif + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + /* + * Get IO TLB memory from the low pages + */ + order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + io_tlb_nslabs = SLABS_PER_PAGE << order; + + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { + io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); + if (io_tlb_start) + break; + order--; + } + + if (!io_tlb_start) + goto cleanup1; + + if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) { + printk(KERN_WARNING "Warning: only able to allocate %ld MB " + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + io_tlb_nslabs = SLABS_PER_PAGE << order; + } + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(int))); + if (!io_tlb_list) + goto cleanup2; + + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + + io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(char *))); + if (!io_tlb_orig_addr) + goto cleanup3; + + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, + get_order(io_tlb_overflow)); + if (!io_tlb_overflow_buffer) + goto cleanup4; + + printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - " + "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20, + virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); + + return 0; + +cleanup4: + free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * + sizeof(char *))); + io_tlb_orig_addr = NULL; +cleanup3: + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + io_tlb_list = NULL; + io_tlb_end = NULL; +cleanup2: + free_pages((unsigned long)io_tlb_start, order); + io_tlb_start = NULL; +cleanup1: + io_tlb_nslabs = req_nslabs; + return -ENOMEM; +} + +static inline int +address_needs_mapping(struct device *hwdev, dma_addr_t addr) +{ + dma_addr_t mask = 0xffffffff; + /* If the device has a mask, use it, otherwise default to 32 bits */ + if (hwdev && hwdev->dma_mask) + mask = *hwdev->dma_mask; + return (addr & ~mask) != 0; +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +map_single(struct device *hwdev, char *buffer, size_t size, int dir) +{ + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; + int i; + + /* + * For mappings greater than a page, we limit the stride (and + * hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + BUG_ON(!nslots); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + + if (index >= io_tlb_nslabs) + wrap = index = 0; + + do { + /* + * If we find a slot that indicates we have 'nslots' + * number of contiguous buffers, we allocate the + * buffers from that slot and mark the entries as '0' + * indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in + * the next round. + */ + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; + } + found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + io_tlb_orig_addr[index] = buffer; + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + memcpy(dma_addr, buffer, size); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + /* + * bounce... copy the data back into the original buffer * and + * delete the bounce buffer. + */ + memcpy(buffer, dma_addr, size); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contigous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? + io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +sync_single(struct device *hwdev, char *dma_addr, size_t size, + int dir, int target) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + switch (target) { + case SYNC_FOR_CPU: + if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + memcpy(buffer, dma_addr, size); + else + BUG_ON(dir != DMA_TO_DEVICE); + break; + case SYNC_FOR_DEVICE: + if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + memcpy(dma_addr, buffer, size); + else + BUG_ON(dir != DMA_FROM_DEVICE); + break; + default: + BUG(); + } +} + +void * +swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + unsigned long dev_addr; + void *ret; + int order = get_order(size); + + /* + * XXX fix me: the DMA API should pass us an explicit DMA mask + * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32 + * bit range instead of a 16MB one). + */ + flags |= GFP_DMA; + + ret = (void *)__get_free_pages(flags, order); +#ifdef CONFIG_XEN + if (ret && is_running_on_xen()) { + u64 mask = hwdev->coherent_dma_mask ? hwdev->coherent_dma_mask : + DMA_32BIT_MASK; + if (xen_create_contiguous_region((unsigned long)ret, order, + fls64(mask))) { + free_pages((unsigned long)ret, order); + ret = NULL; + } + } +#endif + if (ret && address_needs_mapping(hwdev, VIRT_TO_PHYS(ret))) { + /* + * The allocated memory isn't reachable by the device. + * Fall back on swiotlb_map_single(). + */ + free_pages((unsigned long) ret, order); + ret = NULL; + } + if (!ret) { + /* + * We are either out of memory or the device can't DMA + * to GFP_DMA memory; fall back on + * swiotlb_map_single(), which will grab memory from + * the lowest available address range. + */ + dma_addr_t handle; + handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); + if (swiotlb_dma_mapping_error(handle)) + return NULL; + + ret = PHYS_TO_VIRT(handle); + } + + memset(ret, 0, size); + dev_addr = VIRT_TO_PHYS(ret); + + /* Confirm address can be DMA'd by device */ + if (address_needs_mapping(hwdev, dev_addr)) { + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n", + (unsigned long long)*hwdev->dma_mask, dev_addr); + panic("swiotlb_alloc_coherent: allocated memory is out of " + "range for device"); + } + *dma_handle = dev_addr; + return ret; +} + +void +swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + if (!(vaddr >= (void *)io_tlb_start + && vaddr < (void *)io_tlb_end)) { +#ifdef CONFIG_XEN + if (is_running_on_xen()) + xen_destroy_contiguous_region((unsigned long)vaddr, + get_order(size)); +#endif + free_pages((unsigned long) vaddr, get_order(size)); + } else + /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); +} + +static void +swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * unless they check for dma_mapping_error (most don't) + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at " + "device %s\n", size, dev ? dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + panic("DMA: Memory would be corrupted\n"); + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + panic("DMA: Random memory would be DMAed\n"); + } +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * physical address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + */ +dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +{ + unsigned long dev_addr = VIRT_TO_PHYS(ptr); + void *map; + + BUG_ON(dir == DMA_NONE); + /* + * If the pointer passed in happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (!range_straddles_page_boundary(ptr, size) && + !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + map = map_single(hwdev, ptr, size, dir); + if (!map) { + swiotlb_full(hwdev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + + dev_addr = VIRT_TO_PHYS(map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (address_needs_mapping(hwdev, dev_addr)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; +} + +/* + * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +static void +mark_clean(void *addr, size_t size) +{ + unsigned long pg_addr, end; + +#ifndef XEN /* FIXME */ + if (is_running_on_xen()) + return; +#endif + pg_addr = PAGE_ALIGN((unsigned long) addr); + end = (unsigned long) addr + size; + while (pg_addr + PAGE_SIZE <= end) { + struct page *page = virt_to_page(pg_addr); + set_bit(PG_arch_1, &page->flags); + pg_addr += PAGE_SIZE; + } +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, + int dir) +{ + char *dma_addr = PHYS_TO_VIRT(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + unmap_single(hwdev, dma_addr, size, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the dma mapping, you must + * call this function before doing so. At the next point you give the dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +static inline void +swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir, int target) +{ + char *dma_addr = PHYS_TO_VIRT(dev_addr); + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir, target); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); +} + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); +} + +/* + * Same as above, but for a sub-range of the mapping. + */ +static inline void +swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, + int dir, int target) +{ + char *dma_addr = PHYS_TO_VIRT(dev_addr) + offset; + + BUG_ON(dir == DMA_NONE); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + sync_single(hwdev, dma_addr, size, dir, target); + else if (dir == DMA_FROM_DEVICE) + mark_clean(dma_addr, size); +} + +void +swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) +{ + swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, + SYNC_FOR_CPU); +} + +void +swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, int dir) +{ + swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, + SYNC_FOR_DEVICE); +} + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_single + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_single are the + * same here. + */ +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + void *addr; + unsigned long dev_addr; + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) { + addr = SG_ENT_VIRT_ADDRESS(sg); + dev_addr = VIRT_TO_PHYS(addr); + if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { + void *map = map_single(hwdev, addr, sg->length, dir); + sg->dma_address = virt_to_bus(map); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + swiotlb_full(hwdev, sg->length, dir, 0); + swiotlb_unmap_sg(hwdev, sg - i, i, dir); + sg[0].dma_length = 0; + return 0; + } + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_single() above. + */ +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, + int dir) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + unmap_single(hwdev, (void *) PHYS_TO_VIRT(sg->dma_address), sg->dma_length, dir); + else if (dir == DMA_FROM_DEVICE) + mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); +} + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +static inline void +swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir, int target) +{ + int i; + + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + sync_single(hwdev, (void *) sg->dma_address, + sg->dma_length, dir, target); +} + +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); +} + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); +} + +int +swiotlb_dma_mapping_error(dma_addr_t dma_addr) +{ + return (dma_addr == VIRT_TO_PHYS(io_tlb_overflow_buffer)); +} + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported (struct device *hwdev, u64 mask) +{ + return (VIRT_TO_PHYS(io_tlb_end) - 1) <= mask; +} + +EXPORT_SYMBOL(swiotlb_init); +EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL(swiotlb_map_sg); +EXPORT_SYMBOL(swiotlb_unmap_sg); +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_single_for_device); +EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); +EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); +EXPORT_SYMBOL(swiotlb_dma_mapping_error); +EXPORT_SYMBOL(swiotlb_alloc_coherent); +EXPORT_SYMBOL(swiotlb_free_coherent); +EXPORT_SYMBOL(swiotlb_dma_supported); _______________________________________________ Xen-ia64-devel mailing list Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ia64-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.