[Xen-devel] [PATCH v4 05/25] Introduce clear_user and clear_guest
From: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>

Introduce clear_user for x86 and ia64, shamelessly taken from Linux. The
x86 version is the 32-bit clear_user implementation.

Introduce clear_guest for x86 and ia64. The x86 implementation is based
on clear_user and a new clear_user_hvm function. The ia64 implementation
is actually in xencomm and is based on xencomm_copy_to_guest.

Changes in v3:
- introduce clear_guest.

Changes in v2:
- change d0 to be a long;
- cast addr to long.

Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
---
 xen/arch/ia64/linux/memcpy_mck.S       |  177 ++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c                 |  107 +++++++++++++++++++
 xen/arch/x86/usercopy.c                |   36 +++++++
 xen/common/xencomm.c                   |  111 ++++++++++++++++++++
 xen/include/asm-ia64/uaccess.h         |   12 ++
 xen/include/asm-x86/guest_access.h     |   18 +++
 xen/include/asm-x86/hvm/guest_access.h |    1 +
 xen/include/asm-x86/uaccess.h          |    1 +
 xen/include/xen/guest_access.h         |    6 +
 xen/include/xen/xencomm.h              |   24 +++++
 10 files changed, 493 insertions(+), 0 deletions(-)

diff --git a/xen/arch/ia64/linux/memcpy_mck.S b/xen/arch/ia64/linux/memcpy_mck.S
index 6f308e6..8b07006 100644
--- a/xen/arch/ia64/linux/memcpy_mck.S
+++ b/xen/arch/ia64/linux/memcpy_mck.S
@@ -659,3 +659,180 @@ EK(.ex_handler,  (p17) st8 [dst1]=r39,8); \
 /* end of McKinley specific optimization */
 END(__copy_user)
+
+/*
+ * Theory of operations:
+ *    - we check whether or not the buffer is small, i.e., less than 17 bytes,
+ *      in which case we do the byte by byte loop.
+ *
+ *    - Otherwise we go progressively from 1 byte store to 8 byte store in
+ *      the head part, the body is a 16 byte store loop and we finish with the
+ *      tail for the last 15 bytes.
+ *      The good point about this breakdown is that the long buffer handling
+ *      contains only 2 branches.
+ *
+ *      The reason for not using shifting & masking for both the head and the
+ *      tail is to stay semantically correct. This routine is not supposed
+ *      to write bytes outside of the buffer. While most of the time this would
+ *      be ok, we can't tolerate a mistake. A classical example is the case
+ *      of multithreaded code where the extra bytes touched are actually owned
+ *      by another thread which runs concurrently to ours. Another, less likely,
+ *      example is with device drivers where reading an I/O mapped location may
+ *      have side effects (same thing for writing).
+ */
+GLOBAL_ENTRY(__do_clear_user)
+        .prologue
+        .save ar.pfs, saved_pfs
+        alloc   saved_pfs=ar.pfs,2,0,0,0
+        cmp.eq p6,p0=r0,len             // check for zero length
+        .save ar.lc, saved_lc
+        mov saved_lc=ar.lc              // preserve ar.lc (slow)
+        .body
+        ;;                              // avoid WAW on CFM
+        adds tmp=-1,len                 // br.ctop is repeat/until
+        mov ret0=len                    // return value is length at this point
+(p6)    br.ret.spnt.many rp
+        ;;
+        cmp.lt p6,p0=16,len             // if len > 16 then long memset
+        mov ar.lc=tmp                   // initialize lc for small count
+(p6)    br.cond.dptk .long_do_clear
+        ;;                              // WAR on ar.lc
+        //
+        // worst case 16 iterations, avg 8 iterations
+        //
+        // We could have played with the predicates to use the extra
+        // M slot for 2 stores/iteration but the cost of initializing
+        // the various counters compared to how long the loop is supposed
+        // to last on average does not make this solution viable.
+        //
+1:
+        EX( .Lexit1, st1 [buf]=r0,1 )
+        adds len=-1,len                 // countdown length using len
+        br.cloop.dptk 1b
+        ;;                              // avoid RAW on ar.lc
+        //
+        // .Lexit4: comes from byte by byte loop
+        //          len contains bytes left
+.Lexit1:
+        mov ret0=len                    // faster than using ar.lc
+        mov ar.lc=saved_lc
+        br.ret.sptk.many rp             // end of short clear_user
+
+
+        //
+        // At this point we know we have more than 16 bytes to copy
+        // so we focus on alignment (no branches required)
+        //
+        // The use of len/len2 for countdown of the number of bytes left
+        // instead of ret0 is due to the fact that the exception code
+        // changes the values of r8.
+        //
+.long_do_clear:
+        tbit.nz p6,p0=buf,0             // odd alignment (for long_do_clear)
+        ;;
+        EX( .Lexit3, (p6) st1 [buf]=r0,1 )      // 1-byte aligned
+(p6)    adds len=-1,len;;               // sync because buf is modified
+        tbit.nz p6,p0=buf,1
+        ;;
+        EX( .Lexit3, (p6) st2 [buf]=r0,2 )      // 2-byte aligned
+(p6)    adds len=-2,len;;
+        tbit.nz p6,p0=buf,2
+        ;;
+        EX( .Lexit3, (p6) st4 [buf]=r0,4 )      // 4-byte aligned
+(p6)    adds len=-4,len;;
+        tbit.nz p6,p0=buf,3
+        ;;
+        EX( .Lexit3, (p6) st8 [buf]=r0,8 )      // 8-byte aligned
+(p6)    adds len=-8,len;;
+        shr.u   cnt=len,4               // number of 128-bit (2x64bit) words
+        ;;
+        cmp.eq p6,p0=r0,cnt
+        adds tmp=-1,cnt
+(p6)    br.cond.dpnt .dotail            // we have less than 16 bytes left
+        ;;
+        adds buf2=8,buf                 // setup second base pointer
+        mov ar.lc=tmp
+        ;;
+
+        //
+        // 16 bytes/iteration core loop
+        //
+        // The second store can never generate a fault because
+        // we come into the loop only when we are 16-byte aligned.
+        // This means that if we cross a page then it will always be
+        // in the first store and never in the second.
+        //
+        //
+        // We need to keep track of the remaining length. A possible (optimistic)
+        // way would be to use ar.lc and derive how many bytes were left by
+        // doing: left = 16*ar.lc + 16. This would avoid the addition at
+        // every iteration.
+        // However we need to keep the synchronization point. A template
+        // M;;MB does not exist and thus we can keep the addition at no
+        // extra cycle cost (use a nop slot anyway). It also simplifies the
+        // (unlikely) error recovery code
+        //
+
+2:      EX(.Lexit3, st8 [buf]=r0,16 )
+        ;;                              // needed to get len correct when error
+        st8 [buf2]=r0,16
+        adds len=-16,len
+        br.cloop.dptk 2b
+        ;;
+        mov ar.lc=saved_lc
+        //
+        // tail correction based on len only
+        //
+        // We alternate the use of len3,len2 to allow parallelism and correct
+        // error handling. We also reuse p6/p7 to return correct value.
+        // The addition of len2/len3 does not cost anything more compared to
+        // the regular memset as we had empty slots.
+        //
+.dotail:
+        mov len2=len                    // for parallelization of error handling
+        mov len3=len
+        tbit.nz p6,p0=len,3
+        ;;
+        EX( .Lexit2, (p6) st8 [buf]=r0,8 )      // at least 8 bytes
+(p6)    adds len3=-8,len2
+        tbit.nz p7,p6=len,2
+        ;;
+        EX( .Lexit2, (p7) st4 [buf]=r0,4 )      // at least 4 bytes
+(p7)    adds len2=-4,len3
+        tbit.nz p6,p7=len,1
+        ;;
+        EX( .Lexit2, (p6) st2 [buf]=r0,2 )      // at least 2 bytes
+(p6)    adds len3=-2,len2
+        tbit.nz p7,p6=len,0
+        ;;
+        EX( .Lexit2, (p7) st1 [buf]=r0 )        // only 1 byte left
+        mov ret0=r0                     // success
+        br.ret.sptk.many rp             // end of most likely path
+
+        //
+        // Outlined error handling code
+        //
+
+        //
+        // .Lexit3: comes from core loop, need restore pr/lc
+        //          len contains bytes left
+        //
+        //
+        // .Lexit2:
+        //      if p6 -> coming from st8 or st2 : len2 contains what's left
+        //      if p7 -> coming from st4 or st1 : len3 contains what's left
+        // We must restore lc/pr even though they might not have been used.
+.Lexit2:
+        .pred.rel "mutex", p6, p7
+(p6)    mov len=len2
+(p7)    mov len=len3
+        ;;
+        //
+        // .Lexit4: comes from head, need not restore pr/lc
+        //          len contains bytes left
+        //
+.Lexit3:
+        mov ret0=len
+        mov ar.lc=saved_lc
+        br.ret.sptk.many rp
+END(__do_clear_user)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 160a47f..de1a0ed 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2390,6 +2390,96 @@ static enum hvm_copy_result __hvm_copy(
     return HVMCOPY_okay;
 }
 
+static enum hvm_copy_result __hvm_clear(paddr_t addr, int size)
+{
+    struct vcpu *curr = current;
+    unsigned long gfn, mfn;
+    p2m_type_t p2mt;
+    char *p;
+    int count, todo = size;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+    /*
+     * XXX Disable for 4.1.0: PV-on-HVM drivers will do grant-table ops
+     * such as query_size. Grant-table code currently does copy_to/from_guest
+     * accesses under the big per-domain lock, which this test would disallow.
+     * The test is not needed until we implement sleeping-on-waitqueue when
+     * we access a paged-out frame, and that's post 4.1.0 now.
+     */
+#if 0
+    /*
+     * If the required guest memory is paged out, this function may sleep.
+     * Hence we bail immediately if called from atomic context.
+     */
+    if ( in_atomic() )
+        return HVMCOPY_unhandleable;
+#endif
+
+    while ( todo > 0 )
+    {
+        count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
+
+        gfn = paging_gva_to_gfn(curr, addr, &pfec);
+        if ( gfn == INVALID_GFN )
+        {
+            if ( pfec == PFEC_page_paged )
+                return HVMCOPY_gfn_paged_out;
+            if ( pfec == PFEC_page_shared )
+                return HVMCOPY_gfn_shared;
+            return HVMCOPY_bad_gva_to_gfn;
+        }
+
+        mfn = mfn_x(get_gfn_unshare(curr->domain, gfn, &p2mt));
+
+        if ( p2m_is_paging(p2mt) )
+        {
+            p2m_mem_paging_populate(curr->domain, gfn);
+            put_gfn(curr->domain, gfn);
+            return HVMCOPY_gfn_paged_out;
+        }
+        if ( p2m_is_shared(p2mt) )
+        {
+            put_gfn(curr->domain, gfn);
+            return HVMCOPY_gfn_shared;
+        }
+        if ( p2m_is_grant(p2mt) )
+        {
+            put_gfn(curr->domain, gfn);
+            return HVMCOPY_unhandleable;
+        }
+        if ( !p2m_is_ram(p2mt) )
+        {
+            put_gfn(curr->domain, gfn);
+            return HVMCOPY_bad_gfn_to_mfn;
+        }
+        ASSERT(mfn_valid(mfn));
+
+        p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
+
+        if ( p2mt == p2m_ram_ro )
+        {
+            static unsigned long lastpage;
+            if ( xchg(&lastpage, gfn) != gfn )
+                gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
+                         " memory page. gfn=%#lx, mfn=%#lx\n",
+                         gfn, mfn);
+        }
+        else
+        {
+            memset(p, 0x00, count);
+            paging_mark_dirty(curr->domain, mfn);
+        }
+
+        unmap_domain_page(p);
+
+        addr += count;
+        todo -= count;
+        put_gfn(curr->domain, gfn);
+    }
+
+    return HVMCOPY_okay;
+}
+
 enum hvm_copy_result hvm_copy_to_guest_phys(
     paddr_t paddr, void *buf, int size)
 {
@@ -2476,6 +2566,23 @@ unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
     return rc ? len : 0; /* fake a copy_to_user() return code */
 }
 
+unsigned long clear_user_hvm(void *to, unsigned int len)
+{
+    int rc;
+
+#ifdef __x86_64__
+    if ( !current->arch.hvm_vcpu.hcall_64bit &&
+         is_compat_arg_xlat_range(to, len) )
+    {
+        memset(to, 0x00, len);
+        return 0;
+    }
+#endif
+
+    rc = __hvm_clear((unsigned long)to, len);
+    return rc ? len : 0; /* fake a copy_to_user() return code */
+}
+
 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
 {
     int rc;
diff --git a/xen/arch/x86/usercopy.c b/xen/arch/x86/usercopy.c
index d88e635..47dadae 100644
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -110,6 +110,42 @@ copy_to_user(void __user *to, const void *from, unsigned n)
     return n;
 }
 
+#define __do_clear_user(addr,size)                                      \
+do {                                                                    \
+        long __d0;                                                      \
+        __asm__ __volatile__(                                           \
+                "0:     rep; stosl\n"                                   \
+                "       movl %2,%0\n"                                   \
+                "1:     rep; stosb\n"                                   \
+                "2:\n"                                                  \
+                ".section .fixup,\"ax\"\n"                              \
+                "3:     lea 0(%2,%0,4),%0\n"                            \
+                "       jmp 2b\n"                                       \
+                ".previous\n"                                           \
+                _ASM_EXTABLE(0b,3b)                                     \
+                _ASM_EXTABLE(1b,2b)                                     \
+                : "=&c"(size), "=&D" (__d0)                             \
+                : "r"(size & 3), "0"(size / 4), "1"((long)addr), "a"(0)); \
+} while (0)
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+unsigned long
+clear_user(void __user *to, unsigned n)
+{
+    if ( access_ok(to, n) )
+        __do_clear_user(to, n);
+    return n;
+}
+
 /**
  * copy_from_user: - Copy a block of data from user space.
  * @to:   Destination address, in kernel space.
diff --git a/xen/common/xencomm.c b/xen/common/xencomm.c
index 2475454..9f6c1c5 100644
--- a/xen/common/xencomm.c
+++ b/xen/common/xencomm.c
@@ -414,6 +414,117 @@ out:
     return n - from_pos;
 }
 
+static int
+xencomm_clear_chunk(
+    unsigned long paddr, unsigned int len)
+{
+    struct page_info *page;
+    int res;
+
+    do {
+        res = xencomm_get_page(paddr, &page);
+    } while ( res == -EAGAIN );
+
+    if ( res )
+        return res;
+
+    memset(xencomm_vaddr(paddr, page), 0x00, len);
+    xencomm_mark_dirty((unsigned long)xencomm_vaddr(paddr, page), len);
+    put_page(page);
+
+    return 0;
+}
+
+static unsigned long
+xencomm_inline_clear_guest(
+    void *to, unsigned int n, unsigned int skip)
+{
+    unsigned long dest_paddr = xencomm_inline_addr(to) + skip;
+
+    while ( n > 0 )
+    {
+        unsigned int chunksz, bytes;
+
+        chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE);
+        bytes = min(chunksz, n);
+
+        if ( xencomm_clear_chunk(dest_paddr, bytes) )
+            return n;
+        dest_paddr += bytes;
+        n -= bytes;
+    }
+
+    /* Always successful. */
+    return 0;
+}
+
+/**
+ * xencomm_clear_guest: Clear a block of data in domain space.
+ * @to:   Physical address to xencomm buffer descriptor.
+ * @n:    Number of bytes to clear.
+ * @skip: Number of bytes from the start to skip.
+ *
+ * Clear domain data
+ *
+ * Returns number of bytes that could not be cleared
+ * On success, this will be zero.
+ */
+unsigned long
+xencomm_clear_guest(
+    void *to, unsigned int n, unsigned int skip)
+{
+    struct xencomm_ctxt ctxt;
+    unsigned int from_pos = 0;
+    unsigned int to_pos = 0;
+    unsigned int i = 0;
+
+    if ( xencomm_is_inline(to) )
+        return xencomm_inline_clear_guest(to, n, skip);
+
+    if ( xencomm_ctxt_init(to, &ctxt) )
+        return n;
+
+    /* Iterate through the descriptor, copying up to a page at a time */
+    while ( (from_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt)) )
+    {
+        unsigned long dest_paddr;
+        unsigned int pgoffset, chunksz, chunk_skip;
+
+        if ( xencomm_ctxt_next(&ctxt, i) )
+            goto out;
+        dest_paddr = *xencomm_ctxt_address(&ctxt);
+        if ( dest_paddr == XENCOMM_INVALID )
+        {
+            i++;
+            continue;
+        }
+
+        pgoffset = dest_paddr % PAGE_SIZE;
+        chunksz = PAGE_SIZE - pgoffset;
+
+        chunk_skip = min(chunksz, skip);
+        to_pos += chunk_skip;
+        chunksz -= chunk_skip;
+        skip -= chunk_skip;
+
+        if ( skip == 0 && chunksz > 0 )
+        {
+            unsigned int bytes = min(chunksz, n - from_pos);
+
+            if ( xencomm_clear_chunk(dest_paddr + chunk_skip, bytes) )
+                goto out;
+            from_pos += bytes;
+            to_pos += bytes;
+        }
+
+        i++;
+    }
+
+out:
+    xencomm_ctxt_done(&ctxt);
+    return n - from_pos;
+}
+
 static int xencomm_inline_add_offset(void **handle, unsigned int bytes)
 {
     *handle += bytes;
diff --git a/xen/include/asm-ia64/uaccess.h b/xen/include/asm-ia64/uaccess.h
index 32ef415..2ececb1 100644
--- a/xen/include/asm-ia64/uaccess.h
+++ b/xen/include/asm-ia64/uaccess.h
@@ -236,6 +236,18 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
 	__cu_len;							\
 })
 
+extern unsigned long __do_clear_user (void __user * to, unsigned long count);
+
+#define clear_user(to, n)						\
+({									\
+	void __user *__cu_to = (to);					\
+	long __cu_len = (n);						\
+									\
+	if (__access_ok(__cu_to))					\
+		__cu_len = __do_clear_user(__cu_to, __cu_len);		\
+	__cu_len;							\
+})
+
 #define copy_from_user(to, from, n)					\
 ({									\
 	void *__cu_to = (to);						\
diff --git a/xen/include/asm-x86/guest_access.h b/xen/include/asm-x86/guest_access.h
index 99ea64d..2b429c2 100644
--- a/xen/include/asm-x86/guest_access.h
+++ b/xen/include/asm-x86/guest_access.h
@@ -21,6 +21,10 @@
     (is_hvm_vcpu(current) ?                        \
      copy_from_user_hvm((dst), (src), (len)) :     \
      copy_from_user((dst), (src), (len)))
+#define raw_clear_guest(dst, len)                  \
+    (is_hvm_vcpu(current) ?                        \
+     clear_user_hvm((dst), (len)) :                \
+     clear_user((dst), (len)))
 #define __raw_copy_to_guest(dst, src, len)         \
     (is_hvm_vcpu(current) ?                        \
      copy_to_user_hvm((dst), (src), (len)) :       \
@@ -29,6 +33,10 @@
     (is_hvm_vcpu(current) ?                        \
      copy_from_user_hvm((dst), (src), (len)) :     \
      __copy_from_user((dst), (src), (len)))
+#define __raw_clear_guest(dst, len)                \
+    (is_hvm_vcpu(current) ?                        \
+     clear_user_hvm((dst), (len)) :                \
+     clear_user((dst), (len)))
 
 /* Is the guest handle a NULL reference? */
 #define guest_handle_is_null(hnd)        ((hnd).p == NULL)
@@ -69,6 +77,11 @@
     raw_copy_from_guest(_d, _s+(off), sizeof(*_d)*(nr));\
 })
 
+#define clear_guest_offset(hnd, off, nr) ({     \
+    void *_d = (hnd).p;                         \
+    raw_clear_guest(_d+(off), nr);              \
+})
+
 /* Copy sub-field of a structure to guest context via a guest handle. */
 #define copy_field_to_guest(hnd, ptr, field) ({         \
     const typeof(&(ptr)->field) _s = &(ptr)->field;     \
@@ -110,6 +123,11 @@
     __raw_copy_from_guest(_d, _s+(off), sizeof(*_d)*(nr));\
 })
 
+#define __clear_guest_offset(hnd, off, nr) ({   \
+    void *_d = (hnd).p;                         \
+    __raw_clear_guest(_d+(off), nr);            \
+})
+
 #define __copy_field_to_guest(hnd, ptr, field) ({       \
     const typeof(&(ptr)->field) _s = &(ptr)->field;     \
     void *_d = &(hnd).p->field;                         \
diff --git a/xen/include/asm-x86/hvm/guest_access.h b/xen/include/asm-x86/hvm/guest_access.h
index 7a89e81..b92dbe9 100644
--- a/xen/include/asm-x86/hvm/guest_access.h
+++ b/xen/include/asm-x86/hvm/guest_access.h
@@ -2,6 +2,7 @@
 #define __ASM_X86_HVM_GUEST_ACCESS_H__
 
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len);
+unsigned long clear_user_hvm(void *to, unsigned int len);
 unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len);
 
 #endif /* __ASM_X86_HVM_GUEST_ACCESS_H__ */
diff --git a/xen/include/asm-x86/uaccess.h b/xen/include/asm-x86/uaccess.h
index e3e541b..d6f4458 100644
--- a/xen/include/asm-x86/uaccess.h
+++ b/xen/include/asm-x86/uaccess.h
@@ -16,6 +16,7 @@
 #endif
 
 unsigned long copy_to_user(void *to, const void *from, unsigned len);
+unsigned long clear_user(void *to, unsigned len);
 unsigned long copy_from_user(void *to, const void *from, unsigned len);
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 unsigned long __copy_to_user_ll(void *to, const void *from, unsigned n);
diff --git a/xen/include/xen/guest_access.h b/xen/include/xen/guest_access.h
index 0b9fb07..373454e 100644
--- a/xen/include/xen/guest_access.h
+++ b/xen/include/xen/guest_access.h
@@ -15,10 +15,16 @@
 #define copy_from_guest(ptr, hnd, nr)           \
     copy_from_guest_offset(ptr, hnd, 0, nr)
 
+#define clear_guest(hnd, nr)                    \
+    clear_guest_offset(hnd, 0, nr)
+
 #define __copy_to_guest(hnd, ptr, nr)           \
     __copy_to_guest_offset(hnd, 0, ptr, nr)
 #define __copy_from_guest(ptr, hnd, nr)         \
     __copy_from_guest_offset(ptr, hnd, 0, nr)
 
+#define __clear_guest(hnd, nr)                  \
+    __clear_guest_offset(hnd, 0, nr)
+
 #endif /* __XEN_GUEST_ACCESS_H__ */
diff --git a/xen/include/xen/xencomm.h b/xen/include/xen/xencomm.h
index bce2ca7..730da7c 100644
--- a/xen/include/xen/xencomm.h
+++ b/xen/include/xen/xencomm.h
@@ -27,6 +27,8 @@ unsigned long xencomm_copy_to_guest(
     void *to, const void *from, unsigned int len, unsigned int skip);
 unsigned long xencomm_copy_from_guest(
     void *to, const void *from, unsigned int len, unsigned int skip);
+unsigned long xencomm_clear_guest(
+    void *to, unsigned int n, unsigned int skip);
 int xencomm_add_offset(void **handle, unsigned int bytes);
 int xencomm_handle_is_null(void *ptr);
 
@@ -41,6 +43,16 @@ static inline unsigned long xencomm_inline_addr(const void *handle)
     return (unsigned long)handle & ~XENCOMM_INLINE_FLAG;
 }
 
+#define raw_copy_to_guest(dst, src, len)        \
+    xencomm_copy_to_guest(dst, src, len, 0)
+#define raw_copy_from_guest(dst, src, len)      \
+    xencomm_copy_from_guest(dst, src, len, 0)
+#define raw_clear_guest(dst, len)               \
+    xencomm_clear_guest(dst, len, 0)
+#define __raw_copy_to_guest raw_copy_to_guest
+#define __raw_copy_from_guest raw_copy_from_guest
+#define __raw_clear_guest raw_clear_guest
+
 /* Is the guest handle a NULL reference? */
 #define guest_handle_is_null(hnd) \
     ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
@@ -82,6 +94,13 @@ static inline unsigned long xencomm_inline_addr(const void *handle)
 #define copy_from_guest_offset(ptr, hnd, idx, nr) \
     __copy_from_guest_offset(ptr, hnd, idx, nr)
 
+/*
+ * Clear an array of objects in guest context via a guest handle.
+ * Optionally specify an offset into the guest array.
+ */
+#define clear_guest_offset(hnd, idx, nr) \
+    __clear_guest_offset(hnd, idx, nr)
+
 /* Copy sub-field of a structure from guest context via a guest handle. */
 #define copy_field_from_guest(ptr, hnd, field) \
     __copy_field_from_guest(ptr, hnd, field)
@@ -115,6 +134,11 @@ static inline unsigned long xencomm_inline_addr(const void *handle)
     xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
 })
 
+#define __clear_guest_offset(hnd, idx, nr) ({ \
+    void *_d = (hnd).p; \
+    xencomm_clear_guest(_d, nr, idx); \
+})
+
 #ifdef CONFIG_XENCOMM_MARK_DIRTY
 extern void xencomm_mark_dirty(unsigned long addr, unsigned int len);
 #else
-- 
1.7.2.5

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
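For readers not familiar with the guest-access helpers, here is a minimal,
hypothetical sketch of how a hypercall handler could use the clear_guest()
macro introduced by this patch to scrub a guest-supplied buffer. The
hypercall name, its arguments and the error policy are made up for
illustration; clear_guest() comes from the patched headers, while
guest_handle_is_null() is a pre-existing helper.

    /* Hypothetical example only -- not part of this patch. */
    #include <xen/errno.h>
    #include <xen/guest_access.h>

    long do_scrub_buffer(XEN_GUEST_HANDLE(char) buf, unsigned int len)
    {
        if ( guest_handle_is_null(buf) )
            return -EINVAL;

        /*
         * clear_guest() mirrors the copy_to_guest() convention: it returns
         * the number of bytes that could NOT be cleared, so 0 means success.
         */
        if ( clear_guest(buf, len) != 0 )
            return -EFAULT;

        return 0;
    }

Note that, unlike the copy_*_guest() helpers, the count passed to
clear_guest() is a byte count rather than an element count: the
clear_guest_offset() implementations hand it straight to raw_clear_guest()
without scaling by the handle's element size, so a char handle is the
natural fit.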