[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] The hypercall will fail and return EFAULT when the page becomes COW by forking process in linux



Hi, Ian

  Thanks your reply. 
  I added two hooks in xc_osdep_ops.u.pricmd for the different OSes 
after allocating and before freeing the hypercall buffer. 
One hook makes the hypercall buffer not to COW after being 
allocated in Linux, and to restore it to normal before being freed.

> Another thread in the process might fork though, wasn't that the main
> observation you made when you first posted?
> 
> I think perhaps you mean it is forbidden to fork and then access a
> hypercall buffer allocated before the fork, which sounds ok, since no
> thread which allocates a hypercall buffer should fork with it still
> allocated.

Yes.

> 
> That will make "hg diff" and similar functions show the name of the
> changed function here which is very useful for reviewers.

OK.

> the page being COW when hyper calling*/
> > +    madvise(p, nr_pages * PAGE_SIZE, MADV_DONTFORK);
> 
> madvise(2) tells me that MADV_{DO,DONT}FORK are Linux specific, so I
> think this belongs in the Linux specific alloc_hypercall_buffer hook.

I don't think so. We only need madvise(MADV_DONTFORK) before hypercall, 
and madvise(MADV_DOFORK) after hypercall. The pages in the hypercall buffer 
need not be protected. So two extra hooks are added in xc_osdep_ops.u.pricmd. 
The linux version is implemented.

> > +    p = mmap(NULL, size, PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, -1, 0);
> 
> I suppose this must necessarily return a page aligned result?
The address returned by mmap is already page aligned in the linux OS. 

the patch is below:

# HG changeset patch
# Parent bd244b9bc84bc74b6c6182c34369a31d1c5c869c
libxc: Add VM_DONTCOPY flag of the VMA of the hypercall buffer, to avoid the 
hypercall buffer becoming COW on hypercall.

In multi-threads and multi-processes environment, e.g. the process has two 
threads, thread A 
may call hypercall, thread B may call fork() to create child process. 
After forking, all pages of the process including hypercall buffers are cow. 
The hypervisor calls copy_to_user in hypercall in thread A context, will cause 
a write protection and return EFAULT error.

Fix:
1. Before hypercall: use MADV_DONTFORK of madvise syscall to make the hypercall 
buffer not to be copied
   to child process after fork. 
2. After hypercall: undo the effect of MADV_DONTFORK for the hypercall buffer 
by using MADV_DOFORK 
   of madvise syscall.
3. Use mmap/nunmap for memory alloc/free instead of malloc/free to bypass libc.

Signed-off-by: Zhenguo Wang <wangzhenguo@xxxxxxxxxx>
Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxxx>

diff -r bd244b9bc84b tools/libxc/xc_hcall_buf.c
--- a/tools/libxc/xc_hcall_buf.c        Tue Aug 07 16:44:20 2012 +0800
+++ b/tools/libxc/xc_hcall_buf.c        Tue Aug 07 16:46:56 2012 +0800
@@ -135,6 +135,9 @@ void *xc__hypercall_buffer_alloc_pages(x
 
     b->hbuf = p;
 
+    if (xch->ops->u.privcmd.prepare_hypercall_buffer)
+        xch->ops->u.privcmd.prepare_hypercall_buffer(xch, xch->ops_handle, p, 
nr_pages);
+
     memset(p, 0, nr_pages * PAGE_SIZE);
 
     return b->hbuf;
@@ -145,6 +148,9 @@ void xc__hypercall_buffer_free_pages(xc_
     if ( b->hbuf == NULL )
         return;
 
+    if (xch->ops->u.privcmd.unprepare_hypercall_buffer)
+        xch->ops->u.privcmd.unprepare_hypercall_buffer(xch, xch->ops_handle, 
b->hbuf, nr_pages);
+
     if ( !hypercall_buffer_cache_free(xch, b->hbuf, nr_pages) )
         xch->ops->u.privcmd.free_hypercall_buffer(xch, xch->ops_handle, 
b->hbuf, nr_pages);
 }
diff -r bd244b9bc84b tools/libxc/xc_linux_osdep.c
--- a/tools/libxc/xc_linux_osdep.c      Tue Aug 07 16:44:20 2012 +0800
+++ b/tools/libxc/xc_linux_osdep.c      Tue Aug 07 16:46:56 2012 +0800
@@ -93,22 +93,26 @@ static void *linux_privcmd_alloc_hyperca
     size_t size = npages * XC_PAGE_SIZE;
     void *p;
 
-    p = xc_memalign(xch, XC_PAGE_SIZE, size);
-    if (!p)
-        return NULL;
-
-    if ( mlock(p, size) < 0 )
-    {
-        free(p);
-        return NULL;
-    }
+    /* Address returned by mmap is page aligned. */
+    p = mmap(NULL, size, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, -1, 0);
     return p;
 }
 
 static void linux_privcmd_free_hypercall_buffer(xc_interface *xch, 
xc_osdep_handle h, void *ptr, int npages)
 {
-    munlock(ptr, npages * XC_PAGE_SIZE);
-    free(ptr);
+    munmap(ptr, npages * XC_PAGE_SIZE);
+}
+
+static void linux_privcmd_prepare_hypercall_buffer(xc_interface *xch, 
xc_osdep_handle h, void *ptr, int npages)
+{
+    /* Do not copy the VMA to child process on fork. Avoid the page being COW 
on hypercall. */
+    madvise(ptr, npages * XC_PAGE_SIZE, MADV_DONTFORK);
+}
+
+static void linux_privcmd_unprepare_hypercall_buffer(xc_interface *xch, 
xc_osdep_handle h, void *ptr, int npages)
+{
+    /* Recover the VMA flags. */
+    madvise(ptr, npages * XC_PAGE_SIZE, MADV_DOFORK);
 }
 
 static int linux_privcmd_hypercall(xc_interface *xch, xc_osdep_handle h, 
privcmd_hypercall_t *hypercall)
@@ -420,6 +424,8 @@ static struct xc_osdep_ops linux_privcmd
     .u.privcmd = {
         .alloc_hypercall_buffer = &linux_privcmd_alloc_hypercall_buffer,
         .free_hypercall_buffer = &linux_privcmd_free_hypercall_buffer,
+        .prepare_hypercall_buffer = &linux_privcmd_prepare_hypercall_buffer,
+        .unprepare_hypercall_buffer = 
&linux_privcmd_unprepare_hypercall_buffer,
 
         .hypercall = &linux_privcmd_hypercall,
 
diff -r bd244b9bc84b tools/libxc/xenctrlosdep.h
--- a/tools/libxc/xenctrlosdep.h        Tue Aug 07 16:44:20 2012 +0800
+++ b/tools/libxc/xenctrlosdep.h        Tue Aug 07 16:46:56 2012 +0800
@@ -78,6 +78,9 @@ struct xc_osdep_ops
             void *(*alloc_hypercall_buffer)(xc_interface *xch, xc_osdep_handle 
h, int npages);
             void (*free_hypercall_buffer)(xc_interface *xch, xc_osdep_handle 
h, void *ptr, int npages);
 
+            void (*prepare_hypercall_buffer)(xc_interface *xch, 
xc_osdep_handle h, void *ptr, int npages);
+            void (*unprepare_hypercall_buffer)(xc_interface *xch, 
xc_osdep_handle h, void *ptr, int npages);
+
             int (*hypercall)(xc_interface *xch, xc_osdep_handle h, 
privcmd_hypercall_t *hypercall);
 
             void *(*map_foreign_batch)(xc_interface *xch, xc_osdep_handle h, 
uint32_t dom, int prot,
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.