[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [Xen-Devel] [PATCH] [GSOC14] refactored mempaging code from xenpaging to libxc.



On Fri, Jun 13, 2014 at 7:51 AM, Andrew Cooper <andrew.cooper3@xxxxxxxxxx> wrote:
On 13/06/14 15:16, Dushyant Behl wrote:
> This patch is part of the work done under the gsoc project -
> Lazy Restore Using Memory Paging.
>
> This patch moves the code to initialize mempaging from xenpaging to libxc.
> The code refactored from xenpaging is the code which sets up paging,
> initializes a shared ring and an event channel to communicate with Xen. This
> communication is done between the hypervisor and the dom0 tool which performs
> the activity of the pager. The xenpaging code is changed to use the newly created
> routines and has been tested to build properly and work with this code.
>
> The refactoring is done so that any tool which will act as a pager in
> lazy restore or use memory paging can use the same routines to initialize mempaging.
> This refactoring will also allow any future (in-tree) tools to use mempaging.
>
> The refactored code in xc_mem_paging_ring_setup is to be compiled into
> libxenguest.
>
> Signed-off-by: Dushyant Behl <myselfdushyantbehl@xxxxxxxxx>
> Reviewed-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> Acked-by: David Scott <dave.scott@xxxxxxxxxx>
> ---
> Âtools/libxc/Makefile        Â|  2 +
> Âtools/libxc/xc_mem_paging_setup.c  | 135 ++++++++++++++++++++++++++++++++++++
> Âtools/libxc/xenctrl.h        | Â15 ++++
> Âtools/ocaml/libs/xc/xenctrl_stubs.c | Â11 +--
> Âtools/xenpaging/Makefile      Â|  4 +-
> Âtools/xenpaging/xenpaging.c     | Â93 +++----------------------
> Â6 files changed, 172 insertions(+), 88 deletions(-)
> Âcreate mode 100644 tools/libxc/xc_mem_paging_setup.c
>
> diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
> index a74b19e..6cf14f0 100644
> --- a/tools/libxc/Makefile
> +++ b/tools/libxc/Makefile
> @@ -69,6 +69,8 @@ GUEST_SRCS-$(CONFIG_ARM) Â Â += xc_dom_armzimageloader.c
> ÂGUEST_SRCS-y         += xc_dom_binloader.c
> ÂGUEST_SRCS-y         += xc_dom_compat_linux.c
>
> +GUEST_SRCS-y         += xc_mem_paging_setup.c
> +
> ÂGUEST_SRCS-$(CONFIG_X86) Â Â += xc_dom_x86.c
> ÂGUEST_SRCS-$(CONFIG_X86) Â Â += xc_cpuid_x86.c
> ÂGUEST_SRCS-$(CONFIG_X86) Â Â += xc_hvm_build_x86.c
> diff --git a/tools/libxc/xc_mem_paging_setup.c b/tools/libxc/xc_mem_paging_setup.c
> new file mode 100644
> index 0000000..7b3ab38
> --- /dev/null
> +++ b/tools/libxc/xc_mem_paging_setup.c
> @@ -0,0 +1,135 @@
> +/*
> + * tools/libxc/xc_mem_paging_setup.c
> + *
> + * Routines to initialize memory paging. Create shared ring
> + * and event channels to communicate with the hypervisor.
> + *
> + * Copyright (c) 2014 Dushyant Behl
> + * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA Â02110-1301 USA
> + */
> +
> +#include "xc_private.h"
> +#include <xen/event_channel.h>
> +#include <xen/mem_event.h>
> +
> +/*
> + * Mem paging ring and event channel setup routine.
> + * Setup a shared ring and an event channel to communicate between
> + * hypervisor and the tool performing mem paging operations.
> + * The function will return zero on successful completion and will
> + * return -1 on failure at any intermediate step setting up errno
> + * properly.
> + */
> +int xc_mem_paging_ring_setup(xc_interface *xch, domid_t domain_id,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âvoid *ring_page, int *port,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âuint32_t *evtchn_port,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âxc_evtchn **xce_handle,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âmem_event_back_ring_t *back_ring)
> +{
> + Â Âint rc;
> + Â Âuint64_t ring_pfn, mmap_pfn;
> +
> + Â Â/* Map the ring page */
> + Â Âxc_get_hvm_param(xch, domain_id, HVM_PARAM_PAGING_RING_PFN, &ring_pfn);
> + Â Âmmap_pfn = ring_pfn;
> + Â Âring_page = xc_map_foreign_batch(xch, domain_id,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ÂPROT_READ | PROT_WRITE, &mmap_pfn, 1);
> + Â Âif ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
> + Â Â{
> + Â Â Â Â/* Map failed, populate ring page */
> + Â Â Â Ârc = xc_domain_populate_physmap_exact(xch, domain_id,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â1, 0, 0, &ring_pfn);
> + Â Â Â Âif ( rc != 0 )
> + Â Â Â Â{
> + Â Â Â Â Â ÂPERROR("Failed to populate ring gfn\n");
> + Â Â Â Â Â Âreturn -1;
> + Â Â Â Â}
> +
> + Â Â Â Âmmap_pfn = ring_pfn;
> + Â Â Â Âring_page = xc_map_foreign_batch(xch, domain_id,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ÂPROT_READ | PROT_WRITE, &mmap_pfn, 1);
> +
> + Â Â Â Âif ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
> + Â Â Â Â{
> + Â Â Â Â Â ÂPERROR("Could not map the ring page\n");
> + Â Â Â Â Â Âreturn -1;
> + Â Â Â Â}
> + Â Â}
> +
> + Â Â/* Initialise Xen */
> + Â Ârc = xc_mem_paging_enable(xch, domain_id, evtchn_port);
> + Â Âif ( rc != 0 )
> + Â Â{
> + Â Â Â Âswitch ( errno ) {
> + Â Â Â Â Â Âcase EBUSY:
> + Â Â Â Â Â Â Â ÂERROR("mempaging is (or was) active on this domain");
> + Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Âcase ENODEV:
> + Â Â Â Â Â Â Â ÂERROR("mempaging requires Hardware Assisted Paging");
> + Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Âcase EMLINK:
> + Â Â Â Â Â Â Â ÂERROR("mempaging not supported while iommu passthrough is enabled");
> + Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Âcase EXDEV:
> + Â Â Â Â Â Â Â ÂERROR("mempaging not supported in a PoD guest");
> + Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Âdefault:
> + Â Â Â Â Â Â Â ÂPERROR("mempaging: error initialising shared page");
> + Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â}
> + Â Â Â Âreturn -1;
> + Â Â}
> +
> + Â Â/* Open event channel */
> + Â Â*xce_handle = xc_evtchn_open(NULL, 0);
> + Â Âif ( *xce_handle == NULL )
> + Â Â{
> + Â Â Â ÂPERROR("Failed to open event channel");
> + Â Â Â Âreturn -1;
> + Â Â}

This is inappropriate inside libxenguest. The user of the library
possibly already has an open evtchn handle. While this is only wasteful of
an fd under Linux, I believe it will result in open() failing under some
of the *BSDs.

The correct way of doing this is to have the caller of
xc_mem_paging_ring_setup() provide their xce_handle, and require them to
open one if they need to.
I think this is heavy-handed. I mean, the idea here is to relieve the caller from doing all the setup work.

In fact, you could argue that a follow-up patch should encapsulate all the cleanup.

And then consumers of this particular module call setup, teardown, use the intermediate result, no concerns.

LGTM, to be honest.

> +
> + Â Â/* Bind event notification */
> + Â Ârc = xc_evtchn_bind_interdomain(*xce_handle, domain_id, *evtchn_port);
> + Â Âif ( rc < 0 )
> + Â Â{
> + Â Â Â ÂPERROR("Failed to bind event channel");
> + Â Â Â Âreturn -1;
> + Â Â}
> + Â Â*port = rc;
> +
> + Â Â/* Initialise ring */
> + Â ÂSHARED_RING_INIT((mem_event_sring_t *)ring_page);
> + Â ÂBACK_RING_INIT(back_ring, (mem_event_sring_t *)ring_page, PAGE_SIZE);
> +
> + Â Â/* Now that the ring is set, remove it from the guest's physmap */
> + Â Âif ( xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0, &ring_pfn) )
> + Â Â{
> + Â Â Â ÂPERROR("Failed to remove ring from guest physmap");
> + Â Â Â Âreturn -1;
> + Â Â}

There is a race condition here where the guest can play with the
post-initialised ring state.
Well-known, not the place to fix it here.

This was far worse in 2011. We've gotten this to a place in which you pause the guest, and this works safely. Not unreasonable IMHO, and the status-quo for a while.

A *true* solution would require hypervisor surgery by adding a XENMEM op with a new special map space. Maybe.

> +
> + Â Âreturn 0;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 02129f7..77a0302 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -47,6 +47,7 @@
> Â#include <xen/xsm/flask_op.h>
> Â#include <xen/tmem.h>
> Â#include <xen/kexec.h>
> +#include <xen/mem_event.h>
>
> Â#include "xentoollog.h"
>
> @@ -2039,6 +2040,20 @@ int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long gfn);
> Âint xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
> Â Â Â Â Â Â Â Â Â Â Â Â Âunsigned long gfn, void *buffer);
>
> +/*
> + * Mem paging ring and event channel setup routine.
> + * Setup a shared ring and an event channel to communicate between
> + * hypervisor and the tool performing mem paging operations.
> + * The function will return zero on successful completion and will
> + * return -1 on failure at any intermediate step setting up errno
> + * properly.
> + */
> +int xc_mem_paging_ring_setup(xc_interface *xch, domid_t domain_id,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âvoid *ring_page, int *port,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âuint32_t *evtchn_port,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âxc_evtchn **xceh,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âmem_event_back_ring_t *_back_ring);
> +
> Â/**
> Â * Access tracking operations.
> Â * Supported only on Intel EPT 64 bit processors.
> diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
> index ff29b47..37a4db7 100644
> --- a/tools/ocaml/libs/xc/xenctrl_stubs.c
> +++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
> @@ -521,13 +521,16 @@ CAMLprim value stub_xc_evtchn_reset(value xch, value domid)
> Â Â Â CAMLreturn(Val_unit);
> Â}
>
> -
> -#define RING_SIZE 32768
> -static char ring[RING_SIZE];
> +/*
> + * The name is kept BUF_RING_SIZE, because the name RING_SIZE
> + * collides with the xen shared ring definitions in io/ring.h
> + */
> +#define BUF_RING_SIZE 32768
> +static char ring[BUF_RING_SIZE];
>
> ÂCAMLprim value stub_xc_readconsolering(value xch)
> Â{
> - Â Â unsigned int size = RING_SIZE - 1;
> + Â Â unsigned int size = BUF_RING_SIZE - 1;
> Â Â Â char *ring_ptr = ring;
> Â Â Â int retval;
>
> diff --git a/tools/xenpaging/Makefile b/tools/xenpaging/Makefile
> index 548d9dd..ea370d3 100644
> --- a/tools/xenpaging/Makefile
> +++ b/tools/xenpaging/Makefile
> @@ -1,8 +1,8 @@
> ÂXEN_ROOT=$(CURDIR)/../..
> Âinclude $(XEN_ROOT)/tools/Rules.mk
>
> -CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore) $(PTHREAD_CFLAGS)
> -LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(PTHREAD_LIBS)
> +CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore) $(PTHREAD_CFLAGS)
> +LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(PTHREAD_LIBS)

You are not introducing any libxenstore calls into xenpaging. This
change is bogus.
The diff is not introducing libxenstore.
AndresÂ

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.