[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3] libvchan: interdomain communications library



This library implements a bidirectional communication interface between
applications in different domains, similar to unix sockets. Data can be
sent using the byte-oriented libvchan_read/libvchan_write or the
packet-oriented libvchan_recv/libvchan_send.

Channel setup is done using a client-server model; domain IDs and a port
number must be negotiated prior to initialization. The server allocates
memory for the shared pages and determines the sizes of the
communication rings (which may span multiple pages, although the default
places rings and control within a single page).

With properly sized rings, testing has shown that this interface
provides speed comparable to pipes within a single Linux domain; it is
significantly faster than network-based communication.

Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
---

Changes since v2:
        - Use asm() barriers instead of __sync_synchronize()
        - Fix build when xs.h has not yet been installed
        - Remove // coments in header to fix strict build
        - Add (non)blocking flag and change node1 example to use it

---
 tools/Makefile                         |    1 +
 tools/include/xen-sys/Linux/gntalloc.h |   82 ++++++
 tools/include/xen-sys/Linux/gntdev.h   |   33 +++-
 tools/libvchan/Makefile                |   56 ++++
 tools/libvchan/init.c                  |  464 ++++++++++++++++++++++++++++++++
 tools/libvchan/io.c                    |  314 +++++++++++++++++++++
 tools/libvchan/node-select.c           |  161 +++++++++++
 tools/libvchan/node.c                  |  168 ++++++++++++
 xen/include/public/io/libvchan.h       |  209 ++++++++++++++
 9 files changed, 1487 insertions(+), 1 deletions(-)
 create mode 100644 tools/include/xen-sys/Linux/gntalloc.h
 create mode 100644 tools/libvchan/Makefile
 create mode 100644 tools/libvchan/init.c
 create mode 100644 tools/libvchan/io.c
 create mode 100644 tools/libvchan/node-select.c
 create mode 100644 tools/libvchan/node.c
 create mode 100644 xen/include/public/io/libvchan.h

diff --git a/tools/Makefile b/tools/Makefile
index df6270c..9389e1f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -27,6 +27,7 @@ SUBDIRS-$(CONFIG_NetBSD) += blktap2
 SUBDIRS-$(CONFIG_NetBSD) += xenbackendd
 SUBDIRS-y += libfsimage
 SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
+SUBDIRS-y += libvchan
 
 # do not recurse in to a dir we are about to delete
 ifneq "$(MAKECMDGOALS)" "distclean"
diff --git a/tools/include/xen-sys/Linux/gntalloc.h 
b/tools/include/xen-sys/Linux/gntalloc.h
new file mode 100644
index 0000000..76bd580
--- /dev/null
+++ b/tools/include/xen-sys/Linux/gntalloc.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * Author: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * This file is in the public domain.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+       /* IN parameters */
+       /* The ID of the domain to be given access to the grants. */
+       uint16_t domid;
+       /* Flags for this mapping */
+       uint16_t flags;
+       /* Number of pages to map */
+       uint32_t count;
+       /* OUT parameters */
+       /* The offset to be used on a subsequent call to mmap(). */
+       uint64_t index;
+       /* The grant references of the newly created grant, one per page */
+       /* Variable size, depending on count */
+       uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+       /* IN parameters */
+       /* The offset returned in the map operation */
+       uint64_t index;
+       /* Number of references to unmap */
+       uint32_t count;
+};
+
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
+struct ioctl_gntalloc_unmap_notify {
+       /* IN parameters */
+       /* Offset in the file descriptor for a byte within the page (same as
+        * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+        * be cleared. Otherwise, it can be any byte in the page whose
+        * notification we are adjusting.
+        */
+       uint64_t index;
+       /* Action(s) to take on unmap */
+       uint32_t action;
+       /* Event channel to notify */
+       uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
diff --git a/tools/include/xen-sys/Linux/gntdev.h 
b/tools/include/xen-sys/Linux/gntdev.h
index 8bd1467..5304bd3 100644
--- a/tools/include/xen-sys/Linux/gntdev.h
+++ b/tools/include/xen-sys/Linux/gntdev.h
@@ -66,7 +66,7 @@ struct ioctl_gntdev_map_grant_ref {
  * before this ioctl is called, or an error will result.
  */
 #define IOCTL_GNTDEV_UNMAP_GRANT_REF \
-_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
 struct ioctl_gntdev_unmap_grant_ref {
        /* IN parameters */
        /* The offset was returned by the corresponding map operation. */
@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
        uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+       /* IN parameters */
+       /* Offset in the file descriptor for a byte within the page (same as
+        * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+        * be cleared. Otherwise, it can be any byte in the page whose
+        * notification we are adjusting.
+        */
+       uint64_t index;
+       /* Action(s) to take on unmap */
+       uint32_t action;
+       /* Event channel to notify */
+       uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
diff --git a/tools/libvchan/Makefile b/tools/libvchan/Makefile
new file mode 100644
index 0000000..6cccf3e
--- /dev/null
+++ b/tools/libvchan/Makefile
@@ -0,0 +1,56 @@
+#
+# tools/libvchan/Makefile
+#
+
+XEN_ROOT = $(CURDIR)/../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+LIBVCHAN_OBJS = init.o io.o
+NODE_OBJS = node.o
+NODE2_OBJS = node-select.o
+
+LIBVCHAN_LIBS = $(LDLIBS_libxenstore)
+$(LIBVCHAN_OBJS): CFLAGS += $(CFLAGS_libxenstore)
+
+MAJOR = 1.0
+MINOR = 0
+
+CFLAGS += -I../include -I. -fPIC
+
+.PHONY: all
+all: libvchan.so vchan-node1 vchan-node2 libvchan.a
+
+libvchan.so: libvchan.so.$(MAJOR)
+       ln -sf $< $@
+
+libvchan.so.$(MAJOR): libvchan.so.$(MAJOR).$(MINOR)
+       ln -sf $< $@
+
+libvchan.so.$(MAJOR).$(MINOR): $(LIBVCHAN_OBJS)
+       $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libvchan.so.$(MAJOR) 
$(SHLIB_LDFLAGS) -o $@ $^ $(LIBVCHAN_LIBS)
+
+libvchan.a: $(LIBVCHAN_OBJS)
+       $(AR) rcs libvchan.a $^
+
+vchan-node1: $(NODE_OBJS) libvchan.so
+       $(CC) $(LDFLAGS) -o $@ $(NODE_OBJS) libvchan.so $(LDLIBS_libvchan)
+
+vchan-node2: $(NODE2_OBJS) libvchan.so
+       $(CC) $(LDFLAGS) -o $@ $(NODE2_OBJS) libvchan.so $(LDLIBS_libvchan)
+
+.PHONY: install
+install: all
+       $(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
+       $(INSTALL_PROG) libvchan.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
+       ln -sf libvchan.so.$(MAJOR).$(MINOR) 
$(DESTDIR)$(LIBDIR)/libvchan.so.$(MAJOR)
+       ln -sf libvchan.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libvchan.so
+       $(INSTALL_DATA) libvchan.a $(DESTDIR)$(LIBDIR)
+
+.PHONY: clean
+clean:
+       $(RM) -f *.o *.so* *.a vchan-node1 vchan-node2 $(DEPS)
+
+distclean: clean
+
+-include $(DEPS)
diff --git a/tools/libvchan/init.c b/tools/libvchan/init.c
new file mode 100644
index 0000000..b267ca7
--- /dev/null
+++ b/tools/libvchan/init.c
@@ -0,0 +1,464 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *       Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * @section LICENSE
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @section DESCRIPTION
+ *
+ *  This file contains the setup code used to establish the ring buffer.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/user.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <xs.h>
+#include <xen/sys/evtchn.h>
+#include <xen/sys/gntalloc.h>
+#include <xen/sys/gntdev.h>
+#include <xen/io/libvchan.h>
+
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT 12
+#endif
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#define max(a,b) ((a > b) ? a : b)
+
+static int init_gnt_srv(struct libvchan *ctrl)
+{
+       int pages_left = ctrl->read.order >= PAGE_SHIFT ? 1 << 
(ctrl->read.order - PAGE_SHIFT) : 0;
+       int pages_right = ctrl->write.order >= PAGE_SHIFT ? 1 << 
(ctrl->write.order - PAGE_SHIFT) : 0;
+       struct ioctl_gntalloc_alloc_gref *gref_info = NULL;
+       int ring_fd = open("/dev/xen/gntalloc", O_RDWR);
+       int ring_ref = -1;
+       int err;
+       void *ring, *area;
+
+       if (ring_fd < 0)
+               return -1;
+
+       gref_info = malloc(sizeof(*gref_info) + max(pages_left, 
pages_right)*sizeof(uint32_t));
+
+       gref_info->domid = ctrl->other_domain_id;
+       gref_info->flags = GNTALLOC_FLAG_WRITABLE;
+       gref_info->count = 1;
+
+       err = ioctl(ring_fd, IOCTL_GNTALLOC_ALLOC_GREF, gref_info);
+       if (err)
+               goto out;
+
+       ring = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, 
ring_fd, gref_info->index);
+
+       if (ring == MAP_FAILED)
+               goto out;
+
+       ctrl->ring = ring;
+       ring_ref = gref_info->gref_ids[0];
+
+       memset(ring, 0, PAGE_SIZE);
+
+       ctrl->read.shr = &ctrl->ring->left;
+       ctrl->write.shr = &ctrl->ring->right;
+       ctrl->ring->left_order = ctrl->read.order;
+       ctrl->ring->right_order = ctrl->write.order;
+       ctrl->ring->cli_live = 2;
+       ctrl->ring->srv_live = 1;
+       ctrl->ring->debug = 0xabcd;
+
+#ifdef IOCTL_GNTALLOC_SET_UNMAP_NOTIFY
+       {
+               struct ioctl_gntalloc_unmap_notify arg;
+               arg.index = gref_info->index + offsetof(struct vchan_interface, 
srv_live);
+               arg.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT;
+               arg.event_channel_port = ctrl->event_port;
+               ioctl(ring_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &arg);
+       }
+#endif
+
+       if (ctrl->read.order == 10) {
+               ctrl->read.buffer = ((void*)ctrl->ring) + 1024;
+       } else if (ctrl->read.order == 11) {
+               ctrl->read.buffer = ((void*)ctrl->ring) + 2048;
+       } else {
+               gref_info->count = pages_left;
+               err = ioctl(ring_fd, IOCTL_GNTALLOC_ALLOC_GREF, gref_info);
+               if (err)
+                       goto out_ring;
+               area = mmap(NULL, pages_left * PAGE_SIZE, PROT_READ | 
PROT_WRITE,
+                       MAP_SHARED, ring_fd, gref_info->index);
+               if (area == MAP_FAILED)
+                       goto out_ring;
+               ctrl->read.buffer = area;
+               memcpy(ctrl->ring->grants, gref_info->gref_ids, pages_left * 
sizeof(uint32_t));
+       }
+
+       if (ctrl->write.order == 10) {
+               ctrl->write.buffer = ((void*)ctrl->ring) + 1024;
+       } else if (ctrl->write.order == 11) {
+               ctrl->write.buffer = ((void*)ctrl->ring) + 2048;
+       } else {
+               gref_info->count = pages_right;
+               err = ioctl(ring_fd, IOCTL_GNTALLOC_ALLOC_GREF, gref_info);
+               if (err)
+                       goto out_unmap_left;
+               area = mmap(NULL, pages_right * PAGE_SIZE, PROT_READ | 
PROT_WRITE,
+                       MAP_SHARED, ring_fd, gref_info->index);
+               if (area == MAP_FAILED)
+                       goto out_unmap_left;
+               ctrl->write.buffer = area;
+               memcpy(ctrl->ring->grants + (pages_left * sizeof(uint32_t)),
+                      gref_info->gref_ids, pages_right * sizeof(uint32_t));
+       }
+
+out:
+       close(ring_fd);
+       free(gref_info);
+       return ring_ref;
+out_unmap_left:
+       if (ctrl->read.order > 11)
+               munmap(ctrl->read.buffer, pages_left * PAGE_SIZE);
+out_ring:
+       munmap(ring, PAGE_SIZE);
+       ring_ref = -1;
+       ctrl->ring = NULL;
+       ctrl->write.order = ctrl->read.order = 0;
+       goto out;
+}
+
+static void* do_gnt_map(int fd, int domid, uint32_t* pages, size_t npages, 
uint64_t *index)
+{
+       int i, rv;
+       void* area = NULL;
+       struct ioctl_gntdev_map_grant_ref *gref_info;
+       gref_info = malloc(sizeof(*gref_info) + 
npages*sizeof(gref_info->refs[0]));
+       gref_info->count = npages;
+       for(i=0; i < npages; i++) {
+               gref_info->refs[i].domid = domid;
+               gref_info->refs[i].ref = pages[i];
+       }
+
+       rv = ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, gref_info);
+       if (rv)
+               goto out;
+       if (index)
+               *index = gref_info->index;
+       area = mmap(NULL, PAGE_SIZE * npages, PROT_READ | PROT_WRITE, 
MAP_SHARED, fd, gref_info->index);
+       if (area == MAP_FAILED) {
+               struct ioctl_gntdev_unmap_grant_ref undo = {
+                       .index = gref_info->index,
+                       .count = gref_info->count
+               };
+               ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &undo);
+               area = NULL;
+       }
+ out:
+       free(gref_info);
+       return area;
+}
+
+static int init_gnt_cli(struct libvchan *ctrl, uint32_t ring_ref)
+{
+       int ring_fd = open("/dev/xen/gntdev", O_RDWR);
+       int rv = -1;
+       uint64_t ring_index;
+       uint32_t *grants;
+       if (ring_fd < 0)
+               return -1;
+
+       ctrl->ring = do_gnt_map(ring_fd, ctrl->other_domain_id, &ring_ref, 1, 
&ring_index);
+
+       if (!ctrl->ring)
+               goto out;
+
+       ctrl->write.order = ctrl->ring->left_order;
+       ctrl->read.order = ctrl->ring->right_order;
+       ctrl->write.shr = &ctrl->ring->left;
+       ctrl->read.shr = &ctrl->ring->right;
+       if (ctrl->write.order < 10 || ctrl->write.order > 24)
+               goto out_unmap_ring;
+       if (ctrl->read.order < 10 || ctrl->read.order > 24)
+               goto out_unmap_ring;
+       if (ctrl->read.order == ctrl->write.order && ctrl->read.order < 12)
+               goto out_unmap_ring;
+
+       grants = ctrl->ring->grants;
+
+       if (ctrl->write.order == 10) {
+               ctrl->write.buffer = ((void*)ctrl->ring) + 1024;
+       } else if (ctrl->write.order == 11) {
+               ctrl->write.buffer = ((void*)ctrl->ring) + 2048;
+       } else {
+               int pages_left = 1 << (ctrl->write.order - PAGE_SHIFT);
+               ctrl->write.buffer = do_gnt_map(ring_fd, ctrl->other_domain_id, 
grants, pages_left, NULL);
+               if (!ctrl->write.buffer)
+                       goto out_unmap_ring;
+               grants += pages_left;
+       }
+
+       if (ctrl->read.order == 10) {
+               ctrl->read.buffer = ((void*)ctrl->ring) + 1024;
+       } else if (ctrl->read.order == 11) {
+               ctrl->read.buffer = ((void*)ctrl->ring) + 2048;
+       } else {
+               int pages_right = 1 << (ctrl->read.order - PAGE_SHIFT);
+               ctrl->read.buffer = do_gnt_map(ring_fd, ctrl->other_domain_id, 
grants, pages_right, NULL);
+               if (!ctrl->read.buffer)
+                       goto out_unmap_left;
+       }
+
+#ifdef IOCTL_GNTDEV_SET_UNMAP_NOTIFY
+       {
+               struct ioctl_gntdev_unmap_notify arg;
+               arg.index = ring_index + offsetof(struct vchan_interface, 
cli_live);
+               arg.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT;
+               arg.event_channel_port = ctrl->event_port;
+               ioctl(ring_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &arg);
+       }
+#endif
+
+       rv = 0;
+ out:
+       close(ring_fd);
+       return rv;
+ out_unmap_left:
+       if (ctrl->write.order >= PAGE_SHIFT)
+               munmap(ctrl->write.buffer, 1 << ctrl->write.order);
+ out_unmap_ring:
+       munmap(ctrl->ring, PAGE_SIZE);
+       ctrl->ring = 0;
+       ctrl->write.order = ctrl->read.order = 0;
+       rv = -1;
+       goto out;
+}
+
+static int init_evt_srv(struct libvchan *ctrl)
+{
+       struct ioctl_evtchn_bind_unbound_port bind;
+       ctrl->event_fd = open("/dev/xen/evtchn", O_RDWR);
+       if (ctrl->event_fd < 0)
+               return -1;
+       bind.remote_domain = ctrl->other_domain_id;
+       ctrl->event_port = ioctl(ctrl->event_fd, 
IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
+       if (ctrl->event_port < 0)
+               return -1;
+       write(ctrl->event_fd, &ctrl->event_port, sizeof(ctrl->event_port));
+       return 0;
+}
+
+static int init_xs_srv(struct libvchan *ctrl, int ring_ref)
+{
+       int ret = -1;
+       struct xs_handle *xs;
+       struct xs_permissions perms[2];
+       char buf[64];
+       char ref[16];
+       char* domid_str = NULL;
+       xs = xs_domain_open();
+       if (!xs)
+               goto fail;
+       domid_str = xs_read(xs, 0, "domid", NULL);
+       if (!domid_str)
+               goto fail_xs_open;
+
+       // owner domain is us
+       perms[0].id = atoi(domid_str);
+       // permissions for domains not listed = none
+       perms[0].perms = XS_PERM_NONE;
+       // other domains
+       perms[1].id = ctrl->other_domain_id;
+       perms[1].perms = XS_PERM_READ;
+
+       snprintf(ref, sizeof ref, "%d", ring_ref);
+       snprintf(buf, sizeof buf, "data/vchan/%d/ring-ref", 
ctrl->device_number);
+       if (!xs_write(xs, 0, buf, ref, strlen(ref)))
+               goto fail_xs_open;
+       if (!xs_set_permissions(xs, 0, buf, perms, 2))
+               goto fail_xs_open;
+
+       snprintf(ref, sizeof ref, "%d", ctrl->event_port);
+       snprintf(buf, sizeof buf, "data/vchan/%d/event-channel", 
ctrl->device_number);
+       if (!xs_write(xs, 0, buf, ref, strlen(ref)))
+               goto fail_xs_open;
+       if (!xs_set_permissions(xs, 0, buf, perms, 2))
+               goto fail_xs_open;
+
+       ret = 0;
+ fail_xs_open:
+       free(domid_str);
+       xs_daemon_close(xs);
+ fail:
+       return ret;
+}
+
+static int min_order(size_t siz)
+{
+       int rv = PAGE_SHIFT;
+       while (siz > (1 << rv))
+               rv++;
+       return rv;
+}
+
+struct libvchan *libvchan_server_init(int domain, int devno, size_t left_min, 
size_t right_min)
+{
+       // if you go over this size, you'll have too many grants to fit in the 
shared page.
+       size_t MAX_RING_SIZE = 256 * PAGE_SIZE;
+       struct libvchan *ctrl;
+       int ring_ref;
+       if (left_min > MAX_RING_SIZE || right_min > MAX_RING_SIZE)
+               return 0;
+
+       ctrl = malloc(sizeof(*ctrl));
+       if (!ctrl)
+               return 0;
+
+       ctrl->other_domain_id = domain;
+       ctrl->device_number = devno;
+       ctrl->ring = NULL;
+       ctrl->event_fd = -1;
+       ctrl->is_server = 1;
+       ctrl->server_persist = 0;
+
+       ctrl->read.order = min_order(left_min);
+       ctrl->write.order = min_order(right_min);
+
+       // if we can avoid allocating extra pages by using in-page rings, do so
+#define MAX_SMALL_RING 1024
+#define MAX_LARGE_RING 2048
+       if (left_min <= MAX_SMALL_RING && right_min <= MAX_LARGE_RING) {
+               ctrl->read.order = 10;
+               ctrl->write.order = 11;
+       } else if (left_min <= MAX_LARGE_RING && right_min <= MAX_SMALL_RING) {
+               ctrl->read.order = 11;
+               ctrl->write.order = 10;
+       } else if (left_min <= MAX_LARGE_RING) {
+               ctrl->read.order = 11;
+       } else if (right_min <= MAX_LARGE_RING) {
+               ctrl->write.order = 11;
+       }
+       if (init_evt_srv(ctrl))
+               goto out;
+       ring_ref = init_gnt_srv(ctrl);
+       if (ring_ref < 0)
+               goto out;
+       if (init_xs_srv(ctrl, ring_ref))
+               goto out;
+       return ctrl;
+out:
+       libvchan_close(ctrl);
+       return 0;
+}
+
+static int init_evt_cli(struct libvchan *ctrl)
+{
+       struct ioctl_evtchn_bind_interdomain bind;
+       ctrl->event_fd = open("/dev/xen/evtchn", O_RDWR);
+       if (ctrl->event_fd < 0)
+               return -1;
+
+       bind.remote_domain = ctrl->other_domain_id;
+       bind.remote_port = ctrl->event_port;
+       ctrl->event_port = ioctl(ctrl->event_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, 
&bind);
+       if (ctrl->event_port < 0)
+               return -1;
+       return 0;
+}
+
+
+struct libvchan *libvchan_client_init(int domain, int devno)
+{
+       struct libvchan *ctrl = malloc(sizeof(struct libvchan));
+       struct xs_handle *xs = NULL;
+       char buf[64];
+       char *ref;
+       int ring_ref;
+       unsigned int len;
+       if (!ctrl)
+               return 0;
+       ctrl->other_domain_id = domain;
+       ctrl->device_number = devno;
+       ctrl->ring = NULL;
+       ctrl->event_fd = -1;
+       ctrl->write.order = ctrl->read.order = 0;
+       ctrl->is_server = 0;
+
+       xs = xs_daemon_open();
+       if (!xs)
+               xs = xs_domain_open();
+       if (!xs)
+               goto fail;
+
+// find xenstore entry
+       snprintf(buf, sizeof buf, "/local/domain/%d/data/vchan/%d/ring-ref",
+               ctrl->other_domain_id, ctrl->device_number);
+       ref = xs_read(xs, 0, buf, &len);
+       if (!ref)
+               goto fail;
+       ring_ref = atoi(ref);
+       free(ref);
+       if (!ring_ref)
+               goto fail;
+       snprintf(buf, sizeof buf, 
"/local/domain/%d/data/vchan/%d/event-channel",
+               ctrl->other_domain_id, ctrl->device_number);
+       ref = xs_read(xs, 0, buf, &len);
+       if (!ref)
+               goto fail;
+       ctrl->event_port = atoi(ref);
+       free(ref);
+       if (!ctrl->event_port)
+               goto fail;
+
+// set up event channel
+       if (init_evt_cli(ctrl))
+               goto fail;
+
+// set up shared page(s)
+       if (init_gnt_cli(ctrl, ring_ref))
+               goto fail;
+
+       ctrl->ring->cli_live = 1;
+       ctrl->ring->debug = 0xabce;
+
+ out:
+       if (xs)
+               xs_daemon_close(xs);
+       return ctrl;
+ fail:
+       libvchan_close(ctrl);
+       ctrl = NULL;
+       goto out;
+}
diff --git a/tools/libvchan/io.c b/tools/libvchan/io.c
new file mode 100644
index 0000000..584b169
--- /dev/null
+++ b/tools/libvchan/io.c
@@ -0,0 +1,314 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *       Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * @section LICENSE
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @section DESCRIPTION
+ *
+ *  This file contains the communications interface built on the ring buffer.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <xenctrl.h>
+#include <xen/io/libvchan.h>
+
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT 12
+#endif
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+// allow vchan data to be easily observed in strace by doing a
+// writev() to FD -1 with the data being read/written.
+#ifndef VCHAN_DEBUG
+#define VCHAN_DEBUG 0
+#endif
+
+#define barrier() asm volatile("" ::: "memory")
+
+static uint32_t rd_prod(struct libvchan *ctrl)
+{
+       return ctrl->read.shr->prod;
+}
+
+static uint32_t* _rd_cons(struct libvchan *ctrl)
+{
+       return &ctrl->read.shr->cons;
+}
+#define rd_cons(x) (*_rd_cons(x))
+
+static uint32_t* _wr_prod(struct libvchan *ctrl)
+{
+       return &ctrl->write.shr->prod;
+}
+#define wr_prod(x) (*_wr_prod(x))
+
+static uint32_t wr_cons(struct libvchan *ctrl)
+{
+       return ctrl->write.shr->cons;
+}
+
+static const void* rd_ring(struct libvchan *ctrl)
+{
+       return ctrl->read.buffer;
+}
+
+static void* wr_ring(struct libvchan *ctrl)
+{
+       return ctrl->write.buffer;
+}
+
+static uint32_t wr_ring_size(struct libvchan *ctrl)
+{
+       return (1 << ctrl->write.order);
+}
+
+static uint32_t rd_ring_size(struct libvchan *ctrl)
+{
+       return (1 << ctrl->read.order);
+}
+
+int libvchan_data_ready(struct libvchan *ctrl)
+{
+       return rd_prod(ctrl) - rd_cons(ctrl);
+}
+
+int libvchan_buffer_space(struct libvchan *ctrl)
+{
+       return wr_ring_size(ctrl) - (wr_prod(ctrl) - wr_cons(ctrl));
+}
+
+static int do_notify(struct libvchan *ctrl)
+{
+       struct ioctl_evtchn_notify notify;
+       notify.port = ctrl->event_port;
+       return ioctl(ctrl->event_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+}
+
+int libvchan_wait(struct libvchan *ctrl)
+{
+       int ret;
+       uint32_t dummy;
+       ret = read(ctrl->event_fd, &dummy, sizeof(dummy));
+       if (ret == -1)
+               return -1;
+       write(ctrl->event_fd, &dummy, sizeof(dummy));
+       return 0;
+}
+
+/**
+ * returns -1 on error, or size on success
+ */
+static int do_send(struct libvchan *ctrl, const void *data, size_t size)
+{
+       int real_idx = wr_prod(ctrl) & (wr_ring_size(ctrl) - 1);
+       int avail_contig = wr_ring_size(ctrl) - real_idx;
+       if (VCHAN_DEBUG) {
+               char metainfo[32];
+               struct iovec iov[2];
+               iov[0].iov_base = metainfo;
+               iov[0].iov_len = snprintf(metainfo, 32, "vchan wr %d/%d", 
ctrl->other_domain_id, ctrl->device_number);
+               iov[1].iov_base = (void *)data;
+               iov[1].iov_len = size;
+               writev(-1, iov, 2);
+       }
+       if (avail_contig > size)
+               avail_contig = size;
+       memcpy(wr_ring(ctrl) + real_idx, data, avail_contig);
+       if (avail_contig < size)
+       {
+               // we rolled across the end of the ring
+               memcpy(wr_ring(ctrl), data + avail_contig, size - avail_contig);
+       }
+       barrier(); // data must be in the ring prior to increment
+       wr_prod(ctrl) += size;
+       barrier(); // increment must happen prior to notify
+       if (do_notify(ctrl) < 0)
+               return -1;
+       return size;
+}
+
+/**
+ * returns 0 if no buffer space is available, -1 on error, or size on success
+ */
+int libvchan_send(struct libvchan *ctrl, const void *data, size_t size)
+{
+       int avail;
+       while (1) {
+               if (!libvchan_is_open(ctrl))
+                       return -1;
+               avail = libvchan_buffer_space(ctrl);
+               if (size <= avail)
+                       return do_send(ctrl, data, size);
+               if (!ctrl->blocking)
+                       return 0;
+               if (size > wr_ring_size(ctrl))
+                       return -1;
+               if (libvchan_wait(ctrl))
+                       return -1;
+       }
+}
+
+int libvchan_write(struct libvchan *ctrl, const void *data, size_t size)
+{
+       int avail;
+       if (!libvchan_is_open(ctrl))
+               return -1;
+       if (ctrl->blocking) {
+               size_t pos = 0;
+               while (1) {
+                       avail = libvchan_buffer_space(ctrl);
+                       if (pos + avail > size)
+                               avail = size - pos;
+                       if (avail)
+                               pos += do_send(ctrl, data + pos, avail);
+                       if (pos == size)
+                               return pos;
+                       if (libvchan_wait(ctrl))
+                               return -1;
+                       if (!libvchan_is_open(ctrl))
+                               return -1;
+               }
+       } else {
+               avail = libvchan_buffer_space(ctrl);
+               if (size > avail)
+                       size = avail;
+               if (size == 0)
+                       return 0;
+               return do_send(ctrl, data, size);
+       }
+}
+
+static int do_recv(struct libvchan *ctrl, void *data, size_t size)
+{
+       int real_idx = rd_cons(ctrl) & (rd_ring_size(ctrl) - 1);
+       int avail_contig = rd_ring_size(ctrl) - real_idx;
+       if (avail_contig > size)
+               avail_contig = size;
+       barrier(); // data read must happen after rd_cons read
+       memcpy(data, rd_ring(ctrl) + real_idx, avail_contig);
+       if (avail_contig < size)
+       {
+               // we rolled across the end of the ring
+               memcpy(data + avail_contig, rd_ring(ctrl), size - avail_contig);
+       }
+       rd_cons(ctrl) += size;
+       if (VCHAN_DEBUG) {
+               char metainfo[32];
+               struct iovec iov[2];
+               iov[0].iov_base = metainfo;
+               iov[0].iov_len = snprintf(metainfo, 32, "vchan rd %d/%d", 
ctrl->other_domain_id, ctrl->device_number);
+               iov[1].iov_base = data;
+               iov[1].iov_len = size;
+               writev(-1, iov, 2);
+       }
+       barrier(); // consumption must happen prior to notify of newly freed 
space
+       if (do_notify(ctrl) < 0)
+               return -1;
+       return size;
+}
+
+/**
+ * reads exactly size bytes from the vchan.
+ * returns 0 if insufficient data is available, -1 on error, or size on success
+ */
+int libvchan_recv(struct libvchan *ctrl, void *data, size_t size)
+{
+       while (1) {
+               int avail = libvchan_data_ready(ctrl);
+               if (size <= avail)
+                       return do_recv(ctrl, data, size);
+               if (!libvchan_is_open(ctrl))
+                       return -1;
+               if (!ctrl->blocking)
+                       return 0;
+               if (size > rd_ring_size(ctrl))
+                       return -1;
+               if (libvchan_wait(ctrl))
+                       return -1;
+       }
+}
+
+int libvchan_read(struct libvchan *ctrl, void *data, size_t size)
+{
+       while (1) {
+               int avail = libvchan_data_ready(ctrl);
+               if (avail && size > avail)
+                       size = avail;
+               if (avail)
+                       return do_recv(ctrl, data, size);
+               if (!libvchan_is_open(ctrl))
+                       return -1;
+               if (!ctrl->blocking)
+                       return 0;
+               if (libvchan_wait(ctrl))
+                       return -1;
+       }
+}
+
+int libvchan_is_open(struct libvchan* ctrl)
+{
+       if (ctrl->is_server)
+               return ctrl->server_persist || ctrl->ring->cli_live;
+       else
+               return ctrl->ring->srv_live;
+}
+
+/// The fd to use for select() set
+int libvchan_fd_for_select(struct libvchan *ctrl)
+{
+       return ctrl->event_fd;
+}
+
+void libvchan_close(struct libvchan *ctrl)
+{
+       if (!ctrl)
+               return;
+       if (ctrl->ring) {
+               if (ctrl->is_server)
+                       ctrl->ring->srv_live = 0;
+               else
+                       ctrl->ring->cli_live = 0;
+               munmap(ctrl->ring, PAGE_SIZE);
+       }
+       if (ctrl->event_fd != -1) {
+               if (ctrl->event_port > 0 && ctrl->ring)
+                       do_notify(ctrl);
+               close(ctrl->event_fd);
+       }
+       if (ctrl->read.order >= PAGE_SHIFT)
+               munmap(ctrl->read.buffer, 1 << ctrl->read.order);
+       if (ctrl->write.order >= PAGE_SHIFT)
+               munmap(ctrl->write.buffer, 1 << ctrl->write.order);
+       free(ctrl);
+}
diff --git a/tools/libvchan/node-select.c b/tools/libvchan/node-select.c
new file mode 100644
index 0000000..0000bc8
--- /dev/null
+++ b/tools/libvchan/node-select.c
@@ -0,0 +1,161 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *       Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * @section LICENSE
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @section DESCRIPTION
+ *
+ * This is a test program for libvchan.  Communications are bidirectional,
+ * with either server (grant offeror) or client able to read and write.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <xen/io/libvchan.h>
+
+void usage(char** argv)
+{
+       fprintf(stderr, "usage:\n"
+               "\t%s [client|server] domainid nodeid [rbufsiz wbufsiz]\n",
+               argv[0]);
+       exit(1);
+}
+
+#define BUFSIZE 5000
+char inbuf[BUFSIZE];
+char outbuf[BUFSIZE];
+int insiz = 0;
+int outsiz = 0;
+struct libvchan *ctrl = 0;
+
+void vchan_wr() {
+       if (!insiz)
+               return;
+       int ret = libvchan_write(ctrl, inbuf, insiz);
+       if (ret < 0) {
+               fprintf(stderr, "vchan write failed\n");
+               exit(1);
+       }
+       if (ret > 0) {
+               insiz -= ret;
+               memmove(inbuf, inbuf + ret, insiz);
+       }
+}
+
+void stdout_wr() {
+       if (!outsiz)
+               return;
+       int ret = write(1, outbuf, outsiz);
+       if (ret < 0 && errno != EAGAIN)
+               exit(1);
+       if (ret > 0) {
+               outsiz -= ret;
+               memmove(outbuf, outbuf + ret, outsiz);
+       }
+}
+
+/**
+    Simple libvchan application, both client and server.
+       Both sides may write and read, both from the libvchan and from 
+       stdin/stdout (just like netcat).
+*/
+
+int main(int argc, char **argv)
+{
+       int ret;
+       int libvchan_fd;
+       if (argc < 4)
+               usage(argv);
+       if (!strcmp(argv[1], "server")) {
+               int rsiz = argc > 4 ? atoi(argv[4]) : 0;
+               int wsiz = argc > 5 ? atoi(argv[5]) : 0;
+               ctrl = libvchan_server_init(atoi(argv[2]), atoi(argv[3]), rsiz, 
wsiz);
+       } else if (!strcmp(argv[1], "client"))
+               ctrl = libvchan_client_init(atoi(argv[2]), atoi(argv[3]));
+       else
+               usage(argv);
+       if (!ctrl) {
+               perror("libvchan_*_init");
+               exit(1);
+       }
+
+       fcntl(0, F_SETFL, O_NONBLOCK);
+       fcntl(1, F_SETFL, O_NONBLOCK);
+
+       libvchan_fd = libvchan_fd_for_select(ctrl);
+       for (;;) {
+               fd_set rfds;
+               fd_set wfds;
+               FD_ZERO(&rfds);
+               FD_ZERO(&wfds);
+               if (insiz != BUFSIZE)
+                       FD_SET(0, &rfds);
+               if (outsiz)
+                       FD_SET(1, &wfds);
+               FD_SET(libvchan_fd, &rfds);
+               ret = select(libvchan_fd + 1, &rfds, &wfds, NULL, NULL);
+               if (ret < 0) {
+                       perror("select");
+                       exit(1);
+               }
+               if (FD_ISSET(0, &rfds)) {
+                       ret = read(0, inbuf + insiz, BUFSIZE - insiz);
+                       if (ret < 0 && errno != EAGAIN)
+                               exit(1);
+                       if (ret == 0) {
+                               while (insiz) {
+                                       vchan_wr();
+                                       libvchan_wait(ctrl);
+                               }
+                               return 0;
+                       }
+                       if (ret)
+                               insiz += ret;
+                       vchan_wr();
+               }
+               if (FD_ISSET(libvchan_fd, &rfds)) {
+                       libvchan_wait(ctrl);
+                       vchan_wr();
+               }
+               if (FD_ISSET(1, &wfds))
+                       stdout_wr();
+               while (libvchan_data_ready(ctrl) && outsiz < BUFSIZE) {
+                       ret = libvchan_read(ctrl, outbuf + outsiz, BUFSIZE - 
outsiz);
+                       if (ret < 0)
+                               exit(1);
+                       outsiz += ret;
+                       stdout_wr();
+               }
+               if (!libvchan_is_open(ctrl)) {
+                       fcntl(1, F_SETFL, 0);
+                       while (outsiz)
+                               stdout_wr();
+                       return 0;
+               }
+       }
+}
diff --git a/tools/libvchan/node.c b/tools/libvchan/node.c
new file mode 100644
index 0000000..219a6ef
--- /dev/null
+++ b/tools/libvchan/node.c
@@ -0,0 +1,168 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *       Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * @section LICENSE
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @section DESCRIPTION
+ *
+ * This is a test program for libvchan.  Communications are in one direction,
+ * either server (grant offeror) to client or vice versa.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <xen/io/libvchan.h>
+
+int libvchan_write_all(struct libvchan *ctrl, char *buf, int size)
+{
+       int written = 0;
+       int ret;
+       while (written < size) {
+               ret = libvchan_write(ctrl, buf + written, size - written);
+               if (ret <= 0) {
+                       perror("write");
+                       exit(1);
+               }
+               written += ret;
+       }
+       return size;
+}
+
+int write_all(int fd, char *buf, int size)
+{
+       int written = 0;
+       int ret;
+       while (written < size) {
+               ret = write(fd, buf + written, size - written);
+               if (ret <= 0) {
+                       perror("write");
+                       exit(1);
+               }
+               written += ret;
+       }
+       return size;
+}
+
+void usage(char** argv)
+{
+       fprintf(stderr, "usage:\n"
+               "%s [client|server] [read|write] domid nodeid\n", argv[0]);
+       exit(1);
+}
+
+#define BUFSIZE 5000
+char buf[BUFSIZE];
+void reader(struct libvchan *ctrl)
+{
+       int size;
+       for (;;) {
+               size = rand() % (BUFSIZE - 1) + 1;
+               size = libvchan_read(ctrl, buf, size);
+               fprintf(stderr, "#");
+               if (size < 0) {
+                       perror("read vchan");
+                       libvchan_close(ctrl);
+                       exit(1);
+               }
+               size = write_all(1, buf, size);
+               if (size < 0) {
+                       perror("stdout write");
+                       exit(1);
+               }
+               if (size == 0) {
+                       perror("write size=0?\n");
+                       exit(1);
+               }
+       }
+}
+
+void writer(struct libvchan *ctrl)
+{
+       int size;
+       for (;;) {
+               size = rand() % (BUFSIZE - 1) + 1;
+               size = read(0, buf, size);
+               if (size < 0) {
+                       perror("read stdin");
+                       libvchan_close(ctrl);
+                       exit(1);
+               }
+               if (size == 0)
+                       break;
+               size = libvchan_write_all(ctrl, buf, size);
+               fprintf(stderr, "#");
+               if (size < 0) {
+                       perror("vchan write");
+                       exit(1);
+               }
+               if (size == 0) {
+                       perror("write size=0?\n");
+                       exit(1);
+               }
+       }
+}
+
+
+/**
+       Simple libvchan application, both client and server.
+       One side does writing, the other side does reading; both from
+       standard input/output fds.
+*/
+int main(int argc, char **argv)
+{
+       int seed = time(0);
+       struct libvchan *ctrl = 0;
+       int wr = 0;
+       if (argc < 4)
+               usage(argv);
+       if (!strcmp(argv[2], "read"))
+               wr = 0;
+       else if (!strcmp(argv[2], "write"))
+               wr = 1;
+       else
+               usage(argv);
+       if (!strcmp(argv[1], "server"))
+               ctrl = libvchan_server_init(atoi(argv[3]), atoi(argv[4]), 0, 0);
+       else if (!strcmp(argv[1], "client"))
+               ctrl = libvchan_client_init(atoi(argv[3]), atoi(argv[4]));
+       else
+               usage(argv);
+       if (!ctrl) {
+               perror("libvchan_*_init");
+               exit(1);
+       }
+       ctrl->blocking = 1;
+
+       srand(seed);
+       fprintf(stderr, "seed=%d\n", seed);
+       if (wr)
+               writer(ctrl);
+       else
+               reader(ctrl);
+       libvchan_close(ctrl);
+       return 0;
+}
diff --git a/xen/include/public/io/libvchan.h b/xen/include/public/io/libvchan.h
new file mode 100644
index 0000000..81db0e2
--- /dev/null
+++ b/xen/include/public/io/libvchan.h
@@ -0,0 +1,209 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <rafal@xxxxxxxxxxxxxxxxxxxxxx>
+ *       Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
+ *
+ * @section LICENSE
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @section DESCRIPTION
+ *
+ *  Originally borrowed from the Qubes OS Project, http://www.qubes-os.org,
+ *  this code has been substantially rewritten to use the gntdev and gntalloc
+ *  devices instead of raw MFNs and map_foreign_range.
+ *
+ *  This is a library for inter-domain communication.  A standard Xen ring
+ *  buffer is used, with a datagram-based interface built on top.  The grant
+ *  reference and event channels are shared in XenStore under the path
+ *  /local/domain/<domid>/data/vchan/<port>/{ring-ref,event-channel}
+ *
+ *  The ring.h macros define an asymmetric interface to a shared data structure
+ *  that assumes all rings reside in a single contiguous memory space. This is
+ *  not suitable for vchan because the interface to the ring is symmetric 
except
+ *  for the setup. Unlike the producer-consumer rings defined in ring.h, the
+ *  size of the rings used in vchan are determined at execution time instead of
+ *  compile time, so the macros in ring.h cannot be used to access the rings.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <xen/sys/evtchn.h>
+
+struct ring_shared {
+       uint32_t cons, prod;
+};
+
+/**
+ * vchan_interface: primary shared data structure
+ */
+struct vchan_interface {
+       /**
+        * Standard consumer/producer interface, one pair per buffer
+        * left is client write, server read
+        * right is client read, server write
+        */
+       struct ring_shared left, right;
+       /**
+        * size of the rings, which determines their location
+        * 10   - at offset 1024 in ring's page
+        * 11   - at offset 2048 in ring's page
+        * 12+  - uses 2^(N-12) grants to describe the multi-page ring
+        * These should remain constant once the page is shared.
+        * Only one of the two orders can be 10 (or 11).
+        */
+       uint16_t left_order, right_order;
+       /**
+        * Shutdown detection:
+        *  0: client (or server) has exited
+        *  1: client (or server) is connected
+        *  2: client has not yet connected
+        */
+       uint8_t cli_live, srv_live;
+       /**
+        * structure padding; magic values depending on setup stage
+        */
+       uint16_t debug;
+       /**
+        * Grant list: ordering is left, right. Must not extend into actual ring
+        * or grow beyond the end of the initial shared page.
+        * These should remain constant once the page is shared, to allow
+        * for possible remapping by a client that restarts.
+        */
+       uint32_t grants[0];
+};
+
+struct libvchan_ring {
+       /* Pointer into the shared page. Offsets into buffer. */
+       struct ring_shared* shr;
+       /* ring data; may be its own shared page(s) depending on order */
+       void* buffer;
+       /**
+        * The size of the ring is (1 << order); offsets wrap around when they
+        * exceed this. This copy is required because we can't trust the order
+        * in the shared page to remain constant.
+        */
+       int order;
+};
+
+/**
+ * struct libvchan: control structure passed to all library calls
+ */
+struct libvchan {
+       /* person we communicate with */
+       int other_domain_id;
+       /* "port" we communicate on (allows multiple vchans to exist in 
xenstore) */
+       int device_number;
+       /* Shared ring page, mapped using gntdev or gntalloc */
+       /* Note that the FD for gntdev or gntalloc has already been closed. */
+       struct vchan_interface *ring;
+       /* event channel interface (needs port for API) */
+       int event_fd;
+       uint32_t event_port;
+       /* informative flags: are we acting as server? */
+       int is_server:1;
+       /* true if server remains active when client closes (allows 
reconnection) */
+       int server_persist:1;
+       /* true if operations should block instead of returning 0 */
+       int blocking:1;
+       /* communication rings */
+       struct libvchan_ring read, write;
+};
+
+/**
+ * Set up a vchan, including granting pages
+ * @param domain The peer domain that will be connecting
+ * @param devno A device number, used to identify this vchan in xenstore
+ * @param send_min The minimum size (in bytes) of the send ring (left)
+ * @param recv_min The minimum size (in bytes) of the receive ring (right)
+ * @return The structure, or NULL in case of an error
+ */
+struct libvchan *libvchan_server_init(int domain, int devno, size_t read_min, 
size_t write_min);
+/**
+ * Connect to an existing vchan. Note: you can reconnect to an existing vchan
+ * safely, however no locking is performed, so you must prevent multiple 
clients
+ * from connecting to a single server.
+ *
+ * @param domain The peer domain to connect to
+ * @param devno A device number, used to identify this vchan in xenstore
+ * @return The structure, or NULL in case of an error
+ */
+struct libvchan *libvchan_client_init(int domain, int devno);
+/**
+ * Close a vchan. This deallocates the vchan and attempts to free its
+ * resources. The other side is notified of the close, but can still read any
+ * data pending prior to the close.
+ */
+void libvchan_close(struct libvchan *ctrl);
+
+/**
+ * Packet-based receive: always reads exactly $size bytes.
+ * @param ctrl The vchan control structure
+ * @param data Buffer for data that was read
+ * @param size Size of the buffer and amount of data to read
+ * @return -1 on error, 0 if nonblocking and insufficient data is available, 
or $size
+ */
+int libvchan_recv(struct libvchan *ctrl, void *data, size_t size);
+/**
+ * Stream-based receive: reads as much data as possible.
+ * @param ctrl The vchan control structure
+ * @param data Buffer for data that was read
+ * @param size Size of the buffer
+ * @return -1 on error, otherwise the amount of data read (which may be zero if
+ *         the vchan is nonblocking)
+ */
+int libvchan_read(struct libvchan *ctrl, void *data, size_t size);
+/**
+ * Packet-based send: send entire buffer if possible
+ * @param ctrl The vchan control structure
+ * @param data Buffer for data to send
+ * @param size Size of the buffer and amount of data to send
+ * @return -1 on error, 0 if nonblocking and insufficient space is available, 
or $size
+ */
+int libvchan_send(struct libvchan *ctrl, const void *data, size_t size);
+/**
+ * Stream-based send: send as much data as possible.
+ * @param ctrl The vchan control structure
+ * @param data Buffer for data to send
+ * @param size Size of the buffer
+ * @return -1 on error, otherwise the amount of data sent (which may be zero if
+ *         the vchan is nonblocking)
+ */
+int libvchan_write(struct libvchan *ctrl, const void *data, size_t size);
+/**
+ * Waits for reads or writes to unblock, or for a close
+ */
+int libvchan_wait(struct libvchan *ctrl);
+/**
+ * Returns the event file descriptor for this vchan. When this FD is readable,
+ * libvchan_wait() will not block, and the state of the vchan has changed since
+ * the last invocation of libvchan_wait().
+ */
+int libvchan_fd_for_select(struct libvchan *ctrl);
+/**
+ * Query the state of the vchan shared page:
+ *  return 0 when one side has called libvchan_close() or crashed
+ *  return 1 when both sides are open
+ *  return 2 [server only] when no client has yet connected
+ */
+int libvchan_is_open(struct libvchan* ctrl);
+/** Amount of data ready to read, in bytes */
+int libvchan_data_ready(struct libvchan *ctrl);
+/** Amount of data it is possible to send without blocking */
+int libvchan_buffer_space(struct libvchan *ctrl);
-- 
1.7.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.