[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/8] oxenstored: add a poll-based select mechanism



Hi,

Thanks for sending these.

On 15 Sep 2014, at 23:39, Zheng Li <dev@xxxxxxxx> wrote:

> Currently, oxenstored uses Unix.select underneath, so it doesn't work properly
> if given an FD number >= 1024. This is a scalability bottleneck for hosts
> running a large number of VMs.
> 
> To remove this limitation, we implemented a poll-based mechanism but with the
> same type signature as the Unix.select currently in use, so the two functions
> can be interchanged at any stage.
> 
> Signed-off-by: Zheng Li <dev@xxxxxxxx>
> ---
> tools/ocaml/xenstored/Makefile       |  9 +++--
> tools/ocaml/xenstored/select.ml      | 49 ++++++++++++++++++++++++++
> tools/ocaml/xenstored/select.mli     | 20 +++++++++++
> tools/ocaml/xenstored/select_stubs.c | 68 ++++++++++++++++++++++++++++++++++++
> tools/ocaml/xenstored/xenstored.ml   |  2 +-
> 5 files changed, 144 insertions(+), 4 deletions(-)
> create mode 100644 tools/ocaml/xenstored/select.ml
> create mode 100644 tools/ocaml/xenstored/select.mli
> create mode 100644 tools/ocaml/xenstored/select_stubs.c
> 
> diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
> index 068e04a..47d5303 100644
> --- a/tools/ocaml/xenstored/Makefile
> +++ b/tools/ocaml/xenstored/Makefile
> @@ -15,10 +15,12 @@ OCAMLINCLUDE += \
>       -I $(OCAML_TOPLEVEL)/libs/xc \
>       -I $(OCAML_TOPLEVEL)/libs/eventchn
> 
> -LIBS = syslog.cma syslog.cmxa
> +LIBS = syslog.cma syslog.cmxa select.cma select.cmxa
> syslog_OBJS = syslog
> syslog_C_OBJS = syslog_stubs
> -OCAML_LIBRARY = syslog
> +select_OBJS = select
> +select_C_OBJS = select_stubs
> +OCAML_LIBRARY = syslog select
> 
> LIBS += systemd.cma systemd.cmxa
> systemd_OBJS = systemd
> @@ -46,12 +48,13 @@ OBJS = define \
>       process \
>       xenstored
> 
> -INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi
> +INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi select.cmi
> 
> XENSTOREDLIBS = \
>       unix.cmxa \
>       -ccopt -L -ccopt . syslog.cmxa \
>       -ccopt -L -ccopt . systemd.cmxa \
> +     -ccopt -L -ccopt . select.cmxa \
>       -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/mmap 
> $(OCAML_TOPLEVEL)/libs/mmap/xenmmap.cmxa \
>       -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/eventchn 
> $(OCAML_TOPLEVEL)/libs/eventchn/xeneventchn.cmxa \
>       -ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/xc 
> $(OCAML_TOPLEVEL)/libs/xc/xenctrl.cmxa \
> diff --git a/tools/ocaml/xenstored/select.ml b/tools/ocaml/xenstored/select.ml
> new file mode 100644
> index 0000000..2c18c70
> --- /dev/null
> +++ b/tools/ocaml/xenstored/select.ml
> @@ -0,0 +1,49 @@
> +(*
> + * Copyright (C) 2014 Zheng Li <dev@xxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + *)
> +
> +
> +type event = {
> +     mutable read: bool;
> +     mutable write: bool;
> +     mutable except: bool;
> +}

If I understand correctly, this is a representation of the POLL{IN,OUT,PRI} 
bitmap in the C stubs — it might be worth a short comment saying so (to help me 
remember in future).

> +
> +external select_on_poll: (Unix.file_descr * event) array -> int -> int = "stub_select_on_poll"
> +
> +let init_event () = {read = false; write = false; except = false}
> +
> +let select in_fds out_fds exc_fds timeout =
> +     let h = Hashtbl.create 57 in
> +     let add_event event_set fd =
> +             let e =
> +                     try Hashtbl.find h fd
> +                     with Not_found ->
> +                             let e = init_event () in
> +                             Hashtbl.add h fd e; e in
> +             event_set e in
> +     List.iter (add_event (fun x -> x.read <- true)) in_fds;
> +     List.iter (add_event (fun x -> x.write <- true)) out_fds;
> +     List.iter (add_event (fun x -> x.except <- true)) exc_fds;
> +     let a = Array.make (Hashtbl.length h) (Unix.stdin, init_event ()) in

Might be worth a comment to say that Unix.stdin is a dummy value, since all 
entries in the array are overwritten below.

> +     let i = ref (-1) in
> +     Hashtbl.iter (fun fd event -> incr i; Array.set a !i (fd, event)) h;
> +     let n = select_on_poll a (int_of_float (timeout *. 1000.)) in
> +     let r = [], [], [] in
> +     if n = 0 then r else
> +             Array.fold_right
> +                     (fun (fd, event) (r, w, x) ->
> +                      (if event.read then fd :: r else r),
> +                      (if event.write then fd :: w else w),
> +                      (if event.except then fd :: x else x))
> +                     a r
> diff --git a/tools/ocaml/xenstored/select.mli 
> b/tools/ocaml/xenstored/select.mli
> new file mode 100644
> index 0000000..1253d4e
> --- /dev/null
> +++ b/tools/ocaml/xenstored/select.mli
> @@ -0,0 +1,20 @@
> +(*
> + * Copyright (C) 2014 Zheng Li <dev@xxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + *)
> +
> +
> +(** Same interface and semantics as [Unix.select] but with an extra alternative
> +    implementation based on poll. *)
> +val select:
> +     Unix.file_descr list -> Unix.file_descr list -> Unix.file_descr list -> float
> +     -> Unix.file_descr list * Unix.file_descr list * Unix.file_descr list
> diff --git a/tools/ocaml/xenstored/select_stubs.c 
> b/tools/ocaml/xenstored/select_stubs.c
> new file mode 100644
> index 0000000..a50f417
> --- /dev/null
> +++ b/tools/ocaml/xenstored/select_stubs.c
> @@ -0,0 +1,68 @@
> +/*
> + * Copyright (C) 2014 Zheng Li <dev@xxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include <poll.h>
> +#include <errno.h>
> +#include <sys/resource.h>
> +#include <unistd.h>
> +#include <caml/mlvalues.h>
> +#include <caml/memory.h>
> +#include <caml/fail.h>
> +#include <caml/alloc.h>
> +#include <caml/signals.h>
> +#include <caml/unixsupport.h>
> +
> +CAMLprim value stub_select_on_poll(value fd_events, value timeo) {
> +
> +     CAMLparam2(fd_events, timeo);
> +     CAMLlocal1(events);
> +     int i, rc, c_len = Wosize_val(fd_events), c_timeo = Int_val(timeo);
> +     struct pollfd c_fds[c_len];     
> +
> +
> +     for (i = 0; i < c_len; i++) {
> +
> +             events = Field(Field(fd_events, i), 1);
> +
> +             c_fds[i].fd = Int_val(Field(Field(fd_events, i), 0));
> +             c_fds[i].events = c_fds[i].revents = 0;
> +             c_fds[i].events |= Bool_val(Field(events, 0)) ? POLLIN : 0;
> +             c_fds[i].events |= Bool_val(Field(events, 1)) ? POLLOUT: 0;
> +             c_fds[i].events |= Bool_val(Field(events, 2)) ? POLLPRI: 0;
> +
> +     };
> +
> +     caml_enter_blocking_section();
> +     rc = poll(c_fds, c_len, c_timeo);
> +     caml_leave_blocking_section();
> +
> +     if (rc < 0) uerror("select", Nothing);

Perhaps the error string should be “poll” so it would be obvious from the 
Unix_error which implementation failed?

> +
> +     if (rc > 0) {
> +
> +             for (i = 0; i < c_len; i++) {
> +
> +                     events = Field(Field(fd_events, i), 1);
> +
> +                     if (c_fds[i].revents & POLLNVAL) unix_error(EBADF, "select", Nothing);
> +                     Field(events, 0) = Val_bool(c_fds[i].events | POLLIN  && c_fds[i].revents & (POLLIN |POLLHUP|POLLERR));
> +                     Field(events, 1) = Val_bool(c_fds[i].events | POLLOUT && c_fds[i].revents & (POLLOUT|POLLHUP|POLLERR));
> +                     Field(events, 2) = Val_bool(c_fds[i].revents & POLLPRI);
> +                     
> +             }
> +
> +     }
> +
> +     CAMLreturn(Val_int(rc));
> +}
> diff --git a/tools/ocaml/xenstored/xenstored.ml 
> b/tools/ocaml/xenstored/xenstored.ml
> index 1c02f2f..bfa488f 100644
> --- a/tools/ocaml/xenstored/xenstored.ml
> +++ b/tools/ocaml/xenstored/xenstored.ml
> @@ -368,7 +368,7 @@ let _ =
>               let timeout = if List.length mw > 0 then 0. else -1. in
>               let rset, wset, _ =
>               try
> -                     Unix.select (spec_fds @ inset) outset [] timeout
> +                     Select.select (spec_fds @ inset) outset [] timeout
>               with Unix.Unix_error(Unix.EINTR, _, _) ->
>                       [], [], [] in
>               let sfds, cfds =
> -- 
> 2.1.0
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.