[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] libxl: new xlu_disk_parse function



On Thu, 2011-03-24 at 17:49 +0000, Ian Jackson wrote:
> From: Ian Jackson <ijackson@xxxxxxxxxxxxxxxxxxxxxx>
> 
> Introduce new flex/regexp-based parser for disk configuration strings.
> 
> Callers will be updated in following patches.
> 
> Signed-off-by: Ian Jackson <ijackson@xxxxxxxxxxxxxxxxxxxxxx>
> ---
>  config/StdGNU.mk              |    1 +
>  tools/libxl/Makefile          |    9 +-
>  tools/libxl/libxlu_cfg.c      |   42 +++++++---
>  tools/libxl/libxlu_disk.c     |  164 
> +++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxlu_disk_i.h   |   19 +++++
>  tools/libxl/libxlu_disk_l.l   |   54 ++++++++++++++
>  tools/libxl/libxlu_internal.h |   20 +++++
>  tools/libxl/libxlutil.h       |   12 +++
>  8 files changed, 304 insertions(+), 17 deletions(-)
>  create mode 100644 tools/libxl/libxlu_disk.c
>  create mode 100644 tools/libxl/libxlu_disk_i.h
>  create mode 100644 tools/libxl/libxlu_disk_l.l
> 
> diff --git a/config/StdGNU.mk b/config/StdGNU.mk
> index 25aeb4d..9b331f0 100644
> --- a/config/StdGNU.mk
> +++ b/config/StdGNU.mk
> @@ -67,6 +67,7 @@ CURSES_LIBS = -lncurses
>  PTHREAD_LIBS = -lpthread
>  UTIL_LIBS = -lutil
>  DLOPEN_LIBS = -ldl
> +PCRE_LIBS = -lpcre
> 
>  SONAME_LDFLAG = -soname
>  SHLIB_LDFLAGS = -shared
> diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
> index a7b1d51..f0c473f 100644
> --- a/tools/libxl/Makefile
> +++ b/tools/libxl/Makefile
> @@ -22,7 +22,7 @@ endif
>  LIBXL_LIBS =
>  LIBXL_LIBS = $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
> $(LDLIBS_libxenstore) $(LDLIBS_libblktapctl) $(UTIL_LIBS) $(LIBUUID_LIBS)
> 
> -LIBXLU_LIBS =
> +LIBXLU_LIBS = $(PCRE_LIBS)
> 
>  LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o
>  ifeq ($(LIBXL_BLKTAP),y)
> @@ -38,9 +38,10 @@ LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o 
> libxl_pci.o \
>                         libxl_internal.o libxl_utils.o $(LIBXL_OBJS-y)
>  LIBXL_OBJS += _libxl_types.o
> 
> -AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h
> -AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c
> -LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o
> +AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h libxlu_disk_l.h
> +AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c libxlu_disk_l.c
> +LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o \
> +       libxlu_disk_l.o libxlu_disk.o
> 
>  CLIENTS = xl
> 
> diff --git a/tools/libxl/libxlu_cfg.c b/tools/libxl/libxlu_cfg.c
> index f947c21..fd3e102 100644
> --- a/tools/libxl/libxlu_cfg.c
> +++ b/tools/libxl/libxlu_cfg.c
> @@ -15,6 +15,7 @@ XLU_Config *xlu_cfg_init(FILE *report, const char 
> *report_filename) {
>      if (!cfg->filename) { free(cfg); return 0; }
> 
>      cfg->settings= 0;
> +    cfg->disk_re= 0;
>      return cfg;
>  }
> 
> @@ -38,6 +39,29 @@ static int ctx_prep(CfgParseContext *ctx, XLU_Config *cfg) 
> {
> 
>  static void ctx_dispose(CfgParseContext *ctx) {
>      if (ctx->scanner) xlu__cfg_yylex_destroy(ctx->scanner);
> +    if (ctx->disk_re) pcre_free(ctx->disk_re);
> +}
> +
> +int xlu__scanner_string_prep(CfgParseContext *ctx, YY_BUFFER_STATE *buf,
> +                            XLU_Config *cfg, const char *data, int length) {
> +    int e;
> +
> +    e= ctx_prep(ctx, cfg);
> +    if (e) return e;
> +
> +    *buf = xlu__cfg_yy_scan_bytes(data, length, ctx->scanner);
> +    if (!*buf) {
> +        fprintf(cfg->report,"%s: unable to allocate scanner buffer\n",
> +                cfg->filename);
> +        return ENOMEM;
> +    }
> +
> +    return 0;
> +}
> +
> +void xlu__scanner_string_dispose(CfgParseContext *ctx, YY_BUFFER_STATE *buf) 
> {
> +    if (*buf) xlu__cfg_yy_delete_buffer(*buf, ctx->scanner);
> +    ctx_dispose(ctx);
>  }
> 
>  static void parse(CfgParseContext *ctx) {
> @@ -87,25 +111,17 @@ int xlu_cfg_readfile(XLU_Config *cfg, const char 
> *real_filename) {
> 
>  int xlu_cfg_readdata(XLU_Config *cfg, const char *data, int length) {
>      int e;
> -    YY_BUFFER_STATE buf= 0;
> -
> +    YY_BUFFER_STATE buf=0;
>      CfgParseContext ctx;
> -    e= ctx_prep(&ctx, cfg);
> -    if (e) { ctx.err= e; goto xe; }
> +    ctx.scanner=0;
> 
> -    buf = xlu__cfg_yy_scan_bytes(data, length, ctx.scanner);
> -    if (!buf) {
> -        fprintf(cfg->report,"%s: unable to allocate scanner buffer\n",
> -                cfg->filename);
> -        ctx.err= ENOMEM;
> -        goto xe;
> -    }
> +    e = xlu__scanner_string_prep(&ctx,&buf, cfg,data,length);
> +    if (e) { ctx.err = e; goto xe; }
> 
>      parse(&ctx);
> 
>   xe:
> -    if (buf) xlu__cfg_yy_delete_buffer(buf, ctx.scanner);
> -    ctx_dispose(&ctx);
> +    xlu__scanner_string_dispose(&ctx,&buf);
> 
>      return ctx.err;
>  }
> diff --git a/tools/libxl/libxlu_disk.c b/tools/libxl/libxlu_disk.c
> new file mode 100644
> index 0000000..be523f7
> --- /dev/null
> +++ b/tools/libxl/libxlu_disk.c
> @@ -0,0 +1,164 @@
> +
> +/* Parsing the disk spec a tricky problem, because the target
> + * string might contain "," which is the delimiter we use for
> + * stripping off things on the RHS, and ":", which is the delimiter
> + * we use for stripping off things on the LHS.
> + *
> + * For the LHS we use a flex scanner, see libxlu_disk_l.l.
> + * That chops prefixes like "phy:" from the front of the string,
> + * accumulating information into the disk structure.
> + *
> + * For the RHS, we use a regexp using pcre.
> + */
> +
> +#include "libxlu_internal.h"
> +#include "libxlu_disk_l.h"
> +#include "libxlu_disk_i.h"
> +#include "libxlu_cfg_i.h"
> +
> +static void err_core(DiskParseContext *dpc, const char *message,
> +                    const char *in, int inlen) {
> +    fprintf(dpc->ctx.cfg->report,
> +           "%s: config parsing error in disk specification:"
> +           " %s: near `%s' in `%.*s'\n",
> +           dpc->ctx.cfg->filename, message, dpc->spec, inlen, in);
> +    if (!dpc->ctx.err) dpc->ctx.err= EINVAL;
> +}
> +
> +void xlu__disk_err(DiskParseContext *dpc, const char *message) {
> +    err_core(dpc, message,
> +            xlu__disk_yyget_text(dpc->ctx.scanner),
> +            xlu__disk_yyget_leng(dpc->ctx.scanner));
> +}
> +
> +void xlu__disk_rhs(DiskParseContext *dpc) {
> +
> +    /* variables used by pcre */
> +#define WORKSPACE_SIZE 20 /* wtf no manifest constant from pcre */
> +#define OVECTOR_SIZE 10
> +    int ovector[OVECTOR_SIZE];
> +    int workspace[WORKSPACE_SIZE];
> +
> +    int rc, len;
> +    const char *text;
> +    libxl_device_disk *disk = dpc->disk;
> +
> +    static const char *re_text=
> +       "(.*),(?:/dev/)?([0-9a-zA-Z]*)(:[0-9a-z]+)?(,[0-9a-z]*)?"
> +       /* 1:target     2:vdev         3:type        4:attrib (r/w) */;
> +
> +    /* compile the regexp if we haven't got one already */
> +    if (!dpc->ctx.disk_re) {
> +       const char *re_errstr;
> +       int re_errcode, re_erroffset;
> +
> +       dpc->ctx.disk_re = pcre_compile2(re_text, PCRE_DOTALL,
> +                                        &re_errcode, &re_errstr, 
> &re_erroffset,
> +                                        0);
> +       if (!dpc->ctx.disk_re) {
> +           if (re_errcode == 21 /* wtf, no manifest constant */) {
> +               dpc->ctx.err = ENOMEM;
> +               return;
> +           }
> +           fprintf(dpc->ctx.cfg->report, "config parsing:"
> +                   " unable to compile regexp: %s [%d] (at `%s')",
> +                   re_errstr, re_errcode, re_text+re_erroffset);
> +           dpc->ctx.err = EIO;
> +           return;
> +       }
> +    }
> +
> +    /* actually run the regexp match */
> +
> +    text = xlu__disk_yyget_text(dpc->ctx.scanner);
> +    len = xlu__disk_yyget_leng(dpc->ctx.scanner);
> +
> +    rc = pcre_dfa_exec(dpc->ctx.disk_re, 0,
> +                      text, len, 0,
> +                      PCRE_ANCHORED,
> +                      ovector, OVECTOR_SIZE,
> +                      workspace, WORKSPACE_SIZE);
> +    switch (rc) {
> +    case PCRE_ERROR_NOMATCH:
> +       xlu__disk_err(dpc, "bad syntax for target, or missing vdev");
> +       return;

       case 1: ??

> +    case 2:
> +    case 3:
> +    case 4:
> +       break;

Or does it belong here? In which case aborting on a parse error is bad
juju.
       case 1:

> +    default:
> +       abort();
> +    }
> +
> +    /* macros for processing info from capturing parens; see pcreapi(3) */
> +
> +#define CAPTURED(cap) (START((cap)) >= 0)
> +#define START(cap) (ovector[(cap)*2])
> +#define END(cap)   (ovector[(cap)*2+1])
> +#define LEN(cap)   (END((cap)) - START((cap)))
> +#define TEXT(cap)  (text + START((cap)))
> +
> +#define STORE(cap, member) do{                                 \
> +    assert(CAPTURED((cap)));                                   \
> +    free(disk->member);                                                \
> +    disk->member = malloc(LEN((cap)) + 1);                     \
> +    if (!disk->member) { dpc->ctx.err = ENOMEM; return; }      \
> +    memcpy(disk->member, TEXT((cap)), LEN((cap)));             \
> +    disk->member[LEN((cap))] = 0;                              \
> +}while(0)
> +
> +#define EXACTLY(cap, string)                           \
> +    (CAPTURED((cap)) &&                                        \
> +     LEN((cap))==strlen((string)) &&                   \
> +     !memcmp(TEXT((cap)), (string), LEN((cap))))
> +
> +#define ERR(cap, msg) \
> +    err_core(dpc, (msg), TEXT((cap)), LEN((cap)))
> +
> +    /* actually process the four capturing parens in our regexp */
> +
> +    STORE(1, pdev_path);
> +
> +    STORE(2, vdev);
> +
> +    if (!CAPTURED(3) || EXACTLY(3, ":")) {
> +       disk->is_cdrom = 0;
> +    } else if (EXACTLY(3, ":cdrom")) {
> +       disk->is_cdrom = 1;
> +    } else {
> +       ERR(3, "unknown type (only `:' and `:cdrom' supported)");
> +    }
> +
> +    if (!CAPTURED(4) || EXACTLY(4, ",w") || EXACTLY(4, ",")) {
> +       disk->readwrite = 1;
> +    } else if (EXACTLY(4, ",r")) {
> +       disk->readwrite = 0;
> +    } else {
> +       ERR(4, "unknown read/write attribute");
> +    }
> +}

Hmm, I'm not sure this is nicer than the code it's replacing, it's
certainly a lot longer. I don't like the idea of it being full of things
comparing for ",w" or ":cdrom" etc, rather than tokenising it fully and
evaluating what the tokens are separately.

> +int xlu_disk_parse(XLU_Config *cfg, const char *spec,
> +                  libxl_device_disk *disk) {
> +    YY_BUFFER_STATE buf = 0;
> +    DiskParseContext dpc;
> +    int e, r;
> +
> +    dpc.disk = disk;
> +    dpc.spec = spec;
> +    dpc.ctx.scanner = 0;
> +
> +    e = xlu__scanner_string_prep(&dpc.ctx,&buf, cfg,spec,strlen(spec));
> +    if (e) { dpc.ctx.err = e; goto xe; }
> +
> +    r = xlu__disk_yylex(dpc.ctx.scanner);
> +    if (r) assert(dpc.ctx.err);
> +    if (dpc.ctx.err) goto xe;
> +
> +    if (disk->format == DISK_FORMAT_UNKNOWN) disk->format = DISK_FORMAT_RAW;
> +
> + xe:
> +    xlu__scanner_string_dispose(&dpc.ctx,&buf);
> +    return dpc.ctx.err;
> +}
> +
> diff --git a/tools/libxl/libxlu_disk_i.h b/tools/libxl/libxlu_disk_i.h
> new file mode 100644
> index 0000000..deb429f
> --- /dev/null
> +++ b/tools/libxl/libxlu_disk_i.h
> @@ -0,0 +1,19 @@
> +#ifndef LIBXLU_DISK_I_H
> +#define LIBXLU_DISK_I_H
> +
> +#include "libxlu_internal.h"
> +#include "libxl_internal.h"
> +
> +
> +typedef struct {
> +    CfgParseContext ctx;
> +    const char *spec;
> +    libxl_device_disk *disk;
> +} DiskParseContext;
> +
> +void xlu__disk_err(DiskParseContext *dpc, const char *message);
> +
> +void xlu__disk_rhs(DiskParseContext *dpc);
> +
> +
> +#endif /*LIBXLU_DISK_I_H*/
> diff --git a/tools/libxl/libxlu_disk_l.l b/tools/libxl/libxlu_disk_l.l
> new file mode 100644
> index 0000000..82e2248
> --- /dev/null
> +++ b/tools/libxl/libxlu_disk_l.l
> @@ -0,0 +1,54 @@
> +/* -*- fundamental -*- */
> +
> +%{
> +#include "libxlu_disk_i.h"
> +
> +#define dpc ((DiskParseContext*)yyextra)
> +#define YY_NO_INPUT
> +
> +#define DSET(member,enumname,valname) do{                              \
> +       if (dpc->disk->member != DISK_##enumname##_UNKNOWN &&           \
> +           dpc->disk->member != DISK_##enumname##_##valname) {         \
> +           xlu__disk_err(dpc, TOSTRING(member) " respecified");        \
> +           return 0;                                                   \
> +       } else {                                                        \
> +           dpc->disk->member = DISK_##enumname##_##valname;            \
> +       }                                                               \
> +    }while(0)
> +
> +/* Some versions of flex have a bug (Fedora bugzilla 612465) which causes
> + * it to fail to declare these functions, which it defines.  So declare
> + * them ourselves.  Hopefully we won't have to simultaneously support
> + * a flex version which declares these differently somehow. */
> +int xlu__disk_yyget_column(yyscan_t yyscanner);
> +void xlu__disk_yyset_column(int  column_no, yyscan_t yyscanner);
> +
> +%}
> +
> +%option warn
> +%option nodefault
> +%option batch
> +%option 8bit
> +%option noyywrap
> +%option reentrant
> +%option prefix="xlu__disk_yy"
> +%option nounput
> +
> +%%
> +
> +raw:           { DSET(format,FORMAT,RAW); }
> +qcow:          { DSET(format,FORMAT,QCOW); }
> +qcow2:         { DSET(format,FORMAT,QCOW2); }
> +vhd:           { DSET(format,FORMAT,QCOW2); }
> +
> +phy:           { DSET(format,FORMAT,RAW); DSET(backend,BACKEND,PHY); }
> +file:          { DSET(format,FORMAT,RAW); DSET(backend,BACKEND,TAP); }
> +tapdisk:|tap2?:        { DSET(backend,BACKEND,TAP); }
> +aio:           { }
> +ioemu:         { }

This bit is quite nice though. We could probably just tidy up the
existing parser using arrays of values and things rather than a lot of
if/else statements though.

> +[a-z][a-z0-9-]*: { xlu__disk_err(dpc,"unknown disk format/method"); return 
> 0; }
> +
> +[^:]*(\/.*)?   { xlu__disk_rhs(dpc); return 0; }
> +
> +.*             { xlu__disk_err(dpc,"bad disk syntax"); return 0; }
> diff --git a/tools/libxl/libxlu_internal.h b/tools/libxl/libxlu_internal.h
> index e251a63..d681fba 100644
> --- a/tools/libxl/libxlu_internal.h
> +++ b/tools/libxl/libxlu_internal.h
> @@ -21,10 +21,15 @@
>  #include <string.h>
>  #include <assert.h>
> 
> +#include <pcre.h>
> +
>  #define XLU_ConfigList XLU_ConfigSetting
> 
>  #include "libxlutil.h"
> 
> +
> +
> +
>  struct XLU_ConfigSetting { /* transparent */
>      struct XLU_ConfigSetting *next;
>      char *name;
> @@ -37,12 +42,27 @@ struct XLU_Config {
>      XLU_ConfigSetting *settings;
>      FILE *report;
>      char *filename;
> +    pcre *disk_re;
>  };
> 
>  typedef struct {
>      XLU_Config *cfg;
>      int err, lexerrlineno, likely_python;
>      void *scanner;
> +    pcre *disk_re;
>  } CfgParseContext;
> 
> +
> +#ifndef YY_TYPEDEF_YY_BUFFER_STATE
> +#define YY_TYPEDEF_YY_BUFFER_STATE
> +typedef struct yy_buffer_state *YY_BUFFER_STATE;
> +#endif
> +
> +int xlu__scanner_string_prep(CfgParseContext *ctx, YY_BUFFER_STATE *buf,
> +                            XLU_Config *cfg, const char *data, int length);
> +  /* Caller must initialise ctx->scanner=0 and buf=0 beforehand. */
> +
> +void xlu__scanner_string_dispose(CfgParseContext *ctx, YY_BUFFER_STATE *buf);
> +
> +
>  #endif /*LIBXLU_INTERNAL_H*/
> diff --git a/tools/libxl/libxlutil.h b/tools/libxl/libxlutil.h
> index 8a6fcbd..acf96fd 100644
> --- a/tools/libxl/libxlutil.h
> +++ b/tools/libxl/libxlutil.h
> @@ -58,4 +58,16 @@ const char *xlu_cfg_get_listitem(const XLU_ConfigList*, 
> int entry);
>    /* xlu_cfg_get_listitem cannot fail, except that if entry is
>     * out of range it returns 0 (not setting errno) */
> 
> +
> +/*
> + * Disk specification parsing.
> + */
> +
> +int xlu_disk_parse(XLU_Config *cfg, const char *spec,
> +                  libxl_device_disk *disk);
> +  /* disk must have been initialised; on error, returns errno value;
> +   * bad strings, returns EINVAL and prints a message to cfg's report.
> +   * That's all cfg is used for. */
> +
> +
>  #endif /* LIBXLUTIL_H */
> --
> 1.5.6.5

Gianni


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.