[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 1/2] libxl: Implement the handler to handle unrecoverable AER errors
On 2017-08-08 15:33:01 +0100, Wei Liu wrote: > On Mon, Aug 07, 2017 at 06:54:56PM -0500, Venu Busireddy wrote: > > Implement the callback function to handle unrecoverable AER errors, and > > also the public APIs that can be used to register/unregister the handler. > > When an AER error occurs, the handler will forcibly remove the erring > > PCIe device from the guest. > > > > Signed-off-by: Venu Busireddy <venu.busireddy@xxxxxxxxxx> > > --- > > tools/libxl/libxl.h | 14 +++++++ > > tools/libxl/libxl_event.h | 13 +++++++ > > tools/libxl/libxl_internal.h | 7 ++++ > > tools/libxl/libxl_pci.c | 90 > > ++++++++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 124 insertions(+) > > > > diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > > index 7cf0f31..c5af0aa 100644 > > --- a/tools/libxl/libxl.h > > +++ b/tools/libxl/libxl.h > > @@ -1044,6 +1044,20 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, > > const libxl_mac *src); > > */ > > #define LIBXL_HAVE_QED 1 > > > > +/* LIBXL_HAVE_REG_AER_EVENTS_HANDLER > > + * > > + * If it is defined, libxl has a library function called > > + * libxl_reg_aer_events_handler. > > + */ > > +#define LIBXL_HAVE_REG_AER_EVENTS_HANDLER 1 > > + > > +/* LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER > > + * > > + * If it is defined, libxl has a library function called > > + * libxl_unreg_aer_events_handler. > > + */ > > +#define LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER 1 > > + > > You can consolidate both into > > LIBLX_HAVE_AER_EVENTS_HANDLER > > > typedef char **libxl_string_list; > > void libxl_string_list_dispose(libxl_string_list *sl); > > int libxl_string_list_length(const libxl_string_list *sl); > > diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h > > index 1ea789e..1aea906 100644 > > --- a/tools/libxl/libxl_event.h > > +++ b/tools/libxl/libxl_event.h > > @@ -184,6 +184,19 @@ void libxl_evdisable_domain_death(libxl_ctx *ctx, > > libxl_evgen_domain_death*); > > * may generate only a DEATH event. > > */ > > > > +typedef struct libxl__aer_watch libxl_aer_watch; > > +int libxl_reg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch **) > > + LIBXL_EXTERNAL_CALLERS_ONLY; > > + /* > > + * Registers a handler to handle the occurrence of unrecoverable AER > > errors. > > + * This function depends on the calling application running the libxl's > > + * internal event loop. Toolstacks that do not use libxl's internal > > + * event loop must arrange to have their own event loop created and enter > > + * libxl (say, call libxl_event_wait()), to enable the event to be > > processed. > > + */ > > +void libxl_unreg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch > > *) > > + LIBXL_EXTERNAL_CALLERS_ONLY; > > + > > typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject; > > int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char > > *vdev, > > libxl_ev_user, libxl_evgen_disk_eject **evgen_out); > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > > index afe6652..2b74286 100644 > > --- a/tools/libxl/libxl_internal.h > > +++ b/tools/libxl/libxl_internal.h > > @@ -352,6 +352,13 @@ struct libxl__ev_child { > > LIBXL_LIST_ENTRY(struct libxl__ev_child) entry; > > }; > > > > +/* > > + * Structure used for AER event handling. > > + */ > > +struct libxl__aer_watch { > > + uint32_t domid; > > + libxl__ev_xswatch watch; > > +}; > > > > /* > > * evgen structures, which are the state we use for generating > > diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c > > index 65ad5e5..feedf27 100644 > > --- a/tools/libxl/libxl_pci.c > > +++ b/tools/libxl/libxl_pci.c > > @@ -1678,6 +1678,96 @@ static int libxl_device_pci_compare(libxl_device_pci > > *d1, > > return COMPARE_PCI(d1, d2); > > } > > > > +static void aer_backend_watch_callback(libxl__egc *egc, > > + libxl__ev_xswatch *watch, > > + const char *watch_path, > > + const char *event_path) > > +{ > > + EGC_GC; > > + libxl_aer_watch *aer_ws = CONTAINER_OF(watch, *aer_ws, watch); > > + int rc; > > + uint32_t dom, bus, dev, fn; > > + uint32_t domid = aer_ws->domid; > > + char *p, *path; > > + const char *aerFailedSBDF; > > + libxl_device_pci pcidev; > > + > > + /* Extract the backend directory. */ > > + path = libxl__strdup(gc, event_path); > > + p = strrchr(path, '/'); > > + if ((p == NULL) || (strcmp(p, "/aerFailedSBDF") != 0)) > > + return; > > + /* Truncate the string so it points to the backend directory. */ > > + *p = '\0'; > > + > > + /* Fetch the value of the failed PCI device. */ > > + rc = libxl__xs_read_checked(gc, XBT_NULL, > > + GCSPRINTF("%s/aerFailedSBDF", path), &aerFailedSBDF); > > + if (rc || !aerFailedSBDF) > > + return; > > + sscanf(aerFailedSBDF, "%x:%x:%x.%x", &dom, &bus, &dev, &fn); > > + > > + libxl_device_pci_init(&pcidev); > > + pcidev_struct_fill(&pcidev, dom, bus, dev, fn, 0); > > + /* Forcibly remove the device from the guest */ > > + rc = libxl__device_pci_remove_common(gc, domid, &pcidev, 1); > > + if (rc) > > + LOGD(ERROR, domid, " libxl__device_pci_remove_common() failed, > > rc=x%x", > > + (unsigned int)rc); > > + > > + return; > > +} > > + > > +int libxl_reg_aer_events_handler(libxl_ctx *ctx, > > + uint32_t domid, > > + libxl_aer_watch **aer_ws_out) > > Afaict libxl_aer_watch is an opaque type to external caller, so this > won't work, right? > > > +{ > > + int rc = 0; > > + int dom0_domid; > > uint32_t pciback_domid; > > > + char *be_path; > > + libxl_aer_watch *aer_ws = NULL; > > + GC_INIT(ctx); > > + > > + *aer_ws_out = NULL; > > + > > + rc = libxl__get_domid(gc, (uint32_t *)(&dom0_domid)); > > + if (rc) { > > + LOGD(ERROR, domid, " libxl__get_domid() failed, rc = %d", rc); > > + goto out; > > + } > > + > > + aer_ws = malloc(sizeof(libxl_aer_watch)); > > libxl__calloc(NOGC, ...); > > And then you can skip the check and memset. > > > + if (!aer_ws) { > > + rc = ERROR_NOMEM; > > + goto out; > > + } > > + memset(aer_ws, 0, sizeof(libxl_aer_watch)); > > + > > + aer_ws->domid = domid; > > + be_path = GCSPRINTF("/local/domain/%d/backend/pci/%u/%d/%s", > > + dom0_domid, domid, dom0_domid, "aerFailedSBDF"); > > Use %u here. > > > + rc = libxl__ev_xswatch_register(gc, &aer_ws->watch, > > + aer_backend_watch_callback, be_path); > > + *aer_ws_out = aer_ws; > > + > > +out: > > + GC_FREE; > > + return rc; > > +} > > + > > +void libxl_unreg_aer_events_handler(libxl_ctx *ctx, > > + uint32_t domid, > > + libxl_aer_watch *aer_ws) > > +{ > > + GC_INIT(ctx); > > + > > + libxl__ev_xswatch_deregister(gc, &aer_ws->watch); > > + > > + free(aer_ws); > > + GC_FREE; > > + return; > > +} I will implement all your above suggestions in v4. > I think a bigger question is whether you agree with Ian's comments > regarding API design and whether you have more questions? Ian suggested that I document the use of the API (about the event loop), and I believe I addressed it. I don't have any more questions. Just waiting for Ian's "Ack", or more comments. Venu _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |