Re: [Xen-devel] [PATCH v4 6/9] tools/libxc: x86 pv save implementation
On Wed, 2014-04-30 at 19:36 +0100, Andrew Cooper wrote:
The previous "common" patch seemed to have a fair bit of x86 pv save
code in it, and there seems to be a fair bit of common code here. I
suppose the split was pretty artificial to make the patches manageable?
> @@ -121,6 +123,9 @@ struct context
> };
> };
>
> +/* Saves an x86 PV domain. */
I should hope so!
> +int save_x86_pv(struct context *ctx);
> +
> /*
> * Write the image and domain headers to the stream.
> * (to eventually make static in save.c)
> @@ -137,6 +142,21 @@ struct record
> void *data;
> };
>
> +/* Gets a field from an *_any union */
Unless the #undefs (which were above, before I trimmed them) relate to
patch #2/9, they should go right before the redefines, IMHO.
> +#define GET_FIELD(_c, _p, _f) \
> + ({ (_c)->x86_pv.width == 8 ? \
> + (_p)->x64._f: \
> + (_p)->x32._f; \
> + }) \
> +
> +/* Gets a field from an *_any union */
> +#define SET_FIELD(_c, _p, _f, _v) \
> + ({ if ( (_c)->x86_pv.width == 8 ) \
> + (_p)->x64._f = (_v); \
> + else \
> + (_p)->x32._f = (_v); \
> + })
> +
> /*
> * Writes a split record to the stream, applying correct padding where
> * appropriate. It is common when sending records containing blobs from Xen
> + /* MFNs of the batch pfns */
> + mfns = malloc(nr_pfns * sizeof *mfns);
> + /* Types of the batch pfns */
> + types = malloc(nr_pfns * sizeof *types);
> + /* Errors from attempting to map the mfns */
> + errors = malloc(nr_pfns * sizeof *errors);
> + /* Pointers to page data to send. Might be from mapped mfns or local allocations */
> + guest_data = calloc(nr_pfns, sizeof *guest_data);
> + /* Pointers to locally allocated pages. Probably not all used, but need freeing */
> + local_pages = calloc(nr_pfns, sizeof *local_pages);
Wouldn't it be better to preallocate (most of) these for the entire
save/restore than to do it for each batch?
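Something like this in the setup path, just as a sketch (field names here
are invented, not the ones in the patch):

    /* Allocate the per-batch scratch buffers once for the whole save,
     * rather than on every write_batch() call. */
    ctx->batch_mfns   = malloc(MAX_BATCH_SIZE * sizeof(*ctx->batch_mfns));
    ctx->batch_types  = malloc(MAX_BATCH_SIZE * sizeof(*ctx->batch_types));
    ctx->batch_errors = malloc(MAX_BATCH_SIZE * sizeof(*ctx->batch_errors));
    if ( !ctx->batch_mfns || !ctx->batch_types || !ctx->batch_errors )
    {
        ERROR("Unable to allocate batch buffers");
        goto err;
    }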
> + for ( i = 0; i < nr_pfns; ++i )
> + {
> + types[i] = mfns[i] = ctx->ops.pfn_to_gfn(ctx, ctx->batch_pfns[i]);
> +
> + /* Likely a ballooned page */
> + if ( mfns[i] == INVALID_MFN )
> + set_bit(ctx->batch_pfns[i], ctx->deferred_pages);
deferred_pages doesn't need to grow dynamically like that bit map in a
previous patch, does it?
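If the domain's maximum pfn is already known by the time this is set up,
it could just be allocated once at full size, e.g. (rough sketch, with
ctx->x86_pv.max_pfn assumed to be that value):

    /* One allocation covering every pfn; no need to grow it per batch. */
    ctx->deferred_pages = calloc(1, (ctx->x86_pv.max_pfn / 8) + 1);
    if ( !ctx->deferred_pages )
    {
        ERROR("Unable to allocate deferred pages bitmap");
        goto err;
    }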
> + hdr.count = nr_pfns;
> + s = nr_pfns * sizeof *rec_pfns;
Brackets around sizeof would really help here.
> +
> +
> + rec_pfns = malloc(s);
Given the calculation of s is this a candidate for calloc?
> + if ( !rec_pfns )
> + {
> + ERROR("Unable to allocate memory for page data pfn list");
> + goto err;
> + }
> +
> + for ( i = 0; i < nr_pfns; ++i )
> + rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->batch_pfns[i];
Could this not have a more structured type? (I noticed this in the
decode bit of the previous patch too).
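Something like the below, perhaps (sketch only; it matches the current
encoding only on a little-endian host, so the on-wire layout would need
to be stated explicitly somewhere):

    /* One entry per pfn in the batch: low 32 bits pfn, high 32 bits type,
     * i.e. the current ((uint64_t)type << 32) | pfn encoding by name. */
    struct rec_page_data_pfn
    {
        uint32_t pfn;
        uint32_t type;
    };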
> +
> + /* header + pfns data + page data */
> + rec_size = 4 + 4 + (s) + (nr_pages * PAGE_SIZE);
Can these 4's not be sizeof something? Also the comment looks like it
wants to align with something, but is failing.
> + rec_count = nr_pfns;
> +
> + if ( write_exact(ctx->fd, &rec_type, sizeof(uint32_t)) ||
> + write_exact(ctx->fd, &rec_size, sizeof(uint32_t)) ||
Are these not a struct rhdr?
> + write_exact(ctx->fd, &rec_count, sizeof(uint32_t)) ||
> + write_exact(ctx->fd, &rec_res, sizeof(uint32_t)) )
> + {
> + PERROR("Failed to write page_type header to stream");
> + goto err;
> + }
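i.e. roughly the below (names invented, just to show the shape; it also
lets the two bare 4s above become sizeofs):

    /* Whole record header in one struct, written with a single write_exact(). */
    struct rec_page_data_hdr
    {
        uint32_t type;      /* record type                              */
        uint32_t size;      /* length of count, reserved, pfns and data */
        uint32_t count;     /* number of pfns in the batch              */
        uint32_t reserved;
    } phdr =
    {
        .type  = rec_type,
        .count = nr_pfns,
    };

    phdr.size = sizeof(phdr.count) + sizeof(phdr.reserved) + s
                + (nr_pages * PAGE_SIZE);

    if ( write_exact(ctx->fd, &phdr, sizeof(phdr)) )
    {
        PERROR("Failed to write page_data record header to stream");
        goto err;
    }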
> +
> + if ( write_exact(ctx->fd, rec_pfns, s) )
> + {
> + PERROR("Failed to write page_type header to stream");
> + goto err;
> + }
> +
> +
> + for ( i = 0; i < nr_pfns; ++i )
Extra braces are useful for clarity in these situations.
> +static int flush_batch(struct context *ctx)
> +{
> + int rc = 0;
> +
> + if ( ctx->nr_batch_pfns == 0 )
> + return rc;
> +
> + rc = write_batch(ctx);
> +
> + if ( !rc )
> + {
> + /* Valgrind sanity check */
What now?
> + free(ctx->batch_pfns);
> + ctx->batch_pfns = malloc(MAX_BATCH_SIZE * sizeof *ctx->batch_pfns);
> + rc = !ctx->batch_pfns;
> + }
> +
> +
> + for ( x = 0, pages_written = 0; x < max_iter ; ++x )
> + {
> + DPRINTF("Iteration %u", x);
Something in this series should be using xc_report_progress_* (I'm not
sure that place is here, but I didn't see it elsewhere yet).
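e.g. (going from memory of the helpers in xc_private.h, so treat the
exact names/arguments as approximate, and total_pfns as whatever the
total page count ends up being called):

    /* Let the toolstack see progress, rather than only DPRINTFing it. */
    xc_report_progress_start(xch, "Memory", total_pfns);

    /* ... and then once per batch or iteration: */
    xc_report_progress_step(xch, pages_written, total_pfns);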
> int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
> uint32_t max_factor, uint32_t flags,
> struct save_callbacks* callbacks, int hvm,
> unsigned long vm_generationid_addr)
> {
> + struct context ctx =
> + {
Coding style.
> + .xch = xch,
> + .fd = io_fd,
> + };
> +
> + /* Older GCC cant initialise anonymous unions */
How old?
> +static int map_p2m(struct context *ctx)
> +{
> [...]
> + fll_mfn = GET_FIELD(ctx, ctx->x86_pv.shinfo,
> + arch.pfn_to_mfn_frame_list_list);
> + if ( !fll_mfn )
> + IPRINTF("Waiting for domain to set up its p2m frame list list");
> +
> + while ( tries-- && !fll_mfn )
> + {
> + usleep(10000);
> + fll_mfn = GET_FIELD(ctx, ctx->x86_pv.shinfo,
> + arch.pfn_to_mfn_frame_list_list);
> + }
> +
> + if ( !fll_mfn )
> + {
> + ERROR("Timed out waiting for p2m frame list list to be updated");
> + goto err;
> + }
This (preexisting) ugliness is there to "support" running migrate
immediately after starting a guest, when it hasn't got its act together
yet?
I wonder if we can get away with refusing to migrate under those
circumstances.
> +static int write_one_vcpu_basic(struct context *ctx, uint32_t id)
> +{
> + xc_interface *xch = ctx->xch;
> + xen_pfn_t mfn, pfn;
> + unsigned i;
> + int rc = -1;
> + vcpu_guest_context_any_t vcpu;
> + struct rec_x86_pv_vcpu vhdr = { .vcpu_id = id };
> + struct record rec =
> + {
> + .type = REC_TYPE_x86_pv_vcpu_basic,
> + .length = sizeof vhdr,
> + .data = &vhdr,
> + };
> +
> + if ( xc_vcpu_getcontext(xch, ctx->domid, id, &vcpu) )
> + {
> + PERROR("Failed to get vcpu%u context", id);
> + goto err;
> + }
> +
> + /* Vcpu 0 is special: Convert the suspend record to a PFN */
The "suspend record" is the guest's start_info_t I think? Would be
clearer to say that I think (and perhaps allude to it being the magic
3rd parameter to the hypercall.)
> + /* 64bit guests: Convert CR1 (guest pagetables) to PFN */
> + if ( ctx->x86_pv.levels == 4 && vcpu.x64.ctrlreg[1] )
> + {
> + mfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
> + if ( !mfn_in_pseudophysmap(ctx, mfn) )
> + {
> + ERROR("Bad MFN for vcpu%u's cr1", id);
> + pseudophysmap_walk(ctx, mfn);
> + errno = ERANGE;
> + goto err;
> + }
> +
> + pfn = mfn_to_pfn(ctx, mfn);
> + vcpu.x64.ctrlreg[1] = 1 | ((uint64_t)pfn << PAGE_SHIFT);
> + }
> +
> + if ( ctx->x86_pv.width == 8 )
The use of x86_pv.levels vs x86_pv.width to determine the type of guest
is rather inconsistent. .width would seem to be the right one to use
almost all the time I think.
> + rc = write_split_record(ctx, &rec, &vcpu, sizeof vcpu.x64);
> + else
> + rc = write_split_record(ctx, &rec, &vcpu, sizeof vcpu.x32);
> +
> + if ( rc )
> + goto err;
> +
> + DPRINTF("Writing vcpu%u basic context", id);
> + rc = 0;
> + err:
> +
> + return rc;
> +}
> +
> +static int write_one_vcpu_extended(struct context *ctx, uint32_t id)
> +{
> + xc_interface *xch = ctx->xch;
> + int rc;
> + struct rec_x86_pv_vcpu vhdr = { .vcpu_id = id };
> + struct record rec =
> + {
> + .type = REC_TYPE_x86_pv_vcpu_extended,
> + .length = sizeof vhdr,
> + .data = &vhdr,
> + };
Does this pattern repeat a lot? DECLARE_REC(x86_pv_vcpu_extended, vhdr)
perhaps?
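Something like (untested):

    #define DECLARE_REC(_type, _hdr)                \
        struct record rec =                         \
        {                                           \
            .type   = REC_TYPE_##_type,             \
            .length = sizeof(_hdr),                 \
            .data   = &(_hdr),                      \
        }

used as DECLARE_REC(x86_pv_vcpu_extended, vhdr);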
> + struct xen_domctl domctl =
> + {
> + .cmd = XEN_DOMCTL_get_ext_vcpucontext,
> + .domain = ctx->domid,
> + .u.ext_vcpucontext.vcpu = id,
> + };
Wants to use DECLARE_DOMCTL I think (at least that's what everywhere
else in libxc seems to do).
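i.e. the usual pattern:

    DECLARE_DOMCTL;

    domctl.cmd                    = XEN_DOMCTL_get_ext_vcpucontext;
    domctl.domain                 = ctx->domid;
    domctl.u.ext_vcpucontext.vcpu = id;

(although, unlike the designated initialiser, that doesn't zero the rest
of the struct).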
> +static int write_all_vcpu_information(struct context *ctx)
> +{
> + xc_interface *xch = ctx->xch;
> + xc_vcpuinfo_t vinfo;
> + unsigned int i;
> + int rc;
> +
> + for ( i = 0; i <= ctx->dominfo.max_vcpu_id; ++i )
> + {
> + rc = xc_vcpu_getinfo(xch, ctx->domid, i, &vinfo);
> + if ( rc )
> + {
> + PERROR("Failed to get vcpu%u information", i);
> + return rc;
> + }
> +
> + if ( !vinfo.online )
> + {
> + DPRINTF("vcpu%u offline - skipping", i);
> + continue;
> + }
> +
> + rc = write_one_vcpu_basic(ctx, i) ?:
> + write_one_vcpu_extended(ctx, i) ?:
> + write_one_vcpu_xsave(ctx, i);
I'd prefer the more verbose one at a time version.
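i.e.:

    rc = write_one_vcpu_basic(ctx, i);
    if ( rc )
        return rc;

    rc = write_one_vcpu_extended(ctx, i);
    if ( rc )
        return rc;

    rc = write_one_vcpu_xsave(ctx, i);
    if ( rc )
        return rc;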
> + /* Map various structures */
> + rc = x86_pv_map_m2p(ctx) ?: map_shinfo(ctx) ?: map_p2m(ctx);
One at a time please.
> + if ( rc )
> + goto err;
> +
Ian.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel