|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v6 04/10] xen: Introduce XENMEM_soft_reset operation
>>> On 13.05.15 at 11:49, <vkuznets@xxxxxxxxxx> wrote:
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -580,6 +580,234 @@ static long
> memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
> return rc;
> }
>
> +static long
> memory_soft_reset(XEN_GUEST_HANDLE_PARAM(xen_memory_soft_reset_t) arg)
> +{
> + long rc = 0;
The way it's coded right now, I don't see the need for this or the
function return type being "long" (but see the comments on the
public header change).
> + struct xen_memory_soft_reset softr;
> + struct domain *source_d, *dest_d;
> + unsigned long mfn, mfn_new, gmfn, last_gmfn, count;
> + unsigned int order;
> + p2m_type_t p2mt;
> + struct page_info *page, *new_page, *next_page;
> + int drop_dom_ref;
> +
> + if ( copy_from_guest(&softr, arg, 1) )
> + return -EFAULT;
> +
> + if ( softr.source_domid == softr.dest_domid )
> + return -EINVAL;
> +
> + source_d = rcu_lock_domain_by_any_id(softr.source_domid);
> + if ( source_d == NULL )
> + {
> + rc = -ESRCH;
> + goto fail_early;
> + }
> +
> + dest_d = rcu_lock_domain_by_any_id(softr.dest_domid);
> + if ( dest_d == NULL )
> + {
> + rc = -ESRCH;
> + rcu_unlock_domain(source_d);
> + goto fail_early;
> + }
> +
> + if ( dest_d->is_dying )
> + {
> + rc = -EINVAL;
> + goto fail;
> + }
> +
> + if ( !source_d->is_dying )
> + {
> + /*
> + * Make sure no allocation/remapping for the source domain is ongoing
> + * and set is_dying flag to prevent such actions in future.
> + */
> + spin_lock(&source_d->page_alloc_lock);
> + source_d->is_dying = DOMDYING_locked;
You need to repeat the is_dying check with the lock held, and act
accordingly if it isn't zero anymore.
Furthermore I don't see how this prevents there being further
changes to the GFN <-> MFN mappings for the guest (yet you rely
on them to be stable below afaict).
> + spin_unlock(&source_d->page_alloc_lock);
> + }
> +
> + last_gmfn = domain_get_maximum_gpfn(source_d);
> + gmfn = softr.gmfn_start;
> + while ( gmfn <= last_gmfn )
By now you should have done a privilege check.
> + {
> + page = get_page_from_gfn(source_d, gmfn, &p2mt, 0);
> + if ( unlikely(page == NULL) )
> + {
> +#ifdef CONFIG_X86
> + count = 0;
> + while ( p2m_is_pod(p2mt) )
> + {
> + count++;
> + if ( gmfn + count > last_gmfn )
> + break;
> + page = get_page_from_gfn(source_d, gmfn + count, &p2mt, 0);
> + if ( page )
> + {
> + put_page(page);
> + page = NULL;
Why? You don't use the value further down.
> + break;
> + }
> + }
> +
> + if ( !count )
> + gmfn++;
> +
> + while ( count )
> + {
> + order = get_order_from_pages(count);
> + if ( (1ul << order) > count )
> + order -= 1;
--
> + rc = guest_physmap_mark_populate_on_demand(dest_d, gmfn,
> order);
> + if ( rc )
> + goto fail;
> + count -= 1ul << order;
> + gmfn += 1ul << order;
> + }
> +#else
> + gmfn++;
> +#endif
> + goto preempt_check;
> + }
> +
> + mfn = page_to_mfn(page);
> + if ( unlikely(!mfn_valid(mfn)) )
> + {
> + put_page(page);
> + gmfn++;
> + goto preempt_check;
I guess this should never happen, and hence issuing a warning may be
warranted?
> + }
> +
> + next_page = page;
> +
> + /*
> + * A normal page is supposed to have count_info = 2 (1 from the
> domain
> + * and 1 from get_page_from_gfn() above). All other pages need to be
> + * copied.
> + */
I'm afraid this isn't precise enough: _PGC_allocated implies one
refcount, i.e. if that flag turns out to be clear, you should expect
just 1. And then I'd wonder whether a !_PGC_allocated page
needs any handling here in the first place (or perhaps, once the
GFN <-> MFN mapping is ensured to be stable, this case can't
happen anyway).
> + count = 0;
> + while ( next_page && !is_xen_heap_page(next_page) &&
> + (next_page->count_info & PGC_count_mask) == 2 &&
> + page_to_mfn(next_page) == mfn + count )
> + {
> + count++;
> + drop_dom_ref = 0;
> + spin_lock(&source_d->page_alloc_lock);
> + page_set_owner(next_page, NULL);
> + page_list_del(next_page, &source_d->page_list);
> + source_d->tot_pages -= 1;
--
> + if ( unlikely(source_d->tot_pages == 0) )
> + drop_dom_ref = 1;
> + spin_unlock(&source_d->page_alloc_lock);
> + put_page(next_page);
> + if ( drop_dom_ref )
> + put_domain(source_d);
> +
> + if ( unlikely(assign_pages(dest_d, next_page, 0, 0)) )
> + {
> + printk(XENLOG_G_INFO "Failed to assign Dom%d's MFN %lx"
> + " to Dom%d\n", source_d->domain_id, mfn,
> + dest_d->domain_id);
> + rc = -EFAULT;
A better (more distinct) error code should be used here. And I
suppose printing the GFN would be more helpful for debugging
possible issues than printing the MFN.
> + goto fail;
> + }
> +
> + if ( unlikely(gmfn + count > last_gmfn) )
> + {
> + next_page = NULL;
> + break;
> + }
> +
> + next_page = get_page_from_gfn(source_d, gmfn + count, &p2mt, 0);
> + }
> +
> + if ( next_page && count )
> + put_page(next_page);
> +
> + if ( !count && (page->count_info & PGC_count_mask) != 2 )
Re-reading the field seems wrong here: What if it was > 2 above,
but is == 2 now?
Also together with the "else if" below I think this would better be
structured
if ( count )
{
if ( next_page && count )
put_page(next_page);
}
else if ( (page->count_info & PGC_count_mask) != 2 )
...
else
> + {
> + new_page = alloc_domheap_page(dest_d, 0);
> + if ( unlikely(new_page == NULL) )
> + {
> + printk(XENLOG_G_INFO "Failed to alloc a page to replace"
> + " Dom%d's GFN %lx (MFN %lx) for Dom %d\n",
> + source_d->domain_id, gmfn, mfn, dest_d->domain_id);
> + rc = -ENOMEM;
> + put_page(page);
> + goto fail;
> + }
> + mfn_new = page_to_mfn(new_page);
> + copy_domain_page(mfn_new, mfn);
> + mfn = mfn_new;
> + put_page(page);
> + if ( guest_physmap_add_page(dest_d, gmfn, mfn, 0) )
> + {
> + printk(XENLOG_G_INFO "Failed to add new GFN %lx"
> + " (MFN %lx) to Dom%d\n",
> + gmfn, mfn, dest_d->domain_id);
> + rc = -EFAULT;
> + goto fail;
> + }
> + gmfn++;
> + softr.nr_transferred++;
> + }
> + else if ( !count )
> + {
> + put_page(page);
> + gmfn++;
> + goto preempt_check;
> + }
> +
> + while ( count )
> + {
> + order = get_order_from_pages(count);
> + if ( (1ul << order) > count )
> + order -= 1;
> +
> + guest_physmap_remove_page(source_d, gmfn, mfn, order);
> +
> + if ( guest_physmap_add_page(dest_d, gmfn, mfn, order) )
Considering the non-atomicity between the assign_pages() much
further up and this call, shouldn't the domain be paused while its
state is inconsistent?
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -573,7 +573,39 @@ struct xen_vnuma_topology_info {
> typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
> DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
>
> -/* Next available subop number is 27 */
> +/*
> + * Transfer all memory pages from one domain to the other. Pages are unmapped
> + * from the source domain and mapped at the same GFNs to the destination
> + * domain. This hypercall has a side-effect of moving the source domain to
> the
> + * 'dying' state.
Namely due to this, shouldn't this rather be a domctl?
> + * If a particular page is mapped more than once in the source domain a new
> + * empty page is being allocated for the destination domain and the content
> is
> + * being copied. The original page remains mapped to the source domain.
> + *
> + * The caller has to be privileged and it is supposed to set gmfn_start to 0;
> + * this field is required for the hypercall continuation.
If it was kept to be a mem-op - mem-ops have a mechanism to be
continued without custom structure fields.
> +#define XENMEM_soft_reset 27
> +struct xen_memory_soft_reset {
> + /*
> + * [IN] Memory transfer details.
> + */
> + domid_t source_domid; /* steal pages from */
> + domid_t dest_domid; /* assign pages to */
> +
> + xen_pfn_t gmfn_start; /* start from gmfn */
> +
> + /*
> + * [OUT] Number of transferred pages including new allocations.
> + */
> + xen_ulong_t nr_transferred;
It's not really clear to me what use this is.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |