[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 1/6] mmu: Introduce XENMEM_claim_pages (subop of memory ops).
On Thu, Mar 07, 2013 at 03:56:35PM +0000, Tim Deegan wrote: > At 10:36 -0500 on 06 Mar (1362566187), Konrad Rzeszutek Wilk wrote: > > On Wed, Mar 06, 2013 at 09:07:46AM +0000, Tim Deegan wrote: > > > Oh I see. That's pretty strange semantics for a 'claim', though. > > > Wouldn't it make sense for the toolstack just to query free and claimed > > > memory on the first pass and fail if there's not enough space? > > > > So do something like this: > > > > if ( dom->claim_enabled ) { > > unsigned long outstanding = > > xc_domain_get_outstanding_pages(dom->xch); > > xc_physinfo_t xcphysinfo = { 0 }; > > int flag = XENMEMF_claim_normal; > > > > rc = xc_physinfo(dom->xch, &xcphysinfo); > > > > if (xcphysinfo.total_pages + outstanding > dom->total_pages) > > flag = XENMEMF_claim_tmem; > > > > rc = xc_domain_claim_pages(dom->xch, dom->guest_domid, > > dom->total_pages, > > flag); > > } > > > > (Ignoring the checks for 'rc' and bailing out as necessary) > > No, I meant to get rid of the XENMEMF_claim_* flags altogether (so all > claims are 'tmem' claims) and, at a suitable level in the toolstack, do > something like this: > > LOCK > free := [XEN_SYSCTL_physinfo]->free > claimed := [XENMEM_get_outstanding_claims] > IF need_free_memory AND free - claimed < needed THEN > DESPAIR (claim would need tmem freeable pages) > ELSE > IF [XENMEM_claim](needed) THEN > REJOICE > ELSE > DESPAIR (could not claim enough memory) > ENDIF > ENDIF > UNLOCK > > (using whatever wrappers around the hypercalls are appropriate) Right so if we eliminate the flags and the 'need_free_memory' this becomes a lot easier. Here is the patch (there is of course more of the toolstack changes - those I can post later). 
diff --git a/xen/common/domain.c b/xen/common/domain.c index b360de1..64ee29d 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -507,6 +507,7 @@ int domain_kill(struct domain *d) evtchn_destroy(d); gnttab_release_mappings(d); tmem_destroy(d->tmem); + domain_set_outstanding_pages(d, 0); d->tmem = NULL; /* fallthrough */ case DOMDYING_dying: diff --git a/xen/common/domctl.c b/xen/common/domctl.c index b7f6619..c98e99c 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -154,6 +154,7 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info) info->tot_pages = d->tot_pages; info->max_pages = d->max_pages; + info->outstanding_pages = d->outstanding_pages; info->shr_pages = atomic_read(&d->shr_pages); info->paged_pages = atomic_read(&d->paged_pages); info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info)); diff --git a/xen/common/memory.c b/xen/common/memory.c index 08550ef..3cfc1e3 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -712,6 +712,39 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) break; } + case XENMEM_claim_pages: + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + if ( copy_from_guest(&reservation, arg, 1) ) + return -EFAULT; + + if ( !guest_handle_is_null(reservation.extent_start) ) + return -EINVAL; + + if ( reservation.extent_order != 0 ) + return -EINVAL; + + if ( reservation.mem_flags != 0 ) + return -EINVAL; + + d = rcu_lock_domain_by_id(reservation.domid); + if ( d == NULL ) + return -EINVAL; + + rc = domain_set_outstanding_pages(d, reservation.nr_extents); + + rcu_unlock_domain(d); + + break; + + case XENMEM_get_outstanding_pages: + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + rc = get_outstanding_claims(); + break; + default: rc = arch_memory_op(op, arg); break; diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 9e9fb15..aefef29 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -252,11 +252,114 @@ static 
long midsize_alloc_zone_pages; #define MIDSIZE_ALLOC_FRAC 128 static DEFINE_SPINLOCK(heap_lock); +static long outstanding_claims; /* total outstanding claims by all domains */ unsigned long domain_adjust_tot_pages(struct domain *d, long pages) { + long dom_before, dom_after, dom_claimed, sys_before, sys_after; + ASSERT(spin_is_locked(&d->page_alloc_lock)); - return d->tot_pages += pages; + d->tot_pages += pages; + + /* + * can test d->claimed_pages race-free because it can only change + * if d->page_alloc_lock and heap_lock are both held, see also + * domain_set_outstanding_pages below + */ + if ( !d->outstanding_pages ) + goto out; + + spin_lock(&heap_lock); + /* adjust domain outstanding pages; may not go negative */ + dom_before = d->outstanding_pages; + dom_after = dom_before - pages; + BUG_ON(dom_before < 0); + dom_claimed = dom_after < 0 ? 0 : dom_after; + d->outstanding_pages = dom_claimed; + /* flag accounting bug if system outstanding_claims would go negative */ + sys_before = outstanding_claims; + sys_after = sys_before - (dom_before - dom_claimed); + BUG_ON(sys_after < 0); + outstanding_claims = sys_after; + spin_unlock(&heap_lock); + +out: + return d->tot_pages; +} + +int domain_set_outstanding_pages(struct domain *d, unsigned long pages) +{ + int ret = -ENOMEM; + unsigned long claim, avail_pages; + + /* + * take the domain's page_alloc_lock, else all d->tot_page adjustments + * must always take the global heap_lock rather than only in the much + * rarer case that d->outstanding_pages is non-zero + */ + spin_lock(&d->page_alloc_lock); + spin_lock(&heap_lock); + + /* pages==0 means "unset" the claim. 
*/ + if ( pages == 0 ) + { + outstanding_claims -= d->outstanding_pages; + d->outstanding_pages = 0; + ret = 0; + goto out; + } + + /* only one active claim per domain please */ + if ( d->outstanding_pages ) + { + ret = -EINVAL; + goto out; + } + + /* disallow a claim not exceeding current tot_pages or above max_pages */ + if ( (pages <= d->tot_pages) || (pages > d->max_pages) ) + { + ret = -EINVAL; + goto out; + } + + /* how much memory is available? */ + avail_pages = total_avail_pages; + + /* Note: The usage of claim means that allocation from a guest *might* + * have to come from freeable memory. Using free memory is always better, if + * it is available, than using freeable memory. + * + * But that is OK as once the claim has been made, it still can take minutes + * before the claim is fully satisfied. Tmem can make use of the unclaimed + * pages during this time (to store ephemeral/freeable pages only, + * not persistent pages). + */ + avail_pages += tmem_freeable_pages(); + avail_pages -= outstanding_claims; + + /* + * Note, if domain has already allocated memory before making a claim + * then the claim must take tot_pages into account + */ + claim = pages - d->tot_pages; + if ( claim > avail_pages ) + goto out; + + /* yay, claim fits in available memory, stake the claim, success! */ + d->outstanding_pages = claim; + outstanding_claims += d->outstanding_pages; + ret = 0; + +out: + spin_unlock(&heap_lock); + spin_unlock(&d->page_alloc_lock); + return ret; +} + +long get_outstanding_claims(void) +{ + return outstanding_claims; } static unsigned long init_node_heap(int node, unsigned long mfn, @@ -397,7 +500,7 @@ static void __init setup_low_mem_virq(void) static void check_low_mem_virq(void) { unsigned long avail_pages = total_avail_pages + - (opt_tmem ? tmem_freeable_pages() : 0); + (opt_tmem ? 
tmem_freeable_pages() : 0) - outstanding_claims; if ( unlikely(avail_pages <= low_mem_virq_th) ) { @@ -466,6 +569,15 @@ static struct page_info *alloc_heap_pages( spin_lock(&heap_lock); /* + * Claimed memory is considered unavailable unless the request + * is made by a domain with sufficient unclaimed pages. + */ + if ( (outstanding_claims + request > + total_avail_pages + tmem_freeable_pages()) && + (d == NULL || d->outstanding_pages < request) ) + goto not_found; + + /* * TMEM: When available memory is scarce due to tmem absorbing it, allow * only mid-size allocations to avoid worst of fragmentation issues. * Others try tmem pools then fail. This is a workaround until all diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index deb19db..113b8dc 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -36,7 +36,7 @@ #include "grant_table.h" #include "hvm/save.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008 +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009 /* * NB. xen_domctl.domain is an IN/OUT parameter for this operation. @@ -95,6 +95,7 @@ struct xen_domctl_getdomaininfo { uint32_t flags; /* XEN_DOMINF_* */ uint64_aligned_t tot_pages; uint64_aligned_t max_pages; + uint64_aligned_t outstanding_pages; uint64_aligned_t shr_pages; uint64_aligned_t paged_pages; uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h index 1c5ca19..51d5254 100644 --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -68,6 +68,8 @@ struct xen_memory_reservation { * IN: GPFN bases of extents to populate with memory * OUT: GMFN bases of extents that were allocated * (NB. 
This command also updates the mach_to_phys translation table) + * XENMEM_claim_pages: + * IN: must be zero */ XEN_GUEST_HANDLE(xen_pfn_t) extent_start; @@ -430,10 +432,39 @@ typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); /* - * Reserve ops for future/out-of-tree "claim" patches (Oracle) + * Attempt to stake a claim for a domain on a quantity of pages + * of system RAM, but _not_ assign specific pageframes. Only + * arithmetic is performed so the hypercall is very fast and need + * not be preemptible, thus sidestepping time-of-check-time-of-use + * races for memory allocation. Returns 0 if the hypervisor page + * allocator has atomically and successfully claimed the requested + * number of pages, else non-zero. + * + * Any domain may have only one active claim. When sufficient memory + * has been allocated to resolve the claim, the claim silently expires. + * Claiming zero pages effectively resets any outstanding claim and + * is always successful. + * + * Note that a valid claim may be staked even after memory has been + * allocated for a domain. In this case, the claim is not incremental, + * i.e. if the domain's tot_pages is 3, and a claim is staked for 10, + * only 7 additional pages are claimed. + * + * Caller must be privileged or the hypercall fails. */ #define XENMEM_claim_pages 24 -#define XENMEM_get_unclaimed_pages 25 + +/* + * XENMEM_claim_pages flags - there are no flags at this time. + * The zero value is appropriate. + */ + +/* + * Get the number of pages currently claimed (but not yet "possessed") + * across all domains. The caller must be privileged but otherwise + * the call never fails. 
+ */ +#define XENMEM_get_outstanding_pages 25 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index 2f701f5..28512fb 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -49,7 +49,10 @@ void free_xenheap_pages(void *v, unsigned int order); #define alloc_xenheap_page() (alloc_xenheap_pages(0,0)) #define free_xenheap_page(v) (free_xenheap_pages(v,0)) +/* Claim handling */ unsigned long domain_adjust_tot_pages(struct domain *d, long pages); +int domain_set_outstanding_pages(struct domain *d, unsigned long pages); +long get_outstanding_claims(void); /* Domain suballocator. These functions are *not* interrupt-safe.*/ void init_domheap_pages(paddr_t ps, paddr_t pe); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index e108436..569e76e 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -242,6 +242,7 @@ struct domain struct page_list_head page_list; /* linked list */ struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */ unsigned int tot_pages; /* number of pages currently possessed */ + unsigned int outstanding_pages; /* pages claimed but not possessed */ unsigned int max_pages; /* maximum value for tot_pages */ atomic_t shr_pages; /* number of shared pages */ atomic_t paged_pages; /* number of paged-out pages */ -- 1.8.0.2 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |