
Re: [Xen-devel] [PATCH] Provide support for multiple frame buffers in Xen



On Fri, Mar 01, 2013 at 03:48:57PM -0500, Robert Phillips wrote:
> Support is provided for both shadow and hardware assisted paging (HAP)
> modes. This code bookkeeps the set of video frame buffers (vram),
> detects when the guest has modified any of those buffers and, upon request,
> returns a bitmap of the modified pages.
> This lets other software components re-paint the portions of the monitor
> (or monitors) that have changed.
> Each monitor has a frame buffer of some size at some position
> in guest physical memory.
> The set of frame buffers being tracked can change over time as monitors
> are plugged and unplugged.
> 
> This is the 9th version of this patch.
>

It'd be nice to have it in the subject of the email as well.

-- Pasi

> Signed-Off-By: Robert Phillips <robert.phillips@xxxxxxxxxx>
> ---
>  tools/libxc/xenctrl.h            |   20 +-
>  xen/arch/x86/hvm/hvm.c           |    8 +-
>  xen/arch/x86/mm/Makefile         |    1 +
>  xen/arch/x86/mm/dirty_vram.c     |  951 ++++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/hap/hap.c        |  111 -----
>  xen/arch/x86/mm/p2m.c            |   11 +-
>  xen/arch/x86/mm/paging.c         |   57 ++-
>  xen/arch/x86/mm/shadow/common.c  |  353 +++++++-------
>  xen/arch/x86/mm/shadow/multi.c   |  174 ++++---
>  xen/arch/x86/mm/shadow/multi.h   |    7 +-
>  xen/arch/x86/mm/shadow/types.h   |    1 +
>  xen/include/asm-x86/dirty_vram.h |  227 +++++++++
>  xen/include/asm-x86/hap.h        |    4 -
>  xen/include/asm-x86/hvm/domain.h |    2 +-
>  xen/include/asm-x86/paging.h     |   15 +-
>  xen/include/asm-x86/shadow.h     |    6 -
>  16 files changed, 1535 insertions(+), 413 deletions(-)
>  create mode 100644 xen/arch/x86/mm/dirty_vram.c
>  create mode 100644 xen/include/asm-x86/dirty_vram.h
> 
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 32122fd..cd4e1ef 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1563,15 +1563,23 @@ int xc_hvm_inject_msi(
>      xc_interface *xch, domid_t dom, uint64_t addr, uint32_t data);
>  
>  /*
> - * Track dirty bit changes in the VRAM area
> + * Track dirty bit changes in a VRAM region defined by
> + * [ first_pfn : first_pfn + nr - 1 ]
>   *
>   * All of this is done atomically:
> - * - get the dirty bitmap since the last call
> - * - set up dirty tracking area for period up to the next call
> - * - clear the dirty tracking area.
> + * - gets the dirty bitmap since the last call, all zeroes for
> + *   the first call with some new region
> + * - sets up a dirty tracking region for period up to the next call
> + * - clears the specified dirty tracking region.
>   *
> - * Returns -ENODATA and does not fill bitmap if the area has changed since the
> - * last call.
> + * Creating a new region causes any existing regions that it overlaps
> + * to be discarded.
> + *
> + * Specifying nr == 0 causes all regions to be discarded and
> + * disables dirty bit tracking.
> + *
> + * If nr is not a multiple of 64, only the first nr bits of bitmap
> + * are well defined.
>   */
>  int xc_hvm_track_dirty_vram(
>      xc_interface *xch, domid_t dom,
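
For anyone following along: the call pattern described in the comment above
looks roughly like the sketch below. This is illustrative only -- it assumes
the declaration continues with the existing (uint64_t first_pfn, uint64_t nr,
unsigned long *dirty_bitmap) parameters, and fb_pfn/fb_pages are hypothetical
values for wherever the device model has placed one monitor's frame buffer.

#include <stdint.h>
#include <stdlib.h>
#include <xenctrl.h>

static void repaint_dirty_pages(xc_interface *xch, domid_t dom,
                                uint64_t fb_pfn, uint64_t fb_pages)
{
    unsigned int bits_per_long = sizeof(unsigned long) * 8;
    size_t words = (fb_pages + bits_per_long - 1) / bits_per_long;
    unsigned long *bitmap = calloc(words, sizeof(*bitmap));
    uint64_t i;

    if ( !bitmap )
        return;

    /* Atomically fetch-and-reset the dirty map for this range; a newly
     * created range returns all zeroes on the first call. */
    if ( xc_hvm_track_dirty_vram(xch, dom, fb_pfn, fb_pages, bitmap) == 0 )
    {
        for ( i = 0; i < fb_pages; i++ )
        {
            if ( bitmap[i / bits_per_long] & (1UL << (i % bits_per_long)) )
            {
                /* Page fb_pfn + i changed since the last call: repaint the
                 * corresponding part of the monitor. */
            }
        }
    }

    /* Calling again later with nr == 0 would discard all ranges and stop
     * tracking, per the comment above. */
    free(bitmap);
}
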
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index fcea52c..5a97ad3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -57,6 +57,7 @@
>  #include <asm/hvm/cacheattr.h>
>  #include <asm/hvm/trace.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/apic.h>
>  #include <public/sched.h>
> @@ -1447,8 +1448,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
>           */
>          if ( access_w )
>          {
> -            paging_mark_dirty(v->domain, mfn_x(mfn));
> -            p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            if ( p2m_change_type(v->domain, gfn, p2m_ram_logdirty,
> +                                 p2m_ram_rw) == p2m_ram_logdirty )
> +            {
> +                paging_mark_dirty_gpfn(v->domain, gfn);
> +            }
>          }
>          rc = 1;
>          goto out_put_gfn;
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 73dcdf4..becd0c9 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -5,6 +5,7 @@ obj-y += paging.o
>  obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
> +obj-y += dirty_vram.o
>  obj-$(x86_64) += guest_walk_4.o
>  obj-$(x86_64) += mem_event.o
>  obj-$(x86_64) += mem_paging.o
> diff --git a/xen/arch/x86/mm/dirty_vram.c b/xen/arch/x86/mm/dirty_vram.c
> new file mode 100644
> index 0000000..4f599ed
> --- /dev/null
> +++ b/xen/arch/x86/mm/dirty_vram.c
> @@ -0,0 +1,951 @@
> +/*
> + * arch/x86/mm/dirty_vram.c: Bookkeep/query dirty VRAM pages
> + * with support for multiple frame buffers.
> + *
> + * Copyright (c) 2012, Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2007 XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +
> +
> +#include <xen/types.h>
> +#include <xen/sched.h>
> +#include <xen/guest_access.h>
> +#include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
> +#include <asm/hap.h>
> +#include <asm/config.h>
> +#include "mm-locks.h"
> +
> +#define DEBUG_stop_tracking_all_vram          0
> +#define DEBUG_allocating_dirty_vram_range     0
> +#define DEBUG_high_water_mark_for_vram_ranges 0
> +#define DEBUG_freeing_dirty_vram_range        0
> +#define DEBUG_allocate_paddr_links_page       0
> +#define DEBUG_update_vram_mapping             0
> +#define DEBUG_alloc_paddr_inject_fault        0
> +#define DEBUG_link_limit_exceeded             0
> +
> +
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    dirty_vram = d->arch.hvm_domain.dirty_vram = xzalloc(dv_dirty_vram_t);
> +    if ( dirty_vram )
> +    {
> +        INIT_LIST_HEAD(&dirty_vram->range_head);
> +        dirty_vram->ext_head = INVALID_PADDR;
> +        dirty_vram->pl_free = INVALID_PADDR;
> +    }
> +    return dirty_vram;
> +}
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( !dirty_vram )
> +        dirty_vram = dirty_vram_alloc(d);
> +    return dirty_vram;
> +}
> +
> +
> +/* Free domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr, *next;
> +        dv_paddr_link_ext_t *ext;
> +        
> +        /* Free all the ranges */
> +        list_for_each_safe(curr, next, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +#if DEBUG_stop_tracking_all_vram
> +            gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] stop tracking all vram\n",
> +                     range->begin_pfn, range->end_pfn);
> +#endif
> +            xfree(range->pl_tab);
> +            xfree(range);
> +        }
> +        /* Free all the extension pages */
> +        
> +        ext = remap_maddr(NULL, dirty_vram->ext_head);
> +        while ( ext ) {
> +            struct page_info *pg = virt_to_page(ext);
> +            ext = remap_maddr(ext, ext->ext_link);
> +            d->arch.paging.free_page(d, pg);
> +        }
> +
> +        xfree(dirty_vram);
> +        d->arch.hvm_domain.dirty_vram = NULL;
> +    }
> +}
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn)
> +{
> +    struct dv_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( gfn >= range->begin_pfn &&
> +                 gfn <  range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * Returns pointer to dirty vram range matching [begin_pfn .. end_pfn ),
> + * NULL if none.
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        struct list_head *curr;
> +        list_for_each(curr, &dirty_vram->range_head)
> +        {
> +            dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +            if ( begin_pfn == range->begin_pfn &&
> +                 end_pfn   == range->end_pfn )
> +                return range;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/* Allocate specified dirty_vram range */
> +static dv_range_t *
> +_dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range = NULL;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_pl_entry_t *pl_tab = NULL;
> +    int i;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +#if DEBUG_allocating_dirty_vram_range
> +    gdprintk(XENLOG_DEBUG,
> +             "[%05lx:%05lx] Allocating dirty vram range hap:%d\n",
> +             begin_pfn, end_pfn,
> +             d->arch.hvm_domain.hap_enabled);
> +#endif
> +
> +    range = xzalloc(dv_range_t);
> +    if ( range == NULL )
> +        goto err_out;
> +
> +    INIT_LIST_HEAD(&range->range_link);
> +
> +    range->begin_pfn = begin_pfn;
> +    range->end_pfn = end_pfn;
> +
> +    if ( !hap_enabled(d) )
> +    {
> +        if ( (pl_tab = xzalloc_array(dv_pl_entry_t, nr)) == NULL )
> +            goto err_out;
> +
> +        for ( i = 0; i != nr; i++ )
> +        {
> +            pl_tab[i].mapping.sl1ma = INVALID_PADDR;
> +            pl_tab[i].mapping.pl_next = INVALID_PADDR;
> +        }
> +    }
> +
> +    range->pl_tab = pl_tab;
> +    range->mappings_hwm = 1;
> +
> +    list_add(&range->range_link, &dirty_vram->range_head);
> +    if ( ++dirty_vram->nr_ranges > dirty_vram->ranges_hwm )
> +    {
> +        dirty_vram->ranges_hwm = dirty_vram->nr_ranges;
> +#if DEBUG_high_water_mark_for_vram_ranges
> +        gdprintk(XENLOG_DEBUG,
> +                 "High water mark for number of vram ranges is now:%d\n",
> +                 dirty_vram->ranges_hwm);
> +#endif
> +    }
> +    return range;
> +
> + err_out:
> +    xfree(pl_tab);
> +    xfree(range);
> +    return NULL;
> +}
> +
> +/* Frees specified dirty_vram range */
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    ASSERT( paging_locked_by_me(d) );
> +    if ( dirty_vram )
> +    {
> +        int i, nr = range->end_pfn - range->begin_pfn;
> +
> +#if DEBUG_freeing_dirty_vram_range
> +        gdprintk(XENLOG_DEBUG,
> +                 "[%05lx:%05lx] Freeing dirty vram range\n",
> +                 range->begin_pfn, range->end_pfn);
> +#endif
> +
> +        if ( range->pl_tab )
> +        {
> +            for ( i = 0; i != nr; i++ )
> +            {
> +                dv_paddr_link_t *plx;
> +                
> +                plx = remap_maddr(NULL, range->pl_tab[i].mapping.pl_next);
> +
> +                /* Does current FB page have multiple mappings? */
> +                if ( plx ) /* Yes */
> +                {
> +                    /* Find the last element in singly-linked list */
> +                    while ( plx->pl_next != INVALID_PADDR ) {
> +                        plx = remap_maddr(plx, plx->pl_next);
> +                    }
> +                    
> +                    /* Prepend whole list to the free list */
> +                    plx->pl_next = dirty_vram->pl_free;
> +                    dirty_vram->pl_free = range->pl_tab[i].mapping.pl_next;
> +                }
> +                /* Unmap plx. */
> +                remap_vaddr(plx, NULL);
> +            }
> +            xfree(range->pl_tab);
> +            range->pl_tab = NULL;
> +        }
> +        /* Remove range from the linked list, free it, and adjust count*/
> +        list_del(&range->range_link);
> +        xfree(range);
> +        dirty_vram->nr_ranges--;
> +    }
> +}
> +
> +/*
> + * dirty_vram_range_alloc()
> + * This function ensures that the new range does not overlap any existing
> + * ranges -- deleting them if necessary -- and then calls
> + * _dirty_vram_range_alloc to actually allocate the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                        unsigned long begin_pfn,
> +                        unsigned long nr)
> +{
> +    unsigned long end_pfn = begin_pfn + nr;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_range_t *range;
> +    struct list_head *curr, *next;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    ASSERT( dirty_vram != NULL );
> +
> +    /*
> +     * Ranges cannot overlap so
> +     * free any range that overlaps [ begin_pfn .. end_pfn ).
> +     */
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
> +    {
> +        dv_range_t *rng = list_entry(curr, dv_range_t, range_link);
> +        if ( ( ( rng->begin_pfn <= begin_pfn ) &&
> +               ( begin_pfn <  rng->end_pfn   )
> +                 ) ||
> +             ( ( begin_pfn <= rng->begin_pfn ) &&
> +               ( rng->begin_pfn < end_pfn    )
> +                 ) )
> +        {
> +            /* Different tracking, tear the previous down. */
> +            dirty_vram_range_free(d, rng);
> +        }
> +    }
> +
> +    range = _dirty_vram_range_alloc(d, begin_pfn, nr);
> +    if ( !range )
> +        goto out;
> +
> + out:
> +    return range;
> +}
> +
> +/*
> + * dirty_vram_range_find_or_alloc()
> + * Find the range for [begin_pfn:begin_pfn+nr).
> + * If it doesn't exists, create it.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                                unsigned long begin_pfn,
> +                                unsigned long nr)
> +{
> +    dv_range_t *range;
> +    ASSERT( paging_locked_by_me(d) );
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +    
> +    return range;
> +}
> +
> +
> +static paddr_t
> +domain_page_map_to_maddr(void *va)
> +{
> +    if ( va )
> +        return (domain_page_map_to_mfn(va) << PAGE_SHIFT) +
> +            (((unsigned long)va) & ~PAGE_MASK);
> +    else
> +        return INVALID_PADDR;
> +}
> +
> +void *
> +remap_maddr(void *old_va, paddr_t new_ma)
> +{
> +    if ( old_va )
> +        unmap_domain_page(old_va);
> +    
> +    if ( new_ma != INVALID_PADDR )
> +        return map_domain_page(new_ma >> PAGE_SHIFT) + (new_ma & ~PAGE_MASK);
> +    else
> +        return NULL;
> +}
> +
> +void *
> +remap_vaddr(void *old_va, void *new_va)
> +{
> +    if ( old_va )
> +        unmap_domain_page(old_va);
> +    
> +    if ( new_va )
> +        /* Map page now in anticipation of future matching unmap */
> +        map_domain_page(domain_page_map_to_mfn(new_va));
> +
> +    return new_va;
> +}
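
A note on the idiom these two helpers set up, since every list walk below
depends on it: remap_maddr() unmaps the previous mapping and maps the page
holding the new machine address, while remap_vaddr() takes an extra mapping
reference so the later balancing unmap is safe. A minimal walk over a pl_next
chain, keeping at most one mapping live, looks like the sketch below (not part
of the patch; it reuses dv_paddr_link_t and the helpers exactly as they appear
in this file).

static void walk_sketch(dv_paddr_link_t *head)
{
    /* Take a mapping reference on the page holding the head entry so the
     * eventual unmap performed by remap_maddr() below is balanced. */
    dv_paddr_link_t *pl = remap_vaddr(NULL, head);

    while ( pl != NULL )
    {
        /* ... examine pl->sl1ma here ... */

        /* Unmap the current element and map its successor; returns NULL
         * at the end of the list (pl_next == INVALID_PADDR). */
        pl = remap_maddr(pl, pl->pl_next);
    }
    /* Nothing is left mapped: remap_maddr() unmapped the last element. */
}
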
> +
> +
> +/* Allocate a dv_paddr_link struct */
> +static dv_paddr_link_t *
> +alloc_paddr_link(struct domain *d)
> +{
> +    dv_paddr_link_t *pl = NULL;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_ext_t *ext = NULL;
> +    
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
> +    /* Is the list of free pl's empty? */
> +    if ( dirty_vram->pl_free == INVALID_PADDR ) /* Yes. */
> +    {
> +        /*
> +         * Allocate another page of pl's.
> +         * Link them all together and point the free list head at them
> +         */
> +        int i;
> +        struct page_info *pg = d->arch.paging.alloc_page(d);
> +        if ( pg == NULL )
> +            goto out;
> +
> +        ext = __map_domain_page(pg);
> +        if ( ext == NULL )
> +        {
> +            d->arch.paging.free_page(d, pg);
> +            goto out;
> +        }
> +
> +#if DEBUG_allocate_paddr_links_page
> +        gdprintk(XENLOG_DEBUG, "Allocated another page of paddr_links\n");
> +#endif
> +        /* Prepend new extension page to dirty_vram's list of same. */
> +        ext->ext_link = dirty_vram->ext_head;
> +        dirty_vram->ext_head = domain_page_map_to_maddr(ext);
> +
> +        /* Initialize and link together the new pl entries. */
> +        for ( i = 0; i != ARRAY_SIZE(ext->entries); i++ )
> +        {
> +            ext->entries[i].sl1ma = INVALID_PADDR;
> +            ext->entries[i].pl_next =
> +                domain_page_map_to_maddr(&ext->entries[i+1]);
> +        }
> +        /* Mark the last entry's next pointer as "null". */
> +        ext->entries[ARRAY_SIZE(ext->entries) - 1].pl_next = INVALID_PADDR;
> +
> +        /* Set the dirty_vram's free list pointer to the first entry. */
> +        dirty_vram->pl_free = domain_page_map_to_maddr(&ext->entries[0]);
> +    }
> +    pl = remap_maddr(NULL, dirty_vram->pl_free);
> +    dirty_vram->pl_free = pl->pl_next;
> +
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = INVALID_PADDR;
> + out:
> +    if ( ext )
> +        unmap_domain_page(ext);
> +    return pl;
> +}
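
The structure definitions live in xen/include/asm-x86/dirty_vram.h, which is
not quoted in this hunk. Reconstructed from how the fields are used here, the
layout is approximately the following; this is a sketch with guessed field
types, not the authoritative header.

typedef struct dv_paddr_link {
    paddr_t sl1ma;    /* maddr of one shadow L1 PTE mapping the frame-buffer
                       * page, or INVALID_PADDR                            */
    paddr_t pl_next;  /* maddr of the next link, or INVALID_PADDR          */
} dv_paddr_link_t;

typedef struct dv_pl_entry {
    dv_paddr_link_t mapping;   /* head of this pfn's list of sl1ma mappings */
    bool_t stuck_dirty;        /* bookkeeping gave up: always report dirty  */
} dv_pl_entry_t;

typedef struct dv_paddr_link_ext {
    paddr_t ext_link;          /* maddr of the next extension page          */
    dv_paddr_link_t entries[(PAGE_SIZE - sizeof(paddr_t)) /
                            sizeof(dv_paddr_link_t)];
} dv_paddr_link_ext_t;

typedef struct dv_range {
    struct list_head range_link;   /* entry in dirty_vram->range_head       */
    unsigned long begin_pfn, end_pfn;
    dv_pl_entry_t *pl_tab;         /* one entry per pfn (shadow mode only)  */
    int nr_mappings, mappings_hwm;
    unsigned int dirty_count;
} dv_range_t;

typedef struct dv_dirty_vram {
    struct list_head range_head;   /* all tracked ranges                    */
    paddr_t ext_head;              /* chain of extension pages              */
    paddr_t pl_free;               /* free list of dv_paddr_link_t          */
    int nr_ranges, ranges_hwm;
} dv_dirty_vram_t;
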
> +
> +
> +/*
> + * Free a paddr_link struct.
> + *
> + * The caller has walked the singly-linked list of elements
> + * that have, as their head, an element in a pl_tab cell.
> + * The list walk has reached the element to be freed.
> + * (Each element is a dv_paddr_link_t struct.)
> + *
> + * @pl points to the element to be freed.
> + * @ppl points to its predecessor's next member.
> + *
> + * After linking the predecessor to the element's successor,
> + * we can free @pl by prepending it to the list of free
> + * elements.
> + *
> + * As a boundary case (which happens to be the common case),
> + * @pl points to a cell in the pl_tab rather than to some
> + * extension element dangling from that cell.
> + * We recognize this case because @ppl is NULL.
> + * In that case we promote the first extension element by
> + * copying it into the pl_tab cell and freeing it.
> + *
> + * Returns a pointer to @pl's successor.  This is where
> + * any iterative processing will resume.
> + */
> +
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                paddr_t *ppl,
> +                dv_paddr_link_t *pl)
> +{
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    dv_paddr_link_t *npl; /* Next pl. */
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* Extension mapping? */
> +    if ( ppl ) /* Yes, relink around it. */
> +    {
> +        /* When done, resume processing at pl's successor. */
> +        npl = remap_maddr(NULL, pl->pl_next);
> +        (*ppl) = domain_page_map_to_maddr(npl);
> +    }
> +    else  /* pl points to entry in the main table. */
> +    {
> +        /*
> +         * Move 2nd mapping to main table
> +         * and free 2nd mapping.
> +         */
> +        dv_paddr_link_t *spl; /* 2nd mapping */
> +        spl = remap_maddr(NULL, pl->pl_next);
> +        /* Is there a 2nd mapping? */
> +        if ( spl == NULL ) /* No. */
> +        {
> +            /* Invalidate the main table entry. */
> +            pl->sl1ma = INVALID_PADDR;
> +            return pl;
> +        }
> +        /* Copy 2nd mapping into main table. */
> +        pl->sl1ma = spl->sl1ma;
> +        pl->pl_next = spl->pl_next;
> +        npl = pl; /* Reprocess main table entry again. */
> +        pl = spl; /* Prepare to free 2nd mapping. */
> +    }
> +
> +    /* Prepare entry for prepending to the free list */
> +    pl->sl1ma = INVALID_PADDR;
> +    pl->pl_next = dirty_vram->pl_free;
> +    dirty_vram->pl_free = domain_page_map_to_maddr(pl);
> +    remap_vaddr(pl, NULL); /* Unmap pl. */
> +
> +    return npl;
> +}
> +
> +
> +/*
> + * dirty_vram_range_update()
> + *
> + * This is called whenever a level 1 page table entry is modified.
> + * If the L1PTE is being cleared, the function removes any paddr_links
> + * that refer to it.
> + * If the L1PTE is being set to a frame buffer page, a paddr_link is
> + * created for that page's entry in pl_tab.
> + * Returns 1 iff entry found and set or cleared.
> + */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set)
> +{
> +    int effective = 0;
> +    dv_range_t *range;
> +    unsigned long i;
> +    dv_paddr_link_t *pl;
> +    paddr_t *ppl;
> +    int len = 0;
> +
> +    ASSERT(paging_locked_by_me(d));
> +    range = dirty_vram_range_find_gfn(d, gfn);
> +    if ( !range )
> +        return effective;
> +
> +    
> +    i = gfn - range->begin_pfn;
> +    pl = remap_vaddr(NULL, &range->pl_tab[ i ].mapping);
> +    ppl = NULL;
> +    /*
> +     * Find matching entry (pl), if any, and its predecessor (ppl)
> +     * in linked list.
> +     */
> +    while ( pl != NULL )
> +    {
> +        if ( pl->sl1ma == sl1ma || pl->sl1ma == INVALID_PADDR )
> +            break;
> +
> +        ppl = remap_vaddr(ppl, &pl->pl_next);
> +        pl =  remap_maddr(pl, (*ppl));
> +        len++;
> +    }
> +    
> +    if ( set )
> +    {
> +        /* Did we find sl1ma in either the main table or the linked list? */
> +        if ( pl == NULL ) /* No, so we'll need to alloc a link. */
> +        {
> +            ASSERT(ppl != NULL); /* ppl points to predecessor's next link. */
> +            
> +#if DEBUG_alloc_paddr_inject_fault
> +            {
> +                static int counter;
> +                
> +                /* Test stuck_dirty logic for some cases */
> +                if ( (++counter) % 4 == 0 )
> +                {
> +                    /* Simply mark the frame buffer page as always dirty */
> +                    range->pl_tab[ i ].stuck_dirty = 1;
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] inject stuck dirty fault\n",
> +                             gfn );
> +                    goto out;
> +                }
> +            }
> +#endif
> +            /*
> +             * Have we reached the limit of mappings we're willing
> +             * to bookkeep?
> +             */
> +            if ( len > DV_ADDR_LINK_LIST_LIMIT ) /* Yes. */
> +            {
> +#if DEBUG_link_limit_exceeded
> +                if ( !range->pl_tab[ i ].stuck_dirty )
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] link limit exceeded\n",
> +                             gfn );
> +#endif            
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                goto out;
> +            }
> +
> +            /* Alloc new link */
> +            pl = alloc_paddr_link(d);
> +            /* Were we able to allocate a link? */
> +            if ( pl == NULL ) /* No. */
> +            {
> +                /* Simply mark the frame buffer page as always dirty */
> +                range->pl_tab[ i ].stuck_dirty = 1;
> +                
> +                gdprintk(XENLOG_DEBUG,
> +                         "[%lx] alloc failure\n",
> +                         gfn );
> +                
> +                goto out;
> +            }
> +            /* Append new link to the list */
> +            (*ppl) = domain_page_map_to_maddr(pl);
> +        }
> +
> +        /* Did we find an entry for sl1ma? */
> +        if ( pl->sl1ma != sl1ma ) /* No. */
> +        {
> +            /* pl must point to either a previously unused entry in the
> +             * main table, or to a newly allocated link.
> +             * In either case, the link's sl1ma should be 'null'.
> +             */
> +            ASSERT(pl->sl1ma == INVALID_PADDR);
> +            pl->sl1ma = sl1ma;
> +        }
> +        effective = 1; /* Take note that we're going to set an entry. */
> +        if ( len > range->mappings_hwm )
> +        {
> +            /* Bookkeep the high water mark. */
> +            range->mappings_hwm = len;
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] set      sl1ma:%lx hwm:%d "
> +                     "free_pages:%d\n",
> +                     gfn, sl1ma,
> +                     range->mappings_hwm,
> +                     d->arch.paging.shadow.free_pages);
> +#endif
> +        }
> +    }
> +    else /* Clear */
> +    {
> +        if ( pl && pl->sl1ma == sl1ma )
> +        {
> +#if DEBUG_update_vram_mapping
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] clear    sl1ma:%lx\n",
> +                     gfn, sl1ma);
> +#endif
> +            pl = free_paddr_link(d, ppl, pl);
> +            effective = 1; /* Take note that we're clearing an entry. */
> +        }
> +    }
> + out:
> +    remap_vaddr(pl,  NULL);
> +    remap_vaddr(ppl, NULL);
> +    return effective;
> +}
> +
> +
> +/*
> + * shadow_scan_dirty_flags()
> + * This produces a dirty bitmap for the range by examining every
> + * L1PTE referenced by some dv_paddr_link in the range's pl_tab table.
> + * It tests and clears each such L1PTE's dirty flag.
> + */
> +static int 
> +shadow_scan_dirty_flags(struct domain *d,
> +                        dv_range_t *range,
> +                        uint8_t *dirty_bitmap)
> +{
> +    int flush_tlb = 0;
> +    unsigned long i;
> +    unsigned long nr = range->end_pfn - range->begin_pfn;
> +    l1_pgentry_t *sl1e = NULL;
> +
> +    ASSERT( paging_locked_by_me(d) );
> +    /* Iterate over VRAM to track dirty bits. */
> +    for ( i = 0; i < nr; i++ )
> +    {
> +        int dirty = 0;
> +        
> +        /* Does the frame buffer have an incomplete set of mappings? */
> +        if ( unlikely(range->pl_tab[i].stuck_dirty) ) /* Yes. */
> +            dirty = 1; /* Then always assume the page is dirty. */
> +        else { /* The frame buffer's set of mappings is complete.  Scan it. */
> +            paddr_t next = range->pl_tab[i].mapping.pl_next;
> +            paddr_t sl1ma = range->pl_tab[i].mapping.sl1ma;
> +            dv_paddr_link_t *pl = NULL;
> +            for (;;)
> +            {
> +                if ( sl1ma == INVALID_PADDR )
> +                    /* End of list or frame buffer page is unmapped. */
> +                    break;
> +
> +                if ( sl1e ) /* Cleanup from previous iteration. */
> +                    unmap_domain_page(sl1e);
> +
> +                sl1e = map_domain_page(sl1ma >> PAGE_SHIFT) +
> +                    (sl1ma & ~PAGE_MASK);
> +                if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> +                {
> +                    dirty = 1;
> +                    /* Clear dirty so we can detect if page gets re-dirtied.
> +                     * Note: this is atomic, so we may clear a
> +                     * _PAGE_ACCESSED set by another processor.
> +                     */
> +                    l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> +                    flush_tlb = 1;
> +                    /* Even though we now know the frame buffer page is dirty,
> +                     * keep iterating to clear the dirty flag in all other
> +                     * mappings. 
> +                     */
> +                }
> +                
> +                pl = remap_maddr(pl, next);
> +                if ( !pl )
> +                    break;
> +                
> +                next = pl->pl_next;
> +                sl1ma = pl->sl1ma;
> +            } /* for */
> +
> +            /* Unmap pl. */
> +            remap_vaddr(NULL, pl);
> +        }
> +        if ( dirty )
> +            /* Set the i'th bit in the output dirty_bitmap */
> +            dirty_bitmap[i >> 3] |= (1 << (i & 7));
> +
> +    }
> +    
> +    if ( sl1e )
> +        unmap_domain_page(sl1e);
> +
> +    return flush_tlb;
> +}
> +
> +
> +/*
> + * shadow_track_dirty_vram()
> + * This is the API called by the guest to determine which pages in the range
> + * from [begin_pfn:begin_pfn+nr) have been dirtied since the last call.
> + * It creates the domain's dv_dirty_vram on demand.
> + * It creates ranges on demand when some [begin_pfn:nr) is first encountered.
> + * To collect the dirty bitmask it calls shadow_scan_dirty_flags().
> + * It copies the dirty bitmask into guest storage.
> + */
> +int shadow_track_dirty_vram(struct domain *d,
> +                            unsigned long begin_pfn,
> +                            unsigned long nr,
> +                            XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    int rc = 0;
> +    unsigned long end_pfn = begin_pfn + nr;
> +    int flush_tlb = 0;
> +    dv_range_t *range;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    /*
> +     * This range test is tricky.
> +     *
> +     * The range [begin_pfn..end_pfn) is an open interval, so end_pfn
> +     * is a pfn beyond the end of the range.
> +     *
> +     * p2m->max_mapped_pfn is a valid PFN so p2m->max_mapped_pfn + 1 is an
> +     * invalid PFN.
> +     *
> +     * If end_pfn is beyond *that* then the range is invalid.
> +     */
> +    if ( end_pfn < begin_pfn
> +         || begin_pfn > p2m->max_mapped_pfn
> +         || end_pfn > p2m->max_mapped_pfn + 1 )
> +        return -EINVAL;
> +
> +    paging_lock(d);
> +
> +    if ( !nr )
> +    {
> +        dirty_vram_free(d);
> +        goto out;
> +    }
> +
> +    if ( guest_handle_is_null(guest_dirty_bitmap) )
> +        goto out;
> +
> +    if ( !dirty_vram_find_or_alloc(d) )
> +    {
> +        rc = -ENOMEM;
> +        goto out;
> +    }
> +
> +    range = dirty_vram_range_find(d, begin_pfn, nr);
> +    if ( !range )
> +    {
> +        range = dirty_vram_range_alloc(d, begin_pfn, nr);
> +        if ( range )
> +            sh_find_all_vram_mappings(d->vcpu[0], range);
> +    }
> +    if ( range )
> +    {
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +
> +        flush_tlb |= shadow_scan_dirty_flags(d, range, dirty_bitmap);
> +
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +            rc = 0;
> +    }
> +    
> +    if ( flush_tlb )
> +        flush_tlb_mask(d->domain_dirty_cpumask);
> +
> +out:
> +    paging_unlock(d);
> +    
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    return rc;
> +}
> +
> +
> +/************************************************/
> +/*          HAP VRAM TRACKING SUPPORT           */
> +/************************************************/
> +
> +/*
> + * hap_track_dirty_vram()
> + * Create the domain's dv_dirty_vram struct on demand.
> + * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> + * first encountered.
> + * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> + * calling paging_log_dirty_range(), which interrogates each vram
> + * page's p2m type looking for pages that have been made writable.
> + */
> +int hap_track_dirty_vram(struct domain *d,
> +                         unsigned long begin_pfn,
> +                         unsigned long nr,
> +                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> +{
> +    long rc = 0;
> +    dv_dirty_vram_t *dirty_vram;
> +    uint8_t *dirty_bitmap = NULL;
> +
> +    if ( nr )
> +    {
> +        dv_range_t *range = NULL;
> +        int size = ( nr + BITS_PER_BYTE - 1 ) / BITS_PER_BYTE;
> +        
> +        if ( !paging_mode_log_dirty(d) )
> +        {
> +            hap_logdirty_init(d);
> +            rc = paging_log_dirty_enable(d);
> +            if ( rc )
> +                goto out;
> +        }
> +
> +        rc = -ENOMEM;
> +        dirty_bitmap = xzalloc_bytes( size );
> +        if ( !dirty_bitmap )
> +            goto out;
> +        
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( !dirty_vram ) 
> +        {
> +            rc = -ENOMEM;
> +            if ( !(dirty_vram = dirty_vram_alloc(d)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +        }
> +        
> +        range = dirty_vram_range_find(d, begin_pfn, nr);
> +        if ( !range )
> +        {
> +            rc = -ENOMEM;
> +            if ( !(range = dirty_vram_range_alloc(d, begin_pfn, nr)) )
> +            {
> +                paging_unlock(d);
> +                goto out;
> +            }
> +            
> +            paging_unlock(d);
> +            
> +            /* Set l1e entries of range within P2M table to be read-only. */
> +            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> +                                  p2m_ram_rw, p2m_ram_logdirty);
> +            
> +            flush_tlb_mask(d->domain_dirty_cpumask);
> +            
> +            memset(dirty_bitmap, 0xff, size); /* Consider all pages dirty. */
> +        }
> +        else
> +        {
> +            paging_unlock(d);
> +            
> +            domain_pause(d);
> +            
> +            /* Get the bitmap. */
> +            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> +            
> +            domain_unpause(d);
> +        }
> +        
> +        
> +        rc = -EFAULT;
> +        if ( copy_to_guest(guest_dirty_bitmap,
> +                           dirty_bitmap,
> +                           size) == 0 )
> +        {
> +            rc = 0;
> +        }
> +    }
> +    else {
> +        paging_lock(d);
> +        
> +        dirty_vram = d->arch.hvm_domain.dirty_vram;
> +        if ( dirty_vram )
> +        {
> +            /*
> +             * If zero pages specified while tracking dirty vram
> +             * then stop tracking
> +             */
> +            dirty_vram_free(d);
> +        
> +        }
> +        
> +        paging_unlock(d);
> +    }
> +out:
> +    if ( dirty_bitmap )
> +        xfree(dirty_bitmap);
> +    
> +    return rc;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index f353d3a..3ac54c9 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -53,117 +53,6 @@
>  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
>  
>  /************************************************/
> -/*          HAP VRAM TRACKING SUPPORT           */
> -/************************************************/
> -
> -/*
> - * hap_track_dirty_vram()
> - * Create the domain's dv_dirty_vram struct on demand.
> - * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
> - * first encountered.
> - * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
> - * calling paging_log_dirty_range(), which interrogates each vram
> - * page's p2m type looking for pages that have been made writable.
> - */
> -
> -int hap_track_dirty_vram(struct domain *d,
> -                         unsigned long begin_pfn,
> -                         unsigned long nr,
> -                         XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
> -{
> -    long rc = 0;
> -    struct sh_dirty_vram *dirty_vram;
> -    uint8_t *dirty_bitmap = NULL;
> -
> -    if ( nr )
> -    {
> -        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
> -
> -        if ( !paging_mode_log_dirty(d) )
> -        {
> -            hap_logdirty_init(d);
> -            rc = paging_log_dirty_enable(d);
> -            if ( rc )
> -                goto out;
> -        }
> -
> -        rc = -ENOMEM;
> -        dirty_bitmap = xzalloc_bytes(size);
> -        if ( !dirty_bitmap )
> -            goto out;
> -
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( !dirty_vram )
> -        {
> -            rc = -ENOMEM;
> -            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
> -            {
> -                paging_unlock(d);
> -                goto out;
> -            }
> -
> -            d->arch.hvm_domain.dirty_vram = dirty_vram;
> -        }
> -
> -        if ( begin_pfn != dirty_vram->begin_pfn ||
> -             begin_pfn + nr != dirty_vram->end_pfn )
> -        {
> -            dirty_vram->begin_pfn = begin_pfn;
> -            dirty_vram->end_pfn = begin_pfn + nr;
> -
> -            paging_unlock(d);
> -
> -            /* set l1e entries of range within P2M table to be read-only. */
> -            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
> -                                  p2m_ram_rw, p2m_ram_logdirty);
> -
> -            flush_tlb_mask(d->domain_dirty_cpumask);
> -
> -            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
> -        }
> -        else
> -        {
> -            paging_unlock(d);
> -
> -            domain_pause(d);
> -
> -            /* get the bitmap */
> -            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
> -
> -            domain_unpause(d);
> -        }
> -
> -        rc = -EFAULT;
> -        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
> -            rc = 0;
> -    }
> -    else
> -    {
> -        paging_lock(d);
> -
> -        dirty_vram = d->arch.hvm_domain.dirty_vram;
> -        if ( dirty_vram )
> -        {
> -            /*
> -             * If zero pages specified while tracking dirty vram
> -             * then stop tracking
> -             */
> -            xfree(dirty_vram);
> -            d->arch.hvm_domain.dirty_vram = NULL;
> -        }
> -
> -        paging_unlock(d);
> -    }
> -out:
> -    if ( dirty_bitmap )
> -        xfree(dirty_bitmap);
> -
> -    return rc;
> -}
> -
> -/************************************************/
>  /*            HAP LOG DIRTY SUPPORT             */
>  /************************************************/
>  
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index de1dd82..6f638a2 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -741,20 +741,23 @@ void p2m_change_type_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>      BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
> -
>      p2m_lock(p2m);
> -    p2m->defer_nested_flush = 1;
>  
> +    p2m->defer_nested_flush = 1;
> +    
>      for ( gfn = start; gfn < end; gfn++ )
>      {
>          mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL);
>          if ( pt == ot )
> -            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, p2m->default_access);
> +            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
> +                          p2m->default_access);
>      }
> -
> +    
>      p2m->defer_nested_flush = 0;
> +
>      if ( nestedhvm_enabled(d) )
>          p2m_flush_nestedp2m(d);
> +
>      p2m_unlock(p2m);
>  }
>  
> diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
> index ac9bb1a..d59e8d6 100644
> --- a/xen/arch/x86/mm/paging.c
> +++ b/xen/arch/x86/mm/paging.c
> @@ -27,6 +27,7 @@
>  #include <asm/p2m.h>
>  #include <asm/hap.h>
>  #include <asm/hvm/nestedhvm.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include <xsm/xsm.h>
>  
> @@ -192,15 +193,11 @@ int paging_log_dirty_disable(struct domain *d)
>      return ret;
>  }
>  
> -/* Mark a page as dirty */
> +/* Given a guest mfn, mark a page as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  {
>      unsigned long pfn;
>      mfn_t gmfn;
> -    int changed;
> -    mfn_t mfn, *l4, *l3, *l2;
> -    unsigned long *l1;
> -    int i1, i2, i3, i4;
>  
>      gmfn = _mfn(guest_mfn);
>  
> @@ -210,6 +207,19 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>  
>      /* We /really/ mean PFN here, even for non-translated guests. */
>      pfn = get_gpfn_from_mfn(mfn_x(gmfn));
> +    paging_mark_dirty_gpfn(d, pfn);
> +}
> +
> +
> +/* Given a guest pfn, mark a page as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long pfn)
> +{
> +    int changed;
> +    mfn_t mfn, *l4, *l3, *l2;
> +    unsigned long *l1;
> +    int i1, i2, i3, i4;
> +    dv_range_t *range;
> +    
>      /* Shared MFNs should NEVER be marked dirty */
>      BUG_ON(SHARED_M2P(pfn));
>  
> @@ -229,6 +239,11 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
>      /* Recursive: this is called from inside the shadow code */
>      paging_lock_recursive(d);
>  
> +    d->arch.paging.log_dirty.dirty_count++;
> +    range = dirty_vram_range_find_gfn(d, pfn);
> +    if ( range )
> +        range->dirty_count++;
> +
>      if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
>      {
>           d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
> @@ -445,7 +460,31 @@ void paging_log_dirty_range(struct domain *d,
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      int i;
>      unsigned long pfn;
> +    dv_range_t *range;
> +    unsigned int range_dirty_count;
>  
> +    paging_lock(d);
> +    range = dirty_vram_range_find_gfn(d, begin_pfn);
> +    if ( !range )
> +    {
> +        paging_unlock(d);
> +        goto out;
> +    }
> +    
> +    range_dirty_count = range->dirty_count;
> +    range->dirty_count = 0;
> +
> +    paging_unlock(d);
> +    
> +    if ( !range_dirty_count)
> +        goto out;
> +
> +    PAGING_DEBUG(LOGDIRTY,
> +                 "log-dirty-range: dom %u [%05lx:%05lx] range_dirty=%u\n",
> +                 d->domain_id,
> +                 begin_pfn,
> +                 begin_pfn + nr,
> +                 range_dirty_count);
>      /*
>       * Set l1e entries of P2M table to be read-only.
>       *
> @@ -460,15 +499,17 @@ void paging_log_dirty_range(struct domain *d,
>  
>      for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
>      {
> -        p2m_type_t pt;
> -        pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
> -        if ( pt == p2m_ram_rw )
> +        if ( p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ==
> +             p2m_ram_rw )
>              dirty_bitmap[i >> 3] |= (1 << (i & 7));
>      }
>  
>      p2m_unlock(p2m);
>  
>      flush_tlb_mask(d->domain_dirty_cpumask);
> +
> + out:
> +    return;
>  }
>  
>  /* Note that this function takes three function pointers. Callers must supply
> diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
> index 292c1f7..b72a6bb 100644
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -36,6 +36,7 @@
>  #include <asm/current.h>
>  #include <asm/flushtlb.h>
>  #include <asm/shadow.h>
> +#include <asm/dirty_vram.h>
>  #include <xen/numa.h>
>  #include "private.h"
>  
> @@ -3121,12 +3122,7 @@ void shadow_teardown(struct domain *d)
>       * calls now that we've torn down the bitmap */
>      d->arch.paging.mode &= ~PG_log_dirty;
>  
> -    if (d->arch.hvm_domain.dirty_vram) {
> -        xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
> -        xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
> -        xfree(d->arch.hvm_domain.dirty_vram);
> -        d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +    dirty_vram_free(d);
>  
>      paging_unlock(d);
>  
> @@ -3464,194 +3460,219 @@ void shadow_clean_dirty_bitmap(struct domain *d)
>  
>  /**************************************************************************/
>  /* VRAM dirty tracking support */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long begin_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
> -{
> -    int rc;
> -    unsigned long end_pfn = begin_pfn + nr;
> -    unsigned long dirty_size = (nr + 7) / 8;
> -    int flush_tlb = 0;
> -    unsigned long i;
> -    p2m_type_t t;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -    struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
> -    if (end_pfn < begin_pfn
> -            || begin_pfn > p2m->max_mapped_pfn
> -            || end_pfn >= p2m->max_mapped_pfn)
> -        return -EINVAL;
>  
> -    /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
> -    p2m_lock(p2m_get_hostp2m(d));
> -    paging_lock(d);
> +/* Support functions for shadow-based dirty VRAM code */
>  
> -    if ( dirty_vram && (!nr ||
> -             ( begin_pfn != dirty_vram->begin_pfn
> -            || end_pfn   != dirty_vram->end_pfn )) )
> -    {
> -        /* Different tracking, tear the previous down. */
> -        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", 
> dirty_vram->begin_pfn, dirty_vram->end_pfn);
> -        xfree(dirty_vram->sl1ma);
> -        xfree(dirty_vram->dirty_bitmap);
> -        xfree(dirty_vram);
> -        dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> -    }
> +#define DEBUG_unshadow_sl1ma                  0          
> +#define DEBUG_unshadow_sl1ma_detail           0
> +#define DEBUG_count_initial_mappings          0
>  
> -    if ( !nr )
> +/* smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping. */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type, 
> +                         mfn_t smfn)
> +{
> +    static unsigned int l1_shadow_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
> +    struct domain *d = v->domain;
> +    dv_dirty_vram_t *dirty_vram;
> +    struct list_head *curr, *next;
> +    dv_paddr_link_t *pl = NULL;
> +    paddr_t *ppl = NULL;
> +    
> +    ASSERT(paging_locked_by_me(d));
> +    /* Ignore all but level 1 shadows */
> +    
> +    if ( (l1_shadow_mask & (1 << shadow_type)) == 0 )
>      {
> -        rc = 0;
>          goto out;
>      }
>  
> -    /* This should happen seldomly (Video mode change),
> -     * no need to be careful. */
> +    dirty_vram = d->arch.hvm_domain.dirty_vram;
>      if ( !dirty_vram )
>      {
> -        /* Throw away all the shadows rather than walking through them 
> -         * up to nr times getting rid of mappings of each pfn */
> -        shadow_blow_tables(d);
> -
> -        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, 
> end_pfn);
> -
> -        rc = -ENOMEM;
> -        if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
> -            goto out;
> -        dirty_vram->begin_pfn = begin_pfn;
> -        dirty_vram->end_pfn = end_pfn;
> -        d->arch.hvm_domain.dirty_vram = dirty_vram;
> -
> -        if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
> -            goto out_dirty_vram;
> -        memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
> -
> -        if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
> -            goto out_sl1ma;
> -
> -        dirty_vram->last_dirty = NOW();
> -
> -        /* Tell the caller that this time we could not track dirty bits. */
> -        rc = -ENODATA;
> -    }
> -    else if (dirty_vram->last_dirty == -1)
> -    {
> -        /* still completely clean, just copy our empty bitmap */
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 )
> -            rc = 0;
> +        goto out;
>      }
> -    else
> +        
> +    list_for_each_safe(curr, next, &dirty_vram->range_head)
>      {
> -        unsigned long map_mfn = INVALID_MFN;
> -        void *map_sl1p = NULL;
> -
> -        /* Iterate over VRAM to track dirty bits. */
> -        for ( i = 0; i < nr; i++ ) {
> -            mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
> -            struct page_info *page;
> -            int dirty = 0;
> -            paddr_t sl1ma = dirty_vram->sl1ma[i];
> +        dv_range_t *range = list_entry(curr, dv_range_t, range_link);
> +        unsigned long i;
> +        int max_mappings = 1, mappings = 0;
> +        int unshadowed = 0;
> +        for ( i = 0; i != range->end_pfn - range->begin_pfn; i++ )
> +        {
> +            
> +            pl = remap_vaddr(pl, &range->pl_tab[ i ].mapping);
> +            /* clean up from previous iteration */
> +            ppl = remap_vaddr(ppl, NULL); /* unmap ppl */
>  
> -            if (mfn_x(mfn) == INVALID_MFN)
> -            {
> -                dirty = 1;
> -            }
> -            else
> +            mappings = 0;
> +            
> +            while ( pl != NULL )
>              {
> -                page = mfn_to_page(mfn);
> -                switch (page->u.inuse.type_info & PGT_count_mask)
> -                {
> -                case 0:
> -                    /* No guest reference, nothing to track. */
> -                    break;
> -                case 1:
> -                    /* One guest reference. */
> -                    if ( sl1ma == INVALID_PADDR )
> -                    {
> -                        /* We don't know which sl1e points to this, too bad. */
> -                        dirty = 1;
> -                        /* TODO: Heuristics for finding the single mapping of
> -                         * this gmfn */
> -                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
> -                    }
> -                    else
> -                    {
> -                        /* Hopefully the most common case: only one mapping,
> -                         * whose dirty bit we can use. */
> -                        l1_pgentry_t *sl1e;
> -                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
> -
> -                        if ( sl1mfn != map_mfn )
> -                        {
> -                            if ( map_sl1p )
> -                                sh_unmap_domain_page(map_sl1p);
> -                            map_sl1p = sh_map_domain_page(_mfn(sl1mfn));
> -                            map_mfn = sl1mfn;
> -                        }
> -                        sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
> -
> -                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
> -                        {
> -                            dirty = 1;
> -                            /* Note: this is atomic, so we may clear a
> -                             * _PAGE_ACCESSED set by another processor. */
> -                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
> -                            flush_tlb = 1;
> -                        }
> -                    }
> -                    break;
> -                default:
> -                    /* More than one guest reference,
> -                     * we don't afford tracking that. */
> -                    dirty = 1;
> +                paddr_t sl1ma = pl->sl1ma;
> +                unsigned long sl1mn;
> +               
> +                if ( sl1ma == INVALID_PADDR )
>                      break;
> +                
> +                sl1mn = sl1ma >> PAGE_SHIFT;
> +                if ( sl1mn == mfn_x(smfn) ) {
> +#if DEBUG_unshadow_sl1ma_detail
> +                    gdprintk(XENLOG_DEBUG,
> +                             "[%lx] gfn[%lx] unshadow sl1ma:%lx\n",
> +                             mfn_x(smfn),
> +                             range->begin_pfn + i,
> +                             sl1ma);
> +#endif
> +                    unshadowed++;
> +                    pl = free_paddr_link(d, ppl, pl);
> +                    /* ppl remains unchanged. */
> +                }
> +                else
> +                {
> +                    ppl = remap_vaddr(ppl, &pl->pl_next);
> +                    pl =  remap_maddr(pl, (*ppl));
> +                    mappings++;
>                  }
> -            }
> -
> -            if ( dirty )
> -            {
> -                dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -                dirty_vram->last_dirty = NOW();
>              }
>          }
> +        if ( mappings > max_mappings )
> +            max_mappings = mappings;
> +        
> +        if ( unshadowed ) {
> +#if DEBUG_unshadow_sl1ma
> +            gdprintk(XENLOG_DEBUG,
> +                     "[%lx] gfn[%05lx:%05lx] unshadowed:%d mappings:0x%x "
> +                     "max_mappings:%d\n",
> +                     mfn_x(smfn),
> +                     range->begin_pfn, range->end_pfn,
> +                     unshadowed, range->nr_mappings, max_mappings);
> +#endif
> +        }
> +    }
> + out:
> +    remap_vaddr(pl,  NULL); /* unmap pl */
> +    remap_vaddr(ppl, NULL); /* unmap ppl */
> +    return;
> +}
> +
>  
> -        if ( map_sl1p )
> -            sh_unmap_domain_page(map_sl1p);
> +typedef int (*hash_pfn_callback_t)(struct vcpu *v,
> +                                   mfn_t smfn,
> +                                   unsigned long begin_pfn,
> +                                   unsigned long end_pfn,
> +                                   int *removed);
>  
> -        rc = -EFAULT;
> -        if ( copy_to_guest(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
> -            memset(dirty_vram->dirty_bitmap, 0, dirty_size);
> -            if (dirty_vram->last_dirty + SECONDS(2) < NOW())
> +static int hash_pfn_foreach(struct vcpu *v, 
> +                            unsigned int callback_mask, 
> +                            hash_pfn_callback_t callbacks[], 
> +                            unsigned long begin_pfn,
> +                            unsigned long end_pfn)
> +/* Walk the hash table looking at the types of the entries and 
> + * calling the appropriate callback function for each entry. 
> + * The mask determines which shadow types we call back for, and the array
> + * of callbacks tells us which function to call.
> + * Any callback may return non-zero to let us skip the rest of the scan. 
> + *
> + * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
> + * then return non-zero to terminate the scan. */
> +{
> +    int i, done = 0, removed = 0;
> +    struct domain *d = v->domain;
> +    struct page_info *x;
> +
> +    /* Say we're here, to stop hash-lookups reordering the chains */
> +    ASSERT(paging_locked_by_me(d));
> +    ASSERT(d->arch.paging.shadow.hash_walking == 0);
> +    d->arch.paging.shadow.hash_walking = 1;
> +
> +    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
> +    {
> +        /* WARNING: This is not safe against changes to the hash table.
> +         * The callback *must* return non-zero if it has inserted or
> +         * deleted anything from the hash (lookups are OK, though). */
> +        for ( x = d->arch.paging.shadow.hash_table[i];
> +              x;
> +              x = next_shadow(x) )
> +        {
> +            if ( callback_mask & (1 << x->u.sh.type) )
>              {
> -                /* was clean for more than two seconds, try to disable guest
> -                 * write access */
> -                for ( i = begin_pfn; i < end_pfn; i++ ) {
> -                    mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
> -                    if (mfn_x(mfn) != INVALID_MFN)
> -                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
> -                }
> -                dirty_vram->last_dirty = -1;
> +                ASSERT(x->u.sh.type <= 15);
> +                ASSERT(callbacks[x->u.sh.type] != NULL);
> +                done = callbacks[x->u.sh.type](v, page_to_mfn(x), 
> +                                               begin_pfn, end_pfn,
> +                                               &removed);
> +                if ( done ) break;
>              }
> -            rc = 0;
>          }
> +        if ( done ) break; 
>      }
> -    if ( flush_tlb )
> -        flush_tlb_mask(d->domain_dirty_cpumask);
> -    goto out;
> +    d->arch.paging.shadow.hash_walking = 0;
> +    return removed;
> +}
> +
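
This walker follows the same convention as the existing hash_foreach():
the caller selects shadow types by OR-ing their type bits into
callback_mask and supplies one handler per selected type, indexed by
type.  A minimal hypothetical caller (names invented here, not part of
the patch) would look something like:

    /* Illustration only: visit l1/fl1 32-bit shadows in a pfn window. */
    static int visit_l1_cb(struct vcpu *v, mfn_t smfn,
                           unsigned long begin_pfn, unsigned long end_pfn,
                           int *removed)
    {
        /* Look but don't touch: returning 0 continues the walk and
         * promises we did not insert or delete hash entries. */
        return 0;
    }

    static void visit_l1_shadows(struct vcpu *v,
                                 unsigned long begin_pfn,
                                 unsigned long end_pfn)
    {
        static hash_pfn_callback_t cbs[SH_type_unused] = {
            [SH_type_l1_32_shadow]  = visit_l1_cb,
            [SH_type_fl1_32_shadow] = visit_l1_cb,
        };
        unsigned int mask = (1u << SH_type_l1_32_shadow) |
                            (1u << SH_type_fl1_32_shadow);

        hash_pfn_foreach(v, mask, cbs, begin_pfn, end_pfn);
    }
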
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range)
> +{
> +    /* Dispatch table for getting per-type functions */
> +    static hash_pfn_callback_t callbacks[SH_type_unused] = {
> +        NULL, /* none    */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* l1_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 2), /* fl1_32  */
> +        NULL, /* l2_32   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* l1_pae  */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 3), /* fl1_pae */
> +        NULL, /* l2_pae  */
> +        NULL, /* l2h_pae */
> +#if CONFIG_PAGING_LEVELS >= 4
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* l1_64   */
> +        SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, 4), /* fl1_64  */
> +#else
> +        NULL, /* l1_64   */
> +        NULL, /* fl1_64  */
> +#endif
> +        NULL, /* l2_64   */
> +        NULL, /* l2h_64  */
> +        NULL, /* l3_64   */
> +        NULL, /* l4_64   */
> +        NULL, /* p2m     */
> +        NULL  /* unused  */
> +    };
>  
> -out_sl1ma:
> -    xfree(dirty_vram->sl1ma);
> -out_dirty_vram:
> -    xfree(dirty_vram);
> -    dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
> +    static unsigned int callback_mask = 
> +          1 << SH_type_l1_32_shadow
> +        | 1 << SH_type_fl1_32_shadow
> +        | 1 << SH_type_l1_pae_shadow
> +        | 1 << SH_type_fl1_pae_shadow
> +        | 1 << SH_type_l1_64_shadow
> +        | 1 << SH_type_fl1_64_shadow
> +        ;
>  
> -out:
> -    paging_unlock(d);
> -    p2m_unlock(p2m_get_hostp2m(d));
> -    return rc;
> +    perfc_incr(shadow_mappings);
> +
> +    hash_pfn_foreach(v, callback_mask, callbacks,
> +                     range->begin_pfn,
> +                     range->end_pfn);
> +
> +#if DEBUG_count_initial_mappings
> +    gdprintk(XENLOG_DEBUG, "[%05lx:%05lx] count of initial mappings\n",
> +             range->begin_pfn, range->end_pfn);
> +#endif
>  }
>  
> +
>  /**************************************************************************/
>  /* Shadow-control XEN_DOMCTL dispatcher */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index b79cd6c..922e01a 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -35,6 +35,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/cacheattr.h>
> +#include <asm/dirty_vram.h>
>  #include <asm/mtrr.h>
>  #include <asm/guest_pt.h>
>  #include <public/sched.h>
> @@ -149,6 +150,10 @@ delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
>      SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
>                     gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +
> +    /* Remove any dv_paddr_links to the erstwhile shadow page. */
> +    dirty_vram_delete_shadow(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
> +    
>      shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
>  }
>  
> @@ -160,6 +165,10 @@ delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
>                     v->domain->domain_id, v->vcpu_id,
>                     mfn_x(gmfn), shadow_type, mfn_x(smfn));
>      ASSERT(mfn_to_page(smfn)->u.sh.head);
> +    
> +    /* Remove any dv_paddr_links to the erstwhile shadow page. */
> +    dirty_vram_delete_shadow(v, mfn_x(gmfn), shadow_type, smfn);
> +    
>      shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
>      /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
>      if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
> @@ -516,7 +525,6 @@ _sh_propagate(struct vcpu *v,
>      guest_l1e_t guest_entry = { guest_intpte };
>      shadow_l1e_t *sp = shadow_entry_ptr;
>      struct domain *d = v->domain;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
>      gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
>      u32 pass_thru_flags;
>      u32 gflags, sflags;
> @@ -663,17 +671,6 @@ _sh_propagate(struct vcpu *v,
>          }
>      }
>  
> -    if ( unlikely((level == 1) && dirty_vram
> -            && dirty_vram->last_dirty == -1
> -            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
> -            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
> -    {
> -        if ( ft & FETCH_TYPE_WRITE )
> -            dirty_vram->last_dirty = NOW();
> -        else
> -            sflags &= ~_PAGE_RW;
> -    }
> -
>      /* Read-only memory */
>      if ( p2m_is_readonly(p2mt) ||
>           (p2mt == p2m_mmio_direct &&
> @@ -1072,101 +1069,60 @@ static int shadow_set_l2e(struct vcpu *v,
>      return flags;
>  }
>  
> -static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
> +/* shadow_vram_fix_l1e()
> + *
> + * Tests L1PTEs as they are modified, looking for when they start to (or
> + * cease to) point to frame buffer pages.  If the old and new gfns differ,
> + * it calls dirty_vram_range_update() to update the dirty_vram structures.
> + */
> +static inline void shadow_vram_fix_l1e(shadow_l1e_t old_sl1e,
> +                                       shadow_l1e_t new_sl1e,
>                                         shadow_l1e_t *sl1e,
>                                         mfn_t sl1mfn,
>                                         struct domain *d)
>  { 
> -    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
> -    int flags = shadow_l1e_get_flags(new_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> +    mfn_t new_mfn, old_mfn;
> +    unsigned long new_gfn = INVALID_M2P_ENTRY, old_gfn = INVALID_M2P_ENTRY;
> +    paddr_t sl1ma;
> +    dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
>  
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> +    if ( !dirty_vram )
>          return;
>  
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> +    sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK);
>  
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    old_mfn = shadow_l1e_get_mfn(old_sl1e);
> +
> +    if ( !sh_l1e_is_magic(old_sl1e) &&
> +         (l1e_get_flags(old_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(old_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -            /* Initial guest reference, record it */
> -            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
> -                | ((unsigned long)sl1e & ~PAGE_MASK);
> +        old_gfn = mfn_to_gfn(d, old_mfn);
>      }
> -}
> -
> -static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
> -                                       shadow_l1e_t *sl1e,
> -                                       mfn_t sl1mfn,
> -                                       struct domain *d)
> -{
> -    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
> -    int flags = shadow_l1e_get_flags(old_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
> -
> -    if ( !dirty_vram         /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> -        return;
> -
> -    gfn = mfn_to_gfn(d, mfn);
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> -
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    
> +    new_mfn = shadow_l1e_get_mfn(new_sl1e);
> +    if ( !sh_l1e_is_magic(new_sl1e) &&
> +         (l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
> +         mfn_valid(new_mfn))
>      {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        int dirty = 0;
> -        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
> -            | ((unsigned long)sl1e & ~PAGE_MASK);
> +        new_gfn = mfn_to_gfn(d, new_mfn);
> +    }
>  
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -        {
> -            /* Last reference */
> -            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
> -                /* We didn't know it was that one, let's say it is dirty */
> -                dirty = 1;
> -            }
> -            else
> -            {
> -                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -                if ( flags & _PAGE_DIRTY )
> -                    dirty = 1;
> -            }
> -        }
> -        else
> +    if ( old_gfn == new_gfn ) return;
> +
> +    if ( VALID_M2P(old_gfn) )
> +        if ( dirty_vram_range_update(d, old_gfn, sl1ma, 0/*clear*/) )
>          {
> -            /* We had more than one reference, just consider the page dirty. */
> -            dirty = 1;
> -            /* Check that it's not the one we recorded. */
> -            if ( dirty_vram->sl1ma[i] == sl1ma )
> -            {
> -                /* Too bad, we remembered the wrong one... */
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -            }
> -            else
> -            {
> -                /* Ok, our recorded sl1e is still pointing to this page, let's
> -                 * just hope it will remain. */
> -            }
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) cleared vram pte\n",
> +                          old_gfn, mfn_x(old_mfn));
>          }
> -        if ( dirty )
> +
> +    if ( VALID_M2P(new_gfn) )
> +        if ( dirty_vram_range_update(d, new_gfn, sl1ma, 1/*set*/) )
>          {
> -            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -            dirty_vram->last_dirty = NOW();
> +            SHADOW_PRINTK("gfn %lx (mfn %lx) set vram pte\n",
> +                          new_gfn, mfn_x(new_mfn));
>          }
> -    }
>  }
>  
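
One detail that helps when reading both this function and
sh_find_vram_mappings_in_l1() further down: sl1ma packs the machine
address of the shadow L1 *entry* itself, i.e. the frame of the shadow L1
table plus the byte offset of the PTE within it.  A worked example of the
arithmetic (my own illustration, assuming 4 KiB pages and 8-byte shadow
PTEs):

    /*
     * sl1mfn = 0x1234, sl1e points at slot 255 of that table:
     *
     *   sl1ma = pfn_to_paddr(0x1234) | ((unsigned long)sl1e & ~PAGE_MASK)
     *         = 0x1234000           | 0x7f8
     *         = 0x12347f8
     *
     * so the bookkeeping can later recover both the table (high bits)
     * and the exact slot (low 12 bits) from a single paddr_t.
     */
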
>  static int shadow_set_l1e(struct vcpu *v, 
> @@ -1211,12 +1167,13 @@ static int shadow_set_l1e(struct vcpu *v,
>                  shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
>                  /* fall through */
>              case 0:
> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
>                  break;
>              }
>          }
>      } 
>  
> +    shadow_vram_fix_l1e(old_sl1e, new_sl1e, sl1e, sl1mfn, d);
> +
>      /* Write the new entry */
>      shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
>      flags |= SHADOW_SET_CHANGED;
> @@ -1231,7 +1188,6 @@ static int shadow_set_l1e(struct vcpu *v,
>           * trigger a flush later. */
>          if ( shadow_mode_refcounts(d) ) 
>          {
> -            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
>              shadow_put_page_from_l1e(old_sl1e, d);
>              TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
>          } 
> @@ -2018,7 +1974,6 @@ void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
>          SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
>              if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
>                   && !sh_l1e_is_magic(*sl1e) ) {
> -                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
>                  shadow_put_page_from_l1e(*sl1e, d);
>              }
>          });
> @@ -4336,6 +4291,37 @@ int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
>      return done;
>  }
>  
> +
> +int sh_find_vram_mappings_in_l1(struct vcpu *v,
> +                                mfn_t sl1mfn,
> +                                unsigned long begin_pfn,
> +                                unsigned long end_pfn,
> +                                int *removed)
> +/* Find all VRAM mappings in this shadow l1 table */
> +{
> +    struct domain *d = v->domain;
> +    shadow_l1e_t *sl1e;
> +    int done = 0;
> +
> +    /* only returns _PAGE_PRESENT entries */
> +    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
> +    {
> +        unsigned long gfn;
> +        mfn_t gmfn = shadow_l1e_get_mfn(*sl1e);
> +        if ( !mfn_valid(gmfn) )
> +            continue;
> +        gfn = mfn_to_gfn(d, gmfn);
> +        if ( VALID_M2P(gfn) && (begin_pfn <= gfn) && (gfn < end_pfn) ) 
> +        {
> +            paddr_t sl1ma =
> +                pfn_to_paddr(mfn_x(sl1mfn)) |
> +                ( (unsigned long)sl1e & ~PAGE_MASK );
> +            dirty_vram_range_update(v->domain, gfn, sl1ma, 1/*set*/);
> +        }
> +    });
> +    return 0;
> +}
> +
>  /**************************************************************************/
>  /* Functions to excise all pointers to shadows from higher-level shadows. */
>  
> diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
> index 835121e..436a4ac 100644
> --- a/xen/arch/x86/mm/shadow/multi.h
> +++ b/xen/arch/x86/mm/shadow/multi.h
> @@ -66,7 +66,12 @@ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
>  extern int
>  SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, GUEST_LEVELS)
>      (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
> -
> +extern int
> +SHADOW_INTERNAL_NAME(sh_find_vram_mappings_in_l1, GUEST_LEVELS)
> +     (struct vcpu *v, mfn_t sl1mfn, 
> +      unsigned long begin_pfn,
> +      unsigned long end_pfn,
> +      int *removed);
>  extern void
>  SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, GUEST_LEVELS)
>      (struct vcpu *v, void *ep, mfn_t smfn);
> diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
> index 43ce1db..5b0f9f7 100644
> --- a/xen/arch/x86/mm/shadow/types.h
> +++ b/xen/arch/x86/mm/shadow/types.h
> @@ -229,6 +229,7 @@ static inline shadow_l4e_t shadow_l4e_from_mfn(mfn_t mfn, u32 flags)
>  #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
>  #define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
>  #define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
> +#define sh_find_vram_mappings_in_l1 INTERNAL_NAME(sh_find_vram_mappings_in_l1)
>  #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
>  #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
>  #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
> diff --git a/xen/include/asm-x86/dirty_vram.h b/xen/include/asm-x86/dirty_vram.h
> new file mode 100644
> index 0000000..727af05
> --- /dev/null
> +++ b/xen/include/asm-x86/dirty_vram.h
> @@ -0,0 +1,227 @@
> +/****************************************************************************
> + * include/asm-x86/dirty_vram.h
> + *
> + * Interface for tracking dirty VRAM pages
> + *
> + * Copyright (c) 2012 Citrix Systems, Inc. (Robert Phillips)
> + * Parts of this code are Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
> + * Parts of this code are Copyright (c) 2006 by XenSource Inc.
> + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
> + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#ifndef _DIRTY_VRAM_H
> +#define _DIRTY_VRAM_H
> +
> +/*
> + * In shadow mode we need to bookkeep all the L1 page table entries that
> + * map a frame buffer page.  Struct dv_paddr_link does this by
> + * recording the address of an L1 page table entry for some frame buffer page.
> + * It also links to additional pl entries if the frame buffer page
> + * has multiple mappings.
> + * In practice very few pages have multiple mappings, but to rule out
> + * pathological situations we limit the number of mappings we are
> + * willing to bookkeep.
> + */
> +
> +#define DV_ADDR_LINK_LIST_LIMIT 64
> +
> +typedef struct dv_paddr_link {
> +    paddr_t sl1ma;
> +    paddr_t pl_next;
> +} dv_paddr_link_t;
> +
> +typedef struct dv_pl_entry {
> +    dv_paddr_link_t mapping;
> +    bool_t stuck_dirty;
> +} dv_pl_entry_t;
> +
> +/*
> + * This defines an extension page of pl entries for FB pages with multiple
> + * mappings. All such pages (of a domain) are linked together.
> + */
> +typedef struct dv_paddr_link_ext {
> +    paddr_t ext_link;
> +    dv_paddr_link_t entries[ ( PAGE_SIZE - sizeof( paddr_t ) ) /
> +                             sizeof( dv_paddr_link_t ) ];
> +} dv_paddr_link_ext_t;
> +
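
Minor aside on the sizing: assuming 4 KiB pages and an 8-byte paddr_t
(so 16 bytes per dv_paddr_link_t), entries[] works out to
(4096 - 8) / 16 = 255 links per extension page, with ext_link chaining
the extension pages together.  If that invariant matters, a compile-time
check along these lines might be worth adding (just a suggestion):

    BUILD_BUG_ON(sizeof(dv_paddr_link_ext_t) > PAGE_SIZE);
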
> +/*
> + * This defines a single frame buffer range.  It bookkeeps all the
> + * level 1 PTEs that map guest pages within that range.
> + * All such ranges (of a domain) are linked together.
> + */
> +typedef struct dv_range {
> +    struct list_head range_link; /* the several ranges form a linked list */
> +    unsigned long begin_pfn;
> +    unsigned long end_pfn;
> +    dv_pl_entry_t *pl_tab; /* table has 1 pl entry per pfn in range */
> +    int nr_mappings;  /* number of mappings currently bookkept for range */
> +    int mappings_hwm; /* high water mark of max mapping count */
> +    unsigned int dirty_count;
> +} dv_range_t;
> +
> +/*
> + * This contains all the data structures required by a domain to
> + * bookkeep the dirty pages within its frame buffers.
> + */
> +typedef struct dv_dirty_vram {
> +    struct list_head range_head; /* head of the linked list of ranges */
> +    paddr_t ext_head;/* head of list of extension pages */
> +    paddr_t pl_free; /* free list of pl's within extension pages */
> +    int nr_ranges;   /* bookkeeps number of ranges */
> +    int ranges_hwm;  /* high water mark of max number of ranges */
> +} dv_dirty_vram_t;
> +
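
To make the relationship between dv_range_t, dv_pl_entry_t and the
extension pages concrete: pl_tab[] holds the primary mapping for each pfn
of the range inline, and any additional mappings hang off it via the
machine-address links.  A hypothetical lookup helper (illustration only,
not part of the patch) would be:

    /* Return the bookkeeping entry for a gfn inside a tracked range. */
    static dv_pl_entry_t *pl_entry_for_gfn(dv_range_t *range,
                                           unsigned long gfn)
    {
        ASSERT(range->begin_pfn <= gfn && gfn < range->end_pfn);
        return &range->pl_tab[gfn - range->begin_pfn]; /* 1 entry per pfn */
    }

The entry's mapping.sl1ma records the first L1 PTE address, and
mapping.pl_next (a paddr_t, not a pointer) chains to further
dv_paddr_link entries carved out of the extension pages.
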
> +/* Allocates domain's dirty_vram structure */
> +dv_dirty_vram_t *
> +dirty_vram_alloc(struct domain *d);
> +
> +/*
> + * Returns domain's dirty_vram structure,
> + * allocating it if necessary
> + */
> +dv_dirty_vram_t *
> +dirty_vram_find_or_alloc(struct domain *d);
> +
> +/* Frees domain's dirty_vram structure */
> +void dirty_vram_free(struct domain *d);
> +
> +/* Returns dirty vram range containing gfn, NULL if none */
> +struct dv_range *
> +dirty_vram_range_find_gfn(struct domain *d,
> +                          unsigned long gfn);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * NULL if none
> + */
> +dv_range_t *
> +dirty_vram_range_find(struct domain *d,
> +                      unsigned long begin_pfn,
> +                      unsigned long nr);
> +
> +/*
> + * Allocate dirty vram range containing [ begin_pfn .. begin_pfn+nr ),
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_alloc(struct domain *d,
> +                       unsigned long begin_pfn,
> +                       unsigned long nr);
> +
> +/*
> + * Returns dirty vram range matching [ begin_pfn .. begin_pfn+nr ),
> + * creating a range if none already exists and
> + * freeing any existing range that overlaps the new range.
> + */
> +dv_range_t *
> +dirty_vram_range_find_or_alloc(struct domain *d,
> +                               unsigned long begin_pfn,
> +                               unsigned long nr);
> +
> +void dirty_vram_range_free(struct domain *d,
> +                           dv_range_t *range);
> +
> +/* Bookkeep PTE address of a frame buffer page */
> +int dirty_vram_range_update(struct domain *d,
> +                            unsigned long gfn,
> +                            paddr_t sl1ma,
> +                            int set);
> +
> +/*
> + * smfn is no longer a shadow page.  Remove it from any
> + * dirty vram range mapping.
> + */
> +void
> +dirty_vram_delete_shadow(struct vcpu *v,
> +                         unsigned long gfn,
> +                         unsigned int shadow_type,
> +                         mfn_t smfn);
> +
> +
> +/*
> + * Scan all the L1 tables looking for VRAM mappings.
> + * Record them in the domain's dv_dirty_vram structure
> + */
> +void sh_find_all_vram_mappings(struct vcpu *v,
> +                               dv_range_t *range);
> +
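
For anyone following the control flow from the hypercall side, my reading
of this interface is roughly the sequence below (a sketch only, not a
quote of dirty_vram.c):

    /* Hypothetical shadow-mode caller handling a track-dirty-vram request. */
    dv_range_t *range = dirty_vram_range_find(d, begin_pfn, nr);
    if ( !range )
    {
        /* New region: this drops any overlapping ranges, then the L1
         * scan below seeds the sl1ma bookkeeping for existing shadows. */
        range = dirty_vram_range_alloc(d, begin_pfn, nr);
        if ( range )
            sh_find_all_vram_mappings(d->vcpu[0], range);
    }
    /* Thereafter shadow_set_l1e() keeps the range current via
     * shadow_vram_fix_l1e(), and the caller reports/clears the dirty
     * bitmap for [ begin_pfn, begin_pfn + nr ). */
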
> +/*
> + * Free a paddr_link struct, given address of its
> + * predecessor in singly-linked list
> + */
> +dv_paddr_link_t *
> +free_paddr_link(struct domain *d,
> +                paddr_t* ppl,
> +                dv_paddr_link_t *pl);
> +
> +
> +/* Enable VRAM dirty tracking. */
> +int
> +shadow_track_dirty_vram(struct domain *d,
> +                        unsigned long first_pfn,
> +                        unsigned long nr,
> +                        XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +int
> +hap_track_dirty_vram(struct domain *d,
> +                     unsigned long begin_pfn,
> +                     unsigned long nr,
> +                     XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> +
> +void
> +hap_clean_vram_tracking_range(struct domain *d,
> +                              unsigned long begin_pfn,
> +                              unsigned long nr,
> +                              uint8_t *dirty_bitmap);
> +
> +/* Unmap a va and map a ma.
> + * This is used when walking a linked list in which
> + * the links are stored as ma's rather than va's.
> + * Each step in the walk unmaps the va returned
> + * by the previous iteration and maps the next link.
> + *
> + * Unmaps @old_va if not NULL.
> + *
> + * If @new_ma is not INVALID_PADDR, maps it and returns the resulting
> + * va (which must subsequently be unmapped.)  Else returns NULL.
> + */
> +void *
> +remap_maddr(void *old_va, paddr_t new_ma);
> +
> +
> +/* Like the above but it acquires a mapping
> + * (possibly an additional mapping) on @new_va.
> + *
> + * Returns @new_va.
> + *
> + * Does no unmapping if @old_va is NULL.
> + * Does no mapping if @new_va is NULL.
> + */
> +void *
> +remap_vaddr(void *old_va, void *new_va);
> +
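
The machine-address links take a moment to get used to, so here is the
walking idiom as I understand it (illustrative sketch; I am assuming the
lists are terminated with INVALID_PADDR, which is how I read the
remap_maddr() contract above).  Each step maps the next element and
unmaps the previous one, so at most one link is mapped at a time:

    /* Illustration only: walk the free list of pl entries. */
    static void walk_pl_free_list(struct domain *d)
    {
        dv_dirty_vram_t *dirty_vram = d->arch.hvm_domain.dirty_vram;
        dv_paddr_link_t *pl;

        for ( pl = remap_maddr(NULL, dirty_vram->pl_free);
              pl != NULL;
              pl = remap_maddr(pl, pl->pl_next) ) /* unmap old, map next */
        {
            /* ... inspect *pl here ... */
        }
        /* Loop exit means remap_maddr() saw the terminator and returned
         * NULL, so nothing is left mapped. */
    }
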
> +#endif /* _DIRTY_VRAM_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
> index 916a35b..3e3a1f5 100644
> --- a/xen/include/asm-x86/hap.h
> +++ b/xen/include/asm-x86/hap.h
> @@ -57,10 +57,6 @@ void  hap_final_teardown(struct domain *d);
>  void  hap_teardown(struct domain *d);
>  void  hap_vcpu_init(struct vcpu *v);
>  void  hap_logdirty_init(struct domain *d);
> -int   hap_track_dirty_vram(struct domain *d,
> -                           unsigned long begin_pfn,
> -                           unsigned long nr,
> -                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
>  
>  extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
>  
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> index 27b3de5..0cc7b05 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -74,7 +74,7 @@ struct hvm_domain {
>      struct list_head       pinned_cacheattr_ranges;
>  
>      /* VRAM dirty support. */
> -    struct sh_dirty_vram *dirty_vram;
> +    struct dv_dirty_vram  *dirty_vram;
>  
>      /* If one of vcpus of this domain is in no_fill_mode or
>       * mtrr/pat between vcpus is not the same, set is_in_uc_mode
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index c3a8848..e22df38 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -154,9 +154,13 @@ void paging_log_dirty_init(struct domain *d,
>                             int  (*disable_log_dirty)(struct domain *d),
>                             void (*clean_dirty_bitmap)(struct domain *d));
>  
> -/* mark a page as dirty */
> +/* mark a gmfn as dirty, a wrapper around marking a gpfn as dirty */
>  void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
>  
> +/* mark a gpfn as dirty */
> +void paging_mark_dirty_gpfn(struct domain *d, unsigned long gpfn);
> +
> +
>  /* is this guest page dirty? 
>   * This is called from inside paging code, with the paging lock held. */
>  int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
> @@ -183,15 +187,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
>  #define L4_LOGDIRTY_IDX(pfn) 0
>  #endif
>  
> -/* VRAM dirty tracking support */
> -struct sh_dirty_vram {
> -    unsigned long begin_pfn;
> -    unsigned long end_pfn;
> -    paddr_t *sl1ma;
> -    uint8_t *dirty_bitmap;
> -    s_time_t last_dirty;
> -};
> -
>  
> /*****************************************************************************
>   * Entry points into the paging-assistance code */
>  
> diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
> index 2eb6efc..940d7fd 100644
> --- a/xen/include/asm-x86/shadow.h
> +++ b/xen/include/asm-x86/shadow.h
> @@ -62,12 +62,6 @@ void shadow_vcpu_init(struct vcpu *v);
>  /* Enable an arbitrary shadow mode.  Call once at domain creation. */
>  int shadow_enable(struct domain *d, u32 mode);
>  
> -/* Enable VRAM dirty bit tracking. */
> -int shadow_track_dirty_vram(struct domain *d,
> -                            unsigned long first_pfn,
> -                            unsigned long nr,
> -                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
> -
>  /* Handler for shadow control ops: operations from user-space to enable
>   * and disable ephemeral shadow modes (test mode and log-dirty mode) and
>   * manipulate the log-dirty bitmap. */
> -- 
> 1.7.9.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

