
Re: [Xen-devel] [PATCH v4 3/5] xen/arm: clean and invalidate all guest caches by VMID after domain build.



On Fri, 7 Feb 2014, Ian Campbell wrote:
> Guests are initially started with caches disabled and so we need to make sure
> they see consistent data in RAM (requiring a cache clean) but also that they
> do not have old stale data suddenly appear in the caches when they enable
> their caches (requiring the invalidate).
> 
> This can be split into two halves. First we must flush each page as it is
> allocated to the guest. It is not sufficient to do the flush at scrub time
> since this will miss pages which are ballooned out by the guest (where the
> guest must scrub if it cares about not leaking the page content). We need to
> clean as well as invalidate to make sure that any scrubbing which has
> occurred gets committed to real RAM. To achieve this add a new
> sync_page_to_ram function, which is a stub on x86.
> 
> Secondly we need to flush anything which the domain builder touches, which we
> do via a new domctl.
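
To make the toolstack half concrete, the flow for each region the builder
touches ends up looking roughly like this (just a sketch of the pattern,
not the literal code; pfn and image are placeholders):

    void *vaddr = xc_map_foreign_range(xch, domid, PAGE_SIZE,
                                       PROT_READ | PROT_WRITE, pfn);
    memcpy(vaddr, image, PAGE_SIZE);          /* builder writes via its map */
    munmap(vaddr, PAGE_SIZE);
    xc_domain_cacheflush(xch, domid, pfn, 1); /* commit the writes to RAM */
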
> 
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Cc: jbeulich@xxxxxxxx
> Cc: keir@xxxxxxx
> Cc: ian.jackson@xxxxxxxxxxxxx
> ---
> v4: introduce a function to clean and invalidate as intended
> 
>     make the domctl take a length not an end.
> 
> v3:
>     s/cacheflush_page/sync_page_to_ram/
> 
>     xc interface takes a length instead of an end
> 
>     make the domctl range inclusive.
> 
>     make xc interface internal -- it isn't needed from libxl in the current
>     design and it is easier to expose an interface in the future than to hide
>     it.
> 
> v2:
>    Switch to cleaning at page allocation time + explicit flushing of the
>    regions which the toolstack touches.
> 
>    Add XSM for new domctl.
> 
>    New domctl restricts the amount of space it is willing to flush, to avoid
>    thinking about preemption.
> ---
>  tools/libxc/xc_dom_boot.c           |    4 ++++
>  tools/libxc/xc_dom_core.c           |    2 ++
>  tools/libxc/xc_domain.c             |   10 ++++++++++
>  tools/libxc/xc_private.c            |    2 ++
>  tools/libxc/xc_private.h            |    3 +++
>  xen/arch/arm/domctl.c               |   14 ++++++++++++++
>  xen/arch/arm/mm.c                   |   12 ++++++++++++
>  xen/arch/arm/p2m.c                  |   25 +++++++++++++++++++++++++
>  xen/common/page_alloc.c             |    5 +++++
>  xen/include/asm-arm/arm32/page.h    |    4 ++++
>  xen/include/asm-arm/arm64/page.h    |    4 ++++
>  xen/include/asm-arm/p2m.h           |    3 +++
>  xen/include/asm-arm/page.h          |    3 +++
>  xen/include/asm-x86/page.h          |    3 +++
>  xen/include/public/domctl.h         |   13 +++++++++++++
>  xen/xsm/flask/hooks.c               |    3 +++
>  xen/xsm/flask/policy/access_vectors |    2 ++
>  17 files changed, 112 insertions(+)
> 
> diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
> index 5a9cfc6..3d4d107 100644
> --- a/tools/libxc/xc_dom_boot.c
> +++ b/tools/libxc/xc_dom_boot.c
> @@ -351,6 +351,10 @@ int xc_dom_gnttab_seed(xc_interface *xch, domid_t domid,
>          return -1;
>      }
>  
> +    /* The guest shouldn't really touch its grant table until it has
> +     * enabled its caches. But let's be nice. */
> +    xc_domain_cacheflush(xch, domid, gnttab_gmfn, 1);
> +
>      return 0;
>  }
>  
> diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
> index 77a4e64..b9d1015 100644
> --- a/tools/libxc/xc_dom_core.c
> +++ b/tools/libxc/xc_dom_core.c
> @@ -603,6 +603,8 @@ void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
>          prev->next = phys->next;
>      else
>          dom->phys_pages = phys->next;
> +
> +    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);

I am not sure I understand the semantics of xc_dom_unmap_one: from the
name of the function and its parameters I would think that it is supposed
to unmap just one page, in which case we should flush just one page.
However the implementation calls

munmap(phys->ptr, phys->count << page_shift)

so it can actually unmap more than a single page; I think you did the
right thing by calling xc_domain_cacheflush with phys->first and
phys->count.
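
For reference, the bookkeeping struct as I read it from xc_dom_core.c
(paraphrased here; the field names match the hunk above):

    struct xc_dom_phys {
        struct xc_dom_phys *next;
        void *ptr;          /* start of the mapping */
        xen_pfn_t first;    /* first guest pfn covered by this mapping */
        xen_pfn_t count;    /* number of pages mapped */
    };

so one "unmap" tears down a whole multi-page mapping block, and the flush
has to cover that same pfn range.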


>  }
>  
>  void xc_dom_unmap_all(struct xc_dom_image *dom)
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index c2fdd74..f10ec01 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -48,6 +48,16 @@ int xc_domain_create(xc_interface *xch,
>      return 0;
>  }
>  
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns)
> +{
> +    DECLARE_DOMCTL;
> +    domctl.cmd = XEN_DOMCTL_cacheflush;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.cacheflush.start_pfn = start_pfn;
> +    domctl.u.cacheflush.nr_pfns = nr_pfns;
> +    return do_domctl(xch, &domctl);
> +}
>  
>  int xc_domain_pause(xc_interface *xch,
>                      uint32_t domid)
> diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
> index 838fd21..33ed15b 100644
> --- a/tools/libxc/xc_private.c
> +++ b/tools/libxc/xc_private.c
> @@ -628,6 +628,7 @@ int xc_copy_to_domain_page(xc_interface *xch,
>          return -1;
>      memcpy(vaddr, src_page, PAGE_SIZE);
>      munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>      return 0;
>  }
>  
> @@ -641,6 +642,7 @@ int xc_clear_domain_page(xc_interface *xch,
>          return -1;
>      memset(vaddr, 0, PAGE_SIZE);
>      munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>      return 0;
>  }
>  
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 92271c9..a610f0c 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -304,6 +304,9 @@ void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
>  /* Optionally flush file to disk and discard page cache */
>  void discard_file_cache(xc_interface *xch, int fd, int flush);
>  
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +                      xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
> +
>  #define MAX_MMU_UPDATES 1024
>  struct xc_mmu {
>      mmu_update_t updates[MAX_MMU_UPDATES];
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index 546e86b..ffb68da 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -17,6 +17,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>  {
>      switch ( domctl->cmd )
>      {
> +    case XEN_DOMCTL_cacheflush:
> +    {
> +        unsigned long s = domctl->u.cacheflush.start_pfn;
> +        unsigned long e = s + domctl->u.cacheflush.nr_pfns;
> +
> +        if ( e < s )
> +            return -EINVAL;
> +
> +        if ( get_order_from_pages(domctl->u.cacheflush.nr_pfns) > MAX_ORDER )
> +            return -EINVAL;
> +
> +        return p2m_cache_flush(d, s, e);
> +    }
> +
>      default:
>          return subarch_do_domctl(domctl, d, u_domctl);
>      }
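
To spell out the bounds checking: the e < s test catches wraparound of
start_pfn + nr_pfns, and the get_order_from_pages() test caps the size of
a single flush. Assuming MAX_ORDER keeps its usual Xen value of 20, that
means at most 2^20 4K pages (4GiB) per domctl; anything larger has to be
issued as multiple calls, which is what lets us avoid worrying about
preemption here, as the v2 changelog notes.
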
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 2f48347..d2cfe64 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -338,6 +338,18 @@ unsigned long domain_page_map_to_mfn(const void *va)
>  }
>  #endif
>  
> +void sync_page_to_ram(unsigned long mfn)
> +{
> +    void *p, *v = map_domain_page(mfn);
> +
> +    dsb();           /* So the CPU issues all writes to the range */
> +    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )

What about the last few bytes on a page?
Can we assume that PAGE_SIZE is a multiple of cacheline_bytes?
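
If we wanted to be defensive about alignment, something like the following
(untested sketch) would round the start address down to a cache line
boundary first:

    void *end = v + PAGE_SIZE;

    for ( p = (void *)((unsigned long)v & ~(cacheline_bytes - 1));
          p < end; p += cacheline_bytes )
        asm volatile (__clean_and_invalidate_xen_dcache_one(0) : : "r" (p));

though since v comes from map_domain_page() it should already be
page-aligned, and cacheline_bytes ought to be a power of two that divides
PAGE_SIZE on any sane implementation.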


> +        asm volatile (__clean_and_invalidate_xen_dcache_one(0) : : "r" (p));
> +    dsb();           /* So we know the flushes happen before continuing */
> +
> +    unmap_domain_page(v);
> +}
> +
>  void __init arch_init_memory(void)
>  {
>      /*
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index a61edeb..86f13e9 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -8,6 +8,7 @@
>  #include <asm/gic.h>
>  #include <asm/event.h>
>  #include <asm/hardirq.h>
> +#include <asm/page.h>
>  
>  /* First level P2M is 2 consecutive pages */
>  #define P2M_FIRST_ORDER 1
> @@ -228,6 +229,7 @@ enum p2m_operation {
>      ALLOCATE,
>      REMOVE,
>      RELINQUISH,
> +    CACHEFLUSH,
>  };
>  
>  static int apply_p2m_changes(struct domain *d,
> @@ -381,6 +383,15 @@ static int apply_p2m_changes(struct domain *d,
>                      count++;
>                  }
>                  break;
> +
> +            case CACHEFLUSH:
> +                {
> +                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> +                        break;
> +
> +                    sync_page_to_ram(pte.p2m.base);
> +                }
> +                break;
>          }
>  
>          /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> @@ -624,6 +635,20 @@ int relinquish_p2m_mapping(struct domain *d)
>                                MATTR_MEM, p2m_invalid);
>  }
>  
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn)
> +{
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +
> +    start_mfn = MAX(start_mfn, p2m->lowest_mapped_gfn);
> +    end_mfn = MIN(end_mfn, p2m->max_mapped_gfn);
> +
> +    return apply_p2m_changes(d, CACHEFLUSH,
> +                             pfn_to_paddr(start_mfn),
> +                             pfn_to_paddr(end_mfn),
> +                             pfn_to_paddr(INVALID_MFN),
> +                             MATTR_MEM, p2m_invalid);
> +}
> +
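
One naming nit: the parameters here are called start_mfn and end_mfn, but
they are clamped against lowest_mapped_gfn / max_mapped_gfn and passed to
apply_p2m_changes() as guest addresses, so they are really gfns. Renaming
them would avoid confusion later.
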
>  unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>  {
>      paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 5f484a2..c73c717 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -710,6 +710,11 @@ static struct page_info *alloc_heap_pages(
>          /* Initialise fields which have other uses for free pages. */
>          pg[i].u.inuse.type_info = 0;
>          page_set_owner(&pg[i], NULL);
> +
> +        /* Ensure cache and RAM are consistent for platforms where the
> +         * guest can control its own visibility of/through the cache.
> +         */
> +        sync_page_to_ram(page_to_mfn(&pg[i]));
>      }
>  
>      spin_unlock(&heap_lock);
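
Worth noting that although this hunk is in common code, it costs nothing
on x86: sync_page_to_ram() is an empty static inline there (see the
asm-x86/page.h hunk below), so the call should compile away entirely and
only ARM pays for the per-page flush at allocation time.
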
> diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
> index cf12a89..cb6add4 100644
> --- a/xen/include/asm-arm/arm32/page.h
> +++ b/xen/include/asm-arm/arm32/page.h
> @@ -22,6 +22,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>  /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>  #define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC)
>  
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) STORE_CP32(R, DCCIMVAC)
> +
>  /*
>   * Flush all hypervisor mappings from the TLB and branch predictor.
>   * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
> index 9551f90..baf8903 100644
> --- a/xen/include/asm-arm/arm64/page.h
> +++ b/xen/include/asm-arm/arm64/page.h
> @@ -17,6 +17,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>  /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>  #define __flush_xen_dcache_one(R) "dc cvac, %" #R ";"
>  
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) "dc  civac, %" #R ";"
> +
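
For anyone unfamiliar with the mnemonics: DCCIMVAC on arm32 and "dc civac"
on arm64 are both "data cache clean and invalidate by VA to the point of
coherency", i.e. exactly the clean+invalidate semantics the commit message
calls for, as opposed to the clean-only DCCMVAC / "dc cvac" used by
__flush_xen_dcache_one.
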
>  /*
>   * Flush all hypervisor mappings from the TLB
>   * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index e9c884a..3b39c45 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -78,6 +78,9 @@ void p2m_load_VTTBR(struct domain *d);
>  /* Look up the MFN corresponding to a domain's PFN. */
>  paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>  
> +/* Clean & invalidate caches corresponding to a region of guest address space */
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn);
> +
>  /* Setup p2m RAM mapping for domain d from start-end. */
>  int p2m_populate_ram(struct domain *d, paddr_t start, paddr_t end);
>  /* Map MMIO regions in the p2m: start_gaddr and end_gaddr is the range
> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 670d4e7..67d64c9 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -253,6 +253,9 @@ static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>              : : "r" (_p), "m" (*_p));                                   \
>  } while (0)
>  
> +/* Flush the dcache for an entire page. */
> +void sync_page_to_ram(unsigned long mfn);
> +
>  /* Print a walk of an arbitrary page table */
>  void dump_pt_walk(lpae_t *table, paddr_t addr);
>  
> diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
> index 7a46af5..abe35fb 100644
> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -346,6 +346,9 @@ static inline uint32_t cacheattr_to_pte_flags(uint32_t cacheattr)
>      return ((cacheattr & 4) << 5) | ((cacheattr & 3) << 3);
>  }
>  
> +/* No cache maintenance required on the x86 architecture. */
> +static inline void sync_page_to_ram(unsigned long mfn) {}
> +
>  /* return true if permission increased */
>  static inline bool_t
>  perms_strictly_increased(uint32_t old_flags, uint32_t new_flags)
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 91f01fa..f22fe2e 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
>  typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
>  
> +/*
> + * ARM: Clean and invalidate caches associated with given region of
> + * guest memory.
> + */
> +struct xen_domctl_cacheflush {
> +    /* IN: page range to flush. */
> +    xen_pfn_t start_pfn, nr_pfns;
> +};
> +typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -954,6 +965,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_setnodeaffinity               68
>  #define XEN_DOMCTL_getnodeaffinity               69
>  #define XEN_DOMCTL_set_max_evtchn                70
> +#define XEN_DOMCTL_cacheflush                    71
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1012,6 +1024,7 @@ struct xen_domctl {
>          struct xen_domctl_set_max_evtchn    set_max_evtchn;
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>          struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
> +        struct xen_domctl_cacheflush        cacheflush;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          uint8_t                             pad[128];
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index 50a35fc..1345d7e 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -737,6 +737,9 @@ static int flask_domctl(struct domain *d, int cmd)
>      case XEN_DOMCTL_set_max_evtchn:
>          return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_MAX_EVTCHN);
>  
> +    case XEN_DOMCTL_cacheflush:
> +        return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);
> +
>      default:
>          printk("flask_domctl: Unknown op %d\n", cmd);
>          return -EPERM;
> diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
> index 1fbe241..a0ed13d 100644
> --- a/xen/xsm/flask/policy/access_vectors
> +++ b/xen/xsm/flask/policy/access_vectors
> @@ -196,6 +196,8 @@ class domain2
>      setclaim
>  # XEN_DOMCTL_set_max_evtchn
>      set_max_evtchn
> +# XEN_DOMCTL_cacheflush
> +    cacheflush
>  }
>  
> # Similar to class domain, but primarily contains domctls related to HVM domains
> -- 
> 1.7.10.4
> 
