
Re: [Xen-devel] [PATCH v3 07/10] xen/arm: Add handling write fault for dirty-page tracing



> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@xxxxxxxxxxxxx]
> Sent: Monday, August 05, 2013 1:28 AM
> To: Jaeyong Yoo
> Cc: xen-devel@xxxxxxxxxxxxx
> Subject: Re: [Xen-devel] [PATCH v3 07/10] xen/arm: Add handling write
> fault for dirty-page tracing
> 
> On Thu, 1 Aug 2013, Jaeyong Yoo wrote:
> > Add handling of write faults in do_trap_data_abort_guest for dirty-page
> > tracing. Rather than maintaining a bitmap of dirty pages, we use the
> > avail bit in the p2m entry. For locating the write-faulting pte in the
> > guest p2m, we use a virtual-linear page table that slots the guest p2m
> > into Xen's virtual memory.
> >
> > Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>
> 
> Looks good to me.
> I would appreciate some more comments in the code to explain the inner
> workings of the vlp2m.
I got it; I will add more comments explaining the inner workings of the
vlp2m. See the sketch below.
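
For instance, the comment could summarize the lookup along these lines.
This is only my sketch of the mechanism as implemented in
handle_page_fault() below, with wording to be refined:

    /*
     * (Sketch only; my summary of how the VLPT lookup works, not
     * final comment wording.)
     *
     * The guest's p2m third-level tables are mapped contiguously into
     * Xen's virtual address space starting at d->arch.dirty.vlpt_start.
     * Since each lpae_t entry covers one 4K guest page, the p2m entry
     * for a guest physical address 'addr' sits at a fixed linear offset:
     *
     *     index     = third_linear_offset(addr - gma_start);
     *     vlp2m_pte = (lpae_t *)(vlpt_start + sizeof(lpae_t) * index);
     *
     * So finding the faulting entry is a single array access instead of
     * a full p2m table walk. The Xen second/third-level entries backing
     * this VA window are populated lazily, in handle_page_fault().
     */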

One question: if you look at patch #6, it implements the allocation and
freeing of vlp2m memory (xen/arch/arm/vlpt.c), which is almost identical to
the vmap allocation (xen/arch/arm/vmap.c). To be honest, I copied vmap.c and
changed the virtual address start/end points and the names. While doing
that, I thought it would be better if we made a common interface, something
like a virtual address allocator: we create an allocator instance covering
the VA range from A to B, and it hands out VAs in between A and B. Each
instance would be initialized at boot stage.
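
To make that concrete, here is a rough sketch of the interface I have in
mind. Every name here (va_allocator, va_allocator_init, va_alloc, va_free)
is hypothetical, and the free-space tracking is left out:

    /* Hypothetical common VA allocator; one instance per VA window. */
    struct va_allocator {
        unsigned long va_start;   /* lowest VA the instance hands out (A) */
        unsigned long va_end;     /* end of the range, exclusive (B) */
        spinlock_t    lock;       /* protects the allocation state */
        /* free-space tracking (bitmap or free list) omitted in this sketch */
    };

    /* Initialize an instance at boot, covering the VA range [A, B). */
    void va_allocator_init(struct va_allocator *a,
                           unsigned long A, unsigned long B);

    /* Allocate/free page-aligned VA chunks from within [A, B). */
    void *va_alloc(struct va_allocator *a, unsigned int nr_pages);
    void va_free(struct va_allocator *a, const void *va, unsigned int nr_pages);

vmap and vlpt would then each be a single instance initialized with their
own start/end, instead of two copies of the same allocation code.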


> Nonetheless:
> 
> Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> 
> 
> >  xen/arch/arm/mm.c               | 110 +++++++++++++++++++++++++++++++++++++++-
> >  xen/arch/arm/traps.c            |  16 +++++-
> >  xen/include/asm-arm/domain.h    |  11 ++++
> >  xen/include/asm-arm/mm.h        |   5 ++
> >  xen/include/asm-arm/processor.h |   2 +
> >  5 files changed, 142 insertions(+), 2 deletions(-)
> >
> > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> > index 9d5d3e0..a24afe6 100644
> > --- a/xen/arch/arm/mm.c
> > +++ b/xen/arch/arm/mm.c
> > @@ -680,7 +680,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long e)
> >      create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
> >  }
> >
> > -enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
> >  static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
> >  {
> >      lpae_t pte;
> > @@ -1214,6 +1213,115 @@ int is_iomem_page(unsigned long mfn)
> >          return 1;
> >      return 0;
> >  }
> > +
> > +static uint64_t find_guest_p2m_mfn(struct domain *d, paddr_t addr)
> > +{
> > +    lpae_t *first = NULL, *second = NULL;
> > +    struct p2m_domain *p2m = &d->arch.p2m;
> > +    uint64_t mfn = -EFAULT;
> > +
> > +    if ( first_table_offset(addr) >= LPAE_ENTRIES )
> > +        return mfn;
> > +
> > +    first = __map_domain_page(p2m->first_level);
> > +
> > +    if ( !first ||
> > +         !first[first_table_offset(addr)].walk.valid ||
> > +         !first[first_table_offset(addr)].walk.table )
> > +        goto done;
> > +
> > +    second = map_domain_page(first[first_table_offset(addr)].walk.base);
> > +
> > +    if ( !second ||
> > +         !second[second_table_offset(addr)].walk.valid ||
> > +         !second[second_table_offset(addr)].walk.table )
> > +        goto done;
> > +
> > +    mfn = second[second_table_offset(addr)].walk.base;
> > +
> > +done:
> > +    if ( second ) unmap_domain_page(second);
> > +    if ( first ) unmap_domain_page(first);
> > +
> > +    return mfn;
> > +}
> > +
> > +/*
> > + * routine for dirty-page tracing
> > + *
> > + * On first write, it page faults, its entry is changed to read-write,
> > + * and on retry the write succeeds.
> > + *
> > + * For locating p2m of the faulting entry, we use virtual-linear page table.
> > + */
> > +int handle_page_fault(struct domain *d, paddr_t addr)
> > +{
> > +    int rc = 0;
> > +    struct p2m_domain *p2m = &d->arch.p2m;
> > +    uint64_t gma_start;
> > +    int gma_third_index;
> > +    int xen_second_linear, xen_third_table;
> > +    lpae_t *xen_third;
> > +    lpae_t *vlp2m_pte;
> > +
> > +    BUG_ON( !d->arch.map_domain.nr_banks );
> > +
> > +    gma_start = d->arch.map_domain.bank[0].start;
> > +    gma_third_index = third_linear_offset(addr - gma_start);
> > +    vlp2m_pte = (lpae_t *)(d->arch.dirty.vlpt_start +
> > +                           sizeof(lpae_t) * gma_third_index);
> > +
> > +    BUG_ON( (void *)vlp2m_pte > d->arch.dirty.vlpt_end );
> > +
> > +    spin_lock(&p2m->lock);
> > +
> > +    xen_second_linear = second_linear_offset((unsigned long)vlp2m_pte);
> > +    xen_third_table = third_table_offset((unsigned long)vlp2m_pte);
> > +
> > +    /* starting from xen second level page table */
> > +    if ( !xen_second[xen_second_linear].pt.valid )
> > +    {
> > +        unsigned long va = (unsigned long)vlp2m_pte & ~(PAGE_SIZE-1);
> > +
> > +        rc = create_xen_table(&xen_second[second_linear_offset(va)]);
> > +        if ( rc < 0 )
> > +            goto out;
> > +    }
> > +
> > +    BUG_ON( !xen_second[xen_second_linear].pt.valid );
> > +
> > +    /* at this point, xen second level pt has valid entry
> > +     * check again the validity of third level pt */
> > +    xen_third = __va(pfn_to_paddr(xen_second[xen_second_linear].pt.base));
> > +
> > +    /* xen third-level page table invalid */
> > +    if ( !xen_third[xen_third_table].p2m.valid )
> > +    {
> > +        uint64_t mfn = find_guest_p2m_mfn(d, addr);
> > +        lpae_t pte = mfn_to_xen_entry(mfn);
> > +        unsigned long va = (unsigned long)vlp2m_pte & ~(PAGE_SIZE-1);
> > +
> > +        pte.pt.table = 1; /* 4k mappings always have this bit set */
> > +        write_pte(&xen_third[xen_third_table], pte);
> > +        flush_xen_data_tlb_range_va(va, PAGE_SIZE);
> > +    }
> > +
> > +    /* at this point, xen third level pt has valid entry: means we can
> > +     * access vlp2m_pte. vlp2m_pte is like a fourth level pt for xen,
> > +     * but for guest, it is third level pt */
> > +    if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 )
> > +    {
> > +        vlp2m_pte->p2m.write = 1;
> > +        vlp2m_pte->p2m.avail = 1;
> > +        write_pte(vlp2m_pte, *vlp2m_pte);
> > +        flush_tlb_local();
> > +    }
> > +
> > +out:
> > +    spin_unlock(&p2m->lock);
> > +    return rc;
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> > index 1b9209d..f844f56 100644
> > --- a/xen/arch/arm/traps.c
> > +++ b/xen/arch/arm/traps.c
> > @@ -1226,7 +1226,12 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
> >          goto bad_data_abort;
> >
> >      /* XXX: Decode the instruction if ISS is not valid */
> > -    if ( !dabt.valid )
> > +    /* Note: add additional check before goto bad_data_abort. dabt.valid
> > +     * bit is for telling the validity of ISS[23:16] bits. For dirty-page
> > +     * tracing, we need to see DFSC bits. If DFSC bits are indicating the
> > +     * possibility of dirty page tracing, do not go to bad_data_abort */
> > +    if ( !dabt.valid &&
> > +         (dabt.dfsc & FSC_MASK) != (FSC_FLT_PERM + FSC_3D_LEVEL) &&
> > +         dabt.write )
> >          goto bad_data_abort;
> >
> >      if (handle_mmio(&info))
> > @@ -1235,6 +1240,15 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
> >          return;
> >      }
> >
> > +    /* handle permission fault on write */
> > +    if ( (dabt.dfsc & FSC_MASK) == (FSC_FLT_PERM + FSC_3D_LEVEL) && dabt.write )
> > +    {
> > +        if ( current->domain->arch.dirty.mode == 0 )
> > +            goto bad_data_abort;
> > +        if ( handle_page_fault(current->domain, info.gpa) == 0 )
> > +            return;
> > +    }
> > +
> >  bad_data_abort:
> >
> >      msg = decode_fsc( dabt.dfsc, &level);
> > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > index 0c80c65..413b89a 100644
> > --- a/xen/include/asm-arm/domain.h
> > +++ b/xen/include/asm-arm/domain.h
> > @@ -110,6 +110,17 @@ struct arch_domain
> >          spinlock_t             lock;
> >      } uart0;
> >
> > +    /* dirty-page tracing */
> > +    struct {
> > +        spinlock_t lock;
> > +        int mode;
> > +        unsigned int count;
> > +        uint32_t gmfn_guest_start;  /* guest physical memory start address */
> > +        void *vlpt_start;           /* va-start of guest p2m */
> > +        void *vlpt_end;             /* va-end of guest p2m */
> > +        struct page_info *head;     /* maintain the mapped vaddrs */
> > +    } dirty;
> > +
> >      struct dt_mem_info map_domain;
> >      spinlock_t         map_lock;
> >  }  __cacheline_aligned;
> > diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
> > index 404ec4d..fd976e3 100644
> > --- a/xen/include/asm-arm/mm.h
> > +++ b/xen/include/asm-arm/mm.h
> > @@ -328,6 +328,11 @@ static inline void put_page_and_type(struct page_info *page)
> >      put_page(page);
> >  }
> >
> > +enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
> > +
> > +/* routine for dirty-page tracing */
> > +int handle_page_fault(struct domain *d, paddr_t addr);
> > +
> >  #endif /*  __ARCH_ARM_MM__ */
> >  /*
> >   * Local variables:
> > diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> > index 06b0b25..34c21de 100644
> > --- a/xen/include/asm-arm/processor.h
> > +++ b/xen/include/asm-arm/processor.h
> > @@ -383,6 +383,8 @@ union hsr {
> >  #define FSC_CPR        (0x3a) /* Coprocossor Abort */
> >
> >  #define FSC_LL_MASK    (0x03<<0)
> > +#define FSC_MASK       (0x3f) /* Fault status mask */
> > +#define FSC_3D_LEVEL   (0x03) /* Third level fault */
> >
> >  /* Time counter hypervisor control register */
> >  #define CNTHCTL_PA      (1u<<0)  /* Kernel/user access to physical counter */
> > --
> > 1.8.1.2
> >
> >


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

