
Re: [Xen-ia64-devel] [PATCH 2/7][SMP] add volatile to p2m table pte entry



On Thu, 2006-06-15 at 11:58 +0900, Isaku Yamahata wrote: 
>   2 / 7 
>  
>  # HG changeset patch
>  # User yamahata@xxxxxxxxxxxxx
>  # Node ID 03b29286001dea87284cc401492a799e7e654a0f
>  # Parent  ad418fdb1981be2108d84bafbd294a9db9899396
>  add volatile pte entry of the p2m table.
>  Since p2m table are shared by cpu. volatile is needed.
>  fix the races in pte access of __assign_domain_page() and
>  destroy_grant_host_mapping()
>  PATCHNAME: volatile_pte_t
>  
>  Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
>  
>  diff -r ad418fdb1981 -r 03b29286001d xen/arch/ia64/xen/mm.c
>  --- a/xen/arch/ia64/xen/mm.c Thu Jun 15 11:33:14 2006 +0900
>  +++ b/xen/arch/ia64/xen/mm.c Thu Jun 15 11:33:20 2006 +0900
>  @@ -338,10 +338,42 @@ unsigned long translate_domain_mpaddr(un
>   }
>   
>   //XXX !xxx_present() should be used instread of !xxx_none()?
>  +// __assign_new_domain_page(), assign_new_domain_page() and
>  +// assign_new_domain0_page() are used only when domain creation.
>  +// their accesses aren't racy so that returned pte_t doesn't need
>  +// volatile qualifier
>  +static pte_t*
>  +__lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
>  +{
>  +    struct mm_struct *mm = &d->arch.mm;
>  +    pgd_t *pgd;
>  +    pud_t *pud;
>  +    pmd_t *pmd;
>  +
>  +    BUG_ON(mm->pgd == NULL);
>  +    pgd = pgd_offset(mm, mpaddr);
>  +    if (pgd_none(*pgd)) {
>  +        pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
>  +    }
>  +
>  +    pud = pud_offset(pgd, mpaddr);
>  +    if (pud_none(*pud)) {
>  +        pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
>  +    }
>  +
>  +    pmd = pmd_offset(pud, mpaddr);
>  +    if (pmd_none(*pmd)) {
>  +        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
>  +    }
>  +
>  +    return pte_offset_map(pmd, mpaddr);
>  +}
>  +

Very nice rewrite.

>  +//XXX !xxx_present() should be used instread of !xxx_none()?
>   // pud, pmd, pte page is zero cleared when they are allocated.
>   // Their area must be visible before population so that
>   // cmpxchg must have release semantics.
>  -static pte_t*
>  +static volatile pte_t*
>   lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
>   {
>       struct mm_struct *mm = &d->arch.mm;
>  @@ -384,11 +416,11 @@ lookup_alloc_domain_pte(struct domain* d
>           }
>       }
>   
>  -    return pte_offset_map(pmd, mpaddr);
>  +    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
>   }
>   
>   //XXX xxx_none() should be used instread of !xxx_present()?
>  -static pte_t*
>  +static volatile pte_t*
>   lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
>   {
>       struct mm_struct *mm = &d->arch.mm;
>  @@ -409,11 +441,11 @@ lookup_noalloc_domain_pte(struct domain*
>       if (unlikely(!pmd_present(*pmd)))
>           return NULL;
>   
>  -    return pte_offset_map(pmd, mpaddr);
>  +    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
>   }
>   
>   #ifdef CONFIG_XEN_IA64_DOM0_VP
>  -static pte_t*
>  +static volatile pte_t*
>   lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
>   {
>       struct mm_struct *mm = &d->arch.mm;
>  @@ -434,13 +466,13 @@ lookup_noalloc_domain_pte_none(struct do
>       if (unlikely(pmd_none(*pmd)))
>           return NULL;
>   
>  -    return pte_offset_map(pmd, mpaddr);
>  +    return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
>   }

In all of the functions above, the return value of pte_offset_map()
is being cast and returned as a volatile pte_t * from each of the
lookup functions.  Is that really needed?  I'm not sure it helps in
this case, but I could be wrong.
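
For reference, here is a standalone sketch (stand-in types, not the Xen
code) of what the volatile-qualified return type buys.  The qualifier
binds to the pte being pointed at, so it constrains reads done through
the returned pointer in the caller, not the pointer value itself:

    #include <stdint.h>

    typedef struct { uint64_t pte; } pte_t;   /* stand-in for ia64 pte_t */

    static pte_t p2m_entry;                   /* pretend: a shared p2m entry */

    static volatile pte_t *lookup(void)       /* stand-in for the lookups */
    {
        return (volatile pte_t *)&p2m_entry;
    }

    uint64_t read_twice(void)
    {
        volatile pte_t *p = lookup();
        uint64_t a = p->pte;   /* first load */
        uint64_t b = p->pte;   /* second load; another CPU may have changed
                                  the entry in between.  With a plain pte_t *
                                  the compiler could fold the two reads. */
        return a ^ b;          /* non-zero iff the two loads saw different
                                  values */
    }

If that's right, the change only matters for callers that dereference
the pointer more than once without taking a snapshot first.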

>   unsigned long
>   ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
>   {
>  -    pte_t *pte;
>  +    volatile pte_t *pte;
>   
>       pte = lookup_noalloc_domain_pte(d, mpaddr);
>       if (pte == NULL)

pte seems to be used only locally, so I think the volatile
makes no difference here.

>  @@ -470,7 +502,7 @@ __lookup_domain_mpa(struct domain *d, un
>   
>   unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
>   {
>  -    pte_t *pte;
>  +    volatile pte_t *pte;
>   
>   #ifdef CONFIG_DOMAIN0_CONTIGUOUS
>       if (d == dom0) {
>  @@ -486,9 +518,10 @@ unsigned long lookup_domain_mpa(struct d
>   #endif
>       pte = lookup_noalloc_domain_pte(d, mpaddr);
>       if (pte != NULL) {
>  -        if (pte_present(*pte)) {
>  +        pte_t tmp_pte = *pte;// pte is volatile. copy the value.
>  +        if (pte_present(tmp_pte)) {
>   //printk("lookup_domain_page: found mapping for %lx, 
> pte=%lx\n",mpaddr,pte_val(*pte));
>  -            return pte_val(*pte);
>  +            return pte_val(tmp_pte);
>           } else if (VMX_DOMAIN(d->vcpu[0]))
>               return GPFN_INV_MASK;
>       }

Similarly here: pte seems to be used only locally, so I think the
volatile makes no difference here.  There are several other functions
where 'pte_t *pte;' becomes 'volatile pte_t *pte;' and where the change
may not be needed at all.
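
The part of the hunk that does seem to matter is the snapshot copy
itself: read the volatile entry exactly once, then test and return the
same bits, so the present check and the returned value can't come from
two different reads.  Roughly (stand-in types and a hypothetical
"present" bit, not the Xen code):

    #include <stdint.h>

    typedef struct { uint64_t pte; } pte_t;   /* stand-in for ia64 pte_t */
    #define PTE_P  (1UL << 0)                 /* hypothetical present bit */

    uint64_t lookup_value(volatile pte_t *p)
    {
        pte_t snap = *p;                 /* one volatile read */
        if (snap.pte & PTE_P)            /* test the snapshot ...        */
            return snap.pte;             /* ... and return the same bits */
        return 0;                        /* not present                  */
    }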

[snip...]

>  @@ -975,10 +1021,12 @@ destroy_grant_host_mapping(unsigned long
>                  unsigned long mfn, unsigned int flags)
>   {
>       struct domain* d = current->domain;
>  -    pte_t* pte;
>  +    volatile pte_t* pte;
>  +    unsigned long cur_arflags;
>  +    pte_t cur_pte;
>  +    pte_t new_pte;
>       pte_t old_pte;
>  -    unsigned long old_mfn = INVALID_MFN;
>  -    struct page_info* old_page;
>  +    struct page_info* page;
>   
>       if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
>           DPRINTK("%s: flags 0x%x\n", __func__, flags);
>  @@ -986,21 +1034,42 @@ destroy_grant_host_mapping(unsigned long
>       }
>   
>       pte = lookup_noalloc_domain_pte(d, gpaddr);
>  -    if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
>  +    if (pte == NULL) {
>  +        DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx\n", __func__, gpaddr, mfn);
>           return GNTST_general_error;
>  -
>  -    // update pte
>  -    old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
>  -    if (pte_present(old_pte)) {
>  -        old_mfn = pte_pfn(old_pte);
>  -    } else {
>  +    }
>  +
>  + again:
>  +    cur_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
>  +    cur_pte = pfn_pte(mfn, __pgprot(cur_arflags));
>  +    if (!pte_present(cur_pte)) {
>  +        DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx\n",
>  +                __func__, gpaddr, mfn, pte_val(cur_pte));
>           return GNTST_general_error;
>       }
>  -    domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
>  -
>  -    old_page = mfn_to_page(old_mfn);
>  -    BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, 
> page) is not needed.
>  -    put_page(old_page);
>  +    new_pte = __pte(0);
>  +
>  +    old_pte = ptep_cmpxchg_rel(&d->arch.mm, gpaddr, pte, cur_pte, new_pte);
>  +    if (unlikely(!pte_present(old_pte))) {
>  +        DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
>  +                __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
>  +        return GNTST_general_error;
>  +    }
>  +    if (unlikely(pte_val(cur_pte) != pte_val(old_pte))) {
>  +        if (pte_pfn(old_pte) == mfn) {
>  +            goto again;

Maybe I'm just being paranoid, but is there *any* chance this goto loop
will not terminate?
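
As far as I can tell it only retries while some other CPU keeps
changing just the access-rights bits of an entry that still maps the
same mfn, so it should settle almost immediately in practice, but
nothing actually bounds it.  If a cap is wanted, something along these
lines would do (hypothetical retry limit, illustration only -- not part
of the patch):

    int retries = 16;                             /* arbitrary cap */
    do {
        cur_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
        cur_pte = pfn_pte(mfn, __pgprot(cur_arflags));
        if (!pte_present(cur_pte))
            return GNTST_general_error;
        old_pte = ptep_cmpxchg_rel(&d->arch.mm, gpaddr, pte,
                                   cur_pte, __pte(0));
        if (pte_val(old_pte) == pte_val(cur_pte))
            break;                                /* entry cleared */
        if (!pte_present(old_pte) || pte_pfn(old_pte) != mfn)
            return GNTST_general_error;           /* no longer our entry */
    } while (--retries > 0);
    if (retries == 0)
        return GNTST_general_error;               /* gave up */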

>  +        }
>  +        DPRINTK("%s gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
>  +                __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
>  +        return GNTST_general_error;
>  +    }
>  +    BUG_ON(pte_pfn(old_pte) != mfn);
>  +
>  +    domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
>  +
>  +    page = mfn_to_page(mfn);
>  +    BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) 
> is not needed.
>  +    put_page(page);
>   
>       return GNTST_okay;
>   }

[snip...]

>  diff -r ad418fdb1981 -r 03b29286001d 
> xen/include/asm-ia64/linux-xen/asm/pgtable.h
>  --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h   Thu Jun 15 11:33:14 
> 2006 +0900
>  +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h   Thu Jun 15 11:33:20 
> 2006 +0900
>  @@ -210,7 +210,7 @@ ia64_phys_addr_valid (unsigned long addr
>   #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
>   #ifdef XEN
>   static inline void
>  -set_pte_rel(pte_t* ptep, pte_t pteval)
>  +set_pte_rel(volatile pte_t* ptep, pte_t pteval)
>   {
>   #if CONFIG_SMP
>       asm volatile ("st8.rel [%0]=%1" ::

Doesn't pass-by-value defeat the intention of marking the ptep argument
as volatile?
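
For what it's worth, here is the semantics as I understand it
(stand-in types, not the Xen code): the volatile binds to the pte that
ptep points at, not to the pointer that gets copied, so a store through
ptep inside the function is still a volatile access.  Whether that
access needs to be volatile at all is the real question:

    #include <stdint.h>

    typedef struct { uint64_t pte; } pte_t;   /* stand-in for ia64 pte_t */

    static void set_pte_sketch(volatile pte_t *ptep, pte_t pteval)
    {
        /* ptep itself is a copied address; the pte it points at is not
         * copied.  The compiler can't elide this store or reorder it
         * against other volatile accesses. */
        ptep->pte = pteval.pte;
    }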

>  @@ -402,8 +402,14 @@ ptep_test_and_clear_dirty (struct vm_are
>   }
>   #endif  
>  +#ifdef XEN
>  +static inline pte_t
>  +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
>  +               volatile pte_t *ptep)
>  +#else
>   static inline pte_t
>   ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>  +#endif
>   {
>   #ifdef CONFIG_SMP
>       return __pte(xchg((long *) ptep, 0));

Similarly here in passing ptep....

>  @@ -416,7 +422,8 @@ ptep_get_and_clear(struct mm_struct *mm,
>   
>   #ifdef XEN
>   static inline pte_t
>  -ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
>  +ptep_xchg(struct mm_struct *mm, unsigned long addr,
>  +      volatile pte_t *ptep, pte_t npte)
>   {
>   #ifdef CONFIG_SMP
>       return __pte(xchg((long *) ptep, pte_val(npte)));
>  @@ -428,8 +435,8 @@ ptep_xchg(struct mm_struct *mm, unsigned
>   }
>   
>   static inline pte_t
>  -ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>  -             pte_t old_pte, pte_t new_pte)
>  +ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr,
>  +             volatile pte_t *ptep, pte_t old_pte, pte_t new_pte)
>   {
>   #ifdef CONFIG_SMP
>       return __pte(cmpxchg_rel(&pte_val(*ptep),
>  

The same goes for the two cases above.  Since we're passing an
argument, we're going to copy the value anyway, so volatile probably
doesn't matter -- unless I've missed something (which is always
possible)...

-- 
Ciao,
al
----------------------------------------------------------------------
Al Stone                                      Alter Ego:
Open Source and Linux R&D                     Debian Developer
Hewlett-Packard Company                       http://www.debian.org
E-mail: ahs3@xxxxxxxxx                        ahs3@xxxxxxxxxx
----------------------------------------------------------------------


_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel


 

