[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [Xen-ia64-devel] RFC: ptc.ga implementation for SMP-g


  • To: "Tristan Gingold" <Tristan.Gingold@xxxxxxxx>, <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
  • From: "Xu, Anthony" <anthony.xu@xxxxxxxxx>
  • Date: Tue, 4 Apr 2006 21:56:56 +0800
  • Delivery-date: Tue, 04 Apr 2006 06:57:11 -0700
  • List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>
  • Thread-index: AcZXI2l5kWat5A8PSNO/SRC5ERKaLgAybMdA
  • Thread-topic: [Xen-ia64-devel] RFC: ptc.ga implementation for SMP-g

Hi Tristan,
Sorry for late response.

Let's first make sure our understanding of ptc.ga is correct.
Assume there are two LPs on one box, we call them LP1 and LP2,
And rr7=0x107 on LP1, rr7=0x207 on LP2,
And below code segment is executed at LP1
        movl    R2=0xe000000000000000;
     mov r3=16<<2;
        ptc.ga r2,r3;
The result on LP1 is, all tlb entries with rid=0x107 which are overlapped
with (0xe000000000000000~0xe000000000010000) are purged.
The result on LP2 should be same with that on LP1.

But according to this patch,
The result on LP2 is, all tlb entries with rid=0x207 which are overlapped
with (0xe000000000000000~0xe000000000010000) are purged.
This is not the desirable result of ptc.ga.

IMO, when using IPI to emulate ptc.ga, the related rid should be passed to
other LPs as a parameter.

Thanks,
Anthony


>From: Tristan
>Gingold
>Sent: 2006?4?3? 21:38
>To: xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
>Subject: [Xen-ia64-devel] RFC: ptc.ga implementation for SMP-g
>
>Hi,
>
>after the comments, here is my updated patch for ptc.ga
>Please comment it.
>
>With this patch, the page_flags are always written atomically.  Ptc only clear
>it.  This eliminates itc and ptc conflicts.
>
>The other conflict is use.  This is within ia64_page_fault, between
>vcpu_translate and vcpu_itc_no_srlz.  This part of code is protected by a
>flag + counter: At entry the flag is set and the counter increment, at exit
>the flag is reset.  Ptc.ga waits if the flag is set and retries if the
>counter has changes.
>
>Tristan.
>
>diff -r ddc279c91502 xen/arch/ia64/vmx/vmmu.c
>--- a/xen/arch/ia64/vmx/vmmu.c Fri Mar 31 21:04:16 2006
>+++ b/xen/arch/ia64/vmx/vmmu.c Mon Apr  3 11:20:57 2006
>@@ -459,7 +459,7 @@
>     va = PAGEALIGN(ifa, ps);
>     index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
>     if (index>=0) {
>-        vcpu->arch.dtrs[index].p=0;
>+        vcpu->arch.dtrs[index].pte.p=0;
>         index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
>     }
>     hcb = vmx_vcpu_get_vtlb(vcpu);
>@@ -476,7 +476,7 @@
>     va = PAGEALIGN(ifa, ps);
>     index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
>     if (index>=0) {
>-        vcpu->arch.itrs[index].p=0;
>+        vcpu->arch.itrs[index].pte.p=0;
>         index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
>     }
>     hcb = vmx_vcpu_get_vtlb(vcpu);
>diff -r ddc279c91502 xen/arch/ia64/xen/process.c
>--- a/xen/arch/ia64/xen/process.c      Fri Mar 31 21:04:16 2006
>+++ b/xen/arch/ia64/xen/process.c      Mon Apr  3 11:20:57 2006
>@@ -290,12 +290,22 @@
>               return;
>       }
>
>+      /* Bit 0 must not be set.  */
>+      BUG_ON (PSCBX(current, tlb_inuse) & 1);
>+      /* Set bit 0 and increment counter (from bit 1).  */
>+      current->arch.tlb_inuse += 3;
>+
>       fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha);
>       if (fault == IA64_NO_FAULT) {
>               pteval = translate_domain_pte(pteval,address,itir);
>
>       vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3
>f);
>+              /* Clear in_use flag.  */
>+              current->arch.tlb_inuse--;
>               return;
>       }
>+      /* Clear in_use flag.  */
>+      current->arch.tlb_inuse--;
>+
>       if (!user_mode (regs)) {
>               /* The fault occurs inside Xen.  */
>               if (!ia64_done_with_exception(regs)) {
>diff -r ddc279c91502 xen/arch/ia64/xen/vcpu.c
>--- a/xen/arch/ia64/xen/vcpu.c Fri Mar 31 21:04:16 2006
>+++ b/xen/arch/ia64/xen/vcpu.c Mon Apr  3 11:20:57 2006
>@@ -1276,7 +1276,7 @@
> // FIXME: also need to check && (!trp->key || vcpu_pkr_match(trp->key))
> static inline int vcpu_match_tr_entry(TR_ENTRY *trp, UINT64 ifa, UINT64 rid)
> {
>-      return trp->p && trp->rid == rid
>+      return trp->pte.p && trp->rid == rid
>               && ifa >= trp->vadr
>               && ifa <= (trp->vadr + (1L << trp->ps) - 1);
> }
>@@ -1328,7 +1328,7 @@
>               if (vcpu_quick_region_check(vcpu->arch.dtr_regions,address)) {
>                       for (trp = vcpu->arch.dtrs, i = NDTRS; i; i--, trp++) {
>                               if (vcpu_match_tr_entry(trp,address,rid)) {
>-                                      *pteval = trp->page_flags;
>+                                      *pteval = trp->pte.val;
>                                       *itir = trp->itir;
>                                       tr_translate_count++;
>                                       return IA64_NO_FAULT;
>@@ -1341,7 +1341,7 @@
>               if (vcpu_quick_region_check(vcpu->arch.itr_regions,address)) {
>                       for (trp = vcpu->arch.itrs, i = NITRS; i; i--, trp++) {
>                               if (vcpu_match_tr_entry(trp,address,rid)) {
>-                                      *pteval = trp->page_flags;
>+                                      *pteval = trp->pte.val;
>                                       *itir = trp->itir;
>                                       tr_translate_count++;
>                                       return IA64_NO_FAULT;
>@@ -1354,7 +1354,7 @@
>       // FIXME?: check dtlb for inst accesses too, else bad things happen?
>       trp = &vcpu->arch.dtlb;
>       if (/* is_data && */ vcpu_match_tr_entry(trp,address,rid)) {
>-              if (vcpu->domain==dom0 && !in_tpa) *pteval = trp->page_flags;
>+              if (vcpu->domain==dom0 && !in_tpa) *pteval = trp->pte.val;
>               else *pteval = vcpu->arch.dtlb_pte;
>               *itir = trp->itir;
>               dtlb_translate_count++;
>@@ -1691,24 +1691,27 @@
>
> static inline void vcpu_purge_tr_entry(TR_ENTRY *trp)
> {
>-      trp->p = 0;
>+      trp->pte.val = 0;
> }
>
> static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64
>ifa)
> {
>       UINT64 ps;
>+      union pte_flags new_pte;
>
>       trp->itir = itir;
>       trp->rid = VCPU(current,rrs[ifa>>61]) & RR_RID_MASK;
>-      trp->p = 1;
>       ps = trp->ps;
>-      trp->page_flags = pte;
>-      if (trp->pl < 2) trp->pl = 2;
>+      new_pte.val = pte;
>+      if (new_pte.pl < 2) new_pte.pl = 2;
>       trp->vadr = ifa & ~0xfff;
>       if (ps > 12) { // "ignore" relevant low-order bits
>-              trp->ppn &= ~((1UL<<(ps-12))-1);
>+              new_pte.ppn &= ~((1UL<<(ps-12))-1);
>               trp->vadr &= ~((1UL<<ps)-1);
>       }
>+
>+      /* Atomic write.  */
>+      trp->pte.val = new_pte.val;
> }
>
> IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte,
>@@ -1877,12 +1880,15 @@
> struct ptc_ga_args {
>       unsigned long vadr;
>       unsigned long addr_range;
>+      struct vcpu *vcpu;
> };
>
> static void ptc_ga_remote_func (void *varg)
> {
>       struct ptc_ga_args *args = (struct ptc_ga_args *)varg;
>-      vhpt_flush_address (args->vadr, args->addr_range);
>+
>+      if (args->vcpu->processor == smp_processor_id ())
>+              vhpt_flush_address (args->vadr, args->addr_range);
> }
> #endif
>
>@@ -1900,6 +1906,7 @@
>
>       args.vadr = vadr;
>       args.addr_range = addr_range;
>+      args.vcpu = vcpu;
>
>       /* This method is very conservative and should be optimized:
>          - maybe IPI calls can be avoided,
>@@ -1909,22 +1916,40 @@
>       */
>       for_each_vcpu (d, v) {
>               if (v != vcpu) {
>-                      /* Purge tc entry.
>-                         Can we do this directly ?  Well, this is just a
>-                         single atomic write.  */
>-                      vcpu_purge_tr_entry(&PSCBX(v,dtlb));
>-                      vcpu_purge_tr_entry(&PSCBX(v,itlb));
>+                      unsigned int count;
>+                      int proc;
>+
>+                      do {
>+                              /* Wait until the tlb is not used.  */
>+                              while ((count = PSCBX(vcpu, tlb_inuse)) & 1)
>+                                      cpu_relax ();
>+
>+                              /* Purge tc entries.  */
>+                              vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
>+                              vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
>+
>+                              /* Loop until the entry is not used.  */
>+                      } while (count != PSCBX(vcpu, tlb_inuse));
>+
>+                      /* Here, the vcpu tlb is cleared.
>+                         However the mapping is still present in the VHPT
>+                         and the tc.  */
> #ifdef VHPT_GLOBAL
>-                      /* Flush VHPT on remote processors.
>-                         FIXME: invalidate directly the entries? */
>-                      smp_call_function_single
>-                              (v->processor, &ptc_ga_remote_func,
>-                               &args, 0, 1);
>+                      /* Flush VHPT on remote processors.  */
>+                      do {
>+                              proc = v->processor;
>+                              smp_call_function_single
>+                                      (v->processor, &ptc_ga_remote_func,
>+                                       &args, 0, 1);
>+                              /* Try again if VCPU has migrated.  */
>+                      } while (proc != v->processor);
> #endif
>               }
>       }
> #endif
>
>+      /* No needs to watch tlb_inuse for local processor, since it is
>+         executing the ptc.ga.  */
> #ifdef VHPT_GLOBAL
>       vhpt_flush_address(vadr,addr_range);
> #endif
>diff -r ddc279c91502 xen/include/asm-ia64/domain.h
>--- a/xen/include/asm-ia64/domain.h    Fri Mar 31 21:04:16 2006
>+++ b/xen/include/asm-ia64/domain.h    Mon Apr  3 11:20:57 2006
>@@ -44,13 +44,19 @@
>     offsetof(vcpu_info_t, evtchn_upcall_mask))
>
> struct arch_vcpu {
>-#if 1
>       TR_ENTRY itrs[NITRS];
>       TR_ENTRY dtrs[NDTRS];
>       TR_ENTRY itlb;
>       TR_ENTRY dtlb;
>-      unsigned int itr_regions;
>-      unsigned int dtr_regions;
>+
>+    /* Bit is set for regions having an itr/dtr.  */
>+      unsigned short itr_regions;
>+      unsigned short dtr_regions;
>+
>+    /* Bit 0 means the itlb/dtlb is in use.
>+       bits 31-1 are a generation counter.  There is only one writers.  */
>+    volatile unsigned int tlb_inuse;
>+
>       unsigned long itlb_pte;
>       unsigned long dtlb_pte;
>       unsigned long irr[4];
>@@ -61,7 +67,7 @@
>       unsigned long domain_itm;
>       unsigned long domain_itm_last;
>       unsigned long xen_itm;
>-#endif
>+
>     mapped_regs_t *privregs; /* save the state of vcpu */
>     unsigned long metaphysical_rr0;           // from arch_domain (so is 
> pinned)
>     unsigned long metaphysical_rr4;           // from arch_domain (so is 
> pinned)
>diff -r ddc279c91502 xen/include/asm-ia64/tlb.h
>--- a/xen/include/asm-ia64/tlb.h       Fri Mar 31 21:04:16 2006
>+++ b/xen/include/asm-ia64/tlb.h       Mon Apr  3 11:20:57 2006
>@@ -4,23 +4,24 @@
> #define       NITRS   8
> #define NDTRS 8
>
>+union pte_flags {
>+    struct {
>+      unsigned long p    :  1; // 0
>+          unsigned long      :  1; // 1
>+          unsigned long ma   :  3; // 2-4
>+          unsigned long a    :  1; // 5
>+          unsigned long d    :  1; // 6
>+          unsigned long pl   :  2; // 7-8
>+          unsigned long ar   :  3; // 9-11
>+          unsigned long ppn  : 38; // 12-49
>+          unsigned long      :  2; // 50-51
>+          unsigned long ed   :  1; // 52
>+    };
>+    unsigned long val;
>+};
>+
> typedef struct {
>-    union {
>-        struct {
>-            unsigned long p    :  1; // 0
>-            unsigned long      :  1; // 1
>-            unsigned long ma   :  3; // 2-4
>-            unsigned long a    :  1; // 5
>-            unsigned long d    :  1; // 6
>-            unsigned long pl   :  2; // 7-8
>-            unsigned long ar   :  3; // 9-11
>-            unsigned long ppn  : 38; // 12-49
>-            unsigned long      :  2; // 50-51
>-            unsigned long ed   :  1; // 52
>-        };
>-        unsigned long page_flags;
>-    };
>-
>+    volatile union pte_flags pte;
>     union {
>         struct {
>             unsigned long      :  2; // 0-1

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.