
Re: [Xen-devel] HYBRID: PV in HVM container



On Thu, 17 Nov 2011, Mukesh Rathor wrote:
> Alright, got the hybrid-with-EPT numbers in now from my prototype; it
> needs some perf work...

Is "HVM" here a PV on HVM guest or a pure HVM guest (no CONFIG_XEN)?
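
(For context: a PV on HVM kernel has CONFIG_XEN and probes for Xen at boot
via the hypervisor CPUID leaves, then enables pvclock, event channels and
PV drivers; a pure HVM kernel never looks. A minimal sketch of that probe,
written as a standalone user-space program using GCC's cpuid.h -- this is
illustrative only, not code from the patch below:

#include <cpuid.h>   /* GCC's __cpuid() wrapper for the CPUID instruction */
#include <stdio.h>
#include <string.h>

/* Scan the hypervisor CPUID range for the "XenVMMXenVMM" signature,
 * the same check a CONFIG_XEN kernel makes in xen_cpuid_base(). */
static unsigned int find_xen_cpuid_base(void)
{
        unsigned int base, eax, ebx, ecx, edx;
        char sig[13];

        for (base = 0x40000000; base < 0x40010000; base += 0x100) {
                __cpuid(base, eax, ebx, ecx, edx);
                /* signature is packed into ebx:ecx:edx (x86 is LE) */
                memcpy(sig + 0, &ebx, 4);
                memcpy(sig + 4, &ecx, 4);
                memcpy(sig + 8, &edx, 4);
                sig[12] = '\0';
                if (!strcmp(sig, "XenVMMXenVMM") && eax >= base + 2)
                        return base;    /* Xen leaves start here */
        }
        return 0;
}

int main(void)
{
        unsigned int base = find_xen_cpuid_base();

        if (base)
                printf("Xen CPUID leaves at 0x%x\n", base);
        else
                printf("no Xen signature found\n");
        return 0;
}

A pure HVM guest on Xen still sees these leaves; the difference is only
whether the kernel is built to look for them.)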


> Processor, Processes - times in microseconds - smaller is better
> ------------------------------------------------------------------------------
> Host                 OS  Mhz null null      open slct sig  sig  fork exec sh  
>                              call  I/O stat clos TCP  inst hndl proc proc proc
> --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
> PV        Linux 2.6.39f 2639 0.65 0.88 2.14 4.59 3.77 0.79 3.62 535. 1294 3308
> Hybrid    Linux 2.6.39f 2639 0.13 0.21 0.89 1.96 3.08 0.24 1.10 529. 1294 3246
> HVM       Linux 2.6.39f 2639 0.12 0.21 0.64 1.76 3.04 0.24 3.37 113. 354. 1324
> Baremetal Linux 2.6.39+ 2649 0.13 0.23 0.74 1.93 3.46 0.28 1.58 127. 386. 1434
> HYB-EPT   Linux 2.6.39f 2639 0.13 0.21 0.68 1.95 3.04 0.25 3.09 145. 452. 1542

Good, hybrid == HVM in this test.

[...]
 

> Context switching - times in microseconds - smaller is better
> -------------------------------------------------------------------------
> Host                 OS  2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
>                          ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
> --------- ------------- ------ ------ ------ ------ ------ ------- -------
> PV        Linux 2.6.39f 5.2800 5.7600 6.3600 6.3200 7.3600 6.69000 7.46000
> Hybrid    Linux 2.6.39f 4.9200 4.9300 5.2200 5.7600 6.9600 6.12000 7.31000
> HVM       Linux 2.6.39f 1.3100 1.2200 1.6200 1.9200 3.2600 2.23000 3.48000
> Baremetal Linux 2.6.39+ 1.5500 1.4100 2.0600 2.2500 3.3900 2.44000 3.38000
> HYB-EPT   Linux 2.6.39f 3.2000 3.6100 4.1700 4.3600 6.1200 4.81000 6.20000

How is it possible that the HYB-EPT numbers here are so much worse than
HVM? Shouldn't they be the same as in the other tests?


> *Local* Communication latencies in microseconds - smaller is better
> ---------------------------------------------------------------------
> Host                 OS 2p/0K  Pipe AF     UDP  RPC/   TCP  RPC/ TCP
>                         ctxsw       UNIX         UDP         TCP conn
> --------- ------------- ----- ----- ---- ----- ----- ----- ----- ----
> PV        Linux 2.6.39f 5.280  16.6 21.3  25.9  33.7  34.7  41.8  87.
> Hybrid    Linux 2.6.39f 4.920  11.2 14.4  19.6  26.1  27.5  32.9  71.
> HVM       Linux 2.6.39f 1.310 4.416 6.15 9.386  14.8  15.8  20.1  45.
> Baremetal Linux 2.6.39+ 1.550 4.625 7.34  14.3  19.8  21.4  26.4  66.
> HYB-EPT   Linux 2.6.39f 3.200 8.669 15.3  17.5  23.5  25.1  30.4  66.
>
> *Local* Communication bandwidths in MB/s - bigger is better
> -----------------------------------------------------------------------------
> Host                OS  Pipe AF    TCP  File   Mmap  Bcopy  Bcopy  Mem   Mem
>                              UNIX      reread reread (libc) (hand) read write
> --------- ------------- ---- ---- ---- ------ ------ ------ ------ ---- -----
> PV        Linux 2.6.39f 1661 2081 1041 3293.3 5528.3 3106.6 2800.0 4472 5633.
> Hybrid    Linux 2.6.39f 1974 2450 1183 3481.5 5529.6 3114.9 2786.6 4470 5672.
> HVM       Linux 2.6.39f 3232 2929 1622 3541.3 5527.5 3077.1 2765.6 4453 5634.
> Baremetal Linux 2.6.39+ 3320 2800 1666 3523.6 5578.9 3147.0 2841.6 4541 5752.
> HYB-EPT   Linux 2.6.39f 2104 2480 1231 3451.5 5503.4 3067.7 2751.0 4438 5636.

Same on these two tests.




> Attaching the diffs from my prototype. Linux: 2.6.39. Xen 4.0.2.

lin.diff:


> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index e3c6a06..53ceae0 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -110,7 +110,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
>   *
>   * 0: not available, 1: available
>   */
> -static int have_vcpu_info_placement = 1;
> +static int have_vcpu_info_placement = 0;
>  
>  static void clamp_max_cpus(void)
>  {
> @@ -195,6 +195,13 @@ static void __init xen_banner(void)
>       printk(KERN_INFO "Xen version: %d.%d%s%s\n",
>              version >> 16, version & 0xffff, extra.extraversion,
>              xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
> +
> +        if (xen_hybrid_domain()) {
> +             printk(KERN_INFO "MUK: is MUK HYBRID domain....");
> +             if (xen_feature(XENFEAT_auto_translated_physmap))
> +                     printk(KERN_INFO "with EPT...");
> +             printk(KERN_INFO "\n");
> +        }
>  }
>  
>  static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
> @@ -222,8 +229,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
>               maskebx = 0;
>               break;
>       }
> -
> -     asm(XEN_EMULATE_PREFIX "cpuid"
> +        if (xen_hybrid_domain()) {
> +                native_cpuid(ax, bx, cx, dx);
> +        } else
> +             asm(XEN_EMULATE_PREFIX "cpuid"
>               : "=a" (*ax),
>                 "=b" (*bx),
>                 "=c" (*cx),
> @@ -244,6 +253,7 @@ static __init void xen_init_cpuid_mask(void)
>               ~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
>                 (1 << X86_FEATURE_MCA)  |  /* disable MCA */
>                 (1 << X86_FEATURE_MTRR) |  /* disable MTRR */
> +                  (1 << X86_FEATURE_PSE)  |  /* disable 2M pages */
>                 (1 << X86_FEATURE_ACC));   /* thermal monitoring */
>  
>       if (!xen_initial_domain())
> @@ -393,6 +403,10 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
>               make_lowmem_page_readonly(virt);
>       }
>  
> +        if (xen_hybrid_domain()) {
> +                native_load_gdt(dtr);
> +                return;
> +        }
>       if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
>               BUG();
>  }
> @@ -431,6 +445,10 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
>               frames[f] = mfn;
>       }
>  
> +        if (xen_hybrid_domain()) {
> +                native_load_gdt(dtr);
> +                return;
> +        }
>       if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
>               BUG();
>  }
> @@ -849,9 +867,11 @@ void xen_setup_shared_info(void)
>  
>               HYPERVISOR_shared_info =
>                       (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
> -     } else
> +     } else {
>               HYPERVISOR_shared_info =
>                       (struct shared_info *)__va(xen_start_info->shared_info);
> +             return;
> +     }
>  
>  #ifndef CONFIG_SMP
>       /* In UP this is as good a place as any to set up shared info */
> @@ -944,6 +964,71 @@ static const struct pv_init_ops xen_init_ops __initdata = {
>       .patch = xen_patch,
>  };
>  
> +extern void native_iret(void);
> +extern void native_irq_enable_sysexit(void);
> +extern void native_usergs_sysret32(void);
> +extern void native_usergs_sysret64(void);
> +
> +static const struct pv_cpu_ops xen_hybrid_cpu_ops __initdata = {
> +     .cpuid = xen_cpuid,
> +     .set_debugreg = xen_set_debugreg,
> +     .get_debugreg = xen_get_debugreg,
> +
> +     .clts = xen_clts,
> +
> +     .read_cr0 = xen_read_cr0,
> +     .write_cr0 = xen_write_cr0,
> +
> +     .read_cr4 = native_read_cr4,
> +     .read_cr4_safe = native_read_cr4_safe,
> +     .write_cr4 = native_write_cr4,
> +
> +     .wbinvd = native_wbinvd,
> +
> +     .read_msr = native_read_msr_safe,
> +     .write_msr = native_write_msr_safe,
> +     .read_tsc = native_read_tsc,
> +     .read_pmc = native_read_pmc,
> +
> +     .iret = native_iret,
> +     .irq_enable_sysexit = native_irq_enable_sysexit,
> +#ifdef CONFIG_X86_64
> +     .usergs_sysret32 = native_usergs_sysret32,
> +     .usergs_sysret64 = native_usergs_sysret64,
> +#endif
> +
> +     .load_tr_desc = native_load_tr_desc,
> +     .set_ldt = native_set_ldt,
> +     .load_gdt = native_load_gdt,
> +     .load_idt = native_load_idt,
> +     .load_tls = native_load_tls,
> +#ifdef CONFIG_X86_64
> +     .load_gs_index = native_load_gs_index,
> +#endif
> +
> +     .alloc_ldt = paravirt_nop,
> +     .free_ldt = paravirt_nop,
> +
> +     .store_gdt = native_store_gdt,
> +     .store_idt = native_store_idt,
> +     .store_tr = native_store_tr,
> +
> +     .write_ldt_entry = native_write_ldt_entry,
> +     .write_gdt_entry = native_write_gdt_entry,
> +     .write_idt_entry = native_write_idt_entry,
> +     .load_sp0 = native_load_sp0,
> +
> +     .set_iopl_mask = native_set_iopl_mask,
> +     .io_delay = xen_io_delay,
> +
> +     /* Xen takes care of %gs when switching to usermode for us */
> +     .swapgs = native_swapgs,
> +
> +     .start_context_switch = paravirt_start_context_switch,
> +     .end_context_switch = xen_end_context_switch,

Why are you using the paravirt version of start_context_switch and
end_context_switch? Is this for the non-autotranslated case?
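
(For reference, in 2.6.39 these helpers batch the CPU/MMU updates done
during a context switch; roughly the following, quoting from memory, so
check your tree:

void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                set_ti_thread_flag(task_thread_info(prev),
                                   TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

static void xen_end_context_switch(struct task_struct *next)
{
        xen_mc_flush();                 /* flush the batched multicalls */
        paravirt_end_context_switch(next);
}

xen_mc_flush() only has work to do when MMU hypercalls are being batched,
i.e. with PV paging, which is why I'm asking whether this table is meant
for the non-autotranslated case only.)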


> +};
> +
>  static const struct pv_cpu_ops xen_cpu_ops __initdata = {
>       .cpuid = xen_cpuid,
>  
> @@ -1010,6 +1095,11 @@ static const struct pv_apic_ops xen_apic_ops __initdata = {
>  #endif
>  };
>  
> +static void __init xen_hybrid_override_autox_cpu_ops(void)
> +{
> +        pv_cpu_ops.cpuid = xen_cpuid;
> +}
> +
>  static void xen_reboot(int reason)
>  {
>       struct sched_shutdown r = { .reason = reason };
> @@ -1071,6 +1161,10 @@ static const struct machine_ops __initdata xen_machine_ops = {
>   */
>  static void __init xen_setup_stackprotector(void)
>  {
> +        if (xen_hybrid_domain()) {
> +                switch_to_new_gdt(0);
> +                return;
> +        }
>       pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
>       pv_cpu_ops.load_gdt = xen_load_gdt_boot;
>  
> @@ -1093,14 +1187,22 @@ asmlinkage void __init xen_start_kernel(void)
>  
>       xen_domain_type = XEN_PV_DOMAIN;
>  
> +     xen_setup_features();
>       xen_setup_machphys_mapping();
>  
>       /* Install Xen paravirt ops */
>       pv_info = xen_info;
>       pv_init_ops = xen_init_ops;
> -     pv_cpu_ops = xen_cpu_ops;
>       pv_apic_ops = xen_apic_ops;
>  
> +        if (xen_hybrid_domain()) {
> +             if (xen_feature(XENFEAT_auto_translated_physmap))
> +                        xen_hybrid_override_autox_cpu_ops();
> +                else
> +                     pv_cpu_ops = xen_hybrid_cpu_ops;
> +        } else
> +             pv_cpu_ops = xen_cpu_ops;

[...]

>  void __init xen_init_mmu_ops(void)
>  {
> +     memset(dummy_mapping, 0xff, PAGE_SIZE);
> +     x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> +
> +     if (xen_feature(XENFEAT_auto_translated_physmap))
> +             return;
> +
>       x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
>       x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
> -     x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> -     pv_mmu_ops = xen_mmu_ops;
> +        pv_mmu_ops = xen_mmu_ops;
>  
> -     memset(dummy_mapping, 0xff, PAGE_SIZE);
> +        if (xen_hybrid_domain())      /* hybrid without EPT, ie, pv paging. */
> +             xen_hyb_override_mmu_ops();
>  }
>  
>  /* Protected by xen_reservation_lock. */

So in theory HYB-EPT is running with native_cpu_ops and native_mmu_ops;
in that case I don't understand why the performance is lower than HVM.
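
To spell out how I read the selection logic in the quoted hunks (a
paraphrased sketch, not code from the patch):

        /* boot-time ops selection, as I understand the patch: */
        if (xen_hybrid_domain()) {
                if (xen_feature(XENFEAT_auto_translated_physmap)) {
                        /* HYB-EPT: pv_cpu_ops stays native, only .cpuid
                         * is overridden; xen_init_mmu_ops() returns
                         * early, so pv_mmu_ops stays native too. */
                        xen_hybrid_override_autox_cpu_ops();
                } else {
                        /* hybrid without EPT: mostly-native CPU ops, but
                         * PV paging via xen_mmu_ops plus
                         * xen_hyb_override_mmu_ops(). */
                        pv_cpu_ops = xen_hybrid_cpu_ops;
                }
        } else {
                pv_cpu_ops = xen_cpu_ops;       /* classic PV */
        }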

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

