[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-ia64-devel] [Fwd: [Xen-bugs] [Bug 1392] New: Problems with denormalized floating point numbers on XEN-virtualized Linux/IA64]



Although I have also replied in Bugzilla,
I am sending the patch to the list as well for those who don't
watch the bug report.
I hope this patch fixes it; please try it.

IA64: fix fp emulation of pv domain.

This patch fixes the bug reported as
http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=1392
In the pv domain case, when the ip can't be accessed by the VMM because
of tlb cache stuff, the instruction was skipped, resulting
in a wrong calculation. This patch fixes it.

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>

diff --git a/xen/arch/ia64/xen/faults.c b/xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c
+++ b/xen/arch/ia64/xen/faults.c
@@ -318,6 +318,7 @@ handle_fpu_swa(int fp_fault, struct pt_r
        IA64_BUNDLE bundle;
        unsigned long fault_ip;
        fpswa_ret_t ret;
+       unsigned long rc;
 
        fault_ip = regs->cr_iip;
        /*
@@ -328,16 +329,15 @@ handle_fpu_swa(int fp_fault, struct pt_r
        if (!fp_fault && (ia64_psr(regs)->ri == 0))
                fault_ip -= 16;
 
-       if (VMX_DOMAIN(current)) {
-               if (IA64_RETRY == __vmx_get_domain_bundle(fault_ip, &bundle))
-                       return IA64_RETRY;
-       } else
-               bundle = __get_domain_bundle(fault_ip);
-
-       if (!bundle.i64[0] && !bundle.i64[1]) {
-               printk("%s: floating-point bundle at 0x%lx not mapped\n",
+       if (VMX_DOMAIN(current))
+               rc = __vmx_get_domain_bundle(fault_ip, &bundle);
+       else
+               rc = __get_domain_bundle(fault_ip, &bundle);
+       if (rc == IA64_RETRY) {
+               gdprintk(XENLOG_DEBUG,
+                        "%s: floating-point bundle at 0x%lx not mapped\n",
                       __FUNCTION__, fault_ip);
-               return -1;
+               return IA64_RETRY;
        }
 
        ret = fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
@@ -689,8 +689,10 @@ ia64_handle_reflection(unsigned long ifa
                if (!status)
                        return;
                // fetch code fail
-               if (IA64_RETRY == status)
+               if (IA64_RETRY == status) {
+                       vcpu_decrement_iip(v);
                        return;
+               }
                printk("ia64_handle_reflection: handling FP trap\n");
                vector = IA64_FP_TRAP_VECTOR;
                break;
diff --git a/xen/arch/ia64/xen/vcpu.c b/xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c
+++ b/xen/arch/ia64/xen/vcpu.c
@@ -1325,6 +1325,16 @@ static TR_ENTRY *vcpu_tr_lookup(VCPU * v
        return NULL;
 }
 
+unsigned long
+__get_domain_bundle(unsigned long iip, IA64_BUNDLE *bundle)
+{
+       *bundle = __get_domain_bundle_asm(iip);
+       if (!bundle->i64[0] && !bundle->i64[1])
+               return IA64_RETRY;
+
+       return 0;
+}
+
 // return value
 // 0: failure
 // 1: success
@@ -1335,6 +1345,7 @@ vcpu_get_domain_bundle(VCPU * vcpu, REGS
        u64 gpip;               // guest pseudo phyiscal ip
        unsigned long vaddr;
        struct page_info *page;
+       unsigned long rc;
 
  again:
 #if 0
@@ -1387,11 +1398,11 @@ vcpu_get_domain_bundle(VCPU * vcpu, REGS
                if (swap_rr0) {
                        set_virtual_rr0();
                }
-               *bundle = __get_domain_bundle(gip);
+               rc = __get_domain_bundle(gip, bundle);
                if (swap_rr0) {
                        set_metaphysical_rr0();
                }
-               if (bundle->i64[0] == 0 && bundle->i64[1] == 0) {
+               if (rc == IA64_RETRY) {
                        dprintk(XENLOG_INFO, "%s gip 0x%lx\n", __func__, gip);
                        return 0;
                }
diff --git a/xen/arch/ia64/xen/xenasm.S b/xen/arch/ia64/xen/xenasm.S
--- a/xen/arch/ia64/xen/xenasm.S
+++ b/xen/arch/ia64/xen/xenasm.S
@@ -389,7 +389,7 @@ END(ia64_prepare_handle_reflection)
 END(ia64_prepare_handle_reflection)
 #endif
 
-GLOBAL_ENTRY(__get_domain_bundle)
+GLOBAL_ENTRY(__get_domain_bundle_asm)
        EX(.failure_in_get_bundle,ld8 r8=[r32],8)
        ;;
        EX(.failure_in_get_bundle,ld8 r9=[r32])
@@ -403,7 +403,7 @@ GLOBAL_ENTRY(__get_domain_bundle)
        ;;
        br.ret.sptk.many rp
        ;;
-END(__get_domain_bundle)
+END(__get_domain_bundle_asm)
 
 /* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
 GLOBAL_ENTRY(pal_emulator_static)
diff --git a/xen/include/asm-ia64/bundle.h b/xen/include/asm-ia64/bundle.h
--- a/xen/include/asm-ia64/bundle.h
+++ b/xen/include/asm-ia64/bundle.h
@@ -225,7 +225,8 @@ typedef union U_INST64 {
 
 #ifdef __XEN__
 extern unsigned long __vmx_get_domain_bundle(unsigned long iip, IA64_BUNDLE 
*pbundle);
-extern IA64_BUNDLE __get_domain_bundle(unsigned long iip);
+extern IA64_BUNDLE __get_domain_bundle_asm(unsigned long iip);
+extern unsigned long __get_domain_bundle(unsigned long iip, IA64_BUNDLE 
*bundle);
 #endif
 
 #define MASK_41 ((unsigned long)0x1ffffffffff)


On Wed, Dec 03, 2008 at 12:40:59PM -0700, Alex Williamson wrote:
> 
> This looks pretty nasty and it still occurs on latest upstream.  The
> test program in the bugzilla usually shows the problem within a couple
> runs.  Thanks,
> 
> Alex
> 
> 
> -------- Forwarded Message --------
> > From: bugzilla-daemon@xxxxxxxxxxxxxxxxxxx
> > Reply-to: bugs@xxxxxxxxxxxxxxxxxx
> > To: xen-bugs@xxxxxxxxxxxxxxxxxxx
> > Subject: [Xen-bugs] [Bug 1392] New: Problems with denormalized
> > floating point numbers on XEN-virtualized Linux/IA64
> > Date: Wed, 3 Dec 2008 08:34:59 -0800
> > 
> > http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=1392
> > 
> >            Summary: Problems with denormalized floating point numbers on
> >                     XEN-virtualized Linux/IA64
> >            Product: Xen
> >            Version: 3.0.3
> >           Platform: IA64
> >         OS/Version: Linux
> >             Status: NEW
> >           Severity: normal
> >           Priority: P2
> >          Component: Unspecified
> >         AssignedTo: xen-bugs@xxxxxxxxxxxxxxxxxxx
> >         ReportedBy: volker.simonis@xxxxxxxxx
> >                 CC: volker.simonis@xxxxxxxxx
> > 
> > 
> > Hi,
> > 
> > while we were testing our Java VM on a XEN-virtualized Linux/IA64 we
> > encountered some non-deterministic, but reproducible floating point 
> > failures.
> > After some debugging we could exclude the Java VM as the root cause of the
> > problem and came up with the following small C++ test case, which usually 
> > fails
> > on a virtualized Linux box. We couldn't however reproduce the failure on any
> > other, non-virtualized IA64 Linux.
> > 
> > Attached you can find the test program "fnorms.cpp". Please compile with 
> > 'gcc
> > -g fnorms.cpp'. The program will silently finish if no error occurs, 
> > otherwise
> > it will print one or more lines like: "ERROR: 1.401298e-45 != 1.000000e+00".
> > 
> > During our debugging sessions, we observed that the reason for the failure 
> > is
> > that certain IA64 floating point instructions like 'fnorm.s', 'fmpy.s' or
> > 'fcmp' may fail if they are applied to denormalized floating point values.
> > 
> > If the multiplication ('fmpy.s') fails, the error line shows different 
> > numbers
> > (e.g. "ERROR: 1.401298e-45 != 1.000000e+00"). But there's also a case where 
> > the
> > compare fails (i.e. the 'fcmp' which was generated for "if (result !=
> > min_float)"). If this happens, the test program "erroneously" reports that 
> > the
> > result of the multiplication and the initial value of "min_float" differ
> > ("ERROR: 1.401298e-45 != 1.401298e-45"), although the two numbers are really
> > equal.
> > 
> > Because we have only observed these failures on a Xen-virtualized IA64-Linux
> > version (in both, dom0 and dom1) our assumption is that there may be a 
> > problem
> > in the implementation of the Floating Point Software Assistance (FPSWA) in 
> > Xen,
> > because all of the above mentioned instructions generate a "floating-point
> > assist fault" if they are applied to denormalized values (as can be seen in
> > "/var/log/messages").  This is only a vague guess however...
> > 
> > Has anybody seen these problems before or are there any ideas why this 
> > happens?
> > 
> > With best regards,
> > Volker
> > 
> > PS: we have tested on:
> > 
> > Xen: 3.0.3-64
> > 
> > dom0: RHEL 5.2
> > ---------------
> > [root@xxxxxx ~]# uname -a
> > Linux xxxxxx.wdf.sap.corp 2.6.18-92.el5xen #1 SMP Tue Apr 29 13:36:07 EDT 
> > 2008
> > ia64 ia64 ia64 GNU/Linux
> > [root@xxxxxx ~]# lsb_release -a
> > LSB Version:   
> > :core-3.1-ia64:core-3.1-noarch:graphics-3.1-ia64:graphics-3.1-noarch
> > Distributor ID: RedHatEnterpriseServer
> > Description:    Red Hat Enterprise Linux Server release 5.2 (Tikanga)
> > Release:        5.2
> > Codename:       Tikanga
> > [root@xxxxxx ~]# rpm -q xen
> > xen-3.0.3-64.el5_2.1
> > [root@xxxxxx ~]# xm info
> > host                   : xxxxxx.wdf.sap.corp
> > release                : 2.6.18-92.el5xen
> > version                : #1 SMP Tue Apr 29 13:36:07 EDT 2008
> > machine                : ia64
> > nr_cpus                : 4
> > nr_nodes               : 1
> > sockets_per_node       : 2
> > cores_per_socket       : 2
> > threads_per_core       : 1
> > cpu_mhz                : 1594
> > hw_caps                :
> > 00000000:00000000:00000000:00000000:00000000:00000000:00000000:00000000:
> > total_memory           : 32722
> > free_memory            : 17277
> > node_to_cpu            : node0:no cpus
> > xen_major              : 3
> > xen_minor              : 1
> > xen_extra              : .2-92.el5
> > xen_caps               : xen-3.0-ia64 xen-3.0-ia64be hvm-3.0-ia64 
> > xen_pagesize           : 16384
> > platform_params        : virt_start=0xe800000000000000
> > xen_changeset          : unavailable
> > cc_compiler            : gcc version 4.1.2 20071124 (Red Hat 4.1.2-41)
> > cc_compile_by          : brewbuilder
> > cc_compile_domain      : redhat.com
> > cc_compile_date        : Tue Apr 29 13:14:31 EDT 2008
> > xend_config_format     : 2
> > 
> > dom1: RHEL 5.2 (2.6.18-53.1.14.el5xen #1 SMP Tue Feb 19 07:35:46 EST 2008 
> > ia64)
> > ---------------
> > 
> > # cat /proc/cpuinfo 
> > processor  : 0
> > vendor     : Xen/ia64
> > arch       : IA-64
> > family     : 32
> > model      : 0
> > revision   : 7
> > archrev    : 0
> > features   : branchlong, 16-byte atomic ops
> > cpu number : 0
> > cpu regs   : 4
> > cpu MHz    : 1594.000895
> > itc MHz    : 399.222286
> > BogoMIPS   : 3006.46
> > siblings   : 1
> > 
> > processor  : 1
> > vendor     : Xen/ia64
> > arch       : IA-64
> > family     : 32
> > model      : 0
> > revision   : 7
> > archrev    : 0
> > features   : branchlong, 16-byte atomic ops
> > cpu number : 0
> > cpu regs   : 4
> > cpu MHz    : 1594.000895
> > itc MHz    : 399.222286
> > BogoMIPS   : 3178.49
> > siblings   : 1
> > 
> > CPUID0: 0x756E6547
> > CPUID1: 0x6C65746E
> > CPUID2: 0x0
> > CPUID3: 0x20000704
> > CPUID4: 0x5
> > 
> > 

-- 
yamahata

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.