[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2] x86/EFI: meet further spec requirements for runtime calls



So far we didn't guarantee 16-byte alignment of the stack: While (so
far) we don't tell the compiler to use smaller alignment, we also don't
guarantee 16-byte alignment when establishing stack pointers for new
vCPU-s. Runtime service functions using SSE instructions may end with
#GP(0) without that.

Note that making use of -mpreferred-stack-boundary=3, as mentioned in
the comment, wouldn't help to reduce the needed alignment: The compiler
would then be free to align the stack of the function with the aligned
object, but would be permitted to place an odd number of 8-byte objects
there, resulting in the callee to still run on an unaligned stack.

(The only working alternative to the approach chosen here would be to
use -mincoming-stack-boundary=3, but that would affect all functions in
runtime.c, not just the ones actually making runtime services calls.
And it would still require the manual alignment logic here to be used
with gcc 5.2 and earlier - not permitting that command line option -,
just that then the alignment amount would become conditional.)

Hence enforce the needed alignment by making efi_rs_enter() return a
suitably aligned structure, which the caller then necessarily has to
store in a suitably aligned local variable, the address of which then
gets passed to efi_rs_leave(). Also (to limit exposure) move the
function declarations to where they belong: They're local to runtime.c,
and shared only with compat.c (by the latter including the former).

Furthermore we should avoid #MF to be raised on the FLDCW we do.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Almost complete re-work.

--- a/xen/common/efi/runtime.c
+++ b/xen/common/efi/runtime.c
@@ -8,6 +8,25 @@
 
 DEFINE_XEN_GUEST_HANDLE(CHAR16);
 
+struct efi_rs_state {
+#ifdef CONFIG_X86
+ /*
+  * The way stacks get set up leads to them always being on an 8-byte
+  * boundary not evenly divisible by 16 (see asm-x86/current.h). The EFI ABI,
+  * just like the CPU one, however requires stacks to be 16-byte aligned
+  * before every function call. Since the compiler assumes this (unless
+  * passing it -mpreferred-stack-boundary=3), it wouldn't generate code to
+  * align the stack to 16 bytes even if putting a 16-byte aligned object
+  * there. Hence we need to force larger than 16-byte alignment, even if we
+  * don't strictly need that.
+  */
+ unsigned long __aligned(32) cr3;
+#endif
+};
+
+struct efi_rs_state efi_rs_enter(void);
+void efi_rs_leave(struct efi_rs_state *);
+
 #ifndef COMPAT
 
 /*
@@ -54,17 +73,19 @@ struct efi __read_mostly efi = {
 const struct efi_pci_rom *__read_mostly efi_pci_roms;
 
 #ifndef CONFIG_ARM /* TODO - disabled until implemented on ARM */
-unsigned long efi_rs_enter(void)
+
+struct efi_rs_state efi_rs_enter(void)
 {
     static const u16 fcw = FCW_DEFAULT;
     static const u32 mxcsr = MXCSR_DEFAULT;
-    unsigned long cr3 = read_cr3();
+    struct efi_rs_state state = { .cr3 = 0 };
 
     if ( !efi_l4_pgtable )
-        return 0;
+        return state;
 
+    state.cr3 = read_cr3();
     save_fpu_enable();
-    asm volatile ( "fldcw %0" :: "m" (fcw) );
+    asm volatile ( "fnclex; fldcw %0" :: "m" (fcw) );
     asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
 
     spin_lock(&efi_rs_lock);
@@ -87,14 +108,14 @@ unsigned long efi_rs_enter(void)
 
     write_cr3(virt_to_maddr(efi_l4_pgtable));
 
-    return cr3;
+    return state;
 }
 
-void efi_rs_leave(unsigned long cr3)
+void efi_rs_leave(struct efi_rs_state *state)
 {
-    if ( !cr3 )
+    if ( !state->cr3 )
         return;
-    write_cr3(cr3);
+    write_cr3(state->cr3);
     if ( is_pv_vcpu(current) && !is_idle_vcpu(current) )
     {
         struct desc_ptr gdt_desc = {
@@ -121,14 +142,15 @@ unsigned long efi_get_time(void)
 {
     EFI_TIME time;
     EFI_STATUS status;
-    unsigned long cr3 = efi_rs_enter(), flags;
+    struct efi_rs_state state = efi_rs_enter();
+    unsigned long flags;
 
-    if ( !cr3 )
+    if ( !state.cr3 )
         return 0;
     spin_lock_irqsave(&rtc_lock, flags);
     status = efi_rs->GetTime(&time, NULL);
     spin_unlock_irqrestore(&rtc_lock, flags);
-    efi_rs_leave(cr3);
+    efi_rs_leave(&state);
 
     if ( EFI_ERROR(status) )
         return 0;
@@ -140,12 +162,12 @@ unsigned long efi_get_time(void)
 void efi_halt_system(void)
 {
     EFI_STATUS status;
-    unsigned long cr3 = efi_rs_enter();
+    struct efi_rs_state state = efi_rs_enter();
 
-    if ( !cr3 )
+    if ( !state.cr3 )
         return;
     status = efi_rs->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL);
-    efi_rs_leave(cr3);
+    efi_rs_leave(&state);
 
     printk(XENLOG_WARNING "EFI: could not halt system (%#lx)\n", status);
 }
@@ -153,13 +175,13 @@ void efi_halt_system(void)
 void efi_reset_system(bool_t warm)
 {
     EFI_STATUS status;
-    unsigned long cr3 = efi_rs_enter();
+    struct efi_rs_state state = efi_rs_enter();
 
-    if ( !cr3 )
+    if ( !state.cr3 )
         return;
     status = efi_rs->ResetSystem(warm ? EfiResetWarm : EfiResetCold,
                                  EFI_SUCCESS, 0, NULL);
-    efi_rs_leave(cr3);
+    efi_rs_leave(&state);
 
     printk(XENLOG_WARNING "EFI: could not reset system (%#lx)\n", status);
 }
@@ -179,12 +201,12 @@ int efi_get_info(uint32_t idx, union xen
         break;
     case XEN_FW_EFI_RT_VERSION:
     {
-        unsigned long cr3 = efi_rs_enter();
+        struct efi_rs_state state = efi_rs_enter();
 
-        if ( !cr3 )
+        if ( !state.cr3 )
             return -EOPNOTSUPP;
         info->version = efi_rs->Hdr.Revision;
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
         break;
     }
     case XEN_FW_EFI_CONFIG_TABLE:
@@ -302,7 +324,8 @@ static inline EFI_GUID *cast_guid(struct
 
 int efi_runtime_call(struct xenpf_efi_runtime_call *op)
 {
-    unsigned long cr3, flags;
+    struct efi_rs_state state;
+    unsigned long flags;
     EFI_STATUS status = EFI_NOT_STARTED;
     int rc = 0;
 
@@ -315,13 +338,13 @@ int efi_runtime_call(struct xenpf_efi_ru
         if ( op->misc )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 )
+        state = efi_rs_enter();
+        if ( !state.cr3 )
             return -EOPNOTSUPP;
         spin_lock_irqsave(&rtc_lock, flags);
         status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps);
         spin_unlock_irqrestore(&rtc_lock, flags);
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
 
         if ( !EFI_ERROR(status) )
         {
@@ -337,13 +360,13 @@ int efi_runtime_call(struct xenpf_efi_ru
         if ( op->misc )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 )
+        state = efi_rs_enter();
+        if ( !state.cr3 )
             return -EOPNOTSUPP;
         spin_lock_irqsave(&rtc_lock, flags);
         status = efi_rs->SetTime(cast_time(&op->u.set_time));
         spin_unlock_irqrestore(&rtc_lock, flags);
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
         break;
 
     case XEN_EFI_get_wakeup_time:
@@ -353,14 +376,14 @@ int efi_runtime_call(struct xenpf_efi_ru
         if ( op->misc )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 )
+        state = efi_rs_enter();
+        if ( !state.cr3 )
             return -EOPNOTSUPP;
         spin_lock_irqsave(&rtc_lock, flags);
         status = efi_rs->GetWakeupTime(&enabled, &pending,
                                        cast_time(&op->u.get_wakeup_time));
         spin_unlock_irqrestore(&rtc_lock, flags);
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
 
         if ( !EFI_ERROR(status) )
         {
@@ -377,8 +400,8 @@ int efi_runtime_call(struct xenpf_efi_ru
                           XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY) )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 )
+        state = efi_rs_enter();
+        if ( !state.cr3 )
             return -EOPNOTSUPP;
         spin_lock_irqsave(&rtc_lock, flags);
         status = efi_rs->SetWakeupTime(!!(op->misc &
@@ -388,7 +411,7 @@ int efi_runtime_call(struct xenpf_efi_ru
                                        NULL :
                                        cast_time(&op->u.set_wakeup_time));
         spin_unlock_irqrestore(&rtc_lock, flags);
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
 
         op->misc = 0;
         break;
@@ -397,12 +420,12 @@ int efi_runtime_call(struct xenpf_efi_ru
         if ( op->misc )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( cr3 )
+        state = efi_rs_enter();
+        if ( state.cr3 )
             status = efi_rs->GetNextHighMonotonicCount(&op->misc);
         else
             rc = -EOPNOTSUPP;
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
         break;
 
     case XEN_EFI_get_variable:
@@ -436,13 +459,13 @@ int efi_runtime_call(struct xenpf_efi_ru
         else
             data = NULL;
 
-        cr3 = efi_rs_enter();
-        if ( cr3 )
+        state = efi_rs_enter();
+        if ( state.cr3 )
         {
             status = efi_rs->GetVariable(
                 name, cast_guid(&op->u.get_variable.vendor_guid),
                 &op->misc, &size, data);
-            efi_rs_leave(cr3);
+            efi_rs_leave(&state);
 
             if ( !EFI_ERROR(status) &&
                  copy_to_guest(op->u.get_variable.data, data, size) )
@@ -479,14 +502,14 @@ int efi_runtime_call(struct xenpf_efi_ru
             rc = -EFAULT;
         else
         {
-            cr3 = efi_rs_enter();
-            if ( cr3 )
+            state = efi_rs_enter();
+            if ( state.cr3 )
                 status = efi_rs->SetVariable(
                     name, cast_guid(&op->u.set_variable.vendor_guid),
                     op->misc, op->u.set_variable.size, data);
             else
                 rc = -EOPNOTSUPP;
-            efi_rs_leave(cr3);
+            efi_rs_leave(&state);
         }
 
         xfree(data);
@@ -516,13 +539,13 @@ int efi_runtime_call(struct xenpf_efi_ru
             return -EFAULT;
         }
 
-        cr3 = efi_rs_enter();
-        if ( cr3 )
+        state = efi_rs_enter();
+        if ( state.cr3 )
         {
             status = efi_rs->GetNextVariableName(
                 &size, name.str,
                 cast_guid(&op->u.get_next_variable_name.vendor_guid));
-            efi_rs_leave(cr3);
+            efi_rs_leave(&state);
 
             /*
              * Copy the variable name if necessary. The caller provided size
@@ -571,10 +594,10 @@ int efi_runtime_call(struct xenpf_efi_ru
             break;
         }
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 )
+        state = efi_rs_enter();
+        if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 )
         {
-            efi_rs_leave(cr3);
+            efi_rs_leave(&state);
             return -EOPNOTSUPP;
         }
         status = efi_rs->QueryVariableInfo(
@@ -582,7 +605,7 @@ int efi_runtime_call(struct xenpf_efi_ru
             &op->u.query_variable_info.max_store_size,
             &op->u.query_variable_info.remain_store_size,
             &op->u.query_variable_info.max_size);
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
         break;
 
     case XEN_EFI_query_capsule_capabilities:
@@ -590,13 +613,13 @@ int efi_runtime_call(struct xenpf_efi_ru
         if ( op->misc )
             return -EINVAL;
 
-        cr3 = efi_rs_enter();
-        if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 )
+        state = efi_rs_enter();
+        if ( !state.cr3 || (efi_rs->Hdr.Revision >> 16) < 2 )
         {
-            efi_rs_leave(cr3);
+            efi_rs_leave(&state);
             return -EOPNOTSUPP;
         }
-        efi_rs_leave(cr3);
+        efi_rs_leave(&state);
         /* XXX fall through for now */
     default:
         return -ENOSYS;
--- a/xen/common/efi/efi.h
+++ b/xen/common/efi/efi.h
@@ -36,6 +36,3 @@ extern const struct efi_pci_rom *efi_pci
 
 extern UINT64 efi_boot_max_var_store_size, efi_boot_remain_var_store_size,
               efi_boot_max_var_size;
-
-unsigned long efi_rs_enter(void);
-void efi_rs_leave(unsigned long);


Attachment: x86-EFI-rs-state.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.