|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86/pv: Adjust GS handling for FRED mode
commit 9ea7d7f9393c52de36ee9c42b52c235949453a2a
Author: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Mon Aug 11 23:46:01 2025 +0100
Commit: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Mar 10 11:57:31 2026 +0000
x86/pv: Adjust GS handling for FRED mode
When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.
For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated. Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.
This leads to the following changes:
* In load_segments(), we already load both GSes. Account for FRED in the
SWAP() condition and avoid the path with SWAPGS.
* In save_segments(), we need to read GS_SHADOW rather than GS_BASE.
* In toggle_guest_mode(), we need to emulate SWAPGS.
* In {read,write}_msr() which access the live registers, GS_SHADOW and
GS_BASE need swapping.
* In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
take FRED into account when choosing which base to update.
SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED)
so under FRED needs to be just a MOV %gs. Simply skip the SWAPGSes.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
xen/arch/x86/domain.c | 16 +++++++++++-----
xen/arch/x86/pv/domain.c | 22 ++++++++++++++++++++--
xen/arch/x86/pv/emul-priv-op.c | 26 +++++++++++++++++---------
xen/arch/x86/pv/misc-hypercalls.c | 23 +++++++++++++++--------
4 files changed, 63 insertions(+), 24 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e658c2d647..9c1f6ef76d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1791,9 +1791,10 @@ static void load_segments(struct vcpu *n)
/*
* Figure out which way around gsb/gss want to be. gsb needs to be
- * the active context, and gss needs to be the inactive context.
+ * the active context, and gss needs to be the inactive context,
+ * unless we're in FRED mode where they're reversed.
*/
- if ( !(n->arch.flags & TF_kernel_mode) )
+ if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
SWAP(gsb, gss);
if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1814,7 +1815,9 @@ static void load_segments(struct vcpu *n)
if ( !fs_gs_done && !compat )
{
- if ( read_cr4() & X86_CR4_FSGSBASE )
+ unsigned long cr4 = read_cr4();
+
+ if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
{
__wrgsbase(gss);
__wrfsbase(n->arch.pv.fs_base);
@@ -1931,6 +1934,9 @@ static void load_segments(struct vcpu *n)
* Guests however cannot use SWAPGS, so there is no mechanism to modify the
* inactive GS base behind Xen's back. Therefore, Xen's copy of the inactive
* GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
*/
static void save_segments(struct vcpu *v)
{
@@ -1946,12 +1952,12 @@ static void save_segments(struct vcpu *v)
if ( read_cr4() & X86_CR4_FSGSBASE )
{
fs_base = __rdfsbase();
- gs_base = __rdgsbase();
+ gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : __rdgsbase();
}
else
{
fs_base = rdmsr(MSR_FS_BASE);
- gs_base = rdmsr(MSR_GS_BASE);
+ gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) :
rdmsr(MSR_GS_BASE);
}
v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index d16583a745..b85abb5ed9 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
#include <asm/cpufeature.h>
#include <asm/fsgsbase.h>
#include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
#include <asm/pv/domain.h>
#include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
#ifdef CONFIG_PV32
int8_t __read_mostly opt_pv32 = -1;
@@ -514,11 +515,28 @@ void toggle_guest_mode(struct vcpu *v)
* subsequent context switch won't bother re-reading it.
*/
gs_base = read_gs_base();
+
+ /*
+ * In FRED mode, not only are the two GSes the other way around (i.e. we
+ * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+ * we have to emulate it.
+ */
+ if ( opt_fred )
+ {
+ unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+ wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+ write_gs_base(gs_shadow);
+
+ gs_base = gs_shadow;
+ }
+ else
+ asm volatile ( "swapgs" );
+
if ( v->arch.flags & TF_kernel_mode )
v->arch.pv.gs_base_kernel = gs_base;
else
v->arch.pv.gs_base_user = gs_base;
- asm volatile ( "swapgs" );
_toggle_guest_pt(v);
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 64d47ab677..53676b3021 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -25,6 +25,7 @@
#include <asm/pv/traps.h>
#include <asm/shared.h>
#include <asm/stubs.h>
+#include <asm/traps.h>
#include <xsm/xsm.h>
@@ -926,7 +927,8 @@ static int cf_check read_msr(
case MSR_GS_BASE:
if ( !cp->extd.lm )
break;
- *val = read_gs_base();
+ /* Under FRED, GS is automatically swapped on privilege change. */
+ *val = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : read_gs_base();
return X86EMUL_OKAY;
case MSR_SHADOW_GS_BASE:
@@ -1066,17 +1068,23 @@ static int cf_check write_msr(
if ( !cp->extd.lm || !is_canonical_address(val) )
break;
- if ( reg == MSR_FS_BASE )
- write_fs_base(val);
- else if ( reg == MSR_GS_BASE )
- write_gs_base(val);
- else if ( reg == MSR_SHADOW_GS_BASE )
+ switch ( reg )
{
- write_gs_shadow(val);
+ case MSR_FS_BASE:
+ write_fs_base(val);
+ break;
+
+ case MSR_SHADOW_GS_BASE:
curr->arch.pv.gs_base_user = val;
+ fallthrough;
+ case MSR_GS_BASE:
+ /* Under FRED, GS is automatically swapped on privilege change. */
+ if ( (reg == MSR_GS_BASE) ^ opt_fred )
+ write_gs_base(val);
+ else
+ write_gs_shadow(val);
+ break;
}
- else
- ASSERT_UNREACHABLE();
return X86EMUL_OKAY;
case MSR_EFER:
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4ad..7e915d86b7 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
#include <asm/debugreg.h>
#include <asm/fsgsbase.h>
+#include <asm/traps.h>
long do_set_debugreg(int reg, unsigned long value)
{
@@ -192,11 +193,13 @@ long do_set_segment_base(unsigned int which, unsigned
long base)
case SEGBASE_GS_USER:
v->arch.pv.gs_base_user = base;
- write_gs_shadow(base);
- break;
-
+ fallthrough;
case SEGBASE_GS_KERNEL:
- write_gs_base(base);
+ /* Under FRED, GS is automatically swapped on privilege change. */
+ if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+ write_gs_base(base);
+ else
+ write_gs_shadow(base);
break;
}
break;
@@ -206,10 +209,13 @@ long do_set_segment_base(unsigned int which, unsigned
long base)
unsigned int sel = (uint16_t)base;
/*
- * We wish to update the user %gs from the GDT/LDT. Currently, the
- * guest kernel's GS_BASE is in context.
+ * We wish to update the user %gs from the GDT/LDT. Currently, we are
+ * in guest kernel context.
+ *
+ * Under IDT, this means updating GS_SHADOW. Under FRED, plain GS.
*/
- asm volatile ( "swapgs" );
+ if ( !opt_fred )
+ asm volatile ( "swapgs" );
if ( sel > 3 )
/* Fix up RPL for non-NUL selectors. */
@@ -247,7 +253,8 @@ long do_set_segment_base(unsigned int which, unsigned long
base)
/* Update the cache of the inactive base, as read from the GDT/LDT. */
v->arch.pv.gs_base_user = read_gs_base();
- asm volatile ( safe_swapgs );
+ if ( !opt_fred )
+ asm volatile ( safe_swapgs );
break;
}
--
generated by git-patchbot for /home/xen/git/xen.git#master
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |