|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 10/22] x86/traps: Move subarch_percpu_traps_init() into traps-setup.c
On 14/08/2025 11:52 am, Jan Beulich wrote:
> On 14.08.2025 12:17, Andrew Cooper wrote:
>> On 14/08/2025 9:58 am, Jan Beulich wrote:
>>> On 13.08.2025 13:53, Andrew Cooper wrote:
>>>> On 12/08/2025 10:52 am, Jan Beulich wrote:
>>>>> On 11.08.2025 10:17, Andrew Cooper wrote:
>>>>>> On 08/08/2025 9:23 pm, Andrew Cooper wrote:
>>>>>>> ... along with the supporting functions. Switch to Xen coding style,
>>>>>>> and make static as there are no external callers.
>>>>>>>
>>>>>>> Rename to legacy_syscall_init() as a more accurate name.
>>>>>>>
>>>>>>> No functional change.
>>>>>>>
>>>>>>> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
>>>>>>> ---
>>>>>>> CC: Jan Beulich <JBeulich@xxxxxxxx>
>>>>>>> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
>>>>>>> ---
>>>>>>> xen/arch/x86/include/asm/system.h | 2 -
>>>>>>> xen/arch/x86/traps-setup.c | 97 ++++++++++++++++++++++++++++++-
>>>>>>> xen/arch/x86/x86_64/traps.c | 92 -----------------------------
>>>>>>> 3 files changed, 95 insertions(+), 96 deletions(-)
>>>>>>>
>>>>>>> diff --git a/xen/arch/x86/include/asm/system.h
>>>>>>> b/xen/arch/x86/include/asm/system.h
>>>>>>> index 3cdc56e4ba6d..6c2800d8158d 100644
>>>>>>> --- a/xen/arch/x86/include/asm/system.h
>>>>>>> +++ b/xen/arch/x86/include/asm/system.h
>>>>>>> @@ -256,6 +256,4 @@ static inline int local_irq_is_enabled(void)
>>>>>>> #define BROKEN_ACPI_Sx 0x0001
>>>>>>> #define BROKEN_INIT_AFTER_S1 0x0002
>>>>>>>
>>>>>>> -void subarch_percpu_traps_init(void);
>>>>>>> -
>>>>>>> #endif
>>>>>>> diff --git a/xen/arch/x86/traps-setup.c b/xen/arch/x86/traps-setup.c
>>>>>>> index 13b8fcf0ba51..fbae7072c292 100644
>>>>>>> --- a/xen/arch/x86/traps-setup.c
>>>>>>> +++ b/xen/arch/x86/traps-setup.c
>>>>>>> @@ -2,13 +2,15 @@
>>>>>>> /*
>>>>>>> * Configuration of event handling for all CPUs.
>>>>>>> */
>>>>>>> +#include <xen/domain_page.h>
>>>>>>> #include <xen/init.h>
>>>>>>> #include <xen/param.h>
>>>>>>>
>>>>>>> +#include <asm/endbr.h>
>>>>>>> #include <asm/idt.h>
>>>>>>> #include <asm/msr.h>
>>>>>>> #include <asm/shstk.h>
>>>>>>> -#include <asm/system.h>
>>>>>>> +#include <asm/stubs.h>
>>>>>>> #include <asm/traps.h>
>>>>>>>
>>>>>>> DEFINE_PER_CPU_READ_MOSTLY(idt_entry_t *, idt);
>>>>>>> @@ -19,6 +21,8 @@ static bool __initdata opt_ler;
>>>>>>> boolean_param("ler", opt_ler);
>>>>>>>
>>>>>>> void nocall entry_PF(void);
>>>>>>> +void nocall lstar_enter(void);
>>>>>>> +void nocall cstar_enter(void);
>>>>>>>
>>>>>>> /*
>>>>>>> * Sets up system tables and descriptors for IDT delivery.
>>>>>>> @@ -138,6 +142,95 @@ static void load_system_tables(void)
>>>>>>> BUG_ON(stack_bottom & 15);
>>>>>>> }
>>>>>>>
>>>>>>> +static unsigned int write_stub_trampoline(
>>>>>>> + unsigned char *stub, unsigned long stub_va,
>>>>>>> + unsigned long stack_bottom, unsigned long target_va)
>>>>>>> +{
>>>>>>> + unsigned char *p = stub;
>>>>>>> +
>>>>>>> + if ( cpu_has_xen_ibt )
>>>>>>> + {
>>>>>>> + place_endbr64(p);
>>>>>>> + p += 4;
>>>>>>> + }
>>>>>>> +
>>>>>>> + /* Store guest %rax into %ss slot */
>>>>>>> + /* movabsq %rax, stack_bottom - 8 */
>>>>>>> + *p++ = 0x48;
>>>>>>> + *p++ = 0xa3;
>>>>>>> + *(uint64_t *)p = stack_bottom - 8;
>>>>>>> + p += 8;
>>>>>>> +
>>>>>>> + /* Store guest %rsp in %rax */
>>>>>>> + /* movq %rsp, %rax */
>>>>>>> + *p++ = 0x48;
>>>>>>> + *p++ = 0x89;
>>>>>>> + *p++ = 0xe0;
>>>>>>> +
>>>>>>> + /* Switch to Xen stack */
>>>>>>> + /* movabsq $stack_bottom - 8, %rsp */
>>>>>>> + *p++ = 0x48;
>>>>>>> + *p++ = 0xbc;
>>>>>>> + *(uint64_t *)p = stack_bottom - 8;
>>>>>>> + p += 8;
>>>>>>> +
>>>>>>> + /* jmp target_va */
>>>>>>> + *p++ = 0xe9;
>>>>>>> + *(int32_t *)p = target_va - (stub_va + (p - stub) + 4);
>>>>>>> + p += 4;
>>>>>>> +
>>>>>>> + /* Round up to a multiple of 16 bytes. */
>>>>>>> + return ROUNDUP(p - stub, 16);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void legacy_syscall_init(void)
>>>>>>> +{
>>>>>>> + unsigned long stack_bottom = get_stack_bottom();
>>>>>>> + unsigned long stub_va = this_cpu(stubs.addr);
>>>>>>> + unsigned char *stub_page;
>>>>>>> + unsigned int offset;
>>>>>>> +
>>>>>>> + /* No PV guests? No need to set up SYSCALL/SYSENTER infrastructure. */
>>>>>>> + if ( !IS_ENABLED(CONFIG_PV) )
>>>>>>> + return;
>>>>>>> +
>>>>>>> + stub_page = map_domain_page(_mfn(this_cpu(stubs.mfn)));
>>>>>>> +
>>>>>>> + /*
>>>>>>> + * Trampoline for SYSCALL entry from 64-bit mode. The VT-x HVM vcpu
>>>>>>> + * context switch logic relies on the SYSCALL trampoline being at the
>>>>>>> + * start of the stubs.
>>>>>>> + */
>>>>>>> + wrmsrl(MSR_LSTAR, stub_va);
>>>>>>> + offset = write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
>>>>>>> + stub_va, stack_bottom,
>>>>>>> + (unsigned long)lstar_enter);
>>>>>>> + stub_va += offset;
>>>>>>> +
>>>>>>> + if ( cpu_has_sep )
>>>>>>> + {
>>>>>>> + /* SYSENTER entry. */
>>>>>>> + wrmsrl(MSR_IA32_SYSENTER_ESP, stack_bottom);
>>>>>>> + wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
>>>>>>> + wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
>>>>>>> + }
>>>>>>> +
>>>>>>> + /* Trampoline for SYSCALL entry from compatibility mode. */
>>>>>>> + wrmsrl(MSR_CSTAR, stub_va);
>>>>>>> + offset += write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
>>>>>>> + stub_va, stack_bottom,
>>>>>>> + (unsigned long)cstar_enter);
>>>>>>> +
>>>>>>> + /* Don't consume more than half of the stub space here. */
>>>>>>> + ASSERT(offset <= STUB_BUF_SIZE / 2);
>>>>>>> +
>>>>>>> + unmap_domain_page(stub_page);
>>>>>>> +
>>>>>>> + /* Common SYSCALL parameters. */
>>>>>>> + wrmsrl(MSR_STAR, XEN_MSR_STAR);
>>>>>>> + wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK);
>>>>>>> +}
>>>>>> These want adjusting to use wrmsrns(), similarly to the previous patch.
>>>>>> Fixed locally.
>>>>> Also the one higher in the function, I suppose.
>>>> All of them.
>>>>
>>>> I'm not aware of anywhere where we want serialising behaviour, except for
>>>> ICR which is buggily non-serialising and has workarounds.
>>>>
>>>> But I'm also not sure enough of this to suggest that we make wrmsr() be
>>>> wrmsrns() by default.
>>> I'm pretty sure we don't want this. If nothing else then to avoid code bloat
>>> for MSR writes which are non-serializing even in the original form.
>> Even that's complicated.
>>
>> For FRED, FS/GS_BASE/KERN need changes because the lack of SWAPGS forces
>> MSR accesses even if we do have FSGSBASE active.
>>
>> Writes to these were made non-serialising in Zen2 and later, but are
>> still serialising on Intel. i.e. they need converting to WRMSRNS even
>> though plain WRMSR would be "fine" on all AMD systems (either because
>> it's the only option, or because it's non-serialising).
> Right, such would need converting. But x2APIC MSR accesses, for example,
> shouldn't have a need.
For serialising-ness, yes, but they still want to be MSR_IMM when
available, at which point the code bloat price is already paid.
~Andrew
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |