|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2] x86/viridian: Add Partition Reference Time enlightenment
On 10.10.14 13:55, Egger, Christoph wrote:
> On 29.09.14 12:28, Paul Durrant wrote:
>> The presence of the partition reference time enlightenment persuades newer
>> versions of Windows to prefer the TSC as their primary time source. Hence,
>> if rdtsc is not being emulated and is invariant then many vmexits (for
>> alternative time sources such as the HPET or reference counter MSR) can
>> be avoided.
>>
>> The implementation is not yet complete as no attempt is made to prevent
>> emulation of rdtsc if the enlightenment is active and guest and host
>> TSC frequencies differ. To do that requires invasive changes in the core
>> x86 time code and hence a lot more testing.
>>
>> This patch avoids the issue by disabling the enlightenment if rdtsc is
>> being emulated, causing Windows to choose another time source. This is
>> safe, but may cause a big variation in performance of guests migrated
>> between hosts of differing TSC frequency. Thus the enlightenment is not
>> enabled in the default set, but may be enabled to improve guest performance
>> where such migrations are not a concern.
>>
>> See section 15.4 of the Microsoft Hypervisor Top Level Functional
>> Specification v4.0a for details.
>>
>> Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
>> Cc: Keir Fraser <keir@xxxxxxx>
>> Cc: Jan Beulich <jbeulich@xxxxxxxx>
>> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
>> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
>> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
>> Cc: Christoph Egger <chegger@xxxxxxxxx>
>
> Reviewed-by: Christoph Egger <chegger@xxxxxxxxx>
I found one issue in update_reference_tsc(). See below.
Christoph
>
> Christoph
>
>> ---
>> v2: - Addressed comments from Jan:
>> - Leave enlightenment out of default set and add a comment explaining
>> why.
>> - De-dup code surrounding adjustment of sequence number on resume.
>> - Remove accidental code deletion.
>>
>> docs/man/xl.cfg.pod.5 | 6 ++
>> tools/libxl/libxl_dom.c | 3 +
>> tools/libxl/libxl_types.idl | 1 +
>> xen/arch/x86/hvm/viridian.c | 113
>> +++++++++++++++++++++++++++++++-
>> xen/include/asm-x86/hvm/viridian.h | 35 ++++++++++
>> xen/include/public/arch-x86/hvm/save.h | 11 ++++
>> xen/include/public/hvm/params.h | 7 +-
>> 7 files changed, 174 insertions(+), 2 deletions(-)
>>
>> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
>> index 8bba21c..925adbb 100644
>> --- a/docs/man/xl.cfg.pod.5
>> +++ b/docs/man/xl.cfg.pod.5
>> @@ -1163,6 +1163,12 @@ This group incorporates Partition Time Reference
>> Counter MSR. This
>> enlightenment can improve performance of Windows 8 and Windows
>> Server 2012 onwards.
>>
>> +=item B<reference_tsc>
>> +
>> +This set incorporates the Partition Reference TSC MSR. This
>> +enlightenment can improve performance of Windows 7 and Windows
>> +Server 2008 R2 onwards.
>> +
>> =item B<defaults>
>>
>> This is a special value that enables the default set of groups, which
>> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
>> index bd21841..d04fc0d 100644
>> --- a/tools/libxl/libxl_dom.c
>> +++ b/tools/libxl/libxl_dom.c
>> @@ -262,6 +262,9 @@ static int hvm_set_viridian_features(libxl__gc *gc,
>> uint32_t domid,
>> if (libxl_bitmap_test(&enlightenments,
>> LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
>> mask |= HVMPV_time_ref_count;
>>
>> + if (libxl_bitmap_test(&enlightenments,
>> LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
>> + mask |= HVMPV_reference_tsc;
>> +
>> if (mask != 0 &&
>> xc_hvm_param_set(CTX->xch,
>> domid,
>> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
>> index 90d152f..da75a40 100644
>> --- a/tools/libxl/libxl_types.idl
>> +++ b/tools/libxl/libxl_types.idl
>> @@ -183,6 +183,7 @@ libxl_viridian_enlightenment =
>> Enumeration("viridian_enlightenment", [
>> (0, "base"),
>> (1, "freq"),
>> (2, "time_ref_count"),
>> + (3, "reference_tsc"),
>> ])
>>
>> #
>> diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
>> index 6726168..a7d3283 100644
>> --- a/xen/arch/x86/hvm/viridian.c
>> +++ b/xen/arch/x86/hvm/viridian.c
>> @@ -21,6 +21,7 @@
>> #define VIRIDIAN_MSR_HYPERCALL 0x40000001
>> #define VIRIDIAN_MSR_VP_INDEX 0x40000002
>> #define VIRIDIAN_MSR_TIME_REF_COUNT 0x40000020
>> +#define VIRIDIAN_MSR_REFERENCE_TSC 0x40000021
>> #define VIRIDIAN_MSR_TSC_FREQUENCY 0x40000022
>> #define VIRIDIAN_MSR_APIC_FREQUENCY 0x40000023
>> #define VIRIDIAN_MSR_EOI 0x40000070
>> @@ -40,6 +41,7 @@
>> #define CPUID3A_MSR_APIC_ACCESS (1 << 4)
>> #define CPUID3A_MSR_HYPERCALL (1 << 5)
>> #define CPUID3A_MSR_VP_INDEX (1 << 6)
>> +#define CPUID3A_MSR_REFERENCE_TSC (1 << 9)
>> #define CPUID3A_MSR_FREQ (1 << 11)
>>
>> /* Viridian CPUID 4000004, Implementation Recommendations. */
>> @@ -95,6 +97,8 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int
>> *eax,
>> *eax |= CPUID3A_MSR_FREQ;
>> if ( viridian_feature_mask(d) & HVMPV_time_ref_count )
>> *eax |= CPUID3A_MSR_TIME_REF_COUNT;
>> + if ( viridian_feature_mask(d) & HVMPV_reference_tsc )
>> + *eax |= CPUID3A_MSR_REFERENCE_TSC;
>> break;
>> case 4:
>> /* Recommended hypercall usage. */
>> @@ -155,6 +159,17 @@ static void dump_apic_assist(const struct vcpu *v)
>> v, aa->fields.enabled, (unsigned long)aa->fields.pfn);
>> }
>>
>> +static void dump_reference_tsc(const struct domain *d)
>> +{
>> + const union viridian_reference_tsc *rt;
>> +
>> + rt = &d->arch.hvm_domain.viridian.reference_tsc;
>> +
>> + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn:
>> %lx\n",
>> + d->domain_id,
>> + rt->fields.enabled, (unsigned long)rt->fields.pfn);
>> +}
>> +
>> static void enable_hypercall_page(struct domain *d)
>> {
>> unsigned long gmfn =
>> d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
>> @@ -224,6 +239,78 @@ static void initialize_apic_assist(struct vcpu *v)
>> put_page_and_type(page);
>> }
>>
>> +static void update_reference_tsc(struct domain *d, bool_t initialize)
>> +{
>> + unsigned long gmfn =
>> d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
>> + struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
>> + HV_REFERENCE_TSC_PAGE *p;
>> +
>> + if ( !page || !get_page_type(page, PGT_writable_page) )
>> + {
>> + if ( page )
>> + put_page(page);
>> + gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn,
>> + page ? page_to_mfn(page) : INVALID_MFN);
>> + return;
>> + }
>> +
>> + p = __map_domain_page(page);
>> +
>> + if ( initialize )
>> + clear_page(p);
>> +
>> + /*
>> + * This enlightenment must be disabled is the host TSC is not invariant.
>> + * However it is also disabled if vtsc is true (which means rdtsc is
>> being
>> + * emulated). This generally happens when guest TSC freq and host TSC
>> freq
>> + * don't match. The TscScale value could be adjusted to cope with this,
>> + * allowing vtsc to be turned off, but support for this is not yet
>> present
>> + * in the hypervisor. Thus is it is possible that migrating a Windows VM
>> + * between hosts of differing TSC frequencies may result in large
>> + * differences in guest performance.
>> + */
>> + if ( !host_tsc_is_safe() || d->arch.vtsc )
>> + {
>> + /*
>> + * The specification states that valid values of TscSequence range
>> + * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
>> + * this mechanism is no longer a reliable source of time and that
>> + * the VM should fall back to a different source.
>> + *
>> + * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually
>> violate
>> + * the spec. and rely on a value of 0 to indicate that this
>> + * enlightenment should no longer be used. These two kernel
>> + * versions are currently the only ones to make use of this
>> + * enlightenment, so just use 0 here.
>> + */
>> + p->TscSequence = 0;
>> +
>> + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
>> + d->domain_id);
>> + return;
>> + }
>> +
>> + /*
>> + * The guest will calculate reference time according to the following
>> + * formula:
>> + *
>> + * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
>> + *
>> + * Windows uses a 100ns tick, so we need a scale which is cpu
>> + * ticks per 100ns shifted left by 64.
>> + */
>> + p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
>> +
>> + do {
>> + p->TscSequence++;
>> + } while ( p->TscSequence == 0xFFFFFFFF ||
>> + p->TscSequence == 0 ); /* Avoid both 'invalid' values */
This is reading guest memory so a malicious guest can spin writing 0 and
cause a DoS.
Better do here:
p->TscSequence++;
if ( p->TscSequence == 0xFFFFFFFF || p->TscSequence == 0 )
p->TscSequence = 1;
Christoph
>> +
>> + unmap_domain_page(p);
>> +
>> + put_page_and_type(page);
>> +}
>> +
>> int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
>> {
>> struct vcpu *v = current;
>> @@ -282,6 +369,17 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
>> initialize_apic_assist(v);
>> break;
>>
>> + case VIRIDIAN_MSR_REFERENCE_TSC:
>> + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
>> + return 0;
>> +
>> + perfc_incr(mshv_wrmsr_tsc_msr);
>> + d->arch.hvm_domain.viridian.reference_tsc.raw = val;
>> + dump_reference_tsc(d);
>> + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
>> + update_reference_tsc(d, 1);
>> + break;
>> +
>> default:
>> return 0;
>> }
>> @@ -346,6 +444,14 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
>> *val = v->arch.hvm_vcpu.viridian.apic_assist.raw;
>> break;
>>
>> + case VIRIDIAN_MSR_REFERENCE_TSC:
>> + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
>> + return 0;
>> +
>> + perfc_incr(mshv_rdmsr_tsc_msr);
>> + *val = d->arch.hvm_domain.viridian.reference_tsc.raw;
>> + break;
>> +
>> case VIRIDIAN_MSR_TIME_REF_COUNT:
>> {
>> uint64_t tsc;
>> @@ -452,6 +558,7 @@ static int viridian_save_domain_ctxt(struct domain *d,
>> hvm_domain_context_t *h)
>>
>> ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
>> ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw;
>> + ctxt.reference_tsc = d->arch.hvm_domain.viridian.reference_tsc.raw;
>>
>> return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
>> }
>> @@ -460,11 +567,15 @@ static int viridian_load_domain_ctxt(struct domain *d,
>> hvm_domain_context_t *h)
>> {
>> struct hvm_viridian_domain_context ctxt;
>>
>> - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
>> + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
>> return -EINVAL;
>>
>> d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
>> d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
>> + d->arch.hvm_domain.viridian.reference_tsc.raw = ctxt.reference_tsc;
>> +
>> + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
>> + update_reference_tsc(d, 0);
>>
>> return 0;
>> }
>> diff --git a/xen/include/asm-x86/hvm/viridian.h
>> b/xen/include/asm-x86/hvm/viridian.h
>> index 496da33..35a22f5 100644
>> --- a/xen/include/asm-x86/hvm/viridian.h
>> +++ b/xen/include/asm-x86/hvm/viridian.h
>> @@ -48,10 +48,35 @@ union viridian_hypercall_gpa
>> } fields;
>> };
>>
>> +union viridian_reference_tsc
>> +{
>> + uint64_t raw;
>> + struct
>> + {
>> + uint64_t enabled:1;
>> + uint64_t reserved_preserved:11;
>> + uint64_t pfn:48;
>> + } fields;
>> +};
>> +
>> +/*
>> + * Type defintion as in Microsoft Hypervisor Top-Level Functional
>> + * Specification v4.0a, section 15.4.2.
>> + */
>> +typedef struct _HV_REFERENCE_TSC_PAGE
>> +{
>> + uint32_t TscSequence;
>> + uint32_t Reserved1;
>> + uint64_t TscScale;
>> + int64_t TscOffset;
>> + uint64_t Reserved2[509];
>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>> +
>> struct viridian_domain
>> {
>> union viridian_guest_os_id guest_os_id;
>> union viridian_hypercall_gpa hypercall_gpa;
>> + union viridian_reference_tsc reference_tsc;
>> };
>>
>> int
>> @@ -76,3 +101,13 @@ int
>> viridian_hypercall(struct cpu_user_regs *regs);
>>
>> #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * tab-width: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/include/public/arch-x86/hvm/save.h
>> b/xen/include/public/arch-x86/hvm/save.h
>> index 16d85a3..006924b 100644
>> --- a/xen/include/public/arch-x86/hvm/save.h
>> +++ b/xen/include/public/arch-x86/hvm/save.h
>> @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave {
>> struct hvm_viridian_domain_context {
>> uint64_t hypercall_gpa;
>> uint64_t guest_os_id;
>> + uint64_t reference_tsc;
>> };
>>
>> DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct
>> hvm_viridian_domain_context);
>> @@ -616,3 +617,13 @@ struct hvm_msr {
>> #define HVM_SAVE_CODE_MAX 20
>>
>> #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * tab-width: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/include/public/hvm/params.h
>> b/xen/include/public/hvm/params.h
>> index 3c51072..a2d43bc 100644
>> --- a/xen/include/public/hvm/params.h
>> +++ b/xen/include/public/hvm/params.h
>> @@ -92,10 +92,15 @@
>> #define _HVMPV_time_ref_count 2
>> #define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count)
>>
>> +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */
>> +#define _HVMPV_reference_tsc 3
>> +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc)
>> +
>> #define HVMPV_feature_mask \
>> (HVMPV_base_freq | \
>> HVMPV_no_freq | \
>> - HVMPV_time_ref_count)
>> + HVMPV_time_ref_count | \
>> + HVMPV_reference_tsc)
>>
>> #endif
>>
>>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |