[xen stable-4.15] x86/pv: Clean up _get_page_type()
commit cc74ff882364317e3667c6251cc79bc76fa20ae4
Author: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Thu Jun 9 15:35:00 2022 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Jun 9 15:35:00 2022 +0200
x86/pv: Clean up _get_page_type()
Various fixes for clarity, ahead of making complicated changes.
  * Split the overflow check out of the if/else chain for type handling, as
    it's somewhat unrelated.

  * Comment the main if/else chain to explain what is going on.  Adjust one
    ASSERT() and state the bit layout for the validate-locked and partial
    states.

  * Correct the comment about TLB flushing, as it's backwards.  The problem
    case is when writeable mappings are retained to a page becoming
    read-only, as it allows the guest to bypass Xen's safety checks for
    updates.

  * Reduce the scope of 'y'.  It is an artefact of the cmpxchg loop and not
    valid for use by subsequent logic.  Switch to using ACCESS_ONCE() to
    treat all reads as explicitly volatile.  The only thing preventing the
    validated wait-loop being infinite is the compiler barrier hidden in
    cpu_relax().  (A sketch of this read-modify-cmpxchg pattern follows
    the list.)

  * Replace one page_get_owner(page) with the already-calculated 'd' in
    scope.
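
As a sketch of the pattern the 'y' bullet refers to -- not Xen's actual
code: ACCESS_ONCE() below is the common definition rather than a copy of
Xen's, PGT_count_mask is a stand-in constant, and the compare-exchange
uses a GCC/Clang builtin in place of Xen's cmpxchg():

    #include <stdbool.h>

    /* Illustrative definition of the volatile-read macro. */
    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    /* Stand-in: the low bits of the word hold the reference count. */
    #define PGT_count_mask 0xffUL

    static unsigned long type_info; /* stands in for page->u.inuse.type_info */

    static bool try_get_ref(void)
    {
        /* Read once through a volatile access; without it the compiler
         * may hoist the load out of the loop and spin on a stale value. */
        unsigned long y = ACCESS_ONCE(type_info);

        for ( ; ; )
        {
            unsigned long x = y, nx = x + 1;

            if ( (nx & PGT_count_mask) == 0 )
                return false;    /* count would overflow and wrap to 0 */

            /* On failure the builtin refreshes 'y' with the value it
             * observed, so the next iteration retries on fresh state. */
            if ( __atomic_compare_exchange_n(&type_info, &y, nx, false,
                                             __ATOMIC_SEQ_CST,
                                             __ATOMIC_SEQ_CST) )
                return true;
        }
    }

The point of the bullet: 'y' is only meaningful at the top of each
iteration, so once the loop exits, later logic must use 'x'/'nx' -- which
is why the patch shrinks 'y' into the loop.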
No functional change.
This is part of XSA-401 / CVE-2022-26362.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>
master commit: 9186e96b199e4f7e52e033b238f9fe869afb69c7
master date: 2022-06-09 14:20:36 +0200
---
xen/arch/x86/mm.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 61 insertions(+), 11 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 13f9dd9db9..f630d9c1ee 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2894,16 +2894,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags,
 static int _get_page_type(struct page_info *page, unsigned long type,
                           bool preemptible)
 {
-    unsigned long nx, x, y = page->u.inuse.type_info;
+    unsigned long nx, x;
     int rc = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
     ASSERT(!in_irq());
 
-    for ( ; ; )
+    for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; )
     {
         x  = y;
         nx = x + 1;
+
         if ( unlikely((nx & PGT_count_mask) == 0) )
         {
             gdprintk(XENLOG_WARNING,
@@ -2911,8 +2912,15 @@ static int _get_page_type(struct page_info *page, unsigned long type,
                      mfn_x(page_to_mfn(page)));
             return -EINVAL;
         }
-        else if ( unlikely((x & PGT_count_mask) == 0) )
+
+        if ( unlikely((x & PGT_count_mask) == 0) )
         {
+            /*
+             * Typeref 0 -> 1.
+             *
+             * Type changes are permitted when the typeref is 0.  If the type
+             * actually changes, the page needs re-validating.
+             */
             struct domain *d = page_get_owner(page);
 
             if ( d && shadow_mode_enabled(d) )
@@ -2923,8 +2931,8 @@ static int _get_page_type(struct page_info *page, unsigned long type,
             {
                 /*
                  * On type change we check to flush stale TLB entries. It is
-                 * vital that no other CPUs are left with mappings of a frame
-                 * which is about to become writeable to the guest.
+                 * vital that no other CPUs are left with writeable mappings
+                 * to a frame which is intending to become pgtable/segdesc.
                  */
                 cpumask_t *mask = this_cpu(scratch_cpumask);
 
@@ -2936,7 +2944,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
 
                 if ( unlikely(!cpumask_empty(mask)) &&
                      /* Shadow mode: track only writable pages. */
-                     (!shadow_mode_enabled(page_get_owner(page)) ||
+                     (!shadow_mode_enabled(d) ||
                       ((nx & PGT_type_mask) == PGT_writable_page)) )
                 {
                     perfc_incr(need_flush_tlb_flush);
@@ -2967,7 +2975,14 @@ static int _get_page_type(struct page_info *page, unsigned long type,
         }
         else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
         {
-            /* Don't log failure if it could be a recursive-mapping attempt. */
+            /*
+             * else, we're trying to take a new reference, of the wrong type.
+             *
+             * This (being able to prohibit use of the wrong type) is what the
+             * typeref system exists for, but skip printing the failure if it
+             * looks like a recursive mapping, as subsequent logic might
+             * ultimately permit the attempt.
+             */
             if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
                  (type == PGT_l1_page_table) )
                 return -EINVAL;
@@ -2986,18 +3001,46 @@ static int _get_page_type(struct page_info *page, unsigned long type,
         }
         else if ( unlikely(!(x & PGT_validated)) )
         {
+            /*
+             * else, the count is non-zero, and we're grabbing the right type;
+             * but the page hasn't been validated yet.
+             *
+             * The page is in one of two states (depending on PGT_partial),
+             * and should have exactly one reference.
+             */
+            ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
+
             if ( !(x & PGT_partial) )
             {
-                /* Someone else is updating validation of this page. Wait... */
+                /*
+                 * The page has been left in the "validate locked" state
+                 * (i.e. PGT_[type] | 1) which means that a concurrent caller
+                 * of _get_page_type() is in the middle of validation.
+                 *
+                 * Spin waiting for the concurrent user to complete (partial
+                 * or fully validated), then restart our attempt to acquire a
+                 * type reference.
+                 */
                 do {
                     if ( preemptible && hypercall_preempt_check() )
                         return -EINTR;
                     cpu_relax();
-                } while ( (y = page->u.inuse.type_info) == x );
+                } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
                 continue;
             }
-            /* Type ref count was left at 1 when PGT_partial got set. */
-            ASSERT((x & PGT_count_mask) == 1);
+
+            /*
+             * The page has been left in the "partial" state
+             * (i.e., PGT_[type] | PGT_partial | 1).
+             *
+             * Rather than bumping the type count, we need to try to grab the
+             * validation lock; if we succeed, we need to validate the page,
+             * then drop the general ref associated with the PGT_partial bit.
+             *
+             * We grab the validation lock by setting nx to (PGT_[type] | 1)
+             * (i.e., non-zero type count, neither PGT_validated nor
+             * PGT_partial set).
+             */
             nx = x & ~PGT_partial;
         }
 
@@ -3046,6 +3089,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
     }
 
  out:
+    /*
+     * Did we drop the PGT_partial bit when acquiring the typeref?  If so,
+     * drop the general reference that went along with it.
+     *
+     * N.B. validate_page() may have re-set PGT_partial, not reflected in
+     * nx, but will have taken an extra ref when doing so.
+     */
     if ( (x & PGT_partial) && !(nx & PGT_partial) )
         put_page(page);
 
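
To make the two not-yet-validated states in the new comments concrete,
here is a hedged sketch.  The PGT_* names are Xen's, but the bit
positions below are invented for the example, and the helper functions
are hypothetical (the patch itself only asserts on type and count):

    /* Stand-in bit layout; Xen's real assignments differ. */
    #define PGT_count_mask  0x00000000ffffffffUL  /* typeref count */
    #define PGT_type_mask   0xf000000000000000UL  /* page type */
    #define PGT_validated   0x0080000000000000UL  /* validation completed */
    #define PGT_partial     0x0040000000000000UL  /* validation preempted */

    /* "Validate locked": PGT_[type] | 1.  One reference, with neither
     * PGT_validated nor PGT_partial set: a concurrent caller holds the
     * validation lock and is mid-validation, so we spin and retry. */
    static bool is_validate_locked(unsigned long x, unsigned long type)
    {
        return (x & (PGT_type_mask | PGT_validated | PGT_partial |
                     PGT_count_mask)) == (type | 1);
    }

    /* "Partial": PGT_[type] | PGT_partial | 1.  Validation was preempted;
     * the next caller clears PGT_partial (taking the validation lock
     * itself) and, once done, drops the general ref that accompanied the
     * PGT_partial bit. */
    static bool is_partial(unsigned long x, unsigned long type)
    {
        return (x & (PGT_type_mask | PGT_validated | PGT_partial |
                     PGT_count_mask)) == (type | PGT_partial | 1);
    }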
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.15