
[Xen-devel] [PATCH] x86/hvm: don't rely on shared ioreq state for completion handling



Both hvm_io_pending() and hvm_wait_for_io() use the ioreq structure shared
with the emulator to determine whether there is a pending I/O. The latter
will misbehave if the shared state is driven to STATE_IOREQ_NONE by the
emulator, or when the shared ioreq page is cleared for re-insertion into
the guest P2M on ioreq server disable (STATE_IOREQ_NONE == 0, so a zeroed
page reads as "no request"): in either case it terminates its wait without
calling hvm_io_assist() to adjust Xen's internal I/O emulation state. An
I/O completion handler may then find that internal state inconsistent and
call domain_crash().
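
To make the failure mode concrete, here is a minimal stand-alone model of
the pre-patch logic (hypothetical code for illustration only; names and
structures are simplified and do not match the real functions):

    #include <stdio.h>
    #include <stdint.h>

    enum { STATE_IOREQ_NONE = 0, STATE_IOREQ_READY, STATE_IORESP_READY };

    struct ioreq   { int state; uint64_t data; }; /* emulator-writable page */
    struct vcpu_io { int state; uint64_t data; }; /* Xen-private state      */

    /* Pre-patch shape of hvm_wait_for_io(): loop termination trusts the
     * shared state, which the emulator (or a page clear) can zero. */
    static void wait_for_io(struct ioreq *p, struct vcpu_io *vio)
    {
        while ( p->state != STATE_IOREQ_NONE )
        {
            if ( p->state == STATE_IORESP_READY )
            {
                /* The hvm_io_assist() step: fix up internal state. */
                vio->state = STATE_IORESP_READY;
                vio->data = p->data;
                p->state = STATE_IOREQ_NONE;
            }
        }
    }

    int main(void)
    {
        struct ioreq p     = { .state = STATE_IOREQ_READY }; /* I/O sent */
        struct vcpu_io vio = { .state = STATE_IOREQ_READY };

        p.state = STATE_IOREQ_NONE; /* emulator dies; shared page zeroed */
        wait_for_io(&p, &vio);      /* exits at once, skipping the fixup */

        /* Xen-private state still claims a request is in flight. */
        printf("internal state = %d\n", vio.state); /* prints 1, not 2 */
        return 0;
    }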

This patch fixes the problem by adding a 'pending' flag to the ioreq
server's per-vcpu structure. Because this flag cannot be directly
manipulated by the emulator, it can be used reliably to determine whether
an I/O is actually pending for that vcpu on that ioreq server. If an I/O
is pending and the shared state is seen to go to STATE_IOREQ_NONE, this is
treated as an abnormal completion of emulation (hence any data placed in
the shared structure is ignored) and the internal state is adjusted as for
a normal completion. Thus, when a completion handler subsequently runs,
the internal state is as expected and domain_crash() is not called.
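
In the same toy model, the patched loop keys off a Xen-private flag
instead, so a cleared page becomes an abnormal completion rather than a
silent exit (again hypothetical and simplified; the real hunks follow):

    /* Reuses the enum and struct definitions from the model above. */
    struct ioreq_vcpu { struct vcpu_io *vio; int pending; };

    static void io_assist(struct ioreq_vcpu *sv, uint64_t data)
    {
        sv->vio->state = STATE_IORESP_READY; /* internal fixup always runs */
        sv->vio->data = data;
        sv->pending = 0;                     /* only Xen writes this flag  */
    }

    /* sv->pending is set to 1 when the request is sent (hvm_send_ioreq()
     * in the real patch) and only ever cleared by Xen itself. */
    static void wait_for_io_fixed(struct ioreq_vcpu *sv, struct ioreq *p)
    {
        while ( sv->pending )
        {
            switch ( p->state )
            {
            case STATE_IOREQ_NONE:
                /* Emulator died mid-request: abnormal completion. Its
                 * data cannot be trusted, so substitute ~0. */
                io_assist(sv, ~0ul);
                break;
            case STATE_IORESP_READY:
                p->state = STATE_IOREQ_NONE;
                io_assist(sv, p->data);      /* normal completion */
                break;
            }
        }
    }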

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
Reported-by: Sander Eikelenboom <linux@xxxxxxxxxxxxxx>
Tested-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c           |   46 +++++++++++++++++++++++++-------------
 xen/include/asm-x86/hvm/domain.h |    1 +
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ec1d797..4ab9498 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -412,44 +412,57 @@ bool_t hvm_io_pending(struct vcpu *v)
                           &d->arch.hvm_domain.ioreq_server.list,
                           list_entry )
     {
-        ioreq_t *p = get_ioreq(s, v);
+        struct hvm_ioreq_vcpu *sv;
 
-        if ( p->state != STATE_IOREQ_NONE )
-            return 1;
+        list_for_each_entry ( sv,
+                              &s->ioreq_vcpu_list,
+                              list_entry )
+        {
+            if ( sv->vcpu == v && sv->pending )
+                return 1;
+        }
     }
 
     return 0;
 }
 
-static void hvm_io_assist(ioreq_t *p)
+static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
 {
-    struct vcpu *curr = current;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
-
-    p->state = STATE_IOREQ_NONE;
+    struct vcpu *v = sv->vcpu;
+    struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
 
     if ( hvm_vcpu_io_need_completion(vio) )
     {
         vio->io_req.state = STATE_IORESP_READY;
-        vio->io_req.data = p->data;
+        vio->io_req.data = data;
     }
     else
         vio->io_req.state = STATE_IOREQ_NONE;
 
-    msix_write_completion(curr);
-    vcpu_end_shutdown_deferral(curr);
+    msix_write_completion(v);
+    vcpu_end_shutdown_deferral(v);
+
+    sv->pending = 0;
 }
 
 static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
 {
-    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
-    while ( p->state != STATE_IOREQ_NONE )
+    while ( sv->pending )
     {
         switch ( p->state )
         {
+        case STATE_IOREQ_NONE:
+            /*
+             * The only reason we should see this case is when an
+             * emulator is dying and it races with an I/O being
+             * requested.
+             */
+            hvm_io_assist(sv, ~0ul);
+            break;
         case STATE_IORESP_READY: /* IORESP_READY -> NONE */
             rmb(); /* see IORESP_READY /then/ read contents of ioreq */
-            hvm_io_assist(p);
+            p->state = STATE_IOREQ_NONE;
+            hvm_io_assist(sv, p->data);
             break;
         case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
         case STATE_IOREQ_INPROCESS:
@@ -459,6 +472,7 @@ static bool_t hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
             break;
         default:
             gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
+            sv->pending = 0;
             domain_crash(sv->vcpu->domain);
             return 0; /* bail */
         }
@@ -489,7 +503,7 @@ void hvm_do_resume(struct vcpu *v)
                               &s->ioreq_vcpu_list,
                               list_entry )
         {
-            if ( sv->vcpu == v )
+            if ( sv->vcpu == v && sv->pending )
             {
                 if ( !hvm_wait_for_io(sv, get_ioreq(s, v)) )
                     return;
@@ -2743,6 +2757,8 @@ int hvm_send_ioreq(struct hvm_ioreq_server *s, ioreq_t *proto_p,
              */
             p->state = STATE_IOREQ_READY;
             notify_via_xen_event_channel(d, port);
+
+            sv->pending = 1;
             return X86EMUL_RETRY;
         }
     }
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index b612755..12b5e0c 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -46,6 +46,7 @@ struct hvm_ioreq_vcpu {
     struct list_head list_entry;
     struct vcpu      *vcpu;
     evtchn_port_t    ioreq_evtchn;
+    bool_t           pending;
 };
 
 #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
-- 
1.7.10.4

