[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 2/3] Limit retry in ControllerGetResponse



During unplug, NDIS may issue a hash reconfiguration. However, the
backend may disappear before the request has had a chance to complete.
This is observable by e.g. adding a delay at the beginning of
VifReceiverSetHashAlgorithm and unplugging. If this happens, Xenvif will
get stuck waiting forever in ControllerGetResponse, at DISPATCH_LEVEL to
boot.

Limit the total wait time from calling EvtchnWait to 5 seconds, and
return an error code to the caller if the wait has failed.

Signed-off-by: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
---
 src/xenvif/controller.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/src/xenvif/controller.c b/src/xenvif/controller.c
index e032972..6a091d6 100644
--- a/src/xenvif/controller.c
+++ b/src/xenvif/controller.c
@@ -246,9 +246,11 @@ fail1:
 
 #define TIME_US(_us)        ((_us) * 10)
 #define TIME_MS(_ms)        (TIME_US((_ms) * 1000))
+#define TIME_S(_s)          (TIME_MS((_s) * 1000))
 #define TIME_RELATIVE(_t)   (-(_t))
 
-#define XENVIF_CONTROLLER_POLL_PERIOD 100 // ms
+#define XENVIF_CONTROLLER_POLL_PERIOD   TIME_MS(100)
+#define XENVIF_CONTROLLER_MAX_WAIT      TIME_S(5)
 
 _IRQL_requires_(DISPATCH_LEVEL)
 static NTSTATUS
@@ -257,10 +259,15 @@ ControllerGetResponse(
     OUT PULONG                      Data OPTIONAL
     )
 {
-    LARGE_INTEGER                   Timeout;
+    LARGE_INTEGER                   PollTimeout;
+    LARGE_INTEGER                   MaxTimeout;
+    LARGE_INTEGER                   Start;
     NTSTATUS                        status;
 
-    Timeout.QuadPart = TIME_RELATIVE(TIME_MS(XENVIF_CONTROLLER_POLL_PERIOD));
+    PollTimeout.QuadPart = TIME_RELATIVE(XENVIF_CONTROLLER_POLL_PERIOD);
+    MaxTimeout.QuadPart = TIME_RELATIVE(XENVIF_CONTROLLER_MAX_WAIT);
+
+    KeQuerySystemTime(&Start);
 
     for (;;) {
         ULONG   Count;
@@ -279,9 +286,27 @@ ControllerGetResponse(
                                &Controller->EvtchnInterface,
                                Controller->Channel,
                                Count + 1,
-                               &Timeout);
-        if (status == STATUS_TIMEOUT)
+                               &PollTimeout);
+        if (status == STATUS_TIMEOUT) {
+            LARGE_INTEGER       Now;
+            LONGLONG            Delta;
+
+            KeQuerySystemTime(&Now);
+
+            // Relative timeout
+            Delta = Now.QuadPart - Start.QuadPart;
+            if (Delta > -MaxTimeout.QuadPart)
+                break;
+
             __ControllerSend(Controller);
+        }
+    }
+
+    // Use STATUS_TRANSACTION_TIMED_OUT as an error code since STATUS_TIMEOUT 
is
+    // a success code.
+    if (Controller->Response.id != Controller->Request.id) {
+        status = STATUS_TRANSACTION_TIMED_OUT;
+        goto done;
     }
 
     ASSERT3U(Controller->Response.type, ==, Controller->Request.type);
@@ -311,6 +336,7 @@ ControllerGetResponse(
     if (NT_SUCCESS(status) && Data != NULL)
         *Data = Controller->Response.data;
 
+done:
     RtlZeroMemory(&Controller->Request,
                   sizeof (struct xen_netif_ctrl_request));
     RtlZeroMemory(&Controller->Response,
-- 
2.51.0.windows.2



--
Ngoc Tu Dinh | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.