|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 2/3] Limit retry in ControllerGetResponse
Would keeping a count of the number of STATUS_TIMEOUTs returned by
ControllerGetResponse() and breaking out of the loop after 50 timeouts be
simpler?
Or is it required to query the time to ensure a more accurate poll?
Owen
________________________________________
From: win-pv-devel <win-pv-devel-bounces@xxxxxxxxxxxxxxxxxxxx> on behalf of Tu
Dinh <ngoc-tu.dinh@xxxxxxxxxx>
Sent: 06 November 2025 2:18 PM
To: win-pv-devel@xxxxxxxxxxxxxxxxxxxx
Cc: Tu Dinh
Subject: [PATCH v2 2/3] Limit retry in ControllerGetResponse
During unplug, NDIS may issue a hash reconfiguration. However, the
backend may disappear before the request has had a chance to complete.
This is observable by e.g. adding a delay at the beginning of
VifReceiverSetHashAlgorithm and unplugging. If this happens, Xenvif will
get stuck waiting forever in ControllerGetResponse, at DISPATCH_LEVEL to
boot.
Limit the total wait time from calling EvtchnWait to 5 seconds, and
return an error code to the caller if the wait has failed.
Signed-off-by: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
---
src/xenvif/controller.c | 36 +++++++++++++++++++++++++++++++-----
1 file changed, 31 insertions(+), 5 deletions(-)
diff --git a/src/xenvif/controller.c b/src/xenvif/controller.c
index e032972..6a091d6 100644
--- a/src/xenvif/controller.c
+++ b/src/xenvif/controller.c
@@ -246,9 +246,11 @@ fail1:
#define TIME_US(_us) ((_us) * 10)
#define TIME_MS(_ms) (TIME_US((_ms) * 1000))
+#define TIME_S(_s) (TIME_MS((_s) * 1000))
#define TIME_RELATIVE(_t) (-(_t))
-#define XENVIF_CONTROLLER_POLL_PERIOD 100 // ms
+#define XENVIF_CONTROLLER_POLL_PERIOD TIME_MS(100)
+#define XENVIF_CONTROLLER_MAX_WAIT TIME_S(5)
_IRQL_requires_(DISPATCH_LEVEL)
static NTSTATUS
@@ -257,10 +259,15 @@ ControllerGetResponse(
OUT PULONG Data OPTIONAL
)
{
- LARGE_INTEGER Timeout;
+ LARGE_INTEGER PollTimeout;
+ LARGE_INTEGER MaxTimeout;
+ LARGE_INTEGER Start;
NTSTATUS status;
- Timeout.QuadPart = TIME_RELATIVE(TIME_MS(XENVIF_CONTROLLER_POLL_PERIOD));
+ PollTimeout.QuadPart = TIME_RELATIVE(XENVIF_CONTROLLER_POLL_PERIOD);
+ MaxTimeout.QuadPart = TIME_RELATIVE(XENVIF_CONTROLLER_MAX_WAIT);
+
+ KeQuerySystemTime(&Start);
for (;;) {
ULONG Count;
@@ -279,9 +286,27 @@ ControllerGetResponse(
&Controller->EvtchnInterface,
Controller->Channel,
Count + 1,
- &Timeout);
- if (status == STATUS_TIMEOUT)
+ &PollTimeout);
+ if (status == STATUS_TIMEOUT) {
+ LARGE_INTEGER Now;
+ LONGLONG Delta;
+
+ KeQuerySystemTime(&Now);
+
+ // Relative timeout
+ Delta = Now.QuadPart - Start.QuadPart;
+ if (Delta > -MaxTimeout.QuadPart)
+ break;
+
__ControllerSend(Controller);
+ }
+ }
+
+ // Use STATUS_TRANSACTION_TIMED_OUT as an error code since STATUS_TIMEOUT
is
+ // a success code.
+ if (Controller->Response.id != Controller->Request.id) {
+ status = STATUS_TRANSACTION_TIMED_OUT;
+ goto done;
}
ASSERT3U(Controller->Response.type, ==, Controller->Request.type);
@@ -311,6 +336,7 @@ ControllerGetResponse(
if (NT_SUCCESS(status) && Data != NULL)
*Data = Controller->Response.data;
+done:
RtlZeroMemory(&Controller->Request,
sizeof (struct xen_netif_ctrl_request));
RtlZeroMemory(&Controller->Response,
--
2.51.0.windows.2
--
Ngoc Tu Dinh | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |