|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] netback Oops then xenwatch stuck in D state
>>> On 14.02.13 at 13:45, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
>>>> On 14.02.13 at 13:33, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
>>>>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@xxxxxxxxxx> wrote:
>>> On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote:
>>>> I don't think this patch will fix his problems, which - as described
>>>> yesterday - I'm relatively certain result from the harsh action
>>>> netbk_fatal_tx_err() does.
>>>
>>> Yes, having this one only is not enough. It will need to either disable
>>> the vif timer or check vif->netbk != NULL inside timer callback.
>>
>> I continue to be unclear what timer you refer to.
>>
>> If we keep the carrier-off in fatal_tx_err, then _all_
>> asynchronously callable interfaces need to check whether the
>> vif -> netbk association went away. But, especially in the
>> context of using tasklets instead of kthreads, I meanwhile
>> think that simply setting a flag (along with turning off the IRQ)
>> would be better (and keep the turning off of the carrier the way
>> it had been done before. The flag would basically need checking
>> anywhere we look at the shared ring (which ought to be very
>> few places), and perhaps should also cause xenvif_schedulable()
>> to return false.
>
> Or a "light" version of xenvif_down(), i.e. simply
>
> disable_irq(vif->irq);
> xen_netbk_deschedule_xenvif(vif);
>
> If I'm not mistaken, doing this instead of xenvif_carrier_off()
> could be all that's needed.
Like the below/attached (compile tested only so far).
Jan
netback: fix shutting down the ring if it contains garbage
Using rtnl_lock() in tasklet context is not permitted.
This undoes the part of 1219:5108c6901b30 that split off
xenvif_carrier_off() from netif_disconnect().
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -78,6 +78,8 @@ typedef struct netif_st {
u8 can_queue:1; /* can queue packets for receiver? */
u8 copying_receiver:1; /* copy packets to receiver? */
+ u8 busted:1;
+
/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
RING_IDX rx_req_cons_peek;
@@ -195,7 +197,8 @@ int netif_map(netif_t *netif, unsigned l
void netif_xenbus_init(void);
#define netif_schedulable(netif) \
- (netif_running((netif)->dev) && netback_carrier_ok(netif))
+ (likely(!(netif)->busted) && \
+ netif_running((netif)->dev) && netback_carrier_ok(netif))
void netif_schedule_work(netif_t *netif);
void netif_deschedule_work(netif_t *netif);
@@ -204,9 +207,6 @@ int netif_be_start_xmit(struct sk_buff *
struct net_device_stats *netif_be_get_stats(struct net_device *dev);
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-/* Prevent the device from generating any further traffic. */
-void xenvif_carrier_off(netif_t *netif);
-
static inline int netbk_can_queue(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -56,6 +56,7 @@ module_param_named(queue_length, netbk_q
static void __netif_up(netif_t *netif)
{
+ netif->busted = 0;
enable_irq(netif->irq);
netif_schedule_work(netif);
}
@@ -347,23 +348,19 @@ err_rx:
return err;
}
-void xenvif_carrier_off(netif_t *netif)
-{
- rtnl_lock();
- netback_carrier_off(netif);
- netif_carrier_off(netif->dev); /* discard queued packets */
- if (netif_running(netif->dev))
- __netif_down(netif);
- rtnl_unlock();
- netif_put(netif);
-}
-
void netif_disconnect(struct backend_info *be)
{
netif_t *netif = be->netif;
- if (netback_carrier_ok(netif))
- xenvif_carrier_off(netif);
+ if (netback_carrier_ok(netif)) {
+ rtnl_lock();
+ netback_carrier_off(netif);
+ netif_carrier_off(netif->dev); /* discard queued packets */
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ }
atomic_dec(&netif->refcnt);
wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -845,7 +845,7 @@ void netif_schedule_work(netif_t *netif)
RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
#endif
- if (more_to_do) {
+ if (more_to_do && likely(!netif->busted)) {
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
}
@@ -1024,7 +1024,9 @@ static void netbk_fatal_tx_err(netif_t *
{
printk(KERN_ERR "%s: fatal error; disabling device\n",
netif->dev->name);
- xenvif_carrier_off(netif);
+ netif->busted = 1;
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
netif_put(netif);
}
@@ -1292,6 +1294,11 @@ static void net_tx_action(unsigned long
if (!netif)
continue;
+ if (unlikely(netif->busted)) {
+ netif_put(netif);
+ continue;
+ }
+
if (netif->tx.sring->req_prod - netif->tx.req_cons >
NET_TX_RING_SIZE) {
printk(KERN_ERR "%s: Impossible number of requests. "
Attachment:
xen-netback-garbage-ring-fix.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |