|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: Possible bug? DOM-U network stopped working after fatal error reported in DOM0
On Thu, Dec 30, 2021 at 11:12:57PM +0800, G.R. wrote:
> On Thu, Dec 30, 2021 at 3:07 AM Roger Pau Monné <roger.pau@xxxxxxxxxx> wrote:
> >
> > On Wed, Dec 29, 2021 at 11:27:50AM +0100, Roger Pau Monné wrote:
> > > On Wed, Dec 29, 2021 at 05:13:00PM +0800, G.R. wrote:
> > > > >
> > > > > I think this is hitting a KASSERT, could you paste the text printed as
> > > > > part of the panic (not just the backtrace)?
> > > > >
> > > > > Sorry this is taking a bit of time to solve.
> > > > >
> > > > > Thanks!
> > > > >
> > > > Sorry that I didn't make it clear in the first place.
> > > > It is the same cross boundary assertion.
> > >
> > > I see. After looking at the code it seems like sglist will coalesce
> > > contiguous physical ranges without taking page boundaries into
> > > account, which is not suitable for our purpose here. I guess I will
> > > either have to modify sglist, or switch to using bus_dma. The main
> > > problem with using bus_dma is that it will require bigger changes to
> > > netfront I think.
> >
> > I have a crappy patch to use bus_dma. It's not yet ready for upstream
> > but you might want to give it a try to see if it solves the cross page
> > boundary issues.
> >
> I think this version is better.
Thanks for all the testing.
> It fixed the mbuf cross boundary issue and allowed me to boot from one
> disk image successfully.
It's good to know it seems to handle splitting mbuf fragments at page
boundaries correctly.
> But seems like this patch is not stable enough yet and has its own
> issue -- memory is not properly released?
I know. I've been working on improving it this morning and I'm
attaching an updated version below.
Thanks, Roger.
---
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 8dba5a8dc6d5..69528cc39b94 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -71,6 +71,8 @@ __FBSDID("$FreeBSD$");
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>
+#include <machine/bus.h>
+
#include "xenbus_if.h"
/* Features supported by all backends. TSO and LRO can be negotiated */
@@ -199,6 +201,17 @@ struct netfront_txq {
struct taskqueue *tq;
struct task defrtask;
+ bus_dma_segment_t segs[MAX_TX_REQ_FRAGS];
+ struct mbuf_xennet {
+ struct m_tag tag;
+ bus_dma_tag_t dma_tag;
+ bus_dmamap_t dma_map;
+ struct netfront_txq *txq;
+ SLIST_ENTRY(mbuf_xennet) next;
+ u_int count;
+ } xennet_tag[NET_TX_RING_SIZE + 1];
+ SLIST_HEAD(, mbuf_xennet) tags;
+
bool full;
};
@@ -221,6 +234,8 @@ struct netfront_info {
struct ifmedia sc_media;
+ bus_dma_tag_t dma_tag;
+
bool xn_reset;
};
@@ -301,6 +316,42 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
return (ref);
}
+#define MTAG_COOKIE 1218492000
+#define MTAG_XENNET 0
+
+static void mbuf_grab(struct mbuf *m)
+{
+ struct mbuf_xennet *ref;
+
+ ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE,
+ MTAG_XENNET, NULL);
+ KASSERT(ref != NULL, ("Cannot find refcount"));
+ ref->count++;
+}
+
+static void mbuf_release(struct mbuf *m)
+{
+ struct mbuf_xennet *ref;
+
+ ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE,
+ MTAG_XENNET, NULL);
+ KASSERT(ref != NULL, ("Cannot find refcount"));
+ KASSERT(ref->count > 0, ("Invalid reference count"));
+
+ if (--ref->count == 0)
+ m_freem(m);
+}
+
+static void tag_free(struct m_tag *t)
+{
+ struct mbuf_xennet *ref = (struct mbuf_xennet *)t;
+
+ KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt"));
+ bus_dmamap_sync(ref->dma_tag, ref->dma_map, BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_destroy(ref->dma_tag, ref->dma_map);
+ SLIST_INSERT_HEAD(&ref->txq->tags, ref, next);
+}
+
#define IPRINTK(fmt, args...) \
printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
@@ -778,11 +829,18 @@ disconnect_txq(struct netfront_txq *txq)
static void
destroy_txq(struct netfront_txq *txq)
{
+ unsigned int i;
free(txq->ring.sring, M_DEVBUF);
buf_ring_free(txq->br, M_DEVBUF);
taskqueue_drain_all(txq->tq);
taskqueue_free(txq->tq);
+
+ for (i = 0; i <= NET_TX_RING_SIZE; i++) {
+ bus_dmamap_destroy(txq->info->dma_tag,
+ txq->xennet_tag[i].dma_map);
+ txq->xennet_tag[i].dma_map = NULL;
+ }
}
static void
@@ -822,10 +880,27 @@ setup_txqs(device_t dev, struct netfront_info *info,
mtx_init(&txq->lock, txq->name, "netfront transmit lock",
MTX_DEF);
+ SLIST_INIT(&txq->tags);
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
txq->mbufs[i] = (void *) ((u_long) i+1);
txq->grant_ref[i] = GRANT_REF_INVALID;
+ txq->xennet_tag[i].txq = txq;
+ txq->xennet_tag[i].dma_tag = info->dma_tag;
+ error = bus_dmamap_create(info->dma_tag, 0,
+ &txq->xennet_tag[i].dma_map);
+ if (error != 0) {
+ device_printf(dev,
+ "failed to allocate dma map\n");
+ goto fail;
+ }
+ m_tag_setup(&txq->xennet_tag[i].tag,
+ MTAG_COOKIE, MTAG_XENNET,
+ sizeof(txq->xennet_tag[i]) -
+ sizeof(txq->xennet_tag[i].tag));
+ txq->xennet_tag[i].tag.m_tag_free = &tag_free;
+ SLIST_INSERT_HEAD(&txq->tags, &txq->xennet_tag[i],
+ next);
}
txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
@@ -1041,7 +1116,7 @@ xn_release_tx_bufs(struct netfront_txq *txq)
if (txq->mbufs_cnt < 0) {
panic("%s: tx_chain_cnt must be >= 0", __func__);
}
- m_free(m);
+ mbuf_release(m);
}
}
@@ -1311,7 +1386,7 @@ xn_txeof(struct netfront_txq *txq)
txq->mbufs[id] = NULL;
add_id_to_freelist(txq->mbufs, id);
txq->mbufs_cnt--;
- m_free(m);
+ mbuf_release(m);
/* Only mark the txq active if we've freed up at least
one slot to try */
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
@@ -1530,46 +1605,51 @@ xn_count_frags(struct mbuf *m)
static int
xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
{
- struct mbuf *m;
struct netfront_info *np = txq->info;
struct ifnet *ifp = np->xn_ifp;
- u_int nfrags;
- int otherend_id;
+ int otherend_id, error, nfrags;
+ bus_dma_segment_t *segs;
+ struct mbuf_xennet *tag;
+ bus_dmamap_t map;
+ unsigned int i;
- /**
- * Defragment the mbuf if necessary.
- */
- nfrags = xn_count_frags(m_head);
+ segs = txq->segs;
+ KASSERT(!SLIST_EMPTY(&txq->tags), ("no tags available"));
+ tag = SLIST_FIRST(&txq->tags);
+ SLIST_REMOVE_HEAD(&txq->tags, next);
+ KASSERT(tag->count == 0, ("tag already in-use"));
+ map = tag->dma_map;
+ error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs,
+ &nfrags, 0);
+ if (error == EFBIG || nfrags > np->maxfrags) {
+ struct mbuf *m;
- /*
- * Check to see whether this request is longer than netback
- * can handle, and try to defrag it.
- */
- /**
- * It is a bit lame, but the netback driver in Linux can't
- * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
- * the Linux network stack.
- */
- if (nfrags > np->maxfrags) {
+ bus_dmamap_unload(np->dma_tag, map);
m = m_defrag(m_head, M_NOWAIT);
if (!m) {
/*
* Defrag failed, so free the mbuf and
* therefore drop the packet.
*/
+ SLIST_INSERT_HEAD(&txq->tags, tag, next);
m_freem(m_head);
return (EMSGSIZE);
}
m_head = m;
+ error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs,
+ &nfrags, 0);
+ if (error != 0 || nfrags > np->maxfrags) {
+ bus_dmamap_unload(np->dma_tag, map);
+ SLIST_INSERT_HEAD(&txq->tags, tag, next);
+ m_freem(m_head);
+ return (error ?: EFBIG);
+ }
+ } else if (error != 0) {
+ SLIST_INSERT_HEAD(&txq->tags, tag, next);
+ m_freem(m_head);
+ return (error);
}
- /* Determine how many fragments now exist */
- nfrags = xn_count_frags(m_head);
-
- /*
- * Check to see whether the defragmented packet has too many
- * segments for the Linux netback driver.
- */
/**
* The FreeBSD TCP stack, with TSO enabled, can produce a chain
* of mbufs longer than Linux can handle. Make sure we don't
@@ -1583,6 +1663,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
"won't be able to handle it, dropping\n",
__func__, nfrags, MAX_TX_REQ_FRAGS);
#endif
+ SLIST_INSERT_HEAD(&txq->tags, tag, next);
+ bus_dmamap_unload(np->dma_tag, map);
m_freem(m_head);
return (EMSGSIZE);
}
@@ -1604,9 +1686,9 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* the fragment pointers. Stop when we run out
* of fragments or hit the end of the mbuf chain.
*/
- m = m_head;
otherend_id = xenbus_get_otherend_id(np->xbdev);
- for (m = m_head; m; m = m->m_next) {
+ m_tag_prepend(m_head, &tag->tag);
+ for (i = 0; i < nfrags; i++) {
netif_tx_request_t *tx;
uintptr_t id;
grant_ref_t ref;
@@ -1621,17 +1703,20 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
if (txq->mbufs_cnt > NET_TX_RING_SIZE)
panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
__func__);
- txq->mbufs[id] = m;
+ mbuf_grab(m_head);
+ txq->mbufs[id] = m_head;
tx->id = id;
ref = gnttab_claim_grant_reference(&txq->gref_head);
KASSERT((short)ref >= 0, ("Negative ref"));
- mfn = virt_to_mfn(mtod(m, vm_offset_t));
+ mfn = atop(segs[i].ds_addr);
gnttab_grant_foreign_access_ref(ref, otherend_id,
mfn, GNTMAP_readonly);
tx->gref = txq->grant_ref[id] = ref;
- tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
+ tx->offset = segs[i].ds_addr & PAGE_MASK;
+ KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE,
+ ("mbuf segment crosses a page boundary"));
tx->flags = 0;
- if (m == m_head) {
+ if (i == 0) {
/*
* The first fragment has the entire packet
* size, subsequent fragments have just the
@@ -1640,7 +1725,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* subtracting the sizes of the other
* fragments.
*/
- tx->size = m->m_pkthdr.len;
+ tx->size = m_head->m_pkthdr.len;
/*
* The first fragment contains the checksum flags
@@ -1654,12 +1739,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* so we have to test for CSUM_TSO
* explicitly.
*/
- if (m->m_pkthdr.csum_flags
+ if (m_head->m_pkthdr.csum_flags
& (CSUM_DELAY_DATA | CSUM_TSO)) {
tx->flags |= (NETTXF_csum_blank
| NETTXF_data_validated);
}
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
struct netif_extra_info *gso =
(struct netif_extra_info *)
RING_GET_REQUEST(&txq->ring,
@@ -1667,7 +1752,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
tx->flags |= NETTXF_extra_info;
- gso->u.gso.size = m->m_pkthdr.tso_segsz;
+ gso->u.gso.size = m_head->m_pkthdr.tso_segsz;
gso->u.gso.type =
XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
@@ -1677,13 +1762,14 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
gso->flags = 0;
}
} else {
- tx->size = m->m_len;
+ tx->size = segs[i].ds_len;
}
- if (m->m_next)
+ if (i != nfrags - 1)
tx->flags |= NETTXF_more_data;
txq->ring.req_prod_pvt++;
}
+ bus_dmamap_sync(np->dma_tag, map, BUS_DMASYNC_PREWRITE);
BPF_MTAP(ifp, m_head);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
@@ -2244,7 +2330,20 @@ create_netdev(device_t dev)
ether_ifattach(ifp, np->mac);
netfront_carrier_off(np);
- return (0);
+ err = bus_dma_tag_create(
+ bus_get_dma_tag(dev), /* parent */
+ 1, PAGE_SIZE, /* algnmnt, boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ PAGE_SIZE * MAX_TX_REQ_FRAGS, /* max request size */
+ MAX_TX_REQ_FRAGS, /* max segments */
+ PAGE_SIZE, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, NULL, /* lockfunc, lockarg */
+ &np->dma_tag);
+
+ return (err);
error:
KASSERT(err != 0, ("Error path with no error code specified"));
@@ -2277,6 +2376,7 @@ netif_free(struct netfront_info *np)
if_free(np->xn_ifp);
np->xn_ifp = NULL;
ifmedia_removeall(&np->sc_media);
+ bus_dma_tag_destroy(np->dma_tag);
}
static void
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®. |