|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: Possible bug? DOM-U network stopped working after fatal error reported in DOM0
On Sun, Dec 26, 2021 at 02:06:55AM +0800, G.R. wrote:
> > > Thanks. I've raised this on freensd-net for advice [0]. IMO netfront
> > > shouldn't receive an mbuf that crosses a page boundary, but if that's
> > > indeed a legit mbuf I will figure out the best way to handle it.
> > >
> > > I have a clumsy patch (below) that might solve this, if you want to
> > > give it a try.
> >
> > Applied the patch and it worked like a charm!
> > Thank you so much for your quick help!
> > Wish you a wonderful holiday!
>
> I may have said too quickly...
> With the patch I can attach the iscsi disk and neither the dom0 nor
> the NAS domU complains this time.
> But when I attempt to mount the attached disk it reports I/O errors randomly.
> By randomly I mean different disks behave differently...
> I don't see any error logs from kernels this time.
> (most of the iscsi disks are NTFS FS and mounted through the user mode
> fuse library)
> But since I have a local backup copy of the image, I can confirm that
> mounting that backup image does not result in any I/O error.
> Looks like something is still broken here...
Indeed. That patch was likely too simple, and didn't properly handle
the split of mbuf data buffers.
I have another version based on using sglist, which I think it's also
a worthwhile change for netfront. Can you please give it a try? I've
done a very simple test and seems fine, but you certainly have more
interesting cases.
You will have to apply it on top of a clean tree, without any of the
other patches applied.
Thanks, Roger.
---
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 8dba5a8dc6d5..37ea7b1fa059 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
+#include <sys/types.h>
+
#include <sys/param.h>
#include <sys/sockio.h>
#include <sys/limits.h>
@@ -40,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
+#include <sys/sglist.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
@@ -199,6 +202,12 @@ struct netfront_txq {
struct taskqueue *tq;
struct task defrtask;
+ struct sglist *segments;
+ struct mbuf_refcount {
+ struct m_tag tag;
+ u_int count;
+ } refcount_tag[NET_TX_RING_SIZE + 1];
+
bool full;
};
@@ -301,6 +310,38 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
return (ref);
}
+#define MTAG_REFCOUNT 0
+
+static void mbuf_grab(uint32_t cookie, struct mbuf *m)
+{
+ struct mbuf_refcount *ref;
+
+ ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT,
+ NULL);
+ KASSERT(ref != NULL, ("Cannot find refcount"));
+ ref->count++;
+}
+
+static void mbuf_release(uint32_t cookie, struct mbuf *m)
+{
+ struct mbuf_refcount *ref;
+
+ ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT,
+ NULL);
+ KASSERT(ref != NULL, ("Cannot find refcount"));
+ KASSERT(ref->count > 0, ("Invalid reference count"));
+
+ if(--ref->count == 0)
+ m_freem(m);
+}
+
+static void tag_free(struct m_tag *t)
+{
+ struct mbuf_refcount *ref = (struct mbuf_refcount *)t;
+
+ KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt"));
+}
+
#define IPRINTK(fmt, args...) \
printf("[XEN] " fmt, ##args)
#ifdef INVARIANTS
@@ -778,7 +819,7 @@ disconnect_txq(struct netfront_txq *txq)
static void
destroy_txq(struct netfront_txq *txq)
{
-
+ sglist_free(txq->segments);
free(txq->ring.sring, M_DEVBUF);
buf_ring_free(txq->br, M_DEVBUF);
taskqueue_drain_all(txq->tq);
@@ -826,6 +867,11 @@ setup_txqs(device_t dev, struct netfront_info *info,
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
txq->mbufs[i] = (void *) ((u_long) i+1);
txq->grant_ref[i] = GRANT_REF_INVALID;
+ m_tag_setup(&txq->refcount_tag[i].tag,
+ (unsigned long)txq, MTAG_REFCOUNT,
+ sizeof(txq->refcount_tag[i]) -
+ sizeof(txq->refcount_tag[i].tag));
+ txq->refcount_tag[i].tag.m_tag_free = &tag_free;
}
txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
@@ -874,10 +920,18 @@ setup_txqs(device_t dev, struct netfront_info *info,
device_printf(dev, "xen_intr_alloc_and_bind_local_port
failed\n");
goto fail_bind_port;
}
+
+ txq->segments = sglist_alloc(MAX_TX_REQ_FRAGS, M_WAITOK);
+ if (txq->segments == NULL) {
+ device_printf(dev, "failed to allocate sglist\n");
+ goto fail_sglist;
+ }
}
return (0);
+fail_sglist:
+ xen_intr_unbind(&txq->xen_intr_handle);
fail_bind_port:
taskqueue_drain_all(txq->tq);
fail_start_thread:
@@ -1041,7 +1095,7 @@ xn_release_tx_bufs(struct netfront_txq *txq)
if (txq->mbufs_cnt < 0) {
panic("%s: tx_chain_cnt must be >= 0", __func__);
}
- m_free(m);
+ mbuf_release((unsigned long)txq, m);
}
}
@@ -1311,7 +1365,7 @@ xn_txeof(struct netfront_txq *txq)
txq->mbufs[id] = NULL;
add_id_to_freelist(txq->mbufs, id);
txq->mbufs_cnt--;
- m_free(m);
+ mbuf_release((unsigned long)txq, m);
/* Only mark the txq active if we've freed up at least
one slot to try */
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
@@ -1507,22 +1561,6 @@ xn_get_responses(struct netfront_rxq *rxq,
return (err);
}
-/**
- * \brief Count the number of fragments in an mbuf chain.
- *
- * Surprisingly, there isn't an M* macro for this.
- */
-static inline int
-xn_count_frags(struct mbuf *m)
-{
- int nfrags;
-
- for (nfrags = 0; m != NULL; m = m->m_next)
- nfrags++;
-
- return (nfrags);
-}
-
/**
* Given an mbuf chain, make sure we have enough room and then push
* it onto the transmit ring.
@@ -1530,16 +1568,22 @@ xn_count_frags(struct mbuf *m)
static int
xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
{
- struct mbuf *m;
struct netfront_info *np = txq->info;
struct ifnet *ifp = np->xn_ifp;
- u_int nfrags;
- int otherend_id;
+ u_int nfrags, i;
+ int otherend_id, rc;
+
+ sglist_reset(txq->segments);
+ rc = sglist_append_mbuf(txq->segments, m_head);
+ if (rc != 0) {
+ m_freem(m_head);
+ return (rc);
+ }
/**
* Defragment the mbuf if necessary.
*/
- nfrags = xn_count_frags(m_head);
+ nfrags = txq->segments->sg_nseg;
/*
* Check to see whether this request is longer than netback
@@ -1551,6 +1595,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* the Linux network stack.
*/
if (nfrags > np->maxfrags) {
+ struct mbuf *m;
+
m = m_defrag(m_head, M_NOWAIT);
if (!m) {
/*
@@ -1561,11 +1607,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
return (EMSGSIZE);
}
m_head = m;
+ sglist_reset(txq->segments);
+ rc = sglist_append_mbuf(txq->segments, m_head);
+ if (rc != 0) {
+ m_freem(m_head);
+ return (rc);
+ }
+ nfrags = txq->segments->sg_nseg;
}
- /* Determine how many fragments now exist */
- nfrags = xn_count_frags(m_head);
-
/*
* Check to see whether the defragmented packet has too many
* segments for the Linux netback driver.
@@ -1604,14 +1654,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* the fragment pointers. Stop when we run out
* of fragments or hit the end of the mbuf chain.
*/
- m = m_head;
otherend_id = xenbus_get_otherend_id(np->xbdev);
- for (m = m_head; m; m = m->m_next) {
+ for (i = 0; i < nfrags; i++) {
netif_tx_request_t *tx;
uintptr_t id;
grant_ref_t ref;
u_long mfn; /* XXX Wrong type? */
+ struct sglist_seg *seg;
+ seg = &txq->segments->sg_segs[i];
tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt);
id = get_id_from_freelist(txq->mbufs);
if (id == 0)
@@ -1621,17 +1672,22 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
if (txq->mbufs_cnt > NET_TX_RING_SIZE)
panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
__func__);
- txq->mbufs[id] = m;
+ if (i == 0)
+ m_tag_prepend(m_head, &txq->refcount_tag[id].tag);
+ mbuf_grab((unsigned long)txq, m_head);
+ txq->mbufs[id] = m_head;
tx->id = id;
ref = gnttab_claim_grant_reference(&txq->gref_head);
KASSERT((short)ref >= 0, ("Negative ref"));
- mfn = virt_to_mfn(mtod(m, vm_offset_t));
+ mfn = atop(seg->ss_paddr);
gnttab_grant_foreign_access_ref(ref, otherend_id,
mfn, GNTMAP_readonly);
tx->gref = txq->grant_ref[id] = ref;
- tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
+ tx->offset = seg->ss_paddr & PAGE_MASK;
+ KASSERT(tx->offset + seg->ss_len <= PAGE_SIZE,
+ ("mbuf segment crosses a page boundary"));
tx->flags = 0;
- if (m == m_head) {
+ if (i == 0) {
/*
* The first fragment has the entire packet
* size, subsequent fragments have just the
@@ -1640,7 +1696,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* subtracting the sizes of the other
* fragments.
*/
- tx->size = m->m_pkthdr.len;
+ tx->size = m_head->m_pkthdr.len;
/*
* The first fragment contains the checksum flags
@@ -1654,12 +1710,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
* so we have to test for CSUM_TSO
* explicitly.
*/
- if (m->m_pkthdr.csum_flags
+ if (m_head->m_pkthdr.csum_flags
& (CSUM_DELAY_DATA | CSUM_TSO)) {
tx->flags |= (NETTXF_csum_blank
| NETTXF_data_validated);
}
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
struct netif_extra_info *gso =
(struct netif_extra_info *)
RING_GET_REQUEST(&txq->ring,
@@ -1667,7 +1723,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
tx->flags |= NETTXF_extra_info;
- gso->u.gso.size = m->m_pkthdr.tso_segsz;
+ gso->u.gso.size = m_head->m_pkthdr.tso_segsz;
gso->u.gso.type =
XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
@@ -1677,9 +1733,9 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct
mbuf *m_head)
gso->flags = 0;
}
} else {
- tx->size = m->m_len;
+ tx->size = seg->ss_len;
}
- if (m->m_next)
+ if (i != nfrags - 1)
tx->flags |= NETTXF_more_data;
txq->ring.req_prod_pvt++;
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |