Port-xen archive
Re: NetBSD/xen goes off the network - reproducible
On Tue, Feb 21, 2012 at 02:28:08PM -0500, Brian Marcotte wrote:
> > the attached patch changes xennet(4) to use a pool for receive
> > buffers, and allocates them on demand for the ring. If a receive buffer
> > is stalled in some socket queue, it should not be a problem any more
> > (other than memory consumption).
>
> I applied the patch to NetBSD-6 and I can no longer provoke the problem
> with my simple test.
>
> Thanks!
>
> However, running "tcpdump -i xennet0 -n icmp" reliably causes the panic
> below after 1-15 seconds. There may be one other issue, but I'm going to
> need another day to look into it. I think it may be something I'm doing.
I found the problem: when pushing more receive buffers to the ring, we
may accidentally overwrite the status we're still using. This leads to
0-length mbufs, which bpf doesn't like. Just free the receive buffer a
few lines later, and all is well :)
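To illustrate the ordering hazard, here is a simplified sketch (not the
driver code itself): in a Xen shared ring, request and response slots
alias the same memory, so RING_GET_RESPONSE() returns a pointer that a
ring refill can clobber.

	netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);

	/* WRONG: freeing the req here can trigger a ring refill that
	 * pushes a fresh request into the very slot rx points at ... */
	xennet_rx_free_req(req);
	m->m_len = m->m_pkthdr.len = rx->status; /* ... may now read 0 */

	/* RIGHT: consume everything needed from *rx first, then free */
	m->m_len = m->m_pkthdr.len = rx->status;
	xennet_rx_free_req(req);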
I also fixed the xennet_rx_free_req() logic so it no longer waits for
the ring to be completely empty before starting to refill it. 4/5 of
the ring size looks like a good compromise (refilling too often, with
only a few buffers at a time, significantly increases CPU usage).
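In other words, the refill check now looks like this (as in the patch
below; the old SC_NLIVEREQ() threshold is gone):

	/* refill only once 4/5 of the ring's requests are free, so
	 * buffer pushes are batched instead of happening per-packet */
	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
		xennet_alloc_rx_buffer(sc); /* caller holds sc_rx_lock */
	}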
--
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
NetBSD: 26 years of experience will always make the difference
--
Index: xen/if_xennet_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/if_xennet_xenbus.c,v
retrieving revision 1.58
diff -u -p -u -r1.58 if_xennet_xenbus.c
--- xen/if_xennet_xenbus.c 2 Feb 2012 19:43:01 -0000 1.58
+++ xen/if_xennet_xenbus.c 21 Feb 2012 21:39:58 -0000
@@ -148,7 +148,6 @@ int xennet_debug = 0xff;
#endif
#define GRANT_INVALID_REF -1 /* entry is free */
-#define GRANT_STACK_REF -2 /* entry owned by the network stack */
#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
@@ -210,6 +209,9 @@ struct xennet_xenbus_softc {
static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static u_long xennet_pages[NET_RX_RING_SIZE];
+static pool_cache_t if_xennetrxbuf_cache;
+static int if_xennetrxbuf_cache_inited=0;
+
static int xennet_xenbus_match(device_t, cfdata_t, void *);
static void xennet_xenbus_attach(device_t, device_t, void *);
static int xennet_xenbus_detach(device_t, int);
@@ -219,6 +221,7 @@ static void xennet_alloc_rx_buffer(struc
static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
static void xennet_tx_complete(struct xennet_xenbus_softc *);
static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
+static void xennet_rx_free_req(struct xennet_rxreq *);
static int xennet_handler(void *);
static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
#ifdef XENNET_DEBUG_DUMP
@@ -301,6 +304,14 @@ xennet_xenbus_attach(device_t parent, de
sc->sc_xbusd = xa->xa_xbusd;
sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
+ /* xenbus ensures 2 devices can't be probed at the same time */
+ if (if_xennetrxbuf_cache_inited == 0) {
+ if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
+ "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
+ if_xennetrxbuf_cache_inited = 1;
+ }
+
+
/* initialize free RX and RX request lists */
mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
SLIST_INIT(&sc->sc_txreq_head);
@@ -316,13 +327,10 @@ xennet_xenbus_attach(device_t parent, de
struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
rxreq->rxreq_id = i;
rxreq->rxreq_sc = sc;
- rxreq->rxreq_va = uvm_km_alloc(kernel_map,
- PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+ rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+ if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
if (rxreq->rxreq_va == 0)
break;
- if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
- &rxreq->rxreq_pa))
- panic("%s: no pa for mapped va ?", device_xname(self));
rxreq->rxreq_gntref = GRANT_INVALID_REF;
SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
}
@@ -581,7 +589,9 @@ again:
xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
return false;
}
+ mutex_enter(&sc->sc_rx_lock);
xennet_alloc_rx_buffer(sc);
+ mutex_exit(&sc->sc_rx_lock);
if (sc->sc_backend_status == BEST_SUSPENDED) {
xenbus_device_resume(sc->sc_xbusd);
@@ -680,7 +690,7 @@ xennet_alloc_rx_buffer(struct xennet_xen
otherend_id = sc->sc_xbusd->xbusd_otherend_id;
- mutex_enter(&sc->sc_rx_lock);
+ KASSERT(mutex_owned(&sc->sc_rx_lock));
for (i = 0; sc->sc_free_rxreql != 0; i++) {
req = SLIST_FIRST(&sc->sc_rxreq_head);
KASSERT(req != NULL);
@@ -729,7 +739,6 @@ xennet_alloc_rx_buffer(struct xennet_xen
out_loop:
if (i == 0) {
- mutex_exit(&sc->sc_rx_lock);
return;
}
@@ -763,8 +772,11 @@ out_loop:
sc->sc_rx_ring.req_prod_pvt = req_prod + i;
RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-
- mutex_exit(&sc->sc_rx_lock);
+#if 0
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
+ if (notify)
+ hypervisor_notify_via_evtchn(sc->sc_evtchn);
+#endif
return;
}
@@ -780,7 +792,6 @@ xennet_free_rx_buffer(struct xennet_xenb
mmu_update_t mmu[1];
multicall_entry_t mcl[2];
- int s = splbio();
mutex_enter(&sc->sc_rx_lock);
DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
@@ -788,16 +799,6 @@ xennet_free_rx_buffer(struct xennet_xenb
for (i = 0; i < NET_RX_RING_SIZE; i++) {
struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
- /*
- * if the buffer is in transit in the network stack, wait for
- * the network stack to free it.
- */
- while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
- GRANT_STACK_REF)
- mutex_exit(&sc->sc_rx_lock);
- tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
- mutex_enter(&sc->sc_rx_lock);
-
if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
/*
* this req is still granted. Get back the page or
@@ -861,7 +862,6 @@ xennet_free_rx_buffer(struct xennet_xenb
}
mutex_exit(&sc->sc_rx_lock);
- splx(s);
DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
}
@@ -871,10 +871,23 @@ xennet_free_rx_buffer(struct xennet_xenb
static void
xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
{
- struct xennet_rxreq *req = arg;
+ int s = splnet();
+ KASSERT(buf == m->m_ext.ext_buf);
+ KASSERT(arg == NULL);
+ KASSERT(m != NULL);
+ vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
+ pool_cache_put_paddr(if_xennetrxbuf_cache,
+ (void *)va, m->m_ext.ext_paddr);
+ pool_cache_put(mb_cache, m);
+ splx(s);
+};
+
+static void
+xennet_rx_free_req(struct xennet_rxreq *req)
+{
struct xennet_xenbus_softc *sc = req->rxreq_sc;
- mutex_enter(&sc->sc_rx_lock);
+ KASSERT(mutex_owned(&sc->sc_rx_lock));
/* puts back the RX request in the list of free RX requests */
SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
@@ -886,18 +899,10 @@ xennet_rx_mbuf_free(struct mbuf *m, void
*/
req->rxreq_gntref = GRANT_INVALID_REF;
- if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
+ if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
__predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
- mutex_exit(&sc->sc_rx_lock);
xennet_alloc_rx_buffer(sc);
}
- else {
- mutex_exit(&sc->sc_rx_lock);
- }
-
- if (m)
- pool_cache_put(mb_cache, m);
-
}
/*
@@ -988,16 +993,10 @@ again:
DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
+ mutex_enter(&sc->sc_rx_lock);
resp_prod = sc->sc_rx_ring.sring->rsp_prod;
xen_rmb(); /* ensure we see replies up to resp_prod */
- /*
- * The locking in this loop needs to be done carefully, as
- * m_free() will take the sc_rx_lock() via
- * xennet_rx_mbuf_free()
- */
- mutex_enter(&sc->sc_rx_lock);
-
for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
req = &sc->sc_rxreqs[rx->id];
@@ -1034,8 +1033,6 @@ again:
__func__, sc->sc_rx_feature);
}
- req->rxreq_gntref = GRANT_INVALID_REF;
-
pa = req->rxreq_pa;
va = req->rxreq_va;
@@ -1067,10 +1064,7 @@ again:
DPRINTFN(XEDB_EVENT,
("xennet_handler bad dest\n"));
/* packet not for us */
- mutex_exit(&sc->sc_rx_lock);
- xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
- req);
- mutex_enter(&sc->sc_rx_lock);
+ xennet_rx_free_req(req);
continue;
}
}
@@ -1078,50 +1072,37 @@ again:
if (__predict_false(m == NULL)) {
printf("%s: rx no mbuf\n", ifp->if_xname);
ifp->if_ierrors++;
- mutex_exit(&sc->sc_rx_lock);
- xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
- mutex_enter(&sc->sc_rx_lock);
+ xennet_rx_free_req(req);
continue;
}
MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
m->m_pkthdr.rcvif = ifp;
- if (__predict_true(sc->sc_rx_ring.req_prod_pvt !=
- sc->sc_rx_ring.sring->rsp_prod)) {
- m->m_len = m->m_pkthdr.len = rx->status;
- MEXTADD(m, pktp, rx->status,
- M_DEVBUF, xennet_rx_mbuf_free, req);
- m->m_flags |= M_EXT_RW; /* we own the buffer */
- req->rxreq_gntref = GRANT_STACK_REF;
- mutex_exit(&sc->sc_rx_lock);
- } else {
- /*
- * This was our last receive buffer, allocate
- * memory, copy data and push the receive
- * buffer back to the hypervisor.
- */
- m->m_len = min(MHLEN, rx->status);
- m->m_pkthdr.len = 0;
- mutex_exit(&sc->sc_rx_lock);
- m_copyback(m, 0, rx->status, pktp);
- xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
- if (m->m_pkthdr.len < rx->status) {
- /* out of memory, just drop packets */
- ifp->if_ierrors++;
- m_freem(m);
- mutex_enter(&sc->sc_rx_lock);
- continue;
- }
+ req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+ if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
+ if (__predict_false(req->rxreq_va == 0)) {
+ printf("%s: rx no buf\n", ifp->if_xname);
+ ifp->if_ierrors++;
+ req->rxreq_va = va;
+ req->rxreq_pa = pa;
+ xennet_rx_free_req(req);
+ m_freem(m);
+ continue;
}
-
+ m->m_len = m->m_pkthdr.len = rx->status;
+ MEXTADD(m, pktp, rx->status,
+ M_DEVBUF, xennet_rx_mbuf_free, NULL);
+ m->m_flags |= M_EXT_RW; /* we own the buffer */
+ m->m_ext.ext_paddr = pa;
if ((rx->flags & NETRXF_csum_blank) != 0) {
xennet_checksum_fill(&m);
if (m == NULL) {
ifp->if_ierrors++;
- mutex_enter(&sc->sc_rx_lock);
continue;
}
}
+ /* freeing the req may overwrite *rx, so better to do it late */
+ xennet_rx_free_req(req);
/*
* Pass packet to bpf if there is a listener.
*/
@@ -1131,7 +1112,6 @@ again:
/* Pass the packet up. */
(*ifp->if_input)(ifp, m);
- mutex_enter(&sc->sc_rx_lock);
}
xen_rmb();
sc->sc_rx_ring.rsp_cons = i;
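For reference, a condensed sketch of the buffer lifecycle the patch
introduces (names taken from the patch, error handling omitted): one
global pool_cache backs all receive pages, pool_cache_get_paddr() hands
out a page plus its physical address when a request is (re)armed, and
the mbuf external-storage callback returns both the page and the mbuf
when the stack is done with them.

	/* one PAGE_SIZE-object cache shared by all xennet instances */
	if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
	    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);

	/* arming a request: get a page and its physical address */
	req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
	    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);

	/* handing the page to the stack as external mbuf storage */
	MEXTADD(m, pktp, rx->status, M_DEVBUF, xennet_rx_mbuf_free, NULL);
	m->m_flags |= M_EXT_RW;    /* we own the buffer */
	m->m_ext.ext_paddr = pa;   /* so the callback can return the PA */

	/* the callback (see the patch) then does the reverse:
	 *   pool_cache_put_paddr(if_xennetrxbuf_cache, (void *)va, pa);
	 *   pool_cache_put(mb_cache, m);
	 * so a buffer stalled in a socket queue just keeps one pool
	 * page busy instead of wedging the receive ring. */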