Subject: Patch for three mbuf DMA optimizations
To: tech-kern@netbsd.org
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-kern
Date: 03/29/2003 12:22:53
--8t9RHnE3ZwKMSgU+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Folks...
This is the second in a series of simple patches, contributed by
Wasabi Systems, to improve network performance.
Building on the pool cache paddr patch, this patch does three things:
* Caches the physical addresses of mbufs and clusters in the
  m_hdr and m_ext, respectively.  This allows bus_dma back-ends
  to avoid having to extract the physical address from the
  virtual address when dealing with plain mbufs and clusters
  (see the sketch following this list).
* For mbuf external data that is the result of sosend_loan,
  remembers pointers to the vm_page structures for the loaned
  pages.  Initially, this saves some work when freeing the
  loaned area.  Eventually, it can be used by bus_dma back-ends
  to avoid having to extract the physical address from the
  virtual address (the sketch below covers this case as well).
* Adds a new M_EXT_ROMAP bit, which indicates that the mbuf
  external data is mapped read-only at the MMU.  On some
  platforms, this implies that all cache lines associated
  with the buffer are clean, so the bus_dma back-end can
  skip cleaning the cache for such buffers (see the second
  sketch below).  This bit is used for sosend_loan'd buffers,
  since loaned pages are always mapped read-only.
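
To make the intent concrete, here is a rough sketch of how a bus_dma
back-end might resolve the physical address of an mbuf's data from the
cached values, falling back to pmap_extract() only when nothing is
cached.  This is illustrative only, not the forthcoming ARM/i386 code;
the function name is made up, and the real back-ends will do this work
inside their dmamap_load paths:

	#include <sys/param.h>
	#include <sys/mbuf.h>
	#include <uvm/uvm_extern.h>

	/*
	 * Illustrative only: resolve the physical address of
	 * m->m_data from the cached values, using pmap_extract()
	 * as a last resort.
	 */
	static paddr_t
	_bus_dma_mbuf_paddr(struct mbuf *m)
	{
		paddr_t pa;
		vaddr_t off;

		if ((m->m_flags & M_EXT) == 0 &&
		    m->m_paddr != M_PADDR_INVALID) {
			/* Data lives inside the mbuf itself. */
			return (m->m_paddr + (m->m_data - (caddr_t)m));
		}
		if ((m->m_flags & (M_EXT|M_CLUSTER)) ==
		    (M_EXT|M_CLUSTER) &&
		    m->m_ext.ext_paddr != M_PADDR_INVALID) {
			/* Cluster: paddr cached in ext_paddr. */
			return (m->m_ext.ext_paddr +
			    (m->m_data - m->m_ext.ext_buf));
		}
		if ((m->m_flags & (M_EXT|M_EXT_PAGES)) ==
		    (M_EXT|M_EXT_PAGES)) {
			/* Loaned pages: vm_pages cached in ext_pgs. */
			off = (m->m_data - m->m_ext.ext_buf) +
			    ((vaddr_t)m->m_ext.ext_buf & PAGE_MASK);
			return (VM_PAGE_TO_PHYS(
			    m->m_ext.ext_pgs[off >> PAGE_SHIFT]) +
			    (off & PAGE_MASK));
		}
		/* Nothing cached; take the slow path via the pmap. */
		if (pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
		    &pa) == FALSE)
			panic("_bus_dma_mbuf_paddr: no mapping");
		return (pa);
	}

Note that a single cached address suffices for the first two cases on
a 4K page size machine: the mbuf itself and the cluster each fit within
one physically contiguous pool page.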
Patches for ARM and i386 bus_dma back-ends are forthcoming.
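
In the meantime, here is a similarly hedged sketch of what the
M_EXT_ROMAP check buys such a back-end on a CPU with a write-back
cache, in the BUS_DMASYNC_PREWRITE case.  cpu_dcache_wb_range() is the
existing ARM cache primitive; the function itself is made up for
illustration:

	#include <sys/param.h>
	#include <sys/mbuf.h>
	#include <arm/cpufunc.h>	/* cpu_dcache_wb_range() */

	/*
	 * Illustrative only: sync an mbuf's data before a device
	 * reads it (the BUS_DMASYNC_PREWRITE case).
	 */
	static void
	_bus_dma_mbuf_prewrite(struct mbuf *m, vaddr_t va, vsize_t len)
	{

		/*
		 * A buffer mapped read-only at the MMU (e.g.
		 * sosend_loan'd pages) can have no dirty cache
		 * lines, so there is nothing to write back before
		 * the device reads the memory.
		 */
		if (M_ROMAP(m))
			return;
		cpu_dcache_wb_range(va, len);
	}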
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
--8t9RHnE3ZwKMSgU+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=mbuf-patch
Index: sys/mbuf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/mbuf.h,v
retrieving revision 1.79
diff -c -r1.79 mbuf.h
*** sys/mbuf.h 2003/03/22 02:21:57 1.79
--- sys/mbuf.h 2003/03/29 18:55:49
***************
*** 134,139 ****
--- 134,140 ----
struct mowner *mh_owner; /* mbuf owner */
int mh_len; /* amount of data in this mbuf */
int mh_flags; /* flags; see below */
+ paddr_t mh_paddr; /* physical address of mbuf */
short mh_type; /* type of data in this mbuf */
};
***************
*** 172,177 ****
--- 173,184 ----
#define M_CSUM_IPv4 0x00000040 /* IPv4 header */
#define M_CSUM_IPv4_BAD 0x00000080 /* IPv4 header checksum bad */
+ /*
+ * Max # of pages we can attach to m_ext. This is carefully chosen
+ * to be able to handle SOCK_LOAN_CHUNK on a 4K page size machine.
+ */
+ #define M_EXT_MAXPAGES ((65536 / 4096) + 1)
+
/* description of external storage mapped into mbuf, valid if M_EXT set */
struct m_ext {
caddr_t ext_buf; /* start of buffer */
***************
*** 182,187 ****
--- 189,201 ----
struct malloc_type *ext_type; /* malloc type */
struct mbuf *ext_nextref;
struct mbuf *ext_prevref;
+ union {
+ paddr_t extun_paddr; /* physical address (M_EXT_CLUSTER) */
+ /* pages (M_EXT_PAGES) */
+ struct vm_page *extun_pgs[M_EXT_MAXPAGES];
+ } ext_un;
+ #define ext_paddr ext_un.extun_paddr
+ #define ext_pgs ext_un.extun_pgs
#ifdef DEBUG
const char *ext_ofile;
const char *ext_nfile;
***************
*** 190,195 ****
--- 204,211 ----
#endif
};
+ #define M_PADDR_INVALID POOL_PADDR_INVALID
+
struct mbuf {
struct m_hdr m_hdr;
union {
***************
*** 210,215 ****
--- 226,232 ----
#define m_type m_hdr.mh_type
#define m_flags m_hdr.mh_flags
#define m_nextpkt m_hdr.mh_nextpkt
+ #define m_paddr m_hdr.mh_paddr
#define m_pkthdr M_dat.MH.MH_pkthdr
#define m_ext M_dat.MH.MH_dat.MH_ext
#define m_pktdat M_dat.MH.MH_dat.MH_databuf
***************
*** 237,242 ****
--- 254,261 ----
/* additional flags for M_EXT mbufs */
#define M_EXT_FLAGS 0xff000000
#define M_EXT_CLUSTER 0x01000000 /* ext is a cluster */
+ #define M_EXT_PAGES 0x02000000 /* ext_pgs is valid */
+ #define M_EXT_ROMAP 0x04000000 /* ext mapping is r-o at MMU */
/* for source-level compatibility */
#define M_CLUSTER M_EXT_CLUSTER
***************
*** 453,460 ****
do { \
MBUFLOCK( \
(m)->m_ext.ext_buf = \
! pool_cache_get(&mclpool_cache, (how) == M_WAIT ? \
! (PR_WAITOK|PR_LIMITFAIL) : 0); \
if ((m)->m_ext.ext_buf != NULL) \
_MOWNERREF((m), M_EXT|M_CLUSTER); \
); \
--- 472,480 ----
do { \
MBUFLOCK( \
(m)->m_ext.ext_buf = \
! pool_cache_get_paddr(&mclpool_cache, \
! (how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0, \
! &(m)->m_ext.ext_paddr); \
if ((m)->m_ext.ext_buf != NULL) \
_MOWNERREF((m), M_EXT|M_CLUSTER); \
); \
***************
*** 465,470 ****
--- 485,491 ----
(m)->m_ext.ext_size = MCLBYTES; \
(m)->m_ext.ext_free = NULL; \
(m)->m_ext.ext_arg = NULL; \
+ /* ext_paddr initialized above */ \
MCLINITREFERENCE(m); \
} \
} while (/* CONSTCOND */ 0)
***************
*** 505,511 ****
_MCLDEREFERENCE(m); \
splx(_ms_); \
} else if ((m)->m_flags & M_CLUSTER) { \
! pool_cache_put(&mclpool_cache, (m)->m_ext.ext_buf); \
splx(_ms_); \
} else if ((m)->m_ext.ext_free) { \
/* \
--- 526,533 ----
_MCLDEREFERENCE(m); \
splx(_ms_); \
} else if ((m)->m_flags & M_CLUSTER) { \
! pool_cache_put_paddr(&mclpool_cache, (m)->m_ext.ext_buf,\
! (m)->m_ext.ext_paddr); \
splx(_ms_); \
} else if ((m)->m_ext.ext_free) { \
/* \
***************
*** 554,561 ****
_MCLDEREFERENCE(m); \
pool_cache_put(&mbpool_cache, (m)); \
} else if ((m)->m_flags & M_CLUSTER) { \
! pool_cache_put(&mclpool_cache, \
! (m)->m_ext.ext_buf); \
pool_cache_put(&mbpool_cache, (m)); \
} else if ((m)->m_ext.ext_free) { \
/* \
--- 576,584 ----
_MCLDEREFERENCE(m); \
pool_cache_put(&mbpool_cache, (m)); \
} else if ((m)->m_flags & M_CLUSTER) { \
! pool_cache_put_paddr(&mclpool_cache, \
! (m)->m_ext.ext_buf, \
! (m)->m_ext.ext_paddr); \
pool_cache_put(&mbpool_cache, (m)); \
} else if ((m)->m_ext.ext_free) { \
/* \
***************
*** 617,622 ****
--- 640,651 ----
(((m)->m_flags & M_CLUSTER) == 0 || MCLISREFERENCED(m)))
/*
+ * Determine if an mbuf's data area is read-only at the MMU.
+ */
+ #define M_ROMAP(m) \
+ (((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
+
+ /*
* Compute the amount of space available
* before the current start of data in an mbuf.
*/
***************
*** 639,644 ****
--- 668,681 ----
#define M_TRAILINGSPACE(m) \
(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
+
+ /*
+ * Compute the offset of the beginning of the data buffer of a non-ext
+ * mbuf.
+ */
+ #define M_BUFOFFSET(m) \
+ (((m)->m_flags & M_PKTHDR) ? \
+ offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
/*
* Arrange to prepend space of size plen to mbuf m.
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.64
diff -c -r1.64 uipc_mbuf.c
*** kern/uipc_mbuf.c 2003/02/26 06:31:11 1.64
--- kern/uipc_mbuf.c 2003/03/29 18:55:52
***************
*** 91,97 ****
#include <net/if.h>
! #include <uvm/uvm_extern.h>
struct pool mbpool; /* mbuf pool */
--- 91,97 ----
#include <net/if.h>
! #include <uvm/uvm.h>
struct pool mbpool; /* mbuf pool */
***************
*** 106,111 ****
--- 106,113 ----
int max_hdr;
int max_datalen;
+ static int mb_ctor(void *, void *, int);
+
void *mclpool_alloc(struct pool *, int);
void mclpool_release(struct pool *, void *);
***************
*** 147,153 ****
pool_set_drain_hook(&mbpool, m_reclaim, NULL);
pool_set_drain_hook(&mclpool, m_reclaim, NULL);
! pool_cache_init(&mbpool_cache, &mbpool, NULL, NULL, NULL);
pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);
/*
--- 149,155 ----
pool_set_drain_hook(&mbpool, m_reclaim, NULL);
pool_set_drain_hook(&mclpool, m_reclaim, NULL);
! pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);
/*
***************
*** 287,292 ****
--- 289,308 ----
{
uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
+ }
+
+ /*ARGSUSED*/
+ static int
+ mb_ctor(void *arg, void *object, int flags)
+ {
+ struct mbuf *m = object;
+
+ #ifdef POOL_VTOPHYS
+ m->m_paddr = POOL_VTOPHYS(m);
+ #else
+ m->m_paddr = M_PADDR_INVALID;
+ #endif
+ return (0);
}
void
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_socket.c,v
retrieving revision 1.78
diff -c -r1.78 uipc_socket.c
*** kern/uipc_socket.c 2003/02/26 06:31:11 1.78
--- kern/uipc_socket.c 2003/03/29 18:55:53
***************
*** 154,162 ****
#define SOCK_LOAN_CHUNK 65536
static void
! sodoloanfree(caddr_t buf, size_t size)
{
- struct vm_page **pgs;
vaddr_t va, sva, eva;
vsize_t len;
paddr_t pa;
--- 154,161 ----
#define SOCK_LOAN_CHUNK 65536
static void
! sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size)
{
vaddr_t va, sva, eva;
vsize_t len;
paddr_t pa;
***************
*** 167,178 ****
len = eva - sva;
npgs = len >> PAGE_SHIFT;
! pgs = alloca(npgs * sizeof(*pgs));
! for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
! if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
! panic("sodoloanfree: va 0x%lx not mapped", va);
! pgs[i] = PHYS_TO_VM_PAGE(pa);
}
pmap_kremove(sva, len);
--- 166,179 ----
len = eva - sva;
npgs = len >> PAGE_SHIFT;
! if (__predict_false(pgs == NULL)) {
! pgs = alloca(npgs * sizeof(*pgs));
! for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
! if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
! panic("sodoloanfree: va 0x%lx not mapped", va);
! pgs[i] = PHYS_TO_VM_PAGE(pa);
! }
}
pmap_kremove(sva, len);
***************
*** 201,207 ****
splx(s);
rv += m->m_ext.ext_size;
! sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
s = splvm();
pool_cache_put(&mbpool_cache, m);
}
--- 202,210 ----
splx(s);
rv += m->m_ext.ext_size;
! sodoloanfree((m->m_flags & M_EXT_PAGES) ?
! m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
! m->m_ext.ext_size);
s = splvm();
pool_cache_put(&mbpool_cache, m);
}
***************
*** 214,220 ****
splx(s);
rv += m->m_ext.ext_size;
! sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
s = splvm();
pool_cache_put(&mbpool_cache, m);
}
--- 217,225 ----
splx(s);
rv += m->m_ext.ext_size;
! sodoloanfree((m->m_flags & M_EXT_PAGES) ?
! m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
! m->m_ext.ext_size);
s = splvm();
pool_cache_put(&mbpool_cache, m);
}
***************
*** 230,236 ****
int s;
if (m == NULL) {
! sodoloanfree(buf, size);
return;
}
--- 235,241 ----
int s;
if (m == NULL) {
! sodoloanfree(NULL, buf, size);
return;
}
***************
*** 248,254 ****
struct iovec *iov = uio->uio_iov;
vaddr_t sva, eva;
vsize_t len;
- struct vm_page **pgs;
vaddr_t lva, va;
int npgs, s, i, error;
--- 253,258 ----
***************
*** 265,270 ****
--- 269,277 ----
len = eva - sva;
npgs = len >> PAGE_SHIFT;
+ /* XXX KDASSERT */
+ KASSERT(npgs <= M_EXT_MAXPAGES);
+
while (socurkva + len > somaxkva) {
if (sodopendfree(so))
continue;
***************
*** 281,290 ****
return (0);
socurkva += len;
- pgs = alloca(npgs * sizeof(*pgs));
-
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
! pgs, UVM_LOAN_TOPAGE);
if (error) {
uvm_km_free(kernel_map, lva, len);
socurkva -= len;
--- 288,295 ----
return (0);
socurkva += len;
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
! m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
if (error) {
uvm_km_free(kernel_map, lva, len);
socurkva -= len;
***************
*** 292,303 ****
}
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
! pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ);
pmap_update(pmap_kernel());
lva += (vaddr_t) iov->iov_base & PAGE_MASK;
MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
uio->uio_resid -= space;
/* uio_offset not updated, not set/used for write(2) */
--- 297,310 ----
}
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
! pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
! VM_PROT_READ);
pmap_update(pmap_kernel());
lva += (vaddr_t) iov->iov_base & PAGE_MASK;
MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
+ m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;
uio->uio_resid -= space;
/* uio_offset not updated, not set/used for write(2) */
--8t9RHnE3ZwKMSgU+--