Subject: mbuf external storage sharing
To: None <tech-net@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 10/02/2004 19:32:10
--NextPart-20041002192117-2591600
Content-Type: Text/Plain; charset=us-ascii
Hi,
The attached diffs change the way mbuf external storage is shared.
With the current linked-list method, it is difficult to be MP-safe
without holding a global lock, because flags etc. are not really shared.
Comments?
YAMAMOTO Takashi
--NextPart-20041002192117-2591600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="extref.diff"
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h (revision 900)
+++ sys/mbuf.h (working copy)
@@ -177,34 +177,27 @@ struct pkthdr {
* is not yet 1s-complemented.
*/
+#if defined(_KERNEL)
/*
* Max # of pages we can attach to m_ext. This is carefully chosen
* to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
*/
-#ifdef MIN_PAGE_SIZE
#define M_EXT_MAXPAGES ((65536 / MIN_PAGE_SIZE) + 1)
-#endif
/* description of external storage mapped into mbuf, valid if M_EXT set */
-struct _m_ext {
+struct mexthdr {
+ struct simplelock ext_lock;
+ int ext_refcnt;
+ int ext_flags; /* M_EXT_ flags */
caddr_t ext_buf; /* start of buffer */
void (*ext_free) /* free routine if not the usual */
(struct mbuf *, caddr_t, size_t, void *);
void *ext_arg; /* argument for ext_free */
size_t ext_size; /* size of buffer, for ext_free */
- struct malloc_type *ext_type; /* malloc type */
- struct mbuf *ext_nextref;
- struct mbuf *ext_prevref;
union {
paddr_t extun_paddr; /* physical address (M_EXT_CLUSTER) */
/* pages (M_EXT_PAGES) */
- /*
- * XXX This is gross, but it doesn't really matter; this is
- * XXX overlaid on top of the mbuf data area.
- */
-#ifdef M_EXT_MAXPAGES
struct vm_page *extun_pgs[M_EXT_MAXPAGES];
-#endif
} ext_un;
#define ext_paddr ext_un.extun_paddr
#define ext_pgs ext_un.extun_pgs
@@ -216,6 +209,22 @@ struct _m_ext {
#endif
};
+/*
+ * XXX a compatibility hack to make m->m_ext.ext_something work.
+ * it's recommended to use MEXT(m)->ext_something instead.
+ */
+struct mexthdr_compatwrapper {
+ struct mexthdr ext_hdr;
+};
+#define m_ext M_dat.MH.MH_dat.MH_ext.u.ext_hdr_compat->ext_hdr
+
+struct _m_ext {
+ union {
+ struct mexthdr *ext_hdr;
+ struct mexthdr_compatwrapper *ext_hdr_compat;
+ } u;
+};
+
#define M_PADDR_INVALID POOL_PADDR_INVALID
/*
@@ -245,7 +254,7 @@ struct _m_ext {
#define m_nextpkt m_hdr.mh_nextpkt
#define m_paddr m_hdr.mh_paddr
#define m_pkthdr M_dat.MH.MH_pkthdr
-#define m_ext M_dat.MH.MH_dat.MH_ext
+#define m_exthdr M_dat.MH.MH_dat.MH_ext.u.ext_hdr
#define m_pktdat M_dat.MH.MH_dat.MH_databuf
#define m_dat M_dat.M_databuf
@@ -288,11 +297,12 @@ MBUF_DEFINE(mbuf, MHLEN, MLEN);
#define M_LINK2 0x4000 /* link layer specific flag */
/* additional flags for M_EXT mbufs */
-#define M_EXT_FLAGS 0xff000000
-#define M_EXT_CLUSTER 0x01000000 /* ext is a cluster */
-#define M_EXT_PAGES 0x02000000 /* ext_pgs is valid */
-#define M_EXT_ROMAP 0x04000000 /* ext mapping is r-o at MMU */
-#define M_EXT_RW 0x08000000 /* ext storage is writable */
+#define M_EXT_FLAGS 0xff000000
+#define M_EXT_CLUSTER 0x01000000 /* ext is a cluster */
+#define M_EXT_PAGES 0x02000000 /* ext_pgs is valid */
+#define M_EXT_ROMAP 0x04000000 /* ext mapping is r-o at MMU */
+#define M_EXT_RW 0x08000000 /* ext storage is writable */
+#define M_EXT_HDREMBEDDED 0x10000000 /* mexthdr is embedded */
/* for source-level compatibility */
#define M_CLUSTER M_EXT_CLUSTER
@@ -300,9 +310,6 @@ MBUF_DEFINE(mbuf, MHLEN, MLEN);
/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD|M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP|M_AUTHIPDGM)
-/* flag copied when shallow-copying external storage */
-#define M_EXTCOPYFLAGS (M_EXT|M_EXT_FLAGS)
-
/* mbuf types */
#define MT_FREE 0 /* should be on free list */
#define MT_DATA 1 /* dynamic (data) allocation */
@@ -362,10 +369,11 @@ do { \
#define _MOWNERCLAIM(m, mowner) do { \
(m)->m_owner = (mowner); \
(mowner)->mo_claims++; \
- if ((m)->m_flags & M_EXT) \
+ if ((m)->m_flags & M_EXT) { \
(mowner)->mo_ext_claims++; \
- if ((m)->m_flags & M_CLUSTER) \
- (mowner)->mo_cluster_claims++; \
+ if (MEXT(m)->ext_flags & M_EXT_CLUSTER) \
+ (mowner)->mo_cluster_claims++; \
+ } \
} while (/* CONSTCOND */ 0)
#define MCLAIM(m, mowner) \
@@ -440,7 +448,6 @@ do { \
} \
} while (/* CONSTCOND */ 0)
-#if defined(_KERNEL)
#define _M_
/*
* Macros for tracking external storage associated with an mbuf.
@@ -450,14 +457,14 @@ do { \
#ifdef DEBUG
#define MCLREFDEBUGN(m, file, line) \
do { \
- (m)->m_ext.ext_nfile = (file); \
- (m)->m_ext.ext_nline = (line); \
+ MEXT(m)->ext_nfile = (file); \
+ MEXT(m)->ext_nline = (line); \
} while (/* CONSTCOND */ 0)
#define MCLREFDEBUGO(m, file, line) \
do { \
- (m)->m_ext.ext_ofile = (file); \
- (m)->m_ext.ext_oline = (line); \
+ MEXT(m)->ext_ofile = (file); \
+ MEXT(m)->ext_oline = (line); \
} while (/* CONSTCOND */ 0)
#else
#define MCLREFDEBUGN(m, file, line)
@@ -465,35 +472,41 @@ do { \
#endif
#define MCLBUFREF(p)
-#define MCLISREFERENCED(m) ((m)->m_ext.ext_nextref != (m))
+#define MCLISREFERENCED(m) (MEXT(m)->ext_refcnt != 1)
#define _MCLDEREFERENCE(m) \
do { \
- (m)->m_ext.ext_nextref->m_ext.ext_prevref = \
- (m)->m_ext.ext_prevref; \
- (m)->m_ext.ext_prevref->m_ext.ext_nextref = \
- (m)->m_ext.ext_nextref; \
+ KASSERT((m)->m_flags & M_EXT); \
+ KASSERT(MEXT(m)->ext_refcnt > 1); \
+ MEXT(m)->ext_refcnt--; \
} while (/* CONSTCOND */ 0)
#define _MCLADDREFERENCE(o, n) \
do { \
- (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \
- (n)->m_ext.ext_nextref = (o)->m_ext.ext_nextref; \
- (n)->m_ext.ext_prevref = (o); \
- (o)->m_ext.ext_nextref = (n); \
- (n)->m_ext.ext_nextref->m_ext.ext_prevref = (n); \
+ KASSERT((o)->m_flags & M_EXT); \
+ KASSERT(((n)->m_flags & M_EXT) == 0); \
+ KASSERT(MEXT(o)->ext_refcnt >= 1); \
+ (n)->m_flags |= M_EXT; \
+ MEXT(o)->ext_refcnt++; \
+ (n)->m_exthdr = (o)->m_exthdr; \
_MOWNERREF((n), (n)->m_flags); \
MCLREFDEBUGN((n), __FILE__, __LINE__); \
} while (/* CONSTCOND */ 0)
#define MCLINITREFERENCE(m) \
do { \
- (m)->m_ext.ext_prevref = (m); \
- (m)->m_ext.ext_nextref = (m); \
+ MEXT(m)->ext_refcnt = 1; \
+ MEXT_LOCK_INIT(MEXT(m)); \
MCLREFDEBUGO((m), __FILE__, __LINE__); \
MCLREFDEBUGN((m), NULL, 0); \
} while (/* CONSTCOND */ 0)
-#define MCLADDREFERENCE(o, n) MBUFLOCK(_MCLADDREFERENCE((o), (n));)
+#define MCLADDREFERENCE(o, n) \
+ MBUFLOCK( \
+ MEXT_LOCK(MEXT(o)); \
+ _MCLADDREFERENCE((o), (n)); \
+ MEXT_UNLOCK(MEXT(o)); \
+ )
+
/*
* Macros for mbuf external storage.
@@ -507,27 +520,19 @@ do { \
* MEXTADD adds pre-allocated external storage to
* a normal mbuf; the flag M_EXT is set upon success.
*/
-#define _MCLGET(m, pool_cache, size, how) \
-do { \
- MBUFLOCK( \
- (m)->m_ext.ext_buf = \
- pool_cache_get_paddr((pool_cache), \
- (how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0, \
- &(m)->m_ext.ext_paddr); \
- if ((m)->m_ext.ext_buf != NULL) \
- _MOWNERREF((m), M_EXT|M_CLUSTER); \
- ); \
- if ((m)->m_ext.ext_buf != NULL) { \
- (m)->m_data = (m)->m_ext.ext_buf; \
- (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
- M_EXT|M_CLUSTER|M_EXT_RW; \
- (m)->m_ext.ext_size = (size); \
- (m)->m_ext.ext_free = NULL; \
- (m)->m_ext.ext_arg = (pool_cache); \
- /* ext_paddr initialized above */ \
- MCLINITREFERENCE(m); \
- } \
-} while (/* CONSTCOND */ 0)
+
+#define MEXT(m) ((m)->m_exthdr)
+#define MEXT_LOCK(e) simple_lock(&(e)->ext_lock)
+#define MEXT_UNLOCK(e) simple_unlock(&(e)->ext_lock)
+#define MEXT_LOCK_INIT(e) simple_lock_init(&(e)->ext_lock)
+#define MEXTHDR_GET(how) _mexthdr_get(how)
+#define MEXTHDR_PUT(ext) \
+ MBUFLOCK(pool_cache_put(&mexthdrpool_cache, (ext));)
+#define _MEXTHDR_GET(m, how) (m)->m_exthdr = MEXTHDR_GET(how)
+#define _MEXTHDR_PUT(m) MEXTHDR_PUT((m)->m_exthdr)
+
+#define _MCLGET(m, pool_cache, size, how) \
+ _m_clget((m), (pool_cache), (size), (how))
/*
* The standard mbuf cluster pool.
@@ -536,41 +541,58 @@ do { \
#define MEXTMALLOC(m, size, how) \
do { \
- (m)->m_ext.ext_buf = \
- (caddr_t)malloc((size), mbtypes[(m)->m_type], (how)); \
- if ((m)->m_ext.ext_buf != NULL) { \
- (m)->m_data = (m)->m_ext.ext_buf; \
- (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
- M_EXT|M_EXT_RW; \
- (m)->m_ext.ext_size = (size); \
- (m)->m_ext.ext_free = NULL; \
- (m)->m_ext.ext_arg = NULL; \
- (m)->m_ext.ext_type = mbtypes[(m)->m_type]; \
- MCLINITREFERENCE(m); \
- MOWNERREF((m), M_EXT); \
+ size_t realsize = ALIGN(size) + sizeof(struct mexthdr); \
+ void *p = malloc((realsize), mbtypes[(m)->m_type], (how)); \
+ if (p != NULL) { \
+ (m)->m_exthdr = (void *)((char *)p + ALIGN(size)); \
+ _MEXTADD((m), p, (size), M_EXT_RW | M_EXT_HDREMBEDDED, \
+ _mext_free_malloc, mbtypes[(m)->m_type]); \
} \
} while (/* CONSTCOND */ 0)
-#define MEXTADD(m, buf, size, type, free, arg) \
+#define MEXTADD(m, buf, size, extflags, free, arg, how) \
+do { \
+ KASSERT(((m)->m_flags & M_EXT) == 0); \
+ _MEXTHDR_GET(m, how); \
+ _MEXTADD((m), (buf), (size), (extflags), (free), (arg)); \
+} while (/* CONSTCOND */ 0)
+
+/* MEXTADD for malloc(9)'ed buffer */
+#define MEXTADD_MALLOC(m, buf, size, extflags, type, how) \
+ MEXTADD((m), (buf), (size), (extflags), _mext_free_malloc, \
+ (type), (how))
+
+/*
+ * MEXTADD2: MEXTADD with preallocated mexthdr
+ */
+#define MEXTADD2(m, buf, size, extflags, free, arg, exthdr) \
+do { \
+ KASSERT(((m)->m_flags & M_EXT) == 0); \
+ (m)->m_exthdr = (exthdr); \
+ _MEXTADD((m), (buf), (size), (extflags), (free), (arg)); \
+} while (/* CONSTCOND */ 0)
+
+#define _MEXTADD(m, buf, size, extflags, free, arg) \
do { \
- (m)->m_data = (m)->m_ext.ext_buf = (caddr_t)(buf); \
- (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT; \
- (m)->m_ext.ext_size = (size); \
- (m)->m_ext.ext_free = (free); \
- (m)->m_ext.ext_arg = (arg); \
- (m)->m_ext.ext_type = (type); \
- MCLINITREFERENCE(m); \
- MOWNERREF((m), M_EXT); \
+ if (MEXT(m)) { \
+ MCLINITREFERENCE(m); \
+ (m)->m_data = MEXT(m)->ext_buf = (caddr_t)(buf); \
+ (m)->m_flags |= M_EXT; \
+ MEXT(m)->ext_flags = (extflags); \
+ MEXT(m)->ext_size = (size); \
+ MEXT(m)->ext_free = (free); \
+ MEXT(m)->ext_arg = (arg); \
+ MOWNERREF((m), M_EXT); \
+ } \
} while (/* CONSTCOND */ 0)
#define MEXTREMOVE(m) \
do { \
int _ms_ = splvm(); /* MBUFLOCK */ \
_MOWNERREVOKE((m), 0, (m)->m_flags); \
- m_ext_free(m, FALSE); \
+ _m_ext_free(m, FALSE); \
splx(_ms_); \
- (m)->m_flags &= ~M_EXTCOPYFLAGS; \
- (m)->m_ext.ext_size = 0; /* why ??? */ \
+ (m)->m_flags &= ~M_EXT; \
} while (/* CONSTCOND */ 0)
/*
@@ -579,7 +601,7 @@ do { \
#define MRESETDATA(m) \
do { \
if ((m)->m_flags & M_EXT) \
- (m)->m_data = (m)->m_ext.ext_buf; \
+ (m)->m_data = MEXT(m)->ext_buf; \
else if ((m)->m_flags & M_PKTHDR) \
(m)->m_data = (m)->m_pktdat; \
else \
@@ -592,6 +614,8 @@ do { \
* Place the successor, if any, in n.
*/
#define MFREE(m, n) \
+do { \
+ KASSERT(((m)->m_flags & M_EXT_FLAGS) == 0); \
MBUFLOCK( \
mbstat.m_mtypes[(m)->m_type]--; \
if ((m)->m_flags & M_PKTHDR) \
@@ -599,11 +623,12 @@ do { \
(n) = (m)->m_next; \
_MOWNERREVOKE((m), 1, m->m_flags); \
if ((m)->m_flags & M_EXT) { \
- m_ext_free(m, TRUE); \
+ _m_ext_free(m, TRUE); \
} else { \
pool_cache_put(&mbpool_cache, (m)); \
} \
- )
+ ); \
+} while (/* CONSTCOND */ 0)
/*
* Copy mbuf pkthdr from `from' to `to'.
@@ -643,21 +668,21 @@ do { \
*/
#define M_READONLY(m) \
(((m)->m_flags & M_EXT) != 0 && \
- (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW || \
+ ((MEXT(m)->ext_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW || \
MCLISREFERENCED(m)))
/*
* Determine if an mbuf's data area is read-only at the MMU.
*/
#define M_ROMAP(m) \
- (((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
+ (((m)->m_flags & M_EXT) && (MEXT(m)->ext_flags & M_EXT_ROMAP))
/*
* Compute the amount of space available
* before the current start of data in an mbuf.
*/
#define _M_LEADINGSPACE(m) \
- ((m)->m_flags & M_EXT ? (m)->m_data - (m)->m_ext.ext_buf : \
+ ((m)->m_flags & M_EXT ? (m)->m_data - MEXT(m)->ext_buf : \
(m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \
(m)->m_data - (m)->m_dat)
@@ -669,7 +694,7 @@ do { \
* after the end of data in an mbuf.
*/
#define _M_TRAILINGSPACE(m) \
- ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
+ ((m)->m_flags & M_EXT ? MEXT(m)->ext_buf + MEXT(m)->ext_size - \
((m)->m_data + (m)->m_len) : \
&(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
@@ -781,8 +806,10 @@ extern const int msize; /* mbuf base s
extern const int mclbytes; /* mbuf cluster size */
extern struct pool mbpool;
extern struct pool mclpool;
+extern struct pool mexthdrpool;
extern struct pool_cache mbpool_cache;
extern struct pool_cache mclpool_cache;
+extern struct pool_cache mexthdrpool_cache;
#ifdef MBUFTRACE
LIST_HEAD(mownerhead, mowner);
extern struct mownerhead mowners;
@@ -825,10 +852,16 @@ void m_copydata(struct mbuf *, int, int,
void m_freem(struct mbuf *);
void m_reclaim(void *, int);
void mbinit(void);
+void _mext_free_malloc(struct mbuf *, caddr_t, size_t, void *);
/* Inline routines. */
static __inline u_int m_length(struct mbuf *) __unused;
-static __inline void m_ext_free(struct mbuf *, boolean_t) __unused;
+static __inline void m_free_extdone(struct mbuf *) __unused;
+
+static __inline void _m_ext_free(struct mbuf *, boolean_t) __unused;
+static __inline struct mexthdr *_mexthdr_get(int) __unused;
+static __inline void _m_clget(struct mbuf *, struct pool_cache *, size_t, int)
+ __unused;
/* Packet tag routines */
struct m_tag *m_tag_get(int, int, int);
@@ -887,31 +920,110 @@ m_length(struct mbuf *m)
}
/*
- * m_ext_free: release a reference to the mbuf external storage.
+ * _m_ext_free: release a reference to the mbuf external storage.
*
* => if 'dofree', free the mbuf m itsself as well.
* => called at splvm.
*/
static __inline void
-m_ext_free(struct mbuf *m, boolean_t dofree)
+_m_ext_free(struct mbuf *m, boolean_t dofree)
{
+ struct mexthdr *ext;
+ KASSERT(m->m_flags & M_EXT);
+ ext = MEXT(m);
+ KASSERT(ext->ext_refcnt >= 1);
+
+ MEXT_LOCK(ext);
if (MCLISREFERENCED(m)) {
_MCLDEREFERENCE(m);
- } else if (m->m_flags & M_CLUSTER) {
- pool_cache_put_paddr(m->m_ext.ext_arg,
- m->m_ext.ext_buf, m->m_ext.ext_paddr);
- } else if (m->m_ext.ext_free) {
- (*m->m_ext.ext_free)(dofree ? m : NULL, m->m_ext.ext_buf,
- m->m_ext.ext_size, m->m_ext.ext_arg);
- dofree = FALSE;
+ MEXT_UNLOCK(ext);
} else {
- free(m->m_ext.ext_buf, m->m_ext.ext_type);
+ int extflags;
+ boolean_t puthdr;
+ MEXT_UNLOCK(ext);
+
+ /* dropping the last reference */
+ extflags = ext->ext_flags;
+ puthdr = (extflags & M_EXT_HDREMBEDDED) == 0;
+ if (extflags & M_EXT_CLUSTER) {
+ pool_cache_put_paddr(ext->ext_arg,
+ ext->ext_buf, ext->ext_paddr);
+ } else {
+ (*ext->ext_free)(dofree ? m : NULL,
+ ext->ext_buf, ext->ext_size, ext->ext_arg);
+ if (dofree)
+ dofree = puthdr = FALSE;
+ }
+ if (puthdr)
+ MEXTHDR_PUT(ext);
}
if (dofree)
pool_cache_put(&mbpool_cache, m);
}
+static __inline struct mexthdr *
+_mexthdr_get(int how)
+{
+ struct mexthdr *ext;
+ int s;
+
+ s = splvm(); /* MBUFLOCK */
+ ext = pool_cache_get(&mexthdrpool_cache,
+ (how) == M_WAIT ? PR_WAITOK : PR_NOWAIT);
+ splx(s);
+
+ return ext;
+}
+
+/*
+ * XXX it would be better to embed mexthdr in the cluster data.
+ */
+void
+_m_clget(struct mbuf *m, struct pool_cache *pc, size_t size, int how)
+{
+ int s;
+ void *buf;
+ paddr_t pa;
+ const int prhow =
+ (how == M_WAIT) ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT;
+
+ KASSERT((m->m_flags & M_EXT) == 0);
+
+ s = splvm(); /* MBUFLOCK */
+ buf = pool_cache_get_paddr(pc, prhow, &pa);
+ splx(s);
+
+ if (buf == NULL)
+ return;
+
+ MEXTADD(m, buf, size, M_EXT_CLUSTER|M_EXT_RW, NULL, pc, how);
+ if ((m->m_flags & M_EXT) == 0) {
+ s = splvm(); /* MBUFLOCK */
+ pool_cache_put_paddr(pc, buf, pa);
+ splx(s);
+ return;
+ }
+ MEXT(m)->ext_paddr = pa;
+}
+
+/*
+ * m_free_extdone: free mbuf and its associated mexthdr.
+ *
+ * => intended to be used by ext_free callback routines.
+ * => called at splvm.
+ */
+static __inline void
+m_free_extdone(struct mbuf *m)
+{
+ struct mexthdr *ext;
+
+ KASSERT((m)->m_flags & M_EXT);
+ ext = MEXT(m);
+ if ((ext->ext_flags & M_EXT_HDREMBEDDED) == 0)
+ MEXTHDR_PUT(ext);
+ pool_cache_put(&mbpool_cache, (m));
+}
#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */
Index: sys/socketvar.h
===================================================================
--- sys/socketvar.h (revision 878)
+++ sys/socketvar.h (working copy)
@@ -206,7 +206,7 @@ do { \
(sb)->sb_cc += (m)->m_len; \
(sb)->sb_mbcnt += MSIZE; \
if ((m)->m_flags & M_EXT) \
- (sb)->sb_mbcnt += (m)->m_ext.ext_size; \
+ (sb)->sb_mbcnt += MEXT(m)->ext_size; \
} while (/* CONSTCOND */ 0)
/* adjust counters in sb reflecting freeing of m */
@@ -215,7 +215,7 @@ do { \
(sb)->sb_cc -= (m)->m_len; \
(sb)->sb_mbcnt -= MSIZE; \
if ((m)->m_flags & M_EXT) \
- (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
+ (sb)->sb_mbcnt -= MEXT(m)->ext_size; \
} while (/* CONSTCOND */ 0)
/*
Index: kern/uipc_usrreq.c
===================================================================
--- kern/uipc_usrreq.c (revision 878)
+++ kern/uipc_usrreq.c (working copy)
@@ -1017,9 +1017,9 @@ unp_internalize(struct mbuf *control, st
if (newcm) {
if (control->m_flags & M_EXT)
MEXTREMOVE(control);
- MEXTADD(control, newcm,
+ MEXTADD_MALLOC(control, newcm,
CMSG_SPACE(nfds * sizeof(struct file *)),
- M_MBUF, NULL, NULL);
+ M_EXT_RW, M_MBUF, M_WAIT);
cm = newcm;
}
Index: kern/uipc_socket.c
===================================================================
--- kern/uipc_socket.c (revision 878)
+++ kern/uipc_socket.c (working copy)
@@ -332,19 +332,20 @@ sodopendfreel(struct socket *so)
break;
so_pendfree = NULL;
simple_unlock(&so_pendfree_slock);
- /* XXX splx */
for (; m != NULL; m = next) {
+ struct mexthdr *ext;
+ KASSERT(m->m_flags & M_EXT);
+ KASSERT((MEXT(m)->ext_flags & M_EXT_CLUSTER) == 0);
next = m->m_next;
- rv += m->m_ext.ext_size;
- sodoloanfree((m->m_flags & M_EXT_PAGES) ?
- m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
- m->m_ext.ext_size);
- pool_cache_put(&mbpool_cache, m);
+ ext = MEXT(m);
+ rv += ext->ext_size;
+ sodoloanfree((ext->ext_flags & M_EXT_PAGES) ?
+ ext->ext_pgs : NULL, ext->ext_buf, ext->ext_size);
+ m_free_extdone(m);
}
- /* XXX splvm */
simple_lock(&so_pendfree_slock);
}
@@ -366,6 +367,9 @@ soloanfree(struct mbuf *m, caddr_t buf,
return;
}
+ KASSERT(m->m_flags & M_EXT);
+ KASSERT((MEXT(m)->ext_flags & M_EXT_CLUSTER) == 0);
+
/*
* postpone freeing mbuf.
*
@@ -391,6 +395,7 @@ sosend_loan(struct socket *so, struct ui
vsize_t len;
vaddr_t lva, va;
int npgs, i, error;
+ struct mexthdr *ext;
if (uio->uio_segflg != UIO_USERSPACE)
return (0);
@@ -413,22 +418,26 @@ sosend_loan(struct socket *so, struct ui
if (lva == 0)
return 0;
+ ext = MEXTHDR_GET(M_DONTWAIT);
+ if (ext == NULL)
+ return 0;
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
- m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
+ ext->ext_pgs, UVM_LOAN_TOPAGE);
if (error) {
sokvafree(lva, len);
+ MEXTHDR_PUT(ext);
return (0);
}
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
+ pmap_kenter_pa(va, VM_PAGE_TO_PHYS(ext->ext_pgs[i]),
VM_PROT_READ);
pmap_update(pmap_kernel());
lva += (vaddr_t) iov->iov_base & PAGE_MASK;
- MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
- m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;
+ MEXTADD2(m, (caddr_t) lva, space, M_EXT_PAGES | M_EXT_ROMAP,
+ soloanfree, so, ext);
uio->uio_resid -= space;
/* uio_offset not updated, not set/used for write(2) */
Index: kern/uipc_mbuf.c
===================================================================
--- kern/uipc_mbuf.c (revision 900)
+++ kern/uipc_mbuf.c (working copy)
@@ -94,9 +94,11 @@ __KERNEL_RCSID(0, "uipc_mbuf.c,v 1.84 20
struct pool mbpool; /* mbuf pool */
struct pool mclpool; /* mbuf cluster pool */
+struct pool mexthdrpool; /* mbuf extref pool */
struct pool_cache mbpool_cache;
struct pool_cache mclpool_cache;
+struct pool_cache mexthdrpool_cache;
struct mbstat mbstat;
int max_linkhdr;
@@ -155,12 +157,16 @@ mbinit(void)
pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);
+ pool_init(&mexthdrpool, sizeof(struct mexthdr), 0, 0, 0, "mexthdr",
+ NULL);
pool_set_drain_hook(&mbpool, m_reclaim, NULL);
pool_set_drain_hook(&mclpool, m_reclaim, NULL);
+ pool_set_drain_hook(&mexthdrpool, m_reclaim, NULL);
pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);
+ pool_cache_init(&mexthdrpool_cache, &mexthdrpool, NULL, NULL, NULL);
/*
* Set the hard limit on the mclpool to the number of
@@ -177,6 +183,7 @@ mbinit(void)
*/
pool_setlowat(&mbpool, mblowat);
pool_setlowat(&mclpool, mcllowat);
+ pool_setlowat(&mexthdrpool, mcllowat); /* XXX */
#ifdef MBUFTRACE
{
@@ -567,7 +574,6 @@ m_copym0(struct mbuf *m, int off0, int l
if (m->m_flags & M_EXT) {
if (!deep) {
n->m_data = m->m_data + off;
- n->m_ext = m->m_ext;
MCLADDREFERENCE(m, n);
} else {
/*
@@ -626,7 +632,6 @@ m_copypacket(struct mbuf *m, int how)
n->m_len = m->m_len;
if (m->m_flags & M_EXT) {
n->m_data = m->m_data;
- n->m_ext = m->m_ext;
MCLADDREFERENCE(m, n);
} else {
memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
@@ -645,7 +650,6 @@ m_copypacket(struct mbuf *m, int how)
n->m_len = m->m_len;
if (m->m_flags & M_EXT) {
n->m_data = m->m_data;
- n->m_ext = m->m_ext;
MCLADDREFERENCE(m, n);
} else {
memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
@@ -972,7 +976,6 @@ m_split0(struct mbuf *m0, int len0, int
}
extpacket:
if (m->m_flags & M_EXT) {
- n->m_ext = m->m_ext;
MCLADDREFERENCE(m, n);
n->m_data = m->m_data + len;
} else {
@@ -1368,3 +1371,15 @@ m_getptr(struct mbuf *m, int loc, int *o
return (NULL);
}
+
+/*
+ * ext_free callback routine for MEXTMALLOC and MEXTADD_MALLOC.
+ */
+void
+_mext_free_malloc(struct mbuf *m, caddr_t buf, size_t size, void *type)
+{
+
+ if (m)
+ m_free_extdone(m);
+ free(buf, type);
+}
--NextPart-20041002192117-2591600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="x86.diff"
Index: arch/x86/x86/bus_dma.c
===================================================================
--- arch/x86/x86/bus_dma.c (revision 786)
+++ arch/x86/x86/bus_dma.c (working copy)
@@ -401,31 +401,34 @@ _bus_dmamap_load_mbuf(bus_dma_tag_t t, b
const struct vm_page * const *pgs;
paddr_t paddr;
int size;
+ struct mexthdr *ext;
if (m->m_len == 0)
continue;
- switch (m->m_flags & (M_EXT|M_EXT_CLUSTER|M_EXT_PAGES)) {
- case M_EXT|M_EXT_CLUSTER:
+ if ((m->m_flags & M_EXT) == 0) {
+ paddr = m->m_paddr + M_BUFOFFSET(m) +
+ (m->m_data - M_BUFADDR(m));
+ size = m->m_len;
+ error = _bus_dmamap_load_paddr(t, map, paddr, size);
+ } else if ((ext = MEXT(m))->ext_flags & M_EXT_CLUSTER) {
/* XXX KDASSERT */
- KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
- paddr = m->m_ext.ext_paddr +
- (m->m_data - m->m_ext.ext_buf);
+ KASSERT(ext->ext_paddr != M_PADDR_INVALID);
+ paddr = ext->ext_paddr +
+ (m->m_data - ext->ext_buf);
size = m->m_len;
error = _bus_dmamap_load_paddr(t, map, paddr, size);
- break;
-
- case M_EXT|M_EXT_PAGES:
- KASSERT(m->m_ext.ext_buf <= m->m_data);
+ } else if (ext->ext_flags & M_EXT_PAGES) {
+ KASSERT(ext->ext_buf <= m->m_data);
KASSERT(m->m_data <=
- m->m_ext.ext_buf + m->m_ext.ext_size);
+ ext->ext_buf + ext->ext_size);
offset = (vaddr_t)m->m_data -
- trunc_page((vaddr_t)m->m_ext.ext_buf);
+ trunc_page((vaddr_t)ext->ext_buf);
remainbytes = m->m_len;
/* skip uninteresting pages */
pgs = (const struct vm_page * const *)
- m->m_ext.ext_pgs + (offset >> PAGE_SHIFT);
+ ext->ext_pgs + (offset >> PAGE_SHIFT);
offset &= PAGE_MASK; /* offset in the first page */
@@ -446,16 +449,7 @@ _bus_dmamap_load_mbuf(bus_dma_tag_t t, b
offset = 0;
remainbytes -= size;
}
- break;
-
- case 0:
- paddr = m->m_paddr + M_BUFOFFSET(m) +
- (m->m_data - M_BUFADDR(m));
- size = m->m_len;
- error = _bus_dmamap_load_paddr(t, map, paddr, size);
- break;
-
- default:
+ } else {
error = _bus_dmamap_load_buffer(t, map, m->m_data,
m->m_len, NULL, flags);
}
--NextPart-20041002192117-2591600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="dev.diff"
Index: dev/pci/if_ti.c
===================================================================
--- dev/pci/if_ti.c (revision 904)
+++ dev/pci/if_ti.c (working copy)
@@ -721,7 +721,7 @@ static void ti_jfree(m, buf, size, arg)
SIMPLEQ_INSERT_HEAD(&sc->ti_jfree_listhead, entry, jpool_entries);
if (__predict_true(m != NULL))
- pool_cache_put(&mbpool_cache, m);
+ m_free_extdone(m);
splx(s);
}
@@ -904,9 +904,13 @@ static int ti_newbuf_jumbo(sc, i, m)
}
/* Attach the buffer to the mbuf. */
- MEXTADD(m_new, buf, ETHER_MAX_LEN_JUMBO,
- M_DEVBUF, ti_jfree, sc);
- m_new->m_flags |= M_EXT_RW;
+ MEXTADD(m_new, (void *)buf, ETHER_MAX_LEN_JUMBO,
+ M_EXT_RW, ti_jfree, sc, M_DONTWAIT);
+ if ((m_new->m_flags & M_EXT) == 0) {
+ m_freem(m_new);
+ ti_jfree(NULL, buf, ETHER_MAX_LEN_JUMBO, sc);
+ return(ENOBUFS);
+ }
m_new->m_len = m_new->m_pkthdr.len = ETHER_MAX_LEN_JUMBO;
} else {
m_new = m;
Index: dev/pci/if_dge.c
===================================================================
--- dev/pci/if_dge.c (revision 900)
+++ dev/pci/if_dge.c (working copy)
@@ -607,7 +607,7 @@ dge_freebuf(struct mbuf *m, caddr_t buf,
SLIST_INSERT_HEAD(&sc->sc_buglist, entry, rb_entry);
if (__predict_true(m != NULL))
- pool_cache_put(&mbpool_cache, m);
+ m_free_extdone(m);
splx(s);
}
#endif
@@ -2128,8 +2128,12 @@ dge_add_rxbuf(struct dge_softc *sc, int
return ENOBUFS;
m->m_len = m->m_pkthdr.len = DGE_BUFFER_SIZE;
- MEXTADD(m, buf, DGE_BUFFER_SIZE, M_DEVBUF, dge_freebuf, sc);
- m->m_flags |= M_EXT_RW;
+ MEXTADD(m, buf, DGE_BUFFER_SIZE, M_EXT_RW, dge_freebuf, sc, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ m_freem(m);
+ dge_freebuf(NULL, buf, DGE_BUFFER_SIZE, sc);
+ return ENOBUFS;
+ }
if (rxs->rxs_mbuf != NULL)
bus_dmamap_unload(sc->sc_dmat, rxs->rxs_dmamap);
Index: dev/pci/if_bge.c
===================================================================
--- dev/pci/if_bge.c (revision 904)
+++ dev/pci/if_bge.c (working copy)
@@ -849,7 +849,7 @@ bge_jfree(m, buf, size, arg)
SLIST_INSERT_HEAD(&sc->bge_jfree_listhead, entry, jpool_entries);
if (__predict_true(m != NULL))
- pool_cache_put(&mbpool_cache, m);
+ m_free_extdone(m);
splx(s);
}
@@ -953,9 +953,13 @@ bge_newbuf_jumbo(sc, i, m)
/* Attach the buffer to the mbuf. */
m_new->m_len = m_new->m_pkthdr.len = BGE_JUMBO_FRAMELEN;
- MEXTADD(m_new, buf, BGE_JUMBO_FRAMELEN, M_DEVBUF,
- bge_jfree, sc);
- m_new->m_flags |= M_EXT_RW;
+ MEXTADD(m_new, buf, BGE_JUMBO_FRAMELEN, M_EXT_RW,
+ bge_jfree, sc, M_DONTWAIT);
+ if ((m_new->m_flags & M_EXT) == 0) {
+ m_freem(m_new);
+ bge_jfree(NULL, buf, BGE_JUMBO_FRAMELEN, sc);
+ return(ENOBUFS);
+ }
} else {
m_new = m;
m_new->m_data = m_new->m_ext.ext_buf;
--NextPart-20041002192117-2591600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="nfs.diff"
Index: nfs/nfs_vnops.c
===================================================================
--- nfs/nfs_vnops.c (revision 906)
+++ nfs/nfs_vnops.c (working copy)
@@ -1340,7 +1340,7 @@ nfs_writerpc_extfree(struct mbuf *m, cad
KASSERT(m != NULL);
KASSERT(ctx != NULL);
- pool_cache_put(&mbpool_cache, m);
+ m_free_extdone(m);
simple_lock(&ctx->nwc_slock);
if (--ctx->nwc_mbufcount == 0) {
wakeup(ctx);
@@ -1440,12 +1440,13 @@ retry:
*/
struct mbuf *m;
struct iovec *iovp = uiop->uio_iov;
+ struct mexthdr *ext;
m = m_get(M_WAIT, MT_DATA);
+ ext = MEXTHDR_GET(M_WAIT);
MCLAIM(m, &nfs_mowner);
- MEXTADD(m, iovp->iov_base, len, M_MBUF,
- nfs_writerpc_extfree, &ctx);
- m->m_flags |= M_EXT_ROMAP;
+ MEXTADD2(m, iovp->iov_base, len, M_EXT_ROMAP,
+ nfs_writerpc_extfree, &ctx, ext);
m->m_len = len;
mb->m_next = m;
/*
Index: nfs/nfsm_subs.h
===================================================================
--- nfs/nfsm_subs.h (revision 905)
+++ nfs/nfsm_subs.h (working copy)
@@ -51,7 +51,7 @@
#define M_HASCL(m) ((m)->m_flags & M_EXT)
#define NFSMADV(m, s) (m)->m_data += (s)
-#define NFSMSIZ(m) ((M_HASCL(m)) ? (m)->m_ext.ext_size : \
+#define NFSMSIZ(m) ((M_HASCL(m)) ? MEXT(m)->ext_size : \
(((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN))
/*
Index: nfs/nfs_subs.c
===================================================================
--- nfs/nfs_subs.c (revision 900)
+++ nfs/nfs_subs.c (working copy)
@@ -1020,7 +1020,6 @@ nfsm_disct(mdp, dposp, siz, left, cp2)
* mbuf look empty.
*/
m2 = m_get(M_WAIT, MT_DATA);
- m2->m_ext = m1->m_ext;
m2->m_data = src;
m2->m_len = left;
MCLADDREFERENCE(m1, m2);
Index: nfs/nfs_serv.c
===================================================================
--- nfs/nfs_serv.c (revision 900)
+++ nfs/nfs_serv.c (working copy)
@@ -666,6 +666,7 @@ nfsrv_read(nfsd, slp, procp, mrq)
voff_t pgoff = trunc_page(off);
int npages;
vaddr_t lva;
+ struct mexthdr *ext;
npages = (round_page(off + cnt) - pgoff) >> PAGE_SHIFT;
KASSERT(npages <= M_EXT_MAXPAGES); /* XXX */
@@ -680,7 +681,8 @@ nfsrv_read(nfsd, slp, procp, mrq)
/* allocate mbuf */
m = m_get(M_WAIT, MT_DATA);
MCLAIM(m, &nfs_mowner);
- pgpp = m->m_ext.ext_pgs;
+ ext = MEXTHDR_GET(M_WAIT);
+ pgpp = ext->ext_pgs;
/* loan pages */
error = uvm_loanuobjpages(&vp->v_uobj, pgoff, npages,
@@ -688,13 +690,14 @@ nfsrv_read(nfsd, slp, procp, mrq)
if (error) {
sokvafree(lva, npages << PAGE_SHIFT);
m_free(m);
+ MEXTHDR_PUT(ext);
goto read_error;
}
/* associate kva to mbuf */
- MEXTADD(m, (void *)(lva + ((vaddr_t)off & PAGE_MASK)),
- cnt, M_MBUF, soloanfree, slp->ns_so);
- m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;
+ MEXTADD2(m, (void *)(lva + ((vaddr_t)off & PAGE_MASK)),
+ cnt, M_EXT_PAGES | M_EXT_ROMAP,
+ soloanfree, slp->ns_so, ext);
m->m_len = cnt;
/* map pages */
--NextPart-20041002192117-2591600--