Subject: Re: bpf/pcap performance
To: Guy Harris <guy@alum.mit.edu>
From: Darren Reed <darrenr@reed.wattle.id.au>
List: tech-net
Date: 04/10/2004 21:26:43
In some email I received from Guy Harris, sie wrote:
> On Sat, Apr 10, 2004 at 07:30:15AM +1000, Darren Reed wrote:
> > A merged change of the above plus a copy of FreeBSD's changes from 1.86,
> > adapted for NetBSD are below. I've not tested them yet beyond compiling
> > them up and making sure the kernel links cleanly :)
>
> BTW, there's some odd code in "bpfread()":
Thanks, change applied.
> Also, one of the changes imported from FreeBSD:
>
> > ***************
> > *** 1177,1182 ****
> > --- 1223,1233 ----
> > for (m0 = m; m0 != 0; m0 = m0->m_next)
> > pktlen += m0->m_len;
> >
> > + if (pktlen == m->m_len) {
> > + bpf_tap(arg, mtod(m, u_char *), pktlen);
> > + return;
> > + }
> > +
> > for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
> > ++d->bd_rcount;
> > slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
>
> might introduce a bug - see FreeBSD PR kern/56441:
>
> In file 'sys/net/bpf.c' have a error introduced in CVS revision
> 1.95. This error is critical for the programs with used flag
> BIOCGSEESENT. Locally generated packet may be copied in user
> space if flag BIOCGSEESENT set to one. Function 'bpf_tap' must
> be used only for incoming packets. But function 'bpf_mtap' uses
> 'bpf_tap'. It is fast. But it's wrong.
Hmm. NetBSD doesn't yet support BIOCGSEESENT, but that's a SMOP.
Unfortunately the ioctl numbers for bpf won't match, so binary
compatibility will need tweaks in freebsd_compat.
Having looked at that change, I think the changes I've included
below encapsulate what the perceived speedup was in FreeBSD (and
so I should commit this over there at some point too) without
recreating the BIOCGSEESENT interaction problem.
Darren
Index: bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.89
diff -c -r1.89 bpf.c
*** bpf.c 22 Jan 2004 00:32:41 -0000 1.89
--- bpf.c 10 Apr 2004 11:21:14 -0000
***************
*** 114,119 ****
--- 114,120 ----
static void bpf_attachd __P((struct bpf_d *, struct bpf_if *));
static void bpf_detachd __P((struct bpf_d *));
static int bpf_setif __P((struct bpf_d *, struct ifreq *));
+ static void bpf_timed_out __P((void *));
static __inline void
bpf_wakeup __P((struct bpf_d *));
static void catchpacket __P((struct bpf_d *, u_char *, u_int, u_int,
***************
*** 380,385 ****
--- 381,388 ----
/* Mark "free" and do most initialization. */
memset((char *)d, 0, sizeof(*d));
d->bd_bufsize = bpf_bufsize;
+ d->bd_seesent = 1;
+ callout_init(&d->bd_callout);
return (0);
}
***************
*** 400,405 ****
--- 403,411 ----
int s;
s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
if (d->bd_bif)
bpf_detachd(d);
splx(s);
***************
*** 429,434 ****
--- 435,441 ----
int ioflag;
{
struct bpf_d *d = &bpf_dtab[minor(dev)];
+ int timed_out;
int error;
int s;
***************
*** 440,456 ****
return (EINVAL);
s = splnet();
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
while (d->bd_hbuf == 0) {
! if (d->bd_immediate) {
! if (d->bd_slen == 0) {
! splx(s);
! return (EWOULDBLOCK);
! }
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
--- 447,463 ----
return (EINVAL);
s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ timed_out = (d->bd_state == BPF_TIMED_OUT);
+ d->bd_state = BPF_IDLE;
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
while (d->bd_hbuf == 0) {
! if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
***************
*** 463,473 ****
error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf",
d->bd_rtout);
else {
! if (d->bd_rtout == -1) {
! /* User requested non-blocking I/O */
! error = EWOULDBLOCK;
! } else
! error = 0;
}
if (error == EINTR || error == ERESTART) {
splx(s);
--- 470,477 ----
error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf",
d->bd_rtout);
else {
! /* User requested non-blocking I/O */
! error = EWOULDBLOCK;
}
if (error == EINTR || error == ERESTART) {
splx(s);
***************
*** 535,540 ****
--- 539,562 ----
d->bd_sel.sel_pid = 0;
}
+
+ static void
+ bpf_timed_out(arg)
+ void *arg;
+ {
+ struct bpf_d *d = (struct bpf_d *)arg;
+ int s;
+
+ s = splnet();
+ if (d->bd_state == BPF_WAITING) {
+ d->bd_state = BPF_TIMED_OUT;
+ if (d->bd_slen != 0)
+ bpf_wakeup(d);
+ }
+ splx(s);
+ }
+
+
int
bpfwrite(dev, uio, ioflag)
dev_t dev;
***************
*** 631,636 ****
--- 653,664 ----
struct bpf_insn **p;
#endif
+ s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
+ splx(s);
+
switch (cmd) {
default:
***************
*** 853,858 ****
--- 881,900 ----
d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
break;
+ /*
+ * Get "see sent packets" flag
+ */
+ case BIOCGSEESENT:
+ *(u_int *)addr = d->bd_seesent;
+ break;
+
+ /*
+ * Set "see sent" packets flag
+ */
+ case BIOCSSEESENT:
+ d->bd_seesent = *(u_int *)addr;
+ break;
+
case FIONBIO: /* Non-blocking I/O */
if (*(int *)addr)
d->bd_rtout = -1;
***************
*** 1040,1049 ****
--- 1082,1107 ----
/*
* An imitation of the FIONREAD ioctl code.
*/
+ #if 0
if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(p, &d->bd_sel);
+ #else
+ if (d->bd_hlen != 0 ||
+ ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+ d->bd_slen != 0))
+ revents |= events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(p, &d->bd_sel);
+ /* Start the read timeout if necessary */
+ if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ callout_reset(&d->bd_callout, d->bd_rtout,
+ bpf_timed_out, d);
+ d->bd_state = BPF_WAITING;
+ }
+ }
+ #endif
}
splx(s);
***************
*** 1168,1187 ****
caddr_t arg;
struct mbuf *m;
{
struct bpf_if *bp = (struct bpf_if *)arg;
struct bpf_d *d;
! u_int pktlen, slen;
struct mbuf *m0;
pktlen = 0;
for (m0 = m; m0 != 0; m0 = m0->m_next)
pktlen += m0->m_len;
for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
++d->bd_rcount;
! slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
if (slen != 0)
! catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcpy);
}
}
--- 1226,1259 ----
caddr_t arg;
struct mbuf *m;
{
+ void *(*cpfn) __P((void *, const void *, size_t));
struct bpf_if *bp = (struct bpf_if *)arg;
struct bpf_d *d;
! u_int pktlen, slen, buflen;
struct mbuf *m0;
+ void *marg;
pktlen = 0;
for (m0 = m; m0 != 0; m0 = m0->m_next)
pktlen += m0->m_len;
+ if (pktlen == m->m_len) {
+ cpfn = memcpy;
+ marg = mtod(m, void *);
+ buflen = pktlen;
+ } else {
+ cpfn = bpf_mcpy;
+ marg = m;
+ buflen = 0;
+ }
+
for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
+ if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
+ continue;
++d->bd_rcount;
! slen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
if (slen != 0)
! catchpacket(d, marg, pktlen, slen, cpfn);
}
}
***************
*** 1234,1240 ****
ROTATE_BUFFERS(d);
bpf_wakeup(d);
curlen = 0;
! }
/*
* Append the bpf header.
--- 1306,1318 ----
ROTATE_BUFFERS(d);
bpf_wakeup(d);
curlen = 0;
! } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
! /*
! * Immediate mode is set, or the read timeout has
! * already expired during a select call. A packet
! * arrived, so the reader should be woken up.
! */
! bpf_wakeup(d);
/*
* Append the bpf header.
***************
*** 1248,1261 ****
*/
(*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen));
d->bd_slen = curlen + totlen;
-
- if (d->bd_immediate) {
- /*
- * Immediate mode is set. A packet arrived so any
- * reads should be woken up.
- */
- bpf_wakeup(d);
- }
}
/*
--- 1326,1331 ----
Index: bpf.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.h,v
retrieving revision 1.33
diff -c -r1.33 bpf.h
*** bpf.h 22 Jan 2004 00:32:41 -0000 1.33
--- bpf.h 10 Apr 2004 11:21:15 -0000
***************
*** 102,126 ****
* header files. If your using gcc, we assume that you
* have run fixincludes so the latter set should work.
*/
! #define BIOCGBLEN _IOR('B',102, u_int)
! #define BIOCSBLEN _IOWR('B',102, u_int)
! #define BIOCSETF _IOW('B',103, struct bpf_program)
! #define BIOCFLUSH _IO('B',104)
! #define BIOCPROMISC _IO('B',105)
! #define BIOCGDLT _IOR('B',106, u_int)
! #define BIOCGETIF _IOR('B',107, struct ifreq)
! #define BIOCSETIF _IOW('B',108, struct ifreq)
! #define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
! #define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
! #define BIOCGSTATS _IOR('B',111, struct bpf_stat)
! #define BIOCIMMEDIATE _IOW('B',112, u_int)
! #define BIOCVERSION _IOR('B',113, struct bpf_version)
! #define BIOCSTCPF _IOW('B',114, struct bpf_program)
! #define BIOCSUDPF _IOW('B',115, struct bpf_program)
! #define BIOCGHDRCMPLT _IOR('B',116, u_int)
! #define BIOCSHDRCMPLT _IOW('B',117, u_int)
! #define BIOCSDLT _IOW('B',118, u_int)
! #define BIOCGDLTLIST _IOWR('B',119, struct bpf_dltlist)
/*
* Structure prepended to each packet.
--- 102,128 ----
* header files. If your using gcc, we assume that you
* have run fixincludes so the latter set should work.
*/
! #define BIOCGBLEN _IOR('B',102, u_int)
! #define BIOCSBLEN _IOWR('B',102, u_int)
! #define BIOCSETF _IOW('B',103, struct bpf_program)
! #define BIOCFLUSH _IO('B',104)
! #define BIOCPROMISC _IO('B',105)
! #define BIOCGDLT _IOR('B',106, u_int)
! #define BIOCGETIF _IOR('B',107, struct ifreq)
! #define BIOCSETIF _IOW('B',108, struct ifreq)
! #define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
! #define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
! #define BIOCGSTATS _IOR('B',111, struct bpf_stat)
! #define BIOCIMMEDIATE _IOW('B',112, u_int)
! #define BIOCVERSION _IOR('B',113, struct bpf_version)
! #define BIOCSTCPF _IOW('B',114, struct bpf_program)
! #define BIOCSUDPF _IOW('B',115, struct bpf_program)
! #define BIOCGHDRCMPLT _IOR('B',116, u_int)
! #define BIOCSHDRCMPLT _IOW('B',117, u_int)
! #define BIOCSDLT _IOW('B',118, u_int)
! #define BIOCGDLTLIST _IOWR('B',119, struct bpf_dltlist)
! #define BIOCGSEESENT _IOR('B',120, u_int)
! #define BIOCSSEESENT _IOW('B',121, u_int)
/*
* Structure prepended to each packet.
Index: bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.16
diff -c -r1.16 bpfdesc.h
*** bpfdesc.h 7 Aug 2003 16:32:48 -0000 1.16
--- bpfdesc.h 10 Apr 2004 11:21:15 -0000
***************
*** 41,46 ****
--- 41,47 ----
#ifndef _NET_BPFDESC_H_
#define _NET_BPFDESC_H_
+ #include <sys/callout.h>
#include <sys/select.h>
/*
***************
*** 75,80 ****
--- 76,82 ----
u_char bd_state; /* idle, waiting, or timed out */
u_char bd_immediate; /* true to return on packet arrival */
int bd_hdrcmplt; /* false to fill in src lladdr */
+ int bd_seesent; /* true if bpf should see sent packets */
int bd_async; /* non-zero if packet reception should generate signal */
pid_t bd_pgid; /* process or group id for signal */
#if BSD < 199103
***************
*** 85,92 ****
--- 87,101 ----
u_char bd_pad; /* explicit alignment */
struct selinfo bd_sel; /* bsd select info */
#endif
+ struct callout bd_callout; /* for BPF timeouts with select */
};
+
+ /* Values for bd_state */
+ #define BPF_IDLE 0 /* no select in progress */
+ #define BPF_WAITING 1 /* waiting for read timeout in select */
+ #define BPF_TIMED_OUT 2 /* read timeout has expired in select */
+
/*
* Descriptor associated with each attached hardware interface.
*/