Subject: Re: bpf/pcap performance
To: Guy Harris <guy@alum.mit.edu>
From: Darren Reed <darrenr@reed.wattle.id.au>
List: tech-net
Date: 04/10/2004 07:30:15
In some email I received from Guy Harris, sie wrote:
> > * the application is threaded, one thread uses select over all the
> > NICs so it knows when to read data from BPF, the other writes to
> > disk.
>
> The original BPF implementation didn't correctly support "select()" on
> BPF devices if you had a timeout on the device - "select()" wouldn't
> consider the BPF device readable until the hold buffer was non-empty,
> but the store buffer wasn't rotated into the hold buffer until it
> filled up, so "select()" would wait until the store buffer filled.
>
> FreeBSD fixed that somewhere in the 4.x timeframe, and I *think*
> OpenBSD also has it fixed; NetBSD still doesn't have it fixed, as far
> as I know.
OK, I went looking. I think the bug you are talking about here relates
to bpfread()? NetBSD has:
while (d->bd_hbuf == 0) {
if (d->bd_immediate) {
if (d->bd_slen == 0) {
splx(s);
return (EWOULDBLOCK);
}
FreeBSD:
while (d->bd_hbuf == 0) {
if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
OpenBSD:
while (d->bd_hbuf == 0) {
if (d->bd_immediate && d->bd_slen != 0) {
FreeBSD also has a bunch of other changes involving the use of callouts
that, according to the commit comment, relate to threads:
http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/net/bpf.c
- search for rev 1.86.
A merged change combining the above with a copy of FreeBSD's changes from
rev 1.86, adapted for NetBSD, is below. I've not tested it yet beyond
compiling it and making sure the kernel links cleanly :)
Some feedback from other NetBSD types about whether or not this is a good
patch to apply would be nice. If so, I'll commit it.
Cheers,
Darren
Index: bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.89
diff -c -r1.89 bpf.c
*** bpf.c 22 Jan 2004 00:32:41 -0000 1.89
--- bpf.c 9 Apr 2004 20:17:08 -0000
***************
*** 39,45 ****
*/
#include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.89 2004/01/22 00:32:41 jonathan Exp $");
#include "bpfilter.h"
--- 39,45 ----
*/
#include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.90 2004/03/24 15:34:54 atatat Exp $");
#include "bpfilter.h"
***************
*** 114,119 ****
--- 114,120 ----
static void bpf_attachd __P((struct bpf_d *, struct bpf_if *));
static void bpf_detachd __P((struct bpf_d *));
static int bpf_setif __P((struct bpf_d *, struct ifreq *));
+ static void bpf_timed_out __P((void *));
static __inline void
bpf_wakeup __P((struct bpf_d *));
static void catchpacket __P((struct bpf_d *, u_char *, u_int, u_int,
***************
*** 380,385 ****
--- 381,387 ----
/* Mark "free" and do most initialization. */
memset((char *)d, 0, sizeof(*d));
d->bd_bufsize = bpf_bufsize;
+ callout_init(&d->bd_callout);
return (0);
}
***************
*** 400,405 ****
--- 402,410 ----
int s;
s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
if (d->bd_bif)
bpf_detachd(d);
splx(s);
***************
*** 429,434 ****
--- 434,440 ----
int ioflag;
{
struct bpf_d *d = &bpf_dtab[minor(dev)];
+ int timed_out;
int error;
int s;
***************
*** 440,456 ****
return (EINVAL);
s = splnet();
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
while (d->bd_hbuf == 0) {
! if (d->bd_immediate) {
! if (d->bd_slen == 0) {
! splx(s);
! return (EWOULDBLOCK);
! }
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
--- 446,462 ----
return (EINVAL);
s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ timed_out = (d->bd_state == BPF_TIMED_OUT);
+ d->bd_state = BPF_IDLE;
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
while (d->bd_hbuf == 0) {
! if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
***************
*** 535,540 ****
--- 541,564 ----
d->bd_sel.sel_pid = 0;
}
+
+ static void
+ bpf_timed_out(arg)
+ void *arg;
+ {
+ struct bpf_d *d = (struct bpf_d *)arg;
+ int s;
+
+ s = splnet();
+ if (d->bd_state == BPF_WAITING) {
+ d->bd_state = BPF_TIMED_OUT;
+ if (d->bd_slen != 0)
+ bpf_wakeup(d);
+ }
+ splx(s);
+ }
+
+
int
bpfwrite(dev, uio, ioflag)
dev_t dev;
***************
*** 631,636 ****
--- 655,666 ----
struct bpf_insn **p;
#endif
+ s = splnet();
+ if (d->bd_state == BPF_WAITING)
+ callout_stop(&d->bd_callout);
+ d->bd_state = BPF_IDLE;
+ splx(s);
+
switch (cmd) {
default:
***************
*** 1040,1049 ****
--- 1070,1095 ----
/*
* An imitation of the FIONREAD ioctl code.
*/
+ #if 0
if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(p, &d->bd_sel);
+ #else
+ if (d->bd_hlen != 0 ||
+ ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+ d->bd_slen != 0))
+ revents |= events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(p, &d->bd_sel);
+ /* Start the read timeout if necessary */
+ if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ callout_reset(&d->bd_callout, d->bd_rtout,
+ bpf_timed_out, d);
+ d->bd_state = BPF_WAITING;
+ }
+ }
+ #endif
}
splx(s);
***************
*** 1177,1182 ****
--- 1223,1233 ----
for (m0 = m; m0 != 0; m0 = m0->m_next)
pktlen += m0->m_len;
+ if (pktlen == m->m_len) {
+ bpf_tap(arg, mtod(m, u_char *), pktlen);
+ return;
+ }
+
for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
++d->bd_rcount;
slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
***************
*** 1234,1240 ****
ROTATE_BUFFERS(d);
bpf_wakeup(d);
curlen = 0;
! }
/*
* Append the bpf header.
--- 1285,1297 ----
ROTATE_BUFFERS(d);
bpf_wakeup(d);
curlen = 0;
! } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
! /*
! * Immediate mode is set, or the read timeout has
! * already expired during a select call. A packet
! * arrived, so the reader should be woken up.
! */
! bpf_wakeup(d);
/*
* Append the bpf header.
***************
*** 1248,1261 ****
*/
(*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen));
d->bd_slen = curlen + totlen;
-
- if (d->bd_immediate) {
- /*
- * Immediate mode is set. A packet arrived so any
- * reads should be woken up.
- */
- bpf_wakeup(d);
- }
}
/*
--- 1305,1310 ----
Index: bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.16
diff -c -r1.16 bpfdesc.h
*** bpfdesc.h 7 Aug 2003 16:32:48 -0000 1.16
--- bpfdesc.h 9 Apr 2004 20:17:08 -0000
***************
*** 41,46 ****
--- 41,47 ----
#ifndef _NET_BPFDESC_H_
#define _NET_BPFDESC_H_
+ #include <sys/callout.h>
#include <sys/select.h>
/*
***************
*** 85,92 ****
--- 86,100 ----
u_char bd_pad; /* explicit alignment */
struct selinfo bd_sel; /* bsd select info */
#endif
+ struct callout bd_callout; /* for BPF timeouts with select */
};
+
+ /* Values for bd_state */
+ #define BPF_IDLE 0 /* no select in progress */
+ #define BPF_WAITING 1 /* waiting for read timeout in select */
+ #define BPF_TIMED_OUT 2 /* read timeout has expired in select */
+
/*
* Descriptor associated with each attached hardware interface.
*/