Subject: Re: Take #3 - final proposed patch for ipsec/bpf/ipfilter integration
To: None <avalon@caligula.anu.edu.au>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 05/15/2003 10:11:15
--NextPart-20030515100657-0035700
Content-Type: Text/Plain; charset=us-ascii
> > > It is not just for ipf, but that is one of the two beneficiaries.
> > > The other is BPF (tcpdump.)
> >
> > tcpdump can decode ESP by itself and i think it should if needed.
> > it already has -E option. it's a bit limited currently, though.
> And it only fixes the problem for tcpdump and not all
> applications that want to use BPF to look at unencrypted
> traffic - which I think is an unnecessary burden.
if the bpf interface is insufficient,
why not improve/hack it to be able to have multiple tap points,
like the attached (incomplete) patch, or invent a new packet tapping interface?
> > > but so far, after much frustration to many people, there would appear to
> > > have been no better idea ? Consider that there are two targets here that
> > > we want to fix the problem for - tcpdump et al & ipfilter. Solving the
> > > problem for ipfilter with fancy pfil stuff is not going to work for
> > > tcpdump.
> >
> > i don't think that ipfilter and tcpdump should use the same mechanism
> > to solve this "problem".
> >
> > i think that additional pfil filtering points, e.g. inet_decoded_pfil_hook,
> > is enough for ipfilter. am i missing something?
>
> Yes, you need to be able to distinguish how the rules are applied.
because ipfilter must know which pfil_head it added hooks to,
i think that it can distinguish them, of course.
YAMAMOTO Takashi
--NextPart-20030515100657-0035700
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="bpf.pfil.diff"
Index: net/bpf.c
===================================================================
--- net/bpf.c (revision 1)
+++ net/bpf.c (working copy)
@@ -46,6 +46,7 @@
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.78 2003/03/13 10:18:35 dsl Exp $");
#include "bpfilter.h"
+#include "opt_pfil_hooks.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -93,6 +94,7 @@ __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.78
* The default read buffer size is patchable.
*/
int bpf_bufsize = BPF_BUFSIZE;
+u_long bpf_nullfitted[2] = {0,0};
/*
* bpf_iflist is the list of interfaces; each corresponds to an ifnet
@@ -118,6 +120,16 @@ static void reset_d __P((struct bpf_d *)
static int bpf_getdltlist __P((struct bpf_d *, struct bpf_dltlist *));
static int bpf_setdlt __P((struct bpf_d *, u_int));
+static void bpf_nullmtap_prep __P((caddr_t, struct mbuf *, sa_family_t,
+ void (*callback)(caddr_t, struct mbuf *)));
+#ifdef PFIL_HOOKS
+static int bpf_pfilhook __P((void *, struct mbuf **, struct ifnet *, int));
+static void bpf_mtap_single __P((caddr_t, struct mbuf *));
+#define D_PFIL(d) ((d)->bd_pfil != NULL)
+#else /* PFIL_HOOKS */
+#define D_PFIL(d) 0
+#endif /* PFIL_HOOKS */
+
dev_type_open(bpfopen);
dev_type_close(bpfclose);
dev_type_read(bpfread);
@@ -270,10 +282,15 @@ bpf_attachd(d, bp)
* it will divert packets to bpf.
*/
d->bd_bif = bp;
- d->bd_next = bp->bif_dlist;
- bp->bif_dlist = d;
-
- *bp->bif_driverp = bp;
+ if (!D_PFIL(d)) {
+ d->bd_next = bp->bif_dlist;
+ bp->bif_dlist = d;
+
+ *bp->bif_driverp = bp;
+ } else {
+ d->bd_next = bp->bif_dlist_pfil;
+ bp->bif_dlist_pfil = d;
+ }
}
/*
@@ -307,14 +324,17 @@ bpf_detachd(d)
panic("bpf: ifpromisc failed");
}
/* Remove d from the interface's descriptor list. */
- p = &bp->bif_dlist;
+ if (!D_PFIL(d))
+ p = &bp->bif_dlist;
+ else
+ p = &bp->bif_dlist_pfil;
while (*p != d) {
p = &(*p)->bd_next;
if (*p == 0)
panic("bpf_detachd: descriptor not in list");
}
*p = (*p)->bd_next;
- if (bp->bif_dlist == 0)
+ if (!D_PFIL(d) && bp->bif_dlist == 0)
/*
* Let the driver know that there are no more listeners.
*/
@@ -396,6 +416,10 @@ bpfclose(dev, flag, mode, p)
int s;
s = splnet();
+#ifdef PFIL_HOOKS
+ if (D_PFIL(d))
+ pfil_remove_hook(bpf_pfilhook, d, PFIL_IN|PFIL_OUT, d->bd_pfil);
+#endif /* PFIL_HOOKS */
if (d->bd_bif)
bpf_detachd(d);
splx(s);
@@ -751,7 +775,9 @@ bpfioctl(dev, cmd, addr, flag, p)
* Get device parameters.
*/
case BIOCGDLT:
- if (d->bd_bif == 0)
+ if (D_PFIL(d))
+ *(u_int *)addr = DLT_NULL;
+ else if (d->bd_bif == 0)
error = EINVAL;
else
*(u_int *)addr = d->bd_bif->bif_dlt;
@@ -771,7 +797,9 @@ bpfioctl(dev, cmd, addr, flag, p)
* Set device parameters.
*/
case BIOCSDLT:
- if (d->bd_bif == 0)
+ if (D_PFIL(d))
+ error = EINVAL;
+ else if (d->bd_bif == 0)
error = EINVAL;
else
error = bpf_setdlt(d, *(u_int *)addr);
@@ -948,7 +976,7 @@ bpf_setif(d, ifr)
struct ifreq *ifr;
{
struct bpf_if *bp;
- char *cp;
+ char *cp, *ifname, *prname;
int unit_seen, i, s, error;
/*
@@ -957,14 +985,30 @@ bpf_setif(d, ifr)
* XXX This is ugly ... do this differently?
*/
unit_seen = 0;
- cp = ifr->ifr_name;
- cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
- while (*cp++)
- if (*cp >= '0' && *cp <= '9')
+ ifname = ifr->ifr_name;
+ ifname[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
+
+ /*
+ * ifr_name can be either "prname@ifname" or "ifname".
+ */
+ cp = strrchr(ifname, '@');
+ if (cp == NULL) {
+ prname = NULL;
+ } else {
+ *(cp++) = 0;
+ prname = ifname;
+ ifname = cp;
+ }
+
+ cp = ifname;
+ while (*cp++) {
+ if (*cp >= '0' && *cp <= '9') {
unit_seen = 1;
+ }
+ }
if (!unit_seen) {
/* Make sure to leave room for the '\0'. */
- for (i = 0; i < (IFNAMSIZ - 1); ++i) {
+ for (i = ifname - ifr->ifr_name; i < (IFNAMSIZ - 1); ++i) {
if ((ifr->ifr_name[i] >= 'a' &&
ifr->ifr_name[i] <= 'z') ||
(ifr->ifr_name[i] >= 'A' &&
@@ -979,9 +1023,10 @@ bpf_setif(d, ifr)
*/
for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
struct ifnet *ifp = bp->bif_ifp;
+ struct pfil_head *pfilp;
if (ifp == 0 ||
- strcmp(ifp->if_xname, ifr->ifr_name) != 0)
+ strcmp(ifp->if_xname, ifname) != 0)
continue;
/* skip additional entry */
if (bp->bif_driverp != (struct bpf_if **)&ifp->if_bpf)
@@ -996,6 +1041,27 @@ bpf_setif(d, ifr)
if ((ifp->if_flags & IFF_UP) == 0)
return (ENETDOWN);
+ /*
+ * "prname@ifname" was given: tap via the named protocol's pfil head.
+ */
+ if (prname) {
+#ifdef PFIL_HOOKS
+ pfilp = pfil_head_get(PFIL_TYPE_PROTOCOL,
+ (u_long)prname);
+ if (pfilp == NULL)
+ return (ENXIO);
+
+ error = pfil_add_hook(bpf_pfilhook, d,
+ PFIL_IN|PFIL_OUT|PFIL_WAITOK, pfilp);
+ if (error)
+ return (error);
+
+ d->bd_pfil = pfilp; /* XXX */
+#else /* PFIL_HOOKS */
+ return (ENXIO);
+#endif /* PFIL_HOOKS */
+ }
+
if (d->bd_sbuf == 0) {
error = bpf_allocbufs(d);
if (error != 0)
@@ -1200,6 +1266,66 @@ bpf_mtap(arg, m)
}
/*
+ * Incoming linkage from DLT_NULL device drivers.
+ *
+ * Thin wrapper around bpf_nullmtap_prep(): the address family `family'
+ * is prepended to packet `m' as a four byte field and the result is
+ * delivered through bpf_mtap().  `arg' is passed through unchanged as
+ * bpf_mtap()'s first argument.
+ */
+void
+bpf_nullmtap(arg, m, family)
+ caddr_t arg;
+ struct mbuf *m;
+ sa_family_t family;
+{
+
+ bpf_nullmtap_prep(arg, m, family, bpf_mtap);
+}
+
+/*
+ * Prepend the address family to the packet as a four byte field and
+ * pass the result to the callback.
+ *
+ * arg:      passed through unchanged as the callback's first argument.
+ * m:        packet to tap.  It may be adjusted temporarily, but its data
+ *           pointer and lengths are put back before returning.
+ * family:   address family; widened to 32 bits for the prepended field.
+ * callback: tap routine that receives the prefixed packet.
+ */
+static void
+bpf_nullmtap_prep(arg, m, family, callback)
+	caddr_t arg;
+	struct mbuf *m;
+	sa_family_t family;
+	void (*callback) __P((caddr_t, struct mbuf *));
+{
+	u_int32_t mfamily = family;
+	struct mbuf m0;
+
+	/*
+	 * We need to prepend the address family as a four byte field.
+	 * Cons up a dummy header to pacify bpf.  This is safe because bpf
+	 * will only read from the mbuf (i.e., it won't try to free it or
+	 * keep a pointer to it).  If possible, prepend the family before
+	 * the header, in the same mbuf, for better BPF performance.
+	 */
+
+	/*
+	 * The room required is that of the four byte field actually
+	 * written, not of sa_family_t (which may be smaller); testing
+	 * against the latter could move m_data past the available
+	 * leading space.
+	 */
+	if (M_LEADINGSPACE(m) >= sizeof(mfamily)) {
+		/* bpf_nullfitted[0] counts packets prefixed in place. */
+		bpf_nullfitted[0]++;
+		m->m_data -= sizeof(mfamily);
+		/*
+		 * Copy from the widened value: `family' itself may be
+		 * shorter than the four bytes being copied.
+		 */
+		bcopy(&mfamily, mtod(m, char *), sizeof(mfamily));
+		m->m_len += sizeof(mfamily);
+		m->m_pkthdr.len += sizeof(mfamily);
+
+		callback(arg, m);
+
+		/* Undo the adjustment so the caller sees m as it was. */
+		m->m_pkthdr.len -= sizeof(mfamily);
+		m->m_len -= sizeof(mfamily);
+		m->m_data += sizeof(mfamily);
+	} else {
+		/* bpf_nullfitted[1] counts packets needing a dummy header. */
+		bpf_nullfitted[1]++;
+		M_COPY_PKTHDR(&m0, m);
+		m0.m_next = m;
+		m0.m_len = sizeof(mfamily);
+		m0.m_data = (char *)&mfamily;
+		m0.m_pkthdr.len += m0.m_len;
+
+		callback(arg, &m0);
+	}
+}
+
+
+/*
* Move the packet data from interface memory (pkt) into the
* store buffer. Return 1 if it's time to wakeup a listener (buffer full),
* otherwise 0. "copy" is the routine called to do the actual data
@@ -1387,6 +1513,12 @@ bpfdetach(ifp)
* It will be free'ed later by close routine.
*/
s = splnet();
+#ifdef PFIL_HOOKS
+ if (D_PFIL(d)) {
+ pfil_remove_hook(bpf_pfilhook, d,
+ PFIL_IN|PFIL_OUT, d->bd_pfil);
+ }
+#endif
d->bd_promisc = 0; /* we can't touch device. */
bpf_detachd(d);
splx(s);
@@ -1500,3 +1632,52 @@ bpf_setdlt(d, dlt)
splx(s);
return 0;
}
+
+#ifdef PFIL_HOOKS
+/*
+ * bpf_mtap for a single bpf device.
+ *
+ * `arg' is the descriptor (struct bpf_d *) to deliver to and `m' is the
+ * packet as an mbuf chain.  The descriptor's filter is run over the
+ * packet and, if it accepts any bytes, the packet is captured.
+ */
+static void
+bpf_mtap_single(arg, m)
+	caddr_t arg;
+	struct mbuf *m;
+{
+	struct bpf_d *d;
+	struct mbuf *seg;
+	u_int pktlen, slen;
+
+	d = (struct bpf_d *)arg;
+
+	/* The packet length is the sum of m_len over the whole chain. */
+	pktlen = 0;
+	seg = m;
+	while (seg != 0) {
+		pktlen += seg->m_len;
+		seg = seg->m_next;
+	}
+
+	++d->bd_rcount;
+	slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
+	if (slen == 0)
+		return;		/* filter rejected the packet */
+
+	catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcpy);
+}
+
+/*
+ * kick bpf using pfil.
+ *
+ * pfil hook function; `arg' is the bpf descriptor the hook was added
+ * for.  A packet is tapped for that descriptor only, and only when it
+ * was seen on the interface the descriptor is bound to.  `dir' is not
+ * examined.  Always returns 0.
+ */
+static int
+bpf_pfilhook(arg, mp, ifp, dir)
+	void *arg;
+	struct mbuf **mp;
+	struct ifnet *ifp;
+	int dir;
+{
+	struct bpf_d *d = arg;
+	/*
+	 * XXX pfil_run_hooks and pfil callback function prototype
+	 * XXX should be changed so that af can be passed here?
+	 * XXX assume AF_INET for now
+	 */
+	sa_family_t af = AF_INET;
+
+	/*
+	 * bpf_setif() adds this hook before it allocates the buffers
+	 * and (as far as this patch shows) before the descriptor is
+	 * attached, so bd_bif may not be set yet; ignore packets until
+	 * it is instead of dereferencing a null pointer.
+	 */
+	if (d->bd_bif != NULL && ifp == d->bd_bif->bif_ifp)
+		bpf_nullmtap_prep(arg, *mp, af, bpf_mtap_single);
+
+	return 0;
+}
+#endif /* PFIL_HOOKS */
+
Index: net/bpfdesc.h
===================================================================
--- net/bpfdesc.h (revision 1)
+++ net/bpfdesc.h (working copy)
@@ -89,6 +89,9 @@ struct bpf_d {
u_char bd_pad; /* explicit alignment */
struct selinfo bd_sel; /* bsd select info */
#endif
+#ifdef _KERNEL /* XXX */
+ void /*struct pfil_head*/ *bd_pfil;
+#endif
};
/*
@@ -101,6 +104,7 @@ struct bpf_if {
u_int bif_dlt; /* link layer type */
u_int bif_hdrlen; /* length of header (with padding) */
struct ifnet *bif_ifp; /* correspoding interface */
+ struct bpf_d *bif_dlist_pfil; /* list of descriptors using pfil */
};
#ifdef _KERNEL
Index: net/bpf.h
===================================================================
--- net/bpf.h (revision 1)
+++ net/bpf.h (working copy)
@@ -240,6 +240,7 @@ struct bpf_dltlist {
int bpf_validate __P((struct bpf_insn *, int));
void bpf_tap __P((caddr_t, u_char *, u_int));
void bpf_mtap __P((caddr_t, struct mbuf *));
+void bpf_nullmtap __P((caddr_t, struct mbuf *, sa_family_t));
void bpfattach __P((struct ifnet *, u_int, u_int));
void bpfattach2 __P((struct ifnet *, u_int, u_int, caddr_t *));
void bpfdetach __P((struct ifnet *));
Index: net/bpf_filter.c
===================================================================
--- net/bpf_filter.c (revision 1)
+++ net/bpf_filter.c (working copy)
@@ -52,6 +52,7 @@ static const char rcsid[] =
#include <sys/param.h>
#include <sys/time.h>
+#include <sys/socket.h>
#if !defined(UNALIGNED_ACCESS)
#define BPF_ALIGN
Index: net/pfil.c
===================================================================
--- net/pfil.c (revision 1)
+++ net/pfil.c (working copy)
@@ -83,14 +83,9 @@ pfil_run_hooks(struct pfil_head *ph, str
int
pfil_head_register(struct pfil_head *ph)
{
- struct pfil_head *lph;
- for (lph = LIST_FIRST(&pfil_head_list); lph != NULL;
- lph = LIST_NEXT(lph, ph_list)) {
- if (ph->ph_type == lph->ph_type &&
- ph->ph_un.phu_val == lph->ph_un.phu_val)
- return EEXIST;
- }
+ if (pfil_head_get(ph->ph_type, ph->ph_un.phu_val))
+ return EEXIST;
TAILQ_INIT(&ph->ph_in);
TAILQ_INIT(&ph->ph_out);
@@ -122,9 +117,15 @@ pfil_head_get(int type, u_long val)
for (ph = LIST_FIRST(&pfil_head_list); ph != NULL;
ph = LIST_NEXT(ph, ph_list)) {
- if (ph->ph_type == type &&
- ph->ph_un.phu_val == val)
- break;
+ if (ph->ph_type == type) {
+ if (PFIL_IS_STRING(type)) {
+ if (!strcmp(ph->ph_string, (char *)val))
+ break;
+ } else {
+ if (ph->ph_un.phu_val == val)
+ break;
+ }
+ }
}
return (ph);
Index: net/pfil.h
===================================================================
--- net/pfil.h (revision 1)
+++ net/pfil.h (working copy)
@@ -56,8 +56,11 @@ struct packet_filter_hook {
typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
-#define PFIL_TYPE_AF 1 /* key is AF_* type */
-#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+#define PFIL_TYPE_STRING 0x8000 /* key is a pointer to string */
+#define PFIL_TYPE_AF (1) /* key is AF_* type */
+#define PFIL_TYPE_IFNET (2) /* key is ifnet pointer */
+#define PFIL_TYPE_PROTOCOL (3|PFIL_TYPE_STRING) /* key is protocol name */
+#define PFIL_IS_STRING(type) (((type) & PFIL_TYPE_STRING) != 0) /* is the key a string? */
struct pfil_head {
pfil_list_t ph_in;
@@ -66,9 +69,11 @@ struct pfil_head {
union {
u_long phu_val;
void *phu_ptr;
+ const char *phu_string;
} ph_un;
#define ph_af ph_un.phu_val
#define ph_ifnet ph_un.phu_ptr
+#define ph_string ph_un.phu_string
LIST_ENTRY(pfil_head) ph_list;
};
typedef struct pfil_head pfil_head_t;
Index: netinet/ip_input.c
===================================================================
--- netinet/ip_input.c (revision 29)
+++ netinet/ip_input.c (working copy)
@@ -226,6 +226,9 @@ u_int16_t ip_id;
#ifdef PFIL_HOOKS
struct pfil_head inet_pfil_hook;
+#ifdef IPSEC
+struct pfil_head inet_esp_pfil_hook;
+#endif
#endif
struct ipqhead ipq;
@@ -371,6 +374,14 @@ ip_init()
if (i != 0)
printf("ip_init: WARNING: unable to register pfil hook, "
"error %d\n", i);
+#ifdef IPSEC
+ inet_esp_pfil_hook.ph_type = PFIL_TYPE_PROTOCOL;
+ inet_esp_pfil_hook.ph_string = "esp";
+ i = pfil_head_register(&inet_esp_pfil_hook);
+ if (i != 0)
+ printf("ip_init: WARNING: unable to register pfil hook "
+ "for ESP, error %d\n", i);
+#endif
#endif /* PFIL_HOOKS */
#ifdef INET_CSUM_COUNTERS
@@ -570,19 +581,16 @@ ip_input(struct mbuf *m)
* Note that filters must _never_ set this flag, as another filter
* in the list may have previously cleared it.
*/
- /*
- * let ipfilter look at packet on the wire,
- * not the decapsulated packet.
- */
-#ifdef IPSEC
- if (!ipsec_getnhist(m))
-#else
- if (1)
-#endif
{
- if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
- PFIL_IN) != 0)
- return;
+ struct pfil_head *pfilp;
+#ifdef IPSEC
+ if (ipsec_getnhist(m))
+ pfilp = &inet_esp_pfil_hook;
+ else
+#endif /* IPSEC */
+ pfilp = &inet_pfil_hook;
+ if (pfil_run_hooks(pfilp, &m, m->m_pkthdr.rcvif, PFIL_IN) != 0)
+ return;
if (m == NULL)
return;
ip = mtod(m, struct ip *);
Index: netinet6/esp_input.c
===================================================================
--- netinet6/esp_input.c (revision 1)
+++ netinet6/esp_input.c (working copy)
@@ -38,6 +38,7 @@
__KERNEL_RCSID(0, "$NetBSD: esp_input.c,v 1.28 2003/01/20 00:39:30 simonb Exp $");
#include "opt_inet.h"
+#include "opt_pfil_hooks.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -89,6 +90,10 @@ __KERNEL_RCSID(0, "$NetBSD: esp_input.c,
? sizeof(struct newesp) : sizeof(struct esp))
#ifdef INET
+#ifdef PFIL_HOOKS
+extern struct pfil_head inet_esp_pfil_hook; /* XXX */
+#endif /* PFIL_HOOKS */
+
void
#if __STDC__
esp4_input(struct mbuf *m, ...)
@@ -393,6 +398,15 @@ noreplaycheck:
splx(s);
goto bad;
}
+
+#ifdef PFIL_HOOKS
+ if (pfil_run_hooks(&inet_esp_pfil_hook, &m, m->m_pkthdr.rcvif,
+ PFIL_IN) != 0 || m == NULL) {
+ splx(s);
+ goto bad;
+ }
+#endif /* PFIL_HOOKS */
+
IF_ENQUEUE(&ipintrq, m);
m = NULL;
schednetisr(NETISR_IP); /* can be skipped but to make sure */
@@ -428,6 +442,13 @@ noreplaycheck:
goto bad;
}
+#ifdef PFIL_HOOKS
+ if (pfil_run_hooks(&inet_esp_pfil_hook, &m, m->m_pkthdr.rcvif,
+ PFIL_IN) != 0 || m == NULL) {
+ goto bad;
+ }
+#endif /* PFIL_HOOKS */
+
if (nxt != IPPROTO_DONE) {
if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
ipsec4_in_reject(m, NULL)) {
@@ -516,6 +537,10 @@ esp4_ctlinput(cmd, sa, v)
#endif /* INET */
#ifdef INET6
+#ifdef PFIL_HOOKS
+extern struct pfil_head inet6_esp_pfil_hook; /* XXX */
+#endif /* PFIL_HOOKS */
+
int
esp6_input(mp, offp, proto)
struct mbuf **mp;
@@ -814,6 +839,15 @@ noreplaycheck:
splx(s);
goto bad;
}
+
+#ifdef PFIL_HOOKS
+ if (pfil_run_hooks(&inet6_esp_pfil_hook, &m, m->m_pkthdr.rcvif,
+ PFIL_IN) != 0 || m == NULL) {
+ splx(s);
+ goto bad;
+ }
+#endif /* PFIL_HOOKS */
+
IF_ENQUEUE(&ip6intrq, m);
m = NULL;
schednetisr(NETISR_IPV6); /* can be skipped but to make sure */
@@ -914,6 +948,13 @@ noreplaycheck:
ipsec6stat.in_nomem++;
goto bad;
}
+
+#ifdef PFIL_HOOKS
+ if (pfil_run_hooks(&inet6_esp_pfil_hook, &m, m->m_pkthdr.rcvif,
+ PFIL_IN) != 0 || m == NULL) {
+ goto bad;
+ }
+#endif /* PFIL_HOOKS */
}
*offp = off;
Index: netinet6/ip6_input.c
===================================================================
--- netinet6/ip6_input.c (revision 1)
+++ netinet6/ip6_input.c (working copy)
@@ -142,6 +142,9 @@ int ip6_sourcecheck_interval; /* XXX */
#ifdef PFIL_HOOKS
struct pfil_head inet6_pfil_hook;
+#ifdef IPSEC
+struct pfil_head inet6_esp_pfil_hook;
+#endif
#endif
struct ip6stat ip6stat;
@@ -185,6 +188,14 @@ ip6_init()
if (i != 0)
printf("ip6_init: WARNING: unable to register pfil hook, "
"error %d\n", i);
+#ifdef IPSEC
+ inet6_esp_pfil_hook.ph_type = PFIL_TYPE_PROTOCOL;
+ inet6_esp_pfil_hook.ph_string = "esp6";
+ i = pfil_head_register(&inet6_esp_pfil_hook);
+ if (i != 0)
+ printf("ip6_init: WARNING: unable to register pfil hook "
+ "for ESP, error %d\n", i);
+#endif
#endif /* PFIL_HOOKS */
}
@@ -309,16 +320,19 @@ ip6_input(m)
* Note that filters must _never_ set this flag, as another filter
* in the list may have previously cleared it.
*/
- /*
- * let ipfilter look at packet on the wire,
- * not the decapsulated packet.
- */
+ {
+ struct pfil_head *pfilp;
+ /*
+ * Decapsulated packets (those with an IPsec history) are run
+ * through the ESP pfil head; packets as seen on the wire are
+ * run through the ordinary IPv6 head, as in ip_input().
+ */
#ifdef IPSEC
- if (!ipsec_getnhist(m))
-#else
- if (1)
+ if (ipsec_getnhist(m))
+ pfilp = &inet6_esp_pfil_hook;
+ else
#endif
- {
+ pfilp = &inet6_pfil_hook;
+
- if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif,
+ if (pfil_run_hooks(pfilp, &m, m->m_pkthdr.rcvif,
PFIL_IN) != 0)
return;
--NextPart-20030515100657-0035700--