Subject: kern/18770: in_multiaddrs should be hashed instead of chained on in_ifaddr
To: None <gnats-bugs@gnats.netbsd.org>
From: Nick <naamato@nexthop.com>
List: netbsd-bugs
Date: 10/22/2002 15:37:59
>Number: 18770
>Category: kern
>Synopsis: in_multiaddrs should be hashed instead of chained on in_ifaddr
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: kern-bug-people
>State: open
>Class: change-request
>Submitter-Id: net
>Arrival-Date: Tue Oct 22 12:39:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator: Nick Amato
>Release: current 20021022 <NetBSD-current source date>
>Organization:
NextHop Technologies
>Environment:
all
System: all
>Description:
The current IPv4 multicast group code has a few disadvantages:
1) The requirement of kludge code to stash outstanding multicast group
joins when their parent in_ifaddr goes away.
2) Poor scalability: the groups are stored in simple linked list.
3) Inability to join multicast groups on interfaces without an IPv4 address.
Given that ip_multicast_if() can select an interface using the
lower 24 bits of the arg to setsockopt(IP_ADD_MEMBERSHIP) and
friends, it makes sense to allow receipt of multicast datagrams on
interfaces without an address.
A solution is to hash these addresses just like other IPv4 addresses,
in their own hash table. The attached patch accomplishes this. All
three problems above are fixed; the kludge code goes away, the multicast
address lookups are typically faster, and groups may be joined on unnumbered
interfaces.
I have tested it and it seems to work fine, however, suggestions
are welcome.
>How-To-Repeat:
ifconfig gif0 create tunnel x.x.x.x y.y.y.y up
setsockopt(IP_ADD_MEMBERSHIP) -> in_addmulti() fails
>Fix:
Index: in.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/in.c,v
retrieving revision 1.81
diff -u -r1.81 in.c
--- in.c 2002/10/22 02:28:47 1.81
+++ in.c 2002/10/22 18:26:48
@@ -152,13 +152,6 @@
int hostzeroisbroadcast = HOSTZEROBROADCAST;
/*
- * This list is used to keep track of in_multi chains which belong to
- * deleted interface addresses. We use in_ifaddr so that a chain head
- * won't be deallocated until all multicast address record are deleted.
- */
-static TAILQ_HEAD(, in_ifaddr) in_mk = TAILQ_HEAD_INITIALIZER(in_mk);
-
-/*
* Return 1 if an internet address is for a ``local'' host
* (one to which we have a connection). If subnetsarelocal
* is true, this includes other subnets of the local net.
@@ -400,7 +393,6 @@
ia->ia_broadaddr.sin_family = AF_INET;
}
ia->ia_ifp = ifp;
- LIST_INIT(&ia->ia_multiaddrs);
}
break;
@@ -551,13 +543,6 @@
TAILQ_REMOVE(&in_ifaddr, ia, ia_list);
if (ia->ia_allhosts != NULL)
in_delmulti(ia->ia_allhosts);
- if (LIST_FIRST(&ia->ia_multiaddrs) != NULL &&
- /*
- * If the interface is going away, don't bother to save
- * the multicast entries.
- */
- ifp->if_output != if_nulloutput)
- in_savemkludge(ia);
IFAFREE(&ia->ia_ifa);
in_setmaxmtu();
}
@@ -574,7 +559,6 @@
continue;
in_purgeaddr(ifa, ifp);
}
- in_purgemkludge(ifp);
}
/*
@@ -854,11 +838,7 @@
flags |= RTF_HOST;
}
error = in_addprefix(ia, flags);
- /*
- * recover multicast kludge entry, if there is.
- */
- if (ifp->if_flags & IFF_MULTICAST)
- in_restoremkludge(ia, ifp);
+
/*
* If the interface supports multicast, join the "all hosts"
* multicast group on that interface.
@@ -1033,91 +1013,6 @@
}
/*
- * Multicast address kludge:
- * If there were any multicast addresses attached to this interface address,
- * either move them to another address on this interface, or save them until
- * such time as this interface is reconfigured for IPv4.
- */
-void
-in_savemkludge(oia)
- struct in_ifaddr *oia;
-{
- struct in_ifaddr *ia;
- struct in_multi *inm, *next;
-
- IFP_TO_IA(oia->ia_ifp, ia);
- if (ia) { /* there is another address */
- for (inm = LIST_FIRST(&oia->ia_multiaddrs); inm; inm = next){
- next = LIST_NEXT(inm, inm_list);
- LIST_REMOVE(inm, inm_list);
- IFAFREE(&inm->inm_ia->ia_ifa);
- IFAREF(&ia->ia_ifa);
- inm->inm_ia = ia;
- LIST_INSERT_HEAD(&ia->ia_multiaddrs, inm, inm_list);
- }
- } else { /* last address on this if deleted, save */
- TAILQ_INSERT_TAIL(&in_mk, oia, ia_list);
- IFAREF(&oia->ia_ifa);
- }
-}
-
-/*
- * Continuation of multicast address hack:
- * If there was a multicast group list previously saved for this interface,
- * then we re-attach it to the first address configured on the i/f.
- */
-void
-in_restoremkludge(ia, ifp)
- struct in_ifaddr *ia;
- struct ifnet *ifp;
-{
- struct in_ifaddr *oia;
-
- for (oia = TAILQ_FIRST(&in_mk); oia != NULL;
- oia = TAILQ_NEXT(oia, ia_list)) {
- if (oia->ia_ifp == ifp) {
- struct in_multi *inm, *next;
-
- for (inm = LIST_FIRST(&oia->ia_multiaddrs);
- inm != NULL; inm = next) {
- next = LIST_NEXT(inm, inm_list);
- LIST_REMOVE(inm, inm_list);
- IFAFREE(&inm->inm_ia->ia_ifa);
- IFAREF(&ia->ia_ifa);
- inm->inm_ia = ia;
- LIST_INSERT_HEAD(&ia->ia_multiaddrs,
- inm, inm_list);
- }
- TAILQ_REMOVE(&in_mk, oia, ia_list);
- IFAFREE(&oia->ia_ifa);
- break;
- }
- }
-}
-
-void
-in_purgemkludge(ifp)
- struct ifnet *ifp;
-{
- struct in_ifaddr *oia;
-
- for (oia = TAILQ_FIRST(&in_mk); oia != NULL;
- oia = TAILQ_NEXT(oia, ia_list)) {
- if (oia->ia_ifp != ifp)
- continue;
-
- /*
- * Leaving from all multicast groups joined through
- * this interface is done via in_pcbpurgeif().
- */
-
- TAILQ_REMOVE(&in_mk, oia, ia_list);
- IFAFREE(&oia->ia_ifa);
- break;
- }
-}
-
-/*
* Add an address to the list of IP multicast addresses for a given interface.
*/
struct in_multi *
@@ -1127,7 +1022,6 @@
{
struct in_multi *inm;
struct ifreq ifr;
- struct in_ifaddr *ia;
int s = splsoftnet();
/*
@@ -1142,7 +1036,7 @@
} else {
/*
* New address; allocate a new multicast record
- * and link it into the interface's multicast list.
+ * and link it into the multicast hash table
*/
inm = (struct in_multi *)malloc(sizeof(*inm),
M_IPMADDR, M_NOWAIT);
@@ -1153,15 +1047,10 @@
inm->inm_addr = *ap;
inm->inm_ifp = ifp;
inm->inm_refcount = 1;
- IFP_TO_IA(ifp, ia);
- if (ia == NULL) {
- free(inm, M_IPMADDR);
- splx(s);
- return (NULL);
- }
- inm->inm_ia = ia;
- IFAREF(&inm->inm_ia->ia_ifa);
- LIST_INSERT_HEAD(&ia->ia_multiaddrs, inm, inm_list);
+
+ LIST_INSERT_HEAD(&IN_MADDR_HASH(inm->inm_addr.s_addr),
+ inm, inm_hash);
+
/*
* Ask the network driver to update its multicast reception
* filter appropriately for the new address.
@@ -1171,7 +1060,7 @@
satosin(&ifr.ifr_addr)->sin_addr = *ap;
if ((ifp->if_ioctl == NULL) ||
(*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) {
- LIST_REMOVE(inm, inm_list);
+ LIST_REMOVE(inm, inm_hash);
free(inm, M_IPMADDR);
splx(s);
return (NULL);
@@ -1204,8 +1093,7 @@
/*
* Unlink from list.
*/
- LIST_REMOVE(inm, inm_list);
- IFAFREE(&inm->inm_ia->ia_ifa);
+ LIST_REMOVE(inm, inm_hash);
/*
* Notify the network driver to update its multicast reception
* filter.
Index: in_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/in_var.h,v
retrieving revision 1.44
diff -u -r1.44 in_var.h
--- in_var.h 2002/05/12 20:33:50 1.44
+++ in_var.h 2002/10/22 18:26:48
@@ -99,7 +99,6 @@
struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
#define ia_broadaddr ia_dstaddr
struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
- LIST_HEAD(, in_multi) ia_multiaddrs; /* list of multicast addresses */
struct in_multi *ia_allhosts; /* multicast address record for
the allhosts multicast group */
};
@@ -122,6 +121,9 @@
#ifndef IN_IFADDR_HASH_SIZE
#define IN_IFADDR_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */
#endif
+#ifndef IN_MADDR_HASH_SIZE
+#define IN_MADDR_HASH_SIZE 509 /* 61, 127, 251, 509, 1021, 2039 are good */
+#endif
/*
* This is a bit unconventional, and wastes a little bit of space, but
@@ -130,15 +132,22 @@
*/
#define IN_IFADDR_HASH(x) in_ifaddrhashtbl[(u_long)(x) % IN_IFADDR_HASH_SIZE]
+#define IN_MADDR_HASH(x) in_maddrhashtbl[(u_long)(x) % IN_MADDR_HASH_SIZE]
-LIST_HEAD(in_ifaddrhashhead, in_ifaddr); /* Type of the hash head */
-TAILQ_HEAD(in_ifaddrhead, in_ifaddr); /* Type of the list head */
+/*
+ * types for multicast and interface address hashes and list heads
+ */
+LIST_HEAD(in_ifaddrhashhead, in_ifaddr);
+TAILQ_HEAD(in_ifaddrhead, in_ifaddr);
+LIST_HEAD(in_maddrhashhead, in_multi);
extern u_long in_ifaddrhash; /* size of hash table - 1 */
-extern int in_ifaddrentries; /* total number of addrs */
extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* Hash table head */
extern struct in_ifaddrhead in_ifaddr; /* List head (in ip_input) */
+extern u_long in_maddrhash; /* size of hash table - 1 */
+extern struct in_maddrhashhead *in_maddrhashtbl; /* Hash table head */
+
extern struct ifqueue ipintrq; /* ip packet input queue */
extern const int inetctlerrmap[];
@@ -228,10 +237,9 @@
struct in_multi {
struct in_addr inm_addr; /* IP multicast address */
struct ifnet *inm_ifp; /* back pointer to ifnet */
- struct in_ifaddr *inm_ia; /* back pointer to in_ifaddr */
u_int inm_refcount; /* no. membership claims by sockets */
u_int inm_timer; /* IGMP membership report timer */
- LIST_ENTRY(in_multi) inm_list; /* list of multicast addresses */
+ LIST_ENTRY(in_multi) inm_hash; /* list of multicast addresses */
u_int inm_state; /* state of membership */
struct router_info *inm_rti; /* router version info */
};
@@ -242,7 +250,7 @@
* all of the in_multi records.
*/
struct in_multistep {
- struct in_ifaddr *i_ia;
+ struct in_maddrhashhead *i_hh;
struct in_multi *i_inm;
};
@@ -255,16 +263,11 @@
/* struct ifnet *ifp; */ \
/* struct in_multi *inm; */ \
{ \
- struct in_ifaddr *ia; \
-\
- IFP_TO_IA((ifp), ia); /* multicast */ \
- if (ia == NULL) \
- (inm) = NULL; \
- else \
- for ((inm) = LIST_FIRST(&ia->ia_multiaddrs); \
- (inm) != NULL && !in_hosteq((inm)->inm_addr, (addr)); \
- (inm) = LIST_NEXT((inm), inm_list)) \
- continue; \
+ LIST_FOREACH(inm, &IN_MADDR_HASH((addr).s_addr), inm_hash) { \
+ if (in_hosteq((inm)->inm_addr, (addr)) && \
+ (inm)->inm_ifp == ifp) \
+ break; \
+ } \
}
/*
@@ -279,13 +282,13 @@
/* struct in_multi *inm; */ \
{ \
if (((inm) = (step).i_inm) != NULL) \
- (step).i_inm = LIST_NEXT((inm), inm_list); \
+ (step).i_inm = LIST_NEXT((inm), inm_hash); \
else \
- while ((step).i_ia != NULL) { \
- (inm) = LIST_FIRST(&(step).i_ia->ia_multiaddrs); \
- (step).i_ia = TAILQ_NEXT((step).i_ia, ia_list); \
+ while ((step).i_hh != in_maddrhashtbl + IN_MADDR_HASH_SIZE) { \
+ (inm) = LIST_FIRST((step).i_hh); \
+ (step).i_hh++; \
if ((inm) != NULL) { \
- (step).i_inm = LIST_NEXT((inm), inm_list); \
+ (step).i_inm = LIST_NEXT((inm), inm_hash); \
break; \
} \
} \
@@ -295,7 +298,7 @@
/* struct in_multistep step; */ \
/* struct in_multi *inm; */ \
{ \
- (step).i_ia = TAILQ_FIRST(&in_ifaddr); \
+ (step).i_hh = &in_maddrhashtbl[0]; \
(step).i_inm = NULL; \
IN_NEXT_MULTI((step), (inm)); \
}
@@ -304,9 +307,6 @@
int in_ifinit __P((struct ifnet *,
struct in_ifaddr *, struct sockaddr_in *, int));
-void in_savemkludge __P((struct in_ifaddr *));
-void in_restoremkludge __P((struct in_ifaddr *, struct ifnet *));
-void in_purgemkludge __P((struct ifnet *));
struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
void in_delmulti __P((struct in_multi *));
void in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
Index: ip_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_input.c,v
retrieving revision 1.158
diff -u -r1.158 ip_input.c
--- ip_input.c 2002/09/23 13:43:27 1.158
+++ ip_input.c 2002/10/22 18:26:48
@@ -202,9 +202,10 @@
extern struct domain inetdomain;
int ipqmaxlen = IFQ_MAXLEN;
u_long in_ifaddrhash; /* size of hash table - 1 */
-int in_ifaddrentries; /* total number of addrs */
+u_long in_maddrhash; /* size of hash table - 1 */
struct in_ifaddrhead in_ifaddr;
struct in_ifaddrhashhead *in_ifaddrhashtbl;
+struct in_maddrhashhead *in_maddrhashtbl;
struct ifqueue ipintrq;
struct ipstat ipstat;
u_int16_t ip_id;
@@ -338,6 +339,8 @@
TAILQ_INIT(&in_ifaddr);
in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
M_WAITOK, &in_ifaddrhash);
+ in_maddrhashtbl = hashinit(IN_MADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
+ M_WAITOK, &in_maddrhash);
if (ip_mtudisc != 0)
ip_mtudisc_timeout_q =
rt_timer_queue_create(ip_mtudisc_timeout);
>Release-Note:
>Audit-Trail:
>Unformatted: