tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Plan for improving IP_PKTINFO socket option handling



Tom Ivar Helbekkmo <tih%hamartun.priv.no@localhost> writes:

> Christos Zoulas <christos%astron.com@localhost> writes:
>
>> This came up in a different discussion; we should pass the size around...
>
> That would be nice.  It would also make a lot of sense: after using the
> size to detect client expectations while setting options, I spent a lot
> of time trying to find out why my code was failing in the getting part;
> I really expected the circumstances to be equivalent.

Thinking about it, I put back the code that returns what Linux or
Solaris compatible code expects, depending on data size, and just added
a fallback to a Linux (and current NetBSD) compatible value if the size
is unknown (as it is now), or, in the future, if the calling application
specifies a receiving buffer that doesn't match either data item.

Look for the "XXX" below to see it.

-tih

Index: sys/netinet/in.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.h,v
retrieving revision 1.101
diff -u -u -r1.101 in.h
--- sys/netinet/in.h	10 Aug 2017 04:31:58 -0000	1.101
+++ sys/netinet/in.h	31 Dec 2017 19:06:46 -0000
@@ -289,8 +289,10 @@
 #define	IP_IPSEC_POLICY		22   /* struct; get/set security policy */
 #define	IP_RECVTTL		23   /* bool; receive IP TTL w/dgram */
 #define	IP_MINTTL		24   /* minimum TTL for packet or drop */
-#define	IP_PKTINFO		25   /* int; send interface and src addr */
-#define	IP_RECVPKTINFO		26   /* int; send interface and dst addr */
+#define	IP_PKTINFO		25   /* struct; set default src if/addr */
+#define	IP_RECVPKTINFO		26   /* int; receive dst if/addr w/dgram */
+
+#define IP_SENDSRCADDR IP_RECVDSTADDR /* FreeBSD compatibility */
 
 /*
  * Information sent in the control message of a datagram socket for
@@ -301,6 +303,8 @@
 	unsigned int ipi_ifindex;	/* interface index */
 };
 
+#define ipi_spec_dst ipi_addr	/* Solaris/Linux compatibility */
+
 /*
  * Defaults and limits for options
  */
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_pcb.c,v
retrieving revision 1.180
diff -u -u -r1.180 in_pcb.c
--- sys/netinet/in_pcb.c	15 Dec 2017 04:03:46 -0000	1.180
+++ sys/netinet/in_pcb.c	31 Dec 2017 19:06:46 -0000
@@ -204,6 +204,7 @@
 	inp->inp_errormtu = -1;
 	inp->inp_portalgo = PORTALGO_DEFAULT;
 	inp->inp_bindportonsend = false;
+	inp->inp_prefsrcip.s_addr = INADDR_ANY;
 #if defined(IPSEC)
 	if (ipsec_enabled) {
 		int error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
Index: sys/netinet/in_pcb.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_pcb.h,v
retrieving revision 1.64
diff -u -u -r1.64 in_pcb.h
--- sys/netinet/in_pcb.h	10 Aug 2017 04:31:58 -0000	1.64
+++ sys/netinet/in_pcb.h	31 Dec 2017 19:06:46 -0000
@@ -95,6 +95,7 @@
 	int	  inp_errormtu;		/* MTU of last xmit status = EMSGSIZE */
 	uint8_t	  inp_ip_minttl;
 	bool      inp_bindportonsend;
+	struct    in_addr inp_prefsrcip; /* preferred src IP when wild  */
 };
 
 #define	inp_faddr	inp_ip.ip_dst
@@ -121,11 +122,9 @@
 					 * Cancels INP_HDRINCL.
 					 */
 #define	INP_RECVTTL		0x0800	/* receive incoming IP TTL */
-#define	INP_PKTINFO		0x1000	/* receive dst packet info */
-#define	INP_RECVPKTINFO		0x2000	/* receive dst packet info */
+#define	INP_RECVPKTINFO		0x1000	/* receive IP dst if/addr */
 #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
-				INP_RECVIF|INP_RECVTTL|INP_RECVPKTINFO|\
-				INP_PKTINFO)
+				 INP_RECVIF|INP_RECVTTL|INP_RECVPKTINFO)
 
 #define	sotoinpcb(so)		((struct inpcb *)(so)->so_pcb)
 #define	inp_lock(inp)		solock((inp)->inp_socket)
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_input.c,v
retrieving revision 1.363
diff -u -u -r1.363 ip_input.c
--- sys/netinet/ip_input.c	24 Nov 2017 14:03:25 -0000	1.363
+++ sys/netinet/ip_input.c	31 Dec 2017 19:06:46 -0000
@@ -1533,15 +1533,6 @@
 
 	if (inpflags & INP_RECVPKTINFO) {
 		struct in_pktinfo ipi;
-		ipi.ipi_addr = ip->ip_src;
-		ipi.ipi_ifindex = ifp->if_index;
-		*mp = sbcreatecontrol(&ipi,
-		    sizeof(ipi), IP_RECVPKTINFO, IPPROTO_IP);
-		if (*mp)
-			mp = &(*mp)->m_next;
-	}
-	if (inpflags & INP_PKTINFO) {
-		struct in_pktinfo ipi;
 		ipi.ipi_addr = ip->ip_dst;
 		ipi.ipi_ifindex = ifp->if_index;
 		*mp = sbcreatecontrol(&ipi,
Index: sys/netinet/ip_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_output.c,v
retrieving revision 1.288
diff -u -u -r1.288 ip_output.c
--- sys/netinet/ip_output.c	22 Dec 2017 11:22:37 -0000	1.288
+++ sys/netinet/ip_output.c	31 Dec 2017 19:06:46 -0000
@@ -1081,6 +1081,7 @@
 	struct ip *ip = &inp->inp_ip;
 	int inpflags = inp->inp_flags;
 	int optval = 0, error = 0;
+	struct in_pktinfo pktinfo;
 
 	KASSERT(solocked(so));
 
@@ -1103,7 +1104,6 @@
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
-		case IP_PKTINFO:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
@@ -1135,10 +1135,6 @@
 	else \
 		inpflags &= ~bit;
 
-			case IP_PKTINFO:
-				OPTSET(INP_PKTINFO);
-				break;
-
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
@@ -1163,6 +1159,43 @@
 				OPTSET(INP_RECVTTL);
 				break;
 			}
+			break;
+		case IP_PKTINFO:
+			error = sockopt_getint(sopt, &optval);
+			if (!error) {
+				/* Linux compatibility */
+				OPTSET(INP_RECVPKTINFO);
+				break;
+			}
+			error = sockopt_get(sopt, &pktinfo, sizeof(struct in_pktinfo));
+			if (error)
+				break;
+			/* Solaris compatibility */
+			if (pktinfo.ipi_ifindex) {
+				struct ifnet *ifp;
+				struct in_ifaddr *ia;
+				int s;
+
+				/* pick up primary address */
+				s = pserialize_read_enter();
+				ifp = if_byindex(pktinfo.ipi_ifindex);
+				if (ifp == NULL) {
+					pserialize_read_exit(s);
+					error = EADDRNOTAVAIL;
+					break;
+				}
+				ia = in_get_ia_from_ifp(ifp);
+				if (ia == NULL) {
+					pserialize_read_exit(s);
+					error = EADDRNOTAVAIL;
+					break;
+				}
+				inp->inp_prefsrcip = IA_SIN(ia)->sin_addr;
+				pserialize_read_exit(s);
+			} else {
+				inp->inp_prefsrcip = pktinfo.ipi_addr;
+			}
+			break;
 		break;
 #undef OPTSET
 
@@ -1239,7 +1272,6 @@
 			}
 			break;
 		}
-		case IP_PKTINFO:
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
@@ -1269,10 +1301,6 @@
 
 #define	OPTBIT(bit)	(inpflags & bit ? 1 : 0)
 
-			case IP_PKTINFO:
-				optval = OPTBIT(INP_PKTINFO);
-				break;
-
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
@@ -1300,6 +1328,28 @@
 			error = sockopt_setint(sopt, optval);
 			break;
 
+		case IP_PKTINFO:
+			/* XXX these tests fail until size gets propagated */
+			/* It needs to be passed through from the caller */
+			if (sopt->sopt_size == sizeof(int)) {
+				/* Linux compatibility */
+				optval = OPTBIT(INP_RECVPKTINFO);
+				error = sockopt_setint(sopt, optval);
+			} else if (sopt->sopt_size == sizeof(struct in_pktinfo)) {
+				/* Solaris compatibility */
+				struct in_pktinfo ipiopt;
+				ipiopt.ipi_ifindex = 0;
+				ipiopt.ipi_addr = inp->inp_prefsrcip;
+				error = sockopt_set(sopt, &ipiopt, sizeof(ipiopt));
+			} else {
+				/* While size is stuck at 0, and, later, if the */
+				/* caller doesn't use an exactly sized recipient */
+				/* for the data, default to Linux compatibility */
+				optval = OPTBIT(INP_RECVPKTINFO);
+				error = sockopt_setint(sopt, optval);
+			}
+			break;
+
 #if 0	/* defined(IPSEC) */
 		case IP_IPSEC_POLICY:
 		{
@@ -1416,11 +1466,14 @@
     struct inpcb *inp, kauth_cred_t cred)
 {
 	struct cmsghdr *cm;
-	struct in_pktinfo *pktinfo;
+	struct in_pktinfo pktinfo;
 	int error;
 
 	pktopts->ippo_imo = inp->inp_moptions;
-	sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_laddr, 0);
+	if (!in_nullhost(inp->inp_prefsrcip))
+		sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_prefsrcip, 0);
+	else
+		sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_laddr, 0);
 
 	if (control == NULL)
 		return 0;
@@ -1446,13 +1499,22 @@
 
 		switch (cm->cmsg_type) {
 		case IP_PKTINFO:
-			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
+			if (cm->cmsg_len != CMSG_LEN(sizeof(pktinfo)))
 				return EINVAL;
-
-			pktinfo = (struct in_pktinfo *)CMSG_DATA(cm);
-			error = ip_pktinfo_prepare(pktinfo, pktopts, flags,
+			memcpy(&pktinfo, CMSG_DATA(cm), sizeof(pktinfo));
+			error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
 			    cred);
-			if (error != 0)
+			if (error)
+				return error;
+			break;
+		case IP_SENDSRCADDR: /* FreeBSD compatibility */
+			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
+				return EINVAL;
+			pktinfo.ipi_ifindex = 0;
+			pktinfo.ipi_addr = ((struct in_pktinfo *)CMSG_DATA(cm))->ipi_addr;
+			error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
+			    cred);
+			if (error)
 				return error;
 			break;
 		default:
Index: share/man/man4/ip.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/ip.4,v
retrieving revision 1.40
diff -u -u -r1.40 ip.4
--- share/man/man4/ip.4	13 Aug 2017 18:19:44 -0000	1.40
+++ share/man/man4/ip.4	31 Dec 2017 19:06:49 -0000
@@ -96,8 +96,8 @@
 .Ed
 .Pp
 The
-.Dv IP_PKTINFO
-option can be used to turn on receiving of information about the source
+.Dv IP_RECVPKTINFO
+option can be used to turn on receiving of information about the destination
 address of the packet, and the interface index.
 The information is passed in a
 .Vt struct in_pktinfo
@@ -117,13 +117,24 @@
 .Pp
 For
 .Xr sendmsg 2 ,
-the source address or output interface can be specified by adding
+the source address or output interface can be specified by adding an
 .Dv IP_PKTINFO
-to the control part of the message on a
+message to the control part of the message on a
 .Dv SOCK_DGRAM
 or
 .Dv SOCK_RAW
-socket.
+socket.  Setting ipi_ifindex will cause the primary address of that
+interface to be used; setting ipi_addr will directly choose that address.
+The IP_PKTINFO cmsghdr structure from a received message may be used
+unchanged, in which case the outgoing message will be sent from the
+address the incoming message was received on.
+.Pp
+Setting the
+.Dv IP_PKTINFO
+option on a socket, with the same
+.Vt struct in_pktinfo
+structure, will set the default source address to be used until set
+again, unless explicitly overridden on a per-packet basis, as above.
 .Pp
 The
 .Dv IP_PORTALGO
@@ -177,6 +188,18 @@
 cmsg_type = IP_RECVDSTADDR
 .Ed
 .Pp
+For
+.Xr sendmsg 2 ,
+the source address can be specified by adding
+.Dv IP_SENDSRCADDR
+to the control part of the message on a
+.Dv SOCK_DGRAM
+or
+.Dv SOCK_RAW
+socket.  The IP_RECVDSTADDR cmsghdr structure from a received message
+may be used unchanged, in which case the outgoing message will be sent
+from the address the incoming message was received on.
+.Pp
 If the
 .Dv IP_RECVIF
 option is enabled on a
@@ -197,12 +220,6 @@
 cmsg_type = IP_RECVIF
 .Ed
 .Pp
-The
-.Dv IP_RECVPKTINFO
-option is similar to the
-.Dv IP_PKTINFO
-one, only in this case the inbound information is returned.
-.Pp
 If the
 .Dv IP_RECVTTL
 option is enabled on a
@@ -452,6 +469,24 @@
 the IP option field was improperly formed; an option field was
 shorter than the minimum value or longer than the option buffer provided.
 .El
+.Sh COMPATIBILITY
+The
+.Dv IP_RECVPKTINFO
+option is used because it is directly compatible with Solaris, AIX, etc.,
+and the
+.Dv IP_PKTINFO
+option is intended to be used in their manner, to set the default source
+address for outgoing packets on a
+.Dv SOCK_DGRAM
+or
+.Dv SOCK_RAW
+socket.  For compatibility with Linux, however, if you attempt to set the
+.Dv IP_PKTINFO
+option, using an integer parameter as a boolean value, this will
+transparently manipulate the
+.Dv IP_RECVPKTINFO
+option instead.  Source code compatibility with both environments is thus
+maintained.
 .Sh SEE ALSO
 .Xr getsockopt 2 ,
 .Xr recv 2 ,

-- 
Most people who graduate with CS degrees don't understand the significance
of Lisp.  Lisp is the most important idea in computer science.  --Alan Kay


Home | Main Index | Thread Index | Old Index