tech-net archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Plan for improving IP_PKTINFO socket option handling
Tom Ivar Helbekkmo <tih%hamartun.priv.no@localhost> writes:
> Christos Zoulas <christos%astron.com@localhost> writes:
>
>> This came up in a different discussion; we should pass the size around...
>
> That would be nice. It would also make a lot of sense: after using the
> size to detect client expectations while setting options, I spent a lot
> of time trying to find out why my code was failing in the getting part;
> I really expected the circumstances to be equivalent.
Thinking about it, I put back the code that returns what Linux or
Solaris compatible code expects, depending on data size, and just added
a fallback to a Linux (and current NetBSD) compatible value if the size
is unknown (as it is now), or, in the future, if the calling application
specifies a receiving buffer that doesn't match either data item.
Look for the "XXX" below to see it.
-tih
Index: sys/netinet/in.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.h,v
retrieving revision 1.101
diff -u -u -r1.101 in.h
--- sys/netinet/in.h 10 Aug 2017 04:31:58 -0000 1.101
+++ sys/netinet/in.h 31 Dec 2017 19:06:46 -0000
@@ -289,8 +289,10 @@
#define IP_IPSEC_POLICY 22 /* struct; get/set security policy */
#define IP_RECVTTL 23 /* bool; receive IP TTL w/dgram */
#define IP_MINTTL 24 /* minimum TTL for packet or drop */
-#define IP_PKTINFO 25 /* int; send interface and src addr */
-#define IP_RECVPKTINFO 26 /* int; send interface and dst addr */
+#define IP_PKTINFO 25 /* struct; set default src if/addr */
+#define IP_RECVPKTINFO 26 /* int; receive dst if/addr w/dgram */
+
+#define IP_SENDSRCADDR IP_RECVDSTADDR /* FreeBSD compatibility */
/*
* Information sent in the control message of a datagram socket for
@@ -301,6 +303,8 @@
unsigned int ipi_ifindex; /* interface index */
};
+#define ipi_spec_dst ipi_addr /* Solaris/Linux compatibility */
+
/*
* Defaults and limits for options
*/
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_pcb.c,v
retrieving revision 1.180
diff -u -u -r1.180 in_pcb.c
--- sys/netinet/in_pcb.c 15 Dec 2017 04:03:46 -0000 1.180
+++ sys/netinet/in_pcb.c 31 Dec 2017 19:06:46 -0000
@@ -204,6 +204,7 @@
inp->inp_errormtu = -1;
inp->inp_portalgo = PORTALGO_DEFAULT;
inp->inp_bindportonsend = false;
+ inp->inp_prefsrcip.s_addr = INADDR_ANY;
#if defined(IPSEC)
if (ipsec_enabled) {
int error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
Index: sys/netinet/in_pcb.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_pcb.h,v
retrieving revision 1.64
diff -u -u -r1.64 in_pcb.h
--- sys/netinet/in_pcb.h 10 Aug 2017 04:31:58 -0000 1.64
+++ sys/netinet/in_pcb.h 31 Dec 2017 19:06:46 -0000
@@ -95,6 +95,7 @@
int inp_errormtu; /* MTU of last xmit status = EMSGSIZE */
uint8_t inp_ip_minttl;
bool inp_bindportonsend;
+ struct in_addr inp_prefsrcip; /* preferred src IP when wild */
};
#define inp_faddr inp_ip.ip_dst
@@ -121,11 +122,9 @@
* Cancels INP_HDRINCL.
*/
#define INP_RECVTTL 0x0800 /* receive incoming IP TTL */
-#define INP_PKTINFO 0x1000 /* receive dst packet info */
-#define INP_RECVPKTINFO 0x2000 /* receive dst packet info */
+#define INP_RECVPKTINFO 0x1000 /* receive IP dst if/addr */
#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
- INP_RECVIF|INP_RECVTTL|INP_RECVPKTINFO|\
- INP_PKTINFO)
+ INP_RECVIF|INP_RECVTTL|INP_RECVPKTINFO)
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define inp_lock(inp) solock((inp)->inp_socket)
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_input.c,v
retrieving revision 1.363
diff -u -u -r1.363 ip_input.c
--- sys/netinet/ip_input.c 24 Nov 2017 14:03:25 -0000 1.363
+++ sys/netinet/ip_input.c 31 Dec 2017 19:06:46 -0000
@@ -1533,15 +1533,6 @@
if (inpflags & INP_RECVPKTINFO) {
struct in_pktinfo ipi;
- ipi.ipi_addr = ip->ip_src;
- ipi.ipi_ifindex = ifp->if_index;
- *mp = sbcreatecontrol(&ipi,
- sizeof(ipi), IP_RECVPKTINFO, IPPROTO_IP);
- if (*mp)
- mp = &(*mp)->m_next;
- }
- if (inpflags & INP_PKTINFO) {
- struct in_pktinfo ipi;
ipi.ipi_addr = ip->ip_dst;
ipi.ipi_ifindex = ifp->if_index;
*mp = sbcreatecontrol(&ipi,
Index: sys/netinet/ip_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_output.c,v
retrieving revision 1.288
diff -u -u -r1.288 ip_output.c
--- sys/netinet/ip_output.c 22 Dec 2017 11:22:37 -0000 1.288
+++ sys/netinet/ip_output.c 31 Dec 2017 19:06:46 -0000
@@ -1081,6 +1081,7 @@
struct ip *ip = &inp->inp_ip;
int inpflags = inp->inp_flags;
int optval = 0, error = 0;
+ struct in_pktinfo pktinfo;
KASSERT(solocked(so));
@@ -1103,7 +1104,6 @@
case IP_TOS:
case IP_TTL:
case IP_MINTTL:
- case IP_PKTINFO:
case IP_RECVOPTS:
case IP_RECVRETOPTS:
case IP_RECVDSTADDR:
@@ -1135,10 +1135,6 @@
else \
inpflags &= ~bit;
- case IP_PKTINFO:
- OPTSET(INP_PKTINFO);
- break;
-
case IP_RECVOPTS:
OPTSET(INP_RECVOPTS);
break;
@@ -1163,6 +1159,43 @@
OPTSET(INP_RECVTTL);
break;
}
+ break;
+ case IP_PKTINFO:
+ error = sockopt_getint(sopt, &optval);
+ if (!error) {
+ /* Linux compatibility */
+ OPTSET(INP_RECVPKTINFO);
+ break;
+ }
+ error = sockopt_get(sopt, &pktinfo, sizeof(struct in_pktinfo));
+ if (error)
+ break;
+ /* Solaris compatibility */
+ if (pktinfo.ipi_ifindex) {
+ struct ifnet *ifp;
+ struct in_ifaddr *ia;
+ int s;
+
+ /* pick up primary address */
+ s = pserialize_read_enter();
+ ifp = if_byindex(pktinfo.ipi_ifindex);
+ if (ifp == NULL) {
+ pserialize_read_exit(s);
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ ia = in_get_ia_from_ifp(ifp);
+ if (ia == NULL) {
+ pserialize_read_exit(s);
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ inp->inp_prefsrcip = IA_SIN(ia)->sin_addr;
+ pserialize_read_exit(s);
+ } else {
+ inp->inp_prefsrcip = pktinfo.ipi_addr;
+ }
+ break;
break;
#undef OPTSET
@@ -1239,7 +1272,6 @@
}
break;
}
- case IP_PKTINFO:
case IP_TOS:
case IP_TTL:
case IP_MINTTL:
@@ -1269,10 +1301,6 @@
#define OPTBIT(bit) (inpflags & bit ? 1 : 0)
- case IP_PKTINFO:
- optval = OPTBIT(INP_PKTINFO);
- break;
-
case IP_RECVOPTS:
optval = OPTBIT(INP_RECVOPTS);
break;
@@ -1300,6 +1328,28 @@
error = sockopt_setint(sopt, optval);
break;
+ case IP_PKTINFO:
+ /* XXX these tests fail until size gets propagated */
+ /* It needs to be passed through from the caller */
+ if (sopt->sopt_size == sizeof(int)) {
+ /* Linux compatibility */
+ optval = OPTBIT(INP_RECVPKTINFO);
+ error = sockopt_setint(sopt, optval);
+ } else if (sopt->sopt_size == sizeof(struct in_pktinfo)) {
+ /* Solaris compatibility */
+ struct in_pktinfo ipiopt;
+ ipiopt.ipi_ifindex = 0;
+ ipiopt.ipi_addr = inp->inp_prefsrcip;
+ error = sockopt_set(sopt, &ipiopt, sizeof(ipiopt));
+ } else {
+ /* While size is stuck at 0, and, later, if the */
+ /* caller doesn't use an exactly sized recipient */
+ /* for the data, default to Linux compatibility */
+ optval = OPTBIT(INP_RECVPKTINFO);
+ error = sockopt_setint(sopt, optval);
+ }
+ break;
+
#if 0 /* defined(IPSEC) */
case IP_IPSEC_POLICY:
{
@@ -1416,11 +1466,14 @@
struct inpcb *inp, kauth_cred_t cred)
{
struct cmsghdr *cm;
- struct in_pktinfo *pktinfo;
+ struct in_pktinfo pktinfo;
int error;
pktopts->ippo_imo = inp->inp_moptions;
- sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_laddr, 0);
+ if (!in_nullhost(inp->inp_prefsrcip))
+ sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_prefsrcip, 0);
+ else
+ sockaddr_in_init(&pktopts->ippo_laddr, &inp->inp_laddr, 0);
if (control == NULL)
return 0;
@@ -1446,13 +1499,22 @@
switch (cm->cmsg_type) {
case IP_PKTINFO:
- if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
+ if (cm->cmsg_len != CMSG_LEN(sizeof(pktinfo)))
return EINVAL;
-
- pktinfo = (struct in_pktinfo *)CMSG_DATA(cm);
- error = ip_pktinfo_prepare(pktinfo, pktopts, flags,
+ memcpy(&pktinfo, CMSG_DATA(cm), sizeof(pktinfo));
+ error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
cred);
- if (error != 0)
+ if (error)
+ return error;
+ break;
+ case IP_SENDSRCADDR: /* FreeBSD compatibility */
+ if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
+ return EINVAL;
+ pktinfo.ipi_ifindex = 0;
+ pktinfo.ipi_addr = ((struct in_pktinfo *)CMSG_DATA(cm))->ipi_addr;
+ error = ip_pktinfo_prepare(&pktinfo, pktopts, flags,
+ cred);
+ if (error)
return error;
break;
default:
Index: share/man/man4/ip.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/ip.4,v
retrieving revision 1.40
diff -u -u -r1.40 ip.4
--- share/man/man4/ip.4 13 Aug 2017 18:19:44 -0000 1.40
+++ share/man/man4/ip.4 31 Dec 2017 19:06:49 -0000
@@ -96,8 +96,8 @@
.Ed
.Pp
The
-.Dv IP_PKTINFO
-option can be used to turn on receiving of information about the source
+.Dv IP_RECVPKTINFO
+option can be used to turn on receiving of information about the destination
address of the packet, and the interface index.
The information is passed in a
.Vt struct in_pktinfo
@@ -117,13 +117,24 @@
.Pp
For
.Xr sendmsg 2 ,
-the source address or output interface can be specified by adding
+the source address or output interface can be specified by adding an
.Dv IP_PKTINFO
-to the control part of the message on a
+message to the control part of the message on a
.Dv SOCK_DGRAM
or
.Dv SOCK_RAW
-socket.
+socket. Setting ipi_ifindex will cause the primary address of that
+interface to be used; setting ipi_addr will directly choose that address.
+The IP_PKTINFO cmsghdr structure from a received message may be used
+unchanged, in which case the outgoing message will be sent from the
+address the incoming message was received on.
+.Pp
+Setting the
+.Dv IP_PKTINFO
+option on a socket, with the same
+.Vt struct in_pktinfo
+structure, will set the default source address to be used until set
+again, unless explicitly overridden on a per-packet basis, as above.
.Pp
The
.Dv IP_PORTALGO
@@ -177,6 +188,18 @@
cmsg_type = IP_RECVDSTADDR
.Ed
.Pp
+For
+.Xr sendmsg 2 ,
+the source address can be specified by adding
+.Dv IP_SENDSRCADDR
+to the control part of the message on a
+.Dv SOCK_DGRAM
+or
+.Dv SOCK_RAW
+socket. The IP_RECVDSTADDR cmsghdr structure from a received message
+may be used unchanged, in which case the outgoing message will be sent
+from the address the incoming message was received on.
+.Pp
If the
.Dv IP_RECVIF
option is enabled on a
@@ -197,12 +220,6 @@
cmsg_type = IP_RECVIF
.Ed
.Pp
-The
-.Dv IP_RECVPKTINFO
-option is similar to the
-.Dv IP_PKTINFO
-one, only in this case the inbound information is returned.
-.Pp
If the
.Dv IP_RECVTTL
option is enabled on a
@@ -452,6 +469,24 @@
the IP option field was improperly formed; an option field was
shorter than the minimum value or longer than the option buffer provided.
.El
+.Sh COMPATIBILITY
+The
+.Dv IP_RECVPKTINFO
+option is used because it is directly compatible with Solaris, AIX, etc.,
+and the
+.Dv IP_PKTINFO
+option is intended to be used in their manner, to set the default source
+address for outgoing packets on a
+.Dv SOCK_DGRAM
+or
+.Dv SOCK_RAW
+socket. For compatibility with Linux, however, if you attempt to set the
+.Dv IP_PKTINFO
+option, using an integer parameter as a boolean value, this will
+transparently manipulate the
+.Dv IP_RECVPKTINFO
+option instead. Source code compatibility with both environments is thus
+maintained.
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr recv 2 ,
--
Most people who graduate with CS degrees don't understand the significance
of Lisp. Lisp is the most important idea in computer science. --Alan Kay
Home |
Main Index |
Thread Index |
Old Index