1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/xti_inet.h>
33 #include <sys/ucred.h>
34 #include <sys/zone.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
41
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <sys/sdt.h>
46 #include <sys/socket.h>
47 #include <sys/ethernet.h>
48 #include <sys/mac.h>
49 #include <net/if.h>
50 #include <net/if_types.h>
51 #include <net/if_arp.h>
52 #include <net/route.h>
53 #include <sys/sockio.h>
54 #include <netinet/in.h>
55 #include <net/if_dl.h>
56
57 #include <inet/common.h>
58 #include <inet/mi.h>
59 #include <inet/mib2.h>
60 #include <inet/nd.h>
61 #include <inet/arp.h>
62 #include <inet/snmpcom.h>
63 #include <inet/kstatcom.h>
64
65 #include <netinet/igmp_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet/icmp6.h>
68 #include <netinet/sctp.h>
69
70 #include <inet/ip.h>
71 #include <inet/ip_impl.h>
72 #include <inet/ip6.h>
73 #include <inet/ip6_asp.h>
74 #include <inet/tcp.h>
75 #include <inet/ip_multi.h>
76 #include <inet/ip_if.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ip_ftable.h>
79 #include <inet/ip_rts.h>
80 #include <inet/optcom.h>
81 #include <inet/ip_ndp.h>
82 #include <inet/ip_listutils.h>
83 #include <netinet/igmp.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/udp.h>
86 #include <inet/ipp_common.h>
87
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/ipdrop.h>
92 #include <inet/ip_netinfo.h>
93
94 #include <inet/ipclassifier.h>
95 #include <inet/sctp_ip.h>
96 #include <inet/sctp/sctp_impl.h>
97 #include <inet/udp_impl.h>
98 #include <sys/sunddi.h>
99
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
102
103 /*
104 * Return how much size is needed for the different ancillary data items
105 */
106 uint_t
107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
108 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
109 {
110 uint_t ancil_size;
111 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
112
113 /*
114 * If IP_RECVDSTADDR is set we include the destination IP
115 * address as an option. With IP_RECVOPTS we include all
116 * the IP options.
117 */
118 ancil_size = 0;
119 if (recv_ancillary.crb_recvdstaddr &&
120 (ira->ira_flags & IRAF_IS_IPV4)) {
121 ancil_size += sizeof (struct T_opthdr) +
122 sizeof (struct in_addr);
123 IP_STAT(ipst, conn_in_recvdstaddr);
124 }
125
126 /*
127 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
128 * are different
129 */
130 if (recv_ancillary.crb_ip_recvpktinfo &&
131 connp->conn_family == AF_INET) {
132 ancil_size += sizeof (struct T_opthdr) +
133 sizeof (struct in_pktinfo);
134 IP_STAT(ipst, conn_in_recvpktinfo);
135 }
136
137 if ((recv_ancillary.crb_recvopts) &&
138 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
139 ancil_size += sizeof (struct T_opthdr) +
140 ipp->ipp_ipv4_options_len;
141 IP_STAT(ipst, conn_in_recvopts);
142 }
143
144 if (recv_ancillary.crb_recvslla) {
145 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
146 ill_t *ill;
147
148 /* Make sure ira_l2src is setup if not already */
149 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
150 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
151 ipst);
152 if (ill != NULL) {
153 ip_setl2src(mp, ira, ill);
154 ill_refrele(ill);
155 }
156 }
157 ancil_size += sizeof (struct T_opthdr) +
158 sizeof (struct sockaddr_dl);
159 IP_STAT(ipst, conn_in_recvslla);
160 }
161
162 if (recv_ancillary.crb_recvif) {
163 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
164 IP_STAT(ipst, conn_in_recvif);
165 }
166
167 /*
168 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
169 * are different
170 */
171 if (recv_ancillary.crb_ip_recvpktinfo &&
172 connp->conn_family == AF_INET6) {
173 ancil_size += sizeof (struct T_opthdr) +
174 sizeof (struct in6_pktinfo);
175 IP_STAT(ipst, conn_in_recvpktinfo);
176 }
177
178 if (recv_ancillary.crb_ipv6_recvhoplimit) {
179 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
180 IP_STAT(ipst, conn_in_recvhoplimit);
181 }
182
183 if (recv_ancillary.crb_ipv6_recvtclass) {
184 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
185 IP_STAT(ipst, conn_in_recvtclass);
186 }
187
188 if (recv_ancillary.crb_ipv6_recvhopopts &&
189 (ipp->ipp_fields & IPPF_HOPOPTS)) {
190 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
191 IP_STAT(ipst, conn_in_recvhopopts);
192 }
193 /*
194 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
195 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
196 * options that appear before a routing header.
197 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
198 */
199 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
200 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
201 (recv_ancillary.crb_ipv6_recvdstopts &&
202 recv_ancillary.crb_ipv6_recvrthdr)) {
203 ancil_size += sizeof (struct T_opthdr) +
204 ipp->ipp_rthdrdstoptslen;
205 IP_STAT(ipst, conn_in_recvrthdrdstopts);
206 }
207 }
208 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
209 (ipp->ipp_fields & IPPF_RTHDR)) {
210 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
211 IP_STAT(ipst, conn_in_recvrthdr);
212 }
213 if ((recv_ancillary.crb_ipv6_recvdstopts ||
214 recv_ancillary.crb_old_ipv6_recvdstopts) &&
215 (ipp->ipp_fields & IPPF_DSTOPTS)) {
216 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
217 IP_STAT(ipst, conn_in_recvdstopts);
218 }
219 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
220 ancil_size += sizeof (struct T_opthdr) +
221 ucredminsize(ira->ira_cred);
222 IP_STAT(ipst, conn_in_recvucred);
223 }
224
225 /*
226 * If SO_TIMESTAMP is set allocate the appropriate sized
227 * buffer. Since gethrestime() expects a pointer aligned
228 * argument, we allocate space necessary for extra
229 * alignment (even though it might not be used).
230 */
231 if (recv_ancillary.crb_timestamp) {
232 ancil_size += sizeof (struct T_opthdr) +
233 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
234 IP_STAT(ipst, conn_in_timestamp);
235 }
236
237 /*
238 * If IP_RECVTTL is set allocate the appropriate sized buffer
239 */
240 if (recv_ancillary.crb_recvttl &&
241 (ira->ira_flags & IRAF_IS_IPV4)) {
242 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
243 IP_STAT(ipst, conn_in_recvttl);
244 }
245
246 return (ancil_size);
247 }
248
249 /*
250 * Lay down the ancillary data items at "ancil_buf".
251 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
252 * large buffer - ancil_size.
253 */
254 void
255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
256 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
257 {
258 /*
259 * Copy in destination address before options to avoid
260 * any padding issues.
261 */
262 if (recv_ancillary.crb_recvdstaddr &&
263 (ira->ira_flags & IRAF_IS_IPV4)) {
264 struct T_opthdr *toh;
265 ipaddr_t *dstptr;
266
267 toh = (struct T_opthdr *)ancil_buf;
268 toh->level = IPPROTO_IP;
269 toh->name = IP_RECVDSTADDR;
270 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
271 toh->status = 0;
272 ancil_buf += sizeof (struct T_opthdr);
273 dstptr = (ipaddr_t *)ancil_buf;
274 *dstptr = ipp->ipp_addr_v4;
275 ancil_buf += sizeof (ipaddr_t);
276 ancil_size -= toh->len;
277 }
278
279 /*
280 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
281 * are different
282 */
283 if (recv_ancillary.crb_ip_recvpktinfo &&
284 connp->conn_family == AF_INET) {
285 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
286 struct T_opthdr *toh;
287 struct in_pktinfo *pktinfop;
288 ill_t *ill;
289 ipif_t *ipif;
290
291 toh = (struct T_opthdr *)ancil_buf;
292 toh->level = IPPROTO_IP;
293 toh->name = IP_PKTINFO;
294 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
295 toh->status = 0;
296 ancil_buf += sizeof (struct T_opthdr);
297 pktinfop = (struct in_pktinfo *)ancil_buf;
298
299 pktinfop->ipi_ifindex = ira->ira_ruifindex;
300 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
301
302 /* Find a good address to report */
303 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
304 if (ill != NULL) {
305 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
306 if (ipif != NULL) {
307 pktinfop->ipi_spec_dst.s_addr =
308 ipif->ipif_lcl_addr;
309 ipif_refrele(ipif);
310 }
311 ill_refrele(ill);
312 }
313 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
314 ancil_buf += sizeof (struct in_pktinfo);
315 ancil_size -= toh->len;
316 }
317
318 if ((recv_ancillary.crb_recvopts) &&
319 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
320 struct T_opthdr *toh;
321
322 toh = (struct T_opthdr *)ancil_buf;
323 toh->level = IPPROTO_IP;
324 toh->name = IP_RECVOPTS;
325 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
326 toh->status = 0;
327 ancil_buf += sizeof (struct T_opthdr);
328 bcopy(ipp->ipp_ipv4_options, ancil_buf,
329 ipp->ipp_ipv4_options_len);
330 ancil_buf += ipp->ipp_ipv4_options_len;
331 ancil_size -= toh->len;
332 }
333
334 if (recv_ancillary.crb_recvslla) {
335 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
336 struct T_opthdr *toh;
337 struct sockaddr_dl *dstptr;
338 ill_t *ill;
339 int alen = 0;
340
341 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
342 if (ill != NULL)
343 alen = ill->ill_phys_addr_length;
344
345 /*
346 * For loopback multicast and broadcast the packet arrives
347 * with ira_ruifdex being the physical interface, but
348 * ira_l2src is all zero since ip_postfrag_loopback doesn't
349 * know our l2src. We don't report the address in that case.
350 */
351 if (ira->ira_flags & IRAF_LOOPBACK)
352 alen = 0;
353
354 toh = (struct T_opthdr *)ancil_buf;
355 toh->level = IPPROTO_IP;
356 toh->name = IP_RECVSLLA;
357 toh->len = sizeof (struct T_opthdr) +
358 sizeof (struct sockaddr_dl);
359 toh->status = 0;
360 ancil_buf += sizeof (struct T_opthdr);
361 dstptr = (struct sockaddr_dl *)ancil_buf;
362 dstptr->sdl_family = AF_LINK;
363 dstptr->sdl_index = ira->ira_ruifindex;
364 if (ill != NULL)
365 dstptr->sdl_type = ill->ill_type;
366 else
367 dstptr->sdl_type = 0;
368 dstptr->sdl_nlen = 0;
369 dstptr->sdl_alen = alen;
370 dstptr->sdl_slen = 0;
371 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
372 ancil_buf += sizeof (struct sockaddr_dl);
373 ancil_size -= toh->len;
374 if (ill != NULL)
375 ill_refrele(ill);
376 }
377
378 if (recv_ancillary.crb_recvif) {
379 struct T_opthdr *toh;
380 uint_t *dstptr;
381
382 toh = (struct T_opthdr *)ancil_buf;
383 toh->level = IPPROTO_IP;
384 toh->name = IP_RECVIF;
385 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
386 toh->status = 0;
387 ancil_buf += sizeof (struct T_opthdr);
388 dstptr = (uint_t *)ancil_buf;
389 *dstptr = ira->ira_ruifindex;
390 ancil_buf += sizeof (uint_t);
391 ancil_size -= toh->len;
392 }
393
394 /*
395 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
396 * are different
397 */
398 if (recv_ancillary.crb_ip_recvpktinfo &&
399 connp->conn_family == AF_INET6) {
400 struct T_opthdr *toh;
401 struct in6_pktinfo *pkti;
402
403 toh = (struct T_opthdr *)ancil_buf;
404 toh->level = IPPROTO_IPV6;
405 toh->name = IPV6_PKTINFO;
406 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
407 toh->status = 0;
408 ancil_buf += sizeof (struct T_opthdr);
409 pkti = (struct in6_pktinfo *)ancil_buf;
410 if (ira->ira_flags & IRAF_IS_IPV4) {
411 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
412 &pkti->ipi6_addr);
413 } else {
414 pkti->ipi6_addr = ipp->ipp_addr;
415 }
416 pkti->ipi6_ifindex = ira->ira_ruifindex;
417
418 ancil_buf += sizeof (*pkti);
419 ancil_size -= toh->len;
420 }
421 if (recv_ancillary.crb_ipv6_recvhoplimit) {
422 struct T_opthdr *toh;
423
424 toh = (struct T_opthdr *)ancil_buf;
425 toh->level = IPPROTO_IPV6;
426 toh->name = IPV6_HOPLIMIT;
427 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
428 toh->status = 0;
429 ancil_buf += sizeof (struct T_opthdr);
430 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
431 ancil_buf += sizeof (uint_t);
432 ancil_size -= toh->len;
433 }
434 if (recv_ancillary.crb_ipv6_recvtclass) {
435 struct T_opthdr *toh;
436
437 toh = (struct T_opthdr *)ancil_buf;
438 toh->level = IPPROTO_IPV6;
439 toh->name = IPV6_TCLASS;
440 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 toh->status = 0;
442 ancil_buf += sizeof (struct T_opthdr);
443
444 if (ira->ira_flags & IRAF_IS_IPV4)
445 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
446 else
447 *(uint_t *)ancil_buf = ipp->ipp_tclass;
448 ancil_buf += sizeof (uint_t);
449 ancil_size -= toh->len;
450 }
451 if (recv_ancillary.crb_ipv6_recvhopopts &&
452 (ipp->ipp_fields & IPPF_HOPOPTS)) {
453 struct T_opthdr *toh;
454
455 toh = (struct T_opthdr *)ancil_buf;
456 toh->level = IPPROTO_IPV6;
457 toh->name = IPV6_HOPOPTS;
458 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
459 toh->status = 0;
460 ancil_buf += sizeof (struct T_opthdr);
461 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
462 ancil_buf += ipp->ipp_hopoptslen;
463 ancil_size -= toh->len;
464 }
465 /*
466 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
467 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
468 * options that appear before a routing header.
469 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
470 */
471 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
472 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
473 (recv_ancillary.crb_ipv6_recvdstopts &&
474 recv_ancillary.crb_ipv6_recvrthdr)) {
475 struct T_opthdr *toh;
476
477 toh = (struct T_opthdr *)ancil_buf;
478 toh->level = IPPROTO_IPV6;
479 toh->name = IPV6_DSTOPTS;
480 toh->len = sizeof (struct T_opthdr) +
481 ipp->ipp_rthdrdstoptslen;
482 toh->status = 0;
483 ancil_buf += sizeof (struct T_opthdr);
484 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
485 ipp->ipp_rthdrdstoptslen);
486 ancil_buf += ipp->ipp_rthdrdstoptslen;
487 ancil_size -= toh->len;
488 }
489 }
490 if (recv_ancillary.crb_ipv6_recvrthdr &&
491 (ipp->ipp_fields & IPPF_RTHDR)) {
492 struct T_opthdr *toh;
493
494 toh = (struct T_opthdr *)ancil_buf;
495 toh->level = IPPROTO_IPV6;
496 toh->name = IPV6_RTHDR;
497 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
498 toh->status = 0;
499 ancil_buf += sizeof (struct T_opthdr);
500 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
501 ancil_buf += ipp->ipp_rthdrlen;
502 ancil_size -= toh->len;
503 }
504 if ((recv_ancillary.crb_ipv6_recvdstopts ||
505 recv_ancillary.crb_old_ipv6_recvdstopts) &&
506 (ipp->ipp_fields & IPPF_DSTOPTS)) {
507 struct T_opthdr *toh;
508
509 toh = (struct T_opthdr *)ancil_buf;
510 toh->level = IPPROTO_IPV6;
511 toh->name = IPV6_DSTOPTS;
512 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
513 toh->status = 0;
514 ancil_buf += sizeof (struct T_opthdr);
515 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
516 ancil_buf += ipp->ipp_dstoptslen;
517 ancil_size -= toh->len;
518 }
519
520 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
521 struct T_opthdr *toh;
522 cred_t *rcr = connp->conn_cred;
523
524 toh = (struct T_opthdr *)ancil_buf;
525 toh->level = SOL_SOCKET;
526 toh->name = SCM_UCRED;
527 toh->len = sizeof (struct T_opthdr) +
528 ucredminsize(ira->ira_cred);
529 toh->status = 0;
530 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
531 ancil_buf += toh->len;
532 ancil_size -= toh->len;
533 }
534 if (recv_ancillary.crb_timestamp) {
535 struct T_opthdr *toh;
536
537 toh = (struct T_opthdr *)ancil_buf;
538 toh->level = SOL_SOCKET;
539 toh->name = SCM_TIMESTAMP;
540 toh->len = sizeof (struct T_opthdr) +
541 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
542 toh->status = 0;
543 ancil_buf += sizeof (struct T_opthdr);
544 /* Align for gethrestime() */
545 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
546 sizeof (intptr_t));
547 gethrestime((timestruc_t *)ancil_buf);
548 ancil_buf = (uchar_t *)toh + toh->len;
549 ancil_size -= toh->len;
550 }
551
552 /*
553 * CAUTION:
554 * Due to aligment issues
555 * Processing of IP_RECVTTL option
556 * should always be the last. Adding
557 * any option processing after this will
558 * cause alignment panic.
559 */
560 if (recv_ancillary.crb_recvttl &&
561 (ira->ira_flags & IRAF_IS_IPV4)) {
562 struct T_opthdr *toh;
563 uint8_t *dstptr;
564
565 toh = (struct T_opthdr *)ancil_buf;
566 toh->level = IPPROTO_IP;
567 toh->name = IP_RECVTTL;
568 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
569 toh->status = 0;
570 ancil_buf += sizeof (struct T_opthdr);
571 dstptr = (uint8_t *)ancil_buf;
572 *dstptr = ipp->ipp_hoplimit;
573 ancil_buf += sizeof (uint8_t);
574 ancil_size -= toh->len;
575 }
576
577 /* Consumed all of allocated space */
578 ASSERT(ancil_size == 0);
579
580 }
581
582 /*
583 * This routine retrieves the current status of socket options.
584 * It returns the size of the option retrieved, or -1.
585 */
586 int
587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
588 uchar_t *ptr)
589 {
590 int *i1 = (int *)ptr;
591 conn_t *connp = coa->coa_connp;
592 ip_xmit_attr_t *ixa = coa->coa_ixa;
593 ip_pkt_t *ipp = coa->coa_ipp;
594 ip_stack_t *ipst = ixa->ixa_ipst;
595 uint_t len;
596
597 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
598
599 switch (level) {
600 case SOL_SOCKET:
601 switch (name) {
602 case SO_DEBUG:
603 *i1 = connp->conn_debug ? SO_DEBUG : 0;
604 break; /* goto sizeof (int) option return */
605 case SO_KEEPALIVE:
606 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
607 break;
608 case SO_LINGER: {
609 struct linger *lgr = (struct linger *)ptr;
610
611 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
612 lgr->l_linger = connp->conn_lingertime;
613 }
614 return (sizeof (struct linger));
615
616 case SO_OOBINLINE:
617 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
618 break;
619 case SO_REUSEADDR:
620 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
621 break; /* goto sizeof (int) option return */
622 case SO_TYPE:
623 *i1 = connp->conn_so_type;
624 break; /* goto sizeof (int) option return */
625 case SO_DONTROUTE:
626 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
627 SO_DONTROUTE : 0;
628 break; /* goto sizeof (int) option return */
629 case SO_USELOOPBACK:
630 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
631 break; /* goto sizeof (int) option return */
632 case SO_BROADCAST:
633 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
634 break; /* goto sizeof (int) option return */
635
636 case SO_SNDBUF:
637 *i1 = connp->conn_sndbuf;
638 break; /* goto sizeof (int) option return */
639 case SO_RCVBUF:
640 *i1 = connp->conn_rcvbuf;
641 break; /* goto sizeof (int) option return */
642 case SO_RCVTIMEO:
643 case SO_SNDTIMEO:
644 /*
645 * Pass these two options in order for third part
646 * protocol usage. Here just return directly.
647 */
648 *i1 = 0;
649 break;
650 case SO_DGRAM_ERRIND:
651 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
652 break; /* goto sizeof (int) option return */
653 case SO_RECVUCRED:
654 *i1 = connp->conn_recv_ancillary.crb_recvucred;
655 break; /* goto sizeof (int) option return */
656 case SO_TIMESTAMP:
657 *i1 = connp->conn_recv_ancillary.crb_timestamp;
658 break; /* goto sizeof (int) option return */
659 case SO_VRRP:
660 *i1 = connp->conn_isvrrp;
661 break; /* goto sizeof (int) option return */
662 case SO_ANON_MLP:
663 *i1 = connp->conn_anon_mlp;
664 break; /* goto sizeof (int) option return */
665 case SO_MAC_EXEMPT:
666 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
667 break; /* goto sizeof (int) option return */
668 case SO_MAC_IMPLICIT:
669 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
670 break; /* goto sizeof (int) option return */
671 case SO_ALLZONES:
672 *i1 = connp->conn_allzones;
673 break; /* goto sizeof (int) option return */
674 case SO_EXCLBIND:
675 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
676 break;
677 case SO_PROTOTYPE:
678 *i1 = connp->conn_proto;
679 break;
680
681 case SO_DOMAIN:
682 *i1 = connp->conn_family;
683 break;
684 default:
685 return (-1);
686 }
687 break;
688 case IPPROTO_IP:
689 if (connp->conn_family != AF_INET)
690 return (-1);
691 switch (name) {
692 case IP_OPTIONS:
693 case T_IP_OPTIONS:
694 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
695 return (0);
696
697 len = ipp->ipp_ipv4_options_len;
698 if (len > 0) {
699 bcopy(ipp->ipp_ipv4_options, ptr, len);
700 }
701 return (len);
702
703 case IP_PKTINFO: {
704 /*
705 * This also handles IP_RECVPKTINFO.
706 * IP_PKTINFO and IP_RECVPKTINFO have same value.
707 * Differentiation is based on the size of the
708 * argument passed in.
709 */
710 struct in_pktinfo *pktinfo;
711
712 #ifdef notdef
713 /* optcom doesn't provide a length with "get" */
714 if (inlen == sizeof (int)) {
715 /* This is IP_RECVPKTINFO option. */
716 *i1 = connp->conn_recv_ancillary.
717 crb_ip_recvpktinfo;
718 return (sizeof (int));
719 }
720 #endif
721 /* XXX assumes that caller has room for max size! */
722
723 pktinfo = (struct in_pktinfo *)ptr;
724 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
725 if (ipp->ipp_fields & IPPF_ADDR)
726 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
727 else
728 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
729 return (sizeof (struct in_pktinfo));
730 }
731 case IP_DONTFRAG:
732 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
733 return (sizeof (int));
734 case IP_TOS:
735 case T_IP_TOS:
736 *i1 = (int)ipp->ipp_type_of_service;
737 break; /* goto sizeof (int) option return */
738 case IP_TTL:
739 *i1 = (int)ipp->ipp_unicast_hops;
740 break; /* goto sizeof (int) option return */
741 case IP_DHCPINIT_IF:
742 return (-1);
743 case IP_NEXTHOP:
744 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
745 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
746 return (sizeof (ipaddr_t));
747 } else {
748 return (0);
749 }
750
751 case IP_MULTICAST_IF:
752 /* 0 address if not set */
753 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
754 return (sizeof (ipaddr_t));
755 case IP_MULTICAST_TTL:
756 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
757 return (sizeof (uchar_t));
758 case IP_MULTICAST_LOOP:
759 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
760 return (sizeof (uint8_t));
761 case IP_RECVOPTS:
762 *i1 = connp->conn_recv_ancillary.crb_recvopts;
763 break; /* goto sizeof (int) option return */
764 case IP_RECVDSTADDR:
765 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
766 break; /* goto sizeof (int) option return */
767 case IP_RECVIF:
768 *i1 = connp->conn_recv_ancillary.crb_recvif;
769 break; /* goto sizeof (int) option return */
770 case IP_RECVSLLA:
771 *i1 = connp->conn_recv_ancillary.crb_recvslla;
772 break; /* goto sizeof (int) option return */
773 case IP_RECVTTL:
774 *i1 = connp->conn_recv_ancillary.crb_recvttl;
775 break; /* goto sizeof (int) option return */
776 case IP_ADD_MEMBERSHIP:
777 case IP_DROP_MEMBERSHIP:
778 case MCAST_JOIN_GROUP:
779 case MCAST_LEAVE_GROUP:
780 case IP_BLOCK_SOURCE:
781 case IP_UNBLOCK_SOURCE:
782 case IP_ADD_SOURCE_MEMBERSHIP:
783 case IP_DROP_SOURCE_MEMBERSHIP:
784 case MCAST_BLOCK_SOURCE:
785 case MCAST_UNBLOCK_SOURCE:
786 case MCAST_JOIN_SOURCE_GROUP:
787 case MCAST_LEAVE_SOURCE_GROUP:
788 case MRT_INIT:
789 case MRT_DONE:
790 case MRT_ADD_VIF:
791 case MRT_DEL_VIF:
792 case MRT_ADD_MFC:
793 case MRT_DEL_MFC:
794 /* cannot "get" the value for these */
795 return (-1);
796 case MRT_VERSION:
797 case MRT_ASSERT:
798 (void) ip_mrouter_get(name, connp, ptr);
799 return (sizeof (int));
800 case IP_SEC_OPT:
801 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
802 IPSEC_AF_V4));
803 case IP_BOUND_IF:
804 /* Zero if not set */
805 *i1 = connp->conn_bound_if;
806 break; /* goto sizeof (int) option return */
807 case IP_UNSPEC_SRC:
808 *i1 = connp->conn_unspec_src;
809 break; /* goto sizeof (int) option return */
810 case IP_BROADCAST_TTL:
811 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
812 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
813 else
814 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
815 return (sizeof (uchar_t));
816 default:
817 return (-1);
818 }
819 break;
820 case IPPROTO_IPV6:
821 if (connp->conn_family != AF_INET6)
822 return (-1);
823 switch (name) {
824 case IPV6_UNICAST_HOPS:
825 *i1 = (int)ipp->ipp_unicast_hops;
826 break; /* goto sizeof (int) option return */
827 case IPV6_MULTICAST_IF:
828 /* 0 index if not set */
829 *i1 = ixa->ixa_multicast_ifindex;
830 break; /* goto sizeof (int) option return */
831 case IPV6_MULTICAST_HOPS:
832 *i1 = ixa->ixa_multicast_ttl;
833 break; /* goto sizeof (int) option return */
834 case IPV6_MULTICAST_LOOP:
835 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
836 break; /* goto sizeof (int) option return */
837 case IPV6_JOIN_GROUP:
838 case IPV6_LEAVE_GROUP:
839 case MCAST_JOIN_GROUP:
840 case MCAST_LEAVE_GROUP:
841 case MCAST_BLOCK_SOURCE:
842 case MCAST_UNBLOCK_SOURCE:
843 case MCAST_JOIN_SOURCE_GROUP:
844 case MCAST_LEAVE_SOURCE_GROUP:
845 /* cannot "get" the value for these */
846 return (-1);
847 case IPV6_BOUND_IF:
848 /* Zero if not set */
849 *i1 = connp->conn_bound_if;
850 break; /* goto sizeof (int) option return */
851 case IPV6_UNSPEC_SRC:
852 *i1 = connp->conn_unspec_src;
853 break; /* goto sizeof (int) option return */
854 case IPV6_RECVPKTINFO:
855 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
856 break; /* goto sizeof (int) option return */
857 case IPV6_RECVTCLASS:
858 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
859 break; /* goto sizeof (int) option return */
860 case IPV6_RECVPATHMTU:
861 *i1 = connp->conn_ipv6_recvpathmtu;
862 break; /* goto sizeof (int) option return */
863 case IPV6_RECVHOPLIMIT:
864 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
865 break; /* goto sizeof (int) option return */
866 case IPV6_RECVHOPOPTS:
867 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
868 break; /* goto sizeof (int) option return */
869 case IPV6_RECVDSTOPTS:
870 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
871 break; /* goto sizeof (int) option return */
872 case _OLD_IPV6_RECVDSTOPTS:
873 *i1 =
874 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
875 break; /* goto sizeof (int) option return */
876 case IPV6_RECVRTHDRDSTOPTS:
877 *i1 = connp->conn_recv_ancillary.
878 crb_ipv6_recvrthdrdstopts;
879 break; /* goto sizeof (int) option return */
880 case IPV6_RECVRTHDR:
881 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
882 break; /* goto sizeof (int) option return */
883 case IPV6_PKTINFO: {
884 /* XXX assumes that caller has room for max size! */
885 struct in6_pktinfo *pkti;
886
887 pkti = (struct in6_pktinfo *)ptr;
888 pkti->ipi6_ifindex = ixa->ixa_ifindex;
889 if (ipp->ipp_fields & IPPF_ADDR)
890 pkti->ipi6_addr = ipp->ipp_addr;
891 else
892 pkti->ipi6_addr = ipv6_all_zeros;
893 return (sizeof (struct in6_pktinfo));
894 }
895 case IPV6_TCLASS:
896 *i1 = ipp->ipp_tclass;
897 break; /* goto sizeof (int) option return */
898 case IPV6_NEXTHOP: {
899 sin6_t *sin6 = (sin6_t *)ptr;
900
901 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
902 return (0);
903
904 *sin6 = sin6_null;
905 sin6->sin6_family = AF_INET6;
906 sin6->sin6_addr = ixa->ixa_nexthop_v6;
907
908 return (sizeof (sin6_t));
909 }
910 case IPV6_HOPOPTS:
911 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
912 return (0);
913 bcopy(ipp->ipp_hopopts, ptr,
914 ipp->ipp_hopoptslen);
915 return (ipp->ipp_hopoptslen);
916 case IPV6_RTHDRDSTOPTS:
917 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
918 return (0);
919 bcopy(ipp->ipp_rthdrdstopts, ptr,
920 ipp->ipp_rthdrdstoptslen);
921 return (ipp->ipp_rthdrdstoptslen);
922 case IPV6_RTHDR:
923 if (!(ipp->ipp_fields & IPPF_RTHDR))
924 return (0);
925 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
926 return (ipp->ipp_rthdrlen);
927 case IPV6_DSTOPTS:
928 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
929 return (0);
930 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
931 return (ipp->ipp_dstoptslen);
932 case IPV6_PATHMTU:
933 return (ip_fill_mtuinfo(connp, ixa,
934 (struct ip6_mtuinfo *)ptr));
935 case IPV6_SEC_OPT:
936 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
937 IPSEC_AF_V6));
938 case IPV6_SRC_PREFERENCES:
939 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
940 case IPV6_DONTFRAG:
941 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
942 return (sizeof (int));
943 case IPV6_USE_MIN_MTU:
944 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
945 *i1 = ixa->ixa_use_min_mtu;
946 else
947 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
948 break;
949 case IPV6_V6ONLY:
950 *i1 = connp->conn_ipv6_v6only;
951 return (sizeof (int));
952 default:
953 return (-1);
954 }
955 break;
956 case IPPROTO_UDP:
957 switch (name) {
958 case UDP_ANONPRIVBIND:
959 *i1 = connp->conn_anon_priv_bind;
960 break;
961 case UDP_EXCLBIND:
962 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
963 break;
964 default:
965 return (-1);
966 }
967 break;
968 case IPPROTO_TCP:
969 switch (name) {
970 case TCP_RECVDSTADDR:
971 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
972 break;
973 case TCP_ANONPRIVBIND:
974 *i1 = connp->conn_anon_priv_bind;
975 break;
976 case TCP_EXCLBIND:
977 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
978 break;
979 default:
980 return (-1);
981 }
982 break;
983 default:
984 return (-1);
985 }
986 return (sizeof (int));
987 }
988
/*
 * Per-level "set" handlers, one for each option level that conn_opt_set()
 * dispatches on.  They all take the same arguments as conn_opt_set() minus
 * the level.
 */
static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
999
1000 /*
1001 * This routine sets the most common socket options including some
1002 * that are transport/ULP specific.
1003 * It returns errno or zero.
1004 *
1005 * For fixed length options, there is no sanity check
1006 * of passed in length is done. It is assumed *_optcom_req()
1007 * routines do the right thing.
1008 */
1009 int
1010 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1012 {
1013 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1014
1015 /* We have different functions for different levels */
1016 switch (level) {
1017 case SOL_SOCKET:
1018 return (conn_opt_set_socket(coa, name, inlen, invalp,
1019 checkonly, cr));
1020 case IPPROTO_IP:
1021 return (conn_opt_set_ip(coa, name, inlen, invalp,
1022 checkonly, cr));
1023 case IPPROTO_IPV6:
1024 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025 checkonly, cr));
1026 case IPPROTO_UDP:
1027 return (conn_opt_set_udp(coa, name, inlen, invalp,
1028 checkonly, cr));
1029 case IPPROTO_TCP:
1030 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031 checkonly, cr));
1032 default:
1033 return (0);
1034 }
1035 }
1036
1037 /*
1038 * Handle SOL_SOCKET
1039 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040 * it implement their own checks and setting of conn_proto.
1041 */
1042 /* ARGSUSED1 */
1043 static int
1044 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1046 {
1047 conn_t *connp = coa->coa_connp;
1048 ip_xmit_attr_t *ixa = coa->coa_ixa;
1049 int *i1 = (int *)invalp;
1050 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1051
1052 switch (name) {
1053 case SO_ALLZONES:
1054 if (IPCL_IS_BOUND(connp))
1055 return (EINVAL);
1056 break;
1057 case SO_VRRP:
1058 if (secpolicy_ip_config(cr, checkonly) != 0)
1059 return (EACCES);
1060 break;
1061 case SO_MAC_EXEMPT:
1062 if (secpolicy_net_mac_aware(cr) != 0)
1063 return (EACCES);
1064 if (IPCL_IS_BOUND(connp))
1065 return (EINVAL);
1066 break;
1067 case SO_MAC_IMPLICIT:
1068 if (secpolicy_net_mac_implicit(cr) != 0)
1069 return (EACCES);
1070 break;
1071 }
1072 if (checkonly)
1073 return (0);
1074
1075 mutex_enter(&connp->conn_lock);
1076 /* Here we set the actual option value */
1077 switch (name) {
1078 case SO_DEBUG:
1079 connp->conn_debug = onoff;
1080 break;
1081 case SO_KEEPALIVE:
1082 connp->conn_keepalive = onoff;
1083 break;
1084 case SO_LINGER: {
1085 struct linger *lgr = (struct linger *)invalp;
1086
1087 if (lgr->l_onoff) {
1088 connp->conn_linger = 1;
1089 connp->conn_lingertime = lgr->l_linger;
1090 } else {
1091 connp->conn_linger = 0;
1092 connp->conn_lingertime = 0;
1093 }
1094 break;
1095 }
1096 case SO_OOBINLINE:
1097 connp->conn_oobinline = onoff;
1098 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099 break;
1100 case SO_REUSEADDR:
1101 connp->conn_reuseaddr = onoff;
1102 break;
1103 case SO_DONTROUTE:
1104 if (onoff)
1105 ixa->ixa_flags |= IXAF_DONTROUTE;
1106 else
1107 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108 coa->coa_changed |= COA_ROUTE_CHANGED;
1109 break;
1110 case SO_USELOOPBACK:
1111 connp->conn_useloopback = onoff;
1112 break;
1113 case SO_BROADCAST:
1114 connp->conn_broadcast = onoff;
1115 break;
1116 case SO_SNDBUF:
1117 /* ULP has range checked the value */
1118 connp->conn_sndbuf = *i1;
1119 coa->coa_changed |= COA_SNDBUF_CHANGED;
1120 break;
1121 case SO_RCVBUF:
1122 /* ULP has range checked the value */
1123 connp->conn_rcvbuf = *i1;
1124 coa->coa_changed |= COA_RCVBUF_CHANGED;
1125 break;
1126 case SO_RCVTIMEO:
1127 case SO_SNDTIMEO:
1128 /*
1129 * Pass these two options in order for third part
1130 * protocol usage.
1131 */
1132 break;
1133 case SO_DGRAM_ERRIND:
1134 connp->conn_dgram_errind = onoff;
1135 break;
1136 case SO_RECVUCRED:
1137 connp->conn_recv_ancillary.crb_recvucred = onoff;
1138 break;
1139 case SO_ALLZONES:
1140 connp->conn_allzones = onoff;
1141 coa->coa_changed |= COA_ROUTE_CHANGED;
1142 if (onoff)
1143 ixa->ixa_zoneid = ALL_ZONES;
1144 else
1145 ixa->ixa_zoneid = connp->conn_zoneid;
1146 break;
1147 case SO_TIMESTAMP:
1148 connp->conn_recv_ancillary.crb_timestamp = onoff;
1149 break;
1150 case SO_VRRP:
1151 connp->conn_isvrrp = onoff;
1152 break;
1153 case SO_ANON_MLP:
1154 connp->conn_anon_mlp = onoff;
1155 break;
1156 case SO_MAC_EXEMPT:
1157 connp->conn_mac_mode = onoff ?
1158 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159 break;
1160 case SO_MAC_IMPLICIT:
1161 connp->conn_mac_mode = onoff ?
1162 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163 break;
1164 case SO_EXCLBIND:
1165 connp->conn_exclbind = onoff;
1166 break;
1167 }
1168 mutex_exit(&connp->conn_lock);
1169 return (0);
1170 }
1171
1172 /* Handle IPPROTO_IP */
1173 static int
1174 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1176 {
1177 conn_t *connp = coa->coa_connp;
1178 ip_xmit_attr_t *ixa = coa->coa_ixa;
1179 ip_pkt_t *ipp = coa->coa_ipp;
1180 int *i1 = (int *)invalp;
1181 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1182 ipaddr_t addr = (ipaddr_t)*i1;
1183 uint_t ifindex;
1184 zoneid_t zoneid = IPCL_ZONEID(connp);
1185 ipif_t *ipif;
1186 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1187 int error;
1188
1189 if (connp->conn_family != AF_INET)
1190 return (EINVAL);
1191
1192 ifindex = UINT_MAX;
1193 switch (name) {
1194 case IP_TTL:
1195 /* Don't allow zero */
1196 if (*i1 < 1 || *i1 > 255)
1197 return (EINVAL);
1198 break;
1199 case IP_MULTICAST_IF:
1200 if (addr == INADDR_ANY) {
1201 /* Clear */
1202 ifindex = 0;
1203 break;
1204 }
1205 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1206 if (ipif == NULL)
1207 return (EHOSTUNREACH);
1208 /* not supported by the virtual network iface */
1209 if (IS_VNI(ipif->ipif_ill)) {
1210 ipif_refrele(ipif);
1211 return (EINVAL);
1212 }
1213 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1214 ipif_refrele(ipif);
1215 break;
1216 case IP_NEXTHOP: {
1217 ire_t *ire;
1218
1219 if (addr == INADDR_ANY) {
1220 /* Clear */
1221 break;
1222 }
1223 /* Verify that the next-hop is on-link */
1224 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1225 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1226 if (ire == NULL)
1227 return (EHOSTUNREACH);
1228 ire_refrele(ire);
1229 break;
1230 }
1231 case IP_OPTIONS:
1232 case T_IP_OPTIONS: {
1233 uint_t newlen;
1234
1235 if (ipp->ipp_fields & IPPF_LABEL_V4)
1236 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1237 else
1238 newlen = inlen;
1239 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1240 return (EINVAL);
1241 }
1242 break;
1243 }
1244 case IP_PKTINFO: {
1245 struct in_pktinfo *pktinfo;
1246
1247 /* Two different valid lengths */
1248 if (inlen != sizeof (int) &&
1249 inlen != sizeof (struct in_pktinfo))
1250 return (EINVAL);
1251 if (inlen == sizeof (int))
1252 break;
1253
1254 pktinfo = (struct in_pktinfo *)invalp;
1255 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1256 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1257 zoneid, ipst, B_FALSE)) {
1258 case IPVL_UNICAST_UP:
1259 case IPVL_UNICAST_DOWN:
1260 break;
1261 default:
1262 return (EADDRNOTAVAIL);
1263 }
1264 }
1265 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1266 B_FALSE, ipst))
1267 return (ENXIO);
1268 break;
1269 }
1270 case IP_BOUND_IF:
1271 ifindex = *(uint_t *)i1;
1272
1273 /* Just check it is ok. */
1274 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1275 return (ENXIO);
1276 break;
1277 }
1278 if (checkonly)
1279 return (0);
1280
1281 /* Here we set the actual option value */
1282 /*
1283 * conn_lock protects the bitfields, and is used to
1284 * set the fields atomically. Not needed for ixa settings since
1285 * the caller has an exclusive copy of the ixa.
1286 * We can not hold conn_lock across the multicast options though.
1287 */
1288 switch (name) {
1289 case IP_OPTIONS:
1290 case T_IP_OPTIONS:
1291 /* Save options for use by IP. */
1292 mutex_enter(&connp->conn_lock);
1293 error = optcom_pkt_set(invalp, inlen,
1294 (uchar_t **)&ipp->ipp_ipv4_options,
1295 &ipp->ipp_ipv4_options_len);
1296 if (error != 0) {
1297 mutex_exit(&connp->conn_lock);
1298 return (error);
1299 }
1300 if (ipp->ipp_ipv4_options_len == 0) {
1301 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1302 } else {
1303 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1304 }
1305 mutex_exit(&connp->conn_lock);
1306 coa->coa_changed |= COA_HEADER_CHANGED;
1307 coa->coa_changed |= COA_WROFF_CHANGED;
1308 break;
1309
1310 case IP_TTL:
1311 mutex_enter(&connp->conn_lock);
1312 ipp->ipp_unicast_hops = *i1;
1313 mutex_exit(&connp->conn_lock);
1314 coa->coa_changed |= COA_HEADER_CHANGED;
1315 break;
1316 case IP_TOS:
1317 case T_IP_TOS:
1318 mutex_enter(&connp->conn_lock);
1319 if (*i1 == -1) {
1320 ipp->ipp_type_of_service = 0;
1321 } else {
1322 ipp->ipp_type_of_service = *i1;
1323 }
1324 mutex_exit(&connp->conn_lock);
1325 coa->coa_changed |= COA_HEADER_CHANGED;
1326 break;
1327 case IP_MULTICAST_IF:
1328 ixa->ixa_multicast_ifindex = ifindex;
1329 ixa->ixa_multicast_ifaddr = addr;
1330 coa->coa_changed |= COA_ROUTE_CHANGED;
1331 break;
1332 case IP_MULTICAST_TTL:
1333 ixa->ixa_multicast_ttl = *invalp;
1334 /* Handled automatically by ip_output */
1335 break;
1336 case IP_MULTICAST_LOOP:
1337 if (*invalp != 0)
1338 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1339 else
1340 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1341 /* Handled automatically by ip_output */
1342 break;
1343 case IP_RECVOPTS:
1344 mutex_enter(&connp->conn_lock);
1345 connp->conn_recv_ancillary.crb_recvopts = onoff;
1346 mutex_exit(&connp->conn_lock);
1347 break;
1348 case IP_RECVDSTADDR:
1349 mutex_enter(&connp->conn_lock);
1350 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1351 mutex_exit(&connp->conn_lock);
1352 break;
1353 case IP_RECVIF:
1354 mutex_enter(&connp->conn_lock);
1355 connp->conn_recv_ancillary.crb_recvif = onoff;
1356 mutex_exit(&connp->conn_lock);
1357 break;
1358 case IP_RECVSLLA:
1359 mutex_enter(&connp->conn_lock);
1360 connp->conn_recv_ancillary.crb_recvslla = onoff;
1361 mutex_exit(&connp->conn_lock);
1362 break;
1363 case IP_RECVTTL:
1364 mutex_enter(&connp->conn_lock);
1365 connp->conn_recv_ancillary.crb_recvttl = onoff;
1366 mutex_exit(&connp->conn_lock);
1367 break;
1368 case IP_PKTINFO: {
1369 /*
1370 * This also handles IP_RECVPKTINFO.
1371 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1372 * Differentiation is based on the size of the
1373 * argument passed in.
1374 */
1375 struct in_pktinfo *pktinfo;
1376
1377 if (inlen == sizeof (int)) {
1378 /* This is IP_RECVPKTINFO option. */
1379 mutex_enter(&connp->conn_lock);
1380 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1381 onoff;
1382 mutex_exit(&connp->conn_lock);
1383 break;
1384 }
1385
1386 /* This is IP_PKTINFO option. */
1387 mutex_enter(&connp->conn_lock);
1388 pktinfo = (struct in_pktinfo *)invalp;
1389 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1390 ipp->ipp_fields |= IPPF_ADDR;
1391 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1392 &ipp->ipp_addr);
1393 } else {
1394 ipp->ipp_fields &= ~IPPF_ADDR;
1395 ipp->ipp_addr = ipv6_all_zeros;
1396 }
1397 mutex_exit(&connp->conn_lock);
1398 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1399 coa->coa_changed |= COA_ROUTE_CHANGED;
1400 coa->coa_changed |= COA_HEADER_CHANGED;
1401 break;
1402 }
1403 case IP_DONTFRAG:
1404 if (onoff) {
1405 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1406 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1407 } else {
1408 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1409 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1410 }
1411 /* Need to redo ip_attr_connect */
1412 coa->coa_changed |= COA_ROUTE_CHANGED;
1413 break;
1414 case IP_ADD_MEMBERSHIP:
1415 case IP_DROP_MEMBERSHIP:
1416 case MCAST_JOIN_GROUP:
1417 case MCAST_LEAVE_GROUP:
1418 return (ip_opt_set_multicast_group(connp, name,
1419 invalp, B_FALSE, checkonly));
1420
1421 case IP_BLOCK_SOURCE:
1422 case IP_UNBLOCK_SOURCE:
1423 case IP_ADD_SOURCE_MEMBERSHIP:
1424 case IP_DROP_SOURCE_MEMBERSHIP:
1425 case MCAST_BLOCK_SOURCE:
1426 case MCAST_UNBLOCK_SOURCE:
1427 case MCAST_JOIN_SOURCE_GROUP:
1428 case MCAST_LEAVE_SOURCE_GROUP:
1429 return (ip_opt_set_multicast_sources(connp, name,
1430 invalp, B_FALSE, checkonly));
1431
1432 case IP_SEC_OPT:
1433 mutex_enter(&connp->conn_lock);
1434 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1435 mutex_exit(&connp->conn_lock);
1436 if (error != 0) {
1437 return (error);
1438 }
1439 /* This is an IPsec policy change - redo ip_attr_connect */
1440 coa->coa_changed |= COA_ROUTE_CHANGED;
1441 break;
1442 case IP_NEXTHOP:
1443 ixa->ixa_nexthop_v4 = addr;
1444 if (addr != INADDR_ANY)
1445 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1446 else
1447 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1448 coa->coa_changed |= COA_ROUTE_CHANGED;
1449 break;
1450
1451 case IP_BOUND_IF:
1452 ixa->ixa_ifindex = ifindex; /* Send */
1453 mutex_enter(&connp->conn_lock);
1454 connp->conn_incoming_ifindex = ifindex; /* Receive */
1455 connp->conn_bound_if = ifindex; /* getsockopt */
1456 mutex_exit(&connp->conn_lock);
1457 coa->coa_changed |= COA_ROUTE_CHANGED;
1458 break;
1459 case IP_UNSPEC_SRC:
1460 mutex_enter(&connp->conn_lock);
1461 connp->conn_unspec_src = onoff;
1462 if (onoff)
1463 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1464 else
1465 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1466
1467 mutex_exit(&connp->conn_lock);
1468 break;
1469 case IP_BROADCAST_TTL:
1470 ixa->ixa_broadcast_ttl = *invalp;
1471 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1472 /* Handled automatically by ip_output */
1473 break;
1474 case MRT_INIT:
1475 case MRT_DONE:
1476 case MRT_ADD_VIF:
1477 case MRT_DEL_VIF:
1478 case MRT_ADD_MFC:
1479 case MRT_DEL_MFC:
1480 case MRT_ASSERT:
1481 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1482 return (error);
1483 }
1484 error = ip_mrouter_set((int)name, connp, checkonly,
1485 (uchar_t *)invalp, inlen);
1486 if (error) {
1487 return (error);
1488 }
1489 return (0);
1490
1491 }
1492 return (0);
1493 }
1494
1495 /* Handle IPPROTO_IPV6 */
1496 static int
1497 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1498 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1499 {
1500 conn_t *connp = coa->coa_connp;
1501 ip_xmit_attr_t *ixa = coa->coa_ixa;
1502 ip_pkt_t *ipp = coa->coa_ipp;
1503 int *i1 = (int *)invalp;
1504 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1505 uint_t ifindex;
1506 zoneid_t zoneid = IPCL_ZONEID(connp);
1507 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1508 int error;
1509
1510 if (connp->conn_family != AF_INET6)
1511 return (EINVAL);
1512
1513 ifindex = UINT_MAX;
1514 switch (name) {
1515 case IPV6_MULTICAST_IF:
1516 /*
1517 * The only possible error is EINVAL.
1518 * We call this option on both V4 and V6
1519 * If both fail, then this call returns
1520 * EINVAL. If at least one of them succeeds we
1521 * return success.
1522 */
1523 ifindex = *(uint_t *)i1;
1524
1525 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1526 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1527 return (EINVAL);
1528 break;
1529 case IPV6_UNICAST_HOPS:
1530 /* Don't allow zero. -1 means to use default */
1531 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1532 return (EINVAL);
1533 break;
1534 case IPV6_MULTICAST_HOPS:
1535 /* -1 means use default */
1536 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1537 return (EINVAL);
1538 break;
1539 case IPV6_MULTICAST_LOOP:
1540 if (*i1 != 0 && *i1 != 1)
1541 return (EINVAL);
1542 break;
1543 case IPV6_BOUND_IF:
1544 ifindex = *(uint_t *)i1;
1545
1546 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1547 return (ENXIO);
1548 break;
1549 case IPV6_PKTINFO: {
1550 struct in6_pktinfo *pkti;
1551 boolean_t isv6;
1552
1553 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1554 return (EINVAL);
1555 if (inlen == 0)
1556 break; /* Clear values below */
1557
1558 /*
1559 * Verify the source address and ifindex. Privileged users
1560 * can use any source address.
1561 */
1562 pkti = (struct in6_pktinfo *)invalp;
1563
1564 /*
1565 * For link-local addresses we use the ipi6_ifindex when
1566 * we verify the local address.
1567 * If net_rawaccess then any source address can be used.
1568 */
1569 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1570 secpolicy_net_rawaccess(cr) != 0) {
1571 uint_t scopeid = 0;
1572 in6_addr_t *v6src = &pkti->ipi6_addr;
1573 ipaddr_t v4src;
1574 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1575
1576 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1577 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1578 if (v4src != INADDR_ANY) {
1579 laddr_type = ip_laddr_verify_v4(v4src,
1580 zoneid, ipst, B_FALSE);
1581 }
1582 } else {
1583 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1584 scopeid = pkti->ipi6_ifindex;
1585
1586 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1587 ipst, B_FALSE, scopeid);
1588 }
1589 switch (laddr_type) {
1590 case IPVL_UNICAST_UP:
1591 case IPVL_UNICAST_DOWN:
1592 break;
1593 default:
1594 return (EADDRNOTAVAIL);
1595 }
1596 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1597 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1598 /* Allow any source */
1599 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1600 }
1601 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1602 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1603 ipst))
1604 return (ENXIO);
1605 break;
1606 }
1607 case IPV6_HOPLIMIT:
1608 /* It is only allowed as ancilary data */
1609 if (!coa->coa_ancillary)
1610 return (EINVAL);
1611
1612 if (inlen != 0 && inlen != sizeof (int))
1613 return (EINVAL);
1614 if (inlen == sizeof (int)) {
1615 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1616 return (EINVAL);
1617 }
1618 break;
1619 case IPV6_TCLASS:
1620 if (inlen != 0 && inlen != sizeof (int))
1621 return (EINVAL);
1622 if (inlen == sizeof (int)) {
1623 if (*i1 > 255 || *i1 < -1)
1624 return (EINVAL);
1625 }
1626 break;
1627 case IPV6_NEXTHOP:
1628 if (inlen != 0 && inlen != sizeof (sin6_t))
1629 return (EINVAL);
1630 if (inlen == sizeof (sin6_t)) {
1631 sin6_t *sin6 = (sin6_t *)invalp;
1632 ire_t *ire;
1633
1634 if (sin6->sin6_family != AF_INET6)
1635 return (EAFNOSUPPORT);
1636 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1637 return (EADDRNOTAVAIL);
1638
1639 /* Verify that the next-hop is on-link */
1640 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1641 0, 0, IRE_ONLINK, NULL, zoneid,
1642 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1643 if (ire == NULL)
1644 return (EHOSTUNREACH);
1645 ire_refrele(ire);
1646 break;
1647 }
1648 break;
1649 case IPV6_RTHDR:
1650 case IPV6_DSTOPTS:
1651 case IPV6_RTHDRDSTOPTS:
1652 case IPV6_HOPOPTS: {
1653 /* All have the length field in the same place */
1654 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1655 /*
1656 * Sanity checks - minimum size, size a multiple of
1657 * eight bytes, and matching size passed in.
1658 */
1659 if (inlen != 0 &&
1660 inlen != (8 * (hopts->ip6h_len + 1)))
1661 return (EINVAL);
1662 break;
1663 }
1664 case IPV6_PATHMTU:
1665 /* Can't be set */
1666 return (EINVAL);
1667
1668 case IPV6_USE_MIN_MTU:
1669 if (inlen != sizeof (int))
1670 return (EINVAL);
1671 if (*i1 < -1 || *i1 > 1)
1672 return (EINVAL);
1673 break;
1674 case IPV6_SRC_PREFERENCES:
1675 if (inlen != sizeof (uint32_t))
1676 return (EINVAL);
1677 break;
1678 case IPV6_V6ONLY:
1679 if (*i1 < 0 || *i1 > 1) {
1680 return (EINVAL);
1681 }
1682 break;
1683 }
1684 if (checkonly)
1685 return (0);
1686
1687 /* Here we set the actual option value */
1688 /*
1689 * conn_lock protects the bitfields, and is used to
1690 * set the fields atomically. Not needed for ixa settings since
1691 * the caller has an exclusive copy of the ixa.
1692 * We can not hold conn_lock across the multicast options though.
1693 */
1694 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1695 switch (name) {
1696 case IPV6_MULTICAST_IF:
1697 ixa->ixa_multicast_ifindex = ifindex;
1698 /* Need to redo ip_attr_connect */
1699 coa->coa_changed |= COA_ROUTE_CHANGED;
1700 break;
1701 case IPV6_UNICAST_HOPS:
1702 /* -1 means use default */
1703 mutex_enter(&connp->conn_lock);
1704 if (*i1 == -1) {
1705 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1706 } else {
1707 ipp->ipp_unicast_hops = (uint8_t)*i1;
1708 }
1709 mutex_exit(&connp->conn_lock);
1710 coa->coa_changed |= COA_HEADER_CHANGED;
1711 break;
1712 case IPV6_MULTICAST_HOPS:
1713 /* -1 means use default */
1714 if (*i1 == -1) {
1715 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1716 } else {
1717 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1718 }
1719 /* Handled automatically by ip_output */
1720 break;
1721 case IPV6_MULTICAST_LOOP:
1722 if (*i1 != 0)
1723 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1724 else
1725 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1726 /* Handled automatically by ip_output */
1727 break;
1728 case IPV6_JOIN_GROUP:
1729 case IPV6_LEAVE_GROUP:
1730 case MCAST_JOIN_GROUP:
1731 case MCAST_LEAVE_GROUP:
1732 return (ip_opt_set_multicast_group(connp, name,
1733 invalp, B_TRUE, checkonly));
1734
1735 case MCAST_BLOCK_SOURCE:
1736 case MCAST_UNBLOCK_SOURCE:
1737 case MCAST_JOIN_SOURCE_GROUP:
1738 case MCAST_LEAVE_SOURCE_GROUP:
1739 return (ip_opt_set_multicast_sources(connp, name,
1740 invalp, B_TRUE, checkonly));
1741
1742 case IPV6_BOUND_IF:
1743 ixa->ixa_ifindex = ifindex; /* Send */
1744 mutex_enter(&connp->conn_lock);
1745 connp->conn_incoming_ifindex = ifindex; /* Receive */
1746 connp->conn_bound_if = ifindex; /* getsockopt */
1747 mutex_exit(&connp->conn_lock);
1748 coa->coa_changed |= COA_ROUTE_CHANGED;
1749 break;
1750 case IPV6_UNSPEC_SRC:
1751 mutex_enter(&connp->conn_lock);
1752 connp->conn_unspec_src = onoff;
1753 if (onoff)
1754 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1755 else
1756 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1757 mutex_exit(&connp->conn_lock);
1758 break;
1759 case IPV6_RECVPKTINFO:
1760 mutex_enter(&connp->conn_lock);
1761 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1762 mutex_exit(&connp->conn_lock);
1763 break;
1764 case IPV6_RECVTCLASS:
1765 mutex_enter(&connp->conn_lock);
1766 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1767 mutex_exit(&connp->conn_lock);
1768 break;
1769 case IPV6_RECVPATHMTU:
1770 mutex_enter(&connp->conn_lock);
1771 connp->conn_ipv6_recvpathmtu = onoff;
1772 mutex_exit(&connp->conn_lock);
1773 break;
1774 case IPV6_RECVHOPLIMIT:
1775 mutex_enter(&connp->conn_lock);
1776 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1777 onoff;
1778 mutex_exit(&connp->conn_lock);
1779 break;
1780 case IPV6_RECVHOPOPTS:
1781 mutex_enter(&connp->conn_lock);
1782 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1783 mutex_exit(&connp->conn_lock);
1784 break;
1785 case IPV6_RECVDSTOPTS:
1786 mutex_enter(&connp->conn_lock);
1787 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1788 mutex_exit(&connp->conn_lock);
1789 break;
1790 case _OLD_IPV6_RECVDSTOPTS:
1791 mutex_enter(&connp->conn_lock);
1792 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1793 onoff;
1794 mutex_exit(&connp->conn_lock);
1795 break;
1796 case IPV6_RECVRTHDRDSTOPTS:
1797 mutex_enter(&connp->conn_lock);
1798 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1799 onoff;
1800 mutex_exit(&connp->conn_lock);
1801 break;
1802 case IPV6_RECVRTHDR:
1803 mutex_enter(&connp->conn_lock);
1804 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1805 mutex_exit(&connp->conn_lock);
1806 break;
1807 case IPV6_PKTINFO:
1808 mutex_enter(&connp->conn_lock);
1809 if (inlen == 0) {
1810 ipp->ipp_fields &= ~IPPF_ADDR;
1811 ipp->ipp_addr = ipv6_all_zeros;
1812 ixa->ixa_ifindex = 0;
1813 } else {
1814 struct in6_pktinfo *pkti;
1815
1816 pkti = (struct in6_pktinfo *)invalp;
1817 ipp->ipp_addr = pkti->ipi6_addr;
1818 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1819 ipp->ipp_fields |= IPPF_ADDR;
1820 else
1821 ipp->ipp_fields &= ~IPPF_ADDR;
1822 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1823 }
1824 mutex_exit(&connp->conn_lock);
1825 /* Source and ifindex might have changed */
1826 coa->coa_changed |= COA_HEADER_CHANGED;
1827 coa->coa_changed |= COA_ROUTE_CHANGED;
1828 break;
1829 case IPV6_HOPLIMIT:
1830 mutex_enter(&connp->conn_lock);
1831 if (inlen == 0 || *i1 == -1) {
1832 /* Revert to default */
1833 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1834 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1835 } else {
1836 ipp->ipp_hoplimit = *i1;
1837 ipp->ipp_fields |= IPPF_HOPLIMIT;
1838 /* Ensure that it sticks for multicast packets */
1839 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1840 }
1841 mutex_exit(&connp->conn_lock);
1842 coa->coa_changed |= COA_HEADER_CHANGED;
1843 break;
1844 case IPV6_TCLASS:
1845 /*
1846 * IPV6_TCLASS accepts -1 as use kernel default
1847 * and [0, 255] as the actualy traffic class.
1848 */
1849 mutex_enter(&connp->conn_lock);
1850 if (inlen == 0 || *i1 == -1) {
1851 ipp->ipp_tclass = 0;
1852 ipp->ipp_fields &= ~IPPF_TCLASS;
1853 } else {
1854 ipp->ipp_tclass = *i1;
1855 ipp->ipp_fields |= IPPF_TCLASS;
1856 }
1857 mutex_exit(&connp->conn_lock);
1858 coa->coa_changed |= COA_HEADER_CHANGED;
1859 break;
1860 case IPV6_NEXTHOP:
1861 if (inlen == 0) {
1862 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1863 } else {
1864 sin6_t *sin6 = (sin6_t *)invalp;
1865
1866 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1867 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1868 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1869 else
1870 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1871 }
1872 coa->coa_changed |= COA_ROUTE_CHANGED;
1873 break;
1874 case IPV6_HOPOPTS:
1875 mutex_enter(&connp->conn_lock);
1876 error = optcom_pkt_set(invalp, inlen,
1877 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1878 if (error != 0) {
1879 mutex_exit(&connp->conn_lock);
1880 return (error);
1881 }
1882 if (ipp->ipp_hopoptslen == 0) {
1883 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1884 } else {
1885 ipp->ipp_fields |= IPPF_HOPOPTS;
1886 }
1887 mutex_exit(&connp->conn_lock);
1888 coa->coa_changed |= COA_HEADER_CHANGED;
1889 coa->coa_changed |= COA_WROFF_CHANGED;
1890 break;
1891 case IPV6_RTHDRDSTOPTS:
1892 mutex_enter(&connp->conn_lock);
1893 error = optcom_pkt_set(invalp, inlen,
1894 (uchar_t **)&ipp->ipp_rthdrdstopts,
1895 &ipp->ipp_rthdrdstoptslen);
1896 if (error != 0) {
1897 mutex_exit(&connp->conn_lock);
1898 return (error);
1899 }
1900 if (ipp->ipp_rthdrdstoptslen == 0) {
1901 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1902 } else {
1903 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1904 }
1905 mutex_exit(&connp->conn_lock);
1906 coa->coa_changed |= COA_HEADER_CHANGED;
1907 coa->coa_changed |= COA_WROFF_CHANGED;
1908 break;
1909 case IPV6_DSTOPTS:
1910 mutex_enter(&connp->conn_lock);
1911 error = optcom_pkt_set(invalp, inlen,
1912 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1913 if (error != 0) {
1914 mutex_exit(&connp->conn_lock);
1915 return (error);
1916 }
1917 if (ipp->ipp_dstoptslen == 0) {
1918 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1919 } else {
1920 ipp->ipp_fields |= IPPF_DSTOPTS;
1921 }
1922 mutex_exit(&connp->conn_lock);
1923 coa->coa_changed |= COA_HEADER_CHANGED;
1924 coa->coa_changed |= COA_WROFF_CHANGED;
1925 break;
1926 case IPV6_RTHDR:
1927 mutex_enter(&connp->conn_lock);
1928 error = optcom_pkt_set(invalp, inlen,
1929 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1930 if (error != 0) {
1931 mutex_exit(&connp->conn_lock);
1932 return (error);
1933 }
1934 if (ipp->ipp_rthdrlen == 0) {
1935 ipp->ipp_fields &= ~IPPF_RTHDR;
1936 } else {
1937 ipp->ipp_fields |= IPPF_RTHDR;
1938 }
1939 mutex_exit(&connp->conn_lock);
1940 coa->coa_changed |= COA_HEADER_CHANGED;
1941 coa->coa_changed |= COA_WROFF_CHANGED;
1942 break;
1943
1944 case IPV6_DONTFRAG:
1945 if (onoff) {
1946 ixa->ixa_flags |= IXAF_DONTFRAG;
1947 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1948 } else {
1949 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1950 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1951 }
1952 /* Need to redo ip_attr_connect */
1953 coa->coa_changed |= COA_ROUTE_CHANGED;
1954 break;
1955
1956 case IPV6_USE_MIN_MTU:
1957 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1958 ixa->ixa_use_min_mtu = *i1;
1959 /* Need to redo ip_attr_connect */
1960 coa->coa_changed |= COA_ROUTE_CHANGED;
1961 break;
1962
1963 case IPV6_SEC_OPT:
1964 mutex_enter(&connp->conn_lock);
1965 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1966 mutex_exit(&connp->conn_lock);
1967 if (error != 0) {
1968 return (error);
1969 }
1970 /* This is an IPsec policy change - redo ip_attr_connect */
1971 coa->coa_changed |= COA_ROUTE_CHANGED;
1972 break;
1973 case IPV6_SRC_PREFERENCES:
1974 /*
1975 * This socket option only affects connected
1976 * sockets that haven't already bound to a specific
1977 * IPv6 address. In other words, sockets that
1978 * don't call bind() with an address other than the
1979 * unspecified address and that call connect().
1980 * ip_set_destination_v6() passes these preferences
1981 * to the ipif_select_source_v6() function.
1982 */
1983 mutex_enter(&connp->conn_lock);
1984 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1985 mutex_exit(&connp->conn_lock);
1986 if (error != 0) {
1987 return (error);
1988 }
1989 break;
1990 case IPV6_V6ONLY:
1991 mutex_enter(&connp->conn_lock);
1992 connp->conn_ipv6_v6only = onoff;
1993 mutex_exit(&connp->conn_lock);
1994 break;
1995 }
1996 return (0);
1997 }
1998
1999 /* Handle IPPROTO_UDP */
2000 /* ARGSUSED1 */
2001 static int
2002 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2003 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2004 {
2005 conn_t *connp = coa->coa_connp;
2006 int *i1 = (int *)invalp;
2007 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2008 int error;
2009
2010 switch (name) {
2011 case UDP_ANONPRIVBIND:
2012 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2013 return (error);
2014 }
2015 break;
2016 }
2017 if (checkonly)
2018 return (0);
2019
2020 /* Here we set the actual option value */
2021 mutex_enter(&connp->conn_lock);
2022 switch (name) {
2023 case UDP_ANONPRIVBIND:
2024 connp->conn_anon_priv_bind = onoff;
2025 break;
2026 case UDP_EXCLBIND:
2027 connp->conn_exclbind = onoff;
2028 break;
2029 }
2030 mutex_exit(&connp->conn_lock);
2031 return (0);
2032 }
2033
2034 /* Handle IPPROTO_TCP */
2035 /* ARGSUSED1 */
2036 static int
2037 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2038 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2039 {
2040 conn_t *connp = coa->coa_connp;
2041 int *i1 = (int *)invalp;
2042 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2043 int error;
2044
2045 switch (name) {
2046 case TCP_ANONPRIVBIND:
2047 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2048 return (error);
2049 }
2050 break;
2051 }
2052 if (checkonly)
2053 return (0);
2054
2055 /* Here we set the actual option value */
2056 mutex_enter(&connp->conn_lock);
2057 switch (name) {
2058 case TCP_ANONPRIVBIND:
2059 connp->conn_anon_priv_bind = onoff;
2060 break;
2061 case TCP_EXCLBIND:
2062 connp->conn_exclbind = onoff;
2063 break;
2064 case TCP_RECVDSTADDR:
2065 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2066 break;
2067 }
2068 mutex_exit(&connp->conn_lock);
2069 return (0);
2070 }
2071
2072 int
2073 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2074 {
2075 sin_t *sin;
2076 sin6_t *sin6;
2077
2078 if (connp->conn_family == AF_INET) {
2079 if (*salenp < sizeof (sin_t))
2080 return (EINVAL);
2081
2082 *salenp = sizeof (sin_t);
2083 /* Fill zeroes and then initialize non-zero fields */
2084 sin = (sin_t *)sa;
2085 *sin = sin_null;
2086 sin->sin_family = AF_INET;
2087 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2088 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2089 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2090 } else {
2091 /*
2092 * INADDR_ANY
2093 * conn_saddr is not set, we might be bound to
2094 * broadcast/multicast. Use conn_bound_addr as
2095 * local address instead (that could
2096 * also still be INADDR_ANY)
2097 */
2098 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2099 }
2100 sin->sin_port = connp->conn_lport;
2101 } else {
2102 if (*salenp < sizeof (sin6_t))
2103 return (EINVAL);
2104
2105 *salenp = sizeof (sin6_t);
2106 /* Fill zeroes and then initialize non-zero fields */
2107 sin6 = (sin6_t *)sa;
2108 *sin6 = sin6_null;
2109 sin6->sin6_family = AF_INET6;
2110 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2111 sin6->sin6_addr = connp->conn_saddr_v6;
2112 } else {
2113 /*
2114 * conn_saddr is not set, we might be bound to
2115 * broadcast/multicast. Use conn_bound_addr as
2116 * local address instead (which could
2117 * also still be unspecified)
2118 */
2119 sin6->sin6_addr = connp->conn_bound_addr_v6;
2120 }
2121 sin6->sin6_port = connp->conn_lport;
2122 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2123 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2124 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2125 }
2126 return (0);
2127 }
2128
2129 int
2130 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2131 {
2132 struct sockaddr_in *sin;
2133 struct sockaddr_in6 *sin6;
2134
2135 if (connp->conn_family == AF_INET) {
2136 if (*salenp < sizeof (sin_t))
2137 return (EINVAL);
2138
2139 *salenp = sizeof (sin_t);
2140 /* initialize */
2141 sin = (sin_t *)sa;
2142 *sin = sin_null;
2143 sin->sin_family = AF_INET;
2144 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2145 sin->sin_port = connp->conn_fport;
2146 } else {
2147 if (*salenp < sizeof (sin6_t))
2148 return (EINVAL);
2149
2150 *salenp = sizeof (sin6_t);
2151 /* initialize */
2152 sin6 = (sin6_t *)sa;
2153 *sin6 = sin6_null;
2154 sin6->sin6_family = AF_INET6;
2155 sin6->sin6_addr = connp->conn_faddr_v6;
2156 sin6->sin6_port = connp->conn_fport;
2157 sin6->sin6_flowinfo = connp->conn_flowinfo;
2158 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2159 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2160 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2161 }
2162 return (0);
2163 }
2164
/*
 * Forward declarations for the file-local checksum helpers defined further
 * down; conn_build_hdr_template() and conn_prepend_hdr() call them.
 */
2165 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2166 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2167
2168 /*
2169 * Allocate and fill in conn_ht_iphc based on the current information
2170 * in the conn.
2171 * Normally used when we bind() and connect().
2172 * Returns failure if can't allocate memory, or if there is a problem
2173 * with a routing header/option.
2174 *
2175 * We allocate space for the transport header (ulp_hdr_len + extra) and
2176 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2177 * The extra is there for transports that want some spare room for future
2178 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2179 * excludes the extra part.
2180 *
2181 * We massage an routing option/header and store the ckecksum difference
2182 * in conn_sum.
2183 *
2184 * Caller needs to update conn_wroff if desired.
2185 */
2186 int
2187 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2188 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2189 {
2190 ip_xmit_attr_t *ixa = connp->conn_ixa;
2191 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2192 uint_t ip_hdr_length;
2193 uchar_t *hdrs;
2194 uint_t hdrs_len;
2195
2196 ASSERT(MUTEX_HELD(&connp->conn_lock));
2197
2198 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2199 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2200 /* In case of TX label and IP options it can be too much */
2201 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2202 /* Preserves existing TX errno for this */
2203 return (EHOSTUNREACH);
2204 }
2205 } else {
2206 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2207 }
2208 ixa->ixa_ip_hdr_length = ip_hdr_length;
2209 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2210 ASSERT(hdrs_len != 0);
2211
2212 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2213 /* Allocate new before we free any old */
2214 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2215 if (hdrs == NULL)
2216 return (ENOMEM);
2217
2218 if (connp->conn_ht_iphc != NULL) {
2219 kmem_free(connp->conn_ht_iphc,
2220 connp->conn_ht_iphc_allocated);
2221 }
2222 connp->conn_ht_iphc = hdrs;
2223 connp->conn_ht_iphc_allocated = hdrs_len;
2224 } else {
2225 hdrs = connp->conn_ht_iphc;
2226 }
2227 hdrs_len -= extra;
2228 connp->conn_ht_iphc_len = hdrs_len;
2229
2230 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2231 connp->conn_ht_ulp_len = ulp_hdr_length;
2232
2233 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2234 ipha_t *ipha = (ipha_t *)hdrs;
2235
2236 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2237 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2238 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2239 ipha->ipha_length = htons(hdrs_len);
2240 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2241 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2242 else
2243 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2244
2245 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2246 connp->conn_sum = cksum_massage_options_v4(ipha,
2247 connp->conn_netstack);
2248 } else {
2249 connp->conn_sum = 0;
2250 }
2251 } else {
2252 ip6_t *ip6h = (ip6_t *)hdrs;
2253
2254 ip6h->ip6_src = *v6src;
2255 ip6h->ip6_dst = *v6dst;
2256 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2257 flowinfo);
2258 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2259
2260 if (ipp->ipp_fields & IPPF_RTHDR) {
2261 connp->conn_sum = cksum_massage_options_v6(ip6h,
2262 ip_hdr_length, connp->conn_netstack);
2263
2264 /*
2265 * Verify that the first hop isn't a mapped address.
2266 * Routers along the path need to do this verification
2267 * for subsequent hops.
2268 */
2269 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2270 return (EADDRNOTAVAIL);
2271
2272 } else {
2273 connp->conn_sum = 0;
2274 }
2275 }
2276 return (0);
2277 }
2278
2279 /*
2280 * Prepend a header template to data_mp based on the ip_pkt_t
2281 * and the passed in source, destination and protocol.
2282 *
2283 * Returns failure if can't allocate memory, in which case data_mp is freed.
2284 * We allocate space for the transport header (ulp_hdr_len) and
2285 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2286 *
2287 * We massage an routing option/header and return the ckecksum difference
2288 * in *sump. This is in host byte order.
2289 *
2290 * Caller needs to update conn_wroff if desired.
2291 */
2292 mblk_t *
2293 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2294 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2295 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2296 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2297 {
2298 uint_t ip_hdr_length;
2299 uchar_t *hdrs;
2300 uint_t hdrs_len;
2301 mblk_t *mp;
2302
2303 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2304 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2305 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2306 } else {
2307 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2308 }
2309 hdrs_len = ip_hdr_length + ulp_hdr_length;
2310 ASSERT(hdrs_len != 0);
2311
2312 ixa->ixa_ip_hdr_length = ip_hdr_length;
2313
2314 /* Can we prepend to data_mp? */
2315 if (data_mp != NULL &&
2316 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2317 data_mp->b_datap->db_ref == 1) {
2318 hdrs = data_mp->b_rptr - hdrs_len;
2319 data_mp->b_rptr = hdrs;
2320 mp = data_mp;
2321 } else {
2322 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2323 if (mp == NULL) {
2324 freemsg(data_mp);
2325 *errorp = ENOMEM;
2326 return (NULL);
2327 }
2328 mp->b_wptr = mp->b_datap->db_lim;
2329 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2330 mp->b_cont = data_mp;
2331 }
2332
2333 /*
2334 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2335 * if PKTINFO (aka IPPF_ADDR) was set.
2336 */
2337 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2338 ipha_t *ipha = (ipha_t *)hdrs;
2339
2340 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2341 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2342 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2343 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2344 ipha->ipha_length = htons(hdrs_len + data_length);
2345 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2346 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2347 else
2348 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2349
2350 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2351 *sump = cksum_massage_options_v4(ipha,
2352 ixa->ixa_ipst->ips_netstack);
2353 } else {
2354 *sump = 0;
2355 }
2356 } else {
2357 ip6_t *ip6h = (ip6_t *)hdrs;
2358
2359 ip6h->ip6_src = *v6src;
2360 ip6h->ip6_dst = *v6dst;
2361 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2362 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2363
2364 if (ipp->ipp_fields & IPPF_RTHDR) {
2365 *sump = cksum_massage_options_v6(ip6h,
2366 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2367
2368 /*
2369 * Verify that the first hop isn't a mapped address.
2370 * Routers along the path need to do this verification
2371 * for subsequent hops.
2372 */
2373 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2374 *errorp = EADDRNOTAVAIL;
2375 freemsg(mp);
2376 return (NULL);
2377 }
2378 } else {
2379 *sump = 0;
2380 }
2381 }
2382 return (mp);
2383 }
2384
2385 /*
2386 * Massage a source route if any putting the first hop
2387 * in ipha_dst. Compute a starting value for the checksum which
2388 * takes into account that the original ipha_dst should be
2389 * included in the checksum but that IP will include the
2390 * first hop from the source route in the tcp checksum.
2391 */
2392 static uint32_t
2393 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2394 {
2395 in_addr_t dst;
2396 uint32_t cksum;
2397
2398 /* Get last hop then diff against first hop */
2399 cksum = ip_massage_options(ipha, ns);
2400 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2401 dst = ipha->ipha_dst;
2402 cksum -= ((dst >> 16) + (dst & 0xffff));
2403 if ((int)cksum < 0)
2404 cksum--;
2405 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2406 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2407 ASSERT(cksum < 0x10000);
2408 return (ntohs(cksum));
2409 }
2410
2411 static uint32_t
2412 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2413 {
2414 uint8_t *end;
2415 ip6_rthdr_t *rth;
2416 uint32_t cksum;
2417
2418 end = (uint8_t *)ip6h + ip_hdr_len;
2419 rth = ip_find_rthdr_v6(ip6h, end);
2420 if (rth == NULL)
2421 return (0);
2422
2423 cksum = ip_massage_options_v6(ip6h, rth, ns);
2424 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2425 ASSERT(cksum < 0x10000);
2426 return (ntohs(cksum));
2427 }
2428
2429 /*
2430 * ULPs that change the destination address need to call this for each
2431 * change to discard any state about a previous destination that might
2432 * have been multicast or multirt.
2433 */
2434 void
2435 ip_attr_newdst(ip_xmit_attr_t *ixa)
2436 {
2437 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2438 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2439 IXAF_NO_LOOP_ZONEID_SET);
2440 }
2441
2442 /*
2443 * Determine the nexthop which will be used.
2444 * Normally this is just the destination, but if a IPv4 source route, or
2445 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2446 * there.
2447 */
2448 void
2449 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2450 const in6_addr_t *dst, in6_addr_t *nexthop)
2451 {
2452 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2453 *nexthop = *dst;
2454 return;
2455 }
2456 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2457 ipaddr_t v4dst;
2458 ipaddr_t v4nexthop;
2459
2460 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2461 v4nexthop = ip_pkt_source_route_v4(ipp);
2462 if (v4nexthop == INADDR_ANY)
2463 v4nexthop = v4dst;
2464
2465 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2466 } else {
2467 const in6_addr_t *v6nexthop;
2468
2469 v6nexthop = ip_pkt_source_route_v6(ipp);
2470 if (v6nexthop == NULL)
2471 v6nexthop = dst;
2472
2473 *nexthop = *v6nexthop;
2474 }
2475 }
2476
2477 /*
2478 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2479 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2480 * case (connected latching is done in conn_connect).
2481 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2482 * set, but doesn't otherwise use the conn_t.
2483 *
2484 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2485 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2486 *
2487 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2488 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2489 *
2490 * Updates laddrp and uinfo if they are non-NULL.
2491 *
2492 * TSOL notes: The callers if ip_attr_connect must check if the destination
2493 * is different than before and in that case redo conn_update_label.
2494 * The callers of conn_connect do not need that since conn_connect
2495 * performs the conn_update_label.
2496 */
2497 int
2498 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2499 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2500 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2501 iulp_t *uinfo, uint32_t flags)
2502 {
2503 in6_addr_t laddr = *v6src;
2504 int error;
2505
2506 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2507
2508 if (connp->conn_zone_is_global)
2509 flags |= IPDF_ZONE_IS_GLOBAL;
2510 else
2511 flags &= ~IPDF_ZONE_IS_GLOBAL;
2512
2513 /*
2514 * Lookup the route to determine a source address and the uinfo.
2515 * If the ULP has a source route option then the caller will
2516 * have set v6nexthop to be the first hop.
2517 */
2518 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2519 ipaddr_t v4dst;
2520 ipaddr_t v4src, v4nexthop;
2521
2522 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2523 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2524 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2525
2526 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2527 flags &= ~IPDF_SELECT_SRC;
2528 else
2529 flags |= IPDF_SELECT_SRC;
2530
2531 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2532 uinfo, flags, connp->conn_mac_mode);
2533 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2534 } else {
2535 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2536 flags &= ~IPDF_SELECT_SRC;
2537 else
2538 flags |= IPDF_SELECT_SRC;
2539
2540 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2541 uinfo, flags, connp->conn_mac_mode);
2542 }
2543 /* Pass out some address even if we hit a RTF_REJECT etc */
2544 if (laddrp != NULL)
2545 *laddrp = laddr;
2546
2547 if (error != 0)
2548 return (error);
2549
2550 if (flags & IPDF_IPSEC) {
2551 /*
2552 * Set any IPsec policy in ixa. Routine also looks at ULP
2553 * ports.
2554 */
2555 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2556 }
2557 return (0);
2558 }
2559
2560 /*
2561 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2562 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2563 * usable for SCTP, since SCTP has multiple faddrs.
2564 *
2565 * Caller must hold conn_lock to provide atomic constency between the
2566 * conn_t's addresses and the ixa.
2567 * NOTE: this function drops and reaquires conn_lock since it can't be
2568 * held across ip_attr_connect/ip_set_destination.
2569 *
2570 * The caller needs to handle inserting in the receive-side fanout when
2571 * appropriate after conn_connect returns.
2572 */
2573 int
2574 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2575 {
2576 ip_xmit_attr_t *ixa = connp->conn_ixa;
2577 in6_addr_t nexthop;
2578 in6_addr_t saddr, faddr;
2579 in_port_t fport;
2580 int error;
2581
2582 ASSERT(MUTEX_HELD(&connp->conn_lock));
2583
2584 if (connp->conn_ipversion == IPV4_VERSION)
2585 ixa->ixa_flags |= IXAF_IS_IPV4;
2586 else
2587 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2588
2589 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2590 flags &= ~IPDF_IPSEC;
2591
2592 /* In case we had previously done an ip_attr_connect */
2593 ip_attr_newdst(ixa);
2594
2595 /*
2596 * Determine the nexthop and copy the addresses before dropping
2597 * conn_lock.
2598 */
2599 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2600 &connp->conn_faddr_v6, &nexthop);
2601 saddr = connp->conn_saddr_v6;
2602 faddr = connp->conn_faddr_v6;
2603 fport = connp->conn_fport;
2604
2605 mutex_exit(&connp->conn_lock);
2606 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2607 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2608 mutex_enter(&connp->conn_lock);
2609
2610 /* Could have changed even if an error */
2611 connp->conn_saddr_v6 = saddr;
2612 if (error != 0)
2613 return (error);
2614
2615 /*
2616 * Check whether Trusted Solaris policy allows communication with this
2617 * host, and pretend that the destination is unreachable if not.
2618 * Compute any needed label and place it in ipp_label_v4/v6.
2619 *
2620 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2621 * the packet.
2622 *
2623 * TSOL Note: Any concurrent threads would pick a different ixa
2624 * (and ipp if they are to change the ipp) so we
2625 * don't have to worry about concurrent threads.
2626 */
2627 if (is_system_labeled()) {
2628 if (connp->conn_mlp_type != mlptSingle)
2629 return (ECONNREFUSED);
2630
2631 /*
2632 * conn_update_label will set ipp_label* which will later
2633 * be used by conn_build_hdr_template.
2634 */
2635 error = conn_update_label(connp, ixa,
2636 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2637 if (error != 0)
2638 return (error);
2639 }
2640
2641 /*
2642 * Ensure that we match on the selected local address.
2643 * This overrides conn_laddr in the case we had earlier bound to a
2644 * multicast or broadcast address.
2645 */
2646 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2647
2648 /*
2649 * Allow setting new policies.
2650 * The addresses/ports are already set, thus the IPsec policy calls
2651 * can handle their passed-in conn's.
2652 */
2653 connp->conn_policy_cached = B_FALSE;
2654
2655 /*
2656 * Cache IPsec policy in this conn. If we have per-socket policy,
2657 * we'll cache that. If we don't, we'll inherit global policy.
2658 *
2659 * This is done before the caller inserts in the receive-side fanout.
2660 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2661 * for connections where we don't have a policy. This is to prevent
2662 * global policy lookups in the inbound path.
2663 *
2664 * If we insert before we set conn_policy_cached,
2665 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2666 * because global policy cound be non-empty. We normally call
2667 * ipsec_check_policy() for conn_policy_cached connections only if
2668 * conn_in_enforce_policy is set. But in this case,
2669 * conn_policy_cached can get set anytime since we made the
2670 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2671 * called, which will make the above assumption false. Thus, we
2672 * need to insert after we set conn_policy_cached.
2673 */
2674 error = ipsec_conn_cache_policy(connp,
2675 connp->conn_ipversion == IPV4_VERSION);
2676 if (error != 0)
2677 return (error);
2678
2679 /*
2680 * We defer to do LSO check until here since now we have better idea
2681 * whether IPsec is present. If the underlying ill is LSO capable,
2682 * copy its capability in so the ULP can decide whether to enable LSO
2683 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2684 * claim LSO for IPv6.
2685 *
2686 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2687 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2688 */
2689 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2690
2691 ASSERT(ixa->ixa_ire != NULL);
2692 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2693 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2694 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2695 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2696 (ixa->ixa_nce != NULL) &&
2697 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2698 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2699 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2700 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2701 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2702 }
2703
2704 /* Check whether ZEROCOPY capability is usable for this connection. */
2705 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2706
2707 if ((flags & IPDF_ZCOPY) &&
2708 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2709 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2710 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2711 (ixa->ixa_nce != NULL) &&
2712 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2713 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2714 }
2715 return (0);
2716 }
2717
2718 /*
2719 * Predicates to check if the addresses match conn_last*
2720 */
2721
2722 /*
2723 * Compare the conn against an address.
2724 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2725 */
2726 boolean_t
2727 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2728 {
2729 ASSERT(connp->conn_family == AF_INET);
2730 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2731 sin->sin_port == connp->conn_lastdstport);
2732 }
2733
2734 /*
2735 * Compare, including for mapped addresses
2736 */
2737 boolean_t
2738 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2739 {
2740 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2741 sin6->sin6_port == connp->conn_lastdstport &&
2742 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2743 sin6->sin6_scope_id == connp->conn_lastscopeid);
2744 }
2745
2746 /*
2747 * Compute a label and place it in the ip_packet_t.
2748 * Handles IPv4 and IPv6.
2749 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2750 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2751 * has been called.
2752 */
2753 int
2754 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2755 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2756 {
2757 int err;
2758 ipaddr_t v4dst;
2759
2760 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2761 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2762
2763 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2764
2765 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2766 v4dst, opt_storage, ixa->ixa_ipst);
2767 if (err == 0) {
2768 /* Length contained in opt_storage[IPOPT_OLEN] */
2769 err = optcom_pkt_set(opt_storage,
2770 opt_storage[IPOPT_OLEN],
2771 (uchar_t **)&ipp->ipp_label_v4,
2772 &ipp->ipp_label_len_v4);
2773 }
2774 if (err != 0) {
2775 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2776 char *, "conn(1) failed to update options(2) "
2777 "on ixa(3)",
2778 conn_t *, connp, char *, opt_storage,
2779 ip_xmit_attr_t *, ixa);
2780 }
2781 if (ipp->ipp_label_len_v4 != 0)
2782 ipp->ipp_fields |= IPPF_LABEL_V4;
2783 else
2784 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2785 } else {
2786 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2787 uint_t optlen;
2788
2789 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2790 v6dst, opt_storage, ixa->ixa_ipst);
2791 if (err == 0) {
2792 /*
2793 * Note that ipp_label_v6 is just the option - not
2794 * the hopopts extension header.
2795 *
2796 * Length contained in opt_storage[IPOPT_OLEN], but
2797 * that doesn't include the two byte options header.
2798 */
2799 optlen = opt_storage[IPOPT_OLEN];
2800 if (optlen != 0)
2801 optlen += 2;
2802
2803 err = optcom_pkt_set(opt_storage, optlen,
2804 (uchar_t **)&ipp->ipp_label_v6,
2805 &ipp->ipp_label_len_v6);
2806 }
2807 if (err != 0) {
2808 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2809 char *, "conn(1) failed to update options(2) "
2810 "on ixa(3)",
2811 conn_t *, connp, char *, opt_storage,
2812 ip_xmit_attr_t *, ixa);
2813 }
2814 if (ipp->ipp_label_len_v6 != 0)
2815 ipp->ipp_fields |= IPPF_LABEL_V6;
2816 else
2817 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2818 }
2819 return (err);
2820 }
2821
2822 /*
2823 * Inherit all options settings from the parent/listener to the eager.
2824 * Returns zero on success; ENOMEM if memory allocation failed.
2825 *
2826 * We assume that the eager has not had any work done i.e., the conn_ixa
2827 * and conn_xmit_ipp are all zero.
2828 * Furthermore we assume that no other thread can access the eager (because
2829 * it isn't inserted in any fanout list).
2830 */
2831 int
2832 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2833 {
2834 cred_t *credp;
2835 int err;
2836 void *notify_cookie;
2837 uint32_t xmit_hint;
2838
2839 econnp->conn_family = lconnp->conn_family;
2840 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2841 econnp->conn_wq = lconnp->conn_wq;
2842 econnp->conn_rq = lconnp->conn_rq;
2843
2844 /*
2845 * Make a safe copy of the transmit attributes.
2846 * conn_connect will later be used by the caller to setup the ire etc.
2847 */
2848 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2849 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2850 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2851 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2852
2853 /* Preserve ixa_notify_cookie and xmit_hint */
2854 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2855 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2856 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2857 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2858 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2859
2860 econnp->conn_bound_if = lconnp->conn_bound_if;
2861 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2862
2863 /* Inherit all RECV options */
2864 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2865
2866 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2867 KM_NOSLEEP);
2868 if (err != 0)
2869 return (err);
2870
2871 econnp->conn_zoneid = lconnp->conn_zoneid;
2872 econnp->conn_allzones = lconnp->conn_allzones;
2873
2874 /* This is odd. Pick a flowlabel for each connection instead? */
2875 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2876
2877 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2878
2879 /*
2880 * TSOL: tsol_input_proc() needs the eager's cred before the
2881 * eager is accepted
2882 */
2883 ASSERT(lconnp->conn_cred != NULL);
2884 econnp->conn_cred = credp = lconnp->conn_cred;
2885 crhold(credp);
2886 econnp->conn_cpid = lconnp->conn_cpid;
2887 econnp->conn_open_time = ddi_get_lbolt64();
2888
2889 /*
2890 * Cache things in the ixa without any refhold.
2891 * Listener might not have set up ixa_cred
2892 */
2893 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2894 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2895 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2896 if (is_system_labeled())
2897 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2898
2899 /*
2900 * If the caller has the process-wide flag set, then default to MAC
2901 * exempt mode. This allows read-down to unlabeled hosts.
2902 */
2903 if (getpflags(NET_MAC_AWARE, credp) != 0)
2904 econnp->conn_mac_mode = CONN_MAC_AWARE;
2905
2906 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2907
2908 /*
2909 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2910 * via soaccept()->soinheritoptions() which essentially applies
2911 * all the listener options to the new connection. The options that we
2912 * need to take care of are:
2913 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2914 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2915 * SO_SNDBUF, SO_RCVBUF.
2916 *
2917 * SO_RCVBUF: conn_rcvbuf is set.
2918 * SO_SNDBUF: conn_sndbuf is set.
2919 */
2920
2921 /* Could we define a struct and use a struct copy for this? */
2922 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2923 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2924 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2925 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2926 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2927 econnp->conn_oobinline = lconnp->conn_oobinline;
2928 econnp->conn_debug = lconnp->conn_debug;
2929 econnp->conn_keepalive = lconnp->conn_keepalive;
2930 econnp->conn_linger = lconnp->conn_linger;
2931 econnp->conn_lingertime = lconnp->conn_lingertime;
2932
2933 /* Set the IP options */
2934 econnp->conn_broadcast = lconnp->conn_broadcast;
2935 econnp->conn_useloopback = lconnp->conn_useloopback;
2936 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2937 return (0);
2938 }