Print this page
3942 inject sanity into ipadm tcp buffer size properties
3943 _snd_lowat_fraction tcp tunable has no effect
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Peng Dai <peng.dai@delphix.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/icmp.c
+++ new/usr/src/uts/common/inet/ip/icmp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright (c) 2013 by Delphix. All rights reserved.
23 24 */
24 25 /* Copyright (c) 1990 Mentat Inc. */
25 26
26 27 #include <sys/types.h>
27 28 #include <sys/stream.h>
28 29 #include <sys/stropts.h>
29 30 #include <sys/strlog.h>
30 31 #include <sys/strsun.h>
31 32 #define _SUN_TPI_VERSION 2
32 33 #include <sys/tihdr.h>
33 34 #include <sys/timod.h>
34 35 #include <sys/ddi.h>
35 36 #include <sys/sunddi.h>
36 37 #include <sys/strsubr.h>
37 38 #include <sys/suntpi.h>
38 39 #include <sys/xti_inet.h>
39 40 #include <sys/cmn_err.h>
40 41 #include <sys/kmem.h>
41 42 #include <sys/cred.h>
42 43 #include <sys/policy.h>
43 44 #include <sys/priv.h>
44 45 #include <sys/ucred.h>
45 46 #include <sys/zone.h>
46 47
47 48 #include <sys/sockio.h>
48 49 #include <sys/socket.h>
49 50 #include <sys/socketvar.h>
50 51 #include <sys/vtrace.h>
51 52 #include <sys/sdt.h>
52 53 #include <sys/debug.h>
53 54 #include <sys/isa_defs.h>
54 55 #include <sys/random.h>
55 56 #include <netinet/in.h>
56 57 #include <netinet/ip6.h>
57 58 #include <netinet/icmp6.h>
58 59 #include <netinet/udp.h>
59 60
60 61 #include <inet/common.h>
61 62 #include <inet/ip.h>
62 63 #include <inet/ip_impl.h>
63 64 #include <inet/ipsec_impl.h>
64 65 #include <inet/ip6.h>
65 66 #include <inet/ip_ire.h>
66 67 #include <inet/ip_if.h>
67 68 #include <inet/ip_multi.h>
68 69 #include <inet/ip_ndp.h>
69 70 #include <inet/proto_set.h>
70 71 #include <inet/mib2.h>
71 72 #include <inet/nd.h>
72 73 #include <inet/optcom.h>
73 74 #include <inet/snmpcom.h>
74 75 #include <inet/kstatcom.h>
75 76 #include <inet/ipclassifier.h>
76 77
77 78 #include <sys/tsol/label.h>
78 79 #include <sys/tsol/tnet.h>
79 80
80 81 #include <inet/rawip_impl.h>
81 82
82 83 #include <sys/disp.h>
83 84
84 85 /*
↓ open down ↓ |
52 lines elided |
↑ open up ↑ |
85 86 * Synchronization notes:
86 87 *
87 88 * RAWIP is MT and uses the usual kernel synchronization primitives. We use
88 89 * conn_lock to protect the icmp_t.
89 90 *
90 91 * Plumbing notes:
91 92 * ICMP is always a device driver. For compatibility with mibopen() code
92 93 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
93 94 * dummy module.
94 95 */
95 -
96 96 static void icmp_addr_req(queue_t *q, mblk_t *mp);
97 97 static void icmp_tpi_bind(queue_t *q, mblk_t *mp);
98 98 static void icmp_bind_proto(icmp_t *icmp);
99 99 static int icmp_build_hdr_template(conn_t *, const in6_addr_t *,
100 100 const in6_addr_t *, uint32_t);
101 101 static void icmp_capability_req(queue_t *q, mblk_t *mp);
102 102 static int icmp_close(queue_t *q, int flags);
103 103 static void icmp_close_free(conn_t *);
104 104 static void icmp_tpi_connect(queue_t *q, mblk_t *mp);
105 105 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
106 106 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
107 107 int sys_error);
108 108 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
109 109 t_scalar_t tlierr, int sys_error);
110 110 static void icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2,
111 111 ip_recv_attr_t *);
112 112 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
113 113 ip_recv_attr_t *);
114 114 static void icmp_info_req(queue_t *q, mblk_t *mp);
115 115 static void icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
116 116 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags);
117 117 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
118 118 cred_t *credp);
119 119 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
120 120 cred_t *credp);
121 121 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
122 122 int icmp_opt_set(conn_t *connp, uint_t optset_context,
123 123 int level, int name, uint_t inlen,
124 124 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
125 125 void *thisdg_attrs, cred_t *cr);
126 126 int icmp_opt_get(conn_t *connp, int level, int name,
127 127 uchar_t *ptr);
128 128 static int icmp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
129 129 sin6_t *sin6, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa);
130 130 static mblk_t *icmp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
131 131 const in6_addr_t *, const in6_addr_t *, uint32_t, mblk_t *, int *);
132 132 static mblk_t *icmp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
133 133 mblk_t *, const in6_addr_t *, uint32_t, int *);
134 134 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
135 135 uchar_t *ptr, int len);
136 136 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
137 137 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp);
138 138 static void icmp_wput(queue_t *q, mblk_t *mp);
139 139 static void icmp_wput_fallback(queue_t *q, mblk_t *mp);
140 140 static void icmp_wput_other(queue_t *q, mblk_t *mp);
141 141 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp);
142 142 static void icmp_wput_restricted(queue_t *q, mblk_t *mp);
143 143 static void icmp_ulp_recv(conn_t *, mblk_t *, uint_t);
144 144
145 145 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns);
146 146 static void rawip_stack_fini(netstackid_t stackid, void *arg);
147 147
148 148 static void *rawip_kstat_init(netstackid_t stackid);
149 149 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
150 150 static int rawip_kstat_update(kstat_t *kp, int rw);
151 151 static void rawip_stack_shutdown(netstackid_t stackid, void *arg);
152 152
153 153 /* Common routines for TPI and socket module */
154 154 static conn_t *rawip_do_open(int, cred_t *, int *, int);
155 155 static void rawip_do_close(conn_t *);
156 156 static int rawip_do_bind(conn_t *, struct sockaddr *, socklen_t);
157 157 static int rawip_do_unbind(conn_t *);
158 158 static int rawip_do_connect(conn_t *, const struct sockaddr *, socklen_t,
159 159 cred_t *, pid_t);
160 160
161 161 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
162 162 socklen_t *, cred_t *);
163 163 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
164 164 socklen_t *, cred_t *);
165 165
166 166 static struct module_info icmp_mod_info = {
167 167 5707, "icmp", 1, INFPSZ, 512, 128
168 168 };
169 169
170 170 /*
171 171 * Entry points for ICMP as a device.
172 172 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
173 173 */
174 174 static struct qinit icmprinitv4 = {
175 175 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
176 176 };
177 177
178 178 static struct qinit icmprinitv6 = {
179 179 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
180 180 };
181 181
182 182 static struct qinit icmpwinit = {
183 183 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
184 184 };
185 185
186 186 /* ICMP entry point during fallback */
187 187 static struct qinit icmp_fallback_sock_winit = {
188 188 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
189 189 };
190 190
191 191 /* For AF_INET aka /dev/icmp */
192 192 struct streamtab icmpinfov4 = {
193 193 &icmprinitv4, &icmpwinit
194 194 };
195 195
196 196 /* For AF_INET6 aka /dev/icmp6 */
197 197 struct streamtab icmpinfov6 = {
198 198 &icmprinitv6, &icmpwinit
199 199 };
200 200
201 201 /* Default structure copied into T_INFO_ACK messages */
202 202 static struct T_info_ack icmp_g_t_info_ack = {
203 203 T_INFO_ACK,
204 204 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */
205 205 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */
↓ open down ↓ |
100 lines elided |
↑ open up ↑ |
206 206 T_INVALID, /* CDATA_size. icmp does not support connect data. */
207 207 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */
208 208 0, /* ADDR_size - filled in later. */
209 209 0, /* OPT_size - not initialized here */
210 210 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */
211 211 T_CLTS, /* SERV_type. icmp supports connection-less. */
212 212 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */
213 213 (XPG4_1|SENDZERO) /* PROVIDER_flag */
214 214 };
215 215
216 +static int
217 +icmp_set_buf_prop(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
218 + const char *ifname, const void *pval, uint_t flags)
219 +{
220 + return (mod_set_buf_prop(stack->netstack_icmp->is_propinfo_tbl,
221 + stack, cr, pinfo, ifname, pval, flags));
222 +}
223 +
224 +static int
225 +icmp_get_buf_prop(netstack_t *stack, mod_prop_info_t *pinfo, const char *ifname,
226 + void *val, uint_t psize, uint_t flags)
227 +{
228 + return (mod_get_buf_prop(stack->netstack_icmp->is_propinfo_tbl, stack,
229 + pinfo, ifname, val, psize, flags));
230 +}
231 +
216 232 /*
217 233 * All of these are alterable, within the min/max values given, at run time.
218 234 *
219 235 * Note: All those tunables which do not start with "icmp_" are Committed and
220 236 * therefore are public. See PSARC 2010/080.
221 237 */
222 238 static mod_prop_info_t icmp_propinfo_tbl[] = {
223 239 /* tunable - 0 */
224 240 { "_wroff_extra", MOD_PROTO_RAWIP,
225 241 mod_set_uint32, mod_get_uint32,
226 242 {0, 128, 32}, {32} },
227 243
228 244 { "_ipv4_ttl", MOD_PROTO_RAWIP,
229 245 mod_set_uint32, mod_get_uint32,
230 246 {1, 255, 255}, {255} },
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
231 247
232 248 { "_ipv6_hoplimit", MOD_PROTO_RAWIP,
233 249 mod_set_uint32, mod_get_uint32,
234 250 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
235 251 {IPV6_DEFAULT_HOPS} },
236 252
237 253 { "_bsd_compat", MOD_PROTO_RAWIP,
238 254 mod_set_boolean, mod_get_boolean,
239 255 {B_TRUE}, {B_TRUE} },
240 256
241 - { "send_maxbuf", MOD_PROTO_RAWIP,
242 - mod_set_uint32, mod_get_uint32,
257 + { "send_buf", MOD_PROTO_RAWIP,
258 + icmp_set_buf_prop, icmp_get_buf_prop,
243 259 {4096, 65536, 8192}, {8192} },
244 260
245 261 { "_xmit_lowat", MOD_PROTO_RAWIP,
246 262 mod_set_uint32, mod_get_uint32,
247 263 {0, 65536, 1024}, {1024} },
248 264
249 - { "recv_maxbuf", MOD_PROTO_RAWIP,
250 - mod_set_uint32, mod_get_uint32,
265 + { "recv_buf", MOD_PROTO_RAWIP,
266 + icmp_set_buf_prop, icmp_get_buf_prop,
251 267 {4096, 65536, 8192}, {8192} },
252 268
253 - { "_max_buf", MOD_PROTO_RAWIP,
269 + { "max_buf", MOD_PROTO_RAWIP,
254 270 mod_set_uint32, mod_get_uint32,
255 - {65536, 1024*1024*1024, 256*1024}, {256 * 1024} },
271 + {65536, ULP_MAX_BUF, 256*1024}, {256*1024} },
256 272
257 273 { "_pmtu_discovery", MOD_PROTO_RAWIP,
258 274 mod_set_boolean, mod_get_boolean,
259 275 {B_FALSE}, {B_FALSE} },
260 276
261 277 { "_sendto_ignerr", MOD_PROTO_RAWIP,
262 278 mod_set_boolean, mod_get_boolean,
263 279 {B_FALSE}, {B_FALSE} },
264 280
265 281 { "?", MOD_PROTO_RAWIP, NULL, mod_get_allprop, {0}, {0} },
266 282
267 283 { NULL, 0, NULL, NULL, {0}, {0} }
268 284 };
269 285
270 286 #define is_wroff_extra is_propinfo_tbl[0].prop_cur_uval
271 287 #define is_ipv4_ttl is_propinfo_tbl[1].prop_cur_uval
272 288 #define is_ipv6_hoplimit is_propinfo_tbl[2].prop_cur_uval
273 289 #define is_bsd_compat is_propinfo_tbl[3].prop_cur_bval
274 290 #define is_xmit_hiwat is_propinfo_tbl[4].prop_cur_uval
275 291 #define is_xmit_lowat is_propinfo_tbl[5].prop_cur_uval
276 292 #define is_recv_hiwat is_propinfo_tbl[6].prop_cur_uval
277 293 #define is_max_buf is_propinfo_tbl[7].prop_cur_uval
278 294 #define is_pmtu_discovery is_propinfo_tbl[8].prop_cur_bval
279 295 #define is_sendto_ignerr is_propinfo_tbl[9].prop_cur_bval
280 296
281 297 typedef union T_primitives *t_primp_t;
282 298
283 299 /*
284 300 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
285 301 * passed to icmp_wput.
286 302 * It calls IP to verify the local IP address, and calls IP to insert
287 303 * the conn_t in the fanout table.
288 304 * If everything is ok it then sends the T_BIND_ACK back up.
289 305 */
290 306 static void
291 307 icmp_tpi_bind(queue_t *q, mblk_t *mp)
292 308 {
293 309 int error;
294 310 struct sockaddr *sa;
295 311 struct T_bind_req *tbr;
296 312 socklen_t len;
297 313 sin_t *sin;
298 314 sin6_t *sin6;
299 315 icmp_t *icmp;
300 316 conn_t *connp = Q_TO_CONN(q);
301 317 mblk_t *mp1;
302 318 cred_t *cr;
303 319
304 320 /*
305 321 * All Solaris components should pass a db_credp
306 322 * for this TPI message, hence we ASSERT.
307 323 * But in case there is some other M_PROTO that looks
308 324 * like a TPI message sent by some other kernel
309 325 * component, we check and return an error.
310 326 */
311 327 cr = msg_getcred(mp, NULL);
312 328 ASSERT(cr != NULL);
313 329 if (cr == NULL) {
314 330 icmp_err_ack(q, mp, TSYSERR, EINVAL);
315 331 return;
316 332 }
317 333
318 334 icmp = connp->conn_icmp;
319 335 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
320 336 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
321 337 "icmp_bind: bad req, len %u",
322 338 (uint_t)(mp->b_wptr - mp->b_rptr));
323 339 icmp_err_ack(q, mp, TPROTO, 0);
324 340 return;
325 341 }
326 342
327 343 if (icmp->icmp_state != TS_UNBND) {
328 344 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
329 345 "icmp_bind: bad state, %u", icmp->icmp_state);
330 346 icmp_err_ack(q, mp, TOUTSTATE, 0);
331 347 return;
332 348 }
333 349
334 350 /*
335 351 * Reallocate the message to make sure we have enough room for an
336 352 * address.
337 353 */
338 354 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
339 355 if (mp1 == NULL) {
340 356 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
341 357 return;
342 358 }
343 359 mp = mp1;
344 360
345 361 /* Reset the message type in preparation for shipping it back. */
346 362 DB_TYPE(mp) = M_PCPROTO;
347 363 tbr = (struct T_bind_req *)mp->b_rptr;
348 364 len = tbr->ADDR_length;
349 365 switch (len) {
350 366 case 0: /* request for a generic port */
351 367 tbr->ADDR_offset = sizeof (struct T_bind_req);
352 368 if (connp->conn_family == AF_INET) {
353 369 tbr->ADDR_length = sizeof (sin_t);
354 370 sin = (sin_t *)&tbr[1];
355 371 *sin = sin_null;
356 372 sin->sin_family = AF_INET;
357 373 mp->b_wptr = (uchar_t *)&sin[1];
358 374 sa = (struct sockaddr *)sin;
359 375 len = sizeof (sin_t);
360 376 } else {
361 377 ASSERT(connp->conn_family == AF_INET6);
362 378 tbr->ADDR_length = sizeof (sin6_t);
363 379 sin6 = (sin6_t *)&tbr[1];
364 380 *sin6 = sin6_null;
365 381 sin6->sin6_family = AF_INET6;
366 382 mp->b_wptr = (uchar_t *)&sin6[1];
367 383 sa = (struct sockaddr *)sin6;
368 384 len = sizeof (sin6_t);
369 385 }
370 386 break;
371 387
372 388 case sizeof (sin_t): /* Complete IPv4 address */
373 389 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
374 390 sizeof (sin_t));
375 391 break;
376 392
377 393 case sizeof (sin6_t): /* Complete IPv6 address */
378 394 sa = (struct sockaddr *)mi_offset_param(mp,
379 395 tbr->ADDR_offset, sizeof (sin6_t));
380 396 break;
381 397
382 398 default:
383 399 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
384 400 "icmp_bind: bad ADDR_length %u", tbr->ADDR_length);
385 401 icmp_err_ack(q, mp, TBADADDR, 0);
386 402 return;
387 403 }
388 404
389 405 error = rawip_do_bind(connp, sa, len);
390 406 if (error != 0) {
391 407 if (error > 0) {
392 408 icmp_err_ack(q, mp, TSYSERR, error);
393 409 } else {
394 410 icmp_err_ack(q, mp, -error, 0);
395 411 }
396 412 } else {
397 413 tbr->PRIM_type = T_BIND_ACK;
398 414 qreply(q, mp);
399 415 }
400 416 }
401 417
402 418 static int
403 419 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
404 420 {
405 421 sin_t *sin;
406 422 sin6_t *sin6;
407 423 icmp_t *icmp = connp->conn_icmp;
408 424 int error = 0;
409 425 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
410 426 in_port_t lport; /* Network byte order */
411 427 ipaddr_t v4src; /* Set if AF_INET */
412 428 in6_addr_t v6src;
413 429 uint_t scopeid = 0;
414 430 zoneid_t zoneid = IPCL_ZONEID(connp);
415 431 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
416 432
417 433 if (sa == NULL || !OK_32PTR((char *)sa)) {
418 434 return (EINVAL);
419 435 }
420 436
421 437 switch (len) {
422 438 case sizeof (sin_t): /* Complete IPv4 address */
423 439 sin = (sin_t *)sa;
424 440 if (sin->sin_family != AF_INET ||
425 441 connp->conn_family != AF_INET) {
426 442 /* TSYSERR, EAFNOSUPPORT */
427 443 return (EAFNOSUPPORT);
428 444 }
429 445 v4src = sin->sin_addr.s_addr;
430 446 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
431 447 if (v4src != INADDR_ANY) {
432 448 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
433 449 B_TRUE);
434 450 }
435 451 lport = sin->sin_port;
436 452 break;
437 453 case sizeof (sin6_t): /* Complete IPv6 address */
438 454 sin6 = (sin6_t *)sa;
439 455 if (sin6->sin6_family != AF_INET6 ||
440 456 connp->conn_family != AF_INET6) {
441 457 /* TSYSERR, EAFNOSUPPORT */
442 458 return (EAFNOSUPPORT);
443 459 }
444 460 /* No support for mapped addresses on raw sockets */
445 461 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
446 462 /* TSYSERR, EADDRNOTAVAIL */
447 463 return (EADDRNOTAVAIL);
448 464 }
449 465 v6src = sin6->sin6_addr;
450 466 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
451 467 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
452 468 scopeid = sin6->sin6_scope_id;
453 469 laddr_type = ip_laddr_verify_v6(&v6src, zoneid, ipst,
454 470 B_TRUE, scopeid);
455 471 }
456 472 lport = sin6->sin6_port;
457 473 break;
458 474
459 475 default:
460 476 /* TBADADDR */
461 477 return (EADDRNOTAVAIL);
462 478 }
463 479
464 480 /* Is the local address a valid unicast, multicast, or broadcast? */
465 481 if (laddr_type == IPVL_BAD)
466 482 return (EADDRNOTAVAIL);
467 483
468 484 /*
469 485 * The state must be TS_UNBND.
470 486 */
471 487 mutex_enter(&connp->conn_lock);
472 488 if (icmp->icmp_state != TS_UNBND) {
473 489 mutex_exit(&connp->conn_lock);
474 490 return (-TOUTSTATE);
475 491 }
476 492
477 493 /*
478 494 * Copy the source address into our icmp structure. This address
479 495 * may still be zero; if so, ip will fill in the correct address
480 496 * each time an outbound packet is passed to it.
481 497 * If we are binding to a broadcast or multicast address then
482 498 * we just set the conn_bound_addr since we don't want to use
483 499 * that as the source address when sending.
484 500 */
485 501 connp->conn_bound_addr_v6 = v6src;
486 502 connp->conn_laddr_v6 = v6src;
487 503 if (scopeid != 0) {
488 504 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
489 505 connp->conn_ixa->ixa_scopeid = scopeid;
490 506 connp->conn_incoming_ifindex = scopeid;
491 507 } else {
492 508 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
493 509 connp->conn_incoming_ifindex = connp->conn_bound_if;
494 510 }
495 511
496 512 switch (laddr_type) {
497 513 case IPVL_UNICAST_UP:
498 514 case IPVL_UNICAST_DOWN:
499 515 connp->conn_saddr_v6 = v6src;
500 516 connp->conn_mcbc_bind = B_FALSE;
501 517 break;
502 518 case IPVL_MCAST:
503 519 case IPVL_BCAST:
504 520 /* ip_set_destination will pick a source address later */
505 521 connp->conn_saddr_v6 = ipv6_all_zeros;
506 522 connp->conn_mcbc_bind = B_TRUE;
507 523 break;
508 524 }
509 525
510 526 /* Any errors after this point should use late_error */
511 527
512 528 /*
513 529 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
514 530 * with IPPROTO_TCP.
515 531 */
516 532 connp->conn_lport = lport;
517 533 connp->conn_fport = 0;
518 534
519 535 if (connp->conn_family == AF_INET) {
520 536 ASSERT(connp->conn_ipversion == IPV4_VERSION);
521 537 } else {
522 538 ASSERT(connp->conn_ipversion == IPV6_VERSION);
523 539 }
524 540
525 541 icmp->icmp_state = TS_IDLE;
526 542
527 543 /*
528 544 * We create an initial header template here to make a subsequent
529 545 * sendto have a starting point. Since conn_last_dst is zero the
530 546 * first sendto will always follow the 'dst changed' code path.
531 547 * Note that we defer massaging options and the related checksum
532 548 * adjustment until we have a destination address.
533 549 */
534 550 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
535 551 &connp->conn_faddr_v6, connp->conn_flowinfo);
536 552 if (error != 0) {
537 553 mutex_exit(&connp->conn_lock);
538 554 goto late_error;
539 555 }
540 556 /* Just in case */
541 557 connp->conn_faddr_v6 = ipv6_all_zeros;
542 558 connp->conn_v6lastdst = ipv6_all_zeros;
543 559 mutex_exit(&connp->conn_lock);
544 560
545 561 error = ip_laddr_fanout_insert(connp);
546 562 if (error != 0)
547 563 goto late_error;
548 564
549 565 /* Bind succeeded */
550 566 return (0);
551 567
552 568 late_error:
553 569 mutex_enter(&connp->conn_lock);
554 570 connp->conn_saddr_v6 = ipv6_all_zeros;
555 571 connp->conn_bound_addr_v6 = ipv6_all_zeros;
556 572 connp->conn_laddr_v6 = ipv6_all_zeros;
557 573 if (scopeid != 0) {
558 574 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
559 575 connp->conn_incoming_ifindex = connp->conn_bound_if;
560 576 }
561 577 icmp->icmp_state = TS_UNBND;
562 578 connp->conn_v6lastdst = ipv6_all_zeros;
563 579 connp->conn_lport = 0;
564 580
565 581 /* Restore the header that was built above - different source address */
566 582 (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
567 583 &connp->conn_faddr_v6, connp->conn_flowinfo);
568 584 mutex_exit(&connp->conn_lock);
569 585 return (error);
570 586 }
571 587
572 588 /*
573 589 * Tell IP to just bind to the protocol.
574 590 */
575 591 static void
576 592 icmp_bind_proto(icmp_t *icmp)
577 593 {
578 594 conn_t *connp = icmp->icmp_connp;
579 595
580 596 mutex_enter(&connp->conn_lock);
581 597 connp->conn_saddr_v6 = ipv6_all_zeros;
582 598 connp->conn_laddr_v6 = ipv6_all_zeros;
583 599 connp->conn_faddr_v6 = ipv6_all_zeros;
584 600 connp->conn_v6lastdst = ipv6_all_zeros;
585 601 mutex_exit(&connp->conn_lock);
586 602
587 603 (void) ip_laddr_fanout_insert(connp);
588 604 }
589 605
590 606 /*
591 607 * This routine handles each T_CONN_REQ message passed to icmp. It
592 608 * associates a default destination address with the stream.
593 609 *
594 610 * After various error checks are completed, icmp_connect() lays
595 611 * the target address and port into the composite header template.
596 612 * Then we ask IP for information, including a source address if we didn't
597 613 * already have one. Finally we send up the T_OK_ACK reply message.
598 614 */
599 615 static void
600 616 icmp_tpi_connect(queue_t *q, mblk_t *mp)
601 617 {
602 618 conn_t *connp = Q_TO_CONN(q);
603 619 struct T_conn_req *tcr;
604 620 struct sockaddr *sa;
605 621 socklen_t len;
606 622 int error;
607 623 cred_t *cr;
608 624 pid_t pid;
609 625 /*
610 626 * All Solaris components should pass a db_credp
611 627 * for this TPI message, hence we ASSERT.
612 628 * But in case there is some other M_PROTO that looks
613 629 * like a TPI message sent by some other kernel
614 630 * component, we check and return an error.
615 631 */
616 632 cr = msg_getcred(mp, &pid);
617 633 ASSERT(cr != NULL);
618 634 if (cr == NULL) {
619 635 icmp_err_ack(q, mp, TSYSERR, EINVAL);
620 636 return;
621 637 }
622 638
623 639 tcr = (struct T_conn_req *)mp->b_rptr;
624 640 /* Sanity checks */
625 641 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
626 642 icmp_err_ack(q, mp, TPROTO, 0);
627 643 return;
628 644 }
629 645
630 646 if (tcr->OPT_length != 0) {
631 647 icmp_err_ack(q, mp, TBADOPT, 0);
632 648 return;
633 649 }
634 650
635 651 len = tcr->DEST_length;
636 652
637 653 switch (len) {
638 654 default:
639 655 icmp_err_ack(q, mp, TBADADDR, 0);
640 656 return;
641 657 case sizeof (sin_t):
642 658 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
643 659 sizeof (sin_t));
644 660 break;
645 661 case sizeof (sin6_t):
646 662 sa = (struct sockaddr *)mi_offset_param(mp,
647 663 tcr->DEST_offset, sizeof (sin6_t));
648 664 break;
649 665 }
650 666
651 667 error = proto_verify_ip_addr(connp->conn_family, sa, len);
652 668 if (error != 0) {
653 669 icmp_err_ack(q, mp, TSYSERR, error);
654 670 return;
655 671 }
656 672
657 673 error = rawip_do_connect(connp, sa, len, cr, pid);
658 674 if (error != 0) {
659 675 if (error < 0) {
660 676 icmp_err_ack(q, mp, -error, 0);
661 677 } else {
662 678 icmp_err_ack(q, mp, 0, error);
663 679 }
664 680 } else {
665 681 mblk_t *mp1;
666 682
667 683 /*
668 684 * We have to send a connection confirmation to
669 685 * keep TLI happy.
670 686 */
671 687 if (connp->conn_family == AF_INET) {
672 688 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
673 689 sizeof (sin_t), NULL, 0);
674 690 } else {
675 691 ASSERT(connp->conn_family == AF_INET6);
676 692 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
677 693 sizeof (sin6_t), NULL, 0);
678 694 }
679 695 if (mp1 == NULL) {
680 696 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
681 697 return;
682 698 }
683 699
684 700 /*
685 701 * Send ok_ack for T_CONN_REQ
686 702 */
687 703 mp = mi_tpi_ok_ack_alloc(mp);
688 704 if (mp == NULL) {
689 705 /* Unable to reuse the T_CONN_REQ for the ack. */
690 706 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
691 707 return;
692 708 }
693 709 putnext(connp->conn_rq, mp);
694 710 putnext(connp->conn_rq, mp1);
695 711 }
696 712 }
697 713
698 714 static int
699 715 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
700 716 cred_t *cr, pid_t pid)
701 717 {
702 718 icmp_t *icmp;
703 719 sin_t *sin;
704 720 sin6_t *sin6;
705 721 int error;
706 722 uint16_t dstport;
707 723 ipaddr_t v4dst;
708 724 in6_addr_t v6dst;
709 725 uint32_t flowinfo;
710 726 ip_xmit_attr_t *ixa;
711 727 ip_xmit_attr_t *oldixa;
712 728 uint_t scopeid = 0;
713 729 uint_t srcid = 0;
714 730 in6_addr_t v6src = connp->conn_saddr_v6;
715 731
716 732 icmp = connp->conn_icmp;
717 733
718 734 if (sa == NULL || !OK_32PTR((char *)sa)) {
719 735 return (EINVAL);
720 736 }
721 737
722 738 ASSERT(sa != NULL && len != 0);
723 739
724 740 /*
725 741 * Determine packet type based on type of address passed in
726 742 * the request should contain an IPv4 or IPv6 address.
727 743 * Make sure that address family matches the type of
728 744 * family of the address passed down.
729 745 */
730 746 switch (len) {
731 747 case sizeof (sin_t):
732 748 sin = (sin_t *)sa;
733 749
734 750 v4dst = sin->sin_addr.s_addr;
735 751 dstport = sin->sin_port;
736 752 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
737 753 ASSERT(connp->conn_ipversion == IPV4_VERSION);
738 754 break;
739 755
740 756 case sizeof (sin6_t):
741 757 sin6 = (sin6_t *)sa;
742 758
743 759 /* No support for mapped addresses on raw sockets */
744 760 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
745 761 return (EADDRNOTAVAIL);
746 762 }
747 763 v6dst = sin6->sin6_addr;
748 764 dstport = sin6->sin6_port;
749 765 ASSERT(connp->conn_ipversion == IPV6_VERSION);
750 766 flowinfo = sin6->sin6_flowinfo;
751 767 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
752 768 scopeid = sin6->sin6_scope_id;
753 769 srcid = sin6->__sin6_src_id;
754 770 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
755 771 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
756 772 connp->conn_netstack);
757 773 }
758 774 break;
759 775 }
760 776
761 777 /*
762 778 * If there is a different thread using conn_ixa then we get a new
763 779 * copy and cut the old one loose from conn_ixa. Otherwise we use
764 780 * conn_ixa and prevent any other thread from using/changing it.
765 781 * Once connect() is done other threads can use conn_ixa since the
766 782 * refcnt will be back at one.
767 783 * We defer updating conn_ixa until later to handle any concurrent
768 784 * conn_ixa_cleanup thread.
769 785 */
770 786 ixa = conn_get_ixa(connp, B_FALSE);
771 787 if (ixa == NULL)
772 788 return (ENOMEM);
773 789
774 790 mutex_enter(&connp->conn_lock);
775 791 /*
776 792 * This icmp_t must have bound already before doing a connect.
777 793 * Reject if a connect is in progress (we drop conn_lock during
778 794 * rawip_do_connect).
779 795 */
780 796 if (icmp->icmp_state == TS_UNBND || icmp->icmp_state == TS_WCON_CREQ) {
781 797 mutex_exit(&connp->conn_lock);
782 798 ixa_refrele(ixa);
783 799 return (-TOUTSTATE);
784 800 }
785 801
786 802 if (icmp->icmp_state == TS_DATA_XFER) {
787 803 /* Already connected - clear out state */
788 804 if (connp->conn_mcbc_bind)
789 805 connp->conn_saddr_v6 = ipv6_all_zeros;
790 806 else
791 807 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
792 808 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
793 809 connp->conn_faddr_v6 = ipv6_all_zeros;
794 810 icmp->icmp_state = TS_IDLE;
795 811 }
796 812
797 813 /*
798 814 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
799 815 * with IPPROTO_TCP.
800 816 */
801 817 connp->conn_fport = dstport;
802 818 if (connp->conn_ipversion == IPV4_VERSION) {
803 819 /*
804 820 * Interpret a zero destination to mean loopback.
805 821 * Update the T_CONN_REQ (sin/sin6) since it is used to
806 822 * generate the T_CONN_CON.
807 823 */
808 824 if (v4dst == INADDR_ANY) {
809 825 v4dst = htonl(INADDR_LOOPBACK);
810 826 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
811 827 ASSERT(connp->conn_family == AF_INET);
812 828 sin->sin_addr.s_addr = v4dst;
813 829 }
814 830 connp->conn_faddr_v6 = v6dst;
815 831 connp->conn_flowinfo = 0;
816 832 } else {
817 833 ASSERT(connp->conn_ipversion == IPV6_VERSION);
818 834 /*
819 835 * Interpret a zero destination to mean loopback.
820 836 * Update the T_CONN_REQ (sin/sin6) since it is used to
821 837 * generate the T_CONN_CON.
822 838 */
823 839 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
824 840 v6dst = ipv6_loopback;
825 841 sin6->sin6_addr = v6dst;
826 842 }
827 843 connp->conn_faddr_v6 = v6dst;
828 844 connp->conn_flowinfo = flowinfo;
829 845 }
830 846
831 847 /*
832 848 * We update our cred/cpid based on the caller of connect
833 849 */
834 850 if (connp->conn_cred != cr) {
835 851 crhold(cr);
836 852 crfree(connp->conn_cred);
837 853 connp->conn_cred = cr;
838 854 }
839 855 connp->conn_cpid = pid;
840 856 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
841 857 ixa->ixa_cred = cr;
842 858 ixa->ixa_cpid = pid;
843 859 if (is_system_labeled()) {
844 860 /* We need to restart with a label based on the cred */
845 861 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
846 862 }
847 863
848 864 if (scopeid != 0) {
849 865 ixa->ixa_flags |= IXAF_SCOPEID_SET;
850 866 ixa->ixa_scopeid = scopeid;
851 867 connp->conn_incoming_ifindex = scopeid;
852 868 } else {
853 869 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
854 870 connp->conn_incoming_ifindex = connp->conn_bound_if;
855 871 }
856 872
857 873 /*
858 874 * conn_connect will drop conn_lock and reacquire it.
859 875 * To prevent a send* from messing with this icmp_t while the lock
860 876 * is dropped we set icmp_state and clear conn_v6lastdst.
861 877 * That will make all send* fail with EISCONN.
862 878 */
863 879 connp->conn_v6lastdst = ipv6_all_zeros;
864 880 icmp->icmp_state = TS_WCON_CREQ;
865 881
866 882 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
867 883 mutex_exit(&connp->conn_lock);
868 884 if (error != 0)
869 885 goto connect_failed;
870 886
871 887 /*
872 888 * The addresses have been verified. Time to insert in
873 889 * the correct fanout list.
874 890 */
875 891 error = ipcl_conn_insert(connp);
876 892 if (error != 0)
877 893 goto connect_failed;
878 894
879 895 mutex_enter(&connp->conn_lock);
880 896 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
881 897 &connp->conn_faddr_v6, connp->conn_flowinfo);
882 898 if (error != 0) {
883 899 mutex_exit(&connp->conn_lock);
884 900 goto connect_failed;
885 901 }
886 902
887 903 icmp->icmp_state = TS_DATA_XFER;
888 904 /* Record this as the "last" send even though we haven't sent any */
889 905 connp->conn_v6lastdst = connp->conn_faddr_v6;
890 906 connp->conn_lastipversion = connp->conn_ipversion;
891 907 connp->conn_lastdstport = connp->conn_fport;
892 908 connp->conn_lastflowinfo = connp->conn_flowinfo;
893 909 connp->conn_lastscopeid = scopeid;
894 910 connp->conn_lastsrcid = srcid;
895 911 /* Also remember a source to use together with lastdst */
896 912 connp->conn_v6lastsrc = v6src;
897 913
898 914 oldixa = conn_replace_ixa(connp, ixa);
899 915 mutex_exit(&connp->conn_lock);
900 916 ixa_refrele(oldixa);
901 917
902 918 ixa_refrele(ixa);
903 919 return (0);
904 920
905 921 connect_failed:
906 922 if (ixa != NULL)
907 923 ixa_refrele(ixa);
908 924 mutex_enter(&connp->conn_lock);
909 925 icmp->icmp_state = TS_IDLE;
910 926 /* In case the source address was set above */
911 927 if (connp->conn_mcbc_bind)
912 928 connp->conn_saddr_v6 = ipv6_all_zeros;
913 929 else
914 930 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
915 931 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
916 932 connp->conn_faddr_v6 = ipv6_all_zeros;
917 933 connp->conn_v6lastdst = ipv6_all_zeros;
918 934 connp->conn_flowinfo = 0;
919 935
920 936 (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
921 937 &connp->conn_faddr_v6, connp->conn_flowinfo);
922 938 mutex_exit(&connp->conn_lock);
923 939 return (error);
924 940 }
925 941
926 942 static void
927 943 rawip_do_close(conn_t *connp)
928 944 {
929 945 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
930 946
931 947 ip_quiesce_conn(connp);
932 948
933 949 if (!IPCL_IS_NONSTR(connp)) {
934 950 qprocsoff(connp->conn_rq);
935 951 }
936 952
937 953 icmp_close_free(connp);
938 954
939 955 /*
940 956 * Now we are truly single threaded on this stream, and can
941 957 * delete the things hanging off the connp, and finally the connp.
942 958 * We removed this connp from the fanout list, it cannot be
943 959 * accessed thru the fanouts, and we already waited for the
944 960 * conn_ref to drop to 0. We are already in close, so
945 961 * there cannot be any other thread from the top. qprocsoff
946 962 * has completed, and service has completed or won't run in
947 963 * future.
948 964 */
949 965 ASSERT(connp->conn_ref == 1);
950 966
951 967 if (!IPCL_IS_NONSTR(connp)) {
952 968 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
953 969 } else {
954 970 ip_free_helper_stream(connp);
955 971 }
956 972
957 973 connp->conn_ref--;
958 974 ipcl_conn_destroy(connp);
959 975 }
960 976
961 977 static int
962 978 icmp_close(queue_t *q, int flags)
963 979 {
964 980 conn_t *connp;
965 981
966 982 if (flags & SO_FALLBACK) {
967 983 /*
968 984 * stream is being closed while in fallback
969 985 * simply free the resources that were allocated
970 986 */
971 987 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
972 988 qprocsoff(q);
973 989 goto done;
974 990 }
975 991
976 992 connp = Q_TO_CONN(q);
977 993 (void) rawip_do_close(connp);
978 994 done:
979 995 q->q_ptr = WR(q)->q_ptr = NULL;
980 996 return (0);
981 997 }
982 998
983 999 static void
984 1000 icmp_close_free(conn_t *connp)
985 1001 {
986 1002 icmp_t *icmp = connp->conn_icmp;
987 1003
988 1004 if (icmp->icmp_filter != NULL) {
989 1005 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
990 1006 icmp->icmp_filter = NULL;
991 1007 }
992 1008
993 1009 /*
994 1010 * Clear any fields which the kmem_cache constructor clears.
995 1011 * Only icmp_connp needs to be preserved.
996 1012 * TBD: We should make this more efficient to avoid clearing
997 1013 * everything.
998 1014 */
999 1015 ASSERT(icmp->icmp_connp == connp);
1000 1016 bzero(icmp, sizeof (icmp_t));
1001 1017 icmp->icmp_connp = connp;
1002 1018 }
1003 1019
1004 1020 /*
1005 1021 * This routine handles each T_DISCON_REQ message passed to icmp
1006 1022 * as an indicating that ICMP is no longer connected. This results
1007 1023 * in telling IP to restore the binding to just the local address.
1008 1024 */
1009 1025 static int
1010 1026 icmp_do_disconnect(conn_t *connp)
1011 1027 {
1012 1028 icmp_t *icmp = connp->conn_icmp;
1013 1029 int error;
1014 1030
1015 1031 mutex_enter(&connp->conn_lock);
1016 1032 if (icmp->icmp_state != TS_DATA_XFER) {
1017 1033 mutex_exit(&connp->conn_lock);
1018 1034 return (-TOUTSTATE);
1019 1035 }
1020 1036 if (connp->conn_mcbc_bind)
1021 1037 connp->conn_saddr_v6 = ipv6_all_zeros;
1022 1038 else
1023 1039 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
1024 1040 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
1025 1041 connp->conn_faddr_v6 = ipv6_all_zeros;
1026 1042 icmp->icmp_state = TS_IDLE;
1027 1043
1028 1044 connp->conn_v6lastdst = ipv6_all_zeros;
1029 1045 error = icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
1030 1046 &connp->conn_faddr_v6, connp->conn_flowinfo);
1031 1047 mutex_exit(&connp->conn_lock);
1032 1048 if (error != 0)
1033 1049 return (error);
1034 1050
1035 1051 /*
1036 1052 * Tell IP to remove the full binding and revert
1037 1053 * to the local address binding.
1038 1054 */
1039 1055 return (ip_laddr_fanout_insert(connp));
1040 1056 }
1041 1057
1042 1058 static void
1043 1059 icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
1044 1060 {
1045 1061 conn_t *connp = Q_TO_CONN(q);
1046 1062 int error;
1047 1063
1048 1064 /*
1049 1065 * Allocate the largest primitive we need to send back
1050 1066 * T_error_ack is > than T_ok_ack
1051 1067 */
1052 1068 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
1053 1069 if (mp == NULL) {
1054 1070 /* Unable to reuse the T_DISCON_REQ for the ack. */
1055 1071 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
1056 1072 return;
1057 1073 }
1058 1074
1059 1075 error = icmp_do_disconnect(connp);
1060 1076
1061 1077 if (error != 0) {
1062 1078 if (error > 0) {
1063 1079 icmp_err_ack(q, mp, 0, error);
1064 1080 } else {
1065 1081 icmp_err_ack(q, mp, -error, 0);
1066 1082 }
1067 1083 } else {
1068 1084 mp = mi_tpi_ok_ack_alloc(mp);
1069 1085 ASSERT(mp != NULL);
1070 1086 qreply(q, mp);
1071 1087 }
1072 1088 }
1073 1089
1074 1090 static int
1075 1091 icmp_disconnect(conn_t *connp)
1076 1092 {
1077 1093 int error;
1078 1094
1079 1095 connp->conn_dgram_errind = B_FALSE;
1080 1096
1081 1097 error = icmp_do_disconnect(connp);
1082 1098
1083 1099 if (error < 0)
1084 1100 error = proto_tlitosyserr(-error);
1085 1101 return (error);
1086 1102 }
1087 1103
1088 1104 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1089 1105 static void
1090 1106 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1091 1107 {
1092 1108 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1093 1109 qreply(q, mp);
1094 1110 }
1095 1111
1096 1112 /* Shorthand to generate and send TPI error acks to our client */
1097 1113 static void
1098 1114 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
1099 1115 t_scalar_t t_error, int sys_error)
1100 1116 {
1101 1117 struct T_error_ack *teackp;
1102 1118
1103 1119 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1104 1120 M_PCPROTO, T_ERROR_ACK)) != NULL) {
1105 1121 teackp = (struct T_error_ack *)mp->b_rptr;
1106 1122 teackp->ERROR_prim = primitive;
1107 1123 teackp->TLI_error = t_error;
1108 1124 teackp->UNIX_error = sys_error;
1109 1125 qreply(q, mp);
1110 1126 }
1111 1127 }
1112 1128
1113 1129 /*
1114 1130 * icmp_icmp_input is called as conn_recvicmp to process ICMP messages.
1115 1131 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1116 1132 * Assumes that IP has pulled up everything up to and including the ICMP header.
1117 1133 */
1118 1134 /* ARGSUSED2 */
1119 1135 static void
1120 1136 icmp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
1121 1137 {
1122 1138 conn_t *connp = (conn_t *)arg1;
1123 1139 icmp_t *icmp = connp->conn_icmp;
1124 1140 icmph_t *icmph;
1125 1141 ipha_t *ipha;
1126 1142 int iph_hdr_length;
1127 1143 sin_t sin;
1128 1144 mblk_t *mp1;
1129 1145 int error = 0;
1130 1146
1131 1147 ipha = (ipha_t *)mp->b_rptr;
1132 1148
1133 1149 ASSERT(OK_32PTR(mp->b_rptr));
1134 1150
1135 1151 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1136 1152 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1137 1153 icmp_icmp_error_ipv6(connp, mp, ira);
1138 1154 return;
1139 1155 }
1140 1156 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
1141 1157
1142 1158 /* Skip past the outer IP and ICMP headers */
1143 1159 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
1144 1160 iph_hdr_length = ira->ira_ip_hdr_length;
1145 1161 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1146 1162 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
1147 1163
1148 1164 iph_hdr_length = IPH_HDR_LENGTH(ipha);
1149 1165
1150 1166 switch (icmph->icmph_type) {
1151 1167 case ICMP_DEST_UNREACHABLE:
1152 1168 switch (icmph->icmph_code) {
1153 1169 case ICMP_FRAGMENTATION_NEEDED: {
1154 1170 ipha_t *ipha;
1155 1171 ip_xmit_attr_t *ixa;
1156 1172 /*
1157 1173 * IP has already adjusted the path MTU.
1158 1174 * But we need to adjust DF for IPv4.
1159 1175 */
1160 1176 if (connp->conn_ipversion != IPV4_VERSION)
1161 1177 break;
1162 1178
1163 1179 ixa = conn_get_ixa(connp, B_FALSE);
1164 1180 if (ixa == NULL || ixa->ixa_ire == NULL) {
1165 1181 /*
1166 1182 * Some other thread holds conn_ixa. We will
1167 1183 * redo this on the next ICMP too big.
1168 1184 */
1169 1185 if (ixa != NULL)
1170 1186 ixa_refrele(ixa);
1171 1187 break;
1172 1188 }
1173 1189 (void) ip_get_pmtu(ixa);
1174 1190
1175 1191 mutex_enter(&connp->conn_lock);
1176 1192 ipha = (ipha_t *)connp->conn_ht_iphc;
1177 1193 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1178 1194 ipha->ipha_fragment_offset_and_flags |=
1179 1195 IPH_DF_HTONS;
1180 1196 } else {
1181 1197 ipha->ipha_fragment_offset_and_flags &=
1182 1198 ~IPH_DF_HTONS;
1183 1199 }
1184 1200 mutex_exit(&connp->conn_lock);
1185 1201 ixa_refrele(ixa);
1186 1202 break;
1187 1203 }
1188 1204 case ICMP_PORT_UNREACHABLE:
1189 1205 case ICMP_PROTOCOL_UNREACHABLE:
1190 1206 error = ECONNREFUSED;
1191 1207 break;
1192 1208 default:
1193 1209 /* Transient errors */
1194 1210 break;
1195 1211 }
1196 1212 break;
1197 1213 default:
1198 1214 /* Transient errors */
1199 1215 break;
1200 1216 }
1201 1217 if (error == 0) {
1202 1218 freemsg(mp);
1203 1219 return;
1204 1220 }
1205 1221
1206 1222 /*
1207 1223 * Deliver T_UDERROR_IND when the application has asked for it.
1208 1224 * The socket layer enables this automatically when connected.
1209 1225 */
1210 1226 if (!connp->conn_dgram_errind) {
1211 1227 freemsg(mp);
1212 1228 return;
1213 1229 }
1214 1230
1215 1231 sin = sin_null;
1216 1232 sin.sin_family = AF_INET;
1217 1233 sin.sin_addr.s_addr = ipha->ipha_dst;
1218 1234
1219 1235 if (IPCL_IS_NONSTR(connp)) {
1220 1236 mutex_enter(&connp->conn_lock);
1221 1237 if (icmp->icmp_state == TS_DATA_XFER) {
1222 1238 if (sin.sin_addr.s_addr == connp->conn_faddr_v4) {
1223 1239 mutex_exit(&connp->conn_lock);
1224 1240 (*connp->conn_upcalls->su_set_error)
1225 1241 (connp->conn_upper_handle, error);
1226 1242 goto done;
1227 1243 }
1228 1244 } else {
1229 1245 icmp->icmp_delayed_error = error;
1230 1246 *((sin_t *)&icmp->icmp_delayed_addr) = sin;
1231 1247 }
1232 1248 mutex_exit(&connp->conn_lock);
1233 1249 } else {
1234 1250 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
1235 1251 error);
1236 1252 if (mp1 != NULL)
1237 1253 putnext(connp->conn_rq, mp1);
1238 1254 }
1239 1255 done:
1240 1256 freemsg(mp);
1241 1257 }
1242 1258
1243 1259 /*
1244 1260 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMP for IPv6.
1245 1261 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1246 1262 * Assumes that IP has pulled up all the extension headers as well as the
1247 1263 * ICMPv6 header.
1248 1264 */
1249 1265 static void
1250 1266 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1251 1267 {
1252 1268 icmp6_t *icmp6;
1253 1269 ip6_t *ip6h, *outer_ip6h;
1254 1270 uint16_t iph_hdr_length;
1255 1271 uint8_t *nexthdrp;
1256 1272 sin6_t sin6;
1257 1273 mblk_t *mp1;
1258 1274 int error = 0;
1259 1275 icmp_t *icmp = connp->conn_icmp;
1260 1276
1261 1277 outer_ip6h = (ip6_t *)mp->b_rptr;
1262 1278 #ifdef DEBUG
1263 1279 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1264 1280 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1265 1281 else
1266 1282 iph_hdr_length = IPV6_HDR_LEN;
1267 1283 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1268 1284 #endif
1269 1285 /* Skip past the outer IP and ICMP headers */
1270 1286 iph_hdr_length = ira->ira_ip_hdr_length;
1271 1287 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1272 1288
1273 1289 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
1274 1290 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1275 1291 freemsg(mp);
1276 1292 return;
1277 1293 }
1278 1294
1279 1295 switch (icmp6->icmp6_type) {
1280 1296 case ICMP6_DST_UNREACH:
1281 1297 switch (icmp6->icmp6_code) {
1282 1298 case ICMP6_DST_UNREACH_NOPORT:
1283 1299 error = ECONNREFUSED;
1284 1300 break;
1285 1301 case ICMP6_DST_UNREACH_ADMIN:
1286 1302 case ICMP6_DST_UNREACH_NOROUTE:
1287 1303 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1288 1304 case ICMP6_DST_UNREACH_ADDR:
1289 1305 /* Transient errors */
1290 1306 break;
1291 1307 default:
1292 1308 break;
1293 1309 }
1294 1310 break;
1295 1311 case ICMP6_PACKET_TOO_BIG: {
1296 1312 struct T_unitdata_ind *tudi;
1297 1313 struct T_opthdr *toh;
1298 1314 size_t udi_size;
1299 1315 mblk_t *newmp;
1300 1316 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1301 1317 sizeof (struct ip6_mtuinfo);
1302 1318 sin6_t *sin6;
1303 1319 struct ip6_mtuinfo *mtuinfo;
1304 1320
1305 1321 /*
1306 1322 * If the application has requested to receive path mtu
1307 1323 * information, send up an empty message containing an
1308 1324 * IPV6_PATHMTU ancillary data item.
1309 1325 */
1310 1326 if (!connp->conn_ipv6_recvpathmtu)
1311 1327 break;
1312 1328
1313 1329 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1314 1330 opt_length;
1315 1331 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1316 1332 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
1317 1333 break;
1318 1334 }
1319 1335
1320 1336 /*
1321 1337 * newmp->b_cont is left to NULL on purpose. This is an
1322 1338 * empty message containing only ancillary data.
1323 1339 */
1324 1340 newmp->b_datap->db_type = M_PROTO;
1325 1341 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1326 1342 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1327 1343 tudi->PRIM_type = T_UNITDATA_IND;
1328 1344 tudi->SRC_length = sizeof (sin6_t);
1329 1345 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1330 1346 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1331 1347 tudi->OPT_length = opt_length;
1332 1348
1333 1349 sin6 = (sin6_t *)&tudi[1];
1334 1350 bzero(sin6, sizeof (sin6_t));
1335 1351 sin6->sin6_family = AF_INET6;
1336 1352 sin6->sin6_addr = connp->conn_faddr_v6;
1337 1353
1338 1354 toh = (struct T_opthdr *)&sin6[1];
1339 1355 toh->level = IPPROTO_IPV6;
1340 1356 toh->name = IPV6_PATHMTU;
1341 1357 toh->len = opt_length;
1342 1358 toh->status = 0;
1343 1359
1344 1360 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1345 1361 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1346 1362 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1347 1363 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1348 1364 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1349 1365 /*
1350 1366 * We've consumed everything we need from the original
1351 1367 * message. Free it, then send our empty message.
1352 1368 */
1353 1369 freemsg(mp);
1354 1370 icmp_ulp_recv(connp, newmp, msgdsize(newmp));
1355 1371 return;
1356 1372 }
1357 1373 case ICMP6_TIME_EXCEEDED:
1358 1374 /* Transient errors */
1359 1375 break;
1360 1376 case ICMP6_PARAM_PROB:
1361 1377 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1362 1378 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1363 1379 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1364 1380 (uchar_t *)nexthdrp) {
1365 1381 error = ECONNREFUSED;
1366 1382 break;
1367 1383 }
1368 1384 break;
1369 1385 }
1370 1386 if (error == 0) {
1371 1387 freemsg(mp);
1372 1388 return;
1373 1389 }
1374 1390
1375 1391 /*
1376 1392 * Deliver T_UDERROR_IND when the application has asked for it.
1377 1393 * The socket layer enables this automatically when connected.
1378 1394 */
1379 1395 if (!connp->conn_dgram_errind) {
1380 1396 freemsg(mp);
1381 1397 return;
1382 1398 }
1383 1399
1384 1400 sin6 = sin6_null;
1385 1401 sin6.sin6_family = AF_INET6;
1386 1402 sin6.sin6_addr = ip6h->ip6_dst;
1387 1403 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1388 1404 if (IPCL_IS_NONSTR(connp)) {
1389 1405 mutex_enter(&connp->conn_lock);
1390 1406 if (icmp->icmp_state == TS_DATA_XFER) {
1391 1407 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1392 1408 &connp->conn_faddr_v6)) {
1393 1409 mutex_exit(&connp->conn_lock);
1394 1410 (*connp->conn_upcalls->su_set_error)
1395 1411 (connp->conn_upper_handle, error);
1396 1412 goto done;
1397 1413 }
1398 1414 } else {
1399 1415 icmp->icmp_delayed_error = error;
1400 1416 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
1401 1417 }
1402 1418 mutex_exit(&connp->conn_lock);
1403 1419 } else {
1404 1420 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1405 1421 NULL, 0, error);
1406 1422 if (mp1 != NULL)
1407 1423 putnext(connp->conn_rq, mp1);
1408 1424 }
1409 1425 done:
1410 1426 freemsg(mp);
1411 1427 }
1412 1428
1413 1429 /*
1414 1430 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput.
1415 1431 * The local address is filled in if endpoint is bound. The remote address
1416 1432 * is filled in if remote address has been precified ("connected endpoint")
1417 1433 * (The concept of connected CLTS sockets is alien to published TPI
1418 1434 * but we support it anyway).
1419 1435 */
1420 1436 static void
1421 1437 icmp_addr_req(queue_t *q, mblk_t *mp)
1422 1438 {
1423 1439 struct sockaddr *sa;
1424 1440 mblk_t *ackmp;
1425 1441 struct T_addr_ack *taa;
1426 1442 icmp_t *icmp = Q_TO_ICMP(q);
1427 1443 conn_t *connp = icmp->icmp_connp;
1428 1444 uint_t addrlen;
1429 1445
1430 1446 /* Make it large enough for worst case */
1431 1447 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1432 1448 2 * sizeof (sin6_t), 1);
1433 1449 if (ackmp == NULL) {
1434 1450 icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1435 1451 return;
1436 1452 }
1437 1453 taa = (struct T_addr_ack *)ackmp->b_rptr;
1438 1454
1439 1455 bzero(taa, sizeof (struct T_addr_ack));
1440 1456 ackmp->b_wptr = (uchar_t *)&taa[1];
1441 1457
1442 1458 taa->PRIM_type = T_ADDR_ACK;
1443 1459 ackmp->b_datap->db_type = M_PCPROTO;
1444 1460
1445 1461 if (connp->conn_family == AF_INET)
1446 1462 addrlen = sizeof (sin_t);
1447 1463 else
1448 1464 addrlen = sizeof (sin6_t);
1449 1465
1450 1466 mutex_enter(&connp->conn_lock);
1451 1467 /*
1452 1468 * Note: Following code assumes 32 bit alignment of basic
1453 1469 * data structures like sin_t and struct T_addr_ack.
1454 1470 */
1455 1471 if (icmp->icmp_state != TS_UNBND) {
1456 1472 /*
1457 1473 * Fill in local address first
1458 1474 */
1459 1475 taa->LOCADDR_offset = sizeof (*taa);
1460 1476 taa->LOCADDR_length = addrlen;
1461 1477 sa = (struct sockaddr *)&taa[1];
1462 1478 (void) conn_getsockname(connp, sa, &addrlen);
1463 1479 ackmp->b_wptr += addrlen;
1464 1480 }
1465 1481 if (icmp->icmp_state == TS_DATA_XFER) {
1466 1482 /*
1467 1483 * connected, fill remote address too
1468 1484 */
1469 1485 taa->REMADDR_length = addrlen;
1470 1486 /* assumed 32-bit alignment */
1471 1487 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1472 1488 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1473 1489 (void) conn_getpeername(connp, sa, &addrlen);
1474 1490 ackmp->b_wptr += addrlen;
1475 1491 }
1476 1492 mutex_exit(&connp->conn_lock);
1477 1493 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1478 1494 qreply(q, ackmp);
1479 1495 }
1480 1496
1481 1497 static void
1482 1498 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1483 1499 {
1484 1500 conn_t *connp = icmp->icmp_connp;
1485 1501
1486 1502 *tap = icmp_g_t_info_ack;
1487 1503
1488 1504 if (connp->conn_family == AF_INET6)
1489 1505 tap->ADDR_size = sizeof (sin6_t);
1490 1506 else
1491 1507 tap->ADDR_size = sizeof (sin_t);
1492 1508 tap->CURRENT_state = icmp->icmp_state;
1493 1509 tap->OPT_size = icmp_max_optsize;
1494 1510 }
1495 1511
1496 1512 static void
1497 1513 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
1498 1514 t_uscalar_t cap_bits1)
1499 1515 {
1500 1516 tcap->CAP_bits1 = 0;
1501 1517
1502 1518 if (cap_bits1 & TC1_INFO) {
1503 1519 icmp_copy_info(&tcap->INFO_ack, icmp);
1504 1520 tcap->CAP_bits1 |= TC1_INFO;
1505 1521 }
1506 1522 }
1507 1523
1508 1524 /*
1509 1525 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1510 1526 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from
1511 1527 * icmp_g_t_info_ack. The current state of the stream is copied from
1512 1528 * icmp_state.
1513 1529 */
1514 1530 static void
1515 1531 icmp_capability_req(queue_t *q, mblk_t *mp)
1516 1532 {
1517 1533 icmp_t *icmp = Q_TO_ICMP(q);
1518 1534 t_uscalar_t cap_bits1;
1519 1535 struct T_capability_ack *tcap;
1520 1536
1521 1537 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1522 1538
1523 1539 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1524 1540 mp->b_datap->db_type, T_CAPABILITY_ACK);
1525 1541 if (!mp)
1526 1542 return;
1527 1543
1528 1544 tcap = (struct T_capability_ack *)mp->b_rptr;
1529 1545
1530 1546 icmp_do_capability_ack(icmp, tcap, cap_bits1);
1531 1547
1532 1548 qreply(q, mp);
1533 1549 }
1534 1550
1535 1551 /*
1536 1552 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput.
1537 1553 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1538 1554 * The current state of the stream is copied from icmp_state.
1539 1555 */
1540 1556 static void
1541 1557 icmp_info_req(queue_t *q, mblk_t *mp)
1542 1558 {
1543 1559 icmp_t *icmp = Q_TO_ICMP(q);
1544 1560
1545 1561 /* Create a T_INFO_ACK message. */
1546 1562 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1547 1563 T_INFO_ACK);
1548 1564 if (!mp)
1549 1565 return;
1550 1566 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1551 1567 qreply(q, mp);
1552 1568 }
1553 1569
1554 1570 static int
1555 1571 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1556 1572 int family)
1557 1573 {
1558 1574 conn_t *connp;
1559 1575 dev_t conn_dev;
1560 1576 int error;
1561 1577
1562 1578 /* If the stream is already open, return immediately. */
1563 1579 if (q->q_ptr != NULL)
1564 1580 return (0);
1565 1581
1566 1582 if (sflag == MODOPEN)
1567 1583 return (EINVAL);
1568 1584
1569 1585 /*
1570 1586 * Since ICMP is not used so heavily, allocating from the small
1571 1587 * arena should be sufficient.
1572 1588 */
1573 1589 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
1574 1590 return (EBUSY);
1575 1591 }
1576 1592
1577 1593 if (flag & SO_FALLBACK) {
1578 1594 /*
1579 1595 * Non streams socket needs a stream to fallback to
1580 1596 */
1581 1597 RD(q)->q_ptr = (void *)conn_dev;
1582 1598 WR(q)->q_qinfo = &icmp_fallback_sock_winit;
1583 1599 WR(q)->q_ptr = (void *)ip_minor_arena_sa;
1584 1600 qprocson(q);
1585 1601 return (0);
1586 1602 }
1587 1603
1588 1604 connp = rawip_do_open(family, credp, &error, KM_SLEEP);
1589 1605 if (connp == NULL) {
1590 1606 ASSERT(error != 0);
1591 1607 inet_minor_free(ip_minor_arena_sa, conn_dev);
1592 1608 return (error);
1593 1609 }
1594 1610
1595 1611 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1596 1612 connp->conn_dev = conn_dev;
1597 1613 connp->conn_minor_arena = ip_minor_arena_sa;
1598 1614
1599 1615 /*
1600 1616 * Initialize the icmp_t structure for this stream.
1601 1617 */
1602 1618 q->q_ptr = connp;
1603 1619 WR(q)->q_ptr = connp;
1604 1620 connp->conn_rq = q;
1605 1621 connp->conn_wq = WR(q);
1606 1622
1607 1623 WR(q)->q_hiwat = connp->conn_sndbuf;
1608 1624 WR(q)->q_lowat = connp->conn_sndlowat;
1609 1625
1610 1626 qprocson(q);
1611 1627
1612 1628 /* Set the Stream head write offset. */
1613 1629 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1614 1630 (void) proto_set_rx_hiwat(connp->conn_rq, connp, connp->conn_rcvbuf);
1615 1631
1616 1632 mutex_enter(&connp->conn_lock);
1617 1633 connp->conn_state_flags &= ~CONN_INCIPIENT;
1618 1634 mutex_exit(&connp->conn_lock);
1619 1635
1620 1636 icmp_bind_proto(connp->conn_icmp);
1621 1637
1622 1638 return (0);
1623 1639 }
1624 1640
1625 1641 /* For /dev/icmp aka AF_INET open */
1626 1642 static int
1627 1643 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1628 1644 {
1629 1645 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
1630 1646 }
1631 1647
1632 1648 /* For /dev/icmp6 aka AF_INET6 open */
1633 1649 static int
1634 1650 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1635 1651 {
1636 1652 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
1637 1653 }
1638 1654
1639 1655 /*
1640 1656 * This is the open routine for icmp. It allocates a icmp_t structure for
1641 1657 * the stream and, on the first open of the module, creates an ND table.
1642 1658 */
1643 1659 static conn_t *
1644 1660 rawip_do_open(int family, cred_t *credp, int *err, int flags)
1645 1661 {
1646 1662 icmp_t *icmp;
1647 1663 conn_t *connp;
1648 1664 zoneid_t zoneid;
1649 1665 netstack_t *ns;
1650 1666 icmp_stack_t *is;
1651 1667 int len;
1652 1668 boolean_t isv6 = B_FALSE;
1653 1669
1654 1670 *err = secpolicy_net_icmpaccess(credp);
1655 1671 if (*err != 0)
1656 1672 return (NULL);
1657 1673
1658 1674 if (family == AF_INET6)
1659 1675 isv6 = B_TRUE;
1660 1676
1661 1677 ns = netstack_find_by_cred(credp);
1662 1678 ASSERT(ns != NULL);
1663 1679 is = ns->netstack_icmp;
1664 1680 ASSERT(is != NULL);
1665 1681
1666 1682 /*
1667 1683 * For exclusive stacks we set the zoneid to zero
1668 1684 * to make ICMP operate as if in the global zone.
1669 1685 */
1670 1686 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
1671 1687 zoneid = GLOBAL_ZONEID;
1672 1688 else
1673 1689 zoneid = crgetzoneid(credp);
1674 1690
1675 1691 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
1676 1692
1677 1693 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
1678 1694 icmp = connp->conn_icmp;
1679 1695
1680 1696 /*
1681 1697 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1682 1698 * done by netstack_find_by_cred()
1683 1699 */
1684 1700 netstack_rele(ns);
1685 1701
1686 1702 /*
1687 1703 * Since this conn_t/icmp_t is not yet visible to anybody else we don't
1688 1704 * need to lock anything.
1689 1705 */
1690 1706 ASSERT(connp->conn_proto == IPPROTO_ICMP);
1691 1707 ASSERT(connp->conn_icmp == icmp);
1692 1708 ASSERT(icmp->icmp_connp == connp);
1693 1709
1694 1710 /* Set the initial state of the stream and the privilege status. */
1695 1711 icmp->icmp_state = TS_UNBND;
1696 1712 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1697 1713 if (isv6) {
1698 1714 connp->conn_family = AF_INET6;
1699 1715 connp->conn_ipversion = IPV6_VERSION;
1700 1716 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
1701 1717 connp->conn_proto = IPPROTO_ICMPV6;
1702 1718 /* May be changed by a SO_PROTOTYPE socket option. */
1703 1719 connp->conn_proto = IPPROTO_ICMPV6;
1704 1720 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1705 1721 connp->conn_ixa->ixa_raw_cksum_offset = 2;
1706 1722 connp->conn_default_ttl = is->is_ipv6_hoplimit;
1707 1723 len = sizeof (ip6_t);
1708 1724 } else {
1709 1725 connp->conn_family = AF_INET;
1710 1726 connp->conn_ipversion = IPV4_VERSION;
1711 1727 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
1712 1728 /* May be changed by a SO_PROTOTYPE socket option. */
1713 1729 connp->conn_proto = IPPROTO_ICMP;
1714 1730 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1715 1731 connp->conn_default_ttl = is->is_ipv4_ttl;
1716 1732 len = sizeof (ipha_t);
1717 1733 }
1718 1734 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
1719 1735
1720 1736 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1721 1737
1722 1738 /*
1723 1739 * For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set,
1724 1740 * the checksum is provided in the pre-built packet. We clear
1725 1741 * IXAF_SET_ULP_CKSUM to tell IP that the application has sent a
1726 1742 * complete IP header and not to compute the transport checksum.
1727 1743 */
1728 1744 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
1729 1745 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1730 1746 connp->conn_ixa->ixa_zoneid = zoneid;
1731 1747
1732 1748 connp->conn_zoneid = zoneid;
1733 1749
1734 1750 /*
1735 1751 * If the caller has the process-wide flag set, then default to MAC
1736 1752 * exempt mode. This allows read-down to unlabeled hosts.
1737 1753 */
1738 1754 if (getpflags(NET_MAC_AWARE, credp) != 0)
1739 1755 connp->conn_mac_mode = CONN_MAC_AWARE;
1740 1756
1741 1757 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
1742 1758
1743 1759 icmp->icmp_is = is;
1744 1760
1745 1761 connp->conn_rcvbuf = is->is_recv_hiwat;
1746 1762 connp->conn_sndbuf = is->is_xmit_hiwat;
1747 1763 connp->conn_sndlowat = is->is_xmit_lowat;
1748 1764 connp->conn_rcvlowat = icmp_mod_info.mi_lowat;
1749 1765
1750 1766 connp->conn_wroff = len + is->is_wroff_extra;
1751 1767 connp->conn_so_type = SOCK_RAW;
1752 1768
1753 1769 connp->conn_recv = icmp_input;
1754 1770 connp->conn_recvicmp = icmp_icmp_input;
1755 1771 crhold(credp);
1756 1772 connp->conn_cred = credp;
1757 1773 connp->conn_cpid = curproc->p_pid;
1758 1774 connp->conn_open_time = ddi_get_lbolt64();
1759 1775 /* Cache things in ixa without an extra refhold */
1760 1776 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
1761 1777 connp->conn_ixa->ixa_cred = connp->conn_cred;
1762 1778 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
1763 1779 if (is_system_labeled())
1764 1780 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
1765 1781
1766 1782 connp->conn_flow_cntrld = B_FALSE;
1767 1783
1768 1784 if (is->is_pmtu_discovery)
1769 1785 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1770 1786
1771 1787 return (connp);
1772 1788 }
1773 1789
1774 1790 /*
1775 1791 * Which ICMP options OK to set through T_UNITDATA_REQ...
1776 1792 */
1777 1793 /* ARGSUSED */
1778 1794 static boolean_t
1779 1795 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1780 1796 {
1781 1797 return (B_TRUE);
1782 1798 }
1783 1799
1784 1800 /*
1785 1801 * This routine gets default values of certain options whose default
1786 1802 * values are maintained by protcol specific code
1787 1803 */
1788 1804 int
1789 1805 icmp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1790 1806 {
1791 1807 icmp_t *icmp = Q_TO_ICMP(q);
1792 1808 icmp_stack_t *is = icmp->icmp_is;
1793 1809 int *i1 = (int *)ptr;
1794 1810
1795 1811 switch (level) {
1796 1812 case IPPROTO_IP:
1797 1813 switch (name) {
1798 1814 case IP_MULTICAST_TTL:
1799 1815 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1800 1816 return (sizeof (uchar_t));
1801 1817 case IP_MULTICAST_LOOP:
1802 1818 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1803 1819 return (sizeof (uchar_t));
1804 1820 }
1805 1821 break;
1806 1822 case IPPROTO_IPV6:
1807 1823 switch (name) {
1808 1824 case IPV6_MULTICAST_HOPS:
1809 1825 *i1 = IP_DEFAULT_MULTICAST_TTL;
1810 1826 return (sizeof (int));
1811 1827 case IPV6_MULTICAST_LOOP:
1812 1828 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1813 1829 return (sizeof (int));
1814 1830 case IPV6_UNICAST_HOPS:
1815 1831 *i1 = is->is_ipv6_hoplimit;
1816 1832 return (sizeof (int));
1817 1833 }
1818 1834 break;
1819 1835 case IPPROTO_ICMPV6:
1820 1836 switch (name) {
1821 1837 case ICMP6_FILTER:
1822 1838 /* Make it look like "pass all" */
1823 1839 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1824 1840 return (sizeof (icmp6_filter_t));
1825 1841 }
1826 1842 break;
1827 1843 }
1828 1844 return (-1);
1829 1845 }
1830 1846
1831 1847 /*
1832 1848 * This routine retrieves the current status of socket options.
1833 1849 * It returns the size of the option retrieved, or -1.
1834 1850 */
1835 1851 int
1836 1852 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
1837 1853 {
1838 1854 icmp_t *icmp = connp->conn_icmp;
1839 1855 int *i1 = (int *)ptr;
1840 1856 conn_opt_arg_t coas;
1841 1857 int retval;
1842 1858
1843 1859 coas.coa_connp = connp;
1844 1860 coas.coa_ixa = connp->conn_ixa;
1845 1861 coas.coa_ipp = &connp->conn_xmit_ipp;
1846 1862 coas.coa_ancillary = B_FALSE;
1847 1863 coas.coa_changed = 0;
1848 1864
1849 1865 /*
1850 1866 * We assume that the optcom framework has checked for the set
1851 1867 * of levels and names that are supported, hence we don't worry
1852 1868 * about rejecting based on that.
1853 1869 * First check for ICMP specific handling, then pass to common routine.
1854 1870 */
1855 1871 switch (level) {
1856 1872 case IPPROTO_IP:
1857 1873 /*
1858 1874 * Only allow IPv4 option processing on IPv4 sockets.
1859 1875 */
1860 1876 if (connp->conn_family != AF_INET)
1861 1877 return (-1);
1862 1878
1863 1879 switch (name) {
1864 1880 case IP_OPTIONS:
1865 1881 case T_IP_OPTIONS:
1866 1882 /* Options are passed up with each packet */
1867 1883 return (0);
1868 1884 case IP_HDRINCL:
1869 1885 mutex_enter(&connp->conn_lock);
1870 1886 *i1 = (int)icmp->icmp_hdrincl;
1871 1887 mutex_exit(&connp->conn_lock);
1872 1888 return (sizeof (int));
1873 1889 }
1874 1890 break;
1875 1891
1876 1892 case IPPROTO_IPV6:
1877 1893 /*
1878 1894 * Only allow IPv6 option processing on native IPv6 sockets.
1879 1895 */
1880 1896 if (connp->conn_family != AF_INET6)
1881 1897 return (-1);
1882 1898
1883 1899 switch (name) {
1884 1900 case IPV6_CHECKSUM:
1885 1901 /*
1886 1902 * Return offset or -1 if no checksum offset.
1887 1903 * Does not apply to IPPROTO_ICMPV6
1888 1904 */
1889 1905 if (connp->conn_proto == IPPROTO_ICMPV6)
1890 1906 return (-1);
1891 1907
1892 1908 mutex_enter(&connp->conn_lock);
1893 1909 if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM)
1894 1910 *i1 = connp->conn_ixa->ixa_raw_cksum_offset;
1895 1911 else
1896 1912 *i1 = -1;
1897 1913 mutex_exit(&connp->conn_lock);
1898 1914 return (sizeof (int));
1899 1915 }
1900 1916 break;
1901 1917
1902 1918 case IPPROTO_ICMPV6:
1903 1919 /*
1904 1920 * Only allow IPv6 option processing on native IPv6 sockets.
1905 1921 */
1906 1922 if (connp->conn_family != AF_INET6)
1907 1923 return (-1);
1908 1924
1909 1925 if (connp->conn_proto != IPPROTO_ICMPV6)
1910 1926 return (-1);
1911 1927
1912 1928 switch (name) {
1913 1929 case ICMP6_FILTER:
1914 1930 mutex_enter(&connp->conn_lock);
1915 1931 if (icmp->icmp_filter == NULL) {
1916 1932 /* Make it look like "pass all" */
1917 1933 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1918 1934 } else {
1919 1935 (void) bcopy(icmp->icmp_filter, ptr,
1920 1936 sizeof (icmp6_filter_t));
1921 1937 }
1922 1938 mutex_exit(&connp->conn_lock);
1923 1939 return (sizeof (icmp6_filter_t));
1924 1940 }
1925 1941 }
1926 1942 mutex_enter(&connp->conn_lock);
1927 1943 retval = conn_opt_get(&coas, level, name, ptr);
1928 1944 mutex_exit(&connp->conn_lock);
1929 1945 return (retval);
1930 1946 }
1931 1947
1932 1948 /*
1933 1949 * This routine retrieves the current status of socket options.
1934 1950 * It returns the size of the option retrieved, or -1.
1935 1951 */
1936 1952 int
1937 1953 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
1938 1954 {
1939 1955 conn_t *connp = Q_TO_CONN(q);
1940 1956 int err;
1941 1957
1942 1958 err = icmp_opt_get(connp, level, name, ptr);
1943 1959 return (err);
1944 1960 }
1945 1961
1946 1962 /*
1947 1963 * This routine sets socket options.
1948 1964 */
1949 1965 int
1950 1966 icmp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1951 1967 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1952 1968 {
1953 1969 conn_t *connp = coa->coa_connp;
1954 1970 ip_xmit_attr_t *ixa = coa->coa_ixa;
1955 1971 icmp_t *icmp = connp->conn_icmp;
1956 1972 icmp_stack_t *is = icmp->icmp_is;
1957 1973 int *i1 = (int *)invalp;
1958 1974 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1959 1975 int error;
1960 1976
1961 1977 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1962 1978
1963 1979 /*
1964 1980 * For fixed length options, no sanity check
1965 1981 * of passed in length is done. It is assumed *_optcom_req()
1966 1982 * routines do the right thing.
1967 1983 */
1968 1984
1969 1985 switch (level) {
1970 1986 case SOL_SOCKET:
1971 1987 switch (name) {
1972 1988 case SO_PROTOTYPE:
1973 1989 if ((*i1 & 0xFF) != IPPROTO_ICMP &&
1974 1990 (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
1975 1991 secpolicy_net_rawaccess(cr) != 0) {
1976 1992 return (EACCES);
1977 1993 }
1978 1994 if (checkonly)
1979 1995 break;
1980 1996
1981 1997 mutex_enter(&connp->conn_lock);
1982 1998 connp->conn_proto = *i1 & 0xFF;
1983 1999 ixa->ixa_protocol = connp->conn_proto;
1984 2000 if ((connp->conn_proto == IPPROTO_RAW ||
1985 2001 connp->conn_proto == IPPROTO_IGMP) &&
1986 2002 connp->conn_family == AF_INET) {
1987 2003 icmp->icmp_hdrincl = 1;
1988 2004 ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
1989 2005 } else if (connp->conn_proto == IPPROTO_UDP ||
1990 2006 connp->conn_proto == IPPROTO_TCP ||
1991 2007 connp->conn_proto == IPPROTO_SCTP) {
1992 2008 /* Used by test applications like psh */
1993 2009 icmp->icmp_hdrincl = 0;
1994 2010 ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
1995 2011 } else {
1996 2012 icmp->icmp_hdrincl = 0;
1997 2013 ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
1998 2014 }
1999 2015
2000 2016 if (connp->conn_family == AF_INET6 &&
2001 2017 connp->conn_proto == IPPROTO_ICMPV6) {
2002 2018 /* Set offset for icmp6_cksum */
2003 2019 ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
2004 2020 ixa->ixa_raw_cksum_offset = 2;
2005 2021 }
2006 2022 if (icmp->icmp_filter != NULL &&
2007 2023 connp->conn_proto != IPPROTO_ICMPV6) {
2008 2024 kmem_free(icmp->icmp_filter,
2009 2025 sizeof (icmp6_filter_t));
2010 2026 icmp->icmp_filter = NULL;
2011 2027 }
2012 2028 mutex_exit(&connp->conn_lock);
2013 2029
2014 2030 coa->coa_changed |= COA_HEADER_CHANGED;
2015 2031 /*
2016 2032 * For SCTP, we don't use icmp_bind_proto() for
2017 2033 * raw socket binding.
2018 2034 */
2019 2035 if (connp->conn_proto == IPPROTO_SCTP)
2020 2036 return (0);
2021 2037
2022 2038 coa->coa_changed |= COA_ICMP_BIND_NEEDED;
2023 2039 return (0);
2024 2040
2025 2041 case SO_SNDBUF:
2026 2042 if (*i1 > is->is_max_buf) {
2027 2043 return (ENOBUFS);
2028 2044 }
2029 2045 break;
2030 2046 case SO_RCVBUF:
2031 2047 if (*i1 > is->is_max_buf) {
2032 2048 return (ENOBUFS);
2033 2049 }
2034 2050 break;
2035 2051 }
2036 2052 break;
2037 2053
2038 2054 case IPPROTO_IP:
2039 2055 /*
2040 2056 * Only allow IPv4 option processing on IPv4 sockets.
2041 2057 */
2042 2058 if (connp->conn_family != AF_INET)
2043 2059 return (EINVAL);
2044 2060
2045 2061 switch (name) {
2046 2062 case IP_HDRINCL:
2047 2063 if (!checkonly) {
2048 2064 mutex_enter(&connp->conn_lock);
2049 2065 icmp->icmp_hdrincl = onoff;
2050 2066 if (onoff)
2051 2067 ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
2052 2068 else
2053 2069 ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
2054 2070 mutex_exit(&connp->conn_lock);
2055 2071 }
2056 2072 break;
2057 2073 }
2058 2074 break;
2059 2075
2060 2076 case IPPROTO_IPV6:
2061 2077 if (connp->conn_family != AF_INET6)
2062 2078 return (EINVAL);
2063 2079
2064 2080 switch (name) {
2065 2081 case IPV6_CHECKSUM:
2066 2082 /*
2067 2083 * Integer offset into the user data of where the
2068 2084 * checksum is located.
2069 2085 * Offset of -1 disables option.
2070 2086 * Does not apply to IPPROTO_ICMPV6.
2071 2087 */
2072 2088 if (connp->conn_proto == IPPROTO_ICMPV6 ||
2073 2089 coa->coa_ancillary) {
2074 2090 return (EINVAL);
2075 2091 }
2076 2092 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
2077 2093 /* Negative or not 16 bit aligned offset */
2078 2094 return (EINVAL);
2079 2095 }
2080 2096 if (checkonly)
2081 2097 break;
2082 2098
2083 2099 mutex_enter(&connp->conn_lock);
2084 2100 if (*i1 == -1) {
2085 2101 ixa->ixa_flags &= ~IXAF_SET_RAW_CKSUM;
2086 2102 ixa->ixa_raw_cksum_offset = 0;
2087 2103 ixa->ixa_flags &= ~IXAF_SET_ULP_CKSUM;
2088 2104 } else {
2089 2105 ixa->ixa_flags |= IXAF_SET_RAW_CKSUM;
2090 2106 ixa->ixa_raw_cksum_offset = *i1;
2091 2107 ixa->ixa_flags |= IXAF_SET_ULP_CKSUM;
2092 2108 }
2093 2109 mutex_exit(&connp->conn_lock);
2094 2110 break;
2095 2111 }
2096 2112 break;
2097 2113
2098 2114 case IPPROTO_ICMPV6:
2099 2115 /*
2100 2116 * Only allow IPv6 option processing on IPv6 sockets.
2101 2117 */
2102 2118 if (connp->conn_family != AF_INET6)
2103 2119 return (EINVAL);
2104 2120 if (connp->conn_proto != IPPROTO_ICMPV6)
2105 2121 return (EINVAL);
2106 2122
2107 2123 switch (name) {
2108 2124 case ICMP6_FILTER:
2109 2125 if (checkonly)
2110 2126 break;
2111 2127
2112 2128 if ((inlen != 0) &&
2113 2129 (inlen != sizeof (icmp6_filter_t)))
2114 2130 return (EINVAL);
2115 2131
2116 2132 mutex_enter(&connp->conn_lock);
2117 2133 if (inlen == 0) {
2118 2134 if (icmp->icmp_filter != NULL) {
2119 2135 kmem_free(icmp->icmp_filter,
2120 2136 sizeof (icmp6_filter_t));
2121 2137 icmp->icmp_filter = NULL;
2122 2138 }
2123 2139 } else {
2124 2140 if (icmp->icmp_filter == NULL) {
2125 2141 icmp->icmp_filter = kmem_alloc(
2126 2142 sizeof (icmp6_filter_t),
2127 2143 KM_NOSLEEP);
2128 2144 if (icmp->icmp_filter == NULL) {
2129 2145 mutex_exit(&connp->conn_lock);
2130 2146 return (ENOBUFS);
2131 2147 }
2132 2148 }
2133 2149 (void) bcopy(invalp, icmp->icmp_filter, inlen);
2134 2150 }
2135 2151 mutex_exit(&connp->conn_lock);
2136 2152 break;
2137 2153 }
2138 2154 break;
2139 2155 }
2140 2156 error = conn_opt_set(coa, level, name, inlen, invalp,
2141 2157 checkonly, cr);
2142 2158 return (error);
2143 2159 }
2144 2160
2145 2161 /*
2146 2162 * This routine sets socket options.
2147 2163 */
2148 2164 int
2149 2165 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
2150 2166 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2151 2167 void *thisdg_attrs, cred_t *cr)
2152 2168 {
2153 2169 icmp_t *icmp = connp->conn_icmp;
2154 2170 int err;
2155 2171 conn_opt_arg_t coas, *coa;
2156 2172 boolean_t checkonly;
2157 2173 icmp_stack_t *is = icmp->icmp_is;
2158 2174
2159 2175 switch (optset_context) {
2160 2176 case SETFN_OPTCOM_CHECKONLY:
2161 2177 checkonly = B_TRUE;
2162 2178 /*
2163 2179 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
2164 2180 * inlen != 0 implies value supplied and
2165 2181 * we have to "pretend" to set it.
2166 2182 * inlen == 0 implies that there is no
2167 2183 * value part in T_CHECK request and just validation
2168 2184 * done elsewhere should be enough, we just return here.
2169 2185 */
2170 2186 if (inlen == 0) {
2171 2187 *outlenp = 0;
2172 2188 return (0);
2173 2189 }
2174 2190 break;
2175 2191 case SETFN_OPTCOM_NEGOTIATE:
2176 2192 checkonly = B_FALSE;
2177 2193 break;
2178 2194 case SETFN_UD_NEGOTIATE:
2179 2195 case SETFN_CONN_NEGOTIATE:
2180 2196 checkonly = B_FALSE;
2181 2197 /*
2182 2198 * Negotiating local and "association-related" options
2183 2199 * through T_UNITDATA_REQ.
2184 2200 *
2185 2201 * Following routine can filter out ones we do not
2186 2202 * want to be "set" this way.
2187 2203 */
2188 2204 if (!icmp_opt_allow_udr_set(level, name)) {
2189 2205 *outlenp = 0;
2190 2206 return (EINVAL);
2191 2207 }
2192 2208 break;
2193 2209 default:
2194 2210 /*
2195 2211 * We should never get here
2196 2212 */
2197 2213 *outlenp = 0;
2198 2214 return (EINVAL);
2199 2215 }
2200 2216
2201 2217 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
2202 2218 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
2203 2219
2204 2220 if (thisdg_attrs != NULL) {
2205 2221 /* Options from T_UNITDATA_REQ */
2206 2222 coa = (conn_opt_arg_t *)thisdg_attrs;
2207 2223 ASSERT(coa->coa_connp == connp);
2208 2224 ASSERT(coa->coa_ixa != NULL);
2209 2225 ASSERT(coa->coa_ipp != NULL);
2210 2226 ASSERT(coa->coa_ancillary);
2211 2227 } else {
2212 2228 coa = &coas;
2213 2229 coas.coa_connp = connp;
2214 2230 /* Get a reference on conn_ixa to prevent concurrent mods */
2215 2231 coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
2216 2232 if (coas.coa_ixa == NULL) {
2217 2233 *outlenp = 0;
2218 2234 return (ENOMEM);
2219 2235 }
2220 2236 coas.coa_ipp = &connp->conn_xmit_ipp;
2221 2237 coas.coa_ancillary = B_FALSE;
2222 2238 coas.coa_changed = 0;
2223 2239 }
2224 2240
2225 2241 err = icmp_do_opt_set(coa, level, name, inlen, invalp,
2226 2242 cr, checkonly);
2227 2243 if (err != 0) {
2228 2244 errout:
2229 2245 if (!coa->coa_ancillary)
2230 2246 ixa_refrele(coa->coa_ixa);
2231 2247 *outlenp = 0;
2232 2248 return (err);
2233 2249 }
2234 2250
2235 2251 /*
2236 2252 * Common case of OK return with outval same as inval.
2237 2253 */
2238 2254 if (invalp != outvalp) {
2239 2255 /* don't trust bcopy for identical src/dst */
2240 2256 (void) bcopy(invalp, outvalp, inlen);
2241 2257 }
2242 2258 *outlenp = inlen;
2243 2259
2244 2260 /*
2245 2261 * If this was not ancillary data, then we rebuild the headers,
2246 2262 * update the IRE/NCE, and IPsec as needed.
2247 2263 * Since the label depends on the destination we go through
2248 2264 * ip_set_destination first.
2249 2265 */
2250 2266 if (coa->coa_ancillary) {
2251 2267 return (0);
2252 2268 }
2253 2269
2254 2270 if (coa->coa_changed & COA_ROUTE_CHANGED) {
2255 2271 in6_addr_t saddr, faddr, nexthop;
2256 2272 in_port_t fport;
2257 2273
2258 2274 /*
2259 2275 * We clear lastdst to make sure we pick up the change
2260 2276 * next time sending.
2261 2277 * If we are connected we re-cache the information.
2262 2278 * We ignore errors to preserve BSD behavior.
2263 2279 * Note that we don't redo IPsec policy lookup here
2264 2280 * since the final destination (or source) didn't change.
2265 2281 */
2266 2282 mutex_enter(&connp->conn_lock);
2267 2283 connp->conn_v6lastdst = ipv6_all_zeros;
2268 2284
2269 2285 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2270 2286 &connp->conn_faddr_v6, &nexthop);
2271 2287 saddr = connp->conn_saddr_v6;
2272 2288 faddr = connp->conn_faddr_v6;
2273 2289 fport = connp->conn_fport;
2274 2290 mutex_exit(&connp->conn_lock);
2275 2291
2276 2292 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2277 2293 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2278 2294 (void) ip_attr_connect(connp, coa->coa_ixa,
2279 2295 &saddr, &faddr, &nexthop, fport, NULL, NULL,
2280 2296 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2281 2297 }
2282 2298 }
2283 2299
2284 2300 ixa_refrele(coa->coa_ixa);
2285 2301
2286 2302 if (coa->coa_changed & COA_HEADER_CHANGED) {
2287 2303 /*
2288 2304 * Rebuild the header template if we are connected.
2289 2305 * Otherwise clear conn_v6lastdst so we rebuild the header
2290 2306 * in the data path.
2291 2307 */
2292 2308 mutex_enter(&connp->conn_lock);
2293 2309 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2294 2310 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2295 2311 err = icmp_build_hdr_template(connp,
2296 2312 &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2297 2313 connp->conn_flowinfo);
2298 2314 if (err != 0) {
2299 2315 mutex_exit(&connp->conn_lock);
2300 2316 return (err);
2301 2317 }
2302 2318 } else {
2303 2319 connp->conn_v6lastdst = ipv6_all_zeros;
2304 2320 }
2305 2321 mutex_exit(&connp->conn_lock);
2306 2322 }
2307 2323 if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2308 2324 (void) proto_set_rx_hiwat(connp->conn_rq, connp,
2309 2325 connp->conn_rcvbuf);
2310 2326 }
2311 2327 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2312 2328 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2313 2329 }
2314 2330 if (coa->coa_changed & COA_WROFF_CHANGED) {
2315 2331 /* Increase wroff if needed */
2316 2332 uint_t wroff;
2317 2333
2318 2334 mutex_enter(&connp->conn_lock);
2319 2335 wroff = connp->conn_ht_iphc_allocated + is->is_wroff_extra;
2320 2336 if (wroff > connp->conn_wroff) {
2321 2337 connp->conn_wroff = wroff;
2322 2338 mutex_exit(&connp->conn_lock);
2323 2339 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2324 2340 } else {
2325 2341 mutex_exit(&connp->conn_lock);
2326 2342 }
2327 2343 }
2328 2344 if (coa->coa_changed & COA_ICMP_BIND_NEEDED) {
2329 2345 icmp_bind_proto(icmp);
2330 2346 }
2331 2347 return (err);
2332 2348 }
2333 2349
2334 2350 /* This routine sets socket options. */
2335 2351 int
2336 2352 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2337 2353 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2338 2354 void *thisdg_attrs, cred_t *cr)
2339 2355 {
2340 2356 conn_t *connp = Q_TO_CONN(q);
2341 2357 int error;
2342 2358
2343 2359 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
2344 2360 outlenp, outvalp, thisdg_attrs, cr);
2345 2361 return (error);
2346 2362 }
2347 2363
2348 2364 /*
2349 2365 * Setup IP headers.
2350 2366 *
2351 2367 * Note that IP_HDRINCL has ipha_protocol that is different than conn_proto,
2352 2368 * but icmp_output_hdrincl restores ipha_protocol once we return.
2353 2369 */
2354 2370 mblk_t *
2355 2371 icmp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2356 2372 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo,
2357 2373 mblk_t *data_mp, int *errorp)
2358 2374 {
2359 2375 mblk_t *mp;
2360 2376 icmp_stack_t *is = connp->conn_netstack->netstack_icmp;
2361 2377 uint_t data_len;
2362 2378 uint32_t cksum;
2363 2379
2364 2380 data_len = msgdsize(data_mp);
2365 2381 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, connp->conn_proto,
2366 2382 flowinfo, 0, data_mp, data_len, is->is_wroff_extra, &cksum, errorp);
2367 2383 if (mp == NULL) {
2368 2384 ASSERT(*errorp != 0);
2369 2385 return (NULL);
2370 2386 }
2371 2387
2372 2388 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2373 2389
2374 2390 /*
2375 2391 * If there was a routing option/header then conn_prepend_hdr
2376 2392 * has massaged it and placed the pseudo-header checksum difference
2377 2393 * in the cksum argument.
2378 2394 *
2379 2395 * Prepare for ICMPv6 checksum done in IP.
2380 2396 *
2381 2397 * We make it easy for IP to include our pseudo header
2382 2398 * by putting our length (and any routing header adjustment)
2383 2399 * in the ICMPv6 checksum field.
2384 2400 * The IP source, destination, and length have already been set by
2385 2401 * conn_prepend_hdr.
2386 2402 */
2387 2403 cksum += data_len;
2388 2404 cksum = (cksum >> 16) + (cksum & 0xFFFF);
2389 2405 ASSERT(cksum < 0x10000);
2390 2406
2391 2407 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2392 2408 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2393 2409
2394 2410 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2395 2411 } else {
2396 2412 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2397 2413 uint_t cksum_offset = 0;
2398 2414
2399 2415 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2400 2416
2401 2417 if (ixa->ixa_flags & IXAF_SET_ULP_CKSUM) {
2402 2418 if (connp->conn_proto == IPPROTO_ICMPV6) {
2403 2419 cksum_offset = ixa->ixa_ip_hdr_length +
2404 2420 offsetof(icmp6_t, icmp6_cksum);
2405 2421 } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
2406 2422 cksum_offset = ixa->ixa_ip_hdr_length +
2407 2423 ixa->ixa_raw_cksum_offset;
2408 2424 }
2409 2425 }
2410 2426 if (cksum_offset != 0) {
2411 2427 uint16_t *ptr;
2412 2428
2413 2429 /* Make sure the checksum fits in the first mblk */
2414 2430 if (cksum_offset + sizeof (short) > MBLKL(mp)) {
2415 2431 mblk_t *mp1;
2416 2432
2417 2433 mp1 = msgpullup(mp,
2418 2434 cksum_offset + sizeof (short));
2419 2435 freemsg(mp);
2420 2436 if (mp1 == NULL) {
2421 2437 *errorp = ENOMEM;
2422 2438 return (NULL);
2423 2439 }
2424 2440 mp = mp1;
2425 2441 ip6h = (ip6_t *)mp->b_rptr;
2426 2442 }
2427 2443 ptr = (uint16_t *)(mp->b_rptr + cksum_offset);
2428 2444 *ptr = htons(cksum);
2429 2445 }
2430 2446 }
2431 2447
2432 2448 /* Note that we don't try to update wroff due to ancillary data */
2433 2449 return (mp);
2434 2450 }
2435 2451
2436 2452 static int
2437 2453 icmp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2438 2454 const in6_addr_t *v6dst, uint32_t flowinfo)
2439 2455 {
2440 2456 int error;
2441 2457
2442 2458 ASSERT(MUTEX_HELD(&connp->conn_lock));
2443 2459 /*
2444 2460 * We clear lastdst to make sure we don't use the lastdst path
2445 2461 * next time sending since we might not have set v6dst yet.
2446 2462 */
2447 2463 connp->conn_v6lastdst = ipv6_all_zeros;
2448 2464
2449 2465 error = conn_build_hdr_template(connp, 0, 0, v6src, v6dst, flowinfo);
2450 2466 if (error != 0)
2451 2467 return (error);
2452 2468
2453 2469 /*
2454 2470 * Any routing header/option has been massaged. The checksum difference
2455 2471 * is stored in conn_sum.
2456 2472 */
2457 2473 return (0);
2458 2474 }
2459 2475
2460 2476 static mblk_t *
2461 2477 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
2462 2478 {
2463 2479 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
2464 2480 if (IPCL_IS_NONSTR(icmp->icmp_connp)) {
2465 2481 /*
2466 2482 * fallback has started but messages have not been moved yet
2467 2483 */
2468 2484 if (icmp->icmp_fallback_queue_head == NULL) {
2469 2485 ASSERT(icmp->icmp_fallback_queue_tail == NULL);
2470 2486 icmp->icmp_fallback_queue_head = mp;
2471 2487 icmp->icmp_fallback_queue_tail = mp;
2472 2488 } else {
2473 2489 ASSERT(icmp->icmp_fallback_queue_tail != NULL);
2474 2490 icmp->icmp_fallback_queue_tail->b_next = mp;
2475 2491 icmp->icmp_fallback_queue_tail = mp;
2476 2492 }
2477 2493 return (NULL);
2478 2494 } else {
2479 2495 /*
2480 2496 * Fallback completed, let the caller putnext() the mblk.
2481 2497 */
2482 2498 return (mp);
2483 2499 }
2484 2500 }
2485 2501
2486 2502 /*
2487 2503 * Deliver data to ULP. In case we have a socket, and it's falling back to
2488 2504 * TPI, then we'll queue the mp for later processing.
2489 2505 */
2490 2506 static void
2491 2507 icmp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len)
2492 2508 {
2493 2509 if (IPCL_IS_NONSTR(connp)) {
2494 2510 icmp_t *icmp = connp->conn_icmp;
2495 2511 int error;
2496 2512
2497 2513 ASSERT(len == msgdsize(mp));
2498 2514 if ((*connp->conn_upcalls->su_recv)
2499 2515 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2500 2516 mutex_enter(&icmp->icmp_recv_lock);
2501 2517 if (error == ENOSPC) {
2502 2518 /*
2503 2519 * let's confirm while holding the lock
2504 2520 */
2505 2521 if ((*connp->conn_upcalls->su_recv)
2506 2522 (connp->conn_upper_handle, NULL, 0, 0,
2507 2523 &error, NULL) < 0) {
2508 2524 ASSERT(error == ENOSPC);
2509 2525 if (error == ENOSPC) {
2510 2526 connp->conn_flow_cntrld =
2511 2527 B_TRUE;
2512 2528 }
2513 2529 }
2514 2530 mutex_exit(&icmp->icmp_recv_lock);
2515 2531 } else {
2516 2532 ASSERT(error == EOPNOTSUPP);
2517 2533 mp = icmp_queue_fallback(icmp, mp);
2518 2534 mutex_exit(&icmp->icmp_recv_lock);
2519 2535 if (mp != NULL)
2520 2536 putnext(connp->conn_rq, mp);
2521 2537 }
2522 2538 }
2523 2539 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
2524 2540 } else {
2525 2541 putnext(connp->conn_rq, mp);
2526 2542 }
2527 2543 }
2528 2544
2529 2545 /*
2530 2546 * This is the inbound data path.
2531 2547 * IP has already pulled up the IP headers and verified alignment
2532 2548 * etc.
 *
 * arg1 is the conn_t the packet is being delivered to, mp is the M_DATA
 * packet starting at the IP header, and ira supplies the receive
 * attributes used here (packet length, IP header length, IPv4/IPv6 flag,
 * receive ifindex).  arg2 is unused.
 *
 * The packet is wrapped in a T_UNITDATA_IND carrying the source address
 * and any requested ancillary data, and handed to icmp_ulp_recv().
 * IPv4 packets go up with the IP header still attached; for IPv6 the
 * headers are skipped before delivery.
 *
 * The packet is freed and dropped when an allocation fails
 * (rawipInErrors), when the ICMPv6 filter blocks its type, or when the
 * IPv6 raw checksum does not verify (rawipInCksumErrs).
2533 2549 */
2534 2550 /* ARGSUSED2 */
2535 2551 static void
2536 2552 icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2537 2553 {
2538 2554 conn_t *connp = (conn_t *)arg1;
2539 2555 struct T_unitdata_ind *tudi;
2540 2556 uchar_t *rptr; /* Pointer to IP header */
2541 2557 int ip_hdr_length;
2542 2558 int udi_size; /* Size of T_unitdata_ind */
2543 2559 int pkt_len;
2544 2560 icmp_t *icmp;
2545 2561 ip_pkt_t ipps;
2546 2562 ip6_t *ip6h;
2547 2563 mblk_t *mp1;
2548 2564 crb_t recv_ancillary;
2549 2565 icmp_stack_t *is;
2550 2566 sin_t *sin;
2551 2567 sin6_t *sin6;
2552 2568 ipha_t *ipha;
2553 2569
2554 2570 ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2555 2571
2556 2572 icmp = connp->conn_icmp;
2557 2573 is = icmp->icmp_is;
2558 2574 rptr = mp->b_rptr;
2559 2575
2560 2576 ASSERT(DB_TYPE(mp) == M_DATA);
2561 2577 ASSERT(OK_32PTR(rptr));
2562 2578 ASSERT(ira->ira_pktlen == msgdsize(mp));
2563 2579 pkt_len = ira->ira_pktlen;
2564 2580
2565 2581 /*
2566 2582 * Get a snapshot of these and allow other threads to change
2567 2583 * them after that. We need the same recv_ancillary when determining
2568 2584 * the size as when adding the ancillary data items.
2569 2585 */
2570 2586 mutex_enter(&connp->conn_lock);
2571 2587 recv_ancillary = connp->conn_recv_ancillary;
2572 2588 mutex_exit(&connp->conn_lock);
2573 2589
2574 2590 ip_hdr_length = ira->ira_ip_hdr_length;
2575 2591 ASSERT(MBLKL(mp) >= ip_hdr_length); /* IP did a pullup */
2576 2592
2577 2593 /* Initialize regardless of IP version */
2578 2594 ipps.ipp_fields = 0;
2579 2595
/* IPv4: build the T_UNITDATA_IND here and jump straight to deliver */
2580 2596 if (ira->ira_flags & IRAF_IS_IPV4) {
2581 2597 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2582 2598 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2583 2599 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2584 2600
2585 2601 ipha = (ipha_t *)mp->b_rptr;
/* The parsed header fields are only needed for ancillary data */
2586 2602 if (recv_ancillary.crb_all != 0)
2587 2603 (void) ip_find_hdr_v4(ipha, &ipps, B_FALSE);
2588 2604
2589 2605 /*
2590 2606 * BSD for some reason adjusts ipha_length to exclude the
2591 2607 * IP header length. We do the same.
2592 2608 */
2593 2609 if (is->is_bsd_compat) {
2594 2610 ushort_t len;
2595 2611
2596 2612 len = ntohs(ipha->ipha_length);
2597 2613 if (mp->b_datap->db_ref > 1) {
2598 2614 /*
2599 2615 * Allocate a new IP header so that we can
2600 2616 * modify ipha_length.
2601 2617 */
/* NOTE(review): this mp1 shadows the function-level mp1 declared above */
2602 2618 mblk_t *mp1;
2603 2619
2604 2620 mp1 = allocb(ip_hdr_length, BPRI_MED);
2605 2621 if (mp1 == NULL) {
2606 2622 freemsg(mp);
2607 2623 BUMP_MIB(&is->is_rawip_mib,
2608 2624 rawipInErrors);
2609 2625 return;
2610 2626 }
/*
 * Copy the header into the new mblk, advance the original mblk past
 * its header, and chain the original behind the private copy.
 */
2611 2627 bcopy(rptr, mp1->b_rptr, ip_hdr_length);
2612 2628 mp->b_rptr = rptr + ip_hdr_length;
2613 2629 rptr = mp1->b_rptr;
2614 2630 ipha = (ipha_t *)rptr;
2615 2631 mp1->b_cont = mp;
2616 2632 mp1->b_wptr = rptr + ip_hdr_length;
2617 2633 mp = mp1;
2618 2634 }
2619 2635 len -= ip_hdr_length;
2620 2636 ipha->ipha_length = htons(len);
2621 2637 }
2622 2638
2623 2639 /*
2624 2640 * For RAW sockets we do not pass ICMP/IPv4 packets to AF_INET6
2625 2641 * sockets. This is ensured by icmp_bind and the IP fanout code.
2626 2642 */
2627 2643 ASSERT(connp->conn_family == AF_INET);
2628 2644
2629 2645 /*
2630 2646 * This is the inbound data path. Packets are passed upstream
2631 2647 * as T_UNITDATA_IND messages with full IPv4 headers still
2632 2648 * attached.
2633 2649 */
2634 2650
2635 2651 /*
2636 2652 * Normally only send up the source address.
2637 2653 * If any ancillary data items are wanted we add those.
2638 2654 */
2639 2655 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2640 2656 if (recv_ancillary.crb_all != 0) {
2641 2657 udi_size += conn_recvancillary_size(connp,
2642 2658 recv_ancillary, ira, mp, &ipps);
2643 2659 }
2644 2660
2645 2661 /* Allocate a message block for the T_UNITDATA_IND structure. */
2646 2662 mp1 = allocb(udi_size, BPRI_MED);
2647 2663 if (mp1 == NULL) {
2648 2664 freemsg(mp);
2649 2665 BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
2650 2666 return;
2651 2667 }
2652 2668 mp1->b_cont = mp;
2653 2669 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2654 2670 mp1->b_datap->db_type = M_PROTO;
2655 2671 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2656 2672 tudi->PRIM_type = T_UNITDATA_IND;
2657 2673 tudi->SRC_length = sizeof (sin_t);
2658 2674 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
/* The source address sits immediately after the T_unitdata_ind */
2659 2675 sin = (sin_t *)&tudi[1];
2660 2676 *sin = sin_null;
2661 2677 sin->sin_family = AF_INET;
2662 2678 sin->sin_addr.s_addr = ipha->ipha_src;
2663 2679 *(uint32_t *)&sin->sin_zero[0] = 0;
2664 2680 *(uint32_t *)&sin->sin_zero[4] = 0;
2665 2681 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2666 2682 sizeof (sin_t);
/* From here on udi_size is just the length of the options area */
2667 2683 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2668 2684 tudi->OPT_length = udi_size;
2669 2685
2670 2686 /*
2671 2687 * Add options if IP_RECVIF etc is set
2672 2688 */
2673 2689 if (udi_size != 0) {
2674 2690 conn_recvancillary_add(connp, recv_ancillary, ira,
2675 2691 &ipps, (uchar_t *)&sin[1], udi_size);
2676 2692 }
2677 2693 goto deliver;
2678 2694 }
2679 2695
2680 2696 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2681 2697 /*
2682 2698 * IPv6 packets can only be received by applications
2683 2699 * that are prepared to receive IPv6 addresses.
2684 2700 * The IP fanout must ensure this.
2685 2701 */
2686 2702 ASSERT(connp->conn_family == AF_INET6);
2687 2703
2688 2704 /*
2689 2705 * Handle IPv6 packets. We don't pass up the IP headers with the
2690 2706 * payload for IPv6.
2691 2707 */
2692 2708
2693 2709 ip6h = (ip6_t *)rptr;
2694 2710 if (recv_ancillary.crb_all != 0) {
2695 2711 /*
2696 2712 * Call on ip_find_hdr_v6 which gets individual lengths of
2697 2713 * extension headers (and pointers to them).
2698 2714 */
2699 2715 uint8_t nexthdr;
2700 2716
2701 2717 /* We don't care about the length or nextheader. */
2702 2718 (void) ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, &nexthdr);
2703 2719
2704 2720 /*
2705 2721 * We do not pass up hop-by-hop options or any other
2706 2722 * extension header as part of the packet. Applications
2707 2723 * that want to see them have to specify IPV6_RECV* socket
2708 2724 * options. And conn_recvancillary_size/add explicitly
2709 2725 * drops the TX option from IPV6_HOPOPTS as it does for UDP.
2710 2726 *
2711 2727 * If we had multilevel ICMP sockets, then we'd want to
2712 2728 * modify conn_recvancillary_size/add to
2713 2729 * allow the user to see the label.
2714 2730 */
2715 2731 }
2716 2732
2717 2733 /*
2718 2734 * Check a filter for ICMPv6 types if needed.
2719 2735 * Verify raw checksums if needed.
 * Both checks run under conn_lock, which is dropped before any
 * packet is freed.
2720 2736 */
2721 2737 mutex_enter(&connp->conn_lock);
2722 2738 if (icmp->icmp_filter != NULL) {
2723 2739 int type;
2724 2740
2725 2741 /* Assumes that IP has done the pullupmsg */
/*
 * NOTE(review): the byte at b_rptr[ip_hdr_length] is read before the
 * ASSERT below runs, and that ASSERT uses <= although reading this byte
 * needs b_rptr + ip_hdr_length < b_wptr. Confirm that IP guarantees at
 * least one byte past the headers on this path.
 */
2726 2742 type = mp->b_rptr[ip_hdr_length];
2727 2743
2728 2744 ASSERT(mp->b_rptr + ip_hdr_length <= mp->b_wptr);
2729 2745 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
2730 2746 mutex_exit(&connp->conn_lock);
2731 2747 freemsg(mp);
2732 2748 return;
2733 2749 }
2734 2750 }
2735 2751 if (connp->conn_ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
2736 2752 /* Checksum */
2737 2753 uint16_t *up;
2738 2754 uint32_t sum;
2739 2755 int remlen;
2740 2756
/* up[0..15] cover 16 16-bit words starting at the IPv6 source address */
2741 2757 up = (uint16_t *)&ip6h->ip6_src;
2742 2758
2743 2759 remlen = msgdsize(mp) - ip_hdr_length;
/* Pseudo-header sum: protocol + payload length plus those address words */
2744 2760 sum = htons(connp->conn_proto + remlen)
2745 2761 + up[0] + up[1] + up[2] + up[3]
2746 2762 + up[4] + up[5] + up[6] + up[7]
2747 2763 + up[8] + up[9] + up[10] + up[11]
2748 2764 + up[12] + up[13] + up[14] + up[15];
2749 2765 sum = (sum & 0xffff) + (sum >> 16);
2750 2766 sum = IP_CSUM(mp, ip_hdr_length, sum);
2751 2767 if (sum != 0) {
2752 2768 /* IPv6 RAW checksum failed */
2753 2769 ip0dbg(("icmp_rput: RAW checksum failed %x\n", sum));
2754 2770 mutex_exit(&connp->conn_lock);
2755 2771 freemsg(mp);
2756 2772 BUMP_MIB(&is->is_rawip_mib, rawipInCksumErrs);
2757 2773 return;
2758 2774 }
2759 2775 }
2760 2776 mutex_exit(&connp->conn_lock);
2761 2777
/* Size the T_UNITDATA_IND: fixed part, sin6_t, optional ancillary data */
2762 2778 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2763 2779
2764 2780 if (recv_ancillary.crb_all != 0) {
2765 2781 udi_size += conn_recvancillary_size(connp,
2766 2782 recv_ancillary, ira, mp, &ipps);
2767 2783 }
2768 2784
2769 2785 mp1 = allocb(udi_size, BPRI_MED);
2770 2786 if (mp1 == NULL) {
2771 2787 freemsg(mp);
2772 2788 BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
2773 2789 return;
2774 2790 }
2775 2791 mp1->b_cont = mp;
2776 2792 mp1->b_datap->db_type = M_PROTO;
2777 2793 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2778 2794 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2779 2795 tudi->PRIM_type = T_UNITDATA_IND;
2780 2796 tudi->SRC_length = sizeof (sin6_t);
2781 2797 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2782 2798 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
/* From here on udi_size is just the length of the options area */
2783 2799 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2784 2800 tudi->OPT_length = udi_size;
2785 2801 sin6 = (sin6_t *)&tudi[1];
2786 2802 *sin6 = sin6_null;
2787 2803 sin6->sin6_port = 0;
2788 2804 sin6->sin6_family = AF_INET6;
2789 2805
2790 2806 sin6->sin6_addr = ip6h->ip6_src;
2791 2807 /* No sin6_flowinfo per API */
2792 2808 sin6->sin6_flowinfo = 0;
2793 2809 /* For link-scope pass up scope id */
2794 2810 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2795 2811 sin6->sin6_scope_id = ira->ira_ruifindex;
2796 2812 else
2797 2813 sin6->sin6_scope_id = 0;
2798 2814 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
2799 2815 IPCL_ZONEID(connp), is->is_netstack);
2800 2816
2801 2817 if (udi_size != 0) {
2802 2818 conn_recvancillary_add(connp, recv_ancillary, ira,
2803 2819 &ipps, (uchar_t *)&sin6[1], udi_size);
2804 2820 }
2805 2821
2806 2822 /* Skip all the IPv6 headers per API */
2807 2823 mp->b_rptr += ip_hdr_length;
2808 2824 pkt_len -= ip_hdr_length;
2809 2825
/* Common tail: count the datagram and pass the T_UNITDATA_IND chain up */
2810 2826 deliver:
2811 2827 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
2812 2828 icmp_ulp_recv(connp, mp1, pkt_len);
2813 2829 }
2814 2830
2815 2831 /*
2816 2832 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
2817 2833 * information that can be changing beneath us.
2818 2834 */
2819 2835 mblk_t *
2820 2836 icmp_snmp_get(queue_t *q, mblk_t *mpctl)
2821 2837 {
2822 2838 mblk_t *mpdata;
2823 2839 struct opthdr *optp;
2824 2840 conn_t *connp = Q_TO_CONN(q);
2825 2841 icmp_stack_t *is = connp->conn_netstack->netstack_icmp;
2826 2842 mblk_t *mp2ctl;
2827 2843
2828 2844 /*
2829 2845 * make a copy of the original message
2830 2846 */
2831 2847 mp2ctl = copymsg(mpctl);
2832 2848
2833 2849 if (mpctl == NULL ||
2834 2850 (mpdata = mpctl->b_cont) == NULL) {
2835 2851 freemsg(mpctl);
2836 2852 freemsg(mp2ctl);
2837 2853 return (0);
2838 2854 }
2839 2855
2840 2856 /* fixed length structure for IPv4 and IPv6 counters */
2841 2857 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
2842 2858 optp->level = EXPER_RAWIP;
2843 2859 optp->name = 0;
2844 2860 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
2845 2861 sizeof (is->is_rawip_mib));
2846 2862 optp->len = msgdsize(mpdata);
2847 2863 qreply(q, mpctl);
2848 2864
2849 2865 return (mp2ctl);
2850 2866 }
2851 2867
2852 2868 /*
2853 2869 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
2854 2870 * TODO: If this ever actually tries to set anything, it needs to be
2855 2871 * to do the appropriate locking.
2856 2872 */
2857 2873 /* ARGSUSED */
2858 2874 int
2859 2875 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
2860 2876 uchar_t *ptr, int len)
2861 2877 {
2862 2878 switch (level) {
2863 2879 case EXPER_RAWIP:
2864 2880 return (0);
2865 2881 default:
2866 2882 return (1);
2867 2883 }
2868 2884 }
2869 2885
2870 2886 /*
2871 2887 * This routine creates a T_UDERROR_IND message and passes it upstream.
2872 2888 * The address and options are copied from the T_UNITDATA_REQ message
2873 2889 * passed in mp. This message is freed.
2874 2890 */
2875 2891 static void
2876 2892 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2877 2893 {
2878 2894 struct T_unitdata_req *tudr;
2879 2895 mblk_t *mp1;
2880 2896 uchar_t *destaddr;
2881 2897 t_scalar_t destlen;
2882 2898 uchar_t *optaddr;
2883 2899 t_scalar_t optlen;
2884 2900
2885 2901 if ((mp->b_wptr < mp->b_rptr) ||
2886 2902 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2887 2903 goto done;
2888 2904 }
2889 2905 tudr = (struct T_unitdata_req *)mp->b_rptr;
2890 2906 destaddr = mp->b_rptr + tudr->DEST_offset;
2891 2907 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2892 2908 destaddr + tudr->DEST_length < mp->b_rptr ||
2893 2909 destaddr + tudr->DEST_length > mp->b_wptr) {
2894 2910 goto done;
2895 2911 }
2896 2912 optaddr = mp->b_rptr + tudr->OPT_offset;
2897 2913 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2898 2914 optaddr + tudr->OPT_length < mp->b_rptr ||
2899 2915 optaddr + tudr->OPT_length > mp->b_wptr) {
2900 2916 goto done;
2901 2917 }
2902 2918 destlen = tudr->DEST_length;
2903 2919 optlen = tudr->OPT_length;
2904 2920
2905 2921 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2906 2922 (char *)optaddr, optlen, err);
2907 2923 if (mp1 != NULL)
2908 2924 qreply(q, mp1);
2909 2925
2910 2926 done:
2911 2927 freemsg(mp);
2912 2928 }
2913 2929
2914 2930 static int
2915 2931 rawip_do_unbind(conn_t *connp)
2916 2932 {
2917 2933 icmp_t *icmp = connp->conn_icmp;
2918 2934
2919 2935 mutex_enter(&connp->conn_lock);
2920 2936 /* If a bind has not been done, we can't unbind. */
2921 2937 if (icmp->icmp_state == TS_UNBND) {
2922 2938 mutex_exit(&connp->conn_lock);
2923 2939 return (-TOUTSTATE);
2924 2940 }
2925 2941 connp->conn_saddr_v6 = ipv6_all_zeros;
2926 2942 connp->conn_bound_addr_v6 = ipv6_all_zeros;
2927 2943 connp->conn_laddr_v6 = ipv6_all_zeros;
2928 2944 connp->conn_mcbc_bind = B_FALSE;
2929 2945 connp->conn_lport = 0;
2930 2946 connp->conn_fport = 0;
2931 2947 /* In case we were also connected */
2932 2948 connp->conn_faddr_v6 = ipv6_all_zeros;
2933 2949 connp->conn_v6lastdst = ipv6_all_zeros;
2934 2950
2935 2951 icmp->icmp_state = TS_UNBND;
2936 2952
2937 2953 (void) icmp_build_hdr_template(connp, &connp->conn_saddr_v6,
2938 2954 &connp->conn_faddr_v6, connp->conn_flowinfo);
2939 2955 mutex_exit(&connp->conn_lock);
2940 2956
2941 2957 ip_unbind(connp);
2942 2958 return (0);
2943 2959 }
2944 2960
2945 2961 /*
2946 2962 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
2947 2963 * After some error checking, the message is passed downstream to ip.
2948 2964 */
2949 2965 static void
2950 2966 icmp_tpi_unbind(queue_t *q, mblk_t *mp)
2951 2967 {
2952 2968 conn_t *connp = Q_TO_CONN(q);
2953 2969 int error;
2954 2970
2955 2971 ASSERT(mp->b_cont == NULL);
2956 2972 error = rawip_do_unbind(connp);
2957 2973 if (error) {
2958 2974 if (error < 0) {
2959 2975 icmp_err_ack(q, mp, -error, 0);
2960 2976 } else {
2961 2977 icmp_err_ack(q, mp, 0, error);
2962 2978 }
2963 2979 return;
2964 2980 }
2965 2981
2966 2982 /*
2967 2983 * Convert mp into a T_OK_ACK
2968 2984 */
2969 2985
2970 2986 mp = mi_tpi_ok_ack_alloc(mp);
2971 2987
2972 2988 /*
2973 2989 * should not happen in practice... T_OK_ACK is smaller than the
2974 2990 * original message.
2975 2991 */
2976 2992 ASSERT(mp != NULL);
2977 2993 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2978 2994 qreply(q, mp);
2979 2995 }
2980 2996
2981 2997 /*
2982 2998 * Process IPv4 packets that already include an IP header.
2983 2999 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
2984 3000 * IPPROTO_IGMP).
2985 3001 * In this case we ignore the address and any options in the T_UNITDATA_REQ.
2986 3002 *
2987 3003 * The packet is assumed to have a base (20 byte) IP header followed
2988 3004 * by the upper-layer protocol. We include any IP_OPTIONS including a
2989 3005 * CIPSO label but otherwise preserve the base IP header.
2990 3006 */
2991 3007 static int
2992 3008 icmp_output_hdrincl(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
2993 3009 {
2994 3010 icmp_t *icmp = connp->conn_icmp;
2995 3011 icmp_stack_t *is = icmp->icmp_is;
2996 3012 ipha_t iphas;
2997 3013 ipha_t *ipha;
2998 3014 int ip_hdr_length;
2999 3015 int tp_hdr_len;
3000 3016 ip_xmit_attr_t *ixa;
3001 3017 ip_pkt_t *ipp;
3002 3018 in6_addr_t v6src;
3003 3019 in6_addr_t v6dst;
3004 3020 in6_addr_t v6nexthop;
3005 3021 int error;
3006 3022 boolean_t do_ipsec;
3007 3023
3008 3024 /*
3009 3025 * We need an exclusive copy of conn_ixa since the included IP
3010 3026 * header could have any destination.
3011 3027 * That copy has no pointers hence we
3012 3028 * need to set them up once we've parsed the ancillary data.
3013 3029 */
3014 3030 ixa = conn_get_ixa_exclusive(connp);
3015 3031 if (ixa == NULL) {
3016 3032 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3017 3033 freemsg(mp);
3018 3034 return (ENOMEM);
3019 3035 }
3020 3036 ASSERT(cr != NULL);
3021 3037 /*
3022 3038 * Caller has a reference on cr; from db_credp or because we
3023 3039 * are running in process context.
3024 3040 */
3025 3041 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3026 3042 ixa->ixa_cred = cr;
3027 3043 ixa->ixa_cpid = pid;
3028 3044 if (is_system_labeled()) {
3029 3045 /* We need to restart with a label based on the cred */
3030 3046 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3031 3047 }
3032 3048
3033 3049 /* In case previous destination was multicast or multirt */
3034 3050 ip_attr_newdst(ixa);
3035 3051
3036 3052 /* Get a copy of conn_xmit_ipp since the TX label might change it */
3037 3053 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
3038 3054 if (ipp == NULL) {
3039 3055 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3040 3056 ixa->ixa_cred = connp->conn_cred; /* Restore */
3041 3057 ixa->ixa_cpid = connp->conn_cpid;
3042 3058 ixa_refrele(ixa);
3043 3059 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3044 3060 freemsg(mp);
3045 3061 return (ENOMEM);
3046 3062 }
3047 3063 mutex_enter(&connp->conn_lock);
3048 3064 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
3049 3065 mutex_exit(&connp->conn_lock);
3050 3066 if (error != 0) {
3051 3067 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3052 3068 freemsg(mp);
3053 3069 goto done;
3054 3070 }
3055 3071
3056 3072 /* Sanity check length of packet */
3057 3073 ipha = (ipha_t *)mp->b_rptr;
3058 3074
3059 3075 ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
3060 3076 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
3061 3077 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
3062 3078 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3063 3079 freemsg(mp);
3064 3080 goto done;
3065 3081 }
3066 3082 ipha = (ipha_t *)mp->b_rptr;
3067 3083 }
3068 3084 ipha->ipha_version_and_hdr_length =
3069 3085 (IP_VERSION<<4) | (ip_hdr_length>>2);
3070 3086
3071 3087 /*
3072 3088 * We set IXAF_DONTFRAG if the application set DF which makes
3073 3089 * IP not fragment.
3074 3090 */
3075 3091 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
3076 3092 if (ipha->ipha_fragment_offset_and_flags & htons(IPH_DF))
3077 3093 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
3078 3094 else
3079 3095 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
3080 3096
3081 3097 /* Even for multicast and broadcast we honor the apps ttl */
3082 3098 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
3083 3099
3084 3100 /*
3085 3101 * No source verification for non-local addresses
3086 3102 */
3087 3103 if (ipha->ipha_src != INADDR_ANY &&
3088 3104 ip_laddr_verify_v4(ipha->ipha_src, ixa->ixa_zoneid,
3089 3105 is->is_netstack->netstack_ip, B_FALSE)
3090 3106 != IPVL_UNICAST_UP) {
3091 3107 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
3092 3108 }
3093 3109
3094 3110 if (ipha->ipha_dst == INADDR_ANY)
3095 3111 ipha->ipha_dst = htonl(INADDR_LOOPBACK);
3096 3112
3097 3113 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6src);
3098 3114 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6dst);
3099 3115
3100 3116 /* Defer IPsec if it might need to look at ICMP type/code */
3101 3117 do_ipsec = ipha->ipha_protocol != IPPROTO_ICMP;
3102 3118 ixa->ixa_flags |= IXAF_IS_IPV4;
3103 3119
3104 3120 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
3105 3121 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop,
3106 3122 connp->conn_fport, &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3107 3123 (do_ipsec ? IPDF_IPSEC : 0));
3108 3124 switch (error) {
3109 3125 case 0:
3110 3126 break;
3111 3127 case EADDRNOTAVAIL:
3112 3128 /*
3113 3129 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3114 3130 * Don't have the application see that errno
3115 3131 */
3116 3132 error = ENETUNREACH;
3117 3133 goto failed;
3118 3134 case ENETDOWN:
3119 3135 /*
3120 3136 * Have !ipif_addr_ready address; drop packet silently
3121 3137 * until we can get applications to not send until we
3122 3138 * are ready.
3123 3139 */
3124 3140 error = 0;
3125 3141 goto failed;
3126 3142 case EHOSTUNREACH:
3127 3143 case ENETUNREACH:
3128 3144 if (ixa->ixa_ire != NULL) {
3129 3145 /*
3130 3146 * Let conn_ip_output/ire_send_noroute return
3131 3147 * the error and send any local ICMP error.
3132 3148 */
3133 3149 error = 0;
3134 3150 break;
3135 3151 }
3136 3152 /* FALLTHRU */
3137 3153 default:
3138 3154 failed:
3139 3155 freemsg(mp);
3140 3156 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3141 3157 goto done;
3142 3158 }
3143 3159 if (ipha->ipha_src == INADDR_ANY)
3144 3160 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
3145 3161
3146 3162 /*
3147 3163 * We might be going to a different destination than last time,
3148 3164 * thus check that TX allows the communication and compute any
3149 3165 * needed label.
3150 3166 *
3151 3167 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3152 3168 * don't have to worry about concurrent threads.
3153 3169 */
3154 3170 if (is_system_labeled()) {
3155 3171 /*
3156 3172 * Check whether Trusted Solaris policy allows communication
3157 3173 * with this host, and pretend that the destination is
3158 3174 * unreachable if not.
3159 3175 * Compute any needed label and place it in ipp_label_v4/v6.
3160 3176 *
3161 3177 * Later conn_build_hdr_template/conn_prepend_hdr takes
3162 3178 * ipp_label_v4/v6 to form the packet.
3163 3179 *
3164 3180 * Tsol note: We have ipp structure local to this thread so
3165 3181 * no locking is needed.
3166 3182 */
3167 3183 error = conn_update_label(connp, ixa, &v6dst, ipp);
3168 3184 if (error != 0) {
3169 3185 freemsg(mp);
3170 3186 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3171 3187 goto done;
3172 3188 }
3173 3189 }
3174 3190
3175 3191 /*
3176 3192 * Save away a copy of the IPv4 header the application passed down
3177 3193 * and then prepend an IPv4 header complete with any IP options
3178 3194 * including label.
3179 3195 * We need a struct copy since icmp_prepend_hdr will reuse the available
3180 3196 * space in the mblk.
3181 3197 */
3182 3198 iphas = *ipha;
3183 3199 mp->b_rptr += IP_SIMPLE_HDR_LENGTH;
3184 3200
3185 3201 mp = icmp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, 0, mp, &error);
3186 3202 if (mp == NULL) {
3187 3203 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3188 3204 ASSERT(error != 0);
3189 3205 goto done;
3190 3206 }
3191 3207 if (ixa->ixa_pktlen > IP_MAXPACKET) {
3192 3208 error = EMSGSIZE;
3193 3209 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3194 3210 freemsg(mp);
3195 3211 goto done;
3196 3212 }
3197 3213 /* Restore key parts of the header that the application passed down */
3198 3214 ipha = (ipha_t *)mp->b_rptr;
3199 3215 ipha->ipha_type_of_service = iphas.ipha_type_of_service;
3200 3216 ipha->ipha_ident = iphas.ipha_ident;
3201 3217 ipha->ipha_fragment_offset_and_flags =
3202 3218 iphas.ipha_fragment_offset_and_flags;
3203 3219 ipha->ipha_ttl = iphas.ipha_ttl;
3204 3220 ipha->ipha_protocol = iphas.ipha_protocol;
3205 3221 ipha->ipha_src = iphas.ipha_src;
3206 3222 ipha->ipha_dst = iphas.ipha_dst;
3207 3223
3208 3224 ixa->ixa_protocol = ipha->ipha_protocol;
3209 3225
3210 3226 /*
3211 3227 * Make sure that the IP header plus any transport header that is
3212 3228 * checksumed by ip_output is in the first mblk. (ip_output assumes
3213 3229 * that at least the checksum field is in the first mblk.)
3214 3230 */
3215 3231 switch (ipha->ipha_protocol) {
3216 3232 case IPPROTO_UDP:
3217 3233 tp_hdr_len = 8;
3218 3234 break;
3219 3235 case IPPROTO_TCP:
3220 3236 tp_hdr_len = 20;
3221 3237 break;
3222 3238 default:
3223 3239 tp_hdr_len = 0;
3224 3240 break;
3225 3241 }
3226 3242 ip_hdr_length = IPH_HDR_LENGTH(ipha);
3227 3243 if (mp->b_wptr - mp->b_rptr < ip_hdr_length + tp_hdr_len) {
3228 3244 if (!pullupmsg(mp, ip_hdr_length + tp_hdr_len)) {
3229 3245 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3230 3246 if (mp->b_cont == NULL)
3231 3247 error = EINVAL;
3232 3248 else
3233 3249 error = ENOMEM;
3234 3250 freemsg(mp);
3235 3251 goto done;
3236 3252 }
3237 3253 }
3238 3254
3239 3255 if (!do_ipsec) {
3240 3256 /* Policy might differ for different ICMP type/code */
3241 3257 if (ixa->ixa_ipsec_policy != NULL) {
3242 3258 IPPOL_REFRELE(ixa->ixa_ipsec_policy);
3243 3259 ixa->ixa_ipsec_policy = NULL;
3244 3260 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
3245 3261 }
3246 3262 mp = ip_output_attach_policy(mp, ipha, NULL, connp, ixa);
3247 3263 if (mp == NULL) {
3248 3264 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3249 3265 error = EHOSTUNREACH; /* IPsec policy failure */
3250 3266 goto done;
3251 3267 }
3252 3268 }
3253 3269
3254 3270 /* We're done. Pass the packet to ip. */
3255 3271 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
3256 3272
3257 3273 error = conn_ip_output(mp, ixa);
3258 3274 /* No rawipOutErrors if an error since IP increases its error counter */
3259 3275 switch (error) {
3260 3276 case 0:
3261 3277 break;
3262 3278 case EWOULDBLOCK:
3263 3279 (void) ixa_check_drain_insert(connp, ixa);
3264 3280 error = 0;
3265 3281 break;
3266 3282 case EADDRNOTAVAIL:
3267 3283 /*
3268 3284 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3269 3285 * Don't have the application see that errno
3270 3286 */
3271 3287 error = ENETUNREACH;
3272 3288 break;
3273 3289 }
3274 3290 done:
3275 3291 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3276 3292 ixa->ixa_cred = connp->conn_cred; /* Restore */
3277 3293 ixa->ixa_cpid = connp->conn_cpid;
3278 3294 ixa_refrele(ixa);
3279 3295 ip_pkt_free(ipp);
3280 3296 kmem_free(ipp, sizeof (*ipp));
3281 3297 return (error);
3282 3298 }
3283 3299
3284 3300 static mblk_t *
3285 3301 icmp_output_attach_policy(mblk_t *mp, conn_t *connp, ip_xmit_attr_t *ixa)
3286 3302 {
3287 3303 ipha_t *ipha = NULL;
3288 3304 ip6_t *ip6h = NULL;
3289 3305
3290 3306 if (ixa->ixa_flags & IXAF_IS_IPV4)
3291 3307 ipha = (ipha_t *)mp->b_rptr;
3292 3308 else
3293 3309 ip6h = (ip6_t *)mp->b_rptr;
3294 3310
3295 3311 if (ixa->ixa_ipsec_policy != NULL) {
3296 3312 IPPOL_REFRELE(ixa->ixa_ipsec_policy);
3297 3313 ixa->ixa_ipsec_policy = NULL;
3298 3314 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
3299 3315 }
3300 3316 return (ip_output_attach_policy(mp, ipha, ip6h, connp, ixa));
3301 3317 }
3302 3318
3303 3319 /*
3304 3320 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
3305 3321 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
3306 3322 * the TPI options, otherwise we take them from msg_control.
3307 3323 * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
3308 3324 * Always consumes mp; never consumes tudr_mp.
3309 3325 */
3310 3326 static int
3311 3327 icmp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
3312 3328 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
3313 3329 {
3314 3330 icmp_t *icmp = connp->conn_icmp;
3315 3331 icmp_stack_t *is = icmp->icmp_is;
3316 3332 int error;
3317 3333 ip_xmit_attr_t *ixa;
3318 3334 ip_pkt_t *ipp;
3319 3335 in6_addr_t v6src;
3320 3336 in6_addr_t v6dst;
3321 3337 in6_addr_t v6nexthop;
3322 3338 in_port_t dstport;
3323 3339 uint32_t flowinfo;
3324 3340 uint_t srcid;
3325 3341 int is_absreq_failure = 0;
3326 3342 conn_opt_arg_t coas, *coa;
3327 3343
3328 3344 ASSERT(tudr_mp != NULL || msg != NULL);
3329 3345
3330 3346 /*
3331 3347 * Get ixa before checking state to handle a disconnect race.
3332 3348 *
3333 3349 * We need an exclusive copy of conn_ixa since the ancillary data
3334 3350 * options might modify it. That copy has no pointers hence we
3335 3351 * need to set them up once we've parsed the ancillary data.
3336 3352 */
3337 3353 ixa = conn_get_ixa_exclusive(connp);
3338 3354 if (ixa == NULL) {
3339 3355 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3340 3356 freemsg(mp);
3341 3357 return (ENOMEM);
3342 3358 }
3343 3359 ASSERT(cr != NULL);
3344 3360 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3345 3361 ixa->ixa_cred = cr;
3346 3362 ixa->ixa_cpid = pid;
3347 3363 if (is_system_labeled()) {
3348 3364 /* We need to restart with a label based on the cred */
3349 3365 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3350 3366 }
3351 3367
3352 3368 /* In case previous destination was multicast or multirt */
3353 3369 ip_attr_newdst(ixa);
3354 3370
3355 3371 /* Get a copy of conn_xmit_ipp since the options might change it */
3356 3372 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
3357 3373 if (ipp == NULL) {
3358 3374 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3359 3375 ixa->ixa_cred = connp->conn_cred; /* Restore */
3360 3376 ixa->ixa_cpid = connp->conn_cpid;
3361 3377 ixa_refrele(ixa);
3362 3378 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3363 3379 freemsg(mp);
3364 3380 return (ENOMEM);
3365 3381 }
3366 3382 mutex_enter(&connp->conn_lock);
3367 3383 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
3368 3384 mutex_exit(&connp->conn_lock);
3369 3385 if (error != 0) {
3370 3386 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3371 3387 freemsg(mp);
3372 3388 goto done;
3373 3389 }
3374 3390
3375 3391 /*
3376 3392 * Parse the options and update ixa and ipp as a result.
3377 3393 */
3378 3394
3379 3395 coa = &coas;
3380 3396 coa->coa_connp = connp;
3381 3397 coa->coa_ixa = ixa;
3382 3398 coa->coa_ipp = ipp;
3383 3399 coa->coa_ancillary = B_TRUE;
3384 3400 coa->coa_changed = 0;
3385 3401
3386 3402 if (msg != NULL) {
3387 3403 error = process_auxiliary_options(connp, msg->msg_control,
3388 3404 msg->msg_controllen, coa, &icmp_opt_obj, icmp_opt_set, cr);
3389 3405 } else {
3390 3406 struct T_unitdata_req *tudr;
3391 3407
3392 3408 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
3393 3409 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
3394 3410 error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
3395 3411 &tudr->OPT_length, tudr->OPT_offset, cr, &icmp_opt_obj,
3396 3412 coa, &is_absreq_failure);
3397 3413 }
3398 3414 if (error != 0) {
3399 3415 /*
3400 3416 * Note: No special action needed in this
3401 3417 * module for "is_absreq_failure"
3402 3418 */
3403 3419 freemsg(mp);
3404 3420 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3405 3421 goto done;
3406 3422 }
3407 3423 ASSERT(is_absreq_failure == 0);
3408 3424
3409 3425 mutex_enter(&connp->conn_lock);
3410 3426 /*
3411 3427 * If laddr is unspecified then we look at sin6_src_id.
3412 3428 * We will give precedence to a source address set with IPV6_PKTINFO
3413 3429 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3414 3430 * want ip_attr_connect to select a source (since it can fail) when
3415 3431 * IPV6_PKTINFO is specified.
3416 3432 * If this doesn't result in a source address then we get a source
3417 3433 * from ip_attr_connect() below.
3418 3434 */
3419 3435 v6src = connp->conn_saddr_v6;
3420 3436 if (sin != NULL) {
3421 3437 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3422 3438 dstport = sin->sin_port;
3423 3439 flowinfo = 0;
3424 3440 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3425 3441 ixa->ixa_flags |= IXAF_IS_IPV4;
3426 3442 } else if (sin6 != NULL) {
3427 3443 v6dst = sin6->sin6_addr;
3428 3444 dstport = sin6->sin6_port;
3429 3445 flowinfo = sin6->sin6_flowinfo;
3430 3446 srcid = sin6->__sin6_src_id;
3431 3447 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3432 3448 ixa->ixa_scopeid = sin6->sin6_scope_id;
3433 3449 ixa->ixa_flags |= IXAF_SCOPEID_SET;
3434 3450 } else {
3435 3451 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3436 3452 }
3437 3453 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3438 3454 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3439 3455 connp->conn_netstack);
3440 3456 }
3441 3457 if (IN6_IS_ADDR_V4MAPPED(&v6dst))
3442 3458 ixa->ixa_flags |= IXAF_IS_IPV4;
3443 3459 else
3444 3460 ixa->ixa_flags &= ~IXAF_IS_IPV4;
3445 3461 } else {
3446 3462 /* Connected case */
3447 3463 v6dst = connp->conn_faddr_v6;
3448 3464 flowinfo = connp->conn_flowinfo;
3449 3465 }
3450 3466 mutex_exit(&connp->conn_lock);
3451 3467 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3452 3468 if (ipp->ipp_fields & IPPF_ADDR) {
3453 3469 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3454 3470 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3455 3471 v6src = ipp->ipp_addr;
3456 3472 } else {
3457 3473 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3458 3474 v6src = ipp->ipp_addr;
3459 3475 }
3460 3476 }
3461 3477 /*
3462 3478 * Allow source not assigned to the system
3463 3479 * only if it is not a local addresses
3464 3480 */
3465 3481 if (!V6_OR_V4_INADDR_ANY(v6src)) {
3466 3482 ip_laddr_t laddr_type;
3467 3483
3468 3484 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3469 3485 ipaddr_t v4src;
3470 3486
3471 3487 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
3472 3488 laddr_type = ip_laddr_verify_v4(v4src, ixa->ixa_zoneid,
3473 3489 is->is_netstack->netstack_ip, B_FALSE);
3474 3490 } else {
3475 3491 laddr_type = ip_laddr_verify_v6(&v6src, ixa->ixa_zoneid,
3476 3492 is->is_netstack->netstack_ip, B_FALSE, B_FALSE);
3477 3493 }
3478 3494 if (laddr_type != IPVL_UNICAST_UP)
3479 3495 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
3480 3496 }
3481 3497
3482 3498 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
3483 3499 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3484 3500 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
3485 3501
3486 3502 switch (error) {
3487 3503 case 0:
3488 3504 break;
3489 3505 case EADDRNOTAVAIL:
3490 3506 /*
3491 3507 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3492 3508 * Don't have the application see that errno
3493 3509 */
3494 3510 error = ENETUNREACH;
3495 3511 goto failed;
3496 3512 case ENETDOWN:
3497 3513 /*
3498 3514 * Have !ipif_addr_ready address; drop packet silently
3499 3515 * until we can get applications to not send until we
3500 3516 * are ready.
3501 3517 */
3502 3518 error = 0;
3503 3519 goto failed;
3504 3520 case EHOSTUNREACH:
3505 3521 case ENETUNREACH:
3506 3522 if (ixa->ixa_ire != NULL) {
3507 3523 /*
3508 3524 * Let conn_ip_output/ire_send_noroute return
3509 3525 * the error and send any local ICMP error.
3510 3526 */
3511 3527 error = 0;
3512 3528 break;
3513 3529 }
3514 3530 /* FALLTHRU */
3515 3531 default:
3516 3532 failed:
3517 3533 freemsg(mp);
3518 3534 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3519 3535 goto done;
3520 3536 }
3521 3537
3522 3538 /*
3523 3539 * We might be going to a different destination than last time,
3524 3540 * thus check that TX allows the communication and compute any
3525 3541 * needed label.
3526 3542 *
3527 3543 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3528 3544 * don't have to worry about concurrent threads.
3529 3545 */
3530 3546 if (is_system_labeled()) {
3531 3547 /*
3532 3548 * Check whether Trusted Solaris policy allows communication
3533 3549 * with this host, and pretend that the destination is
3534 3550 * unreachable if not.
3535 3551 * Compute any needed label and place it in ipp_label_v4/v6.
3536 3552 *
3537 3553 * Later conn_build_hdr_template/conn_prepend_hdr takes
3538 3554 * ipp_label_v4/v6 to form the packet.
3539 3555 *
3540 3556 * Tsol note: We have ipp structure local to this thread so
3541 3557 * no locking is needed.
3542 3558 */
3543 3559 error = conn_update_label(connp, ixa, &v6dst, ipp);
3544 3560 if (error != 0) {
3545 3561 freemsg(mp);
3546 3562 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3547 3563 goto done;
3548 3564 }
3549 3565 }
3550 3566 mp = icmp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, flowinfo, mp,
3551 3567 &error);
3552 3568 if (mp == NULL) {
3553 3569 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3554 3570 ASSERT(error != 0);
3555 3571 goto done;
3556 3572 }
3557 3573 if (ixa->ixa_pktlen > IP_MAXPACKET) {
3558 3574 error = EMSGSIZE;
3559 3575 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3560 3576 freemsg(mp);
3561 3577 goto done;
3562 3578 }
3563 3579
3564 3580 /* Policy might differ for different ICMP type/code */
3565 3581 mp = icmp_output_attach_policy(mp, connp, ixa);
3566 3582 if (mp == NULL) {
3567 3583 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3568 3584 error = EHOSTUNREACH; /* IPsec policy failure */
3569 3585 goto done;
3570 3586 }
3571 3587
3572 3588 /* We're done. Pass the packet to ip. */
3573 3589 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
3574 3590
3575 3591 error = conn_ip_output(mp, ixa);
3576 3592 if (!connp->conn_unspec_src)
3577 3593 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
3578 3594 /* No rawipOutErrors if an error since IP increases its error counter */
3579 3595 switch (error) {
3580 3596 case 0:
3581 3597 break;
3582 3598 case EWOULDBLOCK:
3583 3599 (void) ixa_check_drain_insert(connp, ixa);
3584 3600 error = 0;
3585 3601 break;
3586 3602 case EADDRNOTAVAIL:
3587 3603 /*
3588 3604 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3589 3605 * Don't have the application see that errno
3590 3606 */
3591 3607 error = ENETUNREACH;
3592 3608 /* FALLTHRU */
3593 3609 default:
3594 3610 mutex_enter(&connp->conn_lock);
3595 3611 /*
3596 3612 * Clear the source and v6lastdst so we call ip_attr_connect
3597 3613 * for the next packet and try to pick a better source.
3598 3614 */
3599 3615 if (connp->conn_mcbc_bind)
3600 3616 connp->conn_saddr_v6 = ipv6_all_zeros;
3601 3617 else
3602 3618 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3603 3619 connp->conn_v6lastdst = ipv6_all_zeros;
3604 3620 mutex_exit(&connp->conn_lock);
3605 3621 break;
3606 3622 }
3607 3623 done:
3608 3624 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3609 3625 ixa->ixa_cred = connp->conn_cred; /* Restore */
3610 3626 ixa->ixa_cpid = connp->conn_cpid;
3611 3627 ixa_refrele(ixa);
3612 3628 ip_pkt_free(ipp);
3613 3629 kmem_free(ipp, sizeof (*ipp));
3614 3630 return (error);
3615 3631 }
3616 3632
3617 3633 /*
3618 3634 * Handle sending an M_DATA for a connected socket.
3619 3635 * Handles both IPv4 and IPv6.
 *
 * Obtains an ip_xmit_attr_t with conn_get_ixa(), temporarily installs the
 * caller-supplied cr and pid in it, prepends the header template while
 * holding conn_lock, and hands the packet to conn_ip_output(). Once the
 * ixa has been obtained, every return path restores ixa_cred/ixa_cpid
 * from connp and drops the reference with ixa_refrele().
 *
 * Returns 0 or an errno value. EADDRNOTAVAIL is reported to the caller as
 * ENETUNREACH; ENETDOWN from ip_attr_connect() and EWOULDBLOCK from
 * conn_ip_output() are both reported as 0.
3620 3636 */
3621 3637 int
3622 3638 icmp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3623 3639 {
3624 3640 icmp_t *icmp = connp->conn_icmp;
3625 3641 icmp_stack_t *is = icmp->icmp_is;
3626 3642 int error;
3627 3643 ip_xmit_attr_t *ixa;
3628 3644 boolean_t do_ipsec;
3629 3645
3630 3646 /*
3631 3647 * If no other thread is using conn_ixa this just gets a reference to
3632 3648 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3633 3649 */
3634 3650 ixa = conn_get_ixa(connp, B_FALSE);
3635 3651 if (ixa == NULL) {
3636 3652 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3637 3653 freemsg(mp);
3638 3654 return (ENOMEM);
3639 3655 }
3640 3656
/* Use the credentials of this send for the duration of the call. */
3641 3657 ASSERT(cr != NULL);
3642 3658 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3643 3659 ixa->ixa_cred = cr;
3644 3660 ixa->ixa_cpid = pid;
3645 3661
3646 3662 /* Defer IPsec if it might need to look at ICMP type/code */
3647 3663 switch (ixa->ixa_protocol) {
3648 3664 case IPPROTO_ICMP:
3649 3665 case IPPROTO_ICMPV6:
3650 3666 do_ipsec = B_FALSE;
3651 3667 break;
3652 3668 default:
3653 3669 do_ipsec = B_TRUE;
3654 3670 }
3655 3671
/* conn_lock is held from here until one of the mutex_exit() calls below. */
3656 3672 mutex_enter(&connp->conn_lock);
3657 3673 mp = icmp_prepend_header_template(connp, ixa, mp,
3658 3674 &connp->conn_saddr_v6, connp->conn_flowinfo, &error);
3659 3675
3660 3676 if (mp == NULL) {
/* The template routine sets error whenever it returns NULL. */
3661 3677 ASSERT(error != 0);
3662 3678 mutex_exit(&connp->conn_lock);
3663 3679 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3664 3680 ixa->ixa_cred = connp->conn_cred; /* Restore */
3665 3681 ixa->ixa_cpid = connp->conn_cpid;
3666 3682 ixa_refrele(ixa);
3667 3683 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
/* mp is NULL at this point, so this freemsg() is handed a NULL pointer. */
3668 3684 freemsg(mp);
3669 3685 return (error);
3670 3686 }
3671 3687
3672 3688 if (!do_ipsec) {
3673 3689 /* Policy might differ for different ICMP type/code */
3674 3690 mp = icmp_output_attach_policy(mp, connp, ixa);
3675 3691 if (mp == NULL) {
3676 3692 mutex_exit(&connp->conn_lock);
3677 3693 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3678 3694 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3679 3695 ixa->ixa_cred = connp->conn_cred; /* Restore */
3680 3696 ixa->ixa_cpid = connp->conn_cpid;
3681 3697 ixa_refrele(ixa);
3682 3698 return (EHOSTUNREACH); /* IPsec policy failure */
3683 3699 }
3684 3700 }
3685 3701
3686 3702 /*
3687 3703 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3688 3704 * safe copy, then we need to fill in any pointers in it.
3689 3705 */
3690 3706 if (ixa->ixa_ire == NULL) {
3691 3707 in6_addr_t faddr, saddr;
3692 3708 in6_addr_t nexthop;
3693 3709 in_port_t fport;
3694 3710
/* Snapshot the connected addresses and port before dropping conn_lock. */
3695 3711 saddr = connp->conn_saddr_v6;
3696 3712 faddr = connp->conn_faddr_v6;
3697 3713 fport = connp->conn_fport;
3698 3714 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3699 3715 mutex_exit(&connp->conn_lock);
3700 3716
3701 3717 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3702 3718 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3703 3719 (do_ipsec ? IPDF_IPSEC : 0));
3704 3720 switch (error) {
3705 3721 case 0:
3706 3722 break;
3707 3723 case EADDRNOTAVAIL:
3708 3724 /*
3709 3725 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3710 3726 * Don't have the application see that errno
3711 3727 */
3712 3728 error = ENETUNREACH;
3713 3729 goto failed;
3714 3730 case ENETDOWN:
3715 3731 /*
3716 3732 * Have !ipif_addr_ready address; drop packet silently
3717 3733 * until we can get applications to not send until we
3718 3734 * are ready.
3719 3735 */
3720 3736 error = 0;
3721 3737 goto failed;
3722 3738 case EHOSTUNREACH:
3723 3739 case ENETUNREACH:
3724 3740 if (ixa->ixa_ire != NULL) {
3725 3741 /*
3726 3742 * Let conn_ip_output/ire_send_noroute return
3727 3743 * the error and send any local ICMP error.
3728 3744 */
3729 3745 error = 0;
3730 3746 break;
3731 3747 }
3732 3748 /* FALLTHRU */
3733 3749 default:
/*
 * Common failure exit, also entered by goto from the cases above.
 * conn_lock has already been dropped. For ENETDOWN this is reached
 * with error == 0: the packet is freed and counted in rawipOutErrors
 * but 0 is returned.
 */
3734 3750 failed:
3735 3751 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3736 3752 ixa->ixa_cred = connp->conn_cred; /* Restore */
3737 3753 ixa->ixa_cpid = connp->conn_cpid;
3738 3754 ixa_refrele(ixa);
3739 3755 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3740 3756 freemsg(mp);
3741 3757 return (error);
3742 3758 }
3743 3759 } else {
3744 3760 /* Done with conn_t */
3745 3761 mutex_exit(&connp->conn_lock);
3746 3762 }
3747 3763
3748 3764 /* We're done. Pass the packet to ip. */
3749 3765 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
3750 3766
3751 3767 error = conn_ip_output(mp, ixa);
3752 3768 /* No rawipOutErrors if an error since IP increases its error counter */
3753 3769 switch (error) {
3754 3770 case 0:
3755 3771 break;
3756 3772 case EWOULDBLOCK:
/* Not reported to the caller; the result of the drain insert is ignored. */
3757 3773 (void) ixa_check_drain_insert(connp, ixa);
3758 3774 error = 0;
3759 3775 break;
3760 3776 case EADDRNOTAVAIL:
3761 3777 /*
3762 3778 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3763 3779 * Don't have the application see that errno
3764 3780 */
3765 3781 error = ENETUNREACH;
3766 3782 break;
/* Any other errno from conn_ip_output() is returned unchanged. */
3767 3783 }
3768 3784 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3769 3785 ixa->ixa_cred = connp->conn_cred; /* Restore */
3770 3786 ixa->ixa_cpid = connp->conn_cpid;
3771 3787 ixa_refrele(ixa);
3772 3788 return (error);
3773 3789 }
3774 3790
3775 3791 /*
3776 3792 * Handle sending an M_DATA to the last destination.
3777 3793 * Handles both IPv4 and IPv6.
3778 3794 *
3779 3795 * NOTE: The caller must hold conn_lock and we drop it here.
 *
 * The caller also supplies ixa. This routine installs cr and pid in it for
 * the duration of the send and, on every return path, restores
 * ixa_cred/ixa_cpid from connp and releases the reference with
 * ixa_refrele().
 *
 * The header is built from conn_v6lastsrc and conn_lastflowinfo. When
 * ixa_ire is NULL the route is re-established with ip_attr_connect() using
 * conn_v6lastsrc, conn_v6lastdst and conn_lastdstport.
 *
 * Returns 0 or an errno value. EADDRNOTAVAIL is reported as ENETUNREACH;
 * ENETDOWN from ip_attr_connect() and EWOULDBLOCK from conn_ip_output()
 * are reported as 0.
3780 3796 */
3781 3797 int
3782 3798 icmp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3783 3799 ip_xmit_attr_t *ixa)
3784 3800 {
3785 3801 icmp_t *icmp = connp->conn_icmp;
3786 3802 icmp_stack_t *is = icmp->icmp_is;
3787 3803 int error;
3788 3804 boolean_t do_ipsec;
3789 3805
3790 3806 ASSERT(MUTEX_HELD(&connp->conn_lock));
3791 3807 ASSERT(ixa != NULL);
3792 3808
3793 3809 ASSERT(cr != NULL);
3794 3810 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3795 3811 ixa->ixa_cred = cr;
3796 3812 ixa->ixa_cpid = pid;
3797 3813
3798 3814 /* Defer IPsec if it might need to look at ICMP type/code */
3799 3815 switch (ixa->ixa_protocol) {
3800 3816 case IPPROTO_ICMP:
3801 3817 case IPPROTO_ICMPV6:
3802 3818 do_ipsec = B_FALSE;
3803 3819 break;
3804 3820 default:
3805 3821 do_ipsec = B_TRUE;
3806 3822 }
3807 3823
3808 3824
3809 3825 mp = icmp_prepend_header_template(connp, ixa, mp,
3810 3826 &connp->conn_v6lastsrc, connp->conn_lastflowinfo, &error);
3811 3827
3812 3828 if (mp == NULL) {
/* The template routine sets error whenever it returns NULL. */
3813 3829 ASSERT(error != 0);
3814 3830 mutex_exit(&connp->conn_lock);
3815 3831 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3816 3832 ixa->ixa_cred = connp->conn_cred; /* Restore */
3817 3833 ixa->ixa_cpid = connp->conn_cpid;
3818 3834 ixa_refrele(ixa);
3819 3835 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
/* mp is NULL at this point, so this freemsg() is handed a NULL pointer. */
3820 3836 freemsg(mp);
3821 3837 return (error);
3822 3838 }
3823 3839
3824 3840 if (!do_ipsec) {
3825 3841 /* Policy might differ for different ICMP type/code */
3826 3842 mp = icmp_output_attach_policy(mp, connp, ixa);
3827 3843 if (mp == NULL) {
3828 3844 mutex_exit(&connp->conn_lock);
3829 3845 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3830 3846 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3831 3847 ixa->ixa_cred = connp->conn_cred; /* Restore */
3832 3848 ixa->ixa_cpid = connp->conn_cpid;
3833 3849 ixa_refrele(ixa);
3834 3850 return (EHOSTUNREACH); /* IPsec policy failure */
3835 3851 }
3836 3852 }
3837 3853
3838 3854 /*
3839 3855 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3840 3856 * safe copy, then we need to fill in any pointers in it.
3841 3857 */
3842 3858 if (ixa->ixa_ire == NULL) {
3843 3859 in6_addr_t lastdst, lastsrc;
3844 3860 in6_addr_t nexthop;
3845 3861 in_port_t lastport;
3846 3862
/* Snapshot the remembered source, destination and port under conn_lock. */
3847 3863 lastsrc = connp->conn_v6lastsrc;
3848 3864 lastdst = connp->conn_v6lastdst;
3849 3865 lastport = connp->conn_lastdstport;
3850 3866 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3851 3867 mutex_exit(&connp->conn_lock);
3852 3868
3853 3869 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3854 3870 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3855 3871 IPDF_VERIFY_DST | (do_ipsec ? IPDF_IPSEC : 0));
3856 3872 switch (error) {
3857 3873 case 0:
3858 3874 break;
3859 3875 case EADDRNOTAVAIL:
3860 3876 /*
3861 3877 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3862 3878 * Don't have the application see that errno
3863 3879 */
3864 3880 error = ENETUNREACH;
3865 3881 goto failed;
3866 3882 case ENETDOWN:
3867 3883 /*
3868 3884 * Have !ipif_addr_ready address; drop packet silently
3869 3885 * until we can get applications to not send until we
3870 3886 * are ready.
3871 3887 */
3872 3888 error = 0;
3873 3889 goto failed;
3874 3890 case EHOSTUNREACH:
3875 3891 case ENETUNREACH:
3876 3892 if (ixa->ixa_ire != NULL) {
3877 3893 /*
3878 3894 * Let conn_ip_output/ire_send_noroute return
3879 3895 * the error and send any local ICMP error.
3880 3896 */
3881 3897 error = 0;
3882 3898 break;
3883 3899 }
3884 3900 /* FALLTHRU */
3885 3901 default:
/*
 * Common failure exit, also entered by goto from the cases above.
 * conn_lock has already been dropped. For ENETDOWN this is reached
 * with error == 0: the packet is freed and counted in rawipOutErrors
 * but 0 is returned.
 */
3886 3902 failed:
3887 3903 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3888 3904 ixa->ixa_cred = connp->conn_cred; /* Restore */
3889 3905 ixa->ixa_cpid = connp->conn_cpid;
3890 3906 ixa_refrele(ixa);
3891 3907 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
3892 3908 freemsg(mp);
3893 3909 return (error);
3894 3910 }
3895 3911 } else {
3896 3912 /* Done with conn_t */
3897 3913 mutex_exit(&connp->conn_lock);
3898 3914 }
3899 3915
3900 3916 /* We're done. Pass the packet to ip. */
3901 3917 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
3902 3918 error = conn_ip_output(mp, ixa);
3903 3919 /* No rawipOutErrors if an error since IP increases its error counter */
3904 3920 switch (error) {
3905 3921 case 0:
3906 3922 break;
3907 3923 case EWOULDBLOCK:
/* Not reported to the caller; the result of the drain insert is ignored. */
3908 3924 (void) ixa_check_drain_insert(connp, ixa);
3909 3925 error = 0;
3910 3926 break;
3911 3927 case EADDRNOTAVAIL:
3912 3928 /*
3913 3929 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3914 3930 * Don't have the application see that errno
3915 3931 */
3916 3932 error = ENETUNREACH;
3917 3933 /* FALLTHRU */
3918 3934 default:
/* conn_lock was dropped before the send, so take it again for the reset. */
3919 3935 mutex_enter(&connp->conn_lock);
3920 3936 /*
3921 3937 * Clear the source and v6lastdst so we call ip_attr_connect
3922 3938 * for the next packet and try to pick a better source.
 * The source is reset to all-zeros when conn_mcbc_bind is set and
 * to conn_bound_addr_v6 otherwise.
3923 3939 */
3924 3940 if (connp->conn_mcbc_bind)
3925 3941 connp->conn_saddr_v6 = ipv6_all_zeros;
3926 3942 else
3927 3943 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3928 3944 connp->conn_v6lastdst = ipv6_all_zeros;
3929 3945 mutex_exit(&connp->conn_lock);
3930 3946 break;
3931 3947 }
3932 3948 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3933 3949 ixa->ixa_cred = connp->conn_cred; /* Restore */
3934 3950 ixa->ixa_cpid = connp->conn_cpid;
3935 3951 ixa_refrele(ixa);
3936 3952 return (error);
3937 3953 }
3938 3954
3939 3955
3940 3956 /*
3941 3957 * Prepend the header template and then fill in the source and
3942 3958 * flowinfo. The caller needs to handle the destination address since
3943 3959 * its setting is different if rthdr or source route.
3944 3960 *
3945 3961 * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
3946 3962 * When it returns NULL it sets errorp.
 *
 * The caller must hold conn_lock. On every NULL return the message has
 * already been freed here, and *errorp is EMSGSIZE (packet too large) or
 * ENOMEM (allocb or msgpullup failed). On success the returned mblk may be
 * a different one from the mp passed in, and ixa_pktlen and
 * ixa_ip_hdr_length have been set in ixa.
3947 3963 */
3948 3964 static mblk_t *
3949 3965 icmp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3950 3966 const in6_addr_t *v6src, uint32_t flowinfo, int *errorp)
3951 3967 {
3952 3968 icmp_t *icmp = connp->conn_icmp;
3953 3969 icmp_stack_t *is = icmp->icmp_is;
3954 3970 uint_t pktlen;
3955 3971 uint_t copylen;
3956 3972 uint8_t *iph;
3957 3973 uint_t ip_hdr_length;
3958 3974 uint32_t cksum;
3959 3975 ip_pkt_t *ipp;
3960 3976
3961 3977 ASSERT(MUTEX_HELD(&connp->conn_lock));
3962 3978
3963 3979 /*
3964 3980 * Copy the header template.
 * pktlen is the template length plus the payload length of the
 * whole message chain.
3965 3981 */
3966 3982 copylen = connp->conn_ht_iphc_len;
3967 3983 pktlen = copylen + msgdsize(mp);
3968 3984 if (pktlen > IP_MAXPACKET) {
3969 3985 freemsg(mp);
3970 3986 *errorp = EMSGSIZE;
3971 3987 return (NULL);
3972 3988 }
3973 3989 ixa->ixa_pktlen = pktlen;
3974 3990
3975 3991 /* check/fix buffer config, setup pointers into it */
/*
 * The header is written in place in front of b_rptr only when DB_REF(mp)
 * is 1, the new start does not fall below DB_BASE(mp), and it passes
 * OK_32PTR (presumably a 32-bit alignment check). Otherwise a new mblk is
 * allocated for the header and the original message is chained behind it.
 */
3976 3992 iph = mp->b_rptr - copylen;
3977 3993 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3978 3994 mblk_t *mp1;
3979 3995
3980 3996 mp1 = allocb(copylen + is->is_wroff_extra, BPRI_MED);
3981 3997 if (mp1 == NULL) {
3982 3998 freemsg(mp);
3983 3999 *errorp = ENOMEM;
3984 4000 return (NULL);
3985 4001 }
/*
 * Place the header in the last copylen bytes of the new buffer so the
 * rest of the buffer remains in front of it.
 */
3986 4002 mp1->b_wptr = DB_LIM(mp1);
3987 4003 mp1->b_cont = mp;
3988 4004 mp = mp1;
3989 4005 iph = (mp->b_wptr - copylen);
3990 4006 }
3991 4007 mp->b_rptr = iph;
3992 4008 bcopy(connp->conn_ht_iphc, iph, copylen);
/* The IP header length is the offset of conn_ht_ulp within the template. */
3993 4009 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3994 4010
3995 4011 ixa->ixa_ip_hdr_length = ip_hdr_length;
3996 4012
3997 4013 /*
3998 4014 * Prepare for ICMPv6 checksum done in IP.
3999 4015 *
4000 4016 * icmp_build_hdr_template has already massaged any routing header
4001 4017 * and placed the result in conn_sum.
4002 4018 *
4003 4019 * We make it easy for IP to include our pseudo header
4004 4020 * by putting our length (and any routing header adjustment)
4005 4021 * in the ICMPv6 checksum field.
 *
 * The value computed here is only stored by the IPv6 branch below.
4006 4022 */
4007 4023 cksum = pktlen - ip_hdr_length;
4008 4024
4009 4025 cksum += connp->conn_sum;
/* Fold the carry out of the upper 16 bits back into the lower 16 bits. */
4010 4026 cksum = (cksum >> 16) + (cksum & 0xFFFF);
4011 4027 ASSERT(cksum < 0x10000);
4012 4028
4013 4029 ipp = &connp->conn_xmit_ipp;
4014 4030 if (ixa->ixa_flags & IXAF_IS_IPV4) {
4015 4031 ipha_t *ipha = (ipha_t *)iph;
4016 4032
4017 4033 ipha->ipha_length = htons((uint16_t)pktlen);
4018 4034
4019 4035 /* if IP_PKTINFO specified an address it wins over bind() */
4020 4036 if ((ipp->ipp_fields & IPPF_ADDR) &&
4021 4037 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
4022 4038 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
4023 4039 ipha->ipha_src = ipp->ipp_addr_v4;
4024 4040 } else {
4025 4041 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
4026 4042 }
4027 4043 } else {
4028 4044 ip6_t *ip6h = (ip6_t *)iph;
4029 4045 uint_t cksum_offset = 0;
4030 4046
/* ip6_plen excludes the fixed IPv6 header. */
4031 4047 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
4032 4048
4033 4049 /* if IP_PKTINFO specified an address it wins over bind() */
4034 4050 if ((ipp->ipp_fields & IPPF_ADDR) &&
4035 4051 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
4036 4052 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
4037 4053 ip6h->ip6_src = ipp->ipp_addr;
4038 4054 } else {
4039 4055 ip6h->ip6_src = *v6src;
4040 4056 }
/* Default version bits combined with the caller flowinfo bits. */
4041 4057 ip6h->ip6_vcf =
4042 4058 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4043 4059 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4044 4060 if (ipp->ipp_fields & IPPF_TCLASS) {
4045 4061 /* Overrides the class part of flowinfo */
4046 4062 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4047 4063 ipp->ipp_tclass);
4048 4064 }
4049 4065
/*
 * Locate the checksum field: the ICMPv6 checksum for an ICMPv6 socket,
 * otherwise the raw checksum offset when IXAF_SET_RAW_CKSUM is set.
 * cksum_offset stays 0 when no checksum value is to be stored.
 */
4050 4066 if (ixa->ixa_flags & IXAF_SET_ULP_CKSUM) {
4051 4067 if (connp->conn_proto == IPPROTO_ICMPV6) {
4052 4068 cksum_offset = ixa->ixa_ip_hdr_length +
4053 4069 offsetof(icmp6_t, icmp6_cksum);
4054 4070 } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
4055 4071 cksum_offset = ixa->ixa_ip_hdr_length +
4056 4072 ixa->ixa_raw_cksum_offset;
4057 4073 }
4058 4074 }
4059 4075 if (cksum_offset != 0) {
4060 4076 uint16_t *ptr;
4061 4077
4062 4078 /* Make sure the checksum fits in the first mblk */
4063 4079 if (cksum_offset + sizeof (short) > MBLKL(mp)) {
4064 4080 mblk_t *mp1;
4065 4081
4066 4082 mp1 = msgpullup(mp,
4067 4083 cksum_offset + sizeof (short));
/* The old chain is freed whether or not the pullup succeeded. */
4068 4084 freemsg(mp);
4069 4085 if (mp1 == NULL) {
4070 4086 *errorp = ENOMEM;
4071 4087 return (NULL);
4072 4088 }
/* Re-point the header pointers at the pulled-up message. */
4073 4089 mp = mp1;
4074 4090 iph = mp->b_rptr;
4075 4091 ip6h = (ip6_t *)iph;
4076 4092 }
4077 4093 ptr = (uint16_t *)(mp->b_rptr + cksum_offset);
4078 4094 *ptr = htons(cksum);
4079 4095 }
4080 4096 }
4081 4097
4082 4098 return (mp);
4083 4099 }
4084 4100
4085 4101 /*
4086 4102 * This routine handles all messages passed downstream. It either
4087 4103 * consumes the message or passes it downstream; it never queues a
4088 4104 * message.
 *
 * Only an M_PROTO/M_PCPROTO message that is at least
 * sizeof (struct T_unitdata_req) long and has type T_UNITDATA_REQ is
 * transmitted here. M_DATA is counted in rawipOutErrors and freed, and
 * every other message is handed to icmp_wput_other(). When a send fails
 * the T_UNITDATA_REQ block is passed to icmp_ud_err() with the error.
4089 4105 */
4090 4106 void
4091 4107 icmp_wput(queue_t *q, mblk_t *mp)
4092 4108 {
4093 4109 sin6_t *sin6;
4094 4110 sin_t *sin = NULL;
4095 4111 uint_t srcid;
4096 4112 conn_t *connp = Q_TO_CONN(q);
4097 4113 icmp_t *icmp = connp->conn_icmp;
4098 4114 int error = 0;
4099 4115 struct sockaddr *addr = NULL;
4100 4116 socklen_t addrlen;
4101 4117 icmp_stack_t *is = icmp->icmp_is;
4102 4118 struct T_unitdata_req *tudr;
4103 4119 mblk_t *data_mp;
4104 4120 cred_t *cr;
4105 4121 pid_t pid;
4106 4122
4107 4123 /*
4108 4124 * We directly handle several cases here: T_UNITDATA_REQ message
4109 4125 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
4110 4126 * socket.
 * M_DATA is not transmitted: the case below counts it as an output
 * error and frees it.
4111 4127 */
4112 4128 switch (DB_TYPE(mp)) {
4113 4129 case M_DATA:
4114 4130 /* sockfs never sends down M_DATA */
4115 4131 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4116 4132 freemsg(mp);
4117 4133 return;
4118 4134
4119 4135 case M_PROTO:
4120 4136 case M_PCPROTO:
4121 4137 tudr = (struct T_unitdata_req *)mp->b_rptr;
/* Too short for, or not, a T_UNITDATA_REQ: let icmp_wput_other() deal with it. */
4122 4138 if (MBLKL(mp) < sizeof (*tudr) ||
4123 4139 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
4124 4140 icmp_wput_other(q, mp);
4125 4141 return;
4126 4142 }
4127 4143 break;
4128 4144
4129 4145 default:
4130 4146 icmp_wput_other(q, mp);
4131 4147 return;
4132 4148 }
4133 4149
4134 4150 /* Handle valid T_UNITDATA_REQ here */
4135 4151 data_mp = mp->b_cont;
4136 4152 if (data_mp == NULL) {
4137 4153 error = EPROTO;
4138 4154 goto ud_error2;
4139 4155 }
/* Detach the payload; from here mp is the request and data_mp the data. */
4140 4156 mp->b_cont = NULL;
4141 4157
/* The destination address must lie inside the request block. */
4142 4158 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
4143 4159 error = EADDRNOTAVAIL;
4144 4160 goto ud_error2;
4145 4161 }
4146 4162
4147 4163 /*
4148 4164 * All Solaris components should pass a db_credp
4149 4165 * for this message, hence we ASSERT.
4150 4166 * On production kernels we return an error to be robust against
4151 4167 * random streams modules sitting on top of us.
4152 4168 */
4153 4169 cr = msg_getcred(mp, &pid);
4154 4170 ASSERT(cr != NULL);
4155 4171 if (cr == NULL) {
4156 4172 error = EINVAL;
4157 4173 goto ud_error2;
4158 4174 }
4159 4175
4160 4176 /*
4161 4177 * If a port has not been bound to the stream, fail.
4162 4178 * This is not a problem when sockfs is directly
4163 4179 * above us, because it will ensure that the socket
4164 4180 * is first bound before allowing data to be sent.
4165 4181 */
4166 4182 if (icmp->icmp_state == TS_UNBND) {
4167 4183 error = EPROTO;
4168 4184 goto ud_error2;
4169 4185 }
4170 4186 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
4171 4187 addrlen = tudr->DEST_length;
4172 4188
/*
 * NOTE(review): this switch has no default case. A conn_family other than
 * AF_INET6 or AF_INET would fall through to icmp_ud_err() with error == 0
 * and without freeing data_mp. Presumably that cannot happen for this
 * module; confirm.
 */
4173 4189 switch (connp->conn_family) {
4174 4190 case AF_INET6:
4175 4191 sin6 = (sin6_t *)addr;
4176 4192 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
4177 4193 (sin6->sin6_family != AF_INET6)) {
4178 4194 error = EADDRNOTAVAIL;
4179 4195 goto ud_error2;
4180 4196 }
4181 4197
4182 4198 /* No support for mapped addresses on raw sockets */
4183 4199 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4184 4200 error = EADDRNOTAVAIL;
4185 4201 goto ud_error2;
4186 4202 }
4187 4203 srcid = sin6->__sin6_src_id;
4188 4204
4189 4205 /*
4190 4206 * If the local address is a mapped address return
4191 4207 * an error.
4192 4208 * It would be possible to send an IPv6 packet but the
4193 4209 * response would never make it back to the application
4194 4210 * since it is bound to a mapped address.
4195 4211 */
4196 4212 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
4197 4213 error = EADDRNOTAVAIL;
4198 4214 goto ud_error2;
4199 4215 }
4200 4216
/* An unspecified destination is rewritten in place to the loopback address. */
4201 4217 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
4202 4218 sin6->sin6_addr = ipv6_loopback;
4203 4219
4204 4220 if (tudr->OPT_length != 0) {
4205 4221 /*
4206 4222 * If we are connected then the destination needs to be
4207 4223 * the same as the connected one.
4208 4224 */
4209 4225 if (icmp->icmp_state == TS_DATA_XFER &&
4210 4226 !conn_same_as_last_v6(connp, sin6)) {
4211 4227 error = EISCONN;
4212 4228 goto ud_error2;
4213 4229 }
4214 4230 error = icmp_output_ancillary(connp, NULL, sin6,
4215 4231 data_mp, mp, NULL, cr, pid);
4216 4232 } else {
4217 4233 ip_xmit_attr_t *ixa;
4218 4234
4219 4235 /*
4220 4236 * We have to allocate an ip_xmit_attr_t before we grab
4221 4237 * conn_lock and we need to hold conn_lock once we've
4222 4238 * checked conn_same_as_last_v6 to handle concurrent
4223 4239 * send* calls on a socket.
4224 4240 */
4225 4241 ixa = conn_get_ixa(connp, B_FALSE);
4226 4242 if (ixa == NULL) {
4227 4243 error = ENOMEM;
4228 4244 goto ud_error2;
4229 4245 }
4230 4246 mutex_enter(&connp->conn_lock);
4231 4247
/*
 * The last-destination path is taken only when the destination, the
 * source id and the IPsec policy are all unchanged.
 */
4232 4248 if (conn_same_as_last_v6(connp, sin6) &&
4233 4249 connp->conn_lastsrcid == srcid &&
4234 4250 ipsec_outbound_policy_current(ixa)) {
4235 4251 /* icmp_output_lastdst drops conn_lock */
4236 4252 error = icmp_output_lastdst(connp, data_mp, cr,
4237 4253 pid, ixa);
4238 4254 } else {
4239 4255 /* icmp_output_newdst drops conn_lock */
4240 4256 error = icmp_output_newdst(connp, data_mp, NULL,
4241 4257 sin6, cr, pid, ixa);
4242 4258 }
4243 4259 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
4244 4260 }
/* Success: only the request block is left to free. */
4245 4261 if (error == 0) {
4246 4262 freeb(mp);
4247 4263 return;
4248 4264 }
4249 4265 break;
4250 4266
4251 4267 case AF_INET:
4252 4268 sin = (sin_t *)addr;
4253 4269 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
4254 4270 (sin->sin_family != AF_INET)) {
4255 4271 error = EADDRNOTAVAIL;
4256 4272 goto ud_error2;
4257 4273 }
/* INADDR_ANY as destination is rewritten in place to the loopback address. */
4258 4274 if (sin->sin_addr.s_addr == INADDR_ANY)
4259 4275 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
4260 4276
4261 4277 /* Protocol 255 contains full IP headers */
4262 4278 /* Read without holding lock */
4263 4279 if (icmp->icmp_hdrincl) {
/* Make sure a full simple IP header is in the first data mblk. */
4264 4280 if (MBLKL(data_mp) < IP_SIMPLE_HDR_LENGTH) {
4265 4281 if (!pullupmsg(data_mp, IP_SIMPLE_HDR_LENGTH)) {
4266 4282 error = EINVAL;
4267 4283 goto ud_error2;
4268 4284 }
4269 4285 }
4270 4286 error = icmp_output_hdrincl(connp, data_mp, cr, pid);
4271 4287 if (error == 0) {
4272 4288 freeb(mp);
4273 4289 return;
4274 4290 }
4275 4291 /* data_mp consumed above */
4276 4292 data_mp = NULL;
4277 4293 goto ud_error2;
4278 4294 }
4279 4295
4280 4296 if (tudr->OPT_length != 0) {
4281 4297 /*
4282 4298 * If we are connected then the destination needs to be
4283 4299 * the same as the connected one.
4284 4300 */
4285 4301 if (icmp->icmp_state == TS_DATA_XFER &&
4286 4302 !conn_same_as_last_v4(connp, sin)) {
4287 4303 error = EISCONN;
4288 4304 goto ud_error2;
4289 4305 }
4290 4306 error = icmp_output_ancillary(connp, sin, NULL,
4291 4307 data_mp, mp, NULL, cr, pid);
4292 4308 } else {
4293 4309 ip_xmit_attr_t *ixa;
4294 4310
4295 4311 /*
4296 4312 * We have to allocate an ip_xmit_attr_t before we grab
4297 4313 * conn_lock and we need to hold conn_lock once we've
4298 4314 * checked conn_same_as_last_v4 to handle concurrent
4299 4315 * send* calls on a socket.
4300 4316 */
4301 4317 ixa = conn_get_ixa(connp, B_FALSE);
4302 4318 if (ixa == NULL) {
4303 4319 error = ENOMEM;
4304 4320 goto ud_error2;
4305 4321 }
4306 4322 mutex_enter(&connp->conn_lock);
4307 4323
/* Unlike the AF_INET6 case, no source id is compared here. */
4308 4324 if (conn_same_as_last_v4(connp, sin) &&
4309 4325 ipsec_outbound_policy_current(ixa)) {
4310 4326 /* icmp_output_lastdst drops conn_lock */
4311 4327 error = icmp_output_lastdst(connp, data_mp, cr,
4312 4328 pid, ixa);
4313 4329 } else {
4314 4330 /* icmp_output_newdst drops conn_lock */
4315 4331 error = icmp_output_newdst(connp, data_mp, sin,
4316 4332 NULL, cr, pid, ixa);
4317 4333 }
4318 4334 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
4319 4335 }
/* Success: only the request block is left to free. */
4320 4336 if (error == 0) {
4321 4337 freeb(mp);
4322 4338 return;
4323 4339 }
4324 4340 break;
4325 4341 }
/*
 * Reached when an output routine returned an error. data_mp is not freed
 * on this path; presumably the output routine has already disposed of
 * it - TODO confirm for icmp_output_ancillary().
 */
4326 4342 ASSERT(mp != NULL);
4327 4343 /* mp is freed by the following routine */
4328 4344 icmp_ud_err(q, mp, (t_scalar_t)error);
4329 4345 return;
4330 4346
/*
 * Failure exit for errors detected in this routine: count the error, free
 * the payload and report the error. data_mp may be NULL here (request
 * without data, or payload already consumed by icmp_output_hdrincl()).
 */
4331 4347 ud_error2:
4332 4348 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4333 4349 freemsg(data_mp);
4334 4350 ASSERT(mp != NULL);
4335 4351 /* mp is freed by the following routine */
4336 4352 icmp_ud_err(q, mp, (t_scalar_t)error);
4337 4353 }
4338 4354
4339 4355 /*
4340 4356 * Handle the case of the IP address or flow label being different
4341 4357 * for both IPv4 and IPv6.
4342 4358 *
4343 4359 * NOTE: The caller must hold conn_lock and we drop it here.
4344 4360 */
4345 4361 static int
4346 4362 icmp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
4347 4363 cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
4348 4364 {
4349 4365 icmp_t *icmp = connp->conn_icmp;
4350 4366 icmp_stack_t *is = icmp->icmp_is;
4351 4367 int error;
4352 4368 ip_xmit_attr_t *oldixa;
4353 4369 boolean_t do_ipsec;
4354 4370 uint_t srcid;
4355 4371 uint32_t flowinfo;
4356 4372 in6_addr_t v6src;
4357 4373 in6_addr_t v6dst;
4358 4374 in6_addr_t v6nexthop;
4359 4375 in_port_t dstport;
4360 4376
4361 4377 ASSERT(MUTEX_HELD(&connp->conn_lock));
4362 4378 ASSERT(ixa != NULL);
4363 4379
4364 4380 /*
4365 4381 * We hold conn_lock across all the use and modifications of
4366 4382 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
4367 4383 * stay consistent.
4368 4384 */
4369 4385
4370 4386 ASSERT(cr != NULL);
4371 4387 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4372 4388 ixa->ixa_cred = cr;
4373 4389 ixa->ixa_cpid = pid;
4374 4390 if (is_system_labeled()) {
4375 4391 /* We need to restart with a label based on the cred */
4376 4392 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
4377 4393 }
4378 4394 /*
4379 4395 * If we are connected then the destination needs to be the
4380 4396 * same as the connected one, which is not the case here since we
4381 4397 * checked for that above.
4382 4398 */
4383 4399 if (icmp->icmp_state == TS_DATA_XFER) {
4384 4400 mutex_exit(&connp->conn_lock);
4385 4401 error = EISCONN;
4386 4402 goto ud_error;
4387 4403 }
4388 4404
4389 4405 /* In case previous destination was multicast or multirt */
4390 4406 ip_attr_newdst(ixa);
4391 4407
4392 4408 /*
4393 4409 * If laddr is unspecified then we look at sin6_src_id.
4394 4410 * We will give precedence to a source address set with IPV6_PKTINFO
4395 4411 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
4396 4412 * want ip_attr_connect to select a source (since it can fail) when
4397 4413 * IPV6_PKTINFO is specified.
4398 4414 * If this doesn't result in a source address then we get a source
4399 4415 * from ip_attr_connect() below.
4400 4416 */
4401 4417 v6src = connp->conn_saddr_v6;
4402 4418 if (sin != NULL) {
4403 4419 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
4404 4420 dstport = sin->sin_port;
4405 4421 flowinfo = 0;
4406 4422 srcid = 0;
4407 4423 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4408 4424 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
4409 4425 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4410 4426 connp->conn_netstack);
4411 4427 }
4412 4428 ixa->ixa_flags |= IXAF_IS_IPV4;
4413 4429 } else {
4414 4430 v6dst = sin6->sin6_addr;
4415 4431 dstport = sin6->sin6_port;
4416 4432 flowinfo = sin6->sin6_flowinfo;
4417 4433 srcid = sin6->__sin6_src_id;
4418 4434 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
4419 4435 ixa->ixa_scopeid = sin6->sin6_scope_id;
4420 4436 ixa->ixa_flags |= IXAF_SCOPEID_SET;
4421 4437 } else {
4422 4438 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4423 4439 }
4424 4440 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4425 4441 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4426 4442 connp->conn_netstack);
4427 4443 }
4428 4444 if (IN6_IS_ADDR_V4MAPPED(&v6dst))
4429 4445 ixa->ixa_flags |= IXAF_IS_IPV4;
4430 4446 else
4431 4447 ixa->ixa_flags &= ~IXAF_IS_IPV4;
4432 4448 }
4433 4449 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
4434 4450 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
4435 4451 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
4436 4452
4437 4453 if (ixa->ixa_flags & IXAF_IS_IPV4) {
4438 4454 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4439 4455 v6src = ipp->ipp_addr;
4440 4456 } else {
4441 4457 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4442 4458 v6src = ipp->ipp_addr;
4443 4459 }
4444 4460 }
4445 4461
4446 4462 /* Defer IPsec if it might need to look at ICMP type/code */
4447 4463 switch (ixa->ixa_protocol) {
4448 4464 case IPPROTO_ICMP:
4449 4465 case IPPROTO_ICMPV6:
4450 4466 do_ipsec = B_FALSE;
4451 4467 break;
4452 4468 default:
4453 4469 do_ipsec = B_TRUE;
4454 4470 }
4455 4471
4456 4472 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
4457 4473 mutex_exit(&connp->conn_lock);
4458 4474
4459 4475 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
4460 4476 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
4461 4477 (do_ipsec ? IPDF_IPSEC : 0));
4462 4478 switch (error) {
4463 4479 case 0:
4464 4480 break;
4465 4481 case EADDRNOTAVAIL:
4466 4482 /*
4467 4483 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4468 4484 * Don't have the application see that errno
4469 4485 */
4470 4486 error = ENETUNREACH;
4471 4487 goto failed;
4472 4488 case ENETDOWN:
4473 4489 /*
4474 4490 * Have !ipif_addr_ready address; drop packet silently
4475 4491 * until we can get applications to not send until we
4476 4492 * are ready.
4477 4493 */
4478 4494 error = 0;
4479 4495 goto failed;
4480 4496 case EHOSTUNREACH:
4481 4497 case ENETUNREACH:
4482 4498 if (ixa->ixa_ire != NULL) {
4483 4499 /*
4484 4500 * Let conn_ip_output/ire_send_noroute return
4485 4501 * the error and send any local ICMP error.
4486 4502 */
4487 4503 error = 0;
4488 4504 break;
4489 4505 }
4490 4506 /* FALLTHRU */
4491 4507 default:
4492 4508 failed:
4493 4509 goto ud_error;
4494 4510 }
4495 4511
4496 4512 mutex_enter(&connp->conn_lock);
4497 4513 /*
4498 4514 * While we dropped the lock some other thread might have connected
4499 4515 * this socket. If so we bail out with EISCONN to ensure that the
4500 4516 * connecting thread is the one that updates conn_ixa, conn_ht_*
4501 4517 * and conn_*last*.
4502 4518 */
4503 4519 if (icmp->icmp_state == TS_DATA_XFER) {
4504 4520 mutex_exit(&connp->conn_lock);
4505 4521 error = EISCONN;
4506 4522 goto ud_error;
4507 4523 }
4508 4524
4509 4525 /*
4510 4526 * We need to rebuild the headers if
4511 4527 * - we are labeling packets (could be different for different
4512 4528 * destinations)
4513 4529 * - we have a source route (or routing header) since we need to
4514 4530 * massage that to get the pseudo-header checksum
4515 4531 * - a socket option with COA_HEADER_CHANGED has been set which
4516 4532 * set conn_v6lastdst to zero.
4517 4533 *
4518 4534 * Otherwise the prepend function will just update the src, dst,
4519 4535 * and flow label.
4520 4536 */
4521 4537 if (is_system_labeled()) {
4522 4538 /* TX MLP requires SCM_UCRED and don't have that here */
4523 4539 if (connp->conn_mlp_type != mlptSingle) {
4524 4540 mutex_exit(&connp->conn_lock);
4525 4541 error = ECONNREFUSED;
4526 4542 goto ud_error;
4527 4543 }
4528 4544 /*
4529 4545 * Check whether Trusted Solaris policy allows communication
4530 4546 * with this host, and pretend that the destination is
4531 4547 * unreachable if not.
4532 4548 * Compute any needed label and place it in ipp_label_v4/v6.
4533 4549 *
4534 4550 * Later conn_build_hdr_template/conn_prepend_hdr takes
4535 4551 * ipp_label_v4/v6 to form the packet.
4536 4552 *
4537 4553 * Tsol note: Since we hold conn_lock we know no other
4538 4554 * thread manipulates conn_xmit_ipp.
4539 4555 */
4540 4556 error = conn_update_label(connp, ixa, &v6dst,
4541 4557 &connp->conn_xmit_ipp);
4542 4558 if (error != 0) {
4543 4559 mutex_exit(&connp->conn_lock);
4544 4560 goto ud_error;
4545 4561 }
4546 4562 /* Rebuild the header template */
4547 4563 error = icmp_build_hdr_template(connp, &v6src, &v6dst,
4548 4564 flowinfo);
4549 4565 if (error != 0) {
4550 4566 mutex_exit(&connp->conn_lock);
4551 4567 goto ud_error;
4552 4568 }
4553 4569 } else if (connp->conn_xmit_ipp.ipp_fields &
4554 4570 (IPPF_IPV4_OPTIONS|IPPF_RTHDR) ||
4555 4571 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4556 4572 /* Rebuild the header template */
4557 4573 error = icmp_build_hdr_template(connp, &v6src, &v6dst,
4558 4574 flowinfo);
4559 4575 if (error != 0) {
4560 4576 mutex_exit(&connp->conn_lock);
4561 4577 goto ud_error;
4562 4578 }
4563 4579 } else {
4564 4580 /* Simply update the destination address if no source route */
4565 4581 if (ixa->ixa_flags & IXAF_IS_IPV4) {
4566 4582 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
4567 4583
4568 4584 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4569 4585 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4570 4586 ipha->ipha_fragment_offset_and_flags |=
4571 4587 IPH_DF_HTONS;
4572 4588 } else {
4573 4589 ipha->ipha_fragment_offset_and_flags &=
4574 4590 ~IPH_DF_HTONS;
4575 4591 }
4576 4592 } else {
4577 4593 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4578 4594 ip6h->ip6_dst = v6dst;
4579 4595 }
4580 4596 }
4581 4597
4582 4598 /*
4583 4599 * Remember the dst etc which corresponds to the built header
4584 4600 * template and conn_ixa.
4585 4601 */
4586 4602 oldixa = conn_replace_ixa(connp, ixa);
4587 4603 connp->conn_v6lastdst = v6dst;
4588 4604 connp->conn_lastflowinfo = flowinfo;
4589 4605 connp->conn_lastscopeid = ixa->ixa_scopeid;
4590 4606 connp->conn_lastsrcid = srcid;
4591 4607 /* Also remember a source to use together with lastdst */
4592 4608 connp->conn_v6lastsrc = v6src;
4593 4609
4594 4610 data_mp = icmp_prepend_header_template(connp, ixa, data_mp, &v6src,
4595 4611 flowinfo, &error);
4596 4612
4597 4613 /* Done with conn_t */
4598 4614 mutex_exit(&connp->conn_lock);
4599 4615 ixa_refrele(oldixa);
4600 4616
4601 4617 if (data_mp == NULL) {
4602 4618 ASSERT(error != 0);
4603 4619 goto ud_error;
4604 4620 }
4605 4621
4606 4622 if (!do_ipsec) {
4607 4623 /* Policy might differ for different ICMP type/code */
4608 4624 data_mp = icmp_output_attach_policy(data_mp, connp, ixa);
4609 4625 if (data_mp == NULL) {
4610 4626 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4611 4627 error = EHOSTUNREACH; /* IPsec policy failure */
4612 4628 goto done;
4613 4629 }
4614 4630 }
4615 4631
4616 4632 /* We're done. Pass the packet to ip. */
4617 4633 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
4618 4634
4619 4635 error = conn_ip_output(data_mp, ixa);
4620 4636 /* No rawipOutErrors if an error since IP increases its error counter */
4621 4637 switch (error) {
4622 4638 case 0:
4623 4639 break;
4624 4640 case EWOULDBLOCK:
4625 4641 (void) ixa_check_drain_insert(connp, ixa);
4626 4642 error = 0;
4627 4643 break;
4628 4644 case EADDRNOTAVAIL:
4629 4645 /*
4630 4646 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4631 4647 * Don't have the application see that errno
4632 4648 */
4633 4649 error = ENETUNREACH;
4634 4650 /* FALLTHRU */
4635 4651 default:
4636 4652 mutex_enter(&connp->conn_lock);
4637 4653 /*
4638 4654 * Clear the source and v6lastdst so we call ip_attr_connect
4639 4655 * for the next packet and try to pick a better source.
4640 4656 */
4641 4657 if (connp->conn_mcbc_bind)
4642 4658 connp->conn_saddr_v6 = ipv6_all_zeros;
4643 4659 else
4644 4660 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4645 4661 connp->conn_v6lastdst = ipv6_all_zeros;
4646 4662 mutex_exit(&connp->conn_lock);
4647 4663 break;
4648 4664 }
4649 4665 done:
4650 4666 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4651 4667 ixa->ixa_cred = connp->conn_cred; /* Restore */
4652 4668 ixa->ixa_cpid = connp->conn_cpid;
4653 4669 ixa_refrele(ixa);
4654 4670 return (error);
4655 4671
4656 4672 ud_error:
4657 4673 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4658 4674 ixa->ixa_cred = connp->conn_cred; /* Restore */
4659 4675 ixa->ixa_cpid = connp->conn_cpid;
4660 4676 ixa_refrele(ixa);
4661 4677
4662 4678 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4663 4679 freemsg(data_mp);
4664 4680 return (error);
4665 4681 }
4666 4682
4667 4683 /* ARGSUSED */
4668 4684 static void
4669 4685 icmp_wput_fallback(queue_t *q, mblk_t *mp)
4670 4686 {
4671 4687 #ifdef DEBUG
4672 4688 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n");
4673 4689 #endif
4674 4690 freemsg(mp);
4675 4691 }
4676 4692
4677 4693 static void
4678 4694 icmp_wput_other(queue_t *q, mblk_t *mp)
4679 4695 {
4680 4696 uchar_t *rptr = mp->b_rptr;
4681 4697 struct iocblk *iocp;
4682 4698 conn_t *connp = Q_TO_CONN(q);
4683 4699 icmp_t *icmp = connp->conn_icmp;
4684 4700 cred_t *cr;
4685 4701
4686 4702 switch (mp->b_datap->db_type) {
4687 4703 case M_PROTO:
4688 4704 case M_PCPROTO:
4689 4705 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4690 4706 /*
4691 4707 * If the message does not contain a PRIM_type,
4692 4708 * throw it away.
4693 4709 */
4694 4710 freemsg(mp);
4695 4711 return;
4696 4712 }
4697 4713 switch (((t_primp_t)rptr)->type) {
4698 4714 case T_ADDR_REQ:
4699 4715 icmp_addr_req(q, mp);
4700 4716 return;
4701 4717 case O_T_BIND_REQ:
4702 4718 case T_BIND_REQ:
4703 4719 icmp_tpi_bind(q, mp);
4704 4720 return;
4705 4721 case T_CONN_REQ:
4706 4722 icmp_tpi_connect(q, mp);
4707 4723 return;
4708 4724 case T_CAPABILITY_REQ:
4709 4725 icmp_capability_req(q, mp);
4710 4726 return;
4711 4727 case T_INFO_REQ:
4712 4728 icmp_info_req(q, mp);
4713 4729 return;
4714 4730 case T_UNITDATA_REQ:
4715 4731 /*
4716 4732 * If a T_UNITDATA_REQ gets here, the address must
4717 4733 * be bad. Valid T_UNITDATA_REQs are handled
4718 4734 * in icmp_wput.
4719 4735 */
4720 4736 icmp_ud_err(q, mp, EADDRNOTAVAIL);
4721 4737 return;
4722 4738 case T_UNBIND_REQ:
4723 4739 icmp_tpi_unbind(q, mp);
4724 4740 return;
4725 4741 case T_SVR4_OPTMGMT_REQ:
4726 4742 /*
4727 4743 * All Solaris components should pass a db_credp
4728 4744 * for this TPI message, hence we ASSERT.
4729 4745 * But in case there is some other M_PROTO that looks
4730 4746 * like a TPI message sent by some other kernel
4731 4747 * component, we check and return an error.
4732 4748 */
4733 4749 cr = msg_getcred(mp, NULL);
4734 4750 ASSERT(cr != NULL);
4735 4751 if (cr == NULL) {
4736 4752 icmp_err_ack(q, mp, TSYSERR, EINVAL);
4737 4753 return;
4738 4754 }
4739 4755
4740 4756 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
4741 4757 cr)) {
4742 4758 svr4_optcom_req(q, mp, cr, &icmp_opt_obj);
4743 4759 }
4744 4760 return;
4745 4761
4746 4762 case T_OPTMGMT_REQ:
4747 4763 /*
4748 4764 * All Solaris components should pass a db_credp
4749 4765 * for this TPI message, hence we ASSERT.
4750 4766 * But in case there is some other M_PROTO that looks
4751 4767 * like a TPI message sent by some other kernel
4752 4768 * component, we check and return an error.
4753 4769 */
4754 4770 cr = msg_getcred(mp, NULL);
4755 4771 ASSERT(cr != NULL);
4756 4772 if (cr == NULL) {
4757 4773 icmp_err_ack(q, mp, TSYSERR, EINVAL);
4758 4774 return;
4759 4775 }
4760 4776 tpi_optcom_req(q, mp, cr, &icmp_opt_obj);
4761 4777 return;
4762 4778
4763 4779 case T_DISCON_REQ:
4764 4780 icmp_tpi_disconnect(q, mp);
4765 4781 return;
4766 4782
4767 4783 /* The following TPI message is not supported by icmp. */
4768 4784 case O_T_CONN_RES:
4769 4785 case T_CONN_RES:
4770 4786 icmp_err_ack(q, mp, TNOTSUPPORT, 0);
4771 4787 return;
4772 4788
4773 4789 /* The following 3 TPI requests are illegal for icmp. */
4774 4790 case T_DATA_REQ:
4775 4791 case T_EXDATA_REQ:
4776 4792 case T_ORDREL_REQ:
4777 4793 icmp_err_ack(q, mp, TNOTSUPPORT, 0);
4778 4794 return;
4779 4795 default:
4780 4796 break;
4781 4797 }
4782 4798 break;
4783 4799 case M_FLUSH:
4784 4800 if (*rptr & FLUSHW)
4785 4801 flushq(q, FLUSHDATA);
4786 4802 break;
4787 4803 case M_IOCTL:
4788 4804 iocp = (struct iocblk *)mp->b_rptr;
4789 4805 switch (iocp->ioc_cmd) {
4790 4806 case TI_GETPEERNAME:
4791 4807 if (icmp->icmp_state != TS_DATA_XFER) {
4792 4808 /*
4793 4809 * If a default destination address has not
4794 4810 * been associated with the stream, then we
4795 4811 * don't know the peer's name.
4796 4812 */
4797 4813 iocp->ioc_error = ENOTCONN;
4798 4814 iocp->ioc_count = 0;
4799 4815 mp->b_datap->db_type = M_IOCACK;
4800 4816 qreply(q, mp);
4801 4817 return;
4802 4818 }
4803 4819 /* FALLTHRU */
4804 4820 case TI_GETMYNAME:
4805 4821 /*
4806 4822 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4807 4823 * need to copyin the user's strbuf structure.
4808 4824 * Processing will continue in the M_IOCDATA case
4809 4825 * below.
4810 4826 */
4811 4827 mi_copyin(q, mp, NULL,
4812 4828 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4813 4829 return;
4814 4830 default:
4815 4831 break;
4816 4832 }
4817 4833 break;
4818 4834 case M_IOCDATA:
4819 4835 icmp_wput_iocdata(q, mp);
4820 4836 return;
4821 4837 default:
4822 4838 /* Unrecognized messages are passed through without change. */
4823 4839 break;
4824 4840 }
4825 4841 ip_wput_nondata(q, mp);
4826 4842 }
4827 4843
4828 4844 /*
4829 4845 * icmp_wput_iocdata is called by icmp_wput_other to handle all M_IOCDATA
4830 4846 * messages.
4831 4847 */
4832 4848 static void
4833 4849 icmp_wput_iocdata(queue_t *q, mblk_t *mp)
4834 4850 {
4835 4851 mblk_t *mp1;
4836 4852 STRUCT_HANDLE(strbuf, sb);
4837 4853 uint_t addrlen;
4838 4854 conn_t *connp = Q_TO_CONN(q);
4839 4855 icmp_t *icmp = connp->conn_icmp;
4840 4856
4841 4857 /* Make sure it is one of ours. */
4842 4858 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4843 4859 case TI_GETMYNAME:
4844 4860 case TI_GETPEERNAME:
4845 4861 break;
4846 4862 default:
4847 4863 ip_wput_nondata(q, mp);
4848 4864 return;
4849 4865 }
4850 4866
4851 4867 switch (mi_copy_state(q, mp, &mp1)) {
4852 4868 case -1:
4853 4869 return;
4854 4870 case MI_COPY_CASE(MI_COPY_IN, 1):
4855 4871 break;
4856 4872 case MI_COPY_CASE(MI_COPY_OUT, 1):
4857 4873 /*
4858 4874 * The address has been copied out, so now
4859 4875 * copyout the strbuf.
4860 4876 */
4861 4877 mi_copyout(q, mp);
4862 4878 return;
4863 4879 case MI_COPY_CASE(MI_COPY_OUT, 2):
4864 4880 /*
4865 4881 * The address and strbuf have been copied out.
4866 4882 * We're done, so just acknowledge the original
4867 4883 * M_IOCTL.
4868 4884 */
4869 4885 mi_copy_done(q, mp, 0);
4870 4886 return;
4871 4887 default:
4872 4888 /*
4873 4889 * Something strange has happened, so acknowledge
4874 4890 * the original M_IOCTL with an EPROTO error.
4875 4891 */
4876 4892 mi_copy_done(q, mp, EPROTO);
4877 4893 return;
4878 4894 }
4879 4895
4880 4896 /*
4881 4897 * Now we have the strbuf structure for TI_GETMYNAME
4882 4898 * and TI_GETPEERNAME. Next we copyout the requested
4883 4899 * address and then we'll copyout the strbuf.
4884 4900 */
4885 4901 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
4886 4902 (void *)mp1->b_rptr);
4887 4903
4888 4904 if (connp->conn_family == AF_INET)
4889 4905 addrlen = sizeof (sin_t);
4890 4906 else
4891 4907 addrlen = sizeof (sin6_t);
4892 4908
4893 4909 if (STRUCT_FGET(sb, maxlen) < addrlen) {
4894 4910 mi_copy_done(q, mp, EINVAL);
4895 4911 return;
4896 4912 }
4897 4913 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4898 4914 case TI_GETMYNAME:
4899 4915 break;
4900 4916 case TI_GETPEERNAME:
4901 4917 if (icmp->icmp_state != TS_DATA_XFER) {
4902 4918 mi_copy_done(q, mp, ENOTCONN);
4903 4919 return;
4904 4920 }
4905 4921 break;
4906 4922 default:
4907 4923 mi_copy_done(q, mp, EPROTO);
4908 4924 return;
4909 4925 }
4910 4926 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4911 4927 if (!mp1)
4912 4928 return;
4913 4929
4914 4930 STRUCT_FSET(sb, len, addrlen);
4915 4931 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4916 4932 case TI_GETMYNAME:
4917 4933 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4918 4934 &addrlen);
4919 4935 break;
4920 4936 case TI_GETPEERNAME:
4921 4937 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4922 4938 &addrlen);
4923 4939 break;
4924 4940 }
4925 4941 mp1->b_wptr += addrlen;
4926 4942 /* Copy out the address */
4927 4943 mi_copyout(q, mp);
4928 4944 }
4929 4945
4930 4946 void
4931 4947 icmp_ddi_g_init(void)
4932 4948 {
4933 4949 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
4934 4950 icmp_opt_obj.odb_opt_arr_cnt);
4935 4951
4936 4952 /*
4937 4953 * We want to be informed each time a stack is created or
4938 4954 * destroyed in the kernel, so we can maintain the
4939 4955 * set of icmp_stack_t's.
4940 4956 */
4941 4957 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
4942 4958 }
4943 4959
4944 4960 void
4945 4961 icmp_ddi_g_destroy(void)
4946 4962 {
4947 4963 netstack_unregister(NS_ICMP);
4948 4964 }
4949 4965
4950 4966 #define INET_NAME "ip"
4951 4967
4952 4968 /*
4953 4969 * Initialize the ICMP stack instance.
4954 4970 */
4955 4971 static void *
4956 4972 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
4957 4973 {
4958 4974 icmp_stack_t *is;
4959 4975 int error = 0;
4960 4976 size_t arrsz;
4961 4977 major_t major;
4962 4978
4963 4979 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
4964 4980 is->is_netstack = ns;
4965 4981
4966 4982 arrsz = sizeof (icmp_propinfo_tbl);
4967 4983 is->is_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, KM_SLEEP);
4968 4984 bcopy(icmp_propinfo_tbl, is->is_propinfo_tbl, arrsz);
4969 4985
4970 4986 is->is_ksp = rawip_kstat_init(stackid);
4971 4987
4972 4988 major = mod_name_to_major(INET_NAME);
4973 4989 error = ldi_ident_from_major(major, &is->is_ldi_ident);
4974 4990 ASSERT(error == 0);
4975 4991 return (is);
4976 4992 }
4977 4993
4978 4994 /*
4979 4995 * Free the ICMP stack instance.
4980 4996 */
4981 4997 static void
4982 4998 rawip_stack_fini(netstackid_t stackid, void *arg)
4983 4999 {
4984 5000 icmp_stack_t *is = (icmp_stack_t *)arg;
4985 5001
4986 5002 kmem_free(is->is_propinfo_tbl, sizeof (icmp_propinfo_tbl));
4987 5003 is->is_propinfo_tbl = NULL;
4988 5004
4989 5005 rawip_kstat_fini(stackid, is->is_ksp);
4990 5006 is->is_ksp = NULL;
4991 5007 ldi_ident_release(is->is_ldi_ident);
4992 5008 kmem_free(is, sizeof (*is));
4993 5009 }
4994 5010
4995 5011 static void *
4996 5012 rawip_kstat_init(netstackid_t stackid) {
4997 5013 kstat_t *ksp;
4998 5014
4999 5015 rawip_named_kstat_t template = {
5000 5016 { "inDatagrams", KSTAT_DATA_UINT32, 0 },
5001 5017 { "inCksumErrs", KSTAT_DATA_UINT32, 0 },
5002 5018 { "inErrors", KSTAT_DATA_UINT32, 0 },
5003 5019 { "outDatagrams", KSTAT_DATA_UINT32, 0 },
5004 5020 { "outErrors", KSTAT_DATA_UINT32, 0 },
5005 5021 };
5006 5022
5007 5023 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
5008 5024 KSTAT_TYPE_NAMED,
5009 5025 NUM_OF_FIELDS(rawip_named_kstat_t),
5010 5026 0, stackid);
5011 5027 if (ksp == NULL || ksp->ks_data == NULL)
5012 5028 return (NULL);
5013 5029
5014 5030 bcopy(&template, ksp->ks_data, sizeof (template));
5015 5031 ksp->ks_update = rawip_kstat_update;
5016 5032 ksp->ks_private = (void *)(uintptr_t)stackid;
5017 5033
5018 5034 kstat_install(ksp);
5019 5035 return (ksp);
5020 5036 }
5021 5037
5022 5038 static void
5023 5039 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
5024 5040 {
5025 5041 if (ksp != NULL) {
5026 5042 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
5027 5043 kstat_delete_netstack(ksp, stackid);
5028 5044 }
5029 5045 }
5030 5046
5031 5047 static int
5032 5048 rawip_kstat_update(kstat_t *ksp, int rw)
5033 5049 {
5034 5050 rawip_named_kstat_t *rawipkp;
5035 5051 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
5036 5052 netstack_t *ns;
5037 5053 icmp_stack_t *is;
5038 5054
5039 5055 if ((ksp == NULL) || (ksp->ks_data == NULL))
5040 5056 return (EIO);
5041 5057
5042 5058 if (rw == KSTAT_WRITE)
5043 5059 return (EACCES);
5044 5060
5045 5061 rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
5046 5062
5047 5063 ns = netstack_find_by_stackid(stackid);
5048 5064 if (ns == NULL)
5049 5065 return (-1);
5050 5066 is = ns->netstack_icmp;
5051 5067 if (is == NULL) {
5052 5068 netstack_rele(ns);
5053 5069 return (-1);
5054 5070 }
5055 5071 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams;
5056 5072 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs;
5057 5073 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors;
5058 5074 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
5059 5075 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors;
5060 5076 netstack_rele(ns);
5061 5077 return (0);
5062 5078 }
5063 5079
5064 5080 /* ARGSUSED */
5065 5081 int
5066 5082 rawip_accept(sock_lower_handle_t lproto_handle,
5067 5083 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
5068 5084 cred_t *cr)
5069 5085 {
5070 5086 return (EOPNOTSUPP);
5071 5087 }
5072 5088
5073 5089 /* ARGSUSED */
5074 5090 int
5075 5091 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5076 5092 socklen_t len, cred_t *cr)
5077 5093 {
5078 5094 conn_t *connp = (conn_t *)proto_handle;
5079 5095 int error;
5080 5096
5081 5097 /* All Solaris components should pass a cred for this operation. */
5082 5098 ASSERT(cr != NULL);
5083 5099
5084 5100 /* Binding to a NULL address really means unbind */
5085 5101 if (sa == NULL)
5086 5102 error = rawip_do_unbind(connp);
5087 5103 else
5088 5104 error = rawip_do_bind(connp, sa, len);
5089 5105
5090 5106 if (error < 0) {
5091 5107 if (error == -TOUTSTATE)
5092 5108 error = EINVAL;
5093 5109 else
5094 5110 error = proto_tlitosyserr(-error);
5095 5111 }
5096 5112 return (error);
5097 5113 }
5098 5114
5099 5115 static int
5100 5116 rawip_implicit_bind(conn_t *connp)
5101 5117 {
5102 5118 sin6_t sin6addr;
5103 5119 sin_t *sin;
5104 5120 sin6_t *sin6;
5105 5121 socklen_t len;
5106 5122 int error;
5107 5123
5108 5124 if (connp->conn_family == AF_INET) {
5109 5125 len = sizeof (struct sockaddr_in);
5110 5126 sin = (sin_t *)&sin6addr;
5111 5127 *sin = sin_null;
5112 5128 sin->sin_family = AF_INET;
5113 5129 sin->sin_addr.s_addr = INADDR_ANY;
5114 5130 } else {
5115 5131 ASSERT(connp->conn_family == AF_INET6);
5116 5132 len = sizeof (sin6_t);
5117 5133 sin6 = (sin6_t *)&sin6addr;
5118 5134 *sin6 = sin6_null;
5119 5135 sin6->sin6_family = AF_INET6;
5120 5136 V6_SET_ZERO(sin6->sin6_addr);
5121 5137 }
5122 5138
5123 5139 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len);
5124 5140
5125 5141 return ((error < 0) ? proto_tlitosyserr(-error) : error);
5126 5142 }
5127 5143
5128 5144 static int
5129 5145 rawip_unbind(conn_t *connp)
5130 5146 {
5131 5147 int error;
5132 5148
5133 5149 error = rawip_do_unbind(connp);
5134 5150 if (error < 0) {
5135 5151 error = proto_tlitosyserr(-error);
5136 5152 }
5137 5153 return (error);
5138 5154 }
5139 5155
5140 5156 /* ARGSUSED */
5141 5157 int
5142 5158 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
5143 5159 {
5144 5160 return (EOPNOTSUPP);
5145 5161 }
5146 5162
5147 5163 int
5148 5164 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5149 5165 socklen_t len, sock_connid_t *id, cred_t *cr)
5150 5166 {
5151 5167 conn_t *connp = (conn_t *)proto_handle;
5152 5168 icmp_t *icmp = connp->conn_icmp;
5153 5169 int error;
5154 5170 boolean_t did_bind = B_FALSE;
5155 5171 pid_t pid = curproc->p_pid;
5156 5172
5157 5173 /* All Solaris components should pass a cred for this operation. */
5158 5174 ASSERT(cr != NULL);
5159 5175
5160 5176 if (sa == NULL) {
5161 5177 /*
5162 5178 * Disconnect
5163 5179 * Make sure we are connected
5164 5180 */
5165 5181 if (icmp->icmp_state != TS_DATA_XFER)
5166 5182 return (EINVAL);
5167 5183
5168 5184 error = icmp_disconnect(connp);
5169 5185 return (error);
5170 5186 }
5171 5187
5172 5188 error = proto_verify_ip_addr(connp->conn_family, sa, len);
5173 5189 if (error != 0)
5174 5190 return (error);
5175 5191
5176 5192 /* do an implicit bind if necessary */
5177 5193 if (icmp->icmp_state == TS_UNBND) {
5178 5194 error = rawip_implicit_bind(connp);
5179 5195 /*
5180 5196 * We could be racing with an actual bind, in which case
5181 5197 * we would see EPROTO. We cross our fingers and try
5182 5198 * to connect.
5183 5199 */
5184 5200 if (!(error == 0 || error == EPROTO))
5185 5201 return (error);
5186 5202 did_bind = B_TRUE;
5187 5203 }
5188 5204
5189 5205 /*
5190 5206 * set SO_DGRAM_ERRIND
5191 5207 */
5192 5208 connp->conn_dgram_errind = B_TRUE;
5193 5209
5194 5210 error = rawip_do_connect(connp, sa, len, cr, pid);
5195 5211 if (error != 0 && did_bind) {
5196 5212 int unbind_err;
5197 5213
5198 5214 unbind_err = rawip_unbind(connp);
5199 5215 ASSERT(unbind_err == 0);
5200 5216 }
5201 5217
5202 5218 if (error == 0) {
5203 5219 *id = 0;
5204 5220 (*connp->conn_upcalls->su_connected)(connp->conn_upper_handle,
5205 5221 0, NULL, -1);
5206 5222 } else if (error < 0) {
5207 5223 error = proto_tlitosyserr(-error);
5208 5224 }
5209 5225 return (error);
5210 5226 }
5211 5227
5212 5228 /* ARGSUSED2 */
5213 5229 int
5214 5230 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q,
5215 5231 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb,
5216 5232 sock_quiesce_arg_t *arg)
5217 5233 {
5218 5234 conn_t *connp = (conn_t *)proto_handle;
5219 5235 icmp_t *icmp;
5220 5236 struct T_capability_ack tca;
5221 5237 struct sockaddr_in6 laddr, faddr;
5222 5238 socklen_t laddrlen, faddrlen;
5223 5239 short opts;
5224 5240 struct stroptions *stropt;
5225 5241 mblk_t *mp, *stropt_mp;
5226 5242 int error;
5227 5243
5228 5244 icmp = connp->conn_icmp;
5229 5245
5230 5246 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
5231 5247
5232 5248 /*
5233 5249 * setup the fallback stream that was allocated
5234 5250 */
5235 5251 connp->conn_dev = (dev_t)RD(q)->q_ptr;
5236 5252 connp->conn_minor_arena = WR(q)->q_ptr;
5237 5253
5238 5254 RD(q)->q_ptr = WR(q)->q_ptr = connp;
5239 5255
5240 5256 WR(q)->q_qinfo = &icmpwinit;
5241 5257
5242 5258 connp->conn_rq = RD(q);
5243 5259 connp->conn_wq = WR(q);
5244 5260
5245 5261 /* Notify stream head about options before sending up data */
5246 5262 stropt_mp->b_datap->db_type = M_SETOPTS;
5247 5263 stropt_mp->b_wptr += sizeof (*stropt);
5248 5264 stropt = (struct stroptions *)stropt_mp->b_rptr;
5249 5265 stropt->so_flags = SO_WROFF | SO_HIWAT;
5250 5266 stropt->so_wroff = connp->conn_wroff;
5251 5267 stropt->so_hiwat = connp->conn_rcvbuf;
5252 5268 putnext(RD(q), stropt_mp);
5253 5269
5254 5270 /*
5255 5271 * free helper stream
5256 5272 */
5257 5273 ip_free_helper_stream(connp);
5258 5274
5259 5275 /*
5260 5276 * Collect the information needed to sync with the sonode
5261 5277 */
5262 5278 icmp_do_capability_ack(icmp, &tca, TC1_INFO);
5263 5279
5264 5280 laddrlen = faddrlen = sizeof (sin6_t);
5265 5281 (void) rawip_getsockname((sock_lower_handle_t)connp,
5266 5282 (struct sockaddr *)&laddr, &laddrlen, CRED());
5267 5283 error = rawip_getpeername((sock_lower_handle_t)connp,
5268 5284 (struct sockaddr *)&faddr, &faddrlen, CRED());
5269 5285 if (error != 0)
5270 5286 faddrlen = 0;
5271 5287 opts = 0;
5272 5288 if (connp->conn_dgram_errind)
5273 5289 opts |= SO_DGRAM_ERRIND;
5274 5290 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
5275 5291 opts |= SO_DONTROUTE;
5276 5292
5277 5293 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
5278 5294 (struct sockaddr *)&laddr, laddrlen,
5279 5295 (struct sockaddr *)&faddr, faddrlen, opts);
5280 5296
5281 5297 /*
5282 5298 * Attempts to send data up during fallback will result in it being
5283 5299 * queued in icmp_t. Now we push up any queued packets.
5284 5300 */
5285 5301 mutex_enter(&icmp->icmp_recv_lock);
5286 5302 if (mp != NULL) {
5287 5303 mp->b_next = icmp->icmp_fallback_queue_head;
5288 5304 icmp->icmp_fallback_queue_head = mp;
5289 5305 }
5290 5306 while (icmp->icmp_fallback_queue_head != NULL) {
5291 5307 mp = icmp->icmp_fallback_queue_head;
5292 5308 icmp->icmp_fallback_queue_head = mp->b_next;
5293 5309 mp->b_next = NULL;
5294 5310 mutex_exit(&icmp->icmp_recv_lock);
5295 5311 putnext(RD(q), mp);
5296 5312 mutex_enter(&icmp->icmp_recv_lock);
5297 5313 }
5298 5314 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head;
5299 5315
5300 5316 /*
5301 5317 * No longer a streams less socket
5302 5318 */
5303 5319 mutex_enter(&connp->conn_lock);
5304 5320 connp->conn_flags &= ~IPCL_NONSTR;
5305 5321 mutex_exit(&connp->conn_lock);
5306 5322
5307 5323 mutex_exit(&icmp->icmp_recv_lock);
5308 5324
5309 5325 ASSERT(icmp->icmp_fallback_queue_head == NULL &&
5310 5326 icmp->icmp_fallback_queue_tail == NULL);
5311 5327
5312 5328 ASSERT(connp->conn_ref >= 1);
5313 5329
5314 5330 return (0);
5315 5331 }
5316 5332
5317 5333 /* ARGSUSED2 */
5318 5334 sock_lower_handle_t
5319 5335 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
5320 5336 uint_t *smodep, int *errorp, int flags, cred_t *credp)
5321 5337 {
5322 5338 conn_t *connp;
5323 5339
5324 5340 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) {
5325 5341 *errorp = EPROTONOSUPPORT;
5326 5342 return (NULL);
5327 5343 }
5328 5344
5329 5345 connp = rawip_do_open(family, credp, errorp, flags);
5330 5346 if (connp != NULL) {
5331 5347 connp->conn_flags |= IPCL_NONSTR;
5332 5348
5333 5349 mutex_enter(&connp->conn_lock);
5334 5350 connp->conn_state_flags &= ~CONN_INCIPIENT;
5335 5351 mutex_exit(&connp->conn_lock);
5336 5352 *sock_downcalls = &sock_rawip_downcalls;
5337 5353 *smodep = SM_ATOMIC;
5338 5354 } else {
5339 5355 ASSERT(*errorp != 0);
5340 5356 }
5341 5357
5342 5358 return ((sock_lower_handle_t)connp);
5343 5359 }
5344 5360
5345 5361 /* ARGSUSED3 */
5346 5362 void
5347 5363 rawip_activate(sock_lower_handle_t proto_handle,
5348 5364 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags,
5349 5365 cred_t *cr)
5350 5366 {
5351 5367 conn_t *connp = (conn_t *)proto_handle;
5352 5368 struct sock_proto_props sopp;
5353 5369
5354 5370 /* All Solaris components should pass a cred for this operation. */
5355 5371 ASSERT(cr != NULL);
5356 5372
5357 5373 connp->conn_upcalls = sock_upcalls;
5358 5374 connp->conn_upper_handle = sock_handle;
5359 5375
5360 5376 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
5361 5377 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
5362 5378 sopp.sopp_wroff = connp->conn_wroff;
5363 5379 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
5364 5380 sopp.sopp_rxlowat = connp->conn_rcvlowat;
5365 5381 sopp.sopp_maxblk = INFPSZ;
5366 5382 sopp.sopp_maxpsz = IP_MAXPACKET;
5367 5383 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 :
5368 5384 icmp_mod_info.mi_minpsz;
5369 5385
5370 5386 (*connp->conn_upcalls->su_set_proto_props)
5371 5387 (connp->conn_upper_handle, &sopp);
5372 5388
5373 5389 icmp_bind_proto(connp->conn_icmp);
5374 5390 }
5375 5391
5376 5392 /* ARGSUSED3 */
5377 5393 int
5378 5394 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5379 5395 socklen_t *salenp, cred_t *cr)
5380 5396 {
5381 5397 conn_t *connp = (conn_t *)proto_handle;
5382 5398 icmp_t *icmp = connp->conn_icmp;
5383 5399 int error;
5384 5400
5385 5401 /* All Solaris components should pass a cred for this operation. */
5386 5402 ASSERT(cr != NULL);
5387 5403
5388 5404 mutex_enter(&connp->conn_lock);
5389 5405 if (icmp->icmp_state != TS_DATA_XFER)
5390 5406 error = ENOTCONN;
5391 5407 else
5392 5408 error = conn_getpeername(connp, sa, salenp);
5393 5409 mutex_exit(&connp->conn_lock);
5394 5410 return (error);
5395 5411 }
5396 5412
5397 5413 /* ARGSUSED3 */
5398 5414 int
5399 5415 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5400 5416 socklen_t *salenp, cred_t *cr)
5401 5417 {
5402 5418 conn_t *connp = (conn_t *)proto_handle;
5403 5419 int error;
5404 5420
5405 5421 /* All Solaris components should pass a cred for this operation. */
5406 5422 ASSERT(cr != NULL);
5407 5423
5408 5424 mutex_enter(&connp->conn_lock);
5409 5425 error = conn_getsockname(connp, sa, salenp);
5410 5426 mutex_exit(&connp->conn_lock);
5411 5427 return (error);
5412 5428 }
5413 5429
5414 5430 int
5415 5431 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
5416 5432 const void *optvalp, socklen_t optlen, cred_t *cr)
5417 5433 {
5418 5434 conn_t *connp = (conn_t *)proto_handle;
5419 5435 int error;
5420 5436
5421 5437 /* All Solaris components should pass a cred for this operation. */
5422 5438 ASSERT(cr != NULL);
5423 5439
5424 5440 error = proto_opt_check(level, option_name, optlen, NULL,
5425 5441 icmp_opt_obj.odb_opt_des_arr,
5426 5442 icmp_opt_obj.odb_opt_arr_cnt,
5427 5443 B_TRUE, B_FALSE, cr);
5428 5444
5429 5445 if (error != 0) {
5430 5446 /*
5431 5447 * option not recognized
5432 5448 */
5433 5449 if (error < 0) {
5434 5450 error = proto_tlitosyserr(-error);
5435 5451 }
5436 5452 return (error);
5437 5453 }
5438 5454
5439 5455 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level,
5440 5456 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen,
5441 5457 (uchar_t *)optvalp, NULL, cr);
5442 5458
5443 5459 ASSERT(error >= 0);
5444 5460
5445 5461 return (error);
5446 5462 }
5447 5463
5448 5464 int
5449 5465 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
5450 5466 void *optvalp, socklen_t *optlen, cred_t *cr)
5451 5467 {
5452 5468 int error;
5453 5469 conn_t *connp = (conn_t *)proto_handle;
5454 5470 t_uscalar_t max_optbuf_len;
5455 5471 void *optvalp_buf;
5456 5472 int len;
5457 5473
5458 5474 /* All Solaris components should pass a cred for this operation. */
5459 5475 ASSERT(cr != NULL);
5460 5476
5461 5477 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
5462 5478 icmp_opt_obj.odb_opt_des_arr,
5463 5479 icmp_opt_obj.odb_opt_arr_cnt,
5464 5480 B_FALSE, B_TRUE, cr);
5465 5481
5466 5482 if (error != 0) {
5467 5483 if (error < 0) {
5468 5484 error = proto_tlitosyserr(-error);
5469 5485 }
5470 5486 return (error);
5471 5487 }
5472 5488
5473 5489 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
5474 5490 len = icmp_opt_get(connp, level, option_name, optvalp_buf);
5475 5491 if (len == -1) {
5476 5492 kmem_free(optvalp_buf, max_optbuf_len);
5477 5493 return (EINVAL);
5478 5494 }
5479 5495
5480 5496 /*
5481 5497 * update optlen and copy option value
5482 5498 */
5483 5499 t_uscalar_t size = MIN(len, *optlen);
5484 5500
5485 5501 bcopy(optvalp_buf, optvalp, size);
5486 5502 bcopy(&size, optlen, sizeof (size));
5487 5503
5488 5504 kmem_free(optvalp_buf, max_optbuf_len);
5489 5505 return (0);
5490 5506 }
5491 5507
5492 5508 /* ARGSUSED1 */
5493 5509 int
5494 5510 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
5495 5511 {
5496 5512 conn_t *connp = (conn_t *)proto_handle;
5497 5513
5498 5514 /* All Solaris components should pass a cred for this operation. */
5499 5515 ASSERT(cr != NULL);
5500 5516
5501 5517 (void) rawip_do_close(connp);
5502 5518 return (0);
5503 5519 }
5504 5520
5505 5521 /* ARGSUSED2 */
5506 5522 int
5507 5523 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
5508 5524 {
5509 5525 conn_t *connp = (conn_t *)proto_handle;
5510 5526
5511 5527 /* All Solaris components should pass a cred for this operation. */
5512 5528 ASSERT(cr != NULL);
5513 5529
5514 5530 /* shut down the send side */
5515 5531 if (how != SHUT_RD)
5516 5532 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
5517 5533 SOCK_OPCTL_SHUT_SEND, 0);
5518 5534 /* shut down the recv side */
5519 5535 if (how != SHUT_WR)
5520 5536 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
5521 5537 SOCK_OPCTL_SHUT_RECV, 0);
5522 5538 return (0);
5523 5539 }
5524 5540
5525 5541 void
5526 5542 rawip_clr_flowctrl(sock_lower_handle_t proto_handle)
5527 5543 {
5528 5544 conn_t *connp = (conn_t *)proto_handle;
5529 5545 icmp_t *icmp = connp->conn_icmp;
5530 5546
5531 5547 mutex_enter(&icmp->icmp_recv_lock);
5532 5548 connp->conn_flow_cntrld = B_FALSE;
5533 5549 mutex_exit(&icmp->icmp_recv_lock);
5534 5550 }
5535 5551
5536 5552 int
5537 5553 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
5538 5554 int mode, int32_t *rvalp, cred_t *cr)
5539 5555 {
5540 5556 conn_t *connp = (conn_t *)proto_handle;
5541 5557 int error;
5542 5558
5543 5559 /* All Solaris components should pass a cred for this operation. */
5544 5560 ASSERT(cr != NULL);
5545 5561
5546 5562 /*
5547 5563 * If we don't have a helper stream then create one.
5548 5564 * ip_create_helper_stream takes care of locking the conn_t,
5549 5565 * so this check for NULL is just a performance optimization.
5550 5566 */
5551 5567 if (connp->conn_helper_info == NULL) {
5552 5568 icmp_stack_t *is = connp->conn_icmp->icmp_is;
5553 5569
5554 5570 ASSERT(is->is_ldi_ident != NULL);
5555 5571
5556 5572 /*
5557 5573 * Create a helper stream for non-STREAMS socket.
5558 5574 */
5559 5575 error = ip_create_helper_stream(connp, is->is_ldi_ident);
5560 5576 if (error != 0) {
5561 5577 ip0dbg(("rawip_ioctl: create of IP helper stream "
5562 5578 "failed %d\n", error));
5563 5579 return (error);
5564 5580 }
5565 5581 }
5566 5582
5567 5583 switch (cmd) {
5568 5584 case _SIOCSOCKFALLBACK:
5569 5585 case TI_GETPEERNAME:
5570 5586 case TI_GETMYNAME:
5571 5587 #ifdef DEBUG
5572 5588 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams"
5573 5589 " socket", cmd);
5574 5590 #endif
5575 5591 error = EINVAL;
5576 5592 break;
5577 5593 default:
5578 5594 /*
5579 5595 * Pass on to IP using helper stream
5580 5596 */
5581 5597 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
5582 5598 cmd, arg, mode, cr, rvalp);
5583 5599 break;
5584 5600 }
5585 5601 return (error);
5586 5602 }
5587 5603
5588 5604 int
5589 5605 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
5590 5606 cred_t *cr)
5591 5607 {
5592 5608 sin6_t *sin6;
5593 5609 sin_t *sin = NULL;
5594 5610 uint_t srcid;
5595 5611 conn_t *connp = (conn_t *)proto_handle;
5596 5612 icmp_t *icmp = connp->conn_icmp;
5597 5613 int error = 0;
5598 5614 icmp_stack_t *is = icmp->icmp_is;
5599 5615 pid_t pid = curproc->p_pid;
5600 5616 ip_xmit_attr_t *ixa;
5601 5617
5602 5618 ASSERT(DB_TYPE(mp) == M_DATA);
5603 5619
5604 5620 /* All Solaris components should pass a cred for this operation. */
5605 5621 ASSERT(cr != NULL);
5606 5622
5607 5623 /* do an implicit bind if necessary */
5608 5624 if (icmp->icmp_state == TS_UNBND) {
5609 5625 error = rawip_implicit_bind(connp);
5610 5626 /*
5611 5627 * We could be racing with an actual bind, in which case
5612 5628 * we would see EPROTO. We cross our fingers and try
5613 5629 * to connect.
5614 5630 */
5615 5631 if (!(error == 0 || error == EPROTO)) {
5616 5632 freemsg(mp);
5617 5633 return (error);
5618 5634 }
5619 5635 }
5620 5636
5621 5637 /* Protocol 255 contains full IP headers */
5622 5638 /* Read without holding lock */
5623 5639 if (icmp->icmp_hdrincl) {
5624 5640 ASSERT(connp->conn_ipversion == IPV4_VERSION);
5625 5641 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
5626 5642 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
5627 5643 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5628 5644 freemsg(mp);
5629 5645 return (EINVAL);
5630 5646 }
5631 5647 }
5632 5648 error = icmp_output_hdrincl(connp, mp, cr, pid);
5633 5649 if (is->is_sendto_ignerr)
5634 5650 return (0);
5635 5651 else
5636 5652 return (error);
5637 5653 }
5638 5654
5639 5655 /* Connected? */
5640 5656 if (msg->msg_name == NULL) {
5641 5657 if (icmp->icmp_state != TS_DATA_XFER) {
5642 5658 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5643 5659 return (EDESTADDRREQ);
5644 5660 }
5645 5661 if (msg->msg_controllen != 0) {
5646 5662 error = icmp_output_ancillary(connp, NULL, NULL, mp,
5647 5663 NULL, msg, cr, pid);
5648 5664 } else {
5649 5665 error = icmp_output_connected(connp, mp, cr, pid);
5650 5666 }
5651 5667 if (is->is_sendto_ignerr)
5652 5668 return (0);
5653 5669 else
5654 5670 return (error);
5655 5671 }
5656 5672 if (icmp->icmp_state == TS_DATA_XFER) {
5657 5673 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5658 5674 return (EISCONN);
5659 5675 }
5660 5676 error = proto_verify_ip_addr(connp->conn_family,
5661 5677 (struct sockaddr *)msg->msg_name, msg->msg_namelen);
5662 5678 if (error != 0) {
5663 5679 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5664 5680 return (error);
5665 5681 }
5666 5682 switch (connp->conn_family) {
5667 5683 case AF_INET6:
5668 5684 sin6 = (sin6_t *)msg->msg_name;
5669 5685
5670 5686 /* No support for mapped addresses on raw sockets */
5671 5687 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5672 5688 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5673 5689 return (EADDRNOTAVAIL);
5674 5690 }
5675 5691 srcid = sin6->__sin6_src_id;
5676 5692
5677 5693 /*
5678 5694 * If the local address is a mapped address return
5679 5695 * an error.
5680 5696 * It would be possible to send an IPv6 packet but the
5681 5697 * response would never make it back to the application
5682 5698 * since it is bound to a mapped address.
5683 5699 */
5684 5700 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
5685 5701 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5686 5702 return (EADDRNOTAVAIL);
5687 5703 }
5688 5704
5689 5705 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
5690 5706 sin6->sin6_addr = ipv6_loopback;
5691 5707
5692 5708 /*
5693 5709 * We have to allocate an ip_xmit_attr_t before we grab
5694 5710 * conn_lock and we need to hold conn_lock once we've check
5695 5711 * conn_same_as_last_v6 to handle concurrent send* calls on a
5696 5712 * socket.
5697 5713 */
5698 5714 if (msg->msg_controllen == 0) {
5699 5715 ixa = conn_get_ixa(connp, B_FALSE);
5700 5716 if (ixa == NULL) {
5701 5717 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5702 5718 return (ENOMEM);
5703 5719 }
5704 5720 } else {
5705 5721 ixa = NULL;
5706 5722 }
5707 5723 mutex_enter(&connp->conn_lock);
5708 5724 if (icmp->icmp_delayed_error != 0) {
5709 5725 sin6_t *sin2 = (sin6_t *)&icmp->icmp_delayed_addr;
5710 5726
5711 5727 error = icmp->icmp_delayed_error;
5712 5728 icmp->icmp_delayed_error = 0;
5713 5729
5714 5730 /* Compare IP address and family */
5715 5731
5716 5732 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5717 5733 &sin2->sin6_addr) &&
5718 5734 sin6->sin6_family == sin2->sin6_family) {
5719 5735 mutex_exit(&connp->conn_lock);
5720 5736 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5721 5737 if (ixa != NULL)
5722 5738 ixa_refrele(ixa);
5723 5739 return (error);
5724 5740 }
5725 5741 }
5726 5742 if (msg->msg_controllen != 0) {
5727 5743 mutex_exit(&connp->conn_lock);
5728 5744 ASSERT(ixa == NULL);
5729 5745 error = icmp_output_ancillary(connp, NULL, sin6, mp,
5730 5746 NULL, msg, cr, pid);
5731 5747 } else if (conn_same_as_last_v6(connp, sin6) &&
5732 5748 connp->conn_lastsrcid == srcid &&
5733 5749 ipsec_outbound_policy_current(ixa)) {
5734 5750 /* icmp_output_lastdst drops conn_lock */
5735 5751 error = icmp_output_lastdst(connp, mp, cr, pid, ixa);
5736 5752 } else {
5737 5753 /* icmp_output_newdst drops conn_lock */
5738 5754 error = icmp_output_newdst(connp, mp, NULL, sin6, cr,
5739 5755 pid, ixa);
5740 5756 }
5741 5757 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
5742 5758 if (is->is_sendto_ignerr)
5743 5759 return (0);
5744 5760 else
5745 5761 return (error);
5746 5762 case AF_INET:
5747 5763 sin = (sin_t *)msg->msg_name;
5748 5764
5749 5765 if (sin->sin_addr.s_addr == INADDR_ANY)
5750 5766 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
5751 5767
5752 5768 /*
5753 5769 * We have to allocate an ip_xmit_attr_t before we grab
5754 5770 * conn_lock and we need to hold conn_lock once we've check
5755 5771 * conn_same_as_last_v6 to handle concurrent send* on a socket.
5756 5772 */
5757 5773 if (msg->msg_controllen == 0) {
5758 5774 ixa = conn_get_ixa(connp, B_FALSE);
5759 5775 if (ixa == NULL) {
5760 5776 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5761 5777 return (ENOMEM);
5762 5778 }
5763 5779 } else {
5764 5780 ixa = NULL;
5765 5781 }
5766 5782 mutex_enter(&connp->conn_lock);
5767 5783 if (icmp->icmp_delayed_error != 0) {
5768 5784 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr;
5769 5785
5770 5786 error = icmp->icmp_delayed_error;
5771 5787 icmp->icmp_delayed_error = 0;
5772 5788
5773 5789 /* Compare IP address */
5774 5790
5775 5791 if (sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
5776 5792 mutex_exit(&connp->conn_lock);
5777 5793 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5778 5794 if (ixa != NULL)
5779 5795 ixa_refrele(ixa);
5780 5796 return (error);
5781 5797 }
5782 5798 }
5783 5799
5784 5800 if (msg->msg_controllen != 0) {
5785 5801 mutex_exit(&connp->conn_lock);
5786 5802 ASSERT(ixa == NULL);
5787 5803 error = icmp_output_ancillary(connp, sin, NULL, mp,
5788 5804 NULL, msg, cr, pid);
5789 5805 } else if (conn_same_as_last_v4(connp, sin) &&
5790 5806 ipsec_outbound_policy_current(ixa)) {
5791 5807 /* icmp_output_lastdst drops conn_lock */
5792 5808 error = icmp_output_lastdst(connp, mp, cr, pid, ixa);
5793 5809 } else {
5794 5810 /* icmp_output_newdst drops conn_lock */
5795 5811 error = icmp_output_newdst(connp, mp, sin, NULL, cr,
5796 5812 pid, ixa);
5797 5813 }
5798 5814 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
5799 5815 if (is->is_sendto_ignerr)
5800 5816 return (0);
5801 5817 else
5802 5818 return (error);
5803 5819 default:
5804 5820 return (EINVAL);
5805 5821 }
5806 5822 }
5807 5823
5808 5824 sock_downcalls_t sock_rawip_downcalls = {
5809 5825 rawip_activate,
5810 5826 rawip_accept,
5811 5827 rawip_bind,
5812 5828 rawip_listen,
5813 5829 rawip_connect,
5814 5830 rawip_getpeername,
5815 5831 rawip_getsockname,
5816 5832 rawip_getsockopt,
5817 5833 rawip_setsockopt,
5818 5834 rawip_send,
5819 5835 NULL,
5820 5836 NULL,
5821 5837 NULL,
5822 5838 rawip_shutdown,
5823 5839 rawip_clr_flowctrl,
5824 5840 rawip_ioctl,
5825 5841 rawip_close
5826 5842 };
↓ open down ↓ |
5561 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX