Print this page
%B
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/ip_output.c
+++ new/usr/src/uts/common/inet/ip/ip_output.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /* Copyright (c) 1990 Mentat Inc. */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/stream.h>
29 29 #include <sys/strsubr.h>
30 30 #include <sys/dlpi.h>
31 31 #include <sys/strsun.h>
32 32 #include <sys/zone.h>
33 33 #include <sys/ddi.h>
34 34 #include <sys/sunddi.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/debug.h>
37 37 #include <sys/atomic.h>
38 38
39 39 #include <sys/systm.h>
40 40 #include <sys/param.h>
41 41 #include <sys/kmem.h>
42 42 #include <sys/sdt.h>
43 43 #include <sys/socket.h>
44 44 #include <sys/mac.h>
45 45 #include <net/if.h>
46 46 #include <net/if_arp.h>
47 47 #include <net/route.h>
48 48 #include <sys/sockio.h>
49 49 #include <netinet/in.h>
50 50 #include <net/if_dl.h>
51 51
52 52 #include <inet/common.h>
53 53 #include <inet/mi.h>
54 54 #include <inet/mib2.h>
55 55 #include <inet/nd.h>
56 56 #include <inet/arp.h>
57 57 #include <inet/snmpcom.h>
58 58 #include <inet/kstatcom.h>
59 59
60 60 #include <netinet/igmp_var.h>
61 61 #include <netinet/ip6.h>
62 62 #include <netinet/icmp6.h>
63 63 #include <netinet/sctp.h>
64 64
65 65 #include <inet/ip.h>
66 66 #include <inet/ip_impl.h>
67 67 #include <inet/ip6.h>
68 68 #include <inet/ip6_asp.h>
69 69 #include <inet/tcp.h>
70 70 #include <inet/ip_multi.h>
71 71 #include <inet/ip_if.h>
72 72 #include <inet/ip_ire.h>
73 73 #include <inet/ip_ftable.h>
74 74 #include <inet/ip_rts.h>
75 75 #include <inet/optcom.h>
76 76 #include <inet/ip_ndp.h>
77 77 #include <inet/ip_listutils.h>
78 78 #include <netinet/igmp.h>
79 79 #include <netinet/ip_mroute.h>
80 80 #include <inet/ipp_common.h>
81 81
82 82 #include <net/pfkeyv2.h>
83 83 #include <inet/sadb.h>
84 84 #include <inet/ipsec_impl.h>
85 85 #include <inet/ipdrop.h>
86 86 #include <inet/ip_netinfo.h>
87 87
88 88 #include <sys/pattr.h>
89 89 #include <inet/ipclassifier.h>
90 90 #include <inet/sctp_ip.h>
91 91 #include <inet/sctp/sctp_impl.h>
92 92 #include <inet/udp_impl.h>
93 93 #include <sys/sunddi.h>
94 94
95 95 #include <sys/tsol/label.h>
96 96 #include <sys/tsol/tnet.h>
97 97
98 98 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
99 99
100 100 #ifdef DEBUG
/* Declared extern here, so defined in another file; visible only when DEBUG is set. */
101 101 extern boolean_t skip_sctp_cksum;
102 102 #endif
103 103
/*
 * Forward declarations of helpers private to this file.
 * ip_verify_nce() and ip_verify_dce() return an int status (0 or an errno
 * in the definitions below); ip_verify_lso() and ip_verify_zcopy() return
 * B_FALSE when the respective capability has changed, B_TRUE otherwise.
 */
104 104 static int ip_verify_nce(mblk_t *, ip_xmit_attr_t *);
105 105 static int ip_verify_dce(mblk_t *, ip_xmit_attr_t *);
106 106 static boolean_t ip_verify_lso(ill_t *, ip_xmit_attr_t *);
107 107 static boolean_t ip_verify_zcopy(ill_t *, ip_xmit_attr_t *);
108 108 static void ip_output_simple_broadcast(ip_xmit_attr_t *, mblk_t *);
109 109
110 110 /*
111 111 * There are two types of output functions for IP used for different
112 112 * purposes:
113 113 * - ip_output_simple() is when sending ICMP errors, TCP resets, etc when there
114 114 * is no context in the form of a conn_t. However, there is an
115 115 * ip_xmit_attr_t that the callers use to influence interface selection
116 116 * (needed for ICMP echo as well as IPv6 link-locals) and IPsec.
117 117 *
118 118 * - conn_ip_output() is used when sending packets with a conn_t and
119 119 * ip_set_destination has been called to cache information. In that case
120 120 * various socket options are recorded in the ip_xmit_attr_t and should
121 121 * be taken into account.
122 122 */
123 123
124 124 /*
125 125 * The caller *must* have called conn_connect() or ip_attr_connect()
126 126 * before calling conn_ip_output(). The caller needs to redo that each time
127 127 * the destination IP address or port changes, as well as each time there is
128 128 * a change to any socket option that would modify how packets are routed out
129 129 * of the box (e.g., SO_DONTROUTE, IP_NEXTHOP, IP_BOUND_IF).
130 130 *
131 131 * The ULP caller has to serialize the use of a single ip_xmit_attr_t.
132 132 * We assert for that here.
133 133 */
134 134 int
135 135 conn_ip_output(mblk_t *mp, ip_xmit_attr_t *ixa)
136 136 {
137 137 iaflags_t ixaflags = ixa->ixa_flags;
138 138 ire_t *ire;
139 139 nce_t *nce;
140 140 dce_t *dce;
141 141 ill_t *ill;
142 142 ip_stack_t *ipst = ixa->ixa_ipst;
143 143 int error;
144 144
145 145 /* We defer ipIfStatsHCOutRequests until an error or we have an ill */
146 146
147 147 ASSERT(ixa->ixa_ire != NULL);
148 148 /* Note there is no ixa_nce when reject and blackhole routes */
149 149 ASSERT(ixa->ixa_dce != NULL); /* Could be default dce */
150 150
151 151 #ifdef DEBUG
152 152 ASSERT(ixa->ixa_curthread == NULL);
153 153 ixa->ixa_curthread = curthread;
154 154 #endif
155 155
156 156 /*
157 157 * Even on labeled systems we can have a NULL ixa_tsl e.g.,
158 158 * for IGMP/MLD traffic.
159 159 */
160 160
161 161 ire = ixa->ixa_ire;
162 162
163 163 /*
164 164 * If the ULP says the (old) IRE resulted in reachability we
165 165 * record this before determine whether to use a new IRE.
166 166 * No locking for performance reasons.
167 167 */
168 168 if (ixaflags & IXAF_REACH_CONF)
169 169 ire->ire_badcnt = 0;
170 170
171 171 /*
172 172 * Has routing changed since we cached the results of the lookup?
173 173 *
174 174 * This check captures all of:
175 175 * - the cached ire being deleted (by means of the special
176 176 * IRE_GENERATION_CONDEMNED)
177 177 * - A potentially better ire being added (ire_generation being
178 178 * increased)
179 179 * - A deletion of the nexthop ire that was used when we did the
180 180 * lookup.
181 181 * - An addition of a potentially better nexthop ire.
182 182 * The last two are handled by walking and increasing the generation
183 183 * number on all dependant IREs in ire_flush_cache().
184 184 *
185 185 * The check also handles all cases of RTF_REJECT and RTF_BLACKHOLE
186 186 * since we ensure that each time we set ixa_ire to such an IRE we
187 187 * make sure the ixa_ire_generation does not match (by using
188 188 * IRE_GENERATION_VERIFY).
189 189 */
190 190 if (ire->ire_generation != ixa->ixa_ire_generation) {
191 191 error = ip_verify_ire(mp, ixa);
192 192 if (error != 0) {
193 193 ip_drop_output("ipIfStatsOutDiscards - verify ire",
194 194 mp, NULL);
195 195 goto drop;
196 196 }
197 197 ire = ixa->ixa_ire;
198 198 ASSERT(ire != NULL);
199 199 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
200 200 #ifdef DEBUG
201 201 ASSERT(ixa->ixa_curthread == curthread);
202 202 ixa->ixa_curthread = NULL;
203 203 #endif
204 204 ire->ire_ob_pkt_count++;
205 205 /* ixa_dce might be condemned; use default one */
206 206 return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa,
207 207 &ipst->ips_dce_default->dce_ident));
208 208 }
209 209 /*
210 210 * If the ncec changed then ip_verify_ire already set
211 211 * ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
212 212 * so we can recheck the interface mtu.
213 213 */
214 214
215 215 /*
216 216 * Note that ire->ire_generation could already have changed.
217 217 * We catch that next time we send a packet.
218 218 */
219 219 }
220 220
221 221 /*
222 222 * No need to lock access to ixa_nce since the ip_xmit_attr usage
223 223 * is single threaded.
224 224 */
225 225 ASSERT(ixa->ixa_nce != NULL);
226 226 nce = ixa->ixa_nce;
227 227 if (nce->nce_is_condemned) {
228 228 error = ip_verify_nce(mp, ixa);
229 229 /*
230 230 * In case ZEROCOPY capability become not available, we
231 231 * copy the message and free the original one. We might
232 232 * be copying more data than needed but it doesn't hurt
233 233 * since such change rarely happens.
234 234 */
235 235 switch (error) {
236 236 case 0:
237 237 break;
238 238 case ENOTSUP: { /* ZEROCOPY */
239 239 mblk_t *nmp;
240 240
241 241 if ((nmp = copymsg(mp)) != NULL) {
242 242 freemsg(mp);
243 243 mp = nmp;
244 244
245 245 break;
246 246 }
247 247 /* FALLTHROUGH */
248 248 }
249 249 default:
250 250 ip_drop_output("ipIfStatsOutDiscards - verify nce",
251 251 mp, NULL);
252 252 goto drop;
253 253 }
254 254 ire = ixa->ixa_ire;
255 255 ASSERT(ire != NULL);
256 256 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
257 257 #ifdef DEBUG
258 258 ASSERT(ixa->ixa_curthread == curthread);
259 259 ixa->ixa_curthread = NULL;
260 260 #endif
261 261 ire->ire_ob_pkt_count++;
262 262 /* ixa_dce might be condemned; use default one */
263 263 return ((ire->ire_sendfn)(ire, mp, mp->b_rptr,
264 264 ixa, &ipst->ips_dce_default->dce_ident));
265 265 }
266 266 ASSERT(ixa->ixa_nce != NULL);
267 267 nce = ixa->ixa_nce;
268 268
269 269 /*
270 270 * Note that some other event could already have made
271 271 * the new nce condemned. We catch that next time we
272 272 * try to send a packet.
273 273 */
274 274 }
275 275 /*
276 276 * If there is no per-destination dce_t then we have a reference to
277 277 * the default dce_t (which merely contains the dce_ipid).
278 278 * The generation check captures both the introduction of a
279 279 * per-destination dce_t (e.g., due to ICMP packet too big) and
280 280 * any change to the per-destination dce (including it becoming
281 281 * condemned by use of the special DCE_GENERATION_CONDEMNED).
282 282 */
283 283 dce = ixa->ixa_dce;
284 284
285 285 /*
286 286 * To avoid a periodic timer to increase the path MTU we
287 287 * look at dce_last_change_time each time we send a packet.
288 288 */
289 289 if (dce->dce_flags & DCEF_PMTU) {
290 290 int64_t now = LBOLT_FASTPATH64;
291 291
292 292 if ((TICK_TO_SEC(now) - dce->dce_last_change_time >
293 293 ipst->ips_ip_pathmtu_interval)) {
294 294 /*
295 295 * Older than 20 minutes. Drop the path MTU information.
296 296 * Since the path MTU changes as a result of this,
297 297 * twiddle ixa_dce_generation to make us go through the
298 298 * dce verification code in conn_ip_output.
299 299 */
300 300 mutex_enter(&dce->dce_lock);
301 301 dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU);
302 302 dce->dce_last_change_time = TICK_TO_SEC(now);
303 303 mutex_exit(&dce->dce_lock);
304 304 dce_increment_generation(dce);
305 305 }
306 306 }
307 307
308 308 if (dce->dce_generation != ixa->ixa_dce_generation) {
309 309 error = ip_verify_dce(mp, ixa);
310 310 if (error != 0) {
311 311 ip_drop_output("ipIfStatsOutDiscards - verify dce",
312 312 mp, NULL);
313 313 goto drop;
314 314 }
315 315 dce = ixa->ixa_dce;
316 316
317 317 /*
318 318 * Note that some other event could already have made the
319 319 * new dce's generation number change.
320 320 * We catch that next time we try to send a packet.
321 321 */
322 322 }
323 323
324 324 ill = nce->nce_ill;
325 325
326 326 /*
327 327 * An initial ixa_fragsize was set in ip_set_destination
328 328 * and we update it if any routing changes above.
329 329 * A change to ill_mtu with ifconfig will increase all dce_generation
330 330 * so that we will detect that with the generation check. Ditto for
331 331 * ill_mc_mtu.
332 332 */
333 333
334 334 /*
335 335 * Caller needs to make sure IXAF_VERIFY_SRC is not set if
336 336 * conn_unspec_src.
337 337 */
338 338 if ((ixaflags & IXAF_VERIFY_SOURCE) &&
339 339 ixa->ixa_src_generation != ipst->ips_src_generation) {
340 340 /* Check if the IP source is still assigned to the host. */
341 341 uint_t gen;
342 342
343 343 if (!ip_verify_src(mp, ixa, &gen)) {
344 344 /* Don't send a packet with a source that isn't ours */
345 345 error = EADDRNOTAVAIL;
346 346 ip_drop_output("ipIfStatsOutDiscards - invalid src",
347 347 mp, NULL);
348 348 goto drop;
349 349 }
350 350 /* The source is still valid - update the generation number */
351 351 ixa->ixa_src_generation = gen;
352 352 }
353 353
354 354 /*
355 355 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
356 356 * can only count the use prior to fragmentation. However the MIB
357 357 * counters on the ill will be incremented in post fragmentation.
358 358 */
359 359 ire->ire_ob_pkt_count++;
360 360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
361 361
362 362 /*
363 363 * Based on ire_type and ire_flags call one of:
364 364 * ire_send_local_v* - for IRE_LOCAL and IRE_LOOPBACK
365 365 * ire_send_multirt_v* - if RTF_MULTIRT
366 366 * ire_send_noroute_v* - if RTF_REJECT or RTF_BLACHOLE
367 367 * ire_send_multicast_v* - for IRE_MULTICAST
368 368 * ire_send_broadcast_v4 - for IRE_BROADCAST
369 369 * ire_send_wire_v* - for the rest.
370 370 */
371 371 #ifdef DEBUG
372 372 ASSERT(ixa->ixa_curthread == curthread);
373 373 ixa->ixa_curthread = NULL;
374 374 #endif
375 375 return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, &dce->dce_ident));
376 376
377 377 drop:
378 378 if (ixaflags & IXAF_IS_IPV4) {
379 379 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
380 380 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
381 381 } else {
382 382 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsHCOutRequests);
383 383 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards);
384 384 }
385 385 freemsg(mp);
386 386 #ifdef DEBUG
387 387 ASSERT(ixa->ixa_curthread == curthread);
388 388 ixa->ixa_curthread = NULL;
389 389 #endif
390 390 return (error);
391 391 }
392 392
393 393 /*
394 394 * Handle both IPv4 and IPv6. Sets the generation number
395 395 * to allow the caller to know when to call us again.
396 396 * Returns true if the source address in the packet is a valid source.
397 397 * We handle callers which try to send with a zero address (since we only
398 398 * get here if UNSPEC_SRC is not set).
399 399 */
400 400 boolean_t
401 401 ip_verify_src(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp)
402 402 {
403 403 ip_stack_t *ipst = ixa->ixa_ipst;
404 404
405 405 /*
406 406 * Need to grab the generation number before we check to
407 407 * avoid a race with a change to the set of local addresses.
408 408 * No lock needed since the thread which updates the set of local
409 409 * addresses use ipif/ill locks and exit those (hence a store memory
410 410 * barrier) before doing the atomic increase of ips_src_generation.
411 411 */
412 412 if (generationp != NULL)
413 413 *generationp = ipst->ips_src_generation;
414 414
415 415 if (ixa->ixa_flags & IXAF_IS_IPV4) {
416 416 ipha_t *ipha = (ipha_t *)mp->b_rptr;
417 417
418 418 if (ipha->ipha_src == INADDR_ANY)
419 419 return (B_FALSE);
420 420
421 421 return (ip_laddr_verify_v4(ipha->ipha_src, ixa->ixa_zoneid,
422 422 ipst, B_FALSE) != IPVL_BAD);
423 423 } else {
424 424 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
425 425 uint_t scopeid;
426 426
427 427 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src))
428 428 return (B_FALSE);
429 429
430 430 if (ixa->ixa_flags & IXAF_SCOPEID_SET)
431 431 scopeid = ixa->ixa_scopeid;
432 432 else
433 433 scopeid = 0;
434 434
435 435 return (ip_laddr_verify_v6(&ip6h->ip6_src, ixa->ixa_zoneid,
436 436 ipst, B_FALSE, scopeid) != IPVL_BAD);
437 437 }
438 438 }
439 439
440 440 /*
441 441 * Handle both IPv4 and IPv6. Reverify/recalculate the IRE to use.
442 442 */
443 443 int
444 444 ip_verify_ire(mblk_t *mp, ip_xmit_attr_t *ixa)
445 445 {
446 446 uint_t gen;
447 447 ire_t *ire;
448 448 nce_t *nce;
449 449 int error;
450 450 boolean_t multirt = B_FALSE;
451 451
452 452 /*
453 453 * Redo ip_select_route.
454 454 * Need to grab generation number as part of the lookup to
455 455 * avoid race.
456 456 */
457 457 error = 0;
458 458 ire = ip_select_route_pkt(mp, ixa, &gen, &error, &multirt);
459 459 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
460 460 if (error != 0) {
461 461 ire_refrele(ire);
462 462 return (error);
463 463 }
464 464
465 465 if (ixa->ixa_ire != NULL)
466 466 ire_refrele_notr(ixa->ixa_ire);
467 467 #ifdef DEBUG
468 468 ire_refhold_notr(ire);
469 469 ire_refrele(ire);
470 470 #endif
471 471 ixa->ixa_ire = ire;
472 472 ixa->ixa_ire_generation = gen;
473 473 if (multirt) {
474 474 if (ixa->ixa_flags & IXAF_IS_IPV4)
475 475 ixa->ixa_postfragfn = ip_postfrag_multirt_v4;
476 476 else
477 477 ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
478 478 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
479 479 } else {
480 480 ixa->ixa_postfragfn = ire->ire_postfragfn;
481 481 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
482 482 }
483 483
484 484 /*
485 485 * Don't look for an nce for reject or blackhole.
486 486 * They have ire_generation set to IRE_GENERATION_VERIFY which
487 487 * makes conn_ip_output avoid references to ixa_nce.
488 488 */
489 489 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
490 490 ASSERT(ixa->ixa_ire_generation == IRE_GENERATION_VERIFY);
491 491 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
492 492 return (0);
493 493 }
494 494
495 495 /* The NCE could now be different */
496 496 nce = ire_to_nce_pkt(ire, mp);
497 497 if (nce == NULL) {
498 498 /*
499 499 * Allocation failure. Make sure we redo ire/nce selection
500 500 * next time we send.
501 501 */
502 502 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
503 503 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
504 504 return (ENOBUFS);
505 505 }
506 506 if (nce == ixa->ixa_nce) {
507 507 /* No change */
508 508 nce_refrele(nce);
509 509 return (0);
510 510 }
511 511
512 512 /*
513 513 * Since the path MTU might change as a result of this
514 514 * route change, we twiddle ixa_dce_generation to
515 515 * make conn_ip_output go through the ip_verify_dce code.
516 516 */
517 517 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
518 518
519 519 if (ixa->ixa_nce != NULL)
520 520 nce_refrele(ixa->ixa_nce);
521 521 ixa->ixa_nce = nce;
522 522 return (0);
523 523 }
524 524
525 525 /*
526 526 * Handle both IPv4 and IPv6. Reverify/recalculate the NCE to use.
527 527 */
528 528 static int
529 529 ip_verify_nce(mblk_t *mp, ip_xmit_attr_t *ixa)
530 530 {
531 531 ire_t *ire = ixa->ixa_ire;
532 532 nce_t *nce;
533 533 int error = 0;
534 534 ipha_t *ipha = NULL;
535 535 ip6_t *ip6h = NULL;
536 536
537 537 if (ire->ire_ipversion == IPV4_VERSION)
538 538 ipha = (ipha_t *)mp->b_rptr;
539 539 else
540 540 ip6h = (ip6_t *)mp->b_rptr;
541 541
542 542 nce = ire_handle_condemned_nce(ixa->ixa_nce, ire, ipha, ip6h, B_TRUE);
543 543 if (nce == NULL) {
544 544 /* Try to find a better ire */
545 545 return (ip_verify_ire(mp, ixa));
546 546 }
547 547
548 548 /*
549 549 * The hardware offloading capabilities, for example LSO, of the
550 550 * interface might have changed, so do sanity verification here.
551 551 */
552 552 if (ixa->ixa_flags & IXAF_VERIFY_LSO) {
553 553 if (!ip_verify_lso(nce->nce_ill, ixa)) {
554 554 ASSERT(ixa->ixa_notify != NULL);
555 555 ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
556 556 IXAN_LSO, 0);
557 557 error = ENOTSUP;
558 558 }
559 559 }
560 560
561 561 /*
562 562 * Verify ZEROCOPY capability of underlying ill. Notify the ULP with
563 563 * any ZEROCOPY changes. In case ZEROCOPY capability is not available
564 564 * any more, return error so that conn_ip_output() can take care of
565 565 * the ZEROCOPY message properly. It's safe to continue send the
566 566 * message when ZEROCOPY newly become available.
567 567 */
568 568 if (ixa->ixa_flags & IXAF_VERIFY_ZCOPY) {
569 569 if (!ip_verify_zcopy(nce->nce_ill, ixa)) {
570 570 ASSERT(ixa->ixa_notify != NULL);
571 571 ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
572 572 IXAN_ZCOPY, 0);
573 573 if ((ixa->ixa_flags & IXAF_ZCOPY_CAPAB) == 0)
574 574 error = ENOTSUP;
575 575 }
576 576 }
577 577
578 578 /*
579 579 * Since the path MTU might change as a result of this
580 580 * change, we twiddle ixa_dce_generation to
581 581 * make conn_ip_output go through the ip_verify_dce code.
582 582 */
583 583 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
584 584
585 585 nce_refrele(ixa->ixa_nce);
586 586 ixa->ixa_nce = nce;
587 587 return (error);
588 588 }
589 589
590 590 /*
591 591 * Handle both IPv4 and IPv6. Reverify/recalculate the DCE to use.
592 592 */
593 593 static int
594 594 ip_verify_dce(mblk_t *mp, ip_xmit_attr_t *ixa)
595 595 {
596 596 dce_t *dce;
597 597 uint_t gen;
598 598 uint_t pmtu;
599 599
600 600 dce = dce_lookup_pkt(mp, ixa, &gen);
601 601 ASSERT(dce != NULL);
602 602
603 603 dce_refrele_notr(ixa->ixa_dce);
604 604 #ifdef DEBUG
605 605 dce_refhold_notr(dce);
606 606 dce_refrele(dce);
607 607 #endif
608 608 ixa->ixa_dce = dce;
609 609 ixa->ixa_dce_generation = gen;
610 610
611 611 /* Extract the (path) mtu from the dce, ncec_ill etc */
612 612 pmtu = ip_get_pmtu(ixa);
613 613
614 614 /*
615 615 * Tell ULP about PMTU changes - increase or decrease - by returning
616 616 * an error if IXAF_VERIFY_PMTU is set. In such case, ULP should update
617 617 * both ixa_pmtu and ixa_fragsize appropriately.
618 618 *
619 619 * If ULP doesn't set that flag then we need to update ixa_fragsize
620 620 * since routing could have changed the ill after after ixa_fragsize
621 621 * was set previously in the conn_ip_output path or in
622 622 * ip_set_destination.
623 623 *
624 624 * In case of LSO, ixa_fragsize might be greater than ixa_pmtu.
625 625 *
626 626 * In the case of a path MTU increase we send the packet after the
627 627 * notify to the ULP.
628 628 */
629 629 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) {
630 630 if (ixa->ixa_pmtu != pmtu) {
631 631 uint_t oldmtu = ixa->ixa_pmtu;
632 632
633 633 DTRACE_PROBE2(verify_pmtu, uint32_t, pmtu,
634 634 uint32_t, ixa->ixa_pmtu);
635 635 ASSERT(ixa->ixa_notify != NULL);
636 636 ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
637 637 IXAN_PMTU, pmtu);
638 638 if (pmtu < oldmtu)
639 639 return (EMSGSIZE);
640 640 }
641 641 } else {
642 642 ixa->ixa_fragsize = pmtu;
643 643 }
644 644 return (0);
645 645 }
646 646
647 647 /*
648 648 * Verify LSO usability. Keep the return value simple to indicate whether
649 649 * the LSO capability has changed. Handle both IPv4 and IPv6.
650 650 */
651 651 static boolean_t
652 652 ip_verify_lso(ill_t *ill, ip_xmit_attr_t *ixa)
653 653 {
654 654 ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab;
655 655 ill_lso_capab_t *new_lsoc = ill->ill_lso_capab;
656 656
657 657 if (ixa->ixa_flags & IXAF_LSO_CAPAB) {
658 658 /*
659 659 * Not unsable any more.
660 660 */
661 661 if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) ||
662 662 (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) ||
663 663 (ixa->ixa_ire->ire_flags & RTF_MULTIRT) ||
664 664 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
665 665 !ILL_LSO_TCP_IPV4_USABLE(ill) :
666 666 !ILL_LSO_TCP_IPV6_USABLE(ill))) {
667 667 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
668 668
669 669 return (B_FALSE);
670 670 }
671 671
672 672 /*
673 673 * Capability has changed, refresh the copy in ixa.
674 674 */
675 675 if (lsoc->ill_lso_max != new_lsoc->ill_lso_max) {
676 676 *lsoc = *new_lsoc;
677 677
678 678 return (B_FALSE);
679 679 }
680 680 } else { /* Was not usable */
681 681 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
682 682 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
683 683 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
684 684 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
685 685 ILL_LSO_TCP_IPV4_USABLE(ill) :
686 686 ILL_LSO_TCP_IPV6_USABLE(ill))) {
687 687 *lsoc = *new_lsoc;
688 688 ixa->ixa_flags |= IXAF_LSO_CAPAB;
689 689
690 690 return (B_FALSE);
691 691 }
692 692 }
693 693
694 694 return (B_TRUE);
695 695 }
696 696
697 697 /*
698 698 * Verify ZEROCOPY usability. Keep the return value simple to indicate whether
699 699 * the ZEROCOPY capability has changed. Handle both IPv4 and IPv6.
700 700 */
701 701 static boolean_t
702 702 ip_verify_zcopy(ill_t *ill, ip_xmit_attr_t *ixa)
703 703 {
704 704 if (ixa->ixa_flags & IXAF_ZCOPY_CAPAB) {
705 705 /*
706 706 * Not unsable any more.
707 707 */
708 708 if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) ||
709 709 (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) ||
710 710 (ixa->ixa_ire->ire_flags & RTF_MULTIRT) ||
711 711 !ILL_ZCOPY_USABLE(ill)) {
712 712 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
713 713
714 714 return (B_FALSE);
715 715 }
716 716 } else { /* Was not usable */
717 717 if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
718 718 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
719 719 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
720 720 ILL_ZCOPY_USABLE(ill)) {
721 721 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
722 722
723 723 return (B_FALSE);
724 724 }
725 725 }
726 726
727 727 return (B_TRUE);
728 728 }
729 729
730 730
731 731 /*
732 732 * When there is no conn_t context, this will send a packet.
733 733 * The caller must *not* have called conn_connect() or ip_attr_connect()
734 734 * before calling ip_output_simple().
735 735 * Handles IPv4 and IPv6. Returns zero or an errno such as ENETUNREACH.
736 736 * Honors IXAF_SET_SOURCE.
737 737 *
738 738 * We acquire the ire and after calling ire_sendfn we release
739 739 * the hold on the ire. Ditto for the nce and dce.
740 740 *
741 741 * This assumes that the caller has set the following in ip_xmit_attr_t:
742 742 * ixa_tsl, ixa_zoneid, and ixa_ipst must always be set.
743 743 * If ixa_ifindex is non-zero it means send out that ill. (If it is
744 744 * an upper IPMP ill we load balance across the group; if a lower we send
745 745 * on that lower ill without load balancing.)
746 746 * IXAF_IS_IPV4 must be set correctly.
747 747 * If IXAF_IPSEC_SECURE is set then the ixa_ipsec_* fields must be set.
748 748 * If IXAF_NO_IPSEC is set we'd skip IPsec policy lookup.
749 749 * If neither of those two are set we do an IPsec policy lookup.
750 750 *
751 751 * We handle setting things like
752 752 * ixa_pktlen
753 753 * ixa_ip_hdr_length
754 754 * ixa->ixa_protocol
755 755 *
756 756 * The caller may set ixa_xmit_hint, which is used for ECMP selection and
757 757 * transmit ring selecting in GLD.
758 758 *
759 759 * The caller must do an ixa_cleanup() to release any IPsec references
760 760 * after we return.
761 761 */
762 762 int
763 763 ip_output_simple(mblk_t *mp, ip_xmit_attr_t *ixa)
764 764 {
765 765 ts_label_t *effective_tsl = NULL;
766 766 int err;
767 767
768 768 ASSERT(ixa->ixa_ipst != NULL);
769 769
770 770 if (is_system_labeled()) {
771 771 ip_stack_t *ipst = ixa->ixa_ipst;
772 772
773 773 if (ixa->ixa_flags & IXAF_IS_IPV4) {
774 774 err = tsol_check_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
775 775 &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst,
776 776 &effective_tsl);
777 777 } else {
778 778 err = tsol_check_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
779 779 &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst,
780 780 &effective_tsl);
781 781 }
782 782 if (err != 0) {
783 783 ip2dbg(("tsol_check: label check failed (%d)\n", err));
784 784 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
785 785 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
786 786 ip_drop_output("tsol_check_label", mp, NULL);
787 787 freemsg(mp);
788 788 return (err);
789 789 }
790 790 if (effective_tsl != NULL) {
791 791 /* Update the label */
792 792 ip_xmit_attr_replace_tsl(ixa, effective_tsl);
793 793 }
794 794 }
795 795
796 796 if (ixa->ixa_flags & IXAF_IS_IPV4)
797 797 return (ip_output_simple_v4(mp, ixa));
798 798 else
799 799 return (ip_output_simple_v6(mp, ixa));
800 800 }
801 801
802 802 int
803 803 ip_output_simple_v4(mblk_t *mp, ip_xmit_attr_t *ixa)
804 804 {
805 805 ipha_t *ipha;
806 806 ipaddr_t firsthop; /* In IP header */
807 807 ipaddr_t dst; /* End of source route, or ipha_dst if none */
808 808 ire_t *ire;
809 809 ipaddr_t setsrc; /* RTF_SETSRC */
810 810 int error;
811 811 ill_t *ill = NULL;
812 812 dce_t *dce = NULL;
813 813 nce_t *nce;
814 814 iaflags_t ixaflags = ixa->ixa_flags;
815 815 ip_stack_t *ipst = ixa->ixa_ipst;
816 816 boolean_t repeat = B_FALSE;
817 817 boolean_t multirt = B_FALSE;
818 818 int64_t now;
819 819
820 820 ipha = (ipha_t *)mp->b_rptr;
821 821 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
822 822
823 823 /*
824 824 * Even on labeled systems we can have a NULL ixa_tsl e.g.,
825 825 * for IGMP/MLD traffic.
826 826 */
827 827
828 828 /* Caller already set flags */
829 829 ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
830 830
831 831 ASSERT(ixa->ixa_nce == NULL);
832 832
833 833 ixa->ixa_pktlen = ntohs(ipha->ipha_length);
834 834 ASSERT(ixa->ixa_pktlen == msgdsize(mp));
835 835 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha);
836 836 ixa->ixa_protocol = ipha->ipha_protocol;
837 837
838 838 /*
839 839 * Assumes that source routed packets have already been massaged by
840 840 * the ULP (ip_massage_options) and as a result ipha_dst is the next
841 841 * hop in the source route. The final destination is used for IPsec
842 842 * policy and DCE lookup.
843 843 */
844 844 firsthop = ipha->ipha_dst;
845 845 dst = ip_get_dst(ipha);
846 846
847 847 repeat_ire:
848 848 error = 0;
849 849 setsrc = INADDR_ANY;
850 850 ire = ip_select_route_v4(firsthop, ipha->ipha_src, ixa, NULL,
851 851 &setsrc, &error, &multirt);
852 852 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
853 853 if (error != 0) {
854 854 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
855 855 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
856 856 ip_drop_output("ipIfStatsOutDiscards - select route", mp, NULL);
857 857 freemsg(mp);
858 858 goto done;
859 859 }
860 860
861 861 if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) {
862 862 /* ire_ill might be NULL hence need to skip some code */
863 863 if (ixaflags & IXAF_SET_SOURCE)
864 864 ipha->ipha_src = htonl(INADDR_LOOPBACK);
865 865 ixa->ixa_fragsize = IP_MAXPACKET;
866 866 ill = NULL;
867 867 nce = NULL;
868 868 ire->ire_ob_pkt_count++;
869 869 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
870 870 /* No dce yet; use default one */
871 871 error = (ire->ire_sendfn)(ire, mp, ipha, ixa,
872 872 &ipst->ips_dce_default->dce_ident);
873 873 goto done;
874 874 }
875 875
876 876 /* Note that ipha_dst is only used for IRE_MULTICAST */
877 877 nce = ire_to_nce(ire, ipha->ipha_dst, NULL);
878 878 if (nce == NULL) {
879 879 /* Allocation failure? */
880 880 ip_drop_output("ire_to_nce", mp, ill);
881 881 freemsg(mp);
882 882 error = ENOBUFS;
883 883 goto done;
884 884 }
885 885 if (nce->nce_is_condemned) {
886 886 nce_t *nce1;
887 887
888 888 nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_TRUE);
889 889 nce_refrele(nce);
890 890 if (nce1 == NULL) {
891 891 if (!repeat) {
892 892 /* Try finding a better IRE */
893 893 repeat = B_TRUE;
894 894 ire_refrele(ire);
895 895 goto repeat_ire;
896 896 }
897 897 /* Tried twice - drop packet */
898 898 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
899 899 ip_drop_output("No nce", mp, ill);
900 900 freemsg(mp);
901 901 error = ENOBUFS;
902 902 goto done;
903 903 }
904 904 nce = nce1;
905 905 }
906 906
907 907 /*
908 908 * For multicast with multirt we have a flag passed back from
909 909 * ire_lookup_multi_ill_v4 since we don't have an IRE for each
910 910 * possible multicast address.
911 911 * We also need a flag for multicast since we can't check
912 912 * whether RTF_MULTIRT is set in ixa_ire for multicast.
913 913 */
914 914 if (multirt) {
915 915 ixa->ixa_postfragfn = ip_postfrag_multirt_v4;
916 916 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
917 917 } else {
918 918 ixa->ixa_postfragfn = ire->ire_postfragfn;
919 919 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
920 920 }
921 921 ASSERT(ixa->ixa_nce == NULL);
922 922 ixa->ixa_nce = nce;
923 923
924 924 /*
925 925 * Check for a dce_t with a path mtu.
926 926 */
927 927 dce = dce_lookup_v4(dst, ipst, NULL);
928 928 ASSERT(dce != NULL);
929 929
930 930 if (!(ixaflags & IXAF_PMTU_DISCOVERY)) {
931 931 ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
932 932 } else if (dce->dce_flags & DCEF_PMTU) {
933 933 /*
934 934 * To avoid a periodic timer to increase the path MTU we
935 935 * look at dce_last_change_time each time we send a packet.
936 936 */
937 937 now = ddi_get_lbolt64();
938 938 if (TICK_TO_SEC(now) - dce->dce_last_change_time >
939 939 ipst->ips_ip_pathmtu_interval) {
940 940 /*
941 941 * Older than 20 minutes. Drop the path MTU information.
942 942 */
943 943 mutex_enter(&dce->dce_lock);
944 944 dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU);
945 945 dce->dce_last_change_time = TICK_TO_SEC(now);
946 946 mutex_exit(&dce->dce_lock);
947 947 dce_increment_generation(dce);
948 948 ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
949 949 } else {
950 950 uint_t fragsize;
951 951
952 952 fragsize = ip_get_base_mtu(nce->nce_ill, ire);
953 953 if (fragsize > dce->dce_pmtu)
954 954 fragsize = dce->dce_pmtu;
955 955 ixa->ixa_fragsize = fragsize;
956 956 }
957 957 } else {
958 958 ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
959 959 }
960 960
961 961 /*
962 962 * We use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp
963 963 * interface for source address selection.
964 964 */
965 965 ill = ire_nexthop_ill(ire);
966 966
967 967 if (ixaflags & IXAF_SET_SOURCE) {
968 968 ipaddr_t src;
969 969
970 970 /*
971 971 * We use the final destination to get
972 972 * correct selection for source routed packets
973 973 */
974 974
975 975 /* If unreachable we have no ill but need some source */
976 976 if (ill == NULL) {
977 977 src = htonl(INADDR_LOOPBACK);
978 978 error = 0;
979 979 } else {
980 980 error = ip_select_source_v4(ill, setsrc, dst,
981 981 ixa->ixa_multicast_ifaddr, ixa->ixa_zoneid, ipst,
982 982 &src, NULL, NULL);
983 983 }
984 984 if (error != 0) {
985 985 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
986 986 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
987 987 ip_drop_output("ipIfStatsOutDiscards - no source",
988 988 mp, ill);
989 989 freemsg(mp);
990 990 goto done;
991 991 }
992 992 ipha->ipha_src = src;
993 993 } else if (ixaflags & IXAF_VERIFY_SOURCE) {
994 994 /* Check if the IP source is assigned to the host. */
995 995 if (!ip_verify_src(mp, ixa, NULL)) {
996 996 /* Don't send a packet with a source that isn't ours */
997 997 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
998 998 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
999 999 ip_drop_output("ipIfStatsOutDiscards - invalid source",
1000 1000 mp, ill);
1001 1001 freemsg(mp);
1002 1002 error = EADDRNOTAVAIL;
1003 1003 goto done;
1004 1004 }
1005 1005 }
1006 1006
1007 1007
1008 1008 /*
1009 1009 * Check against global IPsec policy to set the AH/ESP attributes.
1010 1010 * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate.
1011 1011 */
1012 1012 if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) {
1013 1013 ASSERT(ixa->ixa_ipsec_policy == NULL);
1014 1014 mp = ip_output_attach_policy(mp, ipha, NULL, NULL, ixa);
1015 1015 if (mp == NULL) {
1016 1016 /* MIB and ip_drop_packet already done */
1017 1017 return (EHOSTUNREACH); /* IPsec policy failure */
1018 1018 }
1019 1019 }
1020 1020
1021 1021 if (ill != NULL) {
1022 1022 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
1023 1023 } else {
1024 1024 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
1025 1025 }
1026 1026
1027 1027 /*
1028 1028 * We update the statistics on the most specific IRE i.e., the first
1029 1029 * one we found.
1030 1030 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
1031 1031 * can only count the use prior to fragmentation. However the MIB
1032 1032 * counters on the ill will be incremented in post fragmentation.
1033 1033 */
1034 1034 ire->ire_ob_pkt_count++;
1035 1035
1036 1036 /*
1037 1037 * Based on ire_type and ire_flags call one of:
1038 1038 * ire_send_local_v4 - for IRE_LOCAL and IRE_LOOPBACK
1039 1039 * ire_send_multirt_v4 - if RTF_MULTIRT
1040 1040 * ire_send_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
1041 1041 * ire_send_multicast_v4 - for IRE_MULTICAST
1042 1042 * ire_send_broadcast_v4 - for IRE_BROADCAST
1043 1043 * ire_send_wire_v4 - for the rest.
1044 1044 */
1045 1045 error = (ire->ire_sendfn)(ire, mp, ipha, ixa, &dce->dce_ident);
1046 1046 done:
1047 1047 ire_refrele(ire);
1048 1048 if (dce != NULL)
1049 1049 dce_refrele(dce);
1050 1050 if (ill != NULL)
1051 1051 ill_refrele(ill);
1052 1052 if (ixa->ixa_nce != NULL)
1053 1053 nce_refrele(ixa->ixa_nce);
1054 1054 ixa->ixa_nce = NULL;
1055 1055 return (error);
1056 1056 }
1057 1057
1058 1058 /*
1059 1059 * ire_sendfn() functions.
1060 1060 * These functions use the following xmit_attr:
1061 1061 * - ixa_fragsize - read to determine whether or not to fragment
1062 1062 * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec
1063 1063 * - ixa_ipsec_* are used inside IPsec
1064 1064 * - IXAF_SET_SOURCE - replace IP source in broadcast case.
1065 1065 * - IXAF_LOOPBACK_COPY - for multicast and broadcast
1066 1066 */
1067 1067
1068 1068
1069 1069 /*
1070 1070 * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK
1071 1071 *
1072 1072 * The checks for restrict_interzone_loopback are done in ire_route_recursive.
1073 1073 */
1074 1074 /* ARGSUSED4 */
1075 1075 int
1076 1076 ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1077 1077 ip_xmit_attr_t *ixa, uint32_t *identp)
1078 1078 {
1079 1079 ipha_t *ipha = (ipha_t *)iph_arg;
1080 1080 ip_stack_t *ipst = ixa->ixa_ipst;
1081 1081 ill_t *ill = ire->ire_ill;
1082 1082 ip_recv_attr_t iras; /* NOTE: No bzero for performance */
1083 1083 uint_t pktlen = ixa->ixa_pktlen;
1084 1084
1085 1085 /*
1086 1086 * No fragmentation, no nce, no application of IPsec,
1087 1087 * and no ipha_ident assignment.
1088 1088 *
1089 1089 * Note different order between IP provider and FW_HOOKS than in
1090 1090 * send_wire case.
1091 1091 */
1092 1092
1093 1093 /*
1094 1094 * DTrace this as ip:::send. A packet blocked by FW_HOOKS will fire the
1095 1095 * send probe, but not the receive probe.
1096 1096 */
1097 1097 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
1098 1098 ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
1099 1099 int, 1);
1100 1100
1101 1101 if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) {
1102 1102 int error;
1103 1103
1104 1104 DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL,
1105 1105 ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
1106 1106 FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1107 1107 ipst->ips_ipv4firewall_loopback_out,
1108 1108 NULL, ill, ipha, mp, mp, 0, ipst, error);
1109 1109 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp);
1110 1110 if (mp == NULL)
1111 1111 return (error);
1112 1112
1113 1113 /*
1114 1114 * Even if the destination was changed by the filter we use the
1115 1115 * forwarding decision that was made based on the address
1116 1116 * in ip_output/ip_set_destination.
1117 1117 */
1118 1118 /* Length could be different */
1119 1119 ipha = (ipha_t *)mp->b_rptr;
1120 1120 pktlen = ntohs(ipha->ipha_length);
1121 1121 }
1122 1122
1123 1123 /*
1124 1124 * If a callback is enabled then we need to know the
1125 1125 * source and destination zoneids for the packet. We already
1126 1126 * have those handy.
1127 1127 */
1128 1128 if (ipst->ips_ip4_observe.he_interested) {
1129 1129 zoneid_t szone, dzone;
1130 1130 zoneid_t stackzoneid;
1131 1131
1132 1132 stackzoneid = netstackid_to_zoneid(
1133 1133 ipst->ips_netstack->netstack_stackid);
1134 1134
1135 1135 if (stackzoneid == GLOBAL_ZONEID) {
1136 1136 /* Shared-IP zone */
1137 1137 dzone = ire->ire_zoneid;
1138 1138 szone = ixa->ixa_zoneid;
1139 1139 } else {
1140 1140 szone = dzone = stackzoneid;
1141 1141 }
1142 1142 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
1143 1143 }
1144 1144
1145 1145 /* Handle lo0 stats */
1146 1146 ipst->ips_loopback_packets++;
1147 1147
1148 1148 /* Map ixa to ira including IPsec policies */
1149 1149 ipsec_out_to_in(ixa, ill, &iras);
1150 1150 iras.ira_pktlen = pktlen;
1151 1151
1152 1152 if (!IS_SIMPLE_IPH(ipha)) {
1153 1153 ip_output_local_options(ipha, ipst);
1154 1154 iras.ira_flags |= IRAF_IPV4_OPTIONS;
1155 1155 }
1156 1156
1157 1157 if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) {
1158 1158 int error;
1159 1159
1160 1160 DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill,
1161 1161 ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp);
1162 1162 FW_HOOKS(ipst->ips_ip4_loopback_in_event,
1163 1163 ipst->ips_ipv4firewall_loopback_in,
1164 1164 ill, NULL, ipha, mp, mp, 0, ipst, error);
1165 1165
1166 1166 DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp);
1167 1167 if (mp == NULL) {
1168 1168 ira_cleanup(&iras, B_FALSE);
1169 1169 return (error);
1170 1170 }
1171 1171 /*
1172 1172 * Even if the destination was changed by the filter we use the
1173 1173 * forwarding decision that was made based on the address
1174 1174 * in ip_output/ip_set_destination.
1175 1175 */
1176 1176 /* Length could be different */
1177 1177 ipha = (ipha_t *)mp->b_rptr;
1178 1178 pktlen = iras.ira_pktlen = ntohs(ipha->ipha_length);
1179 1179 }
1180 1180
1181 1181 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
1182 1182 ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
1183 1183 int, 1);
1184 1184
1185 1185 ire->ire_ib_pkt_count++;
1186 1186 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
1187 1187 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen);
1188 1188
1189 1189 /* Destined to ire_zoneid - use that for fanout */
1190 1190 iras.ira_zoneid = ire->ire_zoneid;
1191 1191
1192 1192 if (is_system_labeled()) {
1193 1193 iras.ira_flags |= IRAF_SYSTEM_LABELED;
1194 1194
1195 1195 /*
1196 1196 * This updates ira_cred, ira_tsl and ira_free_flags based
1197 1197 * on the label. We don't expect this to ever fail for
1198 1198 * loopback packets, so we silently drop the packet should it
1199 1199 * fail.
1200 1200 */
1201 1201 if (!tsol_get_pkt_label(mp, IPV4_VERSION, &iras)) {
1202 1202 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1203 1203 ip_drop_input("tsol_get_pkt_label", mp, ill);
1204 1204 freemsg(mp);
1205 1205 return (0);
1206 1206 }
1207 1207 ASSERT(iras.ira_tsl != NULL);
1208 1208
1209 1209 /* tsol_get_pkt_label sometimes does pullupmsg */
1210 1210 ipha = (ipha_t *)mp->b_rptr;
1211 1211 }
1212 1212
1213 1213 ip_fanout_v4(mp, ipha, &iras);
1214 1214
1215 1215 /* We moved any IPsec refs from ixa to iras */
1216 1216 ira_cleanup(&iras, B_FALSE);
1217 1217 return (0);
1218 1218 }
1219 1219
1220 1220 /*
1221 1221 * ire_sendfn for IRE_BROADCAST
1222 1222 * If the broadcast address is present on multiple ills and ixa_ifindex
1223 1223 * isn't set, then we generate
1224 1224 * a separate datagram (potentially with different source address) for
1225 1225 * those ills. In any case, only one copy is looped back to ip_input_v4.
1226 1226 */
1227 1227 int
1228 1228 ire_send_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1229 1229 ip_xmit_attr_t *ixa, uint32_t *identp)
1230 1230 {
1231 1231 ipha_t *ipha = (ipha_t *)iph_arg;
1232 1232 ip_stack_t *ipst = ixa->ixa_ipst;
1233 1233 irb_t *irb = ire->ire_bucket;
1234 1234 ire_t *ire1;
1235 1235 mblk_t *mp1;
1236 1236 ipha_t *ipha1;
1237 1237 iaflags_t ixaflags = ixa->ixa_flags;
1238 1238 nce_t *nce1, *nce_orig;
1239 1239
1240 1240 /*
1241 1241 * Unless ire_send_multirt_v4 already set a ttl, force the
1242 1242 * ttl to a smallish value.
1243 1243 */
1244 1244 if (!(ixa->ixa_flags & IXAF_NO_TTL_CHANGE)) {
1245 1245 /*
1246 1246 * To avoid broadcast storms, we usually set the TTL to 1 for
1247 1247 * broadcasts. This can
1248 1248 * be overridden stack-wide through the ip_broadcast_ttl
1249 1249 * ndd tunable, or on a per-connection basis through the
1250 1250 * IP_BROADCAST_TTL socket option.
1251 1251 *
1252 1252 * If SO_DONTROUTE/IXAF_DONTROUTE is set, then ire_send_wire_v4
1253 1253 * will force ttl to one after we've set this.
1254 1254 */
1255 1255 if (ixaflags & IXAF_BROADCAST_TTL_SET)
1256 1256 ipha->ipha_ttl = ixa->ixa_broadcast_ttl;
1257 1257 else
1258 1258 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl;
1259 1259 }
1260 1260 /*
1261 1261 * Make sure we get a loopback copy (after IPsec and frag)
1262 1262 * Skip hardware checksum so that loopback copy is checksumed.
1263 1263 */
1264 1264 ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1265 1265
1266 1266 /* Do we need to potentially generate multiple copies? */
1267 1267 if (irb->irb_ire_cnt == 1 || ixa->ixa_ifindex != 0)
1268 1268 return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1269 1269
1270 1270 /*
1271 1271 * Loop over all IRE_BROADCAST in the bucket (might only be one).
1272 1272 * Note that everything in the bucket has the same destination address.
1273 1273 */
1274 1274 irb_refhold(irb);
1275 1275 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
1276 1276 /* We do the main IRE after the end of the loop */
1277 1277 if (ire1 == ire)
1278 1278 continue;
1279 1279
1280 1280 /*
1281 1281 * Only IREs for the same IP address should be in the same
1282 1282 * bucket.
1283 1283 * But could have IRE_HOSTs in the case of CGTP.
1284 1284 * If we find any multirt routes we bail out of the loop
1285 1285 * and just do the single packet at the end; ip_postfrag_multirt
1286 1286 * will duplicate the packet.
1287 1287 */
1288 1288 ASSERT(ire1->ire_addr == ire->ire_addr);
1289 1289 if (!(ire1->ire_type & IRE_BROADCAST))
1290 1290 continue;
1291 1291
1292 1292 if (IRE_IS_CONDEMNED(ire1))
1293 1293 continue;
1294 1294
1295 1295 if (ixa->ixa_zoneid != ALL_ZONES &&
1296 1296 ire->ire_zoneid != ire1->ire_zoneid)
1297 1297 continue;
1298 1298
1299 1299 ASSERT(ire->ire_ill != ire1->ire_ill && ire1->ire_ill != NULL);
1300 1300
1301 1301 if (ire1->ire_flags & RTF_MULTIRT)
1302 1302 break;
1303 1303
1304 1304 /*
1305 1305 * For IPMP we only send for the ipmp_ill. arp_nce_init() will
1306 1306 * ensure that this goes out on the cast_ill.
1307 1307 */
1308 1308 if (IS_UNDER_IPMP(ire1->ire_ill))
1309 1309 continue;
1310 1310
1311 1311 mp1 = copymsg(mp);
1312 1312 if (mp1 == NULL) {
1313 1313 BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1314 1314 ipIfStatsOutDiscards);
1315 1315 ip_drop_output("ipIfStatsOutDiscards",
1316 1316 mp, ire1->ire_ill);
1317 1317 continue;
1318 1318 }
1319 1319
1320 1320 ipha1 = (ipha_t *)mp1->b_rptr;
1321 1321 if (ixa->ixa_flags & IXAF_SET_SOURCE) {
1322 1322 /*
1323 1323 * Need to pick a different source address for each
1324 1324 * interface. If we have a global IPsec policy and
1325 1325 * no per-socket policy then we punt to
1326 1326 * ip_output_simple_v4 using a separate ip_xmit_attr_t.
1327 1327 */
1328 1328 if (ixaflags & IXAF_IPSEC_GLOBAL_POLICY) {
1329 1329 ip_output_simple_broadcast(ixa, mp1);
1330 1330 continue;
1331 1331 }
1332 1332 /* Pick a new source address for each interface */
1333 1333 if (ip_select_source_v4(ire1->ire_ill, INADDR_ANY,
1334 1334 ipha1->ipha_dst, INADDR_ANY, ixa->ixa_zoneid, ipst,
1335 1335 &ipha1->ipha_src, NULL, NULL) != 0) {
1336 1336 BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1337 1337 ipIfStatsOutDiscards);
1338 1338 ip_drop_output("ipIfStatsOutDiscards - select "
1339 1339 "broadcast source", mp1, ire1->ire_ill);
1340 1340 freemsg(mp1);
1341 1341 continue;
1342 1342 }
1343 1343 /*
1344 1344 * Check against global IPsec policy to set the AH/ESP
1345 1345 * attributes. IPsec will set IXAF_IPSEC_* and
1346 1346 * ixa_ipsec_* as appropriate.
1347 1347 */
1348 1348 if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) {
1349 1349 ASSERT(ixa->ixa_ipsec_policy == NULL);
1350 1350 mp1 = ip_output_attach_policy(mp1, ipha, NULL,
1351 1351 NULL, ixa);
1352 1352 if (mp1 == NULL) {
1353 1353 /*
1354 1354 * MIB and ip_drop_packet already
1355 1355 * done
1356 1356 */
1357 1357 continue;
1358 1358 }
1359 1359 }
1360 1360 }
1361 1361 /* Make sure we have an NCE on this ill */
1362 1362 nce1 = arp_nce_init(ire1->ire_ill, ire1->ire_addr,
1363 1363 ire1->ire_type);
1364 1364 if (nce1 == NULL) {
1365 1365 BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1366 1366 ipIfStatsOutDiscards);
1367 1367 ip_drop_output("ipIfStatsOutDiscards - broadcast nce",
1368 1368 mp1, ire1->ire_ill);
1369 1369 freemsg(mp1);
1370 1370 continue;
1371 1371 }
1372 1372 nce_orig = ixa->ixa_nce;
1373 1373 ixa->ixa_nce = nce1;
1374 1374
1375 1375 ire_refhold(ire1);
1376 1376 /*
1377 1377 * Ignore any errors here. We just collect the errno for
1378 1378 * the main ire below
1379 1379 */
1380 1380 (void) ire_send_wire_v4(ire1, mp1, ipha1, ixa, identp);
1381 1381 ire_refrele(ire1);
1382 1382
1383 1383 ixa->ixa_nce = nce_orig;
1384 1384 nce_refrele(nce1);
1385 1385
1386 1386 ixa->ixa_flags &= ~IXAF_LOOPBACK_COPY;
1387 1387 }
1388 1388 irb_refrele(irb);
1389 1389 /* Finally, the main one */
1390 1390
1391 1391 /*
1392 1392 * For IPMP we only send broadcasts on the ipmp_ill.
1393 1393 */
1394 1394 if (IS_UNDER_IPMP(ire->ire_ill)) {
1395 1395 freemsg(mp);
1396 1396 return (0);
1397 1397 }
1398 1398
1399 1399 return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1400 1400 }
1401 1401
1402 1402 /*
1403 1403 * Send a packet using a different source address and different
1404 1404 * IPsec policy.
1405 1405 */
1406 1406 static void
1407 1407 ip_output_simple_broadcast(ip_xmit_attr_t *ixa, mblk_t *mp)
1408 1408 {
1409 1409 ip_xmit_attr_t ixas;
1410 1410
1411 1411 bzero(&ixas, sizeof (ixas));
1412 1412 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
1413 1413 ixas.ixa_zoneid = ixa->ixa_zoneid;
1414 1414 ixas.ixa_ifindex = 0;
1415 1415 ixas.ixa_ipst = ixa->ixa_ipst;
1416 1416 ixas.ixa_cred = ixa->ixa_cred;
1417 1417 ixas.ixa_cpid = ixa->ixa_cpid;
1418 1418 ixas.ixa_tsl = ixa->ixa_tsl;
1419 1419 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1420 1420
1421 1421 (void) ip_output_simple(mp, &ixas);
1422 1422 ixa_cleanup(&ixas);
1423 1423 }
1424 1424
1425 1425
1426 1426 static void
1427 1427 multirt_check_v4(ire_t *ire, ipha_t *ipha, ip_xmit_attr_t *ixa)
1428 1428 {
1429 1429 ip_stack_t *ipst = ixa->ixa_ipst;
1430 1430
1431 1431 /* Limit the TTL on multirt packets */
1432 1432 if (ire->ire_type & IRE_MULTICAST) {
1433 1433 if (ipha->ipha_ttl > 1) {
1434 1434 ip2dbg(("ire_send_multirt_v4: forcing multicast "
1435 1435 "multirt TTL to 1 (was %d), dst 0x%08x\n",
1436 1436 ipha->ipha_ttl, ntohl(ire->ire_addr)));
1437 1437 ipha->ipha_ttl = 1;
1438 1438 }
1439 1439 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1440 1440 } else if ((ipst->ips_ip_multirt_ttl > 0) &&
1441 1441 (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) {
1442 1442 ipha->ipha_ttl = ipst->ips_ip_multirt_ttl;
1443 1443 /*
1444 1444 * Need to ensure we don't increase the ttl should we go through
1445 1445 * ire_send_broadcast or multicast.
1446 1446 */
1447 1447 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1448 1448 }
1449 1449 }
1450 1450
1451 1451 /*
1452 1452 * ire_sendfn for IRE_MULTICAST
1453 1453 */
1454 1454 int
1455 1455 ire_send_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1456 1456 ip_xmit_attr_t *ixa, uint32_t *identp)
1457 1457 {
1458 1458 ipha_t *ipha = (ipha_t *)iph_arg;
1459 1459 ip_stack_t *ipst = ixa->ixa_ipst;
1460 1460 ill_t *ill = ire->ire_ill;
1461 1461 iaflags_t ixaflags = ixa->ixa_flags;
1462 1462
1463 1463 /*
1464 1464 * The IRE_MULTICAST is the same whether or not multirt is in use.
1465 1465 * Hence we need special-case code.
1466 1466 */
1467 1467 if (ixaflags & IXAF_MULTIRT_MULTICAST)
1468 1468 multirt_check_v4(ire, ipha, ixa);
1469 1469
1470 1470 /*
1471 1471 * Check if anything in ip_input_v4 wants a copy of the transmitted
1472 1472 * packet (after IPsec and fragmentation)
1473 1473 *
1474 1474 * 1. Multicast routers always need a copy unless SO_DONTROUTE is set
1475 1475 * RSVP and the rsvp daemon is an example of a
1476 1476 * protocol and user level process that
1477 1477 * handles it's own routing. Hence, it uses the
1478 1478 * SO_DONTROUTE option to accomplish this.
1479 1479 * 2. If the sender has set IP_MULTICAST_LOOP, then we just
1480 1480 * check whether there are any receivers for the group on the ill
1481 1481 * (ignoring the zoneid).
1482 1482 * 3. If IP_MULTICAST_LOOP is not set, then we check if there are
1483 1483 * any members in other shared-IP zones.
1484 1484 * If such members exist, then we indicate that the sending zone
1485 1485 * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP
1486 1486 * behavior.
1487 1487 *
1488 1488 * When we loopback we skip hardware checksum to make sure loopback
1489 1489 * copy is checksumed.
1490 1490 *
1491 1491 * Note that ire_ill is the upper in the case of IPMP.
1492 1492 */
1493 1493 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM);
1494 1494 if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 &&
1495 1495 !(ixaflags & IXAF_DONTROUTE)) {
1496 1496 ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1497 1497 } else if (ixaflags & IXAF_MULTICAST_LOOP) {
1498 1498 /*
1499 1499 * If this zone or any other zone has members then loopback
1500 1500 * a copy.
1501 1501 */
1502 1502 if (ill_hasmembers_v4(ill, ipha->ipha_dst))
1503 1503 ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1504 1504 } else if (ipst->ips_netstack->netstack_numzones > 1) {
1505 1505 /*
1506 1506 * This zone should not have a copy. But there are some other
1507 1507 * zones which might have members.
1508 1508 */
1509 1509 if (ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst,
1510 1510 ixa->ixa_zoneid)) {
1511 1511 ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET;
1512 1512 ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid;
1513 1513 ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1514 1514 }
1515 1515 }
1516 1516
1517 1517 /*
1518 1518 * Unless ire_send_multirt_v4 or icmp_output_hdrincl already set a ttl,
1519 1519 * force the ttl to the IP_MULTICAST_TTL value
1520 1520 */
1521 1521 if (!(ixaflags & IXAF_NO_TTL_CHANGE)) {
1522 1522 ipha->ipha_ttl = ixa->ixa_multicast_ttl;
1523 1523 }
1524 1524
1525 1525 return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1526 1526 }
1527 1527
1528 1528 /*
1529 1529 * ire_sendfn for IREs with RTF_MULTIRT
1530 1530 */
1531 1531 int
1532 1532 ire_send_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1533 1533 ip_xmit_attr_t *ixa, uint32_t *identp)
1534 1534 {
1535 1535 ipha_t *ipha = (ipha_t *)iph_arg;
1536 1536
1537 1537 multirt_check_v4(ire, ipha, ixa);
1538 1538
1539 1539 if (ire->ire_type & IRE_MULTICAST)
1540 1540 return (ire_send_multicast_v4(ire, mp, ipha, ixa, identp));
1541 1541 else if (ire->ire_type & IRE_BROADCAST)
1542 1542 return (ire_send_broadcast_v4(ire, mp, ipha, ixa, identp));
1543 1543 else
1544 1544 return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1545 1545 }
1546 1546
1547 1547 /*
1548 1548 * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE
1549 1549 */
1550 1550 int
1551 1551 ire_send_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1552 1552 ip_xmit_attr_t *ixa, uint32_t *identp)
1553 1553 {
1554 1554 ip_stack_t *ipst = ixa->ixa_ipst;
1555 1555 ipha_t *ipha = (ipha_t *)iph_arg;
1556 1556 ill_t *ill;
1557 1557 ip_recv_attr_t iras;
1558 1558 boolean_t dummy;
1559 1559
1560 1560 /* We assign an IP ident for nice errors */
1561 1561 ipha->ipha_ident = atomic_add_32_nv(identp, 1);
1562 1562
1563 1563 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes);
1564 1564
1565 1565 if (ire->ire_type & IRE_NOROUTE) {
1566 1566 /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */
1567 1567 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0,
1568 1568 RTA_DST, ipst);
1569 1569 }
1570 1570
1571 1571 if (ire->ire_flags & RTF_BLACKHOLE) {
1572 1572 ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL);
1573 1573 freemsg(mp);
1574 1574 /* No error even for local senders - silent blackhole */
1575 1575 return (0);
1576 1576 }
1577 1577 ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL);
1578 1578
1579 1579 /*
1580 1580 * We need an ill_t for the ip_recv_attr_t even though this packet
1581 1581 * was never received and icmp_unreachable doesn't currently use
1582 1582 * ira_ill.
1583 1583 */
1584 1584 ill = ill_lookup_on_name("lo0", B_FALSE,
1585 1585 !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst);
1586 1586 if (ill == NULL) {
1587 1587 freemsg(mp);
1588 1588 return (EHOSTUNREACH);
1589 1589 }
1590 1590
1591 1591 bzero(&iras, sizeof (iras));
1592 1592 /* Map ixa to ira including IPsec policies */
1593 1593 ipsec_out_to_in(ixa, ill, &iras);
1594 1594
1595 1595 if (ip_source_routed(ipha, ipst)) {
1596 1596 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, &iras);
1597 1597 } else {
1598 1598 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras);
1599 1599 }
1600 1600 /* We moved any IPsec refs from ixa to iras */
1601 1601 ira_cleanup(&iras, B_FALSE);
1602 1602 ill_refrele(ill);
1603 1603 return (EHOSTUNREACH);
1604 1604 }
1605 1605
1606 1606 /*
1607 1607 * Calculate a checksum ignoring any hardware capabilities
1608 1608 *
1609 1609 * Returns B_FALSE if the packet was too short for the checksum. Caller
1610 1610 * should free and do stats.
1611 1611 */
1612 1612 static boolean_t
1613 1613 ip_output_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa)
1614 1614 {
1615 1615 ip_stack_t *ipst = ixa->ixa_ipst;
1616 1616 uint_t pktlen = ixa->ixa_pktlen;
1617 1617 uint16_t *cksump;
1618 1618 uint32_t cksum;
1619 1619 uint8_t protocol = ixa->ixa_protocol;
1620 1620 uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length;
1621 1621 ipaddr_t dst = ipha->ipha_dst;
1622 1622 ipaddr_t src = ipha->ipha_src;
1623 1623
1624 1624 /* Just in case it contained garbage */
1625 1625 DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
1626 1626
1627 1627 /*
1628 1628 * Calculate ULP checksum
1629 1629 */
1630 1630 if (protocol == IPPROTO_TCP) {
1631 1631 cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length);
1632 1632 cksum = IP_TCP_CSUM_COMP;
1633 1633 } else if (protocol == IPPROTO_UDP) {
1634 1634 cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length);
1635 1635 cksum = IP_UDP_CSUM_COMP;
1636 1636 } else if (protocol == IPPROTO_SCTP) {
1637 1637 sctp_hdr_t *sctph;
1638 1638
1639 1639 ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
1640 1640 sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
↓ open down ↓ |
1640 lines elided |
↑ open up ↑ |
1641 1641 /*
1642 1642 * Zero out the checksum field to ensure proper
1643 1643 * checksum calculation.
1644 1644 */
1645 1645 sctph->sh_chksum = 0;
1646 1646 #ifdef DEBUG
1647 1647 if (!skip_sctp_cksum)
1648 1648 #endif
1649 1649 sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
1650 1650 goto ip_hdr_cksum;
1651 + } else if (protocol == IPPROTO_DCCP) {
1652 + cksump = IPH_DCCPH_CHECKSUMP(ipha, ip_hdr_length);
1653 + cksum = IP_DCCP_CSUM_COMP;
1651 1654 } else {
1652 1655 goto ip_hdr_cksum;
1653 1656 }
1654 1657
1655 1658 /* ULP puts the checksum field is in the first mblk */
1656 1659 ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
1657 1660
1658 1661 /*
1659 1662 * We accumulate the pseudo header checksum in cksum.
1660 1663 * This is pretty hairy code, so watch close. One
1661 1664 * thing to keep in mind is that UDP and TCP have
1662 1665 * stored their respective datagram lengths in their
1663 1666 * checksum fields. This lines things up real nice.
1664 1667 */
1665 1668 cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
1666 1669
1667 1670 cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1668 1671 /*
1669 1672 * For UDP/IPv4 a zero means that the packets wasn't checksummed.
1670 1673 * Change to 0xffff
1671 1674 */
1672 1675 if (protocol == IPPROTO_UDP && cksum == 0)
1673 1676 *cksump = ~cksum;
1674 1677 else
1675 1678 *cksump = cksum;
1676 1679
1677 1680 IP_STAT(ipst, ip_out_sw_cksum);
1678 1681 IP_STAT_UPDATE(ipst, ip_out_sw_cksum_bytes, pktlen);
1679 1682
1680 1683 ip_hdr_cksum:
1681 1684 /* Calculate IPv4 header checksum */
1682 1685 ipha->ipha_hdr_checksum = 0;
1683 1686 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1684 1687 return (B_TRUE);
1685 1688 }
1686 1689
1687 1690 /*
1688 1691 * Calculate the ULP checksum - try to use hardware.
1689 1692 * In the case of MULTIRT, broadcast or multicast the
1690 1693 * IXAF_NO_HW_CKSUM is set in which case we use software.
1691 1694 *
1692 1695 * If the hardware supports IP header checksum offload; then clear the
1693 1696 * contents of IP header checksum field as expected by NIC.
1694 1697 * Do this only if we offloaded either full or partial sum.
1695 1698 *
1696 1699 * Returns B_FALSE if the packet was too short for the checksum. Caller
1697 1700 * should free and do stats.
1698 1701 */
1699 1702 static boolean_t
1700 1703 ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha,
1701 1704 ip_xmit_attr_t *ixa, ill_t *ill)
1702 1705 {
1703 1706 uint_t pktlen = ixa->ixa_pktlen;
1704 1707 uint16_t *cksump;
1705 1708 uint16_t hck_flags;
1706 1709 uint32_t cksum;
1707 1710 uint8_t protocol = ixa->ixa_protocol;
1708 1711 uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length;
1709 1712
1710 1713 if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1711 1714 !dohwcksum) {
1712 1715 return (ip_output_sw_cksum_v4(mp, ipha, ixa));
1713 1716 }
1714 1717
1715 1718 /*
1716 1719 * Calculate ULP checksum. Note that we don't use cksump and cksum
1717 1720 * if the ill has FULL support.
1718 1721 */
1719 1722 if (protocol == IPPROTO_TCP) {
1720 1723 cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length);
1721 1724 cksum = IP_TCP_CSUM_COMP; /* Pseudo-header cksum */
1722 1725 } else if (protocol == IPPROTO_UDP) {
1723 1726 cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length);
1724 1727 cksum = IP_UDP_CSUM_COMP; /* Pseudo-header cksum */
1725 1728 } else if (protocol == IPPROTO_SCTP) {
1726 1729 sctp_hdr_t *sctph;
1727 1730
1728 1731 ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
1729 1732 sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
↓ open down ↓ |
69 lines elided |
↑ open up ↑ |
1730 1733 /*
1731 1734 * Zero out the checksum field to ensure proper
1732 1735 * checksum calculation.
1733 1736 */
1734 1737 sctph->sh_chksum = 0;
1735 1738 #ifdef DEBUG
1736 1739 if (!skip_sctp_cksum)
1737 1740 #endif
1738 1741 sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
1739 1742 goto ip_hdr_cksum;
1743 + } else if (protocol == IPPROTO_DCCP) {
1744 + cksump = IPH_DCCPH_CHECKSUMP(ipha, ip_hdr_length);
1745 + cksum = IP_DCCP_CSUM_COMP;
1740 1746 } else {
1741 1747 ip_hdr_cksum:
1742 1748 /* Calculate IPv4 header checksum */
1743 1749 ipha->ipha_hdr_checksum = 0;
1744 1750 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1745 1751 return (B_TRUE);
1746 1752 }
1747 1753
1748 1754 /* ULP puts the checksum field is in the first mblk */
1749 1755 ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
1750 1756
1751 1757 /*
1752 1758 * Underlying interface supports hardware checksum offload for
1753 1759 * the payload; leave the payload checksum for the hardware to
1754 1760 * calculate. N.B: We only need to set up checksum info on the
1755 1761 * first mblk.
1756 1762 */
1757 1763 hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags;
1758 1764
1759 1765 DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
1760 1766 if (hck_flags & HCKSUM_INET_FULL_V4) {
1761 1767 /*
1762 1768 * Hardware calculates pseudo-header, header and the
1763 1769 * payload checksums, so clear the checksum field in
1764 1770 * the protocol header.
1765 1771 */
1766 1772 *cksump = 0;
1767 1773 DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
1768 1774
1769 1775 ipha->ipha_hdr_checksum = 0;
1770 1776 if (hck_flags & HCKSUM_IPHDRCKSUM) {
1771 1777 DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;
1772 1778 } else {
1773 1779 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1774 1780 }
1775 1781 return (B_TRUE);
1776 1782 }
1777 1783 if ((hck_flags) & HCKSUM_INET_PARTIAL) {
1778 1784 ipaddr_t dst = ipha->ipha_dst;
1779 1785 ipaddr_t src = ipha->ipha_src;
1780 1786 /*
1781 1787 * Partial checksum offload has been enabled. Fill
1782 1788 * the checksum field in the protocol header with the
1783 1789 * pseudo-header checksum value.
1784 1790 *
1785 1791 * We accumulate the pseudo header checksum in cksum.
1786 1792 * This is pretty hairy code, so watch close. One
1787 1793 * thing to keep in mind is that UDP and TCP have
1788 1794 * stored their respective datagram lengths in their
1789 1795 * checksum fields. This lines things up real nice.
1790 1796 */
1791 1797 cksum += (dst >> 16) + (dst & 0xFFFF) +
1792 1798 (src >> 16) + (src & 0xFFFF);
1793 1799 cksum += *(cksump);
1794 1800 cksum = (cksum & 0xFFFF) + (cksum >> 16);
1795 1801 *(cksump) = (cksum & 0xFFFF) + (cksum >> 16);
1796 1802
1797 1803 /*
1798 1804 * Offsets are relative to beginning of IP header.
1799 1805 */
1800 1806 DB_CKSUMSTART(mp) = ip_hdr_length;
1801 1807 DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ipha;
1802 1808 DB_CKSUMEND(mp) = pktlen;
1803 1809 DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM;
1804 1810
1805 1811 ipha->ipha_hdr_checksum = 0;
1806 1812 if (hck_flags & HCKSUM_IPHDRCKSUM) {
1807 1813 DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;
1808 1814 } else {
1809 1815 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1810 1816 }
1811 1817 return (B_TRUE);
1812 1818 }
1813 1819 /* Hardware capabilities include neither full nor partial IPv4 */
1814 1820 return (ip_output_sw_cksum_v4(mp, ipha, ixa));
1815 1821 }
1816 1822
1817 1823 /*
1818 1824 * ire_sendfn for offlink and onlink destinations.
1819 1825 * Also called from the multicast, broadcast, multirt send functions.
1820 1826 *
1821 1827 * Assumes that the caller has a hold on the ire.
1822 1828 *
1823 1829 * This function doesn't care if the IRE just became condemned since that
1824 1830 * can happen at any time.
*
* Processing, in order:
*  1. Force ipha_ttl to 1 when IXAF_DONTROUTE is set.
*  2. Assign ipha_ident, either through cl_inet_ipident (cluster-wide
*     source address) or by an atomic add on *identp.
*  3. Run ip_process(IPP_LOCAL_OUT) when IPP is enabled.
*  4. For headers that are not IS_SIMPLE_IPH, set IXAF_NO_HW_CKSUM and
*     validate the options with ip_output_options().
*  5. On labeled systems, re-tag the mblk credential for DL_6TO4, DL_IPV4
*     and DL_IPV6 ills.
*  6. If ixa_pktlen exceeds ixa_fragsize or IXAF_IPSEC_SECURE is set,
*     compute checksums in software and hand the packet to
*     icmp_frag_needed(), ipsec_out_process() or ip_fragment_v4().
*  7. Otherwise compute the checksum(s) (ip_output_cksum_v4() or header
*     only) and call ixa->ixa_postfragfn.
*
* iph_arg is used as the ipha_t of the packet in mp.
*
* Returns 0 when ip_process() returns NULL, EINVAL / ENOBUFS / EMSGSIZE on
* the local failure paths, and otherwise whatever ipsec_out_process(),
* ip_fragment_v4() or ixa_postfragfn returns.
1825 1831 */
1826 1832 /* ARGSUSED */
1827 1833 int
1828 1834 ire_send_wire_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1829 1835 ip_xmit_attr_t *ixa, uint32_t *identp)
1830 1836 {
1831 1837 ip_stack_t *ipst = ixa->ixa_ipst;
1832 1838 ipha_t *ipha = (ipha_t *)iph_arg;
1833 1839 iaflags_t ixaflags = ixa->ixa_flags;
1834 1840 ill_t *ill;
1835 1841
1836 1842 ASSERT(ixa->ixa_nce != NULL);
1837 1843 ill = ixa->ixa_nce->nce_ill;
1838 1844
1839 1845 if (ixaflags & IXAF_DONTROUTE)
1840 1846 ipha->ipha_ttl = 1;
1841 1847
1842 1848 /*
1843 1849 * Assign an ident value for this packet. There could be other
1844 1850 * threads targeting the same destination, so we have to arrange
1845 1851 * for an atomic increment. Note that we use a 32-bit atomic add
1846 1852 * because it has better performance than its 16-bit sibling.
1847 1853 *
1848 1854 * Normally ixa_extra_ident is 0, but in the case of LSO it will
1849 1855 * be the number of TCP segments that the driver/hardware will
1850 1856 * additionally construct.
1851 1857 *
1852 1858 * If running in cluster mode and if the source address
1853 1859 * belongs to a replicated service then vector through
1854 1860 * cl_inet_ipident vector to allocate ip identifier
1855 1861 * NOTE: This is a contract private interface with the
1856 1862 * clustering group.
1857 1863 */
1858 1864 if (cl_inet_ipident != NULL) {
1859 1865 ipaddr_t src = ipha->ipha_src;
1860 1866 ipaddr_t dst = ipha->ipha_dst;
1861 1867 netstackid_t stack_id = ipst->ips_netstack->netstack_stackid;
1862 1868
1863 1869 ASSERT(cl_inet_isclusterwide != NULL);
1864 1870 if ((*cl_inet_isclusterwide)(stack_id, IPPROTO_IP,
1865 1871 AF_INET, (uint8_t *)(uintptr_t)src, NULL)) {
1866 1872 /*
1867 1873 * Note: not correct with LSO since we can't allocate
1868 1874 * ixa_extra_ident+1 consecutive values.
1869 1875 */
1870 1876 ipha->ipha_ident = (*cl_inet_ipident)(stack_id,
1871 1877 IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src,
1872 1878 (uint8_t *)(uintptr_t)dst, NULL);
1873 1879 } else {
1874 1880 ipha->ipha_ident = atomic_add_32_nv(identp,
1875 1881 ixa->ixa_extra_ident + 1);
1876 1882 }
1877 1883 } else {
1878 1884 ipha->ipha_ident = atomic_add_32_nv(identp,
1879 1885 ixa->ixa_extra_ident + 1);
1880 1886 }
/* The ident was stored in host order above; swap it on little-endian. */
1881 1887 #ifndef _BIG_ENDIAN
1882 1888 ipha->ipha_ident = htons(ipha->ipha_ident);
1883 1889 #endif
1884 1890
1885 1891 /*
1886 1892 * This might set b_band, thus the IPsec and fragmentation
1887 1893 * code in IP ensures that b_band is updated in the first mblk.
1888 1894 */
1889 1895 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) {
1890 1896 /* ip_process translates an IS_UNDER_IPMP */
1891 1897 mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill);
1892 1898 if (mp == NULL) {
1893 1899 /* ip_drop_packet and MIB done */
1894 1900 return (0); /* Might just be delayed */
1895 1901 }
1896 1902 }
1897 1903
1898 1904 /*
1899 1905 * Verify any IPv4 options.
1900 1906 *
1901 1907 * The presence of IP options also forces the network stack to
1902 1908 * calculate the checksum in software. This is because:
1903 1909 *
1904 1910 * Wrap around: certain partial-checksum NICs (eri, ce) limit
1905 1911 * the size of "start offset" width to 6-bit. This effectively
1906 1912 * sets the largest value of the offset to 64-bytes, starting
1907 1913 * from the MAC header. When the cumulative MAC and IP headers
1908 1914 * exceed such limit, the offset will wrap around. This causes
1909 1915 * the checksum to be calculated at the wrong place.
1910 1916 *
1911 1917 * IPv4 source routing: none of the full-checksum capable NICs
1912 1918 * is capable of correctly handling the IPv4 source-routing
1913 1919 * option for purposes of calculating the pseudo-header; the
1914 1920 * actual destination is different from the destination in the
1915 1921 * header which is that of the next-hop. (This case may not be
1916 1922 * true for NICs which can parse IPv6 extension headers, but
1917 1923 * we choose to simplify the implementation by not offloading
1918 1924 * checksum when they are present.)
1919 1925 */
1920 1926 if (!IS_SIMPLE_IPH(ipha)) {
/* Update both the local copy and ixa_flags itself. */
1921 1927 ixaflags = ixa->ixa_flags |= IXAF_NO_HW_CKSUM;
1922 1928 /* An IS_UNDER_IPMP ill is ok here */
1923 1929 if (ip_output_options(mp, ipha, ixa, ill)) {
1924 1930 /* Packet has been consumed and ICMP error sent */
1925 1931 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1926 1932 return (EINVAL);
1927 1933 }
1928 1934 }
1929 1935
1930 1936 /*
1931 1937 * To handle IPsec/iptun's labeling needs we need to tag packets
1932 1938 * while we still have ixa_tsl
1933 1939 */
1934 1940 if (is_system_labeled() && ixa->ixa_tsl != NULL &&
1935 1941 (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 ||
1936 1942 ill->ill_mactype == DL_IPV6)) {
1937 1943 cred_t *newcr;
1938 1944
1939 1945 newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl,
1940 1946 KM_NOSLEEP);
1941 1947 if (newcr == NULL) {
1942 1948 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1943 1949 ip_drop_output("ipIfStatsOutDiscards - newcr",
1944 1950 mp, ill);
1945 1951 freemsg(mp);
1946 1952 return (ENOBUFS);
1947 1953 }
1948 1954 mblk_setcred(mp, newcr, NOPID);
1949 1955 crfree(newcr); /* mblk_setcred did its own crhold */
1950 1956 }
1951 1957
/*
 * Slow path: the packet is larger than the fragment size and/or needs
 * IPsec processing. Checksums are always computed in software here.
 */
1952 1958 if (ixa->ixa_pktlen > ixa->ixa_fragsize ||
1953 1959 (ixaflags & IXAF_IPSEC_SECURE)) {
1954 1960 uint32_t pktlen;
1955 1961
1956 1962 pktlen = ixa->ixa_pktlen;
1957 1963 if (ixaflags & IXAF_IPSEC_SECURE)
1958 1964 pktlen += ipsec_out_extra_length(ixa);
1959 1965
/*
 * NOTE(review): every other error return in this function either
 * frees mp or follows a callee that consumed it; this one returns
 * with mp untouched and without a MIB bump. Confirm that this is
 * not an mblk leak.
 */
1960 1966 if (pktlen > IP_MAXPACKET)
1961 1967 return (EMSGSIZE);
1962 1968
1963 1969 if (ixaflags & IXAF_SET_ULP_CKSUM) {
1964 1970 /*
1965 1971 * Compute ULP checksum and IP header checksum
1966 1972 * using software
1967 1973 */
1968 1974 if (!ip_output_sw_cksum_v4(mp, ipha, ixa)) {
1969 1975 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1970 1976 ip_drop_output("ipIfStatsOutDiscards", mp, ill);
1971 1977 freemsg(mp);
1972 1978 return (EINVAL);
1973 1979 }
1974 1980 } else {
1975 1981 /* Calculate IPv4 header checksum */
1976 1982 ipha->ipha_hdr_checksum = 0;
1977 1983 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1978 1984 }
1979 1985
1980 1986 /*
1981 1987 * If this packet would generate an icmp_frag_needed
1982 1988 * message, we need to handle it before we do the IPsec
1983 1989 * processing. Otherwise, we need to strip the IPsec
1984 1990 * headers before we send up the message to the ULPs
1985 1991 * which becomes messy and difficult.
1986 1992 *
1987 1993 * We check using IXAF_DONTFRAG. The DF bit in the header
1988 1994 * is not inspected - it will be copied to any generated
1989 1995 * fragments.
1990 1996 */
1991 1997 if ((pktlen > ixa->ixa_fragsize) &&
1992 1998 (ixaflags & IXAF_DONTFRAG)) {
1993 1999 /* Generate ICMP and return error */
1994 2000 ip_recv_attr_t iras;
1995 2001
1996 2002 DTRACE_PROBE4(ip4__fragsize__fail, uint_t, pktlen,
1997 2003 uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen,
1998 2004 uint_t, ixa->ixa_pmtu);
1999 2005
2000 2006 bzero(&iras, sizeof (iras));
2001 2007 /* Map ixa to ira including IPsec policies */
2002 2008 ipsec_out_to_in(ixa, ill, &iras);
2003 2009
2004 2010 ip_drop_output("ICMP_FRAG_NEEDED", mp, ill);
2005 2011 icmp_frag_needed(mp, ixa->ixa_fragsize, &iras);
2006 2012 /* We moved any IPsec refs from ixa to iras */
2007 2013 ira_cleanup(&iras, B_FALSE);
2008 2014 return (EMSGSIZE);
2009 2015 }
2010 2016 DTRACE_PROBE4(ip4__fragsize__ok, uint_t, pktlen,
2011 2017 uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen,
2012 2018 uint_t, ixa->ixa_pmtu);
2013 2019
2014 2020 if (ixaflags & IXAF_IPSEC_SECURE) {
2015 2021 /*
2016 2022 * Pass in sufficient information so that
2017 2023 * IPsec can determine whether to fragment, and
2018 2024 * which function to call after fragmentation.
2019 2025 */
2020 2026 return (ipsec_out_process(mp, ixa));
2021 2027 }
2022 2028 return (ip_fragment_v4(mp, ixa->ixa_nce, ixaflags,
2023 2029 ixa->ixa_pktlen, ixa->ixa_fragsize, ixa->ixa_xmit_hint,
2024 2030 ixa->ixa_zoneid, ixa->ixa_no_loop_zoneid,
2025 2031 ixa->ixa_postfragfn, &ixa->ixa_cookie));
2026 2032 }
/* Fast path: no fragmentation and no IPsec. */
2027 2033 if (ixaflags & IXAF_SET_ULP_CKSUM) {
2028 2034 /* Compute ULP checksum and IP header checksum */
2029 2035 /* An IS_UNDER_IPMP ill is ok here */
2030 2036 if (!ip_output_cksum_v4(ixaflags, mp, ipha, ixa, ill)) {
2031 2037 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2032 2038 ip_drop_output("ipIfStatsOutDiscards", mp, ill);
2033 2039 freemsg(mp);
2034 2040 return (EINVAL);
2035 2041 }
2036 2042 } else {
2037 2043 /* Calculate IPv4 header checksum */
2038 2044 ipha->ipha_hdr_checksum = 0;
2039 2045 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2040 2046 }
2041 2047 return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags,
2042 2048 ixa->ixa_pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid,
2043 2049 ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie));
2044 2050 }
2045 2051
2046 2052 /*
2047 2053 * Send mp into ip_input
2048 2054 * Common for IPv4 and IPv6
*
* Builds an ip_recv_attr_t on the stack for the looped-back packet,
* setting each field that is needed individually instead of zeroing the
* whole structure, and passes mp to ill->ill_inputfn of nce->nce_ill.
*
* mp - packet to loop back
* nce - supplies the ill and the broadcast/multicast flags
* ixaflags - transmit flags; the IAF_MASK bits are copied into ira_flags
* pkt_len - stored in ira_pktlen and added to ipIfStatsHCInOctets
* nolzid - stored in ira_no_loop_zoneid
2049 2055 */
2050 2056 void
2051 2057 ip_postfrag_loopback(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2052 2058 uint_t pkt_len, zoneid_t nolzid)
2053 2059 {
2054 2060 rtc_t rtc;
2055 2061 ill_t *ill = nce->nce_ill;
2056 2062 ip_recv_attr_t iras; /* NOTE: No bzero for performance */
2057 2063 ncec_t *ncec;
2058 2064
2059 2065 ncec = nce->nce_common;
2060 2066 iras.ira_flags = IRAF_VERIFY_IP_CKSUM | IRAF_VERIFY_ULP_CKSUM |
2061 2067 IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK;
2062 2068 if (ncec->ncec_flags & NCE_F_BCAST)
2063 2069 iras.ira_flags |= IRAF_L2DST_BROADCAST;
2064 2070 else if (ncec->ncec_flags & NCE_F_MCAST)
2065 2071 iras.ira_flags |= IRAF_L2DST_MULTICAST;
2066 2072
2067 2073 iras.ira_free_flags = 0;
2068 2074 iras.ira_cred = NULL;
2069 2075 iras.ira_cpid = NOPID;
2070 2076 iras.ira_tsl = NULL;
2071 2077 iras.ira_zoneid = ALL_ZONES;
2072 2078 iras.ira_pktlen = pkt_len;
/* Account for the looped-back copy as a received packet. */
2073 2079 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, iras.ira_pktlen);
2074 2080 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
2075 2081
2076 2082 if (ixaflags & IXAF_IS_IPV4)
2077 2083 iras.ira_flags |= IRAF_IS_IPV4;
2078 2084
2079 2085 iras.ira_ill = iras.ira_rill = ill;
2080 2086 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
2081 2087 iras.ira_rifindex = iras.ira_ruifindex;
2082 2088 iras.ira_mhip = NULL;
2083 2089
2084 2090 iras.ira_flags |= ixaflags & IAF_MASK;
2085 2091 iras.ira_no_loop_zoneid = nolzid;
2086 2092
2087 2093 /* Broadcast and multicast don't care about the squeue */
2088 2094 iras.ira_sqp = NULL;
2089 2095
/* Start with no cached ire; release whatever the input function leaves. */
2090 2096 rtc.rtc_ire = NULL;
2091 2097 if (ixaflags & IXAF_IS_IPV4) {
2092 2098 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2093 2099
2094 2100 rtc.rtc_ipaddr = INADDR_ANY;
2095 2101
2096 2102 (*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);
2097 2103 if (rtc.rtc_ire != NULL) {
2098 2104 ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
2099 2105 ire_refrele(rtc.rtc_ire);
2100 2106 }
2101 2107 } else {
2102 2108 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2103 2109
2104 2110 rtc.rtc_ip6addr = ipv6_all_zeros;
2105 2111
2106 2112 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
2107 2113 if (rtc.rtc_ire != NULL) {
2108 2114 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
2109 2115 ire_refrele(rtc.rtc_ire);
2110 2116 }
2111 2117 }
2112 2118 /* Any references to clean up? No hold on ira */
2113 2119 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
2114 2120 ira_cleanup(&iras, B_FALSE);
2115 2121 }
2116 2122
2117 2123 /*
2118 2124 * Post fragmentation function for IRE_MULTICAST and IRE_BROADCAST which
2119 2125 * looks at the IXAF_LOOPBACK_COPY flag.
2120 2126 * Common for IPv4 and IPv6.
2121 2127 *
2122 2128 * If the loopback copy fails (due to no memory) but we send the packet out
2123 2129 * on the wire we return no failure. Only in the case we suppress the wire
2124 2130 * sending do we take the loopback failure into account.
2125 2131 *
2126 2132 * Note that we do not perform DTRACE_IP7 and FW_HOOKS for the looped back copy.
2127 2133 * Those operations are performed on this packet in ip_xmit() and it would
2128 2134 * be odd to do it twice for the same packet.
*
* Returns the result of ip_xmit() when the packet goes to the wire. When the
* wire send is suppressed (TTL/hop limit of 0, or an ill without ill_wq) mp
* is freed and the return value is 0, or ENOBUFS if the loopback copy could
* not be allocated.
2129 2135 */
2130 2136 int
2131 2137 ip_postfrag_loopcheck(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2132 2138 uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
2133 2139 uintptr_t *ixacookie)
2134 2140 {
2135 2141 ill_t *ill = nce->nce_ill;
2136 2142 int error = 0;
2137 2143
2138 2144 /*
2139 2145 * Check for IXAF_LOOPBACK_COPY - send a copy to ip as if the driver
2140 2146 * had looped it back
2141 2147 */
2142 2148 if (ixaflags & IXAF_LOOPBACK_COPY) {
2143 2149 mblk_t *mp1;
2144 2150
2145 2151 mp1 = copymsg(mp);
2146 2152 if (mp1 == NULL) {
2147 2153 /* Failed to deliver the loopback copy. */
2148 2154 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2149 2155 ip_drop_output("ipIfStatsOutDiscards", mp, ill);
/* mp itself is kept; only the loopback copy was lost. */
2150 2156 error = ENOBUFS;
2151 2157 } else {
2152 2158 ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len,
2153 2159 nolzid);
2154 2160 }
2155 2161 }
2156 2162
2157 2163 /*
2158 2164 * If TTL = 0 then only do the loopback to this host i.e. we are
2159 2165 * done. We are also done if this was the
2160 2166 * loopback interface since it is sufficient
2161 2167 * to loopback one copy of a multicast packet.
2162 2168 */
2163 2169 if (ixaflags & IXAF_IS_IPV4) {
2164 2170 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2165 2171
2166 2172 if (ipha->ipha_ttl == 0) {
2167 2173 ip_drop_output("multicast ipha_ttl not sent to wire",
2168 2174 mp, ill);
2169 2175 freemsg(mp);
2170 2176 return (error);
2171 2177 }
2172 2178 } else {
2173 2179 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2174 2180
/*
 * NOTE(review): the drop reason below still says "ipha_ttl" although
 * this branch tests ip6_hops; it is the same string as the IPv4 case.
 */
2175 2181 if (ip6h->ip6_hops == 0) {
2176 2182 ip_drop_output("multicast ipha_ttl not sent to wire",
2177 2183 mp, ill);
2178 2184 freemsg(mp);
2179 2185 return (error);
2180 2186 }
2181 2187 }
2182 2188 if (nce->nce_ill->ill_wq == NULL) {
2183 2189 /* Loopback interface */
2184 2190 ip_drop_output("multicast on lo0 not sent to wire", mp, ill);
2185 2191 freemsg(mp);
2186 2192 return (error);
2187 2193 }
2188 2194
/*
 * NOTE(review): a literal 0 is passed to ip_xmit() in the position after
 * szone rather than nolzid; presumably intentional since the loopback
 * copy was already handled above - confirm against ip_xmit().
 */
2189 2195 return (ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0,
2190 2196 ixacookie));
2191 2197 }
2192 2198
2193 2199 /*
2194 2200 * Post fragmentation function for RTF_MULTIRT routes.
2195 2201 * Since IRE_BROADCASTs can have RTF_MULTIRT, this function
2196 2202 * checks IXAF_LOOPBACK_COPY.
2197 2203 *
2198 2204 * If no packet is sent due to failures then we return an errno, but if at
2199 2205 * least one succeeded we return zero.
*
* A copy of mp is transmitted with ip_xmit() for every other matching
* RTF_MULTIRT ire in the same bucket; the original mp is transmitted last
* on the nce that was passed in. When several sends fail, the errno
* returned is the one recorded most recently.
2200 2206 */
2201 2207 int
2202 2208 ip_postfrag_multirt_v4(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2203 2209 uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
2204 2210 uintptr_t *ixacookie)
2205 2211 {
2206 2212 irb_t *irb;
2207 2213 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2208 2214 ire_t *ire;
2209 2215 ire_t *ire1;
2210 2216 mblk_t *mp1;
2211 2217 nce_t *nce1;
2212 2218 ill_t *ill = nce->nce_ill;
2213 2219 ill_t *ill1;
2214 2220 ip_stack_t *ipst = ill->ill_ipst;
2215 2221 int error = 0;
2216 2222 int num_sent = 0;
2217 2223 int err;
2218 2224 uint_t ire_type;
2219 2225 ipaddr_t nexthop;
2220 2226
2221 2227 ASSERT(ixaflags & IXAF_IS_IPV4);
2222 2228
2223 2229 /* Check for IXAF_LOOPBACK_COPY */
2224 2230 if (ixaflags & IXAF_LOOPBACK_COPY) {
/* NOTE(review): this mp1 shadows the function-scope mp1 declared above. */
2225 2231 mblk_t *mp1;
2226 2232
2227 2233 mp1 = copymsg(mp);
2228 2234 if (mp1 == NULL) {
2229 2235 /* Failed to deliver the loopback copy. */
2230 2236 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2231 2237 ip_drop_output("ipIfStatsOutDiscards", mp, ill);
2232 2238 error = ENOBUFS;
2233 2239 } else {
2234 2240 ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len,
2235 2241 nolzid);
2236 2242 }
2237 2243 }
2238 2244
2239 2245 /*
2240 2246 * Loop over RTF_MULTIRT for ipha_dst in the same bucket. Send
2241 2247 * a copy to each one.
2242 2248 * Use the nce (nexthop) and ipha_dst to find the ire.
2243 2249 *
2244 2250 * MULTIRT is not designed to work with shared-IP zones thus we don't
2245 2251 * need to pass a zoneid or a label to the IRE lookup.
2246 2252 */
2247 2253 if (V4_PART_OF_V6(nce->nce_addr) == ipha->ipha_dst) {
2248 2254 /* Broadcast and multicast case */
2249 2255 ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, 0, 0,
2250 2256 NULL, ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL);
2251 2257 } else {
2252 2258 ipaddr_t v4addr = V4_PART_OF_V6(nce->nce_addr);
2253 2259
2254 2260 /* Unicast case */
2255 2261 ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, v4addr, 0,
2256 2262 NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL);
2257 2263 }
2258 2264
2259 2265 if (ire == NULL ||
2260 2266 (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
2261 2267 !(ire->ire_flags & RTF_MULTIRT)) {
2262 2268 /* Drop */
/*
 * NOTE(review): mp is reported via ip_drop_output() but no freemsg(mp)
 * follows, unlike the drop paths in ip_postfrag_loopcheck(). No MIB
 * counter is bumped either. Confirm that this is not an mblk leak.
 */
2263 2269 ip_drop_output("ip_postfrag_multirt didn't find route",
2264 2270 mp, nce->nce_ill);
2265 2271 if (ire != NULL)
2266 2272 ire_refrele(ire);
2267 2273 return (ENETUNREACH);
2268 2274 }
2269 2275
/* Hold the bucket while its ire list is walked. */
2270 2276 irb = ire->ire_bucket;
2271 2277 irb_refhold(irb);
2272 2278 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
2273 2279 /*
2274 2280 * For broadcast we can have a mixture of IRE_BROADCAST and
2275 2281 * IRE_HOST due to the manually added IRE_HOSTs that are used
2276 2282 * to trigger the creation of the special CGTP broadcast routes.
2277 2283 * Thus we have to skip if ire_type doesn't match the original.
2278 2284 */
2279 2285 if (IRE_IS_CONDEMNED(ire1) ||
2280 2286 !(ire1->ire_flags & RTF_MULTIRT) ||
2281 2287 ire1->ire_type != ire->ire_type)
2282 2288 continue;
2283 2289
2284 2290 /* Do the ire argument one after the loop */
2285 2291 if (ire1 == ire)
2286 2292 continue;
2287 2293
2288 2294 ill1 = ire_nexthop_ill(ire1);
2289 2295 if (ill1 == NULL) {
2290 2296 /*
2291 2297 * This ire might not have been picked by
2292 2298 * ire_route_recursive, in which case ire_dep might
2293 2299 * not have been setup yet.
2294 2300 * We kick ire_route_recursive to try to resolve
2295 2301 * starting at ire1.
2296 2302 */
2297 2303 ire_t *ire2;
2298 2304 uint_t match_flags = MATCH_IRE_DSTONLY;
2299 2305
2300 2306 if (ire1->ire_ill != NULL)
2301 2307 match_flags |= MATCH_IRE_ILL;
2302 2308 ire2 = ire_route_recursive_impl_v4(ire1,
2303 2309 ire1->ire_addr, ire1->ire_type, ire1->ire_ill,
2304 2310 ire1->ire_zoneid, NULL, match_flags,
2305 2311 IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL);
/* Only the side effect of the lookup is wanted; drop the result. */
2306 2312 if (ire2 != NULL)
2307 2313 ire_refrele(ire2);
2308 2314 ill1 = ire_nexthop_ill(ire1);
2309 2315 }
2310 2316
2311 2317 if (ill1 == NULL) {
2312 2318 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2313 2319 ip_drop_output("ipIfStatsOutDiscards - no ill",
2314 2320 mp, ill);
2315 2321 error = ENETUNREACH;
2316 2322 continue;
2317 2323 }
2318 2324
2319 2325 /* Pick the addr and type to use for arp_nce_init */
2320 2326 if (nce->nce_common->ncec_flags & NCE_F_BCAST) {
2321 2327 ire_type = IRE_BROADCAST;
2322 2328 nexthop = ire1->ire_gateway_addr;
2323 2329 } else if (nce->nce_common->ncec_flags & NCE_F_MCAST) {
2324 2330 ire_type = IRE_MULTICAST;
2325 2331 nexthop = ipha->ipha_dst;
2326 2332 } else {
2327 2333 ire_type = ire1->ire_type; /* Doesn't matter */
2328 2334 nexthop = ire1->ire_gateway_addr;
2329 2335 }
2330 2336
2331 2337 /* If IPMP meta or under, then we just drop */
2332 2338 if (ill1->ill_grp != NULL) {
2333 2339 BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2334 2340 ip_drop_output("ipIfStatsOutDiscards - IPMP",
2335 2341 mp, ill1);
2336 2342 ill_refrele(ill1);
2337 2343 error = ENETUNREACH;
2338 2344 continue;
2339 2345 }
2340 2346
2341 2347 nce1 = arp_nce_init(ill1, nexthop, ire_type);
2342 2348 if (nce1 == NULL) {
2343 2349 BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2344 2350 ip_drop_output("ipIfStatsOutDiscards - no nce",
2345 2351 mp, ill1);
2346 2352 ill_refrele(ill1);
2347 2353 error = ENETUNREACH;
2348 2354 continue;
2349 2355 }
2350 2356 mp1 = copymsg(mp);
2351 2357 if (mp1 == NULL) {
2352 2358 BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2353 2359 ip_drop_output("ipIfStatsOutDiscards", mp, ill1);
2354 2360 nce_refrele(nce1);
2355 2361 ill_refrele(ill1);
2356 2362 error = ENOBUFS;
2357 2363 continue;
2358 2364 }
2359 2365 /* Preserve HW checksum for this copy */
2360 2366 DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp);
2361 2367 DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp);
2362 2368 DB_CKSUMEND(mp1) = DB_CKSUMEND(mp);
2363 2369 DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp);
2364 2370 DB_LSOMSS(mp1) = DB_LSOMSS(mp);
2365 2371
2366 2372 ire1->ire_ob_pkt_count++;
2367 2373 err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone,
2368 2374 0, ixacookie);
2369 2375 if (err == 0)
2370 2376 num_sent++;
2371 2377 else
2372 2378 error = err;
2373 2379 nce_refrele(nce1);
2374 2380 ill_refrele(ill1);
2375 2381 }
2376 2382 irb_refrele(irb);
2377 2383 ire_refrele(ire);
2378 2384 /* Finally, the main one */
2379 2385 err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0,
2380 2386 ixacookie);
2381 2387 if (err == 0)
2382 2388 num_sent++;
2383 2389 else
2384 2390 error = err;
/* One successful transmission is enough to report success. */
2385 2391 if (num_sent > 0)
2386 2392 return (0);
2387 2393 else
2388 2394 return (error);
2389 2395 }
2390 2396
2391 2397 /*
2392 2398 * Verify local connectivity. This check is called by ULP fusion code.
2393 2399 * The generation number on an IRE_LOCAL or IRE_LOOPBACK only changes if
2394 2400 * the interface is brought down and back up. So we simply fail the local
2395 2401 * process. The caller, TCP Fusion, should unfuse the connection.
*
* Returns B_FALSE when ixa->ixa_ire is neither IRE_LOCAL nor IRE_LOOPBACK.
* Otherwise returns whether the ire's current ire_generation still equals
* the generation cached in ixa->ixa_ire_generation.
2396 2402 */
2397 2403 boolean_t
2398 2404 ip_output_verify_local(ip_xmit_attr_t *ixa)
2399 2405 {
2400 2406 ire_t *ire = ixa->ixa_ire;
2401 2407
2402 2408 if (!(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)))
2403 2409 return (B_FALSE);
2404 2410
2405 2411 return (ixa->ixa_ire->ire_generation == ixa->ixa_ire_generation);
2406 2412 }
2407 2413
2408 2414 /*
2409 2415 * Local process for ULP loopback, TCP Fusion. Handle both IPv4 and IPv6.
2410 2416 *
2411 2417 * The caller must call ip_output_verify_local() first. This function handles
2412 2418 * IPobs, FW_HOOKS, and/or IPsec cases sequentially.
*
* mp - packet; its b_rptr is treated as the IPv4 or IPv6 header
* depending on IXAF_IS_IPV4 in ixa->ixa_flags
* ixa - transmit attributes; ixa_ire->ire_ill is the ill used
* hooks_out - run the loopback-out firewall hooks
* hooks_in - run the loopback-in firewall hooks
* peer_connp - when not NULL, the inbound IPsec policy of this conn is
* checked against the packet
*
* Returns the (possibly replaced) mp, or NULL if a firewall hook or the
* IPsec policy check left no packet. ipIfStatsInDiscards is only bumped
* when mp is NULL at the end of the function, i.e. not on the early
* returns that follow the firewall hooks.
2413 2419 */
2414 2420 mblk_t *
2415 2421 ip_output_process_local(mblk_t *mp, ip_xmit_attr_t *ixa, boolean_t hooks_out,
2416 2422 boolean_t hooks_in, conn_t *peer_connp)
2417 2423 {
2418 2424 ill_t *ill = ixa->ixa_ire->ire_ill;
2419 2425 ipha_t *ipha = NULL;
2420 2426 ip6_t *ip6h = NULL;
2421 2427 ip_stack_t *ipst = ixa->ixa_ipst;
2422 2428 iaflags_t ixaflags = ixa->ixa_flags;
/*
 * NOTE(review): iras is handed to ipsec_out_to_in() below without a
 * preceding bzero() and without a later ira_cleanup(), whereas
 * ire_send_wire_v4() does both around the same call. Confirm that
 * ipsec_out_to_in() fully initializes it and that no references leak.
 */
2423 2429 ip_recv_attr_t iras;
/* error is passed to the FW_HOOKS macros but never examined here. */
2424 2430 int error;
2425 2431
2426 2432 ASSERT(mp != NULL);
2427 2433
2428 2434 if (ixaflags & IXAF_IS_IPV4) {
2429 2435 ipha = (ipha_t *)mp->b_rptr;
2430 2436
2431 2437 /*
2432 2438 * If a callback is enabled then we need to know the
2433 2439 * source and destination zoneids for the packet. We already
2434 2440 * have those handy.
2435 2441 */
2436 2442 if (ipst->ips_ip4_observe.he_interested) {
2437 2443 zoneid_t szone, dzone;
2438 2444 zoneid_t stackzoneid;
2439 2445
2440 2446 stackzoneid = netstackid_to_zoneid(
2441 2447 ipst->ips_netstack->netstack_stackid);
2442 2448
2443 2449 if (stackzoneid == GLOBAL_ZONEID) {
2444 2450 /* Shared-IP zone */
2445 2451 dzone = ixa->ixa_ire->ire_zoneid;
2446 2452 szone = ixa->ixa_zoneid;
2447 2453 } else {
2448 2454 szone = dzone = stackzoneid;
2449 2455 }
2450 2456 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
2451 2457 ipst);
2452 2458 }
2453 2459 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2454 2460 ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *,
2455 2461 NULL, int, 1);
2456 2462
2457 2463 /* FW_HOOKS: LOOPBACK_OUT */
2458 2464 if (hooks_out) {
2459 2465 DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL,
2460 2466 ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
2461 2467 FW_HOOKS(ipst->ips_ip4_loopback_out_event,
2462 2468 ipst->ips_ipv4firewall_loopback_out,
2463 2469 NULL, ill, ipha, mp, mp, 0, ipst, error);
2464 2470 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp);
2465 2471 }
/* The hook may have left no packet. */
2466 2472 if (mp == NULL)
2467 2473 return (NULL);
2468 2474
2469 2475 /* FW_HOOKS: LOOPBACK_IN */
2470 2476 if (hooks_in) {
2471 2477 DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill,
2472 2478 ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp);
2473 2479 FW_HOOKS(ipst->ips_ip4_loopback_in_event,
2474 2480 ipst->ips_ipv4firewall_loopback_in,
2475 2481 ill, NULL, ipha, mp, mp, 0, ipst, error);
2476 2482 DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp);
2477 2483 }
2478 2484 if (mp == NULL)
2479 2485 return (NULL);
2480 2486
2481 2487 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2482 2488 ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *,
2483 2489 NULL, int, 1);
2484 2490
2485 2491 /* Inbound IPsec policies */
2486 2492 if (peer_connp != NULL) {
2487 2493 /* Map ixa to ira including IPsec policies. */
2488 2494 ipsec_out_to_in(ixa, ill, &iras);
2489 2495 mp = ipsec_check_inbound_policy(mp, peer_connp, ipha,
2490 2496 NULL, &iras);
2491 2497 }
2492 2498 } else {
2493 2499 ip6h = (ip6_t *)mp->b_rptr;
2494 2500
2495 2501 /*
2496 2502 * If a callback is enabled then we need to know the
2497 2503 * source and destination zoneids for the packet. We already
2498 2504 * have those handy.
2499 2505 */
2500 2506 if (ipst->ips_ip6_observe.he_interested) {
2501 2507 zoneid_t szone, dzone;
2502 2508 zoneid_t stackzoneid;
2503 2509
2504 2510 stackzoneid = netstackid_to_zoneid(
2505 2511 ipst->ips_netstack->netstack_stackid);
2506 2512
2507 2513 if (stackzoneid == GLOBAL_ZONEID) {
2508 2514 /* Shared-IP zone */
2509 2515 dzone = ixa->ixa_ire->ire_zoneid;
2510 2516 szone = ixa->ixa_zoneid;
2511 2517 } else {
2512 2518 szone = dzone = stackzoneid;
2513 2519 }
2514 2520 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
2515 2521 ipst);
2516 2522 }
2517 2523 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2518 2524 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *,
2519 2525 ip6h, int, 1);
2520 2526
2521 2527 /* FW_HOOKS: LOOPBACK_OUT */
2522 2528 if (hooks_out) {
2523 2529 DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL,
2524 2530 ill_t *, ill, ip6_t *, ip6h, mblk_t *, mp);
2525 2531 FW_HOOKS6(ipst->ips_ip6_loopback_out_event,
2526 2532 ipst->ips_ipv6firewall_loopback_out,
2527 2533 NULL, ill, ip6h, mp, mp, 0, ipst, error);
2528 2534 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp);
2529 2535 }
2530 2536 if (mp == NULL)
2531 2537 return (NULL);
2532 2538
2533 2539 /* FW_HOOKS: LOOPBACK_IN */
2534 2540 if (hooks_in) {
2535 2541 DTRACE_PROBE4(ip6__loopback__in__start, ill_t *, ill,
2536 2542 ill_t *, NULL, ip6_t *, ip6h, mblk_t *, mp);
2537 2543 FW_HOOKS6(ipst->ips_ip6_loopback_in_event,
2538 2544 ipst->ips_ipv6firewall_loopback_in,
2539 2545 ill, NULL, ip6h, mp, mp, 0, ipst, error);
2540 2546 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp);
2541 2547 }
2542 2548 if (mp == NULL)
2543 2549 return (NULL);
2544 2550
2545 2551 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2546 2552 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *,
2547 2553 ip6h, int, 1);
2548 2554
2549 2555 /* Inbound IPsec policies */
2550 2556 if (peer_connp != NULL) {
2551 2557 /* Map ixa to ira including IPsec policies. */
2552 2558 ipsec_out_to_in(ixa, ill, &iras);
2553 2559 mp = ipsec_check_inbound_policy(mp, peer_connp, NULL,
2554 2560 ip6h, &iras);
2555 2561 }
2556 2562 }
2557 2563
/* Reached with mp == NULL only when the IPsec policy check returned NULL. */
2558 2564 if (mp == NULL) {
2559 2565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2560 2566 ip_drop_input("ipIfStatsInDiscards", NULL, ill);
2561 2567 }
2562 2568
2563 2569 return (mp);
2564 2570 }
↓ open down ↓ |
815 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX