1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * iptun - IP Tunneling Driver 27 * 28 * This module is a GLDv3 driver that implements virtual datalinks over IP 29 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 30 * interface (see iptun_ctl.c), and registered with GLDv3 using 31 * mac_register(). It implements the logic for various forms of IP (IPv4 or 32 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 33 * module below it. Each virtual IP tunnel datalink has a conn_t associated 34 * with it representing the "outer" IP connection. 35 * 36 * The module implements the following locking semantics: 37 * 38 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 39 * See comments above iptun_hash_lock for details. 40 * 41 * No locks are ever held while calling up to GLDv3. The general architecture 42 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 43 * given link will be held while making downcalls (iptun_m_*() callbacks). 44 * Because we need to hold locks while handling downcalls, holding these locks 45 * while issuing upcalls results in deadlock scenarios. See the block comment 46 * above iptun_task_cb() for details on how we safely issue upcalls without 47 * holding any locks. 48 * 49 * The contents of each iptun_t is protected by an iptun_mutex which is held 50 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 51 * iptun_exit(). 52 * 53 * See comments in iptun_delete() and iptun_free() for details on how the 54 * iptun_t is deleted safely. 55 */ 56 57 #include <sys/types.h> 58 #include <sys/kmem.h> 59 #include <sys/errno.h> 60 #include <sys/modhash.h> 61 #include <sys/list.h> 62 #include <sys/strsun.h> 63 #include <sys/file.h> 64 #include <sys/systm.h> 65 #include <sys/tihdr.h> 66 #include <sys/param.h> 67 #include <sys/mac_provider.h> 68 #include <sys/mac_ipv4.h> 69 #include <sys/mac_ipv6.h> 70 #include <sys/mac_6to4.h> 71 #include <sys/tsol/tnet.h> 72 #include <sys/sunldi.h> 73 #include <netinet/in.h> 74 #include <netinet/ip6.h> 75 #include <inet/ip.h> 76 #include <inet/ip_ire.h> 77 #include <inet/ipsec_impl.h> 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tnet.h> 80 #include <inet/iptun.h> 81 #include "iptun_impl.h" 82 83 /* Do the tunnel type and address family match? */ 84 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 85 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 86 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 87 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 88 89 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 90 91 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ 92 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 93 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 94 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 95 sizeof (iptun_encaplim_t)) 96 97 #define IPTUN_MIN_HOPLIMIT 1 98 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 99 100 #define IPTUN_MIN_ENCAPLIMIT 0 101 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 102 103 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 104 105 static iptun_encaplim_t iptun_encaplim_init = { 106 { IPPROTO_NONE, 0 }, 107 IP6OPT_TUNNEL_LIMIT, 108 1, 109 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 110 IP6OPT_PADN, 111 1, 112 0 113 }; 114 115 /* 116 * Table containing per-iptun-type information. 117 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 118 */ 119 static iptun_typeinfo_t iptun_type_table[] = { 120 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 121 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 122 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 123 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 124 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 125 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 126 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 127 }; 128 129 /* 130 * iptun_hash is an iptun_t lookup table by link ID protected by 131 * iptun_hash_lock. While the hash table's integrity is maintained via 132 * internal locking in the mod_hash_*() functions, we need additional locking 133 * so that an iptun_t cannot be deleted after a hash lookup has returned an 134 * iptun_t and before iptun_lock has been entered. As such, we use 135 * iptun_hash_lock when doing lookups and removals from iptun_hash. 136 */ 137 mod_hash_t *iptun_hash; 138 static kmutex_t iptun_hash_lock; 139 140 static uint_t iptun_tunnelcount; /* total for all stacks */ 141 kmem_cache_t *iptun_cache; 142 ddi_taskq_t *iptun_taskq; 143 144 typedef enum { 145 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 146 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 147 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 148 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 149 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 150 } iptun_task_t; 151 152 typedef struct iptun_task_data_s { 153 iptun_task_t itd_task; 154 datalink_id_t itd_linkid; 155 } iptun_task_data_t; 156 157 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 158 static int iptun_enter(iptun_t *); 159 static void iptun_exit(iptun_t *); 160 static void iptun_headergen(iptun_t *, boolean_t); 161 static void iptun_drop_pkt(mblk_t *, uint64_t *); 162 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *); 163 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *); 164 static void iptun_output(iptun_t *, mblk_t *); 165 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 166 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 167 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 168 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 169 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 170 171 static void iptun_output_6to4(iptun_t *, mblk_t *); 172 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *); 173 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, 174 ip_recv_attr_t *); 175 176 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 177 ixa_notify_arg_t); 178 179 static mac_callbacks_t iptun_m_callbacks; 180 181 static int 182 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 183 { 184 iptun_t *iptun = arg; 185 int err = 0; 186 187 switch (stat) { 188 case MAC_STAT_IERRORS: 189 *val = iptun->iptun_ierrors; 190 break; 191 case MAC_STAT_OERRORS: 192 *val = iptun->iptun_oerrors; 193 break; 194 case MAC_STAT_RBYTES: 195 *val = iptun->iptun_rbytes; 196 break; 197 case MAC_STAT_IPACKETS: 198 *val = iptun->iptun_ipackets; 199 break; 200 case MAC_STAT_OBYTES: 201 *val = iptun->iptun_obytes; 202 break; 203 case MAC_STAT_OPACKETS: 204 *val = iptun->iptun_opackets; 205 break; 206 case MAC_STAT_NORCVBUF: 207 *val = iptun->iptun_norcvbuf; 208 break; 209 case MAC_STAT_NOXMTBUF: 210 *val = iptun->iptun_noxmtbuf; 211 break; 212 default: 213 err = ENOTSUP; 214 } 215 216 return (err); 217 } 218 219 static int 220 iptun_m_start(void *arg) 221 { 222 iptun_t *iptun = arg; 223 int err; 224 225 if ((err = iptun_enter(iptun)) == 0) { 226 iptun->iptun_flags |= IPTUN_MAC_STARTED; 227 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 228 iptun_exit(iptun); 229 } 230 return (err); 231 } 232 233 static void 234 iptun_m_stop(void *arg) 235 { 236 iptun_t *iptun = arg; 237 238 if (iptun_enter(iptun) == 0) { 239 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 240 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 241 iptun_exit(iptun); 242 } 243 } 244 245 /* 246 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 247 * tunnel data-link only ever receives packets that are destined exclusively 248 * for the local address of the tunnel. 249 */ 250 /* ARGSUSED */ 251 static int 252 iptun_m_setpromisc(void *arg, boolean_t on) 253 { 254 return (0); 255 } 256 257 /* ARGSUSED */ 258 static int 259 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 260 { 261 return (ENOTSUP); 262 } 263 264 /* 265 * iptun_m_unicst() sets the local address. 266 */ 267 /* ARGSUSED */ 268 static int 269 iptun_m_unicst(void *arg, const uint8_t *addrp) 270 { 271 iptun_t *iptun = arg; 272 int err; 273 struct sockaddr_storage ss; 274 struct sockaddr_in *sin; 275 struct sockaddr_in6 *sin6; 276 277 if ((err = iptun_enter(iptun)) == 0) { 278 switch (iptun->iptun_typeinfo->iti_ipvers) { 279 case IPV4_VERSION: 280 sin = (struct sockaddr_in *)&ss; 281 sin->sin_family = AF_INET; 282 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 283 break; 284 case IPV6_VERSION: 285 sin6 = (struct sockaddr_in6 *)&ss; 286 sin6->sin6_family = AF_INET6; 287 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 288 break; 289 default: 290 ASSERT(0); 291 } 292 err = iptun_setladdr(iptun, &ss); 293 iptun_exit(iptun); 294 } 295 return (err); 296 } 297 298 static mblk_t * 299 iptun_m_tx(void *arg, mblk_t *mpchain) 300 { 301 mblk_t *mp, *nmp; 302 iptun_t *iptun = arg; 303 304 if (!IS_IPTUN_RUNNING(iptun)) { 305 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 306 return (NULL); 307 } 308 309 for (mp = mpchain; mp != NULL; mp = nmp) { 310 nmp = mp->b_next; 311 mp->b_next = NULL; 312 iptun_output(iptun, mp); 313 } 314 315 return (NULL); 316 } 317 318 /* ARGSUSED */ 319 static int 320 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 321 uint_t pr_valsize, const void *pr_val) 322 { 323 iptun_t *iptun = barg; 324 uint32_t value = *(uint32_t *)pr_val; 325 int err; 326 327 /* 328 * We need to enter this iptun_t since we'll be modifying the outer 329 * header. 330 */ 331 if ((err = iptun_enter(iptun)) != 0) 332 return (err); 333 334 switch (pr_num) { 335 case MAC_PROP_IPTUN_HOPLIMIT: 336 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 337 err = EINVAL; 338 break; 339 } 340 if (value != iptun->iptun_hoplimit) { 341 iptun->iptun_hoplimit = (uint8_t)value; 342 iptun_headergen(iptun, B_TRUE); 343 } 344 break; 345 case MAC_PROP_IPTUN_ENCAPLIMIT: 346 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 347 value > IPTUN_MAX_ENCAPLIMIT) { 348 err = EINVAL; 349 break; 350 } 351 if (value != iptun->iptun_encaplimit) { 352 iptun->iptun_encaplimit = (uint8_t)value; 353 iptun_headergen(iptun, B_TRUE); 354 } 355 break; 356 case MAC_PROP_MTU: { 357 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 358 359 if (value < iptun->iptun_typeinfo->iti_minmtu || 360 value > maxmtu) { 361 err = EINVAL; 362 break; 363 } 364 iptun->iptun_flags |= IPTUN_FIXED_MTU; 365 if (value != iptun->iptun_mtu) { 366 iptun->iptun_mtu = value; 367 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 368 } 369 break; 370 } 371 default: 372 err = EINVAL; 373 } 374 iptun_exit(iptun); 375 return (err); 376 } 377 378 /* ARGSUSED */ 379 static int 380 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 381 uint_t pr_valsize, void *pr_val) 382 { 383 iptun_t *iptun = barg; 384 int err; 385 386 if ((err = iptun_enter(iptun)) != 0) 387 return (err); 388 389 switch (pr_num) { 390 case MAC_PROP_IPTUN_HOPLIMIT: 391 ASSERT(pr_valsize >= sizeof (uint32_t)); 392 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 393 break; 394 395 case MAC_PROP_IPTUN_ENCAPLIMIT: 396 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 397 break; 398 default: 399 err = ENOTSUP; 400 } 401 done: 402 iptun_exit(iptun); 403 return (err); 404 } 405 406 /* ARGSUSED */ 407 static void 408 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, 409 mac_prop_info_handle_t prh) 410 { 411 iptun_t *iptun = barg; 412 413 switch (pr_num) { 414 case MAC_PROP_IPTUN_HOPLIMIT: 415 mac_prop_info_set_range_uint32(prh, 416 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); 417 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); 418 break; 419 420 case MAC_PROP_IPTUN_ENCAPLIMIT: 421 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) 422 break; 423 mac_prop_info_set_range_uint32(prh, 424 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); 425 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); 426 break; 427 case MAC_PROP_MTU: 428 mac_prop_info_set_range_uint32(prh, 429 iptun->iptun_typeinfo->iti_minmtu, 430 iptun_get_maxmtu(iptun, NULL, 0)); 431 break; 432 } 433 } 434 435 uint_t 436 iptun_count(void) 437 { 438 return (iptun_tunnelcount); 439 } 440 441 /* 442 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 443 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 444 * being deleted. 445 */ 446 static int 447 iptun_enter(iptun_t *iptun) 448 { 449 mutex_enter(&iptun->iptun_lock); 450 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 451 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 452 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 453 mutex_exit(&iptun->iptun_lock); 454 return (ENOENT); 455 } 456 return (0); 457 } 458 459 /* 460 * Exit the tunnel entered in iptun_enter(). 461 */ 462 static void 463 iptun_exit(iptun_t *iptun) 464 { 465 mutex_exit(&iptun->iptun_lock); 466 } 467 468 /* 469 * Enter the IP tunnel instance by datalink ID. 470 */ 471 static int 472 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 473 { 474 int err; 475 476 mutex_enter(&iptun_hash_lock); 477 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 478 (mod_hash_val_t *)iptun) == 0) 479 err = iptun_enter(*iptun); 480 else 481 err = ENOENT; 482 if (err != 0) 483 *iptun = NULL; 484 mutex_exit(&iptun_hash_lock); 485 return (err); 486 } 487 488 /* 489 * Handle tasks that were deferred through the iptun_taskq because they require 490 * calling up to the mac module, and we can't call up to the mac module while 491 * holding locks. 492 * 493 * This is tricky to get right without introducing race conditions and 494 * deadlocks with the mac module, as we cannot issue an upcall while in the 495 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 496 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 497 * module will already have the perimeter held, and will then try and enter 498 * the iptun_t. You can see the lock ordering problem with this; this will 499 * deadlock. 500 * 501 * The safe way to do this is to enter the iptun_t in question and copy the 502 * information we need out of it so that we can exit it and know that the 503 * information being passed up to the upcalls won't be subject to modification 504 * by other threads. The problem now is that we need to exit it prior to 505 * issuing the upcall, but once we do this, a thread could come along and 506 * delete the iptun_t and thus the mac handle required to issue the upcall. 507 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 508 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 509 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 510 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 511 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 512 * exited the iptun_t. 513 */ 514 static void 515 iptun_task_cb(void *arg) 516 { 517 iptun_task_data_t *itd = arg; 518 iptun_task_t task = itd->itd_task; 519 datalink_id_t linkid = itd->itd_linkid; 520 iptun_t *iptun; 521 uint32_t mtu; 522 iptun_addr_t addr; 523 link_state_t linkstate; 524 size_t header_size; 525 iptun_header_t header; 526 527 kmem_free(itd, sizeof (*itd)); 528 529 /* 530 * Note that if the lookup fails, it's because the tunnel was deleted 531 * between the time the task was dispatched and now. That isn't an 532 * error. 533 */ 534 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 535 return; 536 537 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 538 539 switch (task) { 540 case IPTUN_TASK_MTU_UPDATE: 541 mtu = iptun->iptun_mtu; 542 break; 543 case IPTUN_TASK_LADDR_UPDATE: 544 addr = iptun->iptun_laddr; 545 break; 546 case IPTUN_TASK_RADDR_UPDATE: 547 addr = iptun->iptun_raddr; 548 break; 549 case IPTUN_TASK_LINK_UPDATE: 550 linkstate = IS_IPTUN_RUNNING(iptun) ? 551 LINK_STATE_UP : LINK_STATE_DOWN; 552 break; 553 case IPTUN_TASK_PDATA_UPDATE: 554 header_size = iptun->iptun_header_size; 555 header = iptun->iptun_header; 556 break; 557 default: 558 ASSERT(0); 559 } 560 561 iptun_exit(iptun); 562 563 switch (task) { 564 case IPTUN_TASK_MTU_UPDATE: 565 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 566 break; 567 case IPTUN_TASK_LADDR_UPDATE: 568 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 569 break; 570 case IPTUN_TASK_RADDR_UPDATE: 571 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 572 break; 573 case IPTUN_TASK_LINK_UPDATE: 574 mac_link_update(iptun->iptun_mh, linkstate); 575 break; 576 case IPTUN_TASK_PDATA_UPDATE: 577 if (mac_pdata_update(iptun->iptun_mh, 578 header_size == 0 ? NULL : &header, header_size) != 0) 579 atomic_inc_64(&iptun->iptun_taskq_fail); 580 break; 581 } 582 583 mutex_enter(&iptun->iptun_lock); 584 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 585 cv_signal(&iptun->iptun_upcall_cv); 586 mutex_exit(&iptun->iptun_lock); 587 } 588 589 static void 590 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 591 { 592 iptun_task_data_t *itd; 593 594 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 595 if (itd == NULL) { 596 atomic_inc_64(&iptun->iptun_taskq_fail); 597 return; 598 } 599 itd->itd_task = iptun_task; 600 itd->itd_linkid = iptun->iptun_linkid; 601 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 602 atomic_inc_64(&iptun->iptun_taskq_fail); 603 kmem_free(itd, sizeof (*itd)); 604 } 605 } 606 607 /* 608 * Convert an iptun_addr_t to sockaddr_storage. 609 */ 610 static void 611 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 612 { 613 struct sockaddr_in *sin; 614 struct sockaddr_in6 *sin6; 615 616 bzero(ss, sizeof (*ss)); 617 switch (iptun_addr->ia_family) { 618 case AF_INET: 619 sin = (struct sockaddr_in *)ss; 620 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 621 break; 622 case AF_INET6: 623 sin6 = (struct sockaddr_in6 *)ss; 624 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 625 break; 626 default: 627 ASSERT(0); 628 } 629 ss->ss_family = iptun_addr->ia_family; 630 } 631 632 /* 633 * General purpose function to set an IP tunnel source or destination address. 634 */ 635 static int 636 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 637 const struct sockaddr_storage *ss) 638 { 639 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 640 return (EINVAL); 641 642 switch (ss->ss_family) { 643 case AF_INET: { 644 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 645 646 if ((sin->sin_addr.s_addr == INADDR_ANY) || 647 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 648 CLASSD(sin->sin_addr.s_addr)) { 649 return (EADDRNOTAVAIL); 650 } 651 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 652 break; 653 } 654 case AF_INET6: { 655 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 656 657 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 658 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 659 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 660 return (EADDRNOTAVAIL); 661 } 662 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 663 break; 664 } 665 default: 666 return (EAFNOSUPPORT); 667 } 668 iptun_addr->ia_family = ss->ss_family; 669 return (0); 670 } 671 672 static int 673 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 674 { 675 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 676 &iptun->iptun_laddr, laddr)); 677 } 678 679 static int 680 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 681 { 682 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 683 return (EINVAL); 684 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 685 &iptun->iptun_raddr, raddr)); 686 } 687 688 static boolean_t 689 iptun_canbind(iptun_t *iptun) 690 { 691 /* 692 * A tunnel may bind when its source address has been set, and if its 693 * tunnel type requires one, also its destination address. 694 */ 695 return ((iptun->iptun_flags & IPTUN_LADDR) && 696 ((iptun->iptun_flags & IPTUN_RADDR) || 697 !(iptun->iptun_typeinfo->iti_hasraddr))); 698 } 699 700 /* 701 * Verify that the local address is valid, and insert in the fanout 702 */ 703 static int 704 iptun_bind(iptun_t *iptun) 705 { 706 conn_t *connp = iptun->iptun_connp; 707 int error = 0; 708 ip_xmit_attr_t *ixa; 709 ip_xmit_attr_t *oldixa; 710 iulp_t uinfo; 711 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 712 713 /* 714 * Get an exclusive ixa for this thread. 715 * We defer updating conn_ixa until later to handle any concurrent 716 * conn_ixa_cleanup thread. 717 */ 718 ixa = conn_get_ixa(connp, B_FALSE); 719 if (ixa == NULL) 720 return (ENOMEM); 721 722 /* We create PMTU state including for 6to4 */ 723 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 724 725 ASSERT(iptun_canbind(iptun)); 726 727 mutex_enter(&connp->conn_lock); 728 /* 729 * Note that conn_proto can't be set since the upper protocol 730 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 731 * ipcl_iptun_classify doesn't use conn_proto. 732 */ 733 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 734 735 switch (iptun->iptun_typeinfo->iti_type) { 736 case IPTUN_TYPE_IPV4: 737 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 738 &connp->conn_laddr_v6); 739 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 740 &connp->conn_faddr_v6); 741 ixa->ixa_flags |= IXAF_IS_IPV4; 742 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 743 ipst, B_FALSE) != IPVL_UNICAST_UP) { 744 mutex_exit(&connp->conn_lock); 745 error = EADDRNOTAVAIL; 746 goto done; 747 } 748 break; 749 case IPTUN_TYPE_IPV6: 750 connp->conn_laddr_v6 = iptun->iptun_laddr6; 751 connp->conn_faddr_v6 = iptun->iptun_raddr6; 752 ixa->ixa_flags &= ~IXAF_IS_IPV4; 753 /* We use a zero scopeid for now */ 754 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 755 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 756 mutex_exit(&connp->conn_lock); 757 error = EADDRNOTAVAIL; 758 goto done; 759 } 760 break; 761 case IPTUN_TYPE_6TO4: 762 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 763 &connp->conn_laddr_v6); 764 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 765 ixa->ixa_flags |= IXAF_IS_IPV4; 766 mutex_exit(&connp->conn_lock); 767 768 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 769 IPCL_ZONEID(connp), ipst, B_FALSE)) { 770 case IPVL_UNICAST_UP: 771 case IPVL_UNICAST_DOWN: 772 break; 773 default: 774 error = EADDRNOTAVAIL; 775 goto done; 776 } 777 goto insert; 778 } 779 780 /* In case previous destination was multirt */ 781 ip_attr_newdst(ixa); 782 783 /* 784 * When we set a tunnel's destination address, we do not 785 * care if the destination is reachable. Transient routing 786 * issues should not inhibit the creation of a tunnel 787 * interface, for example. Thus we pass B_FALSE here. 788 */ 789 connp->conn_saddr_v6 = connp->conn_laddr_v6; 790 mutex_exit(&connp->conn_lock); 791 792 /* As long as the MTU is large we avoid fragmentation */ 793 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 794 795 /* We handle IPsec in iptun_output_common */ 796 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 797 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 798 &connp->conn_saddr_v6, &uinfo, 0); 799 800 if (error != 0) 801 goto done; 802 803 /* saddr shouldn't change since it was already set */ 804 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 805 &connp->conn_saddr_v6)); 806 807 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 808 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 809 ASSERT(uinfo.iulp_mtu != 0); 810 811 /* 812 * Allow setting new policies. 813 * The addresses/ports are already set, thus the IPsec policy calls 814 * can handle their passed-in conn's. 815 */ 816 connp->conn_policy_cached = B_FALSE; 817 818 insert: 819 error = ipcl_conn_insert(connp); 820 if (error != 0) 821 goto done; 822 823 /* Atomically update v6lastdst and conn_ixa */ 824 mutex_enter(&connp->conn_lock); 825 /* Record this as the "last" send even though we haven't sent any */ 826 connp->conn_v6lastdst = connp->conn_faddr_v6; 827 828 iptun->iptun_flags |= IPTUN_BOUND; 829 830 oldixa = conn_replace_ixa(connp, ixa); 831 /* Done with conn_t */ 832 mutex_exit(&connp->conn_lock); 833 ixa_refrele(oldixa); 834 835 /* 836 * Now that we're bound with ip below us, this is a good 837 * time to initialize the destination path MTU and to 838 * re-calculate the tunnel's link MTU. 839 */ 840 (void) iptun_update_mtu(iptun, ixa, 0); 841 842 if (IS_IPTUN_RUNNING(iptun)) 843 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 844 845 done: 846 ixa_refrele(ixa); 847 return (error); 848 } 849 850 static void 851 iptun_unbind(iptun_t *iptun) 852 { 853 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 854 ASSERT(mutex_owned(&iptun->iptun_lock) || 855 (iptun->iptun_flags & IPTUN_CONDEMNED)); 856 ip_unbind(iptun->iptun_connp); 857 iptun->iptun_flags &= ~IPTUN_BOUND; 858 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 859 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 860 } 861 862 /* 863 * Re-generate the template data-link header for a given IP tunnel given the 864 * tunnel's current parameters. 865 */ 866 static void 867 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 868 { 869 switch (iptun->iptun_typeinfo->iti_ipvers) { 870 case IPV4_VERSION: 871 /* 872 * We only need to use a custom IP header if the administrator 873 * has supplied a non-default hoplimit. 874 */ 875 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 876 iptun->iptun_header_size = 0; 877 break; 878 } 879 iptun->iptun_header_size = sizeof (ipha_t); 880 iptun->iptun_header4.ipha_version_and_hdr_length = 881 IP_SIMPLE_HDR_VERSION; 882 iptun->iptun_header4.ipha_fragment_offset_and_flags = 883 htons(IPH_DF); 884 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 885 break; 886 case IPV6_VERSION: { 887 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 888 889 /* 890 * We only need to use a custom IPv6 header if either the 891 * administrator has supplied a non-default hoplimit, or we 892 * need to include an encapsulation limit option in the outer 893 * header. 894 */ 895 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 896 iptun->iptun_encaplimit == 0) { 897 iptun->iptun_header_size = 0; 898 break; 899 } 900 901 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 902 if (iptun->iptun_encaplimit == 0) { 903 iptun->iptun_header_size = sizeof (ip6_t); 904 ip6hp->ip6_nxt = IPPROTO_NONE; 905 } else { 906 iptun_encaplim_t *iel; 907 908 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 909 /* 910 * The mac_ipv6 plugin requires ip6_plen to be in host 911 * byte order and reflect the extension headers 912 * present in the template. The actual network byte 913 * order ip6_plen will be set on a per-packet basis on 914 * transmit. 915 */ 916 ip6hp->ip6_plen = sizeof (*iel); 917 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 918 iel = &iptun->iptun_header6.it6h_encaplim; 919 *iel = iptun_encaplim_init; 920 iel->iel_telopt.ip6ot_encap_limit = 921 iptun->iptun_encaplimit; 922 } 923 924 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 925 break; 926 } 927 } 928 929 if (update_mac) 930 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 931 } 932 933 /* 934 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 935 * head. 936 */ 937 static boolean_t 938 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 939 uint_t n, netstack_t *ns) 940 { 941 int f = IPSEC_AF_V4; 942 943 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 944 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 945 return (B_FALSE); 946 947 f = IPSEC_AF_V6; 948 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 949 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 950 } 951 952 /* 953 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 954 * IPTUN_MODIFY ioctls. 955 */ 956 static int 957 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 958 { 959 int rc = 0; 960 uint_t nact; 961 ipsec_act_t *actp = NULL; 962 boolean_t clear_all, old_policy = B_FALSE; 963 ipsec_tun_pol_t *itp; 964 char name[MAXLINKNAMELEN]; 965 uint64_t gen; 966 netstack_t *ns = iptun->iptun_ns; 967 968 /* Can't specify self-encap on a tunnel. */ 969 if (ipsr->ipsr_self_encap_req != 0) 970 return (EINVAL); 971 972 /* 973 * If it's a "clear-all" entry, unset the security flags and resume 974 * normal cleartext (or inherit-from-global) policy. 975 */ 976 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 977 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 978 979 ASSERT(mutex_owned(&iptun->iptun_lock)); 980 itp = iptun->iptun_itp; 981 if (itp == NULL) { 982 if (clear_all) 983 goto bail; 984 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 985 NULL, NULL)) != 0) 986 goto bail; 987 ASSERT(name[0] != '\0'); 988 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 989 goto bail; 990 iptun->iptun_itp = itp; 991 } 992 993 /* Allocate the actvec now, before holding itp or polhead locks. */ 994 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 995 if (actp == NULL) { 996 rc = ENOMEM; 997 goto bail; 998 } 999 1000 /* 1001 * Just write on the active polhead. Save the primary/secondary stuff 1002 * for spdsock operations. 1003 * 1004 * Mutex because we need to write to the polhead AND flags atomically. 1005 * Other threads will acquire the polhead lock as a reader if the 1006 * (unprotected) flag is set. 1007 */ 1008 mutex_enter(&itp->itp_lock); 1009 if (itp->itp_flags & ITPF_P_TUNNEL) { 1010 /* Oops, we lost a race. Let's get out of here. */ 1011 rc = EBUSY; 1012 goto mutex_bail; 1013 } 1014 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 1015 1016 if (old_policy) { 1017 ITPF_CLONE(itp->itp_flags); 1018 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 1019 if (rc != 0) { 1020 /* inactive has already been cleared. */ 1021 itp->itp_flags &= ~ITPF_IFLAGS; 1022 goto mutex_bail; 1023 } 1024 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1025 ipsec_polhead_flush(itp->itp_policy, ns); 1026 } else { 1027 /* Else assume itp->itp_policy is already flushed. */ 1028 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1029 } 1030 1031 if (clear_all) { 1032 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 1033 itp->itp_flags &= ~ITPF_PFLAGS; 1034 rw_exit(&itp->itp_policy->iph_lock); 1035 old_policy = B_FALSE; /* Clear out the inactive one too. */ 1036 goto recover_bail; 1037 } 1038 1039 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 1040 rw_exit(&itp->itp_policy->iph_lock); 1041 /* 1042 * Adjust MTU and make sure the DL side knows what's up. 1043 */ 1044 itp->itp_flags = ITPF_P_ACTIVE; 1045 (void) iptun_update_mtu(iptun, NULL, 0); 1046 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 1047 } else { 1048 rw_exit(&itp->itp_policy->iph_lock); 1049 rc = ENOMEM; 1050 } 1051 1052 recover_bail: 1053 if (old_policy) { 1054 /* Recover policy in in active polhead. */ 1055 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1056 ITPF_SWAP(itp->itp_flags); 1057 } 1058 1059 /* Clear policy in inactive polhead. */ 1060 itp->itp_flags &= ~ITPF_IFLAGS; 1061 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1062 ipsec_polhead_flush(itp->itp_inactive, ns); 1063 rw_exit(&itp->itp_inactive->iph_lock); 1064 1065 mutex_bail: 1066 mutex_exit(&itp->itp_lock); 1067 1068 bail: 1069 if (actp != NULL) 1070 ipsec_actvec_free(actp, nact); 1071 1072 return (rc); 1073 } 1074 1075 static iptun_typeinfo_t * 1076 iptun_gettypeinfo(iptun_type_t type) 1077 { 1078 int i; 1079 1080 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1081 if (iptun_type_table[i].iti_type == type) 1082 break; 1083 } 1084 return (&iptun_type_table[i]); 1085 } 1086 1087 /* 1088 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1089 * only be set at creation time are set in iptun_create(). 1090 */ 1091 static int 1092 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1093 { 1094 int err = 0; 1095 netstack_t *ns = iptun->iptun_ns; 1096 iptun_addr_t orig_laddr, orig_raddr; 1097 uint_t orig_flags = iptun->iptun_flags; 1098 1099 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1100 if (orig_flags & IPTUN_LADDR) 1101 orig_laddr = iptun->iptun_laddr; 1102 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1103 return (err); 1104 iptun->iptun_flags |= IPTUN_LADDR; 1105 } 1106 1107 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1108 if (orig_flags & IPTUN_RADDR) 1109 orig_raddr = iptun->iptun_raddr; 1110 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1111 goto done; 1112 iptun->iptun_flags |= IPTUN_RADDR; 1113 } 1114 1115 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1116 /* 1117 * Set IPsec policy originating from the ifconfig(1M) command 1118 * line. This is traditionally called "simple" policy because 1119 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1120 * simple policy of "do ESP on everything" and/or "do AH on 1121 * everything" (as opposed to the rich policy that can be 1122 * defined with ipsecconf(1M)). 1123 */ 1124 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1125 /* 1126 * Can't set security properties for automatic 1127 * tunnels. 1128 */ 1129 err = EINVAL; 1130 goto done; 1131 } 1132 1133 if (!ipsec_loaded(ns->netstack_ipsec)) { 1134 /* If IPsec can be loaded, try and load it now. */ 1135 if (ipsec_failed(ns->netstack_ipsec)) { 1136 err = EPROTONOSUPPORT; 1137 goto done; 1138 } 1139 ipsec_loader_loadnow(ns->netstack_ipsec); 1140 /* 1141 * ipsec_loader_loadnow() returns while IPsec is 1142 * loaded asynchronously. While a method exists to 1143 * wait for IPsec to load (ipsec_loader_wait()), it 1144 * requires use of a STREAMS queue to do a qwait(). 1145 * We're not in STREAMS context here, and so we can't 1146 * use it. This is not a problem in practice because 1147 * in the vast majority of cases, key management and 1148 * global policy will have loaded before any tunnels 1149 * are plumbed, and so IPsec will already have been 1150 * loaded. 1151 */ 1152 err = EAGAIN; 1153 goto done; 1154 } 1155 1156 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1157 if (err == 0) { 1158 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1159 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1160 } 1161 } 1162 done: 1163 if (err != 0) { 1164 /* Restore original source and destination. */ 1165 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1166 (orig_flags & IPTUN_LADDR)) 1167 iptun->iptun_laddr = orig_laddr; 1168 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1169 (orig_flags & IPTUN_RADDR)) 1170 iptun->iptun_raddr = orig_raddr; 1171 iptun->iptun_flags = orig_flags; 1172 } 1173 return (err); 1174 } 1175 1176 static int 1177 iptun_register(iptun_t *iptun) 1178 { 1179 mac_register_t *mac; 1180 int err; 1181 1182 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1183 1184 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1185 return (EINVAL); 1186 1187 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1188 mac->m_driver = iptun; 1189 mac->m_dip = iptun_dip; 1190 mac->m_instance = (uint_t)-1; 1191 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1192 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1193 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1194 mac->m_callbacks = &iptun_m_callbacks; 1195 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1196 mac->m_max_sdu = iptun->iptun_mtu; 1197 if (iptun->iptun_header_size != 0) { 1198 mac->m_pdata = &iptun->iptun_header; 1199 mac->m_pdata_size = iptun->iptun_header_size; 1200 } 1201 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1202 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1203 mac_free(mac); 1204 return (err); 1205 } 1206 1207 static int 1208 iptun_unregister(iptun_t *iptun) 1209 { 1210 int err; 1211 1212 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1213 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1214 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1215 return (err); 1216 } 1217 1218 static conn_t * 1219 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1220 { 1221 conn_t *connp; 1222 1223 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1224 return (NULL); 1225 1226 connp->conn_flags |= IPCL_IPTUN; 1227 connp->conn_iptun = iptun; 1228 connp->conn_recv = iptun_input; 1229 connp->conn_recvicmp = iptun_input_icmp; 1230 connp->conn_verifyicmp = iptun_verifyicmp; 1231 1232 /* 1233 * Register iptun_notify to listen to capability changes detected by IP. 1234 * This upcall is made in the context of the call to conn_ip_output. 1235 */ 1236 connp->conn_ixa->ixa_notify = iptun_notify; 1237 connp->conn_ixa->ixa_notify_cookie = iptun; 1238 1239 /* 1240 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1241 * for all other conn_t's. 1242 * 1243 * Note that there's an important distinction between iptun_zoneid and 1244 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1245 * exclusive stack zones to make the ip module believe that the 1246 * non-global zone is actually a global zone. Therefore, when 1247 * interacting with the ip module, we must always use conn_zoneid. 1248 */ 1249 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1250 crgetzoneid(credp) : GLOBAL_ZONEID; 1251 connp->conn_cred = credp; 1252 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1253 crhold(connp->conn_cred); 1254 connp->conn_cpid = NOPID; 1255 1256 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1257 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1258 ASSERT(connp->conn_ref == 1); 1259 1260 /* Cache things in ixa without an extra refhold */ 1261 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1262 connp->conn_ixa->ixa_cred = connp->conn_cred; 1263 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1264 if (is_system_labeled()) 1265 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1266 1267 /* 1268 * Have conn_ip_output drop packets should our outer source 1269 * go invalid 1270 */ 1271 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1272 1273 switch (iptun->iptun_typeinfo->iti_ipvers) { 1274 case IPV4_VERSION: 1275 connp->conn_family = AF_INET6; 1276 break; 1277 case IPV6_VERSION: 1278 connp->conn_family = AF_INET; 1279 break; 1280 } 1281 mutex_enter(&connp->conn_lock); 1282 connp->conn_state_flags &= ~CONN_INCIPIENT; 1283 mutex_exit(&connp->conn_lock); 1284 return (connp); 1285 } 1286 1287 static void 1288 iptun_conn_destroy(conn_t *connp) 1289 { 1290 ip_quiesce_conn(connp); 1291 connp->conn_iptun = NULL; 1292 ASSERT(connp->conn_ref == 1); 1293 CONN_DEC_REF(connp); 1294 } 1295 1296 static iptun_t * 1297 iptun_alloc(void) 1298 { 1299 iptun_t *iptun; 1300 1301 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1302 bzero(iptun, sizeof (*iptun)); 1303 atomic_inc_32(&iptun_tunnelcount); 1304 } 1305 return (iptun); 1306 } 1307 1308 static void 1309 iptun_free(iptun_t *iptun) 1310 { 1311 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1312 1313 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1314 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1315 1316 mutex_enter(&iptun_hash_lock); 1317 VERIFY(mod_hash_remove(iptun_hash, 1318 IPTUN_HASH_KEY(iptun->iptun_linkid), 1319 (mod_hash_val_t *)&iptun) == 0); 1320 mutex_exit(&iptun_hash_lock); 1321 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1322 mutex_enter(&iptuns->iptuns_lock); 1323 list_remove(&iptuns->iptuns_iptunlist, iptun); 1324 mutex_exit(&iptuns->iptuns_lock); 1325 } 1326 1327 if (iptun->iptun_flags & IPTUN_BOUND) 1328 iptun_unbind(iptun); 1329 1330 /* 1331 * After iptun_unregister(), there will be no threads executing a 1332 * downcall from the mac module, including in the tx datapath. 1333 */ 1334 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1335 VERIFY(iptun_unregister(iptun) == 0); 1336 1337 if (iptun->iptun_itp != NULL) { 1338 /* 1339 * Remove from the AVL tree, AND release the reference iptun_t 1340 * itself holds on the ITP. 1341 */ 1342 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1343 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1344 iptun->iptun_itp = NULL; 1345 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1346 } 1347 1348 /* 1349 * After ipcl_conn_destroy(), there will be no threads executing an 1350 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1351 * the iptun_t. 1352 */ 1353 if (iptun->iptun_connp != NULL) { 1354 iptun_conn_destroy(iptun->iptun_connp); 1355 iptun->iptun_connp = NULL; 1356 } 1357 1358 kmem_cache_free(iptun_cache, iptun); 1359 atomic_dec_32(&iptun_tunnelcount); 1360 } 1361 1362 int 1363 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1364 { 1365 iptun_t *iptun = NULL; 1366 int err = 0, mherr; 1367 char linkname[MAXLINKNAMELEN]; 1368 ipsec_tun_pol_t *itp; 1369 netstack_t *ns = NULL; 1370 iptun_stack_t *iptuns; 1371 datalink_id_t tmpid; 1372 zoneid_t zoneid = crgetzoneid(credp); 1373 boolean_t link_created = B_FALSE; 1374 1375 /* The tunnel type is mandatory */ 1376 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1377 return (EINVAL); 1378 1379 /* 1380 * Is the linkid that the caller wishes to associate with this new 1381 * tunnel assigned to this zone? 1382 */ 1383 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1384 if (zoneid != GLOBAL_ZONEID) 1385 return (EINVAL); 1386 } else if (zoneid == GLOBAL_ZONEID) { 1387 return (EINVAL); 1388 } 1389 1390 /* 1391 * Make sure that we're not trying to create a tunnel that has already 1392 * been created. 1393 */ 1394 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1395 iptun_exit(iptun); 1396 iptun = NULL; 1397 err = EEXIST; 1398 goto done; 1399 } 1400 1401 ns = netstack_find_by_cred(credp); 1402 iptuns = ns->netstack_iptun; 1403 1404 if ((iptun = iptun_alloc()) == NULL) { 1405 err = ENOMEM; 1406 goto done; 1407 } 1408 1409 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1410 iptun->iptun_zoneid = zoneid; 1411 iptun->iptun_ns = ns; 1412 1413 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1414 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1415 err = EINVAL; 1416 goto done; 1417 } 1418 1419 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1420 iptun->iptun_flags |= IPTUN_IMPLICIT; 1421 1422 if ((err = iptun_setparams(iptun, ik)) != 0) 1423 goto done; 1424 1425 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1426 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1427 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1428 1429 iptun_headergen(iptun, B_FALSE); 1430 1431 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1432 if (iptun->iptun_connp == NULL) { 1433 err = ENOMEM; 1434 goto done; 1435 } 1436 1437 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1438 iptun->iptun_dpmtu = iptun->iptun_mtu; 1439 1440 /* 1441 * Find an ITP based on linkname. If we have parms already set via 1442 * the iptun_setparams() call above, it may have created an ITP for 1443 * us. We always try get_tunnel_policy() for DEBUG correctness 1444 * checks, and we may wish to refactor this to only check when 1445 * iptun_itp is NULL. 1446 */ 1447 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1448 NULL, NULL)) != 0) 1449 goto done; 1450 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1451 iptun->iptun_itp = itp; 1452 1453 /* 1454 * See if we have the necessary IP addresses assigned to this tunnel 1455 * to try and bind them with ip underneath us. If we're not ready to 1456 * bind yet, then we'll defer the bind operation until the addresses 1457 * are modified. 1458 */ 1459 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1460 goto done; 1461 1462 if ((err = iptun_register(iptun)) != 0) 1463 goto done; 1464 1465 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1466 iptun->iptun_zoneid); 1467 if (err != 0) 1468 goto done; 1469 link_created = B_TRUE; 1470 1471 /* 1472 * We hash by link-id as that is the key used by all other iptun 1473 * interfaces (modify, delete, etc.). 1474 */ 1475 if ((mherr = mod_hash_insert(iptun_hash, 1476 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1477 mutex_enter(&iptuns->iptuns_lock); 1478 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1479 mutex_exit(&iptuns->iptuns_lock); 1480 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1481 } else if (mherr == MH_ERR_NOMEM) { 1482 err = ENOMEM; 1483 } else if (mherr == MH_ERR_DUPLICATE) { 1484 err = EEXIST; 1485 } else { 1486 err = EINVAL; 1487 } 1488 1489 done: 1490 if (iptun == NULL && ns != NULL) 1491 netstack_rele(ns); 1492 if (err != 0 && iptun != NULL) { 1493 if (link_created) { 1494 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1495 B_TRUE); 1496 } 1497 iptun->iptun_flags |= IPTUN_CONDEMNED; 1498 iptun_free(iptun); 1499 } 1500 return (err); 1501 } 1502 1503 int 1504 iptun_delete(datalink_id_t linkid, cred_t *credp) 1505 { 1506 int err; 1507 iptun_t *iptun = NULL; 1508 1509 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1510 return (err); 1511 1512 /* One cannot delete a tunnel that belongs to another zone. */ 1513 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1514 iptun_exit(iptun); 1515 return (EACCES); 1516 } 1517 1518 /* 1519 * We need to exit iptun in order to issue calls up the stack such as 1520 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1521 * with calls coming down the stack is possible. We prevent other 1522 * threads from entering this iptun after we've exited it by setting 1523 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1524 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1525 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1526 * is set dont resuult in an iptun_enter() call, as that would result 1527 * in deadlock. 1528 */ 1529 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1530 1531 /* Wait for any pending upcall to the mac module to complete. */ 1532 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1533 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1534 1535 iptun_exit(iptun); 1536 1537 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1538 /* 1539 * mac_disable() will fail with EBUSY if there are references 1540 * to the iptun MAC. If there are none, then mac_disable() 1541 * will assure that none can be acquired until the MAC is 1542 * unregistered. 1543 * 1544 * XXX CR 6791335 prevents us from calling mac_disable() prior 1545 * to dls_devnet_destroy(), so we unfortunately need to 1546 * attempt to re-create the devnet node if mac_disable() 1547 * fails. 1548 */ 1549 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1550 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1551 iptun->iptun_zoneid); 1552 } 1553 } 1554 1555 /* 1556 * Now that we know the fate of this iptun_t, we need to clear 1557 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1558 * slated to be freed. Either way, we need to signal the threads 1559 * waiting in iptun_enter() so that they can either fail if 1560 * IPTUN_CONDEMNED is set, or continue if it's not. 1561 */ 1562 mutex_enter(&iptun->iptun_lock); 1563 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1564 if (err == 0) 1565 iptun->iptun_flags |= IPTUN_CONDEMNED; 1566 cv_broadcast(&iptun->iptun_enter_cv); 1567 mutex_exit(&iptun->iptun_lock); 1568 1569 /* 1570 * Note that there is no danger in calling iptun_free() after having 1571 * dropped the iptun_lock since callers of iptun_enter() at this point 1572 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1573 * threads entering from mac callbacks which call iptun_enter() 1574 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1575 * lock in order to remove the iptun_t from the hash table. 1576 */ 1577 if (err == 0) 1578 iptun_free(iptun); 1579 1580 return (err); 1581 } 1582 1583 int 1584 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1585 { 1586 iptun_t *iptun; 1587 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1588 int err; 1589 1590 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1591 return (err); 1592 1593 /* One cannot modify a tunnel that belongs to another zone. */ 1594 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1595 err = EACCES; 1596 goto done; 1597 } 1598 1599 /* The tunnel type cannot be changed */ 1600 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1601 err = EINVAL; 1602 goto done; 1603 } 1604 1605 if ((err = iptun_setparams(iptun, ik)) != 0) 1606 goto done; 1607 iptun_headergen(iptun, B_FALSE); 1608 1609 /* 1610 * If any of the tunnel's addresses has been modified and the tunnel 1611 * has the necessary addresses assigned to it, we need to try to bind 1612 * with ip underneath us. If we're not ready to bind yet, then we'll 1613 * try again when the addresses are modified later. 1614 */ 1615 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1616 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1617 if (laddr_change || raddr_change) { 1618 if (iptun->iptun_flags & IPTUN_BOUND) 1619 iptun_unbind(iptun); 1620 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1621 if (laddr_change) 1622 iptun->iptun_flags &= ~IPTUN_LADDR; 1623 if (raddr_change) 1624 iptun->iptun_flags &= ~IPTUN_RADDR; 1625 goto done; 1626 } 1627 } 1628 1629 if (laddr_change) 1630 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1631 if (raddr_change) 1632 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1633 1634 done: 1635 iptun_exit(iptun); 1636 return (err); 1637 } 1638 1639 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1640 int 1641 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1642 { 1643 iptun_t *iptun; 1644 int err; 1645 1646 /* Is the tunnel link visible from the caller's zone? */ 1647 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1648 crgetzoneid(credp))) 1649 return (ENOENT); 1650 1651 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1652 return (err); 1653 1654 bzero(ik, sizeof (iptun_kparams_t)); 1655 1656 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1657 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1658 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1659 1660 if (iptun->iptun_flags & IPTUN_LADDR) { 1661 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1662 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1663 } 1664 if (iptun->iptun_flags & IPTUN_RADDR) { 1665 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1666 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1667 } 1668 1669 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1670 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1671 1672 if (iptun->iptun_itp != NULL) { 1673 mutex_enter(&iptun->iptun_itp->itp_lock); 1674 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1675 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1676 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1677 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1678 ik->iptun_kparam_secinfo = 1679 iptun->iptun_simple_policy; 1680 } 1681 } 1682 mutex_exit(&iptun->iptun_itp->itp_lock); 1683 } 1684 1685 done: 1686 iptun_exit(iptun); 1687 return (err); 1688 } 1689 1690 int 1691 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1692 { 1693 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1694 return (EADDRNOTAVAIL); 1695 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1696 return (0); 1697 } 1698 1699 void 1700 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1701 { 1702 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1703 } 1704 1705 void 1706 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1707 { 1708 iptun_t *iptun; 1709 1710 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1711 return; 1712 if (iptun->iptun_itp != itp) { 1713 ASSERT(iptun->iptun_itp == NULL); 1714 ITP_REFHOLD(itp); 1715 iptun->iptun_itp = itp; 1716 } 1717 /* 1718 * IPsec policy means IPsec overhead, which means lower MTU. 1719 * Refresh the MTU for this tunnel. 1720 */ 1721 (void) iptun_update_mtu(iptun, NULL, 0); 1722 iptun_exit(iptun); 1723 } 1724 1725 /* 1726 * Obtain the path MTU to the tunnel destination. 1727 * Can return zero in some cases. 1728 */ 1729 static uint32_t 1730 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1731 { 1732 uint32_t pmtu = 0; 1733 conn_t *connp = iptun->iptun_connp; 1734 boolean_t need_rele = B_FALSE; 1735 1736 /* 1737 * We only obtain the pmtu for tunnels that have a remote tunnel 1738 * address. 1739 */ 1740 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1741 return (0); 1742 1743 if (ixa == NULL) { 1744 ixa = conn_get_ixa(connp, B_FALSE); 1745 if (ixa == NULL) 1746 return (0); 1747 need_rele = B_TRUE; 1748 } 1749 /* 1750 * Guard against ICMP errors before we have sent, as well as against 1751 * and a thread which held conn_ixa. 1752 */ 1753 if (ixa->ixa_ire != NULL) { 1754 pmtu = ip_get_pmtu(ixa); 1755 1756 /* 1757 * For both IPv4 and IPv6 we can have indication that the outer 1758 * header needs fragmentation. 1759 */ 1760 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1761 /* Must allow fragmentation in ip_output */ 1762 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1763 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1764 ixa->ixa_flags |= IXAF_DONTFRAG; 1765 } else { 1766 /* ip_get_pmtu might have set this - we don't want it */ 1767 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1768 } 1769 } 1770 1771 if (need_rele) 1772 ixa_refrele(ixa); 1773 return (pmtu); 1774 } 1775 1776 /* 1777 * Update the ip_xmit_attr_t to capture the current lower path mtu as known 1778 * by ip. 1779 */ 1780 static void 1781 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1782 { 1783 uint32_t pmtu; 1784 conn_t *connp = iptun->iptun_connp; 1785 boolean_t need_rele = B_FALSE; 1786 1787 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */ 1788 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1789 return; 1790 1791 if (ixa == NULL) { 1792 ixa = conn_get_ixa(connp, B_FALSE); 1793 if (ixa == NULL) 1794 return; 1795 need_rele = B_TRUE; 1796 } 1797 /* 1798 * Guard against ICMP errors before we have sent, as well as against 1799 * and a thread which held conn_ixa. 1800 */ 1801 if (ixa->ixa_ire != NULL) { 1802 pmtu = ip_get_pmtu(ixa); 1803 /* 1804 * Update ixa_fragsize and ixa_pmtu. 1805 */ 1806 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1807 1808 /* 1809 * For both IPv4 and IPv6 we can have indication that the outer 1810 * header needs fragmentation. 1811 */ 1812 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1813 /* Must allow fragmentation in ip_output */ 1814 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1815 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1816 ixa->ixa_flags |= IXAF_DONTFRAG; 1817 } else { 1818 /* ip_get_pmtu might have set this - we don't want it */ 1819 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1820 } 1821 } 1822 1823 if (need_rele) 1824 ixa_refrele(ixa); 1825 } 1826 1827 /* 1828 * There is nothing that iptun can verify in addition to IP having 1829 * verified the IP addresses in the fanout. 1830 */ 1831 /* ARGSUSED */ 1832 static boolean_t 1833 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 1834 ip_recv_attr_t *ira) 1835 { 1836 return (B_TRUE); 1837 } 1838 1839 /* 1840 * Notify function registered with ip_xmit_attr_t. 1841 */ 1842 static void 1843 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1844 ixa_notify_arg_t narg) 1845 { 1846 iptun_t *iptun = (iptun_t *)arg; 1847 1848 switch (ntype) { 1849 case IXAN_PMTU: 1850 (void) iptun_update_mtu(iptun, ixa, narg); 1851 break; 1852 } 1853 } 1854 1855 /* 1856 * Returns the max of old_ovhd and the overhead associated with pol. 1857 */ 1858 static uint32_t 1859 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1860 { 1861 uint32_t new_ovhd = old_ovhd; 1862 1863 while (pol != NULL) { 1864 new_ovhd = max(new_ovhd, 1865 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1866 pol = pol->ipsp_hash.hash_next; 1867 } 1868 return (new_ovhd); 1869 } 1870 1871 static uint32_t 1872 iptun_get_ipsec_overhead(iptun_t *iptun) 1873 { 1874 ipsec_policy_root_t *ipr; 1875 ipsec_policy_head_t *iph; 1876 ipsec_policy_t *pol; 1877 ipsec_selector_t sel; 1878 int i; 1879 uint32_t ipsec_ovhd = 0; 1880 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1881 netstack_t *ns = iptun->iptun_ns; 1882 1883 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1884 /* 1885 * Consult global policy, just in case. This will only work 1886 * if we have both source and destination addresses to work 1887 * with. 1888 */ 1889 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1890 (IPTUN_LADDR|IPTUN_RADDR)) 1891 return (0); 1892 1893 iph = ipsec_system_policy(ns); 1894 bzero(&sel, sizeof (sel)); 1895 sel.ips_isv4 = 1896 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1897 switch (iptun->iptun_typeinfo->iti_ipvers) { 1898 case IPV4_VERSION: 1899 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1900 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1901 break; 1902 case IPV6_VERSION: 1903 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1904 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1905 break; 1906 } 1907 /* Check for both IPv4 and IPv6. */ 1908 sel.ips_protocol = IPPROTO_ENCAP; 1909 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1910 &sel); 1911 if (pol != NULL) { 1912 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1913 IPPOL_REFRELE(pol); 1914 } 1915 sel.ips_protocol = IPPROTO_IPV6; 1916 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1917 &sel); 1918 if (pol != NULL) { 1919 ipsec_ovhd = max(ipsec_ovhd, 1920 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1921 IPPOL_REFRELE(pol); 1922 } 1923 IPPH_REFRELE(iph, ns); 1924 } else { 1925 /* 1926 * Look through all of the possible IPsec actions for the 1927 * tunnel, and find the largest potential IPsec overhead. 1928 */ 1929 iph = itp->itp_policy; 1930 rw_enter(&iph->iph_lock, RW_READER); 1931 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1932 ipsec_ovhd = iptun_max_policy_overhead( 1933 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1934 ipsec_ovhd = iptun_max_policy_overhead( 1935 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1936 for (i = 0; i < ipr->ipr_nchains; i++) { 1937 ipsec_ovhd = iptun_max_policy_overhead( 1938 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1939 } 1940 rw_exit(&iph->iph_lock); 1941 } 1942 1943 return (ipsec_ovhd); 1944 } 1945 1946 /* 1947 * Calculate and return the maximum possible upper MTU for the given tunnel. 1948 * 1949 * If new_pmtu is set then we also need to update the lower path MTU information 1950 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1951 * we are notified by conn_ip_output() when the path MTU increases. 1952 */ 1953 static uint32_t 1954 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1955 { 1956 size_t header_size, ipsec_overhead; 1957 uint32_t maxmtu, pmtu; 1958 1959 /* 1960 * Start with the path-MTU to the remote address, which is either 1961 * provided as the new_pmtu argument, or obtained using 1962 * iptun_get_dst_pmtu(). 1963 */ 1964 if (new_pmtu != 0) { 1965 if (iptun->iptun_flags & IPTUN_RADDR) 1966 iptun->iptun_dpmtu = new_pmtu; 1967 pmtu = new_pmtu; 1968 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1969 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1970 /* 1971 * We weren't able to obtain the path-MTU of the 1972 * destination. Use the previous value. 1973 */ 1974 pmtu = iptun->iptun_dpmtu; 1975 } else { 1976 iptun->iptun_dpmtu = pmtu; 1977 } 1978 } else { 1979 /* 1980 * We have no path-MTU information to go on, use the maximum 1981 * possible value. 1982 */ 1983 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1984 } 1985 1986 /* 1987 * Now calculate tunneling overhead and subtract that from the 1988 * path-MTU information obtained above. 1989 */ 1990 if (iptun->iptun_header_size != 0) { 1991 header_size = iptun->iptun_header_size; 1992 } else { 1993 switch (iptun->iptun_typeinfo->iti_ipvers) { 1994 case IPV4_VERSION: 1995 header_size = sizeof (ipha_t); 1996 if (is_system_labeled()) 1997 header_size += IP_MAX_OPT_LENGTH; 1998 break; 1999 case IPV6_VERSION: 2000 header_size = sizeof (iptun_ipv6hdrs_t); 2001 break; 2002 } 2003 } 2004 2005 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 2006 2007 maxmtu = pmtu - (header_size + ipsec_overhead); 2008 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 2009 } 2010 2011 /* 2012 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2013 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2014 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2015 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2016 * ip_get_pmtu(). 2017 * 2018 * If the calculated tunnel MTU is different than its previous value, then we 2019 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2020 */ 2021 static uint32_t 2022 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2023 { 2024 uint32_t newmtu; 2025 2026 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2027 iptun_update_dst_pmtu(iptun, ixa); 2028 2029 /* 2030 * We return the current MTU without updating it if it was pegged to a 2031 * static value using the MAC_PROP_MTU link property. 2032 */ 2033 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2034 return (iptun->iptun_mtu); 2035 2036 /* If the MTU isn't fixed, then use the maximum possible value. */ 2037 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2038 /* 2039 * We only dynamically adjust the tunnel MTU for tunnels with 2040 * destinations because dynamic MTU calculations are based on the 2041 * destination path-MTU. 2042 */ 2043 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 2044 iptun->iptun_mtu = newmtu; 2045 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 2046 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 2047 } 2048 2049 return (newmtu); 2050 } 2051 2052 /* 2053 * Frees a packet or packet chain and bumps stat for each freed packet. 2054 */ 2055 static void 2056 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 2057 { 2058 mblk_t *pktmp; 2059 2060 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 2061 mp = mp->b_next; 2062 pktmp->b_next = NULL; 2063 if (stat != NULL) 2064 atomic_inc_64(stat); 2065 freemsg(pktmp); 2066 } 2067 } 2068 2069 /* 2070 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 2071 * original packet to its b_cont. Returns NULL on failure. 2072 */ 2073 static mblk_t * 2074 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 2075 { 2076 mblk_t *icmperr_mp; 2077 2078 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) { 2079 icmperr_mp->b_wptr += hdrs_size; 2080 /* tack on the offending packet */ 2081 icmperr_mp->b_cont = orig_pkt; 2082 } 2083 return (icmperr_mp); 2084 } 2085 2086 /* 2087 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 2088 * the ICMP error. 2089 */ 2090 static void 2091 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp, 2092 ts_label_t *tsl) 2093 { 2094 size_t orig_pktsize, hdrs_size; 2095 mblk_t *icmperr_mp; 2096 ipha_t *new_ipha; 2097 icmph_t *new_icmp; 2098 ip_xmit_attr_t ixas; 2099 conn_t *connp = iptun->iptun_connp; 2100 2101 orig_pktsize = msgdsize(mp); 2102 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 2103 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2104 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2105 return; 2106 } 2107 2108 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 2109 new_icmp = (icmph_t *)(new_ipha + 1); 2110 2111 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2112 new_ipha->ipha_type_of_service = 0; 2113 new_ipha->ipha_ident = 0; 2114 new_ipha->ipha_fragment_offset_and_flags = 0; 2115 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 2116 new_ipha->ipha_protocol = IPPROTO_ICMP; 2117 new_ipha->ipha_src = orig_ipha->ipha_dst; 2118 new_ipha->ipha_dst = orig_ipha->ipha_src; 2119 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 2120 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 2121 2122 *new_icmp = *icmp; 2123 new_icmp->icmph_checksum = 0; 2124 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 2125 2126 bzero(&ixas, sizeof (ixas)); 2127 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 2128 if (new_ipha->ipha_src == INADDR_ANY) { 2129 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE; 2130 ixas.ixa_flags |= IXAF_SET_SOURCE; 2131 } 2132 2133 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2134 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2135 ixas.ixa_cred = connp->conn_cred; 2136 ixas.ixa_cpid = NOPID; 2137 if (is_system_labeled()) 2138 ixas.ixa_tsl = tsl; 2139 2140 ixas.ixa_ifindex = 0; 2141 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2142 2143 (void) ip_output_simple(icmperr_mp, &ixas); 2144 ixa_cleanup(&ixas); 2145 } 2146 2147 static void 2148 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp, 2149 ts_label_t *tsl) 2150 { 2151 size_t orig_pktsize, hdrs_size; 2152 mblk_t *icmp6err_mp; 2153 ip6_t *new_ip6h; 2154 icmp6_t *new_icmp6; 2155 ip_xmit_attr_t ixas; 2156 conn_t *connp = iptun->iptun_connp; 2157 2158 orig_pktsize = msgdsize(mp); 2159 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 2160 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2161 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2162 return; 2163 } 2164 2165 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 2166 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 2167 2168 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2169 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2170 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2171 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2172 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2173 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2174 2175 *new_icmp6 = *icmp6; 2176 /* The checksum is calculated in ip_output_simple and friends. */ 2177 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2178 2179 bzero(&ixas, sizeof (ixas)); 2180 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 2181 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) { 2182 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE; 2183 ixas.ixa_flags |= IXAF_SET_SOURCE; 2184 } 2185 2186 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2187 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2188 ixas.ixa_cred = connp->conn_cred; 2189 ixas.ixa_cpid = NOPID; 2190 if (is_system_labeled()) 2191 ixas.ixa_tsl = tsl; 2192 2193 ixas.ixa_ifindex = 0; 2194 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2195 2196 (void) ip_output_simple(icmp6err_mp, &ixas); 2197 ixa_cleanup(&ixas); 2198 } 2199 2200 static void 2201 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2202 uint8_t type, uint8_t code, ts_label_t *tsl) 2203 { 2204 icmph_t icmp; 2205 2206 bzero(&icmp, sizeof (icmp)); 2207 icmp.icmph_type = type; 2208 icmp.icmph_code = code; 2209 2210 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2211 } 2212 2213 static void 2214 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2215 mblk_t *mp, ts_label_t *tsl) 2216 { 2217 icmph_t icmp; 2218 2219 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2220 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2221 icmp.icmph_du_zero = 0; 2222 icmp.icmph_du_mtu = htons(newmtu); 2223 2224 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2225 } 2226 2227 static void 2228 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2229 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl) 2230 { 2231 icmp6_t icmp6; 2232 2233 bzero(&icmp6, sizeof (icmp6)); 2234 icmp6.icmp6_type = type; 2235 icmp6.icmp6_code = code; 2236 if (type == ICMP6_PARAM_PROB) 2237 icmp6.icmp6_pptr = htonl(offset); 2238 2239 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2240 } 2241 2242 static void 2243 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2244 mblk_t *mp, ts_label_t *tsl) 2245 { 2246 icmp6_t icmp6; 2247 2248 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2249 icmp6.icmp6_code = 0; 2250 icmp6.icmp6_mtu = htonl(newmtu); 2251 2252 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2253 } 2254 2255 /* 2256 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2257 * mp argument is only used to do bounds checking. 2258 */ 2259 static boolean_t 2260 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2261 { 2262 uint16_t hlen; 2263 2264 if (ipha != NULL) { 2265 icmph_t *icmph; 2266 2267 ASSERT(ip6h == NULL); 2268 if (ipha->ipha_protocol != IPPROTO_ICMP) 2269 return (B_FALSE); 2270 2271 hlen = IPH_HDR_LENGTH(ipha); 2272 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2273 return (ICMP_IS_ERROR(icmph->icmph_type) || 2274 icmph->icmph_type == ICMP_REDIRECT); 2275 } else { 2276 icmp6_t *icmp6; 2277 uint8_t *nexthdrp; 2278 2279 ASSERT(ip6h != NULL); 2280 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2281 *nexthdrp != IPPROTO_ICMPV6) { 2282 return (B_FALSE); 2283 } 2284 2285 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2286 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2287 icmp6->icmp6_type == ND_REDIRECT); 2288 } 2289 } 2290 2291 /* 2292 * Find inner and outer IP headers from a tunneled packet as setup for calls 2293 * into ipsec_tun_{in,out}bound(). 2294 * Note that we need to allow the outer header to be in a separate mblk from 2295 * the inner header. 2296 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero. 2297 */ 2298 static size_t 2299 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4, 2300 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6) 2301 { 2302 ipha_t *ipha; 2303 size_t first_mblkl = MBLKL(mp); 2304 mblk_t *inner_mp; 2305 2306 /* 2307 * Don't bother handling packets that don't have a full IP header in 2308 * the fist mblk. For the input path, the ip module ensures that this 2309 * won't happen, and on the output path, the IP tunneling MAC-type 2310 * plugins ensure that this also won't happen. 2311 */ 2312 if (first_mblkl < sizeof (ipha_t)) 2313 return (0); 2314 ipha = (ipha_t *)(mp->b_rptr); 2315 switch (IPH_HDR_VERSION(ipha)) { 2316 case IPV4_VERSION: 2317 *outer4 = ipha; 2318 *outer6 = NULL; 2319 if (outer_hlen == 0) 2320 outer_hlen = IPH_HDR_LENGTH(ipha); 2321 break; 2322 case IPV6_VERSION: 2323 *outer4 = NULL; 2324 *outer6 = (ip6_t *)ipha; 2325 if (outer_hlen == 0) 2326 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2327 break; 2328 default: 2329 return (0); 2330 } 2331 2332 if (first_mblkl < outer_hlen || 2333 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2334 return (0); 2335 2336 /* 2337 * We don't bother doing a pullup here since the outer header will 2338 * just get stripped off soon on input anyway. We just want to ensure 2339 * that the inner* pointer points to a full header. 2340 */ 2341 if (first_mblkl == outer_hlen) { 2342 inner_mp = mp->b_cont; 2343 ipha = (ipha_t *)inner_mp->b_rptr; 2344 } else { 2345 inner_mp = mp; 2346 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2347 } 2348 switch (IPH_HDR_VERSION(ipha)) { 2349 case IPV4_VERSION: 2350 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2351 return (0); 2352 *inner4 = ipha; 2353 *inner6 = NULL; 2354 break; 2355 case IPV6_VERSION: 2356 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2357 return (0); 2358 *inner4 = NULL; 2359 *inner6 = (ip6_t *)ipha; 2360 break; 2361 default: 2362 return (0); 2363 } 2364 2365 return (outer_hlen); 2366 } 2367 2368 /* 2369 * Received ICMP error in response to an X over IPv4 packet that we 2370 * transmitted. 2371 * 2372 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2373 * the following: 2374 * 2375 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2376 * 2377 * or 2378 * 2379 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2380 * 2381 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2382 * whatever the very-inner packet is (IPv4(2) or IPv6). 2383 */ 2384 static void 2385 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2386 ip_recv_attr_t *ira) 2387 { 2388 uint8_t *orig; 2389 ipha_t *outer4, *inner4; 2390 ip6_t *outer6, *inner6; 2391 int outer_hlen; 2392 uint8_t type, code; 2393 2394 ASSERT(data_mp->b_cont == NULL); 2395 /* 2396 * Temporarily move b_rptr forward so that iptun_find_headers() can 2397 * find headers in the ICMP packet payload. 2398 */ 2399 orig = data_mp->b_rptr; 2400 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2401 /* 2402 * The ip module ensures that ICMP errors contain at least the 2403 * original IP header (otherwise, the error would never have made it 2404 * here). 2405 */ 2406 ASSERT(MBLKL(data_mp) >= 0); 2407 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2408 &inner6); 2409 ASSERT(outer6 == NULL); 2410 data_mp->b_rptr = orig; 2411 if (outer_hlen == 0) { 2412 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2413 return; 2414 } 2415 2416 /* Only ICMP errors due to tunneled packets should reach here. */ 2417 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2418 outer4->ipha_protocol == IPPROTO_IPV6); 2419 2420 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2421 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2422 if (data_mp == NULL) { 2423 /* Callee did all of the freeing. */ 2424 atomic_inc_64(&iptun->iptun_ierrors); 2425 return; 2426 } 2427 /* We should never see reassembled fragment here. */ 2428 ASSERT(data_mp->b_next == NULL); 2429 2430 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2431 2432 /* 2433 * If the original packet being transmitted was itself an ICMP error, 2434 * then drop this packet. We don't want to generate an ICMP error in 2435 * response to an ICMP error. 2436 */ 2437 if (is_icmp_error(data_mp, inner4, inner6)) { 2438 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2439 return; 2440 } 2441 2442 switch (icmph->icmph_type) { 2443 case ICMP_DEST_UNREACHABLE: 2444 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); 2445 switch (icmph->icmph_code) { 2446 case ICMP_FRAGMENTATION_NEEDED: { 2447 uint32_t newmtu; 2448 2449 /* 2450 * We reconcile this with the fact that the tunnel may 2451 * also have IPsec policy by letting iptun_update_mtu 2452 * take care of it. 2453 */ 2454 newmtu = iptun_update_mtu(iptun, NULL, 2455 ntohs(icmph->icmph_du_mtu)); 2456 2457 if (inner4 != NULL) { 2458 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2459 data_mp, ira->ira_tsl); 2460 } else { 2461 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2462 data_mp, ira->ira_tsl); 2463 } 2464 return; 2465 } 2466 case ICMP_DEST_NET_UNREACH_ADMIN: 2467 case ICMP_DEST_HOST_UNREACH_ADMIN: 2468 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2469 ICMP6_DST_UNREACH_ADMIN); 2470 break; 2471 default: 2472 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2473 ICMP6_DST_UNREACH_ADDR); 2474 break; 2475 } 2476 break; 2477 case ICMP_TIME_EXCEEDED: 2478 if (inner6 != NULL) { 2479 type = ICMP6_TIME_EXCEEDED; 2480 code = 0; 2481 } /* else we're already set. */ 2482 break; 2483 case ICMP_PARAM_PROBLEM: 2484 /* 2485 * This is a problem with the outer header we transmitted. 2486 * Treat this as an output error. 2487 */ 2488 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2489 return; 2490 default: 2491 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2492 return; 2493 } 2494 2495 if (inner4 != NULL) { 2496 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2497 ira->ira_tsl); 2498 } else { 2499 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2500 ira->ira_tsl); 2501 } 2502 } 2503 2504 /* 2505 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2506 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2507 * to point to the option value. 2508 */ 2509 static boolean_t 2510 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2511 { 2512 ip_pkt_t pkt; 2513 uint8_t *endptr; 2514 ip6_dest_t *destp; 2515 struct ip6_opt *optp; 2516 2517 pkt.ipp_fields = 0; /* must be initialized */ 2518 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL); 2519 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2520 destp = pkt.ipp_dstopts; 2521 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) { 2522 destp = pkt.ipp_rthdrdstopts; 2523 } else { 2524 return (B_FALSE); 2525 } 2526 2527 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2528 optp = (struct ip6_opt *)(destp + 1); 2529 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2530 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2531 if ((uint8_t *)(optp + 1) >= endptr) 2532 return (B_FALSE); 2533 *encaplim_ptr = (uint8_t *)&optp[1]; 2534 return (B_TRUE); 2535 } 2536 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2537 } 2538 return (B_FALSE); 2539 } 2540 2541 /* 2542 * Received ICMPv6 error in response to an X over IPv6 packet that we 2543 * transmitted. 2544 * 2545 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2546 * the following: 2547 * 2548 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2549 * 2550 * or 2551 * 2552 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2553 * 2554 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2555 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2556 */ 2557 static void 2558 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h, 2559 ip_recv_attr_t *ira) 2560 { 2561 uint8_t *orig; 2562 ipha_t *outer4, *inner4; 2563 ip6_t *outer6, *inner6; 2564 int outer_hlen; 2565 uint8_t type, code; 2566 2567 ASSERT(data_mp->b_cont == NULL); 2568 2569 /* 2570 * Temporarily move b_rptr forward so that iptun_find_headers() can 2571 * find IP headers in the ICMP packet payload. 2572 */ 2573 orig = data_mp->b_rptr; 2574 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2575 /* 2576 * The ip module ensures that ICMP errors contain at least the 2577 * original IP header (otherwise, the error would never have made it 2578 * here). 2579 */ 2580 ASSERT(MBLKL(data_mp) >= 0); 2581 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2582 &inner6); 2583 ASSERT(outer4 == NULL); 2584 data_mp->b_rptr = orig; /* Restore r_ptr */ 2585 if (outer_hlen == 0) { 2586 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2587 return; 2588 } 2589 2590 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2591 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2592 if (data_mp == NULL) { 2593 /* Callee did all of the freeing. */ 2594 atomic_inc_64(&iptun->iptun_ierrors); 2595 return; 2596 } 2597 /* We should never see reassembled fragment here. */ 2598 ASSERT(data_mp->b_next == NULL); 2599 2600 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2601 2602 /* 2603 * If the original packet being transmitted was itself an ICMP error, 2604 * then drop this packet. We don't want to generate an ICMP error in 2605 * response to an ICMP error. 2606 */ 2607 if (is_icmp_error(data_mp, inner4, inner6)) { 2608 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2609 return; 2610 } 2611 2612 switch (icmp6h->icmp6_type) { 2613 case ICMP6_PARAM_PROB: { 2614 uint8_t *encaplim_ptr; 2615 2616 /* 2617 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2618 * Limit option and the limit value is 0, then fall through 2619 * and send a host unreachable message. Otherwise, treat the 2620 * error as an output error, as there must have been a problem 2621 * with a packet we sent. 2622 */ 2623 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2624 (icmp6h->icmp6_pptr != 2625 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2626 *encaplim_ptr != 0) { 2627 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2628 return; 2629 } 2630 /* FALLTHRU */ 2631 } 2632 case ICMP6_TIME_EXCEEDED: 2633 case ICMP6_DST_UNREACH: 2634 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2635 ICMP6_DST_UNREACH); 2636 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2637 ICMP6_DST_UNREACH_ADDR); 2638 break; 2639 case ICMP6_PACKET_TOO_BIG: { 2640 uint32_t newmtu; 2641 2642 /* 2643 * We reconcile this with the fact that the tunnel may also 2644 * have IPsec policy by letting iptun_update_mtu take care of 2645 * it. 2646 */ 2647 newmtu = iptun_update_mtu(iptun, NULL, 2648 ntohl(icmp6h->icmp6_mtu)); 2649 2650 if (inner4 != NULL) { 2651 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2652 data_mp, ira->ira_tsl); 2653 } else { 2654 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp, 2655 ira->ira_tsl); 2656 } 2657 return; 2658 } 2659 default: 2660 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2661 return; 2662 } 2663 2664 if (inner4 != NULL) { 2665 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2666 ira->ira_tsl); 2667 } else { 2668 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2669 ira->ira_tsl); 2670 } 2671 } 2672 2673 /* 2674 * Called as conn_recvicmp from IP for ICMP errors. 2675 */ 2676 /* ARGSUSED2 */ 2677 static void 2678 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2679 { 2680 conn_t *connp = arg; 2681 iptun_t *iptun = connp->conn_iptun; 2682 mblk_t *tmpmp; 2683 size_t hlen; 2684 2685 ASSERT(IPCL_IS_IPTUN(connp)); 2686 2687 if (mp->b_cont != NULL) { 2688 /* 2689 * Since ICMP error processing necessitates access to bits 2690 * that are within the ICMP error payload (the original packet 2691 * that caused the error), pull everything up into a single 2692 * block for convenience. 2693 */ 2694 if ((tmpmp = msgpullup(mp, -1)) == NULL) { 2695 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf); 2696 return; 2697 } 2698 freemsg(mp); 2699 mp = tmpmp; 2700 } 2701 2702 hlen = ira->ira_ip_hdr_length; 2703 switch (iptun->iptun_typeinfo->iti_ipvers) { 2704 case IPV4_VERSION: 2705 /* 2706 * The outer IP header coming up from IP is always ipha_t 2707 * alligned (otherwise, we would have crashed in ip). 2708 */ 2709 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen), 2710 ira); 2711 break; 2712 case IPV6_VERSION: 2713 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen), 2714 ira); 2715 break; 2716 } 2717 } 2718 2719 static boolean_t 2720 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2721 { 2722 ipaddr_t v4addr; 2723 2724 /* 2725 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2726 * IPv4 address of a 6to4 tunnel as the destination. 2727 */ 2728 if (inner6 == NULL) 2729 return (B_FALSE); 2730 2731 /* 2732 * Make sure that the IPv6 destination is within the site that this 2733 * 6to4 tunnel is routing for. We don't want people bouncing random 2734 * tunneled IPv6 packets through this 6to4 router. 2735 */ 2736 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2737 if (outer4->ipha_dst != v4addr) 2738 return (B_FALSE); 2739 2740 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2741 /* 2742 * Section 9 of RFC 3056 (security considerations) suggests 2743 * that when a packet is from a 6to4 site (i.e., it's not a 2744 * global address being forwarded froma relay router), make 2745 * sure that the packet was tunneled by that site's 6to4 2746 * router. 2747 */ 2748 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2749 if (outer4->ipha_src != v4addr) 2750 return (B_FALSE); 2751 } else { 2752 /* 2753 * Only accept packets from a relay router if we've configured 2754 * outbound relay router functionality. 2755 */ 2756 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2757 return (B_FALSE); 2758 } 2759 2760 return (B_TRUE); 2761 } 2762 2763 /* 2764 * Input function for everything that comes up from the ip module below us. 2765 * This is called directly from the ip module via connp->conn_recv(). 2766 * 2767 * We receive M_DATA messages with IP-in-IP tunneled packets. 2768 */ 2769 /* ARGSUSED2 */ 2770 static void 2771 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira) 2772 { 2773 conn_t *connp = arg; 2774 iptun_t *iptun = connp->conn_iptun; 2775 int outer_hlen; 2776 ipha_t *outer4, *inner4; 2777 ip6_t *outer6, *inner6; 2778 2779 ASSERT(IPCL_IS_IPTUN(connp)); 2780 ASSERT(DB_TYPE(data_mp) == M_DATA); 2781 2782 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length, 2783 &outer4, &inner4, &outer6, &inner6); 2784 if (outer_hlen == 0) 2785 goto drop; 2786 2787 /* 2788 * If the system is labeled, we call tsol_check_dest() on the packet 2789 * destination (our local tunnel address) to ensure that the packet as 2790 * labeled should be allowed to be sent to us. We don't need to call 2791 * the more involved tsol_receive_local() since the tunnel link itself 2792 * cannot be assigned to shared-stack non-global zones. 2793 */ 2794 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2795 if (ira->ira_tsl == NULL) 2796 goto drop; 2797 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ? 2798 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2799 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2800 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0) 2801 goto drop; 2802 } 2803 2804 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2805 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns); 2806 if (data_mp == NULL) { 2807 /* Callee did all of the freeing. */ 2808 return; 2809 } 2810 2811 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2812 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2813 goto drop; 2814 2815 /* 2816 * We need to statistically account for each packet individually, so 2817 * we might as well split up any b_next chains here. 2818 */ 2819 do { 2820 mblk_t *mp; 2821 2822 mp = data_mp->b_next; 2823 data_mp->b_next = NULL; 2824 2825 atomic_inc_64(&iptun->iptun_ipackets); 2826 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2827 mac_rx(iptun->iptun_mh, NULL, data_mp); 2828 2829 data_mp = mp; 2830 } while (data_mp != NULL); 2831 return; 2832 drop: 2833 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2834 } 2835 2836 /* 2837 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2838 * was processed without issue, or B_FALSE if the packet had issues and should 2839 * be dropped. 2840 */ 2841 static boolean_t 2842 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2843 { 2844 ipaddr_t v4addr; 2845 2846 /* 2847 * IPv6 source must be a 6to4 address. This is because a conscious 2848 * decision was made to not allow a Solaris system to be used as a 2849 * relay router (for security reasons) when 6to4 was initially 2850 * integrated. If this decision is ever reversed, the following check 2851 * can be removed. 2852 */ 2853 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2854 return (B_FALSE); 2855 2856 /* 2857 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2858 * portion of the 6to4 IPv6 source address. In other words, make sure 2859 * that we're tunneling packets from our own 6to4 site. 2860 */ 2861 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2862 if (outer4->ipha_src != v4addr) 2863 return (B_FALSE); 2864 2865 /* 2866 * Automatically set the destination of the outer IPv4 header as 2867 * described in RFC3056. There are two possibilities: 2868 * 2869 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address 2870 * to the IPv4 portion of the 6to4 address. 2871 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2872 * destination to the address of a relay router. 2873 * 2874 * Design Note: b shouldn't be necessary here, and this is a flaw in 2875 * the design of the 6to4relay command. Instead of setting a 6to4 2876 * relay address in this module via an ioctl, the 6to4relay command 2877 * could simply add a IPv6 route for native IPv6 addresses (such as a 2878 * default route) in the forwarding table that uses a 6to4 destination 2879 * as its next hop, and the IPv4 portion of that address could be a 2880 * 6to4 relay address. In order for this to work, IP would have to 2881 * resolve the next hop address, which would necessitate a link-layer 2882 * address resolver for 6to4 links, which doesn't exist today. 2883 * 2884 * In fact, if a resolver existed for 6to4 links, then setting the 2885 * IPv4 destination in the outer header could be done as part of 2886 * link-layer address resolution and fast-path header generation, and 2887 * not here. 2888 */ 2889 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2890 /* destination is a 6to4 router */ 2891 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2892 (struct in_addr *)&outer4->ipha_dst); 2893 2894 /* Reject attempts to send to INADDR_ANY */ 2895 if (outer4->ipha_dst == INADDR_ANY) 2896 return (B_FALSE); 2897 } else { 2898 /* 2899 * The destination is a native IPv6 address. If output to a 2900 * relay-router is enabled, use the relay-router's IPv4 2901 * address as the destination. 2902 */ 2903 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2904 return (B_FALSE); 2905 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2906 } 2907 2908 /* 2909 * If the outer source and destination are equal, this means that the 2910 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2911 * 6to4 site to its 6to4 tunnel interface, which will result in this 2912 * packet infinitely bouncing between ip and iptun. 2913 */ 2914 return (outer4->ipha_src != outer4->ipha_dst); 2915 } 2916 2917 /* 2918 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2919 * error. 2920 */ 2921 static mblk_t * 2922 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2923 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 2924 { 2925 uint8_t *innerptr = (inner4 != NULL ? 2926 (uint8_t *)inner4 : (uint8_t *)inner6); 2927 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 2928 2929 if (inner4 != NULL) { 2930 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2931 /* 2932 * Copy the tos from the inner IPv4 header. We mask off ECN 2933 * bits (bits 6 and 7) because there is currently no 2934 * tunnel-tunnel communication to determine if both sides 2935 * support ECN. We opt for the safe choice: don't copy the 2936 * ECN bits when doing encapsulation. 2937 */ 2938 outer4->ipha_type_of_service = 2939 inner4->ipha_type_of_service & ~0x03; 2940 } else { 2941 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2942 inner6 != NULL); 2943 } 2944 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2945 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2946 else 2947 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2948 2949 /* 2950 * As described in section 3.2.2 of RFC4213, if the packet payload is 2951 * less than or equal to the minimum MTU size, then we need to allow 2952 * IPv4 to fragment the packet. The reason is that even if we end up 2953 * receiving an ICMP frag-needed, the interface above this tunnel 2954 * won't be allowed to drop its MTU as a result, since the packet was 2955 * already smaller than the smallest allowable MTU for that interface. 2956 */ 2957 if (mp->b_wptr - innerptr <= minmtu) { 2958 outer4->ipha_fragment_offset_and_flags = 0; 2959 ixa->ixa_flags &= ~IXAF_DONTFRAG; 2960 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) && 2961 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) { 2962 ixa->ixa_flags |= IXAF_DONTFRAG; 2963 } 2964 2965 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4); 2966 ixa->ixa_pktlen = msgdsize(mp); 2967 ixa->ixa_protocol = outer4->ipha_protocol; 2968 2969 outer4->ipha_length = htons(ixa->ixa_pktlen); 2970 return (mp); 2971 } 2972 2973 /* 2974 * Insert an encapsulation limit destination option in the packet provided. 2975 * Always consumes the mp argument and returns a new mblk pointer. 2976 */ 2977 static mblk_t * 2978 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2979 uint8_t limit) 2980 { 2981 mblk_t *newmp; 2982 iptun_ipv6hdrs_t *newouter6; 2983 2984 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 2985 ASSERT(mp->b_cont == NULL); 2986 2987 mp->b_rptr += sizeof (ip6_t); 2988 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED); 2989 if (newmp == NULL) { 2990 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2991 return (NULL); 2992 } 2993 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 2994 /* Copy the payload (Starting with the inner IPv6 header). */ 2995 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 2996 newmp->b_wptr += MBLKL(mp); 2997 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 2998 /* Now copy the outer IPv6 header. */ 2999 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 3000 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 3001 newouter6->it6h_encaplim = iptun_encaplim_init; 3002 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 3003 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 3004 3005 /* 3006 * The payload length will be set at the end of 3007 * iptun_out_process_ipv6(). 3008 */ 3009 3010 freemsg(mp); 3011 return (newmp); 3012 } 3013 3014 /* 3015 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 3016 * on error. 3017 */ 3018 static mblk_t * 3019 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 3020 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 3021 { 3022 uint8_t *innerptr = (inner4 != NULL ? 3023 (uint8_t *)inner4 : (uint8_t *)inner6); 3024 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3025 uint8_t *limit, *configlimit; 3026 uint32_t offset; 3027 iptun_ipv6hdrs_t *v6hdrs; 3028 3029 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 3030 /* 3031 * The inner packet is an IPv6 packet which itself contains an 3032 * encapsulation limit option. The limit variable points to 3033 * the value in the embedded option. Process the 3034 * encapsulation limit option as specified in RFC 2473. 3035 * 3036 * If limit is 0, then we've exceeded the limit and we need to 3037 * send back an ICMPv6 parameter problem message. 3038 * 3039 * If limit is > 0, then we decrement it by 1 and make sure 3040 * that the encapsulation limit option in the outer header 3041 * reflects that (adding an option if one isn't already 3042 * there). 3043 */ 3044 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 3045 if (*limit == 0) { 3046 mp->b_rptr = (uint8_t *)inner6; 3047 offset = limit - mp->b_rptr; 3048 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 3049 0, offset, ixa->ixa_tsl); 3050 atomic_inc_64(&iptun->iptun_noxmtbuf); 3051 return (NULL); 3052 } 3053 3054 /* 3055 * The outer header requires an encapsulation limit option. 3056 * If there isn't one already, add one. 3057 */ 3058 if (iptun->iptun_encaplimit == 0) { 3059 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 3060 (*limit - 1))) == NULL) 3061 return (NULL); 3062 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3063 } else { 3064 /* 3065 * There is an existing encapsulation limit option in 3066 * the outer header. If the inner encapsulation limit 3067 * is less than the configured encapsulation limit, 3068 * update the outer encapsulation limit to reflect 3069 * this lesser value. 3070 */ 3071 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3072 configlimit = 3073 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 3074 if ((*limit - 1) < *configlimit) 3075 *configlimit = (*limit - 1); 3076 } 3077 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t); 3078 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt; 3079 } else { 3080 ixa->ixa_ip_hdr_length = sizeof (ip6_t); 3081 ixa->ixa_protocol = outer6->ip6_nxt; 3082 } 3083 /* 3084 * See iptun_output_process_ipv4() why we allow fragmentation for 3085 * small packets 3086 */ 3087 if (mp->b_wptr - innerptr <= minmtu) 3088 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3089 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL)) 3090 ixa->ixa_flags |= IXAF_DONTFRAG; 3091 3092 ixa->ixa_pktlen = msgdsize(mp); 3093 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t)); 3094 return (mp); 3095 } 3096 3097 /* 3098 * The IP tunneling MAC-type plugins have already done most of the header 3099 * processing and validity checks. We are simply responsible for multiplexing 3100 * down to the ip module below us. 3101 */ 3102 static void 3103 iptun_output(iptun_t *iptun, mblk_t *mp) 3104 { 3105 conn_t *connp = iptun->iptun_connp; 3106 mblk_t *newmp; 3107 int error; 3108 ip_xmit_attr_t *ixa; 3109 3110 ASSERT(mp->b_datap->db_type == M_DATA); 3111 3112 if (mp->b_cont != NULL) { 3113 if ((newmp = msgpullup(mp, -1)) == NULL) { 3114 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3115 return; 3116 } 3117 freemsg(mp); 3118 mp = newmp; 3119 } 3120 3121 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3122 iptun_output_6to4(iptun, mp); 3123 return; 3124 } 3125 3126 if (is_system_labeled()) { 3127 /* 3128 * Since the label can be different meaning a potentially 3129 * different IRE,we always use a unique ip_xmit_attr_t. 3130 */ 3131 ixa = conn_get_ixa_exclusive(connp); 3132 } else { 3133 /* 3134 * If no other thread is using conn_ixa this just gets a 3135 * reference to conn_ixa. Otherwise we get a safe copy of 3136 * conn_ixa. 3137 */ 3138 ixa = conn_get_ixa(connp, B_FALSE); 3139 } 3140 if (ixa == NULL) { 3141 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3142 return; 3143 } 3144 3145 /* 3146 * In case we got a safe copy of conn_ixa, then we need 3147 * to fill in any pointers in it. 3148 */ 3149 if (ixa->ixa_ire == NULL) { 3150 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3151 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3152 NULL, NULL, 0); 3153 if (error != 0) { 3154 if (ixa->ixa_ire != NULL && 3155 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3156 /* 3157 * Let conn_ip_output/ire_send_noroute return 3158 * the error and send any local ICMP error. 3159 */ 3160 error = 0; 3161 } else { 3162 ixa_refrele(ixa); 3163 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3164 return; 3165 } 3166 } 3167 } 3168 3169 iptun_output_common(iptun, ixa, mp); 3170 ixa_refrele(ixa); 3171 } 3172 3173 /* 3174 * We use an ixa based on the last destination. 3175 */ 3176 static void 3177 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3178 { 3179 conn_t *connp = iptun->iptun_connp; 3180 ipha_t *outer4, *inner4; 3181 ip6_t *outer6, *inner6; 3182 ip_xmit_attr_t *ixa; 3183 ip_xmit_attr_t *oldixa; 3184 int error; 3185 boolean_t need_connect; 3186 in6_addr_t v6dst; 3187 3188 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3189 3190 /* Make sure we set ipha_dst before we look at ipha_dst */ 3191 3192 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3193 ASSERT(outer4 != NULL); 3194 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3195 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3196 return; 3197 } 3198 3199 if (is_system_labeled()) { 3200 /* 3201 * Since the label can be different meaning a potentially 3202 * different IRE,we always use a unique ip_xmit_attr_t. 3203 */ 3204 ixa = conn_get_ixa_exclusive(connp); 3205 } else { 3206 /* 3207 * If no other thread is using conn_ixa this just gets a 3208 * reference to conn_ixa. Otherwise we get a safe copy of 3209 * conn_ixa. 3210 */ 3211 ixa = conn_get_ixa(connp, B_FALSE); 3212 } 3213 if (ixa == NULL) { 3214 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3215 return; 3216 } 3217 3218 mutex_enter(&connp->conn_lock); 3219 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3220 need_connect = (ixa->ixa_ire == NULL); 3221 } else { 3222 /* In case previous destination was multirt */ 3223 ip_attr_newdst(ixa); 3224 3225 /* 3226 * We later update conn_ixa when we update conn_v4lastdst 3227 * which enables subsequent packets to avoid redoing 3228 * ip_attr_connect 3229 */ 3230 need_connect = B_TRUE; 3231 } 3232 mutex_exit(&connp->conn_lock); 3233 3234 /* 3235 * In case we got a safe copy of conn_ixa, or otherwise we don't 3236 * have a current ixa_ire, then we need to fill in any pointers in 3237 * the ixa. 3238 */ 3239 if (need_connect) { 3240 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3241 3242 /* We handle IPsec in iptun_output_common */ 3243 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3244 &v6dst, &v6dst, 0, NULL, NULL, 0); 3245 if (error != 0) { 3246 if (ixa->ixa_ire != NULL && 3247 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3248 /* 3249 * Let conn_ip_output/ire_send_noroute return 3250 * the error and send any local ICMP error. 3251 */ 3252 error = 0; 3253 } else { 3254 ixa_refrele(ixa); 3255 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3256 return; 3257 } 3258 } 3259 } 3260 3261 iptun_output_common(iptun, ixa, mp); 3262 3263 /* Atomically replace conn_ixa and conn_v4lastdst */ 3264 mutex_enter(&connp->conn_lock); 3265 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3266 /* Remember the dst which corresponds to conn_ixa */ 3267 connp->conn_v6lastdst = v6dst; 3268 oldixa = conn_replace_ixa(connp, ixa); 3269 } else { 3270 oldixa = NULL; 3271 } 3272 mutex_exit(&connp->conn_lock); 3273 ixa_refrele(ixa); 3274 if (oldixa != NULL) 3275 ixa_refrele(oldixa); 3276 } 3277 3278 /* 3279 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3280 * 3281 * We get the label from the message in order to honor the 3282 * ULPs/IPs choice of label. This will be NULL for forwarded 3283 * packets, neighbor discovery packets and some others. 3284 */ 3285 static int 3286 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3287 { 3288 cred_t *cr; 3289 int adjust; 3290 int iplen; 3291 int err; 3292 ts_label_t *effective_tsl = NULL; 3293 3294 3295 ASSERT(is_system_labeled()); 3296 3297 cr = msg_getcred(*mpp, NULL); 3298 if (cr == NULL) 3299 return (0); 3300 3301 /* 3302 * We need to start with a label based on the IP/ULP above us 3303 */ 3304 ip_xmit_attr_restore_tsl(ixa, cr); 3305 3306 /* 3307 * Need to update packet with any CIPSO option since 3308 * conn_ip_output doesn't do that. 3309 */ 3310 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3311 ipha_t *ipha; 3312 3313 ipha = (ipha_t *)(*mpp)->b_rptr; 3314 iplen = ntohs(ipha->ipha_length); 3315 err = tsol_check_label_v4(ixa->ixa_tsl, 3316 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3317 ixa->ixa_ipst, &effective_tsl); 3318 if (err != 0) 3319 return (err); 3320 3321 ipha = (ipha_t *)(*mpp)->b_rptr; 3322 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3323 } else { 3324 ip6_t *ip6h; 3325 3326 ip6h = (ip6_t *)(*mpp)->b_rptr; 3327 iplen = ntohs(ip6h->ip6_plen); 3328 3329 err = tsol_check_label_v6(ixa->ixa_tsl, 3330 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3331 ixa->ixa_ipst, &effective_tsl); 3332 if (err != 0) 3333 return (err); 3334 3335 ip6h = (ip6_t *)(*mpp)->b_rptr; 3336 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3337 } 3338 3339 if (effective_tsl != NULL) { 3340 /* Update the label */ 3341 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3342 } 3343 ixa->ixa_pktlen += adjust; 3344 ixa->ixa_ip_hdr_length += adjust; 3345 return (0); 3346 } 3347 3348 3349 static void 3350 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3351 { 3352 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3353 int outer_hlen; 3354 mblk_t *newmp; 3355 ipha_t *outer4, *inner4; 3356 ip6_t *outer6, *inner6; 3357 int error; 3358 boolean_t update_pktlen; 3359 3360 ASSERT(ixa->ixa_ire != NULL); 3361 3362 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3363 &inner6); 3364 if (outer_hlen == 0) { 3365 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3366 return; 3367 } 3368 3369 /* Save IXAF_DONTFRAG value */ 3370 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3371 3372 /* Perform header processing. */ 3373 if (outer4 != NULL) { 3374 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3375 ixa); 3376 } else { 3377 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3378 ixa); 3379 } 3380 if (mp == NULL) 3381 return; 3382 3383 /* 3384 * Let's hope the compiler optimizes this with "branch taken". 3385 */ 3386 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3387 /* This updates the ip_xmit_attr_t */ 3388 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3389 outer6, outer_hlen, ixa); 3390 if (mp == NULL) { 3391 atomic_inc_64(&iptun->iptun_oerrors); 3392 return; 3393 } 3394 if (is_system_labeled()) { 3395 /* 3396 * Might change the packet by adding/removing CIPSO. 3397 * After this caller inner* and outer* and outer_hlen 3398 * might be invalid. 3399 */ 3400 error = iptun_output_check_label(&mp, ixa); 3401 if (error != 0) { 3402 ip2dbg(("label check failed (%d)\n", error)); 3403 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3404 return; 3405 } 3406 } 3407 3408 /* 3409 * ipsec_tun_outbound() returns a chain of tunneled IP 3410 * fragments linked with b_next (or a single message if the 3411 * tunneled packet wasn't a fragment). 3412 * If fragcache returned a list then we need to update 3413 * ixa_pktlen for all packets in the list. 3414 */ 3415 update_pktlen = (mp->b_next != NULL); 3416 3417 /* 3418 * Otherwise, we're good to go. The ixa has been updated with 3419 * instructions for outbound IPsec processing. 3420 */ 3421 for (newmp = mp; newmp != NULL; newmp = mp) { 3422 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3423 3424 atomic_inc_64(&iptun->iptun_opackets); 3425 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3426 mp = mp->b_next; 3427 newmp->b_next = NULL; 3428 3429 /* 3430 * The IXAF_DONTFRAG flag is global, but there is 3431 * a chain here. Check if we're really already 3432 * smaller than the minimum allowed MTU and reset here 3433 * appropriately. Otherwise one small packet can kill 3434 * the whole chain's path mtu discovery. 3435 * In addition, update the pktlen to the length of 3436 * the actual packet being processed. 3437 */ 3438 if (update_pktlen) { 3439 ixa->ixa_pktlen = msgdsize(newmp); 3440 if (ixa->ixa_pktlen <= minmtu) 3441 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3442 } 3443 3444 atomic_inc_64(&iptun->iptun_opackets); 3445 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3446 3447 error = conn_ip_output(newmp, ixa); 3448 3449 /* Restore IXAF_DONTFRAG value */ 3450 ixa->ixa_flags |= dontfrag; 3451 3452 if (error == EMSGSIZE) { 3453 /* IPsec policy might have changed */ 3454 (void) iptun_update_mtu(iptun, ixa, 0); 3455 } 3456 } 3457 } else { 3458 /* 3459 * The ip module will potentially apply global policy to the 3460 * packet in its output path if there's no active tunnel 3461 * policy. 3462 */ 3463 ASSERT(ixa->ixa_ipsec_policy == NULL); 3464 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3465 if (mp == NULL) { 3466 atomic_inc_64(&iptun->iptun_oerrors); 3467 return; 3468 } 3469 if (is_system_labeled()) { 3470 /* 3471 * Might change the packet by adding/removing CIPSO. 3472 * After this caller inner* and outer* and outer_hlen 3473 * might be invalid. 3474 */ 3475 error = iptun_output_check_label(&mp, ixa); 3476 if (error != 0) { 3477 ip2dbg(("label check failed (%d)\n", error)); 3478 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3479 return; 3480 } 3481 } 3482 3483 atomic_inc_64(&iptun->iptun_opackets); 3484 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3485 3486 error = conn_ip_output(mp, ixa); 3487 if (error == EMSGSIZE) { 3488 /* IPsec policy might have changed */ 3489 (void) iptun_update_mtu(iptun, ixa, 0); 3490 } 3491 } 3492 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3493 ipsec_out_release_refs(ixa); 3494 } 3495 3496 static mac_callbacks_t iptun_m_callbacks = { 3497 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), 3498 .mc_getstat = iptun_m_getstat, 3499 .mc_start = iptun_m_start, 3500 .mc_stop = iptun_m_stop, 3501 .mc_setpromisc = iptun_m_setpromisc, 3502 .mc_multicst = iptun_m_multicst, 3503 .mc_unicst = iptun_m_unicst, 3504 .mc_tx = iptun_m_tx, 3505 .mc_reserved = NULL, 3506 .mc_setprop = iptun_m_setprop, 3507 .mc_getprop = iptun_m_getprop, 3508 .mc_propinfo = iptun_m_propinfo 3509 };