Print this page
fixup .text where possible
cstyle sort of updates
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/iptun/iptun.c
+++ new/usr/src/uts/common/inet/iptun/iptun.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * iptun - IP Tunneling Driver
27 27 *
28 28 * This module is a GLDv3 driver that implements virtual datalinks over IP
29 29 * (a.k.a. IP tunneling). The datalinks are managed through a dld ioctl
30 30 * interface (see iptun_ctl.c), and registered with GLDv3 using
31 31 * mac_register(). It implements the logic for various forms of IP (IPv4 or
32 32 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
33 33 * module below it. Each virtual IP tunnel datalink has a conn_t associated
34 34 * with it representing the "outer" IP connection.
35 35 *
36 36 * The module implements the following locking semantics:
37 37 *
38 38 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
39 39 * See comments above iptun_hash_lock for details.
40 40 *
41 41 * No locks are ever held while calling up to GLDv3. The general architecture
42 42 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
43 43 * given link will be held while making downcalls (iptun_m_*() callbacks).
44 44 * Because we need to hold locks while handling downcalls, holding these locks
45 45 * while issuing upcalls results in deadlock scenarios. See the block comment
46 46 * above iptun_task_cb() for details on how we safely issue upcalls without
47 47 * holding any locks.
48 48 *
49 49 * The contents of each iptun_t are protected by its iptun_lock, which is
50 50 * acquired in iptun_enter() (called by iptun_enter_by_linkid()) and released
51 51 * in iptun_exit().
52 52 *
53 53 * See comments in iptun_delete() and iptun_free() for details on how the
54 54 * iptun_t is deleted safely.
55 55 */
56 56
57 57 #include <sys/types.h>
58 58 #include <sys/kmem.h>
59 59 #include <sys/errno.h>
60 60 #include <sys/modhash.h>
61 61 #include <sys/list.h>
62 62 #include <sys/strsun.h>
63 63 #include <sys/file.h>
64 64 #include <sys/systm.h>
65 65 #include <sys/tihdr.h>
66 66 #include <sys/param.h>
67 67 #include <sys/mac_provider.h>
68 68 #include <sys/mac_ipv4.h>
69 69 #include <sys/mac_ipv6.h>
70 70 #include <sys/mac_6to4.h>
71 71 #include <sys/tsol/tnet.h>
72 72 #include <sys/sunldi.h>
73 73 #include <netinet/in.h>
74 74 #include <netinet/ip6.h>
75 75 #include <inet/ip.h>
76 76 #include <inet/ip_ire.h>
77 77 #include <inet/ipsec_impl.h>
78 78 #include <sys/tsol/label.h>
79 79 #include <sys/tsol/tnet.h>
80 80 #include <inet/iptun.h>
81 81 #include "iptun_impl.h"
82 82
/*
 * Do the tunnel type and address family match?  IPv4 and 6to4 tunnel types
 * pair with AF_INET; the IPv6 tunnel type pairs with AF_INET6.
 *
 * Both arguments are parenthesized so that compound expressions expand
 * correctly.  Each argument may be evaluated more than once, so callers must
 * not pass expressions with side effects.
 */
#define	IPTUN_ADDR_MATCH(iptun_type, family)				\
	((((iptun_type) == IPTUN_TYPE_IPV4) && ((family) == AF_INET)) ||	\
	(((iptun_type) == IPTUN_TYPE_IPV6) && ((family) == AF_INET6)) ||	\
	(((iptun_type) == IPTUN_TYPE_6TO4) && ((family) == AF_INET)))
88 88
/* Convert a value (e.g. a datalink ID) into a mod_hash key. */
#define	IPTUN_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

/* Lower and upper bounds on tunnel MTUs, per outer IP version. */
#define	IPTUN_MIN_IPV4_MTU	576	/* ip.h still uses 68 (!) */
#define	IPTUN_MIN_IPV6_MTU	IPV6_MIN_MTU
#define	IPTUN_MAX_IPV4_MTU	(IP_MAXPACKET - sizeof (ipha_t))
#define	IPTUN_MAX_IPV6_MTU	\
	(IP_MAXPACKET - sizeof (ip6_t) - sizeof (iptun_encaplim_t))
96 96
↓ open down ↓ |
96 lines elided |
↑ open up ↑ |
/* Valid range for the tunnel hop limit property. */
#define	IPTUN_MIN_HOPLIMIT	1
#define	IPTUN_MAX_HOPLIMIT	UINT8_MAX

/* Valid range for the tunnel encapsulation limit property. */
#define	IPTUN_MIN_ENCAPLIMIT	0
#define	IPTUN_MAX_ENCAPLIMIT	UINT8_MAX

/* The IPsec request bits examined when deciding if policy is being cleared. */
#define	IPTUN_IPSEC_REQ_MASK	(IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
104 104
105 105 static iptun_encaplim_t iptun_encaplim_init = {
106 106 { IPPROTO_NONE, 0 },
107 - IP6OPT_TUNNEL_LIMIT,
108 - 1,
109 - IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
110 - IP6OPT_PADN,
111 - 1,
112 - 0
107 + { IP6OPT_TUNNEL_LIMIT,
108 + 1,
109 + IPTUN_DEFAULT_ENCAPLIMIT }, /* filled in with actual value later */
110 + { IP6OPT_PADN,
111 + 1,
112 + 0 }
113 113 };
114 114
115 115 /*
116 116 * Table containing per-iptun-type information.
117 117 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU.
118 118 */
119 119 static iptun_typeinfo_t iptun_type_table[] = {
120 120 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION,
121 121 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE },
122 122 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION,
123 123 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE },
124 124 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION,
125 125 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE },
126 126 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE }
127 127 };
128 128
129 129 /*
130 130 * iptun_hash is an iptun_t lookup table by link ID protected by
131 131 * iptun_hash_lock. While the hash table's integrity is maintained via
132 132 * internal locking in the mod_hash_*() functions, we need additional locking
133 133 * so that an iptun_t cannot be deleted after a hash lookup has returned an
134 134 * iptun_t and before iptun_lock has been entered. As such, we use
135 135 * iptun_hash_lock when doing lookups and removals from iptun_hash.
136 136 */
137 137 mod_hash_t *iptun_hash;
138 138 static kmutex_t iptun_hash_lock;
139 139
140 140 static uint_t iptun_tunnelcount; /* total for all stacks */
141 141 kmem_cache_t *iptun_cache;
142 142 ddi_taskq_t *iptun_taskq;
143 143
144 144 typedef enum {
145 145 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */
146 146 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
147 147 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
148 148 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */
149 149 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */
150 150 } iptun_task_t;
151 151
152 152 typedef struct iptun_task_data_s {
153 153 iptun_task_t itd_task;
154 154 datalink_id_t itd_linkid;
155 155 } iptun_task_data_t;
156 156
157 157 static void iptun_task_dispatch(iptun_t *, iptun_task_t);
158 158 static int iptun_enter(iptun_t *);
159 159 static void iptun_exit(iptun_t *);
160 160 static void iptun_headergen(iptun_t *, boolean_t);
161 161 static void iptun_drop_pkt(mblk_t *, uint64_t *);
162 162 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
163 163 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
164 164 static void iptun_output(iptun_t *, mblk_t *);
165 165 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
166 166 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
167 167 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
168 168 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
169 169 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *);
170 170
171 171 static void iptun_output_6to4(iptun_t *, mblk_t *);
172 172 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
173 173 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
174 174 ip_recv_attr_t *);
175 175
176 176 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
177 177 ixa_notify_arg_t);
178 178
179 179 static mac_callbacks_t iptun_m_callbacks;
180 180
181 181 static int
182 182 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
183 183 {
184 184 iptun_t *iptun = arg;
185 185 int err = 0;
186 186
187 187 switch (stat) {
188 188 case MAC_STAT_IERRORS:
189 189 *val = iptun->iptun_ierrors;
190 190 break;
191 191 case MAC_STAT_OERRORS:
192 192 *val = iptun->iptun_oerrors;
193 193 break;
194 194 case MAC_STAT_RBYTES:
195 195 *val = iptun->iptun_rbytes;
196 196 break;
197 197 case MAC_STAT_IPACKETS:
198 198 *val = iptun->iptun_ipackets;
199 199 break;
200 200 case MAC_STAT_OBYTES:
201 201 *val = iptun->iptun_obytes;
202 202 break;
203 203 case MAC_STAT_OPACKETS:
204 204 *val = iptun->iptun_opackets;
205 205 break;
206 206 case MAC_STAT_NORCVBUF:
207 207 *val = iptun->iptun_norcvbuf;
208 208 break;
209 209 case MAC_STAT_NOXMTBUF:
210 210 *val = iptun->iptun_noxmtbuf;
211 211 break;
212 212 default:
213 213 err = ENOTSUP;
214 214 }
215 215
216 216 return (err);
217 217 }
218 218
219 219 static int
220 220 iptun_m_start(void *arg)
221 221 {
222 222 iptun_t *iptun = arg;
223 223 int err;
224 224
225 225 if ((err = iptun_enter(iptun)) == 0) {
226 226 iptun->iptun_flags |= IPTUN_MAC_STARTED;
227 227 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
228 228 iptun_exit(iptun);
229 229 }
230 230 return (err);
231 231 }
232 232
233 233 static void
234 234 iptun_m_stop(void *arg)
235 235 {
236 236 iptun_t *iptun = arg;
237 237
238 238 if (iptun_enter(iptun) == 0) {
239 239 iptun->iptun_flags &= ~IPTUN_MAC_STARTED;
240 240 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
241 241 iptun_exit(iptun);
242 242 }
243 243 }
244 244
245 245 /*
246 246 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
247 247 * tunnel data-link only ever receives packets that are destined exclusively
248 248 * for the local address of the tunnel.
249 249 */
250 250 /* ARGSUSED */
251 251 static int
252 252 iptun_m_setpromisc(void *arg, boolean_t on)
253 253 {
254 254 return (0);
255 255 }
256 256
257 257 /* ARGSUSED */
258 258 static int
259 259 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
260 260 {
261 261 return (ENOTSUP);
262 262 }
263 263
264 264 /*
265 265 * iptun_m_unicst() sets the local address.
266 266 */
267 267 /* ARGSUSED */
268 268 static int
269 269 iptun_m_unicst(void *arg, const uint8_t *addrp)
270 270 {
271 271 iptun_t *iptun = arg;
272 272 int err;
273 273 struct sockaddr_storage ss;
274 274 struct sockaddr_in *sin;
275 275 struct sockaddr_in6 *sin6;
276 276
277 277 if ((err = iptun_enter(iptun)) == 0) {
278 278 switch (iptun->iptun_typeinfo->iti_ipvers) {
279 279 case IPV4_VERSION:
280 280 sin = (struct sockaddr_in *)&ss;
281 281 sin->sin_family = AF_INET;
282 282 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t));
283 283 break;
284 284 case IPV6_VERSION:
285 285 sin6 = (struct sockaddr_in6 *)&ss;
286 286 sin6->sin6_family = AF_INET6;
287 287 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t));
288 288 break;
289 289 default:
290 290 ASSERT(0);
291 291 }
292 292 err = iptun_setladdr(iptun, &ss);
293 293 iptun_exit(iptun);
294 294 }
295 295 return (err);
296 296 }
297 297
298 298 static mblk_t *
299 299 iptun_m_tx(void *arg, mblk_t *mpchain)
300 300 {
301 301 mblk_t *mp, *nmp;
302 302 iptun_t *iptun = arg;
303 303
304 304 if (!IS_IPTUN_RUNNING(iptun)) {
305 305 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf);
306 306 return (NULL);
307 307 }
308 308
309 309 for (mp = mpchain; mp != NULL; mp = nmp) {
310 310 nmp = mp->b_next;
311 311 mp->b_next = NULL;
312 312 iptun_output(iptun, mp);
313 313 }
314 314
315 315 return (NULL);
316 316 }
317 317
318 318 /* ARGSUSED */
319 319 static int
320 320 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
321 321 uint_t pr_valsize, const void *pr_val)
322 322 {
323 323 iptun_t *iptun = barg;
324 324 uint32_t value = *(uint32_t *)pr_val;
325 325 int err;
326 326
327 327 /*
328 328 * We need to enter this iptun_t since we'll be modifying the outer
329 329 * header.
330 330 */
331 331 if ((err = iptun_enter(iptun)) != 0)
332 332 return (err);
333 333
334 334 switch (pr_num) {
335 335 case MAC_PROP_IPTUN_HOPLIMIT:
336 336 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) {
337 337 err = EINVAL;
338 338 break;
339 339 }
340 340 if (value != iptun->iptun_hoplimit) {
341 341 iptun->iptun_hoplimit = (uint8_t)value;
342 342 iptun_headergen(iptun, B_TRUE);
343 343 }
344 344 break;
345 345 case MAC_PROP_IPTUN_ENCAPLIMIT:
346 346 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 ||
347 347 value > IPTUN_MAX_ENCAPLIMIT) {
348 348 err = EINVAL;
349 349 break;
350 350 }
351 351 if (value != iptun->iptun_encaplimit) {
352 352 iptun->iptun_encaplimit = (uint8_t)value;
353 353 iptun_headergen(iptun, B_TRUE);
354 354 }
355 355 break;
356 356 case MAC_PROP_MTU: {
357 357 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0);
358 358
359 359 if (value < iptun->iptun_typeinfo->iti_minmtu ||
360 360 value > maxmtu) {
361 361 err = EINVAL;
362 362 break;
363 363 }
364 364 iptun->iptun_flags |= IPTUN_FIXED_MTU;
365 365 if (value != iptun->iptun_mtu) {
366 366 iptun->iptun_mtu = value;
367 367 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
368 368 }
369 369 break;
370 370 }
371 371 default:
372 372 err = EINVAL;
373 373 }
374 374 iptun_exit(iptun);
375 375 return (err);
376 376 }
377 377
378 378 /* ARGSUSED */
379 379 static int
380 380 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
381 381 uint_t pr_valsize, void *pr_val)
382 382 {
383 383 iptun_t *iptun = barg;
384 384 int err;
385 385
386 386 if ((err = iptun_enter(iptun)) != 0)
387 387 return (err);
388 388
389 389 switch (pr_num) {
390 390 case MAC_PROP_IPTUN_HOPLIMIT:
391 391 ASSERT(pr_valsize >= sizeof (uint32_t));
392 392 *(uint32_t *)pr_val = iptun->iptun_hoplimit;
393 393 break;
394 394
395 395 case MAC_PROP_IPTUN_ENCAPLIMIT:
396 396 *(uint32_t *)pr_val = iptun->iptun_encaplimit;
397 397 break;
398 398 default:
399 399 err = ENOTSUP;
400 400 }
401 401 done:
402 402 iptun_exit(iptun);
403 403 return (err);
404 404 }
405 405
406 406 /* ARGSUSED */
407 407 static void
408 408 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num,
409 409 mac_prop_info_handle_t prh)
410 410 {
411 411 iptun_t *iptun = barg;
412 412
413 413 switch (pr_num) {
414 414 case MAC_PROP_IPTUN_HOPLIMIT:
415 415 mac_prop_info_set_range_uint32(prh,
416 416 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT);
417 417 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT);
418 418 break;
419 419
420 420 case MAC_PROP_IPTUN_ENCAPLIMIT:
421 421 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6)
422 422 break;
423 423 mac_prop_info_set_range_uint32(prh,
424 424 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT);
425 425 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT);
426 426 break;
427 427 case MAC_PROP_MTU:
428 428 mac_prop_info_set_range_uint32(prh,
429 429 iptun->iptun_typeinfo->iti_minmtu,
430 430 iptun_get_maxmtu(iptun, NULL, 0));
431 431 break;
432 432 }
433 433 }
434 434
435 435 uint_t
436 436 iptun_count(void)
437 437 {
438 438 return (iptun_tunnelcount);
439 439 }
440 440
441 441 /*
442 442 * Enter an iptun_t exclusively. This is essentially just a mutex, but we
443 443 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
444 444 * being deleted.
445 445 */
446 446 static int
447 447 iptun_enter(iptun_t *iptun)
448 448 {
449 449 mutex_enter(&iptun->iptun_lock);
450 450 while (iptun->iptun_flags & IPTUN_DELETE_PENDING)
451 451 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock);
452 452 if (iptun->iptun_flags & IPTUN_CONDEMNED) {
453 453 mutex_exit(&iptun->iptun_lock);
454 454 return (ENOENT);
455 455 }
456 456 return (0);
457 457 }
458 458
459 459 /*
460 460 * Exit the tunnel entered in iptun_enter().
461 461 */
462 462 static void
463 463 iptun_exit(iptun_t *iptun)
464 464 {
465 465 mutex_exit(&iptun->iptun_lock);
466 466 }
467 467
468 468 /*
469 469 * Enter the IP tunnel instance by datalink ID.
470 470 */
471 471 static int
472 472 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun)
473 473 {
474 474 int err;
475 475
476 476 mutex_enter(&iptun_hash_lock);
477 477 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid),
478 478 (mod_hash_val_t *)iptun) == 0)
479 479 err = iptun_enter(*iptun);
480 480 else
481 481 err = ENOENT;
482 482 if (err != 0)
483 483 *iptun = NULL;
484 484 mutex_exit(&iptun_hash_lock);
485 485 return (err);
486 486 }
487 487
488 488 /*
489 489 * Handle tasks that were deferred through the iptun_taskq because they require
490 490 * calling up to the mac module, and we can't call up to the mac module while
491 491 * holding locks.
492 492 *
493 493 * This is tricky to get right without introducing race conditions and
494 494 * deadlocks with the mac module, as we cannot issue an upcall while in the
495 495 * iptun_t. The reason is that upcalls may try and enter the mac perimeter,
496 496 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
497 497 * module will already have the perimeter held, and will then try and enter
498 498 * the iptun_t. You can see the lock ordering problem with this; this will
499 499 * deadlock.
500 500 *
501 501 * The safe way to do this is to enter the iptun_t in question and copy the
502 502 * information we need out of it so that we can exit it and know that the
503 503 * information being passed up to the upcalls won't be subject to modification
504 504 * by other threads. The problem now is that we need to exit it prior to
505 505 * issuing the upcall, but once we do this, a thread could come along and
506 506 * delete the iptun_t and thus the mac handle required to issue the upcall.
507 507 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
508 508 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which
509 509 * iptun_delete() will cv_wait() on. When the upcall completes, we clear
510 510 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
511 511 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having
512 512 * exited the iptun_t.
513 513 */
514 514 static void
515 515 iptun_task_cb(void *arg)
516 516 {
517 517 iptun_task_data_t *itd = arg;
518 518 iptun_task_t task = itd->itd_task;
519 519 datalink_id_t linkid = itd->itd_linkid;
520 520 iptun_t *iptun;
521 521 uint32_t mtu;
522 522 iptun_addr_t addr;
523 523 link_state_t linkstate;
524 524 size_t header_size;
525 525 iptun_header_t header;
526 526
527 527 kmem_free(itd, sizeof (*itd));
528 528
529 529 /*
530 530 * Note that if the lookup fails, it's because the tunnel was deleted
531 531 * between the time the task was dispatched and now. That isn't an
532 532 * error.
533 533 */
534 534 if (iptun_enter_by_linkid(linkid, &iptun) != 0)
535 535 return;
536 536
537 537 iptun->iptun_flags |= IPTUN_UPCALL_PENDING;
538 538
539 539 switch (task) {
540 540 case IPTUN_TASK_MTU_UPDATE:
541 541 mtu = iptun->iptun_mtu;
542 542 break;
543 543 case IPTUN_TASK_LADDR_UPDATE:
544 544 addr = iptun->iptun_laddr;
545 545 break;
546 546 case IPTUN_TASK_RADDR_UPDATE:
547 547 addr = iptun->iptun_raddr;
548 548 break;
549 549 case IPTUN_TASK_LINK_UPDATE:
550 550 linkstate = IS_IPTUN_RUNNING(iptun) ?
551 551 LINK_STATE_UP : LINK_STATE_DOWN;
552 552 break;
553 553 case IPTUN_TASK_PDATA_UPDATE:
554 554 header_size = iptun->iptun_header_size;
555 555 header = iptun->iptun_header;
556 556 break;
557 557 default:
558 558 ASSERT(0);
559 559 }
560 560
561 561 iptun_exit(iptun);
562 562
563 563 switch (task) {
564 564 case IPTUN_TASK_MTU_UPDATE:
565 565 (void) mac_maxsdu_update(iptun->iptun_mh, mtu);
566 566 break;
567 567 case IPTUN_TASK_LADDR_UPDATE:
568 568 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
569 569 break;
570 570 case IPTUN_TASK_RADDR_UPDATE:
571 571 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
572 572 break;
573 573 case IPTUN_TASK_LINK_UPDATE:
574 574 mac_link_update(iptun->iptun_mh, linkstate);
575 575 break;
576 576 case IPTUN_TASK_PDATA_UPDATE:
577 577 if (mac_pdata_update(iptun->iptun_mh,
578 578 header_size == 0 ? NULL : &header, header_size) != 0)
579 579 atomic_inc_64(&iptun->iptun_taskq_fail);
580 580 break;
581 581 }
582 582
583 583 mutex_enter(&iptun->iptun_lock);
584 584 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
585 585 cv_signal(&iptun->iptun_upcall_cv);
586 586 mutex_exit(&iptun->iptun_lock);
587 587 }
588 588
589 589 static void
590 590 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
591 591 {
592 592 iptun_task_data_t *itd;
593 593
594 594 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
595 595 if (itd == NULL) {
596 596 atomic_inc_64(&iptun->iptun_taskq_fail);
597 597 return;
598 598 }
599 599 itd->itd_task = iptun_task;
600 600 itd->itd_linkid = iptun->iptun_linkid;
601 601 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
602 602 atomic_inc_64(&iptun->iptun_taskq_fail);
603 603 kmem_free(itd, sizeof (*itd));
604 604 }
605 605 }
606 606
607 607 /*
608 608 * Convert an iptun_addr_t to sockaddr_storage.
609 609 */
610 610 static void
611 611 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss)
612 612 {
613 613 struct sockaddr_in *sin;
614 614 struct sockaddr_in6 *sin6;
615 615
616 616 bzero(ss, sizeof (*ss));
617 617 switch (iptun_addr->ia_family) {
618 618 case AF_INET:
619 619 sin = (struct sockaddr_in *)ss;
620 620 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4;
621 621 break;
622 622 case AF_INET6:
623 623 sin6 = (struct sockaddr_in6 *)ss;
624 624 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6;
625 625 break;
626 626 default:
627 627 ASSERT(0);
628 628 }
629 629 ss->ss_family = iptun_addr->ia_family;
630 630 }
631 631
632 632 /*
633 633 * General purpose function to set an IP tunnel source or destination address.
634 634 */
635 635 static int
636 636 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr,
637 637 const struct sockaddr_storage *ss)
638 638 {
639 639 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family))
640 640 return (EINVAL);
641 641
642 642 switch (ss->ss_family) {
643 643 case AF_INET: {
644 644 struct sockaddr_in *sin = (struct sockaddr_in *)ss;
645 645
646 646 if ((sin->sin_addr.s_addr == INADDR_ANY) ||
647 647 (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
648 648 CLASSD(sin->sin_addr.s_addr)) {
649 649 return (EADDRNOTAVAIL);
650 650 }
651 651 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr;
652 652 break;
653 653 }
654 654 case AF_INET6: {
655 655 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
656 656
657 657 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
658 658 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
659 659 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
660 660 return (EADDRNOTAVAIL);
661 661 }
662 662 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr;
663 663 break;
664 664 }
665 665 default:
666 666 return (EAFNOSUPPORT);
667 667 }
668 668 iptun_addr->ia_family = ss->ss_family;
669 669 return (0);
670 670 }
671 671
672 672 static int
673 673 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr)
674 674 {
675 675 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
676 676 &iptun->iptun_laddr, laddr));
677 677 }
678 678
679 679 static int
680 680 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr)
681 681 {
682 682 if (!(iptun->iptun_typeinfo->iti_hasraddr))
683 683 return (EINVAL);
684 684 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
685 685 &iptun->iptun_raddr, raddr));
686 686 }
687 687
688 688 static boolean_t
689 689 iptun_canbind(iptun_t *iptun)
690 690 {
691 691 /*
692 692 * A tunnel may bind when its source address has been set, and if its
693 693 * tunnel type requires one, also its destination address.
694 694 */
695 695 return ((iptun->iptun_flags & IPTUN_LADDR) &&
696 696 ((iptun->iptun_flags & IPTUN_RADDR) ||
697 697 !(iptun->iptun_typeinfo->iti_hasraddr)));
698 698 }
699 699
700 700 /*
701 701 * Verify that the local address is valid, and insert in the fanout
702 702 */
703 703 static int
704 704 iptun_bind(iptun_t *iptun)
705 705 {
706 706 conn_t *connp = iptun->iptun_connp;
707 707 int error = 0;
708 708 ip_xmit_attr_t *ixa;
709 709 ip_xmit_attr_t *oldixa;
710 710 iulp_t uinfo;
711 711 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
712 712
713 713 /*
714 714 * Get an exclusive ixa for this thread.
715 715 * We defer updating conn_ixa until later to handle any concurrent
716 716 * conn_ixa_cleanup thread.
717 717 */
718 718 ixa = conn_get_ixa(connp, B_FALSE);
719 719 if (ixa == NULL)
720 720 return (ENOMEM);
721 721
722 722 /* We create PMTU state including for 6to4 */
723 723 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
724 724
725 725 ASSERT(iptun_canbind(iptun));
726 726
727 727 mutex_enter(&connp->conn_lock);
728 728 /*
729 729 * Note that conn_proto can't be set since the upper protocol
730 730 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
731 731 * ipcl_iptun_classify doesn't use conn_proto.
732 732 */
733 733 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers;
734 734
735 735 switch (iptun->iptun_typeinfo->iti_type) {
736 736 case IPTUN_TYPE_IPV4:
737 737 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
738 738 &connp->conn_laddr_v6);
739 739 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4,
740 740 &connp->conn_faddr_v6);
741 741 ixa->ixa_flags |= IXAF_IS_IPV4;
742 742 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp),
743 743 ipst, B_FALSE) != IPVL_UNICAST_UP) {
744 744 mutex_exit(&connp->conn_lock);
745 745 error = EADDRNOTAVAIL;
746 746 goto done;
747 747 }
748 748 break;
749 749 case IPTUN_TYPE_IPV6:
750 750 connp->conn_laddr_v6 = iptun->iptun_laddr6;
751 751 connp->conn_faddr_v6 = iptun->iptun_raddr6;
752 752 ixa->ixa_flags &= ~IXAF_IS_IPV4;
753 753 /* We use a zero scopeid for now */
754 754 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp),
755 755 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) {
756 756 mutex_exit(&connp->conn_lock);
757 757 error = EADDRNOTAVAIL;
758 758 goto done;
759 759 }
760 760 break;
761 761 case IPTUN_TYPE_6TO4:
762 762 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
763 763 &connp->conn_laddr_v6);
764 764 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6);
765 765 ixa->ixa_flags |= IXAF_IS_IPV4;
766 766 mutex_exit(&connp->conn_lock);
767 767
768 768 switch (ip_laddr_verify_v4(iptun->iptun_laddr4,
769 769 IPCL_ZONEID(connp), ipst, B_FALSE)) {
770 770 case IPVL_UNICAST_UP:
771 771 case IPVL_UNICAST_DOWN:
772 772 break;
773 773 default:
774 774 error = EADDRNOTAVAIL;
775 775 goto done;
776 776 }
777 777 goto insert;
778 778 }
779 779
780 780 /* In case previous destination was multirt */
781 781 ip_attr_newdst(ixa);
782 782
783 783 /*
784 784 * When we set a tunnel's destination address, we do not
785 785 * care if the destination is reachable. Transient routing
786 786 * issues should not inhibit the creation of a tunnel
787 787 * interface, for example. Thus we pass B_FALSE here.
788 788 */
789 789 connp->conn_saddr_v6 = connp->conn_laddr_v6;
790 790 mutex_exit(&connp->conn_lock);
791 791
792 792 /* As long as the MTU is large we avoid fragmentation */
793 793 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF;
794 794
795 795 /* We handle IPsec in iptun_output_common */
796 796 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
797 797 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
798 798 &connp->conn_saddr_v6, &uinfo, 0);
799 799
800 800 if (error != 0)
801 801 goto done;
802 802
803 803 /* saddr shouldn't change since it was already set */
804 804 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
805 805 &connp->conn_saddr_v6));
806 806
807 807 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */
808 808 ixa->ixa_flags |= IXAF_VERIFY_PMTU;
809 809 ASSERT(uinfo.iulp_mtu != 0);
810 810
811 811 /*
812 812 * Allow setting new policies.
813 813 * The addresses/ports are already set, thus the IPsec policy calls
814 814 * can handle their passed-in conn's.
815 815 */
816 816 connp->conn_policy_cached = B_FALSE;
817 817
818 818 insert:
819 819 error = ipcl_conn_insert(connp);
820 820 if (error != 0)
821 821 goto done;
822 822
823 823 /* Atomically update v6lastdst and conn_ixa */
824 824 mutex_enter(&connp->conn_lock);
825 825 /* Record this as the "last" send even though we haven't sent any */
826 826 connp->conn_v6lastdst = connp->conn_faddr_v6;
827 827
828 828 iptun->iptun_flags |= IPTUN_BOUND;
829 829
830 830 oldixa = conn_replace_ixa(connp, ixa);
831 831 /* Done with conn_t */
832 832 mutex_exit(&connp->conn_lock);
833 833 ixa_refrele(oldixa);
834 834
835 835 /*
836 836 * Now that we're bound with ip below us, this is a good
837 837 * time to initialize the destination path MTU and to
838 838 * re-calculate the tunnel's link MTU.
839 839 */
840 840 (void) iptun_update_mtu(iptun, ixa, 0);
841 841
842 842 if (IS_IPTUN_RUNNING(iptun))
843 843 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
844 844
845 845 done:
846 846 ixa_refrele(ixa);
847 847 return (error);
848 848 }
849 849
850 850 static void
851 851 iptun_unbind(iptun_t *iptun)
852 852 {
853 853 ASSERT(iptun->iptun_flags & IPTUN_BOUND);
854 854 ASSERT(mutex_owned(&iptun->iptun_lock) ||
855 855 (iptun->iptun_flags & IPTUN_CONDEMNED));
856 856 ip_unbind(iptun->iptun_connp);
857 857 iptun->iptun_flags &= ~IPTUN_BOUND;
858 858 if (!(iptun->iptun_flags & IPTUN_CONDEMNED))
859 859 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
860 860 }
861 861
862 862 /*
863 863 * Re-generate the template data-link header for a given IP tunnel given the
864 864 * tunnel's current parameters.
865 865 */
866 866 static void
867 867 iptun_headergen(iptun_t *iptun, boolean_t update_mac)
868 868 {
869 869 switch (iptun->iptun_typeinfo->iti_ipvers) {
870 870 case IPV4_VERSION:
871 871 /*
872 872 * We only need to use a custom IP header if the administrator
873 873 * has supplied a non-default hoplimit.
874 874 */
875 875 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) {
876 876 iptun->iptun_header_size = 0;
877 877 break;
878 878 }
879 879 iptun->iptun_header_size = sizeof (ipha_t);
880 880 iptun->iptun_header4.ipha_version_and_hdr_length =
881 881 IP_SIMPLE_HDR_VERSION;
882 882 iptun->iptun_header4.ipha_fragment_offset_and_flags =
883 883 htons(IPH_DF);
884 884 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit;
885 885 break;
886 886 case IPV6_VERSION: {
887 887 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h;
888 888
889 889 /*
890 890 * We only need to use a custom IPv6 header if either the
891 891 * administrator has supplied a non-default hoplimit, or we
892 892 * need to include an encapsulation limit option in the outer
893 893 * header.
894 894 */
895 895 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT &&
896 896 iptun->iptun_encaplimit == 0) {
897 897 iptun->iptun_header_size = 0;
898 898 break;
899 899 }
900 900
901 901 (void) memset(ip6hp, 0, sizeof (*ip6hp));
902 902 if (iptun->iptun_encaplimit == 0) {
903 903 iptun->iptun_header_size = sizeof (ip6_t);
904 904 ip6hp->ip6_nxt = IPPROTO_NONE;
905 905 } else {
906 906 iptun_encaplim_t *iel;
907 907
908 908 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t);
909 909 /*
910 910 * The mac_ipv6 plugin requires ip6_plen to be in host
911 911 * byte order and reflect the extension headers
912 912 * present in the template. The actual network byte
913 913 * order ip6_plen will be set on a per-packet basis on
914 914 * transmit.
915 915 */
916 916 ip6hp->ip6_plen = sizeof (*iel);
917 917 ip6hp->ip6_nxt = IPPROTO_DSTOPTS;
918 918 iel = &iptun->iptun_header6.it6h_encaplim;
919 919 *iel = iptun_encaplim_init;
920 920 iel->iel_telopt.ip6ot_encap_limit =
921 921 iptun->iptun_encaplimit;
922 922 }
923 923
924 924 ip6hp->ip6_hlim = iptun->iptun_hoplimit;
925 925 break;
926 926 }
927 927 }
928 928
929 929 if (update_mac)
930 930 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE);
931 931 }
932 932
933 933 /*
934 934 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
935 935 * head.
936 936 */
937 937 static boolean_t
938 938 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp,
939 939 uint_t n, netstack_t *ns)
940 940 {
941 941 int f = IPSEC_AF_V4;
942 942
943 943 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) ||
944 944 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns))
945 945 return (B_FALSE);
946 946
947 947 f = IPSEC_AF_V6;
948 948 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) &&
949 949 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns));
950 950 }
951 951
952 952 /*
953 953 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
954 954 * IPTUN_MODIFY ioctls.
955 955 */
956 956 static int
957 957 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
958 958 {
959 959 int rc = 0;
960 960 uint_t nact;
961 961 ipsec_act_t *actp = NULL;
962 962 boolean_t clear_all, old_policy = B_FALSE;
963 963 ipsec_tun_pol_t *itp;
964 964 char name[MAXLINKNAMELEN];
965 965 uint64_t gen;
966 966 netstack_t *ns = iptun->iptun_ns;
967 967
968 968 /* Can't specify self-encap on a tunnel. */
969 969 if (ipsr->ipsr_self_encap_req != 0)
970 970 return (EINVAL);
971 971
972 972 /*
973 973 * If it's a "clear-all" entry, unset the security flags and resume
974 974 * normal cleartext (or inherit-from-global) policy.
975 975 */
976 976 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
977 977 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);
978 978
979 979 ASSERT(mutex_owned(&iptun->iptun_lock));
980 980 itp = iptun->iptun_itp;
981 981 if (itp == NULL) {
982 982 if (clear_all)
983 983 goto bail;
984 984 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
985 985 NULL, NULL)) != 0)
986 986 goto bail;
987 987 ASSERT(name[0] != '\0');
988 988 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
989 989 goto bail;
990 990 iptun->iptun_itp = itp;
991 991 }
992 992
993 993 /* Allocate the actvec now, before holding itp or polhead locks. */
994 994 ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
995 995 if (actp == NULL) {
996 996 rc = ENOMEM;
997 997 goto bail;
998 998 }
999 999
1000 1000 /*
1001 1001 * Just write on the active polhead. Save the primary/secondary stuff
1002 1002 * for spdsock operations.
1003 1003 *
1004 1004 * Mutex because we need to write to the polhead AND flags atomically.
1005 1005 * Other threads will acquire the polhead lock as a reader if the
1006 1006 * (unprotected) flag is set.
1007 1007 */
1008 1008 mutex_enter(&itp->itp_lock);
1009 1009 if (itp->itp_flags & ITPF_P_TUNNEL) {
1010 1010 /* Oops, we lost a race. Let's get out of here. */
1011 1011 rc = EBUSY;
1012 1012 goto mutex_bail;
1013 1013 }
1014 1014 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);
1015 1015
1016 1016 if (old_policy) {
1017 1017 ITPF_CLONE(itp->itp_flags);
1018 1018 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
1019 1019 if (rc != 0) {
1020 1020 /* inactive has already been cleared. */
1021 1021 itp->itp_flags &= ~ITPF_IFLAGS;
1022 1022 goto mutex_bail;
1023 1023 }
1024 1024 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
1025 1025 ipsec_polhead_flush(itp->itp_policy, ns);
1026 1026 } else {
1027 1027 /* Else assume itp->itp_policy is already flushed. */
1028 1028 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
1029 1029 }
1030 1030
1031 1031 if (clear_all) {
1032 1032 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
1033 1033 itp->itp_flags &= ~ITPF_PFLAGS;
1034 1034 rw_exit(&itp->itp_policy->iph_lock);
1035 1035 old_policy = B_FALSE; /* Clear out the inactive one too. */
1036 1036 goto recover_bail;
1037 1037 }
1038 1038
1039 1039 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
1040 1040 rw_exit(&itp->itp_policy->iph_lock);
1041 1041 /*
1042 1042 * Adjust MTU and make sure the DL side knows what's up.
1043 1043 */
1044 1044 itp->itp_flags = ITPF_P_ACTIVE;
1045 1045 (void) iptun_update_mtu(iptun, NULL, 0);
1046 1046 old_policy = B_FALSE; /* Blank out inactive - we succeeded */
1047 1047 } else {
1048 1048 rw_exit(&itp->itp_policy->iph_lock);
1049 1049 rc = ENOMEM;
1050 1050 }
1051 1051
1052 1052 recover_bail:
1053 1053 if (old_policy) {
1054 1054 /* Recover policy in in active polhead. */
1055 1055 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
1056 1056 ITPF_SWAP(itp->itp_flags);
1057 1057 }
1058 1058
1059 1059 /* Clear policy in inactive polhead. */
1060 1060 itp->itp_flags &= ~ITPF_IFLAGS;
1061 1061 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
1062 1062 ipsec_polhead_flush(itp->itp_inactive, ns);
1063 1063 rw_exit(&itp->itp_inactive->iph_lock);
1064 1064
1065 1065 mutex_bail:
1066 1066 mutex_exit(&itp->itp_lock);
1067 1067
1068 1068 bail:
1069 1069 if (actp != NULL)
1070 1070 ipsec_actvec_free(actp, nact);
1071 1071
1072 1072 return (rc);
1073 1073 }
1074 1074
1075 1075 static iptun_typeinfo_t *
1076 1076 iptun_gettypeinfo(iptun_type_t type)
1077 1077 {
1078 1078 int i;
1079 1079
1080 1080 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
1081 1081 if (iptun_type_table[i].iti_type == type)
1082 1082 break;
1083 1083 }
1084 1084 return (&iptun_type_table[i]);
1085 1085 }
1086 1086
1087 1087 /*
1088 1088 * Set the parameters included in ik on the tunnel iptun. Parameters that can
1089 1089 * only be set at creation time are set in iptun_create().
1090 1090 */
1091 1091 static int
1092 1092 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
1093 1093 {
1094 1094 int err = 0;
1095 1095 netstack_t *ns = iptun->iptun_ns;
1096 1096 iptun_addr_t orig_laddr, orig_raddr;
1097 1097 uint_t orig_flags = iptun->iptun_flags;
1098 1098
1099 1099 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
1100 1100 if (orig_flags & IPTUN_LADDR)
1101 1101 orig_laddr = iptun->iptun_laddr;
1102 1102 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
1103 1103 return (err);
1104 1104 iptun->iptun_flags |= IPTUN_LADDR;
1105 1105 }
1106 1106
1107 1107 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
1108 1108 if (orig_flags & IPTUN_RADDR)
1109 1109 orig_raddr = iptun->iptun_raddr;
1110 1110 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
1111 1111 goto done;
1112 1112 iptun->iptun_flags |= IPTUN_RADDR;
1113 1113 }
1114 1114
1115 1115 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
1116 1116 /*
1117 1117 * Set IPsec policy originating from the ifconfig(1M) command
1118 1118 * line. This is traditionally called "simple" policy because
1119 1119 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
1120 1120 * simple policy of "do ESP on everything" and/or "do AH on
1121 1121 * everything" (as opposed to the rich policy that can be
1122 1122 * defined with ipsecconf(1M)).
1123 1123 */
1124 1124 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
1125 1125 /*
1126 1126 * Can't set security properties for automatic
1127 1127 * tunnels.
1128 1128 */
1129 1129 err = EINVAL;
1130 1130 goto done;
1131 1131 }
1132 1132
1133 1133 if (!ipsec_loaded(ns->netstack_ipsec)) {
1134 1134 /* If IPsec can be loaded, try and load it now. */
1135 1135 if (ipsec_failed(ns->netstack_ipsec)) {
1136 1136 err = EPROTONOSUPPORT;
1137 1137 goto done;
1138 1138 }
1139 1139 ipsec_loader_loadnow(ns->netstack_ipsec);
1140 1140 /*
1141 1141 * ipsec_loader_loadnow() returns while IPsec is
1142 1142 * loaded asynchronously. While a method exists to
1143 1143 * wait for IPsec to load (ipsec_loader_wait()), it
1144 1144 * requires use of a STREAMS queue to do a qwait().
1145 1145 * We're not in STREAMS context here, and so we can't
1146 1146 * use it. This is not a problem in practice because
1147 1147 * in the vast majority of cases, key management and
1148 1148 * global policy will have loaded before any tunnels
1149 1149 * are plumbed, and so IPsec will already have been
1150 1150 * loaded.
1151 1151 */
1152 1152 err = EAGAIN;
1153 1153 goto done;
1154 1154 }
1155 1155
1156 1156 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
1157 1157 if (err == 0) {
1158 1158 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
1159 1159 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
1160 1160 }
1161 1161 }
1162 1162 done:
1163 1163 if (err != 0) {
1164 1164 /* Restore original source and destination. */
1165 1165 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
1166 1166 (orig_flags & IPTUN_LADDR))
1167 1167 iptun->iptun_laddr = orig_laddr;
1168 1168 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
1169 1169 (orig_flags & IPTUN_RADDR))
1170 1170 iptun->iptun_raddr = orig_raddr;
1171 1171 iptun->iptun_flags = orig_flags;
1172 1172 }
1173 1173 return (err);
1174 1174 }
1175 1175
1176 1176 static int
1177 1177 iptun_register(iptun_t *iptun)
1178 1178 {
1179 1179 mac_register_t *mac;
1180 1180 int err;
1181 1181
1182 1182 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));
1183 1183
1184 1184 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
1185 1185 return (EINVAL);
1186 1186
1187 1187 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
1188 1188 mac->m_driver = iptun;
1189 1189 mac->m_dip = iptun_dip;
1190 1190 mac->m_instance = (uint_t)-1;
1191 1191 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
1192 1192 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
1193 1193 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
1194 1194 mac->m_callbacks = &iptun_m_callbacks;
1195 1195 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
1196 1196 mac->m_max_sdu = iptun->iptun_mtu;
1197 1197 if (iptun->iptun_header_size != 0) {
1198 1198 mac->m_pdata = &iptun->iptun_header;
1199 1199 mac->m_pdata_size = iptun->iptun_header_size;
1200 1200 }
1201 1201 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
1202 1202 iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
1203 1203 mac_free(mac);
1204 1204 return (err);
1205 1205 }
1206 1206
1207 1207 static int
1208 1208 iptun_unregister(iptun_t *iptun)
1209 1209 {
1210 1210 int err;
1211 1211
1212 1212 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
1213 1213 if ((err = mac_unregister(iptun->iptun_mh)) == 0)
1214 1214 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
1215 1215 return (err);
1216 1216 }
1217 1217
1218 1218 static conn_t *
1219 1219 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
1220 1220 {
1221 1221 conn_t *connp;
1222 1222
1223 1223 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
1224 1224 return (NULL);
1225 1225
1226 1226 connp->conn_flags |= IPCL_IPTUN;
1227 1227 connp->conn_iptun = iptun;
1228 1228 connp->conn_recv = iptun_input;
1229 1229 connp->conn_recvicmp = iptun_input_icmp;
1230 1230 connp->conn_verifyicmp = iptun_verifyicmp;
1231 1231
1232 1232 /*
1233 1233 * Register iptun_notify to listen to capability changes detected by IP.
1234 1234 * This upcall is made in the context of the call to conn_ip_output.
1235 1235 */
1236 1236 connp->conn_ixa->ixa_notify = iptun_notify;
1237 1237 connp->conn_ixa->ixa_notify_cookie = iptun;
1238 1238
1239 1239 /*
1240 1240 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
1241 1241 * for all other conn_t's.
1242 1242 *
1243 1243 * Note that there's an important distinction between iptun_zoneid and
1244 1244 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
1245 1245 * exclusive stack zones to make the ip module believe that the
1246 1246 * non-global zone is actually a global zone. Therefore, when
1247 1247 * interacting with the ip module, we must always use conn_zoneid.
1248 1248 */
1249 1249 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ?
1250 1250 crgetzoneid(credp) : GLOBAL_ZONEID;
1251 1251 connp->conn_cred = credp;
1252 1252 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
1253 1253 crhold(connp->conn_cred);
1254 1254 connp->conn_cpid = NOPID;
1255 1255
1256 1256 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1257 1257 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid;
1258 1258 ASSERT(connp->conn_ref == 1);
1259 1259
1260 1260 /* Cache things in ixa without an extra refhold */
1261 1261 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
1262 1262 connp->conn_ixa->ixa_cred = connp->conn_cred;
1263 1263 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
1264 1264 if (is_system_labeled())
1265 1265 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
1266 1266
1267 1267 /*
1268 1268 * Have conn_ip_output drop packets should our outer source
1269 1269 * go invalid
1270 1270 */
1271 1271 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1272 1272
1273 1273 switch (iptun->iptun_typeinfo->iti_ipvers) {
1274 1274 case IPV4_VERSION:
1275 1275 connp->conn_family = AF_INET6;
1276 1276 break;
1277 1277 case IPV6_VERSION:
1278 1278 connp->conn_family = AF_INET;
1279 1279 break;
1280 1280 }
1281 1281 mutex_enter(&connp->conn_lock);
1282 1282 connp->conn_state_flags &= ~CONN_INCIPIENT;
1283 1283 mutex_exit(&connp->conn_lock);
1284 1284 return (connp);
1285 1285 }
1286 1286
1287 1287 static void
1288 1288 iptun_conn_destroy(conn_t *connp)
1289 1289 {
1290 1290 ip_quiesce_conn(connp);
1291 1291 connp->conn_iptun = NULL;
1292 1292 ASSERT(connp->conn_ref == 1);
1293 1293 CONN_DEC_REF(connp);
1294 1294 }
1295 1295
1296 1296 static iptun_t *
1297 1297 iptun_alloc(void)
1298 1298 {
1299 1299 iptun_t *iptun;
1300 1300
1301 1301 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) {
1302 1302 bzero(iptun, sizeof (*iptun));
1303 1303 atomic_inc_32(&iptun_tunnelcount);
1304 1304 }
1305 1305 return (iptun);
1306 1306 }
1307 1307
1308 1308 static void
1309 1309 iptun_free(iptun_t *iptun)
1310 1310 {
1311 1311 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED);
1312 1312
1313 1313 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) {
1314 1314 iptun_stack_t *iptuns = iptun->iptun_iptuns;
1315 1315
1316 1316 mutex_enter(&iptun_hash_lock);
1317 1317 VERIFY(mod_hash_remove(iptun_hash,
1318 1318 IPTUN_HASH_KEY(iptun->iptun_linkid),
1319 1319 (mod_hash_val_t *)&iptun) == 0);
1320 1320 mutex_exit(&iptun_hash_lock);
1321 1321 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED;
1322 1322 mutex_enter(&iptuns->iptuns_lock);
1323 1323 list_remove(&iptuns->iptuns_iptunlist, iptun);
1324 1324 mutex_exit(&iptuns->iptuns_lock);
1325 1325 }
1326 1326
1327 1327 if (iptun->iptun_flags & IPTUN_BOUND)
1328 1328 iptun_unbind(iptun);
1329 1329
1330 1330 /*
1331 1331 * After iptun_unregister(), there will be no threads executing a
1332 1332 * downcall from the mac module, including in the tx datapath.
1333 1333 */
1334 1334 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
1335 1335 VERIFY(iptun_unregister(iptun) == 0);
1336 1336
1337 1337 if (iptun->iptun_itp != NULL) {
1338 1338 /*
1339 1339 * Remove from the AVL tree, AND release the reference iptun_t
1340 1340 * itself holds on the ITP.
1341 1341 */
1342 1342 itp_unlink(iptun->iptun_itp, iptun->iptun_ns);
1343 1343 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns);
1344 1344 iptun->iptun_itp = NULL;
1345 1345 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY;
1346 1346 }
1347 1347
1348 1348 /*
1349 1349 * After ipcl_conn_destroy(), there will be no threads executing an
1350 1350 * upcall from ip (i.e., iptun_input()), and it is then safe to free
1351 1351 * the iptun_t.
1352 1352 */
1353 1353 if (iptun->iptun_connp != NULL) {
1354 1354 iptun_conn_destroy(iptun->iptun_connp);
1355 1355 iptun->iptun_connp = NULL;
1356 1356 }
1357 1357
1358 1358 kmem_cache_free(iptun_cache, iptun);
1359 1359 atomic_dec_32(&iptun_tunnelcount);
1360 1360 }
1361 1361
1362 1362 int
1363 1363 iptun_create(iptun_kparams_t *ik, cred_t *credp)
1364 1364 {
1365 1365 iptun_t *iptun = NULL;
1366 1366 int err = 0, mherr;
1367 1367 char linkname[MAXLINKNAMELEN];
1368 1368 ipsec_tun_pol_t *itp;
1369 1369 netstack_t *ns = NULL;
1370 1370 iptun_stack_t *iptuns;
1371 1371 datalink_id_t tmpid;
1372 1372 zoneid_t zoneid = crgetzoneid(credp);
1373 1373 boolean_t link_created = B_FALSE;
1374 1374
1375 1375 /* The tunnel type is mandatory */
1376 1376 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE))
1377 1377 return (EINVAL);
1378 1378
1379 1379 /*
1380 1380 * Is the linkid that the caller wishes to associate with this new
1381 1381 * tunnel assigned to this zone?
1382 1382 */
1383 1383 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) {
1384 1384 if (zoneid != GLOBAL_ZONEID)
1385 1385 return (EINVAL);
1386 1386 } else if (zoneid == GLOBAL_ZONEID) {
1387 1387 return (EINVAL);
1388 1388 }
1389 1389
1390 1390 /*
1391 1391 * Make sure that we're not trying to create a tunnel that has already
1392 1392 * been created.
1393 1393 */
1394 1394 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) {
1395 1395 iptun_exit(iptun);
1396 1396 iptun = NULL;
1397 1397 err = EEXIST;
1398 1398 goto done;
1399 1399 }
1400 1400
1401 1401 ns = netstack_find_by_cred(credp);
1402 1402 iptuns = ns->netstack_iptun;
1403 1403
1404 1404 if ((iptun = iptun_alloc()) == NULL) {
1405 1405 err = ENOMEM;
1406 1406 goto done;
1407 1407 }
1408 1408
1409 1409 iptun->iptun_linkid = ik->iptun_kparam_linkid;
1410 1410 iptun->iptun_zoneid = zoneid;
1411 1411 iptun->iptun_ns = ns;
1412 1412
1413 1413 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type);
1414 1414 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) {
1415 1415 err = EINVAL;
1416 1416 goto done;
1417 1417 }
1418 1418
1419 1419 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT)
1420 1420 iptun->iptun_flags |= IPTUN_IMPLICIT;
1421 1421
1422 1422 if ((err = iptun_setparams(iptun, ik)) != 0)
1423 1423 goto done;
1424 1424
1425 1425 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT;
1426 1426 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6)
1427 1427 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT;
1428 1428
1429 1429 iptun_headergen(iptun, B_FALSE);
1430 1430
1431 1431 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp);
1432 1432 if (iptun->iptun_connp == NULL) {
1433 1433 err = ENOMEM;
1434 1434 goto done;
1435 1435 }
1436 1436
1437 1437 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu;
1438 1438 iptun->iptun_dpmtu = iptun->iptun_mtu;
1439 1439
1440 1440 /*
1441 1441 * Find an ITP based on linkname. If we have parms already set via
1442 1442 * the iptun_setparams() call above, it may have created an ITP for
1443 1443 * us. We always try get_tunnel_policy() for DEBUG correctness
1444 1444 * checks, and we may wish to refactor this to only check when
1445 1445 * iptun_itp is NULL.
1446 1446 */
1447 1447 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL,
1448 1448 NULL, NULL)) != 0)
1449 1449 goto done;
1450 1450 if ((itp = get_tunnel_policy(linkname, ns)) != NULL)
1451 1451 iptun->iptun_itp = itp;
1452 1452
1453 1453 /*
1454 1454 * See if we have the necessary IP addresses assigned to this tunnel
1455 1455 * to try and bind them with ip underneath us. If we're not ready to
1456 1456 * bind yet, then we'll defer the bind operation until the addresses
1457 1457 * are modified.
1458 1458 */
1459 1459 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0))
1460 1460 goto done;
1461 1461
1462 1462 if ((err = iptun_register(iptun)) != 0)
1463 1463 goto done;
1464 1464
1465 1465 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid,
1466 1466 iptun->iptun_zoneid);
1467 1467 if (err != 0)
1468 1468 goto done;
1469 1469 link_created = B_TRUE;
1470 1470
1471 1471 /*
1472 1472 * We hash by link-id as that is the key used by all other iptun
1473 1473 * interfaces (modify, delete, etc.).
1474 1474 */
1475 1475 if ((mherr = mod_hash_insert(iptun_hash,
1476 1476 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) {
1477 1477 mutex_enter(&iptuns->iptuns_lock);
1478 1478 list_insert_head(&iptuns->iptuns_iptunlist, iptun);
1479 1479 mutex_exit(&iptuns->iptuns_lock);
1480 1480 iptun->iptun_flags |= IPTUN_HASH_INSERTED;
1481 1481 } else if (mherr == MH_ERR_NOMEM) {
1482 1482 err = ENOMEM;
1483 1483 } else if (mherr == MH_ERR_DUPLICATE) {
1484 1484 err = EEXIST;
1485 1485 } else {
1486 1486 err = EINVAL;
1487 1487 }
1488 1488
1489 1489 done:
1490 1490 if (iptun == NULL && ns != NULL)
1491 1491 netstack_rele(ns);
1492 1492 if (err != 0 && iptun != NULL) {
1493 1493 if (link_created) {
1494 1494 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid,
1495 1495 B_TRUE);
1496 1496 }
1497 1497 iptun->iptun_flags |= IPTUN_CONDEMNED;
1498 1498 iptun_free(iptun);
1499 1499 }
1500 1500 return (err);
1501 1501 }
1502 1502
1503 1503 int
1504 1504 iptun_delete(datalink_id_t linkid, cred_t *credp)
1505 1505 {
1506 1506 int err;
1507 1507 iptun_t *iptun = NULL;
1508 1508
1509 1509 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0)
1510 1510 return (err);
1511 1511
1512 1512 /* One cannot delete a tunnel that belongs to another zone. */
1513 1513 if (iptun->iptun_zoneid != crgetzoneid(credp)) {
1514 1514 iptun_exit(iptun);
1515 1515 return (EACCES);
1516 1516 }
1517 1517
1518 1518 /*
1519 1519 * We need to exit iptun in order to issue calls up the stack such as
1520 1520 * dls_devnet_destroy(). If we call up while still in iptun, deadlock
1521 1521 * with calls coming down the stack is possible. We prevent other
1522 1522 * threads from entering this iptun after we've exited it by setting
1523 1523 * the IPTUN_DELETE_PENDING flag. This will cause callers of
1524 1524 * iptun_enter() to block waiting on iptun_enter_cv. The assumption
1525 1525 * here is that the functions we're calling while IPTUN_DELETE_PENDING
1526 1526 * is set dont resuult in an iptun_enter() call, as that would result
1527 1527 * in deadlock.
1528 1528 */
1529 1529 iptun->iptun_flags |= IPTUN_DELETE_PENDING;
1530 1530
1531 1531 /* Wait for any pending upcall to the mac module to complete. */
1532 1532 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING)
1533 1533 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock);
1534 1534
1535 1535 iptun_exit(iptun);
1536 1536
1537 1537 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) {
1538 1538 /*
1539 1539 * mac_disable() will fail with EBUSY if there are references
1540 1540 * to the iptun MAC. If there are none, then mac_disable()
1541 1541 * will assure that none can be acquired until the MAC is
1542 1542 * unregistered.
1543 1543 *
1544 1544 * XXX CR 6791335 prevents us from calling mac_disable() prior
1545 1545 * to dls_devnet_destroy(), so we unfortunately need to
1546 1546 * attempt to re-create the devnet node if mac_disable()
1547 1547 * fails.
1548 1548 */
1549 1549 if ((err = mac_disable(iptun->iptun_mh)) != 0) {
1550 1550 (void) dls_devnet_create(iptun->iptun_mh, linkid,
1551 1551 iptun->iptun_zoneid);
1552 1552 }
1553 1553 }
1554 1554
1555 1555 /*
1556 1556 * Now that we know the fate of this iptun_t, we need to clear
1557 1557 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
1558 1558 * slated to be freed. Either way, we need to signal the threads
1559 1559 * waiting in iptun_enter() so that they can either fail if
1560 1560 * IPTUN_CONDEMNED is set, or continue if it's not.
1561 1561 */
1562 1562 mutex_enter(&iptun->iptun_lock);
1563 1563 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING;
1564 1564 if (err == 0)
1565 1565 iptun->iptun_flags |= IPTUN_CONDEMNED;
1566 1566 cv_broadcast(&iptun->iptun_enter_cv);
1567 1567 mutex_exit(&iptun->iptun_lock);
1568 1568
1569 1569 /*
1570 1570 * Note that there is no danger in calling iptun_free() after having
1571 1571 * dropped the iptun_lock since callers of iptun_enter() at this point
1572 1572 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
1573 1573 * threads entering from mac callbacks which call iptun_enter()
1574 1574 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
1575 1575 * lock in order to remove the iptun_t from the hash table.
1576 1576 */
1577 1577 if (err == 0)
1578 1578 iptun_free(iptun);
1579 1579
1580 1580 return (err);
1581 1581 }
1582 1582
1583 1583 int
1584 1584 iptun_modify(const iptun_kparams_t *ik, cred_t *credp)
1585 1585 {
1586 1586 iptun_t *iptun;
1587 1587 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE;
1588 1588 int err;
1589 1589
1590 1590 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1591 1591 return (err);
1592 1592
1593 1593 /* One cannot modify a tunnel that belongs to another zone. */
1594 1594 if (iptun->iptun_zoneid != crgetzoneid(credp)) {
1595 1595 err = EACCES;
1596 1596 goto done;
1597 1597 }
1598 1598
1599 1599 /* The tunnel type cannot be changed */
1600 1600 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) {
1601 1601 err = EINVAL;
1602 1602 goto done;
1603 1603 }
1604 1604
1605 1605 if ((err = iptun_setparams(iptun, ik)) != 0)
1606 1606 goto done;
1607 1607 iptun_headergen(iptun, B_FALSE);
1608 1608
1609 1609 /*
1610 1610 * If any of the tunnel's addresses has been modified and the tunnel
1611 1611 * has the necessary addresses assigned to it, we need to try to bind
1612 1612 * with ip underneath us. If we're not ready to bind yet, then we'll
1613 1613 * try again when the addresses are modified later.
1614 1614 */
1615 1615 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR);
1616 1616 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR);
1617 1617 if (laddr_change || raddr_change) {
1618 1618 if (iptun->iptun_flags & IPTUN_BOUND)
1619 1619 iptun_unbind(iptun);
1620 1620 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) {
1621 1621 if (laddr_change)
1622 1622 iptun->iptun_flags &= ~IPTUN_LADDR;
1623 1623 if (raddr_change)
1624 1624 iptun->iptun_flags &= ~IPTUN_RADDR;
1625 1625 goto done;
1626 1626 }
1627 1627 }
1628 1628
1629 1629 if (laddr_change)
1630 1630 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE);
1631 1631 if (raddr_change)
1632 1632 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE);
1633 1633
1634 1634 done:
1635 1635 iptun_exit(iptun);
1636 1636 return (err);
1637 1637 }
1638 1638
1639 1639 /* Given an IP tunnel's datalink id, fill in its parameters. */
1640 1640 int
1641 1641 iptun_info(iptun_kparams_t *ik, cred_t *credp)
1642 1642 {
1643 1643 iptun_t *iptun;
1644 1644 int err;
1645 1645
1646 1646 /* Is the tunnel link visible from the caller's zone? */
1647 1647 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid,
1648 1648 crgetzoneid(credp)))
1649 1649 return (ENOENT);
1650 1650
1651 1651 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1652 1652 return (err);
1653 1653
1654 1654 bzero(ik, sizeof (iptun_kparams_t));
1655 1655
1656 1656 ik->iptun_kparam_linkid = iptun->iptun_linkid;
1657 1657 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type;
1658 1658 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE;
1659 1659
1660 1660 if (iptun->iptun_flags & IPTUN_LADDR) {
1661 1661 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr);
1662 1662 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR;
1663 1663 }
1664 1664 if (iptun->iptun_flags & IPTUN_RADDR) {
1665 1665 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr);
1666 1666 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR;
1667 1667 }
1668 1668
1669 1669 if (iptun->iptun_flags & IPTUN_IMPLICIT)
1670 1670 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT;
1671 1671
1672 1672 if (iptun->iptun_itp != NULL) {
1673 1673 mutex_enter(&iptun->iptun_itp->itp_lock);
1674 1674 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) {
1675 1675 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL;
1676 1676 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) {
1677 1677 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO;
1678 1678 ik->iptun_kparam_secinfo =
1679 1679 iptun->iptun_simple_policy;
1680 1680 }
1681 1681 }
1682 1682 mutex_exit(&iptun->iptun_itp->itp_lock);
1683 1683 }
1684 1684
1685 1685 done:
1686 1686 iptun_exit(iptun);
1687 1687 return (err);
1688 1688 }
1689 1689
1690 1690 int
1691 1691 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr)
1692 1692 {
1693 1693 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr))
1694 1694 return (EADDRNOTAVAIL);
1695 1695 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr;
1696 1696 return (0);
1697 1697 }
1698 1698
1699 1699 void
1700 1700 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr)
1701 1701 {
1702 1702 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr;
1703 1703 }
1704 1704
1705 1705 void
1706 1706 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp)
1707 1707 {
1708 1708 iptun_t *iptun;
1709 1709
1710 1710 if (iptun_enter_by_linkid(linkid, &iptun) != 0)
1711 1711 return;
1712 1712 if (iptun->iptun_itp != itp) {
1713 1713 ASSERT(iptun->iptun_itp == NULL);
1714 1714 ITP_REFHOLD(itp);
1715 1715 iptun->iptun_itp = itp;
1716 1716 }
1717 1717 /*
1718 1718 * IPsec policy means IPsec overhead, which means lower MTU.
1719 1719 * Refresh the MTU for this tunnel.
1720 1720 */
1721 1721 (void) iptun_update_mtu(iptun, NULL, 0);
1722 1722 iptun_exit(iptun);
1723 1723 }
1724 1724
1725 1725 /*
1726 1726 * Obtain the path MTU to the tunnel destination.
1727 1727 * Can return zero in some cases.
1728 1728 */
1729 1729 static uint32_t
1730 1730 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1731 1731 {
1732 1732 uint32_t pmtu = 0;
1733 1733 conn_t *connp = iptun->iptun_connp;
1734 1734 boolean_t need_rele = B_FALSE;
1735 1735
1736 1736 /*
1737 1737 * We only obtain the pmtu for tunnels that have a remote tunnel
1738 1738 * address.
1739 1739 */
1740 1740 if (!(iptun->iptun_flags & IPTUN_RADDR))
1741 1741 return (0);
1742 1742
1743 1743 if (ixa == NULL) {
1744 1744 ixa = conn_get_ixa(connp, B_FALSE);
1745 1745 if (ixa == NULL)
1746 1746 return (0);
1747 1747 need_rele = B_TRUE;
1748 1748 }
1749 1749 /*
1750 1750 * Guard against ICMP errors before we have sent, as well as against
1751 1751 * and a thread which held conn_ixa.
1752 1752 */
1753 1753 if (ixa->ixa_ire != NULL) {
1754 1754 pmtu = ip_get_pmtu(ixa);
1755 1755
1756 1756 /*
1757 1757 * For both IPv4 and IPv6 we can have indication that the outer
1758 1758 * header needs fragmentation.
1759 1759 */
1760 1760 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1761 1761 /* Must allow fragmentation in ip_output */
1762 1762 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1763 1763 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1764 1764 ixa->ixa_flags |= IXAF_DONTFRAG;
1765 1765 } else {
1766 1766 /* ip_get_pmtu might have set this - we don't want it */
1767 1767 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1768 1768 }
1769 1769 }
1770 1770
1771 1771 if (need_rele)
1772 1772 ixa_refrele(ixa);
1773 1773 return (pmtu);
1774 1774 }
1775 1775
1776 1776 /*
1777 1777 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
1778 1778 * by ip.
1779 1779 */
1780 1780 static void
1781 1781 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1782 1782 {
1783 1783 uint32_t pmtu;
1784 1784 conn_t *connp = iptun->iptun_connp;
1785 1785 boolean_t need_rele = B_FALSE;
1786 1786
1787 1787 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
1788 1788 if (!(iptun->iptun_flags & IPTUN_RADDR))
1789 1789 return;
1790 1790
1791 1791 if (ixa == NULL) {
1792 1792 ixa = conn_get_ixa(connp, B_FALSE);
1793 1793 if (ixa == NULL)
1794 1794 return;
1795 1795 need_rele = B_TRUE;
1796 1796 }
1797 1797 /*
1798 1798 * Guard against ICMP errors before we have sent, as well as against
1799 1799 * and a thread which held conn_ixa.
1800 1800 */
1801 1801 if (ixa->ixa_ire != NULL) {
1802 1802 pmtu = ip_get_pmtu(ixa);
1803 1803 /*
1804 1804 * Update ixa_fragsize and ixa_pmtu.
1805 1805 */
1806 1806 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu;
1807 1807
1808 1808 /*
1809 1809 * For both IPv4 and IPv6 we can have indication that the outer
1810 1810 * header needs fragmentation.
1811 1811 */
1812 1812 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1813 1813 /* Must allow fragmentation in ip_output */
1814 1814 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1815 1815 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1816 1816 ixa->ixa_flags |= IXAF_DONTFRAG;
1817 1817 } else {
1818 1818 /* ip_get_pmtu might have set this - we don't want it */
1819 1819 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1820 1820 }
1821 1821 }
1822 1822
1823 1823 if (need_rele)
1824 1824 ixa_refrele(ixa);
1825 1825 }
1826 1826
1827 1827 /*
1828 1828 * There is nothing that iptun can verify in addition to IP having
1829 1829 * verified the IP addresses in the fanout.
1830 1830 */
1831 1831 /* ARGSUSED */
1832 1832 static boolean_t
1833 1833 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
1834 1834 ip_recv_attr_t *ira)
1835 1835 {
1836 1836 return (B_TRUE);
1837 1837 }
1838 1838
1839 1839 /*
1840 1840 * Notify function registered with ip_xmit_attr_t.
1841 1841 */
1842 1842 static void
1843 1843 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
1844 1844 ixa_notify_arg_t narg)
1845 1845 {
1846 1846 iptun_t *iptun = (iptun_t *)arg;
1847 1847
1848 1848 switch (ntype) {
1849 1849 case IXAN_PMTU:
1850 1850 (void) iptun_update_mtu(iptun, ixa, narg);
1851 1851 break;
1852 1852 }
1853 1853 }
1854 1854
1855 1855 /*
1856 1856 * Returns the max of old_ovhd and the overhead associated with pol.
1857 1857 */
1858 1858 static uint32_t
1859 1859 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd)
1860 1860 {
1861 1861 uint32_t new_ovhd = old_ovhd;
1862 1862
1863 1863 while (pol != NULL) {
1864 1864 new_ovhd = max(new_ovhd,
1865 1865 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1866 1866 pol = pol->ipsp_hash.hash_next;
1867 1867 }
1868 1868 return (new_ovhd);
1869 1869 }
1870 1870
1871 1871 static uint32_t
1872 1872 iptun_get_ipsec_overhead(iptun_t *iptun)
1873 1873 {
1874 1874 ipsec_policy_root_t *ipr;
1875 1875 ipsec_policy_head_t *iph;
1876 1876 ipsec_policy_t *pol;
1877 1877 ipsec_selector_t sel;
1878 1878 int i;
1879 1879 uint32_t ipsec_ovhd = 0;
1880 1880 ipsec_tun_pol_t *itp = iptun->iptun_itp;
1881 1881 netstack_t *ns = iptun->iptun_ns;
1882 1882
1883 1883 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) {
1884 1884 /*
1885 1885 * Consult global policy, just in case. This will only work
1886 1886 * if we have both source and destination addresses to work
1887 1887 * with.
1888 1888 */
1889 1889 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) !=
1890 1890 (IPTUN_LADDR|IPTUN_RADDR))
1891 1891 return (0);
1892 1892
1893 1893 iph = ipsec_system_policy(ns);
1894 1894 bzero(&sel, sizeof (sel));
1895 1895 sel.ips_isv4 =
1896 1896 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION);
1897 1897 switch (iptun->iptun_typeinfo->iti_ipvers) {
1898 1898 case IPV4_VERSION:
1899 1899 sel.ips_local_addr_v4 = iptun->iptun_laddr4;
1900 1900 sel.ips_remote_addr_v4 = iptun->iptun_raddr4;
1901 1901 break;
1902 1902 case IPV6_VERSION:
1903 1903 sel.ips_local_addr_v6 = iptun->iptun_laddr6;
1904 1904 sel.ips_remote_addr_v6 = iptun->iptun_raddr6;
1905 1905 break;
1906 1906 }
1907 1907 /* Check for both IPv4 and IPv6. */
1908 1908 sel.ips_protocol = IPPROTO_ENCAP;
1909 1909 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1910 1910 &sel);
1911 1911 if (pol != NULL) {
1912 1912 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act);
1913 1913 IPPOL_REFRELE(pol);
1914 1914 }
1915 1915 sel.ips_protocol = IPPROTO_IPV6;
1916 1916 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1917 1917 &sel);
1918 1918 if (pol != NULL) {
1919 1919 ipsec_ovhd = max(ipsec_ovhd,
1920 1920 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1921 1921 IPPOL_REFRELE(pol);
1922 1922 }
1923 1923 IPPH_REFRELE(iph, ns);
1924 1924 } else {
1925 1925 /*
1926 1926 * Look through all of the possible IPsec actions for the
1927 1927 * tunnel, and find the largest potential IPsec overhead.
1928 1928 */
1929 1929 iph = itp->itp_policy;
1930 1930 rw_enter(&iph->iph_lock, RW_READER);
1931 1931 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]);
1932 1932 ipsec_ovhd = iptun_max_policy_overhead(
1933 1933 ipr->ipr_nonhash[IPSEC_AF_V4], 0);
1934 1934 ipsec_ovhd = iptun_max_policy_overhead(
1935 1935 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd);
1936 1936 for (i = 0; i < ipr->ipr_nchains; i++) {
1937 1937 ipsec_ovhd = iptun_max_policy_overhead(
1938 1938 ipr->ipr_hash[i].hash_head, ipsec_ovhd);
1939 1939 }
1940 1940 rw_exit(&iph->iph_lock);
1941 1941 }
1942 1942
1943 1943 return (ipsec_ovhd);
1944 1944 }
1945 1945
1946 1946 /*
1947 1947 * Calculate and return the maximum possible upper MTU for the given tunnel.
1948 1948 *
1949 1949 * If new_pmtu is set then we also need to update the lower path MTU information
1950 1950 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
1951 1951 * we are notified by conn_ip_output() when the path MTU increases.
1952 1952 */
1953 1953 static uint32_t
1954 1954 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
1955 1955 {
1956 1956 size_t header_size, ipsec_overhead;
1957 1957 uint32_t maxmtu, pmtu;
1958 1958
1959 1959 /*
1960 1960 * Start with the path-MTU to the remote address, which is either
1961 1961 * provided as the new_pmtu argument, or obtained using
1962 1962 * iptun_get_dst_pmtu().
1963 1963 */
1964 1964 if (new_pmtu != 0) {
1965 1965 if (iptun->iptun_flags & IPTUN_RADDR)
1966 1966 iptun->iptun_dpmtu = new_pmtu;
1967 1967 pmtu = new_pmtu;
1968 1968 } else if (iptun->iptun_flags & IPTUN_RADDR) {
1969 1969 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) {
1970 1970 /*
1971 1971 * We weren't able to obtain the path-MTU of the
1972 1972 * destination. Use the previous value.
1973 1973 */
1974 1974 pmtu = iptun->iptun_dpmtu;
1975 1975 } else {
1976 1976 iptun->iptun_dpmtu = pmtu;
1977 1977 }
1978 1978 } else {
1979 1979 /*
1980 1980 * We have no path-MTU information to go on, use the maximum
1981 1981 * possible value.
1982 1982 */
1983 1983 pmtu = iptun->iptun_typeinfo->iti_maxmtu;
1984 1984 }
1985 1985
1986 1986 /*
1987 1987 * Now calculate tunneling overhead and subtract that from the
1988 1988 * path-MTU information obtained above.
1989 1989 */
1990 1990 if (iptun->iptun_header_size != 0) {
1991 1991 header_size = iptun->iptun_header_size;
1992 1992 } else {
1993 1993 switch (iptun->iptun_typeinfo->iti_ipvers) {
1994 1994 case IPV4_VERSION:
1995 1995 header_size = sizeof (ipha_t);
1996 1996 if (is_system_labeled())
1997 1997 header_size += IP_MAX_OPT_LENGTH;
1998 1998 break;
1999 1999 case IPV6_VERSION:
2000 2000 header_size = sizeof (iptun_ipv6hdrs_t);
2001 2001 break;
2002 2002 }
2003 2003 }
2004 2004
2005 2005 ipsec_overhead = iptun_get_ipsec_overhead(iptun);
2006 2006
2007 2007 maxmtu = pmtu - (header_size + ipsec_overhead);
2008 2008 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu));
2009 2009 }
2010 2010
2011 2011 /*
2012 2012 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
2013 2013 * of any change in MTU. The new_pmtu argument is the new lower path MTU to
2014 2014 * the tunnel destination to be used in the tunnel MTU calculation. Passing
2015 2015 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
2016 2016 * ip_get_pmtu().
2017 2017 *
2018 2018 * If the calculated tunnel MTU is different than its previous value, then we
2019 2019 * notify the MAC layer above us of this change using mac_maxsdu_update().
2020 2020 */
2021 2021 static uint32_t
2022 2022 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
2023 2023 {
2024 2024 uint32_t newmtu;
2025 2025
2026 2026 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
2027 2027 iptun_update_dst_pmtu(iptun, ixa);
2028 2028
2029 2029 /*
2030 2030 * We return the current MTU without updating it if it was pegged to a
2031 2031 * static value using the MAC_PROP_MTU link property.
2032 2032 */
2033 2033 if (iptun->iptun_flags & IPTUN_FIXED_MTU)
2034 2034 return (iptun->iptun_mtu);
2035 2035
2036 2036 /* If the MTU isn't fixed, then use the maximum possible value. */
2037 2037 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu);
2038 2038 /*
2039 2039 * We only dynamically adjust the tunnel MTU for tunnels with
2040 2040 * destinations because dynamic MTU calculations are based on the
2041 2041 * destination path-MTU.
2042 2042 */
2043 2043 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
2044 2044 iptun->iptun_mtu = newmtu;
2045 2045 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
2046 2046 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
2047 2047 }
2048 2048
2049 2049 return (newmtu);
2050 2050 }
2051 2051
2052 2052 /*
2053 2053 * Frees a packet or packet chain and bumps stat for each freed packet.
2054 2054 */
2055 2055 static void
2056 2056 iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
2057 2057 {
2058 2058 mblk_t *pktmp;
2059 2059
2060 2060 for (pktmp = mp; pktmp != NULL; pktmp = mp) {
2061 2061 mp = mp->b_next;
2062 2062 pktmp->b_next = NULL;
2063 2063 if (stat != NULL)
2064 2064 atomic_inc_64(stat);
2065 2065 freemsg(pktmp);
2066 2066 }
2067 2067 }
2068 2068
2069 2069 /*
2070 2070 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
2071 2071 * original packet to its b_cont. Returns NULL on failure.
2072 2072 */
2073 2073 static mblk_t *
2074 2074 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
2075 2075 {
2076 2076 mblk_t *icmperr_mp;
2077 2077
2078 2078 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
2079 2079 icmperr_mp->b_wptr += hdrs_size;
2080 2080 /* tack on the offending packet */
2081 2081 icmperr_mp->b_cont = orig_pkt;
2082 2082 }
2083 2083 return (icmperr_mp);
2084 2084 }
2085 2085
2086 2086 /*
2087 2087 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in
2088 2088 * the ICMP error.
2089 2089 */
2090 2090 static void
2091 2091 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp,
2092 2092 ts_label_t *tsl)
2093 2093 {
2094 2094 size_t orig_pktsize, hdrs_size;
2095 2095 mblk_t *icmperr_mp;
2096 2096 ipha_t *new_ipha;
2097 2097 icmph_t *new_icmp;
2098 2098 ip_xmit_attr_t ixas;
2099 2099 conn_t *connp = iptun->iptun_connp;
2100 2100
2101 2101 orig_pktsize = msgdsize(mp);
2102 2102 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
2103 2103 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2104 2104 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2105 2105 return;
2106 2106 }
2107 2107
2108 2108 new_ipha = (ipha_t *)icmperr_mp->b_rptr;
2109 2109 new_icmp = (icmph_t *)(new_ipha + 1);
2110 2110
2111 2111 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2112 2112 new_ipha->ipha_type_of_service = 0;
2113 2113 new_ipha->ipha_ident = 0;
2114 2114 new_ipha->ipha_fragment_offset_and_flags = 0;
2115 2115 new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
2116 2116 new_ipha->ipha_protocol = IPPROTO_ICMP;
2117 2117 new_ipha->ipha_src = orig_ipha->ipha_dst;
2118 2118 new_ipha->ipha_dst = orig_ipha->ipha_src;
2119 2119 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */
2120 2120 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);
2121 2121
2122 2122 *new_icmp = *icmp;
2123 2123 new_icmp->icmph_checksum = 0;
2124 2124 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);
2125 2125
2126 2126 bzero(&ixas, sizeof (ixas));
2127 2127 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
2128 2128 if (new_ipha->ipha_src == INADDR_ANY) {
2129 2129 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2130 2130 ixas.ixa_flags |= IXAF_SET_SOURCE;
2131 2131 }
2132 2132
2133 2133 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2134 2134 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2135 2135 ixas.ixa_cred = connp->conn_cred;
2136 2136 ixas.ixa_cpid = NOPID;
2137 2137 if (is_system_labeled())
2138 2138 ixas.ixa_tsl = tsl;
2139 2139
2140 2140 ixas.ixa_ifindex = 0;
2141 2141 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2142 2142
2143 2143 (void) ip_output_simple(icmperr_mp, &ixas);
2144 2144 ixa_cleanup(&ixas);
2145 2145 }
2146 2146
2147 2147 static void
2148 2148 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
2149 2149 ts_label_t *tsl)
2150 2150 {
2151 2151 size_t orig_pktsize, hdrs_size;
2152 2152 mblk_t *icmp6err_mp;
2153 2153 ip6_t *new_ip6h;
2154 2154 icmp6_t *new_icmp6;
2155 2155 ip_xmit_attr_t ixas;
2156 2156 conn_t *connp = iptun->iptun_connp;
2157 2157
2158 2158 orig_pktsize = msgdsize(mp);
2159 2159 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
2160 2160 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2161 2161 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2162 2162 return;
2163 2163 }
2164 2164
2165 2165 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
2166 2166 new_icmp6 = (icmp6_t *)(new_ip6h + 1);
2167 2167
2168 2168 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
2169 2169 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
2170 2170 new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
2171 2171 new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
2172 2172 new_ip6h->ip6_src = orig_ip6h->ip6_dst;
2173 2173 new_ip6h->ip6_dst = orig_ip6h->ip6_src;
2174 2174
2175 2175 *new_icmp6 = *icmp6;
2176 2176 /* The checksum is calculated in ip_output_simple and friends. */
2177 2177 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;
2178 2178
2179 2179 bzero(&ixas, sizeof (ixas));
2180 2180 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
2181 2181 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) {
2182 2182 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2183 2183 ixas.ixa_flags |= IXAF_SET_SOURCE;
2184 2184 }
2185 2185
2186 2186 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2187 2187 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2188 2188 ixas.ixa_cred = connp->conn_cred;
2189 2189 ixas.ixa_cpid = NOPID;
2190 2190 if (is_system_labeled())
2191 2191 ixas.ixa_tsl = tsl;
2192 2192
2193 2193 ixas.ixa_ifindex = 0;
2194 2194 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2195 2195
2196 2196 (void) ip_output_simple(icmp6err_mp, &ixas);
2197 2197 ixa_cleanup(&ixas);
2198 2198 }
2199 2199
2200 2200 static void
2201 2201 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
2202 2202 uint8_t type, uint8_t code, ts_label_t *tsl)
2203 2203 {
2204 2204 icmph_t icmp;
2205 2205
2206 2206 bzero(&icmp, sizeof (icmp));
2207 2207 icmp.icmph_type = type;
2208 2208 icmp.icmph_code = code;
2209 2209
2210 2210 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
2211 2211 }
2212 2212
2213 2213 static void
2214 2214 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
2215 2215 mblk_t *mp, ts_label_t *tsl)
2216 2216 {
2217 2217 icmph_t icmp;
2218 2218
2219 2219 icmp.icmph_type = ICMP_DEST_UNREACHABLE;
2220 2220 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
2221 2221 icmp.icmph_du_zero = 0;
2222 2222 icmp.icmph_du_mtu = htons(newmtu);
2223 2223
2224 2224 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
2225 2225 }
2226 2226
2227 2227 static void
2228 2228 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
2229 2229 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl)
2230 2230 {
2231 2231 icmp6_t icmp6;
2232 2232
2233 2233 bzero(&icmp6, sizeof (icmp6));
2234 2234 icmp6.icmp6_type = type;
2235 2235 icmp6.icmp6_code = code;
2236 2236 if (type == ICMP6_PARAM_PROB)
2237 2237 icmp6.icmp6_pptr = htonl(offset);
2238 2238
2239 2239 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
2240 2240 }
2241 2241
2242 2242 static void
2243 2243 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
2244 2244 mblk_t *mp, ts_label_t *tsl)
2245 2245 {
2246 2246 icmp6_t icmp6;
2247 2247
2248 2248 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
2249 2249 icmp6.icmp6_code = 0;
2250 2250 icmp6.icmp6_mtu = htonl(newmtu);
2251 2251
2252 2252 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
2253 2253 }
2254 2254
2255 2255 /*
2256 2256 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The
2257 2257 * mp argument is only used to do bounds checking.
2258 2258 */
2259 2259 static boolean_t
2260 2260 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2261 2261 {
2262 2262 uint16_t hlen;
2263 2263
2264 2264 if (ipha != NULL) {
2265 2265 icmph_t *icmph;
2266 2266
2267 2267 ASSERT(ip6h == NULL);
2268 2268 if (ipha->ipha_protocol != IPPROTO_ICMP)
2269 2269 return (B_FALSE);
2270 2270
2271 2271 hlen = IPH_HDR_LENGTH(ipha);
2272 2272 icmph = (icmph_t *)((uint8_t *)ipha + hlen);
2273 2273 return (ICMP_IS_ERROR(icmph->icmph_type) ||
2274 2274 icmph->icmph_type == ICMP_REDIRECT);
2275 2275 } else {
2276 2276 icmp6_t *icmp6;
2277 2277 uint8_t *nexthdrp;
2278 2278
2279 2279 ASSERT(ip6h != NULL);
2280 2280 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
2281 2281 *nexthdrp != IPPROTO_ICMPV6) {
2282 2282 return (B_FALSE);
2283 2283 }
2284 2284
2285 2285 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
2286 2286 return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
2287 2287 icmp6->icmp6_type == ND_REDIRECT);
2288 2288 }
2289 2289 }
2290 2290
2291 2291 /*
2292 2292 * Find inner and outer IP headers from a tunneled packet as setup for calls
2293 2293 * into ipsec_tun_{in,out}bound().
2294 2294 * Note that we need to allow the outer header to be in a separate mblk from
2295 2295 * the inner header.
2296 2296 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero.
2297 2297 */
2298 2298 static size_t
2299 2299 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
2300 2300 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
2301 2301 {
2302 2302 ipha_t *ipha;
2303 2303 size_t first_mblkl = MBLKL(mp);
2304 2304 mblk_t *inner_mp;
2305 2305
2306 2306 /*
2307 2307 * Don't bother handling packets that don't have a full IP header in
2308 2308 * the fist mblk. For the input path, the ip module ensures that this
2309 2309 * won't happen, and on the output path, the IP tunneling MAC-type
2310 2310 * plugins ensure that this also won't happen.
2311 2311 */
2312 2312 if (first_mblkl < sizeof (ipha_t))
2313 2313 return (0);
2314 2314 ipha = (ipha_t *)(mp->b_rptr);
2315 2315 switch (IPH_HDR_VERSION(ipha)) {
2316 2316 case IPV4_VERSION:
2317 2317 *outer4 = ipha;
2318 2318 *outer6 = NULL;
2319 2319 if (outer_hlen == 0)
2320 2320 outer_hlen = IPH_HDR_LENGTH(ipha);
2321 2321 break;
2322 2322 case IPV6_VERSION:
2323 2323 *outer4 = NULL;
2324 2324 *outer6 = (ip6_t *)ipha;
2325 2325 if (outer_hlen == 0)
2326 2326 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
2327 2327 break;
2328 2328 default:
2329 2329 return (0);
2330 2330 }
2331 2331
2332 2332 if (first_mblkl < outer_hlen ||
2333 2333 (first_mblkl == outer_hlen && mp->b_cont == NULL))
2334 2334 return (0);
2335 2335
2336 2336 /*
2337 2337 * We don't bother doing a pullup here since the outer header will
2338 2338 * just get stripped off soon on input anyway. We just want to ensure
2339 2339 * that the inner* pointer points to a full header.
2340 2340 */
2341 2341 if (first_mblkl == outer_hlen) {
2342 2342 inner_mp = mp->b_cont;
2343 2343 ipha = (ipha_t *)inner_mp->b_rptr;
2344 2344 } else {
2345 2345 inner_mp = mp;
2346 2346 ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
2347 2347 }
2348 2348 switch (IPH_HDR_VERSION(ipha)) {
2349 2349 case IPV4_VERSION:
2350 2350 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
2351 2351 return (0);
2352 2352 *inner4 = ipha;
2353 2353 *inner6 = NULL;
2354 2354 break;
2355 2355 case IPV6_VERSION:
2356 2356 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
2357 2357 return (0);
2358 2358 *inner4 = NULL;
2359 2359 *inner6 = (ip6_t *)ipha;
2360 2360 break;
2361 2361 default:
2362 2362 return (0);
2363 2363 }
2364 2364
2365 2365 return (outer_hlen);
2366 2366 }
2367 2367
2368 2368 /*
2369 2369 * Received ICMP error in response to an X over IPv4 packet that we
2370 2370 * transmitted.
2371 2371 *
2372 2372 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2373 2373 * the following:
2374 2374 *
2375 2375 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
2376 2376 *
2377 2377 * or
2378 2378 *
2379 2379 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
2380 2380 *
2381 2381 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
2382 2382 * whatever the very-inner packet is (IPv4(2) or IPv6).
2383 2383 */
2384 2384 static void
2385 2385 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph,
2386 2386 ip_recv_attr_t *ira)
2387 2387 {
2388 2388 uint8_t *orig;
2389 2389 ipha_t *outer4, *inner4;
2390 2390 ip6_t *outer6, *inner6;
2391 2391 int outer_hlen;
2392 2392 uint8_t type, code;
2393 2393
2394 2394 ASSERT(data_mp->b_cont == NULL);
2395 2395 /*
2396 2396 * Temporarily move b_rptr forward so that iptun_find_headers() can
2397 2397 * find headers in the ICMP packet payload.
2398 2398 */
2399 2399 orig = data_mp->b_rptr;
2400 2400 data_mp->b_rptr = (uint8_t *)(icmph + 1);
2401 2401 /*
2402 2402 * The ip module ensures that ICMP errors contain at least the
2403 2403 * original IP header (otherwise, the error would never have made it
2404 2404 * here).
2405 2405 */
2406 2406 ASSERT(MBLKL(data_mp) >= 0);
2407 2407 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2408 2408 &inner6);
2409 2409 ASSERT(outer6 == NULL);
2410 2410 data_mp->b_rptr = orig;
2411 2411 if (outer_hlen == 0) {
2412 2412 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2413 2413 return;
2414 2414 }
2415 2415
2416 2416 /* Only ICMP errors due to tunneled packets should reach here. */
2417 2417 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP ||
2418 2418 outer4->ipha_protocol == IPPROTO_IPV6);
2419 2419
2420 2420 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2421 2421 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2422 2422 if (data_mp == NULL) {
2423 2423 /* Callee did all of the freeing. */
2424 2424 atomic_inc_64(&iptun->iptun_ierrors);
2425 2425 return;
2426 2426 }
2427 2427 /* We should never see reassembled fragment here. */
2428 2428 ASSERT(data_mp->b_next == NULL);
2429 2429
2430 2430 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen;
2431 2431
2432 2432 /*
2433 2433 * If the original packet being transmitted was itself an ICMP error,
2434 2434 * then drop this packet. We don't want to generate an ICMP error in
2435 2435 * response to an ICMP error.
2436 2436 */
2437 2437 if (is_icmp_error(data_mp, inner4, inner6)) {
2438 2438 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2439 2439 return;
2440 2440 }
2441 2441
2442 2442 switch (icmph->icmph_type) {
2443 2443 case ICMP_DEST_UNREACHABLE:
2444 2444 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH);
2445 2445 switch (icmph->icmph_code) {
2446 2446 case ICMP_FRAGMENTATION_NEEDED: {
2447 2447 uint32_t newmtu;
2448 2448
2449 2449 /*
2450 2450 * We reconcile this with the fact that the tunnel may
2451 2451 * also have IPsec policy by letting iptun_update_mtu
2452 2452 * take care of it.
2453 2453 */
2454 2454 newmtu = iptun_update_mtu(iptun, NULL,
2455 2455 ntohs(icmph->icmph_du_mtu));
2456 2456
2457 2457 if (inner4 != NULL) {
2458 2458 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2459 2459 data_mp, ira->ira_tsl);
2460 2460 } else {
2461 2461 iptun_icmp_toobig_v6(iptun, newmtu, inner6,
2462 2462 data_mp, ira->ira_tsl);
2463 2463 }
2464 2464 return;
2465 2465 }
2466 2466 case ICMP_DEST_NET_UNREACH_ADMIN:
2467 2467 case ICMP_DEST_HOST_UNREACH_ADMIN:
2468 2468 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN :
2469 2469 ICMP6_DST_UNREACH_ADMIN);
2470 2470 break;
2471 2471 default:
2472 2472 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2473 2473 ICMP6_DST_UNREACH_ADDR);
2474 2474 break;
2475 2475 }
2476 2476 break;
2477 2477 case ICMP_TIME_EXCEEDED:
2478 2478 if (inner6 != NULL) {
2479 2479 type = ICMP6_TIME_EXCEEDED;
2480 2480 code = 0;
2481 2481 } /* else we're already set. */
2482 2482 break;
2483 2483 case ICMP_PARAM_PROBLEM:
2484 2484 /*
2485 2485 * This is a problem with the outer header we transmitted.
2486 2486 * Treat this as an output error.
2487 2487 */
2488 2488 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2489 2489 return;
2490 2490 default:
2491 2491 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2492 2492 return;
2493 2493 }
2494 2494
2495 2495 if (inner4 != NULL) {
2496 2496 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
2497 2497 ira->ira_tsl);
2498 2498 } else {
2499 2499 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
2500 2500 ira->ira_tsl);
2501 2501 }
2502 2502 }
2503 2503
2504 2504 /*
2505 2505 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
2506 2506 * Encapsulation Limit destination option. If there is one, set encaplim_ptr
2507 2507 * to point to the option value.
2508 2508 */
2509 2509 static boolean_t
2510 2510 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
2511 2511 {
2512 2512 ip_pkt_t pkt;
2513 2513 uint8_t *endptr;
2514 2514 ip6_dest_t *destp;
2515 2515 struct ip6_opt *optp;
2516 2516
2517 2517 pkt.ipp_fields = 0; /* must be initialized */
2518 2518 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL);
2519 2519 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
2520 2520 destp = pkt.ipp_dstopts;
2521 2521 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
2522 2522 destp = pkt.ipp_rthdrdstopts;
2523 2523 } else {
2524 2524 return (B_FALSE);
2525 2525 }
2526 2526
2527 2527 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
2528 2528 optp = (struct ip6_opt *)(destp + 1);
2529 2529 while (endptr - (uint8_t *)optp > sizeof (*optp)) {
2530 2530 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
2531 2531 if ((uint8_t *)(optp + 1) >= endptr)
2532 2532 return (B_FALSE);
2533 2533 *encaplim_ptr = (uint8_t *)&optp[1];
2534 2534 return (B_TRUE);
2535 2535 }
2536 2536 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
2537 2537 }
2538 2538 return (B_FALSE);
2539 2539 }
2540 2540
2541 2541 /*
2542 2542 * Received ICMPv6 error in response to an X over IPv6 packet that we
2543 2543 * transmitted.
2544 2544 *
2545 2545 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2546 2546 * the following:
2547 2547 *
2548 2548 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
2549 2549 *
2550 2550 * or
2551 2551 *
2552 2552 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
2553 2553 *
2554 2554 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
2555 2555 * whatever the very-inner packet is (IPv4 or IPv6(2)).
2556 2556 */
2557 2557 static void
2558 2558 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
2559 2559 ip_recv_attr_t *ira)
2560 2560 {
2561 2561 uint8_t *orig;
2562 2562 ipha_t *outer4, *inner4;
2563 2563 ip6_t *outer6, *inner6;
2564 2564 int outer_hlen;
2565 2565 uint8_t type, code;
2566 2566
2567 2567 ASSERT(data_mp->b_cont == NULL);
2568 2568
2569 2569 /*
2570 2570 * Temporarily move b_rptr forward so that iptun_find_headers() can
2571 2571 * find IP headers in the ICMP packet payload.
2572 2572 */
2573 2573 orig = data_mp->b_rptr;
2574 2574 data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
2575 2575 /*
2576 2576 * The ip module ensures that ICMP errors contain at least the
2577 2577 * original IP header (otherwise, the error would never have made it
2578 2578 * here).
2579 2579 */
2580 2580 ASSERT(MBLKL(data_mp) >= 0);
2581 2581 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2582 2582 &inner6);
2583 2583 ASSERT(outer4 == NULL);
2584 2584 data_mp->b_rptr = orig; /* Restore r_ptr */
2585 2585 if (outer_hlen == 0) {
2586 2586 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2587 2587 return;
2588 2588 }
2589 2589
2590 2590 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2591 2591 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2592 2592 if (data_mp == NULL) {
2593 2593 /* Callee did all of the freeing. */
2594 2594 atomic_inc_64(&iptun->iptun_ierrors);
2595 2595 return;
2596 2596 }
2597 2597 /* We should never see reassembled fragment here. */
2598 2598 ASSERT(data_mp->b_next == NULL);
2599 2599
2600 2600 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;
2601 2601
2602 2602 /*
2603 2603 * If the original packet being transmitted was itself an ICMP error,
2604 2604 * then drop this packet. We don't want to generate an ICMP error in
2605 2605 * response to an ICMP error.
2606 2606 */
2607 2607 if (is_icmp_error(data_mp, inner4, inner6)) {
2608 2608 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2609 2609 return;
2610 2610 }
2611 2611
2612 2612 switch (icmp6h->icmp6_type) {
2613 2613 case ICMP6_PARAM_PROB: {
2614 2614 uint8_t *encaplim_ptr;
2615 2615
2616 2616 /*
2617 2617 * If the ICMPv6 error points to a valid Tunnel Encapsulation
2618 2618 * Limit option and the limit value is 0, then fall through
2619 2619 * and send a host unreachable message. Otherwise, treat the
2620 2620 * error as an output error, as there must have been a problem
2621 2621 * with a packet we sent.
2622 2622 */
2623 2623 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
2624 2624 (icmp6h->icmp6_pptr !=
2625 2625 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
2626 2626 *encaplim_ptr != 0) {
2627 2627 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2628 2628 return;
2629 2629 }
2630 2630 /* FALLTHRU */
2631 2631 }
2632 2632 case ICMP6_TIME_EXCEEDED:
2633 2633 case ICMP6_DST_UNREACH:
2634 2634 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
2635 2635 ICMP6_DST_UNREACH);
2636 2636 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2637 2637 ICMP6_DST_UNREACH_ADDR);
2638 2638 break;
2639 2639 case ICMP6_PACKET_TOO_BIG: {
2640 2640 uint32_t newmtu;
2641 2641
2642 2642 /*
2643 2643 * We reconcile this with the fact that the tunnel may also
2644 2644 * have IPsec policy by letting iptun_update_mtu take care of
2645 2645 * it.
2646 2646 */
2647 2647 newmtu = iptun_update_mtu(iptun, NULL,
2648 2648 ntohl(icmp6h->icmp6_mtu));
2649 2649
2650 2650 if (inner4 != NULL) {
2651 2651 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2652 2652 data_mp, ira->ira_tsl);
2653 2653 } else {
2654 2654 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp,
2655 2655 ira->ira_tsl);
2656 2656 }
2657 2657 return;
2658 2658 }
2659 2659 default:
2660 2660 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2661 2661 return;
2662 2662 }
2663 2663
2664 2664 if (inner4 != NULL) {
2665 2665 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
2666 2666 ira->ira_tsl);
2667 2667 } else {
2668 2668 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
2669 2669 ira->ira_tsl);
2670 2670 }
2671 2671 }
2672 2672
2673 2673 /*
2674 2674 * Called as conn_recvicmp from IP for ICMP errors.
2675 2675 */
2676 2676 /* ARGSUSED2 */
2677 2677 static void
2678 2678 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2679 2679 {
2680 2680 conn_t *connp = arg;
2681 2681 iptun_t *iptun = connp->conn_iptun;
2682 2682 mblk_t *tmpmp;
2683 2683 size_t hlen;
2684 2684
2685 2685 ASSERT(IPCL_IS_IPTUN(connp));
2686 2686
2687 2687 if (mp->b_cont != NULL) {
2688 2688 /*
2689 2689 * Since ICMP error processing necessitates access to bits
2690 2690 * that are within the ICMP error payload (the original packet
2691 2691 * that caused the error), pull everything up into a single
2692 2692 * block for convenience.
2693 2693 */
2694 2694 if ((tmpmp = msgpullup(mp, -1)) == NULL) {
2695 2695 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
2696 2696 return;
2697 2697 }
2698 2698 freemsg(mp);
2699 2699 mp = tmpmp;
2700 2700 }
2701 2701
2702 2702 hlen = ira->ira_ip_hdr_length;
2703 2703 switch (iptun->iptun_typeinfo->iti_ipvers) {
2704 2704 case IPV4_VERSION:
2705 2705 /*
2706 2706 * The outer IP header coming up from IP is always ipha_t
2707 2707 * alligned (otherwise, we would have crashed in ip).
2708 2708 */
2709 2709 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
2710 2710 ira);
2711 2711 break;
2712 2712 case IPV6_VERSION:
2713 2713 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
2714 2714 ira);
2715 2715 break;
2716 2716 }
2717 2717 }
2718 2718
2719 2719 static boolean_t
2720 2720 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2721 2721 {
2722 2722 ipaddr_t v4addr;
2723 2723
2724 2724 /*
2725 2725 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
2726 2726 * IPv4 address of a 6to4 tunnel as the destination.
2727 2727 */
2728 2728 if (inner6 == NULL)
2729 2729 return (B_FALSE);
2730 2730
2731 2731 /*
2732 2732 * Make sure that the IPv6 destination is within the site that this
2733 2733 * 6to4 tunnel is routing for. We don't want people bouncing random
2734 2734 * tunneled IPv6 packets through this 6to4 router.
2735 2735 */
2736 2736 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
2737 2737 if (outer4->ipha_dst != v4addr)
2738 2738 return (B_FALSE);
2739 2739
2740 2740 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
2741 2741 /*
2742 2742 * Section 9 of RFC 3056 (security considerations) suggests
2743 2743 * that when a packet is from a 6to4 site (i.e., it's not a
2744 2744 * global address being forwarded froma relay router), make
2745 2745 * sure that the packet was tunneled by that site's 6to4
2746 2746 * router.
2747 2747 */
2748 2748 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2749 2749 if (outer4->ipha_src != v4addr)
2750 2750 return (B_FALSE);
2751 2751 } else {
2752 2752 /*
2753 2753 * Only accept packets from a relay router if we've configured
2754 2754 * outbound relay router functionality.
2755 2755 */
2756 2756 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2757 2757 return (B_FALSE);
2758 2758 }
2759 2759
2760 2760 return (B_TRUE);
2761 2761 }
2762 2762
2763 2763 /*
2764 2764 * Input function for everything that comes up from the ip module below us.
2765 2765 * This is called directly from the ip module via connp->conn_recv().
2766 2766 *
2767 2767 * We receive M_DATA messages with IP-in-IP tunneled packets.
2768 2768 */
2769 2769 /* ARGSUSED2 */
2770 2770 static void
2771 2771 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
2772 2772 {
2773 2773 conn_t *connp = arg;
2774 2774 iptun_t *iptun = connp->conn_iptun;
2775 2775 int outer_hlen;
2776 2776 ipha_t *outer4, *inner4;
2777 2777 ip6_t *outer6, *inner6;
2778 2778
2779 2779 ASSERT(IPCL_IS_IPTUN(connp));
2780 2780 ASSERT(DB_TYPE(data_mp) == M_DATA);
2781 2781
2782 2782 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
2783 2783 &outer4, &inner4, &outer6, &inner6);
2784 2784 if (outer_hlen == 0)
2785 2785 goto drop;
2786 2786
2787 2787 /*
2788 2788 * If the system is labeled, we call tsol_check_dest() on the packet
2789 2789 * destination (our local tunnel address) to ensure that the packet as
2790 2790 * labeled should be allowed to be sent to us. We don't need to call
2791 2791 * the more involved tsol_receive_local() since the tunnel link itself
2792 2792 * cannot be assigned to shared-stack non-global zones.
2793 2793 */
2794 2794 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2795 2795 if (ira->ira_tsl == NULL)
2796 2796 goto drop;
2797 2797 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ?
2798 2798 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst),
2799 2799 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION),
2800 2800 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0)
2801 2801 goto drop;
2802 2802 }
2803 2803
2804 2804 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2805 2805 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
2806 2806 if (data_mp == NULL) {
2807 2807 /* Callee did all of the freeing. */
2808 2808 return;
2809 2809 }
2810 2810
2811 2811 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
2812 2812 !iptun_in_6to4_ok(iptun, outer4, inner6))
2813 2813 goto drop;
2814 2814
2815 2815 /*
2816 2816 * We need to statistically account for each packet individually, so
2817 2817 * we might as well split up any b_next chains here.
2818 2818 */
2819 2819 do {
2820 2820 mblk_t *mp;
2821 2821
2822 2822 mp = data_mp->b_next;
2823 2823 data_mp->b_next = NULL;
2824 2824
2825 2825 atomic_inc_64(&iptun->iptun_ipackets);
2826 2826 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
2827 2827 mac_rx(iptun->iptun_mh, NULL, data_mp);
2828 2828
2829 2829 data_mp = mp;
2830 2830 } while (data_mp != NULL);
2831 2831 return;
2832 2832 drop:
2833 2833 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2834 2834 }
2835 2835
2836 2836 /*
2837 2837 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet
2838 2838 * was processed without issue, or B_FALSE if the packet had issues and should
2839 2839 * be dropped.
2840 2840 */
2841 2841 static boolean_t
2842 2842 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2843 2843 {
2844 2844 ipaddr_t v4addr;
2845 2845
2846 2846 /*
2847 2847 * IPv6 source must be a 6to4 address. This is because a conscious
2848 2848 * decision was made to not allow a Solaris system to be used as a
2849 2849 * relay router (for security reasons) when 6to4 was initially
2850 2850 * integrated. If this decision is ever reversed, the following check
2851 2851 * can be removed.
2852 2852 */
2853 2853 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
2854 2854 return (B_FALSE);
2855 2855
2856 2856 /*
2857 2857 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
2858 2858 * portion of the 6to4 IPv6 source address. In other words, make sure
2859 2859 * that we're tunneling packets from our own 6to4 site.
2860 2860 */
2861 2861 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2862 2862 if (outer4->ipha_src != v4addr)
2863 2863 return (B_FALSE);
2864 2864
2865 2865 /*
2866 2866 * Automatically set the destination of the outer IPv4 header as
2867 2867 * described in RFC3056. There are two possibilities:
2868 2868 *
2869 2869 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
2870 2870 * to the IPv4 portion of the 6to4 address.
2871 2871 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
2872 2872 * destination to the address of a relay router.
2873 2873 *
2874 2874 * Design Note: b shouldn't be necessary here, and this is a flaw in
2875 2875 * the design of the 6to4relay command. Instead of setting a 6to4
2876 2876 * relay address in this module via an ioctl, the 6to4relay command
2877 2877 * could simply add a IPv6 route for native IPv6 addresses (such as a
2878 2878 * default route) in the forwarding table that uses a 6to4 destination
2879 2879 * as its next hop, and the IPv4 portion of that address could be a
2880 2880 * 6to4 relay address. In order for this to work, IP would have to
2881 2881 * resolve the next hop address, which would necessitate a link-layer
2882 2882 * address resolver for 6to4 links, which doesn't exist today.
2883 2883 *
2884 2884 * In fact, if a resolver existed for 6to4 links, then setting the
2885 2885 * IPv4 destination in the outer header could be done as part of
2886 2886 * link-layer address resolution and fast-path header generation, and
2887 2887 * not here.
2888 2888 */
2889 2889 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
2890 2890 /* destination is a 6to4 router */
2891 2891 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
2892 2892 (struct in_addr *)&outer4->ipha_dst);
2893 2893
2894 2894 /* Reject attempts to send to INADDR_ANY */
2895 2895 if (outer4->ipha_dst == INADDR_ANY)
2896 2896 return (B_FALSE);
2897 2897 } else {
2898 2898 /*
2899 2899 * The destination is a native IPv6 address. If output to a
2900 2900 * relay-router is enabled, use the relay-router's IPv4
2901 2901 * address as the destination.
2902 2902 */
2903 2903 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2904 2904 return (B_FALSE);
2905 2905 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
2906 2906 }
2907 2907
2908 2908 /*
2909 2909 * If the outer source and destination are equal, this means that the
2910 2910 * 6to4 router somehow forwarded an IPv6 packet destined for its own
2911 2911 * 6to4 site to its 6to4 tunnel interface, which will result in this
2912 2912 * packet infinitely bouncing between ip and iptun.
2913 2913 */
2914 2914 return (outer4->ipha_src != outer4->ipha_dst);
2915 2915 }
2916 2916
2917 2917 /*
2918 2918 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on
2919 2919 * error.
2920 2920 */
2921 2921 static mblk_t *
2922 2922 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
2923 2923 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
2924 2924 {
2925 2925 uint8_t *innerptr = (inner4 != NULL ?
2926 2926 (uint8_t *)inner4 : (uint8_t *)inner6);
2927 2927 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
2928 2928
2929 2929 if (inner4 != NULL) {
2930 2930 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
2931 2931 /*
2932 2932 * Copy the tos from the inner IPv4 header. We mask off ECN
2933 2933 * bits (bits 6 and 7) because there is currently no
2934 2934 * tunnel-tunnel communication to determine if both sides
2935 2935 * support ECN. We opt for the safe choice: don't copy the
2936 2936 * ECN bits when doing encapsulation.
2937 2937 */
2938 2938 outer4->ipha_type_of_service =
2939 2939 inner4->ipha_type_of_service & ~0x03;
2940 2940 } else {
2941 2941 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
2942 2942 inner6 != NULL);
2943 2943 }
2944 2944 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2945 2945 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2946 2946 else
2947 2947 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2948 2948
2949 2949 /*
2950 2950 * As described in section 3.2.2 of RFC4213, if the packet payload is
2951 2951 * less than or equal to the minimum MTU size, then we need to allow
2952 2952 * IPv4 to fragment the packet. The reason is that even if we end up
2953 2953 * receiving an ICMP frag-needed, the interface above this tunnel
2954 2954 * won't be allowed to drop its MTU as a result, since the packet was
2955 2955 * already smaller than the smallest allowable MTU for that interface.
2956 2956 */
2957 2957 if (mp->b_wptr - innerptr <= minmtu) {
2958 2958 outer4->ipha_fragment_offset_and_flags = 0;
2959 2959 ixa->ixa_flags &= ~IXAF_DONTFRAG;
2960 2960 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
2961 2961 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
2962 2962 ixa->ixa_flags |= IXAF_DONTFRAG;
2963 2963 }
2964 2964
2965 2965 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
2966 2966 ixa->ixa_pktlen = msgdsize(mp);
2967 2967 ixa->ixa_protocol = outer4->ipha_protocol;
2968 2968
2969 2969 outer4->ipha_length = htons(ixa->ixa_pktlen);
2970 2970 return (mp);
2971 2971 }
2972 2972
2973 2973 /*
2974 2974 * Insert an encapsulation limit destination option in the packet provided.
2975 2975 * Always consumes the mp argument and returns a new mblk pointer.
2976 2976 */
2977 2977 static mblk_t *
2978 2978 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
2979 2979 uint8_t limit)
2980 2980 {
2981 2981 mblk_t *newmp;
2982 2982 iptun_ipv6hdrs_t *newouter6;
2983 2983
2984 2984 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
2985 2985 ASSERT(mp->b_cont == NULL);
2986 2986
2987 2987 mp->b_rptr += sizeof (ip6_t);
2988 2988 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
2989 2989 if (newmp == NULL) {
2990 2990 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2991 2991 return (NULL);
2992 2992 }
2993 2993 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
2994 2994 /* Copy the payload (Starting with the inner IPv6 header). */
2995 2995 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
2996 2996 newmp->b_wptr += MBLKL(mp);
2997 2997 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
2998 2998 /* Now copy the outer IPv6 header. */
2999 2999 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
3000 3000 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
3001 3001 newouter6->it6h_encaplim = iptun_encaplim_init;
3002 3002 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
3003 3003 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;
3004 3004
3005 3005 /*
3006 3006 * The payload length will be set at the end of
3007 3007 * iptun_out_process_ipv6().
3008 3008 */
3009 3009
3010 3010 freemsg(mp);
3011 3011 return (newmp);
3012 3012 }
3013 3013
3014 3014 /*
3015 3015 * Process output packets with outer IPv6 headers. Frees mp and bumps stats
3016 3016 * on error.
3017 3017 */
3018 3018 static mblk_t *
3019 3019 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
3020 3020 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
3021 3021 {
3022 3022 uint8_t *innerptr = (inner4 != NULL ?
3023 3023 (uint8_t *)inner4 : (uint8_t *)inner6);
3024 3024 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
3025 3025 uint8_t *limit, *configlimit;
3026 3026 uint32_t offset;
3027 3027 iptun_ipv6hdrs_t *v6hdrs;
3028 3028
3029 3029 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
3030 3030 /*
3031 3031 * The inner packet is an IPv6 packet which itself contains an
3032 3032 * encapsulation limit option. The limit variable points to
3033 3033 * the value in the embedded option. Process the
3034 3034 * encapsulation limit option as specified in RFC 2473.
3035 3035 *
3036 3036 * If limit is 0, then we've exceeded the limit and we need to
3037 3037 * send back an ICMPv6 parameter problem message.
3038 3038 *
3039 3039 * If limit is > 0, then we decrement it by 1 and make sure
3040 3040 * that the encapsulation limit option in the outer header
3041 3041 * reflects that (adding an option if one isn't already
3042 3042 * there).
3043 3043 */
3044 3044 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
3045 3045 if (*limit == 0) {
3046 3046 mp->b_rptr = (uint8_t *)inner6;
3047 3047 offset = limit - mp->b_rptr;
3048 3048 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
3049 3049 0, offset, ixa->ixa_tsl);
3050 3050 atomic_inc_64(&iptun->iptun_noxmtbuf);
3051 3051 return (NULL);
3052 3052 }
3053 3053
3054 3054 /*
3055 3055 * The outer header requires an encapsulation limit option.
3056 3056 * If there isn't one already, add one.
3057 3057 */
3058 3058 if (iptun->iptun_encaplimit == 0) {
3059 3059 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
3060 3060 (*limit - 1))) == NULL)
3061 3061 return (NULL);
3062 3062 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3063 3063 } else {
3064 3064 /*
3065 3065 * There is an existing encapsulation limit option in
3066 3066 * the outer header. If the inner encapsulation limit
3067 3067 * is less than the configured encapsulation limit,
3068 3068 * update the outer encapsulation limit to reflect
3069 3069 * this lesser value.
3070 3070 */
3071 3071 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3072 3072 configlimit =
3073 3073 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
3074 3074 if ((*limit - 1) < *configlimit)
3075 3075 *configlimit = (*limit - 1);
3076 3076 }
3077 3077 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
3078 3078 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
3079 3079 } else {
3080 3080 ixa->ixa_ip_hdr_length = sizeof (ip6_t);
3081 3081 ixa->ixa_protocol = outer6->ip6_nxt;
3082 3082 }
3083 3083 /*
3084 3084 * See iptun_output_process_ipv4() why we allow fragmentation for
3085 3085 * small packets
3086 3086 */
3087 3087 if (mp->b_wptr - innerptr <= minmtu)
3088 3088 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3089 3089 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
3090 3090 ixa->ixa_flags |= IXAF_DONTFRAG;
3091 3091
3092 3092 ixa->ixa_pktlen = msgdsize(mp);
3093 3093 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
3094 3094 return (mp);
3095 3095 }
3096 3096
3097 3097 /*
3098 3098 * The IP tunneling MAC-type plugins have already done most of the header
3099 3099 * processing and validity checks. We are simply responsible for multiplexing
3100 3100 * down to the ip module below us.
3101 3101 */
3102 3102 static void
3103 3103 iptun_output(iptun_t *iptun, mblk_t *mp)
3104 3104 {
3105 3105 conn_t *connp = iptun->iptun_connp;
3106 3106 mblk_t *newmp;
3107 3107 int error;
3108 3108 ip_xmit_attr_t *ixa;
3109 3109
3110 3110 ASSERT(mp->b_datap->db_type == M_DATA);
3111 3111
3112 3112 if (mp->b_cont != NULL) {
3113 3113 if ((newmp = msgpullup(mp, -1)) == NULL) {
3114 3114 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
3115 3115 return;
3116 3116 }
3117 3117 freemsg(mp);
3118 3118 mp = newmp;
3119 3119 }
3120 3120
3121 3121 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
3122 3122 iptun_output_6to4(iptun, mp);
3123 3123 return;
3124 3124 }
3125 3125
3126 3126 if (is_system_labeled()) {
3127 3127 /*
3128 3128 * Since the label can be different meaning a potentially
3129 3129 * different IRE,we always use a unique ip_xmit_attr_t.
3130 3130 */
3131 3131 ixa = conn_get_ixa_exclusive(connp);
3132 3132 } else {
3133 3133 /*
3134 3134 * If no other thread is using conn_ixa this just gets a
3135 3135 * reference to conn_ixa. Otherwise we get a safe copy of
3136 3136 * conn_ixa.
3137 3137 */
3138 3138 ixa = conn_get_ixa(connp, B_FALSE);
3139 3139 }
3140 3140 if (ixa == NULL) {
3141 3141 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3142 3142 return;
3143 3143 }
3144 3144
3145 3145 /*
3146 3146 * In case we got a safe copy of conn_ixa, then we need
3147 3147 * to fill in any pointers in it.
3148 3148 */
3149 3149 if (ixa->ixa_ire == NULL) {
3150 3150 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3151 3151 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
3152 3152 NULL, NULL, 0);
3153 3153 if (error != 0) {
3154 3154 if (ixa->ixa_ire != NULL &&
3155 3155 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3156 3156 /*
3157 3157 * Let conn_ip_output/ire_send_noroute return
3158 3158 * the error and send any local ICMP error.
3159 3159 */
3160 3160 error = 0;
3161 3161 } else {
3162 3162 ixa_refrele(ixa);
3163 3163 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3164 3164 return;
3165 3165 }
3166 3166 }
3167 3167 }
3168 3168
3169 3169 iptun_output_common(iptun, ixa, mp);
3170 3170 ixa_refrele(ixa);
3171 3171 }
3172 3172
3173 3173 /*
3174 3174 * We use an ixa based on the last destination.
3175 3175 */
3176 3176 static void
3177 3177 iptun_output_6to4(iptun_t *iptun, mblk_t *mp)
3178 3178 {
3179 3179 conn_t *connp = iptun->iptun_connp;
3180 3180 ipha_t *outer4, *inner4;
3181 3181 ip6_t *outer6, *inner6;
3182 3182 ip_xmit_attr_t *ixa;
3183 3183 ip_xmit_attr_t *oldixa;
3184 3184 int error;
3185 3185 boolean_t need_connect;
3186 3186 in6_addr_t v6dst;
3187 3187
3188 3188 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */
3189 3189
3190 3190 /* Make sure we set ipha_dst before we look at ipha_dst */
3191 3191
3192 3192 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6);
3193 3193 ASSERT(outer4 != NULL);
3194 3194 if (!iptun_out_process_6to4(iptun, outer4, inner6)) {
3195 3195 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3196 3196 return;
3197 3197 }
3198 3198
3199 3199 if (is_system_labeled()) {
3200 3200 /*
3201 3201 * Since the label can be different meaning a potentially
3202 3202 * different IRE,we always use a unique ip_xmit_attr_t.
3203 3203 */
3204 3204 ixa = conn_get_ixa_exclusive(connp);
3205 3205 } else {
3206 3206 /*
3207 3207 * If no other thread is using conn_ixa this just gets a
3208 3208 * reference to conn_ixa. Otherwise we get a safe copy of
3209 3209 * conn_ixa.
3210 3210 */
3211 3211 ixa = conn_get_ixa(connp, B_FALSE);
3212 3212 }
3213 3213 if (ixa == NULL) {
3214 3214 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3215 3215 return;
3216 3216 }
3217 3217
3218 3218 mutex_enter(&connp->conn_lock);
3219 3219 if (connp->conn_v4lastdst == outer4->ipha_dst) {
3220 3220 need_connect = (ixa->ixa_ire == NULL);
3221 3221 } else {
3222 3222 /* In case previous destination was multirt */
3223 3223 ip_attr_newdst(ixa);
3224 3224
3225 3225 /*
3226 3226 * We later update conn_ixa when we update conn_v4lastdst
3227 3227 * which enables subsequent packets to avoid redoing
3228 3228 * ip_attr_connect
3229 3229 */
3230 3230 need_connect = B_TRUE;
3231 3231 }
3232 3232 mutex_exit(&connp->conn_lock);
3233 3233
3234 3234 /*
3235 3235 * In case we got a safe copy of conn_ixa, or otherwise we don't
3236 3236 * have a current ixa_ire, then we need to fill in any pointers in
3237 3237 * the ixa.
3238 3238 */
3239 3239 if (need_connect) {
3240 3240 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst);
3241 3241
3242 3242 /* We handle IPsec in iptun_output_common */
3243 3243 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3244 3244 &v6dst, &v6dst, 0, NULL, NULL, 0);
3245 3245 if (error != 0) {
3246 3246 if (ixa->ixa_ire != NULL &&
3247 3247 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3248 3248 /*
3249 3249 * Let conn_ip_output/ire_send_noroute return
3250 3250 * the error and send any local ICMP error.
3251 3251 */
3252 3252 error = 0;
3253 3253 } else {
3254 3254 ixa_refrele(ixa);
3255 3255 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3256 3256 return;
3257 3257 }
3258 3258 }
3259 3259 }
3260 3260
3261 3261 iptun_output_common(iptun, ixa, mp);
3262 3262
3263 3263 /* Atomically replace conn_ixa and conn_v4lastdst */
3264 3264 mutex_enter(&connp->conn_lock);
3265 3265 if (connp->conn_v4lastdst != outer4->ipha_dst) {
3266 3266 /* Remember the dst which corresponds to conn_ixa */
3267 3267 connp->conn_v6lastdst = v6dst;
3268 3268 oldixa = conn_replace_ixa(connp, ixa);
3269 3269 } else {
3270 3270 oldixa = NULL;
3271 3271 }
3272 3272 mutex_exit(&connp->conn_lock);
3273 3273 ixa_refrele(ixa);
3274 3274 if (oldixa != NULL)
3275 3275 ixa_refrele(oldixa);
3276 3276 }
3277 3277
3278 3278 /*
3279 3279 * Check the destination/label. Modifies *mpp by adding/removing CIPSO.
3280 3280 *
3281 3281 * We get the label from the message in order to honor the
3282 3282 * ULPs/IPs choice of label. This will be NULL for forwarded
3283 3283 * packets, neighbor discovery packets and some others.
3284 3284 */
3285 3285 static int
3286 3286 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa)
3287 3287 {
3288 3288 cred_t *cr;
3289 3289 int adjust;
3290 3290 int iplen;
3291 3291 int err;
3292 3292 ts_label_t *effective_tsl = NULL;
3293 3293
3294 3294
3295 3295 ASSERT(is_system_labeled());
3296 3296
3297 3297 cr = msg_getcred(*mpp, NULL);
3298 3298 if (cr == NULL)
3299 3299 return (0);
3300 3300
3301 3301 /*
3302 3302 * We need to start with a label based on the IP/ULP above us
3303 3303 */
3304 3304 ip_xmit_attr_restore_tsl(ixa, cr);
3305 3305
3306 3306 /*
3307 3307 * Need to update packet with any CIPSO option since
3308 3308 * conn_ip_output doesn't do that.
3309 3309 */
3310 3310 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3311 3311 ipha_t *ipha;
3312 3312
3313 3313 ipha = (ipha_t *)(*mpp)->b_rptr;
3314 3314 iplen = ntohs(ipha->ipha_length);
3315 3315 err = tsol_check_label_v4(ixa->ixa_tsl,
3316 3316 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
3317 3317 ixa->ixa_ipst, &effective_tsl);
3318 3318 if (err != 0)
3319 3319 return (err);
3320 3320
3321 3321 ipha = (ipha_t *)(*mpp)->b_rptr;
3322 3322 adjust = (int)ntohs(ipha->ipha_length) - iplen;
3323 3323 } else {
3324 3324 ip6_t *ip6h;
3325 3325
3326 3326 ip6h = (ip6_t *)(*mpp)->b_rptr;
3327 3327 iplen = ntohs(ip6h->ip6_plen);
3328 3328
3329 3329 err = tsol_check_label_v6(ixa->ixa_tsl,
3330 3330 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
3331 3331 ixa->ixa_ipst, &effective_tsl);
3332 3332 if (err != 0)
3333 3333 return (err);
3334 3334
3335 3335 ip6h = (ip6_t *)(*mpp)->b_rptr;
3336 3336 adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
3337 3337 }
3338 3338
3339 3339 if (effective_tsl != NULL) {
3340 3340 /* Update the label */
3341 3341 ip_xmit_attr_replace_tsl(ixa, effective_tsl);
3342 3342 }
3343 3343 ixa->ixa_pktlen += adjust;
3344 3344 ixa->ixa_ip_hdr_length += adjust;
3345 3345 return (0);
3346 3346 }
3347 3347
3348 3348
3349 3349 static void
3350 3350 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp)
3351 3351 {
3352 3352 ipsec_tun_pol_t *itp = iptun->iptun_itp;
3353 3353 int outer_hlen;
3354 3354 mblk_t *newmp;
3355 3355 ipha_t *outer4, *inner4;
3356 3356 ip6_t *outer6, *inner6;
3357 3357 int error;
3358 3358 boolean_t update_pktlen;
3359 3359
3360 3360 ASSERT(ixa->ixa_ire != NULL);
3361 3361
3362 3362 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6,
3363 3363 &inner6);
3364 3364 if (outer_hlen == 0) {
3365 3365 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3366 3366 return;
3367 3367 }
3368 3368
3369 3369 /* Save IXAF_DONTFRAG value */
3370 3370 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG;
3371 3371
3372 3372 /* Perform header processing. */
3373 3373 if (outer4 != NULL) {
3374 3374 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6,
3375 3375 ixa);
3376 3376 } else {
3377 3377 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6,
3378 3378 ixa);
3379 3379 }
3380 3380 if (mp == NULL)
3381 3381 return;
3382 3382
3383 3383 /*
3384 3384 * Let's hope the compiler optimizes this with "branch taken".
3385 3385 */
3386 3386 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
3387 3387 /* This updates the ip_xmit_attr_t */
3388 3388 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4,
3389 3389 outer6, outer_hlen, ixa);
3390 3390 if (mp == NULL) {
3391 3391 atomic_inc_64(&iptun->iptun_oerrors);
3392 3392 return;
3393 3393 }
3394 3394 if (is_system_labeled()) {
3395 3395 /*
3396 3396 * Might change the packet by adding/removing CIPSO.
3397 3397 * After this caller inner* and outer* and outer_hlen
3398 3398 * might be invalid.
3399 3399 */
3400 3400 error = iptun_output_check_label(&mp, ixa);
3401 3401 if (error != 0) {
3402 3402 ip2dbg(("label check failed (%d)\n", error));
3403 3403 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3404 3404 return;
3405 3405 }
3406 3406 }
3407 3407
3408 3408 /*
3409 3409 * ipsec_tun_outbound() returns a chain of tunneled IP
3410 3410 * fragments linked with b_next (or a single message if the
3411 3411 * tunneled packet wasn't a fragment).
3412 3412 * If fragcache returned a list then we need to update
3413 3413 * ixa_pktlen for all packets in the list.
3414 3414 */
3415 3415 update_pktlen = (mp->b_next != NULL);
3416 3416
3417 3417 /*
3418 3418 * Otherwise, we're good to go. The ixa has been updated with
3419 3419 * instructions for outbound IPsec processing.
3420 3420 */
3421 3421 for (newmp = mp; newmp != NULL; newmp = mp) {
3422 3422 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
3423 3423
3424 3424 atomic_inc_64(&iptun->iptun_opackets);
3425 3425 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3426 3426 mp = mp->b_next;
3427 3427 newmp->b_next = NULL;
3428 3428
3429 3429 /*
3430 3430 * The IXAF_DONTFRAG flag is global, but there is
3431 3431 * a chain here. Check if we're really already
3432 3432 * smaller than the minimum allowed MTU and reset here
3433 3433 * appropriately. Otherwise one small packet can kill
3434 3434 * the whole chain's path mtu discovery.
3435 3435 * In addition, update the pktlen to the length of
3436 3436 * the actual packet being processed.
3437 3437 */
3438 3438 if (update_pktlen) {
3439 3439 ixa->ixa_pktlen = msgdsize(newmp);
3440 3440 if (ixa->ixa_pktlen <= minmtu)
3441 3441 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3442 3442 }
3443 3443
3444 3444 atomic_inc_64(&iptun->iptun_opackets);
3445 3445 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3446 3446
3447 3447 error = conn_ip_output(newmp, ixa);
3448 3448
3449 3449 /* Restore IXAF_DONTFRAG value */
3450 3450 ixa->ixa_flags |= dontfrag;
3451 3451
3452 3452 if (error == EMSGSIZE) {
3453 3453 /* IPsec policy might have changed */
3454 3454 (void) iptun_update_mtu(iptun, ixa, 0);
3455 3455 }
3456 3456 }
3457 3457 } else {
3458 3458 /*
3459 3459 * The ip module will potentially apply global policy to the
3460 3460 * packet in its output path if there's no active tunnel
3461 3461 * policy.
3462 3462 */
3463 3463 ASSERT(ixa->ixa_ipsec_policy == NULL);
3464 3464 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa);
3465 3465 if (mp == NULL) {
3466 3466 atomic_inc_64(&iptun->iptun_oerrors);
3467 3467 return;
3468 3468 }
3469 3469 if (is_system_labeled()) {
3470 3470 /*
3471 3471 * Might change the packet by adding/removing CIPSO.
3472 3472 * After this caller inner* and outer* and outer_hlen
3473 3473 * might be invalid.
3474 3474 */
3475 3475 error = iptun_output_check_label(&mp, ixa);
3476 3476 if (error != 0) {
3477 3477 ip2dbg(("label check failed (%d)\n", error));
3478 3478 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3479 3479 return;
3480 3480 }
3481 3481 }
3482 3482
3483 3483 atomic_inc_64(&iptun->iptun_opackets);
3484 3484 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3485 3485
3486 3486 error = conn_ip_output(mp, ixa);
3487 3487 if (error == EMSGSIZE) {
3488 3488 /* IPsec policy might have changed */
3489 3489 (void) iptun_update_mtu(iptun, ixa, 0);
3490 3490 }
3491 3491 }
3492 3492 if (ixa->ixa_flags & IXAF_IPSEC_SECURE)
3493 3493 ipsec_out_release_refs(ixa);
3494 3494 }
3495 3495
3496 3496 static mac_callbacks_t iptun_m_callbacks = {
3497 3497 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO),
3498 3498 .mc_getstat = iptun_m_getstat,
3499 3499 .mc_start = iptun_m_start,
3500 3500 .mc_stop = iptun_m_stop,
3501 3501 .mc_setpromisc = iptun_m_setpromisc,
3502 3502 .mc_multicst = iptun_m_multicst,
3503 3503 .mc_unicst = iptun_m_unicst,
3504 3504 .mc_tx = iptun_m_tx,
3505 3505 .mc_reserved = NULL,
3506 3506 .mc_setprop = iptun_m_setprop,
3507 3507 .mc_getprop = iptun_m_getprop,
3508 3508 .mc_propinfo = iptun_m_propinfo
3509 3509 };
↓ open down ↓ |
3387 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX