1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/socket.h> 28 #include <sys/ksynch.h> 29 #include <sys/kmem.h> 30 #include <sys/errno.h> 31 #include <sys/systm.h> 32 #include <sys/sysmacros.h> 33 #include <sys/cmn_err.h> 34 #include <sys/strsun.h> 35 #include <sys/zone.h> 36 #include <netinet/in.h> 37 #include <inet/common.h> 38 #include <inet/ip.h> 39 #include <inet/ip6.h> 40 #include <inet/ip6_asp.h> 41 #include <inet/ip_ire.h> 42 #include <inet/ip_if.h> 43 #include <inet/ipclassifier.h> 44 45 #define IN6ADDR_MASK128_INIT \ 46 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU } 47 #define IN6ADDR_MASK96_INIT { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 } 48 #ifdef _BIG_ENDIAN 49 #define IN6ADDR_MASK16_INIT { 0xffff0000U, 0, 0, 0 } 50 #else 51 #define IN6ADDR_MASK16_INIT { 0x0000ffffU, 0, 0, 0 } 52 #endif 53 54 55 /* 56 * This table is ordered such that longest prefix matches are hit first 57 * (longer prefix lengths first). The last entry must be the "default" 58 * entry (::0/0). 59 */ 60 static ip6_asp_t default_ip6_asp_table[] = { 61 { IN6ADDR_LOOPBACK_INIT, IN6ADDR_MASK128_INIT, 62 "Loopback", 50 }, 63 { IN6ADDR_ANY_INIT, IN6ADDR_MASK96_INIT, 64 "IPv4_Compatible", 20 }, 65 #ifdef _BIG_ENDIAN 66 { { 0, 0, 0x0000ffffU, 0 }, IN6ADDR_MASK96_INIT, 67 "IPv4", 10 }, 68 { { 0x20020000U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 69 "6to4", 30 }, 70 #else 71 { { 0, 0, 0xffff0000U, 0 }, IN6ADDR_MASK96_INIT, 72 "IPv4", 10 }, 73 { { 0x00000220U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 74 "6to4", 30 }, 75 #endif 76 { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, 77 "Default", 40 } 78 }; 79 80 /* 81 * The IPv6 Default Address Selection policy table. 82 * Until someone up above reconfigures the policy table, use the global 83 * default. The table needs no lock since the only way to alter it is 84 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip. 85 */ 86 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t); 87 static void ip6_asp_check_for_updates(ip_stack_t *); 88 89 void 90 ip6_asp_init(ip_stack_t *ipst) 91 { 92 /* Initialize the table lock */ 93 mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); 94 95 ipst->ips_ip6_asp_table = default_ip6_asp_table; 96 97 ipst->ips_ip6_asp_table_count = 98 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 99 } 100 101 void 102 ip6_asp_free(ip_stack_t *ipst) 103 { 104 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { 105 kmem_free(ipst->ips_ip6_asp_table, 106 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); 107 ipst->ips_ip6_asp_table = NULL; 108 } 109 mutex_destroy(&ipst->ips_ip6_asp_lock); 110 } 111 112 /* 113 * Return false if the table is being updated. Else, increment the ref 114 * count and return true. 115 */ 116 boolean_t 117 ip6_asp_can_lookup(ip_stack_t *ipst) 118 { 119 mutex_enter(&ipst->ips_ip6_asp_lock); 120 if (ipst->ips_ip6_asp_uip) { 121 mutex_exit(&ipst->ips_ip6_asp_lock); 122 return (B_FALSE); 123 } 124 IP6_ASP_TABLE_REFHOLD(ipst); 125 mutex_exit(&ipst->ips_ip6_asp_lock); 126 return (B_TRUE); 127 128 } 129 130 void 131 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) 132 { 133 conn_t *connp = Q_TO_CONN(q); 134 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 135 136 ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) && 137 (mp->b_next == NULL)); 138 mp->b_queue = (void *)q; 139 mp->b_prev = (void *)func; 140 mp->b_next = NULL; 141 142 mutex_enter(&ipst->ips_ip6_asp_lock); 143 if (ipst->ips_ip6_asp_pending_ops == NULL) { 144 ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL); 145 ipst->ips_ip6_asp_pending_ops = 146 ipst->ips_ip6_asp_pending_ops_tail = mp; 147 } else { 148 ipst->ips_ip6_asp_pending_ops_tail->b_next = mp; 149 ipst->ips_ip6_asp_pending_ops_tail = mp; 150 } 151 mutex_exit(&ipst->ips_ip6_asp_lock); 152 } 153 154 static void 155 ip6_asp_complete_op(ip_stack_t *ipst) 156 { 157 mblk_t *mp; 158 queue_t *q; 159 aspfunc_t func; 160 161 mutex_enter(&ipst->ips_ip6_asp_lock); 162 while (ipst->ips_ip6_asp_pending_ops != NULL) { 163 mp = ipst->ips_ip6_asp_pending_ops; 164 ipst->ips_ip6_asp_pending_ops = mp->b_next; 165 mp->b_next = NULL; 166 if (ipst->ips_ip6_asp_pending_ops == NULL) 167 ipst->ips_ip6_asp_pending_ops_tail = NULL; 168 mutex_exit(&ipst->ips_ip6_asp_lock); 169 170 q = (queue_t *)mp->b_queue; 171 func = (aspfunc_t)mp->b_prev; 172 173 mp->b_prev = NULL; 174 mp->b_queue = NULL; 175 176 177 (*func)(NULL, q, mp, NULL); 178 mutex_enter(&ipst->ips_ip6_asp_lock); 179 } 180 mutex_exit(&ipst->ips_ip6_asp_lock); 181 } 182 183 /* 184 * Decrement reference count. When it gets to 0, we check for (pending) 185 * saved update to the table, if any. 186 */ 187 void 188 ip6_asp_table_refrele(ip_stack_t *ipst) 189 { 190 IP6_ASP_TABLE_REFRELE(ipst); 191 } 192 193 /* 194 * This function is guaranteed never to return a NULL pointer. It 195 * will always return information from one of the entries in the 196 * asp_table (which will never be empty). If a pointer is passed 197 * in for the precedence, the precedence value will be set; a 198 * pointer to the label will be returned by the function. 199 * 200 * Since the table is only anticipated to have five or six entries 201 * total, the lookup algorithm hasn't been optimized to anything 202 * better than O(n). 203 */ 204 char * 205 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst) 206 { 207 ip6_asp_t *aspp; 208 ip6_asp_t *match = NULL; 209 ip6_asp_t *default_policy; 210 211 aspp = ipst->ips_ip6_asp_table; 212 /* The default entry must always be the last one */ 213 default_policy = aspp + ipst->ips_ip6_asp_table_count - 1; 214 215 while (match == NULL) { 216 if (aspp == default_policy) { 217 match = aspp; 218 } else { 219 if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask, 220 aspp->ip6_asp_prefix)) 221 match = aspp; 222 else 223 aspp++; 224 } 225 } 226 227 if (precedence != NULL) 228 *precedence = match->ip6_asp_precedence; 229 return (match->ip6_asp_label); 230 } 231 232 /* 233 * If we had deferred updating the table because of outstanding references, 234 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since 235 * ip_sioctl_ip6addrpolicy() has already done it for us. 236 */ 237 void 238 ip6_asp_check_for_updates(ip_stack_t *ipst) 239 { 240 ip6_asp_t *table; 241 size_t table_size; 242 mblk_t *data_mp, *mp; 243 struct iocblk *iocp; 244 245 mutex_enter(&ipst->ips_ip6_asp_lock); 246 if (ipst->ips_ip6_asp_pending_update == NULL || 247 ipst->ips_ip6_asp_refcnt > 0) { 248 mutex_exit(&ipst->ips_ip6_asp_lock); 249 return; 250 } 251 252 mp = ipst->ips_ip6_asp_pending_update; 253 ipst->ips_ip6_asp_pending_update = NULL; 254 ASSERT(mp->b_prev != NULL); 255 256 ipst->ips_ip6_asp_uip = B_TRUE; 257 258 iocp = (struct iocblk *)mp->b_rptr; 259 data_mp = mp->b_cont; 260 if (data_mp == NULL) { 261 table = NULL; 262 table_size = iocp->ioc_count; 263 } else { 264 table = (ip6_asp_t *)data_mp->b_rptr; 265 table_size = iocp->ioc_count; 266 } 267 268 ip6_asp_replace(mp, table, table_size, B_TRUE, ipst, 269 iocp->ioc_flag & IOC_MODELS); 270 } 271 272 /* 273 * ip6_asp_replace replaces the contents of the IPv6 address selection 274 * policy table with those specified in new_table. If new_table is NULL, 275 * this indicates that the caller wishes ip to use the default policy 276 * table. The caller is responsible for making sure that there are exactly 277 * new_count policy entries in new_table. 278 */ 279 /*ARGSUSED5*/ 280 void 281 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, 282 boolean_t locked, ip_stack_t *ipst, model_t datamodel) 283 { 284 int ret_val = 0; 285 ip6_asp_t *tmp_table; 286 uint_t count; 287 queue_t *q; 288 struct iocblk *iocp; 289 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4 290 size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel); 291 #else 292 const size_t ip6_asp_size = sizeof (ip6_asp_t); 293 #endif 294 295 if (new_size % ip6_asp_size != 0) { 296 ip1dbg(("ip6_asp_replace: invalid table size\n")); 297 ret_val = EINVAL; 298 if (locked) 299 goto unlock_end; 300 goto replace_end; 301 } else { 302 count = new_size / ip6_asp_size; 303 } 304 305 306 if (!locked) 307 mutex_enter(&ipst->ips_ip6_asp_lock); 308 /* 309 * Check if we are in the process of creating any IRE using the 310 * current information. If so, wait till that is done. 311 */ 312 if (!locked && ipst->ips_ip6_asp_refcnt > 0) { 313 /* Save this request for later processing */ 314 if (ipst->ips_ip6_asp_pending_update == NULL) { 315 ipst->ips_ip6_asp_pending_update = mp; 316 } else { 317 /* Let's not queue multiple requests for now */ 318 ip1dbg(("ip6_asp_replace: discarding request\n")); 319 mutex_exit(&ipst->ips_ip6_asp_lock); 320 ret_val = EAGAIN; 321 goto replace_end; 322 } 323 mutex_exit(&ipst->ips_ip6_asp_lock); 324 return; 325 } 326 327 /* Prevent lookups till the table have been updated */ 328 if (!locked) 329 ipst->ips_ip6_asp_uip = B_TRUE; 330 331 ASSERT(ipst->ips_ip6_asp_refcnt == 0); 332 333 if (new_table == NULL) { 334 /* 335 * This is a special case. The user wants to revert 336 * back to using the default table. 337 */ 338 if (ipst->ips_ip6_asp_table == default_ip6_asp_table) 339 goto unlock_end; 340 341 kmem_free(ipst->ips_ip6_asp_table, 342 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); 343 ipst->ips_ip6_asp_table = default_ip6_asp_table; 344 ipst->ips_ip6_asp_table_count = 345 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 346 goto unlock_end; 347 } 348 349 if (count == 0) { 350 ret_val = EINVAL; 351 ip1dbg(("ip6_asp_replace: empty table\n")); 352 goto unlock_end; 353 } 354 355 if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) == 356 NULL) { 357 ret_val = ENOMEM; 358 goto unlock_end; 359 } 360 361 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4 362 363 /* 364 * If 'new_table' -actually- originates from a 32-bit process 365 * then the nicely aligned ip6_asp_label array will be 366 * subtlely misaligned on this kernel, because the structure 367 * is 8 byte aligned in the kernel, but only 4 byte aligned in 368 * userland. Fix it up here. 369 * 370 * XX64 See the notes in ip_sioctl_ip6addrpolicy. Perhaps we could 371 * do the datamodel transformation (below) there instead of here? 372 */ 373 if (datamodel == IOC_ILP32) { 374 ip6_asp_t *dst; 375 ip6_asp32_t *src; 376 int i; 377 378 if ((dst = kmem_zalloc(count * sizeof (*dst), 379 KM_NOSLEEP)) == NULL) { 380 kmem_free(tmp_table, count * sizeof (ip6_asp_t)); 381 ret_val = ENOMEM; 382 goto unlock_end; 383 } 384 385 /* 386 * Copy each element of the table from ip6_asp32_t 387 * format into ip6_asp_t format. Fortunately, since 388 * we're just dealing with a trailing structure pad, 389 * we can do this straightforwardly with a flurry of 390 * bcopying. 391 */ 392 src = (void *)new_table; 393 for (i = 0; i < count; i++) 394 bcopy(src + i, dst + i, sizeof (*src)); 395 396 ip6_asp_copy(dst, tmp_table, count); 397 kmem_free(dst, count * sizeof (*dst)); 398 } else 399 #endif 400 ip6_asp_copy(new_table, tmp_table, count); 401 402 /* Make sure the last entry is the default entry */ 403 if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) || 404 !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) { 405 ret_val = EINVAL; 406 kmem_free(tmp_table, count * sizeof (ip6_asp_t)); 407 ip1dbg(("ip6_asp_replace: bad table: no default entry\n")); 408 goto unlock_end; 409 } 410 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { 411 kmem_free(ipst->ips_ip6_asp_table, 412 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); 413 } 414 ipst->ips_ip6_asp_table = tmp_table; 415 ipst->ips_ip6_asp_table_count = count; 416 417 unlock_end: 418 ipst->ips_ip6_asp_uip = B_FALSE; 419 mutex_exit(&ipst->ips_ip6_asp_lock); 420 421 /* Let conn_ixa caching know that source address selection changed */ 422 ip_update_source_selection(ipst); 423 424 replace_end: 425 /* Reply to the ioctl */ 426 q = (queue_t *)mp->b_prev; 427 mp->b_prev = NULL; 428 if (q == NULL) { 429 freemsg(mp); 430 goto check_binds; 431 } 432 iocp = (struct iocblk *)mp->b_rptr; 433 iocp->ioc_error = ret_val; 434 iocp->ioc_count = 0; 435 DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK; 436 qreply(q, mp); 437 check_binds: 438 ip6_asp_complete_op(ipst); 439 } 440 441 /* 442 * Copies the contents of src_table to dst_table, and sorts the 443 * entries in decending order of prefix lengths. It assumes that both 444 * tables are appropriately sized to contain count entries. 445 */ 446 static void 447 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count) 448 { 449 ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp; 450 451 dst_table[0] = src_table[0]; 452 if (count == 1) 453 return; 454 455 /* 456 * Sort the entries in descending order of prefix lengths. 457 * 458 * Note: this should be a small table. In 99% of cases, we 459 * expect the table to have 5 entries. In the remaining 1% 460 * of cases, we expect the table to have one or two more 461 * entries. It would be very rare for the table to have 462 * double-digit entries. 463 */ 464 src_limit = src_table + count; 465 dst_limit = dst_table + 1; 466 for (src_ptr = src_table + 1; src_ptr != src_limit; 467 src_ptr++, dst_limit++) { 468 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) { 469 if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) > 470 ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) { 471 /* 472 * Make room to insert the source entry 473 * before dst_ptr by shifting entries to 474 * the right. 475 */ 476 for (dp = dst_limit - 1; dp >= dst_ptr; dp--) 477 *(dp + 1) = *dp; 478 break; 479 } 480 } 481 *dst_ptr = *src_ptr; 482 } 483 } 484 485 /* 486 * This function copies as many entries from ip6_asp_table as will fit 487 * into dtable. The dtable_size parameter is the size of dtable 488 * in bytes. This function returns the number of entries in 489 * ip6_asp_table, even if it's not able to fit all of the entries into 490 * dtable. 491 */ 492 int 493 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst) 494 { 495 uint_t dtable_count; 496 497 if (dtable != NULL) { 498 if (dtable_size < sizeof (ip6_asp_t)) 499 return (-1); 500 501 dtable_count = dtable_size / sizeof (ip6_asp_t); 502 bcopy(ipst->ips_ip6_asp_table, dtable, 503 MIN(ipst->ips_ip6_asp_table_count, dtable_count) * 504 sizeof (ip6_asp_t)); 505 } 506 507 return (ipst->ips_ip6_asp_table_count); 508 } 509 510 /* 511 * Compare two labels. Return B_TRUE if they are equal, B_FALSE 512 * otherwise. 513 */ 514 boolean_t 515 ip6_asp_labelcmp(const char *label1, const char *label2) 516 { 517 int64_t *llptr1, *llptr2; 518 519 /* 520 * The common case, the two labels are actually the same string 521 * from the policy table. 522 */ 523 if (label1 == label2) 524 return (B_TRUE); 525 526 /* 527 * Since we know the labels are at most 16 bytes long, compare 528 * the two strings as two 8-byte long integers. The ip6_asp_t 529 * structure guarantees that the labels are 8 byte alligned. 530 */ 531 llptr1 = (int64_t *)label1; 532 llptr2 = (int64_t *)label2; 533 if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1]) 534 return (B_TRUE); 535 return (B_FALSE); 536 }