1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/socket.h>
  28 #include <sys/ksynch.h>
  29 #include <sys/kmem.h>
  30 #include <sys/errno.h>
  31 #include <sys/systm.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/strsun.h>
  35 #include <sys/zone.h>
  36 #include <netinet/in.h>
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip6.h>
  40 #include <inet/ip6_asp.h>
  41 #include <inet/ip_ire.h>
  42 #include <inet/ip_if.h>
  43 #include <inet/ipclassifier.h>
  44 
  45 #define IN6ADDR_MASK128_INIT \
  46         { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
  47 #define IN6ADDR_MASK96_INIT     { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
  48 #ifdef _BIG_ENDIAN
  49 #define IN6ADDR_MASK16_INIT     { 0xffff0000U, 0, 0, 0 }
  50 #else
  51 #define IN6ADDR_MASK16_INIT     { 0x0000ffffU, 0, 0, 0 }
  52 #endif
  53 
  54 
  55 /*
  56  * This table is ordered such that longest prefix matches are hit first
  57  * (longer prefix lengths first).  The last entry must be the "default"
  58  * entry (::0/0).
  59  */
  60 static ip6_asp_t default_ip6_asp_table[] = {
  61         { IN6ADDR_LOOPBACK_INIT,        IN6ADDR_MASK128_INIT,
  62             "Loopback", 50 },
  63         { IN6ADDR_ANY_INIT,             IN6ADDR_MASK96_INIT,
  64             "IPv4_Compatible", 20 },
  65 #ifdef _BIG_ENDIAN
  66         { { 0, 0, 0x0000ffffU, 0 },     IN6ADDR_MASK96_INIT,
  67             "IPv4", 10 },
  68         { { 0x20020000U, 0, 0, 0 },     IN6ADDR_MASK16_INIT,
  69             "6to4", 30 },
  70 #else
  71         { { 0, 0, 0xffff0000U, 0 },     IN6ADDR_MASK96_INIT,
  72             "IPv4", 10 },
  73         { { 0x00000220U, 0, 0, 0 },     IN6ADDR_MASK16_INIT,
  74             "6to4", 30 },
  75 #endif
  76         { IN6ADDR_ANY_INIT,             IN6ADDR_ANY_INIT,
  77             "Default", 40 }
  78 };
  79 
  80 /*
  81  * The IPv6 Default Address Selection policy table.
  82  * Until someone up above reconfigures the policy table, use the global
  83  * default.  The table needs no lock since the only way to alter it is
  84  * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
  85  */
  86 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
  87 static void ip6_asp_check_for_updates(ip_stack_t *);
  88 
  89 void
  90 ip6_asp_init(ip_stack_t *ipst)
  91 {
  92         /* Initialize the table lock */
  93         mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
  94 
  95         ipst->ips_ip6_asp_table = default_ip6_asp_table;
  96 
  97         ipst->ips_ip6_asp_table_count =
  98             sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
  99 }
 100 
 101 void
 102 ip6_asp_free(ip_stack_t *ipst)
 103 {
 104         if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
 105                 kmem_free(ipst->ips_ip6_asp_table,
 106                     ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
 107                 ipst->ips_ip6_asp_table = NULL;
 108         }
 109         mutex_destroy(&ipst->ips_ip6_asp_lock);
 110 }
 111 
 112 /*
 113  * Return false if the table is being updated. Else, increment the ref
 114  * count and return true.
 115  */
 116 boolean_t
 117 ip6_asp_can_lookup(ip_stack_t *ipst)
 118 {
 119         mutex_enter(&ipst->ips_ip6_asp_lock);
 120         if (ipst->ips_ip6_asp_uip) {
 121                 mutex_exit(&ipst->ips_ip6_asp_lock);
 122                 return (B_FALSE);
 123         }
 124         IP6_ASP_TABLE_REFHOLD(ipst);
 125         mutex_exit(&ipst->ips_ip6_asp_lock);
 126         return (B_TRUE);
 127 
 128 }
 129 
 130 void
 131 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
 132 {
 133         conn_t  *connp = Q_TO_CONN(q);
 134         ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 135 
 136         ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
 137             (mp->b_next == NULL));
 138         mp->b_queue = (void *)q;
 139         mp->b_prev = (void *)func;
 140         mp->b_next = NULL;
 141 
 142         mutex_enter(&ipst->ips_ip6_asp_lock);
 143         if (ipst->ips_ip6_asp_pending_ops == NULL) {
 144                 ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
 145                 ipst->ips_ip6_asp_pending_ops =
 146                     ipst->ips_ip6_asp_pending_ops_tail = mp;
 147         } else {
 148                 ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
 149                 ipst->ips_ip6_asp_pending_ops_tail = mp;
 150         }
 151         mutex_exit(&ipst->ips_ip6_asp_lock);
 152 }
 153 
 154 static void
 155 ip6_asp_complete_op(ip_stack_t *ipst)
 156 {
 157         mblk_t          *mp;
 158         queue_t         *q;
 159         aspfunc_t       func;
 160 
 161         mutex_enter(&ipst->ips_ip6_asp_lock);
 162         while (ipst->ips_ip6_asp_pending_ops != NULL) {
 163                 mp = ipst->ips_ip6_asp_pending_ops;
 164                 ipst->ips_ip6_asp_pending_ops = mp->b_next;
 165                 mp->b_next = NULL;
 166                 if (ipst->ips_ip6_asp_pending_ops == NULL)
 167                         ipst->ips_ip6_asp_pending_ops_tail = NULL;
 168                 mutex_exit(&ipst->ips_ip6_asp_lock);
 169 
 170                 q = (queue_t *)mp->b_queue;
 171                 func = (aspfunc_t)mp->b_prev;
 172 
 173                 mp->b_prev = NULL;
 174                 mp->b_queue = NULL;
 175 
 176 
 177                 (*func)(NULL, q, mp, NULL);
 178                 mutex_enter(&ipst->ips_ip6_asp_lock);
 179         }
 180         mutex_exit(&ipst->ips_ip6_asp_lock);
 181 }
 182 
 183 /*
 184  * Decrement reference count. When it gets to 0, we check for (pending)
 185  * saved update to the table, if any.
 186  */
 187 void
 188 ip6_asp_table_refrele(ip_stack_t *ipst)
 189 {
 190         IP6_ASP_TABLE_REFRELE(ipst);
 191 }
 192 
 193 /*
 194  * This function is guaranteed never to return a NULL pointer.  It
 195  * will always return information from one of the entries in the
 196  * asp_table (which will never be empty).  If a pointer is passed
 197  * in for the precedence, the precedence value will be set; a
 198  * pointer to the label will be returned by the function.
 199  *
 200  * Since the table is only anticipated to have five or six entries
 201  * total, the lookup algorithm hasn't been optimized to anything
 202  * better than O(n).
 203  */
 204 char *
 205 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
 206 {
 207         ip6_asp_t *aspp;
 208         ip6_asp_t *match = NULL;
 209         ip6_asp_t *default_policy;
 210 
 211         aspp = ipst->ips_ip6_asp_table;
 212         /* The default entry must always be the last one */
 213         default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
 214 
 215         while (match == NULL) {
 216                 if (aspp == default_policy) {
 217                         match = aspp;
 218                 } else {
 219                         if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
 220                             aspp->ip6_asp_prefix))
 221                                 match = aspp;
 222                         else
 223                                 aspp++;
 224                 }
 225         }
 226 
 227         if (precedence != NULL)
 228                 *precedence = match->ip6_asp_precedence;
 229         return (match->ip6_asp_label);
 230 }
 231 
 232 /*
 233  * If we had deferred updating the table because of outstanding references,
 234  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
 235  * ip_sioctl_ip6addrpolicy() has already done it for us.
 236  */
 237 void
 238 ip6_asp_check_for_updates(ip_stack_t *ipst)
 239 {
 240         ip6_asp_t *table;
 241         size_t  table_size;
 242         mblk_t  *data_mp, *mp;
 243         struct iocblk *iocp;
 244 
 245         mutex_enter(&ipst->ips_ip6_asp_lock);
 246         if (ipst->ips_ip6_asp_pending_update == NULL ||
 247             ipst->ips_ip6_asp_refcnt > 0) {
 248                 mutex_exit(&ipst->ips_ip6_asp_lock);
 249                 return;
 250         }
 251 
 252         mp = ipst->ips_ip6_asp_pending_update;
 253         ipst->ips_ip6_asp_pending_update = NULL;
 254         ASSERT(mp->b_prev != NULL);
 255 
 256         ipst->ips_ip6_asp_uip = B_TRUE;
 257 
 258         iocp = (struct iocblk *)mp->b_rptr;
 259         data_mp = mp->b_cont;
 260         if (data_mp == NULL) {
 261                 table = NULL;
 262                 table_size = iocp->ioc_count;
 263         } else {
 264                 table = (ip6_asp_t *)data_mp->b_rptr;
 265                 table_size = iocp->ioc_count;
 266         }
 267 
 268         ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
 269             iocp->ioc_flag & IOC_MODELS);
 270 }
 271 
 272 /*
 273  * ip6_asp_replace replaces the contents of the IPv6 address selection
 274  * policy table with those specified in new_table.  If new_table is NULL,
 275  * this indicates that the caller wishes ip to use the default policy
 276  * table.  The caller is responsible for making sure that there are exactly
 277  * new_count policy entries in new_table.
 278  */
 279 /*ARGSUSED5*/
 280 void
 281 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
 282     boolean_t locked, ip_stack_t *ipst, model_t datamodel)
 283 {
 284         int                     ret_val = 0;
 285         ip6_asp_t               *tmp_table;
 286         uint_t                  count;
 287         queue_t                 *q;
 288         struct iocblk           *iocp;
 289 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
 290         size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
 291 #else
 292         const size_t ip6_asp_size = sizeof (ip6_asp_t);
 293 #endif
 294 
 295         if (new_size % ip6_asp_size != 0) {
 296                 ip1dbg(("ip6_asp_replace: invalid table size\n"));
 297                 ret_val = EINVAL;
 298                 if (locked)
 299                         goto unlock_end;
 300                 goto replace_end;
 301         } else {
 302                 count = new_size / ip6_asp_size;
 303         }
 304 
 305 
 306         if (!locked)
 307                 mutex_enter(&ipst->ips_ip6_asp_lock);
 308         /*
 309          * Check if we are in the process of creating any IRE using the
 310          * current information. If so, wait till that is done.
 311          */
 312         if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
 313                 /* Save this request for later processing */
 314                 if (ipst->ips_ip6_asp_pending_update == NULL) {
 315                         ipst->ips_ip6_asp_pending_update = mp;
 316                 } else {
 317                         /* Let's not queue multiple requests for now */
 318                         ip1dbg(("ip6_asp_replace: discarding request\n"));
 319                         mutex_exit(&ipst->ips_ip6_asp_lock);
 320                         ret_val =  EAGAIN;
 321                         goto replace_end;
 322                 }
 323                 mutex_exit(&ipst->ips_ip6_asp_lock);
 324                 return;
 325         }
 326 
 327         /* Prevent lookups till the table have been updated */
 328         if (!locked)
 329                 ipst->ips_ip6_asp_uip = B_TRUE;
 330 
 331         ASSERT(ipst->ips_ip6_asp_refcnt == 0);
 332 
 333         if (new_table == NULL) {
 334                 /*
 335                  * This is a special case.  The user wants to revert
 336                  * back to using the default table.
 337                  */
 338                 if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
 339                         goto unlock_end;
 340 
 341                 kmem_free(ipst->ips_ip6_asp_table,
 342                     ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
 343                 ipst->ips_ip6_asp_table = default_ip6_asp_table;
 344                 ipst->ips_ip6_asp_table_count =
 345                     sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
 346                 goto unlock_end;
 347         }
 348 
 349         if (count == 0) {
 350                 ret_val = EINVAL;
 351                 ip1dbg(("ip6_asp_replace: empty table\n"));
 352                 goto unlock_end;
 353         }
 354 
 355         if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
 356             NULL) {
 357                 ret_val = ENOMEM;
 358                 goto unlock_end;
 359         }
 360 
 361 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
 362 
 363         /*
 364          * If 'new_table' -actually- originates from a 32-bit process
 365          * then the nicely aligned ip6_asp_label array will be
 366          * subtlely misaligned on this kernel, because the structure
 367          * is 8 byte aligned in the kernel, but only 4 byte aligned in
 368          * userland.  Fix it up here.
 369          *
 370          * XX64 See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
 371          *      do the datamodel transformation (below) there instead of here?
 372          */
 373         if (datamodel == IOC_ILP32) {
 374                 ip6_asp_t *dst;
 375                 ip6_asp32_t *src;
 376                 int i;
 377 
 378                 if ((dst = kmem_zalloc(count * sizeof (*dst),
 379                     KM_NOSLEEP)) == NULL) {
 380                         kmem_free(tmp_table, count * sizeof (ip6_asp_t));
 381                         ret_val = ENOMEM;
 382                         goto unlock_end;
 383                 }
 384 
 385                 /*
 386                  * Copy each element of the table from ip6_asp32_t
 387                  * format into ip6_asp_t format.  Fortunately, since
 388                  * we're just dealing with a trailing structure pad,
 389                  * we can do this straightforwardly with a flurry of
 390                  * bcopying.
 391                  */
 392                 src = (void *)new_table;
 393                 for (i = 0; i < count; i++)
 394                         bcopy(src + i, dst + i, sizeof (*src));
 395 
 396                 ip6_asp_copy(dst, tmp_table, count);
 397                 kmem_free(dst, count * sizeof (*dst));
 398         } else
 399 #endif
 400                 ip6_asp_copy(new_table, tmp_table, count);
 401 
 402         /* Make sure the last entry is the default entry */
 403         if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
 404             !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
 405                 ret_val = EINVAL;
 406                 kmem_free(tmp_table, count * sizeof (ip6_asp_t));
 407                 ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
 408                 goto unlock_end;
 409         }
 410         if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
 411                 kmem_free(ipst->ips_ip6_asp_table,
 412                     ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
 413         }
 414         ipst->ips_ip6_asp_table = tmp_table;
 415         ipst->ips_ip6_asp_table_count = count;
 416 
 417 unlock_end:
 418         ipst->ips_ip6_asp_uip = B_FALSE;
 419         mutex_exit(&ipst->ips_ip6_asp_lock);
 420 
 421         /* Let conn_ixa caching know that source address selection changed */
 422         ip_update_source_selection(ipst);
 423 
 424 replace_end:
 425         /* Reply to the ioctl */
 426         q = (queue_t *)mp->b_prev;
 427         mp->b_prev = NULL;
 428         if (q == NULL) {
 429                 freemsg(mp);
 430                 goto check_binds;
 431         }
 432         iocp = (struct iocblk *)mp->b_rptr;
 433         iocp->ioc_error = ret_val;
 434         iocp->ioc_count = 0;
 435         DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
 436         qreply(q, mp);
 437 check_binds:
 438         ip6_asp_complete_op(ipst);
 439 }
 440 
 441 /*
 442  * Copies the contents of src_table to dst_table, and sorts the
 443  * entries in decending order of prefix lengths.  It assumes that both
 444  * tables are appropriately sized to contain count entries.
 445  */
 446 static void
 447 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
 448 {
 449         ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
 450 
 451         dst_table[0] = src_table[0];
 452         if (count == 1)
 453                 return;
 454 
 455         /*
 456          * Sort the entries in descending order of prefix lengths.
 457          *
 458          * Note: this should be a small table.  In 99% of cases, we
 459          * expect the table to have 5 entries.  In the remaining 1%
 460          * of cases, we expect the table to have one or two more
 461          * entries.  It would be very rare for the table to have
 462          * double-digit entries.
 463          */
 464         src_limit = src_table + count;
 465         dst_limit = dst_table + 1;
 466         for (src_ptr = src_table + 1; src_ptr != src_limit;
 467             src_ptr++, dst_limit++) {
 468                 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
 469                         if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
 470                             ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
 471                                 /*
 472                                  * Make room to insert the source entry
 473                                  * before dst_ptr by shifting entries to
 474                                  * the right.
 475                                  */
 476                                 for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
 477                                         *(dp + 1) = *dp;
 478                                 break;
 479                         }
 480                 }
 481                 *dst_ptr = *src_ptr;
 482         }
 483 }
 484 
 485 /*
 486  * This function copies as many entries from ip6_asp_table as will fit
 487  * into dtable.  The dtable_size parameter is the size of dtable
 488  * in bytes.  This function returns the number of entries in
 489  * ip6_asp_table, even if it's not able to fit all of the entries into
 490  * dtable.
 491  */
 492 int
 493 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
 494 {
 495         uint_t dtable_count;
 496 
 497         if (dtable != NULL) {
 498                 if (dtable_size < sizeof (ip6_asp_t))
 499                         return (-1);
 500 
 501                 dtable_count = dtable_size / sizeof (ip6_asp_t);
 502                 bcopy(ipst->ips_ip6_asp_table, dtable,
 503                     MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
 504                     sizeof (ip6_asp_t));
 505         }
 506 
 507         return (ipst->ips_ip6_asp_table_count);
 508 }
 509 
 510 /*
 511  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
 512  * otherwise.
 513  */
 514 boolean_t
 515 ip6_asp_labelcmp(const char *label1, const char *label2)
 516 {
 517         int64_t *llptr1, *llptr2;
 518 
 519         /*
 520          * The common case, the two labels are actually the same string
 521          * from the policy table.
 522          */
 523         if (label1 == label2)
 524                 return (B_TRUE);
 525 
 526         /*
 527          * Since we know the labels are at most 16 bytes long, compare
 528          * the two strings as two 8-byte long integers.  The ip6_asp_t
 529          * structure guarantees that the labels are 8 byte alligned.
 530          */
 531         llptr1 = (int64_t *)label1;
 532         llptr2 = (int64_t *)label2;
 533         if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
 534                 return (B_TRUE);
 535         return (B_FALSE);
 536 }