/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This module supports AF_TRILL sockets and TRILL layer-2 forwarding.
29 */ 30 31 #include <sys/strsubr.h> 32 #include <sys/socket.h> 33 #include <sys/socketvar.h> 34 #include <sys/modctl.h> 35 #include <sys/cmn_err.h> 36 #include <sys/tihdr.h> 37 #include <sys/strsun.h> 38 #include <sys/policy.h> 39 #include <sys/ethernet.h> 40 #include <sys/vlan.h> 41 #include <net/trill.h> 42 #include <net/if_dl.h> 43 #include <sys/mac.h> 44 #include <sys/mac_client.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_client_priv.h> 47 #include <sys/sdt.h> 48 #include <sys/dls.h> 49 #include <sys/sunddi.h> 50 51 #include "trill_impl.h" 52 53 static void trill_del_all(trill_inst_t *, boolean_t); 54 static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t); 55 static void trill_stop_recv(trill_sock_t *); 56 static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *, 57 uint16_t); 58 static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t); 59 static void trill_node_unref(trill_inst_t *, trill_node_t *); 60 static void trill_sock_unref(trill_sock_t *); 61 static void trill_kstats_init(trill_sock_t *, const char *); 62 63 static list_t trill_inst_list; 64 static krwlock_t trill_inst_rwlock; 65 66 static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **, 67 uint_t *, int *, int, cred_t *); 68 69 static smod_reg_t sinfo = { 70 SOCKMOD_VERSION, 71 "trill", 72 SOCK_UC_VERSION, 73 SOCK_DC_VERSION, 74 trill_create, 75 NULL, 76 }; 77 78 /* modldrv structure */ 79 static struct modlsockmod sockmod = { 80 &mod_sockmodops, "AF_TRILL socket module", &sinfo 81 }; 82 83 /* modlinkage structure */ 84 static struct modlinkage ml = { 85 MODREV_1, 86 &sockmod, 87 NULL 88 }; 89 90 #define VALID_NICK(n) ((n) != RBRIDGE_NICKNAME_NONE && \ 91 (n) != RBRIDGE_NICKNAME_UNUSED) 92 93 static mblk_t * 94 create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr, 95 boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci, 96 size_t msglen) 97 { 98 int extra_hdr_len; 99 struct ether_vlan_header *ethvlanhdr; 100 
mblk_t *hdr_mp; 101 uint16_t etype; 102 103 etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL; 104 105 /* When sending on the PVID, we must not give a VLAN ID */ 106 if (tci == tsock->ts_link->bl_pvid) 107 tci = TRILL_NO_TCI; 108 109 /* 110 * Create new Ethernet header and include additional space 111 * for writing TRILL header and/or VLAN tag. 112 */ 113 extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) + 114 (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0); 115 hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr, 116 tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len); 117 if (hdr_mp == NULL) { 118 freemsg(mp); 119 return (NULL); 120 } 121 122 if (tci != TRILL_NO_TCI) { 123 /* LINTED: alignment */ 124 ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr; 125 ethvlanhdr->ether_tci = htons(tci); 126 ethvlanhdr->ether_type = htons(etype); 127 hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo); 128 } 129 130 if (!trill_hdr_ok) { 131 trill_header_t *thp; 132 /* LINTED: alignment */ 133 thp = (trill_header_t *)hdr_mp->b_wptr; 134 (void) memset(thp, 0, sizeof (trill_header_t)); 135 thp->th_hopcount = TRILL_DEFAULT_HOPS; 136 thp->th_multidest = (multidest ? 1:0); 137 hdr_mp->b_wptr += sizeof (trill_header_t); 138 } 139 140 hdr_mp->b_cont = mp; 141 return (hdr_mp); 142 } 143 144 /* 145 * TRILL local recv function. TRILL data frames that should be received 146 * by the local system are decapsulated here and passed to bridging for 147 * learning and local system receive. Only called when we are the forwarder 148 * on the link (multi-dest frames) or the frame was destined for us. 
149 */ 150 static void 151 trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick) 152 { 153 struct ether_header *inner_ethhdr; 154 155 /* LINTED: alignment */ 156 inner_ethhdr = (struct ether_header *)mp->b_rptr; 157 DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr); 158 159 DB_CKSUMFLAGS(mp) = 0; 160 /* 161 * Transmit the decapsulated frame on the link via Bridging. 162 * Bridging does source address learning and appropriate forwarding. 163 */ 164 bridge_trill_decaps(tsock->ts_link, mp, ingressnick); 165 KSPINCR(tks_decap); 166 } 167 168 /* 169 * Determines the outgoing link to reach a RBridge having the given nick 170 * Assumes caller has acquired the trill instance rwlock. 171 */ 172 static trill_sock_t * 173 find_trill_link(trill_inst_t *tip, datalink_id_t linkid) 174 { 175 trill_sock_t *tsp = NULL; 176 177 ASSERT(RW_LOCK_HELD(&tip->ti_rwlock)); 178 for (tsp = list_head(&tip->ti_socklist); tsp != NULL; 179 tsp = list_next(&tip->ti_socklist, tsp)) { 180 if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) { 181 ASSERT(tsp->ts_link->bl_mh != NULL); 182 ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN)); 183 atomic_inc_uint(&tsp->ts_refs); 184 break; 185 } 186 } 187 return (tsp); 188 } 189 190 /* 191 * TRILL destination forwarding function. Transmits the TRILL data packet 192 * to the next-hop, adjacent RBridge. Consumes passed mblk_t. 
193 */ 194 static void 195 trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick, 196 boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick) 197 { 198 trill_node_t *adj; 199 trill_sock_t *tsock = NULL; 200 trill_header_t *trillhdr; 201 struct ether_header *ethhdr; 202 int ethtype; 203 int ethhdrlen; 204 205 adj = trill_node_lookup(tip, adj_nick); 206 if (adj == NULL || ((tsock = adj->tn_tsp) == NULL)) 207 goto dest_fwd_fail; 208 209 ASSERT(tsock->ts_link != NULL); 210 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 211 ASSERT(adj->tn_ni != NULL); 212 213 DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t, 214 adj, trill_sock_t, tsock); 215 216 /* 217 * For broadcast links by using the dest address of 218 * the RBridge to forward the frame should result in 219 * savings. When the link is a bridged LAN or there are 220 * many end stations the frame will not always be flooded. 221 */ 222 fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa, 223 has_trill_hdr, multidest, tsock->ts_desigvlan, 0); 224 if (fwd_mp == NULL) 225 goto dest_fwd_fail; 226 227 /* LINTED: alignment */ 228 ethhdr = (struct ether_header *)fwd_mp->b_rptr; 229 ethtype = ntohs(ethhdr->ether_type); 230 ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL); 231 232 /* Pullup Ethernet and TRILL header (w/o TRILL options) */ 233 ethhdrlen = sizeof (struct ether_header) + 234 (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0); 235 if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t))) 236 goto dest_fwd_fail; 237 /* LINTED: alignment */ 238 trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen); 239 240 /* Update TRILL header with ingress and egress nicks for new frames */ 241 if (!has_trill_hdr) { 242 /* We are creating a new TRILL frame */ 243 trillhdr->th_egressnick = (multidest ? 
dtnick:adj_nick); 244 rw_enter(&tip->ti_rwlock, RW_READER); 245 trillhdr->th_ingressnick = tip->ti_nick; 246 rw_exit(&tip->ti_rwlock); 247 if (!VALID_NICK(trillhdr->th_ingressnick)) 248 goto dest_fwd_fail; 249 } 250 251 /* Set hop count and update header in packet */ 252 ASSERT(trillhdr->th_hopcount != 0); 253 trillhdr->th_hopcount--; 254 255 /* Clear checksum flag and transmit frame on the link */ 256 DB_CKSUMFLAGS(fwd_mp) = 0; 257 DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr); 258 fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp); 259 if (fwd_mp == NULL) { 260 KSPINCR(tks_sent); 261 KSPINCR(tks_forward); 262 } else { 263 freemsg(fwd_mp); 264 KSPINCR(tks_drops); 265 } 266 trill_node_unref(tip, adj); 267 return; 268 269 dest_fwd_fail: 270 if (adj != NULL) 271 trill_node_unref(tip, adj); 272 if (tsock != NULL) 273 KSPINCR(tks_drops); 274 freemsg(fwd_mp); 275 } 276 277 /* 278 * TRILL multi-destination forwarding. Transmits the packet to the adjacencies 279 * on the distribution tree determined by the egress nick. Source addr (saddr) 280 * is NULL for new TRILL packets originating from us. 
281 */ 282 static void 283 trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick, 284 uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr, 285 int inner_vlan, boolean_t free_mblk) 286 { 287 int idx; 288 uint16_t adjnick; 289 trill_node_t *dest; 290 trill_node_t *adj; 291 mblk_t *fwd_mp; 292 boolean_t nicksaved = B_FALSE; 293 uint16_t adjnicksaved; 294 295 /* Lookup the egress nick info, this is the DT root */ 296 if ((dest = trill_node_lookup(tip, egressnick)) == NULL) 297 goto fail_multidest_fwd; 298 299 /* Send a copy to all our adjacencies on the DT root */ 300 ASSERT(dest->tn_ni); 301 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 302 303 /* Check for a valid adjacency node */ 304 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 305 if (!VALID_NICK(adjnick) || ingressnick == adjnick || 306 ((adj = trill_node_lookup(tip, adjnick)) == NULL)) 307 continue; 308 309 /* Do not forward back to adjacency that sent the pkt to us */ 310 ASSERT(adj->tn_ni != NULL); 311 if ((saddr != NULL) && 312 (memcmp(adj->tn_ni->tni_adjsnpa, saddr, 313 ETHERADDRL) == 0)) { 314 trill_node_unref(tip, adj); 315 continue; 316 } 317 318 /* Check if adj is marked as reaching inner VLAN downstream */ 319 if ((inner_vlan != VLAN_ID_NONE) && 320 !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx), 321 inner_vlan)) { 322 trill_node_unref(tip, adj); 323 DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered, 324 uint16_t, adjnick, uint16_t, ingressnick, 325 uint16_t, egressnick, int, inner_vlan); 326 continue; 327 } 328 329 trill_node_unref(tip, adj); 330 331 /* 332 * Save the nick and look ahead to see if we should forward the 333 * frame to more adjacencies. We avoid doing a copy for this 334 * nick and use the passed mblk when we can consume the passed 335 * mblk. 
336 */ 337 if (free_mblk && !nicksaved) { 338 adjnicksaved = adjnick; 339 nicksaved = B_TRUE; 340 continue; 341 } 342 343 fwd_mp = copymsg(mp); 344 if (fwd_mp == NULL) 345 break; 346 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 347 adjnick, uint16_t, ingressnick); 348 trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt, 349 B_TRUE, egressnick); 350 } 351 trill_node_unref(tip, dest); 352 353 if (nicksaved) { 354 ASSERT(free_mblk); 355 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 356 adjnicksaved, uint16_t, ingressnick); 357 trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt, 358 B_TRUE, egressnick); 359 return; 360 } 361 362 fail_multidest_fwd: 363 DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t, 364 egressnick, uint16_t, ingressnick); 365 if (free_mblk) { 366 freemsg(mp); 367 } 368 } 369 370 /* 371 * TRILL data receive function. Forwards the received frame if necessary 372 * and also determines if the received frame should be consumed locally. 373 * Consumes passed mblk. 374 */ 375 static void 376 trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr) 377 { 378 trill_header_t *trillhdr; 379 trill_node_t *dest = NULL; 380 trill_node_t *source = NULL; 381 trill_node_t *adj; 382 uint16_t ournick, adjnick, treeroot; 383 struct ether_header *ethhdr; 384 trill_inst_t *tip = tsock->ts_tip; 385 uint8_t srcaddr[ETHERADDRL]; 386 size_t trillhdrlen; 387 int inner_vlan = VLAN_ID_NONE; 388 int tci; 389 int idx; 390 size_t min_size; 391 392 /* Copy Ethernet source address before modifying packet */ 393 (void) memcpy(srcaddr, mpsaddr, ETHERADDRL); 394 395 /* Pull up TRILL header if necessary. 
*/ 396 min_size = sizeof (trill_header_t); 397 if ((MBLKL(mp) < min_size || 398 !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) && 399 !pullupmsg(mp, min_size)) 400 goto fail; 401 402 /* LINTED: alignment */ 403 trillhdr = (trill_header_t *)mp->b_rptr; 404 if (trillhdr->th_version != TRILL_PROTOCOL_VERS) { 405 DTRACE_PROBE1(trill__recv__wrongversion, 406 trill_header_t *, trillhdr); 407 goto fail; 408 } 409 410 /* Drop if unknown or invalid nickname */ 411 if (!VALID_NICK(trillhdr->th_egressnick) || 412 !VALID_NICK(trillhdr->th_ingressnick)) { 413 DTRACE_PROBE1(trill__recv__invalidnick, 414 trill_header_t *, trillhdr); 415 goto fail; 416 } 417 418 rw_enter(&tip->ti_rwlock, RW_READER); 419 ournick = tip->ti_nick; 420 treeroot = tip->ti_treeroot; 421 rw_exit(&tip->ti_rwlock); 422 /* Drop if we received a packet with our nick as ingress */ 423 if (trillhdr->th_ingressnick == ournick) 424 goto fail; 425 426 /* Re-pull any TRILL options and inner Ethernet header */ 427 min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) + 428 sizeof (struct ether_header); 429 if (MBLKL(mp) < min_size) { 430 if (!pullupmsg(mp, min_size)) 431 goto fail; 432 /* LINTED: alignment */ 433 trillhdr = (trill_header_t *)mp->b_rptr; 434 } 435 trillhdrlen = sizeof (trill_header_t) + 436 (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t)); 437 438 /* 439 * Get the inner Ethernet header, plus the inner VLAN header if there 440 * is one. 
441 */ 442 /* LINTED: alignment */ 443 ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen); 444 if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) { 445 min_size += sizeof (struct ether_vlan_extinfo); 446 if (MBLKL(mp) < min_size) { 447 if (!pullupmsg(mp, min_size)) 448 goto fail; 449 /* LINTED: alignment */ 450 trillhdr = (trill_header_t *)mp->b_rptr; 451 /* LINTED: alignment */ 452 ethhdr = (struct ether_header *)(mp->b_rptr + 453 trillhdrlen); 454 } 455 456 tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci); 457 inner_vlan = VLAN_ID(tci); 458 } 459 460 /* Known/single destination forwarding. */ 461 if (!trillhdr->th_multidest) { 462 463 /* Inner MacDA must be unicast */ 464 if (ethhdr->ether_dhost.ether_addr_octet[0] & 1) 465 goto fail; 466 467 /* Ingress and Egress nicks must be different */ 468 if (trillhdr->th_egressnick == trillhdr->th_ingressnick) 469 goto fail; 470 471 DTRACE_PROBE1(trill__recv__singledest, 472 trill_header_t *, trillhdr); 473 if (trillhdr->th_egressnick == ournick) { 474 mp->b_rptr += trillhdrlen; 475 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 476 } else if (trillhdr->th_hopcount > 0) { 477 trill_dest_fwd(tip, mp, trillhdr->th_egressnick, 478 B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE); 479 } else { 480 goto fail; 481 } 482 return; 483 } 484 485 /* 486 * Multi-destination frame: perform checks verifying we have 487 * received a valid multi-destination frame before receiving the 488 * frame locally and forwarding the frame to other RBridges. 489 * 490 * Check if we received this multi-destination frame on a 491 * adjacency in the distribution tree indicated by the frame's 492 * egress nickname. 
493 */ 494 if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL) 495 goto fail; 496 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 497 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 498 if ((adj = trill_node_lookup(tip, adjnick)) == NULL) 499 continue; 500 if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) { 501 trill_node_unref(tip, adj); 502 break; 503 } 504 trill_node_unref(tip, adj); 505 } 506 507 if (idx >= dest->tn_ni->tni_adjcount) { 508 DTRACE_PROBE2(trill__recv__multidest__adjcheckfail, 509 trill_header_t *, trillhdr, trill_node_t *, dest); 510 goto fail; 511 } 512 513 /* 514 * Reverse path forwarding check. Check if the ingress RBridge 515 * that has forwarded the frame advertised the use of the 516 * distribution tree specified in the egress nick. 517 */ 518 if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL) 519 goto fail; 520 for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) { 521 if (TNI_DTROOTNICK(source->tn_ni, idx) == 522 trillhdr->th_egressnick) 523 break; 524 } 525 526 if (idx >= source->tn_ni->tni_dtrootcount) { 527 /* 528 * Allow receipt of forwarded frame with the highest 529 * tree root RBridge as the egress RBridge when the 530 * ingress RBridge has not advertised the use of any 531 * distribution trees. 
532 */ 533 if (source->tn_ni->tni_dtrootcount != 0 || 534 trillhdr->th_egressnick != treeroot) { 535 DTRACE_PROBE3( 536 trill__recv__multidest__rpfcheckfail, 537 trill_header_t *, trillhdr, trill_node_t *, 538 source, trill_inst_t *, tip); 539 goto fail; 540 } 541 } 542 543 /* Check hop count before doing any forwarding */ 544 if (trillhdr->th_hopcount == 0) 545 goto fail; 546 547 /* Forward frame using the distribution tree specified by egress nick */ 548 DTRACE_PROBE2(trill__recv__multidest, trill_header_t *, 549 trillhdr, trill_node_t *, source); 550 trill_node_unref(tip, source); 551 trill_node_unref(tip, dest); 552 553 /* Tell forwarding not to free if we're the link forwarder. */ 554 trill_multidest_fwd(tip, mp, trillhdr->th_egressnick, 555 trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan, 556 B_FALSE); 557 558 /* 559 * Send de-capsulated frame locally if we are the link forwarder (also 560 * does bridge learning). 561 */ 562 mp->b_rptr += trillhdrlen; 563 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 564 KSPINCR(tks_recv); 565 return; 566 567 fail: 568 DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp, 569 trill_sock_t *, tsock); 570 if (dest != NULL) 571 trill_node_unref(tip, dest); 572 if (source != NULL) 573 trill_node_unref(tip, source); 574 freemsg(mp); 575 KSPINCR(tks_drops); 576 } 577 578 static void 579 trill_stop_recv(trill_sock_t *tsock) 580 { 581 mutex_enter(&tsock->ts_socklock); 582 stop_retry: 583 if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) { 584 mutex_exit(&tsock->ts_socklock); 585 return; 586 } 587 588 /* 589 * If another thread is closing the socket then wait. Our callers 590 * expect us to return only after the socket is closed. 591 */ 592 if (tsock->ts_flags & TSF_CLOSEWAIT) { 593 cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock); 594 goto stop_retry; 595 } 596 597 /* 598 * Set state and flags to block new bind or close calls 599 * while we close the socket. 
600 */ 601 tsock->ts_flags |= TSF_CLOSEWAIT; 602 603 /* Wait until all AF_TRILL socket transmit operations are done */ 604 while (tsock->ts_sockthreadcount > 0) 605 cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock); 606 607 /* 608 * We are guaranteed to be the only thread closing on the 609 * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait 610 * for us to finish. 611 */ 612 ASSERT(tsock->ts_link != NULL); 613 if (tsock->ts_ksp != NULL) 614 kstat_delete(tsock->ts_ksp); 615 616 /* 617 * Release lock before bridge_trill_lnunref to prevent deadlock 618 * between trill_ctrl_input thread waiting to acquire ts_socklock 619 * and bridge_trill_lnunref waiting for the trill thread to finish. 620 */ 621 mutex_exit(&tsock->ts_socklock); 622 623 /* 624 * Release TRILL link reference from Bridging. On return from 625 * bridge_trill_lnunref we can be sure there are no active TRILL data 626 * threads for this link. 627 */ 628 bridge_trill_lnunref(tsock->ts_link); 629 630 /* Set socket as unbound & wakeup threads waiting for socket to close */ 631 mutex_enter(&tsock->ts_socklock); 632 ASSERT(tsock->ts_link != NULL); 633 tsock->ts_link = NULL; 634 tsock->ts_state = TS_UNBND; 635 tsock->ts_flags &= ~TSF_CLOSEWAIT; 636 cv_broadcast(&tsock->ts_sockclosewait); 637 mutex_exit(&tsock->ts_socklock); 638 } 639 640 static int 641 trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len) 642 { 643 struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa; 644 datalink_id_t linkid; 645 int err = 0; 646 647 if (len != sizeof (*lladdr)) 648 return (EINVAL); 649 650 mutex_enter(&tsock->ts_socklock); 651 if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) { 652 err = EINVAL; 653 goto bind_error; 654 } 655 656 if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) { 657 err = EBUSY; 658 goto bind_error; 659 } 660 661 (void) memcpy(&(tsock->ts_lladdr), lladdr, 662 sizeof (struct sockaddr_dl)); 663 (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data, 
664 sizeof (datalink_id_t)); 665 666 tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst, 667 linkid, tsock); 668 if (tsock->ts_link == NULL) { 669 err = EINVAL; 670 goto bind_error; 671 } 672 673 trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename); 674 tsock->ts_state = TS_IDLE; 675 676 bind_error: 677 mutex_exit(&tsock->ts_socklock); 678 return (err); 679 } 680 681 static int 682 trill_do_unbind(trill_sock_t *tsock) 683 { 684 /* If a bind has not been done, we can't unbind. */ 685 if (tsock->ts_state != TS_IDLE) 686 return (EINVAL); 687 688 trill_stop_recv(tsock); 689 return (0); 690 } 691 692 static void 693 trill_instance_unref(trill_inst_t *tip) 694 { 695 rw_enter(&trill_inst_rwlock, RW_WRITER); 696 rw_enter(&tip->ti_rwlock, RW_WRITER); 697 if (atomic_dec_uint_nv(&tip->ti_refs) == 0) { 698 list_remove(&trill_inst_list, tip); 699 rw_exit(&tip->ti_rwlock); 700 rw_exit(&trill_inst_rwlock); 701 if (tip->ti_binst != NULL) 702 bridge_trill_brunref(tip->ti_binst); 703 list_destroy(&tip->ti_socklist); 704 rw_destroy(&tip->ti_rwlock); 705 kmem_free(tip, sizeof (*tip)); 706 } else { 707 rw_exit(&tip->ti_rwlock); 708 rw_exit(&trill_inst_rwlock); 709 } 710 } 711 712 /* 713 * This is called when the bridge module receives a TRILL-encapsulated packet 714 * on a given link or a packet identified as "TRILL control." We must verify 715 * that it's for us (it almost certainly will be), and then either decapsulate 716 * (if it's to our nickname), forward (if it's to someone else), or send up one 717 * of the sockets (if it's control traffic). 718 * 719 * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and 720 * not by TRILL header information. 
721 */ 722 static void 723 trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc, 724 mblk_t *mp, mac_header_info_t *hdr_info) 725 { 726 trill_sock_t *tsock = lptr; 727 728 _NOTE(ARGUNUSED(rsrc)); 729 730 ASSERT(tsock->ts_tip != NULL); 731 ASSERT(tsock->ts_link != NULL); 732 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 733 734 /* 735 * Only receive packet if the source address is not multicast (which is 736 * bogus). 737 */ 738 if (hdr_info->mhi_saddr[0] & 1) 739 goto discard; 740 741 /* 742 * Check if this is our own packet reflected back. It should not be. 743 */ 744 if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0) 745 goto discard; 746 747 /* Only receive unicast packet if addressed to us */ 748 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST && 749 bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0) 750 goto discard; 751 752 if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) { 753 /* TRILL data packets */ 754 trill_recv(tsock, mp, hdr_info->mhi_saddr); 755 } else { 756 /* Design constraint for cheap IS-IS/BPDU comparison */ 757 ASSERT(all_isis_rbridges[4] != bridge_group_address[4]); 758 /* Send received control packet upstream */ 759 trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr, 760 hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ? 761 hdr_info->mhi_tci : TRILL_TCI_BPDU); 762 } 763 764 return; 765 766 discard: 767 freemsg(mp); 768 KSPINCR(tks_drops); 769 } 770 771 /* 772 * This is called when the bridge module discovers that the destination address 773 * for a packet is not local -- it's through some remote node. We must verify 774 * that the remote node isn't our nickname (it shouldn't be), add a TRILL 775 * header, and then use the IS-IS data to determine which link and which 776 * next-hop RBridge should be used for output. We then transmit on that link. 777 * 778 * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case. 
779 */ 780 static void 781 trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info, 782 mblk_t *mp, uint16_t egress_nick) 783 { 784 uint16_t ournick; 785 uint16_t dtnick; 786 trill_node_t *self = NULL; 787 trill_sock_t *tsock = lptr; 788 trill_inst_t *tip = tsock->ts_tip; 789 int vlan = VLAN_ID_NONE; 790 791 _NOTE(ARGUNUSED(blp)); 792 ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL); 793 794 /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */ 795 if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick)) 796 goto discard; 797 798 /* Check if our own nick is valid before we do any forwarding */ 799 rw_enter(&tip->ti_rwlock, RW_READER); 800 ournick = tip->ti_nick; 801 dtnick = tip->ti_treeroot; 802 rw_exit(&tip->ti_rwlock); 803 if (!VALID_NICK(ournick)) 804 goto discard; 805 806 /* 807 * For Multi-Destination forwarding determine our choice of 808 * root distribution tree. If we didn't choose a distribution 809 * tree (dtroots_count=0) then we use the highest priority tree 810 * root (t_treeroot) else we drop the packet without forwarding. 811 */ 812 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 813 if ((self = trill_node_lookup(tip, ournick)) == NULL) 814 goto discard; 815 816 /* 817 * Use the first DT configured for now. In future we 818 * should have DT selection code here. 819 */ 820 if (self->tn_ni->tni_dtrootcount > 0) { 821 dtnick = TNI_DTROOTNICK(self->tn_ni, 0); 822 } 823 824 trill_node_unref(tip, self); 825 if (!VALID_NICK(dtnick)) { 826 DTRACE_PROBE(trill__fwd__packet__nodtroot); 827 goto discard; 828 } 829 } 830 831 /* 832 * Retrieve VLAN ID of the native frame used for VLAN 833 * pruning of multi-destination frames. 
834 */ 835 if (hdr_info->mhi_istagged) { 836 vlan = VLAN_ID(hdr_info->mhi_tci); 837 } 838 839 DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info, 840 uint16_t, egress_nick); 841 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 842 trill_multidest_fwd(tip, mp, dtnick, 843 ournick, B_FALSE, NULL, vlan, B_TRUE); 844 } else { 845 trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE, 846 RBRIDGE_NICKNAME_NONE); 847 } 848 KSPINCR(tks_encap); 849 return; 850 851 discard: 852 freemsg(mp); 853 } 854 855 /* 856 * This is called when the bridge module has completely torn down a bridge 857 * instance and all of the attached links. We need to make the TRILL instance 858 * go away at this point. 859 */ 860 static void 861 trill_br_dstr_cb(void *bptr, bridge_inst_t *bip) 862 { 863 trill_inst_t *tip = bptr; 864 865 _NOTE(ARGUNUSED(bip)); 866 rw_enter(&tip->ti_rwlock, RW_WRITER); 867 if (tip->ti_binst != NULL) 868 bridge_trill_brunref(tip->ti_binst); 869 tip->ti_binst = NULL; 870 rw_exit(&tip->ti_rwlock); 871 } 872 873 /* 874 * This is called when the bridge module is tearing down a link, but before the 875 * actual tear-down starts. When this function returns, we must make sure that 876 * we will not initiate any new transmits on this link. 
877 */ 878 static void 879 trill_ln_dstr_cb(void *lptr, bridge_link_t *blp) 880 { 881 trill_sock_t *tsock = lptr; 882 883 _NOTE(ARGUNUSED(blp)); 884 trill_stop_recv(tsock); 885 } 886 887 static void 888 trill_init(void) 889 { 890 list_create(&trill_inst_list, sizeof (trill_inst_t), 891 offsetof(trill_inst_t, ti_instnode)); 892 rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL); 893 bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb, 894 trill_br_dstr_cb, trill_ln_dstr_cb); 895 } 896 897 static void 898 trill_fini(void) 899 { 900 bridge_trill_register_cb(NULL, NULL, NULL, NULL); 901 rw_destroy(&trill_inst_rwlock); 902 list_destroy(&trill_inst_list); 903 } 904 905 /* Loadable module configuration entry points */ 906 int 907 _init(void) 908 { 909 int rc; 910 911 trill_init(); 912 if ((rc = mod_install(&ml)) != 0) 913 trill_fini(); 914 return (rc); 915 } 916 917 int 918 _info(struct modinfo *modinfop) 919 { 920 return (mod_info(&ml, modinfop)); 921 } 922 923 int 924 _fini(void) 925 { 926 int rc; 927 928 rw_enter(&trill_inst_rwlock, RW_READER); 929 rc = list_is_empty(&trill_inst_list) ? 
0 : EBUSY; 930 rw_exit(&trill_inst_rwlock); 931 if (rc == 0 && ((rc = mod_remove(&ml)) == 0)) 932 trill_fini(); 933 return (rc); 934 } 935 936 static void 937 trill_kstats_init(trill_sock_t *tsock, const char *bname) 938 { 939 int i; 940 char kstatname[KSTAT_STRLEN]; 941 kstat_named_t *knt; 942 static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES }; 943 char link_name[MAXNAMELEN]; 944 int num; 945 int err; 946 947 bzero(link_name, sizeof (link_name)); 948 if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name, 949 NULL, NULL, NULL)) != 0) { 950 cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving" 951 " linkinfo for linkid:%d", "trill", err, 952 tsock->ts_link->bl_linkid); 953 return; 954 } 955 956 bzero(kstatname, sizeof (kstatname)); 957 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", 958 bname, link_name); 959 960 num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list); 961 for (i = 0; i < num; i++) { 962 knt = (kstat_named_t *)&(tsock->ts_kstats); 963 kstat_named_init(&knt[i], sock_kstats_list[i], 964 KSTAT_DATA_UINT64); 965 } 966 967 tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock", 968 KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 969 if (tsock->ts_ksp != NULL) { 970 tsock->ts_ksp->ks_data = &tsock->ts_kstats; 971 kstat_install(tsock->ts_ksp); 972 } 973 } 974 975 static trill_sock_t * 976 trill_do_open(int flags) 977 { 978 trill_sock_t *tsock; 979 int kmflag = ((flags & SOCKET_NOSLEEP)) ? 
KM_NOSLEEP:KM_SLEEP; 980 981 tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag); 982 if (tsock != NULL) { 983 tsock->ts_state = TS_UNBND; 984 tsock->ts_refs++; 985 mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL); 986 cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL); 987 cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL); 988 } 989 return (tsock); 990 } 991 992 static int 993 trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create) 994 { 995 trill_inst_t *tip, *newtip = NULL; 996 997 /* Allocate some memory (speculatively) before taking locks */ 998 if (can_create) 999 newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 1000 1001 rw_enter(&trill_inst_rwlock, RW_WRITER); 1002 for (tip = list_head(&trill_inst_list); tip != NULL; 1003 tip = list_next(&trill_inst_list, tip)) { 1004 if (strcmp(tip->ti_bridgename, bname) == 0) 1005 break; 1006 } 1007 if (tip == NULL) { 1008 if (!can_create || newtip == NULL) { 1009 rw_exit(&trill_inst_rwlock); 1010 return (can_create ? ENOMEM : ENOENT); 1011 } 1012 1013 tip = newtip; 1014 newtip = NULL; 1015 (void) strcpy(tip->ti_bridgename, bname); 1016 1017 /* Register TRILL instance with bridging */ 1018 tip->ti_binst = bridge_trill_brref(bname, tip); 1019 if (tip->ti_binst == NULL) { 1020 rw_exit(&trill_inst_rwlock); 1021 kmem_free(tip, sizeof (*tip)); 1022 return (ENOENT); 1023 } 1024 1025 rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL); 1026 list_create(&tip->ti_socklist, sizeof (trill_sock_t), 1027 offsetof(trill_sock_t, ts_socklistnode)); 1028 list_insert_tail(&trill_inst_list, tip); 1029 } 1030 atomic_inc_uint(&tip->ti_refs); 1031 rw_exit(&trill_inst_rwlock); 1032 1033 /* If we didn't need the preallocated memory, then discard now. 
*/ 1034 if (newtip != NULL) 1035 kmem_free(newtip, sizeof (*newtip)); 1036 1037 rw_enter(&tip->ti_rwlock, RW_WRITER); 1038 list_insert_tail(&(tip->ti_socklist), tsock); 1039 tsock->ts_tip = tip; 1040 rw_exit(&tip->ti_rwlock); 1041 return (0); 1042 } 1043 1044 static void 1045 trill_clear_bridge(trill_sock_t *tsock) 1046 { 1047 trill_inst_t *tip; 1048 1049 if ((tip = tsock->ts_tip) == NULL) 1050 return; 1051 rw_enter(&tip->ti_rwlock, RW_WRITER); 1052 list_remove(&tip->ti_socklist, tsock); 1053 if (list_is_empty(&tip->ti_socklist)) 1054 trill_del_all(tip, B_TRUE); 1055 rw_exit(&tip->ti_rwlock); 1056 } 1057 1058 static void 1059 trill_sock_unref(trill_sock_t *tsock) 1060 { 1061 if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) { 1062 mutex_destroy(&tsock->ts_socklock); 1063 cv_destroy(&tsock->ts_sockthreadwait); 1064 cv_destroy(&tsock->ts_sockclosewait); 1065 kmem_free(tsock, sizeof (trill_sock_t)); 1066 } 1067 } 1068 1069 static void 1070 trill_do_close(trill_sock_t *tsock) 1071 { 1072 trill_inst_t *tip; 1073 1074 tip = tsock->ts_tip; 1075 trill_stop_recv(tsock); 1076 /* Remove socket from TRILL instance socket list */ 1077 trill_clear_bridge(tsock); 1078 tsock->ts_flags |= TSF_SHUTDOWN; 1079 trill_sock_unref(tsock); 1080 if (tip != NULL) 1081 trill_instance_unref(tip); 1082 } 1083 1084 static void 1085 trill_del_all(trill_inst_t *tip, boolean_t lockheld) 1086 { 1087 int i; 1088 1089 if (!lockheld) 1090 rw_enter(&tip->ti_rwlock, RW_WRITER); 1091 for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) { 1092 if (tip->ti_nodes[i] != NULL) 1093 (void) trill_del_nick(tip, i, B_TRUE); 1094 } 1095 if (!lockheld) 1096 rw_exit(&tip->ti_rwlock); 1097 } 1098 1099 static void 1100 trill_node_free(trill_node_t *nick_entry) 1101 { 1102 trill_nickinfo_t *tni; 1103 1104 tni = nick_entry->tn_ni; 1105 kmem_free(tni, TNI_TOTALSIZE(tni)); 1106 kmem_free(nick_entry, sizeof (trill_node_t)); 1107 } 1108 1109 static void 1110 trill_node_unref(trill_inst_t *tip, trill_node_t *tnp) 1111 { 
1112 if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) { 1113 if (tnp->tn_tsp != NULL) 1114 trill_sock_unref(tnp->tn_tsp); 1115 trill_node_free(tnp); 1116 atomic_dec_uint(&tip->ti_nodecount); 1117 } 1118 } 1119 1120 static trill_node_t * 1121 trill_node_lookup(trill_inst_t *tip, uint16_t nick) 1122 { 1123 trill_node_t *nick_entry; 1124 1125 if (!VALID_NICK(nick)) 1126 return (NULL); 1127 rw_enter(&tip->ti_rwlock, RW_READER); 1128 nick_entry = tip->ti_nodes[nick]; 1129 if (nick_entry != NULL) { 1130 atomic_inc_uint(&nick_entry->tn_refs); 1131 } 1132 rw_exit(&tip->ti_rwlock); 1133 return (nick_entry); 1134 } 1135 1136 static int 1137 trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld) 1138 { 1139 trill_node_t *nick_entry; 1140 int rc = ENOENT; 1141 1142 if (!lockheld) 1143 rw_enter(&tip->ti_rwlock, RW_WRITER); 1144 if (VALID_NICK(nick)) { 1145 nick_entry = tip->ti_nodes[nick]; 1146 if (nick_entry != NULL) { 1147 trill_node_unref(tip, nick_entry); 1148 tip->ti_nodes[nick] = NULL; 1149 rc = 0; 1150 } 1151 } 1152 if (!lockheld) 1153 rw_exit(&tip->ti_rwlock); 1154 return (rc); 1155 } 1156 1157 static int 1158 trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode) 1159 { 1160 uint16_t nick; 1161 int size; 1162 trill_node_t *tnode; 1163 trill_nickinfo_t tnihdr; 1164 1165 /* First make sure we have at least the header available */ 1166 if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0) 1167 return (EFAULT); 1168 1169 nick = tnihdr.tni_nick; 1170 if (!VALID_NICK(nick)) { 1171 DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *, 1172 &tnihdr); 1173 return (EINVAL); 1174 } 1175 1176 size = TNI_TOTALSIZE(&tnihdr); 1177 if (size > TNI_MAXSIZE) 1178 return (EINVAL); 1179 tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP); 1180 tnode->tn_ni = kmem_zalloc(size, KM_SLEEP); 1181 if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) { 1182 kmem_free(tnode->tn_ni, size); 1183 kmem_free(tnode, sizeof (trill_node_t)); 1184 return (EFAULT); 
1185 } 1186 1187 tnode->tn_refs++; 1188 rw_enter(&tip->ti_rwlock, RW_WRITER); 1189 if (tip->ti_nodes[nick] != NULL) 1190 (void) trill_del_nick(tip, nick, B_TRUE); 1191 1192 if (self) { 1193 tip->ti_nick = nick; 1194 } else { 1195 tnode->tn_tsp = find_trill_link(tip, 1196 tnode->tn_ni->tni_linkid); 1197 } 1198 DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode, 1199 uint16_t, nick); 1200 tip->ti_nodes[nick] = tnode; 1201 tip->ti_nodecount++; 1202 rw_exit(&tip->ti_rwlock); 1203 return (0); 1204 } 1205 1206 static int 1207 trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode) 1208 { 1209 int error = 0; 1210 trill_inst_t *tip = tsock->ts_tip; 1211 1212 switch (cmd) { 1213 case TRILL_DESIGVLAN: { 1214 uint16_t desigvlan; 1215 1216 if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0) 1217 return (EFAULT); 1218 tsock->ts_desigvlan = desigvlan; 1219 break; 1220 } 1221 case TRILL_VLANFWDER: { 1222 uint8_t vlans[TRILL_VLANS_ARRSIZE]; 1223 1224 if (tsock->ts_link == NULL) 1225 return (EINVAL); 1226 if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0) 1227 return (EFAULT); 1228 bridge_trill_setvlans(tsock->ts_link, vlans); 1229 break; 1230 } 1231 case TRILL_SETNICK: 1232 if (tip == NULL) 1233 return (EINVAL); 1234 error = trill_add_nick(tip, arg, B_TRUE, mode); 1235 break; 1236 1237 case TRILL_GETNICK: 1238 if (tip == NULL) 1239 return (EINVAL); 1240 rw_enter(&tip->ti_rwlock, RW_READER); 1241 if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick), 1242 mode) != 0) 1243 error = EFAULT; 1244 rw_exit(&tip->ti_rwlock); 1245 break; 1246 1247 case TRILL_ADDNICK: 1248 if (tip == NULL) 1249 break; 1250 error = trill_add_nick(tip, arg, B_FALSE, mode); 1251 break; 1252 1253 case TRILL_DELNICK: { 1254 uint16_t delnick; 1255 1256 if (tip == NULL) 1257 break; 1258 if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0) 1259 return (EFAULT); 1260 error = trill_del_nick(tip, delnick, B_FALSE); 1261 break; 1262 } 1263 case TRILL_DELALL: 1264 if (tip == 
NULL) 1265 break; 1266 trill_del_all(tip, B_FALSE); 1267 break; 1268 1269 case TRILL_TREEROOT: { 1270 uint16_t treeroot; 1271 1272 if (tip == NULL) 1273 break; 1274 if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0) 1275 return (EFAULT); 1276 if (!VALID_NICK(treeroot)) 1277 return (EINVAL); 1278 rw_enter(&tip->ti_rwlock, RW_WRITER); 1279 tip->ti_treeroot = treeroot; 1280 rw_exit(&tip->ti_rwlock); 1281 break; 1282 } 1283 case TRILL_HWADDR: 1284 if (tsock->ts_link == NULL) 1285 break; 1286 if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL, 1287 mode) != 0) 1288 return (EFAULT); 1289 break; 1290 1291 case TRILL_NEWBRIDGE: { 1292 char bname[MAXLINKNAMELEN]; 1293 1294 if (tsock->ts_state != TS_UNBND) 1295 return (ENOTSUP); 1296 /* ts_tip can only be set once */ 1297 if (tip != NULL) 1298 return (EEXIST); 1299 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1300 return (EFAULT); 1301 bname[MAXLINKNAMELEN-1] = '\0'; 1302 error = trill_find_bridge(tsock, bname, B_TRUE); 1303 break; 1304 } 1305 1306 case TRILL_GETBRIDGE: { 1307 char bname[MAXLINKNAMELEN]; 1308 1309 /* ts_tip can only be set once */ 1310 if (tip != NULL) 1311 return (EEXIST); 1312 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1313 return (EFAULT); 1314 bname[MAXLINKNAMELEN - 1] = '\0'; 1315 error = trill_find_bridge(tsock, bname, B_FALSE); 1316 break; 1317 } 1318 1319 case TRILL_LISTNICK: { 1320 trill_listnick_t tln; 1321 trill_node_t *tnp; 1322 trill_nickinfo_t *tnip; 1323 uint16_t nick; 1324 1325 if (tip == NULL) 1326 return (EINVAL); 1327 if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0) 1328 return (EFAULT); 1329 nick = tln.tln_nick; 1330 if (nick >= RBRIDGE_NICKNAME_MAX) { 1331 error = EINVAL; 1332 break; 1333 } 1334 rw_enter(&tip->ti_rwlock, RW_READER); 1335 while (++nick < RBRIDGE_NICKNAME_MAX) { 1336 if ((tnp = tip->ti_nodes[nick]) != NULL) { 1337 tnip = tnp->tn_ni; 1338 ASSERT(nick == tnip->tni_nick); 1339 tln.tln_nick = nick; 1340 bcopy(tnip->tni_adjsnpa, 
tln.tln_nexthop, 1341 ETHERADDRL); 1342 tln.tln_ours = nick == tip->ti_nick; 1343 if (tln.tln_ours || tnp->tn_tsp == NULL) { 1344 tln.tln_linkid = 1345 DATALINK_INVALID_LINKID; 1346 } else { 1347 tln.tln_linkid = 1348 tnp->tn_tsp->ts_link->bl_linkid; 1349 } 1350 break; 1351 } 1352 } 1353 rw_exit(&tip->ti_rwlock); 1354 if (nick >= RBRIDGE_NICKNAME_MAX) 1355 bzero(&tln, sizeof (tln)); 1356 if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0) 1357 return (EFAULT); 1358 break; 1359 } 1360 1361 /* 1362 * Port flush: this is used when we lose AF on a port. We must discard 1363 * all regular bridge forwarding entries on this port with the 1364 * indicated VLAN. 1365 */ 1366 case TRILL_PORTFLUSH: { 1367 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1368 1369 if (tsock->ts_link == NULL) 1370 return (EINVAL); 1371 bridge_trill_flush(tsock->ts_link, vlan, B_FALSE); 1372 break; 1373 } 1374 1375 /* 1376 * Nick flush: this is used when we lose AF on a port. We must discard 1377 * all bridge TRILL forwarding entries on this port with the indicated 1378 * VLAN. 1379 */ 1380 case TRILL_NICKFLUSH: { 1381 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1382 1383 if (tsock->ts_link == NULL) 1384 return (EINVAL); 1385 bridge_trill_flush(tsock->ts_link, vlan, B_TRUE); 1386 break; 1387 } 1388 1389 case TRILL_GETMTU: 1390 if (tsock->ts_link == NULL) 1391 break; 1392 if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg, 1393 sizeof (uint_t), mode) != 0) 1394 return (EFAULT); 1395 break; 1396 1397 default: 1398 error = ENOTSUP; 1399 break; 1400 } 1401 1402 return (error); 1403 } 1404 1405 /* 1406 * Sends received packet back upstream on the TRILL socket. 1407 * Consumes passed mblk_t. 
1408 */ 1409 static void 1410 trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr, 1411 uint16_t tci) 1412 { 1413 int udi_size; 1414 mblk_t *mp1; 1415 struct T_unitdata_ind *tudi; 1416 struct sockaddr_dl *sdl; 1417 char *lladdr; 1418 int error; 1419 1420 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1421 if (tsock->ts_flow_ctrld) { 1422 freemsg(mp); 1423 KSPINCR(tks_drops); 1424 return; 1425 } 1426 1427 udi_size = sizeof (struct T_unitdata_ind) + 1428 sizeof (struct sockaddr_dl); 1429 mp1 = allocb(udi_size, BPRI_MED); 1430 if (mp1 == NULL) { 1431 freemsg(mp); 1432 KSPINCR(tks_drops); 1433 return; 1434 } 1435 1436 mp1->b_cont = mp; 1437 mp = mp1; 1438 mp->b_datap->db_type = M_PROTO; 1439 /* LINTED: alignment */ 1440 tudi = (struct T_unitdata_ind *)mp->b_rptr; 1441 mp->b_wptr = (uchar_t *)tudi + udi_size; 1442 1443 tudi->PRIM_type = T_UNITDATA_IND; 1444 tudi->SRC_length = sizeof (struct sockaddr_dl); 1445 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1446 tudi->OPT_length = 0; 1447 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 1448 sizeof (struct sockaddr_dl); 1449 1450 /* Information of the link on which packet was received. 
*/ 1451 sdl = (struct sockaddr_dl *)&tudi[1]; 1452 (void) memset(sdl, 0, sizeof (struct sockaddr_dl)); 1453 sdl->sdl_family = AF_TRILL; 1454 1455 /* LINTED: alignment */ 1456 *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid; 1457 sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid); 1458 1459 lladdr = LLADDR(sdl); 1460 (void) memcpy(lladdr, saddr, ETHERADDRL); 1461 lladdr += ETHERADDRL; 1462 sdl->sdl_alen = ETHERADDRL; 1463 1464 /* LINTED: alignment */ 1465 *(uint16_t *)lladdr = tci; 1466 sdl->sdl_slen = sizeof (uint16_t); 1467 1468 DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp); 1469 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1470 mp, msgdsize(mp), 0, &error, NULL); 1471 1472 if (error == ENOSPC) { 1473 mutex_enter(&tsock->ts_socklock); 1474 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1475 NULL, 0, 0, &error, NULL); 1476 if (error == ENOSPC) 1477 tsock->ts_flow_ctrld = B_TRUE; 1478 mutex_exit(&tsock->ts_socklock); 1479 KSPINCR(tks_drops); 1480 } else if (error != 0) { 1481 KSPINCR(tks_drops); 1482 } else { 1483 KSPINCR(tks_recv); 1484 } 1485 1486 DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *, 1487 tsock, int, error); 1488 } 1489 1490 /* ARGSUSED */ 1491 static void 1492 trill_activate(sock_lower_handle_t proto_handle, 1493 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, 1494 int flags, cred_t *cr) 1495 { 1496 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1497 struct sock_proto_props sopp; 1498 1499 tsock->ts_conn_upcalls = sock_upcalls; 1500 tsock->ts_conn_upper_handle = sock_handle; 1501 1502 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 1503 SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ | 1504 SOCKOPT_MAXBLK | SOCKOPT_MINPSZ; 1505 sopp.sopp_wroff = 0; 1506 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 1507 sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 1508 sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl); 1509 sopp.sopp_maxpsz = INFPSZ; 1510 
sopp.sopp_maxblk = INFPSZ; 1511 sopp.sopp_minpsz = 0; 1512 (*tsock->ts_conn_upcalls->su_set_proto_props)( 1513 tsock->ts_conn_upper_handle, &sopp); 1514 } 1515 1516 /* ARGSUSED */ 1517 static int 1518 trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1519 { 1520 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1521 1522 trill_do_close(tsock); 1523 return (0); 1524 } 1525 1526 /* ARGSUSED */ 1527 static int 1528 trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1529 socklen_t len, cred_t *cr) 1530 { 1531 int error; 1532 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1533 1534 if (sa == NULL) 1535 error = trill_do_unbind(tsock); 1536 else 1537 error = trill_start_recv(tsock, sa, len); 1538 1539 return (error); 1540 } 1541 1542 /* ARGSUSED */ 1543 static int 1544 trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 1545 cred_t *cr) 1546 { 1547 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1548 struct sockaddr_dl *laddr; 1549 uint16_t tci; 1550 1551 ASSERT(DB_TYPE(mp) == M_DATA); 1552 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1553 1554 if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr)) 1555 goto eproto; 1556 1557 /* 1558 * The name is a datalink_id_t, the address is an Ethernet address, and 1559 * the selector value is the VLAN ID. 1560 */ 1561 laddr = (struct sockaddr_dl *)msg->msg_name; 1562 if (laddr->sdl_nlen != sizeof (datalink_id_t) || 1563 laddr->sdl_alen != ETHERADDRL || 1564 (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0)) 1565 goto eproto; 1566 1567 mutex_enter(&tsock->ts_socklock); 1568 if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) { 1569 mutex_exit(&tsock->ts_socklock); 1570 goto eproto; 1571 } 1572 atomic_inc_uint(&tsock->ts_sockthreadcount); 1573 mutex_exit(&tsock->ts_socklock); 1574 1575 /* 1576 * Safe to dereference VLAN now, as we've checked the user's specified 1577 * values, and alignment is now guaranteed. 
1578 */ 1579 if (laddr->sdl_slen == 0) { 1580 tci = TRILL_NO_TCI; 1581 } else { 1582 /* LINTED: alignment */ 1583 tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL); 1584 } 1585 1586 mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr), 1587 B_TRUE, B_FALSE, tci, msgdsize(mp)); 1588 if (mp != NULL) { 1589 mp = bridge_trill_output(tsock->ts_link, mp); 1590 if (mp == NULL) { 1591 KSPINCR(tks_sent); 1592 } else { 1593 freemsg(mp); 1594 KSPINCR(tks_drops); 1595 } 1596 } 1597 1598 /* Wake up any threads blocking on us */ 1599 if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0) 1600 cv_broadcast(&tsock->ts_sockthreadwait); 1601 return (0); 1602 1603 eproto: 1604 freemsg(mp); 1605 KSPINCR(tks_drops); 1606 return (EPROTO); 1607 } 1608 1609 /* ARGSUSED */ 1610 static int 1611 trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1612 int mode, int32_t *rvalp, cred_t *cr) 1613 { 1614 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1615 int rc; 1616 1617 switch (cmd) { 1618 /* List of unprivileged TRILL ioctls */ 1619 case TRILL_GETNICK: 1620 case TRILL_GETBRIDGE: 1621 case TRILL_LISTNICK: 1622 break; 1623 default: 1624 if (secpolicy_dl_config(cr) != 0) 1625 return (EPERM); 1626 break; 1627 } 1628 1629 /* Lock ensures socket state is unchanged during ioctl handling */ 1630 mutex_enter(&tsock->ts_socklock); 1631 rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode); 1632 mutex_exit(&tsock->ts_socklock); 1633 return (rc); 1634 } 1635 1636 static void 1637 trill_clr_flowctrl(sock_lower_handle_t proto_handle) 1638 { 1639 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1640 1641 mutex_enter(&tsock->ts_socklock); 1642 tsock->ts_flow_ctrld = B_FALSE; 1643 mutex_exit(&tsock->ts_socklock); 1644 } 1645 1646 static sock_downcalls_t sock_trill_downcalls = { 1647 trill_activate, /* sd_activate */ 1648 sock_accept_notsupp, /* sd_accept */ 1649 trill_bind, /* sd_bind */ 1650 sock_listen_notsupp, /* sd_listen */ 1651 sock_connect_notsupp, /* sd_connect */ 
1652 sock_getpeername_notsupp, /* sd_getpeername */ 1653 sock_getsockname_notsupp, /* sd_getsockname */ 1654 sock_getsockopt_notsupp, /* sd_getsockopt */ 1655 sock_setsockopt_notsupp, /* sd_setsockopt */ 1656 trill_send, /* sd_send */ 1657 NULL, /* sd_send_uio */ 1658 NULL, /* sd_recv_uio */ 1659 NULL, /* sd_poll */ 1660 sock_shutdown_notsupp, /* sd_shutdown */ 1661 trill_clr_flowctrl, /* sd_setflowctrl */ 1662 trill_ioctl, /* sd_ioctl */ 1663 trill_close /* sd_close */ 1664 }; 1665 1666 /* ARGSUSED */ 1667 static sock_lower_handle_t 1668 trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1669 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1670 { 1671 trill_sock_t *tsock; 1672 1673 if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) { 1674 *errorp = EPROTONOSUPPORT; 1675 return (NULL); 1676 } 1677 1678 *sock_downcalls = &sock_trill_downcalls; 1679 *smodep = SM_ATOMIC; 1680 tsock = trill_do_open(flags); 1681 *errorp = (tsock != NULL) ? 0:ENOMEM; 1682 return ((sock_lower_handle_t)tsock); 1683 }