1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This module supports AF_TRILL sockets and TRILL layer-2 forwarding. 29 */ 30 31 #include <sys/strsubr.h> 32 #include <sys/socket.h> 33 #include <sys/socketvar.h> 34 #include <sys/modctl.h> 35 #include <sys/cmn_err.h> 36 #include <sys/tihdr.h> 37 #include <sys/strsun.h> 38 #include <sys/policy.h> 39 #include <sys/ethernet.h> 40 #include <sys/vlan.h> 41 #include <net/trill.h> 42 #include <net/if_dl.h> 43 #include <sys/mac.h> 44 #include <sys/mac_client.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_client_priv.h> 47 #include <sys/sdt.h> 48 #include <sys/dls.h> 49 #include <sys/sunddi.h> 50 51 #include "trill_impl.h" 52 53 static void trill_del_all(trill_inst_t *, boolean_t); 54 static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t); 55 static void trill_stop_recv(trill_sock_t *); 56 static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *, 57 uint16_t); 58 static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t); 59 static void trill_node_unref(trill_inst_t *, trill_node_t *); 60 static void trill_sock_unref(trill_sock_t *); 61 static void trill_kstats_init(trill_sock_t *, const char *); 62 63 static list_t trill_inst_list; 64 static krwlock_t trill_inst_rwlock; 65 66 static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **, 67 uint_t *, int *, int, cred_t *); 68 69 static smod_reg_t sinfo = { 70 SOCKMOD_VERSION, 71 "trill", 72 SOCK_UC_VERSION, 73 SOCK_DC_VERSION, 74 trill_create, 75 NULL, 76 }; 77 78 /* modldrv structure */ 79 static struct modlsockmod sockmod = { 80 &mod_sockmodops, "AF_TRILL socket module", &sinfo 81 }; 82 83 /* modlinkage structure */ 84 static struct modlinkage ml = { 85 MODREV_1, 86 { &sockmod, NULL } 87 }; 88 89 #define VALID_NICK(n) ((n) != RBRIDGE_NICKNAME_NONE && \ 90 (n) != RBRIDGE_NICKNAME_UNUSED) 91 92 static mblk_t * 93 create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr, 94 boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci, 95 size_t msglen) 96 { 97 int extra_hdr_len; 98 struct ether_vlan_header *ethvlanhdr; 99 mblk_t *hdr_mp; 100 uint16_t etype; 101 102 etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL; 103 104 /* When sending on the PVID, we must not give a VLAN ID */ 105 if (tci == tsock->ts_link->bl_pvid) 106 tci = TRILL_NO_TCI; 107 108 /* 109 * Create new Ethernet header and include additional space 110 * for writing TRILL header and/or VLAN tag. 111 */ 112 extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) + 113 (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0); 114 hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr, 115 tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len); 116 if (hdr_mp == NULL) { 117 freemsg(mp); 118 return (NULL); 119 } 120 121 if (tci != TRILL_NO_TCI) { 122 /* LINTED: alignment */ 123 ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr; 124 ethvlanhdr->ether_tci = htons(tci); 125 ethvlanhdr->ether_type = htons(etype); 126 hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo); 127 } 128 129 if (!trill_hdr_ok) { 130 trill_header_t *thp; 131 /* LINTED: alignment */ 132 thp = (trill_header_t *)hdr_mp->b_wptr; 133 (void) memset(thp, 0, sizeof (trill_header_t)); 134 thp->th_hopcount = TRILL_DEFAULT_HOPS; 135 thp->th_multidest = (multidest ? 1:0); 136 hdr_mp->b_wptr += sizeof (trill_header_t); 137 } 138 139 hdr_mp->b_cont = mp; 140 return (hdr_mp); 141 } 142 143 /* 144 * TRILL local recv function. TRILL data frames that should be received 145 * by the local system are decapsulated here and passed to bridging for 146 * learning and local system receive. Only called when we are the forwarder 147 * on the link (multi-dest frames) or the frame was destined for us. 148 */ 149 static void 150 trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick) 151 { 152 struct ether_header *inner_ethhdr; 153 154 /* LINTED: alignment */ 155 inner_ethhdr = (struct ether_header *)mp->b_rptr; 156 DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr); 157 158 DB_CKSUMFLAGS(mp) = 0; 159 /* 160 * Transmit the decapsulated frame on the link via Bridging. 161 * Bridging does source address learning and appropriate forwarding. 162 */ 163 bridge_trill_decaps(tsock->ts_link, mp, ingressnick); 164 KSPINCR(tks_decap); 165 } 166 167 /* 168 * Determines the outgoing link to reach a RBridge having the given nick 169 * Assumes caller has acquired the trill instance rwlock. 170 */ 171 static trill_sock_t * 172 find_trill_link(trill_inst_t *tip, datalink_id_t linkid) 173 { 174 trill_sock_t *tsp = NULL; 175 176 ASSERT(RW_LOCK_HELD(&tip->ti_rwlock)); 177 for (tsp = list_head(&tip->ti_socklist); tsp != NULL; 178 tsp = list_next(&tip->ti_socklist, tsp)) { 179 if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) { 180 ASSERT(tsp->ts_link->bl_mh != NULL); 181 ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN)); 182 atomic_inc_uint(&tsp->ts_refs); 183 break; 184 } 185 } 186 return (tsp); 187 } 188 189 /* 190 * TRILL destination forwarding function. Transmits the TRILL data packet 191 * to the next-hop, adjacent RBridge. Consumes passed mblk_t. 192 */ 193 static void 194 trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick, 195 boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick) 196 { 197 trill_node_t *adj; 198 trill_sock_t *tsock = NULL; 199 trill_header_t *trillhdr; 200 struct ether_header *ethhdr; 201 int ethtype; 202 int ethhdrlen; 203 204 adj = trill_node_lookup(tip, adj_nick); 205 if (adj == NULL || ((tsock = adj->tn_tsp) == NULL)) 206 goto dest_fwd_fail; 207 208 ASSERT(tsock->ts_link != NULL); 209 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 210 ASSERT(adj->tn_ni != NULL); 211 212 DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t, 213 adj, trill_sock_t, tsock); 214 215 /* 216 * For broadcast links by using the dest address of 217 * the RBridge to forward the frame should result in 218 * savings. When the link is a bridged LAN or there are 219 * many end stations the frame will not always be flooded. 220 */ 221 fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa, 222 has_trill_hdr, multidest, tsock->ts_desigvlan, 0); 223 if (fwd_mp == NULL) 224 goto dest_fwd_fail; 225 226 /* LINTED: alignment */ 227 ethhdr = (struct ether_header *)fwd_mp->b_rptr; 228 ethtype = ntohs(ethhdr->ether_type); 229 ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL); 230 231 /* Pullup Ethernet and TRILL header (w/o TRILL options) */ 232 ethhdrlen = sizeof (struct ether_header) + 233 (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0); 234 if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t))) 235 goto dest_fwd_fail; 236 /* LINTED: alignment */ 237 trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen); 238 239 /* Update TRILL header with ingress and egress nicks for new frames */ 240 if (!has_trill_hdr) { 241 /* We are creating a new TRILL frame */ 242 trillhdr->th_egressnick = (multidest ? dtnick:adj_nick); 243 rw_enter(&tip->ti_rwlock, RW_READER); 244 trillhdr->th_ingressnick = tip->ti_nick; 245 rw_exit(&tip->ti_rwlock); 246 if (!VALID_NICK(trillhdr->th_ingressnick)) 247 goto dest_fwd_fail; 248 } 249 250 /* Set hop count and update header in packet */ 251 ASSERT(trillhdr->th_hopcount != 0); 252 trillhdr->th_hopcount--; 253 254 /* Clear checksum flag and transmit frame on the link */ 255 DB_CKSUMFLAGS(fwd_mp) = 0; 256 DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr); 257 fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp); 258 if (fwd_mp == NULL) { 259 KSPINCR(tks_sent); 260 KSPINCR(tks_forward); 261 } else { 262 freemsg(fwd_mp); 263 KSPINCR(tks_drops); 264 } 265 trill_node_unref(tip, adj); 266 return; 267 268 dest_fwd_fail: 269 if (adj != NULL) 270 trill_node_unref(tip, adj); 271 if (tsock != NULL) 272 KSPINCR(tks_drops); 273 freemsg(fwd_mp); 274 } 275 276 /* 277 * TRILL multi-destination forwarding. Transmits the packet to the adjacencies 278 * on the distribution tree determined by the egress nick. Source addr (saddr) 279 * is NULL for new TRILL packets originating from us. 280 */ 281 static void 282 trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick, 283 uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr, 284 int inner_vlan, boolean_t free_mblk) 285 { 286 int idx; 287 uint16_t adjnick; 288 trill_node_t *dest; 289 trill_node_t *adj; 290 mblk_t *fwd_mp; 291 boolean_t nicksaved = B_FALSE; 292 uint16_t adjnicksaved; 293 294 /* Lookup the egress nick info, this is the DT root */ 295 if ((dest = trill_node_lookup(tip, egressnick)) == NULL) 296 goto fail_multidest_fwd; 297 298 /* Send a copy to all our adjacencies on the DT root */ 299 ASSERT(dest->tn_ni); 300 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 301 302 /* Check for a valid adjacency node */ 303 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 304 if (!VALID_NICK(adjnick) || ingressnick == adjnick || 305 ((adj = trill_node_lookup(tip, adjnick)) == NULL)) 306 continue; 307 308 /* Do not forward back to adjacency that sent the pkt to us */ 309 ASSERT(adj->tn_ni != NULL); 310 if ((saddr != NULL) && 311 (memcmp(adj->tn_ni->tni_adjsnpa, saddr, 312 ETHERADDRL) == 0)) { 313 trill_node_unref(tip, adj); 314 continue; 315 } 316 317 /* Check if adj is marked as reaching inner VLAN downstream */ 318 if ((inner_vlan != VLAN_ID_NONE) && 319 !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx), 320 inner_vlan)) { 321 trill_node_unref(tip, adj); 322 DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered, 323 uint16_t, adjnick, uint16_t, ingressnick, 324 uint16_t, egressnick, int, inner_vlan); 325 continue; 326 } 327 328 trill_node_unref(tip, adj); 329 330 /* 331 * Save the nick and look ahead to see if we should forward the 332 * frame to more adjacencies. We avoid doing a copy for this 333 * nick and use the passed mblk when we can consume the passed 334 * mblk. 335 */ 336 if (free_mblk && !nicksaved) { 337 adjnicksaved = adjnick; 338 nicksaved = B_TRUE; 339 continue; 340 } 341 342 fwd_mp = copymsg(mp); 343 if (fwd_mp == NULL) 344 break; 345 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 346 adjnick, uint16_t, ingressnick); 347 trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt, 348 B_TRUE, egressnick); 349 } 350 trill_node_unref(tip, dest); 351 352 if (nicksaved) { 353 ASSERT(free_mblk); 354 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 355 adjnicksaved, uint16_t, ingressnick); 356 trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt, 357 B_TRUE, egressnick); 358 return; 359 } 360 361 fail_multidest_fwd: 362 DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t, 363 egressnick, uint16_t, ingressnick); 364 if (free_mblk) { 365 freemsg(mp); 366 } 367 } 368 369 /* 370 * TRILL data receive function. Forwards the received frame if necessary 371 * and also determines if the received frame should be consumed locally. 372 * Consumes passed mblk. 373 */ 374 static void 375 trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr) 376 { 377 trill_header_t *trillhdr; 378 trill_node_t *dest = NULL; 379 trill_node_t *source = NULL; 380 trill_node_t *adj; 381 uint16_t ournick, adjnick, treeroot; 382 struct ether_header *ethhdr; 383 trill_inst_t *tip = tsock->ts_tip; 384 uint8_t srcaddr[ETHERADDRL]; 385 size_t trillhdrlen; 386 int inner_vlan = VLAN_ID_NONE; 387 int tci; 388 int idx; 389 size_t min_size; 390 391 /* Copy Ethernet source address before modifying packet */ 392 (void) memcpy(srcaddr, mpsaddr, ETHERADDRL); 393 394 /* Pull up TRILL header if necessary. */ 395 min_size = sizeof (trill_header_t); 396 if ((MBLKL(mp) < min_size || 397 !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) && 398 !pullupmsg(mp, min_size)) 399 goto fail; 400 401 /* LINTED: alignment */ 402 trillhdr = (trill_header_t *)mp->b_rptr; 403 if (trillhdr->th_version != TRILL_PROTOCOL_VERS) { 404 DTRACE_PROBE1(trill__recv__wrongversion, 405 trill_header_t *, trillhdr); 406 goto fail; 407 } 408 409 /* Drop if unknown or invalid nickname */ 410 if (!VALID_NICK(trillhdr->th_egressnick) || 411 !VALID_NICK(trillhdr->th_ingressnick)) { 412 DTRACE_PROBE1(trill__recv__invalidnick, 413 trill_header_t *, trillhdr); 414 goto fail; 415 } 416 417 rw_enter(&tip->ti_rwlock, RW_READER); 418 ournick = tip->ti_nick; 419 treeroot = tip->ti_treeroot; 420 rw_exit(&tip->ti_rwlock); 421 /* Drop if we received a packet with our nick as ingress */ 422 if (trillhdr->th_ingressnick == ournick) 423 goto fail; 424 425 /* Re-pull any TRILL options and inner Ethernet header */ 426 min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) + 427 sizeof (struct ether_header); 428 if (MBLKL(mp) < min_size) { 429 if (!pullupmsg(mp, min_size)) 430 goto fail; 431 /* LINTED: alignment */ 432 trillhdr = (trill_header_t *)mp->b_rptr; 433 } 434 trillhdrlen = sizeof (trill_header_t) + 435 (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t)); 436 437 /* 438 * Get the inner Ethernet header, plus the inner VLAN header if there 439 * is one. 440 */ 441 /* LINTED: alignment */ 442 ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen); 443 if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) { 444 min_size += sizeof (struct ether_vlan_extinfo); 445 if (MBLKL(mp) < min_size) { 446 if (!pullupmsg(mp, min_size)) 447 goto fail; 448 /* LINTED: alignment */ 449 trillhdr = (trill_header_t *)mp->b_rptr; 450 /* LINTED: alignment */ 451 ethhdr = (struct ether_header *)(mp->b_rptr + 452 trillhdrlen); 453 } 454 455 tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci); 456 inner_vlan = VLAN_ID(tci); 457 } 458 459 /* Known/single destination forwarding. */ 460 if (!trillhdr->th_multidest) { 461 462 /* Inner MacDA must be unicast */ 463 if (ethhdr->ether_dhost.ether_addr_octet[0] & 1) 464 goto fail; 465 466 /* Ingress and Egress nicks must be different */ 467 if (trillhdr->th_egressnick == trillhdr->th_ingressnick) 468 goto fail; 469 470 DTRACE_PROBE1(trill__recv__singledest, 471 trill_header_t *, trillhdr); 472 if (trillhdr->th_egressnick == ournick) { 473 mp->b_rptr += trillhdrlen; 474 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 475 } else if (trillhdr->th_hopcount > 0) { 476 trill_dest_fwd(tip, mp, trillhdr->th_egressnick, 477 B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE); 478 } else { 479 goto fail; 480 } 481 return; 482 } 483 484 /* 485 * Multi-destination frame: perform checks verifying we have 486 * received a valid multi-destination frame before receiving the 487 * frame locally and forwarding the frame to other RBridges. 488 * 489 * Check if we received this multi-destination frame on a 490 * adjacency in the distribution tree indicated by the frame's 491 * egress nickname. 492 */ 493 if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL) 494 goto fail; 495 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 496 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 497 if ((adj = trill_node_lookup(tip, adjnick)) == NULL) 498 continue; 499 if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) { 500 trill_node_unref(tip, adj); 501 break; 502 } 503 trill_node_unref(tip, adj); 504 } 505 506 if (idx >= dest->tn_ni->tni_adjcount) { 507 DTRACE_PROBE2(trill__recv__multidest__adjcheckfail, 508 trill_header_t *, trillhdr, trill_node_t *, dest); 509 goto fail; 510 } 511 512 /* 513 * Reverse path forwarding check. Check if the ingress RBridge 514 * that has forwarded the frame advertised the use of the 515 * distribution tree specified in the egress nick. 516 */ 517 if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL) 518 goto fail; 519 for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) { 520 if (TNI_DTROOTNICK(source->tn_ni, idx) == 521 trillhdr->th_egressnick) 522 break; 523 } 524 525 if (idx >= source->tn_ni->tni_dtrootcount) { 526 /* 527 * Allow receipt of forwarded frame with the highest 528 * tree root RBridge as the egress RBridge when the 529 * ingress RBridge has not advertised the use of any 530 * distribution trees. 531 */ 532 if (source->tn_ni->tni_dtrootcount != 0 || 533 trillhdr->th_egressnick != treeroot) { 534 DTRACE_PROBE3( 535 trill__recv__multidest__rpfcheckfail, 536 trill_header_t *, trillhdr, trill_node_t *, 537 source, trill_inst_t *, tip); 538 goto fail; 539 } 540 } 541 542 /* Check hop count before doing any forwarding */ 543 if (trillhdr->th_hopcount == 0) 544 goto fail; 545 546 /* Forward frame using the distribution tree specified by egress nick */ 547 DTRACE_PROBE2(trill__recv__multidest, trill_header_t *, 548 trillhdr, trill_node_t *, source); 549 trill_node_unref(tip, source); 550 trill_node_unref(tip, dest); 551 552 /* Tell forwarding not to free if we're the link forwarder. */ 553 trill_multidest_fwd(tip, mp, trillhdr->th_egressnick, 554 trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan, 555 B_FALSE); 556 557 /* 558 * Send de-capsulated frame locally if we are the link forwarder (also 559 * does bridge learning). 560 */ 561 mp->b_rptr += trillhdrlen; 562 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 563 KSPINCR(tks_recv); 564 return; 565 566 fail: 567 DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp, 568 trill_sock_t *, tsock); 569 if (dest != NULL) 570 trill_node_unref(tip, dest); 571 if (source != NULL) 572 trill_node_unref(tip, source); 573 freemsg(mp); 574 KSPINCR(tks_drops); 575 } 576 577 static void 578 trill_stop_recv(trill_sock_t *tsock) 579 { 580 mutex_enter(&tsock->ts_socklock); 581 stop_retry: 582 if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) { 583 mutex_exit(&tsock->ts_socklock); 584 return; 585 } 586 587 /* 588 * If another thread is closing the socket then wait. Our callers 589 * expect us to return only after the socket is closed. 590 */ 591 if (tsock->ts_flags & TSF_CLOSEWAIT) { 592 cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock); 593 goto stop_retry; 594 } 595 596 /* 597 * Set state and flags to block new bind or close calls 598 * while we close the socket. 599 */ 600 tsock->ts_flags |= TSF_CLOSEWAIT; 601 602 /* Wait until all AF_TRILL socket transmit operations are done */ 603 while (tsock->ts_sockthreadcount > 0) 604 cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock); 605 606 /* 607 * We are guaranteed to be the only thread closing on the 608 * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait 609 * for us to finish. 610 */ 611 ASSERT(tsock->ts_link != NULL); 612 if (tsock->ts_ksp != NULL) 613 kstat_delete(tsock->ts_ksp); 614 615 /* 616 * Release lock before bridge_trill_lnunref to prevent deadlock 617 * between trill_ctrl_input thread waiting to acquire ts_socklock 618 * and bridge_trill_lnunref waiting for the trill thread to finish. 619 */ 620 mutex_exit(&tsock->ts_socklock); 621 622 /* 623 * Release TRILL link reference from Bridging. On return from 624 * bridge_trill_lnunref we can be sure there are no active TRILL data 625 * threads for this link. 626 */ 627 bridge_trill_lnunref(tsock->ts_link); 628 629 /* Set socket as unbound & wakeup threads waiting for socket to close */ 630 mutex_enter(&tsock->ts_socklock); 631 ASSERT(tsock->ts_link != NULL); 632 tsock->ts_link = NULL; 633 tsock->ts_state = TS_UNBND; 634 tsock->ts_flags &= ~TSF_CLOSEWAIT; 635 cv_broadcast(&tsock->ts_sockclosewait); 636 mutex_exit(&tsock->ts_socklock); 637 } 638 639 static int 640 trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len) 641 { 642 struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa; 643 datalink_id_t linkid; 644 int err = 0; 645 646 if (len != sizeof (*lladdr)) 647 return (EINVAL); 648 649 mutex_enter(&tsock->ts_socklock); 650 if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) { 651 err = EINVAL; 652 goto bind_error; 653 } 654 655 if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) { 656 err = EBUSY; 657 goto bind_error; 658 } 659 660 (void) memcpy(&(tsock->ts_lladdr), lladdr, 661 sizeof (struct sockaddr_dl)); 662 (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data, 663 sizeof (datalink_id_t)); 664 665 tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst, 666 linkid, tsock); 667 if (tsock->ts_link == NULL) { 668 err = EINVAL; 669 goto bind_error; 670 } 671 672 trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename); 673 tsock->ts_state = TS_IDLE; 674 675 bind_error: 676 mutex_exit(&tsock->ts_socklock); 677 return (err); 678 } 679 680 static int 681 trill_do_unbind(trill_sock_t *tsock) 682 { 683 /* If a bind has not been done, we can't unbind. */ 684 if (tsock->ts_state != TS_IDLE) 685 return (EINVAL); 686 687 trill_stop_recv(tsock); 688 return (0); 689 } 690 691 static void 692 trill_instance_unref(trill_inst_t *tip) 693 { 694 rw_enter(&trill_inst_rwlock, RW_WRITER); 695 rw_enter(&tip->ti_rwlock, RW_WRITER); 696 if (atomic_dec_uint_nv(&tip->ti_refs) == 0) { 697 list_remove(&trill_inst_list, tip); 698 rw_exit(&tip->ti_rwlock); 699 rw_exit(&trill_inst_rwlock); 700 if (tip->ti_binst != NULL) 701 bridge_trill_brunref(tip->ti_binst); 702 list_destroy(&tip->ti_socklist); 703 rw_destroy(&tip->ti_rwlock); 704 kmem_free(tip, sizeof (*tip)); 705 } else { 706 rw_exit(&tip->ti_rwlock); 707 rw_exit(&trill_inst_rwlock); 708 } 709 } 710 711 /* 712 * This is called when the bridge module receives a TRILL-encapsulated packet 713 * on a given link or a packet identified as "TRILL control." We must verify 714 * that it's for us (it almost certainly will be), and then either decapsulate 715 * (if it's to our nickname), forward (if it's to someone else), or send up one 716 * of the sockets (if it's control traffic). 717 * 718 * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and 719 * not by TRILL header information. 720 */ 721 static void 722 trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc, 723 mblk_t *mp, mac_header_info_t *hdr_info) 724 { 725 trill_sock_t *tsock = lptr; 726 727 _NOTE(ARGUNUSED(rsrc)); 728 729 ASSERT(tsock->ts_tip != NULL); 730 ASSERT(tsock->ts_link != NULL); 731 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 732 733 /* 734 * Only receive packet if the source address is not multicast (which is 735 * bogus). 736 */ 737 if (hdr_info->mhi_saddr[0] & 1) 738 goto discard; 739 740 /* 741 * Check if this is our own packet reflected back. It should not be. 742 */ 743 if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0) 744 goto discard; 745 746 /* Only receive unicast packet if addressed to us */ 747 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST && 748 bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0) 749 goto discard; 750 751 if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) { 752 /* TRILL data packets */ 753 trill_recv(tsock, mp, hdr_info->mhi_saddr); 754 } else { 755 /* Design constraint for cheap IS-IS/BPDU comparison */ 756 ASSERT(all_isis_rbridges[4] != bridge_group_address[4]); 757 /* Send received control packet upstream */ 758 trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr, 759 hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ? 760 hdr_info->mhi_tci : TRILL_TCI_BPDU); 761 } 762 763 return; 764 765 discard: 766 freemsg(mp); 767 KSPINCR(tks_drops); 768 } 769 770 /* 771 * This is called when the bridge module discovers that the destination address 772 * for a packet is not local -- it's through some remote node. We must verify 773 * that the remote node isn't our nickname (it shouldn't be), add a TRILL 774 * header, and then use the IS-IS data to determine which link and which 775 * next-hop RBridge should be used for output. We then transmit on that link. 776 * 777 * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case. 778 */ 779 static void 780 trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info, 781 mblk_t *mp, uint16_t egress_nick) 782 { 783 uint16_t ournick; 784 uint16_t dtnick; 785 trill_node_t *self = NULL; 786 trill_sock_t *tsock = lptr; 787 trill_inst_t *tip = tsock->ts_tip; 788 int vlan = VLAN_ID_NONE; 789 790 _NOTE(ARGUNUSED(blp)); 791 ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL); 792 793 /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */ 794 if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick)) 795 goto discard; 796 797 /* Check if our own nick is valid before we do any forwarding */ 798 rw_enter(&tip->ti_rwlock, RW_READER); 799 ournick = tip->ti_nick; 800 dtnick = tip->ti_treeroot; 801 rw_exit(&tip->ti_rwlock); 802 if (!VALID_NICK(ournick)) 803 goto discard; 804 805 /* 806 * For Multi-Destination forwarding determine our choice of 807 * root distribution tree. If we didn't choose a distribution 808 * tree (dtroots_count=0) then we use the highest priority tree 809 * root (t_treeroot) else we drop the packet without forwarding. 810 */ 811 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 812 if ((self = trill_node_lookup(tip, ournick)) == NULL) 813 goto discard; 814 815 /* 816 * Use the first DT configured for now. In future we 817 * should have DT selection code here. 818 */ 819 if (self->tn_ni->tni_dtrootcount > 0) { 820 dtnick = TNI_DTROOTNICK(self->tn_ni, 0); 821 } 822 823 trill_node_unref(tip, self); 824 if (!VALID_NICK(dtnick)) { 825 DTRACE_PROBE(trill__fwd__packet__nodtroot); 826 goto discard; 827 } 828 } 829 830 /* 831 * Retrieve VLAN ID of the native frame used for VLAN 832 * pruning of multi-destination frames. 833 */ 834 if (hdr_info->mhi_istagged) { 835 vlan = VLAN_ID(hdr_info->mhi_tci); 836 } 837 838 DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info, 839 uint16_t, egress_nick); 840 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 841 trill_multidest_fwd(tip, mp, dtnick, 842 ournick, B_FALSE, NULL, vlan, B_TRUE); 843 } else { 844 trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE, 845 RBRIDGE_NICKNAME_NONE); 846 } 847 KSPINCR(tks_encap); 848 return; 849 850 discard: 851 freemsg(mp); 852 } 853 854 /* 855 * This is called when the bridge module has completely torn down a bridge 856 * instance and all of the attached links. We need to make the TRILL instance 857 * go away at this point. 858 */ 859 static void 860 trill_br_dstr_cb(void *bptr, bridge_inst_t *bip) 861 { 862 trill_inst_t *tip = bptr; 863 864 _NOTE(ARGUNUSED(bip)); 865 rw_enter(&tip->ti_rwlock, RW_WRITER); 866 if (tip->ti_binst != NULL) 867 bridge_trill_brunref(tip->ti_binst); 868 tip->ti_binst = NULL; 869 rw_exit(&tip->ti_rwlock); 870 } 871 872 /* 873 * This is called when the bridge module is tearing down a link, but before the 874 * actual tear-down starts. When this function returns, we must make sure that 875 * we will not initiate any new transmits on this link. 876 */ 877 static void 878 trill_ln_dstr_cb(void *lptr, bridge_link_t *blp) 879 { 880 trill_sock_t *tsock = lptr; 881 882 _NOTE(ARGUNUSED(blp)); 883 trill_stop_recv(tsock); 884 } 885 886 static void 887 trill_init(void) 888 { 889 list_create(&trill_inst_list, sizeof (trill_inst_t), 890 offsetof(trill_inst_t, ti_instnode)); 891 rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL); 892 bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb, 893 trill_br_dstr_cb, trill_ln_dstr_cb); 894 } 895 896 static void 897 trill_fini(void) 898 { 899 bridge_trill_register_cb(NULL, NULL, NULL, NULL); 900 rw_destroy(&trill_inst_rwlock); 901 list_destroy(&trill_inst_list); 902 } 903 904 /* Loadable module configuration entry points */ 905 int 906 _init(void) 907 { 908 int rc; 909 910 trill_init(); 911 if ((rc = mod_install(&ml)) != 0) 912 trill_fini(); 913 return (rc); 914 } 915 916 int 917 _info(struct modinfo *modinfop) 918 { 919 return (mod_info(&ml, modinfop)); 920 } 921 922 int 923 _fini(void) 924 { 925 int rc; 926 927 rw_enter(&trill_inst_rwlock, RW_READER); 928 rc = list_is_empty(&trill_inst_list) ? 0 : EBUSY; 929 rw_exit(&trill_inst_rwlock); 930 if (rc == 0 && ((rc = mod_remove(&ml)) == 0)) 931 trill_fini(); 932 return (rc); 933 } 934 935 static void 936 trill_kstats_init(trill_sock_t *tsock, const char *bname) 937 { 938 int i; 939 char kstatname[KSTAT_STRLEN]; 940 kstat_named_t *knt; 941 static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES }; 942 char link_name[MAXNAMELEN]; 943 int num; 944 int err; 945 946 bzero(link_name, sizeof (link_name)); 947 if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name, 948 NULL, NULL, NULL)) != 0) { 949 cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving" 950 " linkinfo for linkid:%d", "trill", err, 951 tsock->ts_link->bl_linkid); 952 return; 953 } 954 955 bzero(kstatname, sizeof (kstatname)); 956 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", 957 bname, link_name); 958 959 num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list); 960 for (i = 0; i < num; i++) { 961 knt = (kstat_named_t *)&(tsock->ts_kstats); 962 kstat_named_init(&knt[i], sock_kstats_list[i], 963 KSTAT_DATA_UINT64); 964 } 965 966 tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock", 967 KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 968 if (tsock->ts_ksp != NULL) { 969 tsock->ts_ksp->ks_data = &tsock->ts_kstats; 970 kstat_install(tsock->ts_ksp); 971 } 972 } 973 974 static trill_sock_t * 975 trill_do_open(int flags) 976 { 977 trill_sock_t *tsock; 978 int kmflag = ((flags & SOCKET_NOSLEEP)) ? KM_NOSLEEP:KM_SLEEP; 979 980 tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag); 981 if (tsock != NULL) { 982 tsock->ts_state = TS_UNBND; 983 tsock->ts_refs++; 984 mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL); 985 cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL); 986 cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL); 987 } 988 return (tsock); 989 } 990 991 static int 992 trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create) 993 { 994 trill_inst_t *tip, *newtip = NULL; 995 996 /* Allocate some memory (speculatively) before taking locks */ 997 if (can_create) 998 newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 999 1000 rw_enter(&trill_inst_rwlock, RW_WRITER); 1001 for (tip = list_head(&trill_inst_list); tip != NULL; 1002 tip = list_next(&trill_inst_list, tip)) { 1003 if (strcmp(tip->ti_bridgename, bname) == 0) 1004 break; 1005 } 1006 if (tip == NULL) { 1007 if (!can_create || newtip == NULL) { 1008 rw_exit(&trill_inst_rwlock); 1009 return (can_create ? ENOMEM : ENOENT); 1010 } 1011 1012 tip = newtip; 1013 newtip = NULL; 1014 (void) strcpy(tip->ti_bridgename, bname); 1015 1016 /* Register TRILL instance with bridging */ 1017 tip->ti_binst = bridge_trill_brref(bname, tip); 1018 if (tip->ti_binst == NULL) { 1019 rw_exit(&trill_inst_rwlock); 1020 kmem_free(tip, sizeof (*tip)); 1021 return (ENOENT); 1022 } 1023 1024 rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL); 1025 list_create(&tip->ti_socklist, sizeof (trill_sock_t), 1026 offsetof(trill_sock_t, ts_socklistnode)); 1027 list_insert_tail(&trill_inst_list, tip); 1028 } 1029 atomic_inc_uint(&tip->ti_refs); 1030 rw_exit(&trill_inst_rwlock); 1031 1032 /* If we didn't need the preallocated memory, then discard now. */ 1033 if (newtip != NULL) 1034 kmem_free(newtip, sizeof (*newtip)); 1035 1036 rw_enter(&tip->ti_rwlock, RW_WRITER); 1037 list_insert_tail(&(tip->ti_socklist), tsock); 1038 tsock->ts_tip = tip; 1039 rw_exit(&tip->ti_rwlock); 1040 return (0); 1041 } 1042 1043 static void 1044 trill_clear_bridge(trill_sock_t *tsock) 1045 { 1046 trill_inst_t *tip; 1047 1048 if ((tip = tsock->ts_tip) == NULL) 1049 return; 1050 rw_enter(&tip->ti_rwlock, RW_WRITER); 1051 list_remove(&tip->ti_socklist, tsock); 1052 if (list_is_empty(&tip->ti_socklist)) 1053 trill_del_all(tip, B_TRUE); 1054 rw_exit(&tip->ti_rwlock); 1055 } 1056 1057 static void 1058 trill_sock_unref(trill_sock_t *tsock) 1059 { 1060 if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) { 1061 mutex_destroy(&tsock->ts_socklock); 1062 cv_destroy(&tsock->ts_sockthreadwait); 1063 cv_destroy(&tsock->ts_sockclosewait); 1064 kmem_free(tsock, sizeof (trill_sock_t)); 1065 } 1066 } 1067 1068 static void 1069 trill_do_close(trill_sock_t *tsock) 1070 { 1071 trill_inst_t *tip; 1072 1073 tip = tsock->ts_tip; 1074 trill_stop_recv(tsock); 1075 /* Remove socket from TRILL instance socket list */ 1076 trill_clear_bridge(tsock); 1077 tsock->ts_flags |= TSF_SHUTDOWN; 1078 trill_sock_unref(tsock); 1079 if (tip != NULL) 1080 trill_instance_unref(tip); 1081 } 1082 1083 static void 1084 trill_del_all(trill_inst_t *tip, boolean_t lockheld) 1085 { 1086 int i; 1087 1088 if (!lockheld) 1089 rw_enter(&tip->ti_rwlock, RW_WRITER); 1090 for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) { 1091 if (tip->ti_nodes[i] != NULL) 1092 (void) trill_del_nick(tip, i, B_TRUE); 1093 } 1094 if (!lockheld) 1095 rw_exit(&tip->ti_rwlock); 1096 } 1097 1098 static void 1099 trill_node_free(trill_node_t *nick_entry) 1100 { 1101 trill_nickinfo_t *tni; 1102 1103 tni = nick_entry->tn_ni; 1104 kmem_free(tni, TNI_TOTALSIZE(tni)); 1105 kmem_free(nick_entry, sizeof (trill_node_t)); 1106 } 1107 1108 static void 1109 trill_node_unref(trill_inst_t *tip, trill_node_t *tnp) 1110 { 1111 if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) { 1112 if (tnp->tn_tsp != NULL) 1113 trill_sock_unref(tnp->tn_tsp); 1114 trill_node_free(tnp); 1115 atomic_dec_uint(&tip->ti_nodecount); 1116 } 1117 } 1118 1119 static trill_node_t * 1120 trill_node_lookup(trill_inst_t *tip, uint16_t nick) 1121 { 1122 trill_node_t *nick_entry; 1123 1124 if (!VALID_NICK(nick)) 1125 return (NULL); 1126 rw_enter(&tip->ti_rwlock, RW_READER); 1127 nick_entry = tip->ti_nodes[nick]; 1128 if (nick_entry != NULL) { 1129 atomic_inc_uint(&nick_entry->tn_refs); 1130 } 1131 rw_exit(&tip->ti_rwlock); 1132 return (nick_entry); 1133 } 1134 1135 static int 1136 trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld) 1137 { 1138 trill_node_t *nick_entry; 1139 int rc = ENOENT; 1140 1141 if (!lockheld) 1142 rw_enter(&tip->ti_rwlock, RW_WRITER); 1143 if (VALID_NICK(nick)) { 1144 nick_entry = tip->ti_nodes[nick]; 1145 if (nick_entry != NULL) { 1146 trill_node_unref(tip, nick_entry); 1147 tip->ti_nodes[nick] = NULL; 1148 rc = 0; 1149 } 1150 } 1151 if (!lockheld) 1152 rw_exit(&tip->ti_rwlock); 1153 return (rc); 1154 } 1155 1156 static int 1157 trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode) 1158 { 1159 uint16_t nick; 1160 int size; 1161 trill_node_t *tnode; 1162 trill_nickinfo_t tnihdr; 1163 1164 /* First make sure we have at least the header available */ 1165 if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0) 1166 return (EFAULT); 1167 1168 nick = tnihdr.tni_nick; 1169 if (!VALID_NICK(nick)) { 1170 DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *, 1171 &tnihdr); 1172 return (EINVAL); 1173 } 1174 1175 size = TNI_TOTALSIZE(&tnihdr); 1176 if (size > TNI_MAXSIZE) 1177 return (EINVAL); 1178 tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP); 1179 tnode->tn_ni = kmem_zalloc(size, KM_SLEEP); 1180 if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) { 1181 kmem_free(tnode->tn_ni, size); 1182 kmem_free(tnode, sizeof (trill_node_t)); 1183 return (EFAULT); 1184 } 1185 1186 tnode->tn_refs++; 1187 rw_enter(&tip->ti_rwlock, RW_WRITER); 1188 if (tip->ti_nodes[nick] != NULL) 1189 (void) trill_del_nick(tip, nick, B_TRUE); 1190 1191 if (self) { 1192 tip->ti_nick = nick; 1193 } else { 1194 tnode->tn_tsp = find_trill_link(tip, 1195 tnode->tn_ni->tni_linkid); 1196 } 1197 DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode, 1198 uint16_t, nick); 1199 tip->ti_nodes[nick] = tnode; 1200 tip->ti_nodecount++; 1201 rw_exit(&tip->ti_rwlock); 1202 return (0); 1203 } 1204 1205 static int 1206 trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode) 1207 { 1208 int error = 0; 1209 trill_inst_t *tip = tsock->ts_tip; 1210 1211 switch (cmd) { 1212 case TRILL_DESIGVLAN: { 1213 uint16_t desigvlan; 1214 1215 if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0) 1216 return (EFAULT); 1217 tsock->ts_desigvlan = desigvlan; 1218 break; 1219 } 1220 case TRILL_VLANFWDER: { 1221 uint8_t vlans[TRILL_VLANS_ARRSIZE]; 1222 1223 if (tsock->ts_link == NULL) 1224 return (EINVAL); 1225 if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0) 1226 return (EFAULT); 1227 bridge_trill_setvlans(tsock->ts_link, vlans); 1228 break; 1229 } 1230 case TRILL_SETNICK: 1231 if (tip == NULL) 1232 return (EINVAL); 1233 error = trill_add_nick(tip, arg, B_TRUE, mode); 1234 break; 1235 1236 case TRILL_GETNICK: 1237 if (tip == NULL) 1238 return (EINVAL); 1239 rw_enter(&tip->ti_rwlock, RW_READER); 1240 if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick), 1241 mode) != 0) 1242 error = EFAULT; 1243 rw_exit(&tip->ti_rwlock); 1244 break; 1245 1246 case TRILL_ADDNICK: 1247 if (tip == NULL) 1248 break; 1249 error = trill_add_nick(tip, arg, B_FALSE, mode); 1250 break; 1251 1252 case TRILL_DELNICK: { 1253 uint16_t delnick; 1254 1255 if (tip == NULL) 1256 break; 1257 if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0) 1258 return (EFAULT); 1259 error = trill_del_nick(tip, delnick, B_FALSE); 1260 break; 1261 } 1262 case TRILL_DELALL: 1263 if (tip == NULL) 1264 break; 1265 trill_del_all(tip, B_FALSE); 1266 break; 1267 1268 case TRILL_TREEROOT: { 1269 uint16_t treeroot; 1270 1271 if (tip == NULL) 1272 break; 1273 if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0) 1274 return (EFAULT); 1275 if (!VALID_NICK(treeroot)) 1276 return (EINVAL); 1277 rw_enter(&tip->ti_rwlock, RW_WRITER); 1278 tip->ti_treeroot = treeroot; 1279 rw_exit(&tip->ti_rwlock); 1280 break; 1281 } 1282 case TRILL_HWADDR: 1283 if (tsock->ts_link == NULL) 1284 break; 1285 if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL, 1286 mode) != 0) 1287 return (EFAULT); 1288 break; 1289 1290 case TRILL_NEWBRIDGE: { 1291 char bname[MAXLINKNAMELEN]; 1292 1293 if (tsock->ts_state != TS_UNBND) 1294 return (ENOTSUP); 1295 /* ts_tip can only be set once */ 1296 if (tip != NULL) 1297 return (EEXIST); 1298 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1299 return (EFAULT); 1300 bname[MAXLINKNAMELEN-1] = '\0'; 1301 error = trill_find_bridge(tsock, bname, B_TRUE); 1302 break; 1303 } 1304 1305 case TRILL_GETBRIDGE: { 1306 char bname[MAXLINKNAMELEN]; 1307 1308 /* ts_tip can only be set once */ 1309 if (tip != NULL) 1310 return (EEXIST); 1311 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1312 return (EFAULT); 1313 bname[MAXLINKNAMELEN - 1] = '\0'; 1314 error = trill_find_bridge(tsock, bname, B_FALSE); 1315 break; 1316 } 1317 1318 case TRILL_LISTNICK: { 1319 trill_listnick_t tln; 1320 trill_node_t *tnp; 1321 trill_nickinfo_t *tnip; 1322 uint16_t nick; 1323 1324 if (tip == NULL) 1325 return (EINVAL); 1326 if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0) 1327 return (EFAULT); 1328 nick = tln.tln_nick; 1329 if (nick >= RBRIDGE_NICKNAME_MAX) { 1330 error = EINVAL; 1331 break; 1332 } 1333 rw_enter(&tip->ti_rwlock, RW_READER); 1334 while (++nick < RBRIDGE_NICKNAME_MAX) { 1335 if ((tnp = tip->ti_nodes[nick]) != NULL) { 1336 tnip = tnp->tn_ni; 1337 ASSERT(nick == tnip->tni_nick); 1338 tln.tln_nick = nick; 1339 bcopy(tnip->tni_adjsnpa, tln.tln_nexthop, 1340 ETHERADDRL); 1341 tln.tln_ours = nick == tip->ti_nick; 1342 if (tln.tln_ours || tnp->tn_tsp == NULL) { 1343 tln.tln_linkid = 1344 DATALINK_INVALID_LINKID; 1345 } else { 1346 tln.tln_linkid = 1347 tnp->tn_tsp->ts_link->bl_linkid; 1348 } 1349 break; 1350 } 1351 } 1352 rw_exit(&tip->ti_rwlock); 1353 if (nick >= RBRIDGE_NICKNAME_MAX) 1354 bzero(&tln, sizeof (tln)); 1355 if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0) 1356 return (EFAULT); 1357 break; 1358 } 1359 1360 /* 1361 * Port flush: this is used when we lose AF on a port. We must discard 1362 * all regular bridge forwarding entries on this port with the 1363 * indicated VLAN. 1364 */ 1365 case TRILL_PORTFLUSH: { 1366 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1367 1368 if (tsock->ts_link == NULL) 1369 return (EINVAL); 1370 bridge_trill_flush(tsock->ts_link, vlan, B_FALSE); 1371 break; 1372 } 1373 1374 /* 1375 * Nick flush: this is used when we lose AF on a port. We must discard 1376 * all bridge TRILL forwarding entries on this port with the indicated 1377 * VLAN. 1378 */ 1379 case TRILL_NICKFLUSH: { 1380 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1381 1382 if (tsock->ts_link == NULL) 1383 return (EINVAL); 1384 bridge_trill_flush(tsock->ts_link, vlan, B_TRUE); 1385 break; 1386 } 1387 1388 case TRILL_GETMTU: 1389 if (tsock->ts_link == NULL) 1390 break; 1391 if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg, 1392 sizeof (uint_t), mode) != 0) 1393 return (EFAULT); 1394 break; 1395 1396 default: 1397 error = ENOTSUP; 1398 break; 1399 } 1400 1401 return (error); 1402 } 1403 1404 /* 1405 * Sends received packet back upstream on the TRILL socket. 1406 * Consumes passed mblk_t. 1407 */ 1408 static void 1409 trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr, 1410 uint16_t tci) 1411 { 1412 int udi_size; 1413 mblk_t *mp1; 1414 struct T_unitdata_ind *tudi; 1415 struct sockaddr_dl *sdl; 1416 char *lladdr; 1417 int error; 1418 1419 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1420 if (tsock->ts_flow_ctrld) { 1421 freemsg(mp); 1422 KSPINCR(tks_drops); 1423 return; 1424 } 1425 1426 udi_size = sizeof (struct T_unitdata_ind) + 1427 sizeof (struct sockaddr_dl); 1428 mp1 = allocb(udi_size, BPRI_MED); 1429 if (mp1 == NULL) { 1430 freemsg(mp); 1431 KSPINCR(tks_drops); 1432 return; 1433 } 1434 1435 mp1->b_cont = mp; 1436 mp = mp1; 1437 mp->b_datap->db_type = M_PROTO; 1438 /* LINTED: alignment */ 1439 tudi = (struct T_unitdata_ind *)mp->b_rptr; 1440 mp->b_wptr = (uchar_t *)tudi + udi_size; 1441 1442 tudi->PRIM_type = T_UNITDATA_IND; 1443 tudi->SRC_length = sizeof (struct sockaddr_dl); 1444 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1445 tudi->OPT_length = 0; 1446 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 1447 sizeof (struct sockaddr_dl); 1448 1449 /* Information of the link on which packet was received. */ 1450 sdl = (struct sockaddr_dl *)&tudi[1]; 1451 (void) memset(sdl, 0, sizeof (struct sockaddr_dl)); 1452 sdl->sdl_family = AF_TRILL; 1453 1454 /* LINTED: alignment */ 1455 *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid; 1456 sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid); 1457 1458 lladdr = LLADDR(sdl); 1459 (void) memcpy(lladdr, saddr, ETHERADDRL); 1460 lladdr += ETHERADDRL; 1461 sdl->sdl_alen = ETHERADDRL; 1462 1463 /* LINTED: alignment */ 1464 *(uint16_t *)lladdr = tci; 1465 sdl->sdl_slen = sizeof (uint16_t); 1466 1467 DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp); 1468 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1469 mp, msgdsize(mp), 0, &error, NULL); 1470 1471 if (error == ENOSPC) { 1472 mutex_enter(&tsock->ts_socklock); 1473 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1474 NULL, 0, 0, &error, NULL); 1475 if (error == ENOSPC) 1476 tsock->ts_flow_ctrld = B_TRUE; 1477 mutex_exit(&tsock->ts_socklock); 1478 KSPINCR(tks_drops); 1479 } else if (error != 0) { 1480 KSPINCR(tks_drops); 1481 } else { 1482 KSPINCR(tks_recv); 1483 } 1484 1485 DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *, 1486 tsock, int, error); 1487 } 1488 1489 /* ARGSUSED */ 1490 static void 1491 trill_activate(sock_lower_handle_t proto_handle, 1492 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, 1493 int flags, cred_t *cr) 1494 { 1495 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1496 struct sock_proto_props sopp; 1497 1498 tsock->ts_conn_upcalls = sock_upcalls; 1499 tsock->ts_conn_upper_handle = sock_handle; 1500 1501 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 1502 SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ | 1503 SOCKOPT_MAXBLK | SOCKOPT_MINPSZ; 1504 sopp.sopp_wroff = 0; 1505 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 1506 sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 1507 sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl); 1508 sopp.sopp_maxpsz = INFPSZ; 1509 sopp.sopp_maxblk = INFPSZ; 1510 sopp.sopp_minpsz = 0; 1511 (*tsock->ts_conn_upcalls->su_set_proto_props)( 1512 tsock->ts_conn_upper_handle, &sopp); 1513 } 1514 1515 /* ARGSUSED */ 1516 static int 1517 trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1518 { 1519 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1520 1521 trill_do_close(tsock); 1522 return (0); 1523 } 1524 1525 /* ARGSUSED */ 1526 static int 1527 trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1528 socklen_t len, cred_t *cr) 1529 { 1530 int error; 1531 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1532 1533 if (sa == NULL) 1534 error = trill_do_unbind(tsock); 1535 else 1536 error = trill_start_recv(tsock, sa, len); 1537 1538 return (error); 1539 } 1540 1541 /* ARGSUSED */ 1542 static int 1543 trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 1544 cred_t *cr) 1545 { 1546 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1547 struct sockaddr_dl *laddr; 1548 uint16_t tci; 1549 1550 ASSERT(DB_TYPE(mp) == M_DATA); 1551 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1552 1553 if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr)) 1554 goto eproto; 1555 1556 /* 1557 * The name is a datalink_id_t, the address is an Ethernet address, and 1558 * the selector value is the VLAN ID. 1559 */ 1560 laddr = (struct sockaddr_dl *)msg->msg_name; 1561 if (laddr->sdl_nlen != sizeof (datalink_id_t) || 1562 laddr->sdl_alen != ETHERADDRL || 1563 (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0)) 1564 goto eproto; 1565 1566 mutex_enter(&tsock->ts_socklock); 1567 if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) { 1568 mutex_exit(&tsock->ts_socklock); 1569 goto eproto; 1570 } 1571 atomic_inc_uint(&tsock->ts_sockthreadcount); 1572 mutex_exit(&tsock->ts_socklock); 1573 1574 /* 1575 * Safe to dereference VLAN now, as we've checked the user's specified 1576 * values, and alignment is now guaranteed. 1577 */ 1578 if (laddr->sdl_slen == 0) { 1579 tci = TRILL_NO_TCI; 1580 } else { 1581 /* LINTED: alignment */ 1582 tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL); 1583 } 1584 1585 mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr), 1586 B_TRUE, B_FALSE, tci, msgdsize(mp)); 1587 if (mp != NULL) { 1588 mp = bridge_trill_output(tsock->ts_link, mp); 1589 if (mp == NULL) { 1590 KSPINCR(tks_sent); 1591 } else { 1592 freemsg(mp); 1593 KSPINCR(tks_drops); 1594 } 1595 } 1596 1597 /* Wake up any threads blocking on us */ 1598 if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0) 1599 cv_broadcast(&tsock->ts_sockthreadwait); 1600 return (0); 1601 1602 eproto: 1603 freemsg(mp); 1604 KSPINCR(tks_drops); 1605 return (EPROTO); 1606 } 1607 1608 /* ARGSUSED */ 1609 static int 1610 trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1611 int mode, int32_t *rvalp, cred_t *cr) 1612 { 1613 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1614 int rc; 1615 1616 switch (cmd) { 1617 /* List of unprivileged TRILL ioctls */ 1618 case TRILL_GETNICK: 1619 case TRILL_GETBRIDGE: 1620 case TRILL_LISTNICK: 1621 break; 1622 default: 1623 if (secpolicy_dl_config(cr) != 0) 1624 return (EPERM); 1625 break; 1626 } 1627 1628 /* Lock ensures socket state is unchanged during ioctl handling */ 1629 mutex_enter(&tsock->ts_socklock); 1630 rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode); 1631 mutex_exit(&tsock->ts_socklock); 1632 return (rc); 1633 } 1634 1635 static void 1636 trill_clr_flowctrl(sock_lower_handle_t proto_handle) 1637 { 1638 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1639 1640 mutex_enter(&tsock->ts_socklock); 1641 tsock->ts_flow_ctrld = B_FALSE; 1642 mutex_exit(&tsock->ts_socklock); 1643 } 1644 1645 static sock_downcalls_t sock_trill_downcalls = { 1646 trill_activate, /* sd_activate */ 1647 sock_accept_notsupp, /* sd_accept */ 1648 trill_bind, /* sd_bind */ 1649 sock_listen_notsupp, /* sd_listen */ 1650 sock_connect_notsupp, /* sd_connect */ 1651 sock_getpeername_notsupp, /* sd_getpeername */ 1652 sock_getsockname_notsupp, /* sd_getsockname */ 1653 sock_getsockopt_notsupp, /* sd_getsockopt */ 1654 sock_setsockopt_notsupp, /* sd_setsockopt */ 1655 trill_send, /* sd_send */ 1656 NULL, /* sd_send_uio */ 1657 NULL, /* sd_recv_uio */ 1658 NULL, /* sd_poll */ 1659 sock_shutdown_notsupp, /* sd_shutdown */ 1660 trill_clr_flowctrl, /* sd_setflowctrl */ 1661 trill_ioctl, /* sd_ioctl */ 1662 trill_close /* sd_close */ 1663 }; 1664 1665 /* ARGSUSED */ 1666 static sock_lower_handle_t 1667 trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1668 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1669 { 1670 trill_sock_t *tsock; 1671 1672 if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) { 1673 *errorp = EPROTONOSUPPORT; 1674 return (NULL); 1675 } 1676 1677 *sock_downcalls = &sock_trill_downcalls; 1678 *smodep = SM_ATOMIC; 1679 tsock = trill_do_open(flags); 1680 *errorp = (tsock != NULL) ? 0:ENOMEM; 1681 return ((sock_lower_handle_t)tsock); 1682 }