1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. 25 */ 26 27 /* 28 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports. 29 * 30 * Implements the functions needed to manage the MAC ports that are 31 * part of Link Aggregation groups. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/sysmacros.h> 36 #include <sys/conf.h> 37 #include <sys/cmn_err.h> 38 #include <sys/id_space.h> 39 #include <sys/list.h> 40 #include <sys/ksynch.h> 41 #include <sys/kmem.h> 42 #include <sys/stream.h> 43 #include <sys/modctl.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/atomic.h> 47 #include <sys/stat.h> 48 #include <sys/sdt.h> 49 #include <sys/dlpi.h> 50 #include <sys/dls.h> 51 #include <sys/aggr.h> 52 #include <sys/aggr_impl.h> 53 54 static kmem_cache_t *aggr_port_cache; 55 static id_space_t *aggr_portids; 56 57 static void aggr_port_notify_cb(void *, mac_notify_type_t); 58 59 /*ARGSUSED*/ 60 static int 61 aggr_port_constructor(void *buf, void *arg, int kmflag) 62 { 63 bzero(buf, sizeof (aggr_port_t)); 64 return (0); 65 } 66 67 /*ARGSUSED*/ 68 static void 69 aggr_port_destructor(void *buf, void *arg) 70 { 71 aggr_port_t *port = buf; 72 73 ASSERT(port->lp_mnh == NULL); 74 ASSERT(port->lp_mphp == NULL); 75 ASSERT(!port->lp_rx_grp_added && !port->lp_tx_grp_added); 76 ASSERT(port->lp_hwgh == NULL); 77 } 78 79 void 80 aggr_port_init(void) 81 { 82 aggr_port_cache = kmem_cache_create("aggr_port_cache", 83 sizeof (aggr_port_t), 0, aggr_port_constructor, 84 aggr_port_destructor, NULL, NULL, NULL, 0); 85 86 /* 87 * Allocate a id space to manage port identification. The range of 88 * the arena will be from 1 to UINT16_MAX, because the LACP protocol 89 * specifies 16-bit unique identification. 90 */ 91 aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); 92 ASSERT(aggr_portids != NULL); 93 } 94 95 void 96 aggr_port_fini(void) 97 { 98 /* 99 * This function is called only after all groups have been 100 * freed. This ensures that there are no remaining allocated 101 * ports when this function is invoked. 102 */ 103 kmem_cache_destroy(aggr_port_cache); 104 id_space_destroy(aggr_portids); 105 } 106 107 /* ARGSUSED */ 108 void 109 aggr_port_init_callbacks(aggr_port_t *port) 110 { 111 /* add the port's receive callback */ 112 port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); 113 /* 114 * Hold a reference of the grp and the port and this reference will 115 * be released when the thread exits. 116 * 117 * The reference on the port is used for aggr_port_delete() to 118 * continue without waiting for the thread to exit; the reference 119 * on the grp is used for aggr_grp_delete() to wait for the thread 120 * to exit before calling mac_unregister(). 121 * 122 * Note that these references will be released either in 123 * aggr_port_delete() when mac_notify_remove() succeeds, or in 124 * the aggr_port_notify_cb() callback when the port is deleted 125 * (lp_closing is set). 126 */ 127 aggr_grp_port_hold(port); 128 } 129 130 /* ARGSUSED */ 131 int 132 aggr_port_create(aggr_grp_t *grp, const datalink_id_t linkid, boolean_t force, 133 aggr_port_t **pp) 134 { 135 int err; 136 mac_handle_t mh; 137 mac_client_handle_t mch = NULL; 138 aggr_port_t *port; 139 uint16_t portid; 140 uint_t i; 141 boolean_t no_link_update = B_FALSE; 142 const mac_info_t *mip; 143 uint32_t note; 144 uint32_t margin; 145 char client_name[MAXNAMELEN]; 146 char aggr_name[MAXNAMELEN]; 147 char port_name[MAXNAMELEN]; 148 mac_diag_t diag; 149 mac_unicast_handle_t mah; 150 151 *pp = NULL; 152 153 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 154 return (err); 155 156 mip = mac_info(mh); 157 if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { 158 err = EINVAL; 159 goto fail; 160 } 161 162 /* 163 * If the underlying MAC does not support link update notification, it 164 * can only be aggregated if `force' is set. This is because aggr 165 * depends on link notifications to attach ports whose link is up. 166 */ 167 note = mac_no_notification(mh); 168 if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { 169 no_link_update = B_TRUE; 170 if (!force) { 171 /* 172 * We borrow this error code to indicate that link 173 * notification is not supported. 174 */ 175 err = ENETDOWN; 176 goto fail; 177 } 178 } 179 180 if (((err = dls_mgmt_get_linkinfo(grp->lg_linkid, 181 aggr_name, NULL, NULL, NULL)) != 0) || 182 ((err = dls_mgmt_get_linkinfo(linkid, port_name, 183 NULL, NULL, NULL)) != 0)) { 184 goto fail; 185 } 186 187 (void) snprintf(client_name, MAXNAMELEN, "%s-%s", aggr_name, port_name); 188 if ((err = mac_client_open(mh, &mch, client_name, 189 MAC_OPEN_FLAGS_IS_AGGR_PORT | MAC_OPEN_FLAGS_EXCLUSIVE)) != 0) { 190 goto fail; 191 } 192 193 if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { 194 err = ENOMEM; 195 goto fail; 196 } 197 198 /* 199 * As the underlying mac's current margin size is used to determine 200 * the margin size of the aggregation itself, request the underlying 201 * mac not to change to a smaller size. 202 */ 203 if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { 204 id_free(aggr_portids, portid); 205 goto fail; 206 } 207 208 if ((err = mac_unicast_add(mch, NULL, MAC_UNICAST_PRIMARY | 209 MAC_UNICAST_DISABLE_TX_VID_CHECK, &mah, 0, &diag)) != 0) { 210 VERIFY(mac_margin_remove(mh, margin) == 0); 211 id_free(aggr_portids, portid); 212 goto fail; 213 } 214 215 port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); 216 217 port->lp_refs = 1; 218 port->lp_next = NULL; 219 port->lp_mh = mh; 220 port->lp_mch = mch; 221 port->lp_mip = mip; 222 port->lp_linkid = linkid; 223 port->lp_closing = B_FALSE; 224 port->lp_mah = mah; 225 226 /* get the port's original MAC address */ 227 mac_unicast_primary_get(port->lp_mh, port->lp_addr); 228 229 /* initialize state */ 230 port->lp_state = AGGR_PORT_STATE_STANDBY; 231 port->lp_link_state = LINK_STATE_UNKNOWN; 232 port->lp_ifspeed = 0; 233 port->lp_link_duplex = LINK_DUPLEX_UNKNOWN; 234 port->lp_started = B_FALSE; 235 port->lp_tx_enabled = B_FALSE; 236 port->lp_promisc_on = B_FALSE; 237 port->lp_no_link_update = no_link_update; 238 port->lp_portid = portid; 239 port->lp_margin = margin; 240 port->lp_prom_addr = NULL; 241 242 /* 243 * Save the current statistics of the port. They will be used 244 * later by aggr_m_stats() when aggregating the statistics of 245 * the constituent ports. 246 */ 247 for (i = 0; i < MAC_NSTAT; i++) { 248 port->lp_stat[i] = 249 aggr_port_stat(port, i + MAC_STAT_MIN); 250 } 251 for (i = 0; i < ETHER_NSTAT; i++) { 252 port->lp_ether_stat[i] = 253 aggr_port_stat(port, i + MACTYPE_STAT_MIN); 254 } 255 256 /* LACP related state */ 257 port->lp_collector_enabled = B_FALSE; 258 259 *pp = port; 260 return (0); 261 262 fail: 263 if (mch != NULL) 264 mac_client_close(mch, MAC_CLOSE_FLAGS_EXCLUSIVE); 265 mac_close(mh); 266 return (err); 267 } 268 269 void 270 aggr_port_delete(aggr_port_t *port) 271 { 272 aggr_lacp_port_t *pl = &port->lp_lacp; 273 274 ASSERT(port->lp_mphp == NULL); 275 ASSERT(!port->lp_promisc_on); 276 277 port->lp_closing = B_TRUE; 278 279 VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0); 280 mac_rx_clear(port->lp_mch); 281 /* 282 * If the notification callback is already in process and waiting for 283 * the aggr grp's mac perimeter, don't wait (otherwise there would be 284 * deadlock). Otherwise, if mac_notify_remove() succeeds, we can 285 * release the reference held when mac_notify_add() is called. 286 */ 287 if ((port->lp_mnh != NULL) && 288 (mac_notify_remove(port->lp_mnh, B_FALSE) == 0)) { 289 aggr_grp_port_rele(port); 290 } 291 port->lp_mnh = NULL; 292 293 /* 294 * Inform the the port lacp timer thread to exit. Note that waiting 295 * for the thread to exit may cause deadlock since that thread may 296 * need to enter into the mac perimeter which we are currently in. 297 * It is fine to continue without waiting though since that thread 298 * is holding a reference of the port. 299 */ 300 mutex_enter(&pl->lacp_timer_lock); 301 pl->lacp_timer_bits |= LACP_THREAD_EXIT; 302 cv_broadcast(&pl->lacp_timer_cv); 303 mutex_exit(&pl->lacp_timer_lock); 304 305 /* 306 * Restore the port MAC address. Note it is called after the 307 * port's notification callback being removed. This prevent 308 * port's MAC_NOTE_UNICST notify callback function being called. 309 */ 310 (void) mac_unicast_primary_set(port->lp_mh, port->lp_addr); 311 if (port->lp_mah != NULL) 312 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 313 mac_client_close(port->lp_mch, MAC_CLOSE_FLAGS_EXCLUSIVE); 314 mac_close(port->lp_mh); 315 AGGR_PORT_REFRELE(port); 316 } 317 318 void 319 aggr_port_free(aggr_port_t *port) 320 { 321 ASSERT(port->lp_refs == 0); 322 if (port->lp_grp != NULL) 323 AGGR_GRP_REFRELE(port->lp_grp); 324 port->lp_grp = NULL; 325 id_free(aggr_portids, port->lp_portid); 326 port->lp_portid = 0; 327 mutex_destroy(&port->lp_lacp.lacp_timer_lock); 328 cv_destroy(&port->lp_lacp.lacp_timer_cv); 329 kmem_cache_free(aggr_port_cache, port); 330 } 331 332 /* 333 * Invoked upon receiving a MAC_NOTE_LINK notification for 334 * one of the constituent ports. 335 */ 336 boolean_t 337 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port) 338 { 339 boolean_t do_attach = B_FALSE; 340 boolean_t do_detach = B_FALSE; 341 boolean_t link_state_changed = B_TRUE; 342 uint64_t ifspeed; 343 link_state_t link_state; 344 link_duplex_t link_duplex; 345 mac_perim_handle_t mph; 346 347 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 348 mac_perim_enter_by_mh(port->lp_mh, &mph); 349 350 /* 351 * link state change? For links that do not support link state 352 * notification, always assume the link is up. 353 */ 354 link_state = port->lp_no_link_update ? LINK_STATE_UP : 355 mac_link_get(port->lp_mh); 356 if (port->lp_link_state != link_state) { 357 if (link_state == LINK_STATE_UP) 358 do_attach = (port->lp_link_state != LINK_STATE_UP); 359 else 360 do_detach = (port->lp_link_state == LINK_STATE_UP); 361 } 362 port->lp_link_state = link_state; 363 364 /* link duplex change? */ 365 link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX); 366 if (port->lp_link_duplex != link_duplex) { 367 if (link_duplex == LINK_DUPLEX_FULL) 368 do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL); 369 else 370 do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL); 371 } 372 port->lp_link_duplex = link_duplex; 373 374 /* link speed changes? */ 375 ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); 376 if (port->lp_ifspeed != ifspeed) { 377 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 378 do_detach |= (ifspeed != grp->lg_ifspeed); 379 else 380 do_attach |= (ifspeed == grp->lg_ifspeed); 381 } 382 port->lp_ifspeed = ifspeed; 383 384 if (do_attach) { 385 /* attempt to attach the port to the aggregation */ 386 link_state_changed = aggr_grp_attach_port(grp, port); 387 } else if (do_detach) { 388 /* detach the port from the aggregation */ 389 link_state_changed = aggr_grp_detach_port(grp, port); 390 } 391 392 mac_perim_exit(mph); 393 return (link_state_changed); 394 } 395 396 /* 397 * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent 398 * ports of a group. 399 */ 400 static void 401 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, 402 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 403 { 404 boolean_t mac_addr_changed = B_FALSE; 405 boolean_t link_state_changed = B_FALSE; 406 uint8_t mac_addr[ETHERADDRL]; 407 mac_perim_handle_t mph; 408 409 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 410 ASSERT(mac_addr_changedp != NULL); 411 ASSERT(link_state_changedp != NULL); 412 mac_perim_enter_by_mh(port->lp_mh, &mph); 413 414 /* 415 * If it is called when setting the MAC address to the 416 * aggregation group MAC address, do nothing. 417 */ 418 mac_unicast_primary_get(port->lp_mh, mac_addr); 419 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 420 mac_perim_exit(mph); 421 goto done; 422 } 423 424 /* save the new port MAC address */ 425 bcopy(mac_addr, port->lp_addr, ETHERADDRL); 426 427 aggr_grp_port_mac_changed(grp, port, &mac_addr_changed, 428 &link_state_changed); 429 430 mac_perim_exit(mph); 431 432 /* 433 * If this port was used to determine the MAC address of 434 * the group, update the MAC address of the constituent 435 * ports. 436 */ 437 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 438 link_state_changed = B_TRUE; 439 440 done: 441 *mac_addr_changedp = mac_addr_changed; 442 *link_state_changedp = link_state_changed; 443 } 444 445 /* 446 * Notification callback invoked by the MAC service module for 447 * a particular MAC port. 448 */ 449 static void 450 aggr_port_notify_cb(void *arg, mac_notify_type_t type) 451 { 452 aggr_port_t *port = arg; 453 aggr_grp_t *grp = port->lp_grp; 454 boolean_t mac_addr_changed, link_state_changed; 455 mac_perim_handle_t mph; 456 457 mac_perim_enter_by_mh(grp->lg_mh, &mph); 458 if (port->lp_closing) { 459 mac_perim_exit(mph); 460 461 /* 462 * Release the reference so it is safe for aggr to call 463 * mac_unregister() now. 464 */ 465 aggr_grp_port_rele(port); 466 return; 467 } 468 469 switch (type) { 470 case MAC_NOTE_TX: 471 mac_tx_update(grp->lg_mh); 472 break; 473 case MAC_NOTE_LINK: 474 if (aggr_port_notify_link(grp, port)) 475 mac_link_update(grp->lg_mh, grp->lg_link_state); 476 break; 477 case MAC_NOTE_UNICST: 478 aggr_port_notify_unicst(grp, port, &mac_addr_changed, 479 &link_state_changed); 480 if (mac_addr_changed) 481 mac_unicst_update(grp->lg_mh, grp->lg_addr); 482 if (link_state_changed) 483 mac_link_update(grp->lg_mh, grp->lg_link_state); 484 break; 485 default: 486 break; 487 } 488 489 mac_perim_exit(mph); 490 } 491 492 int 493 aggr_port_start(aggr_port_t *port) 494 { 495 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 496 497 if (port->lp_started) 498 return (0); 499 500 port->lp_started = B_TRUE; 501 aggr_grp_multicst_port(port, B_TRUE); 502 return (0); 503 } 504 505 void 506 aggr_port_stop(aggr_port_t *port) 507 { 508 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 509 510 if (!port->lp_started) 511 return; 512 513 aggr_grp_multicst_port(port, B_FALSE); 514 515 /* update the port state */ 516 port->lp_started = B_FALSE; 517 } 518 519 int 520 aggr_port_promisc(aggr_port_t *port, boolean_t on) 521 { 522 int rc; 523 524 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 525 526 if (on == port->lp_promisc_on) 527 /* already in desired promiscous mode */ 528 return (0); 529 530 if (on) { 531 mac_rx_clear(port->lp_mch); 532 /* We use the promisc callback because without hardware 533 * rings, we deliver through flows that will cause duplicate 534 * delivery of packets when we've flipped into this mode 535 * to compensate for the lack of hardware MAC matching 536 */ 537 rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL, 538 aggr_recv_promisc_cb, port, &port->lp_mphp, 539 MAC_PROMISC_FLAGS_NO_TX_LOOP); 540 if (rc != 0) { 541 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 542 return (rc); 543 } 544 } else { 545 mac_promisc_remove(port->lp_mphp); 546 port->lp_mphp = NULL; 547 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 548 } 549 550 port->lp_promisc_on = on; 551 552 return (0); 553 } 554 555 /* 556 * Set the MAC address of a port. 557 */ 558 int 559 aggr_port_unicst(aggr_port_t *port) 560 { 561 aggr_grp_t *grp = port->lp_grp; 562 563 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 564 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 565 566 return (mac_unicast_primary_set(port->lp_mh, grp->lg_addr)); 567 } 568 569 /* 570 * Add or remove a multicast address to/from a port. 571 */ 572 int 573 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp) 574 { 575 aggr_port_t *port = arg; 576 577 if (add) { 578 return (mac_multicast_add(port->lp_mch, addrp)); 579 } else { 580 mac_multicast_remove(port->lp_mch, addrp); 581 return (0); 582 } 583 } 584 585 uint64_t 586 aggr_port_stat(aggr_port_t *port, uint_t stat) 587 { 588 return (mac_stat_get(port->lp_mh, stat)); 589 } 590 591 /* 592 * Add a non-primary unicast address to the underlying port. If the port 593 * supports HW Rx group, try to add the address into the HW Rx group of 594 * the port first. If that fails, or if the port does not support HW Rx 595 * group, enable the port's promiscous mode. 596 */ 597 int 598 aggr_port_addmac(aggr_port_t *port, const uint8_t *mac_addr) 599 { 600 aggr_unicst_addr_t *addr, **pprev; 601 mac_perim_handle_t pmph; 602 int err; 603 604 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 605 mac_perim_enter_by_mh(port->lp_mh, &pmph); 606 607 /* 608 * If the underlying port support HW Rx group, add the mac to its 609 * RX group directly. 610 */ 611 if ((port->lp_hwgh != NULL) && 612 ((mac_hwgroup_addmac(port->lp_hwgh, mac_addr)) == 0)) { 613 mac_perim_exit(pmph); 614 return (0); 615 } 616 617 /* 618 * If that fails, or if the port does not support HW Rx group, enable 619 * the port's promiscous mode. (Note that we turn on the promiscous 620 * mode only if the port is already started. 621 */ 622 if (port->lp_started && 623 ((err = aggr_port_promisc(port, B_TRUE)) != 0)) { 624 mac_perim_exit(pmph); 625 return (err); 626 } 627 628 /* 629 * Walk through the unicast addresses that requires promiscous mode 630 * enabled on this port, and add this address to the end of the list. 631 */ 632 pprev = &port->lp_prom_addr; 633 while ((addr = *pprev) != NULL) { 634 ASSERT(bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0); 635 pprev = &addr->aua_next; 636 } 637 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 638 bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 639 addr->aua_next = NULL; 640 *pprev = addr; 641 mac_perim_exit(pmph); 642 return (0); 643 } 644 645 /* 646 * Remove a non-primary unicast address from the underlying port. This address 647 * must has been added by aggr_port_addmac(). As a result, we probably need to 648 * remove the address from the port's HW Rx group, or to disable the port's 649 * promiscous mode. 650 */ 651 void 652 aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr) 653 { 654 aggr_grp_t *grp = port->lp_grp; 655 aggr_unicst_addr_t *addr, **pprev; 656 mac_perim_handle_t pmph; 657 658 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 659 mac_perim_enter_by_mh(port->lp_mh, &pmph); 660 661 /* 662 * See whether this address is in the list of addresses that requires 663 * the port being promiscous mode. 664 */ 665 pprev = &port->lp_prom_addr; 666 while ((addr = *pprev) != NULL) { 667 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) 668 break; 669 pprev = &addr->aua_next; 670 } 671 if (addr != NULL) { 672 /* 673 * This unicast address put the port into the promiscous mode, 674 * delete this address from the lp_prom_addr list. If this is 675 * the last address in that list, disable the promiscous mode 676 * if the aggregation is not in promiscous mode. 677 */ 678 *pprev = addr->aua_next; 679 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 680 if (port->lp_prom_addr == NULL && !grp->lg_promisc) 681 (void) aggr_port_promisc(port, B_FALSE); 682 } else { 683 ASSERT(port->lp_hwgh != NULL); 684 (void) mac_hwgroup_remmac(port->lp_hwgh, mac_addr); 685 } 686 mac_perim_exit(pmph); 687 }