1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/sysmacros.h> 31 #include <sys/callb.h> 32 #include <sys/conf.h> 33 #include <sys/cmn_err.h> 34 #include <sys/disp.h> 35 #include <sys/list.h> 36 #include <sys/ksynch.h> 37 #include <sys/kmem.h> 38 #include <sys/stream.h> 39 #include <sys/modctl.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/atomic.h> 43 #include <sys/stat.h> 44 #include <sys/byteorder.h> 45 #include <sys/strsun.h> 46 #include <sys/isa_defs.h> 47 #include <sys/sdt.h> 48 49 #include <sys/aggr.h> 50 #include <sys/aggr_impl.h> 51 52 static struct ether_addr etherzeroaddr = { 53 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 54 }; 55 56 /* 57 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec. 
58 */ 59 static struct ether_addr slow_multicast_addr = { 60 {0x01, 0x80, 0xc2, 0x00, 0x00, 0x02} 61 }; 62 63 #ifdef DEBUG 64 /* LACP state machine debugging support */ 65 static uint32_t aggr_lacp_debug = 0; 66 #define AGGR_LACP_DBG(x) if (aggr_lacp_debug) { (void) printf x; } 67 #else 68 #define AGGR_LACP_DBG(x) {} 69 #endif /* DEBUG */ 70 71 #define NSECS_PER_SEC 1000000000ll 72 73 /* used by lacp_misconfig_walker() */ 74 typedef struct lacp_misconfig_check_state_s { 75 aggr_port_t *cs_portp; 76 boolean_t cs_found; 77 } lacp_misconfig_check_state_t; 78 79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS; 80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS; 81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS; 82 83 static uint16_t lacp_port_priority = 0x1000; 84 static uint16_t lacp_system_priority = 0x1000; 85 86 /* 87 * Maintains a list of all ports in ATTACHED state. This information 88 * is used to detect misconfiguration. 89 */ 90 typedef struct lacp_sel_ports { 91 datalink_id_t sp_grp_linkid; 92 datalink_id_t sp_linkid; 93 /* Note: sp_partner_system must be 2-byte aligned */ 94 struct ether_addr sp_partner_system; 95 uint32_t sp_partner_key; 96 struct lacp_sel_ports *sp_next; 97 } lacp_sel_ports_t; 98 99 static lacp_sel_ports_t *sel_ports = NULL; 100 static kmutex_t lacp_sel_lock; 101 102 static void periodic_timer_pop(void *); 103 static void periodic_timer_pop_handler(aggr_port_t *); 104 static void lacp_xmit_sm(aggr_port_t *); 105 static void lacp_periodic_sm(aggr_port_t *); 106 static void fill_lacp_pdu(aggr_port_t *, lacp_t *); 107 static void fill_lacp_ether(aggr_port_t *, struct ether_header *); 108 static void lacp_on(aggr_port_t *); 109 static void lacp_off(aggr_port_t *); 110 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *); 111 static void lacp_receive_sm(aggr_port_t *, lacp_t *); 112 static void aggr_set_coll_dist(aggr_port_t *, boolean_t); 113 static void start_wait_while_timer(aggr_port_t *); 114 
static void stop_wait_while_timer(aggr_port_t *); 115 static void lacp_reset_port(aggr_port_t *); 116 static void stop_current_while_timer(aggr_port_t *); 117 static void current_while_timer_pop(void *); 118 static void current_while_timer_pop_handler(aggr_port_t *); 119 static void update_default_selected(aggr_port_t *); 120 static boolean_t update_selected(aggr_port_t *, lacp_t *); 121 static boolean_t lacp_sel_ports_add(aggr_port_t *); 122 static void lacp_sel_ports_del(aggr_port_t *); 123 static void wait_while_timer_pop(void *); 124 static void wait_while_timer_pop_handler(aggr_port_t *); 125 126 void 127 aggr_lacp_init(void) 128 { 129 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL); 130 } 131 132 void 133 aggr_lacp_fini(void) 134 { 135 mutex_destroy(&lacp_sel_lock); 136 } 137 138 /* 139 * The following functions are used for handling LACP timers. 140 * 141 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout 142 * handler routine, otherwise it may cause deadlock with the untimeout() call 143 * which is usually called with the mac perimeter held. Instead, a 144 * lacp_timer_lock mutex is introduced, which protects a bitwise flag 145 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer() 146 * routines and is checked by a dedicated thread, that executes the real 147 * timeout operation. 
148 */ 149 static void 150 aggr_port_timer_thread(void *arg) 151 { 152 aggr_port_t *port = arg; 153 aggr_lacp_port_t *pl = &port->lp_lacp; 154 aggr_grp_t *grp = port->lp_grp; 155 uint32_t lacp_timer_bits; 156 mac_perim_handle_t mph; 157 callb_cpr_t cprinfo; 158 159 CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr, 160 "aggr_port_timer_thread"); 161 162 mutex_enter(&pl->lacp_timer_lock); 163 164 for (;;) { 165 166 if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) { 167 CALLB_CPR_SAFE_BEGIN(&cprinfo); 168 cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock); 169 CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock); 170 continue; 171 } 172 pl->lacp_timer_bits = 0; 173 174 if (lacp_timer_bits & LACP_THREAD_EXIT) 175 break; 176 177 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 178 pl->periodic_timer.id = 0; 179 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 180 pl->wait_while_timer.id = 0; 181 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 182 pl->current_while_timer.id = 0; 183 184 mutex_exit(&pl->lacp_timer_lock); 185 186 mac_perim_enter_by_mh(grp->lg_mh, &mph); 187 if (port->lp_closing) { 188 mac_perim_exit(mph); 189 mutex_enter(&pl->lacp_timer_lock); 190 break; 191 } 192 193 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 194 periodic_timer_pop_handler(port); 195 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 196 wait_while_timer_pop_handler(port); 197 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 198 current_while_timer_pop_handler(port); 199 mac_perim_exit(mph); 200 201 mutex_enter(&pl->lacp_timer_lock); 202 if (pl->lacp_timer_bits & LACP_THREAD_EXIT) 203 break; 204 } 205 206 pl->lacp_timer_bits = 0; 207 pl->lacp_timer_thread = NULL; 208 cv_broadcast(&pl->lacp_timer_cv); 209 210 /* CALLB_CPR_EXIT drops the lock */ 211 CALLB_CPR_EXIT(&cprinfo); 212 213 /* 214 * Release the reference of the grp so aggr_grp_delete() can call 215 * mac_unregister() safely. 
216 */ 217 aggr_grp_port_rele(port); 218 thread_exit(); 219 } 220 221 /* 222 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation 223 * could not be performed due to a memory allocation error, B_TRUE otherwise. 224 */ 225 static boolean_t 226 lacp_port_select(aggr_port_t *portp) 227 { 228 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 229 230 if (!lacp_sel_ports_add(portp)) 231 return (B_FALSE); 232 portp->lp_lacp.sm.selected = AGGR_SELECTED; 233 return (B_TRUE); 234 } 235 236 /* 237 * Set the port LACP state to UNSELECTED. 238 */ 239 static void 240 lacp_port_unselect(aggr_port_t *portp) 241 { 242 aggr_grp_t *grp = portp->lp_grp; 243 244 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh)); 245 246 lacp_sel_ports_del(portp); 247 portp->lp_lacp.sm.selected = AGGR_UNSELECTED; 248 } 249 250 /* 251 * Initialize group specific LACP state and parameters. 252 */ 253 void 254 aggr_lacp_init_grp(aggr_grp_t *aggrp) 255 { 256 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT; 257 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority; 258 aggrp->aggr.CollectorMaxDelay = 10; 259 aggrp->lg_lacp_mode = AGGR_LACP_OFF; 260 aggrp->aggr.ready = B_FALSE; 261 } 262 263 /* 264 * Complete LACP info initialization at port creation time. 
265 */ 266 void 267 aggr_lacp_init_port(aggr_port_t *portp) 268 { 269 aggr_grp_t *aggrp = portp->lp_grp; 270 aggr_lacp_port_t *pl = &portp->lp_lacp; 271 272 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh)); 273 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 274 275 /* actor port # */ 276 pl->ActorPortNumber = portp->lp_portid; 277 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): " 278 "ActorPortNumber = 0x%x\n", portp->lp_linkid, 279 pl->ActorPortNumber)); 280 281 pl->ActorPortPriority = (uint16_t)lacp_port_priority; 282 pl->ActorPortAggrId = 0; /* aggregator id - not used */ 283 pl->NTT = B_FALSE; /* need to transmit */ 284 285 pl->ActorAdminPortKey = aggrp->lg_key; 286 pl->ActorOperPortKey = pl->ActorAdminPortKey; 287 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) " 288 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n", 289 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey)); 290 291 /* Actor admin. port state */ 292 pl->ActorAdminPortState.bit.activity = B_FALSE; 293 pl->ActorAdminPortState.bit.timeout = B_TRUE; 294 pl->ActorAdminPortState.bit.aggregation = B_TRUE; 295 pl->ActorAdminPortState.bit.sync = B_FALSE; 296 pl->ActorAdminPortState.bit.collecting = B_FALSE; 297 pl->ActorAdminPortState.bit.distributing = B_FALSE; 298 pl->ActorAdminPortState.bit.defaulted = B_FALSE; 299 pl->ActorAdminPortState.bit.expired = B_FALSE; 300 pl->ActorOperPortState = pl->ActorAdminPortState; 301 302 /* 303 * Partner Administrative Information 304 * (All initialized to zero except for the following) 305 * Fast Timeouts. 306 */ 307 pl->PartnerAdminPortState.bit.timeout = 308 pl->PartnerOperPortState.bit.timeout = B_TRUE; 309 310 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 311 312 /* 313 * State machine information. 
314 */ 315 pl->sm.lacp_on = B_FALSE; /* LACP Off default */ 316 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 317 pl->sm.lacp_enabled = B_FALSE; 318 pl->sm.port_enabled = B_FALSE; /* Link Down */ 319 pl->sm.actor_churn = B_FALSE; 320 pl->sm.partner_churn = B_FALSE; 321 pl->sm.ready_n = B_FALSE; 322 pl->sm.port_moved = B_FALSE; 323 324 lacp_port_unselect(portp); 325 326 pl->sm.periodic_state = LACP_NO_PERIODIC; 327 pl->sm.receive_state = LACP_INITIALIZE; 328 pl->sm.mux_state = LACP_DETACHED; 329 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 330 331 /* 332 * Timer information. 333 */ 334 pl->current_while_timer.id = 0; 335 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 336 337 pl->periodic_timer.id = 0; 338 pl->periodic_timer.val = FAST_PERIODIC_TIME; 339 340 pl->wait_while_timer.id = 0; 341 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME; 342 343 pl->lacp_timer_bits = 0; 344 345 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL); 346 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL); 347 348 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread, 349 portp, 0, &p0, TS_RUN, minclsyspri); 350 351 /* 352 * Hold a reference of the grp and the port and this reference will 353 * be release when the thread exits. 354 * 355 * The reference on the port is used for aggr_port_delete() to 356 * continue without waiting for the thread to exit; the reference 357 * on the grp is used for aggr_grp_delete() to wait for the thread 358 * to exit before calling mac_unregister(). 359 */ 360 aggr_grp_port_hold(portp); 361 } 362 363 /* 364 * Port initialization when we need to 365 * turn LACP on/off, etc. Not everything is 366 * reset like in the above routine. 367 * Do NOT modify things like link status. 
368 */ 369 static void 370 lacp_reset_port(aggr_port_t *portp) 371 { 372 aggr_lacp_port_t *pl = &portp->lp_lacp; 373 374 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 375 376 pl->NTT = B_FALSE; /* need to transmit */ 377 378 /* reset operational port state */ 379 pl->ActorOperPortState.bit.timeout = 380 pl->ActorAdminPortState.bit.timeout; 381 382 pl->ActorOperPortState.bit.sync = B_FALSE; 383 pl->ActorOperPortState.bit.collecting = B_FALSE; 384 pl->ActorOperPortState.bit.distributing = B_FALSE; 385 pl->ActorOperPortState.bit.defaulted = B_TRUE; 386 pl->ActorOperPortState.bit.expired = B_FALSE; 387 388 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */ 389 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 390 391 /* 392 * State machine information. 393 */ 394 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 395 pl->sm.actor_churn = B_FALSE; 396 pl->sm.partner_churn = B_FALSE; 397 pl->sm.ready_n = B_FALSE; 398 399 lacp_port_unselect(portp); 400 401 pl->sm.periodic_state = LACP_NO_PERIODIC; 402 pl->sm.receive_state = LACP_INITIALIZE; 403 pl->sm.mux_state = LACP_DETACHED; 404 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 405 406 /* 407 * Timer information. 
408 */ 409 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 410 pl->periodic_timer.val = FAST_PERIODIC_TIME; 411 } 412 413 static void 414 aggr_lacp_mcast_on(aggr_port_t *port) 415 { 416 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 417 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 418 419 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 420 return; 421 422 (void) aggr_port_multicst(port, B_TRUE, 423 (uchar_t *)&slow_multicast_addr); 424 } 425 426 static void 427 aggr_lacp_mcast_off(aggr_port_t *port) 428 { 429 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 430 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 431 432 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 433 return; 434 435 (void) aggr_port_multicst(port, B_FALSE, 436 (uchar_t *)&slow_multicast_addr); 437 } 438 439 static void 440 start_periodic_timer(aggr_port_t *portp) 441 { 442 aggr_lacp_port_t *pl = &portp->lp_lacp; 443 444 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 445 446 mutex_enter(&pl->lacp_timer_lock); 447 if (pl->periodic_timer.id == 0) { 448 pl->periodic_timer.id = timeout(periodic_timer_pop, portp, 449 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val)); 450 } 451 mutex_exit(&pl->lacp_timer_lock); 452 } 453 454 static void 455 stop_periodic_timer(aggr_port_t *portp) 456 { 457 aggr_lacp_port_t *pl = &portp->lp_lacp; 458 timeout_id_t id; 459 460 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 461 462 mutex_enter(&pl->lacp_timer_lock); 463 if ((id = pl->periodic_timer.id) != 0) { 464 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT; 465 pl->periodic_timer.id = 0; 466 } 467 mutex_exit(&pl->lacp_timer_lock); 468 469 if (id != 0) 470 (void) untimeout(id); 471 } 472 473 /* 474 * When the timer pops, we arrive here to 475 * clear out LACPDU count as well as transmit an 476 * LACPDU. We then set the periodic state and let 477 * the periodic state machine restart the timer. 
478 */ 479 static void 480 periodic_timer_pop(void *data) 481 { 482 aggr_port_t *portp = data; 483 aggr_lacp_port_t *pl = &portp->lp_lacp; 484 485 mutex_enter(&pl->lacp_timer_lock); 486 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT; 487 cv_broadcast(&pl->lacp_timer_cv); 488 mutex_exit(&pl->lacp_timer_lock); 489 } 490 491 /* 492 * When the timer pops, we arrive here to 493 * clear out LACPDU count as well as transmit an 494 * LACPDU. We then set the periodic state and let 495 * the periodic state machine restart the timer. 496 */ 497 static void 498 periodic_timer_pop_handler(aggr_port_t *portp) 499 { 500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 501 502 portp->lp_lacp_stats.LACPDUsTx = 0; 503 504 /* current timestamp */ 505 portp->lp_lacp.time = gethrtime(); 506 portp->lp_lacp.NTT = B_TRUE; 507 lacp_xmit_sm(portp); 508 509 /* 510 * Set Periodic State machine state based on the 511 * value of the Partner Operation Port State timeout 512 * bit. 513 */ 514 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) { 515 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME; 516 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC; 517 } else { 518 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME; 519 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC; 520 } 521 522 lacp_periodic_sm(portp); 523 } 524 525 /* 526 * Invoked from: 527 * - startup upon aggregation 528 * - when the periodic timer pops 529 * - when the periodic timer value is changed 530 * - when the port is attached or detached 531 * - when LACP mode is changed. 532 */ 533 static void 534 lacp_periodic_sm(aggr_port_t *portp) 535 { 536 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state; 537 aggr_lacp_port_t *pl = &portp->lp_lacp; 538 539 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 540 541 /* LACP_OFF state not in specification so check here. 
*/ 542 if (!pl->sm.lacp_on) { 543 /* Stop timer whether it is running or not */ 544 stop_periodic_timer(portp); 545 pl->sm.periodic_state = LACP_NO_PERIODIC; 546 pl->NTT = B_FALSE; 547 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP " 548 "%s--->%s\n", portp->lp_linkid, 549 lacp_periodic_str[oldstate], 550 lacp_periodic_str[pl->sm.periodic_state])); 551 return; 552 } 553 554 if (pl->sm.begin || !pl->sm.lacp_enabled || 555 !pl->sm.port_enabled || 556 !pl->ActorOperPortState.bit.activity && 557 !pl->PartnerOperPortState.bit.activity) { 558 559 /* Stop timer whether it is running or not */ 560 stop_periodic_timer(portp); 561 pl->sm.periodic_state = LACP_NO_PERIODIC; 562 pl->NTT = B_FALSE; 563 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n", 564 portp->lp_linkid, lacp_periodic_str[oldstate], 565 lacp_periodic_str[pl->sm.periodic_state])); 566 return; 567 } 568 569 /* 570 * Startup with FAST_PERIODIC_TIME if no previous LACPDU 571 * has been received. Then after we timeout, then it is 572 * possible to go to SLOW_PERIODIC_TIME. 573 */ 574 if (pl->sm.periodic_state == LACP_NO_PERIODIC) { 575 pl->periodic_timer.val = FAST_PERIODIC_TIME; 576 pl->sm.periodic_state = LACP_FAST_PERIODIC; 577 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) && 578 pl->PartnerOperPortState.bit.timeout) { 579 /* 580 * If we receive a bit indicating we are going to 581 * fast periodic from slow periodic, stop the timer 582 * and let the periodic_timer_pop routine deal 583 * with reseting the periodic state and transmitting 584 * a LACPDU. 585 */ 586 stop_periodic_timer(portp); 587 periodic_timer_pop_handler(portp); 588 } 589 590 /* Rearm timer with value provided by partner */ 591 start_periodic_timer(portp); 592 } 593 594 /* 595 * This routine transmits an LACPDU if lacp_enabled 596 * is TRUE and if NTT is set. 
597 */ 598 static void 599 lacp_xmit_sm(aggr_port_t *portp) 600 { 601 aggr_lacp_port_t *pl = &portp->lp_lacp; 602 size_t len; 603 mblk_t *mp; 604 hrtime_t now, elapsed; 605 606 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 607 608 /* LACP_OFF state not in specification so check here. */ 609 if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started) 610 return; 611 612 /* 613 * Do nothing if LACP has been turned off or if the 614 * periodic state machine is not enabled. 615 */ 616 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) || 617 !pl->sm.lacp_enabled || pl->sm.begin) { 618 pl->NTT = B_FALSE; 619 return; 620 } 621 622 /* 623 * If we have sent 5 Slow packets in the last second, avoid 624 * sending any more here. No more than three LACPDUs may be transmitted 625 * in any Fast_Periodic_Time interval. 626 */ 627 if (portp->lp_lacp_stats.LACPDUsTx >= 3) { 628 /* 629 * Grab the current time value and see if 630 * more than 1 second has passed. If so, 631 * reset the timestamp and clear the count. 632 */ 633 now = gethrtime(); 634 elapsed = now - pl->time; 635 if (elapsed > NSECS_PER_SEC) { 636 portp->lp_lacp_stats.LACPDUsTx = 0; 637 pl->time = now; 638 } else { 639 return; 640 } 641 } 642 643 len = sizeof (lacp_t) + sizeof (struct ether_header); 644 mp = allocb(len, BPRI_MED); 645 if (mp == NULL) 646 return; 647 648 mp->b_wptr = mp->b_rptr + len; 649 bzero(mp->b_rptr, len); 650 651 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 652 fill_lacp_pdu(portp, 653 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); 654 655 /* Send the packet over the first TX ring */ 656 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); 657 if (mp != NULL) 658 freemsg(mp); 659 660 pl->NTT = B_FALSE; 661 portp->lp_lacp_stats.LACPDUsTx++; 662 } 663 664 /* 665 * Initialize the ethernet header of a LACP packet sent from the specified 666 * port. 
667 */ 668 static void 669 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether) 670 { 671 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL); 672 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost), 673 ETHERADDRL); 674 ether->ether_type = htons(ETHERTYPE_SLOW); 675 } 676 677 static void 678 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 679 { 680 aggr_lacp_port_t *pl = &portp->lp_lacp; 681 aggr_grp_t *aggrp = portp->lp_grp; 682 mac_perim_handle_t pmph; 683 684 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 685 mac_perim_enter_by_mh(portp->lp_mh, &pmph); 686 687 lacp->subtype = LACP_SUBTYPE; 688 lacp->version = LACP_VERSION; 689 690 /* 691 * Actor Information 692 */ 693 lacp->actor_info.tlv_type = ACTOR_TLV; 694 lacp->actor_info.information_len = sizeof (link_info_t); 695 lacp->actor_info.system_priority = 696 htons(aggrp->aggr.ActorSystemPriority); 697 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id, 698 ETHERADDRL); 699 lacp->actor_info.key = htons(pl->ActorOperPortKey); 700 lacp->actor_info.port_priority = htons(pl->ActorPortPriority); 701 lacp->actor_info.port = htons(pl->ActorPortNumber); 702 lacp->actor_info.state.state = pl->ActorOperPortState.state; 703 704 /* 705 * Partner Information 706 */ 707 lacp->partner_info.tlv_type = PARTNER_TLV; 708 lacp->partner_info.information_len = sizeof (link_info_t); 709 lacp->partner_info.system_priority = 710 htons(pl->PartnerOperSysPriority); 711 lacp->partner_info.system_id = pl->PartnerOperSystem; 712 lacp->partner_info.key = htons(pl->PartnerOperKey); 713 lacp->partner_info.port_priority = 714 htons(pl->PartnerOperPortPriority); 715 lacp->partner_info.port = htons(pl->PartnerOperPortNum); 716 lacp->partner_info.state.state = pl->PartnerOperPortState.state; 717 718 /* Collector Information */ 719 lacp->tlv_collector = COLLECTOR_TLV; 720 lacp->collector_len = 0x10; 721 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay); 722 723 /* Termination Information */ 724 
lacp->tlv_terminator = TERMINATOR_TLV; 725 lacp->terminator_len = 0x0; 726 727 mac_perim_exit(pmph); 728 } 729 730 /* 731 * lacp_mux_sm - LACP mux state machine 732 * This state machine is invoked from: 733 * - startup upon aggregation 734 * - from the Selection logic 735 * - when the wait_while_timer pops 736 * - when the aggregation MAC address is changed 737 * - when receiving DL_NOTE_LINK_UP/DOWN 738 * - when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL 739 * - when LACP mode is changed. 740 * - when a DL_NOTE_SPEED is received 741 */ 742 static void 743 lacp_mux_sm(aggr_port_t *portp) 744 { 745 aggr_grp_t *aggrp = portp->lp_grp; 746 boolean_t NTT_updated = B_FALSE; 747 aggr_lacp_port_t *pl = &portp->lp_lacp; 748 lacp_mux_state_t oldstate = pl->sm.mux_state; 749 750 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 751 752 /* LACP_OFF state not in specification so check here. */ 753 if (!pl->sm.lacp_on) { 754 pl->sm.mux_state = LACP_DETACHED; 755 pl->ActorOperPortState.bit.sync = B_FALSE; 756 757 if (pl->ActorOperPortState.bit.collecting || 758 pl->ActorOperPortState.bit.distributing) { 759 AGGR_LACP_DBG(("trunk link: (%d): " 760 "Collector_Distributor Disabled.\n", 761 portp->lp_linkid)); 762 } 763 764 pl->ActorOperPortState.bit.collecting = 765 pl->ActorOperPortState.bit.distributing = B_FALSE; 766 return; 767 } 768 769 if (pl->sm.begin || !pl->sm.lacp_enabled) 770 pl->sm.mux_state = LACP_DETACHED; 771 772 again: 773 /* determine next state, or return if state unchanged */ 774 switch (pl->sm.mux_state) { 775 case LACP_DETACHED: 776 if (pl->sm.begin) { 777 break; 778 } 779 780 if ((pl->sm.selected == AGGR_SELECTED) || 781 (pl->sm.selected == AGGR_STANDBY)) { 782 pl->sm.mux_state = LACP_WAITING; 783 break; 784 } 785 return; 786 787 case LACP_WAITING: 788 if (pl->sm.selected == AGGR_UNSELECTED) { 789 pl->sm.mux_state = LACP_DETACHED; 790 break; 791 } 792 793 if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) { 794 pl->sm.mux_state = LACP_ATTACHED; 795 break; 796 } 797 
return; 798 799 case LACP_ATTACHED: 800 if ((pl->sm.selected == AGGR_UNSELECTED) || 801 (pl->sm.selected == AGGR_STANDBY)) { 802 pl->sm.mux_state = LACP_DETACHED; 803 break; 804 } 805 806 if ((pl->sm.selected == AGGR_SELECTED) && 807 pl->PartnerOperPortState.bit.sync) { 808 pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING; 809 break; 810 } 811 return; 812 813 case LACP_COLLECTING_DISTRIBUTING: 814 if ((pl->sm.selected == AGGR_UNSELECTED) || 815 (pl->sm.selected == AGGR_STANDBY) || 816 !pl->PartnerOperPortState.bit.sync) { 817 pl->sm.mux_state = LACP_ATTACHED; 818 break; 819 } 820 return; 821 } 822 823 AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n", 824 portp->lp_linkid, lacp_mux_str[oldstate], 825 lacp_mux_str[pl->sm.mux_state])); 826 827 /* perform actions on entering a new state */ 828 switch (pl->sm.mux_state) { 829 case LACP_DETACHED: 830 if (pl->ActorOperPortState.bit.collecting || 831 pl->ActorOperPortState.bit.distributing) { 832 AGGR_LACP_DBG(("trunk link: (%d): " 833 "Collector_Distributor Disabled.\n", 834 portp->lp_linkid)); 835 } 836 837 pl->ActorOperPortState.bit.sync = 838 pl->ActorOperPortState.bit.collecting = B_FALSE; 839 840 /* Turn OFF Collector_Distributor */ 841 aggr_set_coll_dist(portp, B_FALSE); 842 843 pl->ActorOperPortState.bit.distributing = B_FALSE; 844 NTT_updated = B_TRUE; 845 break; 846 847 case LACP_WAITING: 848 start_wait_while_timer(portp); 849 break; 850 851 case LACP_ATTACHED: 852 if (pl->ActorOperPortState.bit.collecting || 853 pl->ActorOperPortState.bit.distributing) { 854 AGGR_LACP_DBG(("trunk link: (%d): " 855 "Collector_Distributor Disabled.\n", 856 portp->lp_linkid)); 857 } 858 859 pl->ActorOperPortState.bit.sync = B_TRUE; 860 pl->ActorOperPortState.bit.collecting = B_FALSE; 861 862 /* Turn OFF Collector_Distributor */ 863 aggr_set_coll_dist(portp, B_FALSE); 864 865 pl->ActorOperPortState.bit.distributing = B_FALSE; 866 NTT_updated = B_TRUE; 867 if (pl->PartnerOperPortState.bit.sync) { 868 /* 869 * We had already received an 
updated sync from 870 * the partner. Attempt to transition to 871 * collecting/distributing now. 872 */ 873 goto again; 874 } 875 break; 876 877 case LACP_COLLECTING_DISTRIBUTING: 878 if (!pl->ActorOperPortState.bit.collecting && 879 !pl->ActorOperPortState.bit.distributing) { 880 AGGR_LACP_DBG(("trunk link: (%d): " 881 "Collector_Distributor Enabled.\n", 882 portp->lp_linkid)); 883 } 884 pl->ActorOperPortState.bit.distributing = B_TRUE; 885 886 /* Turn Collector_Distributor back ON */ 887 aggr_set_coll_dist(portp, B_TRUE); 888 889 pl->ActorOperPortState.bit.collecting = B_TRUE; 890 NTT_updated = B_TRUE; 891 break; 892 } 893 894 /* 895 * If we updated the state of the NTT variable, then 896 * initiate a LACPDU transmission. 897 */ 898 if (NTT_updated) { 899 pl->NTT = B_TRUE; 900 lacp_xmit_sm(portp); 901 } 902 } /* lacp_mux_sm */ 903 904 905 static int 906 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp) 907 { 908 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr; 909 910 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 911 912 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n", 913 portp->lp_linkid)); 914 915 /* LACP_OFF state not in specification so check here. 
*/ 916 if (!portp->lp_lacp.sm.lacp_on) 917 return (-1); 918 919 if (MBLKL(mp) < sizeof (marker_pdu_t)) 920 return (-1); 921 922 if (markerp->version != MARKER_VERSION) { 923 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 924 "version = %d does not match s/w version %d\n", 925 portp->lp_linkid, markerp->version, MARKER_VERSION)); 926 return (-1); 927 } 928 929 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) { 930 /* We do not yet send out MARKER info PDUs */ 931 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: " 932 " MARKER TLV = %d - We don't send out info type!\n", 933 portp->lp_linkid, markerp->tlv_marker)); 934 return (-1); 935 } 936 937 if (markerp->tlv_marker != MARKER_INFO_TLV) { 938 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 939 " MARKER TLV = %d \n", portp->lp_linkid, 940 markerp->tlv_marker)); 941 return (-1); 942 } 943 944 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) { 945 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 946 " MARKER length = %d \n", portp->lp_linkid, 947 markerp->marker_len)); 948 return (-1); 949 } 950 951 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) { 952 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 953 " MARKER Port %d not equal to Partner port %d\n", 954 portp->lp_linkid, markerp->requestor_port, 955 portp->lp_lacp.PartnerOperPortNum)); 956 return (-1); 957 } 958 959 if (ether_cmp(&markerp->system_id, 960 &portp->lp_lacp.PartnerOperSystem) != 0) { 961 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 962 " MARKER MAC not equal to Partner MAC\n", 963 portp->lp_linkid)); 964 return (-1); 965 } 966 967 /* 968 * Turn into Marker Response PDU 969 * and return mblk to sending system 970 */ 971 markerp->tlv_marker = MARKER_RESPONSE_TLV; 972 973 /* reuse the space that was used by received ethernet header */ 974 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header)); 975 mp->b_rptr -= sizeof (struct ether_header); 976 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 
977 return (0); 978 } 979 980 /* 981 * Update the LACP mode (off, active, or passive) of the specified group. 982 */ 983 void 984 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode) 985 { 986 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode; 987 aggr_port_t *port; 988 989 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 990 ASSERT(!grp->lg_closing); 991 992 if (mode == old_mode) 993 return; 994 995 grp->lg_lacp_mode = mode; 996 997 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 998 port->lp_lacp.ActorAdminPortState.bit.activity = 999 port->lp_lacp.ActorOperPortState.bit.activity = 1000 (mode == AGGR_LACP_ACTIVE); 1001 1002 if (old_mode == AGGR_LACP_OFF) { 1003 /* OFF -> {PASSIVE,ACTIVE} */ 1004 /* turn OFF Collector_Distributor */ 1005 aggr_set_coll_dist(port, B_FALSE); 1006 lacp_on(port); 1007 } else if (mode == AGGR_LACP_OFF) { 1008 /* {PASSIVE,ACTIVE} -> OFF */ 1009 lacp_off(port); 1010 /* Turn ON Collector_Distributor */ 1011 aggr_set_coll_dist(port, B_TRUE); 1012 } else { 1013 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */ 1014 port->lp_lacp.sm.begin = B_TRUE; 1015 lacp_mux_sm(port); 1016 lacp_periodic_sm(port); 1017 1018 /* kick off state machines */ 1019 lacp_receive_sm(port, NULL); 1020 lacp_mux_sm(port); 1021 } 1022 } 1023 } 1024 1025 1026 /* 1027 * Update the LACP timer (short or long) of the specified group. 
1028 */ 1029 void 1030 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer) 1031 { 1032 aggr_port_t *port; 1033 1034 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1035 1036 if (timer == grp->aggr.PeriodicTimer) 1037 return; 1038 1039 grp->aggr.PeriodicTimer = timer; 1040 1041 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1042 port->lp_lacp.ActorAdminPortState.bit.timeout = 1043 port->lp_lacp.ActorOperPortState.bit.timeout = 1044 (timer == AGGR_LACP_TIMER_SHORT); 1045 } 1046 } 1047 1048 void 1049 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port) 1050 { 1051 aggr_lacp_mode_t mode; 1052 aggr_lacp_timer_t timer; 1053 1054 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1055 1056 mode = grp->lg_lacp_mode; 1057 timer = grp->aggr.PeriodicTimer; 1058 1059 port->lp_lacp.ActorAdminPortState.bit.activity = 1060 port->lp_lacp.ActorOperPortState.bit.activity = 1061 (mode == AGGR_LACP_ACTIVE); 1062 1063 port->lp_lacp.ActorAdminPortState.bit.timeout = 1064 port->lp_lacp.ActorOperPortState.bit.timeout = 1065 (timer == AGGR_LACP_TIMER_SHORT); 1066 1067 if (mode == AGGR_LACP_OFF) { 1068 /* Turn ON Collector_Distributor */ 1069 aggr_set_coll_dist(port, B_TRUE); 1070 } else { /* LACP_ACTIVE/PASSIVE */ 1071 lacp_on(port); 1072 } 1073 } 1074 1075 /* 1076 * Sets the initial LACP mode (off, active, passive) and LACP timer 1077 * (short, long) of the specified group. 1078 */ 1079 void 1080 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode, 1081 aggr_lacp_timer_t timer) 1082 { 1083 aggr_port_t *port; 1084 1085 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1086 1087 grp->lg_lacp_mode = mode; 1088 grp->aggr.PeriodicTimer = timer; 1089 1090 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1091 aggr_port_lacp_set_mode(grp, port); 1092 } 1093 1094 /* 1095 * Verify that the Partner MAC and Key recorded by the specified 1096 * port are not found in other ports that are not part of our 1097 * aggregation. 
Returns B_TRUE if such a port is found, B_FALSE
 * otherwise.
 */
static boolean_t
lacp_misconfig_check(aggr_port_t *portp)
{
	aggr_grp_t		*grp = portp->lp_grp;
	lacp_sel_ports_t	*entry;
	boolean_t		in_use = B_FALSE;

	mutex_enter(&lacp_sel_lock);

	for (entry = sel_ports; entry != NULL; entry = entry->sp_next) {
		char			mac_str[ETHERADDRL*3];
		struct ether_addr	*mac;

		/* skip entries of the group of the port being checked */
		if (entry->sp_grp_linkid == grp->lg_linkid)
			continue;

		/* only the group's Partner MAC/key pair is a conflict */
		if (ether_cmp(&entry->sp_partner_system,
		    &grp->aggr.PartnerSystem) != 0)
			continue;
		if (entry->sp_partner_key != grp->aggr.PartnerOperAggrKey)
			continue;

		/*
		 * The Partner port information is already in use
		 * by ports in another aggregation so disable this
		 * port.
		 */
		mac = &entry->sp_partner_system;
		(void) snprintf(mac_str, sizeof (mac_str),
		    "%x:%x:%x:%x:%x:%x",
		    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
		    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
		    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);

		portp->lp_lacp.sm.selected = AGGR_UNSELECTED;

		cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
		    "MAC %s and key %d in use on aggregation %d "
		    "port %d\n", grp->lg_linkid, portp->lp_linkid,
		    mac_str, portp->lp_lacp.PartnerOperKey,
		    entry->sp_grp_linkid, entry->sp_linkid);

		/* the first conflicting entry is enough */
		in_use = B_TRUE;
		break;
	}

	mutex_exit(&lacp_sel_lock);
	return (in_use);
}

/*
 * Remove the specified port from the list of selected ports.
1149 */ 1150 static void 1151 lacp_sel_ports_del(aggr_port_t *portp) 1152 { 1153 lacp_sel_ports_t *cport, **prev = NULL; 1154 1155 mutex_enter(&lacp_sel_lock); 1156 1157 prev = &sel_ports; 1158 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next, 1159 cport = cport->sp_next) { 1160 if (portp->lp_linkid == cport->sp_linkid) 1161 break; 1162 } 1163 1164 if (cport == NULL) { 1165 mutex_exit(&lacp_sel_lock); 1166 return; 1167 } 1168 1169 *prev = cport->sp_next; 1170 kmem_free(cport, sizeof (*cport)); 1171 1172 mutex_exit(&lacp_sel_lock); 1173 } 1174 1175 /* 1176 * Add the specified port to the list of selected ports. Returns B_FALSE 1177 * if the operation could not be performed due to an memory allocation 1178 * error. 1179 */ 1180 static boolean_t 1181 lacp_sel_ports_add(aggr_port_t *portp) 1182 { 1183 lacp_sel_ports_t *new_port; 1184 lacp_sel_ports_t *cport, **last; 1185 1186 mutex_enter(&lacp_sel_lock); 1187 1188 /* check if port is already in the list */ 1189 last = &sel_ports; 1190 for (cport = sel_ports; cport != NULL; 1191 last = &cport->sp_next, cport = cport->sp_next) { 1192 if (portp->lp_linkid == cport->sp_linkid) { 1193 ASSERT(cport->sp_partner_key == 1194 portp->lp_lacp.PartnerOperKey); 1195 ASSERT(ether_cmp(&cport->sp_partner_system, 1196 &portp->lp_lacp.PartnerOperSystem) == 0); 1197 1198 mutex_exit(&lacp_sel_lock); 1199 return (B_TRUE); 1200 } 1201 } 1202 1203 /* create and initialize new entry */ 1204 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP); 1205 if (new_port == NULL) { 1206 mutex_exit(&lacp_sel_lock); 1207 return (B_FALSE); 1208 } 1209 1210 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid; 1211 bcopy(&portp->lp_lacp.PartnerOperSystem, 1212 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system)); 1213 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey; 1214 new_port->sp_linkid = portp->lp_linkid; 1215 1216 *last = new_port; 1217 1218 mutex_exit(&lacp_sel_lock); 1219 return (B_TRUE); 1220 } 1221 1222 
/* 1223 * lacp_selection_logic - LACP selection logic 1224 * Sets the selected variable on a per port basis 1225 * and sets Ready when all waiting ports are ready 1226 * to go online. 1227 * 1228 * parameters: 1229 * - portp - instance this applies to. 1230 * 1231 * invoked: 1232 * - when initialization is needed 1233 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state 1234 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state 1235 * - every time the wait_while_timer pops 1236 * - everytime we turn LACP on/off 1237 */ 1238 static void 1239 lacp_selection_logic(aggr_port_t *portp) 1240 { 1241 aggr_port_t *tpp; 1242 aggr_grp_t *aggrp = portp->lp_grp; 1243 int ports_waiting; 1244 boolean_t reset_mac = B_FALSE; 1245 aggr_lacp_port_t *pl = &portp->lp_lacp; 1246 1247 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1248 1249 /* LACP_OFF state not in specification so check here. */ 1250 if (!pl->sm.lacp_on) { 1251 lacp_port_unselect(portp); 1252 aggrp->aggr.ready = B_FALSE; 1253 lacp_mux_sm(portp); 1254 return; 1255 } 1256 1257 if (pl->sm.begin || !pl->sm.lacp_enabled || 1258 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) { 1259 1260 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1261 "selected %d-->%d (begin=%d, lacp_enabled = %d, " 1262 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected, 1263 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled, 1264 portp->lp_state)); 1265 1266 lacp_port_unselect(portp); 1267 aggrp->aggr.ready = B_FALSE; 1268 lacp_mux_sm(portp); 1269 return; 1270 } 1271 1272 /* 1273 * If LACP is not enabled then selected is never set. 1274 */ 1275 if (!pl->sm.lacp_enabled) { 1276 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n", 1277 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); 1278 1279 lacp_port_unselect(portp); 1280 lacp_mux_sm(portp); 1281 return; 1282 } 1283 1284 /* 1285 * Check if the Partner MAC or Key are zero. 
If so, we have 1286 * not received any LACP info or it has expired and the 1287 * receive machine is in the LACP_DEFAULTED state. 1288 */ 1289 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 || 1290 (pl->PartnerOperKey == 0)) { 1291 1292 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1293 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1294 ðerzeroaddr) != 0 && 1295 (tpp->lp_lacp.PartnerOperKey != 0)) 1296 break; 1297 } 1298 1299 /* 1300 * If all ports have no key or aggregation address, 1301 * then clear the negotiated Partner MAC and key. 1302 */ 1303 if (tpp == NULL) { 1304 /* Clear the aggregation Partner MAC and key */ 1305 aggrp->aggr.PartnerSystem = etherzeroaddr; 1306 aggrp->aggr.PartnerOperAggrKey = 0; 1307 } 1308 1309 return; 1310 } 1311 1312 /* 1313 * Insure that at least one port in the aggregation 1314 * matches the Partner aggregation MAC and key. If not, 1315 * then clear the aggregation MAC and key. Later we will 1316 * set the Partner aggregation MAC and key to that of the 1317 * current port's Partner MAC and key. 1318 */ 1319 if (ether_cmp(&pl->PartnerOperSystem, 1320 &aggrp->aggr.PartnerSystem) != 0 || 1321 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1322 1323 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1324 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1325 &aggrp->aggr.PartnerSystem) == 0 && 1326 (tpp->lp_lacp.PartnerOperKey == 1327 aggrp->aggr.PartnerOperAggrKey)) { 1328 /* Set aggregation Partner MAC and key */ 1329 aggrp->aggr.PartnerSystem = 1330 pl->PartnerOperSystem; 1331 aggrp->aggr.PartnerOperAggrKey = 1332 pl->PartnerOperKey; 1333 break; 1334 } 1335 } 1336 1337 if (tpp == NULL) { 1338 /* Clear the aggregation Partner MAC and key */ 1339 aggrp->aggr.PartnerSystem = etherzeroaddr; 1340 aggrp->aggr.PartnerOperAggrKey = 0; 1341 reset_mac = B_TRUE; 1342 } 1343 } 1344 1345 /* 1346 * If our Actor MAC is found in the Partner MAC 1347 * on this port then we have a loopback misconfiguration. 
1348 */ 1349 if (ether_cmp(&pl->PartnerOperSystem, 1350 (struct ether_addr *)&aggrp->lg_addr) == 0) { 1351 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n", 1352 portp->lp_linkid); 1353 1354 lacp_port_unselect(portp); 1355 lacp_mux_sm(portp); 1356 return; 1357 } 1358 1359 /* 1360 * If our Partner MAC and Key are found on any other 1361 * ports that are not in our aggregation, we have 1362 * a misconfiguration. 1363 */ 1364 if (lacp_misconfig_check(portp)) { 1365 lacp_mux_sm(portp); 1366 return; 1367 } 1368 1369 /* 1370 * If the Aggregation Partner MAC and Key have not been 1371 * set, then this is either the first port or the aggregation 1372 * MAC and key have been reset. In either case we must set 1373 * the values of the Partner MAC and key. 1374 */ 1375 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 && 1376 (aggrp->aggr.PartnerOperAggrKey == 0)) { 1377 /* Set aggregation Partner MAC and key */ 1378 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem; 1379 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey; 1380 1381 /* 1382 * If we reset Partner aggregation MAC, then restart 1383 * selection_logic on ports that match new MAC address. 1384 */ 1385 if (reset_mac) { 1386 for (tpp = aggrp->lg_ports; tpp; tpp = 1387 tpp->lp_next) { 1388 if (tpp == portp) 1389 continue; 1390 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1391 &aggrp->aggr.PartnerSystem) == 0 && 1392 (tpp->lp_lacp.PartnerOperKey == 1393 aggrp->aggr.PartnerOperAggrKey)) 1394 lacp_selection_logic(tpp); 1395 } 1396 } 1397 } else if (ether_cmp(&pl->PartnerOperSystem, 1398 &aggrp->aggr.PartnerSystem) != 0 || 1399 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1400 /* 1401 * The Partner port information does not match 1402 * that of the other ports in the aggregation 1403 * so disable this port. 
1404 */ 1405 lacp_port_unselect(portp); 1406 1407 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC " 1408 "or key (%d) incompatible with Aggregation Partner " 1409 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey, 1410 aggrp->aggr.PartnerOperAggrKey); 1411 1412 lacp_mux_sm(portp); 1413 return; 1414 } 1415 1416 /* If we get to here, automatically set selected */ 1417 if (pl->sm.selected != AGGR_SELECTED) { 1418 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1419 "selected %d-->%d\n", portp->lp_linkid, 1420 pl->sm.selected, AGGR_SELECTED)); 1421 if (!lacp_port_select(portp)) 1422 return; 1423 lacp_mux_sm(portp); 1424 } 1425 1426 /* 1427 * From this point onward we have selected the port 1428 * and are simply checking if the Ready flag should 1429 * be set. 1430 */ 1431 1432 /* 1433 * If at least two ports are waiting to aggregate 1434 * and ready_n is set on all ports waiting to aggregate 1435 * then set READY for the aggregation. 1436 */ 1437 1438 ports_waiting = 0; 1439 1440 if (!aggrp->aggr.ready) { 1441 /* 1442 * If all ports in the aggregation have received compatible 1443 * partner information and they match up correctly with the 1444 * switch, there is no need to wait for all the 1445 * wait_while_timers to pop. 
1446 */ 1447 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1448 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) || 1449 tpp->lp_lacp.sm.begin) && 1450 !tpp->lp_lacp.PartnerOperPortState.bit.sync) { 1451 /* Add up ports uninitialized or waiting */ 1452 ports_waiting++; 1453 if (!tpp->lp_lacp.sm.ready_n) { 1454 DTRACE_PROBE1(port___not__ready, 1455 aggr_port_t *, tpp); 1456 return; 1457 } 1458 } 1459 } 1460 } 1461 1462 if (aggrp->aggr.ready) { 1463 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1464 "aggr.ready already set\n", portp->lp_linkid)); 1465 lacp_mux_sm(portp); 1466 } else { 1467 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n", 1468 portp->lp_linkid, aggrp->aggr.ready, B_TRUE)); 1469 aggrp->aggr.ready = B_TRUE; 1470 1471 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) 1472 lacp_mux_sm(tpp); 1473 } 1474 1475 } 1476 1477 /* 1478 * wait_while_timer_pop - When the timer pops, we arrive here to 1479 * set ready_n and trigger the selection logic. 1480 */ 1481 static void 1482 wait_while_timer_pop(void *data) 1483 { 1484 aggr_port_t *portp = data; 1485 aggr_lacp_port_t *pl = &portp->lp_lacp; 1486 1487 mutex_enter(&pl->lacp_timer_lock); 1488 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT; 1489 cv_broadcast(&pl->lacp_timer_cv); 1490 mutex_exit(&pl->lacp_timer_lock); 1491 } 1492 1493 /* 1494 * wait_while_timer_pop_handler - When the timer pops, we arrive here to 1495 * set ready_n and trigger the selection logic. 
1496 */ 1497 static void 1498 wait_while_timer_pop_handler(aggr_port_t *portp) 1499 { 1500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1501 1502 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n", 1503 portp->lp_linkid)); 1504 portp->lp_lacp.sm.ready_n = B_TRUE; 1505 1506 lacp_selection_logic(portp); 1507 } 1508 1509 static void 1510 start_wait_while_timer(aggr_port_t *portp) 1511 { 1512 aggr_lacp_port_t *pl = &portp->lp_lacp; 1513 1514 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1515 1516 mutex_enter(&pl->lacp_timer_lock); 1517 if (pl->wait_while_timer.id == 0) { 1518 pl->wait_while_timer.id = 1519 timeout(wait_while_timer_pop, portp, 1520 drv_usectohz(1000000 * 1521 portp->lp_lacp.wait_while_timer.val)); 1522 } 1523 mutex_exit(&pl->lacp_timer_lock); 1524 } 1525 1526 1527 static void 1528 stop_wait_while_timer(aggr_port_t *portp) 1529 { 1530 aggr_lacp_port_t *pl = &portp->lp_lacp; 1531 timeout_id_t id; 1532 1533 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1534 1535 mutex_enter(&pl->lacp_timer_lock); 1536 if ((id = pl->wait_while_timer.id) != 0) { 1537 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT; 1538 pl->wait_while_timer.id = 0; 1539 } 1540 mutex_exit(&pl->lacp_timer_lock); 1541 1542 if (id != 0) 1543 (void) untimeout(id); 1544 } 1545 1546 /* 1547 * Invoked when a port has been attached to a group. 1548 * Complete the processing that couldn't be finished from lacp_on() 1549 * because the port was not started. We know that the link is full 1550 * duplex and ON, otherwise it wouldn't be attached. 
1551 */ 1552 void 1553 aggr_lacp_port_attached(aggr_port_t *portp) 1554 { 1555 aggr_grp_t *grp = portp->lp_grp; 1556 aggr_lacp_port_t *pl = &portp->lp_lacp; 1557 1558 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1559 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1560 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED); 1561 1562 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n", 1563 portp->lp_linkid)); 1564 1565 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */ 1566 1567 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1568 return; 1569 1570 pl->sm.lacp_enabled = B_TRUE; 1571 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1572 pl->sm.begin = B_TRUE; 1573 1574 lacp_receive_sm(portp, NULL); 1575 lacp_mux_sm(portp); 1576 1577 /* Enable Multicast Slow Protocol address */ 1578 aggr_lacp_mcast_on(portp); 1579 1580 /* periodic_sm is started up from the receive machine */ 1581 lacp_selection_logic(portp); 1582 } 1583 1584 /* 1585 * Invoked when a port has been detached from a group. Turn off 1586 * LACP processing if it was enabled. 1587 */ 1588 void 1589 aggr_lacp_port_detached(aggr_port_t *portp) 1590 { 1591 aggr_grp_t *grp = portp->lp_grp; 1592 1593 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1594 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1595 1596 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n", 1597 portp->lp_linkid)); 1598 1599 portp->lp_lacp.sm.port_enabled = B_FALSE; 1600 1601 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1602 return; 1603 1604 portp->lp_lacp.sm.lacp_enabled = B_FALSE; 1605 lacp_selection_logic(portp); 1606 lacp_mux_sm(portp); 1607 lacp_periodic_sm(portp); 1608 1609 /* 1610 * Disable Slow Protocol Timers. 1611 */ 1612 stop_periodic_timer(portp); 1613 stop_current_while_timer(portp); 1614 stop_wait_while_timer(portp); 1615 1616 /* Disable Multicast Slow Protocol address */ 1617 aggr_lacp_mcast_off(portp); 1618 aggr_set_coll_dist(portp, B_FALSE); 1619 } 1620 1621 /* 1622 * Enable Slow Protocol LACP and Marker PDUs. 
1623 */ 1624 static void 1625 lacp_on(aggr_port_t *portp) 1626 { 1627 aggr_lacp_port_t *pl = &portp->lp_lacp; 1628 mac_perim_handle_t mph; 1629 1630 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1631 1632 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1633 1634 /* 1635 * Reset the state machines and Partner operational 1636 * information. Careful to not reset things like 1637 * our link state. 1638 */ 1639 lacp_reset_port(portp); 1640 pl->sm.lacp_on = B_TRUE; 1641 1642 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid)); 1643 1644 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1645 pl->sm.port_enabled = B_TRUE; 1646 pl->sm.lacp_enabled = B_TRUE; 1647 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1648 } 1649 1650 lacp_receive_sm(portp, NULL); 1651 lacp_mux_sm(portp); 1652 1653 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1654 /* Enable Multicast Slow Protocol address */ 1655 aggr_lacp_mcast_on(portp); 1656 1657 /* periodic_sm is started up from the receive machine */ 1658 lacp_selection_logic(portp); 1659 } 1660 done: 1661 mac_perim_exit(mph); 1662 } /* lacp_on */ 1663 1664 /* Disable Slow Protocol LACP and Marker PDUs */ 1665 static void 1666 lacp_off(aggr_port_t *portp) 1667 { 1668 aggr_lacp_port_t *pl = &portp->lp_lacp; 1669 mac_perim_handle_t mph; 1670 1671 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1672 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1673 1674 pl->sm.lacp_on = B_FALSE; 1675 1676 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid)); 1677 1678 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1679 /* 1680 * Disable Slow Protocol Timers. 
1681 */ 1682 stop_periodic_timer(portp); 1683 stop_current_while_timer(portp); 1684 stop_wait_while_timer(portp); 1685 1686 /* Disable Multicast Slow Protocol address */ 1687 aggr_lacp_mcast_off(portp); 1688 1689 pl->sm.port_enabled = B_FALSE; 1690 pl->sm.lacp_enabled = B_FALSE; 1691 pl->ActorOperPortState.bit.aggregation = B_FALSE; 1692 } 1693 1694 lacp_mux_sm(portp); 1695 lacp_periodic_sm(portp); 1696 lacp_selection_logic(portp); 1697 1698 /* Turn OFF Collector_Distributor */ 1699 aggr_set_coll_dist(portp, B_FALSE); 1700 1701 lacp_reset_port(portp); 1702 mac_perim_exit(mph); 1703 } 1704 1705 1706 static boolean_t 1707 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 1708 { 1709 /* 1710 * 43.4.12 - "a Receive machine shall not validate 1711 * the Version Number, TLV_type, or Reserved fields in received 1712 * LACPDUs." 1713 * ... "a Receive machine may validate the Actor_Information_Length, 1714 * Partner_Information_Length, Collector_Information_Length, 1715 * or Terminator_Length fields." 
1716 */ 1717 if ((lacp->actor_info.information_len != sizeof (link_info_t)) || 1718 (lacp->partner_info.information_len != sizeof (link_info_t)) || 1719 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) || 1720 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) { 1721 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: " 1722 " Terminator Length = %d \n", portp->lp_linkid, 1723 lacp->terminator_len)); 1724 return (B_FALSE); 1725 } 1726 1727 return (B_TRUE); 1728 } 1729 1730 1731 static void 1732 start_current_while_timer(aggr_port_t *portp, uint_t time) 1733 { 1734 aggr_lacp_port_t *pl = &portp->lp_lacp; 1735 1736 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1737 1738 mutex_enter(&pl->lacp_timer_lock); 1739 if (pl->current_while_timer.id == 0) { 1740 if (time > 0) 1741 pl->current_while_timer.val = time; 1742 else if (pl->ActorOperPortState.bit.timeout) 1743 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 1744 else 1745 pl->current_while_timer.val = LONG_TIMEOUT_TIME; 1746 1747 pl->current_while_timer.id = 1748 timeout(current_while_timer_pop, portp, 1749 drv_usectohz((clock_t)1000000 * 1750 (clock_t)portp->lp_lacp.current_while_timer.val)); 1751 } 1752 mutex_exit(&pl->lacp_timer_lock); 1753 } 1754 1755 1756 static void 1757 stop_current_while_timer(aggr_port_t *portp) 1758 { 1759 aggr_lacp_port_t *pl = &portp->lp_lacp; 1760 timeout_id_t id; 1761 1762 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1763 1764 mutex_enter(&pl->lacp_timer_lock); 1765 if ((id = pl->current_while_timer.id) != 0) { 1766 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT; 1767 pl->current_while_timer.id = 0; 1768 } 1769 mutex_exit(&pl->lacp_timer_lock); 1770 1771 if (id != 0) 1772 (void) untimeout(id); 1773 } 1774 1775 static void 1776 current_while_timer_pop(void *data) 1777 { 1778 aggr_port_t *portp = (aggr_port_t *)data; 1779 aggr_lacp_port_t *pl = &portp->lp_lacp; 1780 1781 mutex_enter(&pl->lacp_timer_lock); 1782 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT; 1783 
cv_broadcast(&pl->lacp_timer_cv); 1784 mutex_exit(&pl->lacp_timer_lock); 1785 } 1786 1787 static void 1788 current_while_timer_pop_handler(aggr_port_t *portp) 1789 { 1790 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1791 1792 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer " 1793 "pop id=%p\n", portp->lp_linkid, 1794 portp->lp_lacp.current_while_timer.id)); 1795 1796 lacp_receive_sm(portp, NULL); 1797 } 1798 1799 /* 1800 * record_Default - Simply copies over administrative values 1801 * to the partner operational values, and sets our state to indicate we 1802 * are using defaulted values. 1803 */ 1804 static void 1805 record_Default(aggr_port_t *portp) 1806 { 1807 aggr_lacp_port_t *pl = &portp->lp_lacp; 1808 1809 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1810 1811 pl->PartnerOperPortNum = pl->PartnerAdminPortNum; 1812 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority; 1813 pl->PartnerOperSystem = pl->PartnerAdminSystem; 1814 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority; 1815 pl->PartnerOperKey = pl->PartnerAdminKey; 1816 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state; 1817 1818 pl->ActorOperPortState.bit.defaulted = B_TRUE; 1819 } 1820 1821 1822 /* Returns B_TRUE on sync value changing */ 1823 static boolean_t 1824 record_PDU(aggr_port_t *portp, lacp_t *lacp) 1825 { 1826 aggr_grp_t *aggrp = portp->lp_grp; 1827 aggr_lacp_port_t *pl = &portp->lp_lacp; 1828 uint8_t save_sync; 1829 1830 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1831 1832 /* 1833 * Partner Information 1834 */ 1835 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port); 1836 pl->PartnerOperPortPriority = 1837 ntohs(lacp->actor_info.port_priority); 1838 pl->PartnerOperSystem = lacp->actor_info.system_id; 1839 pl->PartnerOperSysPriority = 1840 htons(lacp->actor_info.system_priority); 1841 pl->PartnerOperKey = ntohs(lacp->actor_info.key); 1842 1843 /* All state info except for Synchronization */ 1844 save_sync = pl->PartnerOperPortState.bit.sync; 1845 
pl->PartnerOperPortState.state = lacp->actor_info.state.state; 1846 1847 /* Defaulted set to FALSE */ 1848 pl->ActorOperPortState.bit.defaulted = B_FALSE; 1849 1850 /* 1851 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system, 1852 * Partner_System_Priority, Partner_Key, and 1853 * Partner_State.Aggregation) are compared to the 1854 * corresponding operations paramters values for 1855 * the Actor. If these are equal, or if this is 1856 * an individual link, we are synchronized. 1857 */ 1858 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) && 1859 (ntohs(lacp->partner_info.port_priority) == 1860 pl->ActorPortPriority) && 1861 (ether_cmp(&lacp->partner_info.system_id, 1862 (struct ether_addr *)&aggrp->lg_addr) == 0) && 1863 (ntohs(lacp->partner_info.system_priority) == 1864 aggrp->aggr.ActorSystemPriority) && 1865 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) && 1866 (lacp->partner_info.state.bit.aggregation == 1867 pl->ActorOperPortState.bit.aggregation)) || 1868 (!lacp->actor_info.state.bit.aggregation)) { 1869 1870 pl->PartnerOperPortState.bit.sync = 1871 lacp->actor_info.state.bit.sync; 1872 } else { 1873 pl->PartnerOperPortState.bit.sync = B_FALSE; 1874 } 1875 1876 if (save_sync != pl->PartnerOperPortState.bit.sync) { 1877 AGGR_LACP_DBG(("record_PDU:(%d): partner sync " 1878 "%d -->%d\n", portp->lp_linkid, save_sync, 1879 pl->PartnerOperPortState.bit.sync)); 1880 return (B_TRUE); 1881 } else { 1882 return (B_FALSE); 1883 } 1884 } 1885 1886 1887 /* 1888 * update_selected - If any of the Partner parameters has 1889 * changed from a previous value, then 1890 * unselect the link from the aggregator. 
1891 */ 1892 static boolean_t 1893 update_selected(aggr_port_t *portp, lacp_t *lacp) 1894 { 1895 aggr_lacp_port_t *pl = &portp->lp_lacp; 1896 1897 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1898 1899 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) || 1900 (pl->PartnerOperPortPriority != 1901 ntohs(lacp->actor_info.port_priority)) || 1902 (ether_cmp(&pl->PartnerOperSystem, 1903 &lacp->actor_info.system_id) != 0) || 1904 (pl->PartnerOperSysPriority != 1905 ntohs(lacp->actor_info.system_priority)) || 1906 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) || 1907 (pl->PartnerOperPortState.bit.aggregation != 1908 lacp->actor_info.state.bit.aggregation)) { 1909 AGGR_LACP_DBG(("update_selected:(%d): " 1910 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, 1911 AGGR_UNSELECTED)); 1912 1913 lacp_port_unselect(portp); 1914 return (B_TRUE); 1915 } else { 1916 return (B_FALSE); 1917 } 1918 } 1919 1920 1921 /* 1922 * update_default_selected - If any of the operational Partner parameters 1923 * is different than that of the administrative values 1924 * then unselect the link from the aggregator. 
1925 */ 1926 static void 1927 update_default_selected(aggr_port_t *portp) 1928 { 1929 aggr_lacp_port_t *pl = &portp->lp_lacp; 1930 1931 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1932 1933 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) || 1934 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) || 1935 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) || 1936 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) || 1937 (pl->PartnerOperKey != pl->PartnerAdminKey) || 1938 (pl->PartnerOperPortState.bit.aggregation != 1939 pl->PartnerAdminPortState.bit.aggregation)) { 1940 1941 AGGR_LACP_DBG(("update_default_selected:(%d): " 1942 "selected %d-->%d\n", portp->lp_linkid, 1943 pl->sm.selected, AGGR_UNSELECTED)); 1944 1945 lacp_port_unselect(portp); 1946 } 1947 } 1948 1949 1950 /* 1951 * update_NTT - If any of the Partner values in the received LACPDU 1952 * are different than that of the Actor operational 1953 * values then set NTT to true. 1954 */ 1955 static void 1956 update_NTT(aggr_port_t *portp, lacp_t *lacp) 1957 { 1958 aggr_grp_t *aggrp = portp->lp_grp; 1959 aggr_lacp_port_t *pl = &portp->lp_lacp; 1960 1961 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1962 1963 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) || 1964 (pl->ActorPortPriority != 1965 ntohs(lacp->partner_info.port_priority)) || 1966 (ether_cmp(&aggrp->lg_addr, 1967 &lacp->partner_info.system_id) != 0) || 1968 (aggrp->aggr.ActorSystemPriority != 1969 ntohs(lacp->partner_info.system_priority)) || 1970 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) || 1971 (pl->ActorOperPortState.bit.activity != 1972 lacp->partner_info.state.bit.activity) || 1973 (pl->ActorOperPortState.bit.timeout != 1974 lacp->partner_info.state.bit.timeout) || 1975 (pl->ActorOperPortState.bit.sync != 1976 lacp->partner_info.state.bit.sync) || 1977 (pl->ActorOperPortState.bit.aggregation != 1978 lacp->partner_info.state.bit.aggregation)) { 1979 1980 
AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n", 1981 portp->lp_linkid, pl->NTT, B_TRUE)); 1982 1983 pl->NTT = B_TRUE; 1984 } 1985 } 1986 1987 /* 1988 * lacp_receive_sm - LACP receive state machine 1989 * 1990 * parameters: 1991 * - portp - instance this applies to. 1992 * - lacp - pointer in the case of a received LACPDU. 1993 * This value is NULL if there is no LACPDU. 1994 * 1995 * invoked: 1996 * - when initialization is needed 1997 * - upon reception of an LACPDU. This is the common case. 1998 * - every time the current_while_timer pops 1999 */ 2000 static void 2001 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) 2002 { 2003 boolean_t sync_updated, selected_updated, save_activity; 2004 aggr_lacp_port_t *pl = &portp->lp_lacp; 2005 lacp_receive_state_t oldstate = pl->sm.receive_state; 2006 2007 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 2008 2009 /* LACP_OFF state not in specification so check here. */ 2010 if (!pl->sm.lacp_on) 2011 return; 2012 2013 /* figure next state */ 2014 if (pl->sm.begin || pl->sm.port_moved) { 2015 pl->sm.receive_state = LACP_INITIALIZE; 2016 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */ 2017 pl->sm.receive_state = LACP_PORT_DISABLED; 2018 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */ 2019 pl->sm.receive_state = 2020 (pl->sm.receive_state == LACP_PORT_DISABLED) ? 
2021 LACP_DISABLED : LACP_PORT_DISABLED; 2022 } else if (lacp != NULL) { 2023 if ((pl->sm.receive_state == LACP_EXPIRED) || 2024 (pl->sm.receive_state == LACP_DEFAULTED)) { 2025 pl->sm.receive_state = LACP_CURRENT; 2026 } 2027 } else if ((pl->sm.receive_state == LACP_CURRENT) && 2028 (pl->current_while_timer.id == 0)) { 2029 pl->sm.receive_state = LACP_EXPIRED; 2030 } else if ((pl->sm.receive_state == LACP_EXPIRED) && 2031 (pl->current_while_timer.id == 0)) { 2032 pl->sm.receive_state = LACP_DEFAULTED; 2033 } 2034 2035 if (!((lacp && (oldstate == LACP_CURRENT) && 2036 (pl->sm.receive_state == LACP_CURRENT)))) { 2037 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n", 2038 portp->lp_linkid, lacp_receive_str[oldstate], 2039 lacp_receive_str[pl->sm.receive_state])); 2040 } 2041 2042 switch (pl->sm.receive_state) { 2043 case LACP_INITIALIZE: 2044 lacp_port_unselect(portp); 2045 record_Default(portp); 2046 pl->ActorOperPortState.bit.expired = B_FALSE; 2047 pl->sm.port_moved = B_FALSE; 2048 pl->sm.receive_state = LACP_PORT_DISABLED; 2049 pl->sm.begin = B_FALSE; 2050 lacp_receive_sm(portp, NULL); 2051 break; 2052 2053 case LACP_PORT_DISABLED: 2054 pl->PartnerOperPortState.bit.sync = B_FALSE; 2055 /* 2056 * Stop current_while_timer in case 2057 * we got here from link down 2058 */ 2059 stop_current_while_timer(portp); 2060 2061 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) { 2062 pl->sm.receive_state = LACP_DISABLED; 2063 lacp_receive_sm(portp, lacp); 2064 /* We goto LACP_DISABLED state */ 2065 break; 2066 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) { 2067 pl->sm.receive_state = LACP_EXPIRED; 2068 /* 2069 * FALL THROUGH TO LACP_EXPIRED CASE: 2070 * We have no way of knowing if we get into 2071 * lacp_receive_sm() from a current_while_timer 2072 * expiring as it has never been kicked off yet! 
2073 */ 2074 } else { 2075 /* We stay in LACP_PORT_DISABLED state */ 2076 break; 2077 } 2078 /* LACP_PORT_DISABLED -> LACP_EXPIRED */ 2079 /* FALLTHROUGH */ 2080 2081 case LACP_EXPIRED: 2082 /* 2083 * Arrives here from LACP_PORT_DISABLED state as well as 2084 * as well as current_while_timer expiring. 2085 */ 2086 pl->PartnerOperPortState.bit.sync = B_FALSE; 2087 pl->PartnerOperPortState.bit.timeout = B_TRUE; 2088 2089 pl->ActorOperPortState.bit.expired = B_TRUE; 2090 start_current_while_timer(portp, SHORT_TIMEOUT_TIME); 2091 lacp_periodic_sm(portp); 2092 break; 2093 2094 case LACP_DISABLED: 2095 /* 2096 * This is the normal state for recv_sm when LACP_OFF 2097 * is set or the NIC is in half duplex mode. 2098 */ 2099 lacp_port_unselect(portp); 2100 record_Default(portp); 2101 pl->PartnerOperPortState.bit.aggregation = B_FALSE; 2102 pl->ActorOperPortState.bit.expired = B_FALSE; 2103 break; 2104 2105 case LACP_DEFAULTED: 2106 /* 2107 * Current_while_timer expired a second time. 2108 */ 2109 update_default_selected(portp); 2110 record_Default(portp); /* overwrite Partner Oper val */ 2111 pl->ActorOperPortState.bit.expired = B_FALSE; 2112 pl->PartnerOperPortState.bit.sync = B_TRUE; 2113 2114 lacp_selection_logic(portp); 2115 lacp_mux_sm(portp); 2116 break; 2117 2118 case LACP_CURRENT: 2119 /* 2120 * Reception of LACPDU 2121 */ 2122 2123 if (!lacp) /* no LACPDU so current_while_timer popped */ 2124 break; 2125 2126 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n", 2127 portp->lp_linkid)); 2128 2129 /* 2130 * Validate Actor_Information_Length, 2131 * Partner_Information_Length, Collector_Information_Length, 2132 * and Terminator_Length fields. 
2133 */ 2134 if (!valid_lacp_pdu(portp, lacp)) { 2135 AGGR_LACP_DBG(("lacp_receive_sm (%d): " 2136 "Invalid LACPDU received\n", 2137 portp->lp_linkid)); 2138 break; 2139 } 2140 2141 save_activity = pl->PartnerOperPortState.bit.activity; 2142 selected_updated = update_selected(portp, lacp); 2143 update_NTT(portp, lacp); 2144 sync_updated = record_PDU(portp, lacp); 2145 2146 pl->ActorOperPortState.bit.expired = B_FALSE; 2147 2148 if (selected_updated) { 2149 lacp_selection_logic(portp); 2150 lacp_mux_sm(portp); 2151 } else if (sync_updated) { 2152 lacp_mux_sm(portp); 2153 } 2154 2155 /* 2156 * If the periodic timer value bit has been modified 2157 * or the partner activity bit has been changed then 2158 * we need to respectively: 2159 * - restart the timer with the proper timeout value. 2160 * - possibly enable/disable transmission of LACPDUs. 2161 */ 2162 if ((pl->PartnerOperPortState.bit.timeout && 2163 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) || 2164 (!pl->PartnerOperPortState.bit.timeout && 2165 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) || 2166 (pl->PartnerOperPortState.bit.activity != 2167 save_activity)) { 2168 lacp_periodic_sm(portp); 2169 } 2170 2171 stop_current_while_timer(portp); 2172 /* Check if we need to transmit an LACPDU */ 2173 if (pl->NTT) 2174 lacp_xmit_sm(portp); 2175 start_current_while_timer(portp, 0); 2176 2177 break; 2178 } 2179 } 2180 2181 static void 2182 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable) 2183 { 2184 mac_perim_handle_t mph; 2185 2186 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n", 2187 portp->lp_linkid, enable ? "ENABLED" : "DISABLED")); 2188 2189 mac_perim_enter_by_mh(portp->lp_mh, &mph); 2190 if (!enable) { 2191 /* 2192 * Turn OFF Collector_Distributor. 2193 */ 2194 portp->lp_collector_enabled = B_FALSE; 2195 aggr_send_port_disable(portp); 2196 goto done; 2197 } 2198 2199 /* 2200 * Turn ON Collector_Distributor. 
2201 */ 2202 2203 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on && 2204 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) { 2205 /* Port is compatible and can be aggregated */ 2206 portp->lp_collector_enabled = B_TRUE; 2207 aggr_send_port_enable(portp); 2208 } 2209 2210 done: 2211 mac_perim_exit(mph); 2212 } 2213 2214 /* 2215 * Because the LACP packet processing needs to enter the aggr's mac perimeter 2216 * and that would potentially cause a deadlock with the thread in which the 2217 * grp/port is deleted, we defer the packet process to a worker thread. Here 2218 * we only enqueue the received Marker or LACPDU for later processing. 2219 */ 2220 void 2221 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp) 2222 { 2223 aggr_grp_t *grp = portp->lp_grp; 2224 lacp_t *lacp; 2225 2226 dmp->b_rptr += sizeof (struct ether_header); 2227 2228 if (MBLKL(dmp) < sizeof (lacp_t)) { 2229 freemsg(dmp); 2230 return; 2231 } 2232 2233 lacp = (lacp_t *)dmp->b_rptr; 2234 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) { 2235 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): " 2236 "Unknown Slow Protocol type %d\n", 2237 portp->lp_linkid, lacp->subtype)); 2238 freemsg(dmp); 2239 return; 2240 } 2241 2242 mutex_enter(&grp->lg_lacp_lock); 2243 2244 /* 2245 * If the lg_lacp_done is set, this aggregation is in the process of 2246 * being deleted, return directly. 2247 */ 2248 if (grp->lg_lacp_done) { 2249 mutex_exit(&grp->lg_lacp_lock); 2250 freemsg(dmp); 2251 return; 2252 } 2253 2254 if (grp->lg_lacp_tail == NULL) { 2255 grp->lg_lacp_head = grp->lg_lacp_tail = dmp; 2256 } else { 2257 grp->lg_lacp_tail->b_next = dmp; 2258 grp->lg_lacp_tail = dmp; 2259 } 2260 2261 /* 2262 * Hold a reference of the port so that the port won't be freed when it 2263 * is removed from the aggr. The b_prev field is borrowed to save the 2264 * port information. 
2265 */ 2266 AGGR_PORT_REFHOLD(portp); 2267 dmp->b_prev = (mblk_t *)portp; 2268 cv_broadcast(&grp->lg_lacp_cv); 2269 mutex_exit(&grp->lg_lacp_lock); 2270 } 2271 2272 static void 2273 aggr_lacp_rx(mblk_t *dmp) 2274 { 2275 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev; 2276 mac_perim_handle_t mph; 2277 lacp_t *lacp; 2278 2279 dmp->b_prev = NULL; 2280 2281 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph); 2282 if (portp->lp_closing) 2283 goto done; 2284 2285 lacp = (lacp_t *)dmp->b_rptr; 2286 switch (lacp->subtype) { 2287 case LACP_SUBTYPE: 2288 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n", 2289 portp->lp_linkid)); 2290 2291 if (!portp->lp_lacp.sm.lacp_on) { 2292 break; 2293 } 2294 lacp_receive_sm(portp, lacp); 2295 break; 2296 2297 case MARKER_SUBTYPE: 2298 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n", 2299 portp->lp_linkid)); 2300 2301 if (receive_marker_pdu(portp, dmp) != 0) 2302 break; 2303 2304 /* Send the packet over the first TX ring */ 2305 dmp = mac_hwring_send_priv(portp->lp_mch, 2306 portp->lp_tx_rings[0], dmp); 2307 if (dmp != NULL) 2308 freemsg(dmp); 2309 mac_perim_exit(mph); 2310 AGGR_PORT_REFRELE(portp); 2311 return; 2312 } 2313 2314 done: 2315 mac_perim_exit(mph); 2316 AGGR_PORT_REFRELE(portp); 2317 freemsg(dmp); 2318 } 2319 2320 void 2321 aggr_lacp_rx_thread(void *arg) 2322 { 2323 callb_cpr_t cprinfo; 2324 aggr_grp_t *grp = (aggr_grp_t *)arg; 2325 aggr_port_t *port; 2326 mblk_t *mp, *nextmp; 2327 2328 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr, 2329 "aggr_lacp_rx_thread"); 2330 2331 mutex_enter(&grp->lg_lacp_lock); 2332 2333 /* 2334 * Quit the thread if the grp is deleted. 
2335 */ 2336 while (!grp->lg_lacp_done) { 2337 if ((mp = grp->lg_lacp_head) == NULL) { 2338 CALLB_CPR_SAFE_BEGIN(&cprinfo); 2339 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 2340 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock); 2341 continue; 2342 } 2343 2344 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2345 mutex_exit(&grp->lg_lacp_lock); 2346 2347 while (mp != NULL) { 2348 nextmp = mp->b_next; 2349 mp->b_next = NULL; 2350 aggr_lacp_rx(mp); 2351 mp = nextmp; 2352 } 2353 mutex_enter(&grp->lg_lacp_lock); 2354 } 2355 2356 /* 2357 * The grp is being destroyed, simply free all of the LACP messages 2358 * left in the queue which did not have the chance to be processed. 2359 * We cannot use freemsgchain() here since we need to clear the 2360 * b_prev field. 2361 */ 2362 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) { 2363 port = (aggr_port_t *)mp->b_prev; 2364 AGGR_PORT_REFRELE(port); 2365 nextmp = mp->b_next; 2366 mp->b_next = NULL; 2367 mp->b_prev = NULL; 2368 freemsg(mp); 2369 } 2370 2371 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2372 grp->lg_lacp_rx_thread = NULL; 2373 cv_broadcast(&grp->lg_lacp_cv); 2374 CALLB_CPR_EXIT(&cprinfo); 2375 thread_exit(); 2376 }