1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
25 */
26
27 /*
28 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports.
29 *
30 * Implements the functions needed to manage the MAC ports that are
31 * part of Link Aggregation groups.
32 */
33
34 #include <sys/types.h>
35 #include <sys/sysmacros.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/id_space.h>
39 #include <sys/list.h>
40 #include <sys/ksynch.h>
41 #include <sys/kmem.h>
42 #include <sys/stream.h>
43 #include <sys/modctl.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/atomic.h>
47 #include <sys/stat.h>
48 #include <sys/sdt.h>
49 #include <sys/dlpi.h>
50 #include <sys/dls.h>
51 #include <sys/aggr.h>
52 #include <sys/aggr_impl.h>
53
54 static kmem_cache_t *aggr_port_cache;
55 static id_space_t *aggr_portids;
56
57 static void aggr_port_notify_cb(void *, mac_notify_type_t);
58
59 /*ARGSUSED*/
60 static int
61 aggr_port_constructor(void *buf, void *arg, int kmflag)
62 {
63 bzero(buf, sizeof (aggr_port_t));
64 return (0);
65 }
66
67 /*ARGSUSED*/
68 static void
69 aggr_port_destructor(void *buf, void *arg)
70 {
71 aggr_port_t *port = buf;
72
73 ASSERT(port->lp_mnh == NULL);
74 ASSERT(port->lp_mphp == NULL);
75 ASSERT(!port->lp_rx_grp_added && !port->lp_tx_grp_added);
76 ASSERT(port->lp_hwgh == NULL);
77 }
78
79 void
80 aggr_port_init(void)
81 {
82 aggr_port_cache = kmem_cache_create("aggr_port_cache",
83 sizeof (aggr_port_t), 0, aggr_port_constructor,
84 aggr_port_destructor, NULL, NULL, NULL, 0);
85
86 /*
87 * Allocate a id space to manage port identification. The range of
88 * the arena will be from 1 to UINT16_MAX, because the LACP protocol
89 * specifies 16-bit unique identification.
90 */
91 aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX);
92 ASSERT(aggr_portids != NULL);
93 }
94
95 void
96 aggr_port_fini(void)
97 {
98 /*
99 * This function is called only after all groups have been
100 * freed. This ensures that there are no remaining allocated
101 * ports when this function is invoked.
102 */
103 kmem_cache_destroy(aggr_port_cache);
104 id_space_destroy(aggr_portids);
105 }
106
107 /* ARGSUSED */
108 void
109 aggr_port_init_callbacks(aggr_port_t *port)
110 {
111 /* add the port's receive callback */
112 port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port);
113 /*
114 * Hold a reference of the grp and the port and this reference will
115 * be released when the thread exits.
116 *
117 * The reference on the port is used for aggr_port_delete() to
118 * continue without waiting for the thread to exit; the reference
119 * on the grp is used for aggr_grp_delete() to wait for the thread
120 * to exit before calling mac_unregister().
121 *
122 * Note that these references will be released either in
123 * aggr_port_delete() when mac_notify_remove() succeeds, or in
124 * the aggr_port_notify_cb() callback when the port is deleted
125 * (lp_closing is set).
126 */
127 aggr_grp_port_hold(port);
128 }
129
130 /* ARGSUSED */
131 int
132 aggr_port_create(aggr_grp_t *grp, const datalink_id_t linkid, boolean_t force,
133 aggr_port_t **pp)
134 {
135 int err;
136 mac_handle_t mh;
137 mac_client_handle_t mch = NULL;
138 aggr_port_t *port;
139 uint16_t portid;
140 uint_t i;
141 boolean_t no_link_update = B_FALSE;
142 const mac_info_t *mip;
143 uint32_t note;
144 uint32_t margin;
145 char client_name[MAXNAMELEN];
146 char aggr_name[MAXNAMELEN];
147 char port_name[MAXNAMELEN];
148 mac_diag_t diag;
149 mac_unicast_handle_t mah;
150
151 *pp = NULL;
152
153 if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
154 return (err);
155
156 mip = mac_info(mh);
157 if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) {
158 err = EINVAL;
159 goto fail;
160 }
161
162 /*
163 * If the underlying MAC does not support link update notification, it
164 * can only be aggregated if `force' is set. This is because aggr
165 * depends on link notifications to attach ports whose link is up.
166 */
167 note = mac_no_notification(mh);
168 if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) {
169 no_link_update = B_TRUE;
170 if (!force) {
171 /*
172 * We borrow this error code to indicate that link
173 * notification is not supported.
174 */
175 err = ENETDOWN;
176 goto fail;
177 }
178 }
179
180 if (((err = dls_mgmt_get_linkinfo(grp->lg_linkid,
181 aggr_name, NULL, NULL, NULL)) != 0) ||
182 ((err = dls_mgmt_get_linkinfo(linkid, port_name,
183 NULL, NULL, NULL)) != 0)) {
184 goto fail;
185 }
186
187 (void) snprintf(client_name, MAXNAMELEN, "%s-%s", aggr_name, port_name);
188 if ((err = mac_client_open(mh, &mch, client_name,
189 MAC_OPEN_FLAGS_IS_AGGR_PORT | MAC_OPEN_FLAGS_EXCLUSIVE)) != 0) {
190 goto fail;
191 }
192
193 if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) {
194 err = ENOMEM;
195 goto fail;
196 }
197
198 /*
199 * As the underlying mac's current margin size is used to determine
200 * the margin size of the aggregation itself, request the underlying
201 * mac not to change to a smaller size.
202 */
203 if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) {
204 id_free(aggr_portids, portid);
205 goto fail;
206 }
207
208 if ((err = mac_unicast_add(mch, NULL, MAC_UNICAST_PRIMARY |
209 MAC_UNICAST_DISABLE_TX_VID_CHECK, &mah, 0, &diag)) != 0) {
210 VERIFY(mac_margin_remove(mh, margin) == 0);
211 id_free(aggr_portids, portid);
212 goto fail;
213 }
214
215 port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP);
216
217 port->lp_refs = 1;
218 port->lp_next = NULL;
219 port->lp_mh = mh;
220 port->lp_mch = mch;
221 port->lp_mip = mip;
222 port->lp_linkid = linkid;
223 port->lp_closing = B_FALSE;
224 port->lp_mah = mah;
225
226 /* get the port's original MAC address */
227 mac_unicast_primary_get(port->lp_mh, port->lp_addr);
228
229 /* initialize state */
230 port->lp_state = AGGR_PORT_STATE_STANDBY;
231 port->lp_link_state = LINK_STATE_UNKNOWN;
232 port->lp_ifspeed = 0;
233 port->lp_link_duplex = LINK_DUPLEX_UNKNOWN;
234 port->lp_started = B_FALSE;
235 port->lp_tx_enabled = B_FALSE;
236 port->lp_promisc_on = B_FALSE;
237 port->lp_no_link_update = no_link_update;
238 port->lp_portid = portid;
239 port->lp_margin = margin;
240 port->lp_prom_addr = NULL;
241
242 /*
243 * Save the current statistics of the port. They will be used
244 * later by aggr_m_stats() when aggregating the statistics of
245 * the constituent ports.
246 */
247 for (i = 0; i < MAC_NSTAT; i++) {
248 port->lp_stat[i] =
249 aggr_port_stat(port, i + MAC_STAT_MIN);
250 }
251 for (i = 0; i < ETHER_NSTAT; i++) {
252 port->lp_ether_stat[i] =
253 aggr_port_stat(port, i + MACTYPE_STAT_MIN);
254 }
255
256 /* LACP related state */
257 port->lp_collector_enabled = B_FALSE;
258
259 *pp = port;
260 return (0);
261
262 fail:
263 if (mch != NULL)
264 mac_client_close(mch, MAC_CLOSE_FLAGS_EXCLUSIVE);
265 mac_close(mh);
266 return (err);
267 }
268
269 void
270 aggr_port_delete(aggr_port_t *port)
271 {
272 aggr_lacp_port_t *pl = &port->lp_lacp;
273
274 ASSERT(port->lp_mphp == NULL);
275 ASSERT(!port->lp_promisc_on);
276
277 port->lp_closing = B_TRUE;
278
279 VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0);
280 mac_rx_clear(port->lp_mch);
281 /*
282 * If the notification callback is already in process and waiting for
283 * the aggr grp's mac perimeter, don't wait (otherwise there would be
284 * deadlock). Otherwise, if mac_notify_remove() succeeds, we can
285 * release the reference held when mac_notify_add() is called.
286 */
287 if ((port->lp_mnh != NULL) &&
288 (mac_notify_remove(port->lp_mnh, B_FALSE) == 0)) {
289 aggr_grp_port_rele(port);
290 }
291 port->lp_mnh = NULL;
292
293 /*
294 * Inform the the port lacp timer thread to exit. Note that waiting
295 * for the thread to exit may cause deadlock since that thread may
296 * need to enter into the mac perimeter which we are currently in.
297 * It is fine to continue without waiting though since that thread
298 * is holding a reference of the port.
299 */
300 mutex_enter(&pl->lacp_timer_lock);
301 pl->lacp_timer_bits |= LACP_THREAD_EXIT;
302 cv_broadcast(&pl->lacp_timer_cv);
303 mutex_exit(&pl->lacp_timer_lock);
304
305 /*
306 * Restore the port MAC address. Note it is called after the
307 * port's notification callback being removed. This prevent
308 * port's MAC_NOTE_UNICST notify callback function being called.
309 */
310 (void) mac_unicast_primary_set(port->lp_mh, port->lp_addr);
311 if (port->lp_mah != NULL)
312 (void) mac_unicast_remove(port->lp_mch, port->lp_mah);
313 mac_client_close(port->lp_mch, MAC_CLOSE_FLAGS_EXCLUSIVE);
314 mac_close(port->lp_mh);
315 AGGR_PORT_REFRELE(port);
316 }
317
318 void
319 aggr_port_free(aggr_port_t *port)
320 {
321 ASSERT(port->lp_refs == 0);
322 if (port->lp_grp != NULL)
323 AGGR_GRP_REFRELE(port->lp_grp);
324 port->lp_grp = NULL;
325 id_free(aggr_portids, port->lp_portid);
326 port->lp_portid = 0;
327 mutex_destroy(&port->lp_lacp.lacp_timer_lock);
328 cv_destroy(&port->lp_lacp.lacp_timer_cv);
329 kmem_cache_free(aggr_port_cache, port);
330 }
331
332 /*
333 * Invoked upon receiving a MAC_NOTE_LINK notification for
334 * one of the constituent ports.
335 */
336 boolean_t
337 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port)
338 {
339 boolean_t do_attach = B_FALSE;
340 boolean_t do_detach = B_FALSE;
341 boolean_t link_state_changed = B_TRUE;
342 uint64_t ifspeed;
343 link_state_t link_state;
344 link_duplex_t link_duplex;
345 mac_perim_handle_t mph;
346
347 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
348 mac_perim_enter_by_mh(port->lp_mh, &mph);
349
350 /*
351 * link state change? For links that do not support link state
352 * notification, always assume the link is up.
353 */
354 link_state = port->lp_no_link_update ? LINK_STATE_UP :
355 mac_link_get(port->lp_mh);
356 if (port->lp_link_state != link_state) {
357 if (link_state == LINK_STATE_UP)
358 do_attach = (port->lp_link_state != LINK_STATE_UP);
359 else
360 do_detach = (port->lp_link_state == LINK_STATE_UP);
361 }
362 port->lp_link_state = link_state;
363
364 /* link duplex change? */
365 link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX);
366 if (port->lp_link_duplex != link_duplex) {
367 if (link_duplex == LINK_DUPLEX_FULL)
368 do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL);
369 else
370 do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL);
371 }
372 port->lp_link_duplex = link_duplex;
373
374 /* link speed changes? */
375 ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED);
376 if (port->lp_ifspeed != ifspeed) {
377 if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
378 do_detach |= (ifspeed != grp->lg_ifspeed);
379 else
380 do_attach |= (ifspeed == grp->lg_ifspeed);
381 }
382 port->lp_ifspeed = ifspeed;
383
384 if (do_attach) {
385 /* attempt to attach the port to the aggregation */
386 link_state_changed = aggr_grp_attach_port(grp, port);
387 } else if (do_detach) {
388 /* detach the port from the aggregation */
389 link_state_changed = aggr_grp_detach_port(grp, port);
390 }
391
392 mac_perim_exit(mph);
393 return (link_state_changed);
394 }
395
396 /*
397 * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent
398 * ports of a group.
399 */
400 static void
401 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port,
402 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
403 {
404 boolean_t mac_addr_changed = B_FALSE;
405 boolean_t link_state_changed = B_FALSE;
406 uint8_t mac_addr[ETHERADDRL];
407 mac_perim_handle_t mph;
408
409 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
410 ASSERT(mac_addr_changedp != NULL);
411 ASSERT(link_state_changedp != NULL);
412 mac_perim_enter_by_mh(port->lp_mh, &mph);
413
414 /*
415 * If it is called when setting the MAC address to the
416 * aggregation group MAC address, do nothing.
417 */
418 mac_unicast_primary_get(port->lp_mh, mac_addr);
419 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
420 mac_perim_exit(mph);
421 goto done;
422 }
423
424 /* save the new port MAC address */
425 bcopy(mac_addr, port->lp_addr, ETHERADDRL);
426
427 aggr_grp_port_mac_changed(grp, port, &mac_addr_changed,
428 &link_state_changed);
429
430 mac_perim_exit(mph);
431
432 /*
433 * If this port was used to determine the MAC address of
434 * the group, update the MAC address of the constituent
435 * ports.
436 */
437 if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
438 link_state_changed = B_TRUE;
439
440 done:
441 *mac_addr_changedp = mac_addr_changed;
442 *link_state_changedp = link_state_changed;
443 }
444
445 /*
446 * Notification callback invoked by the MAC service module for
447 * a particular MAC port.
448 */
449 static void
450 aggr_port_notify_cb(void *arg, mac_notify_type_t type)
451 {
452 aggr_port_t *port = arg;
453 aggr_grp_t *grp = port->lp_grp;
454 boolean_t mac_addr_changed, link_state_changed;
455 mac_perim_handle_t mph;
456
457 mac_perim_enter_by_mh(grp->lg_mh, &mph);
458 if (port->lp_closing) {
459 mac_perim_exit(mph);
460
461 /*
462 * Release the reference so it is safe for aggr to call
463 * mac_unregister() now.
464 */
465 aggr_grp_port_rele(port);
466 return;
467 }
468
469 switch (type) {
470 case MAC_NOTE_TX:
471 mac_tx_update(grp->lg_mh);
472 break;
473 case MAC_NOTE_LINK:
474 if (aggr_port_notify_link(grp, port))
475 mac_link_update(grp->lg_mh, grp->lg_link_state);
476 break;
477 case MAC_NOTE_UNICST:
478 aggr_port_notify_unicst(grp, port, &mac_addr_changed,
479 &link_state_changed);
480 if (mac_addr_changed)
481 mac_unicst_update(grp->lg_mh, grp->lg_addr);
482 if (link_state_changed)
483 mac_link_update(grp->lg_mh, grp->lg_link_state);
484 break;
485 default:
486 break;
487 }
488
489 mac_perim_exit(mph);
490 }
491
492 int
493 aggr_port_start(aggr_port_t *port)
494 {
495 ASSERT(MAC_PERIM_HELD(port->lp_mh));
496
497 if (port->lp_started)
498 return (0);
499
500 port->lp_started = B_TRUE;
501 aggr_grp_multicst_port(port, B_TRUE);
502 return (0);
503 }
504
505 void
506 aggr_port_stop(aggr_port_t *port)
507 {
508 ASSERT(MAC_PERIM_HELD(port->lp_mh));
509
510 if (!port->lp_started)
511 return;
512
513 aggr_grp_multicst_port(port, B_FALSE);
514
515 /* update the port state */
516 port->lp_started = B_FALSE;
517 }
518
519 int
520 aggr_port_promisc(aggr_port_t *port, boolean_t on)
521 {
522 int rc;
523
524 ASSERT(MAC_PERIM_HELD(port->lp_mh));
525
526 if (on == port->lp_promisc_on)
527 /* already in desired promiscous mode */
528 return (0);
529
530 if (on) {
531 mac_rx_clear(port->lp_mch);
532 /* We use the promisc callback because without hardware
533 * rings, we deliver through flows that will cause duplicate
534 * delivery of packets when we've flipped into this mode
535 * to compensate for the lack of hardware MAC matching
536 */
537 rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL,
538 aggr_recv_promisc_cb, port, &port->lp_mphp,
539 MAC_PROMISC_FLAGS_NO_TX_LOOP);
540 if (rc != 0) {
541 mac_rx_set(port->lp_mch, aggr_recv_cb, port);
542 return (rc);
543 }
544 } else {
545 mac_promisc_remove(port->lp_mphp);
546 port->lp_mphp = NULL;
547 mac_rx_set(port->lp_mch, aggr_recv_cb, port);
548 }
549
550 port->lp_promisc_on = on;
551
552 return (0);
553 }
554
555 /*
556 * Set the MAC address of a port.
557 */
558 int
559 aggr_port_unicst(aggr_port_t *port)
560 {
561 aggr_grp_t *grp = port->lp_grp;
562
563 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
564 ASSERT(MAC_PERIM_HELD(port->lp_mh));
565
566 return (mac_unicast_primary_set(port->lp_mh, grp->lg_addr));
567 }
568
569 /*
570 * Add or remove a multicast address to/from a port.
571 */
572 int
573 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp)
574 {
575 aggr_port_t *port = arg;
576
577 if (add) {
578 return (mac_multicast_add(port->lp_mch, addrp));
579 } else {
580 mac_multicast_remove(port->lp_mch, addrp);
581 return (0);
582 }
583 }
584
585 uint64_t
586 aggr_port_stat(aggr_port_t *port, uint_t stat)
587 {
588 return (mac_stat_get(port->lp_mh, stat));
589 }
590
591 /*
592 * Add a non-primary unicast address to the underlying port. If the port
593 * supports HW Rx group, try to add the address into the HW Rx group of
594 * the port first. If that fails, or if the port does not support HW Rx
595 * group, enable the port's promiscous mode.
596 */
597 int
598 aggr_port_addmac(aggr_port_t *port, const uint8_t *mac_addr)
599 {
600 aggr_unicst_addr_t *addr, **pprev;
601 mac_perim_handle_t pmph;
602 int err;
603
604 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
605 mac_perim_enter_by_mh(port->lp_mh, &pmph);
606
607 /*
608 * If the underlying port support HW Rx group, add the mac to its
609 * RX group directly.
610 */
611 if ((port->lp_hwgh != NULL) &&
612 ((mac_hwgroup_addmac(port->lp_hwgh, mac_addr)) == 0)) {
613 mac_perim_exit(pmph);
614 return (0);
615 }
616
617 /*
618 * If that fails, or if the port does not support HW Rx group, enable
619 * the port's promiscous mode. (Note that we turn on the promiscous
620 * mode only if the port is already started.
621 */
622 if (port->lp_started &&
623 ((err = aggr_port_promisc(port, B_TRUE)) != 0)) {
624 mac_perim_exit(pmph);
625 return (err);
626 }
627
628 /*
629 * Walk through the unicast addresses that requires promiscous mode
630 * enabled on this port, and add this address to the end of the list.
631 */
632 pprev = &port->lp_prom_addr;
633 while ((addr = *pprev) != NULL) {
634 ASSERT(bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0);
635 pprev = &addr->aua_next;
636 }
637 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
638 bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
639 addr->aua_next = NULL;
640 *pprev = addr;
641 mac_perim_exit(pmph);
642 return (0);
643 }
644
645 /*
646 * Remove a non-primary unicast address from the underlying port. This address
647 * must has been added by aggr_port_addmac(). As a result, we probably need to
648 * remove the address from the port's HW Rx group, or to disable the port's
649 * promiscous mode.
650 */
651 void
652 aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr)
653 {
654 aggr_grp_t *grp = port->lp_grp;
655 aggr_unicst_addr_t *addr, **pprev;
656 mac_perim_handle_t pmph;
657
658 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
659 mac_perim_enter_by_mh(port->lp_mh, &pmph);
660
661 /*
662 * See whether this address is in the list of addresses that requires
663 * the port being promiscous mode.
664 */
665 pprev = &port->lp_prom_addr;
666 while ((addr = *pprev) != NULL) {
667 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0)
668 break;
669 pprev = &addr->aua_next;
670 }
671 if (addr != NULL) {
672 /*
673 * This unicast address put the port into the promiscous mode,
674 * delete this address from the lp_prom_addr list. If this is
675 * the last address in that list, disable the promiscous mode
676 * if the aggregation is not in promiscous mode.
677 */
678 *pprev = addr->aua_next;
679 kmem_free(addr, sizeof (aggr_unicst_addr_t));
680 if (port->lp_prom_addr == NULL && !grp->lg_promisc)
681 (void) aggr_port_promisc(port, B_FALSE);
682 } else {
683 ASSERT(port->lp_hwgh != NULL);
684 (void) mac_hwgroup_remmac(port->lp_hwgh, mac_addr);
685 }
686 mac_perim_exit(pmph);
687 }