Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/bridge.c
+++ new/usr/src/uts/common/io/bridge.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * This module implements a STREAMS driver that provides layer-two (Ethernet)
29 29 * bridging functionality. The STREAMS interface is used to provide
30 30 * observability (snoop/wireshark) and control, but not for interface plumbing.
31 31 */
32 32
33 33 #include <sys/types.h>
34 34 #include <sys/bitmap.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/ddi.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/kstat.h>
40 40 #include <sys/modctl.h>
41 41 #include <sys/note.h>
42 42 #include <sys/param.h>
43 43 #include <sys/policy.h>
44 44 #include <sys/sdt.h>
45 45 #include <sys/stat.h>
46 46 #include <sys/stream.h>
47 47 #include <sys/stropts.h>
48 48 #include <sys/strsun.h>
49 49 #include <sys/sunddi.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/systm.h>
52 52 #include <sys/time.h>
53 53 #include <sys/dlpi.h>
54 54 #include <sys/dls.h>
55 55 #include <sys/mac_ether.h>
56 56 #include <sys/mac_provider.h>
57 57 #include <sys/mac_client_priv.h>
58 58 #include <sys/mac_impl.h>
59 59 #include <sys/vlan.h>
60 60 #include <net/bridge.h>
61 61 #include <net/bridge_impl.h>
62 62 #include <net/trill.h>
63 63 #include <sys/dld_ioc.h>
64 64
65 65 /*
66 66 * Locks and reference counts: object lifetime and design.
67 67 *
68 68 * bridge_mac_t
69 69 * Bridge mac (snoop) instances are in bmac_list, which is protected by
70 70 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer().
71 71 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
72 72 * away, the bridge_mac_t remains until either all of the users go away
73 73 * (detected by a timer) or until the instance is picked up again by the same
74 74 * bridge starting back up.
75 75 *
76 76 * bridge_inst_t
77 77 * Bridge instances are in inst_list, which is protected by inst_lock.
78 78 * They're allocated by inst_alloc() and freed by inst_free(). After
79 79 * allocation, an instance is placed in inst_list, and the reference count is
80 80 * incremented to represent this. That reference is decremented when the
81 81 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last
82 82 * reference is freed, the instance is removed from the list.
83 83 *
84 84 * Bridge instances have lists of links and an AVL tree of forwarding
85 85 * entries. Each of these structures holds one reference on the bridge
86 86 * instance. These lists and tree are protected by bi_rwlock.
87 87 *
88 88 * bridge_stream_t
89 89 * Bridge streams are allocated by stream_alloc() and freed by stream_free().
90 90 * These streams are created when "bridged" opens /dev/bridgectl, and are
91 91 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
92 92 * links on the bridge. When a stream closes, the bridge instance created is
93 93 * destroyed. There's at most one bridge instance for a given control
94 94 * stream.
95 95 *
96 96 * bridge_link_t
97 97 * Links are allocated by bridge_add_link() and freed by link_free(). The
98 98 * bi_links list holds a reference to the link. When the BLF_DELETED flag is
99 99 * set, that reference is dropped. The link isn't removed from the list
100 100 * until the last reference drops. Each forwarding entry that uses a given
101 101 * link holds a reference, as does each thread transmitting a packet via the
102 102 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on
103 103 * a link when transmitting.
104 104 *
105 105 * It's important that once BLF_DELETED is set, there's no way for the
106 106 * reference count to increase again. If it can, then the link may be
107 107 * double-freed. The BLF_FREED flag is intended for use with assertions to
108 108 * guard against this in testing.
109 109 *
110 110 * bridge_fwd_t
111 111 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
112 112 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike
113 113 * other data structures, the reference is dropped when the entry is removed
114 114 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each
115 115 * thread that's forwarding a packet to a known destination holds a reference
116 116 * to a forwarding entry.
117 117 *
118 118 * TRILL notes:
119 119 *
120 120 * The TRILL module does all of its I/O through bridging. It uses references
121 121 * on the bridge_inst_t and bridge_link_t structures, and has seven entry
122 122 * points and four callbacks. One entry point is for setting the callbacks
123 123 * (bridge_trill_register_cb). There are four entry points for taking bridge
124 124 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two
125 125 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
126 126 * that need to be bridged locally, and for TRILL-encapsulated output packets
127 127 * (bridge_trill_output).
128 128 *
129 129 * The four callbacks comprise two notification functions for bridges and
130 130 * links being deleted, one function for raw received TRILL packets, and one
131 131 * for bridge output to non-local TRILL destinations (tunnel entry).
132 132 */
133 133
134 134 /*
135 135 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
136 136 */
137 137 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES;
138 138 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES;
139 139 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS;
140 140
/*
 * Name tables for the named kstats.  inst_kstats_list is handed to
 * kstat_setup() by bridge_create(), one name per counter in bi_kstats.
 * NOTE(review): link_kstats_list is presumably the per-link equivalent; its
 * consumer is not visible in this part of the file.
 */
141 141 static const char *inst_kstats_list[] = { KSINST_NAMES };
142 142 static const char *link_kstats_list[] = { KSLINK_NAMES };
143 143
/*
 * Kstat counter helpers.  KREF names the 64-bit value of counter "vn" inside
 * kstat block "m" of the object pointed to by "p"; KINCR/KDECR pre-increment
 * and pre-decrement it.  The pointer argument and each expansion are
 * parenthesized so that the macros remain correct when handed an arbitrary
 * expression or used inside a larger expression.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

/* Per-instance (bi_kstats) and per-link (bl_kstats) forms, explicit pointer */
#define	KIPINCR(p, vn)	KINCR(p, bi_kstats, vn)
#define	KIPDECR(p, vn)	KDECR(p, bi_kstats, vn)
#define	KLPINCR(p, vn)	KINCR(p, bl_kstats, vn)

/* Shorthand forms that rely on locals named "bip" and "blp" being in scope */
#define	KIINCR(vn)	KIPINCR(bip, vn)
#define	KIDECR(vn)	KIPDECR(bip, vn)
#define	KLINCR(vn)	KLPINCR(blp, vn)

/* Number of elements in a true array (not valid on a pointer) */
#define	Dim(x)		(sizeof (x) / sizeof (*(x)))

/* Amount of overhead added when encapsulating with VLAN headers */
#define	VLAN_INCR	(sizeof (struct ether_vlan_header) -	\
			sizeof (struct ether_header))
161 161
/*
 * Driver-wide handles.  bridge_dev_info is supplied as m_dip when a bridge
 * mac is registered, and bridge_major is combined with a mac minor number to
 * build bi_dev.  NOTE(review): the code that assigns these, and the users of
 * bridge_taskq, are not visible in this part of the file.
 */
162 162 static dev_info_t *bridge_dev_info;
163 163 static major_t bridge_major;
164 164 static ddi_taskq_t *bridge_taskq;
165 165
166 166 /*
167 167 * These are the bridge instance management data structures. The mutex lock
168 168 * protects the list of bridge instances. A reference count is then used on
169 169 * each instance to determine when to free it. We use mac_minor_hold() to
170 170 * allocate minor_t values, which are used both for self-cloning /dev/net/
171 171 * device nodes as well as client streams. Minor node 0 is reserved for the
172 172 * allocation control node.
173 173 */
174 174 static list_t inst_list;
175 175 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */
176 176 static kmutex_t inst_lock;
177 177
/* List of bridge_mac_t structures and the lock that guards it and bm_inst */
178 178 static krwlock_t bmac_rwlock;
179 179 static list_t bmac_list;
180 180
181 181 /* Wait for taskq entries that use STREAMS */
182 182 static kcondvar_t stream_ref_cv;
183 183 static kmutex_t stream_ref_lock;
184 184
/*
 * bridge_timerid is the timeout(9F) handle armed by bmac_alloc() when the
 * first bridge mac is added; bridge_scan_interval is the delay it uses.
 */
185 185 static timeout_id_t bridge_timerid;
186 186 static clock_t bridge_scan_interval;
187 187 static clock_t bridge_fwd_age;
188 188
/* Forward declarations for functions used before their definitions */
189 189 static bridge_inst_t *bridge_find_name(const char *);
190 190 static void bridge_timer(void *);
191 191 static void bridge_unref(bridge_inst_t *);
192 192
/* All-zeroes Ethernet address; used as the bridge mac's source address */
193 193 static const uint8_t zero_addr[ETHERADDRL] = { 0 };
194 194
195 195 /* Global TRILL linkage */
196 196 static trill_recv_pkt_t trill_recv_fn;
197 197 static trill_encap_pkt_t trill_encap_fn;
198 198 static trill_br_dstr_t trill_brdstr_fn;
199 199 static trill_ln_dstr_t trill_lndstr_fn;
200 200
201 201 /* special settings to accommodate DLD flow control; see dld_str.c */
202 202 static struct module_info bridge_dld_modinfo = {
203 203 0, /* mi_idnum */
204 204 BRIDGE_DEV_NAME, /* mi_idname */
205 205 0, /* mi_minpsz */
206 206 INFPSZ, /* mi_maxpsz */
207 207 1, /* mi_hiwat */
208 208 0 /* mi_lowat */
209 209 };
210 210
211 211 static struct qinit bridge_dld_rinit = {
212 212 NULL, /* qi_putp */
213 213 NULL, /* qi_srvp */
214 214 dld_open, /* qi_qopen */
215 215 dld_close, /* qi_qclose */
216 216 NULL, /* qi_qadmin */
217 217 &bridge_dld_modinfo, /* qi_minfo */
218 218 NULL /* qi_mstat */
219 219 };
220 220
221 221 static struct qinit bridge_dld_winit = {
222 222 (int (*)())dld_wput, /* qi_putp */
223 223 (int (*)())dld_wsrv, /* qi_srvp */
224 224 NULL, /* qi_qopen */
225 225 NULL, /* qi_qclose */
226 226 NULL, /* qi_qadmin */
227 227 &bridge_dld_modinfo, /* qi_minfo */
228 228 NULL /* qi_mstat */
229 229 };
230 230
231 231 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *);
232 232
233 233 /* GLDv3 control ioctls used by Bridging */
234 234 static dld_ioc_info_t bridge_ioc_list[] = {
235 235 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t),
236 236 bridge_ioc_listfwd, NULL},
237 237 };
238 238
239 239 /*
240 240 * Given a bridge mac pointer, get a ref-held pointer to the corresponding
241 241 * bridge instance, if any. We must hold the global bmac_rwlock so that
242 242 * bm_inst doesn't slide out from under us.
243 243 */
244 244 static bridge_inst_t *
245 245 mac_to_inst(const bridge_mac_t *bmp)
246 246 {
247 247 bridge_inst_t *bip;
248 248
249 249 rw_enter(&bmac_rwlock, RW_READER);
250 250 if ((bip = bmp->bm_inst) != NULL)
251 251 atomic_inc_uint(&bip->bi_refs);
252 252 rw_exit(&bmac_rwlock);
253 253 return (bip);
254 254 }
255 255
256 256 static void
257 257 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist)
258 258 {
259 259 mblk_t *mp;
260 260 bridge_ctl_t *bcp;
261 261 bridge_link_t *blcmp;
262 262 bridge_inst_t *bip;
263 263 bridge_mac_t *bmp;
264 264
265 265 if (failed) {
266 266 if (blp->bl_flags & BLF_SDUFAIL)
267 267 return;
268 268 blp->bl_flags |= BLF_SDUFAIL;
269 269 } else {
270 270 if (!(blp->bl_flags & BLF_SDUFAIL))
271 271 return;
272 272 blp->bl_flags &= ~BLF_SDUFAIL;
273 273 }
274 274
275 275 /*
276 276 * If this link is otherwise up, then check if there are any other
277 277 * non-failed non-down links. If not, then we control the state of the
278 278 * whole bridge.
279 279 */
280 280 bip = blp->bl_inst;
281 281 bmp = bip->bi_mac;
282 282 if (blp->bl_linkstate != LINK_STATE_DOWN) {
283 283 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
284 284 blcmp = list_next(&bip->bi_links, blcmp)) {
285 285 if (blp != blcmp &&
286 286 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
287 287 blcmp->bl_linkstate != LINK_STATE_DOWN)
288 288 break;
289 289 }
290 290 if (blcmp == NULL) {
291 291 bmp->bm_linkstate = failed ? LINK_STATE_DOWN :
292 292 LINK_STATE_UP;
293 293 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
294 294 }
295 295 }
296 296
297 297 /*
298 298 * If we're becoming failed, then the link's current true state needs
299 299 * to be reflected upwards to this link's clients. If we're becoming
300 300 * unfailed, then we get the state of the bridge instead on all
301 301 * clients.
302 302 */
303 303 if (failed) {
304 304 if (bmp->bm_linkstate != blp->bl_linkstate)
305 305 mac_link_redo(blp->bl_mh, blp->bl_linkstate);
306 306 } else {
307 307 mac_link_redo(blp->bl_mh, bmp->bm_linkstate);
308 308 }
309 309
310 310 /* get the current mblk we're going to send up */
311 311 if ((mp = blp->bl_lfailmp) == NULL &&
312 312 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL)
313 313 return;
314 314
315 315 /* get a new one for next time */
316 316 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
317 317
318 318 /* if none for next time, then report only failures */
319 319 if (blp->bl_lfailmp == NULL && !failed) {
320 320 blp->bl_lfailmp = mp;
321 321 return;
322 322 }
323 323
324 324 /* LINTED: alignment */
325 325 bcp = (bridge_ctl_t *)mp->b_rptr;
326 326 bcp->bc_linkid = blp->bl_linkid;
327 327 bcp->bc_failed = failed;
328 328 mp->b_wptr = (uchar_t *)(bcp + 1);
329 329 mp->b_next = *mlist;
330 330 *mlist = mp;
331 331 }
332 332
333 333 /*
334 334 * Send control messages (link SDU changes) using the stream to the
335 335 * bridge instance daemon.
336 336 */
337 337 static void
338 338 send_up_messages(bridge_inst_t *bip, mblk_t *mp)
339 339 {
340 340 mblk_t *mnext;
341 341 queue_t *rq;
342 342
343 343 rq = bip->bi_control->bs_wq;
344 344 rq = OTHERQ(rq);
345 345 while (mp != NULL) {
346 346 mnext = mp->b_next;
347 347 mp->b_next = NULL;
348 348 putnext(rq, mp);
349 349 mp = mnext;
350 350 }
351 351 }
352 352
353 353 /* ARGSUSED */
354 354 static int
355 355 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val)
356 356 {
357 357 return (ENOTSUP);
358 358 }
359 359
360 360 static int
361 361 bridge_m_start(void *arg)
362 362 {
363 363 bridge_mac_t *bmp = arg;
364 364
365 365 bmp->bm_flags |= BMF_STARTED;
366 366 return (0);
367 367 }
368 368
369 369 static void
370 370 bridge_m_stop(void *arg)
371 371 {
372 372 bridge_mac_t *bmp = arg;
373 373
374 374 bmp->bm_flags &= ~BMF_STARTED;
375 375 }
376 376
377 377 /* ARGSUSED */
378 378 static int
379 379 bridge_m_setpromisc(void *arg, boolean_t on)
380 380 {
381 381 return (0);
382 382 }
383 383
384 384 /* ARGSUSED */
385 385 static int
386 386 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
387 387 {
388 388 return (0);
389 389 }
390 390
/*
 * MAC unicast-address callback.  Setting an address on the bridge node is
 * refused with ENOTSUP.
 */
/* ARGSUSED */
static int
bridge_m_unicst(void *arg, const uint8_t *macaddr)
{
	return (ENOTSUP);
}
397 397
398 398 static mblk_t *
399 399 bridge_m_tx(void *arg, mblk_t *mp)
400 400 {
401 401 _NOTE(ARGUNUSED(arg));
402 402 freemsgchain(mp);
403 403 return (NULL);
404 404 }
405 405
406 406 /* ARGSUSED */
407 407 static int
408 408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
409 409 {
410 410 bridge_listfwd_t *blf = karg;
411 411 bridge_inst_t *bip;
412 412 bridge_fwd_t *bfp, match;
413 413 avl_index_t where;
414 414
415 415 bip = bridge_find_name(blf->blf_name);
416 416 if (bip == NULL)
417 417 return (ENOENT);
418 418
419 419 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL);
420 420 match.bf_flags |= BFF_VLANLOCAL;
421 421 rw_enter(&bip->bi_rwlock, RW_READER);
422 422 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL)
423 423 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER);
424 424 else
425 425 bfp = AVL_NEXT(&bip->bi_fwd, bfp);
426 426 if (bfp == NULL) {
427 427 bzero(blf, sizeof (*blf));
428 428 } else {
429 429 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL);
430 430 blf->blf_trill_nick = bfp->bf_trill_nick;
431 431 blf->blf_ms_age =
432 432 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000;
433 433 blf->blf_is_local =
434 434 (bfp->bf_flags & BFF_LOCALADDR) != 0;
435 435 blf->blf_linkid = bfp->bf_links[0]->bl_linkid;
436 436 }
437 437 rw_exit(&bip->bi_rwlock);
438 438 bridge_unref(bip);
439 439 return (0);
440 440 }
441 441
442 442 static int
443 443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
444 444 uint_t pr_valsize, const void *pr_val)
445 445 {
446 446 bridge_mac_t *bmp = arg;
447 447 bridge_inst_t *bip;
448 448 bridge_link_t *blp;
449 449 int err;
450 450 uint_t maxsdu;
451 451 mblk_t *mlist;
452 452
453 453 _NOTE(ARGUNUSED(pr_name));
454 454 switch (pr_num) {
455 455 case MAC_PROP_MTU:
456 456 if (pr_valsize < sizeof (bmp->bm_maxsdu)) {
457 457 err = EINVAL;
458 458 break;
459 459 }
460 460 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu));
461 461 if (maxsdu == bmp->bm_maxsdu) {
462 462 err = 0;
463 463 } else if ((bip = mac_to_inst(bmp)) == NULL) {
464 464 err = ENXIO;
465 465 } else {
466 466 rw_enter(&bip->bi_rwlock, RW_WRITER);
467 467 mlist = NULL;
468 468 for (blp = list_head(&bip->bi_links); blp != NULL;
469 469 blp = list_next(&bip->bi_links, blp)) {
470 470 if (blp->bl_flags & BLF_DELETED)
471 471 continue;
472 472 if (blp->bl_maxsdu == maxsdu)
473 473 link_sdu_fail(blp, B_FALSE, &mlist);
474 474 else if (blp->bl_maxsdu == bmp->bm_maxsdu)
475 475 link_sdu_fail(blp, B_TRUE, &mlist);
476 476 }
477 477 rw_exit(&bip->bi_rwlock);
478 478 bmp->bm_maxsdu = maxsdu;
479 479 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
480 480 send_up_messages(bip, mlist);
481 481 bridge_unref(bip);
482 482 err = 0;
483 483 }
484 484 break;
485 485
486 486 default:
487 487 err = ENOTSUP;
488 488 break;
489 489 }
490 490 return (err);
491 491 }
492 492
493 493 static int
494 494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
495 495 uint_t pr_valsize, void *pr_val)
496 496 {
497 497 bridge_mac_t *bmp = arg;
498 498 int err = 0;
499 499
500 500 _NOTE(ARGUNUSED(pr_name));
501 501 switch (pr_num) {
502 502 case MAC_PROP_STATUS:
503 503 ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate));
504 504 bcopy(&bmp->bm_linkstate, pr_val, sizeof (&bmp->bm_linkstate));
505 505 break;
506 506
507 507 default:
508 508 err = ENOTSUP;
509 509 break;
510 510 }
511 511 return (err);
512 512 }
513 513
514 514 static void
515 515 bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
516 516 mac_prop_info_handle_t prh)
517 517 {
518 518 bridge_mac_t *bmp = arg;
519 519
520 520 _NOTE(ARGUNUSED(pr_name));
521 521
522 522 switch (pr_num) {
523 523 case MAC_PROP_MTU:
524 524 mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu,
525 525 bmp->bm_maxsdu);
526 526 break;
527 527 case MAC_PROP_STATUS:
528 528 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
529 529 break;
530 530 }
531 531 }
532 532
533 533 static mac_callbacks_t bridge_m_callbacks = {
534 534 MC_SETPROP | MC_GETPROP | MC_PROPINFO,
535 535 bridge_m_getstat,
536 536 bridge_m_start,
537 537 bridge_m_stop,
538 538 bridge_m_setpromisc,
539 539 bridge_m_multicst,
540 540 bridge_m_unicst,
541 541 bridge_m_tx,
542 542 NULL, /* reserved */
543 543 NULL, /* ioctl */
544 544 NULL, /* getcapab */
545 545 NULL, /* open */
546 546 NULL, /* close */
547 547 bridge_m_setprop,
548 548 bridge_m_getprop,
549 549 bridge_m_propinfo
550 550 };
551 551
552 552 /*
553 553 * Create kstats from a list.
554 554 */
555 555 static kstat_t *
556 556 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
557 557 const char *unitname)
558 558 {
559 559 kstat_t *ksp;
560 560 int i;
561 561
562 562 for (i = 0; i < nstat; i++)
563 563 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);
564 564
565 565 ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
566 566 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
567 567 if (ksp != NULL) {
568 568 ksp->ks_data = knt;
569 569 kstat_install(ksp);
570 570 }
571 571 return (ksp);
572 572 }
573 573
574 574 /*
575 575 * Find an existing bridge_mac_t structure or allocate a new one for the given
576 576 * bridge instance. This creates the mac driver instance that snoop can use.
577 577 */
578 578 static int
579 579 bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
580 580 {
581 581 bridge_mac_t *bmp, *bnew;
582 582 mac_register_t *mac;
583 583 int err;
584 584
585 585 *bmacp = NULL;
586 586 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
587 587 return (EINVAL);
588 588
589 589 bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);
590 590
591 591 rw_enter(&bmac_rwlock, RW_WRITER);
592 592 for (bmp = list_head(&bmac_list); bmp != NULL;
593 593 bmp = list_next(&bmac_list, bmp)) {
594 594 if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
595 595 ASSERT(bmp->bm_inst == NULL);
596 596 bmp->bm_inst = bip;
597 597 rw_exit(&bmac_rwlock);
598 598 kmem_free(bnew, sizeof (*bnew));
599 599 mac_free(mac);
600 600 *bmacp = bmp;
601 601 return (0);
602 602 }
603 603 }
604 604
605 605 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
606 606 mac->m_driver = bnew;
607 607 mac->m_dip = bridge_dev_info;
608 608 mac->m_instance = (uint_t)-1;
609 609 mac->m_src_addr = (uint8_t *)zero_addr;
610 610 mac->m_callbacks = &bridge_m_callbacks;
611 611
612 612 /*
613 613 * Note that the SDU limits are irrelevant, as nobody transmits on the
614 614 * bridge node itself. It's mainly for monitoring but we allow
615 615 * setting the bridge MTU for quick transition of all links part of the
616 616 * bridge to a new MTU.
617 617 */
618 618 mac->m_min_sdu = 1;
619 619 mac->m_max_sdu = 1500;
620 620 err = mac_register(mac, &bnew->bm_mh);
621 621 mac_free(mac);
622 622 if (err != 0) {
623 623 rw_exit(&bmac_rwlock);
624 624 kmem_free(bnew, sizeof (*bnew));
625 625 return (err);
626 626 }
627 627
628 628 bnew->bm_inst = bip;
629 629 (void) strcpy(bnew->bm_name, bip->bi_name);
630 630 if (list_is_empty(&bmac_list)) {
631 631 bridge_timerid = timeout(bridge_timer, NULL,
632 632 bridge_scan_interval);
633 633 }
634 634 list_insert_tail(&bmac_list, bnew);
635 635 rw_exit(&bmac_rwlock);
636 636
637 637 /*
638 638 * Mark the MAC as unable to go "active" so that only passive clients
639 639 * (such as snoop) can bind to it.
640 640 */
641 641 mac_no_active(bnew->bm_mh);
642 642 *bmacp = bnew;
643 643 return (0);
644 644 }
645 645
646 646 /*
647 647 * Disconnect the given bridge_mac_t from its bridge instance. The bridge
648 648 * instance is going away. The mac instance can't go away until the clients
649 649 * are gone (see bridge_timer).
650 650 */
651 651 static void
652 652 bmac_disconnect(bridge_mac_t *bmp)
653 653 {
654 654 bridge_inst_t *bip;
655 655
656 656 bmp->bm_linkstate = LINK_STATE_DOWN;
657 657 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
658 658
659 659 rw_enter(&bmac_rwlock, RW_READER);
660 660 bip = bmp->bm_inst;
661 661 bip->bi_mac = NULL;
662 662 bmp->bm_inst = NULL;
663 663 rw_exit(&bmac_rwlock);
664 664 }
665 665
666 666 /* This is used by the avl trees to sort forwarding table entries */
667 667 static int
668 668 fwd_compare(const void *addr1, const void *addr2)
669 669 {
670 670 const bridge_fwd_t *fwd1 = addr1;
671 671 const bridge_fwd_t *fwd2 = addr2;
672 672 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);
673 673
674 674 if (diff != 0)
675 675 return (diff > 0 ? 1 : -1);
676 676
677 677 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
678 678 if (fwd1->bf_vlanid > fwd2->bf_vlanid)
679 679 return (1);
680 680 else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
681 681 return (-1);
682 682 }
683 683 return (0);
684 684 }
685 685
686 686 static void
687 687 inst_free(bridge_inst_t *bip)
688 688 {
689 689 ASSERT(bip->bi_mac == NULL);
690 690 rw_destroy(&bip->bi_rwlock);
691 691 list_destroy(&bip->bi_links);
692 692 cv_destroy(&bip->bi_linkwait);
693 693 avl_destroy(&bip->bi_fwd);
694 694 if (bip->bi_ksp != NULL)
695 695 kstat_delete(bip->bi_ksp);
696 696 kmem_free(bip, sizeof (*bip));
697 697 }
698 698
699 699 static bridge_inst_t *
700 700 inst_alloc(const char *bridge)
701 701 {
702 702 bridge_inst_t *bip;
703 703
704 704 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
705 705 bip->bi_refs = 1;
706 706 (void) strcpy(bip->bi_name, bridge);
707 707 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
708 708 list_create(&bip->bi_links, sizeof (bridge_link_t),
709 709 offsetof(bridge_link_t, bl_node));
710 710 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
711 711 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
712 712 offsetof(bridge_fwd_t, bf_node));
713 713 return (bip);
714 714 }
715 715
716 716 static bridge_inst_t *
717 717 bridge_find_name(const char *bridge)
718 718 {
719 719 bridge_inst_t *bip;
720 720
721 721 mutex_enter(&inst_lock);
722 722 for (bip = list_head(&inst_list); bip != NULL;
723 723 bip = list_next(&inst_list, bip)) {
724 724 if (!(bip->bi_flags & BIF_SHUTDOWN) &&
725 725 strcmp(bridge, bip->bi_name) == 0) {
726 726 atomic_inc_uint(&bip->bi_refs);
727 727 break;
728 728 }
729 729 }
730 730 mutex_exit(&inst_lock);
731 731
732 732 return (bip);
733 733 }
734 734
735 735 static int
736 736 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
737 737 cred_t *cred)
738 738 {
739 739 bridge_inst_t *bip, *bipnew;
740 740 bridge_mac_t *bmp = NULL;
741 741 int err;
742 742
743 743 *bipc = NULL;
744 744 bipnew = inst_alloc(bridge);
745 745
746 746 mutex_enter(&inst_lock);
747 747 lookup_retry:
748 748 for (bip = list_head(&inst_list); bip != NULL;
749 749 bip = list_next(&inst_list, bip)) {
750 750 if (strcmp(bridge, bip->bi_name) == 0)
751 751 break;
752 752 }
753 753
754 754 /* This should not take long; if it does, we've got a design problem */
755 755 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
756 756 cv_wait(&inst_cv, &inst_lock);
757 757 goto lookup_retry;
758 758 }
759 759
760 760 if (bip == NULL) {
761 761 bip = bipnew;
762 762 bipnew = NULL;
763 763 list_insert_tail(&inst_list, bip);
764 764 }
765 765
766 766 mutex_exit(&inst_lock);
767 767 if (bipnew != NULL) {
768 768 inst_free(bipnew);
769 769 return (EEXIST);
770 770 }
771 771
772 772 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
773 773 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);
774 774
775 775 err = bmac_alloc(bip, &bmp);
776 776 if ((bip->bi_mac = bmp) == NULL)
777 777 goto fail_create;
778 778
779 779 /*
780 780 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
781 781 * No extra locking is needed here.
782 782 */
783 783 if (!(bmp->bm_flags & BMF_DLS)) {
784 784 err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
785 785 if (err != 0)
786 786 goto fail_create;
787 787 bmp->bm_flags |= BMF_DLS;
788 788 }
789 789
790 790 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
791 791 *bipc = bip;
792 792 return (0);
793 793
794 794 fail_create:
795 795 ASSERT(bip->bi_trilldata == NULL);
796 796 bip->bi_flags |= BIF_SHUTDOWN;
797 797 bridge_unref(bip);
798 798 return (err);
799 799 }
800 800
801 801 static void
802 802 bridge_unref(bridge_inst_t *bip)
803 803 {
804 804 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
805 805 ASSERT(bip->bi_flags & BIF_SHUTDOWN);
806 806 /* free up mac for reuse before leaving global list */
807 807 if (bip->bi_mac != NULL)
808 808 bmac_disconnect(bip->bi_mac);
809 809 mutex_enter(&inst_lock);
810 810 list_remove(&inst_list, bip);
811 811 cv_broadcast(&inst_cv);
812 812 mutex_exit(&inst_lock);
813 813 inst_free(bip);
814 814 }
815 815 }
816 816
817 817 /*
818 818 * Stream instances are used only for allocating bridges and serving as a
819 819 * control node. They serve no data-handling function.
820 820 */
821 821 static bridge_stream_t *
822 822 stream_alloc(void)
823 823 {
824 824 bridge_stream_t *bsp;
825 825 minor_t mn;
826 826
827 827 if ((mn = mac_minor_hold(B_FALSE)) == 0)
828 828 return (NULL);
829 829 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
830 830 bsp->bs_minor = mn;
831 831 return (bsp);
832 832 }
833 833
834 834 static void
835 835 stream_free(bridge_stream_t *bsp)
836 836 {
837 837 mac_minor_rele(bsp->bs_minor);
838 838 kmem_free(bsp, sizeof (*bsp));
839 839 }
840 840
841 841 /* Reference hold/release functions for STREAMS-related taskq */
842 842 static void
843 843 stream_ref(bridge_stream_t *bsp)
844 844 {
845 845 mutex_enter(&stream_ref_lock);
846 846 bsp->bs_taskq_cnt++;
847 847 mutex_exit(&stream_ref_lock);
848 848 }
849 849
850 850 static void
851 851 stream_unref(bridge_stream_t *bsp)
852 852 {
853 853 mutex_enter(&stream_ref_lock);
854 854 if (--bsp->bs_taskq_cnt == 0)
855 855 cv_broadcast(&stream_ref_cv);
856 856 mutex_exit(&stream_ref_lock);
857 857 }
858 858
859 859 static void
860 860 link_free(bridge_link_t *blp)
861 861 {
862 862 bridge_inst_t *bip = blp->bl_inst;
863 863
864 864 ASSERT(!(blp->bl_flags & BLF_FREED));
865 865 blp->bl_flags |= BLF_FREED;
866 866 if (blp->bl_ksp != NULL)
867 867 kstat_delete(blp->bl_ksp);
868 868 if (blp->bl_lfailmp != NULL)
869 869 freeb(blp->bl_lfailmp);
870 870 cv_destroy(&blp->bl_trillwait);
871 871 mutex_destroy(&blp->bl_trilllock);
872 872 kmem_free(blp, sizeof (*blp));
873 873 /* Don't unreference the bridge until the MAC is closed */
874 874 bridge_unref(bip);
875 875 }
876 876
877 877 static void
878 878 link_unref(bridge_link_t *blp)
879 879 {
880 880 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
881 881 bridge_inst_t *bip = blp->bl_inst;
882 882
883 883 ASSERT(blp->bl_flags & BLF_DELETED);
884 884 rw_enter(&bip->bi_rwlock, RW_WRITER);
885 885 if (blp->bl_flags & BLF_LINK_ADDED)
886 886 list_remove(&bip->bi_links, blp);
887 887 rw_exit(&bip->bi_rwlock);
888 888 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
889 889 cv_broadcast(&bip->bi_linkwait);
890 890 link_free(blp);
891 891 }
892 892 }
893 893
894 894 static bridge_fwd_t *
895 895 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
896 896 {
897 897 bridge_fwd_t *bfp;
898 898
899 899 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
900 900 KM_NOSLEEP);
901 901 if (bfp != NULL) {
902 902 bcopy(addr, bfp->bf_dest, ETHERADDRL);
903 903 bfp->bf_lastheard = ddi_get_lbolt();
904 904 bfp->bf_maxlinks = nlinks;
905 905 bfp->bf_links = (bridge_link_t **)(bfp + 1);
906 906 bfp->bf_trill_nick = nick;
907 907 }
908 908 return (bfp);
909 909 }
910 910
911 911 static bridge_fwd_t *
912 912 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
913 913 {
914 914 bridge_fwd_t *bfp, *vbfp;
915 915 bridge_fwd_t match;
916 916
917 917 bcopy(addr, match.bf_dest, ETHERADDRL);
918 918 match.bf_flags = 0;
919 919 rw_enter(&bip->bi_rwlock, RW_READER);
920 920 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
921 921 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
922 922 match.bf_vlanid = vlanid;
923 923 match.bf_flags = BFF_VLANLOCAL;
924 924 vbfp = avl_find(&bip->bi_fwd, &match, NULL);
925 925 if (vbfp != NULL)
926 926 bfp = vbfp;
927 927 }
928 928 atomic_inc_uint(&bfp->bf_refs);
929 929 }
930 930 rw_exit(&bip->bi_rwlock);
931 931 return (bfp);
932 932 }
933 933
934 934 static void
935 935 fwd_free(bridge_fwd_t *bfp)
936 936 {
937 937 uint_t i;
938 938 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;
939 939
940 940 KIDECR(bki_count);
941 941 for (i = 0; i < bfp->bf_nlinks; i++)
942 942 link_unref(bfp->bf_links[i]);
943 943 kmem_free(bfp,
944 944 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
945 945 }
946 946
947 947 static void
948 948 fwd_unref(bridge_fwd_t *bfp)
949 949 {
950 950 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
951 951 ASSERT(!(bfp->bf_flags & BFF_INTREE));
952 952 fwd_free(bfp);
953 953 }
954 954 }
955 955
956 956 static void
957 957 fwd_delete(bridge_fwd_t *bfp)
958 958 {
959 959 bridge_inst_t *bip;
960 960 bridge_fwd_t *bfpzero;
961 961
962 962 if (bfp->bf_flags & BFF_INTREE) {
963 963 ASSERT(bfp->bf_nlinks > 0);
964 964 bip = bfp->bf_links[0]->bl_inst;
965 965 rw_enter(&bip->bi_rwlock, RW_WRITER);
966 966 /* Another thread could beat us to this */
967 967 if (bfp->bf_flags & BFF_INTREE) {
968 968 avl_remove(&bip->bi_fwd, bfp);
969 969 bfp->bf_flags &= ~BFF_INTREE;
970 970 if (bfp->bf_flags & BFF_VLANLOCAL) {
971 971 bfp->bf_flags &= ~BFF_VLANLOCAL;
972 972 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
973 973 if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
974 974 bfpzero->bf_vcnt--;
975 975 }
976 976 rw_exit(&bip->bi_rwlock);
977 977 fwd_unref(bfp); /* no longer in avl tree */
978 978 } else {
979 979 rw_exit(&bip->bi_rwlock);
980 980 }
981 981 }
982 982 }
983 983
984 984 static boolean_t
985 985 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
986 986 {
987 987 avl_index_t idx;
988 988 boolean_t retv;
989 989
990 990 rw_enter(&bip->bi_rwlock, RW_WRITER);
991 991 if (!(bip->bi_flags & BIF_SHUTDOWN) &&
992 992 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
993 993 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
994 994 avl_insert(&bip->bi_fwd, bfp, idx);
995 995 bfp->bf_flags |= BFF_INTREE;
996 996 atomic_inc_uint(&bfp->bf_refs); /* avl entry */
997 997 retv = B_TRUE;
998 998 } else {
999 999 retv = B_FALSE;
1000 1000 }
1001 1001 rw_exit(&bip->bi_rwlock);
1002 1002 return (retv);
1003 1003 }
1004 1004
1005 1005 static void
1006 1006 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
1007 1007 const uint8_t *newaddr)
1008 1008 {
1009 1009 bridge_inst_t *bip = blp->bl_inst;
1010 1010 bridge_fwd_t *bfp, *bfnew;
1011 1011 bridge_fwd_t match;
1012 1012 avl_index_t idx;
1013 1013 boolean_t drop_ref = B_FALSE;
1014 1014
1015 1015 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
1016 1016 return;
1017 1017
1018 1018 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
1019 1019 goto no_old_addr;
1020 1020
1021 1021 /*
1022 1022 * Find the previous entry, and remove our link from it.
1023 1023 */
1024 1024 bcopy(oldaddr, match.bf_dest, ETHERADDRL);
1025 1025 rw_enter(&bip->bi_rwlock, RW_WRITER);
1026 1026 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
1027 1027 int i;
1028 1028
1029 1029 /*
1030 1030 * See if we're in the list, and remove if so.
1031 1031 */
1032 1032 for (i = 0; i < bfp->bf_nlinks; i++) {
1033 1033 if (bfp->bf_links[i] == blp) {
1034 1034 /*
1035 1035 * We assume writes are atomic, so no special
1036 1036 * MT handling is needed. The list length is
1037 1037 * decremented first, and then we remove
1038 1038 * entries.
1039 1039 */
1040 1040 bfp->bf_nlinks--;
1041 1041 for (; i < bfp->bf_nlinks; i++)
1042 1042 bfp->bf_links[i] = bfp->bf_links[i + 1];
1043 1043 drop_ref = B_TRUE;
1044 1044 break;
1045 1045 }
1046 1046 }
1047 1047 /* If no more links, then remove and free up */
1048 1048 if (bfp->bf_nlinks == 0) {
1049 1049 avl_remove(&bip->bi_fwd, bfp);
1050 1050 bfp->bf_flags &= ~BFF_INTREE;
1051 1051 } else {
1052 1052 bfp = NULL;
1053 1053 }
1054 1054 }
1055 1055 rw_exit(&bip->bi_rwlock);
1056 1056 if (bfp != NULL)
1057 1057 fwd_unref(bfp); /* no longer in avl tree */
1058 1058
1059 1059 /*
1060 1060 * Now get the new link address and add this link to the list. The
1061 1061 * list should be of length 1 unless the user has configured multiple
1062 1062 * NICs with the same address. (That's an incorrect configuration, but
1063 1063 * we support it anyway.)
1064 1064 */
1065 1065 no_old_addr:
1066 1066 bfp = NULL;
1067 1067 if ((bip->bi_flags & BIF_SHUTDOWN) ||
1068 1068 bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
1069 1069 goto no_new_addr;
1070 1070
1071 1071 bcopy(newaddr, match.bf_dest, ETHERADDRL);
1072 1072 rw_enter(&bip->bi_rwlock, RW_WRITER);
1073 1073 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
1074 1074 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
1075 1075 if (bfnew != NULL)
1076 1076 KIINCR(bki_count);
1077 1077 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
1078 1078 /* special case: link fits in existing entry */
1079 1079 bfnew = bfp;
1080 1080 } else {
1081 1081 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
1082 1082 RBRIDGE_NICKNAME_NONE);
1083 1083 if (bfnew != NULL) {
1084 1084 KIINCR(bki_count);
1085 1085 avl_remove(&bip->bi_fwd, bfp);
1086 1086 bfp->bf_flags &= ~BFF_INTREE;
1087 1087 bfnew->bf_nlinks = bfp->bf_nlinks;
1088 1088 bcopy(bfp->bf_links, bfnew->bf_links,
1089 1089 bfp->bf_nlinks * sizeof (bfp));
1090 1090 /* reset the idx value due to removal above */
1091 1091 (void) avl_find(&bip->bi_fwd, &match, &idx);
1092 1092 }
1093 1093 }
1094 1094
1095 1095 if (bfnew != NULL) {
1096 1096 bfnew->bf_links[bfnew->bf_nlinks++] = blp;
1097 1097 if (drop_ref)
1098 1098 drop_ref = B_FALSE;
1099 1099 else
1100 1100 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */
1101 1101
1102 1102 if (bfnew != bfp) {
1103 1103 /* local addresses are not subject to table limits */
1104 1104 avl_insert(&bip->bi_fwd, bfnew, idx);
1105 1105 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
1106 1106 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */
1107 1107 }
1108 1108 }
1109 1109 rw_exit(&bip->bi_rwlock);
1110 1110
1111 1111 no_new_addr:
1112 1112 /*
1113 1113 * If we found an existing entry and we replaced it with a new one,
1114 1114 * then drop the table reference from the old one. We removed it from
1115 1115 * the AVL tree above.
1116 1116 */
1117 1117 if (bfnew != NULL && bfp != NULL && bfnew != bfp)
1118 1118 fwd_unref(bfp);
1119 1119
1120 1120 /* Account for removed entry. */
1121 1121 if (drop_ref)
1122 1122 link_unref(blp);
1123 1123 }
1124 1124
1125 1125 static void
1126 1126 bridge_new_unicst(bridge_link_t *blp)
1127 1127 {
1128 1128 uint8_t new_mac[ETHERADDRL];
1129 1129
1130 1130 mac_unicast_primary_get(blp->bl_mh, new_mac);
1131 1131 fwd_update_local(blp, blp->bl_local_mac, new_mac);
1132 1132 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
1133 1133 }
1134 1134
1135 1135 /*
1136 1136 * We must shut down a link prior to freeing it, and doing that requires
1137 1137 * blocking to wait for running MAC threads while holding a reference. This is
1138 1138 * run from a taskq to accomplish proper link shutdown followed by reference
1139 1139 * drop.
1140 1140 */
/*
 * Taskq callback; arg is the bridge_link_t being torn down.  In order, it:
 * notifies TRILL, undoes whichever MAC registrations the BLF_* flags say were
 * made, purges this link from the instance's forwarding table, closes the MAC
 * client and handle, and finally drops a link reference.
 */
1141 1141 static void
1142 1142 link_shutdown(void *arg)
1143 1143 {
1144 1144 bridge_link_t *blp = arg;
1145 1145 mac_handle_t mh = blp->bl_mh;
1146 1146 bridge_inst_t *bip;
1147 1147 bridge_fwd_t *bfp, *bfnext;
1148 1148 avl_tree_t fwd_scavenge;
1149 1149 int i;
1150 1150
1151 1151 /*
1152 1152 * This link is being destroyed. Notify TRILL now that it's no longer
1153 1153 * possible to send packets. Data packets may still arrive until TRILL
1154 1154 * calls bridge_trill_lnunref.
1155 1155 */
1156 1156 if (blp->bl_trilldata != NULL)
1157 1157 trill_lndstr_fn(blp->bl_trilldata, blp);
1158 1158
/* Each teardown step below is gated on the flag recording its setup. */
1159 1159 if (blp->bl_flags & BLF_PROM_ADDED)
1160 1160 (void) mac_promisc_remove(blp->bl_mphp);
1161 1161
1162 1162 if (blp->bl_flags & BLF_SET_BRIDGE)
1163 1163 mac_bridge_clear(mh, (mac_handle_t)blp);
1164 1164
1165 1165 if (blp->bl_flags & BLF_MARGIN_ADDED) {
1166 1166 (void) mac_notify_remove(blp->bl_mnh, B_TRUE);
1167 1167 (void) mac_margin_remove(mh, blp->bl_margin);
1168 1168 }
1169 1169
1170 1170 /* Tell the clients the real link state when we leave */
1171 1171 mac_link_redo(blp->bl_mh,
1172 1172 mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));
1173 1173
1174 1174 /* Destroy all of the forwarding entries related to this link */
/*
 * Entries whose only link is blp are moved onto the private fwd_scavenge
 * tree while the writer lock is held; their tree reference is dropped only
 * after the lock has been released.
 */
1175 1175 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
1176 1176 offsetof(bridge_fwd_t, bf_node));
1177 1177 bip = blp->bl_inst;
1178 1178 rw_enter(&bip->bi_rwlock, RW_WRITER);
1179 1179 bfnext = avl_first(&bip->bi_fwd);
1180 1180 while ((bfp = bfnext) != NULL) {
/* Fetch the successor first; bfp may be removed from the tree below. */
1181 1181 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
1182 1182 for (i = 0; i < bfp->bf_nlinks; i++) {
1183 1183 if (bfp->bf_links[i] == blp)
1184 1184 break;
1185 1185 }
/* This entry does not mention blp at all. */
1186 1186 if (i >= bfp->bf_nlinks)
1187 1187 continue;
1188 1188 if (bfp->bf_nlinks > 1) {
/* Shared entry: give back this slot's link reference and close the gap. */
1189 1189 /* note that this can't be the last reference */
1190 1190 link_unref(blp);
1191 1191 bfp->bf_nlinks--;
1192 1192 for (; i < bfp->bf_nlinks; i++)
1193 1193 bfp->bf_links[i] = bfp->bf_links[i + 1];
1194 1194 } else {
/* blp is the only link: pull the whole entry out for later release. */
1195 1195 ASSERT(bfp->bf_flags & BFF_INTREE);
1196 1196 avl_remove(&bip->bi_fwd, bfp);
1197 1197 bfp->bf_flags &= ~BFF_INTREE;
1198 1198 avl_add(&fwd_scavenge, bfp);
1199 1199 }
1200 1200 }
1201 1201 rw_exit(&bip->bi_rwlock);
/* Lock released: drop the tree reference of every scavenged entry. */
1202 1202 bfnext = avl_first(&fwd_scavenge);
1203 1203 while ((bfp = bfnext) != NULL) {
1204 1204 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
1205 1205 avl_remove(&fwd_scavenge, bfp);
1206 1206 fwd_unref(bfp);
1207 1207 }
1208 1208 avl_destroy(&fwd_scavenge);
1209 1209
1210 1210 if (blp->bl_flags & BLF_CLIENT_OPEN)
1211 1211 mac_client_close(blp->bl_mch, 0);
1212 1212
1213 1213 mac_close(mh);
1214 1214
1215 1215 /*
1216 1216 * We are now completely removed from the active list, so drop the
1217 1217 * reference (see bridge_add_link).
1218 1218 */
1219 1219 link_unref(blp);
1220 1220 }
1221 1221
/*
 * Begin tearing down a bridge instance.  Only the first caller proceeds; the
 * rest return once they see BIF_SHUTDOWN already set.  The function marks the
 * instance as shutting down, schedules link_shutdown() for every link not
 * already flagged BLF_DELETED, empties the forwarding table, notifies TRILL
 * (after waiting for the link list to drain) when TRILL is attached, and then
 * drops one reference on the instance.
 */
1222 1222 static void
1223 1223 shutdown_inst(bridge_inst_t *bip)
1224 1224 {
1225 1225 bridge_link_t *blp, *blnext;
1226 1226 bridge_fwd_t *bfp;
1227 1227
1228 1228 mutex_enter(&inst_lock);
1229 1229 if (bip->bi_flags & BIF_SHUTDOWN) {
1230 1230 mutex_exit(&inst_lock);
1231 1231 return;
1232 1232 }
1233 1233
1234 1234 /*
1235 1235 * Once on the inst_list, the bridge instance must not leave that list
1236 1236 * without having the shutdown flag set first. When the shutdown flag
1237 1237 * is set, we own the list reference, so we must drop it before
1238 1238 * returning.
1239 1239 */
1240 1240 bip->bi_flags |= BIF_SHUTDOWN;
1241 1241 mutex_exit(&inst_lock);
1242 1242
1243 1243 bip->bi_control = NULL;
1244 1244
1245 1245 rw_enter(&bip->bi_rwlock, RW_READER);
1246 1246 blnext = list_head(&bip->bi_links);
1247 1247 while ((blp = blnext) != NULL) {
1248 1248 blnext = list_next(&bip->bi_links, blp);
/* BLF_DELETED guards against scheduling a second shutdown for a link. */
1249 1249 if (!(blp->bl_flags & BLF_DELETED)) {
1250 1250 blp->bl_flags |= BLF_DELETED;
1251 1251 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
1252 1252 blp, DDI_SLEEP);
1253 1253 }
1254 1254 }
/*
 * Empty the forwarding table one entry at a time.  fwd_delete() takes
 * bi_rwlock as writer, so the reader lock is released around the call; the
 * temporary reference keeps the entry alive while the lock is not held.
 */
1255 1255 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
1256 1256 atomic_inc_uint(&bfp->bf_refs);
1257 1257 rw_exit(&bip->bi_rwlock);
1258 1258 fwd_delete(bfp);
1259 1259 fwd_unref(bfp);
1260 1260 rw_enter(&bip->bi_rwlock, RW_READER);
1261 1261 }
1262 1262 rw_exit(&bip->bi_rwlock);
1263 1263
1264 1264 /*
1265 1265 * This bridge is being destroyed. Notify TRILL once all of the
1266 1266 * links are all gone.
1267 1267 */
1268 1268 mutex_enter(&inst_lock);
/* The wait is skipped entirely when no TRILL data is registered. */
1269 1269 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
1270 1270 cv_wait(&bip->bi_linkwait, &inst_lock);
1271 1271 mutex_exit(&inst_lock);
1272 1272 if (bip->bi_trilldata != NULL)
1273 1273 trill_brdstr_fn(bip->bi_trilldata, bip);
1274 1274
/* Drop the reference described where BIF_SHUTDOWN is set above. */
1275 1275 bridge_unref(bip);
1276 1276 }
1277 1277
1278 1278 /*
1279 1279 * This is called once by the TRILL module when it starts up. It just sets the
1280 1280 * global TRILL callback function pointers -- data transmit/receive and bridge
1281 1281 * and link destroy notification. There's only one TRILL module, so only one
1282 1282 * registration is needed.
1283 1283 *
1284 1284 * TRILL should call this function with NULL pointers before unloading. It
1285 1285 * must not do so before dropping all references to bridges and links. We
1286 1286 * assert that this is true on debug builds.
1287 1287 */
1288 1288 void
1289 1289 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn,
1290 1290 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn)
1291 1291 {
1292 1292 #ifdef DEBUG
1293 1293 if (recv_fn == NULL && trill_recv_fn != NULL) {
1294 1294 bridge_inst_t *bip;
1295 1295 bridge_link_t *blp;
1296 1296
1297 1297 mutex_enter(&inst_lock);
1298 1298 for (bip = list_head(&inst_list); bip != NULL;
1299 1299 bip = list_next(&inst_list, bip)) {
1300 1300 ASSERT(bip->bi_trilldata == NULL);
1301 1301 rw_enter(&bip->bi_rwlock, RW_READER);
1302 1302 for (blp = list_head(&bip->bi_links); blp != NULL;
1303 1303 blp = list_next(&bip->bi_links, blp)) {
1304 1304 ASSERT(blp->bl_trilldata == NULL);
1305 1305 }
1306 1306 rw_exit(&bip->bi_rwlock);
1307 1307 }
1308 1308 mutex_exit(&inst_lock);
1309 1309 }
1310 1310 #endif
1311 1311 trill_recv_fn = recv_fn;
1312 1312 trill_encap_fn = encap_fn;
1313 1313 trill_brdstr_fn = brdstr_fn;
1314 1314 trill_lndstr_fn = lndstr_fn;
1315 1315 }
1316 1316
1317 1317 /*
1318 1318 * This registers the TRILL instance pointer with a bridge. Before this
1319 1319 * pointer is set, the forwarding, TRILL receive, and bridge destructor
1320 1320 * functions won't be called.
1321 1321 *
1322 1322 * TRILL holds a reference on a bridge with this call. It must free the
1323 1323 * reference by calling the unregister function below.
1324 1324 */
1325 1325 bridge_inst_t *
1326 1326 bridge_trill_brref(const char *bname, void *ptr)
1327 1327 {
1328 1328 char bridge[MAXLINKNAMELEN];
1329 1329 bridge_inst_t *bip;
1330 1330
1331 1331 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
1332 1332 bip = bridge_find_name(bridge);
1333 1333 if (bip != NULL) {
1334 1334 ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
1335 1335 bip->bi_trilldata = ptr;
1336 1336 }
1337 1337 return (bip);
1338 1338 }
1339 1339
1340 1340 void
1341 1341 bridge_trill_brunref(bridge_inst_t *bip)
1342 1342 {
1343 1343 ASSERT(bip->bi_trilldata != NULL);
1344 1344 bip->bi_trilldata = NULL;
1345 1345 bridge_unref(bip);
1346 1346 }
1347 1347
1348 1348 /*
1349 1349 * TRILL calls this function when referencing a particular link on a bridge.
1350 1350 *
1351 1351 * It holds a reference on the link, so TRILL must clear out the reference when
1352 1352 * it's done with the link (on unbinding).
1353 1353 */
1354 1354 bridge_link_t *
1355 1355 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
1356 1356 {
1357 1357 bridge_link_t *blp;
1358 1358
1359 1359 ASSERT(ptr != NULL);
1360 1360 rw_enter(&bip->bi_rwlock, RW_READER);
1361 1361 for (blp = list_head(&bip->bi_links); blp != NULL;
1362 1362 blp = list_next(&bip->bi_links, blp)) {
1363 1363 if (!(blp->bl_flags & BLF_DELETED) &&
1364 1364 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
1365 1365 blp->bl_trilldata = ptr;
1366 1366 blp->bl_flags &= ~BLF_TRILLACTIVE;
1367 1367 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
1368 1368 atomic_inc_uint(&blp->bl_refs);
1369 1369 break;
1370 1370 }
1371 1371 }
1372 1372 rw_exit(&bip->bi_rwlock);
1373 1373 return (blp);
1374 1374 }
1375 1375
1376 1376 void
1377 1377 bridge_trill_lnunref(bridge_link_t *blp)
1378 1378 {
1379 1379 mutex_enter(&blp->bl_trilllock);
1380 1380 ASSERT(blp->bl_trilldata != NULL);
1381 1381 blp->bl_trilldata = NULL;
1382 1382 blp->bl_flags &= ~BLF_TRILLACTIVE;
1383 1383 while (blp->bl_trillthreads > 0)
1384 1384 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
1385 1385 mutex_exit(&blp->bl_trilllock);
1386 1386 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
1387 1387 link_unref(blp);
1388 1388 }
1389 1389
1390 1390 /*
1391 1391 * This periodic timer performs three functions:
1392 1392 * 1. It scans the list of learned forwarding entries, and removes ones that
1393 1393 * haven't been heard from in a while. The time limit is backed down if
1394 1394 * we're above the configured table limit.
1395 1395 * 2. It walks the links and decays away the bl_learns counter.
1396 1396 * 3. It scans the observability node entries looking for ones that can be
1397 1397 * freed up.
1398 1398 */
1399 1399 /* ARGSUSED */
/*
 * Timeout callback; arg is unused.  It walks every bridge instance that is
 * not shutting down to expire stale forwarding entries and decay per-link
 * learn counters, then tries to free bridge_mac_t nodes that no longer belong
 * to an instance.  It re-arms itself only while bmac_list is non-empty.
 */
1400 1400 static void
1401 1401 bridge_timer(void *arg)
1402 1402 {
1403 1403 bridge_inst_t *bip;
1404 1404 bridge_fwd_t *bfp, *bfnext;
1405 1405 bridge_mac_t *bmp, *bmnext;
1406 1406 bridge_link_t *blp;
1407 1407 int err;
1408 1408 datalink_id_t tmpid;
1409 1409 avl_tree_t fwd_scavenge;
1410 1410 clock_t age_limit;
1411 1411 uint32_t ldecay;
1412 1412
/* Scratch tree: expired entries wait here until bi_rwlock is released. */
1413 1413 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
1414 1414 offsetof(bridge_fwd_t, bf_node));
1415 1415 mutex_enter(&inst_lock);
1416 1416 for (bip = list_head(&inst_list); bip != NULL;
1417 1417 bip = list_next(&inst_list, bip)) {
1418 1418 if (bip->bi_flags & BIF_SHUTDOWN)
1419 1419 continue;
1420 1420 rw_enter(&bip->bi_rwlock, RW_WRITER);
1421 1421 /* compute scaled maximum age based on table limit */
/*
 * Each pass that finds the table over bi_tablemax raises bi_tshift, halving
 * the age limit again; once the table is within its limit the shift resets.
 */
1422 1422 if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
1423 1423 bip->bi_tshift++;
1424 1424 else
1425 1425 bip->bi_tshift = 0;
/* Never let the limit reach zero; undo the last shift and clamp to 1. */
1426 1426 if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
1427 1427 if (bip->bi_tshift != 0)
1428 1428 bip->bi_tshift--;
1429 1429 age_limit = 1;
1430 1430 }
1431 1431 bfnext = avl_first(&bip->bi_fwd);
1432 1432 while ((bfp = bfnext) != NULL) {
1433 1433 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
/* Entries flagged BFF_LOCALADDR are never aged out. */
1434 1434 if (!(bfp->bf_flags & BFF_LOCALADDR) &&
1435 1435 (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
1436 1436 ASSERT(bfp->bf_flags & BFF_INTREE);
1437 1437 avl_remove(&bip->bi_fwd, bfp);
1438 1438 bfp->bf_flags &= ~BFF_INTREE;
1439 1439 avl_add(&fwd_scavenge, bfp);
1440 1440 }
1441 1441 }
/* Decay each link's learn counter by its ldecay value, flooring at zero. */
1442 1442 for (blp = list_head(&bip->bi_links); blp != NULL;
1443 1443 blp = list_next(&bip->bi_links, blp)) {
1444 1444 ldecay = mac_get_ldecay(blp->bl_mh);
1445 1445 if (ldecay >= blp->bl_learns)
1446 1446 blp->bl_learns = 0;
1447 1447 else
1448 1448 atomic_add_int(&blp->bl_learns, -(int)ldecay);
1449 1449 }
1450 1450 rw_exit(&bip->bi_rwlock);
/* Release the expired entries now that bi_rwlock is no longer held. */
1451 1451 bfnext = avl_first(&fwd_scavenge);
1452 1452 while ((bfp = bfnext) != NULL) {
1453 1453 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
1454 1454 avl_remove(&fwd_scavenge, bfp);
1455 1455 KIINCR(bki_expire);
1456 1456 fwd_unref(bfp); /* drop tree reference */
1457 1457 }
1458 1458 }
1459 1459 mutex_exit(&inst_lock);
1460 1460 avl_destroy(&fwd_scavenge);
1461 1461
1462 1462 /*
1463 1463 * Scan the bridge_mac_t entries and try to free up the ones that are
1464 1464 * no longer active. This must be done by polling, as neither DLS nor
1465 1465 * MAC provides a driver any sort of positive control over clients.
1466 1466 */
1467 1467 rw_enter(&bmac_rwlock, RW_WRITER);
1468 1468 bmnext = list_head(&bmac_list);
1469 1469 while ((bmp = bmnext) != NULL) {
1470 1470 bmnext = list_next(&bmac_list, bmp);
1471 1471
1472 1472 /* ignore active bridges */
1473 1473 if (bmp->bm_inst != NULL)
1474 1474 continue;
1475 1475
/* Step 1: destroy the DLS devnet; EBUSY means try again on a later tick. */
1476 1476 if (bmp->bm_flags & BMF_DLS) {
1477 1477 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
1478 1478 ASSERT(err == 0 || err == EBUSY);
1479 1479 if (err == 0)
1480 1480 bmp->bm_flags &= ~BMF_DLS;
1481 1481 }
1482 1482
/* Step 2: once BMF_DLS is clear, unregister from MAC and free the node. */
1483 1483 if (!(bmp->bm_flags & BMF_DLS)) {
1484 1484 err = mac_unregister(bmp->bm_mh);
1485 1485 ASSERT(err == 0 || err == EBUSY);
1486 1486 if (err == 0) {
1487 1487 list_remove(&bmac_list, bmp);
1488 1488 kmem_free(bmp, sizeof (*bmp));
1489 1489 }
1490 1490 }
1491 1491 }
/* Re-arm only while bridge_mac_t nodes remain; otherwise clear the id. */
1492 1492 if (list_is_empty(&bmac_list)) {
1493 1493 bridge_timerid = 0;
1494 1494 } else {
1495 1495 bridge_timerid = timeout(bridge_timer, NULL,
1496 1496 bridge_scan_interval);
1497 1497 }
1498 1498 rw_exit(&bmac_rwlock);
1499 1499 }
1500 1500
1501 1501 static int
1502 1502 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1503 1503 {
1504 1504 bridge_stream_t *bsp;
1505 1505
1506 1506 if (rq->q_ptr != NULL)
1507 1507 return (0);
1508 1508
1509 1509 if (sflag & MODOPEN)
1510 1510 return (EINVAL);
1511 1511
1512 1512 /*
1513 1513 * Check the minor node number being opened. This tells us which
1514 1514 * bridge instance the user wants.
1515 1515 */
1516 1516 if (getminor(*devp) != 0) {
1517 1517 /*
1518 1518 * This is a regular DLPI stream for snoop or the like.
1519 1519 * Redirect it through DLD.
1520 1520 */
1521 1521 rq->q_qinfo = &bridge_dld_rinit;
1522 1522 OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
1523 1523 return (dld_open(rq, devp, oflag, sflag, credp));
1524 1524 } else {
1525 1525 /*
1526 1526 * Allocate the bridge control stream structure.
1527 1527 */
1528 1528 if ((bsp = stream_alloc()) == NULL)
1529 1529 return (ENOSR);
1530 1530 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
1531 1531 bsp->bs_wq = WR(rq);
1532 1532 *devp = makedevice(getmajor(*devp), bsp->bs_minor);
1533 1533 qprocson(rq);
1534 1534 return (0);
1535 1535 }
1536 1536 }
1537 1537
1538 1538 /*
1539 1539 * This is used only for bridge control streams. DLPI goes through dld
1540 1540 * instead.
1541 1541 */
1542 1542 static int
1543 1543 bridge_close(queue_t *rq)
1544 1544 {
1545 1545 bridge_stream_t *bsp = rq->q_ptr;
1546 1546 bridge_inst_t *bip;
1547 1547
1548 1548 /*
1549 1549 * Wait for any stray taskq (add/delete link) entries related to this
1550 1550 * stream to leave the system.
1551 1551 */
1552 1552 mutex_enter(&stream_ref_lock);
1553 1553 while (bsp->bs_taskq_cnt != 0)
1554 1554 cv_wait(&stream_ref_cv, &stream_ref_lock);
1555 1555 mutex_exit(&stream_ref_lock);
1556 1556
1557 1557 qprocsoff(rq);
1558 1558 if ((bip = bsp->bs_inst) != NULL)
1559 1559 shutdown_inst(bip);
1560 1560 rq->q_ptr = WR(rq)->q_ptr = NULL;
1561 1561 stream_free(bsp);
1562 1562 if (bip != NULL)
1563 1563 bridge_unref(bip);
1564 1564
1565 1565 return (0);
1566 1566 }
1567 1567
/*
 * Learn (or refresh) the forwarding entry for source address saddr seen on
 * link blp with the given TRILL ingress nickname and VLAN ID.
 *
 * A known address on the same link and nickname only has its timestamp
 * refreshed.  Fixed local addresses are never relearned.  Otherwise, subject
 * to the link's learn limit, a new single-link entry is created: it replaces
 * the old entry when the VLAN matches, or is added as a VLAN-local duplicate
 * when it does not.
 */
1568 1568 static void
1569 1569 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
1570 1570 uint16_t vlanid)
1571 1571 {
1572 1572 bridge_inst_t *bip = blp->bl_inst;
1573 1573 bridge_fwd_t *bfp, *bfpnew;
1574 1574 int i;
1575 1575 boolean_t replaced = B_FALSE;
1576 1576
1577 1577 /* Ignore multi-destination address used as source; it's nonsense. */
1578 1578 if (*saddr & 1)
1579 1579 return;
1580 1580
1581 1581 /*
1582 1582 * If the source is known, then check whether it belongs on this link.
1583 1583 * If not, and this isn't a fixed local address, then we've detected a
1584 1584 * move. If it's not known, learn it.
1585 1585 */
/* fwd_find() returns a held entry; every exit path below must unref it. */
1586 1586 if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
1587 1587 /*
1588 1588 * If the packet has a fixed local source address, then there's
1589 1589 * nothing we can learn. We must quit. If this was a received
1590 1590 * packet, then the sender has stolen our address, but there's
1591 1591 * nothing we can do. If it's a transmitted packet, then
1592 1592 * that's the normal case.
1593 1593 */
1594 1594 if (bfp->bf_flags & BFF_LOCALADDR) {
1595 1595 fwd_unref(bfp);
1596 1596 return;
1597 1597 }
1598 1598
1599 1599 /*
1600 1600 * Check if the link (and TRILL sender, if any) being used is
1601 1601 * among the ones registered for this address. If so, then
1602 1602 * this is information that we already know.
1603 1603 */
1604 1604 if (bfp->bf_trill_nick == ingress_nick) {
1605 1605 for (i = 0; i < bfp->bf_nlinks; i++) {
1606 1606 if (bfp->bf_links[i] == blp) {
1607 1607 bfp->bf_lastheard = ddi_get_lbolt();
1608 1608 fwd_unref(bfp);
1609 1609 return;
1610 1610 }
1611 1611 }
1612 1612 }
1613 1613 }
1614 1614
1615 1615 /*
1616 1616 * Note that we intentionally "unlearn" things that appear to be under
1617 1617 * attack on this link. The forwarding cache is a negative thing for
1618 1618 * security -- it disables reachability as a performance optimization
1619 1619 * -- so leaving out entries optimizes for success and defends against
1620 1620 * the attack. Thus, the bare increment without a check in the delete
1621 1621 * code above is right. (And it's ok if we skid over the limit a
1622 1622 * little, so there's no synchronization needed on the test.)
1623 1623 */
1624 1624 if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
1625 1625 if (bfp != NULL) {
/* Entries that still have VLAN-local duplicates are left in place. */
1626 1626 if (bfp->bf_vcnt == 0)
1627 1627 fwd_delete(bfp);
1628 1628 fwd_unref(bfp);
1629 1629 }
1630 1630 return;
1631 1631 }
1632 1632
1633 1633 atomic_inc_uint(&blp->bl_learns);
1634 1634
/*
 * NOTE(review): vlanid is not passed to fwd_alloc() and this function never
 * stores it in bfpnew->bf_vlanid -- confirm where that field gets set.
 */
1635 1635 if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
1636 1636 if (bfp != NULL)
1637 1637 fwd_unref(bfp);
1638 1638 return;
1639 1639 }
1640 1640 KIINCR(bki_count);
1641 1641
1642 1642 if (bfp != NULL) {
1643 1643 /*
1644 1644 * If this is a new destination for the same VLAN, then delete
1645 1645 * so that we can update. If it's a different VLAN, then we're
1646 1646 * not going to delete the original. Split off instead into an
1647 1647 * IVL entry.
1648 1648 */
1649 1649 if (bfp->bf_vlanid == vlanid) {
1650 1650 /* save the count of IVL duplicates */
1651 1651 bfpnew->bf_vcnt = bfp->bf_vcnt;
1652 1652
1653 1653 /* entry deletes count as learning events */
1654 1654 atomic_inc_uint(&blp->bl_learns);
1655 1655
1656 1656 /* destroy and create anew; node moved */
1657 1657 fwd_delete(bfp);
1658 1658 replaced = B_TRUE;
1659 1659 KIINCR(bki_moved);
1660 1660 } else {
1661 1661 bfp->bf_vcnt++;
1662 1662 bfpnew->bf_flags |= BFF_VLANLOCAL;
1663 1663 }
1664 1664 fwd_unref(bfp);
1665 1665 }
/*
 * The link reference is taken before the insert attempt because fwd_free()
 * on the failure path drops one reference per bf_links slot.
 */
1666 1666 bfpnew->bf_links[0] = blp;
1667 1667 bfpnew->bf_nlinks = 1;
1668 1668 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */
1669 1669 if (!fwd_insert(bip, bfpnew))
1670 1670 fwd_free(bfpnew);
1671 1671 else if (!replaced)
1672 1672 KIINCR(bki_source);
1673 1673 }
1674 1674
1675 1675 /*
1676 1676 * Process the VLAN headers for output on a given link. There are several
1677 1677 * cases (noting that we don't map VLANs):
1678 1678 * 1. The input packet is good as it is; either
1679 1679 * a. It has no tag, and output has same PVID
1680 1680 * b. It has a non-zero priority-only tag for PVID, and b_band is same
1681 1681 * c. It has a tag with VLAN different from PVID, and b_band is same
1682 1682 * 2. The tag must change: non-zero b_band is different from tag priority
1683 1683 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
1684 1684 * 4. The packet has no tag and needs one:
1685 1685 * a. VLAN ID same as PVID, but b_band is non-zero
1686 1686 * b. VLAN ID different from PVID
1687 1687 * We exclude case 1 first, then modify the packet. Note that output packets
1688 1688 * get a priority set by the mblk, not by the header, because QoS in bridging
1689 1689 * requires priority recalculation at each node.
1690 1690 *
1691 1691 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
1692 1692 */
1693 1693 static mblk_t *
1694 1694 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
1695 1695 {
1696 1696 boolean_t source_has_tag = (tci != 0xFFFF);
1697 1697 mblk_t *mpcopy;
1698 1698 size_t mlen, minlen;
1699 1699 struct ether_vlan_header *evh;
1700 1700 int pri;
1701 1701
1702 1702 /* This helps centralize error handling in the caller. */
1703 1703 if (mp == NULL)
1704 1704 return (mp);
1705 1705
1706 1706 /* No forwarded packet can have hardware checksum enabled */
1707 1707 DB_CKSUMFLAGS(mp) = 0;
1708 1708
1709 1709 /* Get the no-modification cases out of the way first */
1710 1710 if (!source_has_tag && vlanid == pvid) /* 1a */
1711 1711 return (mp);
1712 1712
1713 1713 pri = VLAN_PRI(tci);
1714 1714 if (source_has_tag && mp->b_band == pri) {
1715 1715 if (vlanid != pvid) /* 1c */
1716 1716 return (mp);
1717 1717 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */
1718 1718 return (mp);
1719 1719 }
1720 1720
1721 1721 /*
1722 1722 * We now know that we must modify the packet. Prepare for that. Note
1723 1723 * that if a tag is present, the caller has already done a pullup for
1724 1724 * the VLAN header, so we're good to go.
1725 1725 */
1726 1726 if (MBLKL(mp) < sizeof (struct ether_header)) {
1727 1727 mpcopy = msgpullup(mp, sizeof (struct ether_header));
1728 1728 if (mpcopy == NULL) {
1729 1729 freemsg(mp);
1730 1730 return (NULL);
1731 1731 }
1732 1732 mp = mpcopy;
1733 1733 }
1734 1734 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
1735 1735 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
1736 1736 minlen = mlen = MBLKL(mp);
1737 1737 if (!source_has_tag)
1738 1738 minlen += VLAN_INCR;
1739 1739 ASSERT(minlen >= sizeof (struct ether_vlan_header));
1740 1740 /*
1741 1741 * We're willing to copy some data to avoid fragmentation, but
1742 1742 * not a lot.
1743 1743 */
1744 1744 if (minlen > 256)
1745 1745 minlen = sizeof (struct ether_vlan_header);
1746 1746 mpcopy = allocb(minlen, BPRI_MED);
1747 1747 if (mpcopy == NULL) {
1748 1748 freemsg(mp);
1749 1749 return (NULL);
1750 1750 }
1751 1751 if (mlen <= minlen) {
1752 1752 /* We toss the first mblk when we can. */
1753 1753 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
1754 1754 mpcopy->b_wptr += mlen;
1755 1755 mpcopy->b_cont = mp->b_cont;
1756 1756 freeb(mp);
1757 1757 } else {
1758 1758 /* If not, then just copy what we need */
1759 1759 if (!source_has_tag)
1760 1760 minlen = sizeof (struct ether_header);
1761 1761 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
1762 1762 mpcopy->b_wptr += minlen;
1763 1763 mpcopy->b_cont = mp;
1764 1764 mp->b_rptr += minlen;
1765 1765 }
1766 1766 mp = mpcopy;
1767 1767 }
1768 1768
1769 1769 /* LINTED: pointer alignment */
1770 1770 evh = (struct ether_vlan_header *)mp->b_rptr;
1771 1771 if (source_has_tag) {
1772 1772 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */
1773 1773 evh->ether_tpid = evh->ether_type;
1774 1774 mlen = MBLKL(mp);
1775 1775 if (mlen > sizeof (struct ether_vlan_header))
1776 1776 ovbcopy(mp->b_rptr +
1777 1777 sizeof (struct ether_vlan_header),
1778 1778 mp->b_rptr + sizeof (struct ether_header),
1779 1779 mlen - sizeof (struct ether_vlan_header));
1780 1780 mp->b_wptr -= VLAN_INCR;
1781 1781 } else { /* 2 */
1782 1782 if (vlanid == pvid)
1783 1783 vlanid = VLAN_ID_NONE;
1784 1784 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
1785 1785 evh->ether_tci = htons(tci);
1786 1786 }
1787 1787 } else {
1788 1788 /* case 4: no header present, but one is needed */
1789 1789 mlen = MBLKL(mp);
1790 1790 if (mlen > sizeof (struct ether_header))
1791 1791 ovbcopy(mp->b_rptr + sizeof (struct ether_header),
1792 1792 mp->b_rptr + sizeof (struct ether_vlan_header),
1793 1793 mlen - sizeof (struct ether_header));
1794 1794 mp->b_wptr += VLAN_INCR;
1795 1795 ASSERT(mp->b_wptr <= DB_LIM(mp));
1796 1796 if (vlanid == pvid)
1797 1797 vlanid = VLAN_ID_NONE;
1798 1798 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
1799 1799 evh->ether_type = evh->ether_tpid;
1800 1800 evh->ether_tpid = htons(ETHERTYPE_VLAN);
1801 1801 evh->ether_tci = htons(tci);
1802 1802 }
1803 1803 return (mp);
1804 1804 }
1805 1805
1806 1806 /* Record VLAN information and strip header if requested . */
1807 1807 static void
1808 1808 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr)
1809 1809 {
1810 1810 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
1811 1811 struct ether_vlan_header *evhp;
1812 1812 uint16_t ether_type;
1813 1813
1814 1814 /* LINTED: alignment */
1815 1815 evhp = (struct ether_vlan_header *)mp->b_rptr;
1816 1816 hdr_info->mhi_istagged = B_TRUE;
1817 1817 hdr_info->mhi_tci = ntohs(evhp->ether_tci);
1818 1818 if (striphdr) {
1819 1819 /*
1820 1820 * For VLAN tagged frames update the ether_type
1821 1821 * in hdr_info before stripping the header.
1822 1822 */
1823 1823 ether_type = ntohs(evhp->ether_type);
1824 1824 hdr_info->mhi_origsap = ether_type;
1825 1825 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ?
1826 1826 ether_type : DLS_SAP_LLC;
1827 1827 mp->b_rptr = (uchar_t *)(evhp + 1);
1828 1828 }
1829 1829 } else {
1830 1830 hdr_info->mhi_istagged = B_FALSE;
1831 1831 hdr_info->mhi_tci = VLAN_ID_NONE;
1832 1832 if (striphdr)
1833 1833 mp->b_rptr += sizeof (struct ether_header);
1834 1834 }
1835 1835 }
1836 1836
1837 1837 /*
1838 1838 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
1839 1839 */
1840 1840 static boolean_t
1841 1841 bridge_can_send(bridge_link_t *blp, uint16_t vlanid)
1842 1842 {
1843 1843 ASSERT(vlanid != VLAN_ID_NONE);
1844 1844 if (blp->bl_flags & BLF_DELETED)
1845 1845 return (B_FALSE);
1846 1846 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING)
1847 1847 return (B_FALSE);
1848 1848 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid));
1849 1849 }
1850 1850
1851 1851 /*
1852 1852 * This function scans the bridge forwarding tables in order to forward a given
1853 1853 * packet. If the packet either doesn't need forwarding (the current link is
1854 1854 * correct) or the current link needs a copy as well, then the packet is
1855 1855 * returned to the caller.
1856 1856 *
1857 1857 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
1858 1858 * TRILL tunnel. If the destination points there, then drop instead.
1859 1859 */
1860 1860 static mblk_t *
1861 1861 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
1862 1862 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
1863 1863 {
1864 1864 mblk_t *mpsend, *mpcopy;
1865 1865 bridge_inst_t *bip = blp->bl_inst;
1866 1866 bridge_link_t *blpsend, *blpnext;
1867 1867 bridge_fwd_t *bfp;
1868 1868 uint_t i;
1869 1869 boolean_t selfseen = B_FALSE;
1870 1870 void *tdp;
1871 1871 const uint8_t *daddr = hdr_info->mhi_daddr;
1872 1872
1873 1873 /*
1874 1874 * Check for the IEEE "reserved" multicast addresses. Messages sent to
1875 1875 * these addresses are used for link-local control (STP and pause), and
1876 1876 * are never forwarded or redirected.
1877 1877 */
1878 1878 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
1879 1879 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
1880 1880 if (from_trill) {
1881 1881 freemsg(mp);
1882 1882 mp = NULL;
1883 1883 }
1884 1884 return (mp);
1885 1885 }
1886 1886
1887 1887 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {
1888 1888
1889 1889 /*
1890 1890 * If trill indicates a destination for this node, then it's
1891 1891 * clearly not intended for local delivery. We must tell TRILL
1892 1892 * to encapsulate, as long as we didn't just decapsulate it.
1893 1893 */
1894 1894 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
1895 1895 /*
1896 1896 * Error case: can't reencapsulate if the protocols are
1897 1897 * working correctly.
1898 1898 */
1899 1899 if (from_trill) {
1900 1900 freemsg(mp);
1901 1901 return (NULL);
1902 1902 }
1903 1903 mutex_enter(&blp->bl_trilllock);
1904 1904 if ((tdp = blp->bl_trilldata) != NULL) {
1905 1905 blp->bl_trillthreads++;
1906 1906 mutex_exit(&blp->bl_trilllock);
1907 1907 update_header(mp, hdr_info, B_FALSE);
1908 1908 if (is_xmit)
1909 1909 mp = mac_fix_cksum(mp);
1910 1910 /* all trill data frames have Inner.VLAN */
1911 1911 mp = reform_vlan_header(mp, vlanid, tci, 0);
1912 1912 if (mp == NULL) {
1913 1913 KIINCR(bki_drops);
1914 1914 fwd_unref(bfp);
1915 1915 return (NULL);
1916 1916 }
1917 1917 trill_encap_fn(tdp, blp, hdr_info, mp,
1918 1918 bfp->bf_trill_nick);
1919 1919 mutex_enter(&blp->bl_trilllock);
1920 1920 if (--blp->bl_trillthreads == 0 &&
1921 1921 blp->bl_trilldata == NULL)
1922 1922 cv_broadcast(&blp->bl_trillwait);
1923 1923 }
1924 1924 mutex_exit(&blp->bl_trilllock);
1925 1925
1926 1926 /* if TRILL has been disabled, then kill this stray */
1927 1927 if (tdp == NULL) {
1928 1928 freemsg(mp);
1929 1929 fwd_delete(bfp);
1930 1930 }
1931 1931 fwd_unref(bfp);
1932 1932 return (NULL);
1933 1933 }
1934 1934
1935 1935 /* find first link we can send on */
1936 1936 for (i = 0; i < bfp->bf_nlinks; i++) {
1937 1937 blpsend = bfp->bf_links[i];
1938 1938 if (blpsend == blp)
1939 1939 selfseen = B_TRUE;
1940 1940 else if (bridge_can_send(blpsend, vlanid))
1941 1941 break;
1942 1942 }
1943 1943
1944 1944 while (i < bfp->bf_nlinks) {
1945 1945 blpsend = bfp->bf_links[i];
1946 1946 for (i++; i < bfp->bf_nlinks; i++) {
1947 1947 blpnext = bfp->bf_links[i];
1948 1948 if (blpnext == blp)
1949 1949 selfseen = B_TRUE;
1950 1950 else if (bridge_can_send(blpnext, vlanid))
1951 1951 break;
1952 1952 }
1953 1953 if (i == bfp->bf_nlinks && !selfseen) {
1954 1954 mpsend = mp;
1955 1955 mp = NULL;
1956 1956 } else {
1957 1957 mpsend = copymsg(mp);
1958 1958 }
1959 1959
1960 1960 if (!from_trill && is_xmit)
1961 1961 mpsend = mac_fix_cksum(mpsend);
1962 1962
1963 1963 mpsend = reform_vlan_header(mpsend, vlanid, tci,
1964 1964 blpsend->bl_pvid);
1965 1965 if (mpsend == NULL) {
1966 1966 KIINCR(bki_drops);
1967 1967 continue;
1968 1968 }
1969 1969
1970 1970 KIINCR(bki_forwards);
1971 1971 /*
1972 1972 * No need to bump up the link reference count, as
1973 1973 * the forwarding entry itself holds a reference to
1974 1974 * the link.
1975 1975 */
1976 1976 if (bfp->bf_flags & BFF_LOCALADDR) {
1977 1977 mac_rx_common(blpsend->bl_mh, NULL, mpsend);
1978 1978 } else {
1979 1979 KLPINCR(blpsend, bkl_xmit);
1980 1980 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend,
1981 1981 mpsend);
1982 1982 freemsg(mpsend);
1983 1983 }
1984 1984 }
1985 1985 /*
1986 1986 * Handle a special case: if we're transmitting to the original
1987 1987 * link, then check whether the localaddr flag is set. If it
1988 1988 * is, then receive instead. This doesn't happen with ordinary
1989 1989 * bridging, but does happen often with TRILL decapsulation.
1990 1990 */
1991 1991 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) {
1992 1992 mac_rx_common(blp->bl_mh, NULL, mp);
1993 1993 mp = NULL;
1994 1994 }
1995 1995 fwd_unref(bfp);
1996 1996 } else {
1997 1997 /*
1998 1998 * TRILL has two cases to handle. If the packet is off the
1999 1999 * wire (not from TRILL), then we need to send up into the
2000 2000 * TRILL module to have the distribution tree computed. If the
2001 2001 * packet is from TRILL (decapsulated), then we're part of the
2002 2002 * distribution tree, and we need to copy the packet on member
2003 2003 * interfaces.
2004 2004 *
2005 2005 * Thus, the from TRILL case is identical to the STP case.
2006 2006 */
2007 2007 if (!from_trill && blp->bl_trilldata != NULL) {
2008 2008 mutex_enter(&blp->bl_trilllock);
2009 2009 if ((tdp = blp->bl_trilldata) != NULL) {
2010 2010 blp->bl_trillthreads++;
2011 2011 mutex_exit(&blp->bl_trilllock);
2012 2012 if ((mpsend = copymsg(mp)) != NULL) {
2013 2013 update_header(mpsend,
2014 2014 hdr_info, B_FALSE);
2015 2015 /*
2016 2016 * all trill data frames have
2017 2017 * Inner.VLAN
2018 2018 */
2019 2019 mpsend = reform_vlan_header(mpsend,
2020 2020 vlanid, tci, 0);
2021 2021 if (mpsend == NULL) {
2022 2022 KIINCR(bki_drops);
2023 2023 } else {
2024 2024 trill_encap_fn(tdp, blp,
2025 2025 hdr_info, mpsend,
2026 2026 RBRIDGE_NICKNAME_NONE);
2027 2027 }
2028 2028 }
2029 2029 mutex_enter(&blp->bl_trilllock);
2030 2030 if (--blp->bl_trillthreads == 0 &&
2031 2031 blp->bl_trilldata == NULL)
2032 2032 cv_broadcast(&blp->bl_trillwait);
2033 2033 }
2034 2034 mutex_exit(&blp->bl_trilllock);
2035 2035 }
2036 2036
2037 2037 /*
2038 2038 * This is an unknown destination, so flood.
2039 2039 */
2040 2040 rw_enter(&bip->bi_rwlock, RW_READER);
2041 2041 for (blpnext = list_head(&bip->bi_links); blpnext != NULL;
2042 2042 blpnext = list_next(&bip->bi_links, blpnext)) {
2043 2043 if (blpnext == blp)
2044 2044 selfseen = B_TRUE;
2045 2045 else if (bridge_can_send(blpnext, vlanid))
2046 2046 break;
2047 2047 }
2048 2048 if (blpnext != NULL)
2049 2049 atomic_inc_uint(&blpnext->bl_refs);
2050 2050 rw_exit(&bip->bi_rwlock);
2051 2051 while ((blpsend = blpnext) != NULL) {
2052 2052 rw_enter(&bip->bi_rwlock, RW_READER);
2053 2053 for (blpnext = list_next(&bip->bi_links, blpsend);
2054 2054 blpnext != NULL;
2055 2055 blpnext = list_next(&bip->bi_links, blpnext)) {
2056 2056 if (blpnext == blp)
2057 2057 selfseen = B_TRUE;
2058 2058 else if (bridge_can_send(blpnext, vlanid))
2059 2059 break;
2060 2060 }
2061 2061 if (blpnext != NULL)
2062 2062 atomic_inc_uint(&blpnext->bl_refs);
2063 2063 rw_exit(&bip->bi_rwlock);
2064 2064 if (blpnext == NULL && !selfseen) {
2065 2065 mpsend = mp;
2066 2066 mp = NULL;
2067 2067 } else {
2068 2068 mpsend = copymsg(mp);
2069 2069 }
2070 2070
2071 2071 if (!from_trill && is_xmit)
2072 2072 mpsend = mac_fix_cksum(mpsend);
2073 2073
2074 2074 mpsend = reform_vlan_header(mpsend, vlanid, tci,
2075 2075 blpsend->bl_pvid);
2076 2076 if (mpsend == NULL) {
2077 2077 KIINCR(bki_drops);
2078 2078 continue;
2079 2079 }
2080 2080
2081 2081 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST)
2082 2082 KIINCR(bki_unknown);
2083 2083 else
2084 2084 KIINCR(bki_mbcast);
2085 2085 KLPINCR(blpsend, bkl_xmit);
2086 2086 if ((mpcopy = copymsg(mpsend)) != NULL)
2087 2087 mac_rx_common(blpsend->bl_mh, NULL, mpcopy);
2088 2088 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend);
2089 2089 freemsg(mpsend);
2090 2090 link_unref(blpsend);
2091 2091 }
2092 2092 }
2093 2093
2094 2094 /*
2095 2095 * At this point, if np is non-NULL, it means that the caller needs to
2096 2096 * continue on the selected link.
2097 2097 */
2098 2098 return (mp);
2099 2099 }
2100 2100
2101 2101 /*
2102 2102 * Extract and validate the VLAN information for a given packet. This checks
2103 2103 * conformance with the rules for use of the PVID on the link, and for the
2104 2104 * allowed (configured) VLAN set.
2105 2105 *
2106 2106 * Returns B_TRUE if the packet passes, B_FALSE if it fails.
2107 2107 */
2108 2108 static boolean_t
2109 2109 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
2110 2110 uint16_t *vlanidp, uint16_t *tcip)
2111 2111 {
2112 2112 uint16_t tci, vlanid;
2113 2113
2114 2114 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
2115 2115 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci);
2116 2116 ptrdiff_t mlen;
2117 2117
2118 2118 /*
2119 2119 * Extract the VLAN ID information, regardless of alignment,
2120 2120 * and without a pullup. This isn't attractive, but we do this
2121 2121 * to avoid having to deal with the pointers stashed in
2122 2122 * hdr_info moving around or having the caller deal with a new
2123 2123 * mblk_t pointer.
2124 2124 */
2125 2125 while (mp != NULL) {
2126 2126 mlen = MBLKL(mp);
2127 2127 if (mlen > tpos && mlen > 0)
2128 2128 break;
2129 2129 tpos -= mlen;
2130 2130 mp = mp->b_cont;
2131 2131 }
2132 2132 if (mp == NULL)
2133 2133 return (B_FALSE);
2134 2134 tci = mp->b_rptr[tpos] << 8;
2135 2135 if (++tpos >= mlen) {
2136 2136 do {
2137 2137 mp = mp->b_cont;
2138 2138 } while (mp != NULL && MBLKL(mp) == 0);
2139 2139 if (mp == NULL)
2140 2140 return (B_FALSE);
2141 2141 tpos = 0;
2142 2142 }
2143 2143 tci |= mp->b_rptr[tpos];
2144 2144
2145 2145 vlanid = VLAN_ID(tci);
2146 2146 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX)
2147 2147 return (B_FALSE);
2148 2148 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid)
2149 2149 goto input_no_vlan;
2150 2150 if (!BRIDGE_VLAN_ISSET(blp, vlanid))
2151 2151 return (B_FALSE);
2152 2152 } else {
2153 2153 tci = 0xFFFF;
2154 2154 input_no_vlan:
2155 2155 /*
2156 2156 * If PVID is set to zero, then untagged traffic is not
2157 2157 * supported here. Do not learn or forward.
2158 2158 */
2159 2159 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE)
2160 2160 return (B_FALSE);
2161 2161 }
2162 2162
2163 2163 *tcip = tci;
2164 2164 *vlanidp = vlanid;
2165 2165 return (B_TRUE);
2166 2166 }
2167 2167
2168 2168 /*
2169 2169 * Handle MAC notifications.
2170 2170 */
2171 2171 static void
2172 2172 bridge_notify_cb(void *arg, mac_notify_type_t note_type)
2173 2173 {
2174 2174 bridge_link_t *blp = arg;
2175 2175
2176 2176 switch (note_type) {
2177 2177 case MAC_NOTE_UNICST:
2178 2178 bridge_new_unicst(blp);
2179 2179 break;
2180 2180
2181 2181 case MAC_NOTE_SDU_SIZE: {
2182 2182 uint_t maxsdu;
2183 2183 bridge_inst_t *bip = blp->bl_inst;
2184 2184 bridge_mac_t *bmp = bip->bi_mac;
2185 2185 boolean_t notify = B_FALSE;
2186 2186 mblk_t *mlist = NULL;
2187 2187
2188 2188 mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
2189 2189 rw_enter(&bip->bi_rwlock, RW_READER);
2190 2190 if (list_prev(&bip->bi_links, blp) == NULL &&
2191 2191 list_next(&bip->bi_links, blp) == NULL) {
2192 2192 notify = (maxsdu != bmp->bm_maxsdu);
2193 2193 bmp->bm_maxsdu = maxsdu;
2194 2194 }
2195 2195 blp->bl_maxsdu = maxsdu;
2196 2196 if (maxsdu != bmp->bm_maxsdu)
2197 2197 link_sdu_fail(blp, B_TRUE, &mlist);
2198 2198 else if (notify)
2199 2199 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
2200 2200 rw_exit(&bip->bi_rwlock);
2201 2201 send_up_messages(bip, mlist);
2202 2202 break;
2203 2203 }
2204 2204 }
2205 2205 }
2206 2206
2207 2207 /*
2208 2208 * This is called by the MAC layer. As with the transmit side, we're right in
2209 2209 * the data path for all I/O on this port, so if we don't need to forward this
2210 2210 * packet anywhere, we have to send it upwards via mac_rx_common.
2211 2211 */
2212 2212 static void
2213 2213 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
2214 2214 {
2215 2215 mblk_t *mp, *mpcopy;
2216 2216 bridge_link_t *blp = (bridge_link_t *)mh;
2217 2217 bridge_inst_t *bip = blp->bl_inst;
2218 2218 bridge_mac_t *bmp = bip->bi_mac;
2219 2219 mac_header_info_t hdr_info;
2220 2220 uint16_t vlanid, tci;
2221 2221 boolean_t trillmode = B_FALSE;
2222 2222
2223 2223 KIINCR(bki_recv);
2224 2224 KLINCR(bkl_recv);
2225 2225
2226 2226 /*
2227 2227 * Regardless of state, check for inbound TRILL packets when TRILL is
2228 2228 * active. These are pulled out of band and sent for TRILL handling.
2229 2229 */
2230 2230 if (blp->bl_trilldata != NULL) {
2231 2231 void *tdp;
2232 2232 mblk_t *newhead;
2233 2233 mblk_t *tail = NULL;
2234 2234
2235 2235 mutex_enter(&blp->bl_trilllock);
2236 2236 if ((tdp = blp->bl_trilldata) != NULL) {
2237 2237 blp->bl_trillthreads++;
2238 2238 mutex_exit(&blp->bl_trilllock);
2239 2239 trillmode = B_TRUE;
2240 2240 newhead = mpnext;
2241 2241 while ((mp = mpnext) != NULL) {
2242 2242 boolean_t raw_isis, bridge_group;
2243 2243
2244 2244 mpnext = mp->b_next;
2245 2245
2246 2246 /*
2247 2247 * If the header isn't readable, then leave on
2248 2248 * the list and continue.
2249 2249 */
2250 2250 if (mac_header_info(blp->bl_mh, mp,
2251 2251 &hdr_info) != 0) {
2252 2252 tail = mp;
2253 2253 continue;
2254 2254 }
2255 2255
2256 2256 /*
2257 2257 * The TRILL document specifies that, on
2258 2258 * Ethernet alone, IS-IS packets arrive with
2259 2259 * LLC rather than Ethertype, and using a
2260 2260 * specific destination address. We must check
2261 2261 * for that here. Also, we need to give BPDUs
2262 2262 * to TRILL for processing.
2263 2263 */
2264 2264 raw_isis = bridge_group = B_FALSE;
2265 2265 if (hdr_info.mhi_dsttype ==
2266 2266 MAC_ADDRTYPE_MULTICAST) {
2267 2267 if (memcmp(hdr_info.mhi_daddr,
2268 2268 all_isis_rbridges, ETHERADDRL) == 0)
2269 2269 raw_isis = B_TRUE;
2270 2270 else if (memcmp(hdr_info.mhi_daddr,
2271 2271 bridge_group_address, ETHERADDRL) ==
2272 2272 0)
2273 2273 bridge_group = B_TRUE;
2274 2274 }
2275 2275 if (!raw_isis && !bridge_group &&
2276 2276 hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
2277 2277 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
2278 2278 /* LINTED: alignment */
2279 2279 ((struct ether_vlan_header *)mp->b_rptr)->
2280 2280 ether_type != htons(ETHERTYPE_TRILL))) {
2281 2281 tail = mp;
2282 2282 continue;
2283 2283 }
2284 2284
2285 2285 /*
2286 2286 * We've got TRILL input. Remove from the list
2287 2287 * and send up through the TRILL module. (Send
2288 2288 * a copy through promiscuous receive just to
2289 2289 * support snooping on TRILL. Order isn't
2290 2290 * preserved strictly, but that doesn't matter
2291 2291 * here.)
2292 2292 */
2293 2293 if (tail != NULL)
2294 2294 tail->b_next = mpnext;
2295 2295 mp->b_next = NULL;
2296 2296 if (mp == newhead)
2297 2297 newhead = mpnext;
2298 2298 mac_trill_snoop(blp->bl_mh, mp);
2299 2299 update_header(mp, &hdr_info, B_TRUE);
2300 2300 /*
2301 2301 * On raw IS-IS and BPDU frames, we have to
2302 2302 * make sure that the length is trimmed
2303 2303 * properly. We use origsap in order to cope
2304 2304 * with jumbograms for IS-IS. (Regular mac
2305 2305 * can't.)
2306 2306 */
2307 2307 if (raw_isis || bridge_group) {
2308 2308 size_t msglen = msgdsize(mp);
2309 2309
2310 2310 if (msglen > hdr_info.mhi_origsap) {
2311 2311 (void) adjmsg(mp,
2312 2312 hdr_info.mhi_origsap -
2313 2313 msglen);
2314 2314 } else if (msglen <
2315 2315 hdr_info.mhi_origsap) {
2316 2316 freemsg(mp);
2317 2317 continue;
2318 2318 }
2319 2319 }
2320 2320 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
2321 2321 }
2322 2322 mpnext = newhead;
2323 2323 mutex_enter(&blp->bl_trilllock);
2324 2324 if (--blp->bl_trillthreads == 0 &&
2325 2325 blp->bl_trilldata == NULL)
2326 2326 cv_broadcast(&blp->bl_trillwait);
2327 2327 }
2328 2328 mutex_exit(&blp->bl_trilllock);
2329 2329 if (mpnext == NULL)
2330 2330 return;
2331 2331 }
2332 2332
2333 2333 /*
2334 2334 * If this is a TRILL RBridge, then just check whether this link is
2335 2335 * used at all for forwarding. If not, then we're done.
2336 2336 */
2337 2337 if (trillmode) {
2338 2338 if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
2339 2339 (blp->bl_flags & BLF_SDUFAIL)) {
2340 2340 mac_rx_common(blp->bl_mh, rsrc, mpnext);
2341 2341 return;
2342 2342 }
2343 2343 } else {
2344 2344 /*
2345 2345 * For regular (STP) bridges, if we're in blocking or listening
2346 2346 * state, then do nothing. We don't learn or forward until
2347 2347 * told to do so.
2348 2348 */
2349 2349 if (blp->bl_state == BLS_BLOCKLISTEN) {
2350 2350 mac_rx_common(blp->bl_mh, rsrc, mpnext);
2351 2351 return;
2352 2352 }
2353 2353 }
2354 2354
2355 2355 /*
2356 2356 * Send a copy of the message chain up to the observability node users.
2357 2357 * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
2358 2358 * packet.
2359 2359 */
2360 2360 if (!trillmode && blp->bl_state == BLS_FORWARDING &&
2361 2361 (bmp->bm_flags & BMF_STARTED) &&
2362 2362 (mp = copymsgchain(mpnext)) != NULL) {
2363 2363 mac_rx(bmp->bm_mh, NULL, mp);
2364 2364 }
2365 2365
2366 2366 /*
2367 2367 * We must be in learning or forwarding state, or using TRILL on a link
2368 2368 * with one or more VLANs active. For each packet in the list, process
2369 2369 * the source address, and then attempt to forward.
2370 2370 */
2371 2371 while ((mp = mpnext) != NULL) {
2372 2372 mpnext = mp->b_next;
2373 2373 mp->b_next = NULL;
2374 2374
2375 2375 /*
2376 2376 * If we can't decode the header or if the header specifies a
2377 2377 * multicast source address (impossible!), then don't bother
2378 2378 * learning or forwarding, but go ahead and forward up the
2379 2379 * stack for subsequent processing.
2380 2380 */
2381 2381 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
2382 2382 (hdr_info.mhi_saddr[0] & 1) != 0) {
2383 2383 KIINCR(bki_drops);
2384 2384 KLINCR(bkl_drops);
2385 2385 mac_rx_common(blp->bl_mh, rsrc, mp);
2386 2386 continue;
2387 2387 }
2388 2388
2389 2389 /*
2390 2390 * Extract and validate the VLAN ID for this packet.
2391 2391 */
2392 2392 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
2393 2393 !BRIDGE_AF_ISSET(blp, vlanid)) {
2394 2394 mac_rx_common(blp->bl_mh, rsrc, mp);
2395 2395 continue;
2396 2396 }
2397 2397
2398 2398 if (trillmode) {
2399 2399 /*
2400 2400 * Special test required by TRILL document: must
2401 2401 * discard frames with outer address set to ESADI.
2402 2402 */
2403 2403 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
2404 2404 ETHERADDRL) == 0) {
2405 2405 mac_rx_common(blp->bl_mh, rsrc, mp);
2406 2406 continue;
2407 2407 }
2408 2408
2409 2409 /*
2410 2410 * If we're in TRILL mode, then the call above to get
2411 2411 * the VLAN ID has also checked that we're the
2412 2412 * appointed forwarder, so report that we're handling
2413 2413 * this packet to any observability node users.
2414 2414 */
2415 2415 if ((bmp->bm_flags & BMF_STARTED) &&
2416 2416 (mpcopy = copymsg(mp)) != NULL)
2417 2417 mac_rx(bmp->bm_mh, NULL, mpcopy);
2418 2418 }
2419 2419
2420 2420 /*
2421 2421 * First process the source address and learn from it. For
2422 2422 * TRILL, we learn only if we're the appointed forwarder.
2423 2423 */
2424 2424 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
2425 2425 vlanid);
2426 2426
2427 2427 /*
2428 2428 * Now check whether we're forwarding and look up the
2429 2429 * destination. If we can forward, do so.
2430 2430 */
2431 2431 if (trillmode || blp->bl_state == BLS_FORWARDING) {
2432 2432 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
2433 2433 B_FALSE, B_FALSE);
2434 2434 }
2435 2435 if (mp != NULL)
2436 2436 mac_rx_common(blp->bl_mh, rsrc, mp);
2437 2437 }
2438 2438 }
2439 2439
2440 2440
2441 2441 /* ARGSUSED */
2442 2442 static mblk_t *
2443 2443 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
2444 2444 {
2445 2445 bridge_link_t *blp = (bridge_link_t *)mh;
2446 2446 bridge_inst_t *bip = blp->bl_inst;
2447 2447 bridge_mac_t *bmp = bip->bi_mac;
2448 2448 mac_header_info_t hdr_info;
2449 2449 uint16_t vlanid, tci;
2450 2450 mblk_t *mp, *mpcopy;
2451 2451 boolean_t trillmode;
2452 2452
2453 2453 trillmode = blp->bl_trilldata != NULL;
2454 2454
2455 2455 /*
2456 2456 * If we're using STP and we're in blocking or listening state, or if
2457 2457 * we're using TRILL and no VLANs are active, then behave as though the
2458 2458 * bridge isn't here at all, and send on the local link alone.
2459 2459 */
2460 2460 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
2461 2461 (trillmode &&
2462 2462 (!(blp->bl_flags & BLF_TRILLACTIVE) ||
2463 2463 (blp->bl_flags & BLF_SDUFAIL)))) {
2464 2464 KIINCR(bki_sent);
2465 2465 KLINCR(bkl_xmit);
2466 2466 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
2467 2467 return (mp);
2468 2468 }
2469 2469
2470 2470 /*
2471 2471 * Send a copy of the message up to the observability node users.
2472 2472 * TRILL needs to check on a packet-by-packet basis.
2473 2473 */
2474 2474 if (!trillmode && blp->bl_state == BLS_FORWARDING &&
2475 2475 (bmp->bm_flags & BMF_STARTED) &&
2476 2476 (mp = copymsgchain(mpnext)) != NULL) {
2477 2477 mac_rx(bmp->bm_mh, NULL, mp);
2478 2478 }
2479 2479
2480 2480 while ((mp = mpnext) != NULL) {
2481 2481 mpnext = mp->b_next;
2482 2482 mp->b_next = NULL;
2483 2483
2484 2484 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
2485 2485 freemsg(mp);
2486 2486 continue;
2487 2487 }
2488 2488
2489 2489 /*
2490 2490 * Extract and validate the VLAN ID for this packet.
2491 2491 */
2492 2492 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
2493 2493 !BRIDGE_AF_ISSET(blp, vlanid)) {
2494 2494 freemsg(mp);
2495 2495 continue;
2496 2496 }
2497 2497
2498 2498 /*
2499 2499 * If we're using TRILL, then we've now validated that we're
2500 2500 * the forwarder for this VLAN, so go ahead and let
2501 2501 * observability node users know about the packet.
2502 2502 */
2503 2503 if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
2504 2504 (mpcopy = copymsg(mp)) != NULL) {
2505 2505 mac_rx(bmp->bm_mh, NULL, mpcopy);
2506 2506 }
2507 2507
2508 2508 /*
2509 2509 * We have to learn from our own transmitted packets, because
2510 2510 * there may be a Solaris DLPI raw sender (who can specify his
2511 2511 * own source address) using promiscuous mode for receive. The
2512 2512 * mac layer information won't (and can't) tell us everything
2513 2513 * we need to know.
2514 2514 */
2515 2515 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
2516 2516 vlanid);
2517 2517
2518 2518 /* attempt forwarding */
2519 2519 if (trillmode || blp->bl_state == BLS_FORWARDING) {
2520 2520 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
2521 2521 B_FALSE, B_TRUE);
2522 2522 }
2523 2523 if (mp != NULL) {
2524 2524 MAC_RING_TX(blp->bl_mh, rh, mp, mp);
2525 2525 if (mp == NULL) {
2526 2526 KIINCR(bki_sent);
2527 2527 KLINCR(bkl_xmit);
2528 2528 }
2529 2529 }
2530 2530 /*
2531 2531 * If we get stuck, then stop. Don't let the user's output
2532 2532 * packets get out of order. (More importantly: don't try to
2533 2533 * bridge the same packet multiple times if flow control is
2534 2534 * asserted.)
2535 2535 */
2536 2536 if (mp != NULL) {
2537 2537 mp->b_next = mpnext;
2538 2538 break;
2539 2539 }
2540 2540 }
2541 2541 return (mp);
2542 2542 }
2543 2543
2544 2544 /*
2545 2545 * This is called by TRILL when it decapsulates an packet, and we must forward
2546 2546 * locally. On failure, we just drop.
2547 2547 *
2548 2548 * Note that the ingress_nick reported by TRILL must not represent this local
2549 2549 * node.
2550 2550 */
2551 2551 void
2552 2552 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
2553 2553 {
2554 2554 mac_header_info_t hdr_info;
2555 2555 uint16_t vlanid, tci;
2556 2556 bridge_inst_t *bip = blp->bl_inst; /* used by macros */
2557 2557 mblk_t *mpcopy;
2558 2558
2559 2559 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
2560 2560 freemsg(mp);
2561 2561 return;
2562 2562 }
2563 2563
2564 2564 /* Extract VLAN ID for this packet. */
2565 2565 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
2566 2566 struct ether_vlan_header *evhp;
2567 2567
2568 2568 /* LINTED: alignment */
2569 2569 evhp = (struct ether_vlan_header *)mp->b_rptr;
2570 2570 tci = ntohs(evhp->ether_tci);
2571 2571 vlanid = VLAN_ID(tci);
2572 2572 } else {
2573 2573 /* Inner VLAN headers are required in TRILL data packets */
2574 2574 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
2575 2575 blp, mblk_t *, mp, uint16_t, ingress_nick);
2576 2576 freemsg(mp);
2577 2577 return;
2578 2578 }
2579 2579
2580 2580 /* Learn the location of this sender in the RBridge network */
2581 2581 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);
2582 2582
2583 2583 /* attempt forwarding */
2584 2584 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
2585 2585 if (mp != NULL) {
2586 2586 if (bridge_can_send(blp, vlanid)) {
2587 2587 /* Deliver a copy locally as well */
2588 2588 if ((mpcopy = copymsg(mp)) != NULL)
2589 2589 mac_rx_common(blp->bl_mh, NULL, mpcopy);
2590 2590 MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
2591 2591 }
2592 2592 if (mp == NULL) {
2593 2593 KIINCR(bki_sent);
2594 2594 KLINCR(bkl_xmit);
2595 2595 } else {
2596 2596 freemsg(mp);
2597 2597 }
2598 2598 }
2599 2599 }
2600 2600
2601 2601 /*
2602 2602 * This function is used by TRILL _only_ to transmit TRILL-encapsulated
2603 2603 * packets. It sends on a single underlying link and does not bridge.
2604 2604 */
2605 2605 mblk_t *
2606 2606 bridge_trill_output(bridge_link_t *blp, mblk_t *mp)
2607 2607 {
2608 2608 bridge_inst_t *bip = blp->bl_inst; /* used by macros */
2609 2609
2610 2610 mac_trill_snoop(blp->bl_mh, mp);
2611 2611 MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
2612 2612 if (mp == NULL) {
2613 2613 KIINCR(bki_sent);
2614 2614 KLINCR(bkl_xmit);
2615 2615 }
2616 2616 return (mp);
2617 2617 }
2618 2618
2619 2619 /*
2620 2620 * Set the "appointed forwarder" flag array for this link. TRILL controls
2621 2621 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for
2622 2622 * the forwarder.
2623 2623 */
2624 2624 void
2625 2625 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr)
2626 2626 {
2627 2627 int i;
2628 2628 uint_t newflags = 0;
2629 2629
2630 2630 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) {
2631 2631 if ((blp->bl_afs[i] = arr[i]) != 0)
2632 2632 newflags = BLF_TRILLACTIVE;
2633 2633 }
2634 2634 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags;
2635 2635 }
2636 2636
2637 2637 void
2638 2638 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill)
2639 2639 {
2640 2640 bridge_inst_t *bip = blp->bl_inst;
2641 2641 bridge_fwd_t *bfp, *bfnext;
2642 2642 avl_tree_t fwd_scavenge;
2643 2643 int i;
2644 2644
2645 2645 _NOTE(ARGUNUSED(vlan));
2646 2646
2647 2647 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
2648 2648 offsetof(bridge_fwd_t, bf_node));
2649 2649 rw_enter(&bip->bi_rwlock, RW_WRITER);
2650 2650 bfnext = avl_first(&bip->bi_fwd);
2651 2651 while ((bfp = bfnext) != NULL) {
2652 2652 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
2653 2653 if (bfp->bf_flags & BFF_LOCALADDR)
2654 2654 continue;
2655 2655 if (dotrill) {
2656 2656 /* port doesn't matter if we're flushing TRILL */
2657 2657 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE)
2658 2658 continue;
2659 2659 } else {
2660 2660 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE)
2661 2661 continue;
2662 2662 for (i = 0; i < bfp->bf_nlinks; i++) {
2663 2663 if (bfp->bf_links[i] == blp)
2664 2664 break;
2665 2665 }
2666 2666 if (i >= bfp->bf_nlinks)
2667 2667 continue;
2668 2668 }
2669 2669 ASSERT(bfp->bf_flags & BFF_INTREE);
2670 2670 avl_remove(&bip->bi_fwd, bfp);
2671 2671 bfp->bf_flags &= ~BFF_INTREE;
2672 2672 avl_add(&fwd_scavenge, bfp);
2673 2673 }
2674 2674 rw_exit(&bip->bi_rwlock);
2675 2675 bfnext = avl_first(&fwd_scavenge);
2676 2676 while ((bfp = bfnext) != NULL) {
2677 2677 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
2678 2678 avl_remove(&fwd_scavenge, bfp);
2679 2679 fwd_unref(bfp);
2680 2680 }
2681 2681 avl_destroy(&fwd_scavenge);
2682 2682 }
2683 2683
2684 2684 /*
2685 2685 * Let the mac module take or drop a reference to a bridge link. When this is
2686 2686 * called, the mac module is holding the mi_bridge_lock, so the link cannot be
2687 2687 * in the process of entering or leaving a bridge.
2688 2688 */
2689 2689 static void
2690 2690 bridge_ref_cb(mac_handle_t mh, boolean_t hold)
2691 2691 {
2692 2692 bridge_link_t *blp = (bridge_link_t *)mh;
2693 2693
2694 2694 if (hold)
2695 2695 atomic_inc_uint(&blp->bl_refs);
2696 2696 else
2697 2697 link_unref(blp);
2698 2698 }
2699 2699
2700 2700 /*
2701 2701 * Handle link state changes reported by the mac layer. This acts as a filter
2702 2702 * for link state changes: if a link is reporting down, but there are other
2703 2703 * links still up on the bridge, then the state is changed to "up." When the
2704 2704 * last link goes down, all are marked down, and when the first link goes up,
2705 2705 * all are marked up. (Recursion is avoided by the use of the "redo" function.)
2706 2706 *
2707 2707 * We treat unknown as equivalent to "up."
2708 2708 */
2709 2709 static link_state_t
2710 2710 bridge_ls_cb(mac_handle_t mh, link_state_t newls)
2711 2711 {
2712 2712 bridge_link_t *blp = (bridge_link_t *)mh;
2713 2713 bridge_link_t *blcmp;
2714 2714 bridge_inst_t *bip;
2715 2715 bridge_mac_t *bmp;
2716 2716
2717 2717 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN ||
2718 2718 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) {
2719 2719 blp->bl_linkstate = newls;
2720 2720 return (newls);
2721 2721 }
2722 2722
2723 2723 /*
2724 2724 * Scan first to see if there are any other non-down links. If there
2725 2725 * are, then we're done. Otherwise, if all others are down, then the
2726 2726 * state of this link is the state of the bridge.
2727 2727 */
2728 2728 bip = blp->bl_inst;
2729 2729 rw_enter(&bip->bi_rwlock, RW_WRITER);
2730 2730 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
2731 2731 blcmp = list_next(&bip->bi_links, blcmp)) {
2732 2732 if (blcmp != blp &&
2733 2733 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
2734 2734 blcmp->bl_linkstate != LINK_STATE_DOWN)
2735 2735 break;
2736 2736 }
2737 2737
2738 2738 if (blcmp != NULL) {
2739 2739 /*
2740 2740 * If there are other links that are considered up, then tell
2741 2741 * the caller that the link is actually still up, regardless of
2742 2742 * this link's underlying state.
2743 2743 */
2744 2744 blp->bl_linkstate = newls;
2745 2745 newls = LINK_STATE_UP;
2746 2746 } else if (blp->bl_linkstate != newls) {
2747 2747 /*
2748 2748 * If we've found no other 'up' links, and this link has
2749 2749 * changed state, then report the new state of the bridge to
2750 2750 * all other clients.
2751 2751 */
2752 2752 blp->bl_linkstate = newls;
2753 2753 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
2754 2754 blcmp = list_next(&bip->bi_links, blcmp)) {
2755 2755 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED))
2756 2756 mac_link_redo(blcmp->bl_mh, newls);
2757 2757 }
2758 2758 bmp = bip->bi_mac;
2759 2759 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN)
2760 2760 bmp->bm_linkstate = LINK_STATE_UP;
2761 2761 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
2762 2762 }
2763 2763 rw_exit(&bip->bi_rwlock);
2764 2764 return (newls);
2765 2765 }
2766 2766
/*
 * Taskq handler for BRIOC_ADDLINK: attach the datalink named in the ioctl
 * payload to the bridge instance owned by the requesting stream.
 *
 * 'arg' is the M_IOCTL message.  bridge_ioctl stores the requesting stream in
 * mp->b_next and takes a stream reference before dispatching; that reference
 * is dropped here on both the success and the failure path.  The payload in
 * mp->b_cont starts with a datalink_id_t and is followed by the link name.
 *
 * On success the ioctl is answered with miocack.  On failure the work done so
 * far is undone (mac_close if only the MAC was opened, link_shutdown once the
 * link structure exists) and the ioctl is answered with miocnak carrying the
 * errno value.
 */
2767 2767 static void
2768 2768 bridge_add_link(void *arg)
2769 2769 {
2770 2770 mblk_t *mp = arg;
2771 2771 bridge_stream_t *bsp;
2772 2772 bridge_inst_t *bip, *bipt;
2773 2773 bridge_mac_t *bmp;
2774 2774 datalink_id_t linkid;
2775 2775 int err;
2776 2776 mac_handle_t mh;
2777 2777 uint_t maxsdu;
2778 2778 bridge_link_t *blp = NULL, *blpt;
2779 2779 const mac_info_t *mip;
2780 2780 boolean_t macopen = B_FALSE;
2781 2781 char linkname[MAXLINKNAMELEN];
2782 2782 char kstatname[KSTAT_STRLEN];
2783 2783 int i;
2784 2784 link_state_t linkstate;
2785 2785 mblk_t *mlist;
2786 2786
2787 2787 bsp = (bridge_stream_t *)mp->b_next;
2788 2788 mp->b_next = NULL;
2789 2789 bip = bsp->bs_inst;
2790 2790 /* LINTED: alignment */
2791 2791 linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
2792 2792
2793 2793 /*
2794 2794 * First make sure that there is no other bridge that has this link.
2795 2795 * We don't want to overlap operations from two bridges; the MAC layer
2796 2796 * supports only one bridge on a given MAC at a time.
2797 2797 *
2798 2798 * We rely on the fact that there's just one taskq thread for the
2799 2799 * bridging module: once we've checked for a duplicate, we can drop the
2800 2800 * lock, because no other thread could possibly be adding another link
2801 2801 * until we're done.
2802 2802 */
2803 2803 mutex_enter(&inst_lock);
2804 2804 for (bipt = list_head(&inst_list); bipt != NULL;
2805 2805 bipt = list_next(&inst_list, bipt)) {
2806 2806 rw_enter(&bipt->bi_rwlock, RW_READER);
2807 2807 for (blpt = list_head(&bipt->bi_links); blpt != NULL;
2808 2808 blpt = list_next(&bipt->bi_links, blpt)) {
2809 2809 if (linkid == blpt->bl_linkid)
2810 2810 break;
2811 2811 }
2812 2812 rw_exit(&bipt->bi_rwlock);
2813 2813 if (blpt != NULL)
2814 2814 break;
2815 2815 }
2816 2816 mutex_exit(&inst_lock);
/* bipt is non-NULL only if the outer loop stopped early on a match. */
2817 2817 if (bipt != NULL) {
2818 2818 err = EBUSY;
2819 2819 goto fail;
2820 2820 }
2821 2821
2822 2822 if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
2823 2823 goto fail;
2824 2824 macopen = B_TRUE;
2825 2825
2826 2826 /* we bridge only Ethernet */
2827 2827 mip = mac_info(mh);
2828 2828 if (mip->mi_media != DL_ETHER) {
2829 2829 err = ENOTSUP;
2830 2830 goto fail;
2831 2831 }
2832 2832
2833 2833 /*
2834 2834 * Get the current maximum SDU on this interface. If there are other
2835 2835 * links on the bridge, then this one must match, or it errors out.
2836 2836 * Otherwise, the first link becomes the standard for the new bridge.
2837 2837 */
2838 2838 mac_sdu_get(mh, NULL, &maxsdu);
2839 2839 bmp = bip->bi_mac;
2840 2840 if (list_is_empty(&bip->bi_links)) {
2841 2841 bmp->bm_maxsdu = maxsdu;
2842 2842 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
2843 2843 }
2844 2844
/*
 * Whatever follows the datalink_id_t in the payload is taken as the link
 * name.  The length is clamped so that the copy plus the terminating NUL
 * always fit in linkname[].
 */
2845 2845 /* figure the kstat name; also used as the mac client name */
2846 2846 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t);
2847 2847 if (i < 0 || i >= MAXLINKNAMELEN)
2848 2848 i = MAXLINKNAMELEN - 1;
2849 2849 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i);
2850 2850 linkname[i] = '\0';
2851 2851 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name,
2852 2852 linkname);
2853 2853
2854 2854 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) {
2855 2855 err = ENOMEM;
2856 2856 goto fail;
2857 2857 }
/*
 * NOTE(review): bl_lfailmp is allocated up front, presumably so that a later
 * link-failure report cannot itself fail on allocation -- confirm against
 * the code that consumes it.  blp is reset to NULL on failure so that the
 * 'fail' path does not hand a freed link to link_shutdown.
 */
2858 2858 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
2859 2859 if (blp->bl_lfailmp == NULL) {
2860 2860 kmem_free(blp, sizeof (*blp));
2861 2861 blp = NULL;
2862 2862 err = ENOMEM;
2863 2863 goto fail;
2864 2864 }
2865 2865
2866 2866 blp->bl_refs = 1;
2867 2867 atomic_inc_uint(&bip->bi_refs);
2868 2868 blp->bl_inst = bip;
2869 2869 blp->bl_mh = mh;
2870 2870 blp->bl_linkid = linkid;
2871 2871 blp->bl_maxsdu = maxsdu;
2872 2872 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL);
2873 2873 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL);
2874 2874 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
2875 2875
/*
 * Each step below sets a BLF_* flag once it has succeeded, so bl_flags
 * records how far setup got if a later step jumps to 'fail'.
 */
2876 2876 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0);
2877 2877 if (err != 0)
2878 2878 goto fail;
2879 2879 blp->bl_flags |= BLF_CLIENT_OPEN;
2880 2880
2881 2881 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE);
2882 2882 if (err != 0)
2883 2883 goto fail;
2884 2884 blp->bl_flags |= BLF_MARGIN_ADDED;
2885 2885
2886 2886 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp);
2887 2887
2888 2888 /* Enable Bridging on the link */
2889 2889 err = mac_bridge_set(mh, (mac_handle_t)blp);
2890 2890 if (err != 0)
2891 2891 goto fail;
2892 2892 blp->bl_flags |= BLF_SET_BRIDGE;
2893 2893
2894 2894 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL,
2895 2895 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP);
2896 2896 if (err != 0)
2897 2897 goto fail;
2898 2898 blp->bl_flags |= BLF_PROM_ADDED;
2899 2899
2900 2900 bridge_new_unicst(blp);
2901 2901
2902 2902 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats,
2903 2903 link_kstats_list, Dim(link_kstats_list), kstatname);
2904 2904
2905 2905 /*
2906 2906 * The link holds a reference to the bridge instance, so that the
2907 2907 * instance can't go away before the link is freed. The insertion into
2908 2908 * bi_links holds a reference on the link (reference set to 1 above).
2909 2909 * When marking as removed from bi_links (BLF_DELETED), drop the
2910 2910 * reference on the link. When freeing the link, drop the reference on
2911 2911 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list.
2912 2912 */
2913 2913 rw_enter(&bip->bi_rwlock, RW_WRITER);
2914 2914 list_insert_tail(&bip->bi_links, blp);
2915 2915 blp->bl_flags |= BLF_LINK_ADDED;
2916 2916
2917 2917 /*
2918 2918 * If the new link is no good on this bridge, then let the daemon know
2919 2919 * about the problem.
2920 2920 */
2921 2921 mlist = NULL;
2922 2922 if (maxsdu != bmp->bm_maxsdu)
2923 2923 link_sdu_fail(blp, B_TRUE, &mlist);
2924 2924 rw_exit(&bip->bi_rwlock);
/* Messages collected under the lock are sent only after it is dropped. */
2925 2925 send_up_messages(bip, mlist);
2926 2926
2927 2927 /*
2928 2928 * Trigger a link state update so that if this link is the first one
2929 2929 * "up" in the bridge, then we notify everyone. This triggers a trip
2930 2930 * through bridge_ls_cb.
2931 2931 */
2932 2932 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE);
2933 2933 blp->bl_linkstate = LINK_STATE_DOWN;
2934 2934 mac_link_update(mh, linkstate);
2935 2935
2936 2936 /*
2937 2937 * We now need to report back to the stream that invoked us, and then
2938 2938 * drop the reference on the stream that we're holding.
2939 2939 */
2940 2940 miocack(bsp->bs_wq, mp, 0, 0);
2941 2941 stream_unref(bsp);
2942 2942 return;
2943 2943
/*
 * Failure path.  blp == NULL means no link structure exists, so the only
 * thing that may need releasing is the MAC handle.  Otherwise the partially
 * set-up link is handed to link_shutdown (presumably it consults bl_flags to
 * decide what to undo -- confirm in link_shutdown).
 */
2944 2944 fail:
2945 2945 if (blp == NULL) {
2946 2946 if (macopen)
2947 2947 mac_close(mh);
2948 2948 } else {
2949 2949 link_shutdown(blp);
2950 2950 }
2951 2951 miocnak(bsp->bs_wq, mp, 0, err);
2952 2952 stream_unref(bsp);
2953 2953 }
2954 2954
/*
 * Taskq handler for BRIOC_REMLINK: mark the link named in the ioctl payload
 * as deleted and schedule link_shutdown for it on the bridge taskq.
 *
 * 'arg' is the M_IOCTL message.  bridge_ioctl stores the requesting stream in
 * mp->b_next and takes a stream reference before dispatching; that reference
 * is dropped here before returning.
 *
 * Besides the removal itself, two follow-ups are done while holding
 * bi_rwlock:
 *  - if the removed link was not down and no remaining usable link is up,
 *    LINK_STATE_DOWN is pushed to the remaining links and to the bridge MAC;
 *  - if exactly one non-deleted link remains and its maximum SDU differs from
 *    the bridge's, the bridge adopts that link's SDU.
 *
 * The ioctl is answered with miocack if the link was found, otherwise with
 * miocnak(ENOENT).
 */
2955 2955 static void
2956 2956 bridge_rem_link(void *arg)
2957 2957 {
2958 2958 mblk_t *mp = arg;
2959 2959 bridge_stream_t *bsp;
2960 2960 bridge_inst_t *bip;
2961 2961 bridge_mac_t *bmp;
2962 2962 datalink_id_t linkid;
2963 2963 bridge_link_t *blp, *blsave;
2964 2964 boolean_t found;
2965 2965 mblk_t *mlist;
2966 2966
2967 2967 bsp = (bridge_stream_t *)mp->b_next;
2968 2968 mp->b_next = NULL;
2969 2969 bip = bsp->bs_inst;
2970 2970 /* LINTED: alignment */
2971 2971 linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
2972 2972
2973 2973 /*
2974 2974 * We become reader here so that we can loop over the other links and
2975 2975 * deliver link up/down notification.
2976 2976 */
2977 2977 rw_enter(&bip->bi_rwlock, RW_READER);
2978 2978 found = B_FALSE;
2979 2979 for (blp = list_head(&bip->bi_links); blp != NULL;
2980 2980 blp = list_next(&bip->bi_links, blp)) {
2981 2981 if (blp->bl_linkid == linkid &&
2982 2982 !(blp->bl_flags & BLF_DELETED)) {
2983 2983 blp->bl_flags |= BLF_DELETED;
2984 2984 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
2985 2985 blp, DDI_SLEEP);
2986 2986 found = B_TRUE;
2987 2987 break;
2988 2988 }
2989 2989 }
2990 2990
2991 2991 /*
2992 2992 * Check if this link is up and the remainder of the links are all
2993 2993 * down.
2994 2994 */
/*
 * blp is non-NULL here only if the loop above stopped on the matching link.
 * The variable is then reused as the cursor for the scans below.
 */
2995 2995 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) {
2996 2996 for (blp = list_head(&bip->bi_links); blp != NULL;
2997 2997 blp = list_next(&bip->bi_links, blp)) {
2998 2998 if (blp->bl_linkstate != LINK_STATE_DOWN &&
2999 2999 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)))
3000 3000 break;
3001 3001 }
3002 3002 if (blp == NULL) {
3003 3003 for (blp = list_head(&bip->bi_links); blp != NULL;
3004 3004 blp = list_next(&bip->bi_links, blp)) {
3005 3005 if (!(blp->bl_flags & BLF_DELETED))
3006 3006 mac_link_redo(blp->bl_mh,
3007 3007 LINK_STATE_DOWN);
3008 3008 }
3009 3009 bmp = bip->bi_mac;
3010 3010 bmp->bm_linkstate = LINK_STATE_DOWN;
3011 3011 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
3012 3012 }
3013 3013 }
3014 3014
3015 3015 /*
3016 3016 * Check if there's just one working link left on the bridge. If so,
3017 3017 * then that link is now authoritative for bridge MTU.
3018 3018 */
/*
 * The scan remembers the first non-deleted link in blsave and stops at the
 * second one, so "blsave != NULL && blp == NULL" afterwards means exactly one
 * non-deleted link was found.
 */
3019 3019 blsave = NULL;
3020 3020 for (blp = list_head(&bip->bi_links); blp != NULL;
3021 3021 blp = list_next(&bip->bi_links, blp)) {
3022 3022 if (!(blp->bl_flags & BLF_DELETED)) {
3023 3023 if (blsave == NULL)
3024 3024 blsave = blp;
3025 3025 else
3026 3026 break;
3027 3027 }
3028 3028 }
3029 3029 mlist = NULL;
3030 3030 bmp = bip->bi_mac;
3031 3031 if (blsave != NULL && blp == NULL &&
3032 3032 blsave->bl_maxsdu != bmp->bm_maxsdu) {
3033 3033 bmp->bm_maxsdu = blsave->bl_maxsdu;
3034 3034 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu);
3035 3035 link_sdu_fail(blsave, B_FALSE, &mlist);
3036 3036 }
3037 3037 rw_exit(&bip->bi_rwlock);
/* Messages collected under the lock are sent only after it is dropped. */
3038 3038 send_up_messages(bip, mlist);
3039 3039
3040 3040 if (found)
3041 3041 miocack(bsp->bs_wq, mp, 0, 0);
3042 3042 else
3043 3043 miocnak(bsp->bs_wq, mp, 0, ENOENT);
3044 3044 stream_unref(bsp);
3045 3045 }
3046 3046
3047 3047 /*
3048 3048 * This function intentionally returns with bi_rwlock held; it is intended for
3049 3049 * quick checks and updates.
3050 3050 */
3051 3051 static bridge_link_t *
3052 3052 enter_link(bridge_inst_t *bip, datalink_id_t linkid)
3053 3053 {
3054 3054 bridge_link_t *blp;
3055 3055
3056 3056 rw_enter(&bip->bi_rwlock, RW_READER);
3057 3057 for (blp = list_head(&bip->bi_links); blp != NULL;
3058 3058 blp = list_next(&bip->bi_links, blp)) {
3059 3059 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED))
3060 3060 break;
3061 3061 }
3062 3062 return (blp);
3063 3063 }
3064 3064
3065 3065 static void
3066 3066 bridge_ioctl(queue_t *wq, mblk_t *mp)
3067 3067 {
3068 3068 bridge_stream_t *bsp = wq->q_ptr;
3069 3069 bridge_inst_t *bip;
3070 3070 struct iocblk *iop;
3071 3071 int rc = EINVAL;
3072 3072 int len = 0;
3073 3073 bridge_link_t *blp;
3074 3074 cred_t *cr;
3075 3075
3076 3076 /* LINTED: alignment */
3077 3077 iop = (struct iocblk *)mp->b_rptr;
3078 3078
3079 3079 /*
3080 3080 * For now, all of the bridge ioctls are privileged.
3081 3081 */
3082 3082 if ((cr = msg_getcred(mp, NULL)) == NULL)
3083 3083 cr = iop->ioc_cr;
3084 3084 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) {
3085 3085 miocnak(wq, mp, 0, EPERM);
3086 3086 return;
3087 3087 }
3088 3088
3089 3089 switch (iop->ioc_cmd) {
3090 3090 case BRIOC_NEWBRIDGE: {
3091 3091 bridge_newbridge_t *bnb;
3092 3092
3093 3093 if (bsp->bs_inst != NULL ||
3094 3094 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0)
3095 3095 break;
3096 3096 /* LINTED: alignment */
3097 3097 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr;
3098 3098 bnb->bnb_name[MAXNAMELEN-1] = '\0';
3099 3099 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr);
3100 3100 if (rc != 0)
3101 3101 break;
3102 3102
3103 3103 rw_enter(&bip->bi_rwlock, RW_WRITER);
3104 3104 if (bip->bi_control != NULL) {
3105 3105 rw_exit(&bip->bi_rwlock);
3106 3106 bridge_unref(bip);
3107 3107 rc = EBUSY;
3108 3108 } else {
3109 3109 atomic_inc_uint(&bip->bi_refs);
3110 3110 bsp->bs_inst = bip; /* stream holds reference */
3111 3111 bip->bi_control = bsp;
3112 3112 rw_exit(&bip->bi_rwlock);
3113 3113 rc = 0;
3114 3114 }
3115 3115 break;
3116 3116 }
3117 3117
3118 3118 case BRIOC_ADDLINK:
3119 3119 if ((bip = bsp->bs_inst) == NULL ||
3120 3120 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
3121 3121 break;
3122 3122 /*
3123 3123 * We cannot perform the action in this thread, because we're
3124 3124 * not in process context, and we may already be holding
3125 3125 * MAC-related locks. Place the request on taskq.
3126 3126 */
3127 3127 mp->b_next = (mblk_t *)bsp;
3128 3128 stream_ref(bsp);
3129 3129 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp,
3130 3130 DDI_SLEEP);
3131 3131 return;
3132 3132
3133 3133 case BRIOC_REMLINK:
3134 3134 if ((bip = bsp->bs_inst) == NULL ||
3135 3135 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
3136 3136 break;
3137 3137 /*
3138 3138 * We cannot perform the action in this thread, because we're
3139 3139 * not in process context, and we may already be holding
3140 3140 * MAC-related locks. Place the request on taskq.
3141 3141 */
3142 3142 mp->b_next = (mblk_t *)bsp;
3143 3143 stream_ref(bsp);
3144 3144 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp,
3145 3145 DDI_SLEEP);
3146 3146 return;
3147 3147
3148 3148 case BRIOC_SETSTATE: {
3149 3149 bridge_setstate_t *bss;
3150 3150
3151 3151 if ((bip = bsp->bs_inst) == NULL ||
3152 3152 (rc = miocpullup(mp, sizeof (*bss))) != 0)
3153 3153 break;
3154 3154 /* LINTED: alignment */
3155 3155 bss = (bridge_setstate_t *)mp->b_cont->b_rptr;
3156 3156 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) {
3157 3157 rc = ENOENT;
3158 3158 } else {
3159 3159 rc = 0;
3160 3160 blp->bl_state = bss->bss_state;
3161 3161 }
3162 3162 rw_exit(&bip->bi_rwlock);
3163 3163 break;
3164 3164 }
3165 3165
3166 3166 case BRIOC_SETPVID: {
3167 3167 bridge_setpvid_t *bsv;
3168 3168
3169 3169 if ((bip = bsp->bs_inst) == NULL ||
3170 3170 (rc = miocpullup(mp, sizeof (*bsv))) != 0)
3171 3171 break;
3172 3172 /* LINTED: alignment */
3173 3173 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr;
3174 3174 if (bsv->bsv_vlan > VLAN_ID_MAX)
3175 3175 break;
3176 3176 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) {
3177 3177 rc = ENOENT;
3178 3178 } else if (blp->bl_pvid == bsv->bsv_vlan) {
3179 3179 rc = 0;
3180 3180 } else {
3181 3181 rc = 0;
3182 3182 BRIDGE_VLAN_CLR(blp, blp->bl_pvid);
3183 3183 blp->bl_pvid = bsv->bsv_vlan;
3184 3184 if (blp->bl_pvid != 0)
3185 3185 BRIDGE_VLAN_SET(blp, blp->bl_pvid);
3186 3186 }
3187 3187 rw_exit(&bip->bi_rwlock);
3188 3188 break;
3189 3189 }
3190 3190
3191 3191 case BRIOC_VLANENAB: {
3192 3192 bridge_vlanenab_t *bve;
3193 3193
3194 3194 if ((bip = bsp->bs_inst) == NULL ||
3195 3195 (rc = miocpullup(mp, sizeof (*bve))) != 0)
3196 3196 break;
3197 3197 /* LINTED: alignment */
3198 3198 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr;
3199 3199 if (bve->bve_vlan > VLAN_ID_MAX)
3200 3200 break;
3201 3201 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) {
3202 3202 rc = ENOENT;
3203 3203 } else {
3204 3204 rc = 0;
3205 3205 /* special case: vlan 0 means "all" */
3206 3206 if (bve->bve_vlan == 0) {
3207 3207 (void) memset(blp->bl_vlans,
3208 3208 bve->bve_onoff ? ~0 : 0,
3209 3209 sizeof (blp->bl_vlans));
3210 3210 BRIDGE_VLAN_CLR(blp, 0);
3211 3211 if (blp->bl_pvid != 0)
3212 3212 BRIDGE_VLAN_SET(blp, blp->bl_pvid);
3213 3213 } else if (bve->bve_vlan == blp->bl_pvid) {
3214 3214 rc = EINVAL;
3215 3215 } else if (bve->bve_onoff) {
3216 3216 BRIDGE_VLAN_SET(blp, bve->bve_vlan);
3217 3217 } else {
3218 3218 BRIDGE_VLAN_CLR(blp, bve->bve_vlan);
3219 3219 }
3220 3220 }
3221 3221 rw_exit(&bip->bi_rwlock);
3222 3222 break;
3223 3223 }
3224 3224
3225 3225 case BRIOC_FLUSHFWD: {
3226 3226 bridge_flushfwd_t *bff;
3227 3227 bridge_fwd_t *bfp, *bfnext;
3228 3228 avl_tree_t fwd_scavenge;
3229 3229 int i;
3230 3230
3231 3231 if ((bip = bsp->bs_inst) == NULL ||
3232 3232 (rc = miocpullup(mp, sizeof (*bff))) != 0)
3233 3233 break;
3234 3234 /* LINTED: alignment */
3235 3235 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr;
3236 3236 rw_enter(&bip->bi_rwlock, RW_WRITER);
3237 3237 /* This case means "all" */
3238 3238 if (bff->bff_linkid == DATALINK_INVALID_LINKID) {
3239 3239 blp = NULL;
3240 3240 } else {
3241 3241 for (blp = list_head(&bip->bi_links); blp != NULL;
3242 3242 blp = list_next(&bip->bi_links, blp)) {
3243 3243 if (blp->bl_linkid == bff->bff_linkid &&
3244 3244 !(blp->bl_flags & BLF_DELETED))
3245 3245 break;
3246 3246 }
3247 3247 if (blp == NULL) {
3248 3248 rc = ENOENT;
3249 3249 rw_exit(&bip->bi_rwlock);
3250 3250 break;
3251 3251 }
3252 3252 }
3253 3253 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
3254 3254 offsetof(bridge_fwd_t, bf_node));
3255 3255 bfnext = avl_first(&bip->bi_fwd);
3256 3256 while ((bfp = bfnext) != NULL) {
3257 3257 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
3258 3258 if (bfp->bf_flags & BFF_LOCALADDR)
3259 3259 continue;
3260 3260 if (blp != NULL) {
3261 3261 for (i = 0; i < bfp->bf_maxlinks; i++) {
3262 3262 if (bfp->bf_links[i] == blp)
3263 3263 break;
3264 3264 }
3265 3265 /*
3266 3266 * If the link is there and we're excluding,
3267 3267 * then skip. If the link is not there and
3268 3268 * we're doing only that link, then skip.
3269 3269 */
3270 3270 if ((i < bfp->bf_maxlinks) == bff->bff_exclude)
3271 3271 continue;
3272 3272 }
3273 3273 ASSERT(bfp->bf_flags & BFF_INTREE);
3274 3274 avl_remove(&bip->bi_fwd, bfp);
3275 3275 bfp->bf_flags &= ~BFF_INTREE;
3276 3276 avl_add(&fwd_scavenge, bfp);
3277 3277 }
3278 3278 rw_exit(&bip->bi_rwlock);
3279 3279 bfnext = avl_first(&fwd_scavenge);
3280 3280 while ((bfp = bfnext) != NULL) {
3281 3281 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
3282 3282 avl_remove(&fwd_scavenge, bfp);
3283 3283 fwd_unref(bfp); /* drop tree reference */
3284 3284 }
3285 3285 avl_destroy(&fwd_scavenge);
3286 3286 break;
3287 3287 }
3288 3288
3289 3289 case BRIOC_TABLEMAX:
3290 3290 if ((bip = bsp->bs_inst) == NULL ||
3291 3291 (rc = miocpullup(mp, sizeof (uint32_t))) != 0)
3292 3292 break;
3293 3293 /* LINTED: alignment */
3294 3294 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr;
3295 3295 break;
3296 3296 }
3297 3297
3298 3298 if (rc == 0)
3299 3299 miocack(wq, mp, len, 0);
3300 3300 else
3301 3301 miocnak(wq, mp, 0, rc);
3302 3302 }
3303 3303
3304 3304 static void
3305 3305 bridge_wput(queue_t *wq, mblk_t *mp)
3306 3306 {
3307 3307 switch (DB_TYPE(mp)) {
3308 3308 case M_IOCTL:
3309 3309 bridge_ioctl(wq, mp);
3310 3310 break;
3311 3311 case M_FLUSH:
3312 3312 if (*mp->b_rptr & FLUSHW)
3313 3313 *mp->b_rptr &= ~FLUSHW;
3314 3314 if (*mp->b_rptr & FLUSHR)
3315 3315 qreply(wq, mp);
3316 3316 else
3317 3317 freemsg(mp);
3318 3318 break;
3319 3319 default:
3320 3320 freemsg(mp);
3321 3321 break;
3322 3322 }
3323 3323 }
3324 3324
3325 3325 /*
3326 3326 * This function allocates the main data structures for the bridge driver and
3327 3327 * connects us into devfs.
3328 3328 */
3329 3329 static void
3330 3330 bridge_inst_init(void)
3331 3331 {
3332 3332 bridge_scan_interval = 5 * drv_usectohz(1000000);
3333 3333 bridge_fwd_age = 25 * drv_usectohz(1000000);
3334 3334
3335 3335 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL);
3336 3336 list_create(&bmac_list, sizeof (bridge_mac_t),
3337 3337 offsetof(bridge_mac_t, bm_node));
3338 3338 list_create(&inst_list, sizeof (bridge_inst_t),
3339 3339 offsetof(bridge_inst_t, bi_node));
3340 3340 cv_init(&inst_cv, NULL, CV_DRIVER, NULL);
3341 3341 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL);
3342 3342 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL);
3343 3343 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL);
3344 3344
3345 3345 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb,
3346 3346 bridge_ls_cb);
3347 3347 }
3348 3348
3349 3349 /*
3350 3350 * This function disconnects from devfs and destroys all data structures in
3351 3351 * preparation for unload. It's assumed that there are no active bridge
3352 3352 * references left at this point.
3353 3353 */
3354 3354 static void
3355 3355 bridge_inst_fini(void)
3356 3356 {
3357 3357 mac_bridge_vectors(NULL, NULL, NULL, NULL);
3358 3358 if (bridge_timerid != 0)
3359 3359 (void) untimeout(bridge_timerid);
3360 3360 rw_destroy(&bmac_rwlock);
3361 3361 list_destroy(&bmac_list);
3362 3362 list_destroy(&inst_list);
3363 3363 cv_destroy(&inst_cv);
3364 3364 mutex_destroy(&inst_lock);
3365 3365 cv_destroy(&stream_ref_cv);
3366 3366 mutex_destroy(&stream_ref_lock);
3367 3367 }
3368 3368
3369 3369 /*
3370 3370 * bridge_attach()
3371 3371 *
3372 3372 * Description:
3373 3373 * Attach bridge driver to the system.
3374 3374 */
3375 3375 static int
3376 3376 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3377 3377 {
3378 3378 if (cmd != DDI_ATTACH)
3379 3379 return (DDI_FAILURE);
3380 3380
3381 3381 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO,
3382 3382 CLONE_DEV) == DDI_FAILURE) {
3383 3383 return (DDI_FAILURE);
3384 3384 }
3385 3385
3386 3386 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list,
3387 3387 DLDIOCCNT(bridge_ioc_list)) != 0) {
3388 3388 ddi_remove_minor_node(dip, BRIDGE_CTL);
3389 3389 return (DDI_FAILURE);
3390 3390 }
3391 3391
3392 3392 bridge_dev_info = dip;
3393 3393 bridge_major = ddi_driver_major(dip);
3394 3394 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1,
3395 3395 TASKQ_DEFAULTPRI, 0);
3396 3396 return (DDI_SUCCESS);
3397 3397 }
3398 3398
3399 3399 /*
3400 3400 * bridge_detach()
3401 3401 *
3402 3402 * Description:
3403 3403 * Detach an interface to the system.
3404 3404 */
3405 3405 static int
3406 3406 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3407 3407 {
3408 3408 if (cmd != DDI_DETACH)
3409 3409 return (DDI_FAILURE);
3410 3410
3411 3411 ddi_remove_minor_node(dip, NULL);
3412 3412 ddi_taskq_destroy(bridge_taskq);
3413 3413 bridge_dev_info = NULL;
3414 3414 return (DDI_SUCCESS);
3415 3415 }
3416 3416
3417 3417 /*
3418 3418 * bridge_info()
3419 3419 *
3420 3420 * Description:
3421 3421 * Translate "dev_t" to a pointer to the associated "dev_info_t".
3422 3422 */
3423 3423 /* ARGSUSED */
3424 3424 static int
3425 3425 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
3426 3426 void **result)
3427 3427 {
3428 3428 int rc;
3429 3429
3430 3430 switch (infocmd) {
3431 3431 case DDI_INFO_DEVT2DEVINFO:
3432 3432 if (bridge_dev_info == NULL) {
3433 3433 rc = DDI_FAILURE;
3434 3434 } else {
3435 3435 *result = (void *)bridge_dev_info;
3436 3436 rc = DDI_SUCCESS;
3437 3437 }
3438 3438 break;
3439 3439 case DDI_INFO_DEVT2INSTANCE:
3440 3440 *result = NULL;
3441 3441 rc = DDI_SUCCESS;
3442 3442 break;
3443 3443 default:
3444 3444 rc = DDI_FAILURE;
3445 3445 break;
3446 3446 }
3447 3447 return (rc);
3448 3448 }
3449 3449
/*
 * STREAMS module_info shared by the read and write queues of the bridge
 * driver; the field each value initializes is named in its trailing comment.
 */
3450 3450 static struct module_info bridge_modinfo = {
3451 3451 2105, /* mi_idnum */
3452 3452 BRIDGE_DEV_NAME, /* mi_idname */
3453 3453 0, /* mi_minpsz */
3454 3454 16384, /* mi_maxpsz */
3455 3455 65536, /* mi_hiwat */
3456 3456 128 /* mi_lowat */
3457 3457 };
3458 3458
/*
 * Read-side queue initialization: only the open and close entry points are
 * supplied; there is no read-side put or service procedure.
 */
3459 3459 static struct qinit bridge_rinit = {
3460 3460 NULL, /* qi_putp */
3461 3461 NULL, /* qi_srvp */
3462 3462 bridge_open, /* qi_qopen */
3463 3463 bridge_close, /* qi_qclose */
3464 3464 NULL, /* qi_qadmin */
3465 3465 &bridge_modinfo, /* qi_minfo */
3466 3466 NULL /* qi_mstat */
3467 3467 };
3468 3468
/*
 * Write-side queue initialization: bridge_wput is the put procedure and there
 * is no service procedure.  bridge_wput is declared as returning void, hence
 * the cast to the int-returning function pointer type used for the slot.
 */
3469 3469 static struct qinit bridge_winit = {
3470 3470 (int (*)())bridge_wput, /* qi_putp */
3471 3471 NULL, /* qi_srvp */
3472 3472 NULL, /* qi_qopen */
3473 3473 NULL, /* qi_qclose */
3474 3474 NULL, /* qi_qadmin */
3475 3475 &bridge_modinfo, /* qi_minfo */
3476 3476 NULL /* qi_mstat */
3477 3477 };
3478 3478
/* streamtab tying together the read- and write-side qinit structures. */
3479 3479 static struct streamtab bridge_tab = {
3480 3480 &bridge_rinit, /* st_rdinit */
3481 3481 &bridge_winit /* st_wrinit */
3482 3482 };
3483 3483
3484 3484 /* No STREAMS perimeters; we do all our own locking */
/*
 * Defines bridge_ops through DDI_DEFINE_STREAM_OPS, wiring in bridge_attach,
 * bridge_detach, bridge_info and the bridge_tab streamtab, with flags
 * D_NEW | D_MP and quiesce given as ddi_quiesce_not_supported.
 *
 * NOTE(review): the three "open down / lines elided / open up" lines in the
 * middle of the invocation are navigation residue from the diff viewer this
 * listing was captured from, not part of the source.
 */
3485 3485 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach,
3486 3486 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab,
↓ open down ↓ |
3486 lines elided |
↑ open up ↑ |
3487 3487 ddi_quiesce_not_supported);
3488 3488
/* Loadable-driver linkage: describes this module as the "bridging driver". */
3489 3489 static struct modldrv modldrv = {
3490 3490 &mod_driverops,
3491 3491 "bridging driver",
3492 3492 &bridge_ops
3493 3493 };
3494 3494
/*
 * Module linkage passed to mod_install/mod_remove/mod_info below.
 *
 * This is the one spot in the listing that the change under review touches:
 * the two lines marked '-' (flat initializer) are replaced by the line marked
 * '+', which wraps the linkage pointers in their own braces so the
 * initializer no longer needs -Wno-missing-braces.
 */
3495 3495 static struct modlinkage modlinkage = {
3496 3496 MODREV_1,
3497 - (void *)&modldrv,
3498 - NULL
3497 + { (void *)&modldrv, NULL }
3499 3498 };
3500 3499
3501 3500 int
3502 3501 _init(void)
3503 3502 {
3504 3503 int retv;
3505 3504
3506 3505 mac_init_ops(NULL, BRIDGE_DEV_NAME);
3507 3506 bridge_inst_init();
3508 3507 if ((retv = mod_install(&modlinkage)) != 0)
3509 3508 bridge_inst_fini();
3510 3509 return (retv);
3511 3510 }
3512 3511
3513 3512 int
3514 3513 _fini(void)
3515 3514 {
3516 3515 int retv;
3517 3516
3518 3517 rw_enter(&bmac_rwlock, RW_READER);
3519 3518 retv = list_is_empty(&bmac_list) ? 0 : EBUSY;
3520 3519 rw_exit(&bmac_rwlock);
3521 3520 if (retv == 0 &&
3522 3521 (retv = mod_remove(&modlinkage)) == 0)
3523 3522 bridge_inst_fini();
3524 3523 return (retv);
3525 3524 }
3526 3525
3527 3526 int
3528 3527 _info(struct modinfo *modinfop)
3529 3528 {
3530 3529 return (mod_info(&modlinkage, modinfop));
3531 3530 }
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX