1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * This module supports AF_TRILL sockets and TRILL layer-2 forwarding.
29 */
30
31 #include <sys/strsubr.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/modctl.h>
35 #include <sys/cmn_err.h>
36 #include <sys/tihdr.h>
37 #include <sys/strsun.h>
38 #include <sys/policy.h>
39 #include <sys/ethernet.h>
40 #include <sys/vlan.h>
41 #include <net/trill.h>
42 #include <net/if_dl.h>
43 #include <sys/mac.h>
44 #include <sys/mac_client.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/sdt.h>
48 #include <sys/dls.h>
49 #include <sys/sunddi.h>
50
51 #include "trill_impl.h"
52
53 static void trill_del_all(trill_inst_t *, boolean_t);
54 static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t);
55 static void trill_stop_recv(trill_sock_t *);
56 static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *,
57 uint16_t);
58 static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t);
59 static void trill_node_unref(trill_inst_t *, trill_node_t *);
60 static void trill_sock_unref(trill_sock_t *);
61 static void trill_kstats_init(trill_sock_t *, const char *);
62
63 static list_t trill_inst_list;
64 static krwlock_t trill_inst_rwlock;
65
66 static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **,
67 uint_t *, int *, int, cred_t *);
68
69 static smod_reg_t sinfo = {
70 SOCKMOD_VERSION,
71 "trill",
72 SOCK_UC_VERSION,
73 SOCK_DC_VERSION,
74 trill_create,
75 NULL,
76 };
77
78 /* modldrv structure */
79 static struct modlsockmod sockmod = {
80 &mod_sockmodops, "AF_TRILL socket module", &sinfo
81 };
82
83 /* modlinkage structure */
84 static struct modlinkage ml = {
85 MODREV_1,
86 { &sockmod, NULL }
87 };
88
/*
 * Evaluates to non-zero when n is a usable RBridge nickname, i.e. it is
 * neither RBRIDGE_NICKNAME_NONE nor RBRIDGE_NICKNAME_UNUSED.  The argument
 * is evaluated more than once, so it must not have side effects.
 */
#define	VALID_NICK(n)	(!((n) == RBRIDGE_NICKNAME_NONE || \
			    (n) == RBRIDGE_NICKNAME_UNUSED))
91
92 static mblk_t *
93 create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr,
94 boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci,
95 size_t msglen)
96 {
97 int extra_hdr_len;
98 struct ether_vlan_header *ethvlanhdr;
99 mblk_t *hdr_mp;
100 uint16_t etype;
101
102 etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL;
103
104 /* When sending on the PVID, we must not give a VLAN ID */
105 if (tci == tsock->ts_link->bl_pvid)
106 tci = TRILL_NO_TCI;
107
108 /*
109 * Create new Ethernet header and include additional space
110 * for writing TRILL header and/or VLAN tag.
111 */
112 extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) +
113 (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0);
114 hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr,
115 tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len);
116 if (hdr_mp == NULL) {
117 freemsg(mp);
118 return (NULL);
119 }
120
121 if (tci != TRILL_NO_TCI) {
122 /* LINTED: alignment */
123 ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr;
124 ethvlanhdr->ether_tci = htons(tci);
125 ethvlanhdr->ether_type = htons(etype);
126 hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo);
127 }
128
129 if (!trill_hdr_ok) {
130 trill_header_t *thp;
131 /* LINTED: alignment */
132 thp = (trill_header_t *)hdr_mp->b_wptr;
133 (void) memset(thp, 0, sizeof (trill_header_t));
134 thp->th_hopcount = TRILL_DEFAULT_HOPS;
135 thp->th_multidest = (multidest ? 1:0);
136 hdr_mp->b_wptr += sizeof (trill_header_t);
137 }
138
139 hdr_mp->b_cont = mp;
140 return (hdr_mp);
141 }
142
143 /*
144 * TRILL local recv function. TRILL data frames that should be received
145 * by the local system are decapsulated here and passed to bridging for
146 * learning and local system receive. Only called when we are the forwarder
147 * on the link (multi-dest frames) or the frame was destined for us.
148 */
149 static void
150 trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick)
151 {
152 struct ether_header *inner_ethhdr;
153
154 /* LINTED: alignment */
155 inner_ethhdr = (struct ether_header *)mp->b_rptr;
156 DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr);
157
158 DB_CKSUMFLAGS(mp) = 0;
159 /*
160 * Transmit the decapsulated frame on the link via Bridging.
161 * Bridging does source address learning and appropriate forwarding.
162 */
163 bridge_trill_decaps(tsock->ts_link, mp, ingressnick);
164 KSPINCR(tks_decap);
165 }
166
167 /*
168 * Determines the outgoing link to reach a RBridge having the given nick
169 * Assumes caller has acquired the trill instance rwlock.
170 */
171 static trill_sock_t *
172 find_trill_link(trill_inst_t *tip, datalink_id_t linkid)
173 {
174 trill_sock_t *tsp = NULL;
175
176 ASSERT(RW_LOCK_HELD(&tip->ti_rwlock));
177 for (tsp = list_head(&tip->ti_socklist); tsp != NULL;
178 tsp = list_next(&tip->ti_socklist, tsp)) {
179 if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) {
180 ASSERT(tsp->ts_link->bl_mh != NULL);
181 ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN));
182 atomic_inc_uint(&tsp->ts_refs);
183 break;
184 }
185 }
186 return (tsp);
187 }
188
189 /*
190 * TRILL destination forwarding function. Transmits the TRILL data packet
191 * to the next-hop, adjacent RBridge. Consumes passed mblk_t.
192 */
193 static void
194 trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick,
195 boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick)
196 {
197 trill_node_t *adj;
198 trill_sock_t *tsock = NULL;
199 trill_header_t *trillhdr;
200 struct ether_header *ethhdr;
201 int ethtype;
202 int ethhdrlen;
203
204 adj = trill_node_lookup(tip, adj_nick);
205 if (adj == NULL || ((tsock = adj->tn_tsp) == NULL))
206 goto dest_fwd_fail;
207
208 ASSERT(tsock->ts_link != NULL);
209 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
210 ASSERT(adj->tn_ni != NULL);
211
212 DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t,
213 adj, trill_sock_t, tsock);
214
215 /*
216 * For broadcast links by using the dest address of
217 * the RBridge to forward the frame should result in
218 * savings. When the link is a bridged LAN or there are
219 * many end stations the frame will not always be flooded.
220 */
221 fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa,
222 has_trill_hdr, multidest, tsock->ts_desigvlan, 0);
223 if (fwd_mp == NULL)
224 goto dest_fwd_fail;
225
226 /* LINTED: alignment */
227 ethhdr = (struct ether_header *)fwd_mp->b_rptr;
228 ethtype = ntohs(ethhdr->ether_type);
229 ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL);
230
231 /* Pullup Ethernet and TRILL header (w/o TRILL options) */
232 ethhdrlen = sizeof (struct ether_header) +
233 (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0);
234 if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t)))
235 goto dest_fwd_fail;
236 /* LINTED: alignment */
237 trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen);
238
239 /* Update TRILL header with ingress and egress nicks for new frames */
240 if (!has_trill_hdr) {
241 /* We are creating a new TRILL frame */
242 trillhdr->th_egressnick = (multidest ? dtnick:adj_nick);
243 rw_enter(&tip->ti_rwlock, RW_READER);
244 trillhdr->th_ingressnick = tip->ti_nick;
245 rw_exit(&tip->ti_rwlock);
246 if (!VALID_NICK(trillhdr->th_ingressnick))
247 goto dest_fwd_fail;
248 }
249
250 /* Set hop count and update header in packet */
251 ASSERT(trillhdr->th_hopcount != 0);
252 trillhdr->th_hopcount--;
253
254 /* Clear checksum flag and transmit frame on the link */
255 DB_CKSUMFLAGS(fwd_mp) = 0;
256 DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr);
257 fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp);
258 if (fwd_mp == NULL) {
259 KSPINCR(tks_sent);
260 KSPINCR(tks_forward);
261 } else {
262 freemsg(fwd_mp);
263 KSPINCR(tks_drops);
264 }
265 trill_node_unref(tip, adj);
266 return;
267
268 dest_fwd_fail:
269 if (adj != NULL)
270 trill_node_unref(tip, adj);
271 if (tsock != NULL)
272 KSPINCR(tks_drops);
273 freemsg(fwd_mp);
274 }
275
276 /*
277 * TRILL multi-destination forwarding. Transmits the packet to the adjacencies
278 * on the distribution tree determined by the egress nick. Source addr (saddr)
279 * is NULL for new TRILL packets originating from us.
280 */
281 static void
282 trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick,
283 uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr,
284 int inner_vlan, boolean_t free_mblk)
285 {
286 int idx;
287 uint16_t adjnick;
288 trill_node_t *dest;
289 trill_node_t *adj;
290 mblk_t *fwd_mp;
291 boolean_t nicksaved = B_FALSE;
292 uint16_t adjnicksaved;
293
294 /* Lookup the egress nick info, this is the DT root */
295 if ((dest = trill_node_lookup(tip, egressnick)) == NULL)
296 goto fail_multidest_fwd;
297
298 /* Send a copy to all our adjacencies on the DT root */
299 ASSERT(dest->tn_ni);
300 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) {
301
302 /* Check for a valid adjacency node */
303 adjnick = TNI_ADJNICK(dest->tn_ni, idx);
304 if (!VALID_NICK(adjnick) || ingressnick == adjnick ||
305 ((adj = trill_node_lookup(tip, adjnick)) == NULL))
306 continue;
307
308 /* Do not forward back to adjacency that sent the pkt to us */
309 ASSERT(adj->tn_ni != NULL);
310 if ((saddr != NULL) &&
311 (memcmp(adj->tn_ni->tni_adjsnpa, saddr,
312 ETHERADDRL) == 0)) {
313 trill_node_unref(tip, adj);
314 continue;
315 }
316
317 /* Check if adj is marked as reaching inner VLAN downstream */
318 if ((inner_vlan != VLAN_ID_NONE) &&
319 !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx),
320 inner_vlan)) {
321 trill_node_unref(tip, adj);
322 DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered,
323 uint16_t, adjnick, uint16_t, ingressnick,
324 uint16_t, egressnick, int, inner_vlan);
325 continue;
326 }
327
328 trill_node_unref(tip, adj);
329
330 /*
331 * Save the nick and look ahead to see if we should forward the
332 * frame to more adjacencies. We avoid doing a copy for this
333 * nick and use the passed mblk when we can consume the passed
334 * mblk.
335 */
336 if (free_mblk && !nicksaved) {
337 adjnicksaved = adjnick;
338 nicksaved = B_TRUE;
339 continue;
340 }
341
342 fwd_mp = copymsg(mp);
343 if (fwd_mp == NULL)
344 break;
345 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t,
346 adjnick, uint16_t, ingressnick);
347 trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt,
348 B_TRUE, egressnick);
349 }
350 trill_node_unref(tip, dest);
351
352 if (nicksaved) {
353 ASSERT(free_mblk);
354 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t,
355 adjnicksaved, uint16_t, ingressnick);
356 trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt,
357 B_TRUE, egressnick);
358 return;
359 }
360
361 fail_multidest_fwd:
362 DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t,
363 egressnick, uint16_t, ingressnick);
364 if (free_mblk) {
365 freemsg(mp);
366 }
367 }
368
369 /*
370 * TRILL data receive function. Forwards the received frame if necessary
371 * and also determines if the received frame should be consumed locally.
372 * Consumes passed mblk.
373 */
374 static void
375 trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr)
376 {
377 trill_header_t *trillhdr;
378 trill_node_t *dest = NULL;
379 trill_node_t *source = NULL;
380 trill_node_t *adj;
381 uint16_t ournick, adjnick, treeroot;
382 struct ether_header *ethhdr;
383 trill_inst_t *tip = tsock->ts_tip;
384 uint8_t srcaddr[ETHERADDRL];
385 size_t trillhdrlen;
386 int inner_vlan = VLAN_ID_NONE;
387 int tci;
388 int idx;
389 size_t min_size;
390
391 /* Copy Ethernet source address before modifying packet */
392 (void) memcpy(srcaddr, mpsaddr, ETHERADDRL);
393
394 /* Pull up TRILL header if necessary. */
395 min_size = sizeof (trill_header_t);
396 if ((MBLKL(mp) < min_size ||
397 !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) &&
398 !pullupmsg(mp, min_size))
399 goto fail;
400
401 /* LINTED: alignment */
402 trillhdr = (trill_header_t *)mp->b_rptr;
403 if (trillhdr->th_version != TRILL_PROTOCOL_VERS) {
404 DTRACE_PROBE1(trill__recv__wrongversion,
405 trill_header_t *, trillhdr);
406 goto fail;
407 }
408
409 /* Drop if unknown or invalid nickname */
410 if (!VALID_NICK(trillhdr->th_egressnick) ||
411 !VALID_NICK(trillhdr->th_ingressnick)) {
412 DTRACE_PROBE1(trill__recv__invalidnick,
413 trill_header_t *, trillhdr);
414 goto fail;
415 }
416
417 rw_enter(&tip->ti_rwlock, RW_READER);
418 ournick = tip->ti_nick;
419 treeroot = tip->ti_treeroot;
420 rw_exit(&tip->ti_rwlock);
421 /* Drop if we received a packet with our nick as ingress */
422 if (trillhdr->th_ingressnick == ournick)
423 goto fail;
424
425 /* Re-pull any TRILL options and inner Ethernet header */
426 min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) +
427 sizeof (struct ether_header);
428 if (MBLKL(mp) < min_size) {
429 if (!pullupmsg(mp, min_size))
430 goto fail;
431 /* LINTED: alignment */
432 trillhdr = (trill_header_t *)mp->b_rptr;
433 }
434 trillhdrlen = sizeof (trill_header_t) +
435 (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t));
436
437 /*
438 * Get the inner Ethernet header, plus the inner VLAN header if there
439 * is one.
440 */
441 /* LINTED: alignment */
442 ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen);
443 if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) {
444 min_size += sizeof (struct ether_vlan_extinfo);
445 if (MBLKL(mp) < min_size) {
446 if (!pullupmsg(mp, min_size))
447 goto fail;
448 /* LINTED: alignment */
449 trillhdr = (trill_header_t *)mp->b_rptr;
450 /* LINTED: alignment */
451 ethhdr = (struct ether_header *)(mp->b_rptr +
452 trillhdrlen);
453 }
454
455 tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci);
456 inner_vlan = VLAN_ID(tci);
457 }
458
459 /* Known/single destination forwarding. */
460 if (!trillhdr->th_multidest) {
461
462 /* Inner MacDA must be unicast */
463 if (ethhdr->ether_dhost.ether_addr_octet[0] & 1)
464 goto fail;
465
466 /* Ingress and Egress nicks must be different */
467 if (trillhdr->th_egressnick == trillhdr->th_ingressnick)
468 goto fail;
469
470 DTRACE_PROBE1(trill__recv__singledest,
471 trill_header_t *, trillhdr);
472 if (trillhdr->th_egressnick == ournick) {
473 mp->b_rptr += trillhdrlen;
474 trill_recv_local(tsock, mp, trillhdr->th_ingressnick);
475 } else if (trillhdr->th_hopcount > 0) {
476 trill_dest_fwd(tip, mp, trillhdr->th_egressnick,
477 B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE);
478 } else {
479 goto fail;
480 }
481 return;
482 }
483
484 /*
485 * Multi-destination frame: perform checks verifying we have
486 * received a valid multi-destination frame before receiving the
487 * frame locally and forwarding the frame to other RBridges.
488 *
489 * Check if we received this multi-destination frame on a
490 * adjacency in the distribution tree indicated by the frame's
491 * egress nickname.
492 */
493 if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL)
494 goto fail;
495 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) {
496 adjnick = TNI_ADJNICK(dest->tn_ni, idx);
497 if ((adj = trill_node_lookup(tip, adjnick)) == NULL)
498 continue;
499 if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) {
500 trill_node_unref(tip, adj);
501 break;
502 }
503 trill_node_unref(tip, adj);
504 }
505
506 if (idx >= dest->tn_ni->tni_adjcount) {
507 DTRACE_PROBE2(trill__recv__multidest__adjcheckfail,
508 trill_header_t *, trillhdr, trill_node_t *, dest);
509 goto fail;
510 }
511
512 /*
513 * Reverse path forwarding check. Check if the ingress RBridge
514 * that has forwarded the frame advertised the use of the
515 * distribution tree specified in the egress nick.
516 */
517 if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL)
518 goto fail;
519 for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) {
520 if (TNI_DTROOTNICK(source->tn_ni, idx) ==
521 trillhdr->th_egressnick)
522 break;
523 }
524
525 if (idx >= source->tn_ni->tni_dtrootcount) {
526 /*
527 * Allow receipt of forwarded frame with the highest
528 * tree root RBridge as the egress RBridge when the
529 * ingress RBridge has not advertised the use of any
530 * distribution trees.
531 */
532 if (source->tn_ni->tni_dtrootcount != 0 ||
533 trillhdr->th_egressnick != treeroot) {
534 DTRACE_PROBE3(
535 trill__recv__multidest__rpfcheckfail,
536 trill_header_t *, trillhdr, trill_node_t *,
537 source, trill_inst_t *, tip);
538 goto fail;
539 }
540 }
541
542 /* Check hop count before doing any forwarding */
543 if (trillhdr->th_hopcount == 0)
544 goto fail;
545
546 /* Forward frame using the distribution tree specified by egress nick */
547 DTRACE_PROBE2(trill__recv__multidest, trill_header_t *,
548 trillhdr, trill_node_t *, source);
549 trill_node_unref(tip, source);
550 trill_node_unref(tip, dest);
551
552 /* Tell forwarding not to free if we're the link forwarder. */
553 trill_multidest_fwd(tip, mp, trillhdr->th_egressnick,
554 trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan,
555 B_FALSE);
556
557 /*
558 * Send de-capsulated frame locally if we are the link forwarder (also
559 * does bridge learning).
560 */
561 mp->b_rptr += trillhdrlen;
562 trill_recv_local(tsock, mp, trillhdr->th_ingressnick);
563 KSPINCR(tks_recv);
564 return;
565
566 fail:
567 DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp,
568 trill_sock_t *, tsock);
569 if (dest != NULL)
570 trill_node_unref(tip, dest);
571 if (source != NULL)
572 trill_node_unref(tip, source);
573 freemsg(mp);
574 KSPINCR(tks_drops);
575 }
576
577 static void
578 trill_stop_recv(trill_sock_t *tsock)
579 {
580 mutex_enter(&tsock->ts_socklock);
581 stop_retry:
582 if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) {
583 mutex_exit(&tsock->ts_socklock);
584 return;
585 }
586
587 /*
588 * If another thread is closing the socket then wait. Our callers
589 * expect us to return only after the socket is closed.
590 */
591 if (tsock->ts_flags & TSF_CLOSEWAIT) {
592 cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock);
593 goto stop_retry;
594 }
595
596 /*
597 * Set state and flags to block new bind or close calls
598 * while we close the socket.
599 */
600 tsock->ts_flags |= TSF_CLOSEWAIT;
601
602 /* Wait until all AF_TRILL socket transmit operations are done */
603 while (tsock->ts_sockthreadcount > 0)
604 cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock);
605
606 /*
607 * We are guaranteed to be the only thread closing on the
608 * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait
609 * for us to finish.
610 */
611 ASSERT(tsock->ts_link != NULL);
612 if (tsock->ts_ksp != NULL)
613 kstat_delete(tsock->ts_ksp);
614
615 /*
616 * Release lock before bridge_trill_lnunref to prevent deadlock
617 * between trill_ctrl_input thread waiting to acquire ts_socklock
618 * and bridge_trill_lnunref waiting for the trill thread to finish.
619 */
620 mutex_exit(&tsock->ts_socklock);
621
622 /*
623 * Release TRILL link reference from Bridging. On return from
624 * bridge_trill_lnunref we can be sure there are no active TRILL data
625 * threads for this link.
626 */
627 bridge_trill_lnunref(tsock->ts_link);
628
629 /* Set socket as unbound & wakeup threads waiting for socket to close */
630 mutex_enter(&tsock->ts_socklock);
631 ASSERT(tsock->ts_link != NULL);
632 tsock->ts_link = NULL;
633 tsock->ts_state = TS_UNBND;
634 tsock->ts_flags &= ~TSF_CLOSEWAIT;
635 cv_broadcast(&tsock->ts_sockclosewait);
636 mutex_exit(&tsock->ts_socklock);
637 }
638
639 static int
640 trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len)
641 {
642 struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa;
643 datalink_id_t linkid;
644 int err = 0;
645
646 if (len != sizeof (*lladdr))
647 return (EINVAL);
648
649 mutex_enter(&tsock->ts_socklock);
650 if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) {
651 err = EINVAL;
652 goto bind_error;
653 }
654
655 if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) {
656 err = EBUSY;
657 goto bind_error;
658 }
659
660 (void) memcpy(&(tsock->ts_lladdr), lladdr,
661 sizeof (struct sockaddr_dl));
662 (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data,
663 sizeof (datalink_id_t));
664
665 tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst,
666 linkid, tsock);
667 if (tsock->ts_link == NULL) {
668 err = EINVAL;
669 goto bind_error;
670 }
671
672 trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename);
673 tsock->ts_state = TS_IDLE;
674
675 bind_error:
676 mutex_exit(&tsock->ts_socklock);
677 return (err);
678 }
679
680 static int
681 trill_do_unbind(trill_sock_t *tsock)
682 {
683 /* If a bind has not been done, we can't unbind. */
684 if (tsock->ts_state != TS_IDLE)
685 return (EINVAL);
686
687 trill_stop_recv(tsock);
688 return (0);
689 }
690
691 static void
692 trill_instance_unref(trill_inst_t *tip)
693 {
694 rw_enter(&trill_inst_rwlock, RW_WRITER);
695 rw_enter(&tip->ti_rwlock, RW_WRITER);
696 if (atomic_dec_uint_nv(&tip->ti_refs) == 0) {
697 list_remove(&trill_inst_list, tip);
698 rw_exit(&tip->ti_rwlock);
699 rw_exit(&trill_inst_rwlock);
700 if (tip->ti_binst != NULL)
701 bridge_trill_brunref(tip->ti_binst);
702 list_destroy(&tip->ti_socklist);
703 rw_destroy(&tip->ti_rwlock);
704 kmem_free(tip, sizeof (*tip));
705 } else {
706 rw_exit(&tip->ti_rwlock);
707 rw_exit(&trill_inst_rwlock);
708 }
709 }
710
711 /*
712 * This is called when the bridge module receives a TRILL-encapsulated packet
713 * on a given link or a packet identified as "TRILL control." We must verify
714 * that it's for us (it almost certainly will be), and then either decapsulate
715 * (if it's to our nickname), forward (if it's to someone else), or send up one
716 * of the sockets (if it's control traffic).
717 *
718 * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and
719 * not by TRILL header information.
720 */
721 static void
722 trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc,
723 mblk_t *mp, mac_header_info_t *hdr_info)
724 {
725 trill_sock_t *tsock = lptr;
726
727 _NOTE(ARGUNUSED(rsrc));
728
729 ASSERT(tsock->ts_tip != NULL);
730 ASSERT(tsock->ts_link != NULL);
731 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
732
733 /*
734 * Only receive packet if the source address is not multicast (which is
735 * bogus).
736 */
737 if (hdr_info->mhi_saddr[0] & 1)
738 goto discard;
739
740 /*
741 * Check if this is our own packet reflected back. It should not be.
742 */
743 if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0)
744 goto discard;
745
746 /* Only receive unicast packet if addressed to us */
747 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST &&
748 bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0)
749 goto discard;
750
751 if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) {
752 /* TRILL data packets */
753 trill_recv(tsock, mp, hdr_info->mhi_saddr);
754 } else {
755 /* Design constraint for cheap IS-IS/BPDU comparison */
756 ASSERT(all_isis_rbridges[4] != bridge_group_address[4]);
757 /* Send received control packet upstream */
758 trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr,
759 hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ?
760 hdr_info->mhi_tci : TRILL_TCI_BPDU);
761 }
762
763 return;
764
765 discard:
766 freemsg(mp);
767 KSPINCR(tks_drops);
768 }
769
770 /*
771 * This is called when the bridge module discovers that the destination address
772 * for a packet is not local -- it's through some remote node. We must verify
773 * that the remote node isn't our nickname (it shouldn't be), add a TRILL
774 * header, and then use the IS-IS data to determine which link and which
775 * next-hop RBridge should be used for output. We then transmit on that link.
776 *
777 * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case.
778 */
779 static void
780 trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info,
781 mblk_t *mp, uint16_t egress_nick)
782 {
783 uint16_t ournick;
784 uint16_t dtnick;
785 trill_node_t *self = NULL;
786 trill_sock_t *tsock = lptr;
787 trill_inst_t *tip = tsock->ts_tip;
788 int vlan = VLAN_ID_NONE;
789
790 _NOTE(ARGUNUSED(blp));
791 ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL);
792
793 /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */
794 if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick))
795 goto discard;
796
797 /* Check if our own nick is valid before we do any forwarding */
798 rw_enter(&tip->ti_rwlock, RW_READER);
799 ournick = tip->ti_nick;
800 dtnick = tip->ti_treeroot;
801 rw_exit(&tip->ti_rwlock);
802 if (!VALID_NICK(ournick))
803 goto discard;
804
805 /*
806 * For Multi-Destination forwarding determine our choice of
807 * root distribution tree. If we didn't choose a distribution
808 * tree (dtroots_count=0) then we use the highest priority tree
809 * root (t_treeroot) else we drop the packet without forwarding.
810 */
811 if (egress_nick == RBRIDGE_NICKNAME_NONE) {
812 if ((self = trill_node_lookup(tip, ournick)) == NULL)
813 goto discard;
814
815 /*
816 * Use the first DT configured for now. In future we
817 * should have DT selection code here.
818 */
819 if (self->tn_ni->tni_dtrootcount > 0) {
820 dtnick = TNI_DTROOTNICK(self->tn_ni, 0);
821 }
822
823 trill_node_unref(tip, self);
824 if (!VALID_NICK(dtnick)) {
825 DTRACE_PROBE(trill__fwd__packet__nodtroot);
826 goto discard;
827 }
828 }
829
830 /*
831 * Retrieve VLAN ID of the native frame used for VLAN
832 * pruning of multi-destination frames.
833 */
834 if (hdr_info->mhi_istagged) {
835 vlan = VLAN_ID(hdr_info->mhi_tci);
836 }
837
838 DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info,
839 uint16_t, egress_nick);
840 if (egress_nick == RBRIDGE_NICKNAME_NONE) {
841 trill_multidest_fwd(tip, mp, dtnick,
842 ournick, B_FALSE, NULL, vlan, B_TRUE);
843 } else {
844 trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE,
845 RBRIDGE_NICKNAME_NONE);
846 }
847 KSPINCR(tks_encap);
848 return;
849
850 discard:
851 freemsg(mp);
852 }
853
854 /*
855 * This is called when the bridge module has completely torn down a bridge
856 * instance and all of the attached links. We need to make the TRILL instance
857 * go away at this point.
858 */
859 static void
860 trill_br_dstr_cb(void *bptr, bridge_inst_t *bip)
861 {
862 trill_inst_t *tip = bptr;
863
864 _NOTE(ARGUNUSED(bip));
865 rw_enter(&tip->ti_rwlock, RW_WRITER);
866 if (tip->ti_binst != NULL)
867 bridge_trill_brunref(tip->ti_binst);
868 tip->ti_binst = NULL;
869 rw_exit(&tip->ti_rwlock);
870 }
871
872 /*
873 * This is called when the bridge module is tearing down a link, but before the
874 * actual tear-down starts. When this function returns, we must make sure that
875 * we will not initiate any new transmits on this link.
876 */
877 static void
878 trill_ln_dstr_cb(void *lptr, bridge_link_t *blp)
879 {
880 trill_sock_t *tsock = lptr;
881
882 _NOTE(ARGUNUSED(blp));
883 trill_stop_recv(tsock);
884 }
885
886 static void
887 trill_init(void)
888 {
889 list_create(&trill_inst_list, sizeof (trill_inst_t),
890 offsetof(trill_inst_t, ti_instnode));
891 rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL);
892 bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb,
893 trill_br_dstr_cb, trill_ln_dstr_cb);
894 }
895
896 static void
897 trill_fini(void)
898 {
899 bridge_trill_register_cb(NULL, NULL, NULL, NULL);
900 rw_destroy(&trill_inst_rwlock);
901 list_destroy(&trill_inst_list);
902 }
903
904 /* Loadable module configuration entry points */
905 int
906 _init(void)
907 {
908 int rc;
909
910 trill_init();
911 if ((rc = mod_install(&ml)) != 0)
912 trill_fini();
913 return (rc);
914 }
915
916 int
917 _info(struct modinfo *modinfop)
918 {
919 return (mod_info(&ml, modinfop));
920 }
921
922 int
923 _fini(void)
924 {
925 int rc;
926
927 rw_enter(&trill_inst_rwlock, RW_READER);
928 rc = list_is_empty(&trill_inst_list) ? 0 : EBUSY;
929 rw_exit(&trill_inst_rwlock);
930 if (rc == 0 && ((rc = mod_remove(&ml)) == 0))
931 trill_fini();
932 return (rc);
933 }
934
935 static void
936 trill_kstats_init(trill_sock_t *tsock, const char *bname)
937 {
938 int i;
939 char kstatname[KSTAT_STRLEN];
940 kstat_named_t *knt;
941 static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES };
942 char link_name[MAXNAMELEN];
943 int num;
944 int err;
945
946 bzero(link_name, sizeof (link_name));
947 if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name,
948 NULL, NULL, NULL)) != 0) {
949 cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving"
950 " linkinfo for linkid:%d", "trill", err,
951 tsock->ts_link->bl_linkid);
952 return;
953 }
954
955 bzero(kstatname, sizeof (kstatname));
956 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s",
957 bname, link_name);
958
959 num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list);
960 for (i = 0; i < num; i++) {
961 knt = (kstat_named_t *)&(tsock->ts_kstats);
962 kstat_named_init(&knt[i], sock_kstats_list[i],
963 KSTAT_DATA_UINT64);
964 }
965
966 tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock",
967 KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
968 if (tsock->ts_ksp != NULL) {
969 tsock->ts_ksp->ks_data = &tsock->ts_kstats;
970 kstat_install(tsock->ts_ksp);
971 }
972 }
973
974 static trill_sock_t *
975 trill_do_open(int flags)
976 {
977 trill_sock_t *tsock;
978 int kmflag = ((flags & SOCKET_NOSLEEP)) ? KM_NOSLEEP:KM_SLEEP;
979
980 tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag);
981 if (tsock != NULL) {
982 tsock->ts_state = TS_UNBND;
983 tsock->ts_refs++;
984 mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL);
985 cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL);
986 cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL);
987 }
988 return (tsock);
989 }
990
991 static int
992 trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create)
993 {
994 trill_inst_t *tip, *newtip = NULL;
995
996 /* Allocate some memory (speculatively) before taking locks */
997 if (can_create)
998 newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
999
1000 rw_enter(&trill_inst_rwlock, RW_WRITER);
1001 for (tip = list_head(&trill_inst_list); tip != NULL;
1002 tip = list_next(&trill_inst_list, tip)) {
1003 if (strcmp(tip->ti_bridgename, bname) == 0)
1004 break;
1005 }
1006 if (tip == NULL) {
1007 if (!can_create || newtip == NULL) {
1008 rw_exit(&trill_inst_rwlock);
1009 return (can_create ? ENOMEM : ENOENT);
1010 }
1011
1012 tip = newtip;
1013 newtip = NULL;
1014 (void) strcpy(tip->ti_bridgename, bname);
1015
1016 /* Register TRILL instance with bridging */
1017 tip->ti_binst = bridge_trill_brref(bname, tip);
1018 if (tip->ti_binst == NULL) {
1019 rw_exit(&trill_inst_rwlock);
1020 kmem_free(tip, sizeof (*tip));
1021 return (ENOENT);
1022 }
1023
1024 rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL);
1025 list_create(&tip->ti_socklist, sizeof (trill_sock_t),
1026 offsetof(trill_sock_t, ts_socklistnode));
1027 list_insert_tail(&trill_inst_list, tip);
1028 }
1029 atomic_inc_uint(&tip->ti_refs);
1030 rw_exit(&trill_inst_rwlock);
1031
1032 /* If we didn't need the preallocated memory, then discard now. */
1033 if (newtip != NULL)
1034 kmem_free(newtip, sizeof (*newtip));
1035
1036 rw_enter(&tip->ti_rwlock, RW_WRITER);
1037 list_insert_tail(&(tip->ti_socklist), tsock);
1038 tsock->ts_tip = tip;
1039 rw_exit(&tip->ti_rwlock);
1040 return (0);
1041 }
1042
1043 static void
1044 trill_clear_bridge(trill_sock_t *tsock)
1045 {
1046 trill_inst_t *tip;
1047
1048 if ((tip = tsock->ts_tip) == NULL)
1049 return;
1050 rw_enter(&tip->ti_rwlock, RW_WRITER);
1051 list_remove(&tip->ti_socklist, tsock);
1052 if (list_is_empty(&tip->ti_socklist))
1053 trill_del_all(tip, B_TRUE);
1054 rw_exit(&tip->ti_rwlock);
1055 }
1056
1057 static void
1058 trill_sock_unref(trill_sock_t *tsock)
1059 {
1060 if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) {
1061 mutex_destroy(&tsock->ts_socklock);
1062 cv_destroy(&tsock->ts_sockthreadwait);
1063 cv_destroy(&tsock->ts_sockclosewait);
1064 kmem_free(tsock, sizeof (trill_sock_t));
1065 }
1066 }
1067
1068 static void
1069 trill_do_close(trill_sock_t *tsock)
1070 {
1071 trill_inst_t *tip;
1072
1073 tip = tsock->ts_tip;
1074 trill_stop_recv(tsock);
1075 /* Remove socket from TRILL instance socket list */
1076 trill_clear_bridge(tsock);
1077 tsock->ts_flags |= TSF_SHUTDOWN;
1078 trill_sock_unref(tsock);
1079 if (tip != NULL)
1080 trill_instance_unref(tip);
1081 }
1082
1083 static void
1084 trill_del_all(trill_inst_t *tip, boolean_t lockheld)
1085 {
1086 int i;
1087
1088 if (!lockheld)
1089 rw_enter(&tip->ti_rwlock, RW_WRITER);
1090 for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) {
1091 if (tip->ti_nodes[i] != NULL)
1092 (void) trill_del_nick(tip, i, B_TRUE);
1093 }
1094 if (!lockheld)
1095 rw_exit(&tip->ti_rwlock);
1096 }
1097
1098 static void
1099 trill_node_free(trill_node_t *nick_entry)
1100 {
1101 trill_nickinfo_t *tni;
1102
1103 tni = nick_entry->tn_ni;
1104 kmem_free(tni, TNI_TOTALSIZE(tni));
1105 kmem_free(nick_entry, sizeof (trill_node_t));
1106 }
1107
1108 static void
1109 trill_node_unref(trill_inst_t *tip, trill_node_t *tnp)
1110 {
1111 if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) {
1112 if (tnp->tn_tsp != NULL)
1113 trill_sock_unref(tnp->tn_tsp);
1114 trill_node_free(tnp);
1115 atomic_dec_uint(&tip->ti_nodecount);
1116 }
1117 }
1118
1119 static trill_node_t *
1120 trill_node_lookup(trill_inst_t *tip, uint16_t nick)
1121 {
1122 trill_node_t *nick_entry;
1123
1124 if (!VALID_NICK(nick))
1125 return (NULL);
1126 rw_enter(&tip->ti_rwlock, RW_READER);
1127 nick_entry = tip->ti_nodes[nick];
1128 if (nick_entry != NULL) {
1129 atomic_inc_uint(&nick_entry->tn_refs);
1130 }
1131 rw_exit(&tip->ti_rwlock);
1132 return (nick_entry);
1133 }
1134
1135 static int
1136 trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld)
1137 {
1138 trill_node_t *nick_entry;
1139 int rc = ENOENT;
1140
1141 if (!lockheld)
1142 rw_enter(&tip->ti_rwlock, RW_WRITER);
1143 if (VALID_NICK(nick)) {
1144 nick_entry = tip->ti_nodes[nick];
1145 if (nick_entry != NULL) {
1146 trill_node_unref(tip, nick_entry);
1147 tip->ti_nodes[nick] = NULL;
1148 rc = 0;
1149 }
1150 }
1151 if (!lockheld)
1152 rw_exit(&tip->ti_rwlock);
1153 return (rc);
1154 }
1155
1156 static int
1157 trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode)
1158 {
1159 uint16_t nick;
1160 int size;
1161 trill_node_t *tnode;
1162 trill_nickinfo_t tnihdr;
1163
1164 /* First make sure we have at least the header available */
1165 if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0)
1166 return (EFAULT);
1167
1168 nick = tnihdr.tni_nick;
1169 if (!VALID_NICK(nick)) {
1170 DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *,
1171 &tnihdr);
1172 return (EINVAL);
1173 }
1174
1175 size = TNI_TOTALSIZE(&tnihdr);
1176 if (size > TNI_MAXSIZE)
1177 return (EINVAL);
1178 tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP);
1179 tnode->tn_ni = kmem_zalloc(size, KM_SLEEP);
1180 if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) {
1181 kmem_free(tnode->tn_ni, size);
1182 kmem_free(tnode, sizeof (trill_node_t));
1183 return (EFAULT);
1184 }
1185
1186 tnode->tn_refs++;
1187 rw_enter(&tip->ti_rwlock, RW_WRITER);
1188 if (tip->ti_nodes[nick] != NULL)
1189 (void) trill_del_nick(tip, nick, B_TRUE);
1190
1191 if (self) {
1192 tip->ti_nick = nick;
1193 } else {
1194 tnode->tn_tsp = find_trill_link(tip,
1195 tnode->tn_ni->tni_linkid);
1196 }
1197 DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode,
1198 uint16_t, nick);
1199 tip->ti_nodes[nick] = tnode;
1200 tip->ti_nodecount++;
1201 rw_exit(&tip->ti_rwlock);
1202 return (0);
1203 }
1204
1205 static int
1206 trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode)
1207 {
1208 int error = 0;
1209 trill_inst_t *tip = tsock->ts_tip;
1210
1211 switch (cmd) {
1212 case TRILL_DESIGVLAN: {
1213 uint16_t desigvlan;
1214
1215 if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0)
1216 return (EFAULT);
1217 tsock->ts_desigvlan = desigvlan;
1218 break;
1219 }
1220 case TRILL_VLANFWDER: {
1221 uint8_t vlans[TRILL_VLANS_ARRSIZE];
1222
1223 if (tsock->ts_link == NULL)
1224 return (EINVAL);
1225 if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0)
1226 return (EFAULT);
1227 bridge_trill_setvlans(tsock->ts_link, vlans);
1228 break;
1229 }
1230 case TRILL_SETNICK:
1231 if (tip == NULL)
1232 return (EINVAL);
1233 error = trill_add_nick(tip, arg, B_TRUE, mode);
1234 break;
1235
1236 case TRILL_GETNICK:
1237 if (tip == NULL)
1238 return (EINVAL);
1239 rw_enter(&tip->ti_rwlock, RW_READER);
1240 if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick),
1241 mode) != 0)
1242 error = EFAULT;
1243 rw_exit(&tip->ti_rwlock);
1244 break;
1245
1246 case TRILL_ADDNICK:
1247 if (tip == NULL)
1248 break;
1249 error = trill_add_nick(tip, arg, B_FALSE, mode);
1250 break;
1251
1252 case TRILL_DELNICK: {
1253 uint16_t delnick;
1254
1255 if (tip == NULL)
1256 break;
1257 if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0)
1258 return (EFAULT);
1259 error = trill_del_nick(tip, delnick, B_FALSE);
1260 break;
1261 }
1262 case TRILL_DELALL:
1263 if (tip == NULL)
1264 break;
1265 trill_del_all(tip, B_FALSE);
1266 break;
1267
1268 case TRILL_TREEROOT: {
1269 uint16_t treeroot;
1270
1271 if (tip == NULL)
1272 break;
1273 if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0)
1274 return (EFAULT);
1275 if (!VALID_NICK(treeroot))
1276 return (EINVAL);
1277 rw_enter(&tip->ti_rwlock, RW_WRITER);
1278 tip->ti_treeroot = treeroot;
1279 rw_exit(&tip->ti_rwlock);
1280 break;
1281 }
1282 case TRILL_HWADDR:
1283 if (tsock->ts_link == NULL)
1284 break;
1285 if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL,
1286 mode) != 0)
1287 return (EFAULT);
1288 break;
1289
1290 case TRILL_NEWBRIDGE: {
1291 char bname[MAXLINKNAMELEN];
1292
1293 if (tsock->ts_state != TS_UNBND)
1294 return (ENOTSUP);
1295 /* ts_tip can only be set once */
1296 if (tip != NULL)
1297 return (EEXIST);
1298 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0)
1299 return (EFAULT);
1300 bname[MAXLINKNAMELEN-1] = '\0';
1301 error = trill_find_bridge(tsock, bname, B_TRUE);
1302 break;
1303 }
1304
1305 case TRILL_GETBRIDGE: {
1306 char bname[MAXLINKNAMELEN];
1307
1308 /* ts_tip can only be set once */
1309 if (tip != NULL)
1310 return (EEXIST);
1311 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0)
1312 return (EFAULT);
1313 bname[MAXLINKNAMELEN - 1] = '\0';
1314 error = trill_find_bridge(tsock, bname, B_FALSE);
1315 break;
1316 }
1317
1318 case TRILL_LISTNICK: {
1319 trill_listnick_t tln;
1320 trill_node_t *tnp;
1321 trill_nickinfo_t *tnip;
1322 uint16_t nick;
1323
1324 if (tip == NULL)
1325 return (EINVAL);
1326 if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0)
1327 return (EFAULT);
1328 nick = tln.tln_nick;
1329 if (nick >= RBRIDGE_NICKNAME_MAX) {
1330 error = EINVAL;
1331 break;
1332 }
1333 rw_enter(&tip->ti_rwlock, RW_READER);
1334 while (++nick < RBRIDGE_NICKNAME_MAX) {
1335 if ((tnp = tip->ti_nodes[nick]) != NULL) {
1336 tnip = tnp->tn_ni;
1337 ASSERT(nick == tnip->tni_nick);
1338 tln.tln_nick = nick;
1339 bcopy(tnip->tni_adjsnpa, tln.tln_nexthop,
1340 ETHERADDRL);
1341 tln.tln_ours = nick == tip->ti_nick;
1342 if (tln.tln_ours || tnp->tn_tsp == NULL) {
1343 tln.tln_linkid =
1344 DATALINK_INVALID_LINKID;
1345 } else {
1346 tln.tln_linkid =
1347 tnp->tn_tsp->ts_link->bl_linkid;
1348 }
1349 break;
1350 }
1351 }
1352 rw_exit(&tip->ti_rwlock);
1353 if (nick >= RBRIDGE_NICKNAME_MAX)
1354 bzero(&tln, sizeof (tln));
1355 if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0)
1356 return (EFAULT);
1357 break;
1358 }
1359
1360 /*
1361 * Port flush: this is used when we lose AF on a port. We must discard
1362 * all regular bridge forwarding entries on this port with the
1363 * indicated VLAN.
1364 */
1365 case TRILL_PORTFLUSH: {
1366 uint16_t vlan = (uint16_t)(uintptr_t)arg;
1367
1368 if (tsock->ts_link == NULL)
1369 return (EINVAL);
1370 bridge_trill_flush(tsock->ts_link, vlan, B_FALSE);
1371 break;
1372 }
1373
1374 /*
1375 * Nick flush: this is used when we lose AF on a port. We must discard
1376 * all bridge TRILL forwarding entries on this port with the indicated
1377 * VLAN.
1378 */
1379 case TRILL_NICKFLUSH: {
1380 uint16_t vlan = (uint16_t)(uintptr_t)arg;
1381
1382 if (tsock->ts_link == NULL)
1383 return (EINVAL);
1384 bridge_trill_flush(tsock->ts_link, vlan, B_TRUE);
1385 break;
1386 }
1387
1388 case TRILL_GETMTU:
1389 if (tsock->ts_link == NULL)
1390 break;
1391 if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg,
1392 sizeof (uint_t), mode) != 0)
1393 return (EFAULT);
1394 break;
1395
1396 default:
1397 error = ENOTSUP;
1398 break;
1399 }
1400
1401 return (error);
1402 }
1403
1404 /*
1405 * Sends received packet back upstream on the TRILL socket.
1406 * Consumes passed mblk_t.
1407 */
1408 static void
1409 trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr,
1410 uint16_t tci)
1411 {
1412 int udi_size;
1413 mblk_t *mp1;
1414 struct T_unitdata_ind *tudi;
1415 struct sockaddr_dl *sdl;
1416 char *lladdr;
1417 int error;
1418
1419 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
1420 if (tsock->ts_flow_ctrld) {
1421 freemsg(mp);
1422 KSPINCR(tks_drops);
1423 return;
1424 }
1425
1426 udi_size = sizeof (struct T_unitdata_ind) +
1427 sizeof (struct sockaddr_dl);
1428 mp1 = allocb(udi_size, BPRI_MED);
1429 if (mp1 == NULL) {
1430 freemsg(mp);
1431 KSPINCR(tks_drops);
1432 return;
1433 }
1434
1435 mp1->b_cont = mp;
1436 mp = mp1;
1437 mp->b_datap->db_type = M_PROTO;
1438 /* LINTED: alignment */
1439 tudi = (struct T_unitdata_ind *)mp->b_rptr;
1440 mp->b_wptr = (uchar_t *)tudi + udi_size;
1441
1442 tudi->PRIM_type = T_UNITDATA_IND;
1443 tudi->SRC_length = sizeof (struct sockaddr_dl);
1444 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1445 tudi->OPT_length = 0;
1446 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
1447 sizeof (struct sockaddr_dl);
1448
1449 /* Information of the link on which packet was received. */
1450 sdl = (struct sockaddr_dl *)&tudi[1];
1451 (void) memset(sdl, 0, sizeof (struct sockaddr_dl));
1452 sdl->sdl_family = AF_TRILL;
1453
1454 /* LINTED: alignment */
1455 *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid;
1456 sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid);
1457
1458 lladdr = LLADDR(sdl);
1459 (void) memcpy(lladdr, saddr, ETHERADDRL);
1460 lladdr += ETHERADDRL;
1461 sdl->sdl_alen = ETHERADDRL;
1462
1463 /* LINTED: alignment */
1464 *(uint16_t *)lladdr = tci;
1465 sdl->sdl_slen = sizeof (uint16_t);
1466
1467 DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp);
1468 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle,
1469 mp, msgdsize(mp), 0, &error, NULL);
1470
1471 if (error == ENOSPC) {
1472 mutex_enter(&tsock->ts_socklock);
1473 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle,
1474 NULL, 0, 0, &error, NULL);
1475 if (error == ENOSPC)
1476 tsock->ts_flow_ctrld = B_TRUE;
1477 mutex_exit(&tsock->ts_socklock);
1478 KSPINCR(tks_drops);
1479 } else if (error != 0) {
1480 KSPINCR(tks_drops);
1481 } else {
1482 KSPINCR(tks_recv);
1483 }
1484
1485 DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *,
1486 tsock, int, error);
1487 }
1488
1489 /* ARGSUSED */
1490 static void
1491 trill_activate(sock_lower_handle_t proto_handle,
1492 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls,
1493 int flags, cred_t *cr)
1494 {
1495 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1496 struct sock_proto_props sopp;
1497
1498 tsock->ts_conn_upcalls = sock_upcalls;
1499 tsock->ts_conn_upper_handle = sock_handle;
1500
1501 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
1502 SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ |
1503 SOCKOPT_MAXBLK | SOCKOPT_MINPSZ;
1504 sopp.sopp_wroff = 0;
1505 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
1506 sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
1507 sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl);
1508 sopp.sopp_maxpsz = INFPSZ;
1509 sopp.sopp_maxblk = INFPSZ;
1510 sopp.sopp_minpsz = 0;
1511 (*tsock->ts_conn_upcalls->su_set_proto_props)(
1512 tsock->ts_conn_upper_handle, &sopp);
1513 }
1514
1515 /* ARGSUSED */
1516 static int
1517 trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
1518 {
1519 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1520
1521 trill_do_close(tsock);
1522 return (0);
1523 }
1524
1525 /* ARGSUSED */
1526 static int
1527 trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
1528 socklen_t len, cred_t *cr)
1529 {
1530 int error;
1531 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1532
1533 if (sa == NULL)
1534 error = trill_do_unbind(tsock);
1535 else
1536 error = trill_start_recv(tsock, sa, len);
1537
1538 return (error);
1539 }
1540
1541 /* ARGSUSED */
1542 static int
1543 trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
1544 cred_t *cr)
1545 {
1546 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1547 struct sockaddr_dl *laddr;
1548 uint16_t tci;
1549
1550 ASSERT(DB_TYPE(mp) == M_DATA);
1551 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
1552
1553 if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr))
1554 goto eproto;
1555
1556 /*
1557 * The name is a datalink_id_t, the address is an Ethernet address, and
1558 * the selector value is the VLAN ID.
1559 */
1560 laddr = (struct sockaddr_dl *)msg->msg_name;
1561 if (laddr->sdl_nlen != sizeof (datalink_id_t) ||
1562 laddr->sdl_alen != ETHERADDRL ||
1563 (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0))
1564 goto eproto;
1565
1566 mutex_enter(&tsock->ts_socklock);
1567 if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) {
1568 mutex_exit(&tsock->ts_socklock);
1569 goto eproto;
1570 }
1571 atomic_inc_uint(&tsock->ts_sockthreadcount);
1572 mutex_exit(&tsock->ts_socklock);
1573
1574 /*
1575 * Safe to dereference VLAN now, as we've checked the user's specified
1576 * values, and alignment is now guaranteed.
1577 */
1578 if (laddr->sdl_slen == 0) {
1579 tci = TRILL_NO_TCI;
1580 } else {
1581 /* LINTED: alignment */
1582 tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL);
1583 }
1584
1585 mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr),
1586 B_TRUE, B_FALSE, tci, msgdsize(mp));
1587 if (mp != NULL) {
1588 mp = bridge_trill_output(tsock->ts_link, mp);
1589 if (mp == NULL) {
1590 KSPINCR(tks_sent);
1591 } else {
1592 freemsg(mp);
1593 KSPINCR(tks_drops);
1594 }
1595 }
1596
1597 /* Wake up any threads blocking on us */
1598 if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0)
1599 cv_broadcast(&tsock->ts_sockthreadwait);
1600 return (0);
1601
1602 eproto:
1603 freemsg(mp);
1604 KSPINCR(tks_drops);
1605 return (EPROTO);
1606 }
1607
1608 /* ARGSUSED */
1609 static int
1610 trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
1611 int mode, int32_t *rvalp, cred_t *cr)
1612 {
1613 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1614 int rc;
1615
1616 switch (cmd) {
1617 /* List of unprivileged TRILL ioctls */
1618 case TRILL_GETNICK:
1619 case TRILL_GETBRIDGE:
1620 case TRILL_LISTNICK:
1621 break;
1622 default:
1623 if (secpolicy_dl_config(cr) != 0)
1624 return (EPERM);
1625 break;
1626 }
1627
1628 /* Lock ensures socket state is unchanged during ioctl handling */
1629 mutex_enter(&tsock->ts_socklock);
1630 rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode);
1631 mutex_exit(&tsock->ts_socklock);
1632 return (rc);
1633 }
1634
1635 static void
1636 trill_clr_flowctrl(sock_lower_handle_t proto_handle)
1637 {
1638 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1639
1640 mutex_enter(&tsock->ts_socklock);
1641 tsock->ts_flow_ctrld = B_FALSE;
1642 mutex_exit(&tsock->ts_socklock);
1643 }
1644
/*
 * Downcall vector handed out by trill_create() for every AF_TRILL socket.
 * Only activate, bind, send, flow-control clearing, ioctl and close are
 * implemented by this module; the remaining slots hold the generic
 * "not supported" handlers or NULL.  The initializers are positional, so
 * their order must match the member order of sock_downcalls_t.
 */
static sock_downcalls_t sock_trill_downcalls = {
	trill_activate,			/* sd_activate */
	sock_accept_notsupp,		/* sd_accept */
	trill_bind,			/* sd_bind */
	sock_listen_notsupp,		/* sd_listen */
	sock_connect_notsupp,		/* sd_connect */
	sock_getpeername_notsupp,	/* sd_getpeername */
	sock_getsockname_notsupp,	/* sd_getsockname */
	sock_getsockopt_notsupp,	/* sd_getsockopt */
	sock_setsockopt_notsupp,	/* sd_setsockopt */
	trill_send,			/* sd_send */
	NULL,				/* sd_send_uio */
	NULL,				/* sd_recv_uio */
	NULL,				/* sd_poll */
	sock_shutdown_notsupp,		/* sd_shutdown */
	trill_clr_flowctrl,		/* sd_setflowctrl */
	trill_ioctl,			/* sd_ioctl */
	trill_close			/* sd_close */
};
1664
1665 /* ARGSUSED */
1666 static sock_lower_handle_t
1667 trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
1668 uint_t *smodep, int *errorp, int flags, cred_t *credp)
1669 {
1670 trill_sock_t *tsock;
1671
1672 if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) {
1673 *errorp = EPROTONOSUPPORT;
1674 return (NULL);
1675 }
1676
1677 *sock_downcalls = &sock_trill_downcalls;
1678 *smodep = SM_ATOMIC;
1679 tsock = trill_do_open(flags);
1680 *errorp = (tsock != NULL) ? 0:ENOMEM;
1681 return ((sock_lower_handle_t)tsock);
1682 }