1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/strsubr.h>
29 #include <sys/socket.h>
30 #include <net/if_arp.h>
31 #include <net/if_types.h>
32 #include <sys/sockio.h>
33 #include <sys/pathname.h>
34
35 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
36
37 #include <sys/kstr.h>
38 #include <sys/t_kuser.h>
39
40 #include <sys/dls.h>
41
42 extern char cmlog[];
43
44 extern int ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s,
45 ibt_ip_addr_t *dst_addr, ibt_ip_addr_t *src_addr, zoneid_t myzoneid);
46 extern void ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp);
47
48 int ibcm_printip = 0;
49
50 /*
51 * Function:
52 * ibcm_ip_print
53 * Input:
54 * label Arbitrary qualifying string
55 * ipa Pointer to IP Address to print
56 */
57 void
58 ibcm_ip_print(char *label, ibt_ip_addr_t *ipaddr)
59 {
60 char buf[INET6_ADDRSTRLEN];
61
62 if (ipaddr->family == AF_INET) {
63 IBTF_DPRINTF_L2(cmlog, "%s: %s", label,
64 inet_ntop(AF_INET, &ipaddr->un.ip4addr, buf, sizeof (buf)));
65 } else if (ipaddr->family == AF_INET6) {
66 IBTF_DPRINTF_L2(cmlog, "%s: %s", label, inet_ntop(AF_INET6,
67 &ipaddr->un.ip6addr, buf, sizeof (buf)));
68 } else {
69 IBTF_DPRINTF_L2(cmlog, "%s: IP ADDR NOT SPECIFIED ", label);
70 }
71 }
72
73
74 ibt_status_t
75 ibcm_arp_get_ibaddr(zoneid_t myzoneid, ibt_ip_addr_t srcaddr,
76 ibt_ip_addr_t destaddr, ib_gid_t *sgid, ib_gid_t *dgid,
77 ibt_ip_addr_t *saddrp)
78 {
79 ibcm_arp_streams_t *ib_s;
80 ibcm_arp_prwqn_t *wqnp;
81 int ret = 0;
82 int len;
83
84 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr(%d, %p, %p, %p, %p, %p)",
85 myzoneid, srcaddr, destaddr, sgid, dgid, saddrp);
86
87 ib_s = (ibcm_arp_streams_t *)kmem_zalloc(sizeof (ibcm_arp_streams_t),
88 KM_SLEEP);
89
90 mutex_init(&ib_s->lock, NULL, MUTEX_DEFAULT, NULL);
91 cv_init(&ib_s->cv, NULL, CV_DRIVER, NULL);
92
93 mutex_enter(&ib_s->lock);
94 ib_s->done = B_FALSE;
95 mutex_exit(&ib_s->lock);
96
97 ret = ibcm_resolver_pr_lookup(ib_s, &destaddr, &srcaddr, myzoneid);
98
99 IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: ibcm_resolver_pr_lookup "
100 "returned: %d", ret);
101 if (ret == 0) {
102 mutex_enter(&ib_s->lock);
103 while (ib_s->done != B_TRUE)
104 cv_wait(&ib_s->cv, &ib_s->lock);
105 mutex_exit(&ib_s->lock);
106 }
107
108 mutex_enter(&ib_s->lock);
109 wqnp = ib_s->wqnp;
110 if (ib_s->status == 0) {
111 if (sgid)
112 *sgid = wqnp->sgid;
113 if (dgid)
114 *dgid = wqnp->dgid;
115 /*
116 * If the user supplied a address, then verify we got
117 * for the same address.
118 */
119 if (wqnp->usrc_addr.family && sgid) {
120 len = (wqnp->usrc_addr.family == AF_INET) ?
121 IP_ADDR_LEN : sizeof (in6_addr_t);
122 if (bcmp(&wqnp->usrc_addr.un,
123 &wqnp->src_addr.un, len)) {
124 IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: "
125 "srcaddr mismatch");
126
127 /* Clean-up old data, and reset the done flag */
128 ibcm_arp_delete_prwqn(wqnp);
129 ib_s->done = B_FALSE;
130 mutex_exit(&ib_s->lock);
131
132 ret = ibcm_resolver_pr_lookup(ib_s, &srcaddr,
133 &srcaddr, myzoneid);
134 if (ret == 0) {
135 mutex_enter(&ib_s->lock);
136 while (ib_s->done != B_TRUE)
137 cv_wait(&ib_s->cv, &ib_s->lock);
138 mutex_exit(&ib_s->lock);
139 }
140 mutex_enter(&ib_s->lock);
141 wqnp = ib_s->wqnp;
142 if (ib_s->status == 0) {
143 if (sgid)
144 *sgid = wqnp->dgid;
145
146 if (saddrp)
147 bcopy(&wqnp->src_addr, saddrp,
148 sizeof (ibt_ip_addr_t));
149
150 IBTF_DPRINTF_L4(cmlog,
151 "ibcm_arp_get_ibaddr: "
152 "SGID: %llX:%llX DGID: %llX:%llX",
153 sgid->gid_prefix, sgid->gid_guid,
154 dgid->gid_prefix, dgid->gid_guid);
155
156 ibcm_arp_delete_prwqn(wqnp);
157 } else if (ret == 0) {
158 if (wqnp)
159 kmem_free(wqnp,
160 sizeof (ibcm_arp_prwqn_t));
161 }
162 goto arp_ibaddr_done;
163 }
164 }
165
166 if (saddrp)
167 bcopy(&wqnp->src_addr, saddrp, sizeof (ibt_ip_addr_t));
168
169 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr: SGID: %llX:%llX"
170 " DGID: %llX:%llX", sgid->gid_prefix, sgid->gid_guid,
171 dgid->gid_prefix, dgid->gid_guid);
172
173 ibcm_arp_delete_prwqn(wqnp);
174 } else if (ret == 0) {
175 /*
176 * We come here only when lookup has returned empty (failed)
177 * via callback routine.
178 * i.e. ib_s->status is non-zero, while ret is zero.
179 */
180 if (wqnp)
181 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
182 }
183 arp_ibaddr_done:
184 ret = ib_s->status;
185 mutex_exit(&ib_s->lock);
186
187 arp_ibaddr_error:
188
189 mutex_destroy(&ib_s->lock);
190 cv_destroy(&ib_s->cv);
191 kmem_free(ib_s, sizeof (ibcm_arp_streams_t));
192
193 if (ret)
194 return (IBT_FAILURE);
195 else
196 return (IBT_SUCCESS);
197 }
198
199 void
200 ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds)
201 {
202 if (ibds->ibcm_arp_ip) {
203 kmem_free(ibds->ibcm_arp_ip, ibds->ibcm_arp_ibd_alloc *
204 sizeof (ibcm_arp_ip_t));
205 ibds->ibcm_arp_ibd_alloc = 0;
206 ibds->ibcm_arp_ibd_cnt = 0;
207 ibds->ibcm_arp_ip = NULL;
208 }
209 }
210
211 static void
212 ibcm_arp_get_ibd_insts(ibcm_arp_ibd_insts_t *ibds)
213 {
214 ibcm_arp_ip_t *ipp;
215 ib_gid_t port_gid;
216 ibt_part_attr_t *attr_list, *attr;
217 int nparts;
218
219 if ((ibt_get_all_part_attr(&attr_list, &nparts) != IBT_SUCCESS) ||
220 (nparts == 0)) {
221 IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibd_insts: Failed to "
222 "IB Part List - %d", nparts);
223 ibds->ibcm_arp_ibd_alloc = 0;
224 ibds->ibcm_arp_ibd_cnt = 0;
225 ibds->ibcm_arp_ip = NULL;
226 return;
227 }
228 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_insts: Found %d IB Part List",
229 nparts);
230
231 ibds->ibcm_arp_ibd_alloc = nparts;
232 ibds->ibcm_arp_ibd_cnt = 0;
233 ibds->ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
234 nparts * sizeof (ibcm_arp_ip_t), KM_SLEEP);
235
236 attr = attr_list;
237 while (nparts--) {
238 if (ibt_get_port_state_byguid(attr->pa_hca_guid,
239 attr->pa_port, &port_gid, NULL) == IBT_SUCCESS) {
240
241 ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
242 ipp->ip_linkid = attr->pa_plinkid;
243 ipp->ip_pkey = attr->pa_pkey;
244 ipp->ip_hca_guid = attr->pa_hca_guid;
245 ipp->ip_port_gid = port_gid;
246 ibds->ibcm_arp_ibd_cnt++;
247
248 IBTF_DPRINTF_L4(cmlog, "PartAttr: p-linkid %lX, "
249 "d-linkid %lX, pkey 0x%lX", ipp->ip_linkid,
250 attr->pa_dlinkid, ipp->ip_pkey);
251 IBTF_DPRINTF_L4(cmlog, "hca_guid 0x%llX, "
252 "port_gid %llX \n attr-port_guid %llX",
253 ipp->ip_hca_guid, ipp->ip_port_gid.gid_guid,
254 attr->pa_port_guid);
255 }
256 attr++;
257 }
258
259 (void) ibt_free_part_attr(attr_list, ibds->ibcm_arp_ibd_alloc);
260 }
261
262 /*
263 * Issue an ioctl down to IP. There are several similar versions of this
264 * function (e.g., rpcib_do_ip_ioctl()); clearly a utility routine is needed.
265 */
266 static int
267 ibcm_do_ip_ioctl(int cmd, int len, void *arg)
268 {
269 vnode_t *kkvp;
270 TIUSER *tiptr;
271 struct strioctl iocb;
272 int err = 0;
273
274 if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kkvp) != 0)
275 return (EPROTO);
276
277 if (t_kopen(NULL, kkvp->v_rdev, FREAD|FWRITE, &tiptr, CRED()) != 0) {
278 VN_RELE(kkvp);
279 return (EPROTO);
280 }
281
282 iocb.ic_cmd = cmd;
283 iocb.ic_timout = 0;
284 iocb.ic_len = len;
285 iocb.ic_dp = (caddr_t)arg;
286 err = kstr_ioctl(tiptr->fp->f_vnode, I_STR, (intptr_t)&iocb);
287 (void) t_kclose(tiptr, 0);
288 VN_RELE(kkvp);
289 return (err);
290 }
291
292 /*
293 * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
294 * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
295 */
296 static int
297 ibcm_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep, sa_family_t family_loc)
298 {
299 int err;
300 struct lifnum lifn;
301
302 bzero(&lifn, sizeof (struct lifnum));
303 lifn.lifn_family = family_loc;
304 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
305
306 err = ibcm_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
307 if (err != 0)
308 return (err);
309
310 IBTF_DPRINTF_L3(cmlog, "ibcm_do_lifconf: Family %d, lifn_count %d",
311 family_loc, lifn.lifn_count);
312 /*
313 * Pad the interface count to account for additional interfaces that
314 * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
315 */
316 lifn.lifn_count += 4;
317
318 bzero(lifcp, sizeof (struct lifconf));
319 lifcp->lifc_family = family_loc;
320 lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
321 lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
322 lifcp->lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
323
324 err = ibcm_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
325 if (err != 0) {
326 kmem_free(lifcp->lifc_buf, *bufsizep);
327 return (err);
328 }
329 return (0);
330 }
331
332 static ibcm_arp_ip_t *
333 ibcm_arp_lookup(ibcm_arp_ibd_insts_t *ibds, char *linkname)
334 {
335 datalink_id_t linkid;
336 int i;
337
338 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: linkname = %s", linkname);
339
340 /*
341 * If at first we don't succeed, try again, just in case it is in
342 * hiding. The first call requires the datalink management daemon
343 * (the authorative source of information about name to id mapping)
344 * to be present and answering upcalls, the second does not.
345 */
346 if (dls_mgmt_get_linkid(linkname, &linkid) != 0) {
347 if (dls_devnet_macname2linkid(linkname, &linkid) != 0) {
348 IBTF_DPRINTF_L2(cmlog, "ibcm_arp_lookup: could not "
349 "get linkid from linkname (%s)", linkname);
350 return (NULL);
351 }
352 }
353
354 for (i = 0; i < ibds->ibcm_arp_ibd_cnt; i++) {
355 if (ibds->ibcm_arp_ip[i].ip_linkid == linkid)
356 return (&ibds->ibcm_arp_ip[i]);
357 }
358
359 IBTF_DPRINTF_L2(cmlog, "ibcm_arp_lookup: returning NULL for "
360 "linkname (%s)", linkname);
361 return (NULL);
362 }
363
364 /*
365 * Fill in `ibds' with IP addresses tied to IFT_IB IP interfaces. Returns
366 * B_TRUE if at least one address was filled in.
367 */
368 static boolean_t
369 ibcm_arp_get_ibd_ipaddr(ibcm_arp_ibd_insts_t *ibds, sa_family_t family_loc)
370 {
371 int i, nifs, naddr = 0;
372 uint_t bufsize;
373 struct lifconf lifc;
374 struct lifreq *lifrp, lifr_copy;
375 ibcm_arp_ip_t *ipp;
376 lifgroupinfo_t lifgr;
377 int err;
378 char ifname[LIFNAMSIZ + 1];
379 uint64_t ifflags = 0;
380 zoneid_t ifzoneid;
381
382 if (ibcm_do_lifconf(&lifc, &bufsize, family_loc) != 0)
383 return (B_FALSE);
384
385 nifs = lifc.lifc_len / sizeof (struct lifreq);
386
387 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_ipaddr: Family %d, nifs %d",
388 family_loc, nifs);
389
390 for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
391
392 if (lifrp->lifr_type != IFT_IB)
393 continue;
394
395 IBTF_DPRINTF_L4(cmlog, "\nInterface# : %d", i);
396 IBTF_DPRINTF_L4(cmlog, "lifr_name : %s, lifr_family :%X, "
397 "lifr_type : 0x%lX", lifrp->lifr_name,
398 lifrp->lifr_addr.ss_family, lifrp->lifr_type);
399
400 (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
401
402 /* Get ZoneId. */
403 lifr_copy = *lifrp;
404 ifzoneid = 0;
405 err = ibcm_do_ip_ioctl(SIOCGLIFZONE, sizeof (struct lifreq),
406 &lifr_copy);
407 if (err != 0) {
408 IBTF_DPRINTF_L2(cmlog, "IFZONE ioctl Failed: err = %d",
409 err);
410 } else {
411 IBTF_DPRINTF_L4(cmlog, "lifr_zoneid : 0x%X",
412 lifr_copy.lifr_zoneid);
413 ifzoneid = lifr_copy.lifr_zoneid;
414 }
415
416 /* Get IfIndex. */
417 lifr_copy = *lifrp;
418 err = ibcm_do_ip_ioctl(SIOCGLIFINDEX, sizeof (struct lifreq),
419 &lifr_copy);
420 if (err != 0) {
421 IBTF_DPRINTF_L2(cmlog, "IFINDEX ioctl Failed: err = %d",
422 err);
423 } else
424 IBTF_DPRINTF_L4(cmlog, "lifr_index : 0x%X",
425 lifr_copy.lifr_index);
426
427 /* Get Interface flags. */
428 lifr_copy = *lifrp;
429 err = ibcm_do_ip_ioctl(SIOCGLIFFLAGS, sizeof (struct lifreq),
430 &lifr_copy);
431 if (err != 0) {
432 IBTF_DPRINTF_L2(cmlog, "IFFLAGS ioctl Failed: err = %d",
433 err);
434 } else {
435 ifflags = lifr_copy.lifr_flags;
436 IBTF_DPRINTF_L4(cmlog, "lifr_flags : 0x%llX",
437 ifflags);
438 }
439
440 lifr_copy = *lifrp;
441 err = ibcm_do_ip_ioctl(SIOCGLIFGROUPNAME,
442 sizeof (struct lifreq), &lifr_copy);
443 if (err != 0) {
444 IBTF_DPRINTF_L3(cmlog, "IFGroupName ioctl Failed: "
445 "err = %d", err);
446 }
447
448 if (lifr_copy.lifr_groupname[0] != '\0') {
449 IBTF_DPRINTF_L4(cmlog, "lifr_groupname : %s",
450 lifr_copy.lifr_groupname);
451 (void) strlcpy(lifgr.gi_grname,
452 lifr_copy.lifr_groupname, LIFGRNAMSIZ);
453 err = ibcm_do_ip_ioctl(SIOCGLIFGROUPINFO,
454 sizeof (struct lifgroupinfo), &lifgr);
455 if (err != 0) {
456 IBTF_DPRINTF_L2(cmlog, "IFGroupINFO ioctl "
457 "Failed: err = %d", err);
458 } else {
459 IBTF_DPRINTF_L4(cmlog, "lifgroupinfo details");
460 IBTF_DPRINTF_L4(cmlog, "grname : %s, grifname :"
461 " %s, m4ifname : %s, m6ifname : %s",
462 lifgr.gi_grname, lifgr.gi_grifname,
463 lifgr.gi_m4ifname, lifgr.gi_m6ifname);
464 IBTF_DPRINTF_L4(cmlog, "gi_bcifname : %s",
465 lifgr.gi_bcifname);
466 IBTF_DPRINTF_L4(cmlog, "gi_v4 %d, gi_v6 %d, "
467 "gi_nv4 %d, gi_nv6 %d, gi_mactype %d",
468 lifgr.gi_v4, lifgr.gi_v6, lifgr.gi_nv4,
469 lifgr.gi_nv6, lifgr.gi_mactype);
470
471 (void) strlcpy(ifname, lifgr.gi_bcifname,
472 LIFNAMSIZ);
473 }
474 }
475
476 if ((ipp = ibcm_arp_lookup(ibds, ifname)) == NULL)
477 continue;
478
479 ipp->ip_zoneid = ifzoneid; /* Copy back the zoneid info */
480 switch (lifrp->lifr_addr.ss_family) {
481 case AF_INET:
482 ipp->ip_inet_family = AF_INET;
483 bcopy(&lifrp->lifr_addr, &ipp->ip_cm_sin,
484 sizeof (struct sockaddr_in));
485 naddr++;
486 break;
487 case AF_INET6:
488 ipp->ip_inet_family = AF_INET6;
489 bcopy(&lifrp->lifr_addr, &ipp->ip_cm_sin6,
490 sizeof (struct sockaddr_in6));
491 naddr++;
492 break;
493 }
494 }
495
496 kmem_free(lifc.lifc_buf, bufsize);
497 return (naddr > 0);
498 }
499
500 ibt_status_t
501 ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp, sa_family_t family_loc)
502 {
503 #ifdef DEBUG
504 int i;
505 #endif
506
507 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds(%p)", ibdp);
508
509 ibcm_arp_get_ibd_insts(ibdp);
510
511 IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibds: Found %d ibd instances",
512 ibdp->ibcm_arp_ibd_cnt);
513
514 if (ibdp->ibcm_arp_ibd_cnt == 0)
515 return (IBT_SRC_IP_NOT_FOUND);
516
517 /* Get the IP addresses of active ports. */
518 if (!ibcm_arp_get_ibd_ipaddr(ibdp, family_loc)) {
519 IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: failed to get "
520 "ibd instance: IBT_SRC_IP_NOT_FOUND");
521 ibcm_arp_free_ibds(ibdp);
522 return (IBT_SRC_IP_NOT_FOUND);
523 }
524
525 #ifdef DEBUG
526 for (i = 0; i < ibdp->ibcm_arp_ibd_cnt; i++) {
527 char my_buf[INET6_ADDRSTRLEN];
528 ibcm_arp_ip_t *aip = &ibdp->ibcm_arp_ip[i];
529
530 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: Linkid %d Family %d "
531 "PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
532 aip->ip_linkid, aip->ip_inet_family, aip->ip_pkey,
533 aip->ip_hca_guid, aip->ip_port_gid.gid_prefix,
534 aip->ip_port_gid.gid_guid);
535 if (aip->ip_inet_family == AF_INET) {
536 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: IPV4: %s",
537 inet_ntop(AF_INET, &aip->ip_cm_sin.sin_addr, my_buf,
538 sizeof (my_buf)));
539 } else if (aip->ip_inet_family == AF_INET6) {
540 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: IPV6: %s",
541 inet_ntop(AF_INET6, &aip->ip_cm_sin6.sin6_addr,
542 my_buf, sizeof (my_buf)));
543 } else {
544 IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: Unknown "
545 "Family %d", aip->ip_inet_family);
546 }
547 }
548 #endif
549
550 return (IBT_SUCCESS);
551 }