1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <net/if.h>
28 #include <net/if_types.h>
29 #include <inet/ip.h>
30 #include <inet/ip_ire.h>
31 #include <inet/ip_if.h>
32 #include <sys/ethernet.h>
33 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
34
/*
 * Passed as the first argument to every IBTF_DPRINTF_* call in this file;
 * the array itself is defined in another translation unit.
 */
extern char cmlog[];

/*
 * Forward declarations for static functions defined later in this file:
 * ibcm_resolver_pr_lookup() calls ibcm_nce_lookup(), which in turn hands
 * ibcm_resolver_ack() to ip2mac() as its callback.
 */
static void ibcm_resolver_ack(ip2mac_t *, void *);
static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
39
40 /*
41 * delete a wait queue node from the list.
42 * assumes mutex is acquired
43 */
44 void
45 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
46 {
47 ibcm_arp_streams_t *ib_s;
48
49 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
50
51 ib_s = wqnp->ib_str;
52 ib_s->wqnp = NULL;
53 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
54 }
55
56 /*
57 * allocate a wait queue node, and insert it in the list
58 */
59 static ibcm_arp_prwqn_t *
60 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
61 ibt_ip_addr_t *src_addr)
62 {
63 ibcm_arp_prwqn_t *wqnp;
64
65 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
66
67 if (dst_addr == NULL) {
68 return (NULL);
69 }
70 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
71 NULL) {
72 return (NULL);
73 }
74 wqnp->dst_addr = *dst_addr;
75
76 if (src_addr) {
77 wqnp->usrc_addr = *src_addr;
78 }
79 wqnp->ib_str = ib_s;
80 wqnp->ifproto = (dst_addr->family == AF_INET) ?
81 ETHERTYPE_IP : ETHERTYPE_IPV6;
82
83 ib_s->wqnp = wqnp;
84
85 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
86
87 return (wqnp);
88 }
89
90
91 /*
92 * Check if the interface is loopback or IB.
93 */
94 static int
95 ibcm_arp_check_interface(ill_t *ill)
96 {
97 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
98 return (0);
99
100 return (ETIMEDOUT);
101 }
102
103 int
104 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
105 ibt_ip_addr_t *src_addr, zoneid_t myzoneid)
106 {
107 ibcm_arp_prwqn_t *wqnp;
108 ire_t *ire = NULL;
109 ipif_t *ipif = NULL;
110 ill_t *ill = NULL;
111 ill_t *hwaddr_ill = NULL;
112 ip_stack_t *ipst;
113 ipaddr_t setsrcv4;
114 in6_addr_t setsrcv6;
115
116 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
117 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
118
119 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
120 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
121 "ibcm_arp_create_prwqn failed");
122 ib_s->status = ENOMEM;
123 return (1);
124 }
125
126 ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip;
127 if (dst_addr->family == AF_INET) {
128 /*
129 * get an ire for the destination adress.
130 * Note that we can't use MATCH_IRE_ILL since that would
131 * require that the first ill we find have ire_ill set.
132 */
133 setsrcv4 = INADDR_ANY;
134 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
135 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
136 &setsrcv4, NULL, NULL);
137
138 ASSERT(ire != NULL);
139 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
140 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
141 "ire_route_recursive_v4 failed");
142 ib_s->status = EFAULT;
143 goto fail;
144 }
145 ill = ire_nexthop_ill(ire);
146 if (ill == NULL) {
147 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
148 "ire_nexthop_ill failed");
149 ib_s->status = EFAULT;
150 goto fail;
151 }
152
153 /* Pick a source address */
154 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr,
155 INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr,
156 NULL, NULL) != 0) {
157 ib_s->status = EADDRNOTAVAIL;
158 goto fail;
159 }
160
161 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
162 wqnp->netmask.un.ip4addr = ire->ire_mask;
163 wqnp->src_addr.family = wqnp->gateway.family =
164 wqnp->netmask.family = AF_INET;
165
166 } else if (dst_addr->family == AF_INET6) {
167 /*
168 * get an ire for the destination adress.
169 * Note that we can't use MATCH_IRE_ILL since that would
170 * require that the first ill we find have ire_ill set. Thus
171 * we compare ire_ill against ipif_ill after the lookup.
172 */
173 setsrcv6 = ipv6_all_zeros;
174 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
175 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
176 &setsrcv6, NULL, NULL);
177
178 ASSERT(ire != NULL);
179 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
180 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
181 "ire_route_recursive_v6 failed");
182 ib_s->status = EFAULT;
183 goto fail;
184 }
185 ill = ire_nexthop_ill(ire);
186 if (ill == NULL) {
187 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
188 "ire_nexthop_ill failed");
189 ib_s->status = EFAULT;
190 goto fail;
191 }
192
193 /* Pick a source address */
194 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr,
195 myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT,
196 &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) {
197 ib_s->status = EADDRNOTAVAIL;
198 goto fail;
199 }
200
201 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
202 wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
203 wqnp->src_addr.family = wqnp->gateway.family =
204 wqnp->netmask.family = AF_INET6;
205 }
206
207 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
208
209 /*
210 * For IPMP data addresses, we need to use the hardware address of the
211 * interface bound to the given address.
212 */
213 if (IS_IPMP(ill)) {
214 if (wqnp->src_addr.family == AF_INET) {
215 ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill,
216 myzoneid, ipst);
217 } else {
218 ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr,
219 ill, myzoneid, ipst);
220 }
221 if (ipif == NULL) {
222 ib_s->status = ENETUNREACH;
223 goto fail;
224 }
225
226 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
227 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
228 "no bound ill for IPMP interface %s",
229 ill->ill_name);
230 ib_s->status = EFAULT;
231 goto fail;
232 }
233 } else {
234 hwaddr_ill = ill;
235 ill_refhold(hwaddr_ill); /* for symmetry */
236 }
237
238 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
239 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
240 "ibcm_arp_check_interface failed");
241 goto fail;
242 }
243
244 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
245 hwaddr_ill->ill_phys_addr_length);
246
247 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
248 wqnp->ifname);
249
250 /*
251 * at this stage, we have the source address and the IB
252 * interface, now get the destination mac address from
253 * arp or ipv6 drivers
254 */
255 ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid);
256 if (ib_s->status != 0) {
257 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
258 "ibcm_nce_lookup failed: %d", ib_s->status);
259 goto fail;
260 }
261
262 ill_refrele(hwaddr_ill);
263 ill_refrele(ill);
264 ire_refrele(ire);
265 if (ipif != NULL)
266 ipif_refrele(ipif);
267 netstack_rele(ipst->ips_netstack);
268
269 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
270 return (0);
271 fail:
272 if (hwaddr_ill != NULL)
273 ill_refrele(hwaddr_ill);
274 if (ill != NULL)
275 ill_refrele(ill);
276 if (ire != NULL)
277 ire_refrele(ire);
278 if (ipif != NULL)
279 ipif_refrele(ipif);
280 ibcm_arp_delete_prwqn(wqnp);
281 netstack_rele(ipst->ips_netstack);
282 return (1);
283 }
284
285 /*
286 * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
287 */
288 static int
289 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
290 {
291 ip2mac_t ip2m;
292 sin_t *sin;
293 sin6_t *sin6;
294 ip2mac_id_t ip2mid;
295 int err;
296
297 if (wqnp->src_addr.family != wqnp->dst_addr.family) {
298 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
299 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
300 wqnp->dst_addr.family);
301 return (1);
302 }
303 bzero(&ip2m, sizeof (ip2m));
304
305 if (wqnp->dst_addr.family == AF_INET) {
306 sin = (sin_t *)&ip2m.ip2mac_pa;
307 sin->sin_family = AF_INET;
308 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
309 } else if (wqnp->dst_addr.family == AF_INET6) {
310 sin6 = (sin6_t *)&ip2m.ip2mac_pa;
311 sin6->sin6_family = AF_INET6;
312 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
313 } else {
314 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
315 "Family: %d", wqnp->dst_addr.family);
316 return (1);
317 }
318
319 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
320
321 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
322
323 /*
324 * issue the request to IP for Neighbor Discovery
325 */
326 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
327 zoneid);
328 err = ip2m.ip2mac_err;
329 if (err == EINPROGRESS) {
330 wqnp->ip2mac_id = ip2mid;
331 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
332 err = 0;
333 } else if (err == 0) {
334 ibcm_resolver_ack(&ip2m, wqnp);
335 }
336 return (err);
337 }
338
339 /*
340 * do sanity checks on the link-level sockaddr
341 */
342 static boolean_t
343 ibcm_check_sockdl(struct sockaddr_dl *sdl)
344 {
345
346 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
347 return (B_FALSE);
348
349 return (B_TRUE);
350 }
351
352 /*
353 * callback for resolver lookups, both for success and failure.
354 * If Address resolution was succesful: return GID info.
355 */
356 static void
357 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
358 {
359 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
360 ibcm_arp_streams_t *ib_s;
361 uchar_t *cp;
362 int err = 0;
363
364 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
365
366 ib_s = wqnp->ib_str;
367 mutex_enter(&ib_s->lock);
368
369 if (ip2macp->ip2mac_err != 0) {
370 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
371 cv_broadcast(&ib_s->cv);
372 err = EHOSTUNREACH;
373 goto user_callback;
374 }
375
376 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
377 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
378 "interface %s is not IB\n", wqnp->ifname);
379 err = EHOSTUNREACH;
380 goto user_callback;
381 }
382
383 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
384 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
385
386 /*
387 * at this point we have src/dst gid's derived from the mac addresses
388 * now get the hca, port
389 */
390 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
391 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
392
393 IBCM_H2N_GID(wqnp->sgid);
394 IBCM_H2N_GID(wqnp->dgid);
395
396 user_callback:
397
398 ib_s->status = err;
399 ib_s->done = B_TRUE;
400
401 /* lock is held by the caller. */
402 cv_signal(&ib_s->cv);
403 mutex_exit(&ib_s->lock);
404 }