1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <net/if.h> 28 #include <net/if_types.h> 29 #include <inet/ip.h> 30 #include <inet/ip_ire.h> 31 #include <inet/ip_if.h> 32 #include <sys/ethernet.h> 33 #include <sys/ib/mgt/ibcm/ibcm_arp.h> 34 35 extern char cmlog[]; 36 37 static void ibcm_resolver_ack(ip2mac_t *, void *); 38 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid); 39 40 /* 41 * delete a wait queue node from the list. 42 * assumes mutex is acquired 43 */ 44 void 45 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp) 46 { 47 ibcm_arp_streams_t *ib_s; 48 49 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp); 50 51 ib_s = wqnp->ib_str; 52 ib_s->wqnp = NULL; 53 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t)); 54 } 55 56 /* 57 * allocate a wait queue node, and insert it in the list 58 */ 59 static ibcm_arp_prwqn_t * 60 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 61 ibt_ip_addr_t *src_addr) 62 { 63 ibcm_arp_prwqn_t *wqnp; 64 65 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s); 66 67 if (dst_addr == NULL) { 68 return (NULL); 69 } 70 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) == 71 NULL) { 72 return (NULL); 73 } 74 wqnp->dst_addr = *dst_addr; 75 76 if (src_addr) { 77 wqnp->usrc_addr = *src_addr; 78 } 79 wqnp->ib_str = ib_s; 80 wqnp->ifproto = (dst_addr->family == AF_INET) ? 81 ETHERTYPE_IP : ETHERTYPE_IPV6; 82 83 ib_s->wqnp = wqnp; 84 85 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp); 86 87 return (wqnp); 88 } 89 90 91 /* 92 * Check if the interface is loopback or IB. 93 */ 94 static int 95 ibcm_arp_check_interface(ill_t *ill) 96 { 97 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB) 98 return (0); 99 100 return (ETIMEDOUT); 101 } 102 103 int 104 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 105 ibt_ip_addr_t *src_addr, zoneid_t myzoneid) 106 { 107 ibcm_arp_prwqn_t *wqnp; 108 ire_t *ire = NULL; 109 ipif_t *ipif = NULL; 110 ill_t *ill = NULL; 111 ill_t *hwaddr_ill = NULL; 112 ip_stack_t *ipst; 113 ipaddr_t setsrcv4; 114 in6_addr_t setsrcv6; 115 116 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr); 117 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr); 118 119 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) { 120 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 121 "ibcm_arp_create_prwqn failed"); 122 ib_s->status = ENOMEM; 123 return (1); 124 } 125 126 ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip; 127 if (dst_addr->family == AF_INET) { 128 /* 129 * get an ire for the destination adress. 130 * Note that we can't use MATCH_IRE_ILL since that would 131 * require that the first ill we find have ire_ill set. 132 */ 133 setsrcv4 = INADDR_ANY; 134 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL, 135 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 136 &setsrcv4, NULL, NULL); 137 138 ASSERT(ire != NULL); 139 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 140 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 141 "ire_route_recursive_v4 failed"); 142 ib_s->status = EFAULT; 143 goto fail; 144 } 145 ill = ire_nexthop_ill(ire); 146 if (ill == NULL) { 147 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 148 "ire_nexthop_ill failed"); 149 ib_s->status = EFAULT; 150 goto fail; 151 } 152 153 /* Pick a source address */ 154 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr, 155 INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr, 156 NULL, NULL) != 0) { 157 ib_s->status = EADDRNOTAVAIL; 158 goto fail; 159 } 160 161 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr; 162 wqnp->netmask.un.ip4addr = ire->ire_mask; 163 wqnp->src_addr.family = wqnp->gateway.family = 164 wqnp->netmask.family = AF_INET; 165 166 } else if (dst_addr->family == AF_INET6) { 167 /* 168 * get an ire for the destination adress. 169 * Note that we can't use MATCH_IRE_ILL since that would 170 * require that the first ill we find have ire_ill set. Thus 171 * we compare ire_ill against ipif_ill after the lookup. 172 */ 173 setsrcv6 = ipv6_all_zeros; 174 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL, 175 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 176 &setsrcv6, NULL, NULL); 177 178 ASSERT(ire != NULL); 179 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 180 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 181 "ire_route_recursive_v6 failed"); 182 ib_s->status = EFAULT; 183 goto fail; 184 } 185 ill = ire_nexthop_ill(ire); 186 if (ill == NULL) { 187 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 188 "ire_nexthop_ill failed"); 189 ib_s->status = EFAULT; 190 goto fail; 191 } 192 193 /* Pick a source address */ 194 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr, 195 myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 196 &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) { 197 ib_s->status = EADDRNOTAVAIL; 198 goto fail; 199 } 200 201 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6; 202 wqnp->netmask.un.ip6addr = ire->ire_mask_v6; 203 wqnp->src_addr.family = wqnp->gateway.family = 204 wqnp->netmask.family = AF_INET6; 205 } 206 207 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname)); 208 209 /* 210 * For IPMP data addresses, we need to use the hardware address of the 211 * interface bound to the given address. 212 */ 213 if (IS_IPMP(ill)) { 214 if (wqnp->src_addr.family == AF_INET) { 215 ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill, 216 myzoneid, ipst); 217 } else { 218 ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr, 219 ill, myzoneid, ipst); 220 } 221 if (ipif == NULL) { 222 ib_s->status = ENETUNREACH; 223 goto fail; 224 } 225 226 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) { 227 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 228 "no bound ill for IPMP interface %s", 229 ill->ill_name); 230 ib_s->status = EFAULT; 231 goto fail; 232 } 233 } else { 234 hwaddr_ill = ill; 235 ill_refhold(hwaddr_ill); /* for symmetry */ 236 } 237 238 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) { 239 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 240 "ibcm_arp_check_interface failed"); 241 goto fail; 242 } 243 244 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac, 245 hwaddr_ill->ill_phys_addr_length); 246 247 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s", 248 wqnp->ifname); 249 250 /* 251 * at this stage, we have the source address and the IB 252 * interface, now get the destination mac address from 253 * arp or ipv6 drivers 254 */ 255 ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid); 256 if (ib_s->status != 0) { 257 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 258 "ibcm_nce_lookup failed: %d", ib_s->status); 259 goto fail; 260 } 261 262 ill_refrele(hwaddr_ill); 263 ill_refrele(ill); 264 ire_refrele(ire); 265 if (ipif != NULL) 266 ipif_refrele(ipif); 267 netstack_rele(ipst->ips_netstack); 268 269 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp); 270 return (0); 271 fail: 272 if (hwaddr_ill != NULL) 273 ill_refrele(hwaddr_ill); 274 if (ill != NULL) 275 ill_refrele(ill); 276 if (ire != NULL) 277 ire_refrele(ire); 278 if (ipif != NULL) 279 ipif_refrele(ipif); 280 ibcm_arp_delete_prwqn(wqnp); 281 netstack_rele(ipst->ips_netstack); 282 return (1); 283 } 284 285 /* 286 * Query the neighbor cache for IPv4/IPv6 to mac address mapping. 287 */ 288 static int 289 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid) 290 { 291 ip2mac_t ip2m; 292 sin_t *sin; 293 sin6_t *sin6; 294 ip2mac_id_t ip2mid; 295 int err; 296 297 if (wqnp->src_addr.family != wqnp->dst_addr.family) { 298 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR " 299 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family, 300 wqnp->dst_addr.family); 301 return (1); 302 } 303 bzero(&ip2m, sizeof (ip2m)); 304 305 if (wqnp->dst_addr.family == AF_INET) { 306 sin = (sin_t *)&ip2m.ip2mac_pa; 307 sin->sin_family = AF_INET; 308 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr; 309 } else if (wqnp->dst_addr.family == AF_INET6) { 310 sin6 = (sin6_t *)&ip2m.ip2mac_pa; 311 sin6->sin6_family = AF_INET6; 312 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr; 313 } else { 314 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR " 315 "Family: %d", wqnp->dst_addr.family); 316 return (1); 317 } 318 319 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex; 320 321 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 322 323 /* 324 * issue the request to IP for Neighbor Discovery 325 */ 326 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp, 327 zoneid); 328 err = ip2m.ip2mac_err; 329 if (err == EINPROGRESS) { 330 wqnp->ip2mac_id = ip2mid; 331 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 332 err = 0; 333 } else if (err == 0) { 334 ibcm_resolver_ack(&ip2m, wqnp); 335 } 336 return (err); 337 } 338 339 /* 340 * do sanity checks on the link-level sockaddr 341 */ 342 static boolean_t 343 ibcm_check_sockdl(struct sockaddr_dl *sdl) 344 { 345 346 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL) 347 return (B_FALSE); 348 349 return (B_TRUE); 350 } 351 352 /* 353 * callback for resolver lookups, both for success and failure. 354 * If Address resolution was succesful: return GID info. 355 */ 356 static void 357 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg) 358 { 359 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; 360 ibcm_arp_streams_t *ib_s; 361 uchar_t *cp; 362 int err = 0; 363 364 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp); 365 366 ib_s = wqnp->ib_str; 367 mutex_enter(&ib_s->lock); 368 369 if (ip2macp->ip2mac_err != 0) { 370 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING; 371 cv_broadcast(&ib_s->cv); 372 err = EHOSTUNREACH; 373 goto user_callback; 374 } 375 376 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) { 377 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: " 378 "interface %s is not IB\n", wqnp->ifname); 379 err = EHOSTUNREACH; 380 goto user_callback; 381 } 382 383 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha); 384 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL); 385 386 /* 387 * at this point we have src/dst gid's derived from the mac addresses 388 * now get the hca, port 389 */ 390 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t)); 391 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); 392 393 IBCM_H2N_GID(wqnp->sgid); 394 IBCM_H2N_GID(wqnp->dgid); 395 396 user_callback: 397 398 ib_s->status = err; 399 ib_s->done = B_TRUE; 400 401 /* lock is held by the caller. */ 402 cv_signal(&ib_s->cv); 403 mutex_exit(&ib_s->lock); 404 }