1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <net/if.h> 28 #include <net/if_types.h> 29 #include <inet/ip.h> 30 #include <inet/ip_ire.h> 31 #include <inet/ip_if.h> 32 #include <sys/ethernet.h> 33 #include <sys/ib/mgt/ibcm/ibcm_arp.h> 34 35 extern char cmlog[]; 36 37 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t)) 38 39 static void ibcm_resolver_ack(ip2mac_t *, void *); 40 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid); 41 42 /* 43 * delete a wait queue node from the list. 44 * assumes mutex is acquired 45 */ 46 void 47 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp) 48 { 49 ibcm_arp_streams_t *ib_s; 50 51 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp); 52 53 ib_s = wqnp->ib_str; 54 ib_s->wqnp = NULL; 55 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t)); 56 } 57 58 /* 59 * allocate a wait queue node, and insert it in the list 60 */ 61 static ibcm_arp_prwqn_t * 62 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 63 ibt_ip_addr_t *src_addr) 64 { 65 ibcm_arp_prwqn_t *wqnp; 66 67 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s); 68 69 if (dst_addr == NULL) { 70 return (NULL); 71 } 72 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) == 73 NULL) { 74 return (NULL); 75 } 76 wqnp->dst_addr = *dst_addr; 77 78 if (src_addr) { 79 wqnp->usrc_addr = *src_addr; 80 } 81 wqnp->ib_str = ib_s; 82 wqnp->ifproto = (dst_addr->family == AF_INET) ? 83 ETHERTYPE_IP : ETHERTYPE_IPV6; 84 85 ib_s->wqnp = wqnp; 86 87 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp); 88 89 return (wqnp); 90 } 91 92 93 /* 94 * Check if the interface is loopback or IB. 95 */ 96 static int 97 ibcm_arp_check_interface(ill_t *ill) 98 { 99 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB) 100 return (0); 101 102 return (ETIMEDOUT); 103 } 104 105 int 106 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 107 ibt_ip_addr_t *src_addr, zoneid_t myzoneid) 108 { 109 ibcm_arp_prwqn_t *wqnp; 110 ire_t *ire = NULL; 111 ipif_t *ipif = NULL; 112 ill_t *ill = NULL; 113 ill_t *hwaddr_ill = NULL; 114 ip_stack_t *ipst; 115 ipaddr_t setsrcv4; 116 in6_addr_t setsrcv6; 117 118 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr); 119 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr); 120 121 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) { 122 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 123 "ibcm_arp_create_prwqn failed"); 124 ib_s->status = ENOMEM; 125 return (1); 126 } 127 128 ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip; 129 if (dst_addr->family == AF_INET) { 130 /* 131 * get an ire for the destination adress. 132 * Note that we can't use MATCH_IRE_ILL since that would 133 * require that the first ill we find have ire_ill set. 134 */ 135 setsrcv4 = INADDR_ANY; 136 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL, 137 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 138 &setsrcv4, NULL, NULL); 139 140 ASSERT(ire != NULL); 141 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 142 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 143 "ire_route_recursive_v4 failed"); 144 ib_s->status = EFAULT; 145 goto fail; 146 } 147 ill = ire_nexthop_ill(ire); 148 if (ill == NULL) { 149 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 150 "ire_nexthop_ill failed"); 151 ib_s->status = EFAULT; 152 goto fail; 153 } 154 155 /* Pick a source address */ 156 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr, 157 INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr, 158 NULL, NULL) != 0) { 159 ib_s->status = EADDRNOTAVAIL; 160 goto fail; 161 } 162 163 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr; 164 wqnp->netmask.un.ip4addr = ire->ire_mask; 165 wqnp->src_addr.family = wqnp->gateway.family = 166 wqnp->netmask.family = AF_INET; 167 168 } else if (dst_addr->family == AF_INET6) { 169 /* 170 * get an ire for the destination adress. 171 * Note that we can't use MATCH_IRE_ILL since that would 172 * require that the first ill we find have ire_ill set. Thus 173 * we compare ire_ill against ipif_ill after the lookup. 174 */ 175 setsrcv6 = ipv6_all_zeros; 176 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL, 177 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 178 &setsrcv6, NULL, NULL); 179 180 ASSERT(ire != NULL); 181 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 182 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 183 "ire_route_recursive_v6 failed"); 184 ib_s->status = EFAULT; 185 goto fail; 186 } 187 ill = ire_nexthop_ill(ire); 188 if (ill == NULL) { 189 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 190 "ire_nexthop_ill failed"); 191 ib_s->status = EFAULT; 192 goto fail; 193 } 194 195 /* Pick a source address */ 196 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr, 197 myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 198 &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) { 199 ib_s->status = EADDRNOTAVAIL; 200 goto fail; 201 } 202 203 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6; 204 wqnp->netmask.un.ip6addr = ire->ire_mask_v6; 205 wqnp->src_addr.family = wqnp->gateway.family = 206 wqnp->netmask.family = AF_INET6; 207 } 208 209 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname)); 210 211 /* 212 * For IPMP data addresses, we need to use the hardware address of the 213 * interface bound to the given address. 214 */ 215 if (IS_IPMP(ill)) { 216 if (wqnp->src_addr.family == AF_INET) { 217 ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill, 218 myzoneid, ipst); 219 } else { 220 ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr, 221 ill, myzoneid, ipst); 222 } 223 if (ipif == NULL) { 224 ib_s->status = ENETUNREACH; 225 goto fail; 226 } 227 228 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) { 229 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 230 "no bound ill for IPMP interface %s", 231 ill->ill_name); 232 ib_s->status = EFAULT; 233 goto fail; 234 } 235 } else { 236 hwaddr_ill = ill; 237 ill_refhold(hwaddr_ill); /* for symmetry */ 238 } 239 240 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) { 241 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 242 "ibcm_arp_check_interface failed"); 243 goto fail; 244 } 245 246 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac, 247 hwaddr_ill->ill_phys_addr_length); 248 249 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s", 250 wqnp->ifname); 251 252 /* 253 * at this stage, we have the source address and the IB 254 * interface, now get the destination mac address from 255 * arp or ipv6 drivers 256 */ 257 ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid); 258 if (ib_s->status != 0) { 259 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 260 "ibcm_nce_lookup failed: %d", ib_s->status); 261 goto fail; 262 } 263 264 ill_refrele(hwaddr_ill); 265 ill_refrele(ill); 266 ire_refrele(ire); 267 if (ipif != NULL) 268 ipif_refrele(ipif); 269 netstack_rele(ipst->ips_netstack); 270 271 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp); 272 return (0); 273 fail: 274 if (hwaddr_ill != NULL) 275 ill_refrele(hwaddr_ill); 276 if (ill != NULL) 277 ill_refrele(ill); 278 if (ire != NULL) 279 ire_refrele(ire); 280 if (ipif != NULL) 281 ipif_refrele(ipif); 282 ibcm_arp_delete_prwqn(wqnp); 283 netstack_rele(ipst->ips_netstack); 284 return (1); 285 } 286 287 /* 288 * Query the neighbor cache for IPv4/IPv6 to mac address mapping. 289 */ 290 static int 291 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid) 292 { 293 ip2mac_t ip2m; 294 sin_t *sin; 295 sin6_t *sin6; 296 ip2mac_id_t ip2mid; 297 int err; 298 299 if (wqnp->src_addr.family != wqnp->dst_addr.family) { 300 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR " 301 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family, 302 wqnp->dst_addr.family); 303 return (1); 304 } 305 bzero(&ip2m, sizeof (ip2m)); 306 307 if (wqnp->dst_addr.family == AF_INET) { 308 sin = (sin_t *)&ip2m.ip2mac_pa; 309 sin->sin_family = AF_INET; 310 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr; 311 } else if (wqnp->dst_addr.family == AF_INET6) { 312 sin6 = (sin6_t *)&ip2m.ip2mac_pa; 313 sin6->sin6_family = AF_INET6; 314 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr; 315 } else { 316 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR " 317 "Family: %d", wqnp->dst_addr.family); 318 return (1); 319 } 320 321 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex; 322 323 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 324 325 /* 326 * issue the request to IP for Neighbor Discovery 327 */ 328 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp, 329 zoneid); 330 err = ip2m.ip2mac_err; 331 if (err == EINPROGRESS) { 332 wqnp->ip2mac_id = ip2mid; 333 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 334 err = 0; 335 } else if (err == 0) { 336 ibcm_resolver_ack(&ip2m, wqnp); 337 } 338 return (err); 339 } 340 341 /* 342 * do sanity checks on the link-level sockaddr 343 */ 344 static boolean_t 345 ibcm_check_sockdl(struct sockaddr_dl *sdl) 346 { 347 348 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL) 349 return (B_FALSE); 350 351 return (B_TRUE); 352 } 353 354 /* 355 * callback for resolver lookups, both for success and failure. 356 * If Address resolution was succesful: return GID info. 357 */ 358 static void 359 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg) 360 { 361 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; 362 ibcm_arp_streams_t *ib_s; 363 uchar_t *cp; 364 int err = 0; 365 366 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp); 367 368 ib_s = wqnp->ib_str; 369 mutex_enter(&ib_s->lock); 370 371 if (ip2macp->ip2mac_err != 0) { 372 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING; 373 cv_broadcast(&ib_s->cv); 374 err = EHOSTUNREACH; 375 goto user_callback; 376 } 377 378 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) { 379 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: " 380 "interface %s is not IB\n", wqnp->ifname); 381 err = EHOSTUNREACH; 382 goto user_callback; 383 } 384 385 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha); 386 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL); 387 388 /* 389 * at this point we have src/dst gid's derived from the mac addresses 390 * now get the hca, port 391 */ 392 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t)); 393 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); 394 395 IBCM_H2N_GID(wqnp->sgid); 396 IBCM_H2N_GID(wqnp->dgid); 397 398 user_callback: 399 400 ib_s->status = err; 401 ib_s->done = B_TRUE; 402 403 /* lock is held by the caller. */ 404 cv_signal(&ib_s->cv); 405 mutex_exit(&ib_s->lock); 406 }