1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <net/if.h>
  28 #include <net/if_types.h>
  29 #include <inet/ip.h>
  30 #include <inet/ip_ire.h>
  31 #include <inet/ip_if.h>
  32 #include <sys/ethernet.h>
  33 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
  34 
/*
 * Log identifier passed as the first argument to the IBTF_DPRINTF_* macros
 * throughout this file; declared extern here, so its definition is not in
 * this file's visible scope.
 */
extern char cmlog[];

/* Forward declarations for static helpers defined further down this file. */
static void ibcm_resolver_ack(ip2mac_t *, void *);
static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
  39 
  40 /*
  41  * delete a wait queue node from the list.
  42  * assumes mutex is acquired
  43  */
  44 void
  45 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
  46 {
  47         ibcm_arp_streams_t *ib_s;
  48 
  49         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
  50 
  51         ib_s = wqnp->ib_str;
  52         ib_s->wqnp = NULL;
  53         kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
  54 }
  55 
  56 /*
  57  * allocate a wait queue node, and insert it in the list
  58  */
  59 static ibcm_arp_prwqn_t *
  60 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
  61     ibt_ip_addr_t *src_addr)
  62 {
  63         ibcm_arp_prwqn_t *wqnp;
  64 
  65         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
  66 
  67         if (dst_addr == NULL) {
  68                 return (NULL);
  69         }
  70         if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
  71             NULL) {
  72                 return (NULL);
  73         }
  74         wqnp->dst_addr = *dst_addr;
  75 
  76         if (src_addr) {
  77                 wqnp->usrc_addr = *src_addr;
  78         }
  79         wqnp->ib_str = ib_s;
  80         wqnp->ifproto = (dst_addr->family == AF_INET) ?
  81             ETHERTYPE_IP : ETHERTYPE_IPV6;
  82 
  83         ib_s->wqnp = wqnp;
  84 
  85         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
  86 
  87         return (wqnp);
  88 }
  89 
  90 
  91 /*
  92  * Check if the interface is loopback or IB.
  93  */
  94 static int
  95 ibcm_arp_check_interface(ill_t *ill)
  96 {
  97         if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
  98                 return (0);
  99 
 100         return (ETIMEDOUT);
 101 }
 102 
 103 int
 104 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
 105     ibt_ip_addr_t *src_addr, zoneid_t myzoneid)
 106 {
 107         ibcm_arp_prwqn_t *wqnp;
 108         ire_t   *ire = NULL;
 109         ipif_t  *ipif = NULL;
 110         ill_t   *ill = NULL;
 111         ill_t   *hwaddr_ill = NULL;
 112         ip_stack_t *ipst;
 113         ipaddr_t        setsrcv4;
 114         in6_addr_t      setsrcv6;
 115 
 116         IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
 117         IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
 118 
 119         if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
 120                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 121                     "ibcm_arp_create_prwqn failed");
 122                 ib_s->status = ENOMEM;
 123                 return (1);
 124         }
 125 
 126         ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip;
 127         if (dst_addr->family == AF_INET) {
 128                 /*
 129                  * get an ire for the destination adress.
 130                  * Note that we can't use MATCH_IRE_ILL since that would
 131                  * require that the first ill we find have ire_ill set.
 132                  */
 133                 setsrcv4 = INADDR_ANY;
 134                 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
 135                     myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
 136                     &setsrcv4, NULL, NULL);
 137 
 138                 ASSERT(ire != NULL);
 139                 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 140                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 141                             "ire_route_recursive_v4 failed");
 142                         ib_s->status = EFAULT;
 143                         goto fail;
 144                 }
 145                 ill = ire_nexthop_ill(ire);
 146                 if (ill == NULL) {
 147                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 148                             "ire_nexthop_ill failed");
 149                         ib_s->status = EFAULT;
 150                         goto fail;
 151                 }
 152 
 153                 /* Pick a source address */
 154                 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr,
 155                     INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr,
 156                     NULL, NULL) != 0) {
 157                         ib_s->status = EADDRNOTAVAIL;
 158                         goto fail;
 159                 }
 160 
 161                 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
 162                 wqnp->netmask.un.ip4addr = ire->ire_mask;
 163                 wqnp->src_addr.family = wqnp->gateway.family =
 164                     wqnp->netmask.family = AF_INET;
 165 
 166         } else if (dst_addr->family == AF_INET6) {
 167                 /*
 168                  * get an ire for the destination adress.
 169                  * Note that we can't use MATCH_IRE_ILL since that would
 170                  * require that the first ill we find have ire_ill set. Thus
 171                  * we compare ire_ill against ipif_ill after the lookup.
 172                  */
 173                 setsrcv6 = ipv6_all_zeros;
 174                 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
 175                     myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
 176                     &setsrcv6, NULL, NULL);
 177 
 178                 ASSERT(ire != NULL);
 179                 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 180                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 181                             "ire_route_recursive_v6 failed");
 182                         ib_s->status = EFAULT;
 183                         goto fail;
 184                 }
 185                 ill = ire_nexthop_ill(ire);
 186                 if (ill == NULL) {
 187                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 188                             "ire_nexthop_ill failed");
 189                         ib_s->status = EFAULT;
 190                         goto fail;
 191                 }
 192 
 193                 /* Pick a source address */
 194                 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr,
 195                     myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT,
 196                     &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) {
 197                         ib_s->status = EADDRNOTAVAIL;
 198                         goto fail;
 199                 }
 200 
 201                 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
 202                 wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
 203                 wqnp->src_addr.family = wqnp->gateway.family =
 204                     wqnp->netmask.family = AF_INET6;
 205         }
 206 
 207         (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
 208 
 209         /*
 210          * For IPMP data addresses, we need to use the hardware address of the
 211          * interface bound to the given address.
 212          */
 213         if (IS_IPMP(ill)) {
 214                 if (wqnp->src_addr.family == AF_INET) {
 215                         ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill,
 216                             myzoneid, ipst);
 217                 } else {
 218                         ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr,
 219                             ill, myzoneid, ipst);
 220                 }
 221                 if (ipif == NULL) {
 222                         ib_s->status = ENETUNREACH;
 223                         goto fail;
 224                 }
 225 
 226                 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
 227                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 228                             "no bound ill for IPMP interface %s",
 229                             ill->ill_name);
 230                         ib_s->status = EFAULT;
 231                         goto fail;
 232                 }
 233         } else {
 234                 hwaddr_ill = ill;
 235                 ill_refhold(hwaddr_ill);        /* for symmetry */
 236         }
 237 
 238         if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
 239                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 240                     "ibcm_arp_check_interface failed");
 241                 goto fail;
 242         }
 243 
 244         bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
 245             hwaddr_ill->ill_phys_addr_length);
 246 
 247         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
 248             wqnp->ifname);
 249 
 250         /*
 251          * at this stage, we have the source address and the IB
 252          * interface, now get the destination mac address from
 253          * arp or ipv6 drivers
 254          */
 255         ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid);
 256         if (ib_s->status != 0) {
 257                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 258                     "ibcm_nce_lookup failed: %d", ib_s->status);
 259                 goto fail;
 260         }
 261 
 262         ill_refrele(hwaddr_ill);
 263         ill_refrele(ill);
 264         ire_refrele(ire);
 265         if (ipif != NULL)
 266                 ipif_refrele(ipif);
 267         netstack_rele(ipst->ips_netstack);
 268 
 269         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
 270         return (0);
 271 fail:
 272         if (hwaddr_ill != NULL)
 273                 ill_refrele(hwaddr_ill);
 274         if (ill != NULL)
 275                 ill_refrele(ill);
 276         if (ire != NULL)
 277                 ire_refrele(ire);
 278         if (ipif != NULL)
 279                 ipif_refrele(ipif);
 280         ibcm_arp_delete_prwqn(wqnp);
 281         netstack_rele(ipst->ips_netstack);
 282         return (1);
 283 }
 284 
 285 /*
 286  * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
 287  */
 288 static int
 289 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
 290 {
 291         ip2mac_t        ip2m;
 292         sin_t           *sin;
 293         sin6_t          *sin6;
 294         ip2mac_id_t     ip2mid;
 295         int             err;
 296 
 297         if (wqnp->src_addr.family != wqnp->dst_addr.family) {
 298                 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
 299                     "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
 300                     wqnp->dst_addr.family);
 301                 return (1);
 302         }
 303         bzero(&ip2m, sizeof (ip2m));
 304 
 305         if (wqnp->dst_addr.family == AF_INET) {
 306                 sin = (sin_t *)&ip2m.ip2mac_pa;
 307                 sin->sin_family = AF_INET;
 308                 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
 309         } else if (wqnp->dst_addr.family == AF_INET6) {
 310                 sin6 = (sin6_t *)&ip2m.ip2mac_pa;
 311                 sin6->sin6_family = AF_INET6;
 312                 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
 313         } else {
 314                 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
 315                     "Family: %d", wqnp->dst_addr.family);
 316                 return (1);
 317         }
 318 
 319         ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
 320 
 321         wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
 322 
 323         /*
 324          * issue the request to IP for Neighbor Discovery
 325          */
 326         ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
 327             zoneid);
 328         err = ip2m.ip2mac_err;
 329         if (err == EINPROGRESS) {
 330                 wqnp->ip2mac_id = ip2mid;
 331                 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
 332                 err = 0;
 333         } else if (err == 0) {
 334                 ibcm_resolver_ack(&ip2m, wqnp);
 335         }
 336         return (err);
 337 }
 338 
 339 /*
 340  * do sanity checks on the link-level sockaddr
 341  */
 342 static boolean_t
 343 ibcm_check_sockdl(struct sockaddr_dl *sdl)
 344 {
 345 
 346         if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
 347                 return (B_FALSE);
 348 
 349         return (B_TRUE);
 350 }
 351 
 352 /*
 353  * callback for resolver lookups, both for success and failure.
 354  * If Address resolution was succesful: return GID info.
 355  */
 356 static void
 357 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
 358 {
 359         ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
 360         ibcm_arp_streams_t *ib_s;
 361         uchar_t *cp;
 362         int err = 0;
 363 
 364         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
 365 
 366         ib_s = wqnp->ib_str;
 367         mutex_enter(&ib_s->lock);
 368 
 369         if (ip2macp->ip2mac_err != 0) {
 370                 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
 371                 cv_broadcast(&ib_s->cv);
 372                 err = EHOSTUNREACH;
 373                 goto user_callback;
 374         }
 375 
 376         if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
 377                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
 378                     "interface %s is not IB\n", wqnp->ifname);
 379                 err = EHOSTUNREACH;
 380                 goto user_callback;
 381         }
 382 
 383         cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
 384         bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
 385 
 386         /*
 387          * at this point we have src/dst gid's derived from the mac addresses
 388          * now get the hca, port
 389          */
 390         bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
 391         bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
 392 
 393         IBCM_H2N_GID(wqnp->sgid);
 394         IBCM_H2N_GID(wqnp->dgid);
 395 
 396 user_callback:
 397 
 398         ib_s->status = err;
 399         ib_s->done = B_TRUE;
 400 
 401         /* lock is held by the caller. */
 402         cv_signal(&ib_s->cv);
 403         mutex_exit(&ib_s->lock);
 404 }