1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <net/if.h>
  28 #include <net/if_types.h>
  29 #include <inet/ip.h>
  30 #include <inet/ip_ire.h>
  31 #include <inet/ip_if.h>
  32 #include <sys/ethernet.h>
  33 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
  34 
  35 extern char cmlog[];
  36 
  37 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
  38 
  39 static void ibcm_resolver_ack(ip2mac_t *, void *);
  40 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
  41 
  42 /*
  43  * delete a wait queue node from the list.
  44  * assumes mutex is acquired
  45  */
  46 void
  47 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
  48 {
  49         ibcm_arp_streams_t *ib_s;
  50 
  51         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
  52 
  53         ib_s = wqnp->ib_str;
  54         ib_s->wqnp = NULL;
  55         kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
  56 }
  57 
  58 /*
  59  * allocate a wait queue node, and insert it in the list
  60  */
  61 static ibcm_arp_prwqn_t *
  62 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
  63     ibt_ip_addr_t *src_addr)
  64 {
  65         ibcm_arp_prwqn_t *wqnp;
  66 
  67         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
  68 
  69         if (dst_addr == NULL) {
  70                 return (NULL);
  71         }
  72         if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
  73             NULL) {
  74                 return (NULL);
  75         }
  76         wqnp->dst_addr = *dst_addr;
  77 
  78         if (src_addr) {
  79                 wqnp->usrc_addr = *src_addr;
  80         }
  81         wqnp->ib_str = ib_s;
  82         wqnp->ifproto = (dst_addr->family == AF_INET) ?
  83             ETHERTYPE_IP : ETHERTYPE_IPV6;
  84 
  85         ib_s->wqnp = wqnp;
  86 
  87         IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
  88 
  89         return (wqnp);
  90 }
  91 
  92 
  93 /*
  94  * Check if the interface is loopback or IB.
  95  */
  96 static int
  97 ibcm_arp_check_interface(ill_t *ill)
  98 {
  99         if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
 100                 return (0);
 101 
 102         return (ETIMEDOUT);
 103 }
 104 
 105 int
 106 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
 107     ibt_ip_addr_t *src_addr, zoneid_t myzoneid)
 108 {
 109         ibcm_arp_prwqn_t *wqnp;
 110         ire_t   *ire = NULL;
 111         ipif_t  *ipif = NULL;
 112         ill_t   *ill = NULL;
 113         ill_t   *hwaddr_ill = NULL;
 114         ip_stack_t *ipst;
 115         ipaddr_t        setsrcv4;
 116         in6_addr_t      setsrcv6;
 117 
 118         IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
 119         IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
 120 
 121         if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
 122                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 123                     "ibcm_arp_create_prwqn failed");
 124                 ib_s->status = ENOMEM;
 125                 return (1);
 126         }
 127 
 128         ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip;
 129         if (dst_addr->family == AF_INET) {
 130                 /*
 131                  * get an ire for the destination adress.
 132                  * Note that we can't use MATCH_IRE_ILL since that would
 133                  * require that the first ill we find have ire_ill set.
 134                  */
 135                 setsrcv4 = INADDR_ANY;
 136                 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
 137                     myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
 138                     &setsrcv4, NULL, NULL);
 139 
 140                 ASSERT(ire != NULL);
 141                 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 142                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 143                             "ire_route_recursive_v4 failed");
 144                         ib_s->status = EFAULT;
 145                         goto fail;
 146                 }
 147                 ill = ire_nexthop_ill(ire);
 148                 if (ill == NULL) {
 149                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 150                             "ire_nexthop_ill failed");
 151                         ib_s->status = EFAULT;
 152                         goto fail;
 153                 }
 154 
 155                 /* Pick a source address */
 156                 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr,
 157                     INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr,
 158                     NULL, NULL) != 0) {
 159                         ib_s->status = EADDRNOTAVAIL;
 160                         goto fail;
 161                 }
 162 
 163                 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
 164                 wqnp->netmask.un.ip4addr = ire->ire_mask;
 165                 wqnp->src_addr.family = wqnp->gateway.family =
 166                     wqnp->netmask.family = AF_INET;
 167 
 168         } else if (dst_addr->family == AF_INET6) {
 169                 /*
 170                  * get an ire for the destination adress.
 171                  * Note that we can't use MATCH_IRE_ILL since that would
 172                  * require that the first ill we find have ire_ill set. Thus
 173                  * we compare ire_ill against ipif_ill after the lookup.
 174                  */
 175                 setsrcv6 = ipv6_all_zeros;
 176                 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
 177                     myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
 178                     &setsrcv6, NULL, NULL);
 179 
 180                 ASSERT(ire != NULL);
 181                 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 182                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 183                             "ire_route_recursive_v6 failed");
 184                         ib_s->status = EFAULT;
 185                         goto fail;
 186                 }
 187                 ill = ire_nexthop_ill(ire);
 188                 if (ill == NULL) {
 189                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 190                             "ire_nexthop_ill failed");
 191                         ib_s->status = EFAULT;
 192                         goto fail;
 193                 }
 194 
 195                 /* Pick a source address */
 196                 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr,
 197                     myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT,
 198                     &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) {
 199                         ib_s->status = EADDRNOTAVAIL;
 200                         goto fail;
 201                 }
 202 
 203                 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
 204                 wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
 205                 wqnp->src_addr.family = wqnp->gateway.family =
 206                     wqnp->netmask.family = AF_INET6;
 207         }
 208 
 209         (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
 210 
 211         /*
 212          * For IPMP data addresses, we need to use the hardware address of the
 213          * interface bound to the given address.
 214          */
 215         if (IS_IPMP(ill)) {
 216                 if (wqnp->src_addr.family == AF_INET) {
 217                         ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill,
 218                             myzoneid, ipst);
 219                 } else {
 220                         ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr,
 221                             ill, myzoneid, ipst);
 222                 }
 223                 if (ipif == NULL) {
 224                         ib_s->status = ENETUNREACH;
 225                         goto fail;
 226                 }
 227 
 228                 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
 229                         IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 230                             "no bound ill for IPMP interface %s",
 231                             ill->ill_name);
 232                         ib_s->status = EFAULT;
 233                         goto fail;
 234                 }
 235         } else {
 236                 hwaddr_ill = ill;
 237                 ill_refhold(hwaddr_ill);        /* for symmetry */
 238         }
 239 
 240         if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
 241                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 242                     "ibcm_arp_check_interface failed");
 243                 goto fail;
 244         }
 245 
 246         bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
 247             hwaddr_ill->ill_phys_addr_length);
 248 
 249         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
 250             wqnp->ifname);
 251 
 252         /*
 253          * at this stage, we have the source address and the IB
 254          * interface, now get the destination mac address from
 255          * arp or ipv6 drivers
 256          */
 257         ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid);
 258         if (ib_s->status != 0) {
 259                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
 260                     "ibcm_nce_lookup failed: %d", ib_s->status);
 261                 goto fail;
 262         }
 263 
 264         ill_refrele(hwaddr_ill);
 265         ill_refrele(ill);
 266         ire_refrele(ire);
 267         if (ipif != NULL)
 268                 ipif_refrele(ipif);
 269         netstack_rele(ipst->ips_netstack);
 270 
 271         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
 272         return (0);
 273 fail:
 274         if (hwaddr_ill != NULL)
 275                 ill_refrele(hwaddr_ill);
 276         if (ill != NULL)
 277                 ill_refrele(ill);
 278         if (ire != NULL)
 279                 ire_refrele(ire);
 280         if (ipif != NULL)
 281                 ipif_refrele(ipif);
 282         ibcm_arp_delete_prwqn(wqnp);
 283         netstack_rele(ipst->ips_netstack);
 284         return (1);
 285 }
 286 
 287 /*
 288  * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
 289  */
 290 static int
 291 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
 292 {
 293         ip2mac_t        ip2m;
 294         sin_t           *sin;
 295         sin6_t          *sin6;
 296         ip2mac_id_t     ip2mid;
 297         int             err;
 298 
 299         if (wqnp->src_addr.family != wqnp->dst_addr.family) {
 300                 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
 301                     "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
 302                     wqnp->dst_addr.family);
 303                 return (1);
 304         }
 305         bzero(&ip2m, sizeof (ip2m));
 306 
 307         if (wqnp->dst_addr.family == AF_INET) {
 308                 sin = (sin_t *)&ip2m.ip2mac_pa;
 309                 sin->sin_family = AF_INET;
 310                 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
 311         } else if (wqnp->dst_addr.family == AF_INET6) {
 312                 sin6 = (sin6_t *)&ip2m.ip2mac_pa;
 313                 sin6->sin6_family = AF_INET6;
 314                 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
 315         } else {
 316                 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
 317                     "Family: %d", wqnp->dst_addr.family);
 318                 return (1);
 319         }
 320 
 321         ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
 322 
 323         wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
 324 
 325         /*
 326          * issue the request to IP for Neighbor Discovery
 327          */
 328         ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
 329             zoneid);
 330         err = ip2m.ip2mac_err;
 331         if (err == EINPROGRESS) {
 332                 wqnp->ip2mac_id = ip2mid;
 333                 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
 334                 err = 0;
 335         } else if (err == 0) {
 336                 ibcm_resolver_ack(&ip2m, wqnp);
 337         }
 338         return (err);
 339 }
 340 
 341 /*
 342  * do sanity checks on the link-level sockaddr
 343  */
 344 static boolean_t
 345 ibcm_check_sockdl(struct sockaddr_dl *sdl)
 346 {
 347 
 348         if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
 349                 return (B_FALSE);
 350 
 351         return (B_TRUE);
 352 }
 353 
 354 /*
 355  * callback for resolver lookups, both for success and failure.
 356  * If Address resolution was succesful: return GID info.
 357  */
 358 static void
 359 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
 360 {
 361         ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
 362         ibcm_arp_streams_t *ib_s;
 363         uchar_t *cp;
 364         int err = 0;
 365 
 366         IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
 367 
 368         ib_s = wqnp->ib_str;
 369         mutex_enter(&ib_s->lock);
 370 
 371         if (ip2macp->ip2mac_err != 0) {
 372                 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
 373                 cv_broadcast(&ib_s->cv);
 374                 err = EHOSTUNREACH;
 375                 goto user_callback;
 376         }
 377 
 378         if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
 379                 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
 380                     "interface %s is not IB\n", wqnp->ifname);
 381                 err = EHOSTUNREACH;
 382                 goto user_callback;
 383         }
 384 
 385         cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
 386         bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
 387 
 388         /*
 389          * at this point we have src/dst gid's derived from the mac addresses
 390          * now get the hca, port
 391          */
 392         bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
 393         bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
 394 
 395         IBCM_H2N_GID(wqnp->sgid);
 396         IBCM_H2N_GID(wqnp->dgid);
 397 
 398 user_callback:
 399 
 400         ib_s->status = err;
 401         ib_s->done = B_TRUE;
 402 
 403         /* lock is held by the caller. */
 404         cv_signal(&ib_s->cv);
 405         mutex_exit(&ib_s->lock);
 406 }