1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2013 by Delphix. All rights reserved.
  27  */
  28 
  29 #include <sys/conf.h>
  30 #include <sys/stat.h>
  31 #include <sys/file.h>
  32 #include <sys/ddi.h>
  33 #include <sys/sunddi.h>
  34 #include <sys/modctl.h>
  35 #include <sys/priv.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/socket.h>
  38 #include <sys/strsubr.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/sdt.h>
  41 #include <netinet/tcp.h>
  42 #include <inet/tcp.h>
  43 #include <sys/socketvar.h>
  44 #include <sys/pathname.h>
  45 #include <sys/fs/snode.h>
  46 #include <sys/fs/dv_node.h>
  47 #include <sys/vnode.h>
  48 #include <netinet/in.h>
  49 #include <net/if.h>
  50 #include <sys/sockio.h>
  51 #include <sys/ksocket.h>
  52 #include <sys/filio.h>            /* FIONBIO */
  53 #include <sys/iscsi_protocol.h>
  54 #include <sys/idm/idm.h>
  55 #include <sys/idm/idm_so.h>
  56 #include <sys/idm/idm_text.h>
  57 
  58 #define IN_PROGRESS_DELAY       1
  59 
  60 /*
  61  * in6addr_any is currently all zeroes, but use the macro in case this
  62  * ever changes.
  63  */
  64 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  65 
  66 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  67 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  68 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  69 
  70 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
  71 static void idm_so_conn_destroy_common(idm_conn_t *ic);
  72 static void idm_so_conn_connect_common(idm_conn_t *ic);
  73 
  74 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
  75     boolean_t boot_conn);
  76 static void idm_set_postconnect_options(ksocket_t so);
  77 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
  78 
  79 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
  80 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
  81     idm_buf_t *idb, uint32_t offset, uint32_t length);
  82 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
  83 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
  84     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
  85 
  86 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
  87     uint32_t ro, uint32_t dlength);
  88 
  89 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
  90     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
  91 
  92 static void idm_so_socket_set_nonblock(struct sonode *node);
  93 static void idm_so_socket_set_block(struct sonode *node);
  94 
  95 /*
  96  * Transport ops prototypes
  97  */
  98 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
  99 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
 100 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
 101 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
 102 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
 103 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
 104 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
 105 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
 106     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
 107 static void idm_so_notice_key_values(idm_conn_t *it,
 108     nvlist_t *negotiated_nvl);
 109 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
 110     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
 111 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
 112     idm_transport_caps_t *caps);
 113 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
 114 static void idm_so_buf_free(idm_buf_t *idb);
 115 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
 116 static void idm_so_buf_teardown(idm_buf_t *idb);
 117 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
 118 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
 119 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
 120 static void idm_so_tgt_svc_offline(idm_svc_t *is);
 121 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
 122 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
 123 static void idm_so_conn_disconnect(idm_conn_t *ic);
 124 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
 125 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
 126 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 127 
/*
 * IDM Native Sockets transport operations
 *
 * This is a positional initializer, so the entries must stay in the same
 * order as the members of idm_transport_ops_t; the trailing comment on each
 * line names the slot being filled.  Slots set to NULL have no handler in
 * this transport.  idm_so_conn_disconnect serves as both the target-side
 * and the initiator-side disconnect handler.  A pointer to this table is
 * stored in the transport's it_ops by idm_so_init().
 */
static
idm_transport_ops_t idm_so_transport_ops = {
        idm_so_tx,                      /* it_tx_pdu */
        idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
        idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
        idm_so_rx_datain,               /* it_rx_datain */
        idm_so_rx_rtt,                  /* it_rx_rtt */
        idm_so_rx_dataout,              /* it_rx_dataout */
        NULL,                           /* it_alloc_conn_rsrc */
        NULL,                           /* it_free_conn_rsrc */
        NULL,                           /* it_tgt_enable_datamover */
        NULL,                           /* it_ini_enable_datamover */
        NULL,                           /* it_conn_terminate */
        idm_so_free_task_rsrc,          /* it_free_task_rsrc */
        idm_so_negotiate_key_values,    /* it_negotiate_key_values */
        idm_so_notice_key_values,       /* it_notice_key_values */
        idm_so_conn_is_capable,         /* it_conn_is_capable */
        idm_so_buf_alloc,               /* it_buf_alloc */
        idm_so_buf_free,                /* it_buf_free */
        idm_so_buf_setup,               /* it_buf_setup */
        idm_so_buf_teardown,            /* it_buf_teardown */
        idm_so_tgt_svc_create,          /* it_tgt_svc_create */
        idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
        idm_so_tgt_svc_online,          /* it_tgt_svc_online */
        idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
        idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
        idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
        idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
        idm_so_ini_conn_create,         /* it_ini_conn_create */
        idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
        idm_so_ini_conn_connect,        /* it_ini_conn_connect */
        idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
        idm_so_declare_key_values       /* it_declare_key_values */
};
 165 
/*
 * Initialized in idm_so_init() and destroyed in idm_so_fini().
 * NOTE(review): the data this mutex protects is not visible in this part
 * of the file -- confirm against its users before relying on it.
 */
kmutex_t        idm_so_timed_socket_mutex;

/*
 * Send and receive socket buffer sizes.  They are applied as SO_SNDBUF and
 * SO_RCVBUF by idm_set_postconnect_options().
 */
int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
 170 
 171 /*
 172  * idm_so_init()
 173  * Sockets transport initialization
 174  */
 175 void
 176 idm_so_init(idm_transport_t *it)
 177 {
 178         /* Cache for IDM Data and R2T Transmit PDU's */
 179         idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 180             sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 181             &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 182 
 183         /* Cache for IDM Receive PDU's */
 184         idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 185             sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 186             &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 187 
 188         /* 128k buffer cache */
 189         idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 190             IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 191 
 192         /* Set the sockets transport ops */
 193         it->it_ops = &idm_so_transport_ops;
 194 
 195         mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 196 
 197 }
 198 
 199 /*
 200  * idm_so_fini()
 201  * Sockets transport teardown
 202  */
 203 void
 204 idm_so_fini(void)
 205 {
 206         kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 207         kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 208         kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 209         mutex_destroy(&idm_so_timed_socket_mutex);
 210 }
 211 
 212 ksocket_t
 213 idm_socreate(int domain, int type, int protocol)
 214 {
 215         ksocket_t ks;
 216 
 217         if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 218             CRED())) {
 219                 return (ks);
 220         } else {
 221                 return (NULL);
 222         }
 223 }
 224 
/*
 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used
 * to regain control of a thread stuck in idm_sorecv.
 *
 * Both directions are shut down (SHUT_RDWR).  The result of
 * ksocket_shutdown() is deliberately ignored.
 */
void
idm_soshutdown(ksocket_t so)
{
        (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
 237 
/*
 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate.  The socket must be shut down using
 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 * otherwise undefined behavior will result.
 *
 * The result of ksocket_close() is deliberately ignored.
 */
void
idm_sodestroy(ksocket_t ks)
{
        (void) ksocket_close(ks, CRED());
}
 249 
 250 /*
 251  * Function to compare two addresses in sockaddr_storage format
 252  */
 253 
 254 int
 255 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 256     const struct sockaddr_storage *cmp_ss2,
 257     boolean_t v4_mapped_as_v4,
 258     boolean_t compare_ports)
 259 {
 260         struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 261         const struct sockaddr_storage           *ss1, *ss2;
 262         struct in_addr                          *in1, *in2;
 263         struct in6_addr                         *in61, *in62;
 264         int i;
 265 
 266         /*
 267          * Normalize V4-mapped IPv6 addresses into V4 format if
 268          * v4_mapped_as_v4 is B_TRUE.
 269          */
 270         ss1 = cmp_ss1;
 271         ss2 = cmp_ss2;
 272         if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 273                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 274                 if (IN6_IS_ADDR_V4MAPPED(in61)) {
 275                         bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 276                         mapped_v4_ss1.ss_family = AF_INET;
 277                         ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 278                             ((struct sockaddr_in *)ss1)->sin_port;
 279                         IN6_V4MAPPED_TO_INADDR(in61,
 280                             &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 281                         ss1 = &mapped_v4_ss1;
 282                 }
 283         }
 284         ss2 = cmp_ss2;
 285         if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 286                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 287                 if (IN6_IS_ADDR_V4MAPPED(in62)) {
 288                         bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 289                         mapped_v4_ss2.ss_family = AF_INET;
 290                         ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 291                             ((struct sockaddr_in *)ss2)->sin_port;
 292                         IN6_V4MAPPED_TO_INADDR(in62,
 293                             &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 294                         ss2 = &mapped_v4_ss2;
 295                 }
 296         }
 297 
 298         /*
 299          * Compare ports, then address family, then ip address
 300          */
 301         if (compare_ports &&
 302             (((struct sockaddr_in *)ss1)->sin_port !=
 303             ((struct sockaddr_in *)ss2)->sin_port)) {
 304                 if (((struct sockaddr_in *)ss1)->sin_port >
 305                     ((struct sockaddr_in *)ss2)->sin_port)
 306                         return (1);
 307                 else
 308                         return (-1);
 309         }
 310 
 311         /*
 312          * ports are the same
 313          */
 314         if (ss1->ss_family != ss2->ss_family) {
 315                 if (ss1->ss_family == AF_INET)
 316                         return (1);
 317                 else
 318                         return (-1);
 319         }
 320 
 321         /*
 322          * address families are the same
 323          */
 324         if (ss1->ss_family == AF_INET) {
 325                 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 326                 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 327 
 328                 if (in1->s_addr > in2->s_addr)
 329                         return (1);
 330                 else if (in1->s_addr < in2->s_addr)
 331                         return (-1);
 332                 else
 333                         return (0);
 334         } else if (ss1->ss_family == AF_INET6) {
 335                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 336                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 337 
 338                 for (i = 0; i < 4; i++) {
 339                         if (in61->s6_addr32[i] > in62->s6_addr32[i])
 340                                 return (1);
 341                         else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 342                                 return (-1);
 343                 }
 344                 return (0);
 345         }
 346 
 347         return (1);
 348 }
 349 
 350 /*
 351  * IP address filter functions to flag addresses that should not
 352  * go out to initiators through discovery.
 353  */
 354 static boolean_t
 355 idm_v4_addr_okay(struct in_addr *in_addr)
 356 {
 357         in_addr_t addr = ntohl(in_addr->s_addr);
 358 
 359         if ((INADDR_NONE == addr) ||
 360             (IN_MULTICAST(addr)) ||
 361             ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 362             ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 363                 return (B_FALSE);
 364         }
 365         return (B_TRUE);
 366 }
 367 
 368 static boolean_t
 369 idm_v6_addr_okay(struct in6_addr *addr6)
 370 {
 371 
 372         if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 373             (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 374             (IN6_IS_ADDR_MULTICAST(addr6)) ||
 375             (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 376             (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 377             (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 378                 return (B_FALSE);
 379         }
 380         return (B_TRUE);
 381 }
 382 
/*
 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 *
 * On return *ipaddr_p points to a newly allocated idm_addr_list_t holding
 * the addresses that passed the interface-flag and address filters, with
 * al_out_cnt set to the number of entries, or is NULL if no address
 * qualified or an error occurred.
 *
 * The return value is the size in bytes of the allocation behind
 * *ipaddr_p (0 when *ipaddr_p is NULL).  The allocation is sized for every
 * interface reported by SIOCGLIFCONF, not just for the entries that were
 * kept.  NOTE(review): presumably the caller frees the list with
 * kmem_free() using this size -- confirm against the callers.
 */
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
        ksocket_t               so4, so6;
        struct lifnum           lifn;
        struct lifconf          lifc;
        struct lifreq           *lp;
        int                     rval;
        int                     numifs;
        int                     bufsize;
        void                    *buf;
        int                     i, j, n, rc;
        struct sockaddr_storage ss;
        struct sockaddr_in      *sin;
        struct sockaddr_in6     *sin6;
        idm_addr_t              *ip;
        idm_addr_list_t         *ipaddr = NULL;
        int                     size_ipaddr;

        *ipaddr_p = NULL;
        size_ipaddr = 0;
        buf = NULL;

        /* create an ipv4 and ipv6 UDP socket */
        if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
                return (0);
        if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
                idm_sodestroy(so6);
                return (0);
        }


retry_count:
        /* snapshot the current number of interfaces */
        lifn.lifn_family = PF_UNSPEC;
        lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
        lifn.lifn_count = 0;
        /* use so6 for ioctls with unspecified families by default */
        if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
            != 0) {
                goto cleanup;
        }

        numifs = lifn.lifn_count;
        if (numifs <= 0) {
                goto cleanup;
        }

        /* allocate extra room in case more interfaces appear */
        numifs += 10;

        /* get the interface names and ip addresses */
        bufsize = numifs * sizeof (struct lifreq);
        buf = kmem_alloc(bufsize, KM_SLEEP);

        lifc.lifc_family = AF_UNSPEC;
        lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
        lifc.lifc_len = bufsize;
        lifc.lifc_buf = buf;
        rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
        if (rc != 0) {
                /* buf is released at the cleanup label */
                goto cleanup;
        }
        /*
         * if our extra room is used up, the list may have been truncated:
         * drop the buffer and start over from the interface count
         */
        if (bufsize <= lifc.lifc_len) {
                kmem_free(buf, bufsize);
                buf = NULL;
                goto retry_count;
        }
        /* calc actual number of ifconfs */
        n = lifc.lifc_len / sizeof (struct lifreq);

        /*
         * Allocate the result list with room for n addresses.
         * NOTE(review): the (n - 1) suggests idm_addr_list_t already
         * contains one idm_addr_t -- confirm against its declaration.
         */
        if (n > 0) {
                size_ipaddr = sizeof (idm_addr_list_t) +
                    (n - 1) * sizeof (idm_addr_t);
                ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
        } else {
                goto cleanup;
        }

        /*
         * Examine the array of interfaces and filter uninteresting ones.
         * i walks the interfaces returned by the ioctl, j counts the
         * addresses stored in the result list so far.
         */
        for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {

                /*
                 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
                 */
                ss = lp->lifr_addr;
                /*
                 * fetch the flags using the socket of the correct family
                 */
                switch (ss.ss_family) {
                case AF_INET:
                        rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
                            &rval, CRED());
                        break;
                case AF_INET6:
                        rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
                            &rval, CRED());
                        break;
                default:
                        /* neither IPv4 nor IPv6: skip this interface */
                        continue;
                }
                if (rc == 0) {
                        /*
                         * If we got the flags, skip uninteresting
                         * interfaces based on flags.  If the ioctl failed
                         * the address is still run through the address
                         * filters below.
                         */
                        if ((lp->lifr_flags & IFF_UP) != IFF_UP)
                                continue;
                        if (lp->lifr_flags &
                            (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
                                continue;
                }

                /*
                 * save ip address; slot j is only consumed (j++) once the
                 * address has passed the per-family filter
                 */
                ip = &ipaddr->al_addrs[j];
                switch (ss.ss_family) {
                case AF_INET:
                        sin = (struct sockaddr_in *)&ss;
                        if (!idm_v4_addr_okay(&sin->sin_addr))
                                continue;
                        ip->a_addr.i_addr.in4 = sin->sin_addr;
                        ip->a_addr.i_insize = sizeof (struct in_addr);
                        break;
                case AF_INET6:
                        sin6 = (struct sockaddr_in6 *)&ss;
                        if (!idm_v6_addr_okay(&sin6->sin6_addr))
                                continue;
                        ip->a_addr.i_addr.in6 = sin6->sin6_addr;
                        ip->a_addr.i_insize = sizeof (struct in6_addr);
                        break;
                default:
                        continue;
                }
                j++;
        }

        if (j == 0) {
                /* no valid ifaddr */
                kmem_free(ipaddr, size_ipaddr);
                size_ipaddr = 0;
                ipaddr = NULL;
        } else {
                ipaddr->al_out_cnt = j;
        }


cleanup:
        /* common exit: close both sockets and drop the ioctl buffer */
        idm_sodestroy(so6);
        idm_sodestroy(so4);

        if (buf != NULL)
                kmem_free(buf, bufsize);

        *ipaddr_p = ipaddr;
        return (size_ipaddr);
}
 547 
 548 int
 549 idm_sorecv(ksocket_t so, void *msg, size_t len)
 550 {
 551         iovec_t iov;
 552 
 553         ASSERT(so != NULL);
 554         ASSERT(len != 0);
 555 
 556         /*
 557          * Fill in iovec and receive data
 558          */
 559         iov.iov_base = msg;
 560         iov.iov_len = len;
 561 
 562         return (idm_iov_sorecv(so, &iov, 1, len));
 563 }
 564 
 565 /*
 566  * idm_sosendto - Sends a buffered data on a non-connected socket.
 567  *
 568  * This function puts the data provided on the wire by calling sosendmsg.
 569  * It will return only when all the data has been sent or if an error
 570  * occurs.
 571  *
 572  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 573  * -1 if sosendmsg returns success but uio_resid != 0
 574  */
 575 int
 576 idm_sosendto(ksocket_t so, void *buff, size_t len,
 577     struct sockaddr *name, socklen_t namelen)
 578 {
 579         struct msghdr           msg;
 580         struct iovec            iov[1];
 581         int                     error;
 582         size_t                  sent = 0;
 583 
 584         iov[0].iov_base = buff;
 585         iov[0].iov_len  = len;
 586 
 587         /* Initialization of the message header. */
 588         bzero(&msg, sizeof (msg));
 589         msg.msg_iov     = iov;
 590         msg.msg_iovlen  = 1;
 591         msg.msg_name    = name;
 592         msg.msg_namelen = namelen;
 593 
 594         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 595                 /* Data sent */
 596                 if (sent == len) {
 597                         /* All data sent.  Success. */
 598                         return (0);
 599                 } else {
 600                         /* Not all data was sent.  Failure */
 601                         return (-1);
 602                 }
 603         }
 604 
 605         /* Send failed */
 606         return (error);
 607 }
 608 
 609 /*
 610  * idm_iov_sosend - Sends an iovec on a connection.
 611  *
 612  * This function puts the data provided on the wire by calling sosendmsg.
 613  * It will return only when all the data has been sent or if an error
 614  * occurs.
 615  *
 616  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 617  * -1 if sosendmsg returns success but uio_resid != 0
 618  */
 619 int
 620 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 621 {
 622         struct msghdr           msg;
 623         int                     error;
 624         size_t                  sent = 0;
 625 
 626         ASSERT(iop != NULL);
 627 
 628         /* Initialization of the message header. */
 629         bzero(&msg, sizeof (msg));
 630         msg.msg_iov     = iop;
 631         msg.msg_iovlen  = iovlen;
 632 
 633         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 634             == 0) {
 635                 /* Data sent */
 636                 if (sent == total_len) {
 637                         /* All data sent.  Success. */
 638                         return (0);
 639                 } else {
 640                         /* Not all data was sent.  Failure */
 641                         return (-1);
 642                 }
 643         }
 644 
 645         /* Send failed */
 646         return (error);
 647 }
 648 
 649 /*
 650  * idm_iov_sorecv - Receives an iovec from a connection
 651  *
 652  * This function gets the data asked for from the socket.  It will return
 653  * only when all the requested data has been retrieved or if an error
 654  * occurs.
 655  *
 656  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 657  * -1 if sorecvmsg returns success but uio_resid != 0
 658  */
 659 int
 660 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 661 {
 662         struct msghdr           msg;
 663         int                     error;
 664         size_t                  recv;
 665         int                     flags;
 666 
 667         ASSERT(iop != NULL);
 668 
 669         /* Initialization of the message header. */
 670         bzero(&msg, sizeof (msg));
 671         msg.msg_iov     = iop;
 672         msg.msg_iovlen  = iovlen;
 673         flags           = MSG_WAITALL;
 674 
 675         if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 676             == 0) {
 677                 /* Received data */
 678                 if (recv == total_len) {
 679                         /* All requested data received.  Success */
 680                         return (0);
 681                 } else {
 682                         /*
 683                          * Not all data was received.  The connection has
 684                          * probably failed.
 685                          */
 686                         return (-1);
 687                 }
 688         }
 689 
 690         /* Receive failed */
 691         return (error);
 692 }
 693 
 694 static void
 695 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 696 {
 697         int     conn_abort = 10000;
 698         int     conn_notify = 2000;
 699         int     abort = 30000;
 700 
 701         /* Pre-connect socket options */
 702         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 703             TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 704             CRED());
 705         if (boot_conn == B_FALSE) {
 706                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 707                     TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 708                     CRED());
 709                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 710                     TCP_ABORT_THRESHOLD,
 711                     (char *)&abort, sizeof (int), CRED());
 712         }
 713 }
 714 
 715 static void
 716 idm_set_postconnect_options(ksocket_t ks)
 717 {
 718         const int       on = 1;
 719 
 720         /* Set connect options */
 721         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 722             (char *)&idm_so_rcvbuf, sizeof (int), CRED());
 723         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 724             (char *)&idm_so_sndbuf, sizeof (int), CRED());
 725         (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 726             (char *)&on, sizeof (on), CRED());
 727 }
 728 
 729 static uint32_t
 730 n2h24(const uchar_t *ptr)
 731 {
 732         return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 733 }
 734 
 735 
 736 static idm_status_t
 737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 738 {
 739         iscsi_hdr_t     *bhs;
 740         uint32_t        hdr_digest_crc;
 741         uint32_t        crc_calculated;
 742         void            *new_hdr;
 743         int             ahslen = 0;
 744         int             total_len = 0;
 745         int             iovlen = 0;
 746         struct iovec    iov[2];
 747         idm_so_conn_t   *so_conn;
 748         int             rc;
 749 
 750         so_conn = ic->ic_transport_private;
 751 
 752         /*
 753          * Read BHS
 754          */
 755         bhs = pdu->isp_hdr;
 756         rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 757         if (rc != IDM_STATUS_SUCCESS) {
 758                 return (IDM_STATUS_FAIL);
 759         }
 760 
 761         /*
 762          * Check actual AHS length against the amount available in the buffer
 763          */
 764         pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 765             (bhs->hlength * sizeof (uint32_t));
 766         pdu->isp_datalen = n2h24(bhs->dlength);
 767         if (ic->ic_conn_type == CONN_TYPE_TGT &&
 768             pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 769                 IDM_CONN_LOG(CE_WARN,
 770                     "idm_sorecvhdr: exceeded the max data segment length");
 771                 return (IDM_STATUS_FAIL);
 772         }
 773         if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
 774                 /* Allocate a new header segment and change the callback */
 775                 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 776                 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 777                 pdu->isp_hdr = new_hdr;
 778                 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 779 
 780                 /*
 781                  * This callback will restore the expected values after
 782                  * the RX PDU has been processed.
 783                  */
 784                 pdu->isp_callback = idm_sorx_addl_pdu_cb;
 785         }
 786 
 787         /*
 788          * Setup receipt of additional header and header digest (if enabled).
 789          */
 790         if (bhs->hlength > 0) {
 791                 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 792                 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 793                 iov[iovlen].iov_len = ahslen;
 794                 total_len += iov[iovlen].iov_len;
 795                 iovlen++;
 796         }
 797 
 798         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 799                 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 800                 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 801                 total_len += iov[iovlen].iov_len;
 802                 iovlen++;
 803         }
 804 
 805         if ((iovlen != 0) &&
 806             (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 807             total_len) != 0)) {
 808                 return (IDM_STATUS_FAIL);
 809         }
 810 
 811         /*
 812          * Validate header digest if enabled
 813          */
 814         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 815                 crc_calculated = idm_crc32c(pdu->isp_hdr,
 816                     sizeof (iscsi_hdr_t) + ahslen);
 817                 if (crc_calculated != hdr_digest_crc) {
 818                         /* Invalid Header Digest */
 819                         return (IDM_STATUS_HEADER_DIGEST);
 820                 }
 821         }
 822 
 823         return (0);
 824 }
 825 
 826 /*
 827  * idm_so_ini_conn_create()
 828  * Allocate the sockets transport connection resources.
 829  */
 830 static idm_status_t
 831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 832 {
 833         ksocket_t       so;
 834         idm_so_conn_t   *so_conn;
 835         idm_status_t    idmrc;
 836 
 837         so = idm_socreate(cr->cr_domain, cr->cr_type,
 838             cr->cr_protocol);
 839         if (so == NULL) {
 840                 return (IDM_STATUS_FAIL);
 841         }
 842 
 843         /* Bind the socket if configured to do so */
 844         if (cr->cr_bound) {
 845                 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 846                     SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 847                         idm_sodestroy(so);
 848                         return (IDM_STATUS_FAIL);
 849                 }
 850         }
 851 
 852         idmrc = idm_so_conn_create_common(ic, so);
 853         if (idmrc != IDM_STATUS_SUCCESS) {
 854                 idm_soshutdown(so);
 855                 idm_sodestroy(so);
 856                 return (IDM_STATUS_FAIL);
 857         }
 858 
 859         so_conn = ic->ic_transport_private;
 860         /* Set up socket options */
 861         idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 862 
 863         return (IDM_STATUS_SUCCESS);
 864 }
 865 
 866 /*
 867  * idm_so_ini_conn_destroy()
 868  * Tear down the sockets transport connection resources.
 869  */
 870 static void
 871 idm_so_ini_conn_destroy(idm_conn_t *ic)
 872 {
 873         idm_so_conn_destroy_common(ic);
 874 }
 875 
 876 /*
 877  * idm_so_ini_conn_connect()
 878  * Establish the connection referred to by the handle previously allocated via
 879  * idm_so_ini_conn_create().
 880  */
 881 static idm_status_t
 882 idm_so_ini_conn_connect(idm_conn_t *ic)
 883 {
 884         idm_so_conn_t   *so_conn;
 885         struct sonode   *node = NULL;
 886         int             rc;
 887         clock_t         lbolt, conn_login_max, conn_login_interval;
 888         boolean_t       nonblock;
 889 
 890         so_conn = ic->ic_transport_private;
 891         nonblock = ic->ic_conn_params.nonblock_socket;
 892         conn_login_max = ic->ic_conn_params.conn_login_max;
 893         conn_login_interval = ddi_get_lbolt() +
 894             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 895 
 896         if (nonblock == B_TRUE) {
 897                 node = ((struct sonode *)(so_conn->ic_so));
 898                 /* Set to none block socket mode */
 899                 idm_so_socket_set_nonblock(node);
 900                 do {
 901                         rc = ksocket_connect(so_conn->ic_so,
 902                             &ic->ic_ini_dst_addr.sin,
 903                             (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
 904                             CRED());
 905                         if (rc == 0 || rc == EISCONN) {
 906                                 /* socket success or already success */
 907                                 rc = IDM_STATUS_SUCCESS;
 908                                 break;
 909                         }
 910                         if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
 911                             (rc == ECONNRESET)) {
 912                                 /* socket connection timeout or refuse */
 913                                 break;
 914                         }
 915                         lbolt = ddi_get_lbolt();
 916                         if (lbolt > conn_login_max) {
 917                                 /*
 918                                  * Connection retry timeout,
 919                                  * failed connect to target.
 920                                  */
 921                                 break;
 922                         }
 923                         if (lbolt < conn_login_interval) {
 924                                 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
 925                                         /* TCP connect still in progress */
 926                                         delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
 927                                         continue;
 928                                 } else {
 929                                         delay(conn_login_interval - lbolt);
 930                                 }
 931                         }
 932                         conn_login_interval = ddi_get_lbolt() +
 933                             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 934                 } while (rc != 0);
 935                 /* resume to nonblock mode */
 936                 if (rc == IDM_STATUS_SUCCESS) {
 937                         idm_so_socket_set_block(node);
 938                 }
 939         } else {
 940                 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
 941                     (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
 942         }
 943 
 944         if (rc != 0) {
 945                 idm_soshutdown(so_conn->ic_so);
 946                 return (IDM_STATUS_FAIL);
 947         }
 948 
 949         idm_so_conn_connect_common(ic);
 950 
 951         idm_set_postconnect_options(so_conn->ic_so);
 952 
 953         return (IDM_STATUS_SUCCESS);
 954 }
 955 
 956 idm_status_t
 957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
 958 {
 959         idm_status_t    idmrc;
 960 
 961         idm_set_postconnect_options(new_so);
 962         idmrc = idm_so_conn_create_common(ic, new_so);
 963 
 964         return (idmrc);
 965 }
 966 
 967 static void
 968 idm_so_tgt_conn_destroy(idm_conn_t *ic)
 969 {
 970         idm_so_conn_destroy_common(ic);
 971 }
 972 
 973 /*
 974  * idm_so_tgt_conn_connect()
 975  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 976  * is invoked from the SM as a result of an inbound connection request.
 977  */
 978 static idm_status_t
 979 idm_so_tgt_conn_connect(idm_conn_t *ic)
 980 {
 981         idm_so_conn_connect_common(ic);
 982 
 983         return (IDM_STATUS_SUCCESS);
 984 }
 985 
 986 static idm_status_t
 987 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
 988 {
 989         idm_so_conn_t   *so_conn;
 990 
 991         so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
 992         so_conn->ic_so = new_so;
 993 
 994         ic->ic_transport_private = so_conn;
 995         ic->ic_transport_hdrlen = 0;
 996 
 997         /* Set the scoreboarding flag on this connection */
 998         ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
 999         ic->ic_conn_params.max_recv_dataseglen =
1000             ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1001         ic->ic_conn_params.max_xmit_dataseglen =
1002             ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1003 
1004         /*
1005          * Initialize tx thread mutex and list
1006          */
1007         mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1008         cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1009         list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1010             offsetof(idm_pdu_t, idm_tx_link));
1011 
1012         return (IDM_STATUS_SUCCESS);
1013 }
1014 
1015 static void
1016 idm_so_conn_destroy_common(idm_conn_t *ic)
1017 {
1018         idm_so_conn_t   *so_conn = ic->ic_transport_private;
1019 
1020         ic->ic_transport_private = NULL;
1021         idm_sodestroy(so_conn->ic_so);
1022         list_destroy(&so_conn->ic_tx_list);
1023         mutex_destroy(&so_conn->ic_tx_mutex);
1024         cv_destroy(&so_conn->ic_tx_cv);
1025 
1026         kmem_free(so_conn, sizeof (idm_so_conn_t));
1027 }
1028 
1029 static void
1030 idm_so_conn_connect_common(idm_conn_t *ic)
1031 {
1032         idm_so_conn_t   *so_conn;
1033         struct sockaddr_in6     t_addr;
1034         socklen_t       t_addrlen = 0;
1035 
1036         so_conn = ic->ic_transport_private;
1037         bzero(&t_addr, sizeof (struct sockaddr_in6));
1038         t_addrlen = sizeof (struct sockaddr_in6);
1039 
1040         /* Set the local and remote addresses in the idm conn handle */
1041         (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1042             &t_addrlen, CRED());
1043         bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1044         (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1045             &t_addrlen, CRED());
1046         bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1047 
1048         mutex_enter(&ic->ic_mutex);
1049         so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1050             &p0, TS_RUN, minclsyspri);
1051         so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1052             &p0, TS_RUN, minclsyspri);
1053 
1054         while (so_conn->ic_rx_thread_did == 0 ||
1055             so_conn->ic_tx_thread_did == 0)
1056                 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1057         mutex_exit(&ic->ic_mutex);
1058 }
1059 
1060 /*
1061  * idm_so_conn_disconnect()
1062  * Shutdown the socket connection and stop the thread
1063  */
1064 static void
1065 idm_so_conn_disconnect(idm_conn_t *ic)
1066 {
1067         idm_so_conn_t   *so_conn;
1068 
1069         so_conn = ic->ic_transport_private;
1070 
1071         mutex_enter(&ic->ic_mutex);
1072         so_conn->ic_rx_thread_running = B_FALSE;
1073         so_conn->ic_tx_thread_running = B_FALSE;
1074         /* We need to wakeup the TX thread */
1075         mutex_enter(&so_conn->ic_tx_mutex);
1076         cv_signal(&so_conn->ic_tx_cv);
1077         mutex_exit(&so_conn->ic_tx_mutex);
1078         mutex_exit(&ic->ic_mutex);
1079 
1080         /* This should wakeup the RX thread if it is sleeping */
1081         idm_soshutdown(so_conn->ic_so);
1082 
1083         thread_join(so_conn->ic_tx_thread_did);
1084         thread_join(so_conn->ic_rx_thread_did);
1085 }
1086 
1087 /*
1088  * idm_so_tgt_svc_create()
1089  * Establish a service on an IP address and port.  idm_svc_req_t contains
1090  * the service parameters.
1091  */
1092 /*ARGSUSED*/
1093 static idm_status_t
1094 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1095 {
1096         idm_so_svc_t            *so_svc;
1097 
1098         so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1099 
1100         /* Set the new sockets service in svc handle */
1101         is->is_so_svc = (void *)so_svc;
1102 
1103         return (IDM_STATUS_SUCCESS);
1104 }
1105 
1106 /*
1107  * idm_so_tgt_svc_destroy()
1108  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1109  */
1110 static void
1111 idm_so_tgt_svc_destroy(idm_svc_t *is)
1112 {
1113         /* the socket will have been torn down; free the service */
1114         kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1115 }
1116 
1117 /*
1118  * idm_so_tgt_svc_online()
1119  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1120  */
1121 
1122 static idm_status_t
1123 idm_so_tgt_svc_online(idm_svc_t *is)
1124 {
1125         idm_so_svc_t            *so_svc;
1126         idm_svc_req_t           *sr = &is->is_svc_req;
1127         struct sockaddr_in6     sin6_ip;
1128         const uint32_t          on = 1;
1129         const uint32_t          off = 0;
1130 
1131         mutex_enter(&is->is_mutex);
1132         so_svc = (idm_so_svc_t *)is->is_so_svc;
1133 
1134         /*
1135          * Try creating an IPv6 socket first
1136          */
1137         if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1138                 mutex_exit(&is->is_mutex);
1139                 return (IDM_STATUS_FAIL);
1140         } else {
1141                 bzero(&sin6_ip, sizeof (sin6_ip));
1142                 sin6_ip.sin6_family = AF_INET6;
1143                 sin6_ip.sin6_port = htons(sr->sr_port);
1144                 sin6_ip.sin6_addr = in6addr_any;
1145 
1146                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1147                     SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1148                 /*
1149                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1150                  */
1151                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1152                     SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1153 
1154                 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1155                     sizeof (sin6_ip), CRED()) != 0) {
1156                         mutex_exit(&is->is_mutex);
1157                         idm_sodestroy(so_svc->is_so);
1158                         return (IDM_STATUS_FAIL);
1159                 }
1160         }
1161 
1162         idm_set_postconnect_options(so_svc->is_so);
1163 
1164         if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1165                 mutex_exit(&is->is_mutex);
1166                 idm_soshutdown(so_svc->is_so);
1167                 idm_sodestroy(so_svc->is_so);
1168                 return (IDM_STATUS_FAIL);
1169         }
1170 
1171         /* Launch a watch thread */
1172         so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1173             is, 0, &p0, TS_RUN, minclsyspri);
1174 
1175         if (so_svc->is_thread == NULL) {
1176                 /* Failure to launch; teardown the socket */
1177                 mutex_exit(&is->is_mutex);
1178                 idm_soshutdown(so_svc->is_so);
1179                 idm_sodestroy(so_svc->is_so);
1180                 return (IDM_STATUS_FAIL);
1181         }
1182         ksocket_hold(so_svc->is_so);
1183         /* Wait for the port watcher thread to start */
1184         while (!so_svc->is_thread_running)
1185                 cv_wait(&is->is_cv, &is->is_mutex);
1186         mutex_exit(&is->is_mutex);
1187 
1188         return (IDM_STATUS_SUCCESS);
1189 }
1190 
1191 /*
1192  * idm_so_tgt_svc_offline
1193  *
1194  * Stop listening on the IP address and port identified by idm_svc_t.
1195  */
1196 static void
1197 idm_so_tgt_svc_offline(idm_svc_t *is)
1198 {
1199         idm_so_svc_t            *so_svc;
1200         mutex_enter(&is->is_mutex);
1201         so_svc = (idm_so_svc_t *)is->is_so_svc;
1202         so_svc->is_thread_running = B_FALSE;
1203         mutex_exit(&is->is_mutex);
1204 
1205         /*
1206          * Teardown socket
1207          */
1208         idm_sodestroy(so_svc->is_so);
1209 
1210         /*
1211          * Now we expect the port watcher thread to terminate
1212          */
1213         thread_join(so_svc->is_thread_did);
1214 }
1215 
1216 /*
1217  * Watch thread for target service connection establishment.
1218  */
1219 void
1220 idm_so_svc_port_watcher(void *arg)
1221 {
1222         idm_svc_t               *svc = arg;
1223         ksocket_t               new_so;
1224         idm_conn_t              *ic;
1225         idm_status_t            idmrc;
1226         idm_so_svc_t            *so_svc;
1227         int                     rc;
1228         const uint32_t          off = 0;
1229         struct sockaddr_in6     t_addr;
1230         socklen_t               t_addrlen;
1231 
1232         bzero(&t_addr, sizeof (struct sockaddr_in6));
1233         t_addrlen = sizeof (struct sockaddr_in6);
1234         mutex_enter(&svc->is_mutex);
1235 
1236         so_svc = svc->is_so_svc;
1237         so_svc->is_thread_running = B_TRUE;
1238         so_svc->is_thread_did = so_svc->is_thread->t_did;
1239 
1240         cv_signal(&svc->is_cv);
1241 
1242         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1243             svc->is_svc_req.sr_port);
1244 
1245         while (so_svc->is_thread_running) {
1246                 mutex_exit(&svc->is_mutex);
1247 
1248                 if ((rc = ksocket_accept(so_svc->is_so,
1249                     (struct sockaddr *)&t_addr, &t_addrlen,
1250                     &new_so, CRED())) != 0) {
1251                         mutex_enter(&svc->is_mutex);
1252                         if (rc != ECONNABORTED && rc != EINTR) {
1253                                 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1254                                     " ksocket_accept failed %d", rc);
1255                         }
1256                         /*
1257                          * Unclean shutdown of this thread is not handled
1258                          * wait for !is_thread_running.
1259                          */
1260                         continue;
1261                 }
1262                 /*
1263                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1264                  */
1265                 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1266                     (char *)&off, sizeof (off), CRED());
1267 
1268                 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1269                     &ic);
1270                 if (idmrc != IDM_STATUS_SUCCESS) {
1271                         /* Drop connection */
1272                         idm_soshutdown(new_so);
1273                         idm_sodestroy(new_so);
1274                         mutex_enter(&svc->is_mutex);
1275                         continue;
1276                 }
1277 
1278                 idmrc = idm_so_tgt_conn_create(ic, new_so);
1279                 if (idmrc != IDM_STATUS_SUCCESS) {
1280                         idm_svc_conn_destroy(ic);
1281                         idm_soshutdown(new_so);
1282                         idm_sodestroy(new_so);
1283                         mutex_enter(&svc->is_mutex);
1284                         continue;
1285                 }
1286 
1287                 /*
1288                  * Kick the state machine.  At CS_S3_XPT_UP the state machine
1289                  * will notify the client (target) about the new connection.
1290                  */
1291                 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1292 
1293                 mutex_enter(&svc->is_mutex);
1294         }
1295         ksocket_rele(so_svc->is_so);
1296         so_svc->is_thread_running = B_FALSE;
1297         mutex_exit(&svc->is_mutex);
1298 
1299         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1300             svc->is_svc_req.sr_port);
1301 
1302         thread_exit();
1303 }
1304 
1305 /*
1306  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1307  * frees resources associated with the task.
1308  *
1309  * It's not clear that this should return idm_status_t.  What do we do
1310  * if it fails?
1311  */
1312 static idm_status_t
1313 idm_so_free_task_rsrc(idm_task_t *idt)
1314 {
1315         idm_buf_t       *idb, *next_idb;
1316 
1317         /*
1318          * There is nothing to cleanup on initiator connections
1319          */
1320         if (IDM_CONN_ISINI(idt->idt_ic))
1321                 return (IDM_STATUS_SUCCESS);
1322 
1323         /*
1324          * If this is a target connection, call idm_buf_rx_from_ini_done for
1325          * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1326          *
1327          * In addition, remove any buffers associated with this task from
1328          * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1329          * items don't actually get removed from that list (and completion
1330          * routines called) until idm_task_cleanup.
1331          */
1332         mutex_enter(&idt->idt_mutex);
1333 
1334         for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1335                 next_idb = list_next(&idt->idt_outbufv, idb);
1336                 if (idb->idb_in_transport) {
1337                         /*
1338                          * idm_buf_rx_from_ini_done releases idt->idt_mutex
1339                          */
1340                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1341                             uintptr_t, idb->idb_buf,
1342                             uint32_t, idb->idb_bufoffset,
1343                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1344                             uint32_t, idb->idb_xfer_len,
1345                             int, XFER_BUF_RX_FROM_INI);
1346                         idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1347                         mutex_enter(&idt->idt_mutex);
1348                 }
1349         }
1350 
1351         for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1352                 next_idb = list_next(&idt->idt_inbufv, idb);
1353                 /*
1354                  * We want to remove these items from the tx_list as well,
1355                  * but knowing it's in the idt_inbufv list is not a guarantee
1356                  * that it's in the tx_list.  If it's on the tx list then
1357                  * let idm_sotx_thread() clean it up.
1358                  */
1359                 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1360                         /*
1361                          * idm_buf_tx_to_ini_done releases idt->idt_mutex
1362                          */
1363                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1364                             uintptr_t, idb->idb_buf,
1365                             uint32_t, idb->idb_bufoffset,
1366                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1367                             uint32_t, idb->idb_xfer_len,
1368                             int, XFER_BUF_TX_TO_INI);
1369                         idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1370                         mutex_enter(&idt->idt_mutex);
1371                 }
1372         }
1373 
1374         mutex_exit(&idt->idt_mutex);
1375 
1376         return (IDM_STATUS_SUCCESS);
1377 }
1378 
1379 /*
1380  * idm_so_negotiate_key_values() validates the key values for this connection
1381  */
1382 /* ARGSUSED */
1383 static kv_status_t
1384 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1385     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1386 {
1387         /* All parameters are negotiated at the iscsit level */
1388         return (KV_HANDLED);
1389 }
1390 
1391 /*
1392  * idm_so_notice_key_values() activates the negotiated key values for
1393  * this connection.
1394  */
1395 static void
1396 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1397 {
1398         char                    *nvp_name;
1399         nvpair_t                *nvp;
1400         nvpair_t                *next_nvp;
1401         int                     nvrc;
1402         idm_status_t            idm_status;
1403         const idm_kv_xlate_t    *ikvx;
1404         uint64_t                num_val;
1405 
1406         for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1407             nvp != NULL; nvp = next_nvp) {
1408                 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1409                 nvp_name = nvpair_name(nvp);
1410 
1411                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1412                 switch (ikvx->ik_key_id) {
1413                 case KI_HEADER_DIGEST:
1414                 case KI_DATA_DIGEST:
1415                         idm_status = idm_so_handle_digest(it, nvp, ikvx);
1416                         ASSERT(idm_status == 0);
1417 
1418                         /* Remove processed item from negotiated_nvl list */
1419                         nvrc = nvlist_remove_all(
1420                             negotiated_nvl, ikvx->ik_key_name);
1421                         ASSERT(nvrc == 0);
1422                         break;
1423                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1424                         /*
1425                          * Just pass the value down to idm layer.
1426                          * No need to remove it from negotiated_nvl list here.
1427                          */
1428                         nvrc = nvpair_value_uint64(nvp, &num_val);
1429                         ASSERT(nvrc == 0);
1430                         it->ic_conn_params.max_xmit_dataseglen =
1431                             (uint32_t)num_val;
1432                         break;
1433                 default:
1434                         break;
1435                 }
1436         }
1437 }
1438 
1439 /*
1440  * idm_so_declare_key_values() declares the key values for this connection
1441  */
1442 /* ARGSUSED */
1443 static kv_status_t
1444 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1445     nvlist_t *outgoing_nvl)
1446 {
1447         char                    *nvp_name;
1448         nvpair_t                *nvp;
1449         nvpair_t                *next_nvp;
1450         kv_status_t             kvrc;
1451         int                     nvrc = 0;
1452         const idm_kv_xlate_t    *ikvx;
1453         uint64_t                num_val;
1454 
1455         for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1456             nvp != NULL && nvrc == 0; nvp = next_nvp) {
1457                 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1458                 nvp_name = nvpair_name(nvp);
1459 
1460                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1461                 switch (ikvx->ik_key_id) {
1462                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1463                         if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1464                                 break;
1465                         }
1466                         if (outgoing_nvl &&
1467                             (nvrc = nvlist_add_uint64(outgoing_nvl,
1468                             nvp_name, num_val)) != 0) {
1469                                 break;
1470                         }
1471                         it->ic_conn_params.max_recv_dataseglen =
1472                             (uint32_t)num_val;
1473                         break;
1474                 default:
1475                         break;
1476                 }
1477         }
1478         kvrc = idm_nvstat_to_kvstat(nvrc);
1479         return (kvrc);
1480 }
1481 
1482 static idm_status_t
1483 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1484     const idm_kv_xlate_t *ikvx)
1485 {
1486         int                     nvrc;
1487         char                    *digest_choice_string;
1488 
1489         nvrc = nvpair_value_string(digest_choice,
1490             &digest_choice_string);
1491         ASSERT(nvrc == 0);
1492         if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1493                 switch (ikvx->ik_key_id) {
1494                 case KI_HEADER_DIGEST:
1495                         it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1496                         break;
1497                 case KI_DATA_DIGEST:
1498                         it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1499                         break;
1500                 default:
1501                         ASSERT(0);
1502                         break;
1503                 }
1504         } else if (strcasecmp(digest_choice_string, "none") == 0) {
1505                 switch (ikvx->ik_key_id) {
1506                 case KI_HEADER_DIGEST:
1507                         it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1508                         break;
1509                 case KI_DATA_DIGEST:
1510                         it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1511                         break;
1512                 default:
1513                         ASSERT(0);
1514                         break;
1515                 }
1516         } else {
1517                 ASSERT(0);
1518         }
1519 
1520         return (IDM_STATUS_SUCCESS);
1521 }
1522 
1523 
1524 /*
1525  * idm_so_conn_is_capable() verifies that the passed connection is provided
1526  * for by the sockets interface.
1527  */
1528 /* ARGSUSED */
1529 static boolean_t
1530 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1531 {
1532         return (B_TRUE);
1533 }
1534 
1535 /*
1536  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1537  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1538  * off the socket into the appropriate buffers.
1539  */
1540 static void
1541 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1542 {
1543         iscsi_data_hdr_t        *bhs;
1544         idm_task_t              *idt;
1545         idm_buf_t               *idb;
1546         uint32_t                datasn;
1547         size_t                  offset;
1548         iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1549         iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1550 
1551         ASSERT(ic != NULL);
1552         ASSERT(pdu != NULL);
1553 
1554         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1555         datasn  = ntohl(bhs->datasn);
1556         offset  = ntohl(bhs->offset);
1557 
1558         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1559 
1560         /*
1561          * Look up the task corresponding to the initiator task tag
1562          * to get the buffers affiliated with the task.
1563          */
1564         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1565         if (idt == NULL) {
1566                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1567                 idm_pdu_rx_protocol_error(ic, pdu);
1568                 return;
1569         }
1570 
1571         idb = pdu->isp_sorx_buf;
1572         if (idb == NULL) {
1573                 IDM_CONN_LOG(CE_WARN,
1574                     "idm_so_rx_datain: failed to find buffer");
1575                 idm_task_rele(idt);
1576                 idm_pdu_rx_protocol_error(ic, pdu);
1577                 return;
1578         }
1579 
1580         /*
1581          * DataSN values should be sequential and should not have any gaps or
1582          * repetitions. Check the DataSN with the one stored in the task.
1583          */
1584         if (datasn == idt->idt_exp_datasn) {
1585                 idt->idt_exp_datasn++; /* keep track of DataSN received */
1586         } else {
1587                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1588                 idm_task_rele(idt);
1589                 idm_pdu_rx_protocol_error(ic, pdu);
1590                 return;
1591         }
1592 
1593         /*
1594          * PDUs in a sequence should be in continuously increasing
1595          * address offset
1596          */
1597         if (offset != idb->idb_exp_offset) {
1598                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1599                 idm_task_rele(idt);
1600                 idm_pdu_rx_protocol_error(ic, pdu);
1601                 return;
1602         }
1603         /* Expected next relative buffer offset */
1604         idb->idb_exp_offset += n2h24(bhs->dlength);
1605         idt->idt_rx_bytes += n2h24(bhs->dlength);
1606 
1607         idm_task_rele(idt);
1608 
1609         /*
1610          * For now call scsi_rsp which will process the data rsp
1611          * Revisit, need to provide an explicit client entry point for
1612          * phase collapse completions.
1613          */
1614         if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1615             (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1616                 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1617         }
1618 
1619         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1620 }
1621 
1622 /*
1623  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1624  * data from the Data-Out PDU sent by the iSCSI initiator.
1625  *
1626  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1627  * task to get the buffers associated with the PDU. A PDU might span buffers.
1628  * The data is then read into the respective buffer.
1629  */
1630 static void
1631 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1632 {
1633 
1634         iscsi_data_hdr_t        *bhs;
1635         idm_task_t              *idt;
1636         idm_buf_t               *idb;
1637         size_t                  offset;
1638 
1639         ASSERT(ic != NULL);
1640         ASSERT(pdu != NULL);
1641 
1642         bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1643         offset = ntohl(bhs->offset);
1644         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1645 
1646         /*
1647          * Look up the task corresponding to the initiator task tag
1648          * to get the buffers affiliated with the task.
1649          */
1650         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1651         if (idt == NULL) {
1652                 IDM_CONN_LOG(CE_WARN,
1653                     "idm_so_rx_dataout: failed to find task");
1654                 idm_pdu_rx_protocol_error(ic, pdu);
1655                 return;
1656         }
1657 
1658         idb = pdu->isp_sorx_buf;
1659         if (idb == NULL) {
1660                 IDM_CONN_LOG(CE_WARN,
1661                     "idm_so_rx_dataout: failed to find buffer");
1662                 idm_task_rele(idt);
1663                 idm_pdu_rx_protocol_error(ic, pdu);
1664                 return;
1665         }
1666 
1667         /* Keep track of data transferred - check data offsets */
1668         if (offset != idb->idb_exp_offset) {
1669                 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1670                     "%ld, %d", offset, idb->idb_exp_offset);
1671                 idm_task_rele(idt);
1672                 idm_pdu_rx_protocol_error(ic, pdu);
1673                 return;
1674         }
1675         /* Expected next relative offset */
1676         idb->idb_exp_offset += ntoh24(bhs->dlength);
1677         idt->idt_rx_bytes += n2h24(bhs->dlength);
1678 
1679         /*
1680          * Call the buffer callback when the transfer is complete
1681          *
1682          * The connection state machine should only abort tasks after
1683          * shutting down the connection so we are assured that there
1684          * won't be a simultaneous attempt to abort this task at the
1685          * same time as we are processing this PDU (due to a connection
1686          * state change).
1687          */
1688         if (bhs->flags & ISCSI_FLAG_FINAL) {
1689                 /*
1690                  * We only want to call idm_buf_rx_from_ini_done once
1691                  * per transfer.  It's possible that this task has
1692                  * already been aborted in which case
1693                  * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1694                  * for each buffer with idb_in_transport==B_TRUE.  To
1695                  * close this window and ensure that this doesn't happen,
1696                  * we'll clear idb->idb_in_transport now while holding
1697                  * the task mutex.   This is only really an issue for
1698                  * SCSI task abort -- if tasks were being aborted because
1699                  * of a connection state change the state machine would
1700                  * have already stopped the receive thread.
1701                  */
1702                 mutex_enter(&idt->idt_mutex);
1703 
1704                 /*
1705                  * Release the task hold here (obtained in idm_task_find)
1706                  * because the task may complete synchronously during
1707                  * idm_buf_rx_from_ini_done.  Since we still have an active
1708                  * buffer we know there is at least one additional hold on idt.
1709                  */
1710                 idm_task_rele(idt);
1711 
1712                 /*
1713                  * idm_buf_rx_from_ini_done releases idt->idt_mutex
1714                  */
1715                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1716                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1717                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
1718                     uint32_t, idb->idb_xfer_len,
1719                     int, XFER_BUF_RX_FROM_INI);
1720                 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1721                 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1722                 return;
1723         }
1724 
1725         idm_task_rele(idt);
1726         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1727 }
1728 
1729 /*
1730  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1731  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1732  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1733  * and looks up the task in the task tree using the itt to get the output
1734  * buffers associated the task. The R2T PDU contains the offset of the
1735  * requested data and the data length. This function then constructs a
1736  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1737  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1738  */
1739 
1740 static void
1741 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1742 {
1743         idm_task_t              *idt;
1744         idm_buf_t               *idb;
1745         iscsi_rtt_hdr_t         *rtt_hdr;
1746         uint32_t                data_offset;
1747         uint32_t                data_length;
1748 
1749         ASSERT(ic != NULL);
1750         ASSERT(pdu != NULL);
1751 
1752         rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1753         data_offset = ntohl(rtt_hdr->data_offset);
1754         data_length = ntohl(rtt_hdr->data_length);
1755         idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1756 
1757         if (idt == NULL) {
1758                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1759                 idm_pdu_rx_protocol_error(ic, pdu);
1760                 return;
1761         }
1762 
1763         /* Find the buffer bound to the task by the iSCSI initiator */
1764         mutex_enter(&idt->idt_mutex);
1765         idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1766         if (idb == NULL) {
1767                 mutex_exit(&idt->idt_mutex);
1768                 idm_task_rele(idt);
1769                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1770                 idm_pdu_rx_protocol_error(ic, pdu);
1771                 return;
1772         }
1773 
1774         /* return buffer contains this data */
1775         if (data_offset + data_length > idb->idb_buflen) {
1776                 /* Overflow */
1777                 mutex_exit(&idt->idt_mutex);
1778                 idm_task_rele(idt);
1779                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1780                     "buffer");
1781                 idm_pdu_rx_protocol_error(ic, pdu);
1782                 return;
1783         }
1784 
1785         idt->idt_r2t_ttt = rtt_hdr->ttt;
1786         idt->idt_exp_datasn = 0;
1787 
1788         idm_so_send_rtt_data(ic, idt, idb, data_offset,
1789             ntohl(rtt_hdr->data_length));
1790         /*
1791          * the idt_mutex is released in idm_so_send_rtt_data
1792          */
1793 
1794         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1795         idm_task_rele(idt);
1796 
1797 }
1798 
1799 idm_status_t
1800 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1801 {
1802         uint8_t         pad[ISCSI_PAD_WORD_LEN];
1803         int             pad_len;
1804         uint32_t        data_digest_crc;
1805         uint32_t        crc_calculated;
1806         int             total_len;
1807         idm_so_conn_t   *so_conn;
1808 
1809         so_conn = ic->ic_transport_private;
1810 
1811         pad_len = ((ISCSI_PAD_WORD_LEN -
1812             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1813             (ISCSI_PAD_WORD_LEN - 1));
1814 
1815         ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1816 
1817         total_len = pdu->isp_datalen;
1818 
1819         if (pad_len) {
1820                 pdu->isp_iov[pdu->isp_iovlen].iov_base    = (char *)&pad;
1821                 pdu->isp_iov[pdu->isp_iovlen].iov_len     = pad_len;
1822                 total_len               += pad_len;
1823                 pdu->isp_iovlen++;
1824         }
1825 
1826         /* setup data digest */
1827         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1828                 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1829                     (char *)&data_digest_crc;
1830                 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1831                     sizeof (data_digest_crc);
1832                 total_len               += sizeof (data_digest_crc);
1833                 pdu->isp_iovlen++;
1834         }
1835 
1836         pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1837 
1838         if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1839             pdu->isp_iovlen, total_len) != 0) {
1840                 return (IDM_STATUS_IO);
1841         }
1842 
1843         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1844                 crc_calculated = idm_crc32c(pdu->isp_data,
1845                     pdu->isp_datalen);
1846                 if (pad_len) {
1847                         crc_calculated = idm_crc32c_continued((char *)&pad,
1848                             pad_len, crc_calculated);
1849                 }
1850                 if (crc_calculated != data_digest_crc) {
1851                         IDM_CONN_LOG(CE_WARN,
1852                             "idm_sorecvdata: "
1853                             "CRC error: actual 0x%x, calc 0x%x",
1854                             data_digest_crc, crc_calculated);
1855 
1856                         /* Invalid Data Digest */
1857                         return (IDM_STATUS_DATA_DIGEST);
1858                 }
1859         }
1860 
1861         return (IDM_STATUS_SUCCESS);
1862 }
1863 
1864 /*
1865  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1866  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1867  * calling this function.
1868  */
1869 idm_status_t
1870 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1871 {
1872         iscsi_data_hdr_t        *bhs;
1873         idm_task_t              *task;
1874         uint32_t                offset;
1875         uint8_t                 opcode;
1876         uint32_t                dlength;
1877         list_t                  *buflst;
1878         uint32_t                xfer_bytes;
1879         idm_status_t            status;
1880 
1881         ASSERT(ic != NULL);
1882         ASSERT(pdu != NULL);
1883 
1884         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1885 
1886         offset  = ntohl(bhs->offset);
1887         opcode  = bhs->opcode;
1888         dlength = n2h24(bhs->dlength);
1889 
1890         ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1891             (opcode == ISCSI_OP_SCSI_DATA));
1892 
1893         /*
1894          * Successful lookup implicitly gets a "hold" on the task.  This
1895          * hold must be released before leaving this function.  At one
1896          * point we were caching this task context and retaining the hold
1897          * but it turned out to be very difficult to release the hold properly.
1898          * The task can be aborted and the connection shutdown between this
1899          * call and the subsequent expected call to idm_so_rx_datain/
1900          * idm_so_rx_dataout (in which case those functions are not called).
1901          * Releasing the hold in the PDU callback doesn't work well either
1902          * because the whole task may be completed by then at which point
1903          * it is too late to release the hold -- for better or worse this
1904          * code doesn't wait on the refcnts during normal operation.
1905          * idm_task_find() is very fast and it is not a huge burden if we
1906          * have to do it twice.
1907          */
1908         task = idm_task_find(ic, bhs->itt, bhs->ttt);
1909         if (task == NULL) {
1910                 IDM_CONN_LOG(CE_WARN,
1911                     "idm_sorecv_scsidata: could not find task");
1912                 return (IDM_STATUS_FAIL);
1913         }
1914 
1915         mutex_enter(&task->idt_mutex);
1916         buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1917             &task->idt_inbufv : &task->idt_outbufv;
1918         pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1919         mutex_exit(&task->idt_mutex);
1920 
1921         if (pdu->isp_sorx_buf == NULL) {
1922                 idm_task_rele(task);
1923                 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1924                     "buffer for offset %x opcode=%x",
1925                     offset, opcode);
1926                 return (IDM_STATUS_FAIL);
1927         }
1928 
1929         xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1930         ASSERT(xfer_bytes != 0);
1931         if (xfer_bytes != dlength) {
1932                 idm_task_rele(task);
1933                 /*
1934                  * Buffer overflow, connection error.  The PDU data is still
1935                  * sitting in the socket so we can't use the connection
1936                  * again until that data is drained.
1937                  */
1938                 return (IDM_STATUS_FAIL);
1939         }
1940 
1941         status = idm_sorecvdata(ic, pdu);
1942 
1943         idm_task_rele(task);
1944 
1945         return (status);
1946 }
1947 
1948 static uint32_t
1949 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1950 {
1951         uint32_t        buf_ro = ro - idb->idb_bufoffset;
1952         uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1953 
1954         ASSERT(ro >= idb->idb_bufoffset);
1955 
1956         pdu->isp_iov[pdu->isp_iovlen].iov_base    =
1957             (caddr_t)idb->idb_buf + buf_ro;
1958         pdu->isp_iov[pdu->isp_iovlen].iov_len     = xfer_len;
1959         pdu->isp_iovlen++;
1960 
1961         return (xfer_len);
1962 }
1963 
1964 int
1965 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1966 {
1967         pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1968         ASSERT(pdu->isp_data != NULL);
1969 
1970         pdu->isp_databuflen = pdu->isp_datalen;
1971         pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1972         pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1973         pdu->isp_iovlen = 1;
1974         /*
1975          * Since we are associating a new data buffer with this received
1976          * PDU we need to set a specific callback to free the data
1977          * after the PDU is processed.
1978          */
1979         pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1980         pdu->isp_callback = idm_sorx_addl_pdu_cb;
1981 
1982         return (idm_sorecvdata(ic, pdu));
1983 }
1984 
1985 void
1986 idm_sorx_thread(void *arg)
1987 {
1988         boolean_t       conn_failure = B_FALSE;
1989         idm_conn_t      *ic = (idm_conn_t *)arg;
1990         idm_so_conn_t   *so_conn;
1991         idm_pdu_t       *pdu;
1992         idm_status_t    rc;
1993 
1994         idm_conn_hold(ic);
1995 
1996         mutex_enter(&ic->ic_mutex);
1997 
1998         so_conn = ic->ic_transport_private;
1999         so_conn->ic_rx_thread_running = B_TRUE;
2000         so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2001         cv_signal(&ic->ic_cv);
2002 
2003         while (so_conn->ic_rx_thread_running) {
2004                 mutex_exit(&ic->ic_mutex);
2005 
2006                 /*
2007                  * Get PDU with default header size (large enough for
2008                  * BHS plus any anticipated AHS).  PDU from
2009                  * the cache will have all values set correctly
2010                  * for sockets RX including callback.
2011                  */
2012                 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2013                 pdu->isp_ic = ic;
2014                 pdu->isp_flags = 0;
2015                 pdu->isp_transport_hdrlen = 0;
2016 
2017                 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2018                         /*
2019                          * Call idm_pdu_complete so that we call the callback
2020                          * and ensure any memory allocated in idm_sorecvhdr
2021                          * gets freed up.
2022                          */
2023                         idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2024 
2025                         /*
2026                          * If ic_rx_thread_running is still set then
2027                          * this is some kind of connection problem
2028                          * on the socket.  In this case we want to
2029                          * generate an event.  Otherwise some other
2030                          * thread closed the socket due to another
2031                          * issue in which case we don't need to
2032                          * generate an event.
2033                          */
2034                         mutex_enter(&ic->ic_mutex);
2035                         if (so_conn->ic_rx_thread_running) {
2036                                 conn_failure = B_TRUE;
2037                                 so_conn->ic_rx_thread_running = B_FALSE;
2038                         }
2039 
2040                         continue;
2041                 }
2042 
2043                 /*
2044                  * Header has been read and validated.  Now we need
2045                  * to read the PDU data payload (if present).  SCSI data
2046                  * need to be transferred from the socket directly into
2047                  * the associated transfer buffer for the SCSI task.
2048                  */
2049                 if (pdu->isp_datalen != 0) {
2050                         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2051                             (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2052                                 rc = idm_sorecv_scsidata(ic, pdu);
2053                                 /*
2054                                  * All SCSI errors are fatal to the
2055                                  * connection right now since we have no
2056                                  * place to put the data.  What we need
2057                                  * is some kind of sink to dispose of unwanted
2058                                  * SCSI data.  For example an invalid task tag
2059                                  * should not kill the connection (although
2060                                  * we may want to drop the connection).
2061                                  */
2062                         } else {
2063                                 /*
2064                                  * Not data PDUs so allocate a buffer for the
2065                                  * data segment and read the remaining data.
2066                                  */
2067                                 rc = idm_sorecv_nonscsidata(ic, pdu);
2068                         }
2069                         if (rc != 0) {
2070                                 /*
2071                                  * Call idm_pdu_complete so that we call the
2072                                  * callback and ensure any memory allocated
2073                                  * in idm_sorecvhdr gets freed up.
2074                                  */
2075                                 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2076 
2077                                 /*
2078                                  * If ic_rx_thread_running is still set then
2079                                  * this is some kind of connection problem
2080                                  * on the socket.  In this case we want to
2081                                  * generate an event.  Otherwise some other
2082                                  * thread closed the socket due to another
2083                                  * issue in which case we don't need to
2084                                  * generate an event.
2085                                  */
2086                                 mutex_enter(&ic->ic_mutex);
2087                                 if (so_conn->ic_rx_thread_running) {
2088                                         conn_failure = B_TRUE;
2089                                         so_conn->ic_rx_thread_running = B_FALSE;
2090                                 }
2091                                 continue;
2092                         }
2093                 }
2094 
2095                 /*
2096                  * Process RX PDU
2097                  */
2098                 idm_pdu_rx(ic, pdu);
2099 
2100                 mutex_enter(&ic->ic_mutex);
2101         }
2102 
2103         mutex_exit(&ic->ic_mutex);
2104 
2105         /*
2106          * If we dropped out of the RX processing loop because of
2107          * a socket problem or other connection failure (including
2108          * digest errors) then we need to generate a state machine
2109          * event to shut the connection down.
2110          * If the state machine is already in, for example, INIT_ERROR, this
2111          * event will get dropped, and the TX thread will never be notified
2112          * to shut down.  To be safe, we'll just notify it here.
2113          */
2114         if (conn_failure) {
2115                 if (so_conn->ic_tx_thread_running) {
2116                         so_conn->ic_tx_thread_running = B_FALSE;
2117                         mutex_enter(&so_conn->ic_tx_mutex);
2118                         cv_signal(&so_conn->ic_tx_cv);
2119                         mutex_exit(&so_conn->ic_tx_mutex);
2120                 }
2121 
2122                 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2123         }
2124 
2125         idm_conn_rele(ic);
2126 
2127         thread_exit();
2128 }
2129 
2130 /*
2131  * idm_so_tx
2132  *
2133  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2134  * point.  By definition, it is supposed to be fast.  So, simply queue
2135  * the entry and return.  The real work is done by idm_i_so_tx() via
2136  * idm_sotx_thread().
2137  */
2138 
2139 static void
2140 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2141 {
2142         idm_so_conn_t *so_conn = ic->ic_transport_private;
2143 
2144         ASSERT(pdu->isp_ic == ic);
2145         mutex_enter(&so_conn->ic_tx_mutex);
2146 
2147         if (!so_conn->ic_tx_thread_running) {
2148                 mutex_exit(&so_conn->ic_tx_mutex);
2149                 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2150                 return;
2151         }
2152 
2153         list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2154         cv_signal(&so_conn->ic_tx_cv);
2155         mutex_exit(&so_conn->ic_tx_mutex);
2156 }
2157 
2158 static idm_status_t
2159 idm_i_so_tx(idm_pdu_t *pdu)
2160 {
2161         idm_conn_t      *ic = pdu->isp_ic;
2162         idm_status_t    status = IDM_STATUS_SUCCESS;
2163         uint8_t         pad[ISCSI_PAD_WORD_LEN];
2164         int             pad_len;
2165         uint32_t        hdr_digest_crc;
2166         uint32_t        data_digest_crc = 0;
2167         int             total_len = 0;
2168         int             iovlen = 0;
2169         struct iovec    iov[6];
2170         idm_so_conn_t   *so_conn;
2171 
2172         so_conn = ic->ic_transport_private;
2173 
2174         /* Setup BHS */
2175         iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2176         iov[iovlen].iov_len     = pdu->isp_hdrlen;
2177         total_len               += iov[iovlen].iov_len;
2178         iovlen++;
2179 
2180         /* Setup header digest */
2181         if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2182             (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2183                 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2184 
2185                 iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2186                 iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2187                 total_len               += iov[iovlen].iov_len;
2188                 iovlen++;
2189         }
2190 
2191         /* Setup the data */
2192         if (pdu->isp_datalen) {
2193                 idm_task_t              *idt;
2194                 idm_buf_t               *idb;
2195                 iscsi_data_hdr_t        *ihp;
2196                 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2197                 /* Write of immediate data */
2198                 if (ic->ic_ffp &&
2199                     (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2200                     ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2201                         idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2202                         if (idt) {
2203                                 mutex_enter(&idt->idt_mutex);
2204                                 idb = idm_buf_find(&idt->idt_outbufv, 0);
2205                                 mutex_exit(&idt->idt_mutex);
2206                                 /*
2207                                  * If the initiator call to idm_buf_alloc
2208                                  * failed then we can get to this point
2209                                  * without a bound buffer.  The associated
2210                                  * connection failure will clean things up
2211                                  * later.  It would be nice to come up with
2212                                  * a cleaner way to handle this.  In
2213                                  * particular it seems absurd to look up
2214                                  * the task and the buffer just to update
2215                                  * this counter.
2216                                  */
2217                                 if (idb)
2218                                         idb->idb_xfer_len += pdu->isp_datalen;
2219                                 idm_task_rele(idt);
2220                         }
2221                 }
2222 
2223                 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2224                 iov[iovlen].iov_len  = pdu->isp_datalen;
2225                 total_len += iov[iovlen].iov_len;
2226                 iovlen++;
2227         }
2228 
2229         /* Setup the data pad if necessary */
2230         pad_len = ((ISCSI_PAD_WORD_LEN -
2231             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2232             (ISCSI_PAD_WORD_LEN - 1));
2233 
2234         if (pad_len) {
2235                 bzero(pad, sizeof (pad));
2236                 iov[iovlen].iov_base = (void *)&pad;
2237                 iov[iovlen].iov_len  = pad_len;
2238                 total_len               += iov[iovlen].iov_len;
2239                 iovlen++;
2240         }
2241 
2242         /*
2243          * Setup the data digest if enabled.  Data-digest is not sent
2244          * for login-phase PDUs.
2245          */
2246         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2247             ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2248             (pdu->isp_datalen || pad_len)) {
2249                 /*
2250                  * RFC3720/10.2.3: A zero-length Data Segment also
2251                  * implies a zero-length data digest.
2252                  */
2253                 if (pdu->isp_datalen) {
2254                         data_digest_crc = idm_crc32c(pdu->isp_data,
2255                             pdu->isp_datalen);
2256                 }
2257                 if (pad_len) {
2258                         data_digest_crc = idm_crc32c_continued(&pad,
2259                             pad_len, data_digest_crc);
2260                 }
2261 
2262                 iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2263                 iov[iovlen].iov_len     = sizeof (data_digest_crc);
2264                 total_len               += iov[iovlen].iov_len;
2265                 iovlen++;
2266         }
2267 
2268         /* Transmit the PDU */
2269         if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2270             total_len) != 0) {
2271                 /* Set error status */
2272                 IDM_CONN_LOG(CE_WARN,
2273                     "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2274                     "data: %p", (void *) so_conn->ic_so, (void *) ic,
2275                     (void *) pdu->isp_data);
2276                 status = IDM_STATUS_IO;
2277         }
2278 
2279         /*
2280          * Success does not mean that the PDU actually reached the
2281          * remote node since it could get dropped along the way.
2282          */
2283         idm_pdu_complete(pdu, status);
2284 
2285         return (status);
2286 }
2287 
2288 /*
2289  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2290  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2291  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2292  * A target can invoke this function multiple times for a single read command
2293  * (identified by the same ITT) to split the input into several sequences.
2294  *
2295  * DataSN starts with 0 for the first data PDU of an input command and advances
2296  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2297  * which is set to 1 for the last data PDU of a sequence.
2298  * If the initiator supports phase collapse, the status bit must be set along
2299  * with the F bit to indicate that the status is shipped together with the last
2300  * Data-In PDU.
2301  *
2302  * The data PDUs within a sequence will be sent in order with the buffer offset
2303  * in increasing order. i.e. initiator and target must have negotiated the
2304  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2305  *
2306  * Caller holds idt->idt_mutex
2307  */
2308 static idm_status_t
2309 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2310 {
2311         idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2312         idm_pdu_t       tmppdu;
2313 
2314         ASSERT(mutex_owned(&idt->idt_mutex));
2315 
2316         /*
2317          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2318          * idm_sotx_thread.
2319          */
2320         mutex_enter(&so_conn->ic_tx_mutex);
2321 
2322         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2323             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2324             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2325             uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2326 
2327         if (!so_conn->ic_tx_thread_running) {
2328                 mutex_exit(&so_conn->ic_tx_mutex);
2329                 /*
2330                  * Don't release idt->idt_mutex since we're supposed to hold
2331                  * in when calling idm_buf_tx_to_ini_done
2332                  */
2333                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2334                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2335                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2336                     uint32_t, idb->idb_xfer_len,
2337                     int, XFER_BUF_TX_TO_INI);
2338                 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2339                 return (IDM_STATUS_FAIL);
2340         }
2341 
2342         /*
2343          * Build a template for the data PDU headers we will use so that
2344          * the SN values will stay consistent with other PDU's we are
2345          * transmitting like R2T and SCSI status.
2346          */
2347         bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2348         tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2349         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2350             ISCSI_OP_SCSI_DATA_RSP);
2351         idb->idb_tx_thread = B_TRUE;
2352         list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2353         cv_signal(&so_conn->ic_tx_cv);
2354         mutex_exit(&so_conn->ic_tx_mutex);
2355         mutex_exit(&idt->idt_mutex);
2356 
2357         /*
2358          * Returning success here indicates the transfer was successfully
2359          * dispatched -- it does not mean that the transfer completed
2360          * successfully.
2361          */
2362         return (IDM_STATUS_SUCCESS);
2363 }
2364 
2365 /*
2366  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2367  * data blocks it is ready to receive from the initiator in response to a WRITE
2368  * SCSI command. The target iSCSI layer passes the information about the desired
2369  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2370  * offset and datalen are passed via the 'idb' argument.
2371  *
2372  * Scope for Prototype build:
2373  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2374  * negotiated the "InitialR2T" to "Yes".
2375  *
2376  * Caller holds idt->idt_mutex
2377  */
2378 static idm_status_t
2379 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2380 {
2381         idm_pdu_t               *pdu;
2382         iscsi_rtt_hdr_t         *rtt;
2383 
2384         ASSERT(mutex_owned(&idt->idt_mutex));
2385 
2386         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2387             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2388             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2389             uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2390 
2391         pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2392         pdu->isp_ic = idt->idt_ic;
2393         pdu->isp_flags = IDM_PDU_SET_STATSN;
2394         bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2395 
2396         /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2397         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2398 
2399         /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2400         rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2401 
2402         rtt->opcode          = ISCSI_OP_RTT_RSP;
2403         rtt->flags           = ISCSI_FLAG_FINAL;
2404         rtt->data_offset     = htonl(idb->idb_bufoffset);
2405         rtt->data_length     = htonl(idb->idb_xfer_len);
2406         rtt->rttsn           = htonl(idt->idt_exp_rttsn++);
2407 
2408         /* Keep track of buffer offsets */
2409         idb->idb_exp_offset  = idb->idb_bufoffset;
2410         mutex_exit(&idt->idt_mutex);
2411 
2412         /*
2413          * Transmit the PDU.
2414          */
2415         idm_pdu_tx(pdu);
2416 
2417         return (IDM_STATUS_SUCCESS);
2418 }
2419 
2420 static idm_status_t
2421 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2422 {
2423         if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2424                 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2425                     KM_NOSLEEP);
2426                 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2427         } else {
2428                 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2429                 idb->idb_buf_private = NULL;
2430         }
2431 
2432         if (idb->idb_buf == NULL) {
2433                 IDM_CONN_LOG(CE_NOTE,
2434                     "idm_so_buf_alloc: failed buffer allocation");
2435                 return (IDM_STATUS_FAIL);
2436         }
2437 
2438         return (IDM_STATUS_SUCCESS);
2439 }
2440 
2441 /* ARGSUSED */
2442 static idm_status_t
2443 idm_so_buf_setup(idm_buf_t *idb)
2444 {
2445         /* Ensure bufalloc'd flag is unset */
2446         idb->idb_bufalloc = B_FALSE;
2447 
2448         return (IDM_STATUS_SUCCESS);
2449 }
2450 
2451 /* ARGSUSED */
2452 static void
2453 idm_so_buf_teardown(idm_buf_t *idb)
2454 {
2455         /* nothing to do here */
2456 }
2457 
2458 static void
2459 idm_so_buf_free(idm_buf_t *idb)
2460 {
2461         if (idb->idb_buf_private == NULL) {
2462                 kmem_free(idb->idb_buf, idb->idb_buflen);
2463         } else {
2464                 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2465         }
2466 }
2467 
2468 static void
2469 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2470     uint32_t offset, uint32_t length)
2471 {
2472         idm_so_conn_t   *so_conn = ic->ic_transport_private;
2473         idm_pdu_t       tmppdu;
2474         idm_buf_t       *rtt_buf;
2475 
2476         ASSERT(mutex_owned(&idt->idt_mutex));
2477 
2478         /*
2479          * Allocate a buffer to represent the RTT transfer.  We could further
2480          * optimize this by allocating the buffers internally from an rtt
2481          * specific buffer cache since this is socket-specific code but for
2482          * now we will keep it simple.
2483          */
2484         rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2485         if (rtt_buf == NULL) {
2486                 /*
2487                  * If we're in FFP then the failure was likely a resource
2488                  * allocation issue and we should close the connection by
2489                  * sending a CE_TRANSPORT_FAIL event.
2490                  *
2491                  * If we're not in FFP then idm_buf_alloc will always
2492                  * fail and the state is transitioning to "complete" anyway
2493                  * so we won't bother to send an event.
2494                  */
2495                 mutex_enter(&ic->ic_state_mutex);
2496                 if (ic->ic_ffp)
2497                         idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2498                             NULL, CT_NONE);
2499                 mutex_exit(&ic->ic_state_mutex);
2500                 mutex_exit(&idt->idt_mutex);
2501                 return;
2502         }
2503 
2504         rtt_buf->idb_buf_cb = NULL;
2505         rtt_buf->idb_cb_arg = NULL;
2506         rtt_buf->idb_bufoffset = offset;
2507         rtt_buf->idb_xfer_len = length;
2508         rtt_buf->idb_ic = idt->idt_ic;
2509         rtt_buf->idb_task_binding = idt;
2510 
2511         /*
2512          * The new buffer (if any) represents an additional
2513          * reference on the task
2514          */
2515         idm_task_hold(idt);
2516         mutex_exit(&idt->idt_mutex);
2517 
2518         /*
2519          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2520          * idm_sotx_thread.
2521          */
2522         mutex_enter(&so_conn->ic_tx_mutex);
2523 
2524         if (!so_conn->ic_tx_thread_running) {
2525                 idm_buf_free(rtt_buf);
2526                 mutex_exit(&so_conn->ic_tx_mutex);
2527                 idm_task_rele(idt);
2528                 return;
2529         }
2530 
2531         /*
2532          * Build a template for the data PDU headers we will use so that
2533          * the SN values will stay consistent with other PDU's we are
2534          * transmitting like R2T and SCSI status.
2535          */
2536         bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2537         tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2538         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2539             ISCSI_OP_SCSI_DATA);
2540         rtt_buf->idb_tx_thread = B_TRUE;
2541         rtt_buf->idb_in_transport = B_TRUE;
2542         list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2543         cv_signal(&so_conn->ic_tx_cv);
2544         mutex_exit(&so_conn->ic_tx_mutex);
2545 }
2546 
2547 static void
2548 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2549 {
2550         /*
2551          * Don't worry about status -- we assume any error handling
2552          * is performed by the caller (idm_sotx_thread).
2553          */
2554         idb->idb_in_transport = B_FALSE;
2555         idm_task_rele(idt);
2556         idm_buf_free(idb);
2557 }
2558 
2559 static idm_status_t
2560 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2561     uint32_t buf_region_offset, uint32_t buf_region_length)
2562 {
2563         idm_conn_t              *ic;
2564         uint32_t                max_dataseglen;
2565         size_t                  remainder, chunk;
2566         uint32_t                data_offset = buf_region_offset;
2567         iscsi_data_hdr_t        *bhs;
2568         idm_pdu_t               *pdu;
2569         idm_status_t            tx_status;
2570 
2571         ASSERT(mutex_owned(&idt->idt_mutex));
2572 
2573         ic = idt->idt_ic;
2574 
2575         max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2576         remainder = buf_region_length;
2577 
2578         while (remainder) {
2579                 if (idt->idt_state != TASK_ACTIVE) {
2580                         ASSERT((idt->idt_state != TASK_IDLE) &&
2581                             (idt->idt_state != TASK_COMPLETE));
2582                         return (IDM_STATUS_ABORTED);
2583                 }
2584 
2585                 /* check to see if we need to chunk the data */
2586                 if (remainder > max_dataseglen) {
2587                         chunk = max_dataseglen;
2588                 } else {
2589                         chunk = remainder;
2590                 }
2591 
2592                 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2593                 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2594                 pdu->isp_ic = ic;
2595                 pdu->isp_flags = 0;  /* initialize isp_flags */
2596 
2597                 /*
2598                  * We've already built a build a header template
2599                  * to use during the transfer.  Use this template so that
2600                  * the SN values stay consistent with any unrelated PDU's
2601                  * being transmitted.
2602                  */
2603                 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2604                     sizeof (iscsi_hdr_t));
2605 
2606                 /*
2607                  * Set DataSN, data offset, and flags in BHS
2608                  * For the prototype build, A = 0, S = 0, U = 0
2609                  */
2610                 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2611 
2612                 bhs->datasn          = htonl(idt->idt_exp_datasn++);
2613 
2614                 hton24(bhs->dlength, chunk);
2615                 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2616 
2617                 /* setup data */
2618                 pdu->isp_data        =  (uint8_t *)idb->idb_buf + data_offset;
2619                 pdu->isp_datalen = (uint_t)chunk;
2620 
2621                 if (chunk == remainder) {
2622                         bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2623                         /* Piggyback the status with the last data PDU */
2624                         if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2625                                 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2626                                     IDM_PDU_ADVANCE_STATSN;
2627                                 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2628                                     (idt, pdu);
2629                                 idt->idt_flags |=
2630                                     IDM_TASK_PHASECOLLAPSE_SUCCESS;
2631 
2632                         }
2633                 }
2634 
2635                 remainder       -= chunk;
2636                 data_offset     += chunk;
2637 
2638                 /* Instrument the data-send DTrace probe. */
2639                 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2640                         DTRACE_ISCSI_2(data__send,
2641                             idm_conn_t *, idt->idt_ic,
2642                             iscsi_data_rsp_hdr_t *,
2643                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2644                 }
2645 
2646                 /*
2647                  * Now that we're done working with idt_exp_datasn,
2648                  * idt->idt_state and idb->idb_bufoffset we can release
2649                  * the task lock -- don't want to hold it across the
2650                  * call to idm_i_so_tx since we could block.
2651                  */
2652                 mutex_exit(&idt->idt_mutex);
2653 
2654                 /*
2655                  * Transmit the PDU.  Call the internal routine directly
2656                  * as there is already implicit ordering.
2657                  */
2658                 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2659                         mutex_enter(&idt->idt_mutex);
2660                         return (tx_status);
2661                 }
2662 
2663                 mutex_enter(&idt->idt_mutex);
2664                 idt->idt_tx_bytes += chunk;
2665         }
2666 
2667         return (IDM_STATUS_SUCCESS);
2668 }
2669 
2670 /*
2671  * TX PDU cache
2672  */
2673 /* ARGSUSED */
2674 int
2675 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2676 {
2677         idm_pdu_t       *pdu = hdl;
2678 
2679         bzero(pdu, sizeof (idm_pdu_t));
2680         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2681         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2682         pdu->isp_callback = idm_sotx_cache_pdu_cb;
2683         pdu->isp_magic = IDM_PDU_MAGIC;
2684         bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2685 
2686         return (0);
2687 }
2688 
2689 /* ARGSUSED */
2690 void
2691 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2692 {
2693         /* reset values between use */
2694         pdu->isp_datalen = 0;
2695 
2696         kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2697 }
2698 
2699 /*
2700  * RX PDU cache
2701  */
2702 /* ARGSUSED */
2703 int
2704 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2705 {
2706         idm_pdu_t       *pdu = hdl;
2707 
2708         bzero(pdu, sizeof (idm_pdu_t));
2709         pdu->isp_magic = IDM_PDU_MAGIC;
2710         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2711         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2712 
2713         return (0);
2714 }
2715 
2716 /* ARGSUSED */
2717 static void
2718 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2719 {
2720         pdu->isp_iovlen = 0;
2721         pdu->isp_sorx_buf = 0;
2722         kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2723 }
2724 
2725 static void
2726 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2727 {
2728         /*
2729          * We had to modify our cached RX PDU with a longer header buffer
2730          * and/or a longer data buffer.  Release the new buffers and fix
2731          * the fields back to what we would expect for a cached RX PDU.
2732          */
2733         if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2734                 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2735         }
2736         if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2737                 kmem_free(pdu->isp_data, pdu->isp_datalen);
2738         }
2739         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2740         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2741         pdu->isp_data = NULL;
2742         pdu->isp_datalen = 0;
2743         pdu->isp_sorx_buf = 0;
2744         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2745         idm_sorx_cache_pdu_cb(pdu, status);
2746 }
2747 
2748 /*
2749  * This thread is only active when I/O is queued for transmit
2750  * because the socket is busy.
2751  */
2752 void
2753 idm_sotx_thread(void *arg)
2754 {
2755         idm_conn_t      *ic = arg;
2756         idm_tx_obj_t    *object, *next;
2757         idm_so_conn_t   *so_conn;
2758         idm_status_t    status = IDM_STATUS_SUCCESS;
2759 
2760         idm_conn_hold(ic);
2761 
2762         mutex_enter(&ic->ic_mutex);
2763         so_conn = ic->ic_transport_private;
2764         so_conn->ic_tx_thread_running = B_TRUE;
2765         so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2766         cv_signal(&ic->ic_cv);
2767         mutex_exit(&ic->ic_mutex);
2768 
2769         mutex_enter(&so_conn->ic_tx_mutex);
2770 
2771         while (so_conn->ic_tx_thread_running) {
2772                 while (list_is_empty(&so_conn->ic_tx_list)) {
2773                         DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2774                         cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2775                         DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2776 
2777                         if (!so_conn->ic_tx_thread_running) {
2778                                 goto tx_bail;
2779                         }
2780                 }
2781 
2782                 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2783                 list_remove(&so_conn->ic_tx_list, object);
2784                 mutex_exit(&so_conn->ic_tx_mutex);
2785 
2786                 switch (object->idm_tx_obj_magic) {
2787                 case IDM_PDU_MAGIC: {
2788                         idm_pdu_t *pdu = (idm_pdu_t *)object;
2789                         DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2790                             idm_pdu_t *, (idm_pdu_t *)object);
2791 
2792                         if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2793                                 /* No IDM task */
2794                                 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2795                         }
2796                         status = idm_i_so_tx((idm_pdu_t *)object);
2797                         break;
2798                 }
2799                 case IDM_BUF_MAGIC: {
2800                         idm_buf_t *idb = (idm_buf_t *)object;
2801                         idm_task_t *idt = idb->idb_task_binding;
2802 
2803                         DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2804                             idm_buf_t *, idb);
2805 
2806                         mutex_enter(&idt->idt_mutex);
2807                         status = idm_so_send_buf_region(idt,
2808                             idb, 0, idb->idb_xfer_len);
2809 
2810                         /*
2811                          * TX thread owns the buffer so we expect it to
2812                          * be "in transport"
2813                          */
2814                         ASSERT(idb->idb_in_transport);
2815                         if (IDM_CONN_ISTGT(ic)) {
2816                                 /*
2817                                  * idm_buf_tx_to_ini_done releases
2818                                  * idt->idt_mutex
2819                                  */
2820                                 DTRACE_ISCSI_8(xfer__done,
2821                                     idm_conn_t *, idt->idt_ic,
2822                                     uintptr_t, idb->idb_buf,
2823                                     uint32_t, idb->idb_bufoffset,
2824                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2825                                     uint32_t, idb->idb_xfer_len,
2826                                     int, XFER_BUF_TX_TO_INI);
2827                                 idm_buf_tx_to_ini_done(idt, idb, status);
2828                         } else {
2829                                 idm_so_send_rtt_data_done(idt, idb);
2830                                 mutex_exit(&idt->idt_mutex);
2831                         }
2832                         break;
2833                 }
2834 
2835                 default:
2836                         IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2837                             "(0x%08x)", object->idm_tx_obj_magic);
2838                         status = IDM_STATUS_FAIL;
2839                 }
2840 
2841                 mutex_enter(&so_conn->ic_tx_mutex);
2842 
2843                 if (status != IDM_STATUS_SUCCESS) {
2844                         so_conn->ic_tx_thread_running = B_FALSE;
2845                         idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2846                 }
2847         }
2848 
2849         /*
2850          * Before we leave, we need to abort every item remaining in the
2851          * TX list.
2852          */
2853 
2854 tx_bail:
2855         object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2856 
2857         while (object != NULL) {
2858                 next = list_next(&so_conn->ic_tx_list, object);
2859 
2860                 list_remove(&so_conn->ic_tx_list, object);
2861                 switch (object->idm_tx_obj_magic) {
2862                 case IDM_PDU_MAGIC:
2863                         idm_pdu_complete((idm_pdu_t *)object,
2864                             IDM_STATUS_ABORTED);
2865                         break;
2866 
2867                 case IDM_BUF_MAGIC: {
2868                         idm_buf_t *idb = (idm_buf_t *)object;
2869                         idm_task_t *idt = idb->idb_task_binding;
2870                         mutex_exit(&so_conn->ic_tx_mutex);
2871                         mutex_enter(&idt->idt_mutex);
2872                         /*
2873                          * TX thread owns the buffer so we expect it to
2874                          * be "in transport"
2875                          */
2876                         ASSERT(idb->idb_in_transport);
2877                         if (IDM_CONN_ISTGT(ic)) {
2878                                 /*
2879                                  * idm_buf_tx_to_ini_done releases
2880                                  * idt->idt_mutex
2881                                  */
2882                                 DTRACE_ISCSI_8(xfer__done,
2883                                     idm_conn_t *, idt->idt_ic,
2884                                     uintptr_t, idb->idb_buf,
2885                                     uint32_t, idb->idb_bufoffset,
2886                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2887                                     uint32_t, idb->idb_xfer_len,
2888                                     int, XFER_BUF_TX_TO_INI);
2889                                 idm_buf_tx_to_ini_done(idt, idb,
2890                                     IDM_STATUS_ABORTED);
2891                         } else {
2892                                 idm_so_send_rtt_data_done(idt, idb);
2893                                 mutex_exit(&idt->idt_mutex);
2894                         }
2895                         mutex_enter(&so_conn->ic_tx_mutex);
2896                         break;
2897                 }
2898                 default:
2899                         IDM_CONN_LOG(CE_WARN,
2900                             "idm_sotx_thread: Unexpected magic "
2901                             "(0x%08x)", object->idm_tx_obj_magic);
2902                 }
2903 
2904                 object = next;
2905         }
2906 
2907         mutex_exit(&so_conn->ic_tx_mutex);
2908         idm_conn_rele(ic);
2909         thread_exit();
2910         /*NOTREACHED*/
2911 }
2912 
2913 static void
2914 idm_so_socket_set_nonblock(struct sonode *node)
2915 {
2916         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2917             (node->so_state | FNONBLOCK), CRED(), NULL);
2918 }
2919 
2920 static void
2921 idm_so_socket_set_block(struct sonode *node)
2922 {
2923         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2924             (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2925 }
2926 
2927 
2928 /*
2929  * Called by kernel sockets when the connection has been accepted or
2930  * rejected. In early volo, a "disconnect" callback was sent instead of
2931  * "connectfailed", so we check for both.
2932  */
2933 /* ARGSUSED */
2934 void
2935 idm_so_timed_socket_connect_cb(ksocket_t ks,
2936     ksocket_callback_event_t ev, void *arg, uintptr_t info)
2937 {
2938         idm_so_timed_socket_t   *itp = arg;
2939         ASSERT(itp != NULL);
2940         ASSERT(ev == KSOCKET_EV_CONNECTED ||
2941             ev == KSOCKET_EV_CONNECTFAILED ||
2942             ev == KSOCKET_EV_DISCONNECTED);
2943 
2944         mutex_enter(&idm_so_timed_socket_mutex);
2945         itp->it_callback_called = B_TRUE;
2946         if (ev == KSOCKET_EV_CONNECTED) {
2947                 itp->it_socket_error_code = 0;
2948         } else {
2949                 /* Make sure the error code is non-zero on error */
2950                 if (info == 0)
2951                         info = ECONNRESET;
2952                 itp->it_socket_error_code = (int)info;
2953         }
2954         cv_signal(&itp->it_cv);
2955         mutex_exit(&idm_so_timed_socket_mutex);
2956 }
2957 
2958 int
2959 idm_so_timed_socket_connect(ksocket_t ks,
2960     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2961 {
2962         clock_t                 conn_login_max;
2963         int                     rc, nonblocking, rval;
2964         idm_so_timed_socket_t   it;
2965         ksocket_callbacks_t     ks_cb;
2966 
2967         conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2968 
2969         /*
2970          * Set to non-block socket mode, with callback on connect
2971          * Early volo used "disconnected" instead of "connectfailed",
2972          * so set callback to look for both.
2973          */
2974         bzero(&it, sizeof (it));
2975         ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2976             KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2977         ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2978         ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2979         ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2980         cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2981         rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2982         if (rc != 0)
2983                 return (rc);
2984 
2985         /* Set to non-blocking mode */
2986         nonblocking = 1;
2987         rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2988             CRED());
2989         if (rc != 0)
2990                 goto cleanup;
2991 
2992         bzero(&it, sizeof (it));
2993         for (;;) {
2994                 /*
2995                  * Warning -- in a loopback scenario, the call to
2996                  * the connect_cb can occur inside the call to
2997                  * ksocket_connect. Do not hold the mutex around the
2998                  * call to ksocket_connect.
2999                  */
3000                 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3001                 if (rc == 0 || rc == EISCONN) {
3002                         /* socket success or already success */
3003                         rc = 0;
3004                         break;
3005                 }
3006                 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3007                         break;
3008                 }
3009 
3010                 /* TCP connect still in progress. See if out of time. */
3011                 if (ddi_get_lbolt() > conn_login_max) {
3012                         /*
3013                          * Connection retry timeout,
3014                          * failed connect to target.
3015                          */
3016                         rc = ETIMEDOUT;
3017                         break;
3018                 }
3019 
3020                 /*
3021                  * TCP connect still in progress.  Sleep until callback.
3022                  * Do NOT go to sleep if the callback already occurred!
3023                  */
3024                 mutex_enter(&idm_so_timed_socket_mutex);
3025                 if (!it.it_callback_called) {
3026                         (void) cv_timedwait(&it.it_cv,
3027                             &idm_so_timed_socket_mutex, conn_login_max);
3028                 }
3029                 if (it.it_callback_called) {
3030                         rc = it.it_socket_error_code;
3031                         mutex_exit(&idm_so_timed_socket_mutex);
3032                         break;
3033                 }
3034                 /* If timer expires, go call ksocket_connect one last time. */
3035                 mutex_exit(&idm_so_timed_socket_mutex);
3036         }
3037 
3038         /* resume blocking mode */
3039         nonblocking = 0;
3040         (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3041             CRED());
3042 cleanup:
3043         (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3044         cv_destroy(&it.it_cv);
3045         if (rc != 0) {
3046                 idm_soshutdown(ks);
3047         }
3048         return (rc);
3049 }
3050 
3051 
3052 void
3053 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3054 {
3055         int                     dp_addr_size;
3056         struct sockaddr_in      *sin;
3057         struct sockaddr_in6     *sin6;
3058 
3059         /* Build sockaddr_storage for this portal (idm_addr_t) */
3060         bzero(sa, sizeof (*sa));
3061         dp_addr_size = dportal->a_addr.i_insize;
3062         if (dp_addr_size == sizeof (struct in_addr)) {
3063                 /* IPv4 */
3064                 sa->ss_family = AF_INET;
3065                 sin = (struct sockaddr_in *)sa;
3066                 sin->sin_port = htons(dportal->a_port);
3067                 bcopy(&dportal->a_addr.i_addr.in4,
3068                     &sin->sin_addr, sizeof (struct in_addr));
3069         } else if (dp_addr_size == sizeof (struct in6_addr)) {
3070                 /* IPv6 */
3071                 sa->ss_family = AF_INET6;
3072                 sin6 = (struct sockaddr_in6 *)sa;
3073                 sin6->sin6_port = htons(dportal->a_port);
3074                 bcopy(&dportal->a_addr.i_addr.in6,
3075                     &sin6->sin6_addr, sizeof (struct in6_addr));
3076         } else {
3077                 ASSERT(0);
3078         }
3079 }
3080 
3081 
/*
 * Format a sockaddr_storage as "[ip-address].port" for use in log
 * messages.  The text is written to 'buf' (at most 'size' bytes) and 'buf'
 * is returned.  If the family is neither AF_INET nor AF_INET6, the
 * address cannot be converted, or 'buf' is too small for the address plus
 * the largest possible port, the placeholder "[0].-1" is written instead.
 *
 * When several idm_sa_ntop results are passed to one logging call, each
 * needs a buffer of its own.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
        static const char bogus_ip[] = "[0].-1";
        char            addr_str[INET6_ADDRSTRLEN];
        const void      *addr = NULL;
        int             af = 0;
        unsigned int    port = 0;

        /* Pick out the family-specific address and port */
        if (sa->ss_family == AF_INET6) {
                const struct sockaddr_in6 *v6 =
                    (const struct sockaddr_in6 *)sa;

                af = v6->sin6_family;
                addr = &v6->sin6_addr;
                port = ntohs(v6->sin6_port);
        } else if (sa->ss_family == AF_INET) {
                const struct sockaddr_in *v4 =
                    (const struct sockaddr_in *)sa;

                af = v4->sin_family;
                addr = &v4->sin_addr;
                port = ntohs(v4->sin_port);
        }

        /*
         * Only format when the conversion works and the caller's buffer
         * can hold the address plus "[]." and a five digit port.
         */
        if (addr != NULL &&
            inet_ntop(af, addr, addr_str, sizeof (addr_str)) != NULL &&
            strlen(addr_str) + sizeof ("[].65535") <= size) {
                (void) snprintf(buf, size, "[%s].%u", addr_str, port);
                return (buf);
        }

        (void) snprintf(buf, size, "%s", bogus_ip);
        return (buf);
}