1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2013 by Delphix. All rights reserved.
  27  */
  28 
  29 #include <sys/conf.h>
  30 #include <sys/stat.h>
  31 #include <sys/file.h>
  32 #include <sys/ddi.h>
  33 #include <sys/sunddi.h>
  34 #include <sys/modctl.h>
  35 #include <sys/priv.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/socket.h>
  38 #include <sys/strsubr.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/sdt.h>
  41 #include <netinet/tcp.h>
  42 #include <inet/tcp.h>
  43 #include <sys/socketvar.h>
  44 #include <sys/pathname.h>
  45 #include <sys/fs/snode.h>
  46 #include <sys/fs/dv_node.h>
  47 #include <sys/vnode.h>
  48 #include <netinet/in.h>
  49 #include <net/if.h>
  50 #include <sys/sockio.h>
  51 #include <sys/ksocket.h>
  52 #include <sys/filio.h>            /* FIONBIO */
  53 #include <sys/iscsi_protocol.h>
  54 #include <sys/idm/idm.h>
  55 #include <sys/idm/idm_so.h>
  56 #include <sys/idm/idm_text.h>
  57 
/*
 * NOTE(review): IN_PROGRESS_DELAY is not referenced by the code near this
 * definition; its unit and its user should be confirmed before changing it.
 */
#define IN_PROGRESS_DELAY       1

/*
 * File-local copy of the IPv6 "any" (unspecified) address, built from
 * IN6ADDR_ANY_INIT.  The value is currently all zeroes, but the macro is
 * used rather than a literal in case that ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  65 
/*
 * Forward declarations of file-local (static) functions so that they can be
 * referenced before their definitions.
 */

/*
 * PDU callbacks.  idm_sorx_addl_pdu_cb is installed as pdu->isp_callback by
 * idm_sorecvhdr() when a PDU header had to be allocated separately.
 */
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);

/* Connection create/destroy/connect helpers */
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);

/* Socket option setup and PDU transmit helper */
static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
    boolean_t boot_conn);
static void idm_set_postconnect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);

/* Data receive/transmit helpers */
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
    idm_buf_t *idb, uint32_t offset, uint32_t length);
static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);

static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
    uint32_t ro, uint32_t dlength);

static idm_status_t idm_so_handle_digest(idm_conn_t *it,
    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);

static void idm_so_socket_set_nonblock(struct sonode *node);
static void idm_so_socket_set_block(struct sonode *node);

/*
 * Transport ops prototypes
 *
 * Each function declared here is referenced by the idm_so_transport_ops
 * table that follows.
 */
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static void idm_so_notice_key_values(idm_conn_t *it,
    nvlist_t *negotiated_nvl);
static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 127 
/*
 * IDM Native Sockets transport operations
 *
 * This is a positional initializer: the entries must stay in the same order
 * as the members of idm_transport_ops_t (the trailing comment on each line
 * names the member the entry is intended for).  NULL entries are operations
 * for which the sockets transport supplies no function.  The same
 * idm_so_conn_disconnect routine is used for both the target and the
 * initiator disconnect slots.  idm_so_init() installs this table as the
 * transport's it_ops.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
        idm_so_tx,                      /* it_tx_pdu */
        idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
        idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
        idm_so_rx_datain,               /* it_rx_datain */
        idm_so_rx_rtt,                  /* it_rx_rtt */
        idm_so_rx_dataout,              /* it_rx_dataout */
        NULL,                           /* it_alloc_conn_rsrc */
        NULL,                           /* it_free_conn_rsrc */
        NULL,                           /* it_tgt_enable_datamover */
        NULL,                           /* it_ini_enable_datamover */
        NULL,                           /* it_conn_terminate */
        idm_so_free_task_rsrc,          /* it_free_task_rsrc */
        idm_so_negotiate_key_values,    /* it_negotiate_key_values */
        idm_so_notice_key_values,       /* it_notice_key_values */
        idm_so_conn_is_capable,         /* it_conn_is_capable */
        idm_so_buf_alloc,               /* it_buf_alloc */
        idm_so_buf_free,                /* it_buf_free */
        idm_so_buf_setup,               /* it_buf_setup */
        idm_so_buf_teardown,            /* it_buf_teardown */
        idm_so_tgt_svc_create,          /* it_tgt_svc_create */
        idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
        idm_so_tgt_svc_online,          /* it_tgt_svc_online */
        idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
        idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
        idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
        idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
        idm_so_ini_conn_create,         /* it_ini_conn_create */
        idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
        idm_so_ini_conn_connect,        /* it_ini_conn_connect */
        idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
        idm_so_declare_key_values       /* it_declare_key_values */
};
 165 
/*
 * Initialized by idm_so_init() and destroyed by idm_so_fini().
 * NOTE(review): the data this mutex protects is not evident from the
 * surrounding code -- confirm against its users.
 */
kmutex_t        idm_so_timed_socket_mutex;

/*
 * Socket send/receive buffer sizes.  idm_set_postconnect_options() passes
 * these to the socket as the SO_SNDBUF and SO_RCVBUF option values.
 */
int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
 170 
 171 /*
 172  * idm_so_init()
 173  * Sockets transport initialization
 174  */
 175 void
 176 idm_so_init(idm_transport_t *it)
 177 {
 178         /* Cache for IDM Data and R2T Transmit PDU's */
 179         idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 180             sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 181             &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 182 
 183         /* Cache for IDM Receive PDU's */
 184         idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 185             sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 186             &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 187 
 188         /* 128k buffer cache */
 189         idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 190             IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 191 
 192         /* Set the sockets transport ops */
 193         it->it_ops = &idm_so_transport_ops;
 194 
 195         mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 196 
 197 }
 198 
 199 /*
 200  * idm_so_fini()
 201  * Sockets transport teardown
 202  */
 203 void
 204 idm_so_fini(void)
 205 {
 206         kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 207         kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 208         kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 209         mutex_destroy(&idm_so_timed_socket_mutex);
 210 }
 211 
 212 ksocket_t
 213 idm_socreate(int domain, int type, int protocol)
 214 {
 215         ksocket_t ks;
 216 
 217         if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 218             CRED())) {
 219                 return (ks);
 220         } else {
 221                 return (NULL);
 222         }
 223 }
 224 
 225 /*
 226  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 227  * reception and transmission.  The sonode still exists but its state
 228  * gets modified to indicate it is no longer connected.  Calls to
 229  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
 230  * regain control of a thread stuck in idm_sorecv.
 231  */
 232 void
 233 idm_soshutdown(ksocket_t so)
 234 {
 235         (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
 236 }
 237 
 238 /*
 239  * idm_sodestroy releases all resources associated with a socket previously
 240  * created with idm_socreate.  The socket must be shutdown using
 241  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 242  * otherwise undefined behavior will result.
 243  */
 244 void
 245 idm_sodestroy(ksocket_t ks)
 246 {
 247         (void) ksocket_close(ks, CRED());
 248 }
 249 
 250 /*
 251  * Function to compare two addresses in sockaddr_storage format
 252  */
 253 
 254 int
 255 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 256     const struct sockaddr_storage *cmp_ss2,
 257     boolean_t v4_mapped_as_v4,
 258     boolean_t compare_ports)
 259 {
 260         struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 261         const struct sockaddr_storage           *ss1, *ss2;
 262         struct in_addr                          *in1, *in2;
 263         struct in6_addr                         *in61, *in62;
 264         int i;
 265 
 266         /*
 267          * Normalize V4-mapped IPv6 addresses into V4 format if
 268          * v4_mapped_as_v4 is B_TRUE.
 269          */
 270         ss1 = cmp_ss1;
 271         ss2 = cmp_ss2;
 272         if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 273                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 274                 if (IN6_IS_ADDR_V4MAPPED(in61)) {
 275                         bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 276                         mapped_v4_ss1.ss_family = AF_INET;
 277                         ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 278                             ((struct sockaddr_in *)ss1)->sin_port;
 279                         IN6_V4MAPPED_TO_INADDR(in61,
 280                             &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 281                         ss1 = &mapped_v4_ss1;
 282                 }
 283         }
 284         ss2 = cmp_ss2;
 285         if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 286                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 287                 if (IN6_IS_ADDR_V4MAPPED(in62)) {
 288                         bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 289                         mapped_v4_ss2.ss_family = AF_INET;
 290                         ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 291                             ((struct sockaddr_in *)ss2)->sin_port;
 292                         IN6_V4MAPPED_TO_INADDR(in62,
 293                             &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 294                         ss2 = &mapped_v4_ss2;
 295                 }
 296         }
 297 
 298         /*
 299          * Compare ports, then address family, then ip address
 300          */
 301         if (compare_ports &&
 302             (((struct sockaddr_in *)ss1)->sin_port !=
 303             ((struct sockaddr_in *)ss2)->sin_port)) {
 304                 if (((struct sockaddr_in *)ss1)->sin_port >
 305                     ((struct sockaddr_in *)ss2)->sin_port)
 306                         return (1);
 307                 else
 308                         return (-1);
 309         }
 310 
 311         /*
 312          * ports are the same
 313          */
 314         if (ss1->ss_family != ss2->ss_family) {
 315                 if (ss1->ss_family == AF_INET)
 316                         return (1);
 317                 else
 318                         return (-1);
 319         }
 320 
 321         /*
 322          * address families are the same
 323          */
 324         if (ss1->ss_family == AF_INET) {
 325                 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 326                 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 327 
 328                 if (in1->s_addr > in2->s_addr)
 329                         return (1);
 330                 else if (in1->s_addr < in2->s_addr)
 331                         return (-1);
 332                 else
 333                         return (0);
 334         } else if (ss1->ss_family == AF_INET6) {
 335                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 336                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 337 
 338                 for (i = 0; i < 4; i++) {
 339                         if (in61->s6_addr32[i] > in62->s6_addr32[i])
 340                                 return (1);
 341                         else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 342                                 return (-1);
 343                 }
 344                 return (0);
 345         }
 346 
 347         return (1);
 348 }
 349 
 350 /*
 351  * IP address filter functions to flag addresses that should not
 352  * go out to initiators through discovery.
 353  */
 354 static boolean_t
 355 idm_v4_addr_okay(struct in_addr *in_addr)
 356 {
 357         in_addr_t addr = ntohl(in_addr->s_addr);
 358 
 359         if ((INADDR_NONE == addr) ||
 360             (IN_MULTICAST(addr)) ||
 361             ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 362             ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 363                 return (B_FALSE);
 364         }
 365         return (B_TRUE);
 366 }
 367 
 368 static boolean_t
 369 idm_v6_addr_okay(struct in6_addr *addr6)
 370 {
 371 
 372         if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 373             (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 374             (IN6_IS_ADDR_MULTICAST(addr6)) ||
 375             (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 376             (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 377             (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 378                 return (B_FALSE);
 379         }
 380         return (B_TRUE);
 381 }
 382 
 383 /*
 384  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 385  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 386  */
 387 int
 388 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
 389 {
 390         ksocket_t               so4, so6;
 391         struct lifnum           lifn;
 392         struct lifconf          lifc;
 393         struct lifreq           *lp;
 394         int                     rval;
 395         int                     numifs;
 396         int                     bufsize;
 397         void                    *buf;
 398         int                     i, j, n, rc;
 399         struct sockaddr_storage ss;
 400         struct sockaddr_in      *sin;
 401         struct sockaddr_in6     *sin6;
 402         idm_addr_t              *ip;
 403         idm_addr_list_t         *ipaddr = NULL;
 404         int                     size_ipaddr;
 405 
 406         *ipaddr_p = NULL;
 407         size_ipaddr = 0;
 408         buf = NULL;
 409 
 410         /* create an ipv4 and ipv6 UDP socket */
 411         if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
 412                 return (0);
 413         if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
 414                 idm_sodestroy(so6);
 415                 return (0);
 416         }
 417 
 418 
 419 retry_count:
 420         /* snapshot the current number of interfaces */
 421         lifn.lifn_family = PF_UNSPEC;
 422         lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 423         lifn.lifn_count = 0;
 424         /* use vp6 for ioctls with unspecified families by default */
 425         if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
 426             != 0) {
 427                 goto cleanup;
 428         }
 429 
 430         numifs = lifn.lifn_count;
 431         if (numifs <= 0) {
 432                 goto cleanup;
 433         }
 434 
 435         /* allocate extra room in case more interfaces appear */
 436         numifs += 10;
 437 
 438         /* get the interface names and ip addresses */
 439         bufsize = numifs * sizeof (struct lifreq);
 440         buf = kmem_alloc(bufsize, KM_SLEEP);
 441 
 442         lifc.lifc_family = AF_UNSPEC;
 443         lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 444         lifc.lifc_len = bufsize;
 445         lifc.lifc_buf = buf;
 446         rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
 447         if (rc != 0) {
 448                 goto cleanup;
 449         }
 450         /* if our extra room is used up, try again */
 451         if (bufsize <= lifc.lifc_len) {
 452                 kmem_free(buf, bufsize);
 453                 buf = NULL;
 454                 goto retry_count;
 455         }
 456         /* calc actual number of ifconfs */
 457         n = lifc.lifc_len / sizeof (struct lifreq);
 458 
 459         /* get ip address */
 460         if (n > 0) {
 461                 size_ipaddr = sizeof (idm_addr_list_t) +
 462                     (n - 1) * sizeof (idm_addr_t);
 463                 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
 464         } else {
 465                 goto cleanup;
 466         }
 467 
 468         /*
 469          * Examine the array of interfaces and filter uninteresting ones
 470          */
 471         for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 472 
 473                 /*
 474                  * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
 475                  */
 476                 ss = lp->lifr_addr;
 477                 /*
 478                  * fetch the flags using the socket of the correct family
 479                  */
 480                 switch (ss.ss_family) {
 481                 case AF_INET:
 482                         rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
 483                             &rval, CRED());
 484                         break;
 485                 case AF_INET6:
 486                         rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
 487                             &rval, CRED());
 488                         break;
 489                 default:
 490                         continue;
 491                 }
 492                 if (rc == 0) {
 493                         /*
 494                          * If we got the flags, skip uninteresting
 495                          * interfaces based on flags
 496                          */
 497                         if ((lp->lifr_flags & IFF_UP) != IFF_UP)
 498                                 continue;
 499                         if (lp->lifr_flags &
 500                             (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 501                                 continue;
 502                 }
 503 
 504                 /* save ip address */
 505                 ip = &ipaddr->al_addrs[j];
 506                 switch (ss.ss_family) {
 507                 case AF_INET:
 508                         sin = (struct sockaddr_in *)&ss;
 509                         if (!idm_v4_addr_okay(&sin->sin_addr))
 510                                 continue;
 511                         ip->a_addr.i_addr.in4 = sin->sin_addr;
 512                         ip->a_addr.i_insize = sizeof (struct in_addr);
 513                         break;
 514                 case AF_INET6:
 515                         sin6 = (struct sockaddr_in6 *)&ss;
 516                         if (!idm_v6_addr_okay(&sin6->sin6_addr))
 517                                 continue;
 518                         ip->a_addr.i_addr.in6 = sin6->sin6_addr;
 519                         ip->a_addr.i_insize = sizeof (struct in6_addr);
 520                         break;
 521                 default:
 522                         continue;
 523                 }
 524                 j++;
 525         }
 526 
 527         if (j == 0) {
 528                 /* no valid ifaddr */
 529                 kmem_free(ipaddr, size_ipaddr);
 530                 size_ipaddr = 0;
 531                 ipaddr = NULL;
 532         } else {
 533                 ipaddr->al_out_cnt = j;
 534         }
 535 
 536 
 537 cleanup:
 538         idm_sodestroy(so6);
 539         idm_sodestroy(so4);
 540 
 541         if (buf != NULL)
 542                 kmem_free(buf, bufsize);
 543 
 544         *ipaddr_p = ipaddr;
 545         return (size_ipaddr);
 546 }
 547 
 548 int
 549 idm_sorecv(ksocket_t so, void *msg, size_t len)
 550 {
 551         iovec_t iov;
 552 
 553         ASSERT(so != NULL);
 554         ASSERT(len != 0);
 555 
 556         /*
 557          * Fill in iovec and receive data
 558          */
 559         iov.iov_base = msg;
 560         iov.iov_len = len;
 561 
 562         return (idm_iov_sorecv(so, &iov, 1, len));
 563 }
 564 
 565 /*
 566  * idm_sosendto - Sends a buffered data on a non-connected socket.
 567  *
 568  * This function puts the data provided on the wire by calling sosendmsg.
 569  * It will return only when all the data has been sent or if an error
 570  * occurs.
 571  *
 572  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 573  * -1 if sosendmsg returns success but uio_resid != 0
 574  */
 575 int
 576 idm_sosendto(ksocket_t so, void *buff, size_t len,
 577     struct sockaddr *name, socklen_t namelen)
 578 {
 579         struct msghdr           msg;
 580         struct iovec            iov[1];
 581         int                     error;
 582         size_t                  sent = 0;
 583 
 584         iov[0].iov_base = buff;
 585         iov[0].iov_len  = len;
 586 
 587         /* Initialization of the message header. */
 588         bzero(&msg, sizeof (msg));
 589         msg.msg_iov     = iov;
 590         msg.msg_iovlen  = 1;
 591         msg.msg_name    = name;
 592         msg.msg_namelen = namelen;
 593 
 594         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 595                 /* Data sent */
 596                 if (sent == len) {
 597                         /* All data sent.  Success. */
 598                         return (0);
 599                 } else {
 600                         /* Not all data was sent.  Failure */
 601                         return (-1);
 602                 }
 603         }
 604 
 605         /* Send failed */
 606         return (error);
 607 }
 608 
 609 /*
 610  * idm_iov_sosend - Sends an iovec on a connection.
 611  *
 612  * This function puts the data provided on the wire by calling sosendmsg.
 613  * It will return only when all the data has been sent or if an error
 614  * occurs.
 615  *
 616  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 617  * -1 if sosendmsg returns success but uio_resid != 0
 618  */
 619 int
 620 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 621 {
 622         struct msghdr           msg;
 623         int                     error;
 624         size_t                  sent = 0;
 625 
 626         ASSERT(iop != NULL);
 627 
 628         /* Initialization of the message header. */
 629         bzero(&msg, sizeof (msg));
 630         msg.msg_iov     = iop;
 631         msg.msg_iovlen  = iovlen;
 632 
 633         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 634             == 0) {
 635                 /* Data sent */
 636                 if (sent == total_len) {
 637                         /* All data sent.  Success. */
 638                         return (0);
 639                 } else {
 640                         /* Not all data was sent.  Failure */
 641                         return (-1);
 642                 }
 643         }
 644 
 645         /* Send failed */
 646         return (error);
 647 }
 648 
 649 /*
 650  * idm_iov_sorecv - Receives an iovec from a connection
 651  *
 652  * This function gets the data asked for from the socket.  It will return
 653  * only when all the requested data has been retrieved or if an error
 654  * occurs.
 655  *
 656  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 657  * -1 if sorecvmsg returns success but uio_resid != 0
 658  */
 659 int
 660 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 661 {
 662         struct msghdr           msg;
 663         int                     error;
 664         size_t                  recv;
 665         int                     flags;
 666 
 667         ASSERT(iop != NULL);
 668 
 669         /* Initialization of the message header. */
 670         bzero(&msg, sizeof (msg));
 671         msg.msg_iov     = iop;
 672         msg.msg_iovlen  = iovlen;
 673         flags           = MSG_WAITALL;
 674 
 675         if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 676             == 0) {
 677                 /* Received data */
 678                 if (recv == total_len) {
 679                         /* All requested data received.  Success */
 680                         return (0);
 681                 } else {
 682                         /*
 683                          * Not all data was received.  The connection has
 684                          * probably failed.
 685                          */
 686                         return (-1);
 687                 }
 688         }
 689 
 690         /* Receive failed */
 691         return (error);
 692 }
 693 
 694 static void
 695 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 696 {
 697         int     conn_abort = 10000;
 698         int     conn_notify = 2000;
 699         int     abort = 30000;
 700 
 701         /* Pre-connect socket options */
 702         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 703             TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 704             CRED());
 705         if (boot_conn == B_FALSE) {
 706                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 707                     TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 708                     CRED());
 709                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 710                     TCP_ABORT_THRESHOLD,
 711                     (char *)&abort, sizeof (int), CRED());
 712         }
 713 }
 714 
 715 static void
 716 idm_set_postconnect_options(ksocket_t ks)
 717 {
 718         const int       on = 1;
 719 
 720         /* Set connect options */
 721         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 722             (char *)&idm_so_rcvbuf, sizeof (int), CRED());
 723         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 724             (char *)&idm_so_sndbuf, sizeof (int), CRED());
 725         (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 726             (char *)&on, sizeof (on), CRED());
 727 }
 728 
 729 static uint32_t
 730 n2h24(const uchar_t *ptr)
 731 {
 732         return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 733 }
 734 
 735 
 736 static idm_status_t
 737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 738 {
 739         iscsi_hdr_t     *bhs;
 740         uint32_t        hdr_digest_crc;
 741         uint32_t        crc_calculated;
 742         void            *new_hdr;
 743         int             ahslen = 0;
 744         int             total_len = 0;
 745         int             iovlen = 0;
 746         struct iovec    iov[2];
 747         idm_so_conn_t   *so_conn;
 748         int             rc;
 749 
 750         so_conn = ic->ic_transport_private;
 751 
 752         /*
 753          * Read BHS
 754          */
 755         bhs = pdu->isp_hdr;
 756         rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 757         if (rc != IDM_STATUS_SUCCESS) {
 758                 return (IDM_STATUS_FAIL);
 759         }
 760 
 761         /*
 762          * Check actual AHS length against the amount available in the buffer
 763          */
 764         pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 765             (bhs->hlength * sizeof (uint32_t));
 766         pdu->isp_datalen = n2h24(bhs->dlength);
 767         if (ic->ic_conn_type == CONN_TYPE_TGT &&
 768             pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 769                 IDM_CONN_LOG(CE_WARN,
 770                     "idm_sorecvhdr: exceeded the max data segment length");
 771                 return (IDM_STATUS_FAIL);
 772         }
 773         if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
 774                 /* Allocate a new header segment and change the callback */
 775                 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 776                 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 777                 pdu->isp_hdr = new_hdr;
 778                 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 779 
 780                 /*
 781                  * This callback will restore the expected values after
 782                  * the RX PDU has been processed.
 783                  */
 784                 pdu->isp_callback = idm_sorx_addl_pdu_cb;
 785         }
 786 
 787         /*
 788          * Setup receipt of additional header and header digest (if enabled).
 789          */
 790         if (bhs->hlength > 0) {
 791                 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 792                 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 793                 iov[iovlen].iov_len = ahslen;
 794                 total_len += iov[iovlen].iov_len;
 795                 iovlen++;
 796         }
 797 
 798         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 799                 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 800                 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 801                 total_len += iov[iovlen].iov_len;
 802                 iovlen++;
 803         }
 804 
 805         if ((iovlen != 0) &&
 806             (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 807             total_len) != 0)) {
 808                 return (IDM_STATUS_FAIL);
 809         }
 810 
 811         /*
 812          * Validate header digest if enabled
 813          */
 814         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 815                 crc_calculated = idm_crc32c(pdu->isp_hdr,
 816                     sizeof (iscsi_hdr_t) + ahslen);
 817                 if (crc_calculated != hdr_digest_crc) {
 818                         /* Invalid Header Digest */
 819                         return (IDM_STATUS_HEADER_DIGEST);
 820                 }
 821         }
 822 
 823         return (0);
 824 }
 825 
 826 /*
 827  * idm_so_ini_conn_create()
 828  * Allocate the sockets transport connection resources.
 829  */
 830 static idm_status_t
 831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 832 {
 833         ksocket_t       so;
 834         idm_so_conn_t   *so_conn;
 835         idm_status_t    idmrc;
 836 
 837         so = idm_socreate(cr->cr_domain, cr->cr_type,
 838             cr->cr_protocol);
 839         if (so == NULL) {
 840                 return (IDM_STATUS_FAIL);
 841         }
 842 
 843         /* Bind the socket if configured to do so */
 844         if (cr->cr_bound) {
 845                 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 846                     SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 847                         idm_sodestroy(so);
 848                         return (IDM_STATUS_FAIL);
 849                 }
 850         }
 851 
 852         idmrc = idm_so_conn_create_common(ic, so);
 853         if (idmrc != IDM_STATUS_SUCCESS) {
 854                 idm_soshutdown(so);
 855                 idm_sodestroy(so);
 856                 return (IDM_STATUS_FAIL);
 857         }
 858 
 859         so_conn = ic->ic_transport_private;
 860         /* Set up socket options */
 861         idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 862 
 863         return (IDM_STATUS_SUCCESS);
 864 }
 865 
 866 /*
 867  * idm_so_ini_conn_destroy()
 868  * Tear down the sockets transport connection resources.
 869  */
 870 static void
 871 idm_so_ini_conn_destroy(idm_conn_t *ic)
 872 {
 873         idm_so_conn_destroy_common(ic);
 874 }
 875 
 876 /*
 877  * idm_so_ini_conn_connect()
 878  * Establish the connection referred to by the handle previously allocated via
 879  * idm_so_ini_conn_create().
 880  */
 881 static idm_status_t
 882 idm_so_ini_conn_connect(idm_conn_t *ic)
 883 {
 884         idm_so_conn_t   *so_conn;
 885         struct sonode   *node = NULL;
 886         int             rc;
 887         clock_t         lbolt, conn_login_max, conn_login_interval;
 888         boolean_t       nonblock;
 889 
 890         so_conn = ic->ic_transport_private;
 891         nonblock = ic->ic_conn_params.nonblock_socket;
 892         conn_login_max = ic->ic_conn_params.conn_login_max;
 893         conn_login_interval = ddi_get_lbolt() +
 894             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 895 
 896         if (nonblock == B_TRUE) {
 897                 node = ((struct sonode *)(so_conn->ic_so));
 898                 /* Set to none block socket mode */
 899                 idm_so_socket_set_nonblock(node);
 900                 do {
 901                         rc = ksocket_connect(so_conn->ic_so,
 902                             &ic->ic_ini_dst_addr.sin,
 903                             (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
 904                             CRED());
 905                         if (rc == 0 || rc == EISCONN) {
 906                                 /* socket success or already success */
 907                                 rc = IDM_STATUS_SUCCESS;
 908                                 break;
 909                         }
 910                         if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
 911                             (rc == ECONNRESET)) {
 912                                 /* socket connection timeout or refuse */
 913                                 break;
 914                         }
 915                         lbolt = ddi_get_lbolt();
 916                         if (lbolt > conn_login_max) {
 917                                 /*
 918                                  * Connection retry timeout,
 919                                  * failed connect to target.
 920                                  */
 921                                 break;
 922                         }
 923                         if (lbolt < conn_login_interval) {
 924                                 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
 925                                         /* TCP connect still in progress */
 926                                         delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
 927                                         continue;
 928                                 } else {
 929                                         delay(conn_login_interval - lbolt);
 930                                 }
 931                         }
 932                         conn_login_interval = ddi_get_lbolt() +
 933                             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 934                 } while (rc != 0);
 935                 /* resume to nonblock mode */
 936                 if (rc == IDM_STATUS_SUCCESS) {
 937                         idm_so_socket_set_block(node);
 938                 }
 939         } else {
 940                 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
 941                     (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
 942         }
 943 
 944         if (rc != 0) {
 945                 idm_soshutdown(so_conn->ic_so);
 946                 return (IDM_STATUS_FAIL);
 947         }
 948 
 949         idm_so_conn_connect_common(ic);
 950 
 951         idm_set_postconnect_options(so_conn->ic_so);
 952 
 953         return (IDM_STATUS_SUCCESS);
 954 }
 955 
 956 idm_status_t
 957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
 958 {
 959         idm_status_t    idmrc;
 960 
 961         idm_set_postconnect_options(new_so);
 962         idmrc = idm_so_conn_create_common(ic, new_so);
 963 
 964         return (idmrc);
 965 }
 966 
 967 static void
 968 idm_so_tgt_conn_destroy(idm_conn_t *ic)
 969 {
 970         idm_so_conn_destroy_common(ic);
 971 }
 972 
 973 /*
 974  * idm_so_tgt_conn_connect()
 975  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 976  * is invoked from the SM as a result of an inbound connection request.
 977  */
 978 static idm_status_t
 979 idm_so_tgt_conn_connect(idm_conn_t *ic)
 980 {
 981         idm_so_conn_connect_common(ic);
 982 
 983         return (IDM_STATUS_SUCCESS);
 984 }
 985 
 986 static idm_status_t
 987 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
 988 {
 989         idm_so_conn_t   *so_conn;
 990 
 991         so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
 992         so_conn->ic_so = new_so;
 993 
 994         ic->ic_transport_private = so_conn;
 995         ic->ic_transport_hdrlen = 0;
 996 
 997         /* Set the scoreboarding flag on this connection */
 998         ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
 999         ic->ic_conn_params.max_recv_dataseglen =
1000             ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1001         ic->ic_conn_params.max_xmit_dataseglen =
1002             ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1003 
1004         /*
1005          * Initialize tx thread mutex and list
1006          */
1007         mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1008         cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1009         list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1010             offsetof(idm_pdu_t, idm_tx_link));
1011 
1012         return (IDM_STATUS_SUCCESS);
1013 }
1014 
1015 static void
1016 idm_so_conn_destroy_common(idm_conn_t *ic)
1017 {
1018         idm_so_conn_t   *so_conn = ic->ic_transport_private;
1019 
1020         ic->ic_transport_private = NULL;
1021         idm_sodestroy(so_conn->ic_so);
1022         list_destroy(&so_conn->ic_tx_list);
1023         mutex_destroy(&so_conn->ic_tx_mutex);
1024         cv_destroy(&so_conn->ic_tx_cv);
1025 
1026         kmem_free(so_conn, sizeof (idm_so_conn_t));
1027 }
1028 
1029 static void
1030 idm_so_conn_connect_common(idm_conn_t *ic)
1031 {
1032         idm_so_conn_t   *so_conn;
1033         struct sockaddr_in6     t_addr;
1034         socklen_t       t_addrlen = 0;
1035 
1036         so_conn = ic->ic_transport_private;
1037         bzero(&t_addr, sizeof (struct sockaddr_in6));
1038         t_addrlen = sizeof (struct sockaddr_in6);
1039 
1040         /* Set the local and remote addresses in the idm conn handle */
1041         (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1042             &t_addrlen, CRED());
1043         bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1044         (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1045             &t_addrlen, CRED());
1046         bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1047 
1048         mutex_enter(&ic->ic_mutex);
1049         so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1050             &p0, TS_RUN, minclsyspri);
1051         so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1052             &p0, TS_RUN, minclsyspri);
1053 
1054         while (so_conn->ic_rx_thread_did == 0 ||
1055             so_conn->ic_tx_thread_did == 0)
1056                 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1057         mutex_exit(&ic->ic_mutex);
1058 }
1059 
1060 /*
1061  * idm_so_conn_disconnect()
1062  * Shutdown the socket connection and stop the thread
1063  */
1064 static void
1065 idm_so_conn_disconnect(idm_conn_t *ic)
1066 {
1067         idm_so_conn_t   *so_conn;
1068 
1069         so_conn = ic->ic_transport_private;
1070 
1071         mutex_enter(&ic->ic_mutex);
1072         so_conn->ic_rx_thread_running = B_FALSE;
1073         so_conn->ic_tx_thread_running = B_FALSE;
1074         /* We need to wakeup the TX thread */
1075         mutex_enter(&so_conn->ic_tx_mutex);
1076         cv_signal(&so_conn->ic_tx_cv);
1077         mutex_exit(&so_conn->ic_tx_mutex);
1078         mutex_exit(&ic->ic_mutex);
1079 
1080         /* This should wakeup the RX thread if it is sleeping */
1081         idm_soshutdown(so_conn->ic_so);
1082 
1083         thread_join(so_conn->ic_tx_thread_did);
1084         thread_join(so_conn->ic_rx_thread_did);
1085 }
1086 
1087 /*
1088  * idm_so_tgt_svc_create()
1089  * Establish a service on an IP address and port.  idm_svc_req_t contains
1090  * the service parameters.
1091  */
1092 /*ARGSUSED*/
1093 static idm_status_t
1094 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1095 {
1096         idm_so_svc_t            *so_svc;
1097 
1098         so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1099 
1100         /* Set the new sockets service in svc handle */
1101         is->is_so_svc = (void *)so_svc;
1102 
1103         return (IDM_STATUS_SUCCESS);
1104 }
1105 
1106 /*
1107  * idm_so_tgt_svc_destroy()
1108  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1109  */
1110 static void
1111 idm_so_tgt_svc_destroy(idm_svc_t *is)
1112 {
1113         /* the socket will have been torn down; free the service */
1114         kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1115 }
1116 
1117 /*
1118  * idm_so_tgt_svc_online()
1119  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1120  */
1121 
1122 static idm_status_t
1123 idm_so_tgt_svc_online(idm_svc_t *is)
1124 {
1125         idm_so_svc_t            *so_svc;
1126         idm_svc_req_t           *sr = &is->is_svc_req;
1127         struct sockaddr_in6     sin6_ip;
1128         const uint32_t          on = 1;
1129         const uint32_t          off = 0;
1130 
1131         mutex_enter(&is->is_mutex);
1132         so_svc = (idm_so_svc_t *)is->is_so_svc;
1133 
1134         /*
1135          * Try creating an IPv6 socket first
1136          */
1137         if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1138                 mutex_exit(&is->is_mutex);
1139                 return (IDM_STATUS_FAIL);
1140         } else {
1141                 bzero(&sin6_ip, sizeof (sin6_ip));
1142                 sin6_ip.sin6_family = AF_INET6;
1143                 sin6_ip.sin6_port = htons(sr->sr_port);
1144                 sin6_ip.sin6_addr = in6addr_any;
1145 
1146                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1147                     SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1148                 /*
1149                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1150                  */
1151                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1152                     SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1153 
1154                 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1155                     sizeof (sin6_ip), CRED()) != 0) {
1156                         mutex_exit(&is->is_mutex);
1157                         idm_sodestroy(so_svc->is_so);
1158                         return (IDM_STATUS_FAIL);
1159                 }
1160         }
1161 
1162         idm_set_postconnect_options(so_svc->is_so);
1163 
1164         if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1165                 mutex_exit(&is->is_mutex);
1166                 idm_soshutdown(so_svc->is_so);
1167                 idm_sodestroy(so_svc->is_so);
1168                 return (IDM_STATUS_FAIL);
1169         }
1170 
1171         /* Launch a watch thread */
1172         so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1173             is, 0, &p0, TS_RUN, minclsyspri);
1174 
1175         if (so_svc->is_thread == NULL) {
1176                 /* Failure to launch; teardown the socket */
1177                 mutex_exit(&is->is_mutex);
1178                 idm_soshutdown(so_svc->is_so);
1179                 idm_sodestroy(so_svc->is_so);
1180                 return (IDM_STATUS_FAIL);
1181         }
1182         ksocket_hold(so_svc->is_so);
1183         /* Wait for the port watcher thread to start */
1184         while (!so_svc->is_thread_running)
1185                 cv_wait(&is->is_cv, &is->is_mutex);
1186         mutex_exit(&is->is_mutex);
1187 
1188         return (IDM_STATUS_SUCCESS);
1189 }
1190 
1191 /*
1192  * idm_so_tgt_svc_offline
1193  *
1194  * Stop listening on the IP address and port identified by idm_svc_t.
1195  */
1196 static void
1197 idm_so_tgt_svc_offline(idm_svc_t *is)
1198 {
1199         idm_so_svc_t            *so_svc;
1200         mutex_enter(&is->is_mutex);
1201         so_svc = (idm_so_svc_t *)is->is_so_svc;
1202         so_svc->is_thread_running = B_FALSE;
1203         mutex_exit(&is->is_mutex);
1204 
1205         /*
1206          * Teardown socket
1207          */
1208         idm_sodestroy(so_svc->is_so);
1209 
1210         /*
1211          * Now we expect the port watcher thread to terminate
1212          */
1213         thread_join(so_svc->is_thread_did);
1214 }
1215 
1216 /*
1217  * Watch thread for target service connection establishment.
1218  */
1219 void
1220 idm_so_svc_port_watcher(void *arg)
1221 {
1222         idm_svc_t               *svc = arg;
1223         ksocket_t               new_so;
1224         idm_conn_t              *ic;
1225         idm_status_t            idmrc;
1226         idm_so_svc_t            *so_svc;
1227         int                     rc;
1228         const uint32_t          off = 0;
1229         struct sockaddr_in6     t_addr;
1230         socklen_t               t_addrlen;
1231 
1232         bzero(&t_addr, sizeof (struct sockaddr_in6));
1233         t_addrlen = sizeof (struct sockaddr_in6);
1234         mutex_enter(&svc->is_mutex);
1235 
1236         so_svc = svc->is_so_svc;
1237         so_svc->is_thread_running = B_TRUE;
1238         so_svc->is_thread_did = so_svc->is_thread->t_did;
1239 
1240         cv_signal(&svc->is_cv);
1241 
1242         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1243             svc->is_svc_req.sr_port);
1244 
1245         while (so_svc->is_thread_running) {
1246                 mutex_exit(&svc->is_mutex);
1247 
1248                 if ((rc = ksocket_accept(so_svc->is_so,
1249                     (struct sockaddr *)&t_addr, &t_addrlen,
1250                     &new_so, CRED())) != 0) {
1251                         mutex_enter(&svc->is_mutex);
1252                         if (rc == ECONNABORTED)
1253                                 continue;
1254                         /* Connection problem */
1255                         break;
1256                 }
1257                 /*
1258                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1259                  */
1260                 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1261                     (char *)&off, sizeof (off), CRED());
1262 
1263                 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1264                     &ic);
1265                 if (idmrc != IDM_STATUS_SUCCESS) {
1266                         /* Drop connection */
1267                         idm_soshutdown(new_so);
1268                         idm_sodestroy(new_so);
1269                         mutex_enter(&svc->is_mutex);
1270                         continue;
1271                 }
1272 
1273                 idmrc = idm_so_tgt_conn_create(ic, new_so);
1274                 if (idmrc != IDM_STATUS_SUCCESS) {
1275                         idm_svc_conn_destroy(ic);
1276                         idm_soshutdown(new_so);
1277                         idm_sodestroy(new_so);
1278                         mutex_enter(&svc->is_mutex);
1279                         continue;
1280                 }
1281 
1282                 /*
1283                  * Kick the state machine.  At CS_S3_XPT_UP the state machine
1284                  * will notify the client (target) about the new connection.
1285                  */
1286                 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1287 
1288                 mutex_enter(&svc->is_mutex);
1289         }
1290         ksocket_rele(so_svc->is_so);
1291         so_svc->is_thread_running = B_FALSE;
1292         mutex_exit(&svc->is_mutex);
1293 
1294         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1295             svc->is_svc_req.sr_port);
1296 
1297         thread_exit();
1298 }
1299 
1300 /*
1301  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1302  * frees resources associated with the task.
1303  *
1304  * It's not clear that this should return idm_status_t.  What do we do
1305  * if it fails?
1306  */
1307 static idm_status_t
1308 idm_so_free_task_rsrc(idm_task_t *idt)
1309 {
1310         idm_buf_t       *idb, *next_idb;
1311 
1312         /*
1313          * There is nothing to cleanup on initiator connections
1314          */
1315         if (IDM_CONN_ISINI(idt->idt_ic))
1316                 return (IDM_STATUS_SUCCESS);
1317 
1318         /*
1319          * If this is a target connection, call idm_buf_rx_from_ini_done for
1320          * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1321          *
1322          * In addition, remove any buffers associated with this task from
1323          * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1324          * items don't actually get removed from that list (and completion
1325          * routines called) until idm_task_cleanup.
1326          */
1327         mutex_enter(&idt->idt_mutex);
1328 
1329         for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1330                 next_idb = list_next(&idt->idt_outbufv, idb);
1331                 if (idb->idb_in_transport) {
1332                         /*
1333                          * idm_buf_rx_from_ini_done releases idt->idt_mutex
1334                          */
1335                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1336                             uintptr_t, idb->idb_buf,
1337                             uint32_t, idb->idb_bufoffset,
1338                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1339                             uint32_t, idb->idb_xfer_len,
1340                             int, XFER_BUF_RX_FROM_INI);
1341                         idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1342                         mutex_enter(&idt->idt_mutex);
1343                 }
1344         }
1345 
1346         for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1347                 next_idb = list_next(&idt->idt_inbufv, idb);
1348                 /*
1349                  * We want to remove these items from the tx_list as well,
1350                  * but knowing it's in the idt_inbufv list is not a guarantee
1351                  * that it's in the tx_list.  If it's on the tx list then
1352                  * let idm_sotx_thread() clean it up.
1353                  */
1354                 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1355                         /*
1356                          * idm_buf_tx_to_ini_done releases idt->idt_mutex
1357                          */
1358                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1359                             uintptr_t, idb->idb_buf,
1360                             uint32_t, idb->idb_bufoffset,
1361                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1362                             uint32_t, idb->idb_xfer_len,
1363                             int, XFER_BUF_TX_TO_INI);
1364                         idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1365                         mutex_enter(&idt->idt_mutex);
1366                 }
1367         }
1368 
1369         mutex_exit(&idt->idt_mutex);
1370 
1371         return (IDM_STATUS_SUCCESS);
1372 }
1373 
1374 /*
1375  * idm_so_negotiate_key_values() validates the key values for this connection
1376  */
1377 /* ARGSUSED */
1378 static kv_status_t
1379 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1380     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1381 {
1382         /* All parameters are negotiated at the iscsit level */
1383         return (KV_HANDLED);
1384 }
1385 
1386 /*
1387  * idm_so_notice_key_values() activates the negotiated key values for
1388  * this connection.
1389  */
1390 static void
1391 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1392 {
1393         char                    *nvp_name;
1394         nvpair_t                *nvp;
1395         nvpair_t                *next_nvp;
1396         int                     nvrc;
1397         idm_status_t            idm_status;
1398         const idm_kv_xlate_t    *ikvx;
1399         uint64_t                num_val;
1400 
1401         for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1402             nvp != NULL; nvp = next_nvp) {
1403                 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1404                 nvp_name = nvpair_name(nvp);
1405 
1406                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1407                 switch (ikvx->ik_key_id) {
1408                 case KI_HEADER_DIGEST:
1409                 case KI_DATA_DIGEST:
1410                         idm_status = idm_so_handle_digest(it, nvp, ikvx);
1411                         ASSERT(idm_status == 0);
1412 
1413                         /* Remove processed item from negotiated_nvl list */
1414                         nvrc = nvlist_remove_all(
1415                             negotiated_nvl, ikvx->ik_key_name);
1416                         ASSERT(nvrc == 0);
1417                         break;
1418                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1419                         /*
1420                          * Just pass the value down to idm layer.
1421                          * No need to remove it from negotiated_nvl list here.
1422                          */
1423                         nvrc = nvpair_value_uint64(nvp, &num_val);
1424                         ASSERT(nvrc == 0);
1425                         it->ic_conn_params.max_xmit_dataseglen =
1426                             (uint32_t)num_val;
1427                         break;
1428                 default:
1429                         break;
1430                 }
1431         }
1432 }
1433 
1434 /*
1435  * idm_so_declare_key_values() declares the key values for this connection
1436  */
1437 /* ARGSUSED */
1438 static kv_status_t
1439 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1440     nvlist_t *outgoing_nvl)
1441 {
1442         char                    *nvp_name;
1443         nvpair_t                *nvp;
1444         nvpair_t                *next_nvp;
1445         kv_status_t             kvrc;
1446         int                     nvrc = 0;
1447         const idm_kv_xlate_t    *ikvx;
1448         uint64_t                num_val;
1449 
1450         for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1451             nvp != NULL && nvrc == 0; nvp = next_nvp) {
1452                 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1453                 nvp_name = nvpair_name(nvp);
1454 
1455                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1456                 switch (ikvx->ik_key_id) {
1457                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1458                         if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1459                                 break;
1460                         }
1461                         if (outgoing_nvl &&
1462                             (nvrc = nvlist_add_uint64(outgoing_nvl,
1463                             nvp_name, num_val)) != 0) {
1464                                 break;
1465                         }
1466                         it->ic_conn_params.max_recv_dataseglen =
1467                             (uint32_t)num_val;
1468                         break;
1469                 default:
1470                         break;
1471                 }
1472         }
1473         kvrc = idm_nvstat_to_kvstat(nvrc);
1474         return (kvrc);
1475 }
1476 
1477 static idm_status_t
1478 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1479     const idm_kv_xlate_t *ikvx)
1480 {
1481         int                     nvrc;
1482         char                    *digest_choice_string;
1483 
1484         nvrc = nvpair_value_string(digest_choice,
1485             &digest_choice_string);
1486         ASSERT(nvrc == 0);
1487         if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1488                 switch (ikvx->ik_key_id) {
1489                 case KI_HEADER_DIGEST:
1490                         it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1491                         break;
1492                 case KI_DATA_DIGEST:
1493                         it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1494                         break;
1495                 default:
1496                         ASSERT(0);
1497                         break;
1498                 }
1499         } else if (strcasecmp(digest_choice_string, "none") == 0) {
1500                 switch (ikvx->ik_key_id) {
1501                 case KI_HEADER_DIGEST:
1502                         it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1503                         break;
1504                 case KI_DATA_DIGEST:
1505                         it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1506                         break;
1507                 default:
1508                         ASSERT(0);
1509                         break;
1510                 }
1511         } else {
1512                 ASSERT(0);
1513         }
1514 
1515         return (IDM_STATUS_SUCCESS);
1516 }
1517 
1518 
1519 /*
1520  * idm_so_conn_is_capable() verifies that the passed connection is provided
1521  * for by the sockets interface.
1522  */
1523 /* ARGSUSED */
1524 static boolean_t
1525 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1526 {
1527         return (B_TRUE);
1528 }
1529 
1530 /*
1531  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1532  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1533  * off the socket into the appropriate buffers.
1534  */
1535 static void
1536 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1537 {
1538         iscsi_data_hdr_t        *bhs;
1539         idm_task_t              *idt;
1540         idm_buf_t               *idb;
1541         uint32_t                datasn;
1542         size_t                  offset;
1543         iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1544         iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1545 
1546         ASSERT(ic != NULL);
1547         ASSERT(pdu != NULL);
1548 
1549         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1550         datasn  = ntohl(bhs->datasn);
1551         offset  = ntohl(bhs->offset);
1552 
1553         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1554 
1555         /*
1556          * Look up the task corresponding to the initiator task tag
1557          * to get the buffers affiliated with the task.
1558          */
1559         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1560         if (idt == NULL) {
1561                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1562                 idm_pdu_rx_protocol_error(ic, pdu);
1563                 return;
1564         }
1565 
1566         idb = pdu->isp_sorx_buf;
1567         if (idb == NULL) {
1568                 IDM_CONN_LOG(CE_WARN,
1569                     "idm_so_rx_datain: failed to find buffer");
1570                 idm_task_rele(idt);
1571                 idm_pdu_rx_protocol_error(ic, pdu);
1572                 return;
1573         }
1574 
1575         /*
1576          * DataSN values should be sequential and should not have any gaps or
1577          * repetitions. Check the DataSN with the one stored in the task.
1578          */
1579         if (datasn == idt->idt_exp_datasn) {
1580                 idt->idt_exp_datasn++; /* keep track of DataSN received */
1581         } else {
1582                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1583                 idm_task_rele(idt);
1584                 idm_pdu_rx_protocol_error(ic, pdu);
1585                 return;
1586         }
1587 
1588         /*
1589          * PDUs in a sequence should be in continuously increasing
1590          * address offset
1591          */
1592         if (offset != idb->idb_exp_offset) {
1593                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1594                 idm_task_rele(idt);
1595                 idm_pdu_rx_protocol_error(ic, pdu);
1596                 return;
1597         }
1598         /* Expected next relative buffer offset */
1599         idb->idb_exp_offset += n2h24(bhs->dlength);
1600         idt->idt_rx_bytes += n2h24(bhs->dlength);
1601 
1602         idm_task_rele(idt);
1603 
1604         /*
1605          * For now call scsi_rsp which will process the data rsp
1606          * Revisit, need to provide an explicit client entry point for
1607          * phase collapse completions.
1608          */
1609         if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1610             (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1611                 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1612         }
1613 
1614         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1615 }
1616 
1617 /*
1618  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1619  * data from the Data-Out PDU sent by the iSCSI initiator.
1620  *
1621  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1622  * task to get the buffers associated with the PDU. A PDU might span buffers.
1623  * The data is then read into the respective buffer.
1624  */
1625 static void
1626 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1627 {
1628 
1629         iscsi_data_hdr_t        *bhs;
1630         idm_task_t              *idt;
1631         idm_buf_t               *idb;
1632         size_t                  offset;
1633 
1634         ASSERT(ic != NULL);
1635         ASSERT(pdu != NULL);
1636 
1637         bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1638         offset = ntohl(bhs->offset);
1639         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1640 
1641         /*
1642          * Look up the task corresponding to the initiator task tag
1643          * to get the buffers affiliated with the task.
1644          */
1645         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1646         if (idt == NULL) {
1647                 IDM_CONN_LOG(CE_WARN,
1648                     "idm_so_rx_dataout: failed to find task");
1649                 idm_pdu_rx_protocol_error(ic, pdu);
1650                 return;
1651         }
1652 
1653         idb = pdu->isp_sorx_buf;
1654         if (idb == NULL) {
1655                 IDM_CONN_LOG(CE_WARN,
1656                     "idm_so_rx_dataout: failed to find buffer");
1657                 idm_task_rele(idt);
1658                 idm_pdu_rx_protocol_error(ic, pdu);
1659                 return;
1660         }
1661 
1662         /* Keep track of data transferred - check data offsets */
1663         if (offset != idb->idb_exp_offset) {
1664                 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1665                     "%ld, %d", offset, idb->idb_exp_offset);
1666                 idm_task_rele(idt);
1667                 idm_pdu_rx_protocol_error(ic, pdu);
1668                 return;
1669         }
1670         /* Expected next relative offset */
1671         idb->idb_exp_offset += ntoh24(bhs->dlength);
1672         idt->idt_rx_bytes += n2h24(bhs->dlength);
1673 
1674         /*
1675          * Call the buffer callback when the transfer is complete
1676          *
1677          * The connection state machine should only abort tasks after
1678          * shutting down the connection so we are assured that there
1679          * won't be a simultaneous attempt to abort this task at the
1680          * same time as we are processing this PDU (due to a connection
1681          * state change).
1682          */
1683         if (bhs->flags & ISCSI_FLAG_FINAL) {
1684                 /*
1685                  * We only want to call idm_buf_rx_from_ini_done once
1686                  * per transfer.  It's possible that this task has
1687                  * already been aborted in which case
1688                  * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1689                  * for each buffer with idb_in_transport==B_TRUE.  To
1690                  * close this window and ensure that this doesn't happen,
1691                  * we'll clear idb->idb_in_transport now while holding
1692                  * the task mutex.   This is only really an issue for
1693                  * SCSI task abort -- if tasks were being aborted because
1694                  * of a connection state change the state machine would
1695                  * have already stopped the receive thread.
1696                  */
1697                 mutex_enter(&idt->idt_mutex);
1698 
1699                 /*
1700                  * Release the task hold here (obtained in idm_task_find)
1701                  * because the task may complete synchronously during
1702                  * idm_buf_rx_from_ini_done.  Since we still have an active
1703                  * buffer we know there is at least one additional hold on idt.
1704                  */
1705                 idm_task_rele(idt);
1706 
1707                 /*
1708                  * idm_buf_rx_from_ini_done releases idt->idt_mutex
1709                  */
1710                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1711                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1712                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
1713                     uint32_t, idb->idb_xfer_len,
1714                     int, XFER_BUF_RX_FROM_INI);
1715                 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1716                 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717                 return;
1718         }
1719 
1720         idm_task_rele(idt);
1721         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1722 }
1723 
1724 /*
1725  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1726  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1727  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1728  * and looks up the task in the task tree using the itt to get the output
1729  * buffers associated the task. The R2T PDU contains the offset of the
1730  * requested data and the data length. This function then constructs a
1731  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1732  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1733  */
1734 
1735 static void
1736 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1737 {
1738         idm_task_t              *idt;
1739         idm_buf_t               *idb;
1740         iscsi_rtt_hdr_t         *rtt_hdr;
1741         uint32_t                data_offset;
1742         uint32_t                data_length;
1743 
1744         ASSERT(ic != NULL);
1745         ASSERT(pdu != NULL);
1746 
1747         rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1748         data_offset = ntohl(rtt_hdr->data_offset);
1749         data_length = ntohl(rtt_hdr->data_length);
1750         idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1751 
1752         if (idt == NULL) {
1753                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1754                 idm_pdu_rx_protocol_error(ic, pdu);
1755                 return;
1756         }
1757 
1758         /* Find the buffer bound to the task by the iSCSI initiator */
1759         mutex_enter(&idt->idt_mutex);
1760         idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1761         if (idb == NULL) {
1762                 mutex_exit(&idt->idt_mutex);
1763                 idm_task_rele(idt);
1764                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1765                 idm_pdu_rx_protocol_error(ic, pdu);
1766                 return;
1767         }
1768 
1769         /* return buffer contains this data */
1770         if (data_offset + data_length > idb->idb_buflen) {
1771                 /* Overflow */
1772                 mutex_exit(&idt->idt_mutex);
1773                 idm_task_rele(idt);
1774                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1775                     "buffer");
1776                 idm_pdu_rx_protocol_error(ic, pdu);
1777                 return;
1778         }
1779 
1780         idt->idt_r2t_ttt = rtt_hdr->ttt;
1781         idt->idt_exp_datasn = 0;
1782 
1783         idm_so_send_rtt_data(ic, idt, idb, data_offset,
1784             ntohl(rtt_hdr->data_length));
1785         /*
1786          * the idt_mutex is released in idm_so_send_rtt_data
1787          */
1788 
1789         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1790         idm_task_rele(idt);
1791 
1792 }
1793 
1794 idm_status_t
1795 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1796 {
1797         uint8_t         pad[ISCSI_PAD_WORD_LEN];
1798         int             pad_len;
1799         uint32_t        data_digest_crc;
1800         uint32_t        crc_calculated;
1801         int             total_len;
1802         idm_so_conn_t   *so_conn;
1803 
1804         so_conn = ic->ic_transport_private;
1805 
1806         pad_len = ((ISCSI_PAD_WORD_LEN -
1807             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1808             (ISCSI_PAD_WORD_LEN - 1));
1809 
1810         ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1811 
1812         total_len = pdu->isp_datalen;
1813 
1814         if (pad_len) {
1815                 pdu->isp_iov[pdu->isp_iovlen].iov_base    = (char *)&pad;
1816                 pdu->isp_iov[pdu->isp_iovlen].iov_len     = pad_len;
1817                 total_len               += pad_len;
1818                 pdu->isp_iovlen++;
1819         }
1820 
1821         /* setup data digest */
1822         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1823                 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1824                     (char *)&data_digest_crc;
1825                 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1826                     sizeof (data_digest_crc);
1827                 total_len               += sizeof (data_digest_crc);
1828                 pdu->isp_iovlen++;
1829         }
1830 
1831         pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1832 
1833         if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1834             pdu->isp_iovlen, total_len) != 0) {
1835                 return (IDM_STATUS_IO);
1836         }
1837 
1838         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839                 crc_calculated = idm_crc32c(pdu->isp_data,
1840                     pdu->isp_datalen);
1841                 if (pad_len) {
1842                         crc_calculated = idm_crc32c_continued((char *)&pad,
1843                             pad_len, crc_calculated);
1844                 }
1845                 if (crc_calculated != data_digest_crc) {
1846                         IDM_CONN_LOG(CE_WARN,
1847                             "idm_sorecvdata: "
1848                             "CRC error: actual 0x%x, calc 0x%x",
1849                             data_digest_crc, crc_calculated);
1850 
1851                         /* Invalid Data Digest */
1852                         return (IDM_STATUS_DATA_DIGEST);
1853                 }
1854         }
1855 
1856         return (IDM_STATUS_SUCCESS);
1857 }
1858 
1859 /*
1860  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1861  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1862  * calling this function.
1863  */
1864 idm_status_t
1865 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1866 {
1867         iscsi_data_hdr_t        *bhs;
1868         idm_task_t              *task;
1869         uint32_t                offset;
1870         uint8_t                 opcode;
1871         uint32_t                dlength;
1872         list_t                  *buflst;
1873         uint32_t                xfer_bytes;
1874         idm_status_t            status;
1875 
1876         ASSERT(ic != NULL);
1877         ASSERT(pdu != NULL);
1878 
1879         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1880 
1881         offset  = ntohl(bhs->offset);
1882         opcode  = bhs->opcode;
1883         dlength = n2h24(bhs->dlength);
1884 
1885         ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1886             (opcode == ISCSI_OP_SCSI_DATA));
1887 
1888         /*
1889          * Successful lookup implicitly gets a "hold" on the task.  This
1890          * hold must be released before leaving this function.  At one
1891          * point we were caching this task context and retaining the hold
1892          * but it turned out to be very difficult to release the hold properly.
1893          * The task can be aborted and the connection shutdown between this
1894          * call and the subsequent expected call to idm_so_rx_datain/
1895          * idm_so_rx_dataout (in which case those functions are not called).
1896          * Releasing the hold in the PDU callback doesn't work well either
1897          * because the whole task may be completed by then at which point
1898          * it is too late to release the hold -- for better or worse this
1899          * code doesn't wait on the refcnts during normal operation.
1900          * idm_task_find() is very fast and it is not a huge burden if we
1901          * have to do it twice.
1902          */
1903         task = idm_task_find(ic, bhs->itt, bhs->ttt);
1904         if (task == NULL) {
1905                 IDM_CONN_LOG(CE_WARN,
1906                     "idm_sorecv_scsidata: could not find task");
1907                 return (IDM_STATUS_FAIL);
1908         }
1909 
1910         mutex_enter(&task->idt_mutex);
1911         buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1912             &task->idt_inbufv : &task->idt_outbufv;
1913         pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1914         mutex_exit(&task->idt_mutex);
1915 
1916         if (pdu->isp_sorx_buf == NULL) {
1917                 idm_task_rele(task);
1918                 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1919                     "buffer for offset %x opcode=%x",
1920                     offset, opcode);
1921                 return (IDM_STATUS_FAIL);
1922         }
1923 
1924         xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1925         ASSERT(xfer_bytes != 0);
1926         if (xfer_bytes != dlength) {
1927                 idm_task_rele(task);
1928                 /*
1929                  * Buffer overflow, connection error.  The PDU data is still
1930                  * sitting in the socket so we can't use the connection
1931                  * again until that data is drained.
1932                  */
1933                 return (IDM_STATUS_FAIL);
1934         }
1935 
1936         status = idm_sorecvdata(ic, pdu);
1937 
1938         idm_task_rele(task);
1939 
1940         return (status);
1941 }
1942 
1943 static uint32_t
1944 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1945 {
1946         uint32_t        buf_ro = ro - idb->idb_bufoffset;
1947         uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1948 
1949         ASSERT(ro >= idb->idb_bufoffset);
1950 
1951         pdu->isp_iov[pdu->isp_iovlen].iov_base    =
1952             (caddr_t)idb->idb_buf + buf_ro;
1953         pdu->isp_iov[pdu->isp_iovlen].iov_len     = xfer_len;
1954         pdu->isp_iovlen++;
1955 
1956         return (xfer_len);
1957 }
1958 
1959 int
1960 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1961 {
1962         pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1963         ASSERT(pdu->isp_data != NULL);
1964 
1965         pdu->isp_databuflen = pdu->isp_datalen;
1966         pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1967         pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1968         pdu->isp_iovlen = 1;
1969         /*
1970          * Since we are associating a new data buffer with this received
1971          * PDU we need to set a specific callback to free the data
1972          * after the PDU is processed.
1973          */
1974         pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1975         pdu->isp_callback = idm_sorx_addl_pdu_cb;
1976 
1977         return (idm_sorecvdata(ic, pdu));
1978 }
1979 
1980 void
1981 idm_sorx_thread(void *arg)
1982 {
1983         boolean_t       conn_failure = B_FALSE;
1984         idm_conn_t      *ic = (idm_conn_t *)arg;
1985         idm_so_conn_t   *so_conn;
1986         idm_pdu_t       *pdu;
1987         idm_status_t    rc;
1988 
1989         idm_conn_hold(ic);
1990 
1991         mutex_enter(&ic->ic_mutex);
1992 
1993         so_conn = ic->ic_transport_private;
1994         so_conn->ic_rx_thread_running = B_TRUE;
1995         so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1996         cv_signal(&ic->ic_cv);
1997 
1998         while (so_conn->ic_rx_thread_running) {
1999                 mutex_exit(&ic->ic_mutex);
2000 
2001                 /*
2002                  * Get PDU with default header size (large enough for
2003                  * BHS plus any anticipated AHS).  PDU from
2004                  * the cache will have all values set correctly
2005                  * for sockets RX including callback.
2006                  */
2007                 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2008                 pdu->isp_ic = ic;
2009                 pdu->isp_flags = 0;
2010                 pdu->isp_transport_hdrlen = 0;
2011 
2012                 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2013                         /*
2014                          * Call idm_pdu_complete so that we call the callback
2015                          * and ensure any memory allocated in idm_sorecvhdr
2016                          * gets freed up.
2017                          */
2018                         idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2019 
2020                         /*
2021                          * If ic_rx_thread_running is still set then
2022                          * this is some kind of connection problem
2023                          * on the socket.  In this case we want to
2024                          * generate an event.  Otherwise some other
2025                          * thread closed the socket due to another
2026                          * issue in which case we don't need to
2027                          * generate an event.
2028                          */
2029                         mutex_enter(&ic->ic_mutex);
2030                         if (so_conn->ic_rx_thread_running) {
2031                                 conn_failure = B_TRUE;
2032                                 so_conn->ic_rx_thread_running = B_FALSE;
2033                         }
2034 
2035                         continue;
2036                 }
2037 
2038                 /*
2039                  * Header has been read and validated.  Now we need
2040                  * to read the PDU data payload (if present).  SCSI data
2041                  * need to be transferred from the socket directly into
2042                  * the associated transfer buffer for the SCSI task.
2043                  */
2044                 if (pdu->isp_datalen != 0) {
2045                         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2046                             (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2047                                 rc = idm_sorecv_scsidata(ic, pdu);
2048                                 /*
2049                                  * All SCSI errors are fatal to the
2050                                  * connection right now since we have no
2051                                  * place to put the data.  What we need
2052                                  * is some kind of sink to dispose of unwanted
2053                                  * SCSI data.  For example an invalid task tag
2054                                  * should not kill the connection (although
2055                                  * we may want to drop the connection).
2056                                  */
2057                         } else {
2058                                 /*
2059                                  * Not data PDUs so allocate a buffer for the
2060                                  * data segment and read the remaining data.
2061                                  */
2062                                 rc = idm_sorecv_nonscsidata(ic, pdu);
2063                         }
2064                         if (rc != 0) {
2065                                 /*
2066                                  * Call idm_pdu_complete so that we call the
2067                                  * callback and ensure any memory allocated
2068                                  * in idm_sorecvhdr gets freed up.
2069                                  */
2070                                 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2071 
2072                                 /*
2073                                  * If ic_rx_thread_running is still set then
2074                                  * this is some kind of connection problem
2075                                  * on the socket.  In this case we want to
2076                                  * generate an event.  Otherwise some other
2077                                  * thread closed the socket due to another
2078                                  * issue in which case we don't need to
2079                                  * generate an event.
2080                                  */
2081                                 mutex_enter(&ic->ic_mutex);
2082                                 if (so_conn->ic_rx_thread_running) {
2083                                         conn_failure = B_TRUE;
2084                                         so_conn->ic_rx_thread_running = B_FALSE;
2085                                 }
2086                                 continue;
2087                         }
2088                 }
2089 
2090                 /*
2091                  * Process RX PDU
2092                  */
2093                 idm_pdu_rx(ic, pdu);
2094 
2095                 mutex_enter(&ic->ic_mutex);
2096         }
2097 
2098         mutex_exit(&ic->ic_mutex);
2099 
2100         /*
2101          * If we dropped out of the RX processing loop because of
2102          * a socket problem or other connection failure (including
2103          * digest errors) then we need to generate a state machine
2104          * event to shut the connection down.
2105          * If the state machine is already in, for example, INIT_ERROR, this
2106          * event will get dropped, and the TX thread will never be notified
2107          * to shut down.  To be safe, we'll just notify it here.
2108          */
2109         if (conn_failure) {
2110                 if (so_conn->ic_tx_thread_running) {
2111                         so_conn->ic_tx_thread_running = B_FALSE;
2112                         mutex_enter(&so_conn->ic_tx_mutex);
2113                         cv_signal(&so_conn->ic_tx_cv);
2114                         mutex_exit(&so_conn->ic_tx_mutex);
2115                 }
2116 
2117                 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2118         }
2119 
2120         idm_conn_rele(ic);
2121 
2122         thread_exit();
2123 }
2124 
2125 /*
2126  * idm_so_tx
2127  *
2128  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2129  * point.  By definition, it is supposed to be fast.  So, simply queue
2130  * the entry and return.  The real work is done by idm_i_so_tx() via
2131  * idm_sotx_thread().
2132  */
2133 
2134 static void
2135 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2136 {
2137         idm_so_conn_t *so_conn = ic->ic_transport_private;
2138 
2139         ASSERT(pdu->isp_ic == ic);
2140         mutex_enter(&so_conn->ic_tx_mutex);
2141 
2142         if (!so_conn->ic_tx_thread_running) {
2143                 mutex_exit(&so_conn->ic_tx_mutex);
2144                 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2145                 return;
2146         }
2147 
2148         list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2149         cv_signal(&so_conn->ic_tx_cv);
2150         mutex_exit(&so_conn->ic_tx_mutex);
2151 }
2152 
2153 static idm_status_t
2154 idm_i_so_tx(idm_pdu_t *pdu)
2155 {
2156         idm_conn_t      *ic = pdu->isp_ic;
2157         idm_status_t    status = IDM_STATUS_SUCCESS;
2158         uint8_t         pad[ISCSI_PAD_WORD_LEN];
2159         int             pad_len;
2160         uint32_t        hdr_digest_crc;
2161         uint32_t        data_digest_crc = 0;
2162         int             total_len = 0;
2163         int             iovlen = 0;
2164         struct iovec    iov[6];
2165         idm_so_conn_t   *so_conn;
2166 
2167         so_conn = ic->ic_transport_private;
2168 
2169         /* Setup BHS */
2170         iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2171         iov[iovlen].iov_len     = pdu->isp_hdrlen;
2172         total_len               += iov[iovlen].iov_len;
2173         iovlen++;
2174 
2175         /* Setup header digest */
2176         if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2177             (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2178                 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2179 
2180                 iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2181                 iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2182                 total_len               += iov[iovlen].iov_len;
2183                 iovlen++;
2184         }
2185 
2186         /* Setup the data */
2187         if (pdu->isp_datalen) {
2188                 idm_task_t              *idt;
2189                 idm_buf_t               *idb;
2190                 iscsi_data_hdr_t        *ihp;
2191                 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2192                 /* Write of immediate data */
2193                 if (ic->ic_ffp &&
2194                     (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2195                     ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2196                         idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2197                         if (idt) {
2198                                 mutex_enter(&idt->idt_mutex);
2199                                 idb = idm_buf_find(&idt->idt_outbufv, 0);
2200                                 mutex_exit(&idt->idt_mutex);
2201                                 /*
2202                                  * If the initiator call to idm_buf_alloc
2203                                  * failed then we can get to this point
2204                                  * without a bound buffer.  The associated
2205                                  * connection failure will clean things up
2206                                  * later.  It would be nice to come up with
2207                                  * a cleaner way to handle this.  In
2208                                  * particular it seems absurd to look up
2209                                  * the task and the buffer just to update
2210                                  * this counter.
2211                                  */
2212                                 if (idb)
2213                                         idb->idb_xfer_len += pdu->isp_datalen;
2214                                 idm_task_rele(idt);
2215                         }
2216                 }
2217 
2218                 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2219                 iov[iovlen].iov_len  = pdu->isp_datalen;
2220                 total_len += iov[iovlen].iov_len;
2221                 iovlen++;
2222         }
2223 
2224         /* Setup the data pad if necessary */
2225         pad_len = ((ISCSI_PAD_WORD_LEN -
2226             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2227             (ISCSI_PAD_WORD_LEN - 1));
2228 
2229         if (pad_len) {
2230                 bzero(pad, sizeof (pad));
2231                 iov[iovlen].iov_base = (void *)&pad;
2232                 iov[iovlen].iov_len  = pad_len;
2233                 total_len               += iov[iovlen].iov_len;
2234                 iovlen++;
2235         }
2236 
2237         /*
2238          * Setup the data digest if enabled.  Data-digest is not sent
2239          * for login-phase PDUs.
2240          */
2241         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2242             ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2243             (pdu->isp_datalen || pad_len)) {
2244                 /*
2245                  * RFC3720/10.2.3: A zero-length Data Segment also
2246                  * implies a zero-length data digest.
2247                  */
2248                 if (pdu->isp_datalen) {
2249                         data_digest_crc = idm_crc32c(pdu->isp_data,
2250                             pdu->isp_datalen);
2251                 }
2252                 if (pad_len) {
2253                         data_digest_crc = idm_crc32c_continued(&pad,
2254                             pad_len, data_digest_crc);
2255                 }
2256 
2257                 iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2258                 iov[iovlen].iov_len     = sizeof (data_digest_crc);
2259                 total_len               += iov[iovlen].iov_len;
2260                 iovlen++;
2261         }
2262 
2263         /* Transmit the PDU */
2264         if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2265             total_len) != 0) {
2266                 /* Set error status */
2267                 IDM_CONN_LOG(CE_WARN,
2268                     "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2269                     "data: %p", (void *) so_conn->ic_so, (void *) ic,
2270                     (void *) pdu->isp_data);
2271                 status = IDM_STATUS_IO;
2272         }
2273 
2274         /*
2275          * Success does not mean that the PDU actually reached the
2276          * remote node since it could get dropped along the way.
2277          */
2278         idm_pdu_complete(pdu, status);
2279 
2280         return (status);
2281 }
2282 
2283 /*
2284  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2285  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2286  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2287  * A target can invoke this function multiple times for a single read command
2288  * (identified by the same ITT) to split the input into several sequences.
2289  *
2290  * DataSN starts with 0 for the first data PDU of an input command and advances
2291  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2292  * which is set to 1 for the last data PDU of a sequence.
2293  * If the initiator supports phase collapse, the status bit must be set along
2294  * with the F bit to indicate that the status is shipped together with the last
2295  * Data-In PDU.
2296  *
2297  * The data PDUs within a sequence will be sent in order with the buffer offset
2298  * in increasing order. i.e. initiator and target must have negotiated the
2299  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2300  *
2301  * Caller holds idt->idt_mutex
2302  */
2303 static idm_status_t
2304 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2305 {
2306         idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2307         idm_pdu_t       tmppdu;
2308 
2309         ASSERT(mutex_owned(&idt->idt_mutex));
2310 
2311         /*
2312          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2313          * idm_sotx_thread.
2314          */
2315         mutex_enter(&so_conn->ic_tx_mutex);
2316 
2317         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2318             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2319             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2320             uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2321 
2322         if (!so_conn->ic_tx_thread_running) {
2323                 mutex_exit(&so_conn->ic_tx_mutex);
2324                 /*
2325                  * Don't release idt->idt_mutex since we're supposed to hold
2326                  * in when calling idm_buf_tx_to_ini_done
2327                  */
2328                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2329                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331                     uint32_t, idb->idb_xfer_len,
2332                     int, XFER_BUF_TX_TO_INI);
2333                 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2334                 return (IDM_STATUS_FAIL);
2335         }
2336 
2337         /*
2338          * Build a template for the data PDU headers we will use so that
2339          * the SN values will stay consistent with other PDU's we are
2340          * transmitting like R2T and SCSI status.
2341          */
2342         bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2343         tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2344         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2345             ISCSI_OP_SCSI_DATA_RSP);
2346         idb->idb_tx_thread = B_TRUE;
2347         list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2348         cv_signal(&so_conn->ic_tx_cv);
2349         mutex_exit(&so_conn->ic_tx_mutex);
2350         mutex_exit(&idt->idt_mutex);
2351 
2352         /*
2353          * Returning success here indicates the transfer was successfully
2354          * dispatched -- it does not mean that the transfer completed
2355          * successfully.
2356          */
2357         return (IDM_STATUS_SUCCESS);
2358 }
2359 
2360 /*
2361  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2362  * data blocks it is ready to receive from the initiator in response to a WRITE
2363  * SCSI command. The target iSCSI layer passes the information about the desired
2364  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2365  * offset and datalen are passed via the 'idb' argument.
2366  *
2367  * Scope for Prototype build:
2368  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2369  * negotiated the "InitialR2T" to "Yes".
2370  *
2371  * Caller holds idt->idt_mutex
2372  */
2373 static idm_status_t
2374 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2375 {
2376         idm_pdu_t               *pdu;
2377         iscsi_rtt_hdr_t         *rtt;
2378 
2379         ASSERT(mutex_owned(&idt->idt_mutex));
2380 
2381         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2382             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2383             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2384             uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2385 
2386         pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2387         pdu->isp_ic = idt->idt_ic;
2388         pdu->isp_flags = IDM_PDU_SET_STATSN;
2389         bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2390 
2391         /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2392         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2393 
2394         /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2395         rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2396 
2397         rtt->opcode          = ISCSI_OP_RTT_RSP;
2398         rtt->flags           = ISCSI_FLAG_FINAL;
2399         rtt->data_offset     = htonl(idb->idb_bufoffset);
2400         rtt->data_length     = htonl(idb->idb_xfer_len);
2401         rtt->rttsn           = htonl(idt->idt_exp_rttsn++);
2402 
2403         /* Keep track of buffer offsets */
2404         idb->idb_exp_offset  = idb->idb_bufoffset;
2405         mutex_exit(&idt->idt_mutex);
2406 
2407         /*
2408          * Transmit the PDU.
2409          */
2410         idm_pdu_tx(pdu);
2411 
2412         return (IDM_STATUS_SUCCESS);
2413 }
2414 
2415 static idm_status_t
2416 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2417 {
2418         if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2419                 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2420                     KM_NOSLEEP);
2421                 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2422         } else {
2423                 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2424                 idb->idb_buf_private = NULL;
2425         }
2426 
2427         if (idb->idb_buf == NULL) {
2428                 IDM_CONN_LOG(CE_NOTE,
2429                     "idm_so_buf_alloc: failed buffer allocation");
2430                 return (IDM_STATUS_FAIL);
2431         }
2432 
2433         return (IDM_STATUS_SUCCESS);
2434 }
2435 
2436 /* ARGSUSED */
2437 static idm_status_t
2438 idm_so_buf_setup(idm_buf_t *idb)
2439 {
2440         /* Ensure bufalloc'd flag is unset */
2441         idb->idb_bufalloc = B_FALSE;
2442 
2443         return (IDM_STATUS_SUCCESS);
2444 }
2445 
2446 /* ARGSUSED */
2447 static void
2448 idm_so_buf_teardown(idm_buf_t *idb)
2449 {
2450         /* nothing to do here */
2451 }
2452 
2453 static void
2454 idm_so_buf_free(idm_buf_t *idb)
2455 {
2456         if (idb->idb_buf_private == NULL) {
2457                 kmem_free(idb->idb_buf, idb->idb_buflen);
2458         } else {
2459                 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2460         }
2461 }
2462 
2463 static void
2464 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2465     uint32_t offset, uint32_t length)
2466 {
2467         idm_so_conn_t   *so_conn = ic->ic_transport_private;
2468         idm_pdu_t       tmppdu;
2469         idm_buf_t       *rtt_buf;
2470 
2471         ASSERT(mutex_owned(&idt->idt_mutex));
2472 
2473         /*
2474          * Allocate a buffer to represent the RTT transfer.  We could further
2475          * optimize this by allocating the buffers internally from an rtt
2476          * specific buffer cache since this is socket-specific code but for
2477          * now we will keep it simple.
2478          */
2479         rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2480         if (rtt_buf == NULL) {
2481                 /*
2482                  * If we're in FFP then the failure was likely a resource
2483                  * allocation issue and we should close the connection by
2484                  * sending a CE_TRANSPORT_FAIL event.
2485                  *
2486                  * If we're not in FFP then idm_buf_alloc will always
2487                  * fail and the state is transitioning to "complete" anyway
2488                  * so we won't bother to send an event.
2489                  */
2490                 mutex_enter(&ic->ic_state_mutex);
2491                 if (ic->ic_ffp)
2492                         idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2493                             NULL, CT_NONE);
2494                 mutex_exit(&ic->ic_state_mutex);
2495                 mutex_exit(&idt->idt_mutex);
2496                 return;
2497         }
2498 
2499         rtt_buf->idb_buf_cb = NULL;
2500         rtt_buf->idb_cb_arg = NULL;
2501         rtt_buf->idb_bufoffset = offset;
2502         rtt_buf->idb_xfer_len = length;
2503         rtt_buf->idb_ic = idt->idt_ic;
2504         rtt_buf->idb_task_binding = idt;
2505 
2506         /*
2507          * The new buffer (if any) represents an additional
2508          * reference on the task
2509          */
2510         idm_task_hold(idt);
2511         mutex_exit(&idt->idt_mutex);
2512 
2513         /*
2514          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2515          * idm_sotx_thread.
2516          */
2517         mutex_enter(&so_conn->ic_tx_mutex);
2518 
2519         if (!so_conn->ic_tx_thread_running) {
2520                 idm_buf_free(rtt_buf);
2521                 mutex_exit(&so_conn->ic_tx_mutex);
2522                 idm_task_rele(idt);
2523                 return;
2524         }
2525 
2526         /*
2527          * Build a template for the data PDU headers we will use so that
2528          * the SN values will stay consistent with other PDU's we are
2529          * transmitting like R2T and SCSI status.
2530          */
2531         bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2532         tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2533         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2534             ISCSI_OP_SCSI_DATA);
2535         rtt_buf->idb_tx_thread = B_TRUE;
2536         rtt_buf->idb_in_transport = B_TRUE;
2537         list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2538         cv_signal(&so_conn->ic_tx_cv);
2539         mutex_exit(&so_conn->ic_tx_mutex);
2540 }
2541 
2542 static void
2543 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2544 {
2545         /*
2546          * Don't worry about status -- we assume any error handling
2547          * is performed by the caller (idm_sotx_thread).
2548          */
2549         idb->idb_in_transport = B_FALSE;
2550         idm_task_rele(idt);
2551         idm_buf_free(idb);
2552 }
2553 
2554 static idm_status_t
2555 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2556     uint32_t buf_region_offset, uint32_t buf_region_length)
2557 {
2558         idm_conn_t              *ic;
2559         uint32_t                max_dataseglen;
2560         size_t                  remainder, chunk;
2561         uint32_t                data_offset = buf_region_offset;
2562         iscsi_data_hdr_t        *bhs;
2563         idm_pdu_t               *pdu;
2564         idm_status_t            tx_status;
2565 
2566         ASSERT(mutex_owned(&idt->idt_mutex));
2567 
2568         ic = idt->idt_ic;
2569 
2570         max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2571         remainder = buf_region_length;
2572 
2573         while (remainder) {
2574                 if (idt->idt_state != TASK_ACTIVE) {
2575                         ASSERT((idt->idt_state != TASK_IDLE) &&
2576                             (idt->idt_state != TASK_COMPLETE));
2577                         return (IDM_STATUS_ABORTED);
2578                 }
2579 
2580                 /* check to see if we need to chunk the data */
2581                 if (remainder > max_dataseglen) {
2582                         chunk = max_dataseglen;
2583                 } else {
2584                         chunk = remainder;
2585                 }
2586 
2587                 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2588                 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2589                 pdu->isp_ic = ic;
2590                 pdu->isp_flags = 0;  /* initialize isp_flags */
2591 
2592                 /*
2593                  * We've already built a build a header template
2594                  * to use during the transfer.  Use this template so that
2595                  * the SN values stay consistent with any unrelated PDU's
2596                  * being transmitted.
2597                  */
2598                 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2599                     sizeof (iscsi_hdr_t));
2600 
2601                 /*
2602                  * Set DataSN, data offset, and flags in BHS
2603                  * For the prototype build, A = 0, S = 0, U = 0
2604                  */
2605                 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2606 
2607                 bhs->datasn          = htonl(idt->idt_exp_datasn++);
2608 
2609                 hton24(bhs->dlength, chunk);
2610                 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2611 
2612                 /* setup data */
2613                 pdu->isp_data        =  (uint8_t *)idb->idb_buf + data_offset;
2614                 pdu->isp_datalen = (uint_t)chunk;
2615 
2616                 if (chunk == remainder) {
2617                         bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2618                         /* Piggyback the status with the last data PDU */
2619                         if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2620                                 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2621                                     IDM_PDU_ADVANCE_STATSN;
2622                                 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2623                                     (idt, pdu);
2624                                 idt->idt_flags |=
2625                                     IDM_TASK_PHASECOLLAPSE_SUCCESS;
2626 
2627                         }
2628                 }
2629 
2630                 remainder       -= chunk;
2631                 data_offset     += chunk;
2632 
2633                 /* Instrument the data-send DTrace probe. */
2634                 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2635                         DTRACE_ISCSI_2(data__send,
2636                             idm_conn_t *, idt->idt_ic,
2637                             iscsi_data_rsp_hdr_t *,
2638                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2639                 }
2640 
2641                 /*
2642                  * Now that we're done working with idt_exp_datasn,
2643                  * idt->idt_state and idb->idb_bufoffset we can release
2644                  * the task lock -- don't want to hold it across the
2645                  * call to idm_i_so_tx since we could block.
2646                  */
2647                 mutex_exit(&idt->idt_mutex);
2648 
2649                 /*
2650                  * Transmit the PDU.  Call the internal routine directly
2651                  * as there is already implicit ordering.
2652                  */
2653                 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2654                         mutex_enter(&idt->idt_mutex);
2655                         return (tx_status);
2656                 }
2657 
2658                 mutex_enter(&idt->idt_mutex);
2659                 idt->idt_tx_bytes += chunk;
2660         }
2661 
2662         return (IDM_STATUS_SUCCESS);
2663 }
2664 
2665 /*
2666  * TX PDU cache
2667  */
2668 /* ARGSUSED */
2669 int
2670 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2671 {
2672         idm_pdu_t       *pdu = hdl;
2673 
2674         bzero(pdu, sizeof (idm_pdu_t));
2675         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2676         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2677         pdu->isp_callback = idm_sotx_cache_pdu_cb;
2678         pdu->isp_magic = IDM_PDU_MAGIC;
2679         bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2680 
2681         return (0);
2682 }
2683 
2684 /* ARGSUSED */
2685 void
2686 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2687 {
2688         /* reset values between use */
2689         pdu->isp_datalen = 0;
2690 
2691         kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2692 }
2693 
2694 /*
2695  * RX PDU cache
2696  */
2697 /* ARGSUSED */
2698 int
2699 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2700 {
2701         idm_pdu_t       *pdu = hdl;
2702 
2703         bzero(pdu, sizeof (idm_pdu_t));
2704         pdu->isp_magic = IDM_PDU_MAGIC;
2705         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2706         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2707 
2708         return (0);
2709 }
2710 
2711 /* ARGSUSED */
2712 static void
2713 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2714 {
2715         pdu->isp_iovlen = 0;
2716         pdu->isp_sorx_buf = 0;
2717         kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2718 }
2719 
2720 static void
2721 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2722 {
2723         /*
2724          * We had to modify our cached RX PDU with a longer header buffer
2725          * and/or a longer data buffer.  Release the new buffers and fix
2726          * the fields back to what we would expect for a cached RX PDU.
2727          */
2728         if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2729                 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2730         }
2731         if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2732                 kmem_free(pdu->isp_data, pdu->isp_datalen);
2733         }
2734         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2735         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2736         pdu->isp_data = NULL;
2737         pdu->isp_datalen = 0;
2738         pdu->isp_sorx_buf = 0;
2739         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2740         idm_sorx_cache_pdu_cb(pdu, status);
2741 }
2742 
2743 /*
2744  * This thread is only active when I/O is queued for transmit
2745  * because the socket is busy.
2746  */
2747 void
2748 idm_sotx_thread(void *arg)
2749 {
2750         idm_conn_t      *ic = arg;
2751         idm_tx_obj_t    *object, *next;
2752         idm_so_conn_t   *so_conn;
2753         idm_status_t    status = IDM_STATUS_SUCCESS;
2754 
2755         idm_conn_hold(ic);
2756 
2757         mutex_enter(&ic->ic_mutex);
2758         so_conn = ic->ic_transport_private;
2759         so_conn->ic_tx_thread_running = B_TRUE;
2760         so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2761         cv_signal(&ic->ic_cv);
2762         mutex_exit(&ic->ic_mutex);
2763 
2764         mutex_enter(&so_conn->ic_tx_mutex);
2765 
2766         while (so_conn->ic_tx_thread_running) {
2767                 while (list_is_empty(&so_conn->ic_tx_list)) {
2768                         DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2769                         cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2770                         DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2771 
2772                         if (!so_conn->ic_tx_thread_running) {
2773                                 goto tx_bail;
2774                         }
2775                 }
2776 
2777                 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2778                 list_remove(&so_conn->ic_tx_list, object);
2779                 mutex_exit(&so_conn->ic_tx_mutex);
2780 
2781                 switch (object->idm_tx_obj_magic) {
2782                 case IDM_PDU_MAGIC: {
2783                         idm_pdu_t *pdu = (idm_pdu_t *)object;
2784                         DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2785                             idm_pdu_t *, (idm_pdu_t *)object);
2786 
2787                         if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2788                                 /* No IDM task */
2789                                 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2790                         }
2791                         status = idm_i_so_tx((idm_pdu_t *)object);
2792                         break;
2793                 }
2794                 case IDM_BUF_MAGIC: {
2795                         idm_buf_t *idb = (idm_buf_t *)object;
2796                         idm_task_t *idt = idb->idb_task_binding;
2797 
2798                         DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2799                             idm_buf_t *, idb);
2800 
2801                         mutex_enter(&idt->idt_mutex);
2802                         status = idm_so_send_buf_region(idt,
2803                             idb, 0, idb->idb_xfer_len);
2804 
2805                         /*
2806                          * TX thread owns the buffer so we expect it to
2807                          * be "in transport"
2808                          */
2809                         ASSERT(idb->idb_in_transport);
2810                         if (IDM_CONN_ISTGT(ic)) {
2811                                 /*
2812                                  * idm_buf_tx_to_ini_done releases
2813                                  * idt->idt_mutex
2814                                  */
2815                                 DTRACE_ISCSI_8(xfer__done,
2816                                     idm_conn_t *, idt->idt_ic,
2817                                     uintptr_t, idb->idb_buf,
2818                                     uint32_t, idb->idb_bufoffset,
2819                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2820                                     uint32_t, idb->idb_xfer_len,
2821                                     int, XFER_BUF_TX_TO_INI);
2822                                 idm_buf_tx_to_ini_done(idt, idb, status);
2823                         } else {
2824                                 idm_so_send_rtt_data_done(idt, idb);
2825                                 mutex_exit(&idt->idt_mutex);
2826                         }
2827                         break;
2828                 }
2829 
2830                 default:
2831                         IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2832                             "(0x%08x)", object->idm_tx_obj_magic);
2833                         status = IDM_STATUS_FAIL;
2834                 }
2835 
2836                 mutex_enter(&so_conn->ic_tx_mutex);
2837 
2838                 if (status != IDM_STATUS_SUCCESS) {
2839                         so_conn->ic_tx_thread_running = B_FALSE;
2840                         idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2841                 }
2842         }
2843 
2844         /*
2845          * Before we leave, we need to abort every item remaining in the
2846          * TX list.
2847          */
2848 
2849 tx_bail:
2850         object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2851 
2852         while (object != NULL) {
2853                 next = list_next(&so_conn->ic_tx_list, object);
2854 
2855                 list_remove(&so_conn->ic_tx_list, object);
2856                 switch (object->idm_tx_obj_magic) {
2857                 case IDM_PDU_MAGIC:
2858                         idm_pdu_complete((idm_pdu_t *)object,
2859                             IDM_STATUS_ABORTED);
2860                         break;
2861 
2862                 case IDM_BUF_MAGIC: {
2863                         idm_buf_t *idb = (idm_buf_t *)object;
2864                         idm_task_t *idt = idb->idb_task_binding;
2865                         mutex_exit(&so_conn->ic_tx_mutex);
2866                         mutex_enter(&idt->idt_mutex);
2867                         /*
2868                          * TX thread owns the buffer so we expect it to
2869                          * be "in transport"
2870                          */
2871                         ASSERT(idb->idb_in_transport);
2872                         if (IDM_CONN_ISTGT(ic)) {
2873                                 /*
2874                                  * idm_buf_tx_to_ini_done releases
2875                                  * idt->idt_mutex
2876                                  */
2877                                 DTRACE_ISCSI_8(xfer__done,
2878                                     idm_conn_t *, idt->idt_ic,
2879                                     uintptr_t, idb->idb_buf,
2880                                     uint32_t, idb->idb_bufoffset,
2881                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2882                                     uint32_t, idb->idb_xfer_len,
2883                                     int, XFER_BUF_TX_TO_INI);
2884                                 idm_buf_tx_to_ini_done(idt, idb,
2885                                     IDM_STATUS_ABORTED);
2886                         } else {
2887                                 idm_so_send_rtt_data_done(idt, idb);
2888                                 mutex_exit(&idt->idt_mutex);
2889                         }
2890                         mutex_enter(&so_conn->ic_tx_mutex);
2891                         break;
2892                 }
2893                 default:
2894                         IDM_CONN_LOG(CE_WARN,
2895                             "idm_sotx_thread: Unexpected magic "
2896                             "(0x%08x)", object->idm_tx_obj_magic);
2897                 }
2898 
2899                 object = next;
2900         }
2901 
2902         mutex_exit(&so_conn->ic_tx_mutex);
2903         idm_conn_rele(ic);
2904         thread_exit();
2905         /*NOTREACHED*/
2906 }
2907 
2908 static void
2909 idm_so_socket_set_nonblock(struct sonode *node)
2910 {
2911         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2912             (node->so_state | FNONBLOCK), CRED(), NULL);
2913 }
2914 
2915 static void
2916 idm_so_socket_set_block(struct sonode *node)
2917 {
2918         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2919             (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2920 }
2921 
2922 
2923 /*
2924  * Called by kernel sockets when the connection has been accepted or
2925  * rejected. In early volo, a "disconnect" callback was sent instead of
2926  * "connectfailed", so we check for both.
2927  */
2928 /* ARGSUSED */
2929 void
2930 idm_so_timed_socket_connect_cb(ksocket_t ks,
2931     ksocket_callback_event_t ev, void *arg, uintptr_t info)
2932 {
2933         idm_so_timed_socket_t   *itp = arg;
2934         ASSERT(itp != NULL);
2935         ASSERT(ev == KSOCKET_EV_CONNECTED ||
2936             ev == KSOCKET_EV_CONNECTFAILED ||
2937             ev == KSOCKET_EV_DISCONNECTED);
2938 
2939         mutex_enter(&idm_so_timed_socket_mutex);
2940         itp->it_callback_called = B_TRUE;
2941         if (ev == KSOCKET_EV_CONNECTED) {
2942                 itp->it_socket_error_code = 0;
2943         } else {
2944                 /* Make sure the error code is non-zero on error */
2945                 if (info == 0)
2946                         info = ECONNRESET;
2947                 itp->it_socket_error_code = (int)info;
2948         }
2949         cv_signal(&itp->it_cv);
2950         mutex_exit(&idm_so_timed_socket_mutex);
2951 }
2952 
2953 int
2954 idm_so_timed_socket_connect(ksocket_t ks,
2955     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2956 {
2957         clock_t                 conn_login_max;
2958         int                     rc, nonblocking, rval;
2959         idm_so_timed_socket_t   it;
2960         ksocket_callbacks_t     ks_cb;
2961 
2962         conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2963 
2964         /*
2965          * Set to non-block socket mode, with callback on connect
2966          * Early volo used "disconnected" instead of "connectfailed",
2967          * so set callback to look for both.
2968          */
2969         bzero(&it, sizeof (it));
2970         ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2971             KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2972         ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2973         ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2974         ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2975         cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2976         rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2977         if (rc != 0)
2978                 return (rc);
2979 
2980         /* Set to non-blocking mode */
2981         nonblocking = 1;
2982         rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2983             CRED());
2984         if (rc != 0)
2985                 goto cleanup;
2986 
2987         bzero(&it, sizeof (it));
2988         for (;;) {
2989                 /*
2990                  * Warning -- in a loopback scenario, the call to
2991                  * the connect_cb can occur inside the call to
2992                  * ksocket_connect. Do not hold the mutex around the
2993                  * call to ksocket_connect.
2994                  */
2995                 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
2996                 if (rc == 0 || rc == EISCONN) {
2997                         /* socket success or already success */
2998                         rc = 0;
2999                         break;
3000                 }
3001                 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3002                         break;
3003                 }
3004 
3005                 /* TCP connect still in progress. See if out of time. */
3006                 if (ddi_get_lbolt() > conn_login_max) {
3007                         /*
3008                          * Connection retry timeout,
3009                          * failed connect to target.
3010                          */
3011                         rc = ETIMEDOUT;
3012                         break;
3013                 }
3014 
3015                 /*
3016                  * TCP connect still in progress.  Sleep until callback.
3017                  * Do NOT go to sleep if the callback already occurred!
3018                  */
3019                 mutex_enter(&idm_so_timed_socket_mutex);
3020                 if (!it.it_callback_called) {
3021                         (void) cv_timedwait(&it.it_cv,
3022                             &idm_so_timed_socket_mutex, conn_login_max);
3023                 }
3024                 if (it.it_callback_called) {
3025                         rc = it.it_socket_error_code;
3026                         mutex_exit(&idm_so_timed_socket_mutex);
3027                         break;
3028                 }
3029                 /* If timer expires, go call ksocket_connect one last time. */
3030                 mutex_exit(&idm_so_timed_socket_mutex);
3031         }
3032 
3033         /* resume blocking mode */
3034         nonblocking = 0;
3035         (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3036             CRED());
3037 cleanup:
3038         (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3039         cv_destroy(&it.it_cv);
3040         if (rc != 0) {
3041                 idm_soshutdown(ks);
3042         }
3043         return (rc);
3044 }
3045 
3046 
3047 void
3048 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3049 {
3050         int                     dp_addr_size;
3051         struct sockaddr_in      *sin;
3052         struct sockaddr_in6     *sin6;
3053 
3054         /* Build sockaddr_storage for this portal (idm_addr_t) */
3055         bzero(sa, sizeof (*sa));
3056         dp_addr_size = dportal->a_addr.i_insize;
3057         if (dp_addr_size == sizeof (struct in_addr)) {
3058                 /* IPv4 */
3059                 sa->ss_family = AF_INET;
3060                 sin = (struct sockaddr_in *)sa;
3061                 sin->sin_port = htons(dportal->a_port);
3062                 bcopy(&dportal->a_addr.i_addr.in4,
3063                     &sin->sin_addr, sizeof (struct in_addr));
3064         } else if (dp_addr_size == sizeof (struct in6_addr)) {
3065                 /* IPv6 */
3066                 sa->ss_family = AF_INET6;
3067                 sin6 = (struct sockaddr_in6 *)sa;
3068                 sin6->sin6_port = htons(dportal->a_port);
3069                 bcopy(&dportal->a_addr.i_addr.in6,
3070                     &sin6->sin6_addr, sizeof (struct in6_addr));
3071         } else {
3072                 ASSERT(0);
3073         }
3074 }
3075 
3076 
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * "[ip-address].port" (note the '.' between address and port).  This is
 * used in calls to logging functions.  If several calls to idm_sa_ntop are
 * made within the same invocation of a logging function, then each one
 * needs its own buf.
 *
 * sa   - address to format; AF_INET and AF_INET6 are understood
 * buf  - caller supplied output buffer
 * size - size of buf in bytes
 *
 * Always returns buf.  If the address family is not recognized, the
 * address cannot be converted, or buf is too small for the address plus
 * the largest possible port, the placeholder "[0].-1" is written instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
        static const char bogus_ip[] = "[0].-1";
        char tmp[INET6_ADDRSTRLEN];
        const void *addr;
        in_port_t port;

        /* Pick the address and port out of the family specific layout */
        switch (sa->ss_family) {
        case AF_INET6: {
                const struct sockaddr_in6 *in6 =
                    (const struct sockaddr_in6 *)sa;

                addr = &in6->sin6_addr;
                port = in6->sin6_port;
                break;
        }
        case AF_INET: {
                const struct sockaddr_in *in =
                    (const struct sockaddr_in *)sa;

                addr = &in->sin_addr;
                port = in->sin_port;
                break;
        }
        default:
                goto err;
        }

        if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL) {
                goto err;
        }

        /* sizeof counts the NUL, so this is the worst-case output size */
        if (strlen(tmp) + sizeof ("[].65535") > size) {
                goto err;
        }

        /* The cast makes the promoted port value match "%u" exactly */
        (void) snprintf(buf, size, "[%s].%u", tmp,
            (unsigned int)ntohs(port));
        return (buf);

err:
        (void) snprintf(buf, size, "%s", bogus_ip);
        return (buf);
}