1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/cpuvar.h>
  26 #include <sys/conf.h>
  27 #include <sys/file.h>
  28 #include <sys/ddi.h>
  29 #include <sys/sunddi.h>
  30 #include <sys/modctl.h>
  31 
  32 #include <sys/socket.h>
  33 #include <sys/strsubr.h>
  34 #include <sys/sysmacros.h>
  35 
  36 #include <sys/socketvar.h>
  37 #include <netinet/in.h>
  38 
  39 #include <sys/idm/idm.h>
  40 #include <sys/idm/idm_so.h>
  41 
  42 #define IDM_NAME_VERSION        "iSCSI Data Mover"
  43 
  44 extern struct mod_ops mod_miscops;
  45 extern struct mod_ops mod_miscops;
  46 
  47 static struct modlmisc modlmisc = {
  48         &mod_miscops,       /* Type of module */
  49         IDM_NAME_VERSION
  50 };
  51 
  52 static struct modlinkage modlinkage = {
  53         MODREV_1, (void *)&modlmisc, NULL
  54 };
  55 
/* Watchdog thread entry point; implemented outside this file. */
extern void idm_wd_thread(void *arg);

/* File-local init/fini and buffer/task helpers (definitions below). */
static int _idm_init(void);
static int _idm_fini(void);
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
    int sleepflag);

/* Logging tunables; off by default. */
boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
/* Buffer-pattern checking is enabled only on DEBUG builds. */
#ifdef DEBUG
boolean_t idm_pattern_checking = 1;
#else
boolean_t idm_pattern_checking = 0;
#endif

/*
 * Potential tuneable for the maximum number of tasks.  Default to
 * IDM_TASKIDS_MAX
 */

uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
  84 
  85 /*
  86  * Global list of transport handles
  87  *   These are listed in preferential order, so we can simply take the
  88  *   first "it_conn_is_capable" hit. Note also that the order maps to
  89  *   the order of the idm_transport_type_t list.
  90  */
/*
 * Table of known transports, indexed by idm_transport_type_t.  Order is
 * preference order: earlier entries are tried first.  it_ops/it_caps for
 * non-native transports are filled in later by idm_transport_register().
 */
idm_transport_t idm_transport_list[] = {

	/* iSER on InfiniBand transport handle */
	{IDM_TRANSPORT_TYPE_ISER,	/* type */
	"/devices/ib/iser@0:iser",	/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL},				/* transport caps */

	/* IDM native sockets transport handle */
	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
	NULL,				/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL}				/* transport caps */

};
 108 
 109 int
 110 _init(void)
 111 {
 112         int rc;
 113 
 114         if ((rc = _idm_init()) != 0) {
 115                 return (rc);
 116         }
 117 
 118         return (mod_install(&modlinkage));
 119 }
 120 
 121 int
 122 _fini(void)
 123 {
 124         int rc;
 125 
 126         if ((rc = _idm_fini()) != 0) {
 127                 return (rc);
 128         }
 129 
 130         if ((rc = mod_remove(&modlinkage)) != 0) {
 131                 return (rc);
 132         }
 133 
 134         return (rc);
 135 }
 136 
 137 int
 138 _info(struct modinfo *modinfop)
 139 {
 140         return (mod_info(&modlinkage, modinfop));
 141 }
 142 
 143 /*
 144  * idm_transport_register()
 145  *
 146  * Provides a mechanism for an IDM transport driver to register its
 147  * transport ops and caps with the IDM kernel module. Invoked during
 148  * a transport driver's attach routine.
 149  */
idm_status_t
idm_transport_register(idm_transport_attr_t *attr)
{
	ASSERT(attr->it_ops != NULL);
	ASSERT(attr->it_caps != NULL);

	switch (attr->type) {
	/* All known non-native transports here; for now, iSER */
	case IDM_TRANSPORT_TYPE_ISER:
		/* attr->type doubles as the idm_transport_list index. */
		idm_transport_list[attr->type].it_ops	= attr->it_ops;
		idm_transport_list[attr->type].it_caps	= attr->it_caps;
		return (IDM_STATUS_SUCCESS);

	default:
		/*
		 * NOTE(review): unknown types are logged but still reported
		 * as success — presumably so an unrecognized transport's
		 * attach doesn't fail outright; confirm before changing.
		 */
		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
		    "idm_transport_register", attr->type);
		return (IDM_STATUS_SUCCESS);
	}
}
 169 
 170 /*
 171  * idm_ini_conn_create
 172  *
 173  * This function is invoked by the iSCSI layer to create a connection context.
 174  * This does not actually establish the socket connection.
 175  *
 176  * cr - Connection request parameters
 177  * new_con - Output parameter that contains the new request if successful
 178  *
 179  */
idm_status_t
idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
{
	idm_transport_t		*it;
	idm_conn_t		*ic;
	int			rc;

	/* Choose the preferred transport for this destination. */
	it = idm_transport_lookup(cr);

retry:
	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
	    &cr->icr_conn_ops);

	/* Record the remote address for the later connect. */
	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
	    sizeof (cr->cr_ini_dst_addr));

	/* create the transport-specific connection components */
	rc = it->it_ops->it_ini_conn_create(cr, ic);
	if (rc != IDM_STATUS_SUCCESS) {
		/* cleanup the failed connection */
		idm_conn_destroy_common(ic);

		/*
		 * It is possible for an IB client to connect to
		 * an ethernet-only client via an IB-eth gateway.
		 * Therefore, if we are attempting to use iSER and
		 * fail, retry with sockets before ultimately
		 * failing the connection.
		 */
		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
			goto retry;
		}

		return (IDM_STATUS_FAIL);
	}

	*new_con = ic;

	/* Track the new connection on the global initiator list. */
	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}
 225 
 226 /*
 227  * idm_ini_conn_destroy
 228  *
 229  * Releases any resources associated with the connection.  This is the
 230  * complement to idm_ini_conn_create.
 231  * ic - idm_conn_t structure representing the relevant connection
 232  *
 233  */
 234 void
 235 idm_ini_conn_destroy_task(void *ic_void)
 236 {
 237         idm_conn_t *ic = ic_void;
 238 
 239         ic->ic_transport_ops->it_ini_conn_destroy(ic);
 240         idm_conn_destroy_common(ic);
 241 }
 242 
 243 void
 244 idm_ini_conn_destroy(idm_conn_t *ic)
 245 {
 246         /*
 247          * It's reasonable for the initiator to call idm_ini_conn_destroy
 248          * from within the context of the CN_CONNECT_DESTROY notification.
 249          * That's a problem since we want to destroy the taskq for the
 250          * state machine associated with the connection.  Remove the
 251          * connection from the list right away then handle the remaining
 252          * work via the idm_global_taskq.
 253          */
 254         mutex_enter(&idm.idm_global_mutex);
 255         list_remove(&idm.idm_ini_conn_list, ic);
 256         mutex_exit(&idm.idm_global_mutex);
 257 
 258         if (taskq_dispatch(idm.idm_global_taskq,
 259             &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
 260                 cmn_err(CE_WARN,
 261                     "idm_ini_conn_destroy: Couldn't dispatch task");
 262         }
 263 }
 264 
 265 /*
 266  * idm_ini_conn_connect
 267  *
 268  * Establish connection to the remote system identified in idm_conn_t.
 269  * The connection parameters including the remote IP address were established
 270  * in the call to idm_ini_conn_create.  The IDM state machine will
 271  * perform client notifications as necessary to prompt the initiator through
 272  * the login process.  IDM also keeps a timer running so that if the login
 273  * process doesn't complete in a timely manner it will fail.
 274  *
 275  * ic - idm_conn_t structure representing the relevant connection
 276  *
 277  * Returns success if the connection was established, otherwise some kind
 278  * of meaningful error code.
 279  *
 * Upon return the login has either failed or is logging in (FFP)
 281  */
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
	idm_status_t	rc;

	rc = idm_conn_sm_init(ic);
	if (rc != IDM_STATUS_SUCCESS) {
		return (ic->ic_conn_sm_status);
	}

	/* Hold connection until we return */
	idm_conn_hold(ic);

	/* Kick state machine */
	idm_conn_event(ic, CE_CONNECT_REQ, NULL);

	/*
	 * Wait for login flag.  The state machine sets CF_LOGIN_READY on
	 * success or CF_ERROR on failure and signals ic_state_cv.
	 */
	mutex_enter(&ic->ic_state_mutex);
	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
	    !(ic->ic_state_flags & CF_ERROR)) {
		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}

	/*
	 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
	 * idm_notify_client has already been generated by the idm conn
	 * state machine.  If connection fails any time after this
	 * check, we will detect it in iscsi_login.
	 */
	if (ic->ic_state_flags & CF_ERROR) {
		rc = ic->ic_conn_sm_status;
	}
	mutex_exit(&ic->ic_state_mutex);
	idm_conn_rele(ic);

	return (rc);
}
 319 
 320 /*
 321  * idm_ini_conn_disconnect
 322  *
 323  * Forces a connection (previously established using idm_ini_conn_connect)
 324  * to perform a controlled shutdown, cleaning up any outstanding requests.
 325  *
 326  * ic - idm_conn_t structure representing the relevant connection
 327  *
 328  * This is asynchronous and will return before the connection is properly
 329  * shutdown
 330  */
 331 /* ARGSUSED */
 332 void
 333 idm_ini_conn_disconnect(idm_conn_t *ic)
 334 {
 335         idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
 336 }
 337 
 338 /*
 339  * idm_ini_conn_disconnect_wait
 340  *
 341  * Forces a connection (previously established using idm_ini_conn_connect)
 342  * to perform a controlled shutdown.  Blocks until the connection is
 343  * disconnected.
 344  *
 345  * ic - idm_conn_t structure representing the relevant connection
 346  */
 347 /* ARGSUSED */
void
idm_ini_conn_disconnect_sync(idm_conn_t *ic)
{
	mutex_enter(&ic->ic_state_mutex);
	/*
	 * Only inject the failure event if the connection hasn't already
	 * reached a terminal state; then block until the state machine
	 * signals arrival at CS_S9_INIT_ERROR or CS_S11_COMPLETE.
	 */
	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
	    (ic->ic_state != CS_S11_COMPLETE)) {
		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
		    (ic->ic_state != CS_S11_COMPLETE))
			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);
}
 361 
 362 /*
 363  * idm_tgt_svc_create
 364  *
 365  * The target calls this service to obtain a service context for each available
 366  * transport, starting a service of each type related to the IP address and port
 367  * passed. The idm_svc_req_t contains the service parameters.
 368  */
idm_status_t
idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;
	idm_svc_t		*is;
	int			rc;

	*new_svc = NULL;
	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);

	/* Initialize transport-agnostic components of the service handle */
	is->is_svc_req = *sr;
	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
	idm_refcnt_init(&is->is_refcnt, is);

	/*
	 * Make sure all available transports are setup.  We call this now
	 * instead of at initialization time in case IB has become available
	 * since we started (hotplug, etc).
	 */
	idm_transport_setup(sr->sr_li, B_FALSE);

	/*
	 * Loop through the transports, configuring the transport-specific
	 * components of each one.
	 */
	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {

		it = &idm_transport_list[type];
		/*
		 * If it_ops is NULL then the transport is unconfigured
		 * and we shouldn't try to start the service.
		 */
		if (it->it_ops == NULL) {
			continue;
		}

		rc = it->it_ops->it_tgt_svc_create(sr, is);
		if (rc != IDM_STATUS_SUCCESS) {
			/*
			 * Teardown any configured services, walking back
			 * down from the transport that just failed.
			 * (type-- tests the current value then decrements,
			 * so type 0 is still visited before the loop ends.)
			 */
			while (type--) {
				it = &idm_transport_list[type];
				if (it->it_ops == NULL) {
					continue;
				}
				it->it_ops->it_tgt_svc_destroy(is);
			}
			/* Free the svc context and return */
			kmem_free(is, sizeof (idm_svc_t));
			return (rc);
		}
	}

	*new_svc = is;

	/* Publish the new service on the global target-service list. */
	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_tgt_svc_list, is);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}
 434 
 435 /*
 436  * idm_tgt_svc_destroy
 437  *
 438  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 439  *
 440  * Cleanup any resources associated with the idm_svc_t.
 441  */
 442 void
 443 idm_tgt_svc_destroy(idm_svc_t *is)
 444 {
 445         idm_transport_type_t    type;
 446         idm_transport_t         *it;
 447 
 448         mutex_enter(&idm.idm_global_mutex);
 449         /* remove this service from the global list */
 450         list_remove(&idm.idm_tgt_svc_list, is);
 451         /* wakeup any waiters for service change */
 452         cv_broadcast(&idm.idm_tgt_svc_cv);
 453         mutex_exit(&idm.idm_global_mutex);
 454 
 455         /* teardown each transport-specific service */
 456         for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 457                 it = &idm_transport_list[type];
 458                 if (it->it_ops == NULL) {
 459                         continue;
 460                 }
 461 
 462                 it->it_ops->it_tgt_svc_destroy(is);
 463         }
 464 
 465         /* tear down the svc resources */
 466         idm_refcnt_destroy(&is->is_refcnt);
 467         cv_destroy(&is->is_count_cv);
 468         mutex_destroy(&is->is_count_mutex);
 469         cv_destroy(&is->is_cv);
 470         mutex_destroy(&is->is_mutex);
 471 
 472         /* free the svc handle */
 473         kmem_free(is, sizeof (idm_svc_t));
 474 }
 475 
 476 void
 477 idm_tgt_svc_hold(idm_svc_t *is)
 478 {
 479         idm_refcnt_hold(&is->is_refcnt);
 480 }
 481 
 482 void
 483 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
 484 {
 485         idm_refcnt_rele_and_destroy(&is->is_refcnt,
 486             (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
 487 }
 488 
 489 /*
 490  * idm_tgt_svc_online
 491  *
 492  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 493  *
 494  * Online each transport service, as we want this target to be accessible
 495  * via any configured transport.
 496  *
 497  * When the initiator establishes a new connection to the target, IDM will
 498  * call the "new connect" callback defined in the idm_svc_req_t structure
 499  * and it will pass an idm_conn_t structure representing that new connection.
 500  */
 501 idm_status_t
 502 idm_tgt_svc_online(idm_svc_t *is)
 503 {
 504 
 505         idm_transport_type_t    type, last_type;
 506         idm_transport_t         *it;
 507         int                     rc = IDM_STATUS_SUCCESS;
 508 
 509         mutex_enter(&is->is_mutex);
 510         if (is->is_online == 0) {
 511                 /* Walk through each of the transports and online them */
 512                 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 513                         it = &idm_transport_list[type];
 514                         if (it->it_ops == NULL) {
 515                                 /* transport is not registered */
 516                                 continue;
 517                         }
 518 
 519                         mutex_exit(&is->is_mutex);
 520                         rc = it->it_ops->it_tgt_svc_online(is);
 521                         mutex_enter(&is->is_mutex);
 522                         if (rc != IDM_STATUS_SUCCESS) {
 523                                 last_type = type;
 524                                 break;
 525                         }
 526                 }
 527                 if (rc != IDM_STATUS_SUCCESS) {
 528                         /*
 529                          * The last transport failed to online.
 530                          * Offline any transport onlined above and
 531                          * do not online the target.
 532                          */
 533                         for (type = 0; type < last_type; type++) {
 534                                 it = &idm_transport_list[type];
 535                                 if (it->it_ops == NULL) {
 536                                         /* transport is not registered */
 537                                         continue;
 538                                 }
 539 
 540                                 mutex_exit(&is->is_mutex);
 541                                 it->it_ops->it_tgt_svc_offline(is);
 542                                 mutex_enter(&is->is_mutex);
 543                         }
 544                 } else {
 545                         /* Target service now online */
 546                         is->is_online = 1;
 547                 }
 548         } else {
 549                 /* Target service already online, just bump the count */
 550                 is->is_online++;
 551         }
 552         mutex_exit(&is->is_mutex);
 553 
 554         return (rc);
 555 }
 556 
 557 /*
 558  * idm_tgt_svc_offline
 559  *
 560  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 561  *
 562  * Shutdown any online target services.
 563  */
void
idm_tgt_svc_offline(idm_svc_t *is)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;

	mutex_enter(&is->is_mutex);
	/*
	 * Drop one online reference; only the last caller actually
	 * offlines the transports.  NOTE(review): no underflow guard —
	 * presumably callers balance online/offline exactly; confirm.
	 */
	is->is_online--;
	if (is->is_online == 0) {
		/* Walk through each of the transports and offline them */
		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
			it = &idm_transport_list[type];
			if (it->it_ops == NULL) {
				/* transport is not registered */
				continue;
			}

			/*
			 * is_mutex is dropped around the transport call,
			 * which may block.
			 */
			mutex_exit(&is->is_mutex);
			it->it_ops->it_tgt_svc_offline(is);
			mutex_enter(&is->is_mutex);
		}
	}
	mutex_exit(&is->is_mutex);
}
 588 
 589 /*
 590  * idm_tgt_svc_lookup
 591  *
 592  * Lookup a service instance listening on the specified port
 593  */
 594 
idm_svc_t *
idm_tgt_svc_lookup(uint16_t port)
{
	idm_svc_t *result;

retry:
	mutex_enter(&idm.idm_global_mutex);
	for (result = list_head(&idm.idm_tgt_svc_list);
	    result != NULL;
	    result = list_next(&idm.idm_tgt_svc_list, result)) {
		if (result->is_svc_req.sr_port == port) {
			if (result->is_online == 0) {
				/*
				 * A service exists on this port, but it
				 * is going away, wait for it to cleanup.
				 * cv_wait re-acquires the mutex; drop it
				 * and rescan the list from the top since
				 * it may have changed while we slept.
				 */
				cv_wait(&idm.idm_tgt_svc_cv,
				    &idm.idm_global_mutex);
				mutex_exit(&idm.idm_global_mutex);
				goto retry;
			}
			/* Return the service held; caller must release. */
			idm_tgt_svc_hold(result);
			mutex_exit(&idm.idm_global_mutex);
			return (result);
		}
	}
	mutex_exit(&idm.idm_global_mutex);

	/* No service is listening on this port. */
	return (NULL);
}
 625 
 626 /*
 627  * idm_negotiate_key_values()
 628  * Give IDM level a chance to negotiate any login parameters it should own.
 629  *  -- leave unhandled parameters alone on request_nvl
 630  *  -- move all handled parameters to response_nvl with an appropriate response
 631  *  -- also add an entry to negotiated_nvl for any accepted parameters
 632  */
 633 kv_status_t
 634 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
 635     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
 636 {
 637         ASSERT(ic->ic_transport_ops != NULL);
 638         return (ic->ic_transport_ops->it_negotiate_key_values(ic,
 639             request_nvl, response_nvl, negotiated_nvl));
 640 }
 641 
 642 /*
 643  * idm_notice_key_values()
 644  * Activate at the IDM level any parameters that have been negotiated.
 645  * Passes the set of key value pairs to the transport for activation.
 646  * This will be invoked as the connection is entering full-feature mode.
 647  */
 648 void
 649 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
 650 {
 651         ASSERT(ic->ic_transport_ops != NULL);
 652         ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
 653 }
 654 
 655 /*
 656  * idm_declare_key_values()
 657  * Activate an operational set of declarative parameters from the config_nvl,
 658  * and return the selected values in the outgoing_nvl.
 659  */
 660 kv_status_t
 661 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
 662     nvlist_t *outgoing_nvl)
 663 {
 664         ASSERT(ic->ic_transport_ops != NULL);
 665         return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
 666             outgoing_nvl));
 667 }
 668 
 669 /*
 670  * idm_buf_tx_to_ini
 671  *
 672  * This is IDM's implementation of the 'Put_Data' operational primitive.
 673  *
 674  * This function is invoked by a target iSCSI layer to request its local
 675  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
 676  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
 677  * transferred to the initiator associated with task 'idt'. The connection
 678  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
 679  * and the callback (idb->idb_buf_cb) at transfer completion are
 680  * provided as input.
 681  *
 682  * This data transfer takes place transparently to the remote iSCSI layer,
 683  * i.e. without its participation.
 684  *
 685  * Using sockets, IDM implements the data transfer by segmenting the data
 686  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
 687  * initiator. iSER performs the transfer using RDMA write.
 688  *
 689  */
idm_status_t
idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer parameters and completion callback. */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * Buffer should not contain the pattern.  If the pattern is
	 * present then we've been asked to transmit initialized data
	 */
	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_tx_to_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_in_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE: no mutex_exit here — the transport's
		 * it_buf_tx_to_ini entry point is called with idt_mutex
		 * held and is expected to release it (see the contract
		 * documented at idm_buf_tx_to_ini_done).
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
		    (idt, idb);
		return (rc);

	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Once the task is aborted, any buffers added to the
		 * idt_inbufv will never get cleaned up, so just return
		 * SUCCESS.  The buffer should get cleaned up by the
		 * client or framework once task_aborted has completed.
		 */
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	/* Unreachable unless the state machine is corrupt (ASSERT above). */
	return (IDM_STATUS_FAIL);
}
 749 
 750 /*
 751  * idm_buf_rx_from_ini
 752  *
 753  * This is IDM's implementation of the 'Get_Data' operational primitive.
 754  *
 755  * This function is invoked by a target iSCSI layer to request its local
 756  * Datamover layer to retrieve certain data identified by the R2T PDU from the
 757  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
 758  * mapped to the respective buffer by the task tags (ITT & TTT).
 759  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
 * the callback (idb->idb_buf_cb) notification for data transfer completion
 * are provided as input.
 762  *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
 * local and remote Datamover layers transparently bring
 * about the data transfer requested by the R2T PDU, without the participation
 766  * of the iSCSI layers.
 767  *
 768  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
 769  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
 770  *
 771  */
idm_status_t
idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer parameters and completion callback. */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
	 * "Data Out" PDU's
	 */
	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_rx_from_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_out_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE: no mutex_exit here — the transport's
		 * it_buf_rx_from_ini entry point is called with idt_mutex
		 * held and is expected to release it (mirrors the
		 * idm_buf_tx_to_ini convention).
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
		    (idt, idb);
		return (rc);
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);
	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	/* Unreachable unless the state machine is corrupt (ASSERT above). */
	return (IDM_STATUS_FAIL);
}
 818 
 819 /*
 820  * idm_buf_tx_to_ini_done
 821  *
 822  * The transport calls this after it has completed a transfer requested by
 823  * a call to transport_buf_tx_to_ini
 824  *
 825  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 826  * idt may be freed after the call to idb->idb_buf_cb.
 827  */
void
idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	/* Transfer is over; clear the in-flight markers and timestamp it. */
	idb->idb_in_transport = B_FALSE;
	idb->idb_tx_thread = B_FALSE;
	idt->idt_tx_to_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_in_locked(idt, idb);
		/* Drop the mutex before the callback; idt may be freed. */
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}
 882 
 883 /*
 884  * idm_buf_rx_from_ini_done
 885  *
 886  * The transport calls this after it has completed a transfer requested by
 887  * a call totransport_buf_tx_to_ini
 888  *
 889  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 890  * idt may be freed after the call to idb->idb_buf_cb.
 891  */
 892 void
 893 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
 894 {
 895         ASSERT(mutex_owned(&idt->idt_mutex));
 896         idb->idb_in_transport = B_FALSE;
 897         idt->idt_rx_from_ini_done++;
 898         gethrestime(&idb->idb_xfer_done);
 899 
 900         /*
 901          * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
 902          * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
 903          * to 0.
 904          */
 905         idm_task_rele(idt);
 906         idb->idb_status = status;
 907 
 908         if (status == IDM_STATUS_SUCCESS) {
 909                 /*
 910                  * Buffer should not contain the pattern.  If it does then
 911                  * we did not get the data from the remote host.
 912                  */
 913                 IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
 914         }
 915 
 916         switch (idt->idt_state) {
 917         case TASK_ACTIVE:
 918                 idt->idt_ic->ic_timestamp = ddi_get_lbolt();
 919                 idm_buf_unbind_out_locked(idt, idb);
 920                 mutex_exit(&idt->idt_mutex);
 921                 (*idb->idb_buf_cb)(idb, status);
 922                 return;
 923         case TASK_SUSPENDING:
 924         case TASK_SUSPENDED:
 925         case TASK_ABORTING:
 926         case TASK_ABORTED:
 927                 /*
 928                  * To keep things simple we will ignore the case where the
 929                  * transfer was successful and leave all buffers bound to the
 930                  * task.  This allows us to also ignore the case where we've
 931                  * been asked to abort a task but the last transfer of the
 932                  * task has completed.  IDM has no idea whether this was, in
 933                  * fact, the last transfer of the task so it would be difficult
 934                  * to handle this case.  Everything should get sorted out again
 935                  * after task reassignment is complete.
 936                  *
 937                  * In the case of TASK_ABORTING we could conceivably call the
 938                  * buffer callback here but the timing of when the client's
 939                  * client_task_aborted callback is invoked vs. when the client's
 940                  * buffer callback gets invoked gets sticky.  We don't want
 941                  * the client to here from us again after the call to
 942                  * client_task_aborted() but we don't want to give it a bunch
 943                  * of failed buffer transfers until we've called
 944                  * client_task_aborted().  Instead we'll just leave all the
 945                  * buffers bound and allow the client to cleanup.
 946                  */
 947                 break;
 948         default:
 949                 ASSERT(0);
 950         }
 951         mutex_exit(&idt->idt_mutex);
 952 }
 953 
 954 /*
 955  * idm_buf_alloc
 956  *
 957  * Allocates a buffer handle and registers it for use with the transport
 958  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
 959  * as well as the handle.
 960  *
 961  * ic           - connection on which the buffer will be transferred
 962  * bufptr       - allocate memory for buffer if NULL, else assign to buffer
 963  * buflen       - length of buffer
 964  *
 965  * Returns idm_buf_t handle if successful, otherwise NULL
 966  */
 967 idm_buf_t *
 968 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
 969 {
 970         idm_buf_t       *buf = NULL;
 971         int             rc;
 972 
 973         ASSERT(ic != NULL);
 974         ASSERT(idm.idm_buf_cache != NULL);
 975         ASSERT(buflen > 0);
 976 
 977         /* Don't allocate new buffers if we are not in FFP */
 978         mutex_enter(&ic->ic_state_mutex);
 979         if (!ic->ic_ffp) {
 980                 mutex_exit(&ic->ic_state_mutex);
 981                 return (NULL);
 982         }
 983 
 984 
 985         idm_conn_hold(ic);
 986         mutex_exit(&ic->ic_state_mutex);
 987 
 988         buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
 989         if (buf == NULL) {
 990                 idm_conn_rele(ic);
 991                 return (NULL);
 992         }
 993 
 994         buf->idb_ic          = ic;
 995         buf->idb_buflen              = buflen;
 996         buf->idb_exp_offset  = 0;
 997         buf->idb_bufoffset   = 0;
 998         buf->idb_xfer_len    = 0;
 999         buf->idb_magic               = IDM_BUF_MAGIC;
1000         buf->idb_in_transport        = B_FALSE;
1001         buf->idb_bufbcopy    = B_FALSE;
1002 
1003         /*
1004          * If bufptr is NULL, we have an implicit request to allocate
1005          * memory for this IDM buffer handle and register it for use
1006          * with the transport. To simplify this, and to give more freedom
1007          * to the transport layer for it's own buffer management, both of
1008          * these actions will take place in the transport layer.
1009          * If bufptr is set, then the caller has allocated memory (or more
1010          * likely it's been passed from an upper layer), and we need only
1011          * register the buffer for use with the transport layer.
1012          */
1013         if (bufptr == NULL) {
1014                 /*
1015                  * Allocate a buffer from the transport layer (which
1016                  * will also register the buffer for use).
1017                  */
1018                 rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1019                 if (rc != 0) {
1020                         idm_conn_rele(ic);
1021                         kmem_cache_free(idm.idm_buf_cache, buf);
1022                         return (NULL);
1023                 }
1024                 /* Set the bufalloc'd flag */
1025                 buf->idb_bufalloc = B_TRUE;
1026         } else {
1027                 /*
1028                  * For large transfers, Set the passed bufptr into
1029                  * the buf handle, and register the handle with the
1030                  * transport layer. As memory registration with the
1031                  * transport layer is a time/cpu intensive operation,
1032                  * for small transfers (up to a pre-defined bcopy
1033                  * threshold), use pre-registered memory buffers
1034                  * and bcopy data at the appropriate time.
1035                  */
1036                 buf->idb_buf = bufptr;
1037 
1038                 rc = ic->ic_transport_ops->it_buf_setup(buf);
1039                 if (rc != 0) {
1040                         idm_conn_rele(ic);
1041                         kmem_cache_free(idm.idm_buf_cache, buf);
1042                         return (NULL);
1043                 }
1044                 /*
1045                  * The transport layer is now expected to set the idb_bufalloc
1046                  * correctly to indicate if resources have been allocated.
1047                  */
1048         }
1049 
1050         IDM_BUFPAT_SET(buf);
1051 
1052         return (buf);
1053 }
1054 
1055 /*
1056  * idm_buf_free
1057  *
1058  * Release a buffer handle along with the associated buffer that was allocated
1059  * or assigned with idm_buf_alloc
1060  */
1061 void
1062 idm_buf_free(idm_buf_t *buf)
1063 {
1064         idm_conn_t *ic = buf->idb_ic;
1065 
1066 
1067         buf->idb_task_binding        = NULL;
1068 
1069         if (buf->idb_bufalloc) {
1070                 ic->ic_transport_ops->it_buf_free(buf);
1071         } else {
1072                 ic->ic_transport_ops->it_buf_teardown(buf);
1073         }
1074         kmem_cache_free(idm.idm_buf_cache, buf);
1075         idm_conn_rele(ic);
1076 }
1077 
1078 /*
1079  * idm_buf_bind_in
1080  *
1081  * This function associates a buffer with a task. This is only for use by the
1082  * iSCSI initiator that will have only one buffer per transfer direction
1083  *
1084  */
1085 void
1086 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1087 {
1088         mutex_enter(&idt->idt_mutex);
1089         idm_buf_bind_in_locked(idt, buf);
1090         mutex_exit(&idt->idt_mutex);
1091 }
1092 
/*
 * Associate buf with the task's inbound buffer list.
 * Caller must hold idt->idt_mutex.
 */
static void
idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_inbufv, buf);
}
1100 
/*
 * idm_buf_bind_out
 *
 * Associate a buffer with a task for outbound (write) data.
 * Takes and releases idt->idt_mutex.
 */
void
idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Write data for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {

		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_bind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1116 
/*
 * Associate buf with the task's outbound buffer list.
 * Caller must hold idt->idt_mutex.
 */
static void
idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_outbufv, buf);
}
1124 
/*
 * idm_buf_unbind_in
 *
 * Remove a buffer from the task's inbound buffer list.
 * Takes and releases idt->idt_mutex.
 */
void
idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Read data into the read buffer
	 * for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {
		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_in_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1140 
/* Remove buf from the inbound list.  Caller must hold idt->idt_mutex. */
static void
idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_inbufv, buf);
}
1146 
/*
 * idm_buf_unbind_out
 *
 * Remove a buffer from the task's outbound buffer list.
 * Takes and releases idt->idt_mutex.
 */
void
idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
{
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1154 
/* Remove buf from the outbound list.  Caller must hold idt->idt_mutex. */
static void
idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_outbufv, buf);
}
1160 
1161 /*
1162  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1163  * iSCSI PDU
1164  */
1165 idm_buf_t *
1166 idm_buf_find(void *lbuf, size_t data_offset)
1167 {
1168         idm_buf_t       *idb;
1169         list_t          *lst = (list_t *)lbuf;
1170 
1171         /* iterate through the list to find the buffer */
1172         for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1173 
1174                 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1175                     (idb->idb_bufoffset == 0));
1176 
1177                 if ((data_offset >= idb->idb_bufoffset) &&
1178                     (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1179 
1180                         return (idb);
1181                 }
1182         }
1183 
1184         return (NULL);
1185 }
1186 
1187 void
1188 idm_bufpat_set(idm_buf_t *idb)
1189 {
1190         idm_bufpat_t    *bufpat;
1191         int             len, i;
1192 
1193         len = idb->idb_buflen;
1194         len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1195 
1196         bufpat = idb->idb_buf;
1197         for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1198                 bufpat->bufpat_idb = idb;
1199                 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1200                 bufpat->bufpat_offset = i;
1201                 bufpat++;
1202         }
1203 }
1204 
/*
 * idm_bufpat_check
 *
 * Scan the buffer for the debug pattern written by idm_bufpat_set.
 * Returns B_TRUE if the pattern is found anywhere in the checked range
 * (i.e. data was NOT overwritten by a transfer), B_FALSE otherwise.
 * BP_CHECK_QUICK checks only the first pattern-sized chunk;
 * BP_CHECK_ASSERT additionally panics (via ASSERT) on a match.
 */
boolean_t
idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
{
	idm_bufpat_t	*bufpat;
	/*
	 * NOTE(review): len/check_len are ints while idb_buflen is 64-bit;
	 * assumes callers pass check_len < 2GB -- confirm at call sites.
	 */
	int		len, i;

	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
	/* Round down to a whole number of pattern-sized chunks. */
	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
	ASSERT(len <= idb->idb_buflen);
	bufpat = idb->idb_buf;

	/*
	 * Don't check the pattern in buffers that came from outside IDM
	 * (these will be buffers from the initiator that we opted not
	 * to double-buffer)
	 */
	if (!idb->idb_bufalloc)
		return (B_FALSE);

	/*
	 * Return true if we find the pattern anywhere in the buffer
	 */
	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
		if (BUFPAT_MATCH(bufpat, idb)) {
			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
			    "idb %p bufpat %p "
			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
			DTRACE_PROBE2(bufpat__pattern__found,
			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
			if (type == BP_CHECK_ASSERT) {
				ASSERT(0);
			}
			return (B_TRUE);
		}
		bufpat++;
	}

	return (B_FALSE);
}
1246 
1247 /*
1248  * idm_task_alloc
1249  *
1250  * This function will allocate a idm_task_t structure. A task tag is also
1251  * generated and saved in idt_tt. The task is not active.
1252  */
1253 idm_task_t *
1254 idm_task_alloc(idm_conn_t *ic)
1255 {
1256         idm_task_t      *idt;
1257 
1258         ASSERT(ic != NULL);
1259 
1260         /* Don't allocate new tasks if we are not in FFP */
1261         if (!ic->ic_ffp) {
1262                 return (NULL);
1263         }
1264         idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1265         if (idt == NULL) {
1266                 return (NULL);
1267         }
1268 
1269         ASSERT(list_is_empty(&idt->idt_inbufv));
1270         ASSERT(list_is_empty(&idt->idt_outbufv));
1271 
1272         mutex_enter(&ic->ic_state_mutex);
1273         if (!ic->ic_ffp) {
1274                 mutex_exit(&ic->ic_state_mutex);
1275                 kmem_cache_free(idm.idm_task_cache, idt);
1276                 return (NULL);
1277         }
1278         idm_conn_hold(ic);
1279         mutex_exit(&ic->ic_state_mutex);
1280 
1281         idt->idt_state               = TASK_IDLE;
1282         idt->idt_ic          = ic;
1283         idt->idt_private     = NULL;
1284         idt->idt_exp_datasn  = 0;
1285         idt->idt_exp_rttsn   = 0;
1286         idt->idt_flags               = 0;
1287         return (idt);
1288 }
1289 
1290 /*
1291  * idm_task_start
1292  *
1293  * Mark the task active and initialize some stats. The caller
1294  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1295  * The task service does not function as a task/work engine, it is the
1296  * responsibility of the initiator to start the data transfer and free the
1297  * resources.
1298  */
1299 void
1300 idm_task_start(idm_task_t *idt, uintptr_t handle)
1301 {
1302         ASSERT(idt != NULL);
1303 
1304         /* mark the task as ACTIVE */
1305         idt->idt_state = TASK_ACTIVE;
1306         idt->idt_client_handle = handle;
1307         idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1308             idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1309             idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1310 }
1311 
1312 /*
1313  * idm_task_done
1314  *
1315  * This function sets the state to indicate that the task is no longer active.
1316  */
1317 void
1318 idm_task_done(idm_task_t *idt)
1319 {
1320         ASSERT(idt != NULL);
1321 
1322         mutex_enter(&idt->idt_mutex);
1323         idt->idt_state = TASK_IDLE;
1324         mutex_exit(&idt->idt_mutex);
1325 
1326         /*
1327          * Although unlikely it is possible for a reference to come in after
1328          * the client has decided the task is over but before we've marked
1329          * the task idle.  One specific unavoidable scenario is the case where
1330          * received PDU with the matching ITT/TTT results in a successful
1331          * lookup of this task.  We are at the mercy of the remote node in
1332          * that case so we need to handle it.  Now that the task state
1333          * has changed no more references will occur so a simple call to
1334          * idm_refcnt_wait_ref should deal with the situation.
1335          */
1336         idm_refcnt_wait_ref(&idt->idt_refcnt);
1337         idm_refcnt_reset(&idt->idt_refcnt);
1338 }
1339 
1340 /*
1341  * idm_task_free
1342  *
1343  * This function will free the Task Tag and the memory allocated for the task
1344  * idm_task_done should be called prior to this call
1345  */
1346 void
1347 idm_task_free(idm_task_t *idt)
1348 {
1349         idm_conn_t *ic;
1350 
1351         ASSERT(idt != NULL);
1352         ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1353         ASSERT(idt->idt_state == TASK_IDLE);
1354 
1355         ic = idt->idt_ic;
1356 
1357         /*
1358          * It's possible for items to still be in the idt_inbufv list if
1359          * they were added after idm_free_task_rsrc was called.  We rely on
1360          * STMF to free all buffers associated with the task however STMF
1361          * doesn't know that we have this reference to the buffers.
1362          * Use list_create so that we don't end up with stale references
1363          * to these buffers.
1364          */
1365         list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1366             offsetof(idm_buf_t, idb_buflink));
1367         list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1368             offsetof(idm_buf_t, idb_buflink));
1369 
1370         kmem_cache_free(idm.idm_task_cache, idt);
1371 
1372         idm_conn_rele(ic);
1373 }
1374 
1375 /*
1376  * idm_task_find_common
1377  *      common code for idm_task_find() and idm_task_find_and_complete()
1378  */
1379 /*ARGSUSED*/
1380 static idm_task_t *
1381 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1382     boolean_t complete)
1383 {
1384         uint32_t        tt, client_handle;
1385         idm_task_t      *idt;
1386 
1387         /*
1388          * Must match both itt and ttt.  The table is indexed by itt
1389          * for initiator connections and ttt for target connections.
1390          */
1391         if (IDM_CONN_ISTGT(ic)) {
1392                 tt = ttt;
1393                 client_handle = itt;
1394         } else {
1395                 tt = itt;
1396                 client_handle = ttt;
1397         }
1398 
1399         rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1400         if (tt >= idm.idm_taskid_max) {
1401                 rw_exit(&idm.idm_taskid_table_lock);
1402                 return (NULL);
1403         }
1404 
1405         idt = idm.idm_taskid_table[tt];
1406 
1407         if (idt != NULL) {
1408                 mutex_enter(&idt->idt_mutex);
1409                 if ((idt->idt_state != TASK_ACTIVE) ||
1410                     (idt->idt_ic != ic) ||
1411                     (IDM_CONN_ISTGT(ic) &&
1412                     (idt->idt_client_handle != client_handle))) {
1413                         /*
1414                          * Task doesn't match or task is aborting and
1415                          * we don't want any more references.
1416                          */
1417                         if ((idt->idt_ic != ic) &&
1418                             (idt->idt_state == TASK_ACTIVE) &&
1419                             (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1420                             client_handle)) {
1421                                 IDM_CONN_LOG(CE_WARN,
1422                                 "idm_task_find: wrong connection %p != %p",
1423                                     (void *)ic, (void *)idt->idt_ic);
1424                         }
1425                         mutex_exit(&idt->idt_mutex);
1426                         rw_exit(&idm.idm_taskid_table_lock);
1427                         return (NULL);
1428                 }
1429                 idm_task_hold(idt);
1430                 /*
1431                  * Set the task state to TASK_COMPLETE so it can no longer
1432                  * be found or aborted.
1433                  */
1434                 if (B_TRUE == complete)
1435                         idt->idt_state = TASK_COMPLETE;
1436                 mutex_exit(&idt->idt_mutex);
1437         }
1438         rw_exit(&idm.idm_taskid_table_lock);
1439 
1440         return (idt);
1441 }
1442 
1443 /*
1444  * This function looks up a task by task tag.
1445  */
1446 idm_task_t *
1447 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1448 {
1449         return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1450 }
1451 
1452 /*
1453  * This function looks up a task by task tag. If found, the task state
1454  * is atomically set to TASK_COMPLETE so it can longer be found or aborted.
1455  */
1456 idm_task_t *
1457 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1458 {
1459         return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1460 }
1461 
1462 /*
1463  * idm_task_find_by_handle
1464  *
1465  * This function looks up a task by the client-private idt_client_handle.
1466  *
1467  * This function should NEVER be called in the performance path.  It is
1468  * intended strictly for error recovery/task management.
1469  */
1470 /*ARGSUSED*/
1471 void *
1472 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1473 {
1474         idm_task_t      *idt = NULL;
1475         int             idx = 0;
1476 
1477         rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1478 
1479         for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1480                 idt = idm.idm_taskid_table[idx];
1481 
1482                 if (idt == NULL)
1483                         continue;
1484 
1485                 mutex_enter(&idt->idt_mutex);
1486 
1487                 if (idt->idt_state != TASK_ACTIVE) {
1488                         /*
1489                          * Task is either in suspend, abort, or already
1490                          * complete.
1491                          */
1492                         mutex_exit(&idt->idt_mutex);
1493                         continue;
1494                 }
1495 
1496                 if (idt->idt_client_handle == handle) {
1497                         idm_task_hold(idt);
1498                         mutex_exit(&idt->idt_mutex);
1499                         break;
1500                 }
1501 
1502                 mutex_exit(&idt->idt_mutex);
1503         }
1504 
1505         rw_exit(&idm.idm_taskid_table_lock);
1506 
1507         if ((idt == NULL) || (idx == idm.idm_taskid_max))
1508                 return (NULL);
1509 
1510         return (idt->idt_private);
1511 }
1512 
/* Take a reference on the task; pair with idm_task_rele(). */
void
idm_task_hold(idm_task_t *idt)
{
	idm_refcnt_hold(&idt->idt_refcnt);
}
1518 
/* Release a reference taken with idm_task_hold(). */
void
idm_task_rele(idm_task_t *idt)
{
	idm_refcnt_rele(&idt->idt_refcnt);
}
1524 
/*
 * idm_task_abort
 *
 * Abort a single task (idt != NULL, caller-supplied) or, when idt is
 * NULL, every non-idle, non-complete task on the connection.
 */
void
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	idm_task_t	*task;
	int		idx;

	/*
	 * Passing NULL as the task indicates that all tasks
	 * for this connection should be aborted.
	 */
	if (idt == NULL) {
		/*
		 * Only the connection state machine should ask for
		 * all tasks to abort and this should never happen in FFP.
		 */
		ASSERT(!ic->ic_ffp);
		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
			task = idm.idm_taskid_table[idx];
			if (task == NULL)
				continue;
			mutex_enter(&task->idt_mutex);
			if ((task->idt_state != TASK_IDLE) &&
			    (task->idt_state != TASK_COMPLETE) &&
			    (task->idt_ic == ic)) {
				/*
				 * Drop the table lock while aborting; the
				 * task mutex is released inside
				 * idm_task_abort_one().
				 */
				rw_exit(&idm.idm_taskid_table_lock);
				idm_task_abort_one(ic, task, abort_type);
				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
			} else
				mutex_exit(&task->idt_mutex);
		}
		rw_exit(&idm.idm_taskid_table_lock);
	} else {
		mutex_enter(&idt->idt_mutex);
		idm_task_abort_one(ic, idt, abort_type);
	}
}
1562 
/*
 * idm_task_abort_unref_cb
 *
 * Refcount-drained callback for aborting/suspending tasks: finishes the
 * SUSPENDING --> SUSPENDED or ABORTING --> ABORTED transition and then
 * notifies the client via idm_task_aborted().
 */
static void
idm_task_abort_unref_cb(void *ref)
{
	idm_task_t *idt = ref;

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_SUSPENDING:
		idt->idt_state = TASK_SUSPENDED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
		return;
	case TASK_ABORTING:
		idt->idt_state = TASK_ABORTED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_ABORTED);
		return;
	default:
		/* No other state should have a pending abort/suspend. */
		mutex_exit(&idt->idt_mutex);
		ASSERT(0);
		break;
	}
}
1586 
1587 /*
1588  * Abort the idm task.
1589  *    Caller must hold the task mutex, which will be released before return
1590  */
1591 static void
1592 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1593 {
1594         /* Caller must hold connection mutex */
1595         ASSERT(mutex_owned(&idt->idt_mutex));
1596         switch (idt->idt_state) {
1597         case TASK_ACTIVE:
1598                 switch (abort_type) {
1599                 case AT_INTERNAL_SUSPEND:
1600                         /* Call transport to release any resources */
1601                         idt->idt_state = TASK_SUSPENDING;
1602                         mutex_exit(&idt->idt_mutex);
1603                         ic->ic_transport_ops->it_free_task_rsrc(idt);
1604 
1605                         /*
1606                          * Wait for outstanding references.  When all
1607                          * references are released the callback will call
1608                          * idm_task_aborted().
1609                          */
1610                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1611                             &idm_task_abort_unref_cb);
1612                         return;
1613                 case AT_INTERNAL_ABORT:
1614                 case AT_TASK_MGMT_ABORT:
1615                         idt->idt_state = TASK_ABORTING;
1616                         mutex_exit(&idt->idt_mutex);
1617                         ic->ic_transport_ops->it_free_task_rsrc(idt);
1618 
1619                         /*
1620                          * Wait for outstanding references.  When all
1621                          * references are released the callback will call
1622                          * idm_task_aborted().
1623                          */
1624                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1625                             &idm_task_abort_unref_cb);
1626                         return;
1627                 default:
1628                         ASSERT(0);
1629                 }
1630                 break;
1631         case TASK_SUSPENDING:
1632                 /* Already called transport_free_task_rsrc(); */
1633                 switch (abort_type) {
1634                 case AT_INTERNAL_SUSPEND:
1635                         /* Already doing it */
1636                         break;
1637                 case AT_INTERNAL_ABORT:
1638                 case AT_TASK_MGMT_ABORT:
1639                         idt->idt_state = TASK_ABORTING;
1640                         break;
1641                 default:
1642                         ASSERT(0);
1643                 }
1644                 break;
1645         case TASK_SUSPENDED:
1646                 /* Already called transport_free_task_rsrc(); */
1647                 switch (abort_type) {
1648                 case AT_INTERNAL_SUSPEND:
1649                         /* Already doing it */
1650                         break;
1651                 case AT_INTERNAL_ABORT:
1652                 case AT_TASK_MGMT_ABORT:
1653                         idt->idt_state = TASK_ABORTING;
1654                         mutex_exit(&idt->idt_mutex);
1655 
1656                         /*
1657                          * We could probably call idm_task_aborted directly
1658                          * here but we may be holding the conn lock. It's
1659                          * easier to just switch contexts.  Even though
1660                          * we shouldn't really have any references we'll
1661                          * set the state to TASK_ABORTING instead of
1662                          * TASK_ABORTED so we can use the same code path.
1663                          */
1664                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1665                             &idm_task_abort_unref_cb);
1666                         return;
1667                 default:
1668                         ASSERT(0);
1669                 }
1670                 break;
1671         case TASK_ABORTING:
1672         case TASK_ABORTED:
1673                 switch (abort_type) {
1674                 case AT_INTERNAL_SUSPEND:
1675                         /* We're already past this point... */
1676                 case AT_INTERNAL_ABORT:
1677                 case AT_TASK_MGMT_ABORT:
1678                         /* Already doing it */
1679                         break;
1680                 default:
1681                         ASSERT(0);
1682                 }
1683                 break;
1684         case TASK_COMPLETE:
1685                 /*
1686                  * In this case, let it go.  The status has already been
1687                  * sent (which may or may not get successfully transmitted)
1688                  * and we don't want to end up in a race between completing
1689                  * the status PDU and marking the task suspended.
1690                  */
1691                 break;
1692         default:
1693                 ASSERT(0);
1694         }
1695         mutex_exit(&idt->idt_mutex);
1696 }
1697 
1698 static void
1699 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1700 {
1701         (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1702 }
1703 
1704 /*
1705  * idm_pdu_tx
1706  *
1707  * This is IDM's implementation of the 'Send_Control' operational primitive.
1708  * This function is invoked by an initiator iSCSI layer requesting the transfer
1709  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1710  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1711  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1712  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1713  * are provided as input.
1714  *
1715  */
1716 void
1717 idm_pdu_tx(idm_pdu_t *pdu)
1718 {
1719         idm_conn_t              *ic = pdu->isp_ic;
1720         iscsi_async_evt_hdr_t   *async_evt;
1721 
1722         /*
1723          * If we are in full-featured mode then route SCSI-related
1724          * commands to the appropriate function vector without checking
1725          * the connection state.  We will only be in full-feature mode
1726          * when we are in an acceptable state for SCSI PDU's.
1727          *
1728          * We also need to ensure that there are no PDU events outstanding
1729          * on the state machine.  Any non-SCSI PDU's received in full-feature
1730          * mode will result in PDU events and until these have been handled
1731          * we need to route all PDU's through the state machine as PDU
1732          * events to maintain ordering.
1733          *
1734          * Note that IDM cannot enter FFP mode until it processes in
1735          * its state machine the last xmit of the login process.
1736          * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1737          * superfluous.
1738          */
1739         mutex_enter(&ic->ic_state_mutex);
1740         if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1741                 mutex_exit(&ic->ic_state_mutex);
1742                 switch (IDM_PDU_OPCODE(pdu)) {
1743                 case ISCSI_OP_SCSI_RSP:
1744                         /* Target only */
1745                         DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1746                             iscsi_scsi_rsp_hdr_t *,
1747                             (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1748                         idm_pdu_tx_forward(ic, pdu);
1749                         return;
1750                 case ISCSI_OP_SCSI_TASK_MGT_RSP:
1751                         /* Target only */
1752                         DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1753                             iscsi_text_rsp_hdr_t *,
1754                             (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1755                         idm_pdu_tx_forward(ic, pdu);
1756                         return;
1757                 case ISCSI_OP_SCSI_DATA_RSP:
1758                         /* Target only */
1759                         DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1760                             iscsi_data_rsp_hdr_t *,
1761                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1762                         idm_pdu_tx_forward(ic, pdu);
1763                         return;
1764                 case ISCSI_OP_RTT_RSP:
1765                         /* Target only */
1766                         DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1767                             iscsi_rtt_hdr_t *,
1768                             (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1769                         idm_pdu_tx_forward(ic, pdu);
1770                         return;
1771                 case ISCSI_OP_NOOP_IN:
1772                         /* Target only */
1773                         DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1774                             iscsi_nop_in_hdr_t *,
1775                             (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1776                         idm_pdu_tx_forward(ic, pdu);
1777                         return;
1778                 case ISCSI_OP_TEXT_RSP:
1779                         /* Target only */
1780                         DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1781                             iscsi_text_rsp_hdr_t *,
1782                             (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1783                         idm_pdu_tx_forward(ic, pdu);
1784                         return;
1785                 case ISCSI_OP_TEXT_CMD:
1786                 case ISCSI_OP_NOOP_OUT:
1787                 case ISCSI_OP_SCSI_CMD:
1788                 case ISCSI_OP_SCSI_DATA:
1789                 case ISCSI_OP_SCSI_TASK_MGT_MSG:
1790                         /* Initiator only */
1791                         idm_pdu_tx_forward(ic, pdu);
1792                         return;
1793                 default:
1794                         break;
1795                 }
1796 
1797                 mutex_enter(&ic->ic_state_mutex);
1798         }
1799 
1800         /*
1801          * Any PDU's processed outside of full-feature mode and non-SCSI
1802          * PDU's in full-feature mode are handled by generating an
1803          * event to the connection state machine.  The state machine
1804          * will validate the PDU against the current state and either
1805          * transmit the PDU if the opcode is allowed or handle an
1806          * error if the PDU is not allowed.
1807          *
1808          * This code-path will also generate any events that are implied
1809          * by the PDU opcode.  For example a "login response" with success
1810          * status generates a CE_LOGOUT_SUCCESS_SND event.
1811          */
1812         switch (IDM_PDU_OPCODE(pdu)) {
1813         case ISCSI_OP_LOGIN_CMD:
1814                 idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1815                 break;
1816         case ISCSI_OP_LOGIN_RSP:
1817                 DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1818                     iscsi_login_rsp_hdr_t *,
1819                     (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1820                 idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1821                 break;
1822         case ISCSI_OP_LOGOUT_CMD:
1823                 idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1824                 break;
1825         case ISCSI_OP_LOGOUT_RSP:
1826                 DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1827                     iscsi_logout_rsp_hdr_t *,
1828                     (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1829                 idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1830                 break;
1831         case ISCSI_OP_ASYNC_EVENT:
1832                 DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1833                     iscsi_async_evt_hdr_t *,
1834                     (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1835                 async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1836                 switch (async_evt->async_event) {
1837                 case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1838                         idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1839                             (uintptr_t)pdu);
1840                         break;
1841                 case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1842                         idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1843                             (uintptr_t)pdu);
1844                         break;
1845                 case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1846                         idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1847                             (uintptr_t)pdu);
1848                         break;
1849                 case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1850                 case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1851                 default:
1852                         idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1853                             (uintptr_t)pdu);
1854                         break;
1855                 }
1856                 break;
1857         case ISCSI_OP_SCSI_RSP:
1858                 /* Target only */
1859                 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1860                     iscsi_scsi_rsp_hdr_t *,
1861                     (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1862                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1863                 break;
1864         case ISCSI_OP_SCSI_TASK_MGT_RSP:
1865                 /* Target only */
1866                 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1867                     iscsi_scsi_task_mgt_rsp_hdr_t *,
1868                     (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1869                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1870                 break;
1871         case ISCSI_OP_SCSI_DATA_RSP:
1872                 /* Target only */
1873                 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1874                     iscsi_data_rsp_hdr_t *,
1875                     (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1876                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1877                 break;
1878         case ISCSI_OP_RTT_RSP:
1879                 /* Target only */
1880                 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1881                     iscsi_rtt_hdr_t *,
1882                     (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1883                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1884                 break;
1885         case ISCSI_OP_NOOP_IN:
1886                 /* Target only */
1887                 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1888                     iscsi_nop_in_hdr_t *,
1889                     (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1890                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1891                 break;
1892         case ISCSI_OP_TEXT_RSP:
1893                 /* Target only */
1894                 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1895                     iscsi_text_rsp_hdr_t *,
1896                     (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1897                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1898                 break;
1899                 /* Initiator only */
1900         case ISCSI_OP_SCSI_CMD:
1901         case ISCSI_OP_SCSI_TASK_MGT_MSG:
1902         case ISCSI_OP_SCSI_DATA:
1903         case ISCSI_OP_NOOP_OUT:
1904         case ISCSI_OP_TEXT_CMD:
1905         case ISCSI_OP_SNACK_CMD:
1906         case ISCSI_OP_REJECT_MSG:
1907         default:
1908                 /*
1909                  * Connection state machine will validate these PDU's against
1910                  * the current state.  A PDU not allowed in the current
1911                  * state will cause a protocol error.
1912                  */
1913                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1914                 break;
1915         }
1916         mutex_exit(&ic->ic_state_mutex);
1917 }
1918 
1919 /*
1920  * Common allocation of a PDU along with memory for header and data.
1921  */
1922 static idm_pdu_t *
1923 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1924 {
1925         idm_pdu_t *result;
1926 
1927         /*
1928          * IDM clients should cache these structures for performance
1929          * critical paths.  We can't cache effectively in IDM because we
1930          * don't know the correct header and data size.
1931          *
1932          * Valid header length is assumed to be hdrlen and valid data
1933          * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1934          * can be adjusted after the PDU is returned if necessary.
1935          */
1936         result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1937         if (result != NULL) {
1938                 /* For idm_pdu_free sanity check */
1939                 result->isp_flags |= IDM_PDU_ALLOC;
1940                 /* pointer arithmetic */
1941                 result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1942                 result->isp_hdrlen = hdrlen;
1943                 result->isp_hdrbuflen = hdrlen;
1944                 result->isp_transport_hdrlen = 0;
1945                 if (datalen != 0)
1946                         result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1947                 result->isp_datalen = datalen;
1948                 result->isp_databuflen = datalen;
1949                 result->isp_magic = IDM_PDU_MAGIC;
1950         }
1951 
1952         return (result);
1953 }
1954 
1955 /*
1956  * Typical idm_pdu_alloc invocation, will block for resources.
1957  */
1958 idm_pdu_t *
1959 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1960 {
1961         return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
1962 }
1963 
1964 /*
1965  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1966  * are not available.  Needed for transport-layer allocations which may
1967  * be invoking in interrupt context.
1968  */
1969 idm_pdu_t *
1970 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
1971 {
1972         return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
1973 }
1974 
1975 /*
1976  * Free a PDU previously allocated with idm_pdu_alloc() including any
1977  * header and data space allocated as part of the original request.
1978  * Additional memory regions referenced by subsequent modification of
1979  * the isp_hdr and/or isp_data fields will not be freed.
1980  */
1981 void
1982 idm_pdu_free(idm_pdu_t *pdu)
1983 {
1984         /* Make sure the structure was allocated using idm_pdu_alloc() */
1985         ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1986         kmem_free(pdu,
1987             sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1988 }
1989 
1990 /*
1991  * Initialize the connection, private and callback fields in a PDU.
1992  */
1993 void
1994 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1995 {
1996         /*
1997          * idm_pdu_complete() will call idm_pdu_free if the callback is
1998          * NULL.  This will only work if the PDU was originally allocated
1999          * with idm_pdu_alloc().
2000          */
2001         ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
2002             (cb != NULL));
2003         pdu->isp_magic = IDM_PDU_MAGIC;
2004         pdu->isp_ic = ic;
2005         pdu->isp_private = private;
2006         pdu->isp_callback = cb;
2007 }
2008 
2009 /*
2010  * Initialize the header and header length field.  This function should
2011  * not be used to adjust the header length in a buffer allocated via
2012  * pdu_pdu_alloc since it overwrites the existing header pointer.
2013  */
2014 void
2015 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
2016 {
2017         pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
2018         pdu->isp_hdrlen = hdrlen;
2019 }
2020 
2021 /*
2022  * Initialize the data and data length fields.  This function should
2023  * not be used to adjust the data length of a buffer allocated via
2024  * idm_pdu_alloc since it overwrites the existing data pointer.
2025  */
2026 void
2027 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
2028 {
2029         pdu->isp_data = data;
2030         pdu->isp_datalen = datalen;
2031 }
2032 
2033 void
2034 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2035 {
2036         if (pdu->isp_callback) {
2037                 pdu->isp_status = status;
2038                 (*pdu->isp_callback)(pdu, status);
2039         } else {
2040                 idm_pdu_free(pdu);
2041         }
2042 }
2043 
2044 /*
2045  * State machine auditing
2046  */
2047 
2048 void
2049 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
2050 {
2051         bzero(audit_buf, sizeof (sm_audit_buf_t));
2052         audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
2053 }
2054 
2055 static
2056 sm_audit_record_t *
2057 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2058     sm_audit_sm_type_t sm_type,
2059     int current_state)
2060 {
2061         sm_audit_record_t *sar;
2062 
2063         sar = audit_buf->sab_records;
2064         sar += audit_buf->sab_index;
2065         audit_buf->sab_index++;
2066         audit_buf->sab_index &= audit_buf->sab_max_index;
2067 
2068         sar->sar_type = r_type;
2069         gethrestime(&sar->sar_timestamp);
2070         sar->sar_sm_type = sm_type;
2071         sar->sar_state = current_state;
2072 
2073         return (sar);
2074 }
2075 
2076 void
2077 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2078     sm_audit_sm_type_t sm_type, int current_state,
2079     int event, uintptr_t event_info)
2080 {
2081         sm_audit_record_t *sar;
2082 
2083         sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2084             sm_type, current_state);
2085         sar->sar_event = event;
2086         sar->sar_event_info = event_info;
2087 }
2088 
2089 void
2090 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2091     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2092 {
2093         sm_audit_record_t *sar;
2094 
2095         sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2096             sm_type, current_state);
2097         sar->sar_new_state = new_state;
2098 }
2099 
2100 
2101 /*
2102  * Object reference tracking
2103  */
2104 
2105 void
2106 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2107 {
2108         bzero(refcnt, sizeof (*refcnt));
2109         idm_refcnt_reset(refcnt);
2110         refcnt->ir_referenced_obj = referenced_obj;
2111         bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2112         refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2113         mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2114         cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2115 }
2116 
void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex to ensure there are no other lingering threads
	 * holding the mutex before we destroy it (e.g. idm_refcnt_rele
	 * just after the refcnt goes to zero if ir_waiting ==
	 * REF_WAIT_ASYNC)
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);	/* all holds must be released */
	cv_destroy(&refcnt->ir_cv);
	mutex_destroy(&refcnt->ir_mutex);
}
2130 
2131 void
2132 idm_refcnt_reset(idm_refcnt_t *refcnt)
2133 {
2134         refcnt->ir_waiting = REF_NOWAIT;
2135         refcnt->ir_refcnt = 0;
2136 }
2137 
void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);	/* record the hold for debugging */
	mutex_exit(&refcnt->ir_mutex);
}
2152 
2153 static void
2154 idm_refcnt_unref_task(void *refcnt_void)
2155 {
2156         idm_refcnt_t *refcnt = refcnt_void;
2157 
2158         REFCNT_AUDIT(refcnt);
2159         (*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2160 }
2161 
2162 void
2163 idm_refcnt_rele(idm_refcnt_t *refcnt)
2164 {
2165         mutex_enter(&refcnt->ir_mutex);
2166         ASSERT(refcnt->ir_refcnt > 0);
2167         refcnt->ir_refcnt--;
2168         REFCNT_AUDIT(refcnt);
2169         if (refcnt->ir_waiting == REF_NOWAIT) {
2170                 /* No one is waiting on this object */
2171                 mutex_exit(&refcnt->ir_mutex);
2172                 return;
2173         }
2174 
2175         /*
2176          * Someone is waiting for this object to go idle so check if
2177          * refcnt is 0.  Waiting on an object then later grabbing another
2178          * reference is not allowed so we don't need to handle that case.
2179          */
2180         if (refcnt->ir_refcnt == 0) {
2181                 if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
2182                         if (taskq_dispatch(idm.idm_global_taskq,
2183                             &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2184                                 cmn_err(CE_WARN,
2185                                     "idm_refcnt_rele: Couldn't dispatch task");
2186                         }
2187                 } else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
2188                         cv_signal(&refcnt->ir_cv);
2189                 }
2190         }
2191         mutex_exit(&refcnt->ir_mutex);
2192 }
2193 
2194 void
2195 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2196 {
2197         mutex_enter(&refcnt->ir_mutex);
2198         ASSERT(refcnt->ir_refcnt > 0);
2199         refcnt->ir_refcnt--;
2200         REFCNT_AUDIT(refcnt);
2201 
2202         /*
2203          * Someone is waiting for this object to go idle so check if
2204          * refcnt is 0.  Waiting on an object then later grabbing another
2205          * reference is not allowed so we don't need to handle that case.
2206          */
2207         if (refcnt->ir_refcnt == 0) {
2208                 refcnt->ir_cb = cb_func;
2209                 refcnt->ir_waiting = REF_WAIT_ASYNC;
2210                 if (taskq_dispatch(idm.idm_global_taskq,
2211                     &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2212                         cmn_err(CE_WARN,
2213                             "idm_refcnt_rele: Couldn't dispatch task");
2214                 }
2215         }
2216         mutex_exit(&refcnt->ir_mutex);
2217 }
2218 
2219 void
2220 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
2221 {
2222         mutex_enter(&refcnt->ir_mutex);
2223         refcnt->ir_waiting = REF_WAIT_SYNC;
2224         REFCNT_AUDIT(refcnt);
2225         while (refcnt->ir_refcnt != 0)
2226                 cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
2227         mutex_exit(&refcnt->ir_mutex);
2228 }
2229 
2230 void
2231 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2232 {
2233         mutex_enter(&refcnt->ir_mutex);
2234         refcnt->ir_waiting = REF_WAIT_ASYNC;
2235         refcnt->ir_cb = cb_func;
2236         REFCNT_AUDIT(refcnt);
2237         /*
2238          * It's possible we don't have any references.  To make things easier
2239          * on the caller use a taskq to call the callback instead of
2240          * calling it synchronously
2241          */
2242         if (refcnt->ir_refcnt == 0) {
2243                 if (taskq_dispatch(idm.idm_global_taskq,
2244                     &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2245                         cmn_err(CE_WARN,
2246                             "idm_refcnt_async_wait_ref: "
2247                             "Couldn't dispatch task");
2248                 }
2249         }
2250         mutex_exit(&refcnt->ir_mutex);
2251 }
2252 
2253 void
2254 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2255     idm_refcnt_cb_t *cb_func)
2256 {
2257         mutex_enter(&refcnt->ir_mutex);
2258         if (refcnt->ir_refcnt == 0) {
2259                 mutex_exit(&refcnt->ir_mutex);
2260                 (*cb_func)(refcnt->ir_referenced_obj);
2261                 return;
2262         }
2263         mutex_exit(&refcnt->ir_mutex);
2264 }
2265 
/* Take a hold on a connection via its embedded reference count. */
void
idm_conn_hold(idm_conn_t *ic)
{
	idm_refcnt_hold(&ic->ic_refcnt);
}
2271 
/* Release a hold on a connection via its embedded reference count. */
void
idm_conn_rele(idm_conn_t *ic)
{
	idm_refcnt_rele(&ic->ic_refcnt);
}
2277 
/*
 * Record the iSCSI target name on the connection.  strlcpy guarantees
 * NUL-termination; names longer than ISCSI_MAX_NAME_LEN are truncated
 * silently.
 */
void
idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
{
	(void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
}
2283 
/*
 * Record the iSCSI initiator name on the connection.  strlcpy guarantees
 * NUL-termination; names longer than ISCSI_MAX_NAME_LEN are truncated
 * silently.
 */
void
idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
{
	(void) strlcpy(ic->ic_initiator_name, initiator_name,
	    ISCSI_MAX_NAME_LEN + 1);
}
2290 
/*
 * Format the 6-byte binary ISID as a 12-character lowercase hex string
 * and store it on the connection (snprintf NUL-terminates within the
 * ISCSI_MAX_ISID_LEN + 1 buffer).
 */
void
idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
{
	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
	    "%02x%02x%02x%02x%02x%02x",
	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
}
2298 
/*
 * Module initialization: set up IDM's global locks, taskq, watchdog
 * thread, task ID table, kmem caches, connection/service lists, the
 * sockets transport, and the connection ID pool.  Returns DDI_SUCCESS,
 * or ENOMEM with all partially-created resources torn down.
 */
static int
_idm_init(void)
{
	/* Initialize the rwlock for the taskid table */
	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

	/* Initialize the global mutex and taskq */
	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The maximum allocation needs to be high here since there can be
	 * many concurrent tasks using the global taskq.
	 */
	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
	    128, 16384, TASKQ_PREPOPULATE);
	if (idm.idm_global_taskq == NULL) {
		/* Unwind in reverse order of creation */
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Start watchdog thread */
	idm.idm_wd_thread = thread_create(NULL, 0,
	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
	if (idm.idm_wd_thread == NULL) {
		/* Couldn't create the watchdog thread */
		taskq_destroy(idm.idm_global_taskq);
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Pause until the watchdog thread is running */
	mutex_enter(&idm.idm_global_mutex);
	while (!idm.idm_wd_thread_running)
		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
	mutex_exit(&idm.idm_global_mutex);

	/*
	 * Allocate the task ID table and set "next" to 0.
	 */

	idm.idm_taskid_max = idm_max_taskids;
	idm.idm_taskid_table = (idm_task_t **)
	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
	idm.idm_taskid_next = 0;

	/* Create the global buffer and task kmem caches */
	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

	/*
	 * Note, we're explicitly allocating an additional iSER header-
	 * sized chunk for each of these elements. See idm_task_constructor().
	 */
	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
	    &idm_task_constructor, &idm_task_destructor,
	    NULL, NULL, NULL, KM_SLEEP);

	/* Create the service and connection context lists */
	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
	    offsetof(idm_svc_t, is_list_node));
	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));
	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));

	/* Initialize the native sockets transport */
	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

	/* Create connection ID pool */
	(void) idm_idpool_create(&idm.idm_conn_id_pool);

	return (DDI_SUCCESS);
}
2382 
/*
 * Module teardown: refuse to unload while any connections or services
 * remain, then stop the watchdog thread and release all resources
 * created by _idm_init() (in roughly reverse order).  Returns 0 on
 * success or EBUSY if IDM is still in use.
 */
static int
_idm_fini(void)
{
	/*
	 * NOTE(review): the emptiness check is done without holding
	 * idm_global_mutex -- presumably the framework guarantees no new
	 * connections/services can appear during module unload; confirm
	 * against mod_remove semantics.
	 */
	if (!list_is_empty(&idm.idm_ini_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_svc_list)) {
		return (EBUSY);
	}

	/* Ask the watchdog thread to exit and wake it up */
	mutex_enter(&idm.idm_global_mutex);
	idm.idm_wd_thread_running = B_FALSE;
	cv_signal(&idm.idm_wd_cv);
	mutex_exit(&idm.idm_global_mutex);

	/* Wait for the watchdog thread to terminate */
	thread_join(idm.idm_wd_thread_did);

	idm_idpool_destroy(&idm.idm_conn_id_pool);

	/* Close any LDI handles we have open on transport drivers */
	mutex_enter(&idm.idm_global_mutex);
	idm_transport_teardown();
	mutex_exit(&idm.idm_global_mutex);

	/* Teardown the native sockets transport */
	idm_so_fini();

	/* Release lists, caches, the taskid table, and synchronization */
	list_destroy(&idm.idm_ini_conn_list);
	list_destroy(&idm.idm_tgt_conn_list);
	list_destroy(&idm.idm_tgt_svc_list);
	kmem_cache_destroy(idm.idm_task_cache);
	kmem_cache_destroy(idm.idm_buf_cache);
	kmem_free(idm.idm_taskid_table,
	    idm.idm_taskid_max * sizeof (idm_task_t *));
	mutex_destroy(&idm.idm_global_mutex);
	cv_destroy(&idm.idm_wd_cv);
	cv_destroy(&idm.idm_tgt_svc_cv);
	rw_destroy(&idm.idm_taskid_table_lock);

	return (0);
}