/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"
#include "srpt_common.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t		srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
extern uint16_t		srpt_send_msg_depth;
extern uint32_t		srpt_iu_size;
extern boolean_t	srpt_enable_by_default;

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if	defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif	defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;
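
/*
 * Example: srpt_ioc_init_profile() below enables st, sf, rf and wf.
 * With either bit-field layout above, st lands in the most significant
 * bit, so the wire-format mask reported in the IOC profile is 0xd4
 * regardless of host endianness.
 */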

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
#endif

static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);
static boolean_t srpt_check_hca_cfg_enabled(ib_guid_t hca_guid);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		/*
		 * not a fatal error.  Service will be up and
		 * waiting for ATTACH events.
		 */
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		return (DDI_SUCCESS);
	}

	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, attaching HCA %016llx",
		    (u_longlong_t)guid[hca_ndx]);
		srpt_ioc_attach_hca(guid[hca_ndx], B_FALSE);
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * Initialize I/O Controllers.  srpt_ctxt->sc_rwlock must be locked by the
 * caller.
 *
 * 'checked' indicates no need to look up the HCA in the HCA configuration
 * list.
 */
void
srpt_ioc_attach_hca(ib_guid_t hca_guid, boolean_t checked)
{
	boolean_t	enable_hca = B_TRUE;
	srpt_ioc_t	*ioc;

	if (!checked) {
		enable_hca = srpt_check_hca_cfg_enabled(hca_guid);

		if (!enable_hca) {
			/* nothing to do */
			SRPT_DPRINTF_L2(
			    "ioc_attach_hca, HCA %016llx disabled "
			    "by srpt config",
			    (u_longlong_t)hca_guid);
			return;
		}
	}

	SRPT_DPRINTF_L2("ioc_attach_hca, adding I/O"
	    " Controller (%016llx)", (u_longlong_t)hca_guid);

	ioc = srpt_ioc_init(hca_guid);
	if (ioc == NULL) {
		/*
		 * IOC already exists or an error occurred.  Already
		 * logged by srpt_ioc_init()
		 */
		return;
	}

	/*
	 * Create the COMSTAR SRP Target for this IOC.  If this fails,
	 * remove the IOC.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
	if (ioc->ioc_tgt_port == NULL) {
		SRPT_DPRINTF_L1("ioc_attach_hca: alloc SCSI"
		    " Target Port error on GUID(%016llx)",
		    (u_longlong_t)ioc->ioc_guid);
		rw_exit(&ioc->ioc_rwlock);
		srpt_ioc_fini(ioc);
		return;
	}
	rw_exit(&ioc->ioc_rwlock);

	/*
	 * New HCA added with default SCSI Target Port, SRP service
	 * will be started when SCSI Target Port is brought
	 * on-line by STMF.
	 */
	list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
	SRPT_DPRINTF_L2("ioc_attach_hca, I/O Controller ibt HCA hdl (%p)",
	    (void *)ioc->ioc_ibt_hdl);

	srpt_ctxt->sc_num_iocs++;
}

/*
 * srpt_check_hca_cfg_enabled()
 *
 * Function to check the configuration for the enabled status of a given
 * HCA.  Returns B_TRUE if SRPT services should be activated for this HCA,
 * B_FALSE if it should be disabled.
 */
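/*
 * The configuration nvlist consulted here (and in srpt_ioc_update()
 * below) is assumed to hold one sub-nvlist per HCA, keyed by the string
 * built with SRPT_FORMAT_HCAKEY(), carrying an SRPT_PROP_ENABLED
 * boolean.
 */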
static boolean_t
srpt_check_hca_cfg_enabled(ib_guid_t hca_guid)
{
	int		status;
	char		buf[32];
	nvlist_t	*hcanv;
	boolean_t	enable_hca;

	enable_hca = srpt_enable_by_default;

	SRPT_FORMAT_HCAKEY(buf, sizeof (buf), (u_longlong_t)hca_guid);

	if (srpt_ctxt->sc_cfg_hca_nv != NULL) {
		status = nvlist_lookup_nvlist(srpt_ctxt->sc_cfg_hca_nv,
		    buf, &hcanv);
		if (status == 0) {
			SRPT_DPRINTF_L3("check_hca_cfg, found guid %s", buf);
			(void) nvlist_lookup_boolean_value(hcanv,
			    SRPT_PROP_ENABLED, &enable_hca);
		} else {
			SRPT_DPRINTF_L3("check_hca_cfg, did not find guid %s",
			    buf);
		}
	}

	return (enable_hca);
}

/*
 * srpt_ioc_update()
 *
 * Using the configuration nvlist, enables or disables SRP services
 * on the provided HCAs.  srpt_ctxt->sc_rwlock should be held outside
 * of this call.
 */
void
srpt_ioc_update(void)
{
	boolean_t	enabled;
	nvpair_t	*nvp = NULL;
	uint64_t	hca_guid;
	nvlist_t	*nvl;
	nvlist_t	*cfg = srpt_ctxt->sc_cfg_hca_nv;

	if (cfg == NULL) {
		SRPT_DPRINTF_L2("ioc_update, no configuration data");
		return;
	}

	while ((nvp = nvlist_next_nvpair(cfg, nvp)) != NULL) {
		enabled = srpt_enable_by_default;

		if ((nvpair_value_nvlist(nvp, &nvl)) != 0) {
			SRPT_DPRINTF_L2("ioc_update, did not find an nvlist");
			continue;
		}

		if ((nvlist_lookup_uint64(nvl, SRPT_PROP_GUID, &hca_guid))
		    != 0) {
			SRPT_DPRINTF_L2("ioc_update, did not find a guid");
			continue;
		}

		(void) nvlist_lookup_boolean_value(nvl, SRPT_PROP_ENABLED,
		    &enabled);

		if (enabled) {
			SRPT_DPRINTF_L2("ioc_update, enabling guid %016llx",
			    (u_longlong_t)hca_guid);
			srpt_ioc_attach_hca(hca_guid, B_TRUE);
		} else {
			SRPT_DPRINTF_L2("ioc_update, disabling guid %016llx",
			    (u_longlong_t)hca_guid);
			srpt_ioc_detach_hca(hca_guid);
		}
	}
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	/*
	 * All SRP targets must be destroyed before calling this
	 * function.
	 */
	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    (u_longlong_t)ioc->ioc_guid,
		    (void *)ioc->ioc_ibt_hdl);

		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		srpt_ioc_fini(ioc);
		srpt_ctxt->sc_num_iocs--;
	}

	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_detach_hca()
 *
 * Stop SRP Target services on this HCA.
 *
 * Note that this is not entirely symmetric with srpt_ioc_attach_hca()
 * in that we don't need to check the configuration to know whether to
 * disable an HCA.  We get here either because the IB framework has told
 * us the HCA has been detached, or because the administrator has explicitly
 * disabled this HCA.
 *
 * Must be called with srpt_ctxt->sc_rwlock locked as RW_WRITER.
 */
void
srpt_ioc_detach_hca(ib_guid_t hca_guid)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	stmf_status_t		stmf_status = STMF_SUCCESS;

	ioc = srpt_ioc_get_locked(hca_guid);
	if (ioc == NULL) {
		/* doesn't exist, nothing to do */
		return;
	}

	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	tgt = ioc->ioc_tgt_port;

	if (tgt != NULL) {
		stmf_status = srpt_stp_destroy_port(tgt);
		if (stmf_status == STMF_SUCCESS) {
			ioc->ioc_tgt_port = NULL;
			(void) srpt_stp_free_port(tgt);
		}
	}

	rw_exit(&ioc->ioc_rwlock);

	if (stmf_status != STMF_SUCCESS) {
		/* should never happen */
		return;
	}

	list_remove(&srpt_ctxt->sc_ioc_list, ioc);
	srpt_ctxt->sc_num_iocs--;

	srpt_ioc_fini(ioc);
	SRPT_DPRINTF_L2("ioc_detach_hca, HCA %016llx detached",
	    (u_longlong_t)hca_guid);
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;
	uint_t			srq_sz;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L2("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid	= guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
	 */
	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
		SRPT_DPRINTF_L0(
		    "ioc_init, no SRQ capability, HCA not supported");
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
	    srpt_ioc_srq_size);
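	/*
	 * Clamp the requested SRQ depth to the HCA's limit; the sizes
	 * actually allocated (which the HCA may round up) are returned
	 * in ioc_srq_attr.
	 */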
	srq_sz = srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
	    ioc->ioc_attr.hca_max_srqs_sz) - 1;
	srq_attr.srq_sgl_sz = 1;

	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
	    &ioc->ioc_srq_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L2("ioc_init, Using SRQ size(%d), MAX SG size(%d)",
	    srq_sz, 1);

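	/*
	 * Stash the IOC pointer in the SRQ private data so it can later
	 * be recovered from just the SRQ handle (e.g. via
	 * ibt_get_srq_private()).
	 */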
	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);

	/*
	 * Allocate a pool of SRP IU message buffers and post them to
	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
	 * messages.
	 */
	ioc->ioc_num_iu_entries = srq_sz;

	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	ioc->ioc_iu_bufs = kmem_alloc(srpt_iu_size *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
		goto srq_iu_alloc_err;
	}

	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
	mr_attr.mr_len   = srpt_iu_size * ioc->ioc_num_iu_entries;
	mr_attr.mr_as    = NULL;
	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
		    status);
		goto srq_iu_alloc_err;
	}

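	/*
	 * Carve the registered buffer region into srpt_iu_size slices;
	 * each IU's SGE points at its own slice through the single lkey
	 * returned for the whole region.
	 */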
	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

		iu_offset = (iu_ndx * srpt_iu_size);
		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
		iu->iu_sge.ds_key = mr_desc.md_lkey;
		iu->iu_sge.ds_len = srpt_iu_size;
		iu->iu_ioc	  = ioc;
		iu->iu_pool_ndx   = iu_ndx;

		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			goto srq_iu_post_err;
		}
	}

	/*
	 * Initialize the dbuf vmem arena
	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%16llX", (u_longlong_t)guid);
	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

	if (ioc->ioc_dbuf_pool == NULL) {
		goto stmf_db_alloc_err;
	}

	/*
	 * Allocate the I/O Controller STMF data buffer allocator.  The
	 * data store will span all targets associated with this IOC.
	 */
	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_attach, STMF DBUF alloc failure for IOC");
		goto stmf_db_alloc_err;
	}
	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
	ioc->ioc_stmf_ds->ds_port_private   = ioc;

	rw_exit(&ioc->ioc_rwlock);
	return (ioc);

stmf_db_alloc_err:
	if (ioc->ioc_dbuf_pool != NULL) {
		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
	}

srq_iu_post_err:
	if (ioc->ioc_iu_mr_hdl != NULL) {
		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
		    ioc->ioc_iu_mr_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error deregistering"
			    " memory region (%d)", status);
		}
	}
	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
	    err_ndx++, iu++) {
		mutex_destroy(&iu->iu_lock);
	}

srq_iu_alloc_err:
	if (ioc->ioc_iu_bufs != NULL) {
		kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
		    ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_iu_pool != NULL) {
		kmem_free(ioc->ioc_iu_pool,
		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}

srq_alloc_err:
	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
	}

pd_alloc_err:
	status = ibt_close_hca(ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
	}

hca_open_err:
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	ASSERT(ioc->ioc_tgt_port == NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close ioc error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt = NULL;
	boolean_t		online_target = B_FALSE;
	stmf_change_status_t	cstatus;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question.  If the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " undefined");
		return;
	}

	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&tgt->tp_lock);

	if (tgt->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(tgt, event->ev_port);
		if ((status != IBT_SUCCESS) &&
		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	} else {
		/* if we were offline because of no ports, try onlining now */
		if ((tgt->tp_num_active_ports == 0) &&
		    (tgt->tp_requested_state != tgt->tp_state) &&
		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
			online_target = B_TRUE;
			cstatus.st_completion_status = STMF_SUCCESS;
			cstatus.st_additional_info = "port active";
		}
	}

	mutex_exit(&tgt->tp_lock);

	if (online_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx online requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx failed online request: %d",
			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
			    (int)ret);
		}
	}
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;
	boolean_t		offline_target = B_FALSE;
	stmf_change_status_t	cstatus;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question.  If the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then log out initiators
	 * through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	tgt->tp_num_active_ports--;

	/* if we have no active ports, take the target offline */
	if ((tgt->tp_num_active_ports == 0) &&
	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
		cstatus.st_completion_status = STMF_SUCCESS;
		cstatus.st_additional_info = "no ports active";
		offline_target = B_TRUE;
	}

	mutex_exit(&tgt->tp_lock);

	if (offline_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx offline requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx failed offline request: %d",
			    event->ev_port,
			    (u_longlong_t)ioc->ioc_guid, (int)ret);
		}
	}
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		SRPT_DPRINTF_L2(
		    "ioc_ib_async_hdlr, received attach event for HCA "
		    "0x%016llx",
		    (u_longlong_t)event->ev_hca_guid);

		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		srpt_ioc_attach_hca(event->ev_hca_guid, B_FALSE);
		rw_exit(&srpt_ctxt->sc_rwlock);

		break;

	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_ib_async_hdlr, received HCA_DETACH_EVENT for "
		    "HCA 0x%016llx",
		    (u_longlong_t)event->ev_hca_guid);

		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		srpt_ioc_detach_hca(event->ev_hca_guid);
		rw_exit(&srpt_ctxt->sc_rwlock);

		break;

	case IBT_EVENT_EMPTY_CHAN:
		/* Channel in ERROR state is now empty */
		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_ib_async_hdlr, received empty channel error on %p",
		    (void *)ch);
		break;

	default:
		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
		    "handled (%d)", code);
		break;
	}
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
	ibt_status_t		status;
	srpt_hw_port_t		*port;
	ibt_hca_portinfo_t	*portinfo;
	uint_t			qportinfo_sz;
	uint_t			qportnum;
	ib_gid_t		new_gid;
	srpt_ioc_t		*ioc;
	srpt_session_t		sess;

	ASSERT(tgt != NULL);
	ASSERT(tgt->tp_ioc != NULL);
	ioc = tgt->tp_ioc;

	if (tgt->tp_ibt_svc_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
		    " service");
		return (IBT_INVALID_PARAM);
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
		return (IBT_INVALID_PARAM);
	}
	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
	    &portinfo, &qportnum, &qportinfo_sz);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
		return (IBT_INVALID_PARAM);
	}

	ASSERT(portinfo != NULL);

	/*
	 * If port is not active do nothing, caller should attempt to bind
	 * after the port goes active.
	 */
	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
		    portnum);
		ibt_free_portinfo(portinfo, qportinfo_sz);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	port	= &tgt->tp_hw_port[portnum-1];
	new_gid = portinfo->p_sgid_tbl[0];
	ibt_free_portinfo(portinfo, qportinfo_sz);

	/*
	 * If previously bound and the port GID has changed,
	 * unbind the old GID.
	 */
	if (port->hwp_bind_hdl != NULL) {
		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
			    " bind");
			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
			    port->hwp_bind_hdl);
			port->hwp_bind_hdl = NULL;
		} else {
			SRPT_DPRINTF_L2("ioc_svc_bind, port %d already bound",
			    portnum);
		}
	}

	/* bind the new port GID */
	if (port->hwp_bind_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
		    (u_longlong_t)new_gid.gid_prefix,
		    (u_longlong_t)new_gid.gid_guid);

		/*
		 * Pass SCSI Target Port as CM private data, the target will
		 * always exist while this service is bound.
		 */
		status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL,
		    tgt, &port->hwp_bind_hdl);
		if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
			SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)",
			    status);
			return (status);
		}
		port->hwp_gid.gid_prefix = new_gid.gid_prefix;
		port->hwp_gid.gid_guid = new_gid.gid_guid;
	}

	/* port is now active */
	tgt->tp_num_active_ports++;

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__up, srpt_session_t, &sess);

	return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
	srpt_hw_port_t		*port;
	srpt_session_t		sess;
	ibt_status_t		ret;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
		return;
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
		return;
	}
	port = &tgt->tp_hw_port[portnum-1];

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
	    port->hwp_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__down, srpt_session_t, &sess);

	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
		    port->hwp_bind_hdl);
		if (ret != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_svc_unbind, unregister port %d failed: %d",
			    portnum, ret);
		} else {
			port->hwp_bind_hdl = NULL;
			port->hwp_gid.gid_prefix = 0;
			port->hwp_gid.gid_guid = 0;
		}
	}
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
	uint_t		portnum;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
		    " specified");
		return;
	}
	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
		srpt_ioc_svc_unbind(tgt, portnum);
	}
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	ioc = list_head(&srpt_ctxt->sc_ioc_list);
	while (ioc != NULL) {
		if (ioc->ioc_guid == guid) {
			break;
		}
		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
	}
	return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
	ioc = srpt_ioc_get_locked(guid);
	rw_exit(&srpt_ctxt->sc_rwlock);
	return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	ibt_status_t		status;
	ibt_recv_wr_t		wr;
	uint_t			posted;

	ASSERT(ioc != NULL);
	ASSERT(iu != NULL);

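	/*
	 * The IU pointer rides in the work request ID so the receive
	 * completion handler can map a completion back to its IU.
	 */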
	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
	wr.wr_nds = 1;
	wr.wr_sgl = &iu->iu_sge;
	posted    = 0;

	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
		    status);
	}
	return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	srpt_channel_t		*ch;
	ibt_status_t		status;

	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	/*
	 * Some additional sanity checks while in debug state, all STMF
	 * related task activities should be complete prior to returning
	 * this IU to the available pool.
	 */
	ASSERT(iu->iu_stmf_task == NULL);
	ASSERT(iu->iu_sq_posted_cnt == 0);

	ch = iu->iu_ch;
	iu->iu_ch = NULL;
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = NULL;
	iu->iu_tot_xfer_len = 0;
	iu->iu_tag = 0;
	iu->iu_flags = 0;
	iu->iu_sq_posted_cnt = 0;

	status = srpt_ioc_post_recv_iu(ioc, iu);

	if (status != IBT_SUCCESS) {
		/*
		 * Very bad, we should initiate a shutdown of the I/O
		 * Controller here, off-lining any targets associated
		 * with this I/O Controller (and therefore disconnecting
		 * any logins that remain).
		 *
		 * In practice this should never happen so we put
		 * the code near the bottom of the implementation list.
		 */
		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
		    status);
		ASSERT(0);
	} else if (ch != NULL) {
		atomic_inc_32(&ch->ch_req_lim_delta);
	}
}

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
	srpt_ioc_opcap_mask_t		capmask = {0};

	ASSERT(ioc != NULL);

	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
	(void) memcpy(ioc->ioc_profile.ioc_id_string,
	    "Solaris SRP Target 0.9a", 23);

	/*
	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
	 * 8 bits in vendor ID field is slot and is initialized to zero.
	 * Low order 8 bits of subsystem ID is a reserved field and
	 * initialized to zero.
	 */
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
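	/* ioc_rdma_read_qdepth is a one-byte field, hence no byte swap */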
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(srpt_iu_size);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t		*iu;
	srpt_ioc_t		*ioc;
	srpt_ds_dbuf_t		*dbuf;
	stmf_data_buf_t		*stmf_dbuf;
	void			*buf;
	srpt_mr_t		mr;

	ASSERT(task != NULL);
	iu  = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

buf_mr_err:
	stmf_free(stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	chunk = srpt_vmem_chunk_alloc(result, chunksize);

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

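	/*
	 * No backing-store functions are given to the arena; it is grown
	 * explicitly with vmem_add() in srpt_vmem_alloc() when an
	 * allocation fails.
	 */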
	result->svp_vmem = vmem_create(name,
	    (void *)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
		 */
		if (vmem_add(vm_pool->svp_vmem, (void *)(uintptr_t)next->mr_va,
		    next->mr_len, VM_NOSLEEP) == NULL) {
			srpt_vmem_chunk_free(vm_pool, next);
			SRPT_DPRINTF_L2("vmem_add failed");
		} else {
			vm_pool->svp_total_size += next->mr_len;
			avl_add(&vm_pool->svp_mr_list, next);
		}
	}

	rw_exit(&vm_pool->svp_lock);

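	/* retry the allocation now that the arena may have grown */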
	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

	return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
	vmem_free(vm_pool->svp_vmem, vaddr, size);
}

static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
	avl_index_t		where;
	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	srpt_mr_t		chunk;
	srpt_mr_t		*nearest;
	ib_vaddr_t		chunk_end;
	int			status = DDI_FAILURE;

	rw_enter(&vm_pool->svp_lock, RW_READER);

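	/*
	 * Find the chunk containing vaddr: either an exact match on the
	 * base address or the nearest chunk that starts before it.
	 */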
	chunk.mr_va = mrva;
	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

	if (nearest == NULL) {
		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
		    AVL_BEFORE);
	}

	if (nearest != NULL) {
		/* Verify this chunk contains the specified address range */
		ASSERT(nearest->mr_va <= mrva);

		chunk_end = nearest->mr_va + nearest->mr_len;
		if (chunk_end >= mrva + size) {
			mr->mr_hdl = nearest->mr_hdl;
			mr->mr_va = mrva;
			mr->mr_len = size;
			mr->mr_lkey = nearest->mr_lkey;
			mr->mr_rkey = nearest->mr_rkey;
			status = DDI_SUCCESS;
		}
	}

	rw_exit(&vm_pool->svp_lock);
	return (status);
}

static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
	void			*chunk = NULL;
	srpt_mr_t		*result = NULL;

	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
		if (chunk == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)chunksize / 2);
			chunksize /= 2;
		}
	}

	if (chunk != NULL) {
		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize);
		if (result == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
	void			*chunk = (void *)(uintptr_t)mr->mr_va;
	ib_memlen_t		chunksize = mr->mr_len;

	srpt_dereg_mem(vm_pool->svp_ioc, mr);
	kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
	srpt_mr_t		*result = NULL;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	ibt_status_t		status;
	srpt_ioc_t		*ioc = vm_pool->svp_ioc;

	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
	if (result == NULL) {
		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = vm_pool->svp_flags;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &result->mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
		    "failed %d", status);
		kmem_free(result, sizeof (srpt_mr_t));
		return (NULL);
	}

	result->mr_va = mr_attr.mr_vaddr;
	result->mr_len = mr_attr.mr_len;
	result->mr_lkey = mr_desc.md_lkey;
	result->mr_rkey = mr_desc.md_rkey;

	return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
	ibt_status_t		status;

	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
		    status);
	}
	kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
	srpt_mr_t		*mr2 = (srpt_mr_t *)b;

	/* sort and match by virtual address */
	if (mr1->mr_va < mr2->mr_va) {
		return (-1);
	} else if (mr1->mr_va > mr2->mr_va) {
		return (1);
	}

	return (0);
}