8368 remove warlock leftovers from usr/src/uts
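Warlock was Sun's static lock-analysis tool; the _NOTE() annotations it consumed come from <sys/note.h> and expand to nothing in a normal compile, so the deletions below (both the _NOTE() lines and the #ifdef __lock_lint blocks) do not change the generated object code. A minimal sketch of the kind of annotation being removed, with hypothetical structure and function names:

	#include <sys/note.h>

	typedef struct foo {
		int	f_val;
	} foo_t;

	static void
	foo_init(foo_t *fp)
	{
		/*
		 * Annotation for the warlock lock-analysis tool: the
		 * object is still private to the allocating thread, so
		 * unlocked access is safe.  _NOTE() expands to nothing
		 * outside of lint/warlock builds.
		 */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fp))
		fp->f_val = 0;
	}
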
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_srq.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_srq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_srq.c
29 29 * Tavor Shared Receive Queue Processing Routines
30 30 *
31 31 * Implements all the routines necessary for allocating, freeing, querying,
32 32 * modifying and posting shared receive queues.
33 33 */
34 34
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/types.h>
37 37 #include <sys/conf.h>
38 38 #include <sys/ddi.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/modctl.h>
41 41 #include <sys/bitmap.h>
42 42
43 43 #include <sys/ib/adapters/tavor/tavor.h>
44 44
45 45 static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
46 46 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
47 47
48 48 /*
49 49 * tavor_srq_alloc()
50 50 * Context: Can be called only from user or kernel context.
51 51 */
52 52 int
53 53 tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo,
54 54 uint_t sleepflag, tavor_srq_options_t *op)
55 55 {
56 56 ibt_srq_hdl_t ibt_srqhdl;
57 57 tavor_pdhdl_t pd;
58 58 ibt_srq_sizes_t *sizes;
59 59 ibt_srq_sizes_t *real_sizes;
60 60 tavor_srqhdl_t *srqhdl;
61 61 ibt_srq_flags_t flags;
62 62 tavor_rsrc_t *srqc, *rsrc;
63 63 tavor_hw_srqc_t srqc_entry;
64 64 uint32_t *buf;
65 65 tavor_srqhdl_t srq;
66 66 tavor_umap_db_entry_t *umapdb;
67 67 ibt_mr_attr_t mr_attr;
68 68 tavor_mr_options_t mr_op;
69 69 tavor_mrhdl_t mr;
70 70 uint64_t addr;
71 71 uint64_t value, srq_desc_off;
72 72 uint32_t lkey;
73 73 uint32_t log_srq_size;
74 74 uint32_t uarpg;
75 75 uint_t wq_location, dma_xfer_mode, srq_is_umap;
76 76 int flag, status;
77 77 char *errormsg;
78 78 uint_t max_sgl;
79 79 uint_t wqesz;
80 80
81 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
82 -
83 81 TAVOR_TNF_ENTER(tavor_srq_alloc);
84 82
85 83 /*
86 84 * Check the "options" flag. Currently this flag tells the driver
87 85 * whether or not the SRQ's work queues should come from normal
88 86 * system memory or whether they should be allocated from DDR memory.
89 87 */
90 88 if (op == NULL) {
91 89 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
92 90 } else {
93 91 wq_location = op->srqo_wq_loc;
94 92 }
95 93
96 94 /*
97 95 * Extract the necessary info from the tavor_srq_info_t structure
98 96 */
99 97 real_sizes = srqinfo->srqi_real_sizes;
100 98 sizes = srqinfo->srqi_sizes;
101 99 pd = srqinfo->srqi_pd;
102 100 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
103 101 flags = srqinfo->srqi_flags;
104 102 srqhdl = srqinfo->srqi_srqhdl;
105 103
106 104 /*
107 105 * Determine whether SRQ is being allocated for userland access or
108 106 * whether it is being allocated for kernel access. If the SRQ is
109 107 * being allocated for userland access, then lookup the UAR doorbell
110 108 * page number for the current process. Note: If this is not found
111 109 * (e.g. if the process has not previously open()'d the Tavor driver),
112 110 * then an error is returned.
113 111 */
114 112 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
115 113 if (srq_is_umap) {
116 114 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
117 115 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
118 116 if (status != DDI_SUCCESS) {
119 117 /* Set "status" and "errormsg" and goto failure */
120 118 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
121 119 goto srqalloc_fail;
122 120 }
123 121 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
124 122 }
125 123
126 124 /* Increase PD refcnt */
127 125 tavor_pd_refcnt_inc(pd);
128 126
129 127 /* Allocate an SRQ context entry */
130 128 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
131 129 if (status != DDI_SUCCESS) {
132 130 /* Set "status" and "errormsg" and goto failure */
133 131 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
134 132 goto srqalloc_fail1;
135 133 }
136 134
137 135 /* Allocate the SRQ Handle entry */
138 136 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
139 137 if (status != DDI_SUCCESS) {
140 138 /* Set "status" and "errormsg" and goto failure */
141 139 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
142 140 goto srqalloc_fail2;
143 141 }
144 142
145 143 srq = (tavor_srqhdl_t)rsrc->tr_addr;
146 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
147 144
148 145 srq->srq_srqnum = srqc->tr_indx; /* just use index */
149 146
150 147 /*
151 148 * If this will be a user-mappable SRQ, then allocate an entry for
152 149 * the "userland resources database". This will later be added to
153 150 * the database (after all further SRQ operations are successful).
154 151 * If we fail here, we must undo the reference counts and the
155 152 * previous resource allocation.
156 153 */
157 154 if (srq_is_umap) {
158 155 umapdb = tavor_umap_db_alloc(state->ts_instance,
159 156 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
160 157 (uint64_t)(uintptr_t)rsrc);
161 158 if (umapdb == NULL) {
162 159 /* Set "status" and "errormsg" and goto failure */
163 160 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
164 161 goto srqalloc_fail3;
165 162 }
166 163 }
167 164
168 165 /*
169 166 * Calculate the appropriate size for the SRQ.
170 167 * Note: All Tavor SRQs must be a power-of-2 in size. Also
171 168 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step
172 169 * is to round the requested size up to the next highest power-of-2
173 170 */
174 171 sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE);
175 172 log_srq_size = highbit(sizes->srq_wr_sz);
176 173 if (ISP2(sizes->srq_wr_sz)) {
177 174 log_srq_size = log_srq_size - 1;
178 175 }
179 176
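
The clamp-and-round step above is a standard log2 round-up: highbit() returns the 1-based index of the most significant set bit, which is one too large exactly when the value is already a power of two. A self-contained sketch of the same computation (hypothetical helper name; highbit(), ISP2() and max() come from <sys/sysmacros.h>, already included by this file):

	/*
	 * Round a requested SRQ size up to a power of two and return
	 * its log2; the real queue size is then (1 << log2).
	 */
	static uint32_t
	srq_log_size(uint32_t req, uint32_t min_sz)
	{
		uint32_t sz = max(req, min_sz);
		uint32_t log2 = highbit(sz);	/* 1-based MSB index */

		if (ISP2(sz))
			log2--;		/* exact power of two: no round-up */
		return (log2);
	}

For example, a request for 1000 work requests gives highbit(1000) == 10 and ISP2(1000) == 0, so log2 == 10 and the queue is sized at 1024 entries.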
180 177 /*
181 178 * Next we verify that the rounded-up size is valid (i.e. consistent
182 179 * with the device limits and/or software-configured limits). If not,
183 180 * then obviously we have a lot of cleanup to do before returning.
184 181 */
185 182 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
186 183 /* Set "status" and "errormsg" and goto failure */
187 184 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
188 185 goto srqalloc_fail4;
189 186 }
190 187
191 188 /*
192 189 * Next we verify that the requested number of SGL is valid (i.e.
193 190 * consistent with the device limits and/or software-configured
194 191 * limits). If not, then obviously the same cleanup needs to be done.
195 192 */
196 193 max_sgl = state->ts_cfg_profile->cp_srq_max_sgl;
197 194 if (sizes->srq_sgl_sz > max_sgl) {
198 195 /* Set "status" and "errormsg" and goto failure */
199 196 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL");
200 197 goto srqalloc_fail4;
201 198 }
202 199
203 200 /*
204 201 * Determine the SRQ's WQE sizes. This depends on the requested
205 202 * number of SGLs. Note: This also has the side-effect of
206 203 * calculating the real number of SGLs (for the calculated WQE size)
207 204 */
208 205 tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
209 206 TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
210 207 &srq->srq_wq_sgl);
211 208
212 209 /*
213 210 * Allocate the memory for SRQ work queues. Note: The location from
214 211 * which we will allocate these work queues has been passed in through
215 212 * the tavor_qp_options_t structure. Since Tavor work queues are not
216 213 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
217 214 * queue memory is very important. We used to allocate work queues
218 215 * (the combined receive and send queues) so that they would be aligned
219 216 * on their combined size. That alignment guaranteed that they would
220 217 * never cross the 4GB boundary (Tavor work queues are on the order of
221 218 * MBs at maximum). Now we are able to relax this alignment constraint
222 219 * by ensuring that the IB address assigned to the queue memory (as a
223 220 * result of the tavor_mr_register() call) is offset from zero.
224 221 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
225 222 * guarantee the alignment, but when attempting to use IOMMU bypass
226 223 * mode we found that we were not allowed to specify any alignment that
227 224 * was more restrictive than the system page size. So we avoided this
228 225 * constraint by passing two alignment values, one for the memory
229 226 * allocation itself and the other for the DMA handle (for later bind).
230 227 * This used to cause more memory than necessary to be allocated (in
231 228 * order to guarantee the more restrictive alignment constraint). But
232 229 * by guaranteeing the zero-based IB virtual address for the queue, we
233 230 * are able to conserve this memory.
234 231 *
235 232 * Note: If SRQ is not user-mappable, then it may come from either
236 233 * kernel system memory or from HCA-attached local DDR memory.
237 234 *
238 235 * Note2: We align this queue on a pagesize boundary. This is required
239 236 * to make sure that all the resulting IB addresses will start at 0, for
240 237 * a zero-based queue. By making sure we are aligned on at least a
241 238 * page, any offset we use into our queue will be the same as when we
242 239 * perform tavor_srq_modify() operations later.
243 240 */
244 241 wqesz = (1 << srq->srq_wq_log_wqesz);
245 242 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
246 243 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
247 244 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
248 245 if (srq_is_umap) {
249 246 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
250 247 } else {
251 248 srq->srq_wqinfo.qa_location = wq_location;
252 249 }
253 250 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
254 251 if (status != DDI_SUCCESS) {
255 252 /* Set "status" and "errormsg" and goto failure */
256 253 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
257 254 goto srqalloc_fail4;
258 255 }
259 256 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
260 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
261 257
262 258 /*
263 259 * Register the memory for the SRQ work queues. The memory for the SRQ
264 260 * must be registered in the Tavor TPT tables. This gives us the LKey
265 261 * to specify in the SRQ context later. Note: If the work queue is to
266 262 * be allocated from DDR memory, then only a "bypass" mapping is
267 263 * appropriate. And if the SRQ memory is user-mappable, then we force
268 264 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
269 265 * restriction, we pass the "mro_bind_override_addr" flag in the call
270 266 * to tavor_mr_register(). This guarantees that the resulting IB vaddr
271 267 * will be zero-based (modulo the offset into the first page). If we
272 268 * fail here, we still have the bunch of resource and reference count
273 269 * cleanup to do.
274 270 */
275 271 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
276 272 IBT_MR_NOSLEEP;
277 273 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
278 274 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
279 275 mr_attr.mr_as = NULL;
280 276 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
281 277 if (srq_is_umap) {
282 278 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
283 279 } else {
284 280 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
285 281 mr_op.mro_bind_type =
286 282 state->ts_cfg_profile->cp_iommu_bypass;
287 283 dma_xfer_mode =
288 284 state->ts_cfg_profile->cp_streaming_consistent;
289 285 if (dma_xfer_mode == DDI_DMA_STREAMING) {
290 286 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
291 287 }
292 288 } else {
293 289 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
294 290 }
295 291 }
296 292 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
297 293 mr_op.mro_bind_override_addr = 1;
298 294 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
299 295 if (status != DDI_SUCCESS) {
300 296 /* Set "status" and "errormsg" and goto failure */
301 297 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
302 298 goto srqalloc_fail5;
303 299 }
304 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
305 300 addr = mr->mr_bindinfo.bi_addr;
306 301 lkey = mr->mr_lkey;
307 302
308 303 /*
309 304 * Calculate the offset between the kernel virtual address space
310 305 * and the IB virtual address space. This will be used when
311 306 * posting work requests to properly initialize each WQE.
312 307 */
313 308 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
314 309 (uint64_t)mr->mr_bindinfo.bi_addr;
315 310
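
Because the memory was registered with mro_bind_override_addr set, bi_addr is zero-based, and srq_desc_off is the constant distance between the kernel mapping and the IB address space. Posting code can then turn any kernel WQE pointer back into the address the hardware expects; a hedged sketch with a hypothetical helper name:

	/*
	 * srq_desc_off == kernel_vaddr - ib_vaddr, so subtracting it
	 * from a kernel WQE pointer recovers the IB virtual address
	 * that gets written into descriptors for the hardware.
	 */
	static uint64_t
	wqe_kva_to_ibaddr(uint64_t wqe_kva, uint64_t srq_desc_off)
	{
		return (wqe_kva - srq_desc_off);
	}
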
316 311 /*
317 312 * Create WQL and Wridlist for use by this SRQ
318 313 */
319 314 srq->srq_wrid_wql = tavor_wrid_wql_create(state);
320 315 if (srq->srq_wrid_wql == NULL) {
321 316 /* Set "status" and "errormsg" and goto failure */
322 317 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
323 318 goto srqalloc_fail6;
324 319 }
325 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql)))
326 320
327 321 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
328 322 if (srq->srq_wridlist == NULL) {
329 323 /* Set "status" and "errormsg" and goto failure */
330 324 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
331 325 goto srqalloc_fail7;
332 326 }
333 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist)))
334 327
335 328 srq->srq_wridlist->wl_srq_en = 1;
336 329 srq->srq_wridlist->wl_free_list_indx = -1;
337 330
338 331 /*
339 332 * Fill in all the return arguments (if necessary). This includes
340 333 * real queue size and real SGLs.
341 334 */
342 335 if (real_sizes != NULL) {
343 336 real_sizes->srq_wr_sz = (1 << log_srq_size);
344 337 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
345 338 }
346 339
347 340 /*
348 341 * Fill in the SRQC entry. This is the final step before passing
349 342 * ownership of the SRQC entry to the Tavor hardware. We use all of
350 343 * the information collected/calculated above to fill in the
351 344 * requisite portions of the SRQC. Note: If this SRQ is going to be
352 345 * used for userland access, then we need to set the UAR page number
353 346 * appropriately (otherwise it's a "don't care")
354 347 */
355 348 bzero(&srqc_entry, sizeof (tavor_hw_srqc_t));
356 349 srqc_entry.wqe_addr_h = (addr >> 32);
357 350 srqc_entry.next_wqe_addr_l = 0;
358 351 srqc_entry.ds = (wqesz >> 4);
359 352 srqc_entry.state = TAVOR_SRQ_STATE_HW_OWNER;
360 353 srqc_entry.pd = pd->pd_pdnum;
361 354 srqc_entry.lkey = lkey;
362 355 srqc_entry.wqe_cnt = 0;
363 356 if (srq_is_umap) {
364 357 srqc_entry.uar = uarpg;
365 358 } else {
366 359 srqc_entry.uar = 0;
367 360 }
368 361
369 362 /*
370 363 * Write the SRQC entry to hardware. Lastly, we pass ownership of
371 364 * the entry to the hardware (using the Tavor SW2HW_SRQ firmware
372 365 * command). Note: In general, this operation shouldn't fail. But
373 366 * if it does, we have to undo everything we've done above before
374 367 * returning error.
375 368 */
376 369 status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
377 370 sizeof (tavor_hw_srqc_t), srq->srq_srqnum,
378 371 sleepflag);
379 372 if (status != TAVOR_CMD_SUCCESS) {
380 373 cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n",
381 374 status);
382 375 TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail,
383 376 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
384 377 /* Set "status" and "errormsg" and goto failure */
385 378 TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command");
386 379 goto srqalloc_fail8;
387 380 }
388 381
389 382 /*
390 383 * Fill in the rest of the Tavor SRQ handle. We can update
391 384 * the following fields for use in further operations on the SRQ.
392 385 */
393 386 srq->srq_srqcrsrcp = srqc;
394 387 srq->srq_rsrcp = rsrc;
395 388 srq->srq_mrhdl = mr;
396 389 srq->srq_refcnt = 0;
397 390 srq->srq_is_umap = srq_is_umap;
398 391 srq->srq_uarpg = (srq->srq_is_umap) ? uarpg : 0;
399 392 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
400 393 srq->srq_pdhdl = pd;
401 394 srq->srq_wq_lastwqeindx = -1;
402 395 srq->srq_wq_bufsz = (1 << log_srq_size);
403 396 srq->srq_wq_buf = buf;
404 397 srq->srq_desc_off = srq_desc_off;
405 398 srq->srq_hdlrarg = (void *)ibt_srqhdl;
406 399 srq->srq_state = 0;
407 400 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
408 401 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
409 402
410 403 /* Determine if later ddi_dma_sync will be necessary */
411 404 srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);
412 405
413 406 /*
414 407 * Put SRQ handle in Tavor SRQNum-to-SRQhdl list. Then fill in the
415 408 * "srqhdl" and return success
416 409 */
417 410 ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL);
418 411 state->ts_srqhdl[srqc->tr_indx] = srq;
419 412
420 413 /*
421 414 * If this is a user-mappable SRQ, then we need to insert the
422 415 * previously allocated entry into the "userland resources database".
423 416 * This will allow for later lookup during devmap() (i.e. mmap())
424 417 * calls.
425 418 */
426 419 if (srq->srq_is_umap) {
427 420 tavor_umap_db_add(umapdb);
428 421 } else {
429 422 mutex_enter(&srq->srq_wrid_wql->wql_lock);
430 423 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0);
431 424 mutex_exit(&srq->srq_wrid_wql->wql_lock);
432 425 }
433 426
434 427 *srqhdl = srq;
435 428
436 429 TAVOR_TNF_EXIT(tavor_srq_alloc);
437 430 return (status);
438 431
439 432 /*
440 433 * The following is cleanup for all possible failure cases in this routine
441 434 */
442 435 srqalloc_fail8:
443 436 kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size *
444 437 sizeof (tavor_wrid_entry_t));
445 438 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
446 439 srqalloc_fail7:
447 440 tavor_wql_refcnt_dec(srq->srq_wrid_wql);
448 441 srqalloc_fail6:
449 442 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
450 443 TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
451 444 TAVOR_WARNING(state, "failed to deregister SRQ memory");
452 445 }
453 446 srqalloc_fail5:
454 447 tavor_queue_free(state, &srq->srq_wqinfo);
455 448 srqalloc_fail4:
456 449 if (srq_is_umap) {
457 450 tavor_umap_db_free(umapdb);
458 451 }
459 452 srqalloc_fail3:
460 453 tavor_rsrc_free(state, &rsrc);
461 454 srqalloc_fail2:
462 455 tavor_rsrc_free(state, &srqc);
463 456 srqalloc_fail1:
464 457 tavor_pd_refcnt_dec(pd);
465 458 srqalloc_fail:
466 459 TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "",
467 460 tnf_string, msg, errormsg);
468 461 TAVOR_TNF_EXIT(tavor_srq_alloc);
469 462 return (status);
470 463 }
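
The srqalloc_fail* ladder above releases resources in the reverse order of their acquisition; each failure site jumps to the label that undoes exactly what has been acquired so far and then falls through the earlier labels. A generic sketch of the idiom (all names hypothetical):

	typedef struct res res_t;
	extern int res_alloc(res_t **);
	extern void res_free(res_t *);

	static int
	alloc_pair(res_t **ap, res_t **bp)
	{
		int status;

		if ((status = res_alloc(ap)) != 0)
			goto fail1;
		if ((status = res_alloc(bp)) != 0)
			goto fail2;
		return (0);

	fail2:
		res_free(*ap);		/* undo the first allocation */
	fail1:
		return (status);	/* nothing acquired before fail1 */
	}
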
471 464
472 465
473 466 /*
474 467 * tavor_srq_free()
475 468 * Context: Can be called only from user or kernel context.
476 469 */
477 470 /* ARGSUSED */
478 471 int
479 472 tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag)
480 473 {
481 474 tavor_rsrc_t *srqc, *rsrc;
482 475 tavor_umap_db_entry_t *umapdb;
483 476 uint64_t value;
484 477 tavor_srqhdl_t srq;
485 478 tavor_mrhdl_t mr;
486 479 tavor_pdhdl_t pd;
487 480 tavor_hw_srqc_t srqc_entry;
488 481 uint32_t srqnum;
489 482 uint32_t size;
490 483 uint_t maxprot;
491 484 int status;
492 485
493 486 TAVOR_TNF_ENTER(tavor_srq_free);
494 487
495 488 /*
496 489 * Pull all the necessary information from the Tavor Shared Receive
497 490 * Queue handle. This is necessary here because the resource for the
498 491 * SRQ handle is going to be freed up as part of this operation.
499 492 */
500 493 srq = *srqhdl;
501 494 mutex_enter(&srq->srq_lock);
502 495 srqc = srq->srq_srqcrsrcp;
503 496 rsrc = srq->srq_rsrcp;
504 497 pd = srq->srq_pdhdl;
505 498 mr = srq->srq_mrhdl;
506 499 srqnum = srq->srq_srqnum;
507 500
508 501 /*
509 502 * If there are work queues still associated with the SRQ, then return
510 503 * an error. Otherwise, we will be holding the SRQ lock.
511 504 */
512 505 if (srq->srq_refcnt != 0) {
513 506 mutex_exit(&srq->srq_lock);
514 507 TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "",
515 508 tnf_int, refcnt, srq->srq_refcnt);
516 509 TAVOR_TNF_EXIT(tavor_srq_free);
517 510 return (IBT_SRQ_IN_USE);
518 511 }
519 512
520 513 /*
521 514 * If this was a user-mappable SRQ, then we need to remove its entry
522 515 * from the "userland resources database". If it is also currently
523 516 * mmap()'d out to a user process, then we need to call
524 517 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
525 518 * We also need to invalidate the SRQ tracking information for the
526 519 * user mapping.
527 520 */
528 521 if (srq->srq_is_umap) {
529 522 status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum,
530 523 MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
531 524 &umapdb);
532 525 if (status != DDI_SUCCESS) {
533 526 mutex_exit(&srq->srq_lock);
534 527 TAVOR_WARNING(state, "failed to find in database");
535 528 TAVOR_TNF_EXIT(tavor_srq_free);
536 529 return (ibc_get_ci_failure(0));
537 530 }
538 531 tavor_umap_db_free(umapdb);
539 532 if (srq->srq_umap_dhp != NULL) {
540 533 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
541 534 status = devmap_devmem_remap(srq->srq_umap_dhp,
542 535 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size,
543 536 maxprot, DEVMAP_MAPPING_INVALID, NULL);
544 537 if (status != DDI_SUCCESS) {
545 538 mutex_exit(&srq->srq_lock);
546 539 TAVOR_WARNING(state, "failed in SRQ memory "
547 540 "devmap_devmem_remap()");
548 541 TAVOR_TNF_EXIT(tavor_srq_free);
549 542 return (ibc_get_ci_failure(0));
550 543 }
551 544 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
552 545 }
553 546 }
554 547
555 548 /*
556 549 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any
557 550 * in-progress events to detect that the SRQ corresponding to this
558 551 * number has been freed.
559 552 */
560 553 state->ts_srqhdl[srqc->tr_indx] = NULL;
561 554
562 555 mutex_exit(&srq->srq_lock);
563 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
564 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist));
565 556
566 557 /*
567 558 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
568 559 * firmware command). If the ownership transfer fails for any reason,
569 560 * then it is an indication that something (either in HW or SW) has
570 561 * gone seriously wrong.
571 562 */
572 563 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
573 564 sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
574 565 if (status != TAVOR_CMD_SUCCESS) {
575 566 TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
576 567 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
577 568 status);
578 569 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
579 570 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
580 571 TAVOR_TNF_EXIT(tavor_srq_free);
581 572 return (IBT_FAILURE);
582 573 }
583 574
584 575 /*
585 576 * Deregister the memory for the Shared Receive Queue. If this fails
586 577 * for any reason, then it is an indication that something (either
587 578 * in HW or SW) has gone seriously wrong. So we print a warning
588 579 * message and return.
589 580 */
590 581 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
591 582 sleepflag);
592 583 if (status != DDI_SUCCESS) {
593 584 TAVOR_WARNING(state, "failed to deregister SRQ memory");
594 585 TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
595 586 TAVOR_TNF_EXIT(tavor_srq_free);
596 587 return (IBT_FAILURE);
597 588 }
598 589
599 590 /* Calculate the size and free the wridlist container */
600 591 if (srq->srq_wridlist != NULL) {
601 592 size = (srq->srq_wridlist->wl_size *
602 593 sizeof (tavor_wrid_entry_t));
603 594 kmem_free(srq->srq_wridlist->wl_wre, size);
604 595 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
605 596
606 597 /*
607 598 * Release reference to WQL; If this is the last reference,
608 599 * this call also has the side effect of freeing up the
609 600 * 'srq_wrid_wql' memory.
610 601 */
611 602 tavor_wql_refcnt_dec(srq->srq_wrid_wql);
612 603 }
613 604
614 605 /* Free the memory for the SRQ */
615 606 tavor_queue_free(state, &srq->srq_wqinfo);
616 607
617 608 /* Free the Tavor SRQ Handle */
618 609 tavor_rsrc_free(state, &rsrc);
619 610
620 611 /* Free the SRQC entry resource */
621 612 tavor_rsrc_free(state, &srqc);
622 613
623 614 /* Decrement the reference count on the protection domain (PD) */
624 615 tavor_pd_refcnt_dec(pd);
625 616
626 617 /* Set the srqhdl pointer to NULL and return success */
627 618 *srqhdl = NULL;
628 619
629 620 TAVOR_TNF_EXIT(tavor_srq_free);
630 621 return (DDI_SUCCESS);
631 622 }
632 623
633 624
634 625 /*
635 626 * tavor_srq_modify()
636 627 * Context: Can be called only from user or kernel context.
637 628 */
638 629 int
639 630 tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size,
640 631 uint_t *real_size, uint_t sleepflag)
641 632 {
642 633 tavor_qalloc_info_t new_srqinfo, old_srqinfo;
643 634 tavor_rsrc_t *mtt, *mpt, *old_mtt;
644 635 tavor_bind_info_t bind;
645 636 tavor_bind_info_t old_bind;
646 637 tavor_rsrc_pool_info_t *rsrc_pool;
647 638 tavor_mrhdl_t mr;
648 639 tavor_hw_mpt_t mpt_entry;
649 640 tavor_wrid_entry_t *wre_new, *wre_old;
650 641 uint64_t mtt_ddrbaseaddr, mtt_addr;
651 642 uint64_t srq_desc_off;
652 643 uint32_t *buf, srq_old_bufsz;
653 644 uint32_t wqesz;
654 645 uint_t max_srq_size;
655 646 uint_t dma_xfer_mode, mtt_pgsize_bits;
656 647 uint_t srq_sync, log_srq_size, maxprot;
657 648 uint_t wq_location;
658 649 int status;
659 650 char *errormsg;
660 651
661 652 TAVOR_TNF_ENTER(tavor_srq_modify);
662 653
663 654 /*
664 655 * Check the "inddr" flag. This flag tells the driver whether or not
665 656 * the SRQ's work queues should come from normal system memory or
666 657 * whether they should be allocated from DDR memory.
667 658 */
668 659 wq_location = state->ts_cfg_profile->cp_srq_wq_inddr;
669 660
670 661 /*
671 662 * If size requested is larger than device capability, return
672 663 * Insufficient Resources
673 664 */
674 665 max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz);
675 666 if (size > max_srq_size) {
676 667 TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize,
677 668 TAVOR_TNF_ERROR, "");
678 669 TAVOR_TNF_EXIT(tavor_srq_modify);
679 670 return (IBT_HCA_WR_EXCEEDED);
680 671 }
681 672
682 673 /*
683 674 * Calculate the appropriate size for the SRQ.
684 675 * Note: All Tavor SRQs must be a power-of-2 in size. Also
685 676 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step
686 677 * is to round the requested size up to the next highest power-of-2
687 678 */
688 679 size = max(size, TAVOR_SRQ_MIN_SIZE);
689 680 log_srq_size = highbit(size);
690 681 if (ISP2(size)) {
691 682 log_srq_size = log_srq_size - 1;
692 683 }
693 684
694 685 /*
695 686 * Next we verify that the rounded-up size is valid (i.e. consistent
696 687 * with the device limits and/or software-configured limits).
697 688 */
698 689 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
699 690 /* Set "status" and "errormsg" and goto failure */
700 691 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
701 692 goto srqmodify_fail;
702 693 }
703 694
704 695 /*
705 696 * Allocate the memory for newly resized Shared Receive Queue.
706 697 *
707 698 * Note: If SRQ is not user-mappable, then it may come from either
708 699 * kernel system memory or from HCA-attached local DDR memory.
709 700 *
710 701 * Note2: We align this queue on a pagesize boundary. This is required
711 702 * to make sure that all the resulting IB addresses will start at 0,
712 703 * for a zero-based queue. By making sure we are aligned on at least a
713 704 * page, any offset we use into our queue will be the same as it was
714 705 * when we allocated it at tavor_srq_alloc() time.
715 706 */
716 707 wqesz = (1 << srq->srq_wq_log_wqesz);
717 708 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
718 709 new_srqinfo.qa_alloc_align = PAGESIZE;
719 710 new_srqinfo.qa_bind_align = PAGESIZE;
720 711 if (srq->srq_is_umap) {
721 712 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
722 713 } else {
723 714 new_srqinfo.qa_location = wq_location;
724 715 }
725 716 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
726 717 if (status != DDI_SUCCESS) {
727 718 /* Set "status" and "errormsg" and goto failure */
728 719 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
729 720 goto srqmodify_fail;
730 721 }
731 722 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
732 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
733 723
734 724 /*
735 725 * Allocate the memory for the new WRE list. This will be used later
736 726 * when we resize the wridlist based on the new SRQ size.
737 727 */
738 728 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
739 729 sizeof (tavor_wrid_entry_t), sleepflag);
740 730 if (wre_new == NULL) {
741 731 /* Set "status" and "errormsg" and goto failure */
742 732 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
743 733 "failed wre_new alloc");
744 734 goto srqmodify_fail;
745 735 }
746 736
747 737 /*
748 738 * Fill in the "bind" struct. This struct provides the majority
749 739 * of the information that will be used to distinguish between an
750 740 * "addr" binding (as is the case here) and a "buf" binding (see
751 741 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
752 742 * which does most of the "heavy lifting" for the Tavor memory
753 743 * registration routines.
754 744 */
755 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
756 745 bzero(&bind, sizeof (tavor_bind_info_t));
757 746 bind.bi_type = TAVOR_BINDHDL_VADDR;
758 747 bind.bi_addr = (uint64_t)(uintptr_t)buf;
759 748 bind.bi_len = new_srqinfo.qa_size;
760 749 bind.bi_as = NULL;
761 750 bind.bi_flags = ((sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
762 751 IBT_MR_NOSLEEP) | IBT_MR_ENABLE_LOCAL_WRITE;
763 752 if (srq->srq_is_umap) {
764 753 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
765 754 } else {
766 755 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
767 756 bind.bi_bypass =
768 757 state->ts_cfg_profile->cp_iommu_bypass;
769 758 dma_xfer_mode =
770 759 state->ts_cfg_profile->cp_streaming_consistent;
771 760 if (dma_xfer_mode == DDI_DMA_STREAMING) {
772 761 bind.bi_flags |= IBT_MR_NONCOHERENT;
773 762 }
774 763 } else {
775 764 bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
776 765 }
777 766 }
778 767 status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
779 768 &mtt_pgsize_bits);
780 769 if (status != DDI_SUCCESS) {
781 770 /* Set "status" and "errormsg" and goto failure */
782 771 TAVOR_TNF_FAIL(status, "failed mtt bind");
783 772 kmem_free(wre_new, (1 << log_srq_size) *
784 773 sizeof (tavor_wrid_entry_t));
785 774 tavor_queue_free(state, &new_srqinfo);
786 775 goto srqmodify_fail;
787 776 }
788 777
789 778 /*
790 779 * Calculate the offset between the kernel virtual address space
791 780 * and the IB virtual address space. This will be used when
792 781 * posting work requests to properly initialize each WQE.
793 782 *
794 783 * Note: bind addr is zero-based (from alloc) so we calculate the
795 784 * correct new offset here.
796 785 */
797 786 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
798 787 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
799 788 (uint64_t)bind.bi_addr;
800 789
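
The mask applied to bind.bi_addr above keeps only the buffer's offset within its first page, which is what makes the rebound queue zero-based again. A small sketch with a worked value (hypothetical helper name):

	/*
	 * With pgsize_bits == 12 (4 KB pages), a buffer at kernel
	 * vaddr 0x...45678 yields a zero-based IB vaddr of 0x678.
	 */
	static uint64_t
	zero_based_ibaddr(uint64_t kva, uint_t pgsize_bits)
	{
		return (kva & ((1ULL << pgsize_bits) - 1));
	}
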
801 790 /*
802 791 * Get the base address for the MTT table. This will be necessary
803 792 * below when we are modifying the MPT entry.
804 793 */
805 794 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
806 795 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
807 796
808 797 /*
809 798 * Fill in the MPT entry. This is the final step before passing
810 799 * ownership of the MPT entry to the Tavor hardware. We use all of
811 800 * the information collected/calculated above to fill in the
812 801 * requisite portions of the MPT.
813 802 */
814 803 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
815 804 mpt_entry.reg_win_len = bind.bi_len;
816 805 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
817 806 mpt_entry.mttseg_addr_h = mtt_addr >> 32;
818 807 mpt_entry.mttseg_addr_l = mtt_addr >> 6;
819 808
820 809 /*
821 810 * Now we grab the SRQ lock. Since we will be updating the actual
822 811 * SRQ location and the producer/consumer indexes, we should hold
823 812 * the lock.
824 813 *
825 814 * We do a TAVOR_NOSLEEP here (and below), though, because we are
826 815 * holding the "srq_lock" and if we got raised to interrupt level
827 816 * by priority inversion, we would not want to block in this routine
828 817 * waiting for success.
829 818 */
830 819 mutex_enter(&srq->srq_lock);
831 820
832 821 /*
833 822 * Copy old entries to new buffer
834 823 */
835 824 srq_old_bufsz = srq->srq_wq_bufsz;
836 825 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
837 826
838 827 /* Determine if later ddi_dma_sync will be necessary */
839 828 srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);
840 829
841 830 /* Sync entire "new" SRQ for use by hardware (if necessary) */
842 831 if (srq_sync) {
843 832 (void) ddi_dma_sync(bind.bi_dmahdl, 0,
844 833 new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
845 834 }
846 835
847 836 /*
848 837 * Setup MPT information for use in the MODIFY_MPT command
849 838 */
850 839 mr = srq->srq_mrhdl;
851 840 mutex_enter(&mr->mr_lock);
852 841 mpt = srq->srq_mrhdl->mr_mptrsrcp;
853 842
854 843 /*
855 844 * MODIFY_MPT
856 845 *
857 846 * If this fails for any reason, then it is an indication that
858 847 * something (either in HW or SW) has gone seriously wrong. So we
859 848 * print a warning message and return.
860 849 */
861 850 status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx,
862 851 TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag);
863 852 if (status != TAVOR_CMD_SUCCESS) {
864 853 cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n",
865 854 status);
866 855 TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
867 856 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
868 857 TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed");
869 858 (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo,
870 859 srq->srq_mrhdl->mr_mttrsrcp);
871 860 kmem_free(wre_new, (1 << log_srq_size) *
872 861 sizeof (tavor_wrid_entry_t));
873 862 tavor_queue_free(state, &new_srqinfo);
874 863 mutex_exit(&mr->mr_lock);
875 864 mutex_exit(&srq->srq_lock);
876 865 return (ibc_get_ci_failure(0));
877 866 }
878 867
879 868 /*
880 869 * Update the Tavor Shared Receive Queue handle with all the new
881 870 * information. At the same time, save away all the necessary
882 871 * information for freeing up the old resources
883 872 */
884 873 old_srqinfo = srq->srq_wqinfo;
885 874 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
886 875 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
887 876 sizeof (tavor_bind_info_t));
888 877
889 878 /* Now set the new info */
890 879 srq->srq_wqinfo = new_srqinfo;
891 880 srq->srq_wq_buf = buf;
892 881 srq->srq_wq_bufsz = (1 << log_srq_size);
893 882 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
894 883 srq->srq_mrhdl->mr_mttrsrcp = mtt;
895 884 srq->srq_desc_off = srq_desc_off;
896 885 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
897 886
898 887 /* Update MR mtt pagesize */
899 888 mr->mr_logmttpgsz = mtt_pgsize_bits;
900 889 mutex_exit(&mr->mr_lock);
901 890
902 -#ifdef __lock_lint
903 - mutex_enter(&srq->srq_wrid_wql->wql_lock);
904 -#else
905 891 if (srq->srq_wrid_wql != NULL) {
906 892 mutex_enter(&srq->srq_wrid_wql->wql_lock);
907 893 }
908 -#endif
909 894
910 895 /*
911 896 * Initialize new wridlist, if needed.
912 897 *
913 898 * If a wridlist already is setup on an SRQ (the QP associated with an
914 899 * SRQ has moved "from_reset") then we must update this wridlist based
915 900 * on the new SRQ size. We allocate the new size of Work Request ID
916 901 * Entries, copy over the old entries to the new list, and
917 902 * re-initialize the srq wridlist in non-umap case
918 903 */
919 904 wre_old = NULL;
920 905 if (srq->srq_wridlist != NULL) {
921 906 wre_old = srq->srq_wridlist->wl_wre;
922 907
923 908 bcopy(wre_old, wre_new, srq_old_bufsz *
924 909 sizeof (tavor_wrid_entry_t));
925 910
926 911 /* Setup new sizes in wre */
927 912 srq->srq_wridlist->wl_wre = wre_new;
928 913 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;
929 914
930 915 if (!srq->srq_is_umap) {
931 916 tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
932 917 srq_old_bufsz);
933 918 }
934 919 }
935 920
936 -#ifdef __lock_lint
937 - mutex_exit(&srq->srq_wrid_wql->wql_lock);
938 -#else
939 921 if (srq->srq_wrid_wql != NULL) {
940 922 mutex_exit(&srq->srq_wrid_wql->wql_lock);
941 923 }
942 -#endif
943 924
944 925 /*
945 926 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
946 927 * to a user process, then we need to call devmap_devmem_remap() to
947 928 * invalidate the mapping to the SRQ memory. We also need to
948 929 * invalidate the SRQ tracking information for the user mapping.
949 930 *
950 931 * Note: On failure, the remap really shouldn't ever happen. So, if it
951 932 * does, it is an indication that something has gone seriously wrong.
952 933 * So we print a warning message and return error (knowing, of course,
953 934 * that the "old" SRQ memory will be leaked)
954 935 */
955 936 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
956 937 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
957 938 status = devmap_devmem_remap(srq->srq_umap_dhp,
958 939 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
959 940 DEVMAP_MAPPING_INVALID, NULL);
960 941 if (status != DDI_SUCCESS) {
961 942 mutex_exit(&srq->srq_lock);
962 943 TAVOR_WARNING(state, "failed in SRQ memory "
963 944 "devmap_devmem_remap()");
964 945 /* We can, however, free the memory for old wre */
965 946 if (wre_old != NULL) {
966 947 kmem_free(wre_old, srq_old_bufsz *
967 948 sizeof (tavor_wrid_entry_t));
968 949 }
969 950 TAVOR_TNF_EXIT(tavor_srq_modify);
970 951 return (ibc_get_ci_failure(0));
971 952 }
972 953 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
973 954 }
974 955
975 956 /*
976 957 * Drop the SRQ lock now. The only thing left to do is to free up
977 958 * the old resources.
978 959 */
979 960 mutex_exit(&srq->srq_lock);
980 961
981 962 /*
982 963 * Unbind the MTT entries.
983 964 */
984 965 status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt);
985 966 if (status != DDI_SUCCESS) {
986 967 TAVOR_WARNING(state, "failed to unbind old SRQ memory");
987 968 /* Set "status" and "errormsg" and goto failure */
988 969 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
989 970 "failed to unbind (old)");
990 971 goto srqmodify_fail;
991 972 }
992 973
993 974 /* Free the memory for old wre */
994 975 if (wre_old != NULL) {
995 976 kmem_free(wre_old, srq_old_bufsz *
996 977 sizeof (tavor_wrid_entry_t));
997 978 }
998 979
999 980 /* Free the memory for the old SRQ */
1000 981 tavor_queue_free(state, &old_srqinfo);
1001 982
1002 983 /*
1003 984 * Fill in the return arguments (if necessary). This includes the
1004 985 * real new shared receive queue size.
1005 986 */
1006 987 if (real_size != NULL) {
1007 988 *real_size = (1 << log_srq_size);
1008 989 }
1009 990
1010 991 TAVOR_TNF_EXIT(tavor_srq_modify);
1011 992 return (DDI_SUCCESS);
1012 993
1013 994 srqmodify_fail:
1014 995 TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "",
1015 996 tnf_string, msg, errormsg);
1016 997 TAVOR_TNF_EXIT(tavor_srq_modify);
1017 998 return (status);
1018 999 }
1019 1000
1020 1001
1021 1002 /*
1022 1003 * tavor_srq_refcnt_inc()
1023 1004 * Context: Can be called from interrupt or base context.
1024 1005 */
1025 1006 void
1026 1007 tavor_srq_refcnt_inc(tavor_srqhdl_t srq)
1027 1008 {
1028 1009 mutex_enter(&srq->srq_lock);
1029 1010 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "",
1030 1011 tnf_uint, refcnt, srq->srq_refcnt);
1031 1012 srq->srq_refcnt++;
1032 1013 mutex_exit(&srq->srq_lock);
1033 1014 }
1034 1015
1035 1016
1036 1017 /*
1037 1018 * tavor_srq_refcnt_dec()
1038 1019 * Context: Can be called from interrupt or base context.
1039 1020 */
1040 1021 void
1041 1022 tavor_srq_refcnt_dec(tavor_srqhdl_t srq)
1042 1023 {
1043 1024 mutex_enter(&srq->srq_lock);
1044 1025 srq->srq_refcnt--;
1045 1026 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "",
1046 1027 tnf_uint, refcnt, srq->srq_refcnt);
1047 1028 mutex_exit(&srq->srq_lock);
1048 1029 }
1049 1030
1050 1031
1051 1032 /*
1052 1033 * tavor_srqhdl_from_srqnum()
1053 1034 * Context: Can be called from interrupt or base context.
1054 1035 *
1055 1036 * This routine is important because changing the unconstrained
1056 1037 * portion of the SRQ number is critical to the detection of a
1057 1038 * potential race condition in the SRQ handler code (i.e. the case
1058 1039 * where a SRQ is freed and alloc'd again before an event for the
1059 1040 * "old" SRQ can be handled).
1060 1041 *
1061 1042 * While this is not a perfect solution (not sure that one exists)
1062 1043 * it does help to mitigate the chance that this race condition will
1063 1044 * cause us to deliver a "stale" event to the new SRQ owner. Note:
1064 1045 * this solution does not scale well because the number of constrained
1065 1046 * bits increases (and, hence, the number of unconstrained bits
1066 1047 * decreases) as the number of supported SRQs grows. For small and
1067 1048 * intermediate values, it should hopefully provide sufficient
1068 1049 * protection.
1069 1050 */
1070 1051 tavor_srqhdl_t
1071 1052 tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum)
1072 1053 {
1073 1054 uint_t srqindx, srqmask;
1074 1055
1075 1056 /* Calculate the SRQ table index from the srqnum */
1076 1057 srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1;
1077 1058 srqindx = srqnum & srqmask;
1078 1059 return (state->ts_srqhdl[srqindx]);
1079 1060 }
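
The mask above keeps only the constrained low bits of the SRQ number: with cp_log_num_srq == 10, for example, there are 1024 table slots, and the remaining high bits are free to differ between successive owners of the same slot, which is what makes a stale event detectable. A sketch of the index computation:

	/*
	 * Derive the ts_srqhdl[] index from an SRQ number, given the
	 * configured log2 of the SRQ table size.
	 */
	static uint_t
	srqnum_to_index(uint_t srqnum, uint_t log_num_srq)
	{
		uint_t mask = (1U << log_num_srq) - 1;

		return (srqnum & mask);
	}
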
1080 1061
1081 1062
1082 1063 /*
1083 1064 * tavor_srq_sgl_to_logwqesz()
1084 1065 * Context: Can be called from interrupt or base context.
1085 1066 */
1086 1067 static void
1087 1068 tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1088 1069 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1089 1070 {
1090 1071 uint_t max_size, log2, actual_sgl;
1091 1072
1092 1073 TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz);
1093 1074
1094 1075 switch (wq_type) {
1095 1076 case TAVOR_QP_WQ_TYPE_RECVQ:
1096 1077 /*
1097 1078 * Use requested maximum SGL to calculate max descriptor size
1098 1079 * (while guaranteeing that the descriptor size is a
1099 1080 * power-of-2 cachelines).
1100 1081 */
1101 1082 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
1102 1083 log2 = highbit(max_size);
1103 1084 if (ISP2(max_size)) {
1104 1085 log2 = log2 - 1;
1105 1086 }
1106 1087
1107 1088 /* Make sure descriptor is at least the minimum size */
1108 1089 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1109 1090
1110 1091 /* Calculate actual number of SGL (given WQE size) */
1111 1092 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
1112 1093 break;
1113 1094
1114 1095 default:
1115 1096 TAVOR_WARNING(state, "unexpected work queue type");
1116 1097 TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail,
1117 1098 TAVOR_TNF_ERROR, "");
1118 1099 break;
1119 1100 }
1120 1101
1121 1102 /* Fill in the return values */
1122 1103 *logwqesz = log2;
1123 1104 *max_sgl = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl);
1124 1105
1125 1106 TAVOR_TNF_EXIT(tavor_srq_sgl_to_logwqesz);
1126 1107 }
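
Since each scatter-gather entry occupies 16 bytes (the << 4 above), the routine rounds the header-plus-SGL footprint up to a power-of-2 descriptor size and then recomputes how many SGL entries actually fit in it. A compact sketch of the same arithmetic (TAVOR_QP_WQE_MLX_RCV_HDRS is not visible in this file, so its value is passed in; the TAVOR_QP_WQE_LOG_MINIMUM clamp is omitted for brevity):

	static void
	sgl_to_logwqesz(uint_t num_sgl, uint_t hdrs, uint_t *logwqesz,
	    uint_t *actual_sgl)
	{
		uint_t max_size = hdrs + (num_sgl << 4);
		uint_t log2 = highbit(max_size);	/* 1-based MSB index */

		if (ISP2(max_size))
			log2--;		/* already a power of two */
		*logwqesz = log2;
		*actual_sgl = ((1U << log2) - hdrs) >> 4;
	}

For instance, with 16-byte headers and num_sgl == 4, max_size == 80 rounds up to a 128-byte descriptor (log2 == 7), and actual_sgl becomes (128 - 16) >> 4 == 7.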