Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_cq.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_cq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_cq.c
29 29 * Tavor Completion Queue Processing Routines
30 30 *
31 31 * Implements all the routines necessary for allocating, freeing, resizing,
32 32 * and handling the completion type events that the Tavor hardware can
33 33 * generate.
34 34 */
35 35
36 36 #include <sys/types.h>
37 37 #include <sys/conf.h>
38 38 #include <sys/ddi.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/modctl.h>
41 41 #include <sys/bitmap.h>
42 42 #include <sys/sysmacros.h>
43 43
44 44 #include <sys/ib/adapters/tavor/tavor.h>
45 45
46 46 static void tavor_cq_doorbell(tavor_state_t *state, uint32_t cq_cmd,
47 47 uint32_t cqn, uint32_t cq_param);
48 48 #pragma inline(tavor_cq_doorbell)
49 49 static int tavor_cq_cqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
50 50 tavor_hw_cqe_t *cqe, ibt_wc_t *wc);
51 51 static int tavor_cq_errcqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
52 52 tavor_hw_cqe_t *cqe, ibt_wc_t *wc);
53 53 static void tavor_cqe_sync(tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe,
54 54 uint_t flag);
55 55 static void tavor_cq_resize_helper(tavor_cqhdl_t cq, tavor_hw_cqe_t *new_cqbuf,
56 56 uint32_t old_cons_indx, uint32_t num_newcqe);
57 57
58 58 /*
59 59 * tavor_cq_alloc()
60 60 * Context: Can be called only from user or kernel context.
61 61 */
62 62 int
63 63 tavor_cq_alloc(tavor_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
64 64 ibt_cq_attr_t *cq_attr, uint_t *actual_size, tavor_cqhdl_t *cqhdl,
65 65 uint_t sleepflag)
66 66 {
67 67 tavor_rsrc_t *cqc, *rsrc;
68 68 tavor_umap_db_entry_t *umapdb;
69 69 tavor_hw_cqc_t cqc_entry;
70 70 tavor_cqhdl_t cq;
71 71 ibt_mr_attr_t mr_attr;
72 72 tavor_mr_options_t op;
73 73 tavor_pdhdl_t pd;
↓ open down ↓ |
73 lines elided |
↑ open up ↑ |
74 74 tavor_mrhdl_t mr;
75 75 tavor_hw_cqe_t *buf;
76 76 uint64_t addr, value;
77 77 uint32_t log_cq_size, lkey, uarpg;
78 78 uint_t dma_xfer_mode, cq_sync, cq_is_umap;
79 79 int status, i, flag;
80 80 char *errormsg;
81 81
82 82 TAVOR_TNF_ENTER(tavor_cq_alloc);
83 83
84 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
85 -
86 84 /*
87 85 * Determine whether CQ is being allocated for userland access or
88 86 * whether it is being allocated for kernel access. If the CQ is
89 87 * being allocated for userland access, then lookup the UAR doorbell
90 88 * page number for the current process. Note: If this is not found
91 89 * (e.g. if the process has not previously open()'d the Tavor driver),
92 90 * then an error is returned.
93 91 */
94 92 cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
95 93 if (cq_is_umap) {
96 94 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
97 95 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
98 96 if (status != DDI_SUCCESS) {
99 97 /* Set "status" and "errormsg" and goto failure */
100 98 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
101 99 goto cqalloc_fail;
102 100 }
103 101 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
104 102 }
105 103
106 104 /* Use the internal protection domain (PD) for setting up CQs */
107 105 pd = state->ts_pdhdl_internal;
108 106
109 107 /* Increment the reference count on the protection domain (PD) */
110 108 tavor_pd_refcnt_inc(pd);
111 109
112 110 /*
113 111 * Allocate an CQ context entry. This will be filled in with all
114 112 * the necessary parameters to define the Completion Queue. And then
115 113 * ownership will be passed to the hardware in the final step
116 114 * below. If we fail here, we must undo the protection domain
117 115 * reference count.
118 116 */
119 117 status = tavor_rsrc_alloc(state, TAVOR_CQC, 1, sleepflag, &cqc);
120 118 if (status != DDI_SUCCESS) {
121 119 /* Set "status" and "errormsg" and goto failure */
122 120 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed CQ context");
123 121 goto cqalloc_fail1;
124 122 }
125 123
126 124 /*
127 125 * Allocate the software structure for tracking the completion queue
128 126 * (i.e. the Tavor Completion Queue handle). If we fail here, we must
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
129 127 * undo the protection domain reference count and the previous
130 128 * resource allocation.
131 129 */
132 130 status = tavor_rsrc_alloc(state, TAVOR_CQHDL, 1, sleepflag, &rsrc);
133 131 if (status != DDI_SUCCESS) {
134 132 /* Set "status" and "errormsg" and goto failure */
135 133 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed CQ handle");
136 134 goto cqalloc_fail2;
137 135 }
138 136 cq = (tavor_cqhdl_t)rsrc->tr_addr;
139 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
140 137 cq->cq_is_umap = cq_is_umap;
141 138
142 139 /* Use the index as CQ number */
143 140 cq->cq_cqnum = cqc->tr_indx;
144 141
145 142 /*
146 143 * If this will be a user-mappable CQ, then allocate an entry for
147 144 * the "userland resources database". This will later be added to
148 145 * the database (after all further CQ operations are successful).
149 146 * If we fail here, we must undo the reference counts and the
150 147 * previous resource allocation.
151 148 */
152 149 if (cq->cq_is_umap) {
153 150 umapdb = tavor_umap_db_alloc(state->ts_instance, cq->cq_cqnum,
154 151 MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
155 152 if (umapdb == NULL) {
156 153 /* Set "status" and "errormsg" and goto failure */
157 154 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
158 155 goto cqalloc_fail3;
159 156 }
160 157 }
161 158
162 159 /*
163 160 * Calculate the appropriate size for the completion queue.
164 161 * Note: All Tavor CQs must be a power-of-2 minus 1 in size. Also
165 162 * they may not be any smaller than TAVOR_CQ_MIN_SIZE. This step is
166 163 * to round the requested size up to the next highest power-of-2
167 164 */
168 165 cq_attr->cq_size = max(cq_attr->cq_size, TAVOR_CQ_MIN_SIZE);
169 166 log_cq_size = highbit(cq_attr->cq_size);
170 167
171 168 /*
172 169 * Next we verify that the rounded-up size is valid (i.e. consistent
173 170 * with the device limits and/or software-configured limits)
174 171 */
175 172 if (log_cq_size > state->ts_cfg_profile->cp_log_max_cq_sz) {
176 173 /* Set "status" and "errormsg" and goto failure */
177 174 TAVOR_TNF_FAIL(IBT_HCA_CQ_EXCEEDED, "max CQ size");
178 175 goto cqalloc_fail4;
179 176 }
180 177
181 178 /*
182 179 * Allocate the memory for Completion Queue.
183 180 *
184 181 * Note: Although we use the common queue allocation routine, we
185 182 * always specify TAVOR_QUEUE_LOCATION_NORMAL (i.e. CQ located in
186 183 * kernel system memory) for kernel CQs because it would be
187 184 * inefficient to have CQs located in DDR memory. This is primarily
188 185 * because CQs are read from (by software) more than they are written
189 186 * to. (We always specify TAVOR_QUEUE_LOCATION_USERLAND for all
190 187 * user-mappable CQs for a similar reason.)
191 188 * It is also worth noting that, unlike Tavor QP work queues,
192 189 * completion queues do not have the same strict alignment
193 190 * requirements. It is sufficient for the CQ memory to be both
194 191 * aligned to and bound to addresses which are a multiple of CQE size.
195 192 */
196 193 cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (tavor_hw_cqe_t);
197 194 cq->cq_cqinfo.qa_alloc_align = sizeof (tavor_hw_cqe_t);
198 195 cq->cq_cqinfo.qa_bind_align = sizeof (tavor_hw_cqe_t);
199 196 if (cq->cq_is_umap) {
200 197 cq->cq_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
201 198 } else {
202 199 cq->cq_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_NORMAL;
203 200 }
204 201 status = tavor_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
205 202 if (status != DDI_SUCCESS) {
206 203 /* Set "status" and "errormsg" and goto failure */
207 204 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed completion queue");
208 205 goto cqalloc_fail4;
209 206 }
210 207 buf = (tavor_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
211 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
212 208
213 209 /*
214 210 * Initialize each of the Completion Queue Entries (CQE) by setting
215 211 * their ownership to hardware ("owner" bit set to HW). This is in
216 212 * preparation for the final transfer of ownership (below) of the
217 213 * CQ context itself.
218 214 */
219 215 for (i = 0; i < (1 << log_cq_size); i++) {
220 216 TAVOR_CQE_OWNER_SET_HW(cq, &buf[i]);
221 217 }
222 218
223 219 /*
224 220 * Register the memory for the CQ. The memory for the CQ must
225 221 * be registered in the Tavor TPT tables. This gives us the LKey
226 222 * to specify in the CQ context below. Note: If this is a user-
227 223 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
228 224 */
229 225 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
230 226 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
231 227 mr_attr.mr_len = cq->cq_cqinfo.qa_size;
232 228 mr_attr.mr_as = NULL;
233 229 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
234 230 if (cq->cq_is_umap) {
235 231 dma_xfer_mode = DDI_DMA_CONSISTENT;
236 232 } else {
237 233 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
238 234 }
239 235 if (dma_xfer_mode == DDI_DMA_STREAMING) {
240 236 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
241 237 }
242 238 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
243 239 op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
244 240 op.mro_bind_override_addr = 0;
245 241 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
246 242 if (status != DDI_SUCCESS) {
247 243 /* Set "status" and "errormsg" and goto failure */
248 244 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
249 245 goto cqalloc_fail5;
250 246 }
251 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
252 247 addr = mr->mr_bindinfo.bi_addr;
253 248 lkey = mr->mr_lkey;
254 249
255 250 /* Determine if later ddi_dma_sync will be necessary */
256 251 cq_sync = TAVOR_CQ_IS_SYNC_REQ(state, cq->cq_cqinfo);
257 252
258 253 /* Sync entire CQ for use by the hardware (if necessary). */
259 254 if (cq_sync) {
260 255 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
261 256 cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
262 257 }
263 258
264 259 /*
265 260 * Fill in the CQC entry. This is the final step before passing
266 261 * ownership of the CQC entry to the Tavor hardware. We use all of
267 262 * the information collected/calculated above to fill in the
268 263 * requisite portions of the CQC. Note: If this CQ is going to be
269 264 * used for userland access, then we need to set the UAR page number
270 265 * appropriately (otherwise it's a "don't care")
271 266 */
272 267 bzero(&cqc_entry, sizeof (tavor_hw_cqc_t));
273 268 cq->cq_eqnum = TAVOR_CQ_EQNUM_GET(cq->cq_cqnum);
274 269 cq->cq_erreqnum = TAVOR_CQ_ERREQNUM_GET(cq->cq_cqnum);
275 270 cqc_entry.xlat = TAVOR_VA2PA_XLAT_ENABLED;
276 271 cqc_entry.state = TAVOR_CQ_DISARMED;
277 272 cqc_entry.start_addr_h = (addr >> 32);
278 273 cqc_entry.start_addr_l = (addr & 0xFFFFFFFF);
279 274 cqc_entry.log_cq_sz = log_cq_size;
280 275 if (cq->cq_is_umap) {
281 276 cqc_entry.usr_page = uarpg;
282 277 } else {
283 278 cqc_entry.usr_page = 0;
284 279 }
285 280 cqc_entry.pd = pd->pd_pdnum;
286 281 cqc_entry.lkey = lkey;
287 282 cqc_entry.e_eqn = cq->cq_erreqnum;
288 283 cqc_entry.c_eqn = cq->cq_eqnum;
289 284 cqc_entry.cqn = cq->cq_cqnum;
290 285
291 286 /*
292 287 * Write the CQC entry to hardware. Lastly, we pass ownership of
293 288 * the entry to the hardware (using the Tavor SW2HW_CQ firmware
294 289 * command). Note: In general, this operation shouldn't fail. But
295 290 * if it does, we have to undo everything we've done above before
296 291 * returning error.
297 292 */
298 293 status = tavor_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
299 294 sizeof (tavor_hw_cqc_t), cq->cq_cqnum, sleepflag);
300 295 if (status != TAVOR_CMD_SUCCESS) {
301 296 cmn_err(CE_CONT, "Tavor: SW2HW_CQ command failed: %08x\n",
302 297 status);
303 298 TNF_PROBE_1(tavor_cq_alloc_sw2hw_cq_cmd_fail,
304 299 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
305 300 /* Set "status" and "errormsg" and goto failure */
306 301 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "tavor SW2HW_CQ command");
307 302 goto cqalloc_fail6;
308 303 }
309 304
310 305 /*
311 306 * Fill in the rest of the Tavor Completion Queue handle. Having
312 307 * successfully transferred ownership of the CQC, we can update the
313 308 * following fields for use in further operations on the CQ.
314 309 */
315 310 cq->cq_cqcrsrcp = cqc;
316 311 cq->cq_rsrcp = rsrc;
317 312 cq->cq_consindx = 0;
318 313 cq->cq_buf = buf;
319 314 cq->cq_bufsz = (1 << log_cq_size);
320 315 cq->cq_mrhdl = mr;
321 316 cq->cq_sync = cq_sync;
322 317 cq->cq_refcnt = 0;
323 318 cq->cq_is_special = 0;
324 319 cq->cq_uarpg = uarpg;
325 320 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
326 321 avl_create(&cq->cq_wrid_wqhdr_avl_tree, tavor_wrid_wqhdr_compare,
327 322 sizeof (struct tavor_workq_hdr_s),
328 323 offsetof(struct tavor_workq_hdr_s, wq_avl_link));
329 324
330 325 cq->cq_wrid_reap_head = NULL;
331 326 cq->cq_wrid_reap_tail = NULL;
332 327 cq->cq_hdlrarg = (void *)ibt_cqhdl;
333 328
334 329 /*
335 330 * Put CQ handle in Tavor CQNum-to-CQHdl list. Then fill in the
336 331 * "actual_size" and "cqhdl" and return success
337 332 */
338 333 ASSERT(state->ts_cqhdl[cqc->tr_indx] == NULL);
339 334 state->ts_cqhdl[cqc->tr_indx] = cq;
340 335
341 336 /*
342 337 * If this is a user-mappable CQ, then we need to insert the previously
343 338 * allocated entry into the "userland resources database". This will
344 339 * allow for later lookup during devmap() (i.e. mmap()) calls.
345 340 */
346 341 if (cq->cq_is_umap) {
347 342 tavor_umap_db_add(umapdb);
348 343 }
349 344
350 345 /*
351 346 * Fill in the return arguments (if necessary). This includes the
352 347 * real completion queue size.
353 348 */
354 349 if (actual_size != NULL) {
355 350 *actual_size = (1 << log_cq_size) - 1;
356 351 }
357 352 *cqhdl = cq;
358 353
359 354 TAVOR_TNF_EXIT(tavor_cq_alloc);
360 355 return (DDI_SUCCESS);
361 356
362 357 /*
363 358 * The following is cleanup for all possible failure cases in this routine
364 359 */
365 360 cqalloc_fail6:
366 361 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
367 362 sleepflag) != DDI_SUCCESS) {
368 363 TAVOR_WARNING(state, "failed to deregister CQ memory");
369 364 }
370 365 cqalloc_fail5:
371 366 tavor_queue_free(state, &cq->cq_cqinfo);
372 367 cqalloc_fail4:
373 368 if (cq_is_umap) {
374 369 tavor_umap_db_free(umapdb);
375 370 }
376 371 cqalloc_fail3:
377 372 tavor_rsrc_free(state, &rsrc);
378 373 cqalloc_fail2:
379 374 tavor_rsrc_free(state, &cqc);
380 375 cqalloc_fail1:
381 376 tavor_pd_refcnt_dec(pd);
382 377 cqalloc_fail:
383 378 TNF_PROBE_1(tavor_cq_alloc_fail, TAVOR_TNF_ERROR, "",
384 379 tnf_string, msg, errormsg);
385 380 TAVOR_TNF_EXIT(tavor_cq_alloc);
386 381 return (status);
387 382 }
388 383
389 384
390 385 /*
391 386 * tavor_cq_free()
392 387 * Context: Can be called only from user or kernel context.
393 388 */
394 389 /* ARGSUSED */
395 390 int
396 391 tavor_cq_free(tavor_state_t *state, tavor_cqhdl_t *cqhdl, uint_t sleepflag)
397 392 {
398 393 tavor_rsrc_t *cqc, *rsrc;
399 394 tavor_umap_db_entry_t *umapdb;
400 395 tavor_hw_cqc_t cqc_entry;
401 396 tavor_pdhdl_t pd;
402 397 tavor_mrhdl_t mr;
403 398 tavor_cqhdl_t cq;
404 399 uint32_t cqnum;
405 400 uint64_t value;
406 401 uint_t maxprot;
407 402 int status;
408 403
409 404 TAVOR_TNF_ENTER(tavor_cq_free);
410 405
411 406 /*
412 407 * Pull all the necessary information from the Tavor Completion Queue
413 408 * handle. This is necessary here because the resource for the
414 409 * CQ handle is going to be freed up as part of this operation.
415 410 */
416 411 cq = *cqhdl;
417 412 mutex_enter(&cq->cq_lock);
418 413 cqc = cq->cq_cqcrsrcp;
419 414 rsrc = cq->cq_rsrcp;
420 415 pd = state->ts_pdhdl_internal;
421 416 mr = cq->cq_mrhdl;
422 417 cqnum = cq->cq_cqnum;
423 418
424 419 /*
425 420 * If there are work queues still associated with the CQ, then return
426 421 * an error. Otherwise, we will be holding the CQ lock.
427 422 */
428 423 if (cq->cq_refcnt != 0) {
429 424 mutex_exit(&cq->cq_lock);
430 425 TNF_PROBE_1(tavor_cq_free_refcnt_fail, TAVOR_TNF_ERROR, "",
431 426 tnf_int, refcnt, cq->cq_refcnt);
432 427 TAVOR_TNF_EXIT(tavor_cq_free);
433 428 return (IBT_CQ_BUSY);
434 429 }
435 430
436 431 /*
437 432 * If this was a user-mappable CQ, then we need to remove its entry
438 433 * from the "userland resources database". If it is also currently
439 434 * mmap()'d out to a user process, then we need to call
440 435 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
441 436 * We also need to invalidate the CQ tracking information for the
442 437 * user mapping.
443 438 */
444 439 if (cq->cq_is_umap) {
445 440 status = tavor_umap_db_find(state->ts_instance, cqnum,
446 441 MLNX_UMAP_CQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
447 442 &umapdb);
448 443 if (status != DDI_SUCCESS) {
449 444 mutex_exit(&cq->cq_lock);
450 445 TAVOR_WARNING(state, "failed to find in database");
451 446 TAVOR_TNF_EXIT(tavor_cq_free);
452 447 return (ibc_get_ci_failure(0));
453 448 }
454 449 tavor_umap_db_free(umapdb);
455 450 if (cq->cq_umap_dhp != NULL) {
456 451 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
457 452 status = devmap_devmem_remap(cq->cq_umap_dhp,
458 453 state->ts_dip, 0, 0, cq->cq_cqinfo.qa_size,
459 454 maxprot, DEVMAP_MAPPING_INVALID, NULL);
460 455 if (status != DDI_SUCCESS) {
461 456 mutex_exit(&cq->cq_lock);
462 457 TAVOR_WARNING(state, "failed in CQ memory "
463 458 "devmap_devmem_remap()");
464 459 TAVOR_TNF_EXIT(tavor_cq_free);
465 460 return (ibc_get_ci_failure(0));
466 461 }
467 462 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
468 463 }
469 464 }
470 465
471 466 /*
472 467 * Put NULL into the Tavor CQNum-to-CQHdl list. This will allow any
473 468 * in-progress events to detect that the CQ corresponding to this
474 469 * number has been freed.
475 470 */
476 471 state->ts_cqhdl[cqc->tr_indx] = NULL;
↓ open down ↓ |
215 lines elided |
↑ open up ↑ |
477 472
478 473 /*
479 474 * While we hold the CQ lock, do a "forced reap" of the workQ WRID
480 475 * list. This cleans up all the structures associated with the WRID
481 476 * processing for this CQ. Once we complete, drop the lock and finish
482 477 * the deallocation of the CQ.
483 478 */
484 479 tavor_wrid_cq_force_reap(cq);
485 480
486 481 mutex_exit(&cq->cq_lock);
487 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
488 482
489 483 /*
490 484 * Reclaim CQC entry from hardware (using the Tavor HW2SW_CQ
491 485 * firmware command). If the ownership transfer fails for any reason,
492 486 * then it is an indication that something (either in HW or SW) has
493 487 * gone seriously wrong.
494 488 */
495 489 status = tavor_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
496 490 sizeof (tavor_hw_cqc_t), cqnum, sleepflag);
497 491 if (status != TAVOR_CMD_SUCCESS) {
498 492 TAVOR_WARNING(state, "failed to reclaim CQC ownership");
499 493 cmn_err(CE_CONT, "Tavor: HW2SW_CQ command failed: %08x\n",
500 494 status);
501 495 TNF_PROBE_1(tavor_cq_free_hw2sw_cq_cmd_fail,
502 496 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
503 497 TAVOR_TNF_EXIT(tavor_cq_free);
504 498 return (ibc_get_ci_failure(0));
505 499 }
506 500
507 501 /*
508 502 * Deregister the memory for the Completion Queue. If this fails
509 503 * for any reason, then it is an indication that something (either
510 504 * in HW or SW) has gone seriously wrong. So we print a warning
511 505 * message and return.
512 506 */
513 507 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
514 508 sleepflag);
515 509 if (status != DDI_SUCCESS) {
516 510 TAVOR_WARNING(state, "failed to deregister CQ memory");
517 511 TNF_PROBE_0(tavor_cq_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
518 512 TAVOR_TNF_EXIT(tavor_cq_free);
519 513 return (ibc_get_ci_failure(0));
520 514 }
521 515
522 516 /* Free the memory for the CQ */
523 517 tavor_queue_free(state, &cq->cq_cqinfo);
524 518
525 519 /* Free the Tavor Completion Queue handle */
526 520 tavor_rsrc_free(state, &rsrc);
527 521
528 522 /* Free up the CQC entry resource */
529 523 tavor_rsrc_free(state, &cqc);
530 524
531 525 /* Decrement the reference count on the protection domain (PD) */
532 526 tavor_pd_refcnt_dec(pd);
533 527
534 528 /* Set the cqhdl pointer to NULL and return success */
535 529 *cqhdl = NULL;
536 530
537 531 TAVOR_TNF_EXIT(tavor_cq_free);
538 532 return (DDI_SUCCESS);
539 533 }
540 534
541 535
542 536 /*
543 537 * tavor_cq_resize()
544 538 * Context: Can be called only from user or kernel context.
545 539 */
546 540 int
547 541 tavor_cq_resize(tavor_state_t *state, tavor_cqhdl_t cq, uint_t req_size,
548 542 uint_t *actual_size, uint_t sleepflag)
549 543 {
550 544 tavor_hw_cqc_t cqc_entry;
551 545 tavor_qalloc_info_t new_cqinfo, old_cqinfo;
552 546 ibt_mr_attr_t mr_attr;
553 547 tavor_mr_options_t op;
554 548 tavor_pdhdl_t pd;
555 549 tavor_mrhdl_t mr, mr_old;
556 550 tavor_hw_cqe_t *buf;
557 551 uint32_t new_prod_indx, old_cons_indx;
558 552 uint_t dma_xfer_mode, cq_sync, log_cq_size, maxprot;
559 553 int status, i, flag;
560 554 char *errormsg;
561 555
562 556 TAVOR_TNF_ENTER(tavor_cq_resize);
563 557
564 558 /* Use the internal protection domain (PD) for CQs */
565 559 pd = state->ts_pdhdl_internal;
566 560
567 561 /*
568 562 * Calculate the appropriate size for the new resized completion queue.
569 563 * Note: All Tavor CQs must be a power-of-2 minus 1 in size. Also
570 564 * they may not be any smaller than TAVOR_CQ_MIN_SIZE. This step is
571 565 * to round the requested size up to the next highest power-of-2
572 566 */
573 567 req_size = max(req_size, TAVOR_CQ_MIN_SIZE);
574 568 log_cq_size = highbit(req_size);
575 569
576 570 /*
577 571 * Next we verify that the rounded-up size is valid (i.e. consistent
578 572 * with the device limits and/or software-configured limits)
579 573 */
580 574 if (log_cq_size > state->ts_cfg_profile->cp_log_max_cq_sz) {
581 575 /* Set "status" and "errormsg" and goto failure */
582 576 TAVOR_TNF_FAIL(IBT_HCA_CQ_EXCEEDED, "max CQ size");
583 577 goto cqresize_fail;
584 578 }
585 579
586 580 /*
587 581 * Allocate the memory for newly resized Completion Queue.
588 582 *
589 583 * Note: Although we use the common queue allocation routine, we
590 584 * always specify TAVOR_QUEUE_LOCATION_NORMAL (i.e. CQ located in
591 585 * kernel system memory) for kernel CQs because it would be
592 586 * inefficient to have CQs located in DDR memory. This is the same
593 587 * as we do when we first allocate completion queues primarily
594 588 * because CQs are read from (by software) more than they are written
595 589 * to. (We always specify TAVOR_QUEUE_LOCATION_USERLAND for all
596 590 * user-mappable CQs for a similar reason.)
597 591 * It is also worth noting that, unlike Tavor QP work queues,
598 592 * completion queues do not have the same strict alignment
599 593 * requirements. It is sufficient for the CQ memory to be both
600 594 * aligned to and bound to addresses which are a multiple of CQE size.
601 595 */
602 596 new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (tavor_hw_cqe_t);
603 597 new_cqinfo.qa_alloc_align = sizeof (tavor_hw_cqe_t);
604 598 new_cqinfo.qa_bind_align = sizeof (tavor_hw_cqe_t);
605 599 if (cq->cq_is_umap) {
606 600 new_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
↓ open down ↓ |
109 lines elided |
↑ open up ↑ |
607 601 } else {
608 602 new_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_NORMAL;
609 603 }
610 604 status = tavor_queue_alloc(state, &new_cqinfo, sleepflag);
611 605 if (status != DDI_SUCCESS) {
612 606 /* Set "status" and "errormsg" and goto failure */
613 607 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed completion queue");
614 608 goto cqresize_fail;
615 609 }
616 610 buf = (tavor_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
617 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
618 611
619 612 /*
620 613 * Initialize each of the Completion Queue Entries (CQE) by setting
621 614 * their ownership to hardware ("owner" bit set to HW). This is in
622 615 * preparation for the final resize operation (below).
623 616 */
624 617 for (i = 0; i < (1 << log_cq_size); i++) {
625 618 TAVOR_CQE_OWNER_SET_HW(cq, &buf[i]);
626 619 }
627 620
628 621 /*
629 622 * Register the memory for the CQ. The memory for the CQ must
630 623 * be registered in the Tavor TPT tables. This gives us the LKey
631 624 * to specify in the CQ context below.
632 625 */
633 626 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
634 627 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
635 628 mr_attr.mr_len = new_cqinfo.qa_size;
636 629 mr_attr.mr_as = NULL;
637 630 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
638 631 if (cq->cq_is_umap) {
639 632 dma_xfer_mode = DDI_DMA_CONSISTENT;
640 633 } else {
641 634 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
642 635 }
643 636 if (dma_xfer_mode == DDI_DMA_STREAMING) {
644 637 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
645 638 }
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
646 639 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
647 640 op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
648 641 op.mro_bind_override_addr = 0;
649 642 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
650 643 if (status != DDI_SUCCESS) {
651 644 tavor_queue_free(state, &new_cqinfo);
652 645 /* Set "status" and "errormsg" and goto failure */
653 646 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
654 647 goto cqresize_fail;
655 648 }
656 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
657 649
658 650 /* Determine if later ddi_dma_sync will be necessary */
659 651 cq_sync = TAVOR_CQ_IS_SYNC_REQ(state, new_cqinfo);
660 652
661 653 /* Sync entire "new" CQ for use by hardware (if necessary) */
662 654 if (cq_sync) {
663 655 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
664 656 new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
665 657 }
666 658
667 659 /*
668 660 * Now we grab the CQ lock. Since we will be updating the actual
669 661 * CQ location and the producer/consumer indexes, we should hold
670 662 * the lock.
671 663 *
672 664 * We do a TAVOR_NOSLEEP here (and below), though, because we are
673 665 * holding the "cq_lock" and if we got raised to interrupt level
674 666 * by priority inversion, we would not want to block in this routine
675 667 * waiting for success.
676 668 */
677 669 mutex_enter(&cq->cq_lock);
678 670
679 671 /*
680 672 * Determine the current CQ "consumer index".
681 673 *
682 674 * Note: This will depend on whether the CQ had previously been
683 675 * mapped for user access or whether it is a kernel CQ. If this
684 676 * is a kernel CQ, then all PollCQ() operations have come through
685 677 * the IBTF and, hence, the driver's CQ state structure will
686 678 * contain the current consumer index. If, however, the user has
687 679 * accessed this CQ by bypassing the driver (OS-bypass), then we
688 680 * need to query the firmware to determine the current CQ consumer
689 681 * index. This also assumes that the user process will not continue
690 682 * to consume entries while at the same time doing the ResizeCQ()
691 683 * operation. If the user process does not guarantee this, then it
692 684 * may see duplicate or missed completions. But under no
693 685 * circumstances should this panic the system.
694 686 */
695 687 if (cq->cq_is_umap) {
696 688 status = tavor_cmn_query_cmd_post(state, QUERY_CQ,
697 689 cq->cq_cqnum, &cqc_entry, sizeof (tavor_hw_cqc_t),
698 690 TAVOR_NOSLEEP);
699 691 if (status != TAVOR_CMD_SUCCESS) {
700 692 /* Query CQ has failed, drop CQ lock and cleanup */
701 693 mutex_exit(&cq->cq_lock);
702 694 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
703 695 sleepflag) != DDI_SUCCESS) {
704 696 TAVOR_WARNING(state, "failed to deregister "
705 697 "CQ memory");
706 698 }
707 699 tavor_queue_free(state, &new_cqinfo);
708 700 TAVOR_WARNING(state, "failed to find in database");
709 701
710 702 /* Set "status" and "errormsg" and goto failure */
711 703 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
712 704 "failed umap lookup");
713 705 goto cqresize_fail;
714 706 }
715 707 old_cons_indx = cqc_entry.cons_indx;
716 708 } else {
717 709 old_cons_indx = cq->cq_consindx;
718 710 }
719 711
720 712 /*
721 713 * Fill in the CQC entry. For the resize operation this is the
722 714 * final step before attempting the resize operation on the CQC entry.
723 715 * We use all of the information collected/calculated above to fill
724 716 * in the requisite portions of the CQC.
725 717 */
726 718 bzero(&cqc_entry, sizeof (tavor_hw_cqc_t));
727 719 cqc_entry.start_addr_h = (mr->mr_bindinfo.bi_addr >> 32);
728 720 cqc_entry.start_addr_l = (mr->mr_bindinfo.bi_addr & 0xFFFFFFFF);
729 721 cqc_entry.log_cq_sz = log_cq_size;
730 722 cqc_entry.lkey = mr->mr_lkey;
731 723
732 724 /*
733 725 * Write the CQC entry to hardware. Lastly, we pass ownership of
734 726 * the entry to the hardware (using the Tavor RESIZE_CQ firmware
735 727 * command). Note: In general, this operation shouldn't fail. But
736 728 * if it does, we have to undo everything we've done above before
737 729 * returning error. Also note that the status returned may indicate
738 730 * the code to return to the IBTF.
739 731 */
740 732 status = tavor_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
741 733 &new_prod_indx, TAVOR_CMD_NOSLEEP_SPIN);
742 734 if (status != TAVOR_CMD_SUCCESS) {
743 735 /* Resize attempt has failed, drop CQ lock and cleanup */
744 736 mutex_exit(&cq->cq_lock);
745 737 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
746 738 sleepflag) != DDI_SUCCESS) {
747 739 TAVOR_WARNING(state, "failed to deregister CQ memory");
748 740 }
749 741 tavor_queue_free(state, &new_cqinfo);
750 742 if (status == TAVOR_CMD_BAD_SIZE) {
751 743 TAVOR_TNF_EXIT(tavor_cq_resize);
752 744 return (IBT_CQ_SZ_INSUFFICIENT);
753 745 } else {
754 746 cmn_err(CE_CONT, "Tavor: RESIZE_CQ command failed: "
755 747 "%08x\n", status);
756 748 TNF_PROBE_1(tavor_cq_resize_cq_cmd_fail,
757 749 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
758 750 TAVOR_TNF_EXIT(tavor_cq_resize);
759 751 return (ibc_get_ci_failure(0));
760 752 }
761 753 }
762 754
763 755 /*
764 756 * The CQ resize attempt was successful. Before dropping the CQ lock,
765 757 * copy all of the CQEs from the "old" CQ into the "new" CQ. Note:
766 758 * the Tavor firmware guarantees us that sufficient space is set aside
767 759 * in the "new" CQ to handle any un-polled CQEs from the "old" CQ.
768 760 * The two parameters to this helper function ("old_cons_indx" and
769 761 * "new_prod_indx") essentially indicate the starting index and number
770 762 * of any CQEs that might remain in the "old" CQ memory.
771 763 */
772 764 tavor_cq_resize_helper(cq, buf, old_cons_indx, new_prod_indx);
773 765
774 766 /* Sync entire "new" CQ for use by hardware (if necessary) */
775 767 if (cq_sync) {
776 768 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
777 769 new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
778 770 }
779 771
780 772 /*
781 773 * Update the Tavor Completion Queue handle with all the new
782 774 * information. At the same time, save away all the necessary
783 775 * information for freeing up the old resources
784 776 */
785 777 mr_old = cq->cq_mrhdl;
786 778 old_cqinfo = cq->cq_cqinfo;
787 779 cq->cq_cqinfo = new_cqinfo;
788 780 cq->cq_consindx = 0;
789 781 cq->cq_buf = buf;
790 782 cq->cq_bufsz = (1 << log_cq_size);
791 783 cq->cq_mrhdl = mr;
792 784 cq->cq_sync = cq_sync;
793 785
794 786 /*
795 787 * If "old" CQ was a user-mappable CQ that is currently mmap()'d out
796 788 * to a user process, then we need to call devmap_devmem_remap() to
797 789 * invalidate the mapping to the CQ memory. We also need to
798 790 * invalidate the CQ tracking information for the user mapping.
799 791 */
800 792 if ((cq->cq_is_umap) && (cq->cq_umap_dhp != NULL)) {
801 793 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
802 794 status = devmap_devmem_remap(cq->cq_umap_dhp,
803 795 state->ts_dip, 0, 0, cq->cq_cqinfo.qa_size, maxprot,
804 796 DEVMAP_MAPPING_INVALID, NULL);
805 797 if (status != DDI_SUCCESS) {
806 798 mutex_exit(&cq->cq_lock);
807 799 TAVOR_WARNING(state, "failed in CQ memory "
808 800 "devmap_devmem_remap()");
809 801 TAVOR_TNF_EXIT(tavor_cq_free);
810 802 return (ibc_get_ci_failure(0));
811 803 }
812 804 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
813 805 }
814 806
815 807 /*
816 808 * Drop the CQ lock now. The only thing left to do is to free up
817 809 * the old resources.
818 810 */
819 811 mutex_exit(&cq->cq_lock);
820 812
821 813 /*
822 814 * Deregister the memory for the old Completion Queue. Note: We
823 815 * really can't return error here because we have no good way to
824 816 * cleanup. Plus, the deregistration really shouldn't ever happen.
825 817 * So, if it does, it is an indication that something has gone
826 818 * seriously wrong. So we print a warning message and return error
827 819 * (knowing, of course, that the "old" CQ memory will be leaked)
828 820 */
829 821 status = tavor_mr_deregister(state, &mr_old, TAVOR_MR_DEREG_ALL,
830 822 sleepflag);
831 823 if (status != DDI_SUCCESS) {
832 824 TAVOR_WARNING(state, "failed to deregister old CQ memory");
833 825 /* Set "status" and "errormsg" and goto failure */
834 826 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
835 827 "failed deregister mr (old)");
836 828 goto cqresize_fail;
837 829 }
838 830
839 831 /* Free the memory for the old CQ */
840 832 tavor_queue_free(state, &old_cqinfo);
841 833
842 834 /*
843 835 * Fill in the return arguments (if necessary). This includes the
844 836 * real new completion queue size.
845 837 */
846 838 if (actual_size != NULL) {
847 839 *actual_size = (1 << log_cq_size) - 1;
848 840 }
849 841
850 842 TAVOR_TNF_EXIT(tavor_cq_resize);
851 843 return (DDI_SUCCESS);
852 844
853 845 cqresize_fail:
854 846 TNF_PROBE_1(tavor_cq_resize_fail, TAVOR_TNF_ERROR, "",
855 847 tnf_string, msg, errormsg);
856 848 TAVOR_TNF_EXIT(tavor_cq_resize);
857 849 return (status);
858 850 }
859 851
860 852
861 853 /*
862 854 * tavor_cq_notify()
863 855 * Context: Can be called from interrupt or base context.
864 856 */
865 857 int
866 858 tavor_cq_notify(tavor_state_t *state, tavor_cqhdl_t cq,
867 859 ibt_cq_notify_flags_t flags)
868 860 {
869 861 uint_t cqnum;
870 862
871 863 TAVOR_TNF_ENTER(tavor_cq_notify);
872 864
873 865 /*
874 866 * Determine if we are trying to get the next completion or the next
875 867 * "solicited" completion. Then hit the appropriate doorbell.
876 868 *
877 869 * NOTE: Please see the comment in tavor_event.c:tavor_eq_poll
878 870 * regarding why we do not have to do an extra PIO read here, and we
879 871 * will not lose an event after writing this doorbell.
880 872 */
881 873 cqnum = cq->cq_cqnum;
882 874 if (flags == IBT_NEXT_COMPLETION) {
883 875 tavor_cq_doorbell(state, TAVOR_CQDB_NOTIFY_CQ, cqnum,
884 876 TAVOR_CQDB_DEFAULT_PARAM);
885 877
886 878 } else if (flags == IBT_NEXT_SOLICITED) {
887 879 tavor_cq_doorbell(state, TAVOR_CQDB_NOTIFY_CQ_SOLICIT,
888 880 cqnum, TAVOR_CQDB_DEFAULT_PARAM);
889 881
890 882 } else {
891 883 TNF_PROBE_1(tavor_cq_notify_invflags_fail, TAVOR_TNF_ERROR, "",
892 884 tnf_int, flags, flags);
893 885 TAVOR_TNF_EXIT(tavor_cq_notify);
894 886 return (IBT_CQ_NOTIFY_TYPE_INVALID);
895 887 }
896 888
897 889 TAVOR_TNF_EXIT(tavor_cq_notify);
898 890 return (DDI_SUCCESS);
899 891 }
900 892
901 893
/*
 * tavor_cq_poll()
 *    Context: Can be called from interrupt or base context.
 *
 *    Polls up to "num_wc" work completions from the given CQ into the
 *    "wc_p" array.  For each entry actually consumed, ownership of the
 *    CQE is handed back to hardware and (at the end) the CQ doorbell is
 *    rung once to advance the hardware's view of the consumer index.
 *    On return "*num_polled" (if non-NULL) holds the number of
 *    completions copied out; IBT_CQ_EMPTY is returned when none were
 *    available.
 */
int
tavor_cq_poll(tavor_state_t *state, tavor_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	tavor_hw_cqe_t	*cqe;
	uint32_t	cons_indx, wrap_around_mask;
	uint32_t	polled_cnt, num_to_increment;
	int		status;

	TAVOR_TNF_ENTER(tavor_cq_poll);

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		TNF_PROBE_0(tavor_cq_poll_inv_usrmapped_type,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_cq_poll);
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx];

	/* Sync the current CQE to read */
	tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as there the CQE's owned by SW, process
	 * each entry by calling tavor_cq_cqe_consume() and updating the CQ
	 * consumer index.  Note:  We only update the consumer index if
	 * tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.  Otherwise,
	 * it indicates that we are going to "recycle" the CQE (probably
	 * because it is a error CQE and corresponds to more than one
	 * completion).
	 */
	polled_cnt = 0;
	while (TAVOR_CQE_OWNER_IS_SW(cq, cqe)) {
		status = tavor_cq_cqe_consume(state, cq, cqe,
		    &wc_p[polled_cnt++]);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cq, cqe);

			/* Sync the current CQE for device */
			tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORDEV);

			/* Increment the consumer index */
			cons_indx = (cons_indx + 1) & wrap_around_mask;

			/* Update the pointer to the next CQ entry */
			cqe = &cq->cq_buf[cons_indx];

			/* Sync the next CQE to read */
			tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);
		}

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled of the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we have, for example,
	 * pulled from a CQE that we are still in the process of "recycling"
	 * for error purposes, then we would not update the consumer index.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Post doorbell to update the consumer index.  Doorbell
		 * value indicates number of entries consumed (minus 1)
		 */
		if (cons_indx > cq->cq_consindx) {
			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
		} else {
			/* Consumer index wrapped around the end of the CQ */
			num_to_increment = ((cons_indx + cq->cq_bufsz) -
			    cq->cq_consindx) - 1;
		}
		cq->cq_consindx = cons_indx;
		tavor_cq_doorbell(state, TAVOR_CQDB_INCR_CONSINDX,
		    cq->cq_cqnum, num_to_increment);

	} else if (polled_cnt == 0) {
		/*
		 * If the CQ is empty, we can try to free up some of the WRID
		 * list containers.  See tavor_wr.c for more details on this
		 * operation.
		 */
		tavor_wrid_cq_reap(cq);
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Tavor interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * tavor_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) tavor_isr((caddr_t)state, (caddr_t)NULL);
	}

	TAVOR_TNF_EXIT(tavor_cq_poll);
	return (status);
}
1045 1037
1046 1038
/*
 * tavor_cq_handler()
 *    Context: Only called from interrupt context
 *
 *    Handles a completion event EQE: disarms the CQ via the EQ doorbell
 *    and, if the CQ handle is still valid, delivers the IBTF completion
 *    callback.  EQ overflow events are forwarded to the overflow handler
 *    and DDI_FAILURE is returned.
 */
int
tavor_cq_handler(tavor_state_t *state, tavor_eqhdl_t eq,
    tavor_hw_eqe_t *eqe)
{
	tavor_cqhdl_t		cq;
	uint_t			cqnum;
	uint_t			eqe_evttype;

	TAVOR_TNF_ENTER(tavor_cq_handler);

	eqe_evttype = TAVOR_EQE_EVTTYPE_GET(eq, eqe);

	ASSERT(eqe_evttype == TAVOR_EVT_COMPLETION ||
	    eqe_evttype == TAVOR_EVT_EQ_OVERFLOW);

	if (eqe_evttype == TAVOR_EVT_EQ_OVERFLOW) {
		TNF_PROBE_0(tavor_cq_handler_eq_overflow_condition,
		    TAVOR_TNF_ERROR, "");
		tavor_eq_overflow_handler(state, eq, eqe);

		TAVOR_TNF_EXIT(tavor_cq_handler);
		return (DDI_FAILURE);
	}


	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = TAVOR_EQE_CQNUM_GET(eq, eqe);
	cq = tavor_cqhdl_from_cqnum(state, cqnum);

	/*
	 * Post the EQ doorbell to move the CQ to the "disarmed" state.
	 * This operation is to enable subsequent CQ doorbells (e.g. those
	 * that can be rung by tavor_cq_notify() above) to rearm the CQ.
	 */
	tavor_eq_doorbell(state, TAVOR_EQDB_DISARM_CQ, eq->eq_eqnum, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * Lastly, we check if "ts_ibtfpriv" is NULL.  If it is then it
	 * means that we've have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->ts_ibtfpriv != NULL)) {
		TAVOR_DO_IBTF_CQ_CALLB(state, cq);
	} else {
		/*
		 * NOTE(review): both probe arguments log the EQE's cqnum;
		 * "hdl_cqnum" presumably meant cq->cq_cqnum (but cq may be
		 * NULL on this path) -- confirm intent.
		 */
		TNF_PROBE_2(tavor_cq_handler_dropped_event,
		    TAVOR_TNF_ERROR, "", tnf_uint, ev_cqnum, cqnum,
		    tnf_uint, hdl_cqnum, cqnum);
	}

	TAVOR_TNF_EXIT(tavor_cq_handler);
	return (DDI_SUCCESS);
}
1119 1111
1120 1112
/*
 * tavor_cq_err_handler()
 *    Context: Only called from interrupt context
 *
 *    Handles a CQ error event EQE by delivering an IBT_ERROR_CQ async
 *    event to the IBTF (when the CQ handle is still valid).  EQ overflow
 *    events are forwarded to the overflow handler and DDI_FAILURE is
 *    returned.
 */
int
tavor_cq_err_handler(tavor_state_t *state, tavor_eqhdl_t eq,
    tavor_hw_eqe_t *eqe)
{
	tavor_cqhdl_t		cq;
	uint_t			cqnum;
	ibc_async_event_t	event;
	ibt_async_code_t	type;
	uint_t			eqe_evttype;

	TAVOR_TNF_ENTER(tavor_cq_err_handler);

	eqe_evttype = TAVOR_EQE_EVTTYPE_GET(eq, eqe);

	ASSERT(eqe_evttype == TAVOR_EVT_CQ_ERRORS ||
	    eqe_evttype == TAVOR_EVT_EQ_OVERFLOW);

	if (eqe_evttype == TAVOR_EVT_EQ_OVERFLOW) {
		TNF_PROBE_0(tavor_cq_err_handler_eq_overflow_condition,
		    TAVOR_TNF_ERROR, "");
		tavor_eq_overflow_handler(state, eq, eqe);

		TAVOR_TNF_EXIT(tavor_cq_err_handler);
		return (DDI_FAILURE);
	}

	/* cmn_err(CE_CONT, "CQ Error handler\n"); */

	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = TAVOR_EQE_CQNUM_GET(eq, eqe);
	cq = tavor_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * And then we check if "ts_ibtfpriv" is NULL.  If it is then it
	 * means that we've have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->ts_ibtfpriv != NULL)) {
		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
		type		= IBT_ERROR_CQ;

		TAVOR_DO_IBTF_ASYNC_CALLB(state, type, &event);
	} else {
		/*
		 * NOTE(review): as in tavor_cq_handler(), both probe
		 * arguments log the EQE's cqnum -- confirm intent.
		 */
		TNF_PROBE_2(tavor_cq_err_handler_dropped_event,
		    TAVOR_TNF_ERROR, "", tnf_uint, ev_cqnum, cqnum,
		    tnf_uint, hdl_cqnum, cqnum);
	}

	TAVOR_TNF_EXIT(tavor_cq_err_handler);
	return (DDI_SUCCESS);
}
1192 1184
1193 1185
1194 1186 /*
1195 1187 * tavor_cq_refcnt_inc()
1196 1188 * Context: Can be called from interrupt or base context.
1197 1189 */
1198 1190 int
1199 1191 tavor_cq_refcnt_inc(tavor_cqhdl_t cq, uint_t is_special)
1200 1192 {
1201 1193 /*
1202 1194 * Increment the completion queue's reference count. Note: In order
1203 1195 * to ensure compliance with IBA C11-15, we must ensure that a given
1204 1196 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1205 1197 * This is accomplished here by keeping track of how the referenced
1206 1198 * CQ is being used.
1207 1199 */
1208 1200 mutex_enter(&cq->cq_lock);
1209 1201 TNF_PROBE_1_DEBUG(tavor_cq_refcnt_inc, TAVOR_TNF_TRACE, "",
1210 1202 tnf_uint, refcnt, cq->cq_refcnt);
1211 1203 if (cq->cq_refcnt == 0) {
1212 1204 cq->cq_is_special = is_special;
1213 1205 } else {
1214 1206 if (cq->cq_is_special != is_special) {
1215 1207 mutex_exit(&cq->cq_lock);
1216 1208 return (DDI_FAILURE);
1217 1209 }
1218 1210 }
1219 1211 cq->cq_refcnt++;
1220 1212 mutex_exit(&cq->cq_lock);
1221 1213 return (DDI_SUCCESS);
1222 1214 }
1223 1215
1224 1216
1225 1217 /*
1226 1218 * tavor_cq_refcnt_dec()
1227 1219 * Context: Can be called from interrupt or base context.
1228 1220 */
1229 1221 void
1230 1222 tavor_cq_refcnt_dec(tavor_cqhdl_t cq)
1231 1223 {
1232 1224 /* Decrement the completion queue's reference count */
1233 1225 mutex_enter(&cq->cq_lock);
1234 1226 cq->cq_refcnt--;
1235 1227 TNF_PROBE_1_DEBUG(tavor_cq_refcnt_dec, TAVOR_TNF_TRACE, "",
1236 1228 tnf_uint, refcnt, cq->cq_refcnt);
1237 1229 mutex_exit(&cq->cq_lock);
1238 1230 }
1239 1231
1240 1232
1241 1233 /*
1242 1234 * tavor_cq_doorbell()
1243 1235 * Context: Can be called from interrupt or base context.
1244 1236 */
1245 1237 static void
1246 1238 tavor_cq_doorbell(tavor_state_t *state, uint32_t cq_cmd, uint32_t cqn,
1247 1239 uint32_t cq_param)
1248 1240 {
1249 1241 uint64_t doorbell = 0;
1250 1242
1251 1243 /* Build the doorbell from the parameters */
1252 1244 doorbell = ((uint64_t)cq_cmd << TAVOR_CQDB_CMD_SHIFT) |
1253 1245 ((uint64_t)cqn << TAVOR_CQDB_CQN_SHIFT) | cq_param;
1254 1246
1255 1247 TNF_PROBE_1_DEBUG(tavor_cq_doorbell, TAVOR_TNF_TRACE, "",
1256 1248 tnf_ulong, doorbell, doorbell);
1257 1249
1258 1250 /* Write the doorbell to UAR */
1259 1251 TAVOR_UAR_DOORBELL(state, (uint64_t *)&state->ts_uar->cq,
1260 1252 doorbell);
1261 1253 }
1262 1254
1263 1255
1264 1256 /*
1265 1257 * tavor_cqhdl_from_cqnum()
1266 1258 * Context: Can be called from interrupt or base context.
1267 1259 *
1268 1260 * This routine is important because changing the unconstrained
1269 1261 * portion of the CQ number is critical to the detection of a
1270 1262 * potential race condition in the CQ handler code (i.e. the case
1271 1263 * where a CQ is freed and alloc'd again before an event for the
1272 1264 * "old" CQ can be handled).
1273 1265 *
1274 1266 * While this is not a perfect solution (not sure that one exists)
1275 1267 * it does help to mitigate the chance that this race condition will
1276 1268 * cause us to deliver a "stale" event to the new CQ owner. Note:
1277 1269 * this solution does not scale well because the number of constrained
1278 1270 * bits increases (and, hence, the number of unconstrained bits
1279 1271 * decreases) as the number of supported CQs grows. For small and
1280 1272 * intermediate values, it should hopefully provide sufficient
1281 1273 * protection.
1282 1274 */
1283 1275 tavor_cqhdl_t
1284 1276 tavor_cqhdl_from_cqnum(tavor_state_t *state, uint_t cqnum)
1285 1277 {
1286 1278 uint_t cqindx, cqmask;
1287 1279
1288 1280 /* Calculate the CQ table index from the cqnum */
1289 1281 cqmask = (1 << state->ts_cfg_profile->cp_log_num_cq) - 1;
1290 1282 cqindx = cqnum & cqmask;
1291 1283 return (state->ts_cqhdl[cqindx]);
1292 1284 }
1293 1285
1294 1286
/*
 * tavor_cq_cqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 *    Translates one successful CQE into an ibt_wc_t work completion
 *    (work request ID, completion type, flags, and the various CQE
 *    fields).  Error CQEs are handed off to tavor_cq_errcqe_consume(),
 *    whose return value (consume vs. recycle) is propagated back to
 *    the caller.
 */
static int
tavor_cq_cqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
    tavor_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		flags, type, opcode, qpnum, qp1_indx;
	int		status;

	TAVOR_TNF_ENTER(tavor_cq_cqe_consume);

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call tavor_cq_errcqe_consume() and return
	 * whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = TAVOR_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
	    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
		status = tavor_cq_errcqe_consume(state, cq, cqe, wc);
		TAVOR_TNF_EXIT(tavor_cq_cqe_consume);
		return (status);
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = tavor_wrid_get_entry(cq, cqe, NULL);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (TAVOR_CQE_SENDRECV_GET(cq, cqe) != TAVOR_COMPLETION_RECV) {

		/* Send CQE */
		switch (opcode) {
		case TAVOR_CQE_SND_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case TAVOR_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case TAVOR_CQE_SND_SEND_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case TAVOR_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case TAVOR_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case TAVOR_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case TAVOR_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case TAVOR_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		default:
			/*
			 * Unrecognized send opcode: report a local QP
			 * operation error but still consume the entry.
			 */
			TAVOR_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			TNF_PROBE_1(tavor_cq_cqe_consume_unknown_send_type,
			    TAVOR_TNF_ERROR, "", tnf_uint, opcode, opcode);
			TAVOR_TNF_EXIT(tavor_cq_cqe_consume);
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	} else {

		/* Receive CQE */
		switch (opcode & 0x1F) {
		case TAVOR_CQE_RCV_RECV_IMM:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV_IMM2:
			/*
			 * Note:  According to the Tavor PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum = TAVOR_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->ts_spec_qp1->tr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV2:
			type = IBT_WRC_RECV;
			break;

		case TAVOR_CQE_RCV_RDMAWR_IMM:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RDMAWR_IMM2:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:
			/*
			 * Unrecognized receive opcode: report a local QP
			 * operation error but still consume the entry.
			 */
			TAVOR_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			TNF_PROBE_1(tavor_cq_cqe_consume_unknown_rcv_type,
			    TAVOR_TNF_ERROR, "", tnf_uint, opcode, opcode);
			TAVOR_TNF_EXIT(tavor_cq_cqe_consume);
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	}
	wc->wc_type = type;

	/*
	 * Check for GRH, update the flags, then fill in "wc_flags" field
	 * in the work completion
	 */
	if (TAVOR_CQE_GRH_GET(cq, cqe) != 0) {
		flags |= IBT_WC_GRH_PRESENT;
	}
	wc->wc_flags = flags;

	/* If we got here, completion status must be success */
	wc->wc_status = IBT_WC_SUCCESS;

	/*
	 * Parse the remaining contents of the CQE into the work completion.
	 * This means filling in SL, QP number, SLID, immediate data, etc.
	 * Note:  Not all of these fields are valid in a given completion.
	 * Many of them depend on the actual type of completion.  So we fill
	 * in all of the fields and leave it up to the IBTF and consumer to
	 * sort out which are valid based on their context.
	 */
	wc->wc_sl	  = TAVOR_CQE_SL_GET(cq, cqe);
	wc->wc_immed_data = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
	wc->wc_qpn	  = TAVOR_CQE_DQPN_GET(cq, cqe);
	wc->wc_res_hash	  = 0;
	wc->wc_slid	  = TAVOR_CQE_DLID_GET(cq, cqe);
	wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
	wc->wc_pkey_ix	  = (wc->wc_immed_data >> 16);

	/*
	 * Depending on whether the completion was a receive or a send
	 * completion, fill in "bytes transferred" as appropriate.  Also,
	 * if necessary, fill in the "path bits" field.
	 */
	if (TAVOR_CQE_SENDRECV_GET(cq, cqe) == TAVOR_COMPLETION_RECV) {
		wc->wc_path_bits = TAVOR_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cq, cqe);

	} else if ((wc->wc_type == IBT_WRC_RDMAR) ||
	    (wc->wc_type == IBT_WRC_CSWAP) || (wc->wc_type == IBT_WRC_FADD)) {
		wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cq, cqe);
	}

	TAVOR_TNF_EXIT(tavor_cq_cqe_consume);
	return (TAVOR_CQ_SYNC_AND_DB);
}
1465 1457
1466 1458
/*
 * tavor_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 *    Translates an error CQE into an ibt_wc_t completion status.  A
 *    single error CQE can represent multiple flushed work requests, so
 *    this routine either consumes the entry (TAVOR_CQ_SYNC_AND_DB) or
 *    rewrites it in place for reuse by the next poll
 *    (TAVOR_CQ_RECYCLE_ENTRY).
 */
static int
tavor_cq_errcqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
    tavor_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint64_t		next_wqeaddr;
	uint32_t		imm_eth_pkey_cred;
	uint_t			nextwqesize, dbd;
	uint_t			doorbell_cnt, status;
	tavor_wrid_entry_t	wre;

	TAVOR_TNF_ENTER(tavor_cq_errcqe_consume);

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = tavor_wrid_get_entry(cq, cqe, &wre);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status here.
	 */
	imm_eth_pkey_cred = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
	status = imm_eth_pkey_cred >> TAVOR_CQE_ERR_STATUS_SHIFT;
	/* Map the hardware error status to its IBTF equivalent */
	switch (status) {
	case TAVOR_CQE_LOC_LEN_ERR:
		status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case TAVOR_CQE_LOC_OP_ERR:
		status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case TAVOR_CQE_LOC_PROT_ERR:
		status = IBT_WC_LOCAL_PROTECT_ERR;
		break;

	case TAVOR_CQE_WR_FLUSHED_ERR:
		status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case TAVOR_CQE_MW_BIND_ERR:
		status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case TAVOR_CQE_BAD_RESPONSE_ERR:
		status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case TAVOR_CQE_LOCAL_ACCESS_ERR:
		status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_INV_REQ_ERR:
		status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case TAVOR_CQE_REM_ACC_ERR:
		status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_OP_ERR:
		status = IBT_WC_REMOTE_OP_ERR;
		break;

	case TAVOR_CQE_TRANS_TO_ERR:
		status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case TAVOR_CQE_RNRNAK_TO_ERR:
		status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Tavor driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
	 *    case TAVOR_CQE_REM_INV_RD_REQ_ERR:
	 *    case TAVOR_CQE_EEC_REM_ABORTED_ERR:
	 *    case TAVOR_CQE_INV_EEC_NUM_ERR:
	 *    case TAVOR_CQE_INV_EEC_STATE_ERR:
	 *    case TAVOR_CQE_LOC_EEC_ERR:
	 */

	default:
		TAVOR_WARNING(state, "unknown error CQE status");
		status = IBT_WC_LOCAL_QP_OP_ERR;
		TNF_PROBE_1(tavor_cq_errcqe_consume_unknown_status,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		break;
	}
	wc->wc_status = status;

	/*
	 * Now we do all the checking that's necessary to handle completion
	 * queue entry "recycling"
	 *
	 * It is not necessary here to try to sync the WQE as we are only
	 * attempting to read from the Work Queue (and hardware does not
	 * write to it).
	 */

	/*
	 * We can get doorbell info, WQE address, size for the next WQE
	 * from the "wre" (which was filled in above in the call to the
	 * tavor_wrid_get_entry() routine)
	 */
	dbd = (wre.wr_signaled_dbd & TAVOR_WRID_ENTRY_DOORBELLED) ? 1 : 0;
	next_wqeaddr = wre.wr_wqeaddrsz;
	nextwqesize  = wre.wr_wqeaddrsz & TAVOR_WQE_NDS_MASK;

	/*
	 * Get the doorbell count from the CQE.  This indicates how many
	 * completions this one CQE represents.
	 */
	doorbell_cnt = imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;

	/*
	 * Determine if we're ready to consume this CQE yet or not.  If the
	 * next WQE has size zero (i.e. no next WQE) or if the doorbell count
	 * is down to zero, then this is the last/only completion represented
	 * by the current CQE (return TAVOR_CQ_SYNC_AND_DB).  Otherwise, the
	 * current CQE needs to be recycled (see below).
	 */
	if ((nextwqesize == 0) || ((doorbell_cnt == 0) && (dbd == 1))) {
		/*
		 * Consume the CQE
		 *    Return status to indicate that doorbell and sync may be
		 *    necessary.
		 */
		TAVOR_TNF_EXIT(tavor_cq_errcqe_consume);
		return (TAVOR_CQ_SYNC_AND_DB);

	} else {
		/*
		 * Recycle the CQE for use in the next PollCQ() call
		 *    Decrement the doorbell count, modify the error status,
		 *    and update the WQE address and size (to point to the
		 *    next WQE on the chain.  Put these update entries back
		 *    into the CQE.
		 *    Despite the fact that we have updated the CQE, it is not
		 *    necessary for us to attempt to sync this entry just yet
		 *    as we have not changed the "hardware's view" of the
		 *    entry (i.e. we have not modified the "owner" bit - which
		 *    is all that the Tavor hardware really cares about).
		 */
		doorbell_cnt = doorbell_cnt - dbd;
		TAVOR_CQE_IMM_ETH_PKEY_CRED_SET(cq, cqe,
		    ((TAVOR_CQE_WR_FLUSHED_ERR << TAVOR_CQE_ERR_STATUS_SHIFT) |
		    (doorbell_cnt & TAVOR_CQE_ERR_DBDCNT_MASK)));
		TAVOR_CQE_WQEADDRSZ_SET(cq, cqe,
		    TAVOR_QP_WQEADDRSZ(next_wqeaddr, nextwqesize));

		TAVOR_TNF_EXIT(tavor_cq_errcqe_consume);
		return (TAVOR_CQ_RECYCLE_ENTRY);
	}
}
1629 1621
1630 1622
1631 1623 /*
1632 1624 * tavor_cqe_sync()
1633 1625 * Context: Can be called from interrupt or base context.
1634 1626 */
1635 1627 static void
1636 1628 tavor_cqe_sync(tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe, uint_t flag)
1637 1629 {
1638 1630 ddi_dma_handle_t dmahdl;
1639 1631 off_t offset;
1640 1632 int status;
1641 1633
1642 1634 TAVOR_TNF_ENTER(tavor_cqe_sync);
1643 1635
1644 1636 /* Determine if CQ needs to be synced or not */
1645 1637 if (cq->cq_sync == 0) {
1646 1638 TAVOR_TNF_EXIT(tavor_cqe_sync);
1647 1639 return;
1648 1640 }
1649 1641
1650 1642 /* Get the DMA handle from CQ context */
1651 1643 dmahdl = cq->cq_mrhdl->mr_bindinfo.bi_dmahdl;
1652 1644
1653 1645 /* Calculate offset of next CQE */
1654 1646 offset = (off_t)((uintptr_t)cqe - (uintptr_t)&cq->cq_buf[0]);
1655 1647 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_cqe_t), flag);
1656 1648 if (status != DDI_SUCCESS) {
1657 1649 TNF_PROBE_0(tavor_cqe_sync_getnextentry_fail,
1658 1650 TAVOR_TNF_ERROR, "");
1659 1651 TAVOR_TNF_EXIT(tavor_cqe_sync);
1660 1652 return;
1661 1653 }
1662 1654
1663 1655 TAVOR_TNF_EXIT(tavor_cqe_sync);
1664 1656 }
1665 1657
1666 1658
1667 1659 /*
1668 1660 * tavor_cq_resize_helper()
1669 1661 * Context: Can be called only from user or kernel context.
1670 1662 */
1671 1663 static void
1672 1664 tavor_cq_resize_helper(tavor_cqhdl_t cq, tavor_hw_cqe_t *new_cqbuf,
1673 1665 uint32_t old_cons_indx, uint32_t num_newcqe)
1674 1666 {
1675 1667 tavor_hw_cqe_t *old_cqe, *new_cqe;
1676 1668 uint32_t new_cons_indx, wrap_around_mask;
1677 1669 int i;
1678 1670
1679 1671 TAVOR_TNF_ENTER(tavor_cq_resize_helper);
1680 1672
1681 1673 ASSERT(MUTEX_HELD(&cq->cq_lock));
1682 1674
1683 1675 /* Get the consumer index */
1684 1676 new_cons_indx = 0;
1685 1677
1686 1678 /*
1687 1679 * Calculate the wrap around mask. Note: This operation only works
1688 1680 * because all Tavor completion queues have power-of-2 sizes
1689 1681 */
1690 1682 wrap_around_mask = (cq->cq_bufsz - 1);
1691 1683
1692 1684 /*
1693 1685 * Calculate the pointers to the first CQ entry (in the "old" CQ)
1694 1686 * and the first CQ entry in the "new" CQ
1695 1687 */
1696 1688 old_cqe = &cq->cq_buf[old_cons_indx];
1697 1689 new_cqe = &new_cqbuf[new_cons_indx];
1698 1690
1699 1691 /* Sync entire "old" CQ for use by software (if necessary). */
1700 1692 if (cq->cq_sync) {
1701 1693 (void) ddi_dma_sync(cq->cq_mrhdl->mr_bindinfo.bi_dmahdl,
1702 1694 0, cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORCPU);
1703 1695 }
1704 1696
1705 1697 /*
1706 1698 * Keep pulling entries from the "old" CQ until we find an entry owned
1707 1699 * by the hardware. Process each entry by copying it into the "new"
1708 1700 * CQ and updating respective indices and pointers in the "old" CQ.
1709 1701 */
1710 1702 for (i = 0; i < num_newcqe; i++) {
1711 1703
1712 1704 /* Copy this old CQE into the "new_cqe" pointer */
1713 1705 bcopy(old_cqe, new_cqe, sizeof (tavor_hw_cqe_t));
1714 1706
1715 1707 /* Increment the consumer index (for both CQs) */
1716 1708 old_cons_indx = (old_cons_indx + 1) & wrap_around_mask;
1717 1709 new_cons_indx = (new_cons_indx + 1);
1718 1710
1719 1711 /* Update the pointer to the next CQ entry */
1720 1712 old_cqe = &cq->cq_buf[old_cons_indx];
1721 1713 new_cqe = &new_cqbuf[new_cons_indx];
1722 1714 }
1723 1715
1724 1716 TAVOR_TNF_EXIT(tavor_cq_resize_helper);
1725 1717 }
1726 1718
/*
 * tavor_cq_srq_entries_flush()
 *    Context: Can be called from interrupt or base context.
 *
 * Flushes all receive completions for the given QP out of its receive
 * CQ.  Matching receive CQEs have their work requests returned to the
 * SRQ free list (via tavor_wrid_find_match_srq()); all other CQEs are
 * compacted toward the tail of the CQ, the freed slots are handed back
 * to hardware ownership, and the consumer index doorbell is rung for
 * the entries consumed.  Caller must hold the CQ lock.
 */
void
tavor_cq_srq_entries_flush(tavor_state_t *state, tavor_qphdl_t qp)
{
	tavor_cqhdl_t		cq;
	tavor_workq_hdr_t	*wqhdr;
	tavor_hw_cqe_t		*cqe;
	tavor_hw_cqe_t		*next_cqe;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, indx;
	uint32_t		num_to_increment;
	int			cqe_qpnum, cqe_type;
	int			outstanding_cqes, removed_cqes;
	int			i;

	/* Caller must already hold the receive CQ's lock */
	ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock));

	cq = qp->qp_rq_cqhdl;
	wqhdr = qp->qp_rq_wqhdr;

	/* Only QPs attached to an SRQ are ever flushed this way */
	ASSERT(wqhdr->wq_wrid_post != NULL);
	ASSERT(wqhdr->wq_wrid_post->wl_srq_en != 0);

	/*
	 * Check for user-mapped CQ memory.  Note:  We do not allow kernel
	 * clients to modify any userland mapping CQ.  If the CQ is
	 * user-mapped, then we simply return here, and this "flush" function
	 * becomes a NO-OP in this case.
	 */
	if (cq->cq_is_umap) {
		return;
	}

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx];

	/* Sync the current CQE to read */
	tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (TAVOR_CQE_OWNER_IS_SW(cq, cqe)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx = (tail_cons_indx + 1) & wrap_around_mask;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx];

		/* sync the next cqe to read */
		tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1) & wrap_around_mask;

	/*
	 * Walk backward from the last software-owned entry toward the
	 * consumer index.  'check_indx' scans every entry; 'new_indx' only
	 * advances past entries we keep, so kept entries are compacted
	 * toward the tail of the CQ.
	 */
	for (i = 0; i < outstanding_cqes; i++) {
		cqe = &cq->cq_buf[check_indx];

		/* Grab QP number from CQE */
		cqe_qpnum = TAVOR_CQE_QPNUM_GET(cq, cqe);
		cqe_type = TAVOR_CQE_SENDRECV_GET(cq, cqe);

		/*
		 * If the QP number is the same in the CQE as the QP that we
		 * have on this SRQ, then we must free up the entry off the
		 * SRQ.  We also make sure that the completion type is of the
		 * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
		 * this CQ will be left as-is.  The handling of returning
		 * entries back to HW ownership happens further down.
		 */
		if (cqe_qpnum == qp->qp_qpnum &&
		    cqe_type == TAVOR_COMPLETION_RECV) {

			/* Add back to SRQ free list */
			(void) tavor_wrid_find_match_srq(wqhdr->wq_wrid_post,
			    cq, cqe);
		} else {
			/* Do Copy */
			if (check_indx != new_indx) {
				next_cqe = &cq->cq_buf[new_indx];

				/*
				 * Copy the CQE into the "next_cqe"
				 * pointer.
				 */
				bcopy(cqe, next_cqe, sizeof (tavor_hw_cqe_t));
			}
			new_indx = (new_indx - 1) & wrap_around_mask;
		}
		/* Move index to next CQE to check */
		check_indx = (check_indx - 1) & wrap_around_mask;
	}

	/* Initialize removed cqes count */
	removed_cqes = 0;

	/* If an entry was removed */
	if (check_indx != new_indx) {

		/*
		 * Set current pointer back to the beginning consumer index.
		 * At this point, all unclaimed entries have been copied to the
		 * index specified by 'new_indx'.  This 'new_indx' will be used
		 * as the new consumer index after we mark all freed entries as
		 * having HW ownership.  We do that here.
		 */

		/*
		 * NOTE(review): this loop's "indx <= new_indx" termination
		 * assumes the [cons_indx, new_indx] range does not wrap
		 * around the end of the CQ buffer; if it wrapped, the loop
		 * would exit early — TODO confirm wraparound cannot occur
		 * here.
		 */

		/* Loop through all entries until we reach our new pointer */
		for (indx = cons_indx; indx <= new_indx;
		    indx = (indx + 1) & wrap_around_mask) {
			removed_cqes++;
			cqe = &cq->cq_buf[indx];

			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cq, cqe);
		}
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1) & wrap_around_mask;

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Post doorbell to update the consumer index.  Doorbell
		 * value indicates number of entries consumed (minus 1)
		 */
		if (cons_indx > cq->cq_consindx) {
			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
		} else {
			num_to_increment = ((cons_indx + cq->cq_bufsz) -
			    cq->cq_consindx) - 1;
		}
		cq->cq_consindx = cons_indx;

		tavor_cq_doorbell(state, TAVOR_CQDB_INCR_CONSINDX,
		    cq->cq_cqnum, num_to_increment);
	}
}
↓ open down ↓ |
1235 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX