8368 remove warlock leftovers from usr/src/uts
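
For context: warlock was Sun's old static lock-analysis tool for drivers, and
the leftovers removed by this change are the _NOTE() annotations (from
<sys/note.h>) and the #ifdef __lock_lint blocks that existed only to steer
that analysis.  Warlock could not follow conditional locking, so the
__lock_lint build pretended such locks were taken unconditionally.  A minimal
sketch of the pattern, using hypothetical names (example_state_t and
example_reset() are illustrations, not code from this webrev):

#include <sys/note.h>
#include <sys/ksynch.h>

/* Hypothetical state structure, for illustration only */
typedef struct example_state {
	kmutex_t	es_srq_lock;
	int		es_srq_enabled;
	int		es_count;
} example_state_t;

static void
example_reset(example_state_t *sp)
{
#ifdef __lock_lint
	mutex_enter(&sp->es_srq_lock);		/* analysis-only: unconditional */
#else
	if (sp->es_srq_enabled) {		/* real code: conditional */
		mutex_enter(&sp->es_srq_lock);
	}
#endif
	/* Annotation telling warlock this object is effectively private */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp))
	sp->es_count = 0;
#ifdef __lock_lint
	mutex_exit(&sp->es_srq_lock);
#else
	if (sp->es_srq_enabled) {
		mutex_exit(&sp->es_srq_lock);
	}
#endif
}

With warlock gone from the build, only the #else branches carry meaning, which
is why the new version of each hunk (second listing below) keeps just the
conditional form and drops every _NOTE().
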


2344         tavor_wrid_wqhdr_lock_both(qp);
2345         swq = tavor_wrid_wqhdr_find(qp->qp_sq_cqhdl, qp->qp_qpnum,
2346             TAVOR_WR_SEND);
2347         if (swq == NULL) {
2348                 /* Couldn't find matching work queue header, create it */
2349                 create_new_swq = create_wql = 1;
2350                 swq = tavor_wrid_wqhdr_create(state, qp->qp_sq_cqhdl,
2351                     qp->qp_qpnum, TAVOR_WR_SEND, create_wql);
2352                 if (swq == NULL) {
2353                         /*
2354                          * If we couldn't find/allocate space for the workq
2355                          * header, then drop the lock(s) and return failure.
2356                          */
2357                         tavor_wrid_wqhdr_unlock_both(qp);
2358                         TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
2359                             TAVOR_TNF_ERROR, "");
2360                         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2361                         return (ibc_get_ci_failure(0));
2362                 }
2363         }
2364         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swq))
2365         qp->qp_sq_wqhdr = swq;
2366         swq->wq_size = qp->qp_sq_bufsz;
2367         swq->wq_head = 0;
2368         swq->wq_tail = 0;
2369         swq->wq_full = 0;
2370 
2371         /*
2372          * Allocate space for the tavor_wrid_entry_t container
2373          */
2374         s_wridlist = tavor_wrid_get_list(swq->wq_size);
2375         if (s_wridlist == NULL) {
2376                 /*
2377                  * If we couldn't allocate space for tracking the WRID
2378                  * entries, then cleanup the workq header from above (if
2379                  * necessary, i.e. if we created the workq header).  Then
2380                  * drop the lock(s) and return failure.
2381                  */
2382                 if (create_new_swq) {
2383                         tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
2384                 }
2385 
2386                 tavor_wrid_wqhdr_unlock_both(qp);
2387                 TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
2388                     TAVOR_TNF_ERROR, "");
2389                 TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2390                 return (ibc_get_ci_failure(0));
2391         }
2392         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_wridlist))
2393         s_wridlist->wl_wqhdr = swq;
2394 
2395         /* Chain the new WRID list container to the workq hdr list */
2396         mutex_enter(&swq->wq_wrid_wql->wql_lock);
2397         tavor_wrid_wqhdr_add(swq, s_wridlist);
2398         mutex_exit(&swq->wq_wrid_wql->wql_lock);
2399 
2400         qp_srq_en = qp->qp_srq_en;
2401 
2402 #ifdef __lock_lint
2403         mutex_enter(&qp->qp_srqhdl->srq_lock);
2404 #else
2405         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2406                 mutex_enter(&qp->qp_srqhdl->srq_lock);
2407         }
2408 #endif
2409         /*
2410          * Now we repeat all the above operations for the receive work queue,
2411          * or shared receive work queue.
2412          *
2413          * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
2414          */
2415         rwq = tavor_wrid_wqhdr_find(qp->qp_rq_cqhdl, qp->qp_qpnum,
2416             TAVOR_WR_RECV);
2417         if (rwq == NULL) {
2418                 create_new_rwq = create_wql = 1;
2419 
2420                 /*
2421                  * If this QP is associated with an SRQ, and this isn't the
2422                  * first QP on the SRQ, then the 'srq_wrid_wql' will already be
2423                  * created.  Since the WQL is created at 'wqhdr_create' time we
2424          * pass in the flag 'create_wql' here as 0 if we have
2425          * already created it.  Later on below we setup the WQL and
2426          * rwq information based on the existing SRQ info.
2427                  */
2428                 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
2429                     qp->qp_srqhdl->srq_wrid_wql != NULL) {
2430                         create_wql = 0;
2431                 }
2432 
2433                 rwq = tavor_wrid_wqhdr_create(state, qp->qp_rq_cqhdl,
2434                     qp->qp_qpnum, TAVOR_WR_RECV, create_wql);
2435                 if (rwq == NULL) {
2436                         /*
2437                          * If we couldn't find/allocate space for the workq
2438                          * header, then free all the send queue resources we
2439                          * just allocated and setup (above), drop the lock(s)
2440                          * and return failure.
2441                          */
2442                         mutex_enter(&swq->wq_wrid_wql->wql_lock);
2443                         tavor_wrid_wqhdr_remove(swq, s_wridlist);
2444                         mutex_exit(&swq->wq_wrid_wql->wql_lock);
2445                         if (create_new_swq) {
2446                                 tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl,
2447                                     swq);
2448                         }
2449 
2450 #ifdef __lock_lint
2451                         mutex_exit(&qp->qp_srqhdl->srq_lock);
2452 #else
2453                         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2454                                 mutex_exit(&qp->qp_srqhdl->srq_lock);
2455                         }
2456 #endif
2457 
2458                         tavor_wrid_wqhdr_unlock_both(qp);
2459                         TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
2460                             TAVOR_TNF_ERROR, "");
2461                         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2462                         return (ibc_get_ci_failure(0));
2463                 }
2464         }
2465         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rwq))
2466 
2467         /*
2468          * Setup receive workq hdr
2469          *
2470          * If the QP is on an SRQ, we setup the SRQ specific fields,
2471          * keeping a copy of the rwq pointer, setting the rwq bufsize
2472          * appropriately, and initializing our part of the WQLock.
2473          *
2474          * In the normal QP case, the QP recv queue bufsize is used.
2475          */
2476         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2477                 rwq->wq_size = qp->qp_srqhdl->srq_wq_bufsz;
2478                 if (qp->qp_srqhdl->srq_wrid_wql == NULL) {
2479                         qp->qp_srqhdl->srq_wrid_wql = rwq->wq_wrid_wql;
2480                 } else {
2481                         rwq->wq_wrid_wql = qp->qp_srqhdl->srq_wrid_wql;
2482                 }
2483                 tavor_wql_refcnt_inc(qp->qp_srqhdl->srq_wrid_wql);
2484 
2485         } else {


2514          * If the memory allocation failed for r_wridlist (or the SRQ pointer
2515          * is mistakenly NULL), we cleanup our previous swq allocation from
2516          * above
2517          */
2518         if (r_wridlist == NULL) {
2519                 /*
2520                  * If we couldn't allocate space for tracking the WRID
2521                  * entries, then cleanup all the stuff from above.  Then
2522                  * drop the lock(s) and return failure.
2523                  */
2524                 mutex_enter(&swq->wq_wrid_wql->wql_lock);
2525                 tavor_wrid_wqhdr_remove(swq, s_wridlist);
2526                 mutex_exit(&swq->wq_wrid_wql->wql_lock);
2527                 if (create_new_swq) {
2528                         tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
2529                 }
2530                 if (create_new_rwq) {
2531                         tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, rwq);
2532                 }
2533 
2534 #ifdef __lock_lint
2535                 mutex_exit(&qp->qp_srqhdl->srq_lock);
2536 #else
2537                 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2538                         mutex_exit(&qp->qp_srqhdl->srq_lock);
2539                 }
2540 #endif
2541 
2542                 tavor_wrid_wqhdr_unlock_both(qp);
2543                 TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
2544                     TAVOR_TNF_ERROR, "");
2545                 TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2546                 return (ibc_get_ci_failure(0));
2547         }
2548         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*r_wridlist))
2549 
2550         /*
2551          * Initialize the wridlist
2552          *
2553          * In the normal QP case, there is no special initialization needed.
2554          * We simply setup the wridlist backpointer to be the receive wqhdr
2555          * (rwq).
2556          *
2557          * But in the SRQ case, there is no backpointer to the wqhdr possible.
2558          * Instead we set 'wl_srq_en', specifying this wridlist is on an SRQ
2559          * and thus potentially shared across multiple QPs on the SRQ.  We
2560          * also setup the srq_wridlist pointer to be the r_wridlist, and
2561          * initialize the freelist to an invalid index.  This srq_wridlist
2562          * pointer is used above on future moves from_reset to let us know that
2563          * the srq_wridlist has been initialized already.
2564          *
2565          * And finally, if we are in a non-UMAP case, we setup the srq wrid
2566          * free list.
2567          */
2568         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
2569             qp->qp_srqhdl->srq_wridlist == NULL) {
2570                 r_wridlist->wl_srq_en = 1;
2571                 r_wridlist->wl_free_list_indx = -1;
2572                 qp->qp_srqhdl->srq_wridlist = r_wridlist;
2573 
2574                 /* Initialize srq wrid free list */
2575                 if (qp->qp_srqhdl->srq_is_umap == 0) {
2576                         mutex_enter(&rwq->wq_wrid_wql->wql_lock);
2577                         tavor_wrid_list_srq_init(r_wridlist, qp->qp_srqhdl, 0);
2578                         mutex_exit(&rwq->wq_wrid_wql->wql_lock);
2579                 }
2580         } else {
2581                 r_wridlist->wl_wqhdr = rwq;
2582         }
2583 
2584         /* Chain the WRID list "container" to the workq hdr list */
2585         mutex_enter(&rwq->wq_wrid_wql->wql_lock);
2586         tavor_wrid_wqhdr_add(rwq, r_wridlist);
2587         mutex_exit(&rwq->wq_wrid_wql->wql_lock);
2588 
2589 #ifdef __lock_lint
2590         mutex_exit(&qp->qp_srqhdl->srq_lock);
2591 #else
2592         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2593                 mutex_exit(&qp->qp_srqhdl->srq_lock);
2594         }
2595 #endif
2596 
2597         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*r_wridlist))
2598         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rwq))
2599         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_wridlist))
2600         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*swq))
2601 
2602         tavor_wrid_wqhdr_unlock_both(qp);
2603         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2604         return (DDI_SUCCESS);
2605 }
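
Every failure leg in the function above follows the same unwind discipline:
release exactly what the function created earlier, in reverse order, then
drop the locks and return failure.  A condensed, self-contained sketch of
that shape (example_pair_t and example_pair_setup() are hypothetical, not
part of this driver):

#include <sys/kmem.h>

typedef struct example_pair {
	int	*ep_first;
	int	*ep_second;
	size_t	ep_len;
} example_pair_t;

static int
example_pair_setup(example_pair_t *p, size_t n)
{
	p->ep_len = n * sizeof (int);

	p->ep_first = kmem_zalloc(p->ep_len, KM_NOSLEEP);
	if (p->ep_first == NULL)
		return (-1);

	p->ep_second = kmem_zalloc(p->ep_len, KM_NOSLEEP);
	if (p->ep_second == NULL) {
		/* Unwind the earlier allocation before failing */
		kmem_free(p->ep_first, p->ep_len);
		p->ep_first = NULL;
		return (-1);
	}
	return (0);
}
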
2606 
2607 
2608 /*
2609  * tavor_wrid_to_reset_handling()
2610  *    Context: Can be called from interrupt or base context.
2611  */
2612 void
2613 tavor_wrid_to_reset_handling(tavor_state_t *state, tavor_qphdl_t qp)
2614 {
2615         uint_t          free_wqhdr = 0;
2616 
2617         TAVOR_TNF_ENTER(tavor_wrid_to_reset_handling);
2618 
2619         /*
2620          * For each of this QP's Work Queues, move the WRID "container" to
2621          * the "reapable" list.  Although there may still be unpolled


3106         /*
3107          * The first step is to walk the "reapable" list and free up those
3108          * containers.  This is necessary because the containers on the
3109          * reapable list are not otherwise connected to the work queue headers
3110          * anymore.
3111          */
3112         tavor_wrid_cq_reap(cq);
3113 
3114         /* Now lock the list of work queues associated with this CQ */
3115         mutex_enter(&cq->cq_wrid_wqhdr_lock);
3116 
3117         /*
3118          * Walk the list of work queue headers and free up all the WRID list
3119          * containers chained to it.  Note: We don't need to grab the locks
3120          * for each of the individual WRID lists here because the only way
3121          * things can be added or removed from the list at this point would be
3122          * through posting a work request to a QP.  But if we've come this far,
3123          * then we can be assured that there are no longer any QPs associated
3124          * with the CQ that we are trying to free.
3125          */
3126 #ifdef __lock_lint
3127         tavor_wrid_wqhdr_compare(NULL, NULL);
3128 #endif
3129         treep = &cq->cq_wrid_wqhdr_avl_tree;
3130         while ((curr = avl_destroy_nodes(treep, &cookie)) != NULL) {
3131                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*curr))
3132                 container = curr->wq_wrid_poll;
3133                 while (container != NULL) {
3134                         to_free   = container;
3135                         container = container->wl_next;
3136                         /*
3137                          * If reaping the WRID list containers pulls the last
3138                          * container from the given work queue header, then
3139                          * we free the work queue header as well.  Note: we
3140                          * ignore the return value because we know that the
3141                          * work queue header should always be freed once the
3142                          * list of containers has come to an end.
3143                          */
3144                         (void) tavor_wrid_list_reap(to_free);
3145                         if (container == NULL) {
3146                                 tavor_cq_wqhdr_remove(cq, curr);
3147                         }
3148                 }
3149         }
3150         avl_destroy(treep);
3151 
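
The loop above is the standard illumos AVL teardown idiom: avl_destroy_nodes()
hands back one node at a time without rebalancing the tree, driven by an
opaque cookie that starts out NULL, and avl_destroy() then releases the
tree's own bookkeeping.  A minimal, self-contained sketch (example_node_t is
hypothetical):

#include <sys/avl.h>
#include <sys/kmem.h>

typedef struct example_node {
	avl_node_t	en_avl;
	int		en_key;
} example_node_t;

/* Empty an AVL tree, freeing each node, then destroy the tree itself */
static void
example_tree_teardown(avl_tree_t *tree)
{
	void		*cookie = NULL;
	example_node_t	*node;

	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
		kmem_free(node, sizeof (example_node_t));
	avl_destroy(tree);
}
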


3168          * The WRID list "container" consists of the tavor_wrid_list_hdr_t,
3169          * which holds the pointers necessary for maintaining the "reapable"
3170          * list, chaining together multiple "containers" old and new, and
3171          * tracking the head, tail, size, etc. for each container.
3172          *
3173          * The "container" also holds all the tavor_wrid_entry_t's, which are
3174          * allocated separately, one for each entry on the corresponding work
3175          * queue.
3176          */
3177         size = sizeof (tavor_wrid_list_hdr_t);
3178 
3179         /*
3180          * Note that this allocation has to be a NOSLEEP operation here
3181          * because we are holding the "wqhdr_list_lock" and, therefore,
3182          * could get raised to the interrupt level.
3183          */
3184         wridlist = (tavor_wrid_list_hdr_t *)kmem_zalloc(size, KM_NOSLEEP);
3185         if (wridlist == NULL) {
3186                 return (NULL);
3187         }
3188         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wridlist))
3189 
3190         /* Complete the "container" initialization */
3191         wridlist->wl_size = qsize;
3192         wridlist->wl_full = 0;
3193         wridlist->wl_head = 0;
3194         wridlist->wl_tail = 0;
3195         wridlist->wl_wre = (tavor_wrid_entry_t *)kmem_zalloc(qsize *
3196             sizeof (tavor_wrid_entry_t), KM_NOSLEEP);
3197         if (wridlist->wl_wre == NULL) {
3198                 kmem_free(wridlist, size);
3199                 return (NULL);
3200         }
3201         wridlist->wl_wre_old_tail  = NULL;
3202         wridlist->wl_reap_next = NULL;
3203         wridlist->wl_next  = NULL;
3204         wridlist->wl_prev  = NULL;
3205         wridlist->wl_srq_en = 0;
3206 
3207         return (wridlist);
3208 }
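
As the comments in tavor_wrid_get_list() note, both allocations must be
KM_NOSLEEP because the caller can hold a lock that is also taken at interrupt
level, and a sleeping allocation there could deadlock.  The resulting shape,
as a self-contained sketch (example_make_buf() is a hypothetical name):

#include <sys/kmem.h>

/*
 * In a context that must not block (interrupt level, or while holding a
 * lock shared with interrupt code), allocate with KM_NOSLEEP and treat
 * NULL as an expected, recoverable outcome rather than sleeping.
 */
static int *
example_make_buf(size_t nelem)
{
	int	*buf;

	buf = kmem_zalloc(nelem * sizeof (int), KM_NOSLEEP);
	if (buf == NULL)
		return (NULL);		/* caller must handle the failure */
	return (buf);
}
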


3310  *    Context: Can be called from interrupt or base context.
3311  */
3312 static tavor_workq_hdr_t *
3313 tavor_wrid_wqhdr_find(tavor_cqhdl_t cq, uint_t qpn, uint_t wq_type)
3314 {
3315         tavor_workq_hdr_t       *curr;
3316         tavor_workq_compare_t   cmp;
3317 
3318         TAVOR_TNF_ENTER(tavor_wrid_wqhdr_find);
3319 
3320         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3321 
3322         /*
3323          * Walk the CQ's work queue list, trying to find a send or recv queue
3324          * with the same QP number.  We do this even if we are going to later
3325          * create a new entry because it helps us easily find the end of the
3326          * list.
3327          */
3328         cmp.cmp_qpn = qpn;
3329         cmp.cmp_type = wq_type;
3330 #ifdef __lock_lint
3331         tavor_wrid_wqhdr_compare(NULL, NULL);
3332 #endif
3333         curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);
3334 
3335         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_find);
3336         return (curr);
3337 }
3338 
3339 
3340 /*
3341  * tavor_wrid_wqhdr_create()
3342  *    Context: Can be called from interrupt or base context.
3343  */
3344 static tavor_workq_hdr_t *
3345 tavor_wrid_wqhdr_create(tavor_state_t *state, tavor_cqhdl_t cq, uint_t qpn,
3346     uint_t wq_type, uint_t create_wql)
3347 {
3348         tavor_workq_hdr_t       *wqhdr_tmp;
3349 
3350         TAVOR_TNF_ENTER(tavor_wrid_wqhdr_create);
3351 
3352         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3353 
3354         /*
3355          * Allocate space for a work queue header structure and initialize it.
3356          * Each work queue header structure includes a "wq_wrid_wql"
3357          * which needs to be initialized.  Note that this allocation has to be
3358          * a NOSLEEP operation because we are holding the "cq_wrid_wqhdr_lock"
3359          * and, therefore, could get raised to the interrupt level.
3360          */
3361         wqhdr_tmp = (tavor_workq_hdr_t *)kmem_zalloc(
3362             sizeof (tavor_workq_hdr_t), KM_NOSLEEP);
3363         if (wqhdr_tmp == NULL) {
3364                 TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3365                 return (NULL);
3366         }
3367         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr_tmp))
3368         wqhdr_tmp->wq_qpn    = qpn;
3369         wqhdr_tmp->wq_type   = wq_type;
3370 
3371         if (create_wql) {
3372                 wqhdr_tmp->wq_wrid_wql = tavor_wrid_wql_create(state);
3373                 if (wqhdr_tmp->wq_wrid_wql == NULL) {
3374                         kmem_free(wqhdr_tmp, sizeof (tavor_workq_hdr_t));
3375                         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3376                         return (NULL);
3377                 }
3378         }
3379 
3380         wqhdr_tmp->wq_wrid_poll = NULL;
3381         wqhdr_tmp->wq_wrid_post = NULL;
3382 
3383         /* Chain the newly allocated work queue header to the CQ's list */
3384         tavor_cq_wqhdr_add(cq, wqhdr_tmp);
3385 
3386         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3387         return (wqhdr_tmp);


3570 
3571         mutex_exit(&wqhdr->wq_wrid_wql->wql_lock);
3572 
3573         TAVOR_TNF_EXIT(tavor_wrid_list_reap);
3574         return (consume_wqhdr);
3575 }
3576 
3577 
3578 /*
3579  * tavor_wrid_wqhdr_lock_both()
3580  *    Context: Can be called from interrupt or base context.
3581  */
3582 static void
3583 tavor_wrid_wqhdr_lock_both(tavor_qphdl_t qp)
3584 {
3585         tavor_cqhdl_t   sq_cq, rq_cq;
3586 
3587         sq_cq = qp->qp_sq_cqhdl;
3588         rq_cq = qp->qp_rq_cqhdl;
3589 
3590 _NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))
3591 _NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))
3592 
3593         /*
3594          * If both work queues (send and recv) share a completion queue, then
3595          * grab the common lock.  If they use different CQs (hence different
3596          * "cq_wrid_wqhdr_list" locks), then grab the send one first, then the
3597          * receive.  We do this consistently and correctly in
3598          * tavor_wrid_wqhdr_unlock_both() below to avoid introducing any kind
3599          * of deadlock condition.  Note:  We add the "__lock_lint" code here
3600          * to fake out warlock into thinking we've grabbed both locks (when,
3601          * in fact, we only needed the one).
3602          */
3603         if (sq_cq == rq_cq) {
3604                 mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
3605 #ifdef  __lock_lint
3606                 mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
3607 #endif
3608         } else {
3609                 mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
3610                 mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
3611         }
3612 }
3613 
3614 /*
3615  * tavor_wrid_wqhdr_unlock_both()
3616  *    Context: Can be called from interrupt or base context.
3617  */
3618 static void
3619 tavor_wrid_wqhdr_unlock_both(tavor_qphdl_t qp)
3620 {
3621         tavor_cqhdl_t   sq_cq, rq_cq;
3622 
3623         sq_cq = qp->qp_sq_cqhdl;
3624         rq_cq = qp->qp_rq_cqhdl;
3625 
3626 _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))
3627 _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))
3628 
3629         /*
3630          * See tavor_wrid_wqhdr_lock_both() above for more detail
3631          */
3632         if (sq_cq == rq_cq) {
3633 #ifdef  __lock_lint
3634                 mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
3635 #endif
3636                 mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
3637         } else {
3638                 mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
3639                 mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
3640         }
3641 }
3642 
3643 
3644 /*
3645  * tavor_cq_wqhdr_add()
3646  *    Context: Can be called from interrupt or base context.
3647  */
3648 static void
3649 tavor_cq_wqhdr_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
3650 {
3651         tavor_workq_compare_t   cmp;
3652         avl_index_t             where;
3653 
3654         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3655 
3656         cmp.cmp_qpn = wqhdr->wq_qpn;
3657         cmp.cmp_type = wqhdr->wq_type;
3658 #ifdef __lock_lint
3659         tavor_wrid_wqhdr_compare(NULL, NULL);
3660 #endif
3661         (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
3662         /*
3663          * Find the insertion point in the CQ's workq header AVL tree
3664          * (avl_find() fills in "where" on a miss) and insert the entry.
3665          */
3666         avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqhdr, where);
3667 }
3668 
3669 
3670 /*
3671  * tavor_cq_wqhdr_remove()
3672  *    Context: Can be called from interrupt or base context.
3673  */
3674 static void
3675 tavor_cq_wqhdr_remove(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
3676 {
3677         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3678 
3679 #ifdef __lock_lint
3680         tavor_wrid_wqhdr_compare(NULL, NULL);
3681 #endif
3682         /* Remove "wqhdr" from the work queue header list on "cq" */
3683         avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqhdr);
3684 
3685         /*
3686          * Release reference to WQL; If this is the last reference, this call
3687          * also has the side effect of freeing up the 'wq_wrid_wql' memory.
3688          */
3689         tavor_wql_refcnt_dec(wqhdr->wq_wrid_wql);
3690 
3691         /* Free the memory associated with "wqhdr" */
3692         kmem_free(wqhdr, sizeof (tavor_workq_hdr_t));
3693 }
3694 
3695 
3696 /*
3697  * tavor_wql_refcnt_inc()
3698  * Context: Can be called from interrupt or base context
3699  */
3700 void
3701 tavor_wql_refcnt_inc(tavor_wq_lock_t *wql)




2344         tavor_wrid_wqhdr_lock_both(qp);
2345         swq = tavor_wrid_wqhdr_find(qp->qp_sq_cqhdl, qp->qp_qpnum,
2346             TAVOR_WR_SEND);
2347         if (swq == NULL) {
2348                 /* Couldn't find matching work queue header, create it */
2349                 create_new_swq = create_wql = 1;
2350                 swq = tavor_wrid_wqhdr_create(state, qp->qp_sq_cqhdl,
2351                     qp->qp_qpnum, TAVOR_WR_SEND, create_wql);
2352                 if (swq == NULL) {
2353                         /*
2354                          * If we couldn't find/allocate space for the workq
2355                          * header, then drop the lock(s) and return failure.
2356                          */
2357                         tavor_wrid_wqhdr_unlock_both(qp);
2358                         TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
2359                             TAVOR_TNF_ERROR, "");
2360                         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2361                         return (ibc_get_ci_failure(0));
2362                 }
2363         }

2364         qp->qp_sq_wqhdr = swq;
2365         swq->wq_size = qp->qp_sq_bufsz;
2366         swq->wq_head = 0;
2367         swq->wq_tail = 0;
2368         swq->wq_full = 0;
2369 
2370         /*
2371          * Allocate space for the tavor_wrid_entry_t container
2372          */
2373         s_wridlist = tavor_wrid_get_list(swq->wq_size);
2374         if (s_wridlist == NULL) {
2375                 /*
2376                  * If we couldn't allocate space for tracking the WRID
2377                  * entries, then cleanup the workq header from above (if
2378                  * necessary, i.e. if we created the workq header).  Then
2379                  * drop the lock(s) and return failure.
2380                  */
2381                 if (create_new_swq) {
2382                         tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
2383                 }
2384 
2385                 tavor_wrid_wqhdr_unlock_both(qp);
2386                 TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
2387                     TAVOR_TNF_ERROR, "");
2388                 TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2389                 return (ibc_get_ci_failure(0));
2390         }

2391         s_wridlist->wl_wqhdr = swq;
2392 
2393         /* Chain the new WRID list container to the workq hdr list */
2394         mutex_enter(&swq->wq_wrid_wql->wql_lock);
2395         tavor_wrid_wqhdr_add(swq, s_wridlist);
2396         mutex_exit(&swq->wq_wrid_wql->wql_lock);
2397 
2398         qp_srq_en = qp->qp_srq_en;
2399 



2400         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2401                 mutex_enter(&qp->qp_srqhdl->srq_lock);
2402         }
2403 
2404         /*
2405          * Now we repeat all the above operations for the receive work queue,
2406          * or shared receive work queue.
2407          *
2408          * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
2409          */
2410         rwq = tavor_wrid_wqhdr_find(qp->qp_rq_cqhdl, qp->qp_qpnum,
2411             TAVOR_WR_RECV);
2412         if (rwq == NULL) {
2413                 create_new_rwq = create_wql = 1;
2414 
2415                 /*
2416                  * If this QP is associated with an SRQ, and this isn't the
2417                  * first QP on the SRQ, then the 'srq_wrid_wql' will already be
2418                  * created.  Since the WQL is created at 'wqhdr_create' time we
2419          * pass in the flag 'create_wql' here as 0 if we have
2420          * already created it.  Later on below we setup the WQL and
2421          * rwq information based on the existing SRQ info.
2422                  */
2423                 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
2424                     qp->qp_srqhdl->srq_wrid_wql != NULL) {
2425                         create_wql = 0;
2426                 }
2427 
2428                 rwq = tavor_wrid_wqhdr_create(state, qp->qp_rq_cqhdl,
2429                     qp->qp_qpnum, TAVOR_WR_RECV, create_wql);
2430                 if (rwq == NULL) {
2431                         /*
2432                          * If we couldn't find/allocate space for the workq
2433                          * header, then free all the send queue resources we
2434                          * just allocated and setup (above), drop the lock(s)
2435                          * and return failure.
2436                          */
2437                         mutex_enter(&swq->wq_wrid_wql->wql_lock);
2438                         tavor_wrid_wqhdr_remove(swq, s_wridlist);
2439                         mutex_exit(&swq->wq_wrid_wql->wql_lock);
2440                         if (create_new_swq) {
2441                                 tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl,
2442                                     swq);
2443                         }
2444 



2445                         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2446                                 mutex_exit(&qp->qp_srqhdl->srq_lock);
2447                         }

2448 
2449                         tavor_wrid_wqhdr_unlock_both(qp);
2450                         TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
2451                             TAVOR_TNF_ERROR, "");
2452                         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2453                         return (ibc_get_ci_failure(0));
2454                 }
2455         }

2456 
2457         /*
2458          * Setup receive workq hdr
2459          *
2460          * If the QP is on an SRQ, we setup the SRQ specific fields,
2461          * keeping a copy of the rwq pointer, setting the rwq bufsize
2462          * appropriately, and initializing our part of the WQLock.
2463          *
2464          * In the normal QP case, the QP recv queue bufsize is used.
2465          */
2466         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2467                 rwq->wq_size = qp->qp_srqhdl->srq_wq_bufsz;
2468                 if (qp->qp_srqhdl->srq_wrid_wql == NULL) {
2469                         qp->qp_srqhdl->srq_wrid_wql = rwq->wq_wrid_wql;
2470                 } else {
2471                         rwq->wq_wrid_wql = qp->qp_srqhdl->srq_wrid_wql;
2472                 }
2473                 tavor_wql_refcnt_inc(qp->qp_srqhdl->srq_wrid_wql);
2474 
2475         } else {


2504          * If the memory allocation failed for r_wridlist (or the SRQ pointer
2505          * is mistakenly NULL), we cleanup our previous swq allocation from
2506          * above
2507          */
2508         if (r_wridlist == NULL) {
2509                 /*
2510                  * If we couldn't allocate space for tracking the WRID
2511                  * entries, then cleanup all the stuff from above.  Then
2512                  * drop the lock(s) and return failure.
2513                  */
2514                 mutex_enter(&swq->wq_wrid_wql->wql_lock);
2515                 tavor_wrid_wqhdr_remove(swq, s_wridlist);
2516                 mutex_exit(&swq->wq_wrid_wql->wql_lock);
2517                 if (create_new_swq) {
2518                         tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
2519                 }
2520                 if (create_new_rwq) {
2521                         tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, rwq);
2522                 }
2523 



2524                 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2525                         mutex_exit(&qp->qp_srqhdl->srq_lock);
2526                 }

2527 
2528                 tavor_wrid_wqhdr_unlock_both(qp);
2529                 TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
2530                     TAVOR_TNF_ERROR, "");
2531                 TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2532                 return (ibc_get_ci_failure(0));
2533         }

2534 
2535         /*
2536          * Initialize the wridlist
2537          *
2538          * In the normal QP case, there is no special initialization needed.
2539          * We simply setup the wridlist backpointer to be the receive wqhdr
2540          * (rwq).
2541          *
2542          * But in the SRQ case, there is no backpointer to the wqhdr possible.
2543          * Instead we set 'wl_srq_en', specifying this wridlist is on an SRQ
2544          * and thus potentially shared across multiple QPs on the SRQ.  We
2545          * also setup the srq_wridlist pointer to be the r_wridlist, and
2546          * initialize the freelist to an invalid index.  This srq_wridlist
2547          * pointer is used above on future moves from_reset to let us know that
2548          * the srq_wridlist has been initialized already.
2549          *
2550          * And finally, if we are in a non-UMAP case, we setup the srq wrid
2551          * free list.
2552          */
2553         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
2554             qp->qp_srqhdl->srq_wridlist == NULL) {
2555                 r_wridlist->wl_srq_en = 1;
2556                 r_wridlist->wl_free_list_indx = -1;
2557                 qp->qp_srqhdl->srq_wridlist = r_wridlist;
2558 
2559                 /* Initialize srq wrid free list */
2560                 if (qp->qp_srqhdl->srq_is_umap == 0) {
2561                         mutex_enter(&rwq->wq_wrid_wql->wql_lock);
2562                         tavor_wrid_list_srq_init(r_wridlist, qp->qp_srqhdl, 0);
2563                         mutex_exit(&rwq->wq_wrid_wql->wql_lock);
2564                 }
2565         } else {
2566                 r_wridlist->wl_wqhdr = rwq;
2567         }
2568 
2569         /* Chain the WRID list "container" to the workq hdr list */
2570         mutex_enter(&rwq->wq_wrid_wql->wql_lock);
2571         tavor_wrid_wqhdr_add(rwq, r_wridlist);
2572         mutex_exit(&rwq->wq_wrid_wql->wql_lock);
2573 



2574         if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
2575                 mutex_exit(&qp->qp_srqhdl->srq_lock);
2576         }

2577 





2578         tavor_wrid_wqhdr_unlock_both(qp);
2579         TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
2580         return (DDI_SUCCESS);
2581 }
2582 
2583 
2584 /*
2585  * tavor_wrid_to_reset_handling()
2586  *    Context: Can be called from interrupt or base context.
2587  */
2588 void
2589 tavor_wrid_to_reset_handling(tavor_state_t *state, tavor_qphdl_t qp)
2590 {
2591         uint_t          free_wqhdr = 0;
2592 
2593         TAVOR_TNF_ENTER(tavor_wrid_to_reset_handling);
2594 
2595         /*
2596          * For each of this QP's Work Queues, move the WRID "container" to
2597          * the "reapable" list.  Although there may still be unpolled


3082         /*
3083          * The first step is to walk the "reapable" list and free up those
3084          * containers.  This is necessary because the containers on the
3085          * reapable list are not otherwise connected to the work queue headers
3086          * anymore.
3087          */
3088         tavor_wrid_cq_reap(cq);
3089 
3090         /* Now lock the list of work queues associated with this CQ */
3091         mutex_enter(&cq->cq_wrid_wqhdr_lock);
3092 
3093         /*
3094          * Walk the list of work queue headers and free up all the WRID list
3095          * containers chained to it.  Note: We don't need to grab the locks
3096          * for each of the individual WRID lists here because the only way
3097          * things can be added or removed from the list at this point would be
3098          * through posting a work request to a QP.  But if we've come this far,
3099          * then we can be assured that there are no longer any QPs associated
3100          * with the CQ that we are trying to free.
3101          */



3102         treep = &cq->cq_wrid_wqhdr_avl_tree;
3103         while ((curr = avl_destroy_nodes(treep, &cookie)) != NULL) {

3104                 container = curr->wq_wrid_poll;
3105                 while (container != NULL) {
3106                         to_free   = container;
3107                         container = container->wl_next;
3108                         /*
3109                          * If reaping the WRID list containers pulls the last
3110                          * container from the given work queue header, then
3111                          * we free the work queue header as well.  Note: we
3112                          * ignore the return value because we know that the
3113                          * work queue header should always be freed once the
3114                          * list of containers has come to an end.
3115                          */
3116                         (void) tavor_wrid_list_reap(to_free);
3117                         if (container == NULL) {
3118                                 tavor_cq_wqhdr_remove(cq, curr);
3119                         }
3120                 }
3121         }
3122         avl_destroy(treep);
3123 


3140          * The WRID list "container" consists of the tavor_wrid_list_hdr_t,
3141          * which holds the pointers necessary for maintaining the "reapable"
3142          * list, chaining together multiple "containers" old and new, and
3143          * tracking the head, tail, size, etc. for each container.
3144          *
3145          * The "container" also holds all the tavor_wrid_entry_t's, which are
3146          * allocated separately, one for each entry on the corresponding work
3147          * queue.
3148          */
3149         size = sizeof (tavor_wrid_list_hdr_t);
3150 
3151         /*
3152          * Note that this allocation has to be a NOSLEEP operation here
3153          * because we are holding the "wqhdr_list_lock" and, therefore,
3154          * could get raised to the interrupt level.
3155          */
3156         wridlist = (tavor_wrid_list_hdr_t *)kmem_zalloc(size, KM_NOSLEEP);
3157         if (wridlist == NULL) {
3158                 return (NULL);
3159         }

3160 
3161         /* Complete the "container" initialization */
3162         wridlist->wl_size = qsize;
3163         wridlist->wl_full = 0;
3164         wridlist->wl_head = 0;
3165         wridlist->wl_tail = 0;
3166         wridlist->wl_wre = (tavor_wrid_entry_t *)kmem_zalloc(qsize *
3167             sizeof (tavor_wrid_entry_t), KM_NOSLEEP);
3168         if (wridlist->wl_wre == NULL) {
3169                 kmem_free(wridlist, size);
3170                 return (NULL);
3171         }
3172         wridlist->wl_wre_old_tail  = NULL;
3173         wridlist->wl_reap_next = NULL;
3174         wridlist->wl_next  = NULL;
3175         wridlist->wl_prev  = NULL;
3176         wridlist->wl_srq_en = 0;
3177 
3178         return (wridlist);
3179 }


3281  *    Context: Can be called from interrupt or base context.
3282  */
3283 static tavor_workq_hdr_t *
3284 tavor_wrid_wqhdr_find(tavor_cqhdl_t cq, uint_t qpn, uint_t wq_type)
3285 {
3286         tavor_workq_hdr_t       *curr;
3287         tavor_workq_compare_t   cmp;
3288 
3289         TAVOR_TNF_ENTER(tavor_wrid_wqhdr_find);
3290 
3291         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3292 
3293         /*
3294          * Walk the CQ's work queue list, trying to find a send or recv queue
3295          * with the same QP number.  We do this even if we are going to later
3296          * create a new entry because it helps us easily find the end of the
3297          * list.
3298          */
3299         cmp.cmp_qpn = qpn;
3300         cmp.cmp_type = wq_type;



3301         curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);
3302 
3303         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_find);
3304         return (curr);
3305 }
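
avl_find() above relies on the comparator registered when the tree was
created; the avl_create() contract requires it to return only -1, 0, or +1
and to define a total order over the key, here (QP number, work queue type).
A hypothetical comparator of that shape (example_wqhdr_t is an illustration;
the driver's actual tavor_wrid_wqhdr_compare() is not shown in this webrev):

#include <sys/types.h>
#include <sys/avl.h>

typedef struct example_wqhdr {
	avl_node_t	ew_avl;
	uint_t		ew_qpn;
	uint_t		ew_type;
} example_wqhdr_t;

static int
example_wqhdr_compare(const void *a, const void *b)
{
	const example_wqhdr_t *wa = a;
	const example_wqhdr_t *wb = b;

	/* Order by QP number first, then by queue type */
	if (wa->ew_qpn != wb->ew_qpn)
		return (wa->ew_qpn < wb->ew_qpn ? -1 : +1);
	if (wa->ew_type != wb->ew_type)
		return (wa->ew_type < wb->ew_type ? -1 : +1);
	return (0);
}
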
3306 
3307 
3308 /*
3309  * tavor_wrid_wqhdr_create()
3310  *    Context: Can be called from interrupt or base context.
3311  */
3312 static tavor_workq_hdr_t *
3313 tavor_wrid_wqhdr_create(tavor_state_t *state, tavor_cqhdl_t cq, uint_t qpn,
3314     uint_t wq_type, uint_t create_wql)
3315 {
3316         tavor_workq_hdr_t       *wqhdr_tmp;
3317 
3318         TAVOR_TNF_ENTER(tavor_wrid_wqhdr_create);
3319 
3320         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3321 
3322         /*
3323          * Allocate space for a work queue header structure and initialize it.
3324          * Each work queue header structure includes a "wq_wrid_wql"
3325          * which needs to be initialized.  Note that this allocation has to be
3326          * a NOSLEEP operation because we are holding the "cq_wrid_wqhdr_lock"
3327          * and, therefore, could get raised to the interrupt level.
3328          */
3329         wqhdr_tmp = (tavor_workq_hdr_t *)kmem_zalloc(
3330             sizeof (tavor_workq_hdr_t), KM_NOSLEEP);
3331         if (wqhdr_tmp == NULL) {
3332                 TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3333                 return (NULL);
3334         }

3335         wqhdr_tmp->wq_qpn    = qpn;
3336         wqhdr_tmp->wq_type   = wq_type;
3337 
3338         if (create_wql) {
3339                 wqhdr_tmp->wq_wrid_wql = tavor_wrid_wql_create(state);
3340                 if (wqhdr_tmp->wq_wrid_wql == NULL) {
3341                         kmem_free(wqhdr_tmp, sizeof (tavor_workq_hdr_t));
3342                         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3343                         return (NULL);
3344                 }
3345         }
3346 
3347         wqhdr_tmp->wq_wrid_poll = NULL;
3348         wqhdr_tmp->wq_wrid_post = NULL;
3349 
3350         /* Chain the newly allocated work queue header to the CQ's list */
3351         tavor_cq_wqhdr_add(cq, wqhdr_tmp);
3352 
3353         TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
3354         return (wqhdr_tmp);


3537 
3538         mutex_exit(&wqhdr->wq_wrid_wql->wql_lock);
3539 
3540         TAVOR_TNF_EXIT(tavor_wrid_list_reap);
3541         return (consume_wqhdr);
3542 }
3543 
3544 
3545 /*
3546  * tavor_wrid_wqhdr_lock_both()
3547  *    Context: Can be called from interrupt or base context.
3548  */
3549 static void
3550 tavor_wrid_wqhdr_lock_both(tavor_qphdl_t qp)
3551 {
3552         tavor_cqhdl_t   sq_cq, rq_cq;
3553 
3554         sq_cq = qp->qp_sq_cqhdl;
3555         rq_cq = qp->qp_rq_cqhdl;
3556 



3557         /*
3558          * If both work queues (send and recv) share a completion queue, then
3559          * grab the common lock.  If they use different CQs (hence different
3560          * "cq_wrid_wqhdr_list" locks), then grab the send one first, then the
3561          * receive.  We do this consistently and correctly in
3562          * tavor_wrid_wqhdr_unlock_both() below to avoid introducing any kind
3563          * of deadlock condition.


3564          */
3565         if (sq_cq == rq_cq) {
3566                 mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);



3567         } else {
3568                 mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
3569                 mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
3570         }
3571 }
3572 
3573 /*
3574  * tavor_wrid_wqhdr_unlock_both()
3575  *    Context: Can be called from interrupt or base context.
3576  */
3577 static void
3578 tavor_wrid_wqhdr_unlock_both(tavor_qphdl_t qp)
3579 {
3580         tavor_cqhdl_t   sq_cq, rq_cq;
3581 
3582         sq_cq = qp->qp_sq_cqhdl;
3583         rq_cq = qp->qp_rq_cqhdl;
3584 



3585         /*
3586          * See tavor_wrid_wqhdr_lock_both() above for more detail
3587          */
3588         if (sq_cq == rq_cq) {



3589                 mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
3590         } else {
3591                 mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
3592                 mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
3593         }
3594 }
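
These two functions are the classic defense against ABBA deadlock: when the
send and receive work queues share one CQ, the common lock is taken exactly
once; otherwise both paths acquire in a single global order (send CQ first)
and release in the reverse order.  The same idiom, reduced to a
self-contained sketch:

#include <sys/ksynch.h>

static void
example_lock_pair(kmutex_t *sq, kmutex_t *rq)
{
	if (sq == rq) {
		mutex_enter(sq);	/* shared lock: take it once */
	} else {
		mutex_enter(sq);	/* fixed order: send first... */
		mutex_enter(rq);	/* ...then receive */
	}
}

static void
example_unlock_pair(kmutex_t *sq, kmutex_t *rq)
{
	if (sq == rq) {
		mutex_exit(sq);
	} else {
		mutex_exit(rq);		/* reverse order on release */
		mutex_exit(sq);
	}
}
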
3595 
3596 
3597 /*
3598  * tavor_cq_wqhdr_add()
3599  *    Context: Can be called from interrupt or base context.
3600  */
3601 static void
3602 tavor_cq_wqhdr_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
3603 {
3604         tavor_workq_compare_t   cmp;
3605         avl_index_t             where;
3606 
3607         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3608 
3609         cmp.cmp_qpn = wqhdr->wq_qpn;
3610         cmp.cmp_type = wqhdr->wq_type;



3611         (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
3612         /*
3613          * Find the insertion point in the CQ's workq header AVL tree
3614          * (avl_find() fills in "where" on a miss) and insert the entry.
3615          */
3616         avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqhdr, where);
3617 }
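
The call pair above is the usual AVL insertion idiom: avl_find() with a
non-NULL "where" argument computes the insertion point even when the lookup
misses, and avl_insert() then links the node at that point without a second
search.  A minimal sketch (example_node_t is hypothetical, and the key is
assumed absent from the tree, as the code above assumes):

#include <sys/types.h>
#include <sys/avl.h>

typedef struct example_node {
	avl_node_t	en_avl;
	uint_t		en_qpn;
	uint_t		en_type;
} example_node_t;

static void
example_tree_add(avl_tree_t *tree, example_node_t *node)
{
	avl_index_t	where;

	/* The find is expected to miss; it only computes "where" */
	(void) avl_find(tree, node, &where);
	avl_insert(tree, node, where);
}
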
3618 
3619 
3620 /*
3621  * tavor_cq_wqhdr_remove()
3622  *    Context: Can be called from interrupt or base context.
3623  */
3624 static void
3625 tavor_cq_wqhdr_remove(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
3626 {
3627         ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
3628 



3629         /* Remove "wqhdr" from the work queue header list on "cq" */
3630         avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqhdr);
3631 
3632         /*
3633          * Release reference to WQL; If this is the last reference, this call
3634          * also has the side effect of freeing up the 'wq_wrid_wql' memory.
3635          */
3636         tavor_wql_refcnt_dec(wqhdr->wq_wrid_wql);
3637 
3638         /* Free the memory associated with "wqhdr" */
3639         kmem_free(wqhdr, sizeof (tavor_workq_hdr_t));
3640 }
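
tavor_wql_refcnt_dec() is described above as freeing the WQL when the last
reference is released; its body is not part of this webrev, so the following
is only a hypothetical mirror of that contract (example_wql_t and its field
names are invented):

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>

typedef struct example_wql {
	kmutex_t	ewl_lock;
	uint_t		ewl_refcnt;
} example_wql_t;

static void
example_wql_refcnt_dec(example_wql_t *wql)
{
	uint_t	refcnt;

	/* Drop a reference under the lock, remembering the new count */
	mutex_enter(&wql->ewl_lock);
	refcnt = --wql->ewl_refcnt;
	mutex_exit(&wql->ewl_lock);

	/* Last reference gone: no other holder remains, so free it */
	if (refcnt == 0) {
		mutex_destroy(&wql->ewl_lock);
		kmem_free(wql, sizeof (example_wql_t));
	}
}
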
3641 
3642 
3643 /*
3644  * tavor_wql_refcnt_inc()
3645  * Context: Can be called from interrupt or base context
3646  */
3647 void
3648 tavor_wql_refcnt_inc(tavor_wq_lock_t *wql)