	tavor_wrid_wqhdr_lock_both(qp);
	swq = tavor_wrid_wqhdr_find(qp->qp_sq_cqhdl, qp->qp_qpnum,
	    TAVOR_WR_SEND);
	if (swq == NULL) {
		/* Couldn't find matching work queue header, create it */
		create_new_swq = create_wql = 1;
		swq = tavor_wrid_wqhdr_create(state, qp->qp_sq_cqhdl,
		    qp->qp_qpnum, TAVOR_WR_SEND, create_wql);
		if (swq == NULL) {
			/*
			 * If we couldn't find/allocate space for the workq
			 * header, then drop the lock(s) and return failure.
			 */
			tavor_wrid_wqhdr_unlock_both(qp);
			TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
			return (ibc_get_ci_failure(0));
		}
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swq))
	qp->qp_sq_wqhdr = swq;
	swq->wq_size = qp->qp_sq_bufsz;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Allocate space for the tavor_wrid_entry_t container
	 */
	s_wridlist = tavor_wrid_get_list(swq->wq_size);
	if (s_wridlist == NULL) {
		/*
		 * If we couldn't allocate space for tracking the WRID
		 * entries, then cleanup the workq header from above (if
		 * necessary, i.e. if we created the workq header).  Then
		 * drop the lock(s) and return failure.
		 */
		if (create_new_swq) {
			tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
		}

		tavor_wrid_wqhdr_unlock_both(qp);
		TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
		return (ibc_get_ci_failure(0));
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_wridlist))
	s_wridlist->wl_wqhdr = swq;

	/* Chain the new WRID list container to the workq hdr list */
	mutex_enter(&swq->wq_wrid_wql->wql_lock);
	tavor_wrid_wqhdr_add(swq, s_wridlist);
	mutex_exit(&swq->wq_wrid_wql->wql_lock);

	qp_srq_en = qp->qp_srq_en;

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
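
	/*
	 * Holding "srq_lock" (where applicable) across the receive-side
	 * setup below keeps the shared SRQ state examined here (the
	 * 'srq_wrid_wql' and 'srq_wridlist' fields) stable while other
	 * QPs attached to the same SRQ may be moving from reset
	 * concurrently.
	 */
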
	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */
	rwq = tavor_wrid_wqhdr_find(qp->qp_rq_cqhdl, qp->qp_qpnum,
	    TAVOR_WR_RECV);
	if (rwq == NULL) {
		create_new_rwq = create_wql = 1;

		/*
		 * If this QP is associated with an SRQ, and this isn't the
		 * first QP on the SRQ, then the 'srq_wrid_wql' will already
		 * have been created.  Since the WQL is created at
		 * 'wqhdr_create' time, we pass the 'create_wql' flag as 0
		 * here if it already exists.  Further below we then set up
		 * the WQL and rwq information based on the existing SRQ
		 * info.
		 */
		if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
		    qp->qp_srqhdl->srq_wrid_wql != NULL) {
			create_wql = 0;
		}

		rwq = tavor_wrid_wqhdr_create(state, qp->qp_rq_cqhdl,
		    qp->qp_qpnum, TAVOR_WR_RECV, create_wql);
		if (rwq == NULL) {
			/*
			 * If we couldn't find/allocate space for the workq
			 * header, then free all the send queue resources we
			 * just allocated and setup (above), drop the lock(s)
			 * and return failure.
			 */
			mutex_enter(&swq->wq_wrid_wql->wql_lock);
			tavor_wrid_wqhdr_remove(swq, s_wridlist);
			mutex_exit(&swq->wq_wrid_wql->wql_lock);
			if (create_new_swq) {
				tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
			}

#ifdef __lock_lint
			mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
			if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
				mutex_exit(&qp->qp_srqhdl->srq_lock);
			}
#endif

			tavor_wrid_wqhdr_unlock_both(qp);
			TNF_PROBE_0(tavor_wrid_from_reset_handling_wqhdr_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
			return (ibc_get_ci_failure(0));
		}
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rwq))

	/*
	 * Setup receive workq hdr
	 *
	 * If the QP is on an SRQ, we setup the SRQ specific fields: keeping
	 * a copy of the rwq pointer, setting the rwq bufsize appropriately,
	 * and initializing our part of the WQLock.
	 *
	 * In the normal QP case, the QP recv queue bufsize is used.
	 */
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		rwq->wq_size = qp->qp_srqhdl->srq_wq_bufsz;
		if (qp->qp_srqhdl->srq_wrid_wql == NULL) {
			qp->qp_srqhdl->srq_wrid_wql = rwq->wq_wrid_wql;
		} else {
			rwq->wq_wrid_wql = qp->qp_srqhdl->srq_wrid_wql;
		}
		tavor_wql_refcnt_inc(qp->qp_srqhdl->srq_wrid_wql);

	} else {
		rwq->wq_size = qp->qp_rq_bufsz;
	}
	qp->qp_rq_wqhdr = rwq;
	rwq->wq_head = 0;
	rwq->wq_tail = 0;
	rwq->wq_full = 0;

	/*
	 * Allocate space for the tavor_wrid_entry_t container.
	 *
	 * If the QP is on an SRQ and the 'srq_wridlist' has already been
	 * initialized (i.e. this is not the first QP on this SRQ), then we
	 * simply reuse the shared wridlist.  Otherwise we allocate a new
	 * one here.
	 */
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
	    qp->qp_srqhdl->srq_wridlist != NULL) {
		/* Use the existing srq_wridlist pointer */
		r_wridlist = qp->qp_srqhdl->srq_wridlist;
	} else {
		/* Allocate memory for the r_wridlist */
		r_wridlist = tavor_wrid_get_list(rwq->wq_size);
	}

	/*
	 * If the memory allocation failed for r_wridlist (or the SRQ pointer
	 * is mistakenly NULL), we cleanup our previous swq allocation from
	 * above.
	 */
	if (r_wridlist == NULL) {
		/*
		 * If we couldn't allocate space for tracking the WRID
		 * entries, then cleanup all the stuff from above.  Then
		 * drop the lock(s) and return failure.
		 */
		mutex_enter(&swq->wq_wrid_wql->wql_lock);
		tavor_wrid_wqhdr_remove(swq, s_wridlist);
		mutex_exit(&swq->wq_wrid_wql->wql_lock);
		if (create_new_swq) {
			tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
		}
		if (create_new_rwq) {
			tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, rwq);
		}

#ifdef __lock_lint
		mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
		if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
			mutex_exit(&qp->qp_srqhdl->srq_lock);
		}
#endif

		tavor_wrid_wqhdr_unlock_both(qp);
		TNF_PROBE_0(tavor_wrid_from_reset_handling_wridlist_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
		return (ibc_get_ci_failure(0));
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*r_wridlist))

	/*
	 * Initialize the wridlist
	 *
	 * In the normal QP case, there is no special initialization needed.
	 * We simply setup the wridlist backpointer to be the receive wqhdr
	 * (rwq).
	 *
	 * But in the SRQ case, there is no backpointer to the wqhdr possible.
	 * Instead we set 'wl_srq_en', specifying that this wridlist is on an
	 * SRQ and thus potentially shared across multiple QPs on the SRQ.
	 * We also setup the srq_wridlist pointer to be the r_wridlist, and
	 * initialize the freelist to an invalid index.  This srq_wridlist
	 * pointer is used above on future moves from reset to let us know
	 * that the srq_wridlist has been initialized already.
	 *
	 * And finally, if we are in a non-UMAP case, we setup the srq wrid
	 * free list.
	 */
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
	    qp->qp_srqhdl->srq_wridlist == NULL) {
		r_wridlist->wl_srq_en = 1;
		r_wridlist->wl_free_list_indx = -1;
		qp->qp_srqhdl->srq_wridlist = r_wridlist;

		/* Initialize srq wrid free list */
		if (qp->qp_srqhdl->srq_is_umap == 0) {
			mutex_enter(&rwq->wq_wrid_wql->wql_lock);
			tavor_wrid_list_srq_init(r_wridlist, qp->qp_srqhdl, 0);
			mutex_exit(&rwq->wq_wrid_wql->wql_lock);
		}
	} else {
		r_wridlist->wl_wqhdr = rwq;
	}

	/* Chain the WRID list "container" to the workq hdr list */
	mutex_enter(&rwq->wq_wrid_wql->wql_lock);
	tavor_wrid_wqhdr_add(rwq, r_wridlist);
	mutex_exit(&rwq->wq_wrid_wql->wql_lock);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*r_wridlist))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rwq))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_wridlist))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*swq))

	tavor_wrid_wqhdr_unlock_both(qp);
	TAVOR_TNF_EXIT(tavor_wrid_from_reset_handling);
	return (DDI_SUCCESS);
}
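
/*
 * For orientation, a sketch of the WRID tracking hierarchy that the
 * routine above leaves behind (names as used throughout this file):
 *
 *	cq->cq_wrid_wqhdr_avl_tree
 *	    tavor_workq_hdr_t		(keyed by {wq_qpn, wq_type})
 *		tavor_wrid_list_hdr_t	(the WRID list "container")
 *		    wl_wre[wl_size]	(one tavor_wrid_entry_t per WQE)
 *
 * In the SRQ case a single container may be shared, via 'srq_wridlist',
 * by every QP attached to that SRQ.
 */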


/*
 * tavor_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_wrid_to_reset_handling(tavor_state_t *state, tavor_qphdl_t qp)
{
	uint_t	free_wqhdr = 0;

	TAVOR_TNF_ENTER(tavor_wrid_to_reset_handling);

	/*
	 * For each of this QP's Work Queues, move the WRID "container" to
	 * the "reapable" list.  Although there may still be unpolled
	 * entries in these containers, the containers themselves are not
	 * freed until the "reapable" list is processed later.
	 */

/*
 * tavor_wrid_cq_force_reap()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_wrid_cq_force_reap(tavor_cqhdl_t cq)
{
	tavor_workq_hdr_t	*curr;
	tavor_wrid_list_hdr_t	*container, *to_free;
	avl_tree_t		*treep;
	void			*cookie = NULL;

	/*
	 * The first step is to walk the "reapable" list and free up those
	 * containers.  This is necessary because the containers on the
	 * reapable list are not otherwise connected to the work queue headers
	 * anymore.
	 */
	tavor_wrid_cq_reap(cq);

	/* Now lock the list of work queues associated with this CQ */
	mutex_enter(&cq->cq_wrid_wqhdr_lock);

	/*
	 * Walk the list of work queue headers and free up all the WRID list
	 * containers chained to it.  Note: We don't need to grab the locks
	 * for each of the individual WRID lists here because the only way
	 * things can be added or removed from the list at this point would
	 * be through posting a work request to a QP.  But if we've come this
	 * far, then we can be assured that there are no longer any QPs
	 * associated with the CQ that we are trying to free.
	 */
#ifdef __lock_lint
	tavor_wrid_wqhdr_compare(NULL, NULL);
#endif
	treep = &cq->cq_wrid_wqhdr_avl_tree;
	while ((curr = avl_destroy_nodes(treep, &cookie)) != NULL) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*curr))
		container = curr->wq_wrid_poll;
		while (container != NULL) {
			to_free = container;
			container = container->wl_next;
			/*
			 * If reaping the WRID list containers pulls the last
			 * container from the given work queue header, then
			 * we free the work queue header as well.  Note: we
			 * ignore the return value because we know that the
			 * work queue header should always be freed once the
			 * list of containers has come to an end.
			 */
			(void) tavor_wrid_list_reap(to_free);
			if (container == NULL) {
				tavor_cq_wqhdr_remove(cq, curr);
			}
		}
	}
	avl_destroy(treep);

	mutex_exit(&cq->cq_wrid_wqhdr_lock);
	TAVOR_TNF_EXIT(tavor_wrid_cq_force_reap);
}

/*
 * tavor_wrid_get_list()
 *    Context: Can be called from interrupt or base context.
 */
tavor_wrid_list_hdr_t *
tavor_wrid_get_list(uint_t qsize)
{
	tavor_wrid_list_hdr_t	*wridlist;
	uint_t			size;

	/*
	 * The WRID list "container" consists of the tavor_wrid_list_hdr_t,
	 * which holds the pointers necessary for maintaining the "reapable"
	 * list, chaining together multiple "containers" old and new, and
	 * tracking the head, tail, size, etc. for each container.
	 *
	 * The "container" also holds all the tavor_wrid_entry_t's, which are
	 * allocated separately, one for each entry on the corresponding work
	 * queue.
	 */
	size = sizeof (tavor_wrid_list_hdr_t);

	/*
	 * Note that this allocation has to be a NOSLEEP operation here
	 * because we are holding the "cq_wrid_wqhdr_lock" and, therefore,
	 * could get raised to the interrupt level.
	 */
	wridlist = (tavor_wrid_list_hdr_t *)kmem_zalloc(size, KM_NOSLEEP);
	if (wridlist == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wridlist))

	/* Complete the "container" initialization */
	wridlist->wl_size = qsize;
	wridlist->wl_full = 0;
	wridlist->wl_head = 0;
	wridlist->wl_tail = 0;
	wridlist->wl_wre = (tavor_wrid_entry_t *)kmem_zalloc(qsize *
	    sizeof (tavor_wrid_entry_t), KM_NOSLEEP);
	if (wridlist->wl_wre == NULL) {
		kmem_free(wridlist, size);
		return (NULL);
	}
	wridlist->wl_wre_old_tail = NULL;
	wridlist->wl_reap_next = NULL;
	wridlist->wl_next = NULL;
	wridlist->wl_prev = NULL;
	wridlist->wl_srq_en = 0;

	return (wridlist);
}
/*
 * tavor_wrid_wqhdr_find()
 *    Context: Can be called from interrupt or base context.
 */
static tavor_workq_hdr_t *
tavor_wrid_wqhdr_find(tavor_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	tavor_workq_hdr_t	*curr;
	tavor_workq_compare_t	cmp;

	TAVOR_TNF_ENTER(tavor_wrid_wqhdr_find);

	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));

	/*
	 * Search the CQ's AVL tree of work queue headers for a send or recv
	 * queue entry with the same QP number and type.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	tavor_wrid_wqhdr_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	TAVOR_TNF_EXIT(tavor_wrid_wqhdr_find);
	return (curr);
}
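
/*
 * The lookup above relies on the AVL comparator registered for this tree
 * (tavor_wrid_wqhdr_compare(), defined elsewhere in this file).  It is
 * assumed to order nodes on the {cmp_qpn, cmp_type} key built above,
 * along the lines of this sketch:
 *
 *	if (cmp->cmp_qpn < wq->wq_qpn)	return (-1);
 *	if (cmp->cmp_qpn > wq->wq_qpn)	return (+1);
 *	compare cmp_type against wq_type the same way; return (0)
 *	only on a full match.
 */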


/*
 * tavor_wrid_wqhdr_create()
 *    Context: Can be called from interrupt or base context.
 */
static tavor_workq_hdr_t *
tavor_wrid_wqhdr_create(tavor_state_t *state, tavor_cqhdl_t cq, uint_t qpn,
    uint_t wq_type, uint_t create_wql)
{
	tavor_workq_hdr_t	*wqhdr_tmp;

	TAVOR_TNF_ENTER(tavor_wrid_wqhdr_create);

	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));

	/*
	 * Allocate space for a work queue header structure and initialize
	 * it.  Each work queue header structure includes a "wq_wrid_wql"
	 * which needs to be initialized.  Note that this allocation has to
	 * be a NOSLEEP operation because we are holding the
	 * "cq_wrid_wqhdr_lock" and, therefore, could get raised to the
	 * interrupt level.
	 */
	wqhdr_tmp = (tavor_workq_hdr_t *)kmem_zalloc(
	    sizeof (tavor_workq_hdr_t), KM_NOSLEEP);
	if (wqhdr_tmp == NULL) {
		TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr_tmp))
	wqhdr_tmp->wq_qpn = qpn;
	wqhdr_tmp->wq_type = wq_type;

	if (create_wql) {
		wqhdr_tmp->wq_wrid_wql = tavor_wrid_wql_create(state);
		if (wqhdr_tmp->wq_wrid_wql == NULL) {
			kmem_free(wqhdr_tmp, sizeof (tavor_workq_hdr_t));
			TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
			return (NULL);
		}
	}

	wqhdr_tmp->wq_wrid_poll = NULL;
	wqhdr_tmp->wq_wrid_post = NULL;

	/* Chain the newly allocated work queue header to the CQ's list */
	tavor_cq_wqhdr_add(cq, wqhdr_tmp);

	TAVOR_TNF_EXIT(tavor_wrid_wqhdr_create);
	return (wqhdr_tmp);
}

	mutex_exit(&wqhdr->wq_wrid_wql->wql_lock);

	TAVOR_TNF_EXIT(tavor_wrid_list_reap);
	return (consume_wqhdr);
}

/*
 * tavor_wrid_wqhdr_lock_both()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_wrid_wqhdr_lock_both(tavor_qphdl_t qp)
{
	tavor_cqhdl_t	sq_cq, rq_cq;

	sq_cq = qp->qp_sq_cqhdl;
	rq_cq = qp->qp_rq_cqhdl;

	_NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))
	_NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))

	/*
	 * If both work queues (send and recv) share a completion queue, then
	 * grab the common lock.  If they use different CQs (hence different
	 * "cq_wrid_wqhdr_list" locks), then grab the send one first, then
	 * the receive.  We do this consistently and correctly in
	 * tavor_wrid_wqhdr_unlock_both() below to avoid introducing any
	 * kind of deadlock condition.  Note: We add the "__lock_lint" code
	 * here to fake out warlock into thinking we've grabbed both locks
	 * (when, in fact, we only needed the one).
	 */
	if (sq_cq == rq_cq) {
		mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
#ifdef __lock_lint
		mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
#endif
	} else {
		mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
		mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
	}
}
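
/*
 * Note that the sq_cq == rq_cq case above must be special-cased: kernel
 * mutexes are not reentrant, so calling mutex_enter() twice on the common
 * "cq_wrid_wqhdr_lock" would deadlock the calling thread against itself.
 */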

/*
 * tavor_wrid_wqhdr_unlock_both()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_wrid_wqhdr_unlock_both(tavor_qphdl_t qp)
{
	tavor_cqhdl_t	sq_cq, rq_cq;

	sq_cq = qp->qp_sq_cqhdl;
	rq_cq = qp->qp_rq_cqhdl;

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))

	/*
	 * See tavor_wrid_wqhdr_lock_both() above for more detail
	 */
	if (sq_cq == rq_cq) {
#ifdef __lock_lint
		mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
#endif
		mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
	} else {
		mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
		mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
	}
}
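
/*
 * In the two-CQ case the locks are dropped in the reverse of the order in
 * which tavor_wrid_wqhdr_lock_both() acquired them.  Release order has no
 * bearing on deadlock safety, but the symmetry makes the lock/unlock
 * pairing easy to audit.
 */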


/*
 * tavor_cq_wqhdr_add()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_cq_wqhdr_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
{
	tavor_workq_compare_t	cmp;
	avl_index_t		where;

	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));

	cmp.cmp_qpn = wqhdr->wq_qpn;
	cmp.cmp_type = wqhdr->wq_type;
#ifdef __lock_lint
	tavor_wrid_wqhdr_compare(NULL, NULL);
#endif
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);

	/*
	 * Insert the new work queue header into the CQ's AVL tree at the
	 * insertion point computed by avl_find() above.
	 */
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqhdr, where);
}
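
/*
 * Note: the avl_find() call above is not expected to locate an existing
 * entry (duplicate {qpn, type} keys should never be inserted); it is made
 * solely to compute "where", the insertion point that avl_insert()
 * requires.
 */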


/*
 * tavor_cq_wqhdr_remove()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_cq_wqhdr_remove(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
{
	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));

#ifdef __lock_lint
	tavor_wrid_wqhdr_compare(NULL, NULL);
#endif
	/* Remove "wqhdr" from the work queue header list on "cq" */
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqhdr);

	/*
	 * Release the reference to the WQL; if this is the last reference,
	 * this call also has the side effect of freeing up the
	 * 'wq_wrid_wql' memory.
	 */
	tavor_wql_refcnt_dec(wqhdr->wq_wrid_wql);

	/* Free the memory associated with "wqhdr" */
	kmem_free(wqhdr, sizeof (tavor_workq_hdr_t));
}
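
/*
 * The tavor_wql_refcnt_dec() call above pairs with the
 * tavor_wql_refcnt_inc() calls made while setting up work queue headers
 * (e.g. in tavor_wrid_from_reset_handling()); a WQL shared through an SRQ
 * is thus freed only when the last work queue header referencing it is
 * removed.
 */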


/*
 * tavor_wql_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_wql_refcnt_inc(tavor_wq_lock_t *wql)