/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through the specified tx ring.
 *
 * An mblk can consist of several fragments, and each fragment is
 * processed differently depending on its size. Fragments smaller
 * than the bcopy threshold are processed with bcopy; larger
 * fragments are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. A tx control block contains one tx buffer, which is
 * used to copy mblk fragments' data, and one tx DMA handle, which
 * is used to bind an mblk fragment to DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and the buffer is then transmitted with a
 * single tx descriptor.
 *
 * A large fragment binds to one tx control block's DMA handle
 * and may span several tx descriptors.
 *
 * So transmitting one packet (mblk) may consume several tx
 * control blocks. After processing, those tx control blocks are
 * put on the work list.
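 *
 * For example (illustrative numbers only): with a 512-byte copy
 * threshold, a packet made of 64-, 32- and 1500-byte fragments uses
 * two tx control blocks: the first two fragments are bcopy'd into one
 * tx buffer (one tx descriptor), while the 1500-byte fragment is
 * DMA-bound through its own control block (one tx descriptor per DMA
 * cookie).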
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_pre_mp = NULL;
	mblk_t *hdr_nmp = NULL;

	ASSERT(mp->b_next == NULL);

	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED) ||
	    ixgbe->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe can
		 * process, then discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload and return mp;
	 * the tx then needs to be re-scheduled.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but not yet placed on the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software must guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		hdr_nmp = mp;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			hdr_pre_mp = hdr_nmp;
			hdr_nmp = hdr_nmp->b_cont;
			len += MBLKL(hdr_nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
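		 *
		 * Illustrative example: if a 54-byte MAC+IP+TCP header is
		 * split 14/40 across the first two mblks and the payload
		 * starts in the third, len equals hdr_len here, so no
		 * reallocation is needed; raising copy_thresh below is
		 * enough to have the whole header bcopy'd into one tx
		 * buffer, and hence into a single descriptor.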
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
		/*
		 * There are two cases in which we need to reallocate an
		 * mblk for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((hdr_nmp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate the mblk for the last header fragment;
			 * it is expected to be bcopy'd into a pre-allocated
			 * page-aligned buffer.
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp)
				return (mp);
			bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
			    hdr_frag_len);
			/* link the new header fragment with the other parts */
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_nmp;
			if (hdr_pre_mp)
				hdr_pre_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_nmp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is processed with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is pending for completion. We have
		 * to process this empty fragment in the tx_copy routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing has just completed, we can simply skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid taking the lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipsets have a limitation that no more than
	 * 32 tx descriptors can be transmitted at one time.
	 *
	 * Here is a workaround: pull up the mblk and then send it out
	 * using DMA binding. This way, no more than MAX_COOKIE (18)
	 * descriptors are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * Pull up the mblk and send it out with DMA binding
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;

			/*
			 * If a new mblk has been allocated for the last header
			 * fragment of an LSO packet, we should restore the
			 * modified mp.
			 */
			if (hdr_new_mp) {
				hdr_new_mp->b_cont = NULL;
				freeb(hdr_new_mp);
				hdr_nmp->b_rptr -= hdr_frag_len;
				if (hdr_pre_mp)
					hdr_pre_mp->b_cont = hdr_nmp;
				else
					mp = hdr_nmp;
			}
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, we simply transmit
		 * the header in one descriptor using the copy method.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit,
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission has succeeded, free the original
	 * mp if we used the pulled-up mblk for transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If transmission fails, we need to free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_nmp->b_rptr -= hdr_frag_len;
		if (hdr_pre_mp)
			hdr_pre_mp->b_cont = hdr_nmp;
		else
			mp = hdr_nmp;
	}
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
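		 *
		 * For example (illustrative): a fragment that the DMA engine
		 * splits into three cookies yields three saved address/length
		 * pairs and, later, three tx descriptors.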
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet is dropped here
	 * if the h/w checksum flags are not declared.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, it is also necessary to fill
			 * the tcp checksum field of the packet with the
			 * following pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has already done it.
			 */
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The fields that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above fields has changed, a new context
	 * descriptor will be needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
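	 *
	 * The fields written below are vlan_macip_lens (MAC and IP header
	 * lengths), type_tucmd_mlhl (descriptor type and L4 protocol type),
	 * seqnum_seed, and mss_l4len_idx (MSS and L4 header length, used
	 * only for LSO).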
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
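	 *
	 * For example (illustrative indexes only): a tx control block whose
	 * descriptors occupy ring slots 10..12 is attached only at
	 * work_list[10]; slots 11 and 12 stay NULL, and the recycle routines
	 * step over them using the control block's desc_num.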
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * Set paylen in every first_tbd for all parts.
	 * 82599, X540 and X550 require the packet length in the paylen field
	 * with or without LSO, and 82598 will ignore it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race in which
	 * the descriptors are transmitted and the tx recycling increases
	 * tbd_free before this decrement has taken place.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks are freed, and those
 * tx control blocks are returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet,
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there is no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, so we still need
	 * to sync the head write-back value for the kernel.
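	 *
	 * The partial ddi_dma_sync() below therefore starts at offset
	 * sizeof (union ixgbe_adv_tx_desc) * ring_size (just past the last
	 * real descriptor) and covers only the sizeof (uint32_t) head
	 * write-back word, instead of the full-ring sync shown here: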
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}