/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through one specified tx ring.
 *
 * One mblk can consist of several fragments; each fragment is processed
 * differently depending on its size. Fragments smaller than the bcopy
 * threshold are processed with bcopy; otherwise they are processed with
 * DMA binding.
 *
 * To process the mblk, a tx control block is taken from the free list.
 * One tx control block contains one tx buffer, which is used to copy the
 * data of mblk fragments, and one tx DMA handle, which is used to bind
 * an mblk fragment to DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control block's
 * buffer, and then the buffer is transmitted with one tx descriptor.
 *
 * A large fragment binds to a single tx control block's DMA handle, and
 * it can span several tx descriptors for transmission.
 *
 * So several tx control blocks may be used to transmit one packet (mblk).
 * After the processing, those tx control blocks are put on the work list.
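 *
 * For example, assuming a copy threshold of, say, 512 bytes, a packet made
 * up of a 54-byte header fragment followed by a 1446-byte payload fragment
 * would have the header bcopy'd into one tx control block's buffer (one
 * descriptor) and the payload DMA-bound through another tx control block's
 * DMA handle (one descriptor per DMA cookie).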
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_pre_mp = NULL;
	mblk_t *hdr_nmp = NULL;

	ASSERT(mp->b_next == NULL);

	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED) ||
	    ixgbe->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe can
		 * process, discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload and return mp;
	 * the tx will need to be rescheduled.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks whose packet data has been processed
	 * but not yet placed on the tx descriptor ring.
	 * It is used to reduce the lock contention on tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software must guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		hdr_nmp = mp;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			hdr_pre_mp = hdr_nmp;
			hdr_nmp = hdr_nmp->b_cont;
			len += MBLKL(hdr_nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
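		 * In that case the copy threshold adjustment below is
		 * sufficient and no mblk reallocation is needed.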
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
		/*
		 * There are two cases where we need to reallocate an mblk
		 * for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk as the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((hdr_nmp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate the mblk for the last header fragment,
			 * expecting it to be bcopy'd into the pre-allocated
			 * page-aligned buffer.
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp)
				return (mp);
			bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
			    hdr_frag_len);
			/* link the new header fragment with the other parts */
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_nmp;
			if (hdr_pre_mp)
				hdr_pre_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_nmp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is processed with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied into the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied into the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied into the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending completion. We have
		 * to process this empty fragment in the tx_copy routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * has just completed, we can simply skip this empty
		 * fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block onto the pending list
			 * to avoid taking the tx lock too early.
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied into the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment into the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipsets have a limitation that no more than
	 * 32 tx descriptors can be transmitted out at one time.
	 *
	 * Here is a workaround: pull up the mblk and then send it out
	 * using DMA binding. By doing so, no more than MAX_COOKIE (18)
	 * descriptors are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * Pull up the mblk and send it out using DMA binding.
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;

			/*
			 * If a new mblk has been allocated for the last header
			 * fragment of an LSO packet, we should restore the
			 * modified mp.
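			 * (i.e. free the reallocated header mblk, undo the
			 * b_rptr advance on hdr_nmp, and re-link hdr_pre_mp,
			 * or mp itself, back to hdr_nmp)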
			 */
			if (hdr_new_mp) {
				hdr_new_mp->b_cont = NULL;
				freeb(hdr_new_mp);
				hdr_nmp->b_rptr -= hdr_frag_len;
				if (hdr_pre_mp)
					hdr_pre_mp->b_cont = hdr_nmp;
				else
					mp = hdr_nmp;
			}
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, we simply transmit
		 * the header in one descriptor using the copy path.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors, otherwise tx recycle will
	 * fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit,
	 * then return mp.
	 *
	 * Note: we must perform this check under mutex protection to
	 * ensure correctness when multiple threads access it in parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission succeeded, free the original mp
	 * if we used the pulled-up mblk for transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If transmission fails, free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_nmp->b_rptr -= hdr_frag_len;
		if (hdr_pre_mp)
			hdr_pre_mp->b_cont = hdr_nmp;
		else
			mp = hdr_nmp;
	}
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previously copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If this is the last fragment copied into the current tx buffer,
	 * in other words, if there is no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. Each cookie requires
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet is dropped here
	 * if the h/w checksum flags are not declared.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First get the position of the ether_type/ether_tpid.
	 * We don't assume the ether (VLAN) header is fully contained
	 * in one mblk fragment, so we walk through the fragments to
	 * parse the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully contained in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the tcp checksum field of
			 * the packet also needs to be filled with the
			 * pseudo-header checksum over (ip_source_addr,
			 * ip_destination_addr, l4_proto). Currently the
			 * tcp/ip stack has already done this.
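			 * (Note that this pseudo-header checksum deliberately
			 * excludes the TCP length, since the hardware
			 * recomputes the length for every segment; that is
			 * also why ipha_length and ipha_hdr_checksum are
			 * zeroed above.)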
			 */
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * The l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The fields to
	 * be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above fields has changed, a new context
	 * descriptor will be needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
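	 *
	 * vlan_macip_lens carries the IP and MAC header lengths,
	 * type_tucmd_mlhl carries the descriptor type along with the
	 * IPv4 and L4 protocol type flags, and mss_l4len_idx carries
	 * the MSS and TCP header length used for LSO.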
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * to the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
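	 *
	 * For example, a tx control block that generates three descriptors
	 * starting at index i is stored only in work_list[i]; the
	 * work_list[i + 1] and work_list[i + 2] slots stay NULL.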
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are
	 * only valid in the first descriptor of the packet.
	 * The payload length (paylen) is set in the first descriptor of
	 * every packet: 82599 and X540 require the packet length in the
	 * paylen field with or without LSO, while 82598 ignores it in
	 * non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set.
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race condition
	 * where the descriptors are transmitted and tbd_free is incremented
	 * by the tx recycling before we have decremented it here.
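	 * If that happened, tbd_free would transiently overstate the number
	 * of free descriptors (possibly even exceeding the ring size), and
	 * another transmit thread could over-commit descriptors.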
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks are freed, and those
 * tx control blocks are returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet,
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there is no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to determine whether
		 * the index is a valid value or not.
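		 * (A tcb fresh from the free list has last_index reset to
		 * MAX_TX_RING_SIZE by ixgbe_free_tcb(); it only gets a valid
		 * value once ixgbe_tx_fill_ring() has completed the packet,
		 * so an invalid value here stops the recycling.)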
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors were recycled, there is no need to do
	 * more processing.
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: in head write-back mode, the tx descriptors will not
	 * be written back; instead, the head write-back value is stored
	 * in an extra tbd at the end of the DMA area, so we still need
	 * to sync that value for the kernel.
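	 * That is why a partial ddi_dma_sync() of only the extra slot at
	 * offset ring_size * sizeof (union ixgbe_adv_tx_desc) is done
	 * below, instead of the usual full-ring sync: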
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors were recycled, there is no need to do
	 * more processing.
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * the DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back on the free list.
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented
 * with the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of free tx control blocks in the free list.
	 * This operation must be placed under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}