1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008-2013 Solarflare Communications Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/sysmacros.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/atomic.h> 32 #include <sys/stream.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/pattr.h> 36 #include <sys/cpu.h> 37 38 #include <sys/ethernet.h> 39 #include <inet/ip.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip.h> 43 #include <netinet/tcp.h> 44 45 #include "sfxge.h" 46 47 #include "efx.h" 48 49 /* TXQ flush response timeout (in microseconds) */ 50 #define SFXGE_TX_QFLUSH_USEC (2000000) 51 #define EVQ_0 0 52 53 /* See sfxge.conf.private for descriptions */ 54 #define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 4096 55 #define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 256 56 57 58 /* Transmit buffer DMA attributes */ 59 static ddi_device_acc_attr_t sfxge_tx_buffer_devacc = { 60 61 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ 62 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ 63 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */ 64 }; 65 66 static ddi_dma_attr_t sfxge_tx_buffer_dma_attr = { 67 DMA_ATTR_V0, /* dma_attr_version */ 68 0, /* dma_attr_addr_lo */ 69 0xffffffffffffffffull, /* dma_attr_addr_hi */ 70 0xffffffffffffffffull, /* dma_attr_count_max */ 71 SFXGE_TX_BUFFER_SIZE, /* dma_attr_align */ 72 0xffffffff, /* dma_attr_burstsizes */ 73 1, /* dma_attr_minxfer */ 74 0xffffffffffffffffull, /* dma_attr_maxxfer */ 75 0xffffffffffffffffull, /* dma_attr_seg */ 76 1, /* dma_attr_sgllen */ 77 1, /* dma_attr_granular */ 78 0 /* dma_attr_flags */ 79 }; 80 81 /* Transmit mapping DMA attributes */ 82 static ddi_dma_attr_t sfxge_tx_mapping_dma_attr = { 83 DMA_ATTR_V0, /* dma_attr_version */ 84 0, /* dma_attr_addr_lo */ 85 0xffffffffffffffffull, /* dma_attr_addr_hi */ 86 0xffffffffffffffffull, /* dma_attr_count_max */ 87 1, /* dma_attr_align */ 88 0xffffffff, /* dma_attr_burstsizes */ 89 1, /* dma_attr_minxfer */ 90 0xffffffffffffffffull, /* dma_attr_maxxfer */ 91 0xffffffffffffffffull, /* dma_attr_seg */ 92 0x7fffffff, /* dma_attr_sgllen */ 93 1, /* dma_attr_granular */ 94 0 /* dma_attr_flags */ 95 }; 96 97 /* Transmit queue DMA attributes */ 98 static ddi_device_acc_attr_t sfxge_txq_devacc = { 99 100 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ 101 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ 102 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */ 103 }; 104 105 static ddi_dma_attr_t sfxge_txq_dma_attr = { 106 DMA_ATTR_V0, /* dma_attr_version */ 107 0, /* dma_attr_addr_lo */ 108 0xffffffffffffffffull, /* dma_attr_addr_hi */ 109 0xffffffffffffffffull, /* dma_attr_count_max */ 110 
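	/*
	 * The descriptor ring is mapped through the controller's buffer
	 * table (see sfxge_sram_buf_tbl_set() below), so the ring memory
	 * must be aligned on an EFX_BUF_SIZE boundary.
	 */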
EFX_BUF_SIZE, /* dma_attr_align */ 111 0xffffffff, /* dma_attr_burstsizes */ 112 1, /* dma_attr_minxfer */ 113 0xffffffffffffffffull, /* dma_attr_maxxfer */ 114 0xffffffffffffffffull, /* dma_attr_seg */ 115 1, /* dma_attr_sgllen */ 116 1, /* dma_attr_granular */ 117 0 /* dma_attr_flags */ 118 }; 119 120 121 /* 122 * A sfxge_tx_qdpl_swizzle() can happen when the DPL get list is one packet 123 * under the limit, and must move all packets from the DPL put->get list 124 * Hence this is the real maximum length of the TX DPL get list. 125 */ 126 static int 127 sfxge_tx_dpl_get_pkt_max(sfxge_txq_t *stp) 128 { 129 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 130 return (stdp->get_pkt_limit + stdp->put_pkt_limit - 1); 131 } 132 133 134 static int 135 sfxge_tx_packet_ctor(void *buf, void *arg, int kmflags) 136 { 137 _NOTE(ARGUNUSED(arg, kmflags)) 138 139 bzero(buf, sizeof (sfxge_tx_packet_t)); 140 141 return (0); 142 } 143 144 static void 145 sfxge_tx_packet_dtor(void *buf, void *arg) 146 { 147 sfxge_tx_packet_t *stpp = buf; 148 149 _NOTE(ARGUNUSED(arg)) 150 151 SFXGE_OBJ_CHECK(stpp, sfxge_tx_packet_t); 152 } 153 154 static int 155 sfxge_tx_buffer_ctor(void *buf, void *arg, int kmflags) 156 { 157 sfxge_tx_buffer_t *stbp = buf; 158 sfxge_t *sp = arg; 159 sfxge_dma_buffer_attr_t dma_attr; 160 int rc; 161 162 bzero(buf, sizeof (sfxge_tx_buffer_t)); 163 164 dma_attr.sdba_dip = sp->s_dip; 165 dma_attr.sdba_dattrp = &sfxge_tx_buffer_dma_attr; 166 dma_attr.sdba_callback = ((kmflags == KM_SLEEP) ? 167 DDI_DMA_SLEEP : DDI_DMA_DONTWAIT); 168 dma_attr.sdba_length = SFXGE_TX_BUFFER_SIZE; 169 dma_attr.sdba_memflags = DDI_DMA_STREAMING; 170 dma_attr.sdba_devaccp = &sfxge_tx_buffer_devacc; 171 dma_attr.sdba_bindflags = DDI_DMA_WRITE | DDI_DMA_STREAMING; 172 dma_attr.sdba_maxcookies = 1; 173 dma_attr.sdba_zeroinit = B_FALSE; 174 175 if ((rc = sfxge_dma_buffer_create(&(stbp->stb_esm), &dma_attr)) != 0) 176 goto fail1; 177 178 return (0); 179 180 fail1: 181 DTRACE_PROBE1(fail1, int, rc); 182 183 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t); 184 185 return (-1); 186 } 187 188 static void 189 sfxge_tx_buffer_dtor(void *buf, void *arg) 190 { 191 sfxge_tx_buffer_t *stbp = buf; 192 193 _NOTE(ARGUNUSED(arg)) 194 195 sfxge_dma_buffer_destroy(&(stbp->stb_esm)); 196 197 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t); 198 } 199 200 static int 201 sfxge_tx_mapping_ctor(void *buf, void *arg, int kmflags) 202 { 203 sfxge_tx_mapping_t *stmp = buf; 204 sfxge_t *sp = arg; 205 dev_info_t *dip = sp->s_dip; 206 int rc; 207 208 bzero(buf, sizeof (sfxge_tx_mapping_t)); 209 210 stmp->stm_sp = sp; 211 212 /* Allocate DMA handle */ 213 rc = ddi_dma_alloc_handle(dip, &sfxge_tx_mapping_dma_attr, 214 (kmflags == KM_SLEEP) ? 
DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, 215 NULL, &(stmp->stm_dma_handle)); 216 if (rc != DDI_SUCCESS) 217 goto fail1; 218 219 return (0); 220 221 fail1: 222 DTRACE_PROBE1(fail1, int, rc); 223 224 stmp->stm_sp = NULL; 225 226 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t); 227 228 return (-1); 229 } 230 231 static void 232 sfxge_tx_mapping_dtor(void *buf, void *arg) 233 { 234 sfxge_tx_mapping_t *stmp = buf; 235 236 _NOTE(ARGUNUSED(arg)) 237 238 ASSERT3P(stmp->stm_sp, ==, arg); 239 240 /* Free the DMA handle */ 241 ddi_dma_free_handle(&(stmp->stm_dma_handle)); 242 stmp->stm_dma_handle = NULL; 243 244 stmp->stm_sp = NULL; 245 246 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t); 247 } 248 249 static int 250 sfxge_tx_qctor(void *buf, void *arg, int kmflags) 251 { 252 sfxge_txq_t *stp = buf; 253 efsys_mem_t *esmp = &(stp->st_mem); 254 sfxge_t *sp = arg; 255 sfxge_dma_buffer_attr_t dma_attr; 256 sfxge_tx_dpl_t *stdp; 257 int rc; 258 259 /* Compile-time structure layout checks */ 260 EFX_STATIC_ASSERT(sizeof (stp->__st_u1.__st_s1) <= 261 sizeof (stp->__st_u1.__st_pad)); 262 EFX_STATIC_ASSERT(sizeof (stp->__st_u2.__st_s2) <= 263 sizeof (stp->__st_u2.__st_pad)); 264 EFX_STATIC_ASSERT(sizeof (stp->__st_u3.__st_s3) <= 265 sizeof (stp->__st_u3.__st_pad)); 266 EFX_STATIC_ASSERT(sizeof (stp->__st_u4.__st_s4) <= 267 sizeof (stp->__st_u4.__st_pad)); 268 269 bzero(buf, sizeof (sfxge_txq_t)); 270 271 stp->st_sp = sp; 272 273 dma_attr.sdba_dip = sp->s_dip; 274 dma_attr.sdba_dattrp = &sfxge_txq_dma_attr; 275 dma_attr.sdba_callback = DDI_DMA_SLEEP; 276 dma_attr.sdba_length = EFX_TXQ_SIZE(SFXGE_TX_NDESCS); 277 dma_attr.sdba_memflags = DDI_DMA_CONSISTENT; 278 dma_attr.sdba_devaccp = &sfxge_txq_devacc; 279 dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT; 280 dma_attr.sdba_maxcookies = EFX_TXQ_NBUFS(SFXGE_TX_NDESCS); 281 dma_attr.sdba_zeroinit = B_FALSE; 282 283 if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0) 284 goto fail1; 285 286 /* Allocate some buffer table entries */ 287 if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS), 288 &(stp->st_id))) != 0) 289 goto fail2; 290 291 /* Allocate the descriptor array */ 292 if ((stp->st_eb = kmem_zalloc(sizeof (efx_buffer_t) * 293 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS), kmflags)) == NULL) { 294 rc = ENOMEM; 295 goto fail3; 296 } 297 298 /* Allocate the context arrays */ 299 if ((stp->st_stmp = kmem_zalloc(sizeof (sfxge_tx_mapping_t *) * 300 SFXGE_TX_NDESCS, kmflags)) == NULL) { 301 rc = ENOMEM; 302 goto fail4; 303 } 304 305 if ((stp->st_stbp = kmem_zalloc(sizeof (sfxge_tx_buffer_t *) * 306 SFXGE_TX_NDESCS, kmflags)) == NULL) { 307 rc = ENOMEM; 308 goto fail5; 309 } 310 311 if ((stp->st_mp = kmem_zalloc(sizeof (mblk_t *) * 312 SFXGE_TX_NDESCS, kmflags)) == NULL) { 313 rc = ENOMEM; 314 goto fail6; 315 } 316 317 /* Initialize the deferred packet list */ 318 stdp = &(stp->st_dpl); 319 stdp->std_getp = &(stdp->std_get); 320 321 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 322 323 return (0); 324 325 fail6: 326 DTRACE_PROBE(fail6); 327 328 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS); 329 stp->st_stbp = NULL; 330 331 fail5: 332 DTRACE_PROBE(fail5); 333 334 kmem_free(stp->st_stmp, 335 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS); 336 stp->st_stmp = NULL; 337 338 fail4: 339 DTRACE_PROBE(fail4); 340 341 /* Free the descriptor array */ 342 kmem_free(stp->st_eb, sizeof (efx_buffer_t) * 343 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 344 stp->st_eb = NULL; 345 346 fail3: 347 DTRACE_PROBE(fail3); 348 349 /* Free the buffer table entries */ 350 
sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 351 stp->st_id = 0; 352 353 fail2: 354 DTRACE_PROBE(fail2); 355 356 /* Tear down DMA setup */ 357 sfxge_dma_buffer_destroy(esmp); 358 359 fail1: 360 DTRACE_PROBE1(fail1, int, rc); 361 362 stp->st_sp = NULL; 363 364 SFXGE_OBJ_CHECK(stp, sfxge_txq_t); 365 366 return (-1); 367 } 368 369 static void 370 sfxge_tx_qdtor(void *buf, void *arg) 371 { 372 sfxge_txq_t *stp = buf; 373 efsys_mem_t *esmp = &(stp->st_mem); 374 sfxge_t *sp = stp->st_sp; 375 sfxge_tx_dpl_t *stdp; 376 377 _NOTE(ARGUNUSED(arg)) 378 379 stp->st_unblock = 0; 380 381 /* Tear down the deferred packet list */ 382 stdp = &(stp->st_dpl); 383 ASSERT3P(stdp->std_getp, ==, &(stdp->std_get)); 384 stdp->std_getp = NULL; 385 386 /* Free the context arrays */ 387 kmem_free(stp->st_mp, sizeof (mblk_t *) * SFXGE_TX_NDESCS); 388 stp->st_mp = NULL; 389 390 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS); 391 stp->st_stbp = NULL; 392 393 kmem_free(stp->st_stmp, 394 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS); 395 stp->st_stmp = NULL; 396 397 /* Free the descriptor array */ 398 kmem_free(stp->st_eb, sizeof (efx_buffer_t) * 399 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 400 stp->st_eb = NULL; 401 402 /* Free the buffer table entries */ 403 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 404 stp->st_id = 0; 405 406 /* Tear down dma setup */ 407 sfxge_dma_buffer_destroy(esmp); 408 409 stp->st_sp = NULL; 410 411 SFXGE_OBJ_CHECK(stp, sfxge_txq_t); 412 } 413 414 static void 415 sfxge_tx_packet_destroy(sfxge_t *sp, sfxge_tx_packet_t *stpp) 416 { 417 kmem_cache_free(sp->s_tpc, stpp); 418 } 419 420 static sfxge_tx_packet_t * 421 sfxge_tx_packet_create(sfxge_t *sp) 422 { 423 sfxge_tx_packet_t *stpp; 424 425 stpp = kmem_cache_alloc(sp->s_tpc, KM_NOSLEEP); 426 427 return (stpp); 428 } 429 430 static inline int 431 sfxge_tx_qfpp_put(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp) 432 { 433 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 434 435 ASSERT(mutex_owned(&(stp->st_lock))); 436 437 ASSERT3P(stpp->stp_next, ==, NULL); 438 ASSERT3P(stpp->stp_mp, ==, NULL); 439 ASSERT3P(stpp->stp_etherhp, ==, NULL); 440 ASSERT3P(stpp->stp_iphp, ==, NULL); 441 ASSERT3P(stpp->stp_thp, ==, NULL); 442 ASSERT3U(stpp->stp_off, ==, 0); 443 ASSERT3U(stpp->stp_size, ==, 0); 444 ASSERT3U(stpp->stp_mss, ==, 0); 445 ASSERT3U(stpp->stp_dpl_put_len, ==, 0); 446 447 if (stfp->stf_count < SFXGE_TX_FPP_MAX) { 448 /* Add to the start of the list */ 449 stpp->stp_next = stfp->stf_stpp; 450 stfp->stf_stpp = stpp; 451 stfp->stf_count++; 452 453 return (0); 454 } 455 456 DTRACE_PROBE(fpp_full); 457 return (ENOSPC); 458 } 459 460 static inline sfxge_tx_packet_t * 461 sfxge_tx_qfpp_get(sfxge_txq_t *stp) 462 { 463 sfxge_tx_packet_t *stpp; 464 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 465 466 ASSERT(mutex_owned(&(stp->st_lock))); 467 468 stpp = stfp->stf_stpp; 469 if (stpp == NULL) { 470 ASSERT3U(stfp->stf_count, ==, 0); 471 return (NULL); 472 } 473 474 /* Remove item from the head of the list */ 475 stfp->stf_stpp = stpp->stp_next; 476 stpp->stp_next = NULL; 477 478 ASSERT3U(stfp->stf_count, >, 0); 479 stfp->stf_count--; 480 481 if (stfp->stf_count != 0) { 482 ASSERT(stfp->stf_stpp != NULL); 483 prefetch_read_many(stfp->stf_stpp); 484 } 485 return (stpp); 486 } 487 488 static void 489 sfxge_tx_qfpp_empty(sfxge_txq_t *stp) 490 { 491 sfxge_t *sp = stp->st_sp; 492 sfxge_tx_fpp_t *stfp = &(stp->st_fpp); 493 sfxge_tx_packet_t *stpp; 494 495 mutex_enter(&(stp->st_lock)); 496 497 stpp = stfp->stf_stpp; 
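	/* Detach the whole free packet list; each packet is destroyed below */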
498 stfp->stf_stpp = NULL; 499 500 while (stpp != NULL) { 501 sfxge_tx_packet_t *next; 502 503 next = stpp->stp_next; 504 stpp->stp_next = NULL; 505 506 ASSERT3U(stfp->stf_count, >, 0); 507 stfp->stf_count--; 508 509 sfxge_tx_packet_destroy(sp, stpp); 510 511 stpp = next; 512 } 513 ASSERT3U(stfp->stf_count, ==, 0); 514 515 mutex_exit(&(stp->st_lock)); 516 } 517 518 static inline void 519 sfxge_tx_qfbp_put(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp) 520 { 521 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 522 523 ASSERT3P(stbp->stb_next, ==, NULL); 524 ASSERT3U(stbp->stb_off, ==, 0); 525 ASSERT3U(stbp->stb_esm.esm_size, ==, 0); 526 527 stbp->stb_next = stfp->stf_stbp; 528 stfp->stf_stbp = stbp; 529 stfp->stf_count++; 530 } 531 532 533 static inline sfxge_tx_buffer_t * 534 sfxge_tx_qfbp_get(sfxge_txq_t *stp) 535 { 536 sfxge_tx_buffer_t *stbp; 537 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 538 539 stbp = stfp->stf_stbp; 540 if (stbp == NULL) { 541 ASSERT3U(stfp->stf_count, ==, 0); 542 return (NULL); 543 } 544 545 stfp->stf_stbp = stbp->stb_next; 546 stbp->stb_next = NULL; 547 548 ASSERT3U(stfp->stf_count, >, 0); 549 stfp->stf_count--; 550 551 if (stfp->stf_count != 0) { 552 ASSERT(stfp->stf_stbp != NULL); 553 prefetch_read_many(stfp->stf_stbp); 554 } 555 556 return (stbp); 557 } 558 559 static void 560 sfxge_tx_qfbp_empty(sfxge_txq_t *stp) 561 { 562 sfxge_t *sp = stp->st_sp; 563 sfxge_tx_fbp_t *stfp = &(stp->st_fbp); 564 sfxge_tx_buffer_t *stbp; 565 566 mutex_enter(&(stp->st_lock)); 567 568 stbp = stfp->stf_stbp; 569 stfp->stf_stbp = NULL; 570 571 while (stbp != NULL) { 572 sfxge_tx_buffer_t *next; 573 574 next = stbp->stb_next; 575 stbp->stb_next = NULL; 576 577 ASSERT3U(stfp->stf_count, >, 0); 578 stfp->stf_count--; 579 580 kmem_cache_free(sp->s_tbc, stbp); 581 582 stbp = next; 583 } 584 ASSERT3U(stfp->stf_count, ==, 0); 585 586 mutex_exit(&(stp->st_lock)); 587 } 588 589 static inline void 590 sfxge_tx_qfmp_put(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp) 591 { 592 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 593 594 ASSERT3P(stmp->stm_next, ==, NULL); 595 ASSERT3P(stmp->stm_mp, ==, NULL); 596 ASSERT3P(stmp->stm_base, ==, NULL); 597 ASSERT3U(stmp->stm_off, ==, 0); 598 ASSERT3U(stmp->stm_size, ==, 0); 599 600 stmp->stm_next = stfp->stf_stmp; 601 stfp->stf_stmp = stmp; 602 stfp->stf_count++; 603 } 604 605 static inline sfxge_tx_mapping_t * 606 sfxge_tx_qfmp_get(sfxge_txq_t *stp) 607 { 608 sfxge_tx_mapping_t *stmp; 609 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 610 611 stmp = stfp->stf_stmp; 612 if (stmp == NULL) { 613 ASSERT3U(stfp->stf_count, ==, 0); 614 return (NULL); 615 } 616 617 stfp->stf_stmp = stmp->stm_next; 618 stmp->stm_next = NULL; 619 620 ASSERT3U(stfp->stf_count, >, 0); 621 stfp->stf_count--; 622 623 if (stfp->stf_count != 0) { 624 ASSERT(stfp->stf_stmp != NULL); 625 prefetch_read_many(stfp->stf_stmp); 626 } 627 return (stmp); 628 } 629 630 static void 631 sfxge_tx_qfmp_empty(sfxge_txq_t *stp) 632 { 633 sfxge_t *sp = stp->st_sp; 634 sfxge_tx_fmp_t *stfp = &(stp->st_fmp); 635 sfxge_tx_mapping_t *stmp; 636 637 mutex_enter(&(stp->st_lock)); 638 639 stmp = stfp->stf_stmp; 640 stfp->stf_stmp = NULL; 641 642 while (stmp != NULL) { 643 sfxge_tx_mapping_t *next; 644 645 next = stmp->stm_next; 646 stmp->stm_next = NULL; 647 648 ASSERT3U(stfp->stf_count, >, 0); 649 stfp->stf_count--; 650 651 kmem_cache_free(sp->s_tmc, stmp); 652 653 stmp = next; 654 } 655 ASSERT3U(stfp->stf_count, ==, 0); 656 657 mutex_exit(&(stp->st_lock)); 658 } 659 660 static void 661 sfxge_tx_msgb_unbind(sfxge_tx_mapping_t *stmp) 662 { 663 
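	/*
	 * Forget the per-page addresses recorded by sfxge_tx_msgb_bind() and
	 * release the DMA handle binding; the caller remains responsible for
	 * freeing the mblk itself.
	 */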
bzero(stmp->stm_addr, sizeof (uint64_t) * SFXGE_TX_MAPPING_NADDR); 664 stmp->stm_off = 0; 665 666 (void) ddi_dma_unbind_handle(stmp->stm_dma_handle); 667 668 stmp->stm_size = 0; 669 stmp->stm_base = NULL; 670 671 stmp->stm_mp = NULL; 672 } 673 674 #define SFXGE_TX_DESCSHIFT 12 675 #define SFXGE_TX_DESCSIZE (1 << 12) 676 677 #define SFXGE_TX_DESCOFFSET (SFXGE_TX_DESCSIZE - 1) 678 #define SFXGE_TX_DESCMASK (~SFXGE_TX_DESCOFFSET) 679 680 static int 681 sfxge_tx_msgb_bind(mblk_t *mp, sfxge_tx_mapping_t *stmp) 682 { 683 ddi_dma_cookie_t dmac; 684 unsigned int ncookies; 685 size_t size; 686 unsigned int n; 687 int rc; 688 689 ASSERT(mp != NULL); 690 ASSERT3U(DB_TYPE(mp), ==, M_DATA); 691 692 ASSERT(stmp->stm_mp == NULL); 693 stmp->stm_mp = mp; 694 695 stmp->stm_base = (caddr_t)(mp->b_rptr); 696 stmp->stm_size = MBLKL(mp); 697 698 /* Bind the STREAMS block to the mapping */ 699 rc = ddi_dma_addr_bind_handle(stmp->stm_dma_handle, NULL, 700 stmp->stm_base, stmp->stm_size, DDI_DMA_WRITE | DDI_DMA_STREAMING, 701 DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies); 702 if (rc != DDI_DMA_MAPPED) 703 goto fail1; 704 705 ASSERT3U(ncookies, <=, SFXGE_TX_MAPPING_NADDR); 706 707 /* 708 * Construct an array of addresses and an initial 709 * offset. 710 */ 711 n = 0; 712 stmp->stm_addr[n++] = dmac.dmac_laddress & SFXGE_TX_DESCMASK; 713 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress & SFXGE_TX_DESCMASK); 714 715 stmp->stm_off = dmac.dmac_laddress & SFXGE_TX_DESCOFFSET; 716 717 size = MIN(SFXGE_TX_DESCSIZE - stmp->stm_off, dmac.dmac_size); 718 dmac.dmac_laddress += size; 719 dmac.dmac_size -= size; 720 721 for (;;) { 722 ASSERT3U(n, <, SFXGE_TX_MAPPING_NADDR); 723 724 if (dmac.dmac_size == 0) { 725 if (--ncookies == 0) 726 break; 727 728 ddi_dma_nextcookie(stmp->stm_dma_handle, &dmac); 729 } 730 731 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCMASK) != 0); 732 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCOFFSET) == 0); 733 stmp->stm_addr[n++] = dmac.dmac_laddress; 734 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress); 735 736 size = MIN(SFXGE_TX_DESCSIZE, dmac.dmac_size); 737 dmac.dmac_laddress += size; 738 dmac.dmac_size -= size; 739 } 740 ASSERT3U(n, <=, SFXGE_TX_MAPPING_NADDR); 741 742 return (0); 743 744 fail1: 745 DTRACE_PROBE1(fail1, int, rc); 746 747 stmp->stm_size = 0; 748 stmp->stm_base = NULL; 749 750 stmp->stm_mp = NULL; 751 752 return (-1); 753 } 754 755 static void 756 sfxge_tx_qreap(sfxge_txq_t *stp) 757 { 758 unsigned int reaped; 759 760 ASSERT(mutex_owned(&(stp->st_lock))); 761 762 reaped = stp->st_reaped; 763 while (reaped != stp->st_completed) { 764 unsigned int id; 765 sfxge_tx_mapping_t *stmp; 766 sfxge_tx_buffer_t *stbp; 767 768 id = reaped++ & (SFXGE_TX_NDESCS - 1); 769 770 ASSERT3P(stp->st_mp[id], ==, NULL); 771 772 if ((stmp = stp->st_stmp[id]) != NULL) { 773 stp->st_stmp[id] = NULL; 774 775 /* Free all the mappings */ 776 do { 777 sfxge_tx_mapping_t *next; 778 779 next = stmp->stm_next; 780 stmp->stm_next = NULL; 781 782 sfxge_tx_qfmp_put(stp, stmp); 783 784 stmp = next; 785 } while (stmp != NULL); 786 } 787 788 if ((stbp = stp->st_stbp[id]) != NULL) { 789 stp->st_stbp[id] = NULL; 790 791 /* Free all the buffers */ 792 do { 793 sfxge_tx_buffer_t *next; 794 795 next = stbp->stb_next; 796 stbp->stb_next = NULL; 797 798 stbp->stb_esm.esm_size = 0; 799 stbp->stb_off = 0; 800 801 sfxge_tx_qfbp_put(stp, stbp); 802 803 stbp = next; 804 } while (stbp != NULL); 805 } 806 } 807 stp->st_reaped = reaped; 808 } 809 810 static void 811 sfxge_tx_qlist_abort(sfxge_txq_t *stp) 812 { 813 unsigned int id; 814 
	sfxge_tx_mapping_t *stmp;
	sfxge_tx_buffer_t *stbp;
	mblk_t *mp;

	ASSERT(mutex_owned(&(stp->st_lock)));

	id = stp->st_added & (SFXGE_TX_NDESCS - 1);

	/* Clear the completion information */
	stmp = stp->st_stmp[id];
	stp->st_stmp[id] = NULL;

	/* Free any mappings that were used */
	while (stmp != NULL) {
		sfxge_tx_mapping_t *next;

		next = stmp->stm_next;
		stmp->stm_next = NULL;

		if (stmp->stm_mp != NULL)
			sfxge_tx_msgb_unbind(stmp);

		sfxge_tx_qfmp_put(stp, stmp);

		stmp = next;
	}

	stbp = stp->st_stbp[id];
	stp->st_stbp[id] = NULL;

	/* Free any buffers that were used */
	while (stbp != NULL) {
		sfxge_tx_buffer_t *next;

		next = stbp->stb_next;
		stbp->stb_next = NULL;

		stbp->stb_off = 0;
		stbp->stb_esm.esm_size = 0;

		sfxge_tx_qfbp_put(stp, stbp);

		stbp = next;
	}

	mp = stp->st_mp[id];
	stp->st_mp[id] = NULL;

	if (mp != NULL)
		freemsg(mp);

	/* Clear the fragment list */
	stp->st_n = 0;
}

/* Push descriptors to the TX ring, setting blocked if there is no space */
static void
sfxge_tx_qlist_post(sfxge_txq_t *stp)
{
	unsigned int id;
	unsigned int level;
	unsigned int available;
	int rc;

	ASSERT(mutex_owned(&(stp->st_lock)));

	ASSERT(stp->st_n != 0);

again:
	level = stp->st_added - stp->st_reaped;
	available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;

	id = stp->st_added & (SFXGE_TX_NDESCS - 1);

	if (available < stp->st_n) {
		rc = ENOSPC;
		goto fail1;
	}

	ASSERT3U(available, >=, stp->st_n);

	/* Post the fragment list */
	if ((rc = efx_tx_qpost(stp->st_etp, stp->st_eb, stp->st_n,
	    stp->st_reaped, &(stp->st_added))) != 0)
		goto fail2;

	/*
	 * If the list took more than a single descriptor then we need to
	 * move the completion information so it is referenced by the last
	 * descriptor.
	 */
	if (((stp->st_added - 1) & (SFXGE_TX_NDESCS - 1)) != id) {
		sfxge_tx_mapping_t *stmp;
		sfxge_tx_buffer_t *stbp;
		mblk_t *mp;

		stmp = stp->st_stmp[id];
		stp->st_stmp[id] = NULL;

		stbp = stp->st_stbp[id];
		stp->st_stbp[id] = NULL;

		mp = stp->st_mp[id];
		stp->st_mp[id] = NULL;

		id = (stp->st_added - 1) & (SFXGE_TX_NDESCS - 1);

		ASSERT(stp->st_stmp[id] == NULL);
		stp->st_stmp[id] = stmp;

		ASSERT(stp->st_stbp[id] == NULL);
		stp->st_stbp[id] = stbp;

		ASSERT(stp->st_mp[id] == NULL);
		stp->st_mp[id] = mp;
	}

	/* Make the descriptors visible to the hardware */
	(void) ddi_dma_sync(stp->st_mem.esm_dma_handle,
	    0,
	    EFX_TXQ_SIZE(SFXGE_TX_NDESCS),
	    DDI_DMA_SYNC_FORDEV);

	/* Clear the list */
	stp->st_n = 0;

	ASSERT3U(stp->st_unblock, ==, SFXGE_TXQ_NOT_BLOCKED);
	return;

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	ASSERT(rc == ENOSPC);

	level = stp->st_added - stp->st_completed;
	available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;

	/*
	 * If there would be enough space after we've reaped any completed
	 * mappings and buffers, and we gain sufficient queue space by doing
	 * so, then reap now and try posting again.
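	 *
	 * Reaping is only attempted once at least SFXGE_TX_BATCH completions
	 * have accumulated, so the cost of walking the completed descriptors
	 * is amortised across many packets.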
957 */ 958 if (stp->st_n <= available && 959 stp->st_completed - stp->st_reaped >= SFXGE_TX_BATCH) { 960 sfxge_tx_qreap(stp); 961 962 goto again; 963 } 964 965 /* Set the unblock level */ 966 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) { 967 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL1; 968 } else { 969 ASSERT(stp->st_unblock == SFXGE_TXQ_UNBLOCK_LEVEL1); 970 971 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL2; 972 } 973 974 /* 975 * Avoid a race with completion interrupt handling that could leave the 976 * queue blocked. 977 * 978 * NOTE: The use of st_pending rather than st_completed is intentional 979 * as st_pending is updated per-event rather than per-batch and 980 * therefore avoids needless deferring. 981 */ 982 if (stp->st_pending == stp->st_added) { 983 sfxge_tx_qreap(stp); 984 985 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED; 986 goto again; 987 } 988 989 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED); 990 } 991 992 static int 993 sfxge_tx_kstat_update(kstat_t *ksp, int rw) 994 { 995 sfxge_txq_t *stp = ksp->ks_private; 996 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 997 kstat_named_t *knp; 998 int rc; 999 1000 ASSERT(mutex_owned(&(stp->st_lock))); 1001 1002 if (rw != KSTAT_READ) { 1003 rc = EACCES; 1004 goto fail1; 1005 } 1006 1007 if (stp->st_state != SFXGE_TXQ_STARTED) 1008 goto done; 1009 1010 efx_tx_qstats_update(stp->st_etp, stp->st_stat); 1011 knp = (kstat_named_t *)ksp->ks_data + TX_NQSTATS; 1012 knp->value.ui64 = stdp->get_pkt_limit; 1013 knp++; 1014 knp->value.ui64 = stdp->put_pkt_limit; 1015 knp++; 1016 knp->value.ui64 = stdp->get_full_count; 1017 knp++; 1018 knp->value.ui64 = stdp->put_full_count; 1019 1020 done: 1021 return (0); 1022 1023 fail1: 1024 DTRACE_PROBE1(fail1, int, rc); 1025 1026 return (rc); 1027 } 1028 1029 static int 1030 sfxge_tx_kstat_init(sfxge_txq_t *stp) 1031 { 1032 sfxge_t *sp = stp->st_sp; 1033 unsigned int index = stp->st_index; 1034 dev_info_t *dip = sp->s_dip; 1035 kstat_t *ksp; 1036 kstat_named_t *knp; 1037 char name[MAXNAMELEN]; 1038 unsigned int id; 1039 int rc; 1040 1041 /* Create the set */ 1042 (void) snprintf(name, MAXNAMELEN - 1, "%s_txq%04d", 1043 ddi_driver_name(dip), index); 1044 1045 if ((ksp = kstat_create((char *)ddi_driver_name(dip), 1046 ddi_get_instance(dip), name, "queue", KSTAT_TYPE_NAMED, 1047 TX_NQSTATS + 4, 0)) == NULL) { 1048 rc = ENOMEM; 1049 goto fail1; 1050 } 1051 1052 stp->st_ksp = ksp; 1053 1054 ksp->ks_update = sfxge_tx_kstat_update; 1055 ksp->ks_private = stp; 1056 ksp->ks_lock = &(stp->st_lock); 1057 1058 /* Initialise the named stats */ 1059 stp->st_stat = knp = ksp->ks_data; 1060 for (id = 0; id < TX_NQSTATS; id++) { 1061 kstat_named_init(knp, (char *)efx_tx_qstat_name(sp->s_enp, id), 1062 KSTAT_DATA_UINT64); 1063 knp++; 1064 } 1065 kstat_named_init(knp, "dpl_get_pkt_limit", KSTAT_DATA_UINT64); 1066 knp++; 1067 kstat_named_init(knp, "dpl_put_pkt_limit", KSTAT_DATA_UINT64); 1068 knp++; 1069 kstat_named_init(knp, "dpl_get_full_count", KSTAT_DATA_UINT64); 1070 knp++; 1071 kstat_named_init(knp, "dpl_put_full_count", KSTAT_DATA_UINT64); 1072 1073 kstat_install(ksp); 1074 return (0); 1075 1076 fail1: 1077 DTRACE_PROBE1(fail1, int, rc); 1078 1079 return (rc); 1080 } 1081 1082 static void 1083 sfxge_tx_kstat_fini(sfxge_txq_t *stp) 1084 { 1085 /* Destroy the set */ 1086 kstat_delete(stp->st_ksp); 1087 stp->st_ksp = NULL; 1088 stp->st_stat = NULL; 1089 } 1090 1091 static int 1092 sfxge_tx_qinit(sfxge_t *sp, unsigned int index, sfxge_txq_type_t type, 1093 unsigned int evq) 1094 { 1095 sfxge_txq_t *stp; 1096 sfxge_tx_dpl_t *stdp; 1097 
int rc; 1098 1099 ASSERT3U(index, <, SFXGE_TXQ_NTYPES + SFXGE_RX_SCALE_MAX); 1100 ASSERT3U(type, <, SFXGE_TXQ_NTYPES); 1101 ASSERT3U(evq, <, SFXGE_RX_SCALE_MAX); 1102 1103 stp = kmem_cache_alloc(sp->s_tqc, KM_SLEEP); 1104 stdp = &(stp->st_dpl); 1105 1106 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_UNINITIALIZED); 1107 1108 stp->st_index = index; 1109 stp->st_type = type; 1110 stp->st_evq = evq; 1111 1112 mutex_init(&(stp->st_lock), NULL, MUTEX_DRIVER, 1113 DDI_INTR_PRI(sp->s_intr.si_intr_pri)); 1114 1115 /* Initialize the statistics */ 1116 if ((rc = sfxge_tx_kstat_init(stp)) != 0) 1117 goto fail1; 1118 1119 stdp->get_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip, 1120 DDI_PROP_DONTPASS, "tx_dpl_get_pkt_limit", 1121 SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT); 1122 1123 stdp->put_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip, 1124 DDI_PROP_DONTPASS, "tx_dpl_put_pkt_limit", 1125 SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT); 1126 1127 stp->st_state = SFXGE_TXQ_INITIALIZED; 1128 1129 /* Attach the TXQ to the driver */ 1130 ASSERT3P(sp->s_stp[index], ==, NULL); 1131 sp->s_stp[index] = stp; 1132 sp->s_tx_qcount++; 1133 1134 return (0); 1135 1136 fail1: 1137 DTRACE_PROBE1(fail1, int, rc); 1138 1139 stp->st_evq = 0; 1140 stp->st_type = 0; 1141 stp->st_index = 0; 1142 1143 mutex_destroy(&(stp->st_lock)); 1144 1145 kmem_cache_free(sp->s_tqc, stp); 1146 1147 return (rc); 1148 } 1149 1150 static int 1151 sfxge_tx_qstart(sfxge_t *sp, unsigned int index) 1152 { 1153 sfxge_txq_t *stp = sp->s_stp[index]; 1154 efx_nic_t *enp = sp->s_enp; 1155 efsys_mem_t *esmp; 1156 sfxge_evq_t *sep; 1157 unsigned int evq; 1158 unsigned int flags; 1159 int rc; 1160 1161 mutex_enter(&(stp->st_lock)); 1162 1163 esmp = &(stp->st_mem); 1164 evq = stp->st_evq; 1165 sep = sp->s_sep[evq]; 1166 1167 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED); 1168 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED); 1169 1170 /* Zero the memory */ 1171 (void) memset(esmp->esm_base, 0, EFX_TXQ_SIZE(SFXGE_TX_NDESCS)); 1172 1173 /* Program the buffer table */ 1174 if ((rc = sfxge_sram_buf_tbl_set(sp, stp->st_id, esmp, 1175 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS))) != 0) 1176 goto fail1; 1177 1178 switch (stp->st_type) { 1179 case SFXGE_TXQ_NON_CKSUM: 1180 flags = 0; 1181 break; 1182 1183 case SFXGE_TXQ_IP_CKSUM: 1184 flags = EFX_CKSUM_IPV4; 1185 break; 1186 1187 case SFXGE_TXQ_IP_TCP_UDP_CKSUM: 1188 flags = EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP; 1189 break; 1190 1191 default: 1192 ASSERT(B_FALSE); 1193 1194 flags = 0; 1195 break; 1196 } 1197 1198 /* Create the transmit queue */ 1199 if ((rc = efx_tx_qcreate(enp, index, index, esmp, SFXGE_TX_NDESCS, 1200 stp->st_id, flags, sep->se_eep, &(stp->st_etp))) != 0) 1201 goto fail2; 1202 1203 /* Enable the transmit queue */ 1204 efx_tx_qenable(stp->st_etp); 1205 1206 stp->st_state = SFXGE_TXQ_STARTED; 1207 1208 mutex_exit(&(stp->st_lock)); 1209 1210 return (0); 1211 1212 fail2: 1213 DTRACE_PROBE(fail2); 1214 1215 /* Clear entries from the buffer table */ 1216 sfxge_sram_buf_tbl_clear(sp, stp->st_id, 1217 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS)); 1218 1219 fail1: 1220 DTRACE_PROBE1(fail1, int, rc); 1221 1222 mutex_exit(&(stp->st_lock)); 1223 1224 return (rc); 1225 } 1226 1227 static inline int 1228 sfxge_tx_qmapping_add(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp, 1229 size_t *offp, size_t *limitp) 1230 { 1231 mblk_t *mp; 1232 size_t mapping_off; 1233 size_t mapping_size; 1234 int rc; 1235 1236 ASSERT3U(*offp, <, stmp->stm_size); 1237 ASSERT(*limitp != 0); 1238 1239 mp = stmp->stm_mp; 1240 1241 ASSERT3P(stmp->stm_base, ==, mp->b_rptr); 
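	/* The mapping must still describe this mblk's unmodified data */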
1242 ASSERT3U(stmp->stm_size, ==, MBLKL(mp)); 1243 1244 mapping_off = stmp->stm_off + *offp; 1245 mapping_size = stmp->stm_size - *offp; 1246 1247 while (mapping_size != 0 && *limitp != 0) { 1248 size_t page = 1249 mapping_off >> SFXGE_TX_DESCSHIFT; 1250 size_t page_off = 1251 mapping_off & SFXGE_TX_DESCOFFSET; 1252 size_t page_size = 1253 SFXGE_TX_DESCSIZE - page_off; 1254 efx_buffer_t *ebp; 1255 1256 ASSERT3U(page, <, SFXGE_TX_MAPPING_NADDR); 1257 ASSERT((stmp->stm_addr[page] & 1258 SFXGE_TX_DESCMASK) != 0); 1259 1260 page_size = MIN(page_size, mapping_size); 1261 page_size = MIN(page_size, *limitp); 1262 1263 ASSERT3U(stp->st_n, <=, 1264 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 1265 if (stp->st_n == 1266 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) { 1267 rc = ENOSPC; 1268 goto fail1; 1269 } 1270 1271 ebp = &(stp->st_eb[stp->st_n++]); 1272 ebp->eb_addr = stmp->stm_addr[page] + 1273 page_off; 1274 ebp->eb_size = page_size; 1275 1276 *offp += page_size; 1277 *limitp -= page_size; 1278 1279 mapping_off += page_size; 1280 mapping_size -= page_size; 1281 1282 ebp->eb_eop = (*limitp == 0 || 1283 (mapping_size == 0 && mp->b_cont == NULL)); 1284 1285 DTRACE_PROBE5(tx_mapping_add, 1286 unsigned int, stp->st_index, 1287 unsigned int, stp->st_n - 1, 1288 uint64_t, ebp->eb_addr, 1289 size_t, ebp->eb_size, 1290 boolean_t, ebp->eb_eop); 1291 } 1292 1293 ASSERT3U(*offp, <=, stmp->stm_size); 1294 1295 return (0); 1296 1297 fail1: 1298 DTRACE_PROBE1(fail1, int, rc); 1299 1300 return (rc); 1301 } 1302 1303 static inline int 1304 sfxge_tx_qbuffer_add(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp, boolean_t eop) 1305 { 1306 efx_buffer_t *ebp; 1307 int rc; 1308 1309 ASSERT3U(stp->st_n, <=, 1310 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)); 1311 if (stp->st_n == EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) { 1312 rc = ENOSPC; 1313 goto fail1; 1314 } 1315 1316 ebp = &(stp->st_eb[stp->st_n++]); 1317 ebp->eb_addr = stbp->stb_esm.esm_addr + stbp->stb_off; 1318 ebp->eb_size = stbp->stb_esm.esm_size - stbp->stb_off; 1319 ebp->eb_eop = eop; 1320 1321 (void) ddi_dma_sync(stbp->stb_esm.esm_dma_handle, 1322 stbp->stb_off, ebp->eb_size, 1323 DDI_DMA_SYNC_FORDEV); 1324 1325 stbp->stb_off = stbp->stb_esm.esm_size; 1326 1327 DTRACE_PROBE5(tx_buffer_add, 1328 unsigned int, stp->st_index, 1329 unsigned int, stp->st_n - 1, 1330 uint64_t, ebp->eb_addr, size_t, ebp->eb_size, 1331 boolean_t, ebp->eb_eop); 1332 1333 return (0); 1334 1335 fail1: 1336 DTRACE_PROBE1(fail1, int, rc); 1337 1338 return (rc); 1339 } 1340 1341 static inline boolean_t 1342 sfxge_tx_msgb_copy(mblk_t *mp, sfxge_tx_buffer_t *stbp, size_t *offp, 1343 size_t *limitp) 1344 { 1345 size_t data_off; 1346 size_t data_size; 1347 size_t copy_off; 1348 size_t copy_size; 1349 boolean_t eop; 1350 1351 ASSERT3U(*offp, <=, MBLKL(mp)); 1352 ASSERT(*limitp != 0); 1353 1354 data_off = *offp; 1355 data_size = MBLKL(mp) - *offp; 1356 1357 copy_off = stbp->stb_esm.esm_size; 1358 copy_size = SFXGE_TX_BUFFER_SIZE - copy_off; 1359 1360 copy_size = MIN(copy_size, data_size); 1361 copy_size = MIN(copy_size, *limitp); 1362 1363 bcopy(mp->b_rptr + data_off, 1364 stbp->stb_esm.esm_base + copy_off, copy_size); 1365 1366 stbp->stb_esm.esm_size += copy_size; 1367 ASSERT3U(stbp->stb_esm.esm_size, <=, 1368 SFXGE_TX_BUFFER_SIZE); 1369 1370 *offp += copy_size; 1371 *limitp -= copy_size; 1372 1373 data_off += copy_size; 1374 data_size -= copy_size; 1375 1376 eop = (*limitp == 0 || 1377 (data_size == 0 && mp->b_cont == NULL)); 1378 1379 ASSERT3U(*offp, <=, MBLKL(mp)); 1380 1381 return (eop); 1382 } 1383 1384 static int 1385 
sfxge_tx_qpayload_fragment(sfxge_txq_t *stp, unsigned int id, mblk_t **mpp,
    size_t *offp, size_t size, boolean_t copy)
{
	sfxge_t *sp = stp->st_sp;
	mblk_t *mp = *mpp;
	size_t off = *offp;
	sfxge_tx_buffer_t *stbp;
	sfxge_tx_mapping_t *stmp;
	int rc;

	stbp = stp->st_stbp[id];
	ASSERT(stbp == NULL || (stbp->stb_esm.esm_size == stbp->stb_off));

	stmp = stp->st_stmp[id];

	while (size != 0) {
		boolean_t eop;

		ASSERT(mp != NULL);

		if (mp->b_cont != NULL)
			prefetch_read_many(mp->b_cont);

		ASSERT3U(off, <, MBLKL(mp));

		if (copy)
			goto copy;

		/*
		 * Check whether we have already mapped this data block for
		 * DMA.
		 */
		if (stmp == NULL || stmp->stm_mp != mp) {
			/*
			 * If we are part way through copying a data block then
			 * there's no point in trying to map it for DMA.
			 */
			if (off != 0)
				goto copy;

			/*
			 * If the data block is too short then the cost of
			 * mapping it for DMA would outweigh the cost of
			 * copying it.
			 */
			if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD)
				goto copy;

			/* Try to grab a transmit mapping from the pool */
			stmp = sfxge_tx_qfmp_get(stp);
			if (stmp == NULL) {
				/*
				 * The pool was empty so allocate a new
				 * mapping.
				 */
				if ((stmp = kmem_cache_alloc(sp->s_tmc,
				    KM_NOSLEEP)) == NULL)
					goto copy;
			}

			/* Add the DMA mapping to the list */
			stmp->stm_next = stp->st_stmp[id];
			stp->st_stmp[id] = stmp;

			/* Try to bind the data block to the mapping */
			if (sfxge_tx_msgb_bind(mp, stmp) != 0)
				goto copy;
		}
		ASSERT3P(stmp->stm_mp, ==, mp);

		/*
		 * If we have a partially filled buffer then we must add it to
		 * the fragment list before adding the mapping.
		 */
		if (stbp != NULL && (stbp->stb_esm.esm_size > stbp->stb_off)) {
			rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
			if (rc != 0)
				goto fail1;
		}

		/* Add the mapping to the fragment list */
		rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size);
		if (rc != 0)
			goto fail2;

		ASSERT(off == MBLKL(mp) || size == 0);

		/*
		 * If the data block has been exhausted then skip over the
		 * control block and advance to the next data block.
		 */
		if (off == MBLKL(mp)) {
			mp = mp->b_cont;
			off = 0;
		}

		continue;

copy:
		if (stbp == NULL ||
		    stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE) {
			/* Try to grab a buffer from the pool */
			stbp = sfxge_tx_qfbp_get(stp);
			if (stbp == NULL) {
				/*
				 * The pool was empty so allocate a new
				 * buffer.
				 */
				if ((stbp = kmem_cache_alloc(sp->s_tbc,
				    KM_NOSLEEP)) == NULL) {
					rc = ENOMEM;
					goto fail3;
				}
			}

			/* Add it to the list */
			stbp->stb_next = stp->st_stbp[id];
			stp->st_stbp[id] = stbp;
		}

		/* Copy as much of the data block as we can into the buffer */
		eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);

		ASSERT(off == MBLKL(mp) || size == 0 ||
		    stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE);

		/*
		 * If we have reached the end of the packet, or the buffer is
		 * full, then add the buffer to the fragment list.
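		 *
		 * A partially filled buffer is retained so that data copied
		 * from the next data block can continue to fill it.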
		 */
		if (stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE || eop) {
			rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
			if (rc != 0)
				goto fail4;
		}

		/*
		 * If the data block has been exhausted then advance to the
		 * next one.
		 */
		if (off == MBLKL(mp)) {
			mp = mp->b_cont;
			off = 0;
		}
	}

	*mpp = mp;
	*offp = off;

	return (0);

fail4:
	DTRACE_PROBE(fail4);
fail3:
	DTRACE_PROBE(fail3);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_tx_qlso_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp,
    boolean_t copy)
{
	sfxge_t *sp = stp->st_sp;
	mblk_t *mp = stpp->stp_mp;
	struct ether_header *etherhp = stpp->stp_etherhp;
	struct ip *iphp = stpp->stp_iphp;
	struct tcphdr *thp = stpp->stp_thp;
	size_t size = stpp->stp_size;
	size_t off = stpp->stp_off;
	size_t mss = stpp->stp_mss;
	unsigned int id;
	caddr_t hp;
	size_t ehs, hs;
	uint16_t start_len;
	uint16_t start_id;
	uint16_t ip_id;
	uint8_t start_flags;
	uint32_t start_seq;
	uint32_t th_seq;
	size_t lss;
	sfxge_tx_buffer_t *stbp;
	int rc;

	ASSERT(mutex_owned(&(stp->st_lock)));

	if ((DB_LSOFLAGS(mp) & HW_LSO) == 0) {
		rc = EINVAL;
		goto fail1;
	}

	id = stp->st_added & (SFXGE_TX_NDESCS - 1);

	ASSERT(stp->st_n == 0);
	ASSERT(stp->st_stbp[id] == NULL);
	ASSERT(stp->st_stmp[id] == NULL);

	ehs = (etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
	    sizeof (struct ether_vlan_header) :
	    sizeof (struct ether_header);
	if (msgdsize(mp) != ehs + ntohs(iphp->ip_len)) {
		rc = EINVAL;
		goto fail2;
	}

	/* The payload offset is equivalent to the size of the headers */
	hp = (caddr_t)(mp->b_rptr);
	hs = off;

	/*
	 * If the initial data block only contains the headers then advance
	 * to the next one.
	 */
	if (hs > MBLKL(mp)) {
		rc = EINVAL;
		goto fail3;
	}
	mp->b_rptr += hs;

	if (MBLKL(mp) == 0)
		mp = mp->b_cont;

	off = 0;

	/* Check that the IP and TCP headers are suitable for LSO */
	if (((iphp->ip_off & ~htons(IP_DF)) != 0) ||
	    ((thp->th_flags & (TH_URG | TH_SYN)) != 0) ||
	    (thp->th_urp != 0)) {
		rc = EINVAL;
		goto fail4;
	}

	if (size + (thp->th_off << 2) + (iphp->ip_hl << 2) !=
	    ntohs(iphp->ip_len)) {
		rc = EINVAL;
		goto fail4;
	}

	/*
	 * Get the base IP id. The stack leaves enough of a gap in id space
	 * for us to increment this for each segment we send out.
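	 *
	 * Each segment sent below uses the next consecutive IP id, and the
	 * TCP sequence number advances by the payload size of the preceding
	 * segment.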
1630 */ 1631 start_len = ntohs(iphp->ip_len); 1632 start_id = ip_id = ntohs(iphp->ip_id); 1633 1634 /* Get the base TCP sequence number and flags */ 1635 start_flags = thp->th_flags; 1636 start_seq = th_seq = ntohl(thp->th_seq); 1637 1638 /* Adjust the header for interim segments */ 1639 iphp->ip_len = htons((iphp->ip_hl << 2) + (thp->th_off << 2) + mss); 1640 thp->th_flags = start_flags & ~(TH_PUSH | TH_FIN); 1641 1642 lss = size; 1643 if ((lss / mss) >= (EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) / 2)) { 1644 rc = EINVAL; 1645 goto fail5; 1646 } 1647 1648 stbp = NULL; 1649 while (lss != 0) { 1650 size_t ss = MIN(lss, mss); 1651 boolean_t eol = (ss == lss); 1652 1653 /* Adjust the header for this segment */ 1654 iphp->ip_id = htons(ip_id); 1655 ip_id++; 1656 1657 thp->th_seq = htonl(th_seq); 1658 th_seq += ss; 1659 1660 /* If this is the final segment then do some extra adjustment */ 1661 if (eol) { 1662 iphp->ip_len = htons((iphp->ip_hl << 2) + 1663 (thp->th_off << 2) + ss); 1664 thp->th_flags = start_flags; 1665 } 1666 1667 if (stbp == NULL || 1668 stbp->stb_esm.esm_size + hs > SFXGE_TX_BUFFER_SIZE) { 1669 /* Try to grab a buffer from the pool */ 1670 stbp = sfxge_tx_qfbp_get(stp); 1671 if (stbp == NULL) { 1672 /* 1673 * The pool was empty so allocate a new 1674 * buffer. 1675 */ 1676 if ((stbp = kmem_cache_alloc(sp->s_tbc, 1677 KM_NOSLEEP)) == NULL) { 1678 rc = ENOMEM; 1679 goto fail6; 1680 } 1681 } 1682 1683 /* Add it to the list */ 1684 stbp->stb_next = stp->st_stbp[id]; 1685 stp->st_stbp[id] = stbp; 1686 } 1687 1688 /* Copy in the headers */ 1689 ASSERT3U(stbp->stb_off, ==, stbp->stb_esm.esm_size); 1690 bcopy(hp, stbp->stb_esm.esm_base + stbp->stb_off, hs); 1691 stbp->stb_esm.esm_size += hs; 1692 1693 /* Add the buffer to the fragment list */ 1694 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE); 1695 if (rc != 0) 1696 goto fail7; 1697 1698 /* Add the payload to the fragment list */ 1699 if ((rc = sfxge_tx_qpayload_fragment(stp, id, &mp, &off, 1700 ss, copy)) != 0) 1701 goto fail8; 1702 1703 lss -= ss; 1704 } 1705 ASSERT3U(off, ==, 0); 1706 ASSERT3P(mp, ==, NULL); 1707 1708 ASSERT3U(th_seq - start_seq, ==, size); 1709 1710 /* 1711 * If no part of the packet has been mapped for DMA then we can free 1712 * it now, otherwise it can only be freed on completion. 1713 */ 1714 if (stp->st_stmp[id] == NULL) 1715 freemsg(stpp->stp_mp); 1716 else 1717 stp->st_mp[id] = stpp->stp_mp; 1718 1719 stpp->stp_mp = NULL; 1720 1721 return (0); 1722 1723 fail8: 1724 DTRACE_PROBE(fail8); 1725 fail7: 1726 DTRACE_PROBE(fail7); 1727 fail6: 1728 DTRACE_PROBE(fail6); 1729 fail5: 1730 DTRACE_PROBE(fail5); 1731 1732 /* Restore the header */ 1733 thp->th_seq = htonl(start_seq); 1734 thp->th_flags = start_flags; 1735 1736 iphp->ip_len = htons(start_len); 1737 iphp->ip_id = htons(start_id); 1738 1739 fail4: 1740 DTRACE_PROBE(fail4); 1741 1742 mp = stpp->stp_mp; 1743 mp->b_rptr -= hs; 1744 1745 ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ? 
1746 sizeof (struct ether_vlan_header) : 1747 sizeof (struct ether_header)) + 1748 ntohs(iphp->ip_len), ==, msgdsize(mp)); 1749 1750 ASSERT(stp->st_mp[id] == NULL); 1751 1752 fail3: 1753 DTRACE_PROBE(fail3); 1754 fail2: 1755 DTRACE_PROBE(fail2); 1756 fail1: 1757 DTRACE_PROBE1(fail1, int, rc); 1758 1759 return (rc); 1760 } 1761 1762 static int 1763 sfxge_tx_qpacket_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, 1764 boolean_t copy) 1765 { 1766 sfxge_t *sp = stp->st_sp; 1767 mblk_t *mp = stpp->stp_mp; 1768 unsigned int id; 1769 size_t off; 1770 size_t size; 1771 sfxge_tx_mapping_t *stmp; 1772 sfxge_tx_buffer_t *stbp; 1773 int rc; 1774 1775 ASSERT(mutex_owned(&(stp->st_lock))); 1776 1777 ASSERT(stp->st_n == 0); 1778 1779 id = stp->st_added & (SFXGE_TX_NDESCS - 1); 1780 1781 ASSERT(stp->st_stbp[id] == NULL); 1782 ASSERT(stp->st_stmp[id] == NULL); 1783 1784 off = 0; 1785 size = LONG_MAX; /* must be larger than the packet */ 1786 1787 stbp = NULL; 1788 stmp = NULL; 1789 1790 while (mp != NULL) { 1791 boolean_t eop; 1792 1793 ASSERT(mp != NULL); 1794 1795 if (mp->b_cont != NULL) 1796 prefetch_read_many(mp->b_cont); 1797 1798 ASSERT(stmp == NULL || stmp->stm_mp != mp); 1799 1800 if (copy) 1801 goto copy; 1802 1803 /* 1804 * If we are part way through copying a data block then there's 1805 * no point in trying to map it for DMA. 1806 */ 1807 if (off != 0) 1808 goto copy; 1809 1810 /* 1811 * If the data block is too short then the cost of mapping it 1812 * for DMA would outweigh the cost of copying it. 1813 * 1814 * TX copy break 1815 */ 1816 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD) 1817 goto copy; 1818 1819 /* Try to grab a transmit mapping from the pool */ 1820 stmp = sfxge_tx_qfmp_get(stp); 1821 if (stmp == NULL) { 1822 /* 1823 * The pool was empty so allocate a new 1824 * mapping. 1825 */ 1826 if ((stmp = kmem_cache_alloc(sp->s_tmc, 1827 KM_NOSLEEP)) == NULL) 1828 goto copy; 1829 } 1830 1831 /* Add the DMA mapping to the list */ 1832 stmp->stm_next = stp->st_stmp[id]; 1833 stp->st_stmp[id] = stmp; 1834 1835 /* Try to bind the data block to the mapping */ 1836 if (sfxge_tx_msgb_bind(mp, stmp) != 0) 1837 goto copy; 1838 1839 /* 1840 * If we have a partially filled buffer then we must add it to 1841 * the fragment list before adding the mapping. 1842 */ 1843 if (stbp != NULL && (stbp->stb_esm.esm_size > stbp->stb_off)) { 1844 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE); 1845 if (rc != 0) 1846 goto fail1; 1847 } 1848 1849 /* Add the mapping to the fragment list */ 1850 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size); 1851 if (rc != 0) 1852 goto fail2; 1853 1854 ASSERT3U(off, ==, MBLKL(mp)); 1855 1856 /* Advance to the next data block */ 1857 mp = mp->b_cont; 1858 off = 0; 1859 continue; 1860 1861 copy: 1862 if (stbp == NULL || 1863 stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE) { 1864 /* Try to grab a buffer from the pool */ 1865 stbp = sfxge_tx_qfbp_get(stp); 1866 if (stbp == NULL) { 1867 /* 1868 * The pool was empty so allocate a new 1869 * buffer. 
				 */
				if ((stbp = kmem_cache_alloc(sp->s_tbc,
				    KM_NOSLEEP)) == NULL) {
					rc = ENOMEM;
					goto fail3;
				}
			}

			/* Add it to the list */
			stbp->stb_next = stp->st_stbp[id];
			stp->st_stbp[id] = stbp;
		}

		/* Copy as much of the data block as we can into the buffer */
		eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);

		ASSERT(off == MBLKL(mp) ||
		    stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE);

		/*
		 * If we have reached the end of the packet, or the buffer is
		 * full, then add the buffer to the fragment list.
		 */
		if (stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE || eop) {
			rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
			if (rc != 0)
				goto fail4;
		}

		/*
		 * If the data block has been exhausted then advance to the
		 * next one.
		 */
		if (off == MBLKL(mp)) {
			mp = mp->b_cont;
			off = 0;
		}
	}
	ASSERT3U(off, ==, 0);
	ASSERT3P(mp, ==, NULL);
	ASSERT3U(size, !=, 0);

	/*
	 * If no part of the packet has been mapped for DMA then we can free
	 * it now, otherwise it can only be freed on completion.
	 */
	if (stp->st_stmp[id] == NULL)
		freemsg(stpp->stp_mp);
	else
		stp->st_mp[id] = stpp->stp_mp;

	stpp->stp_mp = NULL;

	return (0);

fail4:
	DTRACE_PROBE(fail4);
fail3:
	DTRACE_PROBE(fail3);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	ASSERT(stp->st_stmp[id] == NULL);

	return (rc);
}


#define	SFXGE_TX_QDPL_PUT_PENDING(_stp)	\
	((_stp)->st_dpl.std_put != 0)

static void
sfxge_tx_qdpl_swizzle(sfxge_txq_t *stp)
{
	sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
	volatile uintptr_t *putp;
	uintptr_t put;
	sfxge_tx_packet_t *stpp;
	sfxge_tx_packet_t *p;
	sfxge_tx_packet_t **pp;
	unsigned int count;

	ASSERT(mutex_owned(&(stp->st_lock)));

	/*
	 * In-flight TX packets are guaranteed to cause more TX completions,
	 * hence more swizzles must happen.
	 */
	ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
	if (stdp->std_count >= stdp->get_pkt_limit)
		return;

	/* Acquire the put list, replacing it with an empty list */
	putp = &(stdp->std_put);
	put = atomic_swap_ulong(putp, 0);
	stpp = (void *)put;

	if (stpp == NULL)
		return;

	/* Reverse the list */
	pp = &(stpp->stp_next);
	p = NULL;

	count = 0;
	do {
		sfxge_tx_packet_t *next;

		next = stpp->stp_next;

		stpp->stp_next = p;
		p = stpp;

		count++;
		stpp = next;
	} while (stpp != NULL);

	/* Add it to the tail of the get list */
	ASSERT3P(*pp, ==, NULL);

	*(stdp->std_getp) = p;
	stdp->std_getp = pp;
	stdp->std_count += count;
	ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));

	DTRACE_PROBE2(dpl_counts, int, stdp->std_count, int, count);
}


/*
 * If the TXQ is locked, add the TX DPL put list and this packet to the
 * TX DPL get list.
 * If the TXQ is unlocked, atomically add this packet to the TX DPL put list.
 *
 * The only possible error is ENOSPC (used for TX backpressure).
 * For the TX DPL put or get list becoming full, in both cases there must be
 * future TX completions (as represented by the packets on the DPL get lists).
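 * (The put list length is tracked in each queued packet's stp_dpl_put_len
 * field so that the limit can be enforced without taking the TXQ lock.)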
2008 * 2009 * This ensures that in the future mac_tx_update() will be called from 2010 * sfxge_tx_qcomplete() 2011 */ 2012 static inline int 2013 sfxge_tx_qdpl_add(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, int locked) 2014 { 2015 sfxge_tx_dpl_t *stdp = &stp->st_dpl; 2016 2017 ASSERT3P(stpp->stp_next, ==, NULL); 2018 2019 if (locked) { 2020 ASSERT(mutex_owned(&stp->st_lock)); 2021 2022 if (stdp->std_count >= stdp->get_pkt_limit) { 2023 stdp->get_full_count++; 2024 return (ENOSPC); 2025 } 2026 2027 /* Reverse the put list onto the get list */ 2028 sfxge_tx_qdpl_swizzle(stp); 2029 2030 /* Add to the tail of the get list */ 2031 *(stdp->std_getp) = stpp; 2032 stdp->std_getp = &stpp->stp_next; 2033 stdp->std_count++; 2034 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 2035 2036 } else { 2037 volatile uintptr_t *putp; 2038 uintptr_t old; 2039 uintptr_t new; 2040 sfxge_tx_packet_t *old_pkt; 2041 2042 putp = &(stdp->std_put); 2043 new = (uintptr_t)stpp; 2044 2045 /* Add to the head of the put list, keeping a list length */ 2046 do { 2047 old = *putp; 2048 old_pkt = (sfxge_tx_packet_t *)old; 2049 2050 stpp->stp_dpl_put_len = old ? 2051 old_pkt->stp_dpl_put_len + 1 : 1; 2052 2053 if (stpp->stp_dpl_put_len >= stdp->put_pkt_limit) { 2054 stpp->stp_next = 0; 2055 stpp->stp_dpl_put_len = 0; 2056 stdp->put_full_count++; 2057 return (ENOSPC); 2058 } 2059 2060 stpp->stp_next = (void *)old; 2061 } while (atomic_cas_ulong(putp, old, new) != old); 2062 } 2063 return (0); 2064 } 2065 2066 2067 /* Take all packets from DPL get list and try to send to HW */ 2068 static void 2069 sfxge_tx_qdpl_drain(sfxge_txq_t *stp) 2070 { 2071 sfxge_t *sp = stp->st_sp; 2072 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2073 unsigned int pushed = stp->st_added; 2074 sfxge_tx_packet_t *stpp; 2075 unsigned int count; 2076 2077 ASSERT(mutex_owned(&(stp->st_lock))); 2078 2079 prefetch_read_many(sp->s_enp); 2080 prefetch_read_many(stp->st_etp); 2081 2082 stpp = stdp->std_get; 2083 count = stdp->std_count; 2084 2085 while (count != 0) { 2086 sfxge_tx_packet_t *next; 2087 boolean_t copy; 2088 int rc; 2089 2090 ASSERT(stpp != NULL); 2091 2092 /* Split stpp off */ 2093 next = stpp->stp_next; 2094 stpp->stp_next = NULL; 2095 2096 if (next != NULL) 2097 prefetch_read_many(next); 2098 2099 if (stp->st_state != SFXGE_TXQ_STARTED) 2100 goto reject; 2101 2102 copy = B_FALSE; 2103 2104 again: 2105 /* Fragment the packet */ 2106 if (stpp->stp_mss != 0) { 2107 rc = sfxge_tx_qlso_fragment(stp, stpp, copy); 2108 } else { 2109 rc = sfxge_tx_qpacket_fragment(stp, stpp, copy); 2110 } 2111 2112 switch (rc) { 2113 case 0: 2114 break; 2115 2116 case ENOSPC: 2117 if (!copy) 2118 goto copy; 2119 2120 /*FALLTHRU*/ 2121 default: 2122 goto reject; 2123 } 2124 2125 /* Free the packet structure */ 2126 stpp->stp_etherhp = NULL; 2127 stpp->stp_iphp = NULL; 2128 stpp->stp_thp = NULL; 2129 stpp->stp_off = 0; 2130 stpp->stp_size = 0; 2131 stpp->stp_mss = 0; 2132 stpp->stp_dpl_put_len = 0; 2133 2134 ASSERT3P(stpp->stp_mp, ==, NULL); 2135 2136 if (sfxge_tx_qfpp_put(stp, stpp) != 0) { 2137 sfxge_tx_packet_destroy(sp, stpp); 2138 stpp = NULL; 2139 } 2140 2141 --count; 2142 stpp = next; 2143 2144 /* Post the packet */ 2145 sfxge_tx_qlist_post(stp); 2146 2147 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) 2148 goto defer; 2149 2150 if (stp->st_added - pushed >= SFXGE_TX_BATCH) { 2151 efx_tx_qpush(stp->st_etp, stp->st_added); 2152 pushed = stp->st_added; 2153 } 2154 2155 continue; 2156 2157 copy: 2158 /* Abort the current fragment list */ 2159 
sfxge_tx_qlist_abort(stp); 2160 2161 /* Try copying the packet to flatten it */ 2162 ASSERT(!copy); 2163 copy = B_TRUE; 2164 2165 goto again; 2166 2167 reject: 2168 /* Abort the current fragment list */ 2169 sfxge_tx_qlist_abort(stp); 2170 2171 /* Discard the packet */ 2172 freemsg(stpp->stp_mp); 2173 stpp->stp_mp = NULL; 2174 2175 /* Free the packet structure */ 2176 stpp->stp_etherhp = NULL; 2177 stpp->stp_iphp = NULL; 2178 stpp->stp_thp = NULL; 2179 stpp->stp_off = 0; 2180 stpp->stp_size = 0; 2181 stpp->stp_mss = 0; 2182 stpp->stp_dpl_put_len = 0; 2183 2184 if (sfxge_tx_qfpp_put(stp, stpp) != 0) { 2185 sfxge_tx_packet_destroy(sp, stpp); 2186 stpp = NULL; 2187 } 2188 2189 --count; 2190 stpp = next; 2191 continue; 2192 defer: 2193 DTRACE_PROBE1(defer, unsigned int, stp->st_index); 2194 break; 2195 } 2196 2197 if (count == 0) { 2198 /* New empty get list */ 2199 ASSERT3P(stpp, ==, NULL); 2200 stdp->std_get = NULL; 2201 stdp->std_count = 0; 2202 2203 stdp->std_getp = &(stdp->std_get); 2204 } else { 2205 /* shorten the list by moving the head */ 2206 stdp->std_get = stpp; 2207 stdp->std_count = count; 2208 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp)); 2209 } 2210 2211 if (stp->st_added != pushed) 2212 efx_tx_qpush(stp->st_etp, stp->st_added); 2213 2214 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED || 2215 stdp->std_count == 0); 2216 } 2217 2218 /* Swizzle deferred packet list, try and push to HW */ 2219 static inline void 2220 sfxge_tx_qdpl_service(sfxge_txq_t *stp) 2221 { 2222 do { 2223 ASSERT(mutex_owned(&(stp->st_lock))); 2224 2225 if (SFXGE_TX_QDPL_PUT_PENDING(stp)) 2226 sfxge_tx_qdpl_swizzle(stp); 2227 2228 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) 2229 sfxge_tx_qdpl_drain(stp); 2230 2231 mutex_exit(&(stp->st_lock)); 2232 2233 if (!SFXGE_TX_QDPL_PUT_PENDING(stp)) 2234 break; 2235 } while (mutex_tryenter(&(stp->st_lock))); 2236 } 2237 2238 static void 2239 sfxge_tx_qdpl_flush_locked(sfxge_txq_t *stp) 2240 { 2241 sfxge_t *sp = stp->st_sp; 2242 sfxge_tx_dpl_t *stdp = &(stp->st_dpl); 2243 sfxge_tx_packet_t *stpp; 2244 unsigned int count; 2245 2246 ASSERT(mutex_owned(&(stp->st_lock))); 2247 2248 /* Swizzle put list to the get list */ 2249 sfxge_tx_qdpl_swizzle(stp); 2250 2251 stpp = stdp->std_get; 2252 count = stdp->std_count; 2253 2254 while (count != 0) { 2255 sfxge_tx_packet_t *next; 2256 2257 next = stpp->stp_next; 2258 stpp->stp_next = NULL; 2259 2260 /* Discard the packet */ 2261 freemsg(stpp->stp_mp); 2262 stpp->stp_mp = NULL; 2263 2264 /* Free the packet structure */ 2265 stpp->stp_etherhp = NULL; 2266 stpp->stp_iphp = NULL; 2267 stpp->stp_thp = NULL; 2268 stpp->stp_off = 0; 2269 stpp->stp_size = 0; 2270 stpp->stp_mss = 0; 2271 stpp->stp_dpl_put_len = 0; 2272 2273 sfxge_tx_packet_destroy(sp, stpp); 2274 2275 --count; 2276 stpp = next; 2277 } 2278 2279 ASSERT3P(stpp, ==, NULL); 2280 2281 /* Empty list */ 2282 stdp->std_get = NULL; 2283 stdp->std_count = 0; 2284 stdp->std_getp = &(stdp->std_get); 2285 } 2286 2287 2288 void 2289 sfxge_tx_qdpl_flush(sfxge_txq_t *stp) 2290 { 2291 mutex_enter(&(stp->st_lock)); 2292 sfxge_tx_qdpl_flush_locked(stp); 2293 mutex_exit(&(stp->st_lock)); 2294 } 2295 2296 2297 static void 2298 sfxge_tx_qunblock(sfxge_txq_t *stp) 2299 { 2300 sfxge_t *sp = stp->st_sp; 2301 unsigned int evq = stp->st_evq; 2302 sfxge_evq_t *sep = sp->s_sep[evq]; 2303 2304 ASSERT(mutex_owned(&(sep->se_lock))); 2305 2306 if (stp->st_state != SFXGE_TXQ_STARTED) 2307 return; 2308 2309 mutex_enter(&(stp->st_lock)); 2310 2311 if (stp->st_unblock != 
	    SFXGE_TXQ_NOT_BLOCKED) {
		unsigned int level;

		level = stp->st_added - stp->st_completed;
		if (level <= stp->st_unblock) {
			stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
			sfxge_tx_qlist_post(stp);
		}
	}

	sfxge_tx_qdpl_service(stp);
	/* lock has been dropped */
}

void
sfxge_tx_qcomplete(sfxge_txq_t *stp)
{
	sfxge_t *sp = stp->st_sp;
	sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
	unsigned int evq = stp->st_evq;
	sfxge_evq_t *sep = sp->s_sep[evq];
	unsigned int completed;

	ASSERT(mutex_owned(&(sep->se_lock)));

	completed = stp->st_completed;
	while (completed != stp->st_pending) {
		unsigned int id;
		sfxge_tx_mapping_t *stmp;

		id = completed++ & (SFXGE_TX_NDESCS - 1);

		if ((stmp = stp->st_stmp[id]) != NULL) {
			mblk_t *mp;

			/* Unbind all the mappings */
			do {
				ASSERT(stmp->stm_mp != NULL);
				sfxge_tx_msgb_unbind(stmp);

				stmp = stmp->stm_next;
			} while (stmp != NULL);

			/*
			 * Now that the packet is no longer mapped for DMA it
			 * can be freed.
			 */
			mp = stp->st_mp[id];
			stp->st_mp[id] = NULL;

			ASSERT(mp != NULL);
			freemsg(mp);
		}
	}
	stp->st_completed = completed;

	/* Check whether we need to unblock the queue */
	if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) {
		unsigned int level;

		level = stp->st_added - stp->st_completed;
		if (level <= stp->st_unblock)
			sfxge_tx_qunblock(stp);
	}

	/* Release TX backpressure from the TX DPL put/get list being full */
	if (stdp->std_count < stdp->get_pkt_limit)
		mac_tx_update(sp->s_mh);
}

void
sfxge_tx_qflush_done(sfxge_txq_t *stp)
{
	sfxge_t *sp = stp->st_sp;

	ASSERT(mutex_owned(&(sp->s_sep[stp->st_evq]->se_lock)));

	mutex_enter(&(stp->st_lock));

	if (stp->st_flush == SFXGE_FLUSH_PENDING)
		stp->st_flush = SFXGE_FLUSH_DONE;

	mutex_exit(&(stp->st_lock));

	mutex_enter(&(sp->s_tx_flush_lock));
	sp->s_tx_flush_pending--;
	if (sp->s_tx_flush_pending <= 0) {
		/* All queues flushed: wakeup sfxge_tx_stop() */
		cv_signal(&(sp->s_tx_flush_kv));
	}
	mutex_exit(&(sp->s_tx_flush_lock));
}

static void
sfxge_tx_qflush(sfxge_t *sp, unsigned int index, boolean_t do_flush)
{
	sfxge_txq_t *stp = sp->s_stp[index];

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	mutex_enter(&(stp->st_lock));

	/* Prepare to flush and stop the queue */
	if (stp->st_state == SFXGE_TXQ_STARTED)
		stp->st_state = SFXGE_TXQ_INITIALIZED;
	else
		do_flush = B_FALSE;	/* No hardware ring, so don't flush */

	if (do_flush)
		stp->st_flush = SFXGE_FLUSH_PENDING;
	else
		stp->st_flush = SFXGE_FLUSH_INACTIVE;

	mutex_exit(&(stp->st_lock));

	/* Flush the transmit queue */
	if (do_flush)
		efx_tx_qflush(stp->st_etp);
}

static void
sfxge_tx_qstop(sfxge_t *sp, unsigned int index)
{
	sfxge_txq_t *stp = sp->s_stp[index];
	unsigned int evq = stp->st_evq;
	sfxge_evq_t *sep = sp->s_sep[evq];

	mutex_enter(&(sep->se_lock));
	mutex_enter(&(stp->st_lock));
	ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);

	/* All queues should have been flushed */
	ASSERT3S(stp->st_sp->s_tx_flush_pending, ==, 0);
	ASSERT(stp->st_flush != SFXGE_FLUSH_FAILED);

	/* In case of TX flush timeout */
	stp->st_flush = SFXGE_FLUSH_DONE;

	/* Destroy the transmit queue */
	efx_tx_qdestroy(stp->st_etp);
	stp->st_etp = NULL;

	/* Clear entries from the buffer table */
	sfxge_sram_buf_tbl_clear(sp, stp->st_id,
	    EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));

	sfxge_tx_qlist_abort(stp);
	ASSERT3U(stp->st_n, ==, 0);

	stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;

	stp->st_pending = stp->st_added;

	sfxge_tx_qcomplete(stp);
	ASSERT3U(stp->st_completed, ==, stp->st_pending);

	sfxge_tx_qreap(stp);
	ASSERT3U(stp->st_reaped, ==, stp->st_completed);

	/*
	 * Ensure the deferred packet list is cleared.
	 * Can race with sfxge_tx_packet_add() adding to the put list.
	 */
	sfxge_tx_qdpl_flush_locked(stp);

	stp->st_added = 0;
	stp->st_pending = 0;
	stp->st_completed = 0;
	stp->st_reaped = 0;

	mutex_exit(&(stp->st_lock));
	mutex_exit(&(sep->se_lock));
}

static void
sfxge_tx_qfini(sfxge_t *sp, unsigned int index)
{
	sfxge_txq_t *stp = sp->s_stp[index];
	sfxge_tx_dpl_t *stdp = &(stp->st_dpl);

	/* Detach the TXQ from the driver */
	sp->s_stp[index] = NULL;
	ASSERT(sp->s_tx_qcount > 0);
	sp->s_tx_qcount--;

	ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
	stp->st_state = SFXGE_TXQ_UNINITIALIZED;

	/* Tear down the statistics */
	sfxge_tx_kstat_fini(stp);

	/* Ensure the deferred packet list is empty */
	ASSERT3U(stdp->std_count, ==, 0);
	ASSERT3P(stdp->std_get, ==, NULL);
	ASSERT3U(stdp->std_put, ==, 0);

	/* Clear the free buffer pool */
	sfxge_tx_qfbp_empty(stp);

	/* Clear the free mapping pool */
	sfxge_tx_qfmp_empty(stp);

	/* Clear the free packet pool */
	sfxge_tx_qfpp_empty(stp);

	mutex_destroy(&(stp->st_lock));

	stp->st_evq = 0;
	stp->st_type = 0;
	stp->st_index = 0;

	kmem_cache_free(sp->s_tqc, stp);
}

int
sfxge_tx_init(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	int index;
	int rc;

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_packet_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_tpc = kmem_cache_create(name, sizeof (sfxge_tx_packet_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_tx_packet_ctor, sfxge_tx_packet_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_tpc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_buffer_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_tbc = kmem_cache_create(name, sizeof (sfxge_tx_buffer_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_tx_buffer_ctor, sfxge_tx_buffer_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_tbc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_mapping_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_tmc = kmem_cache_create(name, sizeof (sfxge_tx_mapping_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_tx_mapping_ctor, sfxge_tx_mapping_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_tmc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_txq_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_tqc = kmem_cache_create(name, sizeof (sfxge_txq_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_tx_qctor, sfxge_tx_qdtor, NULL, sp,
	    NULL, 0);
	ASSERT(sp->s_tqc != NULL);

	/* Initialize the special non-checksummed transmit queues */

	/* NB sfxge_ev_qinit() is sensitive to using EVQ_0 */
	if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_NON_CKSUM,
	    SFXGE_TXQ_NON_CKSUM, EVQ_0)) != 0)
		goto fail1;

	/* NB sfxge_ev_qinit() is sensitive to using EVQ_0 */
	if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_IP_CKSUM,
	    SFXGE_TXQ_IP_CKSUM, EVQ_0)) != 0)
		goto fail2;

	/* Initialize the normal transmit queues */
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index,
		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
			goto fail3;
	}

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	while (--index >= 0)
		sfxge_tx_qfini(sp, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qfini(sp, SFXGE_TXQ_IP_CKSUM);

fail2:
	DTRACE_PROBE(fail2);

	/* Only tear down the non-checksummed queue once it has been set up */
	sfxge_tx_qfini(sp, SFXGE_TXQ_NON_CKSUM);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	kmem_cache_destroy(sp->s_tqc);
	sp->s_tqc = NULL;

	kmem_cache_destroy(sp->s_tmc);
	sp->s_tmc = NULL;

	kmem_cache_destroy(sp->s_tbc);
	sp->s_tbc = NULL;

	kmem_cache_destroy(sp->s_tpc);
	sp->s_tpc = NULL;

	return (rc);
}

int
sfxge_tx_start(sfxge_t *sp)
{
	efx_nic_t *enp = sp->s_enp;
	int index;
	int rc;

	/* Initialize the transmit module */
	if ((rc = efx_tx_init(enp)) != 0)
		goto fail1;

	for (index = 0; index < sp->s_tx_qcount; index++) {
		if ((rc = sfxge_tx_qstart(sp, index)) != 0)
			goto fail2;
	}

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	while (--index >= 0)
		sfxge_tx_qstop(sp, index);

	/* Tear down the transmit module */
	efx_tx_fini(enp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}


/*
 * Add a packet to the TX Deferred Packet List and, if the TX queue lock
 * can be acquired, call sfxge_tx_qdpl_service() to fragment and push
 * to the H/W transmit descriptor ring.
 *
 * If ENOSPC is returned then the DPL is full or the packet create failed, but
 * the mblk isn't freed so that the caller can return this mblk from mc_tx() to
 * back-pressure the OS stack.
 *
 * For all other errors the mblk is freed.
 */
int
sfxge_tx_packet_add(sfxge_t *sp, mblk_t *mp)
{
	struct ether_header *etherhp;
	struct ip *iphp;
	struct tcphdr *thp;
	size_t off;
	size_t size;
	size_t mss;
	sfxge_txq_t *stp;
	boolean_t locked;
	sfxge_tx_packet_t *stpp;
	int rc = 0;

	ASSERT3P(mp->b_next, ==, NULL);
	ASSERT(!(DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM));

	/*
	 * Do not enqueue packets during startup/shutdown.
	 *
	 * NOTE: This access to the state is NOT protected by the state lock.
	 * It is an imperfect test and anything further getting onto the
	 * get/put deferred packet lists is cleaned up in (possibly repeated)
	 * calls to sfxge_can_destroy().
	 */
	if (sp->s_state != SFXGE_STARTED) {
		rc = EINVAL;
		goto fail1;
	}

	etherhp = NULL;
	iphp = NULL;
	thp = NULL;
	off = 0;
	size = 0;
	mss = 0;

	/* Check whether we need the header pointers for LSO segmentation */
	if (DB_LSOFLAGS(mp) & HW_LSO) {
		/* LSO segmentation relies on hardware checksum offload */
		DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;

		if ((mss = DB_LSOMSS(mp)) == 0) {
			rc = EINVAL;
			goto fail1;
		}

		sfxge_tcp_parse(mp, &etherhp, &iphp, &thp, &off, &size);

		if (etherhp == NULL ||
		    iphp == NULL ||
		    thp == NULL ||
		    off == 0) {
			rc = EINVAL;
			goto fail2;
		}
	}

	/* Choose the appropriate transmit queue */
	if (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) {
		sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

		if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
			uint16_t hash;
			int index;

			if (srsp->srs_count > 1) {
				/*
				 * If we have not already parsed the headers
				 * for LSO segmentation then we need to do it
				 * now so we can calculate the hash.
				 */
				if (thp == NULL)
					sfxge_tcp_parse(mp, &etherhp, &iphp,
					    &thp, &off, &size);

				if (thp != NULL) {
					SFXGE_TCP_HASH(
					    ntohl(iphp->ip_dst.s_addr),
					    ntohs(thp->th_dport),
					    ntohl(iphp->ip_src.s_addr),
					    ntohs(thp->th_sport), hash);

					index = srsp->srs_tbl[hash %
					    SFXGE_RX_SCALE_MAX];
				} else {
					/*
					 * Non-TCP traffic always goes to
					 * the queue in the zero-th entry of
					 * the RSS table.
					 */
					index = srsp->srs_tbl[0];
				}
			} else {
				/*
				 * It does not matter what the hash is
				 * because all the RSS table entries will be
				 * the same.
				 */
				index = srsp->srs_tbl[0];
			}

			/*
			 * Find the event queue corresponding to the hash in
			 * the RSS table.
			 */
			stp = sp->s_stp[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
			ASSERT3U(stp->st_evq, ==, index);
		} else {
			stp = sp->s_stp[SFXGE_TXQ_IP_TCP_UDP_CKSUM];
		}
	} else if (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) {
		stp = sp->s_stp[SFXGE_TXQ_IP_CKSUM];
	} else {
		if ((stp = sp->s_stp[SFXGE_TXQ_NON_CKSUM]) == NULL)
			stp = sp->s_stp[SFXGE_TXQ_IP_CKSUM];
	}
	ASSERT(stp != NULL);

	ASSERT(mss == 0 || (DB_LSOFLAGS(mp) & HW_LSO));

	/* Try to grab the lock */
	locked = mutex_tryenter(&(stp->st_lock));

	if (locked) {
		/* Try to grab a packet from the pool */
		stpp = sfxge_tx_qfpp_get(stp);
	} else {
		stpp = NULL;
	}

	if (stpp == NULL) {
		/*
		 * Either the pool was empty or we don't have the lock, so
		 * allocate a new packet.
		 */
		if ((stpp = sfxge_tx_packet_create(sp)) == NULL) {
			rc = ENOSPC;
			goto fail3;
		}
	}

	stpp->stp_mp = mp;
	stpp->stp_etherhp = etherhp;
	stpp->stp_iphp = iphp;
	stpp->stp_thp = thp;
	stpp->stp_off = off;
	stpp->stp_size = size;
	stpp->stp_mss = mss;
	stpp->stp_dpl_put_len = 0;

	rc = sfxge_tx_qdpl_add(stp, stpp, locked);
	if (rc != 0) {
		/* ENOSPC can happen if the DPL get or put list is full */
		ASSERT3U(rc, ==, ENOSPC);

		/*
		 * Note: if this is the unlocked DPL put list full case there
		 * is no need to worry about a race with locked
		 * sfxge_tx_qdpl_swizzle() as we know that the TX DPL put list
		 * was full and would have been swizzle'd to the TX DPL get
		 * list; hence guaranteeing future TX completions and calls
		 * to mac_tx_update() via sfxge_tx_qcomplete()
		 */
		goto fail4;
	}

	/* Try to grab the lock again */
	if (!locked)
		locked = mutex_tryenter(&(stp->st_lock));

	if (locked) {
		/* Try to service the list */
		sfxge_tx_qdpl_service(stp);
		/* lock has been dropped */
	}

	return (0);

fail4:
	DTRACE_PROBE(fail4);
	sfxge_tx_packet_destroy(sp, stpp);
fail3:
	DTRACE_PROBE(fail3);
	if (locked)
		mutex_exit(&(stp->st_lock));
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	if (rc != ENOSPC)
		freemsg(mp);
	return (rc);
}

int
sfxge_tx_loopback(sfxge_t *sp, unsigned int count)
{
	uint8_t unicst[ETHERADDRL];
	size_t mtu;
	mblk_t *mp;
	struct ether_header *etherhp;
	unsigned int byte;
	int rc;

	if (count == 0) {
		rc = EINVAL;
		goto fail1;
	}

	rc = sfxge_mac_unicst_get(sp, SFXGE_UNICST_LAA, unicst);

	if (rc == ENOENT)
		rc = sfxge_mac_unicst_get(sp, SFXGE_UNICST_BIA, unicst);

	if (rc != 0)
		goto fail2;

	mtu = sp->s_mtu;

	if ((mp = allocb(sizeof (struct ether_header) + mtu,
	    BPRI_HI)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	mp->b_wptr = mp->b_rptr + sizeof (struct ether_header);
	bzero(mp->b_rptr, MBLKL(mp));

	/*LINTED*/
	etherhp = (struct ether_header *)(mp->b_rptr);
	bcopy(sfxge_brdcst, &(etherhp->ether_dhost), ETHERADDRL);
	bcopy(unicst, &(etherhp->ether_shost), ETHERADDRL);
	etherhp->ether_type = htons(SFXGE_ETHERTYPE_LOOPBACK);

	for (byte = 0; byte < 30; byte++)
		*(mp->b_wptr++) = (byte & 1) ?
		    0xaa : 0x55;

	do {
		mblk_t *nmp;

		if ((nmp = dupb(mp)) == NULL) {
			rc = ENOMEM;
			goto fail4;
		}

		rc = sfxge_tx_packet_add(sp, nmp);
		if (rc != 0) {
			freeb(nmp);
			goto fail5;
		}

	} while (--count != 0);

	freeb(mp);
	return (0);

fail5:
	DTRACE_PROBE(fail5);
fail4:
	DTRACE_PROBE(fail4);

	freeb(mp);

fail3:
	DTRACE_PROBE(fail3);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

int
sfxge_tx_ioctl(sfxge_t *sp, sfxge_tx_ioc_t *stip)
{
	int rc;

	switch (stip->sti_op) {
	case SFXGE_TX_OP_LOOPBACK: {
		unsigned int count = stip->sti_data;

		if ((rc = sfxge_tx_loopback(sp, count)) != 0)
			goto fail1;

		break;
	}
	default:
		rc = ENOTSUP;
		goto fail1;
	}

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_tx_stop(sfxge_t *sp)
{
	efx_nic_t *enp = sp->s_enp;
	clock_t timeout;
	boolean_t do_flush;
	int index;

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	mutex_enter(&(sp->s_tx_flush_lock));

	/* Flush all the queues */
	if (sp->s_hw_err == SFXGE_HW_OK) {
		sp->s_tx_flush_pending = sp->s_tx_qcount;
		do_flush = B_TRUE;
	} else {
		sp->s_tx_flush_pending = 0;
		do_flush = B_FALSE;
	}

	/* Prepare queues to stop and flush the hardware ring */
	for (index = 0; index < sp->s_tx_qcount; index++)
		sfxge_tx_qflush(sp, index, do_flush);

	if (do_flush == B_FALSE)
		goto flush_done;

	/* Wait up to 2 seconds for queue flushing to complete */
	timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_TX_QFLUSH_USEC);

	while (sp->s_tx_flush_pending > 0) {
		if (cv_timedwait(&(sp->s_tx_flush_kv), &(sp->s_tx_flush_lock),
		    timeout) < 0) {
			/* Timeout waiting for queues to flush */
			dev_info_t *dip = sp->s_dip;

			DTRACE_PROBE(timeout);
			cmn_err(CE_NOTE,
			    SFXGE_CMN_ERR "[%s%d] tx qflush timeout",
			    ddi_driver_name(dip), ddi_get_instance(dip));
			break;
		}
	}
	sp->s_tx_flush_pending = 0;

flush_done:
	mutex_exit(&(sp->s_tx_flush_lock));

	/* Stop all the queues */
	for (index = 0; index < sp->s_tx_qcount; index++)
		sfxge_tx_qstop(sp, index);

	/* Tear down the transmit module */
	efx_tx_fini(enp);
}

void
sfxge_tx_fini(sfxge_t *sp)
{
	int index;

	index = sp->s_tx_qcount;
	while (--index >= 0)
		sfxge_tx_qfini(sp, index);

	kmem_cache_destroy(sp->s_tqc);
	sp->s_tqc = NULL;

	kmem_cache_destroy(sp->s_tmc);
	sp->s_tmc = NULL;

	kmem_cache_destroy(sp->s_tbc);
	sp->s_tbc = NULL;

	kmem_cache_destroy(sp->s_tpc);
	sp->s_tpc = NULL;
}
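
/*
 * Illustrative sketch (not part of the driver): a GLDv3 mc_tx() entry point
 * would typically consume sfxge_tx_packet_add() along the following lines,
 * handing back any mblk that could not be queued so that the stack applies
 * back-pressure, as described in the comment above sfxge_tx_packet_add().
 * The function name and structure below are hypothetical, not the driver's
 * actual MAC callback.
 *
 *	static mblk_t *
 *	example_m_tx(void *arg, mblk_t *mp)
 *	{
 *		sfxge_t *sp = arg;
 *		mblk_t *next;
 *
 *		while (mp != NULL) {
 *			next = mp->b_next;
 *			mp->b_next = NULL;
 *
 *			if (sfxge_tx_packet_add(sp, mp) == ENOSPC) {
 *				mp->b_next = next;
 *				break;
 *			}
 *
 *			mp = next;
 *		}
 *
 *		return (mp);
 *	}
 */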