/*
 * Copyright (c) 2008-2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/strft.h>
#include <sys/ksynch.h>
#include <sys/ethernet.h>
#include <sys/crc32.h>
#include <sys/pattr.h>
#include <sys/cpu.h>

#include <inet/ip.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "sfxge.h"

#include "efx.h"

/* RXQ flush response timeout (in microseconds) */
#define	SFXGE_RX_QFLUSH_USEC	(2000000)

/* RXQ flush tries in the case of failure */
#define	SFXGE_RX_QFLUSH_TRIES	(5)

/* RXQ default packet buffer preallocation (number of packet buffers) */
#define	SFXGE_RX_QPREALLOC	(0)

/* Receive packet DMA attributes */
static ddi_device_acc_attr_t sfxge_rx_packet_devacc = {

	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC	/* devacc_attr_dataorder */
};

static ddi_dma_attr_t sfxge_rx_packet_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo */
	0xffffffffffffffffull,	/* dma_attr_addr_hi */
	0xffffffffffffffffull,	/* dma_attr_count_max */
	SFXGE_CPU_CACHE_SIZE,	/* dma_attr_align */
	0xffffffff,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	0xffffffffffffffffull,	/* dma_attr_maxxfer */
	0xffffffffffffffffull,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

/* Receive queue DMA attributes */
static ddi_device_acc_attr_t sfxge_rxq_devacc = {

	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC	/* devacc_attr_dataorder */
};
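
/*
 * The two attribute sets above and below differ only in alignment: packet
 * buffers are aligned to the CPU cache line size to avoid false sharing,
 * whereas the RXQ descriptor ring is aligned in units of EFX_BUF_SIZE, as
 * expected by the controller's buffer table. Both request a single DMA
 * cookie (dma_attr_sgllen == 1) so each object is physically contiguous.
 */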

static ddi_dma_attr_t sfxge_rxq_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo */
	0xffffffffffffffffull,	/* dma_attr_addr_hi */
	0xffffffffffffffffull,	/* dma_attr_count_max */
	EFX_BUF_SIZE,		/* dma_attr_align */
	0xffffffff,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	0xffffffffffffffffull,	/* dma_attr_maxxfer */
	0xffffffffffffffffull,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

/* Forward declaration */
static void sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc);

static int
sfxge_rx_packet_ctor(void *buf, void *arg, int kmflags)
{
	sfxge_rx_packet_t *srpp = buf;
	sfxge_t *sp = arg;
	dev_info_t *dip = sp->s_dip;
	int err;

	ASSERT3U(sizeof (srpp->__srp_u1.__srp_s1), <=,
	    sizeof (srpp->__srp_u1.__srp_pad));
	ASSERT3U(sizeof (srpp->__srp_u2.__srp_s2), <=,
	    sizeof (srpp->__srp_u2.__srp_pad));

	bzero(buf, sizeof (sfxge_rx_packet_t));

	/* Allocate a DMA handle */
	err = ddi_dma_alloc_handle(dip, &sfxge_rx_packet_dma_attr,
	    (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
	    NULL, &(srpp->srp_dma_handle));
	if (err != DDI_SUCCESS)
		goto fail1;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, err);

	SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);

	return (-1);
}

static void
sfxge_rx_packet_dtor(void *buf, void *arg)
{
	sfxge_rx_packet_t *srpp = buf;

	_NOTE(ARGUNUSED(arg))

	/* Free the DMA handle */
	ddi_dma_free_handle(&(srpp->srp_dma_handle));
	srpp->srp_dma_handle = NULL;

	SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);
}

static int
sfxge_rx_qctor(void *buf, void *arg, int kmflags)
{
	sfxge_rxq_t *srp = buf;
	efsys_mem_t *esmp = &(srp->sr_mem);
	sfxge_t *sp = arg;
	sfxge_dma_buffer_attr_t dma_attr;
	sfxge_rx_fpp_t *srfppp;
	int nprealloc;
	unsigned int id;
	int rc;

	/* Compile-time structure layout checks */
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u1.__sr_s1) <=
	    sizeof (srp->__sr_u1.__sr_pad));
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u2.__sr_s2) <=
	    sizeof (srp->__sr_u2.__sr_pad));
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u3.__sr_s3) <=
	    sizeof (srp->__sr_u3.__sr_pad));

	bzero(buf, sizeof (sfxge_rxq_t));

	srp->sr_sp = sp;

	dma_attr.sdba_dip = sp->s_dip;
	dma_attr.sdba_dattrp = &sfxge_rxq_dma_attr;
	dma_attr.sdba_callback = DDI_DMA_SLEEP;
	dma_attr.sdba_length = EFX_RXQ_SIZE(sp->s_rxq_size);
	dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
	dma_attr.sdba_devaccp = &sfxge_rxq_devacc;
	dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
	dma_attr.sdba_maxcookies = 1;
	dma_attr.sdba_zeroinit = B_FALSE;

	if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
		goto fail1;

	/* Allocate some buffer table entries */
	if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_RXQ_NBUFS(sp->s_rxq_size),
	    &(srp->sr_id))) != 0)
		goto fail2;

	/* Allocate the context array */
	if ((srp->sr_srpp = kmem_zalloc(sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	/* Allocate the flow table */
	if ((srp->sr_flow = kmem_zalloc(sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail4;
	}

	srp->sr_srfpp = &(srp->sr_srfp);
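
	/*
	 * Set the flow reassembly timeout to 200ms (in clock ticks); a
	 * coalesced TCP flow is restarted if the gap between two segments
	 * exceeds this.
	 */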
	srp->sr_rto = drv_usectohz(200000);

	srp->sr_mpp = &(srp->sr_mp);

	/* Initialize the free packet pool */
	srfppp = &(srp->sr_fpp);
	if ((srfppp->srfpp_putp = kmem_zalloc(SFXGE_CPU_CACHE_SIZE *
	    SFXGE_RX_FPP_NSLOTS, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail5;
	}
	for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		putp = (void *)(srfppp->srfpp_putp + off);

		putp->srfpl_putp = NULL;
		putp->srfpl_putpp = &(putp->srfpl_putp);
		mutex_init(&(putp->srfpl_lock), NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(sp->s_intr.si_intr_pri));
	}

	cv_init(&(srp->sr_flush_kv), NULL, CV_DRIVER, NULL);

	/* Preallocate some packets on the free packet pool */
	nprealloc = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_prealloc_pkt_buffers", SFXGE_RX_QPREALLOC);
	sfxge_rx_qpreallocate(srp, nprealloc);

	return (0);

fail5:
	DTRACE_PROBE(fail5);

	srp->sr_mpp = NULL;

	srp->sr_rto = 0;
	srp->sr_srfpp = NULL;

	/* Free the flow table */
	kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW);
	srp->sr_flow = NULL;

fail4:
	DTRACE_PROBE(fail4);

	/* Free the context array */
	kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size);
	srp->sr_srpp = NULL;

fail3:
	DTRACE_PROBE(fail3);

	/* Free the buffer table entries */
	sfxge_sram_buf_tbl_free(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));
	srp->sr_id = 0;

fail2:
	DTRACE_PROBE(fail2);
	/* Remove DMA setup */
	sfxge_dma_buffer_destroy(esmp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	srp->sr_sp = NULL;

	SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);

	return (-1);
}

static void
sfxge_rx_qdtor(void *buf, void *arg)
{
	sfxge_rxq_t *srp = buf;
	efsys_mem_t *esmp = &(srp->sr_mem);
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int id;

	_NOTE(ARGUNUSED(arg))

	cv_destroy(&(srp->sr_flush_kv));

	/* Tear down the free packet pool */
	for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		putp = (void *)(srfppp->srfpp_putp + off);

		putp->srfpl_putpp = NULL;
		mutex_destroy(&(putp->srfpl_lock));

		SFXGE_OBJ_CHECK(putp, sfxge_rx_fpp_putlist_t);
	}
	kmem_free(srfppp->srfpp_putp, SFXGE_CPU_CACHE_SIZE *
	    SFXGE_RX_FPP_NSLOTS);
	srfppp->srfpp_putp = NULL;

	srp->sr_mpp = NULL;

	srp->sr_rto = 0;
	srp->sr_srfpp = NULL;

	/* Free the flow table */
	kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW);
	srp->sr_flow = NULL;

	/* Free the context array */
	kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size);
	srp->sr_srpp = NULL;

	/* Free the buffer table entries */
	sfxge_sram_buf_tbl_free(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));
	srp->sr_id = 0;

	/* Tear down DMA setup */
	sfxge_dma_buffer_destroy(esmp);

	SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);
}
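
/*
 * The free packet pool keeps recycled receive packets for reuse. Each RXQ
 * has SFXGE_RX_FPP_NSLOTS put lists, one per CPU (selected by cpu_seqid),
 * each padded to SFXGE_CPU_CACHE_SIZE so that producers on different CPUs
 * do not share a cache line. Packets accumulate on the put lists under a
 * short-lived per-list lock and are periodically "swizzled" onto the
 * single get list, which is only accessed under the event queue lock.
 */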

/* Note: This function takes ownership of *srpp. */
static inline void
sfxge_rx_qfpp_put(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	mblk_t *mp = srpp->srp_mp;
	unsigned int id;
	size_t off;
	sfxge_rx_fpp_putlist_t *putp;

	ASSERT3P(mp->b_next, ==, NULL);
	ASSERT3P(mp->b_prev, ==, NULL);

	id = CPU->cpu_seqid & SFXGE_RX_FPP_MASK;
	off = id * SFXGE_CPU_CACHE_SIZE;

	ASSERT3P(srpp->srp_putp, ==, srfppp->srfpp_putp);
	putp = (void *)(srpp->srp_putp + off);

	mutex_enter(&(putp->srfpl_lock));
	putp->srfpl_count++;
	*putp->srfpl_putpp = mp;
	putp->srfpl_putpp = &(mp->b_next);
	mutex_exit(&(putp->srfpl_lock));
}

static unsigned int
sfxge_rx_qfpp_swizzle(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int start;
	unsigned int id;
	mblk_t *p;
	mblk_t **pp;
	unsigned int count;
	unsigned int loaned;

	ASSERT(mutex_owned(&(sep->se_lock)));

	/* We want to access the put list for the current CPU last */
	id = start = (CPU->cpu_seqid + 1) & SFXGE_RX_FPP_MASK;

	do {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		id = (id + 1) & SFXGE_RX_FPP_MASK;

		putp = (void *)(srfppp->srfpp_putp + off);

		/* Acquire the put list */
		mutex_enter(&(putp->srfpl_lock));

		p = putp->srfpl_putp;
		pp = putp->srfpl_putpp;
		count = putp->srfpl_count;

		putp->srfpl_putp = NULL;
		putp->srfpl_putpp = &(putp->srfpl_putp);
		putp->srfpl_count = 0;

		mutex_exit(&(putp->srfpl_lock));

		if (p == NULL)
			continue;

		/* Add the list to the head of the get list */
		*pp = srfppp->srfpp_get;
		srfppp->srfpp_get = p;

		/* Adjust the counters */
		ASSERT3U(srfppp->srfpp_loaned, >=, count);
		srfppp->srfpp_loaned -= count;
		srfppp->srfpp_count += count;

#if 0
		/* NOTE: this probe is disabled because it is expensive!! */
		DTRACE_PROBE2(count,
		    unsigned int, (id - 1) & SFXGE_RX_FPP_MASK,
		    unsigned int, count);
#endif

	} while (id != start);

	/* Return the number of packets yet to appear in the put list */
	loaned = srfppp->srfpp_loaned;

	return (loaned);
}
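
/*
 * A packet counts as "loaned" while its mblk is owned by the stack or is
 * sitting on a put list; srfpp_loaned is only decremented when the swizzle
 * above moves it onto the get list. The value returned by the swizzle is
 * therefore the number of packets still outstanding.
 */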

#define	DB_FRTNP(mp) ((mp)->b_datap->db_frtnp)

static void
sfxge_rx_qfpp_empty(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rx_fpp_t *srfppp;
	mblk_t *mp;

	mutex_enter(&(sep->se_lock));
	srfppp = &(srp->sr_fpp);

	/* Swizzle put list to get list */
	(void) sfxge_rx_qfpp_swizzle(srp);
	ASSERT3U(srfppp->srfpp_loaned, ==, 0);

	mp = srfppp->srfpp_get;
	srfppp->srfpp_get = NULL;

	/* Free the remainder */
	while (mp != NULL) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;

		next = mp->b_next;
		mp->b_next = NULL;

		ASSERT3U(srfppp->srfpp_count, >, 0);
		srfppp->srfpp_count--;

		freep = DB_FRTNP(mp);
		/*
		 * ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
		 * is implied by srpp test below
		 */
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);
		ASSERT3P(mp->b_cont, ==, NULL);
		srpp->srp_recycle = B_FALSE;

		freeb(mp);

		mp = next;
	}
	ASSERT3U(srfppp->srfpp_count, ==, 0);

	srfppp->srfpp_min = 0;

	mutex_exit(&(sep->se_lock));
}

/*
 * This is an estimate of all memory consumed per RX packet.
 * It can be inaccurate, but sp->s_rx_pkt_mem_alloc mustn't drift.
 */
static uint64_t
sfxge_rx_pkt_mem_approx(const sfxge_rx_packet_t *srpp)
{
	return (srpp->srp_mblksize + sizeof (mblk_t) + sizeof (dblk_t) +
	    sizeof (sfxge_rx_packet_t));
}

static void
sfxge_rx_qpacket_destroy(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
	sfxge_t *sp = srp->sr_sp;
	int64_t delta = sfxge_rx_pkt_mem_approx(srpp);

	ASSERT(!(srpp->srp_recycle));
	ASSERT3P(srpp->srp_mp, ==, NULL);

	srpp->srp_off = 0;
	srpp->srp_thp = NULL;
	srpp->srp_iphp = NULL;
	srpp->srp_etherhp = NULL;
	srpp->srp_size = 0;
	srpp->srp_flags = 0;

	bzero(&(srpp->srp_free), sizeof (frtn_t));

	srpp->srp_mblksize = 0;
	srpp->srp_base = NULL;

	/* Unbind the DMA memory from the DMA handle */
	srpp->srp_addr = 0;
	(void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

	/* Free the DMA memory */
	srpp->srp_base = NULL;
	ddi_dma_mem_free(&(srpp->srp_acc_handle));
	srpp->srp_acc_handle = NULL;

	srpp->srp_putp = NULL;
	srpp->srp_srp = NULL;

	kmem_cache_free(sp->s_rpc, srpp);
	if (sp->s_rx_pkt_mem_max)
		atomic_add_64(&sp->s_rx_pkt_mem_alloc, -delta);
}
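
/*
 * desballoc(9F) free routine for receive packets. This runs synchronously
 * from whichever thread calls freeb(), possibly long after the RXQ has been
 * stopped, so it must not block. In the common case the packet is re-armed
 * with a fresh mblk and returned to the free packet pool; if the mblk cannot
 * be replaced (or recycling is disabled) the packet's DMA resources are torn
 * down instead.
 */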
static void
sfxge_rx_qpacket_free(void *arg)
{
	sfxge_rx_packet_t *srpp = arg;
	sfxge_rxq_t *srp = srpp->srp_srp;

	/*
	 * WARNING "man -s 9f esballoc" states:
	 * => runs sync from the thread calling freeb()
	 * => must not sleep, or access data structures that could be freed
	 */

	/* Check whether we want to recycle the receive packets */
	if (srpp->srp_recycle) {
		frtn_t *freep;
		mblk_t *mp;
		size_t size;

		freep = &(srpp->srp_free);
		ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
		ASSERT3P(freep->free_arg, ==, (caddr_t)srpp);

		/*
		 * Allocate a matching mblk_t before the current one is
		 * freed.
		 */
		size = srpp->srp_mblksize;

		if ((mp = desballoc(srpp->srp_base, size, BPRI_HI,
		    freep)) != NULL) {
			srpp->srp_mp = mp;

			/* NORMAL recycled case */
			sfxge_rx_qfpp_put(srp, srpp);
			return;
		}
	}

	srpp->srp_mp = NULL;

	sfxge_rx_qpacket_destroy(srp, srpp);
}

static sfxge_rx_packet_t *
sfxge_rx_qpacket_create(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	sfxge_rx_packet_t *srpp;
	size_t size;
	caddr_t base;
	size_t unit;
	ddi_dma_cookie_t dmac;
	unsigned int ncookies;
	frtn_t *freep;
	mblk_t *mp;
	int err;
	int rc;

	size = sp->s_rx_buffer_size;

	if (sp->s_rx_pkt_mem_max &&
	    (sp->s_rx_pkt_mem_alloc + size >= sp->s_rx_pkt_mem_max)) {
		DTRACE_PROBE(rx_pkt_mem_max);
		srp->sr_kstat.srk_rx_pkt_mem_limit++;
		return (NULL);
	}

	/* Allocate a new packet */
	if ((srpp = kmem_cache_alloc(sp->s_rpc, KM_NOSLEEP)) == NULL) {
		srp->sr_kstat.srk_kcache_alloc_nomem++;
		rc = ENOMEM;
		goto fail1;
	}

	srpp->srp_srp = srp;
	srpp->srp_putp = srfppp->srfpp_putp;

	/* Allocate some DMA memory */
	err = ddi_dma_mem_alloc(srpp->srp_dma_handle, size,
	    &sfxge_rx_packet_devacc, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &base, &unit, &(srpp->srp_acc_handle));
	switch (err) {
	case DDI_SUCCESS:
		break;

	case DDI_FAILURE:
		srp->sr_kstat.srk_dma_alloc_nomem++;
		rc = ENOMEM;
		goto fail2;

	default:
		srp->sr_kstat.srk_dma_alloc_fail++;
		rc = EFAULT;
		goto fail2;
	}

	/* Adjust the buffer to align the start of the DMA area correctly */
	base += sp->s_rx_buffer_align;
	size -= sp->s_rx_buffer_align;

	/* Bind the DMA memory to the DMA handle */
	err = ddi_dma_addr_bind_handle(srpp->srp_dma_handle, NULL,
	    base, size, DDI_DMA_READ | DDI_DMA_STREAMING,
	    DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
	switch (err) {
	case DDI_DMA_MAPPED:
		break;

	case DDI_DMA_INUSE:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EEXIST;
		goto fail3;

	case DDI_DMA_NORESOURCES:
		srp->sr_kstat.srk_dma_bind_nomem++;
		rc = ENOMEM;
		goto fail3;

	case DDI_DMA_NOMAPPING:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = ENOTSUP;
		goto fail3;

	case DDI_DMA_TOOBIG:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EFBIG;
		goto fail3;

	default:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EFAULT;
		goto fail3;
	}
	ASSERT3U(ncookies, ==, 1);

	srpp->srp_addr = dmac.dmac_laddress;

	srpp->srp_base = (unsigned char *)base;
	srpp->srp_mblksize = size;

	/*
	 * Allocate a STREAMS block: We use size 1 so that the allocator will
	 * use the first (and smallest) dblk cache.
	 */
	freep = &(srpp->srp_free);
	freep->free_func = sfxge_rx_qpacket_free;
	freep->free_arg = (caddr_t)srpp;

	if ((mp = desballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
		srp->sr_kstat.srk_desballoc_fail++;
		rc = ENOMEM;
		goto fail4;
	}

	srpp->srp_mp = mp;
	srpp->srp_recycle = B_TRUE;

	if (sp->s_rx_pkt_mem_max) {
		int64_t delta = sfxge_rx_pkt_mem_approx(srpp);
		atomic_add_64(&sp->s_rx_pkt_mem_alloc, delta);
	}

	return (srpp);

fail4:
	DTRACE_PROBE(fail4);

	bzero(&(srpp->srp_free), sizeof (frtn_t));

	srpp->srp_mblksize = 0;
	srpp->srp_base = NULL;

	/* Unbind the DMA memory from the DMA handle */
	srpp->srp_addr = 0;
	(void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

fail3:
	DTRACE_PROBE(fail3);

	/* Free the DMA memory */
	ddi_dma_mem_free(&(srpp->srp_acc_handle));
	srpp->srp_acc_handle = NULL;

fail2:
	DTRACE_PROBE(fail2);

	srpp->srp_putp = NULL;
	srpp->srp_srp = NULL;

	kmem_cache_free(sp->s_rpc, srpp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (NULL);
}

#define	SFXGE_REFILL_BATCH	64

/* Try to refill the RX descriptor ring from the associated free pkt pool */
static void
sfxge_rx_qrefill(sfxge_rxq_t *srp, unsigned int target)
{
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
	mblk_t *mp;
	int ntodo;
	unsigned int count;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;

	prefetch_read_many(sp->s_enp);
	prefetch_read_many(srp->sr_erp);

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		return;

	rxfill = srp->sr_added - srp->sr_completed;
	ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
	ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
	ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

	if (ntodo == 0)
		goto out;

	(void) sfxge_rx_qfpp_swizzle(srp);

	mp = srfppp->srfpp_get;
	count = srfppp->srfpp_count;
	mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

	batch = 0;
	while (ntodo-- > 0) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;
		unsigned int id;

		if (mp == NULL)
			break;

		next = mp->b_next;
		mp->b_next = NULL;

		if (next != NULL)
			prefetch_read_many(next);

		freep = DB_FRTNP(mp);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);

		/* The MTU may have changed since the packet was allocated */
		if (MBLKSIZE(mp) != mblksize) {
			srpp->srp_recycle = B_FALSE;

			freeb(mp);

			--count;
			mp = next;
			continue;
		}

		srpp->srp_off = 0;
		srpp->srp_thp = NULL;
		srpp->srp_iphp = NULL;
		srpp->srp_etherhp = NULL;
		srpp->srp_size = 0;
		srpp->srp_flags = EFX_DISCARD;

		id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
		ASSERT(srp->sr_srpp[id] == NULL);
		srp->sr_srpp[id] = srpp;

		addr[batch++] = srpp->srp_addr;
		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
			    srp->sr_completed, srp->sr_added);
			srp->sr_added += batch;
			batch = 0;
		}

		--count;
		mp = next;
	}

	srfppp->srfpp_get = mp;
	srfppp->srfpp_count = count;
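
	/*
	 * Post any residual descriptors (fewer than SFXGE_REFILL_BATCH)
	 * and then ring the doorbell once for everything added above.
	 */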
	if (batch != 0) {
		efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
		    srp->sr_completed, srp->sr_added);
		srp->sr_added += batch;
	}

	efx_rx_qpush(srp->sr_erp, srp->sr_added, &srp->sr_pushed);

out:
	if (srfppp->srfpp_count < srfppp->srfpp_min)
		srfppp->srfpp_min = srfppp->srfpp_count;
}

/* Preallocate packets and put them in the free packet pool */
static void
sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc)
{
	sfxge_rx_fpp_t *srfppp = &((srp)->sr_fpp);

	srfppp->srfpp_lowat = nprealloc;
	while (nprealloc-- > 0) {
		sfxge_rx_packet_t *srpp;

		if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
			break;
		sfxge_rx_qfpp_put(srp, srpp);
	}
}

/* Try to refill the RX descriptor ring by allocating new packets */
static void
sfxge_rx_qfill(sfxge_rxq_t *srp, unsigned int target)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
	mblk_t *mp = NULL;

	prefetch_read_many(sp->s_enp);
	prefetch_read_many(srp->sr_erp);

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		return;

	rxfill = srp->sr_added - srp->sr_completed;
	ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
	ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
	ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

	if (ntodo == 0)
		return;

	mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

	batch = 0;
	while (ntodo-- > 0) {
		sfxge_rx_packet_t *srpp;
		unsigned int id;

		if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
			break;

		mp = srpp->srp_mp;

		ASSERT3U(MBLKSIZE(mp), ==, mblksize);

		ASSERT3U(srpp->srp_off, ==, 0);
		ASSERT3P(srpp->srp_thp, ==, NULL);
		ASSERT3P(srpp->srp_iphp, ==, NULL);
		ASSERT3P(srpp->srp_etherhp, ==, NULL);
		ASSERT3U(srpp->srp_size, ==, 0);

		srpp->srp_flags = EFX_DISCARD;

		id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
		ASSERT(srp->sr_srpp[id] == NULL);
		srp->sr_srpp[id] = srpp;

		addr[batch++] = srpp->srp_addr;
		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
			    srp->sr_completed, srp->sr_added);
			srp->sr_added += batch;
			batch = 0;
		}
	}

	if (batch != 0) {
		efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
		    srp->sr_completed, srp->sr_added);
		srp->sr_added += batch;
	}

	efx_rx_qpush(srp->sr_erp, srp->sr_added, &srp->sr_pushed);
}

void
sfxge_rx_qfpp_trim(sfxge_rxq_t *srp)
{
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	mblk_t *p;
	mblk_t **pp;
	int count;

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		goto done;

	/* Make sure the queue is full */
	sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

	/* The refill may have emptied the pool */
	if (srfppp->srfpp_min == 0)
		goto done;

	/* Don't trim below the pool's low water mark */
	if (srfppp->srfpp_count <= srfppp->srfpp_lowat)
		goto done;

	ASSERT(srfppp->srfpp_min <= srfppp->srfpp_count);
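
	/*
	 * srfpp_min is the lowest pool occupancy seen since the last trim,
	 * i.e. the number of packets that went unused for a whole poll
	 * interval. For example, with count == 40, min == 10 and lowat == 16,
	 * the 24 packets above the low water mark are freed.
	 */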

	/* Trim to the larger of srfppp->srfpp_min and srfppp->srfpp_lowat */
	if (srfppp->srfpp_lowat > srfppp->srfpp_min)
		count = srfppp->srfpp_count - srfppp->srfpp_lowat;
	else
		count = srfppp->srfpp_count - srfppp->srfpp_min;

	/* Walk the get list */
	pp = &(srfppp->srfpp_get);
	while (--count >= 0) {
		ASSERT(pp);
		p = *pp;
		ASSERT(p != NULL);

		pp = &(p->b_next);
	}
	ASSERT(pp);
	p = *pp;

	/* Truncate the get list */
	*pp = NULL;

	/* Free the remainder */
	while (p != NULL) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;

		next = p->b_next;
		p->b_next = NULL;

		ASSERT3U(srfppp->srfpp_min, >, 0);
		srfppp->srfpp_min--;
		srfppp->srfpp_count--;

		freep = DB_FRTNP(p);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, p);

		srpp->srp_recycle = B_FALSE;

		freeb(p);

		p = next;
	}

done:
	srfppp->srfpp_min = srfppp->srfpp_count;
}

static void
sfxge_rx_qpoll(void *arg)
{
	sfxge_rxq_t *srp = arg;
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	uint16_t magic;

	/*
	 * man timeout(9f) states that this code should adhere to the
	 * same requirements as a softirq handler - DO NOT BLOCK
	 */

	/*
	 * Post an event to the event queue to cause the free packet pool to be
	 * trimmed if it is oversize.
	 */
	magic = SFXGE_MAGIC_RX_QFPP_TRIM | index;

#if defined(DEBUG)
	/* This is guaranteed due to the start/stop order of rx and ev */
	ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
#else
	/*
	 * Bug22691 WORKAROUND:
	 * This handler has been observed in the field to be invoked for a
	 * queue in the INITIALIZED state, which should never happen.
	 * Until the mechanism for this is properly understood, add defensive
	 * checks.
	 */
	if ((sep->se_state != SFXGE_EVQ_STARTED) ||
	    (srp->sr_state != SFXGE_RXQ_STARTED) ||
	    (!sep->se_eep)) {
		dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR
		    "RXQ[%d] bad state in sfxge_rx_qpoll %d %d %p",
		    index, sep->se_state, srp->sr_state, sep->se_eep);
		return;
	}
#endif
	efx_ev_qpost(sep->se_eep, magic);

	srp->sr_tid = timeout(sfxge_rx_qpoll, srp,
	    drv_usectohz(sp->s_rxq_poll_usec));
}

static void
sfxge_rx_qpoll_start(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];

	ASSERT(mutex_owned(&(sep->se_lock)));
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	/* Schedule a poll */
	ASSERT3P(srp->sr_tid, ==, 0);
	srp->sr_tid = timeout(sfxge_rx_qpoll, srp, 0);
}

static void
sfxge_rx_qpoll_stop(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	timeout_id_t tid;

	ASSERT(mutex_owned(&(sep->se_lock)));
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	/*
	 * Cancel the qpoll timer. Care is needed as this function
	 * can race with sfxge_rx_qpoll() for timeout id updates.
	 *
	 * Do not hold locks used by any timeout(9f) handlers across
	 * calls to untimeout(9f) as this will deadlock.
	 */
	tid = 0;
	while ((srp->sr_tid != 0) && (srp->sr_tid != tid)) {
		tid = srp->sr_tid;
		(void) untimeout(tid);
	}
	srp->sr_tid = 0;
}

static int
sfxge_rx_kstat_update(kstat_t *ksp, int rw)
{
	sfxge_rxq_t *srp = ksp->ks_private;
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	kstat_named_t *knp;
	int rc;

	if (rw != KSTAT_READ) {
		rc = EACCES;
		goto fail1;
	}

	ASSERT(mutex_owned(&(sep->se_lock)));
	if (srp->sr_state != SFXGE_RXQ_STARTED)
		goto done;

	knp = ksp->ks_data;
	/* NB pointer post-increment below */
	knp++->value.ui32 = srp->sr_kstat.srk_rx_pkt_mem_limit;
	knp++->value.ui32 = srp->sr_kstat.srk_kcache_alloc_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_fail;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_fail;
	knp++->value.ui32 = srp->sr_kstat.srk_desballoc_fail;
	knp++->value.ui32 = srp->sr_kstat.srk_rxq_empty_discard;

done:
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_kstat_init(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	dev_info_t *dip = sp->s_dip;
	char name[MAXNAMELEN];
	kstat_t *ksp;
	kstat_named_t *knp;
	int rc;

	/* Create the set */
	(void) snprintf(name, MAXNAMELEN - 1, "%s_rxq%04d",
	    ddi_driver_name(dip), index);

	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
	    ddi_get_instance(dip), name, "rxq", KSTAT_TYPE_NAMED,
	    SFXGE_RX_NSTATS, 0)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}

	srp->sr_ksp = ksp;

	ksp->ks_update = sfxge_rx_kstat_update;
	ksp->ks_private = srp;
	ksp->ks_lock = &(sep->se_lock);

	/* Initialise the named stats */
	knp = ksp->ks_data;
	kstat_named_init(knp, "rx_pkt_mem_limit", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "kcache_alloc_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_alloc_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_alloc_fail", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_bind_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_bind_fail", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "desballoc_fail", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "rxq_empty_discard", KSTAT_DATA_UINT32);

	kstat_install(ksp);
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_qinit(sfxge_t *sp, unsigned int index)
{
	sfxge_rxq_t *srp;
	int rc;

	ASSERT3U(index, <, SFXGE_RX_SCALE_MAX);

	if ((srp = kmem_cache_alloc(sp->s_rqc, KM_SLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_UNINITIALIZED);

	srp->sr_index = index;
	sp->s_srp[index] = srp;

	if ((rc = sfxge_rx_kstat_init(srp)) != 0)
		goto fail2;

	srp->sr_state = SFXGE_RXQ_INITIALIZED;

	return (0);

fail2:
	DTRACE_PROBE(fail2);
	kmem_cache_free(sp->s_rqc, srp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_qstart(sfxge_t *sp, unsigned int index)
{
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rxq_t *srp;
	efsys_mem_t *esmp;
	efx_nic_t *enp;
	unsigned int level;
	int rc;

	mutex_enter(&(sep->se_lock));
	srp = sp->s_srp[index];
	enp = sp->s_enp;
	esmp = &(srp->sr_mem);

	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
	ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);

	/* Zero the memory */
	bzero(esmp->esm_base, EFX_RXQ_SIZE(sp->s_rxq_size));

	/* Program the buffer table */
	if ((rc = sfxge_sram_buf_tbl_set(sp, srp->sr_id, esmp,
	    EFX_RXQ_NBUFS(sp->s_rxq_size))) != 0)
		goto fail1;

	/* Create the receive queue */
	if ((rc = efx_rx_qcreate(enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sp->s_rxq_size, srp->sr_id, sep->se_eep, &(srp->sr_erp)))
	    != 0)
		goto fail2;

	/* Enable the receive queue */
	efx_rx_qenable(srp->sr_erp);

	/* Set the water marks */
	srp->sr_hiwat = EFX_RXQ_LIMIT(sp->s_rxq_size) * 9 / 10;
	srp->sr_lowat = srp->sr_hiwat / 2;

	srp->sr_state = SFXGE_RXQ_STARTED;
	srp->sr_flush = SFXGE_FLUSH_INACTIVE;

	sfxge_rx_qpoll_start(srp);

	/* Try to fill the queue from the pool */
	sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

	/*
	 * If there were insufficient buffers in the pool to reach at least
	 * a batch then allocate some.
	 */
	level = srp->sr_added - srp->sr_completed;
	if (level < SFXGE_RX_BATCH)
		sfxge_rx_qfill(srp, SFXGE_RX_BATCH);

	mutex_exit(&(sep->se_lock));

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	/* Clear entries from the buffer table */
	sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(sep->se_lock));

	return (rc);
}

static void
sfxge_rx_qflow_complete(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp)
{
	mblk_t *mp;
	struct ether_header *etherhp;
	struct ip *iphp;
	struct tcphdr *thp;

	if (srfp->srf_mp == NULL)
		return;

	mp = srfp->srf_mp;
	etherhp = srfp->srf_etherhp;
	iphp = srfp->srf_iphp;
	thp = srfp->srf_last_thp;
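
	/*
	 * Patch up the headers of the coalesced super-packet: the IP length
	 * covers the merged payload, and the ACK, window and flags in the
	 * first TCP header are taken from the most recent segment.
	 */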
	ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
	    sizeof (struct ether_vlan_header) :
	    sizeof (struct ether_header)) +
	    srfp->srf_len, ==, msgdsize(mp));

	ASSERT3U(srfp->srf_len & 0xffff, ==, srfp->srf_len);
	iphp->ip_len = htons(srfp->srf_len);

	srfp->srf_first_thp->th_ack = thp->th_ack;
	srfp->srf_first_thp->th_win = thp->th_win;
	srfp->srf_first_thp->th_flags = thp->th_flags;

	DTRACE_PROBE2(flow_complete, uint32_t, srfp->srf_tag,
	    size_t, srfp->srf_len);

	srfp->srf_mp = NULL;
	srfp->srf_len = 0;

	ASSERT(mp->b_next == NULL);
	*(srp->sr_mpp) = mp;
	srp->sr_mpp = &(mp->b_next);
}

static boolean_t
sfxge_rx_qflow_add(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp,
    sfxge_rx_packet_t *srpp, clock_t now)
{
	sfxge_t *sp = srp->sr_sp;
	struct ether_header *etherhp = srpp->srp_etherhp;
	struct ip *iphp = srpp->srp_iphp;
	struct tcphdr *thp = srpp->srp_thp;
	size_t off = srpp->srp_off;
	size_t size = (size_t)(srpp->srp_size);
	mblk_t *mp = srpp->srp_mp;
	uint32_t seq;
	unsigned int shift;

	ASSERT3U(MBLKL(mp), ==, off + size);
	ASSERT3U(DB_CKSUMFLAGS(mp), ==,
	    HCK_FULLCKSUM | HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM);

	seq = htonl(thp->th_seq);

	/*
	 * If the time between this segment and the last is greater than RTO
	 * then consider this a new flow.
	 */
	if (now - srfp->srf_lbolt > srp->sr_rto) {
		srfp->srf_count = 1;
		srfp->srf_seq = seq + size;

		goto fail1;
	}

	if (seq != srfp->srf_seq) {
		if (srfp->srf_count > SFXGE_SLOW_START)
			srfp->srf_count = SFXGE_SLOW_START;

		srfp->srf_count >>= 1;

		srfp->srf_count++;
		srfp->srf_seq = seq + size;

		goto fail2;
	}

	/* Update the in-order segment count and sequence number */
	srfp->srf_count++;
	srfp->srf_seq = seq + size;

	/* Don't merge across pure ACK, URG, SYN or RST segments */
	if (size == 0 || thp->th_flags & (TH_URG | TH_SYN | TH_RST) ||
	    thp->th_urp != 0)
		goto fail3;

	/*
	 * If the in-order segment count has not yet reached the slow-start
	 * threshold then we cannot coalesce.
	 */
	if (srfp->srf_count < SFXGE_SLOW_START)
		goto fail4;

	/* Scale up the packet size from 4k (the maximum being 64k) */
	ASSERT3U(srfp->srf_count, >=, SFXGE_SLOW_START);
	shift = MIN(srfp->srf_count - SFXGE_SLOW_START + 12, 16);
	if (srfp->srf_len + size >= (1 << shift))
		sfxge_rx_qflow_complete(srp, srfp);

	ASSERT(mp->b_cont == NULL);

	if (srfp->srf_mp == NULL) {
		/* First packet in this flow */
		srfp->srf_etherhp = etherhp;
		srfp->srf_iphp = iphp;
		srfp->srf_first_thp = srfp->srf_last_thp = thp;

		ASSERT3P(mp->b_cont, ==, NULL);
		srfp->srf_mp = mp;
		srfp->srf_mpp = &(mp->b_cont);

		srfp->srf_len = ntohs(iphp->ip_len);
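
		/*
		 * The occupied flows form a singly-linked list with
		 * srp->sr_srfpp pointing at the last element's srf_next
		 * field; a flow is linked iff srf_next != NULL or the
		 * tail pointer refers to it.
		 */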
		/*
		 * If the flow is not already in the list of occupied flows
		 * then add it.
		 */
		if (srfp->srf_next == NULL &&
		    srp->sr_srfpp != &(srfp->srf_next)) {
			*(srp->sr_srfpp) = srfp;
			srp->sr_srfpp = &(srfp->srf_next);
		}
	} else {
		/* Later packet in this flow - skip TCP header */
		srfp->srf_last_thp = thp;

		mp->b_rptr += off;
		ASSERT3U(MBLKL(mp), ==, size);

		ASSERT3P(mp->b_cont, ==, NULL);
		*(srfp->srf_mpp) = mp;
		srfp->srf_mpp = &(mp->b_cont);

		srfp->srf_len += size;

		ASSERT(srfp->srf_next != NULL ||
		    srp->sr_srfpp == &(srfp->srf_next));
	}

	DTRACE_PROBE2(flow_add, uint32_t, srfp->srf_tag, size_t, size);

	/*
	 * Try to align coalesced segments on push boundaries, unless they
	 * are too frequent.
	 */
	if (sp->s_rx_coalesce_mode == SFXGE_RX_COALESCE_ALLOW_PUSH &&
	    thp->th_flags & TH_PUSH)
		sfxge_rx_qflow_complete(srp, srfp);

	srfp->srf_lbolt = now;
	return (B_TRUE);

fail4:
fail3:
fail2:
fail1:
	sfxge_rx_qflow_complete(srp, srfp);

	srfp->srf_lbolt = now;
	return (B_FALSE);
}

void
sfxge_rx_qpacket_coalesce(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	clock_t now;
	mblk_t *mp;
	sfxge_rx_flow_t *srfp;

	ASSERT(sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF);

	now = ddi_get_lbolt();

	mp = srp->sr_mp;

	srp->sr_mp = NULL;
	srp->sr_mpp = &(srp->sr_mp);

	/* Start with the last flow to be appended to */
	srfp = *(srp->sr_srfpp);

	while (mp != NULL) {
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;
		struct ether_header *etherhp;
		struct ip *iphp;
		struct tcphdr *thp;
		size_t off;
		size_t size;
		uint16_t ether_tci;
		uint32_t hash;
		uint32_t tag;
		mblk_t *next;
		sfxge_packet_type_t pkt_type;
		uint16_t sport, dport;

		next = mp->b_next;
		mp->b_next = NULL;

		if (next != NULL)
			prefetch_read_many(next);

		freep = DB_FRTNP(mp);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);

		/* If the packet is not TCP then we cannot coalesce it */
		if (~(srpp->srp_flags) & EFX_PKT_TCP)
			goto reject;

		/*
		 * If the packet is not fully checksummed then we cannot
		 * coalesce it.
		 */
		if (~(srpp->srp_flags) & (EFX_CKSUM_TCPUDP | EFX_CKSUM_IPV4))
			goto reject;

		/* Parse the TCP header */
		pkt_type = sfxge_pkthdr_parse(mp, &etherhp, &iphp, &thp, &off,
		    &size, &sport, &dport);
		ASSERT(pkt_type == SFXGE_PACKET_TYPE_IPV4_TCP);
		ASSERT(etherhp != NULL);
		ASSERT(iphp != NULL);
		ASSERT(thp != NULL);
		ASSERT(off != 0);

		if ((iphp->ip_off & ~htons(IP_DF)) != 0)
			goto reject;

		if (etherhp->ether_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *ethervhp;

			ethervhp = (struct ether_vlan_header *)etherhp;
			ether_tci = ethervhp->ether_tci;
		} else {
			ether_tci = 0;
		}

		/*
		 * Make sure any minimum length padding is stripped
		 * before we try to add the packet to a flow.
		 */
		ASSERT3U(sp->s_rx_prefix_size + MBLKL(mp), ==,
		    (size_t)(srpp->srp_size));
		ASSERT3U(sp->s_rx_prefix_size + off + size, <=,
		    (size_t)(srpp->srp_size));

		if (sp->s_rx_prefix_size + off + size <
		    (size_t)(srpp->srp_size))
			mp->b_wptr = mp->b_rptr + off + size;

		/*
		 * If there is no current flow, or the segment does not match
		 * the current flow then we must attempt to look up the
		 * correct flow in the table.
		 */
		if (srfp == NULL)
			goto lookup;

		if (srfp->srf_saddr != iphp->ip_src.s_addr ||
		    srfp->srf_daddr != iphp->ip_dst.s_addr)
			goto lookup;

		if (srfp->srf_sport != thp->th_sport ||
		    srfp->srf_dport != thp->th_dport)
			goto lookup;

		if (srfp->srf_tci != ether_tci)
			goto lookup;

add:
		ASSERT(srfp != NULL);

		srpp->srp_etherhp = etherhp;
		srpp->srp_iphp = iphp;
		srpp->srp_thp = thp;
		srpp->srp_off = off;

		ASSERT3U(size, <, (1 << 16));
		srpp->srp_size = (uint16_t)size;

		/* Try to append the packet to the flow */
		if (!sfxge_rx_qflow_add(srp, srfp, srpp, now))
			goto reject;

		mp = next;
		continue;

lookup:
		/*
		 * If there is a prefix area then read the hash from that,
		 * otherwise calculate it.
		 */
		if (sp->s_rx_prefix_size != 0) {
			hash = efx_psuedo_hdr_hash_get(sp->s_enp,
			    EFX_RX_HASHALG_TOEPLITZ,
			    DB_BASE(mp));
		} else {
			SFXGE_TCP_HASH(sp,
			    &iphp->ip_src.s_addr,
			    thp->th_sport,
			    &iphp->ip_dst.s_addr,
			    thp->th_dport,
			    hash);
		}

		srfp = &(srp->sr_flow[(hash >> 6) % SFXGE_MAX_FLOW]);
		tag = hash + 1; /* Make sure it's not zero */

		/*
		 * If the flow we have found does not match the hash then
		 * it may be an unused flow, or it may be stale.
		 */
		if (tag != srfp->srf_tag) {
			if (srfp->srf_count != 0) {
				if (now - srfp->srf_lbolt <= srp->sr_rto)
					goto reject;
			}

			if (srfp->srf_mp != NULL)
				goto reject;

			/* Start a new flow */
			ASSERT(srfp->srf_next == NULL);

			srfp->srf_tag = tag;

			srfp->srf_saddr = iphp->ip_src.s_addr;
			srfp->srf_daddr = iphp->ip_dst.s_addr;
			srfp->srf_sport = thp->th_sport;
			srfp->srf_dport = thp->th_dport;
			srfp->srf_tci = ether_tci;

			srfp->srf_count = 0;
			srfp->srf_seq = ntohl(thp->th_seq);

			srfp->srf_lbolt = now;
			goto add;
		}

		/*
		 * If the flow we have found does match the hash then it could
		 * still be an alias.
		 */
		if (srfp->srf_saddr != iphp->ip_src.s_addr ||
		    srfp->srf_daddr != iphp->ip_dst.s_addr)
			goto reject;

		if (srfp->srf_sport != thp->th_sport ||
		    srfp->srf_dport != thp->th_dport)
			goto reject;

		if (srfp->srf_tci != ether_tci)
			goto reject;

		goto add;

reject:
		*(srp->sr_mpp) = mp;
		srp->sr_mpp = &(mp->b_next);

		mp = next;
	}
}

void
sfxge_rx_qcomplete(sfxge_rxq_t *srp, boolean_t eop)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	unsigned int completed;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int level;

	ASSERT(mutex_owned(&(sep->se_lock)));

	ASSERT(srp->sr_mp == NULL);
	ASSERT(srp->sr_mpp == &(srp->sr_mp));

	completed = srp->sr_completed;
	while (completed != srp->sr_pending) {
		unsigned int id;
		sfxge_rx_packet_t *srpp;
		mblk_t *mp;
		size_t size;
		uint16_t flags;
		int rc;

		id = completed++ & (sp->s_rxq_size - 1);

		if (srp->sr_pending - completed >= 4) {
			unsigned int prefetch;

			prefetch = (id + 4) & (sp->s_rxq_size - 1);

			srpp = srp->sr_srpp[prefetch];
			ASSERT(srpp != NULL);

			mp = srpp->srp_mp;
			prefetch_read_many(mp->b_datap);
		} else if (completed == srp->sr_pending) {
			prefetch_read_many(srp->sr_mp);
		}

		srpp = srp->sr_srpp[id];
		ASSERT(srpp != NULL);

		srp->sr_srpp[id] = NULL;

		mp = srpp->srp_mp;
		ASSERT(mp->b_cont == NULL);

		/* when called from sfxge_rx_qstop() */
		if (srp->sr_state != SFXGE_RXQ_STARTED)
			goto discard;

		if (srpp->srp_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		/* Make the data visible to the kernel */
		rc = ddi_dma_sync(srpp->srp_dma_handle, 0,
		    sp->s_rx_buffer_size, DDI_DMA_SYNC_FORKERNEL);
		ASSERT3P(rc, ==, DDI_SUCCESS);

		/* Read the length from the pseudo header if required */
		if (srpp->srp_flags & EFX_PKT_PREFIX_LEN) {
			rc = efx_psuedo_hdr_pkt_length_get(sp->s_enp,
			    mp->b_rptr,
			    &srpp->srp_size);
			ASSERT3P(rc, ==, 0);
			srpp->srp_size += sp->s_rx_prefix_size;
		}

		/* Set up the packet length */
		ASSERT3P(mp->b_rptr, ==, DB_BASE(mp));
		mp->b_rptr += sp->s_rx_prefix_size;

		prefetch_read_many(mp->b_rptr);

		ASSERT3P(mp->b_wptr, ==, DB_BASE(mp));
		mp->b_wptr += (size_t)(srpp->srp_size);
		ASSERT3P(mp->b_wptr, <=, DB_LIM(mp));

		/* Calculate the maximum packet size */
		size = sp->s_mtu;
		size += (srpp->srp_flags & EFX_PKT_VLAN_TAGGED) ?
		    sizeof (struct ether_vlan_header) :
		    sizeof (struct ether_header);

		if (MBLKL(mp) > size)
			goto discard;

		/* Check for loopback packets */
		if (!(srpp->srp_flags & EFX_PKT_IPV4) &&
		    !(srpp->srp_flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = (struct ether_header *)(mp->b_rptr);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				DTRACE_PROBE(loopback);

				srp->sr_loopback++;
				goto discard;
			}
		}

		/* Set up the checksum information */
		flags = 0;

		if (srpp->srp_flags & EFX_CKSUM_IPV4) {
			ASSERT(srpp->srp_flags & EFX_PKT_IPV4);
			flags |= HCK_IPV4_HDRCKSUM;
		}

		if (srpp->srp_flags & EFX_CKSUM_TCPUDP) {
			ASSERT(srpp->srp_flags & EFX_PKT_TCP ||
			    srpp->srp_flags & EFX_PKT_UDP);
			flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
		}

		DB_CKSUMSTART(mp) = 0;
		DB_CKSUMSTUFF(mp) = 0;
		DB_CKSUMEND(mp) = 0;
		DB_CKSUMFLAGS(mp) = flags;
		DB_CKSUM16(mp) = 0;

		/* Add the packet to the tail of the chain */
		srfppp->srfpp_loaned++;

		ASSERT(mp->b_next == NULL);
		*(srp->sr_mpp) = mp;
		srp->sr_mpp = &(mp->b_next);

		continue;

discard:
		/* Return the packet to the pool */
		srfppp->srfpp_loaned++;
		freeb(mp);	/* Equivalent to freemsg() as b_cont==0 */
	}
	srp->sr_completed = completed;

	/* Attempt to coalesce any TCP packets */
	if (sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF)
		sfxge_rx_qpacket_coalesce(srp);

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (srp->sr_srfp != NULL && eop) {
		sfxge_rx_flow_t *srfp;

		srfp = srp->sr_srfp;

		srp->sr_srfp = NULL;
		srp->sr_srfpp = &(srp->sr_srfp);

		do {
			sfxge_rx_flow_t *next;

			next = srfp->srf_next;
			srfp->srf_next = NULL;

			sfxge_rx_qflow_complete(srp, srfp);

			srfp = next;
		} while (srfp != NULL);
	}

	level = srp->sr_pushed - srp->sr_completed;

	/* If there are any packets then pass them up the stack */
	if (srp->sr_mp != NULL) {
		mblk_t *mp;

		mp = srp->sr_mp;

		srp->sr_mp = NULL;
		srp->sr_mpp = &(srp->sr_mp);

		if (level == 0) {
			/* Try to refill ASAP */
			sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
			level = srp->sr_pushed - srp->sr_completed;
		}

		/*
		 * If the RXQ is still empty, discard and recycle the
		 * current entry to ensure that the ring always
		 * contains at least one descriptor. This ensures that
		 * the next hardware RX will trigger an event
		 * (possibly delayed by interrupt moderation) and
		 * trigger another refill/fill attempt.
		 *
		 * Note this drops a complete LRO fragment from the
		 * start of the batch.
		 *
		 * Note also that copymsgchain() does not help with
		 * resource starvation here, unless we are short of DMA
		 * mappings.
		 */
		if (level == 0) {
			mblk_t *nmp;

			srp->sr_kstat.srk_rxq_empty_discard++;
			DTRACE_PROBE1(rxq_empty_discard, int, index);
			nmp = mp->b_next;
			if (nmp)
				sfxge_gld_rx_post(sp, index, nmp);
			/* as level == 0 will swizzle/rxpost below */
			freemsg(mp);
		} else {
			sfxge_gld_rx_post(sp, index, mp);
		}
	}

	/* Top up the queue if necessary */
	if (level < srp->sr_hiwat) {
		sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

		level = srp->sr_added - srp->sr_completed;
		if (level < srp->sr_lowat)
			sfxge_rx_qfill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
	}
}

void
sfxge_rx_qflush_done(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	boolean_t flush_pending;

	ASSERT(mutex_owned(&(sep->se_lock)));

	/*
	 * Flush successful: wakeup sfxge_rx_qstop() if flush is pending.
	 *
	 * A delayed flush event received after RxQ stop has timed out
	 * will be ignored, as then the flush state will not be PENDING
	 * (see SFCbug22989).
	 */
	flush_pending = (srp->sr_flush == SFXGE_FLUSH_PENDING);
	srp->sr_flush = SFXGE_FLUSH_DONE;
	if (flush_pending)
		cv_broadcast(&(srp->sr_flush_kv));
}

void
sfxge_rx_qflush_failed(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	boolean_t flush_pending;

	ASSERT(mutex_owned(&(sep->se_lock)));

	/*
	 * Flush failed: wakeup sfxge_rx_qstop() if flush is pending.
	 *
	 * A delayed flush event received after RxQ stop has timed out
	 * will be ignored, as then the flush state will not be PENDING
	 * (see SFCbug22989).
	 */
	flush_pending = (srp->sr_flush == SFXGE_FLUSH_PENDING);
	srp->sr_flush = SFXGE_FLUSH_FAILED;
	if (flush_pending)
		cv_broadcast(&(srp->sr_flush_kv));
}

static void
sfxge_rx_qstop(sfxge_t *sp, unsigned int index)
{
	dev_info_t *dip = sp->s_dip;
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rxq_t *srp;
	clock_t timeout;
	unsigned int flush_tries = SFXGE_RX_QFLUSH_TRIES;
	int rc;

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	mutex_enter(&(sep->se_lock));

	srp = sp->s_srp[index];
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	sfxge_rx_qpoll_stop(srp);

	/* Further packets are discarded by sfxge_rx_qcomplete() */
	srp->sr_state = SFXGE_RXQ_INITIALIZED;
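
	/*
	 * Flush protocol: each attempt below moves sr_flush to PENDING and
	 * then waits on sr_flush_kv for sfxge_rx_qflush_done() or
	 * sfxge_rx_qflush_failed() to record the outcome from the event
	 * queue. Up to SFXGE_RX_QFLUSH_TRIES attempts are made before the
	 * overall SFXGE_RX_QFLUSH_USEC deadline expires.
	 */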

	if (sp->s_hw_err != SFXGE_HW_OK) {
		/*
		 * Flag indicates possible hardware failure.
		 * Attempt flush but do not wait for it to complete.
		 */
		srp->sr_flush = SFXGE_FLUSH_DONE;
		(void) efx_rx_qflush(srp->sr_erp);
	}

	/* Wait up to 2 seconds for queue flushing to complete */
	timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_RX_QFLUSH_USEC);

	while (srp->sr_flush != SFXGE_FLUSH_DONE && flush_tries-- > 0) {
		if ((rc = efx_rx_qflush(srp->sr_erp)) != 0) {
			if (rc == EALREADY)
				srp->sr_flush = SFXGE_FLUSH_DONE;
			else
				srp->sr_flush = SFXGE_FLUSH_FAILED;
			break;
		}
		srp->sr_flush = SFXGE_FLUSH_PENDING;
		if (cv_timedwait(&(srp->sr_flush_kv), &(sep->se_lock),
		    timeout) < 0) {
			/* Timeout waiting for successful or failed flush */
			dev_err(dip, CE_NOTE,
			    SFXGE_CMN_ERR "rxq[%d] flush timeout", index);
			break;
		}
	}

	if (srp->sr_flush == SFXGE_FLUSH_FAILED)
		dev_err(dip, CE_NOTE,
		    SFXGE_CMN_ERR "rxq[%d] flush failed", index);

	DTRACE_PROBE1(flush, sfxge_flush_state_t, srp->sr_flush);
	srp->sr_flush = SFXGE_FLUSH_DONE;

	/* Destroy the receive queue */
	efx_rx_qdestroy(srp->sr_erp);
	srp->sr_erp = NULL;

	/* Clear entries from the buffer table */
	sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));

	/*
	 * Free any unused RX packets which had descriptors on the RXQ.
	 * Packets will be discarded as state != STARTED.
	 */
	srp->sr_pending = srp->sr_added;
	sfxge_rx_qcomplete(srp, B_TRUE);

	ASSERT3U(srp->sr_completed, ==, srp->sr_pending);

	srp->sr_added = 0;
	srp->sr_pushed = 0;
	srp->sr_pending = 0;
	srp->sr_completed = 0;
	srp->sr_loopback = 0;

	srp->sr_lowat = 0;
	srp->sr_hiwat = 0;

	mutex_exit(&(sep->se_lock));
}

static void
sfxge_rx_kstat_fini(sfxge_rxq_t *srp)
{
	kstat_delete(srp->sr_ksp);
	srp->sr_ksp = NULL;
}

static void
sfxge_rx_qfini(sfxge_t *sp, unsigned int index)
{
	sfxge_rxq_t *srp = sp->s_srp[index];

	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);

	sp->s_srp[index] = NULL;
	srp->sr_state = SFXGE_RXQ_UNINITIALIZED;

	sfxge_rx_kstat_fini(srp);

	/* Empty the pool */
	sfxge_rx_qfpp_empty(srp);

	srp->sr_index = 0;

	kmem_cache_free(sp->s_rqc, srp);
}

static int
sfxge_rx_scale_kstat_update(kstat_t *ksp, int rw)
{
	sfxge_t *sp = ksp->ks_private;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	kstat_named_t *knp;
	unsigned int index;
	unsigned int entry;
	unsigned int *freq;
	int rc;

	ASSERT(mutex_owned(&(srsp->srs_lock)));

	if (rw != KSTAT_READ) {
		rc = EACCES;
		goto fail1;
	}

	if ((freq = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		index = srsp->srs_tbl[entry];

		freq[index]++;
	}

	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		knp->value.ui64 = freq[index];
		knp++;
	}

	knp->value.ui64 = srsp->srs_count;

	kmem_free(freq, sizeof (unsigned int) * sip->si_nalloc);

	return (0);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);
	return (rc);
}

static int
sfxge_rx_scale_kstat_init(sfxge_t *sp)
{
	dev_info_t *dip = sp->s_dip;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	kstat_t *ksp;
	kstat_named_t *knp;
	unsigned int index;
	int rc;

	/* Create the set */
	(void) snprintf(name, MAXNAMELEN - 1, "%s_rss", ddi_driver_name(dip));

	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
	    ddi_get_instance(dip), name, "rss", KSTAT_TYPE_NAMED,
	    sip->si_nalloc + 1, 0)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}

	srsp->srs_ksp = ksp;

	ksp->ks_update = sfxge_rx_scale_kstat_update;
	ksp->ks_private = sp;
	ksp->ks_lock = &(srsp->srs_lock);

	/* Initialise the named stats */
	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		char name[MAXNAMELEN];

		(void) snprintf(name, MAXNAMELEN - 1, "evq%04d_count", index);
		kstat_named_init(knp, name, KSTAT_DATA_UINT64);
		knp++;
	}

	kstat_named_init(knp, "scale", KSTAT_DATA_UINT64);

	kstat_install(ksp);
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_kstat_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	/* Destroy the set */
	kstat_delete(srsp->srs_ksp);
	srsp->srs_ksp = NULL;
}

unsigned int
sfxge_rx_scale_prop_get(sfxge_t *sp)
{
	int rx_scale;

	rx_scale = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_scale_count", SFXGE_RX_SCALE_MAX);
	/* Zero and all negative values select the number of logical CPUs */
	if (rx_scale <= 0)
		rx_scale = ncpus;

	return (rx_scale);
}

static int
sfxge_rx_scale_init(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int rc;

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_UNINITIALIZED);

	/* Create tables for CPU, core, cache and chip counts */
	srsp->srs_cpu = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);

	mutex_init(&(srsp->srs_lock), NULL, MUTEX_DRIVER, NULL);

	/* We need at least one event queue */
	srsp->srs_count = sfxge_rx_scale_prop_get(sp);
	if (srsp->srs_count > sip->si_nalloc)
		srsp->srs_count = sip->si_nalloc;
	if (srsp->srs_count < 1)
		srsp->srs_count = 1;

	/* Set up the kstats */
	if ((rc = sfxge_rx_scale_kstat_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);
	mutex_destroy(&(srsp->srs_lock));

	return (rc);
}

void
sfxge_rx_scale_update(void *arg)
{
	sfxge_t *sp = arg;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip;
	processorid_t id;
	unsigned int count;
	unsigned int *tbl;
	unsigned int *rating;
	unsigned int entry;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = EFAULT;
		goto fail1;
	}

	if ((tbl = kmem_zalloc(sizeof (unsigned int) * SFXGE_RX_SCALE_MAX,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	sip = &(sp->s_intr);
	if ((rating = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}
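
	/*
	 * EVQ selection: sfxge_cpu[] is a driver-global table (updated under
	 * cpu_lock) counting how many RSS entries are bound to each CPU. The
	 * loop below repeatedly picks the event queue whose interrupt CPU
	 * currently has the least contention, so the condensed table spreads
	 * load over distinct CPUs where possible.
	 */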
void
sfxge_rx_scale_update(void *arg)
{
	sfxge_t *sp = arg;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip;
	processorid_t id;
	unsigned int count;
	unsigned int *tbl;
	unsigned int *rating;
	unsigned int entry;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = EFAULT;
		goto fail1;
	}

	if ((tbl = kmem_zalloc(sizeof (unsigned int) * SFXGE_RX_SCALE_MAX,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	sip = &(sp->s_intr);
	if ((rating = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;
	}

	ASSERT(srsp->srs_count != 0);

	/* Choose as many event queues as we need */
	for (count = 0; count < srsp->srs_count; count++) {
		unsigned int index;
		sfxge_evq_t *sep;
		unsigned int choice;
		unsigned int choice_rating;

		bzero(rating, sizeof (unsigned int) * sip->si_nalloc);

		/*
		 * Rate each event queue on its global level of CPU
		 * contention.
		 */
		for (index = 0; index < sip->si_nalloc; index++) {
			sep = sp->s_sep[index];

			id = sep->se_cpu_id;
			rating[index] += sfxge_cpu[id];
		}

		/* Choose the queue with the lowest CPU contention */
		choice = 0;
		choice_rating = rating[0];

		for (index = 1; index < sip->si_nalloc; index++) {
			if (rating[index] < choice_rating) {
				choice = index;
				choice_rating = rating[index];
			}
		}

		/* Add our choice to the condensed RSS table */
		tbl[count] = choice;

		/* Add information to the global contention tables */
		sep = sp->s_sep[choice];

		id = sep->se_cpu_id;
		srsp->srs_cpu[id]++;
		sfxge_cpu[id]++;
	}

	mutex_exit(&cpu_lock);

	/* Build the expanded RSS table */
	count = 0;
	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		unsigned int index;

		index = tbl[count];
		count = (count + 1) % srsp->srs_count;

		srsp->srs_tbl[entry] = index;
	}

	/* Program the expanded RSS table into the hardware */
	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
	kmem_free(rating, sizeof (unsigned int) * sip->si_nalloc);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
	return;

fail3:
	DTRACE_PROBE(fail3);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));
}

static int
sfxge_rx_scale_start(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	if ((rc = sfxge_toeplitz_hash_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_STARTED;

	mutex_exit(&(srsp->srs_lock));

	/* sfxge_t->s_state_lock held */
	(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
	    DDI_SLEEP);

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}
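/*
 * The getter and setter below expose srs_count, the number of event
 * queues spread across the RSS indirection table. A hypothetical
 * caller (illustration only) would look like:
 *
 *	unsigned int count;
 *	int rc;
 *
 *	if ((rc = sfxge_rx_scale_count_get(sp, &count)) != 0)
 *		return (rc);	(ENOTSUP unless initialized or started)
 *	if ((rc = sfxge_rx_scale_count_set(sp, 1)) != 0)
 *		return (rc);	(EINVAL outside 1..si_nalloc)
 *
 * A successful set while started re-dispatches sfxge_rx_scale_update()
 * on the taskq to rebuild and reprogram the indirection table.
 */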
int
sfxge_rx_scale_count_get(sfxge_t *sp, unsigned int *countp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail1;
	}

	*countp = srsp->srs_count;

	mutex_exit(&(srsp->srs_lock));

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_set(sfxge_t *sp, unsigned int count)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int dispatch = 1;
	int rc;

	if (count < 1 || count > sip->si_nalloc) {
		rc = EINVAL;
		goto fail1;
	}

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail2;
	}

	srsp->srs_count = count;

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED)
		dispatch = 0;

	mutex_exit(&(srsp->srs_lock));

	if (dispatch)
		/* no locks held */
		(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
		    DDI_SLEEP);

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	mutex_exit(&(srsp->srs_lock));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_stop(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	processorid_t id;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_STARTED);

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;
	}

	mutex_exit(&cpu_lock);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
}

static void
sfxge_rx_scale_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	srsp->srs_state = SFXGE_RX_SCALE_UNINITIALIZED;

	/* Tear down the kstats */
	sfxge_rx_scale_kstat_fini(sp);

	srsp->srs_count = 0;

	mutex_destroy(&(srsp->srs_lock));

	/* Destroy tables */
	kmem_free(srsp->srs_cpu, sizeof (unsigned int) * NCPU);
	srsp->srs_cpu = NULL;

	sfxge_toeplitz_hash_fini(sp);
}
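/*
 * Note on the kmem cache names built below: for a hypothetical instance
 * 0 of the "sfxge" driver they come out as "sfxge0_rx_packet_cache" and
 * "sfxge0_rxq_cache", which makes the caches easy to locate with mdb's
 * ::kmem_cache dcmd.
 */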
int
sfxge_rx_init(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	int index;
	int rc;

	if (sip->si_state == SFXGE_INTR_UNINITIALIZED) {
		rc = EINVAL;
		goto fail1;
	}

	if ((rc = sfxge_rx_scale_init(sp)) != 0)
		goto fail2;

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rx_packet_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rpc = kmem_cache_create(name, sizeof (sfxge_rx_packet_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_packet_ctor, sfxge_rx_packet_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_rpc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rxq_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rqc = kmem_cache_create(name, sizeof (sfxge_rxq_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_qctor, sfxge_rx_qdtor, NULL, sp,
	    NULL, 0);
	ASSERT(sp->s_rqc != NULL);

	sp->s_rx_pkt_mem_max = ddi_prop_get_int64(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_pkt_mem_max", 0);	/* disabled */

	/* Initialize the receive queue(s) */
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qinit(sp, index)) != 0)
			goto fail3;
	}

	sp->s_rx_coalesce_mode = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_coalesce_mode", SFXGE_RX_COALESCE_OFF);

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Tear down the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}
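/*
 * Worked example of the alignment arithmetic in sfxge_rx_start() below,
 * assuming a 64-byte SFXGE_CPU_CACHE_SIZE (illustrative value only):
 *
 *  - No prefix (final case): hdrlen = 0 + 14 (Ethernet header), so
 *    s_rx_buffer_align = P2ROUNDUP(14, 4) - 14 = 2; the buffer starts
 *    2 bytes past a cache line and the IP header lands 32-bit aligned
 *    at offset 16.
 *
 *  - LFSR hash insert with a hypothetical 16-byte prefix:
 *    s_rx_buffer_align = 64 - (16 - 2) = 50, so the last 2 prefix bytes
 *    (the hash) share a cache line with the Ethernet and IP headers.
 */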
int
sfxge_rx_start(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip;
	const efx_nic_cfg_t *encp;
	size_t hdrlen, align;
	int index;
	int rc;

	mutex_enter(&(smp->sm_lock));

	/* Calculate the receive packet buffer size and alignment */
	sp->s_rx_buffer_size = EFX_MAC_PDU(sp->s_mtu);

	encp = efx_nic_cfg_get(sp->s_enp);

	/* Packet buffer allocations are cache line aligned */
	EFSYS_ASSERT3U(encp->enc_rx_buf_align_start, <=, SFXGE_CPU_CACHE_SIZE);

	if (sp->s_family == EFX_FAMILY_HUNTINGTON) {
		sp->s_rx_prefix_size = encp->enc_rx_prefix_size;

		hdrlen = sp->s_rx_prefix_size + sizeof (struct ether_header);

		/* Ensure IP headers are 32-bit aligned */
		sp->s_rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;

	} else if (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT) {
		sp->s_rx_prefix_size = encp->enc_rx_prefix_size;

		/*
		 * Place the start of the buffer a prefix length minus 2
		 * before the start of a cache line. This ensures that the
		 * last two bytes of the prefix (which is where the LFSR hash
		 * is located) are in the same cache line as the headers, and
		 * the IP header is 32-bit aligned.
		 */
		sp->s_rx_buffer_align =
		    SFXGE_CPU_CACHE_SIZE - (encp->enc_rx_prefix_size - 2);
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;
	} else {
		sp->s_rx_prefix_size = 0;

		/*
		 * Place the start of the buffer 2 bytes after a cache line
		 * boundary so that the headers fit into the cache line and
		 * the IP header is 32-bit aligned.
		 */
		hdrlen = sp->s_rx_prefix_size + sizeof (struct ether_header);

		sp->s_rx_buffer_align = P2ROUNDUP(hdrlen, 4) - hdrlen;
		sp->s_rx_buffer_size += sp->s_rx_buffer_align;
	}

	/* Align end of packet buffer for RX DMA end padding */
	align = MAX(1, encp->enc_rx_buf_align_end);
	EFSYS_ASSERT(ISP2(align));
	sp->s_rx_buffer_size = P2ROUNDUP(sp->s_rx_buffer_size, align);

	/* Initialize the receive module */
	if ((rc = efx_rx_init(sp->s_enp)) != 0)
		goto fail1;

	mutex_exit(&(smp->sm_lock));

	if ((rc = sfxge_rx_scale_start(sp)) != 0)
		goto fail2;

	/* Start the receive queue(s) */
	sip = &(sp->s_intr);
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qstart(sp, index)) != 0)
			goto fail3;
	}

	ASSERT3U(sp->s_srp[0]->sr_state, ==, SFXGE_RXQ_STARTED);
	/* It is sufficient to have Rx scale initialized */
	ASSERT3U(sp->s_rx_scale.srs_state, ==, SFXGE_RX_SCALE_STARTED);
	rc = efx_mac_filter_default_rxq_set(sp->s_enp, sp->s_srp[0]->sr_erp,
	    sp->s_rx_scale.srs_count > 1);
	if (rc != 0)
		goto fail4;

	return (0);

fail4:
	DTRACE_PROBE(fail4);

fail3:
	DTRACE_PROBE(fail3);

	/* Stop the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qstop(sp, index);

	sfxge_rx_scale_stop(sp);

fail2:
	DTRACE_PROBE(fail2);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(sp->s_enp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(smp->sm_lock));

	return (rc);
}

void
sfxge_rx_coalesce_mode_get(sfxge_t *sp, sfxge_rx_coalesce_mode_t *modep)
{
	*modep = sp->s_rx_coalesce_mode;
}

int
sfxge_rx_coalesce_mode_set(sfxge_t *sp, sfxge_rx_coalesce_mode_t mode)
{
	int rc;

	switch (mode) {
	case SFXGE_RX_COALESCE_OFF:
	case SFXGE_RX_COALESCE_DISALLOW_PUSH:
	case SFXGE_RX_COALESCE_ALLOW_PUSH:
		break;

	default:
		rc = EINVAL;
		goto fail1;
	}

	sp->s_rx_coalesce_mode = mode;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_stop(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip = &(sp->s_intr);
	efx_nic_t *enp = sp->s_enp;
	int index;

	ASSERT(mutex_owned(&(sp->s_state_lock)));

	efx_mac_filter_default_rxq_clear(enp);

	/* Stop the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0) {
		/* TBD: Flush RXQs in parallel; HW has limit + may need retry */
		sfxge_rx_qstop(sp, index);
	}

	sfxge_rx_scale_stop(sp);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(enp);

	sp->s_rx_buffer_align = 0;
	sp->s_rx_prefix_size = 0;
	sp->s_rx_buffer_size = 0;

	mutex_exit(&(smp->sm_lock));
}
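/*
 * Note on sfxge_rx_loaned() below: for each RXQ it takes the owning
 * EVQ's lock, swizzles the queue's free packet pool via
 * sfxge_rx_qfpp_swizzle(), and accumulates the per-queue result, giving
 * the number of packets still loaned upstack across all queues.
 */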
unsigned int
sfxge_rx_loaned(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;
	unsigned int loaned;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	loaned = 0;
	for (index = 0; index < sip->si_nalloc; index++) {
		sfxge_rxq_t *srp = sp->s_srp[index];
		sfxge_evq_t *sep = sp->s_sep[srp->sr_index];

		mutex_enter(&(sep->se_lock));

		loaned += sfxge_rx_qfpp_swizzle(srp);

		mutex_exit(&(sep->se_lock));
	}

	return (loaned);
}

void
sfxge_rx_fini(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	sp->s_rx_coalesce_mode = SFXGE_RX_COALESCE_OFF;

	/* Tear down the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	ASSERT3U(sp->s_rx_pkt_mem_alloc, ==, 0);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);
}