/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008-2013 Solarflare Communications Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/strft.h>
#include <sys/ksynch.h>
#include <sys/ethernet.h>
#include <sys/crc32.h>
#include <sys/pattr.h>
#include <sys/cpu.h>

#include <inet/ip.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "sfxge.h"

#include "efx.h"

/* RXQ flush response timeout (in microseconds) */
#define	SFXGE_RX_QFLUSH_USEC	(2000000)

/* RXQ default packet buffer preallocation (number of packet buffers) */
#define	SFXGE_RX_QPREALLOC	(0)

/* Receive packet DMA attributes */
static ddi_device_acc_attr_t sfxge_rx_packet_devacc = {

	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC	/* devacc_attr_dataorder */
};

static ddi_dma_attr_t sfxge_rx_packet_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo */
	0xffffffffffffffffull,	/* dma_attr_addr_hi */
	0xffffffffffffffffull,	/* dma_attr_count_max */
	SFXGE_CPU_CACHE_SIZE,	/* dma_attr_align */
	0xffffffff,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	0xffffffffffffffffull,	/* dma_attr_maxxfer */
	0xffffffffffffffffull,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

/* Receive queue DMA attributes */
static ddi_device_acc_attr_t sfxge_rxq_devacc = {

	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
	DDI_NEVERSWAP_ACC,	/* devacc_attr_endian_flags */
	DDI_STRICTORDER_ACC	/* devacc_attr_dataorder */
};

static ddi_dma_attr_t sfxge_rxq_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo */
	0xffffffffffffffffull,	/* dma_attr_addr_hi */
	0xffffffffffffffffull,	/* dma_attr_count_max */
	EFX_BUF_SIZE,		/* dma_attr_align */
	0xffffffff,		/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	0xffffffffffffffffull,	/* dma_attr_maxxfer */
	0xffffffffffffffffull,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

/* Forward declaration */
static int
sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc);
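/*
 * Receive packets are managed through a kmem cache (sp->s_rpc): the
 * constructor below allocates the per-packet DMA handle once, so that
 * recycled packets can be rebound cheaply, and the destructor releases
 * the handle only when the cache itself reclaims the packet.
 */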
static int
sfxge_rx_packet_ctor(void *buf, void *arg, int kmflags)
{
	sfxge_rx_packet_t *srpp = buf;
	sfxge_t *sp = arg;
	dev_info_t *dip = sp->s_dip;
	int err;

	ASSERT3U(sizeof (srpp->__srp_u1.__srp_s1), <=,
	    sizeof (srpp->__srp_u1.__srp_pad));
	ASSERT3U(sizeof (srpp->__srp_u2.__srp_s2), <=,
	    sizeof (srpp->__srp_u2.__srp_pad));

	bzero(buf, sizeof (sfxge_rx_packet_t));

	/* Allocate a DMA handle */
	err = ddi_dma_alloc_handle(dip, &sfxge_rx_packet_dma_attr,
	    (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
	    NULL, &(srpp->srp_dma_handle));
	if (err != DDI_SUCCESS)
		goto fail1;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, err);

	SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);

	return (-1);
}

static void
sfxge_rx_packet_dtor(void *buf, void *arg)
{
	sfxge_rx_packet_t *srpp = buf;

	_NOTE(ARGUNUSED(arg))

	/* Free the DMA handle */
	ddi_dma_free_handle(&(srpp->srp_dma_handle));
	srpp->srp_dma_handle = NULL;

	SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);
}

static int
sfxge_rx_qctor(void *buf, void *arg, int kmflags)
{
	sfxge_rxq_t *srp = buf;
	efsys_mem_t *esmp = &(srp->sr_mem);
	sfxge_t *sp = arg;
	sfxge_dma_buffer_attr_t dma_attr;
	sfxge_rx_fpp_t *srfppp;
	int nprealloc;
	unsigned int id;
	int rc;

	/* Compile-time structure layout checks */
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u1.__sr_s1) <=
	    sizeof (srp->__sr_u1.__sr_pad));
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u2.__sr_s2) <=
	    sizeof (srp->__sr_u2.__sr_pad));
	EFX_STATIC_ASSERT(sizeof (srp->__sr_u3.__sr_s3) <=
	    sizeof (srp->__sr_u3.__sr_pad));

	bzero(buf, sizeof (sfxge_rxq_t));

	srp->sr_sp = sp;

	dma_attr.sdba_dip = sp->s_dip;
	dma_attr.sdba_dattrp = &sfxge_rxq_dma_attr;
	dma_attr.sdba_callback = DDI_DMA_SLEEP;
	dma_attr.sdba_length = EFX_RXQ_SIZE(sp->s_rxq_size);
	dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
	dma_attr.sdba_devaccp = &sfxge_rxq_devacc;
	dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
	dma_attr.sdba_maxcookies = 1;
	dma_attr.sdba_zeroinit = B_FALSE;

	if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
		goto fail1;

	/* Allocate some buffer table entries */
	if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_RXQ_NBUFS(sp->s_rxq_size),
	    &(srp->sr_id))) != 0)
		goto fail2;

	/* Allocate the context array */
	if ((srp->sr_srpp = kmem_zalloc(sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	/* Allocate the flow table */
	if ((srp->sr_flow = kmem_zalloc(sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail4;
	}

	srp->sr_srfpp = &(srp->sr_srfp);
	srp->sr_rto = drv_usectohz(200000);

	srp->sr_mpp = &(srp->sr_mp);

	/* Initialize the free packet pool */
	srfppp = &(srp->sr_fpp);
	if ((srfppp->srfpp_putp = kmem_zalloc(SFXGE_CPU_CACHE_SIZE *
	    SFXGE_RX_FPP_NSLOTS, kmflags)) == NULL) {
		rc = ENOMEM;
		goto fail5;
	}
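	/*
	 * The put list array is laid out with one slot per cache line
	 * (SFXGE_CPU_CACHE_SIZE bytes apart), presumably so that
	 * producers running on different CPUs do not false-share; each
	 * slot carries its own mutex, list head and tail pointer.
	 */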
	for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		putp = (void *)(srfppp->srfpp_putp + off);

		putp->srfpl_putp = NULL;
		putp->srfpl_putpp = &(putp->srfpl_putp);
		mutex_init(&(putp->srfpl_lock), NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(sp->s_intr.si_intr_pri));
	}

	cv_init(&(srp->sr_flush_kv), NULL, CV_DRIVER, NULL);

	/* Preallocate some packets on the free packet pool */
	nprealloc = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_prealloc_pkt_buffers", SFXGE_RX_QPREALLOC);
	sfxge_rx_qpreallocate(srp, nprealloc);

	return (0);

fail5:
	DTRACE_PROBE(fail5);

	srp->sr_mpp = NULL;

	srp->sr_rto = 0;
	srp->sr_srfpp = NULL;

	/* Free the flow table */
	kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW);
	srp->sr_flow = NULL;

fail4:
	DTRACE_PROBE(fail4);

	/* Free the context array */
	kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size);
	srp->sr_srpp = NULL;

fail3:
	DTRACE_PROBE(fail3);

	/* Free the buffer table entries */
	sfxge_sram_buf_tbl_free(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));
	srp->sr_id = 0;

fail2:
	DTRACE_PROBE(fail2);

	/* Remove dma setup */
	sfxge_dma_buffer_destroy(esmp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	srp->sr_sp = NULL;

	SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);

	return (-1);
}

static void
sfxge_rx_qdtor(void *buf, void *arg)
{
	sfxge_rxq_t *srp = buf;
	efsys_mem_t *esmp = &(srp->sr_mem);
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int id;

	_NOTE(ARGUNUSED(arg))

	cv_destroy(&(srp->sr_flush_kv));

	/* Tear down the free packet pool */
	for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		putp = (void *)(srfppp->srfpp_putp + off);

		putp->srfpl_putpp = NULL;
		mutex_destroy(&(putp->srfpl_lock));

		SFXGE_OBJ_CHECK(putp, sfxge_rx_fpp_putlist_t);
	}
	kmem_free(srfppp->srfpp_putp, SFXGE_CPU_CACHE_SIZE *
	    SFXGE_RX_FPP_NSLOTS);
	srfppp->srfpp_putp = NULL;

	srp->sr_mpp = NULL;

	srp->sr_rto = 0;
	srp->sr_srfpp = NULL;

	/* Free the flow table */
	kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
	    SFXGE_MAX_FLOW);
	srp->sr_flow = NULL;

	/* Free the context array */
	kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
	    sp->s_rxq_size);
	srp->sr_srpp = NULL;

	/* Free the buffer table entries */
	sfxge_sram_buf_tbl_free(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));
	srp->sr_id = 0;

	/* Tear down dma setup */
	sfxge_dma_buffer_destroy(esmp);

	SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);
}

/* Note: This function takes ownership of *srpp. */
static inline void
sfxge_rx_qfpp_put(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	mblk_t *mp = srpp->srp_mp;
	unsigned int id;
	size_t off;
	sfxge_rx_fpp_putlist_t *putp;

	ASSERT3P(mp->b_next, ==, NULL);
	ASSERT3P(mp->b_prev, ==, NULL);

	id = CPU->cpu_seqid & SFXGE_RX_FPP_MASK;
	off = id * SFXGE_CPU_CACHE_SIZE;

	ASSERT3P(srpp->srp_putp, ==, srfppp->srfpp_putp);
	putp = (void *)(srpp->srp_putp + off);

	mutex_enter(&(putp->srfpl_lock));
	putp->srfpl_count++;
	*putp->srfpl_putpp = mp;
	putp->srfpl_putpp = &(mp->b_next);
	mutex_exit(&(putp->srfpl_lock));
}
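/*
 * Move the contents of every per-CPU put list onto the single get
 * list and return the number of packets still loaned out. This runs
 * under the event queue lock, so the get list itself needs no lock;
 * only the brief per-slot mutexes are contended with producers.
 */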
static unsigned int
sfxge_rx_qfpp_swizzle(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int start;
	unsigned int id;
	mblk_t *p;
	mblk_t **pp;
	unsigned int count;
	unsigned int loaned;

	ASSERT(mutex_owned(&(sep->se_lock)));

	/* We want to access the put list for the current CPU last */
	id = start = (CPU->cpu_seqid + 1) & SFXGE_RX_FPP_MASK;

	do {
		sfxge_rx_fpp_putlist_t *putp;
		size_t off;

		off = id * SFXGE_CPU_CACHE_SIZE;
		id = (id + 1) & SFXGE_RX_FPP_MASK;

		putp = (void *)(srfppp->srfpp_putp + off);

		/* Acquire the put list */
		mutex_enter(&(putp->srfpl_lock));

		p = putp->srfpl_putp;
		pp = putp->srfpl_putpp;
		count = putp->srfpl_count;

		putp->srfpl_putp = NULL;
		putp->srfpl_putpp = &(putp->srfpl_putp);
		putp->srfpl_count = 0;

		mutex_exit(&(putp->srfpl_lock));

		if (p == NULL)
			continue;

		/* Add the list to the head of the get list */
		*pp = srfppp->srfpp_get;
		srfppp->srfpp_get = p;

		/* Adjust the counters */
		ASSERT3U(srfppp->srfpp_loaned, >=, count);
		srfppp->srfpp_loaned -= count;
		srfppp->srfpp_count += count;

#if 0
		/* NOTE: this probe is disabled because it is expensive!! */
		DTRACE_PROBE2(count,
		    unsigned int, (id - 1) & SFXGE_RX_FPP_MASK,
		    unsigned int, count);
#endif

	} while (id != start);

	/* Return the number of packets yet to appear in the put list */
	loaned = srfppp->srfpp_loaned;

	return (loaned);
}

#define	DB_FRTNP(mp)	((mp)->b_datap->db_frtnp)

static void
sfxge_rx_qfpp_empty(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rx_fpp_t *srfppp;
	mblk_t *mp;

	mutex_enter(&(sep->se_lock));
	srfppp = &(srp->sr_fpp);

	/* Swizzle put list to get list */
	(void) sfxge_rx_qfpp_swizzle(srp);
	ASSERT3U(srfppp->srfpp_loaned, ==, 0);

	mp = srfppp->srfpp_get;
	srfppp->srfpp_get = NULL;

	/* Free the remainder */
	while (mp != NULL) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;

		next = mp->b_next;
		mp->b_next = NULL;

		ASSERT3U(srfppp->srfpp_count, >, 0);
		srfppp->srfpp_count--;

		freep = DB_FRTNP(mp);
		/*
		 * ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
		 * is implied by srpp test below
		 */
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);
		ASSERT3P(mp->b_cont, ==, NULL);
		srpp->srp_recycle = B_FALSE;

		freeb(mp);

		mp = next;
	}
	ASSERT3U(srfppp->srfpp_count, ==, 0);

	srfppp->srfpp_min = 0;

	mutex_exit(&(sep->se_lock));
}

/*
 * This is an estimate of all memory consumed per RX packet;
 * it can be inaccurate but sp->s_rx_pkt_mem_alloc mustn't drift.
 */
static uint64_t
sfxge_rx_pkt_mem_approx(const sfxge_rx_packet_t *srpp)
{
	return (srpp->srp_mblksize + sizeof (mblk_t) + sizeof (dblk_t) +
	    sizeof (sfxge_rx_packet_t));
}

static void
sfxge_rx_qpacket_destroy(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
{
	sfxge_t *sp = srp->sr_sp;
	int64_t delta = sfxge_rx_pkt_mem_approx(srpp);

	ASSERT(!(srpp->srp_recycle));
	ASSERT3P(srpp->srp_mp, ==, NULL);

	srpp->srp_off = 0;
	srpp->srp_thp = NULL;
	srpp->srp_iphp = NULL;
	srpp->srp_etherhp = NULL;
	srpp->srp_size = 0;
	srpp->srp_flags = 0;

	bzero(&(srpp->srp_free), sizeof (frtn_t));

	srpp->srp_mblksize = 0;
	srpp->srp_base = NULL;

	/* Unbind the DMA memory from the DMA handle */
	srpp->srp_addr = 0;
	(void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

	/* Free the DMA memory */
	srpp->srp_base = NULL;
	ddi_dma_mem_free(&(srpp->srp_acc_handle));
	srpp->srp_acc_handle = NULL;

	srpp->srp_putp = NULL;
	srpp->srp_srp = NULL;

	kmem_cache_free(sp->s_rpc, srpp);
	if (sp->s_rx_pkt_mem_max)
		atomic_add_64(&sp->s_rx_pkt_mem_alloc, -delta);
}
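/*
 * Packet free callback, in xesballoc and desballoc flavours selected
 * at compile time. When the wrapping mblk is freed the callback
 * either recycles the packet onto the free packet pool (srp_recycle
 * set) or tears down its DMA state and returns it to the kmem cache.
 */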
#ifdef _USE_XESBALLOC
static void
sfxge_rx_qpacket_free(void *arg, mblk_t *mp, boolean_t *recyclep)
{
	sfxge_rx_packet_t *srpp = arg;
	sfxge_rxq_t *srp = srpp->srp_srp;

	/*
	 * WARNING "man -s 9f esballoc" states:
	 * => runs async in a background context
	 * => must not sleep, or access data structures that could be freed
	 */

	ASSERT3P(DB_BASE(mp), ==, srpp->srp_base);
	ASSERT3P(MBLKSIZE(mp), ==, srpp->srp_mblksize);

	/* Check whether we want to recycle the receive packets */
	if (srpp->srp_recycle) {
		ASSERT3P(DB_FRTNP(mp), ==, &(srpp->srp_free));

		srpp->srp_mp = mp;

		/* NORMAL recycled case */
		sfxge_rx_qfpp_put(srp, srpp);
		*recyclep = B_TRUE;
		return;
	}

	srpp->srp_mp = NULL;

	sfxge_rx_qpacket_destroy(srp, srpp);
	*recyclep = B_FALSE;
}
#endif	/* _USE_XESBALLOC */

#ifdef _USE_DESBALLOC
static void
sfxge_rx_qpacket_free(void *arg)
{
	sfxge_rx_packet_t *srpp = arg;
	sfxge_rxq_t *srp = srpp->srp_srp;

	/*
	 * WARNING "man -s 9f esballoc" states:
	 * => runs sync from the thread calling freeb()
	 * => must not sleep, or access data structures that could be freed
	 */

	/* Check whether we want to recycle the receive packets */
	if (srpp->srp_recycle) {
		frtn_t *freep;
		mblk_t *mp;
		size_t size;

		freep = &(srpp->srp_free);
		ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
		ASSERT3P(freep->free_arg, ==, (caddr_t)srpp);

		/*
		 * Allocate a matching mblk_t before the current one is
		 * freed.
		 */
		size = srpp->srp_mblksize;

		if ((mp = desballoc(srpp->srp_base, size, BPRI_HI,
		    freep)) != NULL) {
			srpp->srp_mp = mp;

			/* NORMAL recycled case */
			sfxge_rx_qfpp_put(srp, srpp);
			return;
		}
	}

	srpp->srp_mp = NULL;

	sfxge_rx_qpacket_destroy(srp, srpp);
}
#endif	/* _USE_DESBALLOC */
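/*
 * Construct a new receive packet: allocate DMA-able memory, bind it
 * to the packet's cached DMA handle (exactly one cookie is expected),
 * and wrap it in an mblk whose free routine is the recycle callback
 * above. Returns NULL on failure, with the relevant kstat bumped.
 */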
static sfxge_rx_packet_t *
sfxge_rx_qpacket_create(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	sfxge_rx_packet_t *srpp;
	size_t size;
	caddr_t base;
	size_t unit;
	ddi_dma_cookie_t dmac;
	unsigned int ncookies;
	frtn_t *freep;
	mblk_t *mp;
	int err;
	int rc;

	size = sp->s_rx_buffer_size;

	if (sp->s_rx_pkt_mem_max &&
	    (sp->s_rx_pkt_mem_alloc + size >= sp->s_rx_pkt_mem_max)) {
		DTRACE_PROBE(rx_pkt_mem_max);
		srp->sr_kstat.srk_rx_pkt_mem_limit++;
		return (NULL);
	}

	/* Allocate a new packet */
	if ((srpp = kmem_cache_alloc(sp->s_rpc, KM_NOSLEEP)) == NULL) {
		srp->sr_kstat.srk_kcache_alloc_nomem++;
		rc = ENOMEM;
		goto fail1;
	}

	srpp->srp_srp = srp;
	srpp->srp_putp = srfppp->srfpp_putp;

	/* Allocate some DMA memory */
	err = ddi_dma_mem_alloc(srpp->srp_dma_handle, size,
	    &sfxge_rx_packet_devacc, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &base, &unit, &(srpp->srp_acc_handle));
	switch (err) {
	case DDI_SUCCESS:
		break;

	case DDI_FAILURE:
		srp->sr_kstat.srk_dma_alloc_nomem++;
		rc = ENOMEM;
		goto fail2;

	default:
		srp->sr_kstat.srk_dma_alloc_fail++;
		rc = EFAULT;
		goto fail2;
	}

	/* Adjust the buffer to align the start of the DMA area correctly */
	base += sp->s_rx_buffer_align;
	size -= sp->s_rx_buffer_align;

	/* Bind the DMA memory to the DMA handle */
	err = ddi_dma_addr_bind_handle(srpp->srp_dma_handle, NULL,
	    base, size, DDI_DMA_READ | DDI_DMA_STREAMING,
	    DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
	switch (err) {
	case DDI_DMA_MAPPED:
		break;

	case DDI_DMA_INUSE:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EEXIST;
		goto fail3;

	case DDI_DMA_NORESOURCES:
		srp->sr_kstat.srk_dma_bind_nomem++;
		rc = ENOMEM;
		goto fail3;

	case DDI_DMA_NOMAPPING:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = ENOTSUP;
		goto fail3;

	case DDI_DMA_TOOBIG:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EFBIG;
		goto fail3;

	default:
		srp->sr_kstat.srk_dma_bind_fail++;
		rc = EFAULT;
		goto fail3;
	}
	ASSERT3U(ncookies, ==, 1);

	srpp->srp_addr = dmac.dmac_laddress;

	srpp->srp_base = (unsigned char *)base;
	srpp->srp_mblksize = size;

	/*
	 * Allocate a STREAMS block: We use size 1 so that the allocator will
	 * use the first (and smallest) dblk cache.
	 */
	freep = &(srpp->srp_free);
	freep->free_func = sfxge_rx_qpacket_free;
	freep->free_arg = (caddr_t)srpp;

#ifdef _USE_XESBALLOC
	if ((mp = xesballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
		srp->sr_kstat.srk_xesballoc_fail++;
		rc = ENOMEM;
		goto fail4;
	}
#endif	/* _USE_XESBALLOC */

#ifdef _USE_DESBALLOC
	if ((mp = desballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
		srp->sr_kstat.srk_desballoc_fail++;
		rc = ENOMEM;
		goto fail4;
	}
#endif	/* _USE_DESBALLOC */

	srpp->srp_mp = mp;
	srpp->srp_recycle = B_TRUE;

	if (sp->s_rx_pkt_mem_max) {
		int64_t delta = sfxge_rx_pkt_mem_approx(srpp);
		atomic_add_64(&sp->s_rx_pkt_mem_alloc, delta);
	}

	return (srpp);

fail4:
	DTRACE_PROBE(fail4);

	bzero(&(srpp->srp_free), sizeof (frtn_t));

	srpp->srp_mblksize = 0;
	srpp->srp_base = NULL;

	/* Unbind the DMA memory from the DMA handle */
	srpp->srp_addr = 0;
	(void) ddi_dma_unbind_handle(srpp->srp_dma_handle);

fail3:
	DTRACE_PROBE(fail3);

	/* Free the DMA memory */
	ddi_dma_mem_free(&(srpp->srp_acc_handle));
	srpp->srp_acc_handle = NULL;

fail2:
	DTRACE_PROBE(fail2);

	srpp->srp_putp = NULL;
	srpp->srp_srp = NULL;

	kmem_cache_free(sp->s_rpc, srpp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (NULL);
}
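/*
 * Descriptors are posted to the hardware ring in batches of up to
 * SFXGE_REFILL_BATCH addresses per efx_rx_qpost() call, with a single
 * efx_rx_qpush() doorbell at the end of each refill pass, keeping the
 * post/push cost off the per-packet path.
 */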
#define	SFXGE_REFILL_BATCH	64

/* Try to refill the RX descriptor ring from the associated free pkt pool */
static void
sfxge_rx_qrefill(sfxge_rxq_t *srp, unsigned int target)
{
	sfxge_t *sp = srp->sr_sp;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
	mblk_t *mp;
	int ntodo;
	unsigned int count;
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;

	prefetch_read_many(sp->s_enp);
	prefetch_read_many(srp->sr_erp);

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		return;

	rxfill = srp->sr_added - srp->sr_completed;
	ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
	ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
	ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

	if (ntodo == 0)
		goto out;

	(void) sfxge_rx_qfpp_swizzle(srp);

	mp = srfppp->srfpp_get;
	count = srfppp->srfpp_count;
	mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

	batch = 0;
	while (ntodo-- > 0) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;
		unsigned int id;

		if (mp == NULL)
			break;

		next = mp->b_next;
		mp->b_next = NULL;

		if (next != NULL)
			prefetch_read_many(next);

		freep = DB_FRTNP(mp);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);

		/* The MTU may have changed since the packet was allocated */
		if (MBLKSIZE(mp) != mblksize) {
			srpp->srp_recycle = B_FALSE;

			freeb(mp);

			--count;
			mp = next;
			continue;
		}

		srpp->srp_off = 0;
		srpp->srp_thp = NULL;
		srpp->srp_iphp = NULL;
		srpp->srp_etherhp = NULL;
		srpp->srp_size = 0;
		srpp->srp_flags = EFX_DISCARD;

		id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
		ASSERT(srp->sr_srpp[id] == NULL);
		srp->sr_srpp[id] = srpp;

		addr[batch++] = srpp->srp_addr;
		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
			    srp->sr_completed, srp->sr_added);
			srp->sr_added += batch;
			batch = 0;
		}

		--count;
		mp = next;
	}

	srfppp->srfpp_get = mp;
	srfppp->srfpp_count = count;

	if (batch != 0) {
		efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
		    srp->sr_completed, srp->sr_added);
		srp->sr_added += batch;
	}

	/* Make the descriptors visible to the hardware */
	(void) ddi_dma_sync(srp->sr_mem.esm_dma_handle,
	    0,
	    EFX_RXQ_SIZE(sp->s_rxq_size),
	    DDI_DMA_SYNC_FORDEV);

	efx_rx_qpush(srp->sr_erp, srp->sr_added);

out:
	if (srfppp->srfpp_count < srfppp->srfpp_min)
		srfppp->srfpp_min = srfppp->srfpp_count;
}

/* Preallocate packets and put them in the free packet pool */
static int
sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc)
{
	sfxge_rx_fpp_t *srfppp = &((srp)->sr_fpp);

	srfppp->srfpp_lowat = nprealloc;
	while (nprealloc-- > 0) {
		sfxge_rx_packet_t *srpp;

		if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
			break;
		sfxge_rx_qfpp_put(srp, srpp);
	}
	return (0);
}

/* Try to refill the RX descriptor ring by allocating new packets */
static void
sfxge_rx_qfill(sfxge_rxq_t *srp, unsigned int target)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	unsigned int batch;
	unsigned int rxfill;
	unsigned int mblksize;
	int ntodo;
	efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
	mblk_t *mp = NULL;

	prefetch_read_many(sp->s_enp);
	prefetch_read_many(srp->sr_erp);

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		return;

	rxfill = srp->sr_added - srp->sr_completed;
	ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
	ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
	ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));

	if (ntodo == 0)
		return;

	mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;

	batch = 0;
	while (ntodo-- > 0) {
		sfxge_rx_packet_t *srpp;
		unsigned int id;

		if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
			break;

		mp = srpp->srp_mp;

		ASSERT3U(MBLKSIZE(mp), ==, mblksize);

		ASSERT3U(srpp->srp_off, ==, 0);
		ASSERT3P(srpp->srp_thp, ==, NULL);
		ASSERT3P(srpp->srp_iphp, ==, NULL);
		ASSERT3P(srpp->srp_etherhp, ==, NULL);
		ASSERT3U(srpp->srp_size, ==, 0);

		srpp->srp_flags = EFX_DISCARD;

		id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
		ASSERT(srp->sr_srpp[id] == NULL);
		srp->sr_srpp[id] = srpp;

		addr[batch++] = srpp->srp_addr;
		if (batch == SFXGE_REFILL_BATCH) {
			efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
			    srp->sr_completed, srp->sr_added);
			srp->sr_added += batch;
			batch = 0;
		}
	}

	if (batch != 0) {
		efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
		    srp->sr_completed, srp->sr_added);
		srp->sr_added += batch;
	}

	/* Make the descriptors visible to the hardware */
	(void) ddi_dma_sync(srp->sr_mem.esm_dma_handle,
	    0,
	    EFX_RXQ_SIZE(sp->s_rxq_size),
	    DDI_DMA_SYNC_FORDEV);

	efx_rx_qpush(srp->sr_erp, srp->sr_added);
}

void
sfxge_rx_qfpp_trim(sfxge_rxq_t *srp)
{
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	mblk_t *p;
	mblk_t **pp;
	int count;

	ASSERT(mutex_owned(&(sep->se_lock)));

	if (srp->sr_state != SFXGE_RXQ_STARTED)
		goto done;

	/* Make sure the queue is full */
	sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

	/* The refill may have emptied the pool */
	if (srfppp->srfpp_min == 0)
		goto done;

	/* Don't trim below the pool's low water mark */
	if (srfppp->srfpp_count <= srfppp->srfpp_lowat)
		goto done;

	ASSERT(srfppp->srfpp_min <= srfppp->srfpp_count);

	/* Trim to the largest of srfppp->srfpp_min and srfppp->srfpp_lowat */
	if (srfppp->srfpp_lowat > srfppp->srfpp_min)
		count = srfppp->srfpp_count - srfppp->srfpp_lowat;
	else
		count = srfppp->srfpp_count - srfppp->srfpp_min;

	/* Walk the get list */
	pp = &(srfppp->srfpp_get);
	while (--count >= 0) {
		ASSERT(pp);
		p = *pp;
		ASSERT(p != NULL);

		pp = &(p->b_next);
	}
	ASSERT(pp);
	p = *pp;

	/* Truncate the get list */
	*pp = NULL;

	/* Free the remainder */
	while (p != NULL) {
		mblk_t *next;
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;

		next = p->b_next;
		p->b_next = NULL;

		ASSERT3U(srfppp->srfpp_min, >, 0);
		srfppp->srfpp_min--;
		srfppp->srfpp_count--;

		freep = DB_FRTNP(p);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, p);

		srpp->srp_recycle = B_FALSE;

		freeb(p);

		p = next;
	}

done:
	srfppp->srfpp_min = srfppp->srfpp_count;
}
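/*
 * Periodic RXQ housekeeping. A timeout(9f) handler must not block,
 * so rather than trimming the free packet pool directly this posts a
 * software event (SFXGE_MAGIC_RX_QFPP_TRIM | index) and lets the
 * event queue handler do the work in its own context, then re-arms
 * itself.
 */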
static void
sfxge_rx_qpoll(void *arg)
{
	sfxge_rxq_t *srp = arg;
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	uint16_t magic;

	/*
	 * man timeout(9f) states that this code should adhere to the
	 * same requirements as a softirq handler - DO NOT BLOCK
	 */

	/*
	 * Post an event to the event queue to cause the free packet pool to be
	 * trimmed if it is oversize.
	 */
	magic = SFXGE_MAGIC_RX_QFPP_TRIM | index;

#if defined(DEBUG)
	/* This is guaranteed due to the start/stop order of rx and ev */
	ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
#else
	/*
	 * Bug22691 WORKAROUND:
	 * This handler has been observed in the field to be invoked for a
	 * queue in the INITIALIZED state, which should never happen.
	 * Until the mechanism for this is properly understood, add defensive
	 * checks.
	 */
	if ((sep->se_state != SFXGE_EVQ_STARTED) ||
	    (srp->sr_state != SFXGE_RXQ_STARTED) ||
	    (!sep->se_eep)) {
		cmn_err(CE_WARN, SFXGE_CMN_ERR
		    "[%s%d] RXQ[%d] bad state in sfxge_rx_qpoll %d %d %p",
		    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip),
		    index, sep->se_state, srp->sr_state, sep->se_eep);
		return;
	}
#endif
	efx_ev_qpost(sep->se_eep, magic);

	srp->sr_tid = timeout(sfxge_rx_qpoll, srp,
	    drv_usectohz(sp->s_rxq_poll_usec));
}

static void
sfxge_rx_qpoll_start(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];

	ASSERT(mutex_owned(&(sep->se_lock)));
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	/* Schedule a poll */
	ASSERT3P(srp->sr_tid, ==, 0);
	srp->sr_tid = timeout(sfxge_rx_qpoll, srp, 0);
}

static void
sfxge_rx_qpoll_stop(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	timeout_id_t tid;

	ASSERT(mutex_owned(&(sep->se_lock)));
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	/*
	 * Cancel the qpoll timer. Care is needed as this function
	 * can race with sfxge_rx_qpoll() for timeout id updates.
	 *
	 * Do not hold locks used by any timeout(9f) handlers across
	 * calls to untimeout(9f) as this will deadlock.
	 */
	tid = 0;
	while ((srp->sr_tid != 0) && (srp->sr_tid != tid)) {
		tid = srp->sr_tid;
		(void) untimeout(tid);
	}
	srp->sr_tid = 0;
}

static int
sfxge_rx_kstat_update(kstat_t *ksp, int rw)
{
	sfxge_rxq_t *srp = ksp->ks_private;
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	kstat_named_t *knp;
	int rc;

	if (rw != KSTAT_READ) {
		rc = EACCES;
		goto fail1;
	}

	ASSERT(mutex_owned(&(sep->se_lock)));
	if (srp->sr_state != SFXGE_RXQ_STARTED)
		goto done;

	knp = ksp->ks_data;
	/* NB pointer post-increment below */
	knp++->value.ui32 = srp->sr_kstat.srk_rx_pkt_mem_limit;
	knp++->value.ui32 = srp->sr_kstat.srk_kcache_alloc_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_fail;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_nomem;
	knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_fail;
#ifdef _USE_XESBALLOC
	knp++->value.ui32 = srp->sr_kstat.srk_xesballoc_fail;
#endif
#ifdef _USE_DESBALLOC
	knp++->value.ui32 = srp->sr_kstat.srk_desballoc_fail;
#endif
	knp++->value.ui32 = srp->sr_kstat.srk_rxq_empty_discard;

done:
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_kstat_init(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	dev_info_t *dip = sp->s_dip;
	char name[MAXNAMELEN];
	kstat_t *ksp;
	kstat_named_t *knp;
	int rc;

	/* Create the set */
	(void) snprintf(name, MAXNAMELEN - 1, "%s_rxq%04d",
	    ddi_driver_name(dip), index);

	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
	    ddi_get_instance(dip), name, "rxq", KSTAT_TYPE_NAMED,
	    SFXGE_RX_NSTATS, 0)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}

	srp->sr_ksp = ksp;

	ksp->ks_update = sfxge_rx_kstat_update;
	ksp->ks_private = srp;
	ksp->ks_lock = &(sep->se_lock);

	/* Initialise the named stats */
	knp = ksp->ks_data;
	kstat_named_init(knp, "rx_pkt_mem_limit", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "kcache_alloc_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_alloc_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_alloc_fail", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_bind_nomem", KSTAT_DATA_UINT32);
	knp++;
	kstat_named_init(knp, "dma_bind_fail", KSTAT_DATA_UINT32);
	knp++;
#ifdef _USE_XESBALLOC
	kstat_named_init(knp, "xesballoc_fail", KSTAT_DATA_UINT32);
	knp++;
#endif
#ifdef _USE_DESBALLOC
	kstat_named_init(knp, "desballoc_fail", KSTAT_DATA_UINT32);
	knp++;
#endif
	kstat_named_init(knp, "rxq_empty_discard", KSTAT_DATA_UINT32);

	kstat_install(ksp);
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_qinit(sfxge_t *sp, unsigned int index)
{
	sfxge_rxq_t *srp;
	int rc;

	ASSERT3U(index, <, SFXGE_RX_SCALE_MAX);

	srp = kmem_cache_alloc(sp->s_rqc, KM_SLEEP);

	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_UNINITIALIZED);

	srp->sr_index = index;
	sp->s_srp[index] = srp;

	if ((rc = sfxge_rx_kstat_init(srp)) != 0)
		goto fail1;

	srp->sr_state = SFXGE_RXQ_INITIALIZED;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);
	kmem_cache_free(sp->s_rqc, srp);

	return (rc);
}
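/*
 * Bring the RXQ to the STARTED state: program the buffer table,
 * create and enable the hardware queue, start the poll timer and
 * then fill the ring, first from the free packet pool and then, if
 * the pool could not supply at least a batch, by allocating fresh
 * packets.
 */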
static int
sfxge_rx_qstart(sfxge_t *sp, unsigned int index)
{
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rxq_t *srp;
	efsys_mem_t *esmp;
	efx_nic_t *enp;
	unsigned int level;
	int rc;

	mutex_enter(&(sep->se_lock));
	srp = sp->s_srp[index];
	enp = sp->s_enp;
	esmp = &(srp->sr_mem);

	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
	ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);

	/* Zero the memory */
	(void) memset(esmp->esm_base, 0, EFX_RXQ_SIZE(sp->s_rxq_size));

	/* Program the buffer table */
	if ((rc = sfxge_sram_buf_tbl_set(sp, srp->sr_id, esmp,
	    EFX_RXQ_NBUFS(sp->s_rxq_size))) != 0)
		goto fail1;

	/* Create the receive queue */
	if ((rc = efx_rx_qcreate(enp, index, index, EFX_RXQ_TYPE_DEFAULT,
	    esmp, sp->s_rxq_size, srp->sr_id, sep->se_eep, &(srp->sr_erp)))
	    != 0)
		goto fail2;

	/* Enable the receive queue */
	efx_rx_qenable(srp->sr_erp);

	/* Set the water marks */
	srp->sr_hiwat = EFX_RXQ_LIMIT(sp->s_rxq_size) * 9 / 10;
	srp->sr_lowat = srp->sr_hiwat / 2;

	srp->sr_state = SFXGE_RXQ_STARTED;

	sfxge_rx_qpoll_start(srp);

	/* Try to fill the queue from the pool */
	sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

	/*
	 * If there were insufficient buffers in the pool to reach at
	 * least a batch then allocate some.
	 */
	level = srp->sr_added - srp->sr_completed;
	if (level < SFXGE_RX_BATCH)
		sfxge_rx_qfill(srp, SFXGE_RX_BATCH);

	mutex_exit(&(sep->se_lock));

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	/* Clear entries from the buffer table */
	sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(sep->se_lock));

	return (rc);
}

static void
sfxge_rx_qflow_complete(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp)
{
	mblk_t *mp;
	struct ether_header *etherhp;
	struct ip *iphp;
	struct tcphdr *thp;

	if (srfp->srf_mp == NULL)
		return;

	mp = srfp->srf_mp;
	etherhp = srfp->srf_etherhp;
	iphp = srfp->srf_iphp;
	thp = srfp->srf_last_thp;

	ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
	    sizeof (struct ether_vlan_header) :
	    sizeof (struct ether_header)) +
	    srfp->srf_len, ==, msgdsize(mp));

	ASSERT3U(srfp->srf_len & 0xffff, ==, srfp->srf_len);
	iphp->ip_len = htons(srfp->srf_len);

	srfp->srf_first_thp->th_ack = thp->th_ack;
	srfp->srf_first_thp->th_win = thp->th_win;
	srfp->srf_first_thp->th_flags = thp->th_flags;

	DTRACE_PROBE2(flow_complete, uint32_t, srfp->srf_tag,
	    size_t, srfp->srf_len);

	srfp->srf_mp = NULL;
	srfp->srf_len = 0;

	ASSERT(mp->b_next == NULL);
	*(srp->sr_mpp) = mp;
	srp->sr_mpp = &(mp->b_next);
}
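/*
 * Software LRO: flows are keyed by source/destination IP address,
 * TCP port pair and VLAN TCI, hashed into the sr_flow table. A flow
 * only starts coalescing once SFXGE_SLOW_START in-order segments
 * have been seen; out-of-order or aged-out segments reduce the count
 * and complete any partially built chain.
 */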
static boolean_t
sfxge_rx_qflow_add(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp,
    sfxge_rx_packet_t *srpp, clock_t now)
{
	sfxge_t *sp = srp->sr_sp;
	struct ether_header *etherhp = srpp->srp_etherhp;
	struct ip *iphp = srpp->srp_iphp;
	struct tcphdr *thp = srpp->srp_thp;
	size_t off = srpp->srp_off;
	size_t size = (size_t)(srpp->srp_size);
	mblk_t *mp = srpp->srp_mp;
	uint32_t seq;
	unsigned int shift;

	ASSERT3U(MBLKL(mp), ==, off + size);
	ASSERT3U(DB_CKSUMFLAGS(mp), ==,
	    HCK_FULLCKSUM | HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM);

	seq = htonl(thp->th_seq);

	/*
	 * If the time between this segment and the last is greater than RTO
	 * then consider this a new flow.
	 */
	if (now - srfp->srf_lbolt > srp->sr_rto) {
		srfp->srf_count = 1;
		srfp->srf_seq = seq + size;

		goto fail1;
	}

	if (seq != srfp->srf_seq) {
		if (srfp->srf_count > SFXGE_SLOW_START)
			srfp->srf_count = SFXGE_SLOW_START;

		srfp->srf_count >>= 1;

		srfp->srf_count++;
		srfp->srf_seq = seq + size;

		goto fail2;
	}

	/* Update the in-order segment count and sequence number */
	srfp->srf_count++;
	srfp->srf_seq = seq + size;

	/* Don't merge across pure ACK, URG, SYN or RST segments */
	if (size == 0 || thp->th_flags & (TH_URG | TH_SYN | TH_RST) ||
	    thp->th_urp != 0)
		goto fail3;

	/*
	 * If the in-order segment count has not yet reached the slow-start
	 * threshold then we cannot coalesce.
	 */
	if (srfp->srf_count < SFXGE_SLOW_START)
		goto fail4;

	/* Scale up the packet size from 4k (the maximum being 64k) */
	ASSERT3U(srfp->srf_count, >=, SFXGE_SLOW_START);
	shift = MIN(srfp->srf_count - SFXGE_SLOW_START + 12, 16);
	if (srfp->srf_len + size >= (1 << shift))
		sfxge_rx_qflow_complete(srp, srfp);

	ASSERT(mp->b_cont == NULL);

#ifdef _USE_GLD_V3_SOL10
	/*
	 * The IP and UDP layers in Solaris 10 have slow paths for
	 * handling mblks with more than 2 fragments.
	 * UDP: see OpenSolaris CR 6305037
	 * IP: see <http://www.mail-archive.com/networking-discuss@
	 * opensolaris.org/msg07366.html>
	 */
	if (srfp->srf_mp && srfp->srf_mp->b_cont) {
		sfxge_rx_qflow_complete(srp, srfp);
	}
#endif

	if (srfp->srf_mp == NULL) {
		/* First packet in this flow */
		srfp->srf_etherhp = etherhp;
		srfp->srf_iphp = iphp;
		srfp->srf_first_thp = srfp->srf_last_thp = thp;

		ASSERT3P(mp->b_cont, ==, NULL);
		srfp->srf_mp = mp;
		srfp->srf_mpp = &(mp->b_cont);

		srfp->srf_len = ntohs(iphp->ip_len);

		/*
		 * If the flow is not already in the list of occupied flows
		 * then add it.
		 */
		if (srfp->srf_next == NULL &&
		    srp->sr_srfpp != &(srfp->srf_next)) {
			*(srp->sr_srfpp) = srfp;
			srp->sr_srfpp = &(srfp->srf_next);
		}
	} else {
		/* Later packet in this flow - skip TCP header */
		srfp->srf_last_thp = thp;

		mp->b_rptr += off;
		ASSERT3U(MBLKL(mp), ==, size);

		ASSERT3P(mp->b_cont, ==, NULL);
		*(srfp->srf_mpp) = mp;
		srfp->srf_mpp = &(mp->b_cont);

		srfp->srf_len += size;

		ASSERT(srfp->srf_next != NULL ||
		    srp->sr_srfpp == &(srfp->srf_next));
	}

	DTRACE_PROBE2(flow_add, uint32_t, srfp->srf_tag, size_t, size);

	/*
	 * Try to align coalesced segments on push boundaries, unless they
	 * are too frequent.
	 */
	if (sp->s_rx_coalesce_mode == SFXGE_RX_COALESCE_ALLOW_PUSH &&
	    thp->th_flags & TH_PUSH)
		sfxge_rx_qflow_complete(srp, srfp);

	srfp->srf_lbolt = now;
	return (B_TRUE);

fail4:
fail3:
fail2:
fail1:
	sfxge_rx_qflow_complete(srp, srfp);

	srfp->srf_lbolt = now;
	return (B_FALSE);
}

void
sfxge_rx_qpacket_coalesce(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	clock_t now;
	mblk_t *mp;
	sfxge_rx_flow_t *srfp;

	ASSERT(sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF);

	now = ddi_get_lbolt();

	mp = srp->sr_mp;

	srp->sr_mp = NULL;
	srp->sr_mpp = &(srp->sr_mp);

	/* Start with the last flow to be appended to */
	srfp = *(srp->sr_srfpp);

	while (mp != NULL) {
		frtn_t *freep;
		sfxge_rx_packet_t *srpp;
		struct ether_header *etherhp;
		struct ip *iphp;
		struct tcphdr *thp;
		size_t off;
		size_t size;
		uint16_t ether_tci;
		uint16_t hash;
		uint32_t tag;
		mblk_t *next;

		next = mp->b_next;
		mp->b_next = NULL;

		if (next != NULL)
			prefetch_read_many(next);

		freep = DB_FRTNP(mp);
		/*LINTED*/
		srpp = (sfxge_rx_packet_t *)(freep->free_arg);
		ASSERT3P(srpp->srp_mp, ==, mp);

		/* If the packet is not TCP then we cannot coalesce it */
		if (~(srpp->srp_flags) & EFX_PKT_TCP)
			goto reject;

		/*
		 * If the packet is not fully checksummed then we cannot
		 * coalesce it.
		 */
		if (~(srpp->srp_flags) & (EFX_CKSUM_TCPUDP | EFX_CKSUM_IPV4))
			goto reject;

		/* Parse the TCP header */
		sfxge_tcp_parse(mp, &etherhp, &iphp, &thp, &off,
		    &size);
		ASSERT(etherhp != NULL);
		ASSERT(iphp != NULL);
		ASSERT(thp != NULL);
		ASSERT(off != 0);

		if ((iphp->ip_off & ~htons(IP_DF)) != 0)
			goto reject;

		if (etherhp->ether_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *ethervhp;

			ethervhp = (struct ether_vlan_header *)etherhp;
			ether_tci = ethervhp->ether_tci;
		} else {
			ether_tci = 0;
		}

		/*
		 * Make sure any minimum length padding is stripped
		 * before we try to add the packet to a flow.
		 */
		ASSERT3U(sp->s_rx_prefix_size + MBLKL(mp), ==,
		    (size_t)(srpp->srp_size));
		ASSERT3U(sp->s_rx_prefix_size + off + size, <=,
		    (size_t)(srpp->srp_size));

		if (sp->s_rx_prefix_size + off + size <
		    (size_t)(srpp->srp_size))
			mp->b_wptr = mp->b_rptr + off + size;

		/*
		 * If there is no current flow, or the segment does not match
		 * the current flow then we must attempt to look up the
		 * correct flow in the table.
		 */
		if (srfp == NULL)
			goto lookup;

		if (srfp->srf_saddr != iphp->ip_src.s_addr ||
		    srfp->srf_daddr != iphp->ip_dst.s_addr)
			goto lookup;

		if (srfp->srf_sport != thp->th_sport ||
		    srfp->srf_dport != thp->th_dport)
			goto lookup;

		if (srfp->srf_tci != ether_tci)
			goto lookup;

add:
		ASSERT(srfp != NULL);

		srpp->srp_etherhp = etherhp;
		srpp->srp_iphp = iphp;
		srpp->srp_thp = thp;
		srpp->srp_off = off;

		ASSERT3U(size, <, (1 << 16));
		srpp->srp_size = (uint16_t)size;

		/* Try to append the packet to the flow */
		if (!sfxge_rx_qflow_add(srp, srfp, srpp, now))
			goto reject;

		mp = next;
		continue;

lookup:
		/*
		 * If there is a prefix area then read the hash from that,
		 * otherwise calculate it.
		 */
		if (sp->s_rx_prefix_size != 0) {
			hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_LFSR,
			    DB_BASE(mp));
		} else {
			SFXGE_TCP_HASH(
			    ntohl(iphp->ip_src.s_addr),
			    ntohs(thp->th_sport),
			    ntohl(iphp->ip_dst.s_addr),
			    ntohs(thp->th_dport),
			    hash);
		}

		srfp = &(srp->sr_flow[(hash >> 6) % SFXGE_MAX_FLOW]);
		tag = (uint32_t)hash + 1;	/* Make sure it's not zero */

		/*
		 * If the flow we have found does not match the hash then
		 * it may be an unused flow, or it may be stale.
		 */
		if (tag != srfp->srf_tag) {
			if (srfp->srf_count != 0) {
				if (now - srfp->srf_lbolt <= srp->sr_rto)
					goto reject;
			}

			if (srfp->srf_mp != NULL)
				goto reject;

			/* Start a new flow */
			ASSERT(srfp->srf_next == NULL);

			srfp->srf_tag = tag;

			srfp->srf_saddr = iphp->ip_src.s_addr;
			srfp->srf_daddr = iphp->ip_dst.s_addr;
			srfp->srf_sport = thp->th_sport;
			srfp->srf_dport = thp->th_dport;
			srfp->srf_tci = ether_tci;

			srfp->srf_count = 0;
			srfp->srf_seq = ntohl(thp->th_seq);

			srfp->srf_lbolt = now;
			goto add;
		}

		/*
		 * If the flow we have found does match the hash then it could
		 * still be an alias.
		 */
		if (srfp->srf_saddr != iphp->ip_src.s_addr ||
		    srfp->srf_daddr != iphp->ip_dst.s_addr)
			goto reject;

		if (srfp->srf_sport != thp->th_sport ||
		    srfp->srf_dport != thp->th_dport)
			goto reject;

		if (srfp->srf_tci != ether_tci)
			goto reject;

		goto add;

reject:
		*(srp->sr_mpp) = mp;
		srp->sr_mpp = &(mp->b_next);

		mp = next;
	}
}
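/*
 * Process completed receive descriptors: strip the hardware prefix,
 * set up checksum flags, filter out loopback packets, and chain the
 * mblks onto sr_mp for delivery. Packets flagged for discard (or
 * arriving after the queue has left the STARTED state) are freed,
 * which normally recycles them onto the free packet pool.
 */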
void
sfxge_rx_qcomplete(sfxge_rxq_t *srp, boolean_t eop)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];
	unsigned int completed;
	sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
	unsigned int level;

	ASSERT(mutex_owned(&(sep->se_lock)));

	ASSERT(srp->sr_mp == NULL);
	ASSERT(srp->sr_mpp == &(srp->sr_mp));

	completed = srp->sr_completed;
	while (completed != srp->sr_pending) {
		unsigned int id;
		sfxge_rx_packet_t *srpp;
		mblk_t *mp;
		size_t size;
		uint16_t flags;

		id = completed++ & (sp->s_rxq_size - 1);

		if (srp->sr_pending - completed >= 4) {
			unsigned int prefetch;

			prefetch = (id + 4) & (sp->s_rxq_size - 1);

			srpp = srp->sr_srpp[prefetch];
			ASSERT(srpp != NULL);

			mp = srpp->srp_mp;
			prefetch_read_many(mp->b_datap);
		} else if (completed == srp->sr_pending) {
			prefetch_read_many(srp->sr_mp);
		}

		srpp = srp->sr_srpp[id];
		ASSERT(srpp != NULL);

		srp->sr_srpp[id] = NULL;

		mp = srpp->srp_mp;
		ASSERT(mp->b_cont == NULL);

		/* when called from sfxge_rx_qstop() */
		if (srp->sr_state != SFXGE_RXQ_STARTED)
			goto discard;

		if (srpp->srp_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		/* Set up the packet length */
		ASSERT3P(mp->b_rptr, ==, DB_BASE(mp));
		mp->b_rptr += sp->s_rx_prefix_size;

		prefetch_read_many(mp->b_rptr);

		ASSERT3P(mp->b_wptr, ==, DB_BASE(mp));
		mp->b_wptr += (size_t)(srpp->srp_size);
		ASSERT3P(mp->b_wptr, <=, DB_LIM(mp));

		/* Calculate the maximum packet size */
		size = sp->s_mtu;
		size += (srpp->srp_flags & EFX_PKT_VLAN_TAGGED) ?
		    sizeof (struct ether_vlan_header) :
		    sizeof (struct ether_header);

		if (MBLKL(mp) > size)
			goto discard;

		/* Make the data visible to the kernel */
		(void) ddi_dma_sync(srpp->srp_dma_handle, 0,
		    (size_t)(srpp->srp_size), DDI_DMA_SYNC_FORKERNEL);

		/* Check for loopback packets */
		if (!(srpp->srp_flags & EFX_PKT_IPV4) &&
		    !(srpp->srp_flags & EFX_PKT_IPV6)) {
			struct ether_header *etherhp;

			/*LINTED*/
			etherhp = (struct ether_header *)(mp->b_rptr);

			if (etherhp->ether_type ==
			    htons(SFXGE_ETHERTYPE_LOOPBACK)) {
				DTRACE_PROBE(loopback);

				srp->sr_loopback++;
				goto discard;
			}
		}

		/* Set up the checksum information */
		flags = 0;

		if (srpp->srp_flags & EFX_CKSUM_IPV4) {
			ASSERT(srpp->srp_flags & EFX_PKT_IPV4);
			flags |= HCK_IPV4_HDRCKSUM;
		}

		if (srpp->srp_flags & EFX_CKSUM_TCPUDP) {
			ASSERT(srpp->srp_flags & EFX_PKT_TCP ||
			    srpp->srp_flags & EFX_PKT_UDP);
			flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
		}

		DB_CKSUMSTART(mp) = 0;
		DB_CKSUMSTUFF(mp) = 0;
		DB_CKSUMEND(mp) = 0;
		DB_CKSUMFLAGS(mp) = flags;
		DB_CKSUM16(mp) = 0;

		/* Add the packet to the tail of the chain */
		srfppp->srfpp_loaned++;

		ASSERT(mp->b_next == NULL);
		*(srp->sr_mpp) = mp;
		srp->sr_mpp = &(mp->b_next);

		continue;

discard:
		/* Return the packet to the pool */
		srfppp->srfpp_loaned++;
		freeb(mp);	/* Equivalent to freemsg() as b_cont==0 */
	}
	srp->sr_completed = completed;

	/* Attempt to coalesce any TCP packets */
	if (sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF)
		sfxge_rx_qpacket_coalesce(srp);

	/*
	 * If there are any pending flows and this is the end of the
	 * poll then they must be completed.
	 */
	if (srp->sr_srfp != NULL && eop) {
		sfxge_rx_flow_t *srfp;

		srfp = srp->sr_srfp;

		srp->sr_srfp = NULL;
		srp->sr_srfpp = &(srp->sr_srfp);

		do {
			sfxge_rx_flow_t *next;

			next = srfp->srf_next;
			srfp->srf_next = NULL;

			sfxge_rx_qflow_complete(srp, srfp);

			srfp = next;
		} while (srfp != NULL);
	}

	level = srp->sr_added - srp->sr_completed;

	/* If there are any packets then pass them up the stack */
	if (srp->sr_mp != NULL) {
		mblk_t *mp;

		mp = srp->sr_mp;

		srp->sr_mp = NULL;
		srp->sr_mpp = &(srp->sr_mp);

		if (level == 0) {
			/* Try to refill ASAP */
			sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
			level = srp->sr_added - srp->sr_completed;
		}

		/*
		 * If the RXQ is still empty, discard and recycle the
		 * current entry to ensure that the ring always
		 * contains at least one descriptor. This ensures that
		 * the next hardware RX will trigger an event
		 * (possibly delayed by interrupt moderation) and
		 * trigger another refill/fill attempt.
		 *
		 * Note this drops a complete LRO fragment from the
		 * start of the batch.
		 *
		 * Note also that copymsgchain() does not help with
		 * resource starvation here, unless we are short of DMA
		 * mappings.
		 */
		if (level == 0) {
			mblk_t *nmp;

			srp->sr_kstat.srk_rxq_empty_discard++;
			DTRACE_PROBE1(rxq_empty_discard, int, index);
			nmp = mp->b_next;
			if (nmp)
				sfxge_gld_rx_post(sp, index, nmp);
			/* as level==0 will swizzle, rxpost below */
			freemsg(mp);
		} else {
			sfxge_gld_rx_post(sp, index, mp);
		}
	}

	/* Top up the queue if necessary */
	if (level < srp->sr_hiwat) {
		sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));

		level = srp->sr_added - srp->sr_completed;
		if (level < srp->sr_lowat)
			sfxge_rx_qfill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
	}
}

static unsigned int
sfxge_rx_qloopback(sfxge_t *sp, unsigned int index)
{
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rxq_t *srp;
	unsigned int count;

	mutex_enter(&(sep->se_lock));
	srp = sp->s_srp[index];
	count = srp->sr_loopback;
	srp->sr_loopback = 0;
	mutex_exit(&(sep->se_lock));

	return (count);
}

void
sfxge_rx_qflush_done(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];

	ASSERT(mutex_owned(&(sep->se_lock)));

	/* SFCbug22989: events may be delayed. EVQs are stopped after RXQs */
	if ((srp->sr_state != SFXGE_RXQ_INITIALIZED) ||
	    (srp->sr_flush == SFXGE_FLUSH_DONE))
		return;

	/* Flush successful: wakeup sfxge_rx_qstop() */
	srp->sr_flush = SFXGE_FLUSH_DONE;
	cv_broadcast(&(srp->sr_flush_kv));
}
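/*
 * Flush handshake: sfxge_rx_qstop() marks the flush PENDING, issues
 * efx_rx_qflush() and waits on sr_flush_kv for up to
 * SFXGE_RX_QFLUSH_USEC. The event path calls back into
 * sfxge_rx_qflush_done() or sfxge_rx_qflush_failed() to complete or
 * retry the flush.
 */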
void
sfxge_rx_qflush_failed(sfxge_rxq_t *srp)
{
	sfxge_t *sp = srp->sr_sp;
	unsigned int index = srp->sr_index;
	sfxge_evq_t *sep = sp->s_sep[index];

	ASSERT(mutex_owned(&(sep->se_lock)));

	/* SFCbug22989: events may be delayed. EVQs are stopped after RXQs */
	if ((srp->sr_state != SFXGE_RXQ_INITIALIZED) ||
	    (srp->sr_flush == SFXGE_FLUSH_DONE))
		return;

	/* Flush failed, so retry until timeout in sfxge_rx_qstop() */
	srp->sr_flush = SFXGE_FLUSH_FAILED;
	efx_rx_qflush(srp->sr_erp);
}

static void
sfxge_rx_qstop(sfxge_t *sp, unsigned int index)
{
	sfxge_evq_t *sep = sp->s_sep[index];
	sfxge_rxq_t *srp;
	clock_t timeout;

	mutex_enter(&(sep->se_lock));

	srp = sp->s_srp[index];
	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);

	sfxge_rx_qpoll_stop(srp);

	srp->sr_state = SFXGE_RXQ_INITIALIZED;

	if (sp->s_hw_err == SFXGE_HW_OK) {
		/* Wait up to 2 sec for queue flushing to complete */
		srp->sr_flush = SFXGE_FLUSH_PENDING;
		efx_rx_qflush(srp->sr_erp);
	} else {
		/* Do not attempt flush if indication of H/W failure */
		srp->sr_flush = SFXGE_FLUSH_DONE;
	}

	timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_RX_QFLUSH_USEC);

	while (srp->sr_flush != SFXGE_FLUSH_DONE) {
		if (cv_timedwait(&(srp->sr_flush_kv), &(sep->se_lock),
		    timeout) < 0) {
			/* Timeout waiting for successful flush */
			dev_info_t *dip = sp->s_dip;

			cmn_err(CE_NOTE,
			    SFXGE_CMN_ERR "[%s%d] rxq[%d] flush timeout",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    index);
			break;
		}
	}

	DTRACE_PROBE1(flush, sfxge_flush_state_t, srp->sr_flush);
	srp->sr_flush = SFXGE_FLUSH_DONE;

	/* Destroy the receive queue */
	efx_rx_qdestroy(srp->sr_erp);
	srp->sr_erp = NULL;

	/* Clear entries from the buffer table */
	sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
	    EFX_RXQ_NBUFS(sp->s_rxq_size));

	/*
	 * Free any unused RX packets which had descriptors on the RXQ.
	 * Packets will be discarded as state != STARTED.
	 */
	srp->sr_pending = srp->sr_added;
	sfxge_rx_qcomplete(srp, B_TRUE);

	ASSERT3U(srp->sr_completed, ==, srp->sr_pending);

	srp->sr_added = 0;
	srp->sr_pending = 0;
	srp->sr_completed = 0;
	srp->sr_loopback = 0;

	srp->sr_lowat = 0;
	srp->sr_hiwat = 0;

	mutex_exit(&(sep->se_lock));
}

static void
sfxge_rx_kstat_fini(sfxge_rxq_t *srp)
{
	kstat_delete(srp->sr_ksp);
	srp->sr_ksp = NULL;
}

static void
sfxge_rx_qfini(sfxge_t *sp, unsigned int index)
{
	sfxge_rxq_t *srp = sp->s_srp[index];

	ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);

	sp->s_srp[index] = NULL;
	srp->sr_state = SFXGE_RXQ_UNINITIALIZED;

	sfxge_rx_kstat_fini(srp);

	/* Empty the pool */
	sfxge_rx_qfpp_empty(srp);

	srp->sr_index = 0;

	kmem_cache_free(sp->s_rqc, srp);
}

static int
sfxge_rx_scale_kstat_update(kstat_t *ksp, int rw)
{
	sfxge_t *sp = ksp->ks_private;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	kstat_named_t *knp;
	unsigned int index;
	unsigned int entry;
	unsigned int *freq;
	int rc;

	ASSERT(mutex_owned(&(srsp->srs_lock)));

	if (rw != KSTAT_READ) {
		rc = EACCES;
		goto fail1;
	}

	if ((freq = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	for (index = 0; index < sip->si_nalloc; index++)
		freq[index] = 0;

	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		index = srsp->srs_tbl[entry];

		freq[index]++;
	}

	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		knp->value.ui64 = freq[index];
		knp++;
	}

	knp->value.ui64 = srsp->srs_count;

	kmem_free(freq, sizeof (unsigned int) * sip->si_nalloc);

	return (0);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static int
sfxge_rx_scale_kstat_init(sfxge_t *sp)
{
	dev_info_t *dip = sp->s_dip;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	kstat_t *ksp;
	kstat_named_t *knp;
	unsigned int index;
	int rc;

	/* Create the set */
	(void) snprintf(name, MAXNAMELEN - 1, "%s_rss", ddi_driver_name(dip));

	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
	    ddi_get_instance(dip), name, "rss", KSTAT_TYPE_NAMED,
	    sip->si_nalloc + 1, 0)) == NULL) {
		rc = ENOMEM;
		goto fail1;
	}

	srsp->srs_ksp = ksp;

	ksp->ks_update = sfxge_rx_scale_kstat_update;
	ksp->ks_private = sp;
	ksp->ks_lock = &(srsp->srs_lock);

	/* Initialise the named stats */
	knp = ksp->ks_data;
	for (index = 0; index < sip->si_nalloc; index++) {
		char name[MAXNAMELEN];

		(void) snprintf(name, MAXNAMELEN - 1, "evq%04d_count", index);
		kstat_named_init(knp, name, KSTAT_DATA_UINT64);
		knp++;
	}

	kstat_named_init(knp, "scale", KSTAT_DATA_UINT64);

	kstat_install(ksp);
	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_kstat_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	/* Destroy the set */
	kstat_delete(srsp->srs_ksp);
	srsp->srs_ksp = NULL;
}
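/*
 * The RSS spread is configurable through the "rx_scale_count"
 * property. A hypothetical driver.conf fragment limiting the spread
 * to four event queues might read:
 *
 *	rx_scale_count=4;
 *
 * Zero and negative values select the number of logical CPUs, and
 * sfxge_rx_scale_init() further clamps the result to the number of
 * allocated interrupts.
 */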
unsigned int
sfxge_rx_scale_prop_get(sfxge_t *sp)
{
	int rx_scale;

	rx_scale = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_scale_count",
	    SFXGE_RX_SCALE_MAX);
	/* Zero and all negative values select the number of logical CPUs */
	if (rx_scale <= 0)
		rx_scale = ncpus;

	return (rx_scale);
}

static int
sfxge_rx_scale_init(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int rc;

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_UNINITIALIZED);

	/* Create tables for CPU, core, cache and chip counts */
	srsp->srs_cpu = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
#ifdef _USE_CPU_PHYSID
	srsp->srs_core = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
	srsp->srs_cache = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
	srsp->srs_chip = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
#endif

	mutex_init(&(srsp->srs_lock), NULL, MUTEX_DRIVER, NULL);

	/* We need at least one event queue */
	srsp->srs_count = sfxge_rx_scale_prop_get(sp);
	if (srsp->srs_count > sip->si_nalloc)
		srsp->srs_count = sip->si_nalloc;
	if (srsp->srs_count < 1)
		srsp->srs_count = 1;

	/* Set up the kstats */
	if ((rc = sfxge_rx_scale_kstat_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);
	mutex_destroy(&(srsp->srs_lock));

	/* Free the contention tables allocated above */
#ifdef _USE_CPU_PHYSID
	kmem_free(srsp->srs_chip, sizeof (unsigned int) * NCPU);
	srsp->srs_chip = NULL;

	kmem_free(srsp->srs_cache, sizeof (unsigned int) * NCPU);
	srsp->srs_cache = NULL;

	kmem_free(srsp->srs_core, sizeof (unsigned int) * NCPU);
	srsp->srs_core = NULL;
#endif
	kmem_free(srsp->srs_cpu, sizeof (unsigned int) * NCPU);
	srsp->srs_cpu = NULL;

	return (rc);
}
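/*
 * sfxge_rx_scale_update() below rebalances the RSS spread across event
 * queues.  It works against a set of global contention tables (sfxge_cpu[]
 * and, when _USE_CPU_PHYSID is defined, sfxge_core[], sfxge_cache[] and
 * sfxge_chip[]), all accessed under cpu_lock: the instance first subtracts
 * its own usage, then greedily picks the srs_count event queues with the
 * lowest aggregate contention, adding each choice back into the tables so
 * that other instances see it.
 */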
void
sfxge_rx_scale_update(void *arg)
{
	sfxge_t *sp = arg;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip;
	processorid_t id;
	unsigned int count;
	unsigned int *tbl;
	unsigned int *rating;
	unsigned int entry;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = EFAULT;
		goto fail1;
	}

	if ((tbl = kmem_zalloc(sizeof (unsigned int) * SFXGE_RX_SCALE_MAX,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	sip = &(sp->s_intr);
	if ((rating = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;

#ifdef _USE_CPU_PHYSID
		ASSERT3U(sfxge_core[id], >=, srsp->srs_core[id]);
		sfxge_core[id] -= srsp->srs_core[id];
		srsp->srs_core[id] = 0;

		ASSERT3U(sfxge_cache[id], >=, srsp->srs_cache[id]);
		sfxge_cache[id] -= srsp->srs_cache[id];
		srsp->srs_cache[id] = 0;

		ASSERT3U(sfxge_chip[id], >=, srsp->srs_chip[id]);
		sfxge_chip[id] -= srsp->srs_chip[id];
		srsp->srs_chip[id] = 0;
#endif
	}

	ASSERT(srsp->srs_count != 0);

	/* Choose as many event queues as we need */
	for (count = 0; count < srsp->srs_count; count++) {
		unsigned int index;
		sfxge_evq_t *sep;
		unsigned int choice;
		unsigned int choice_rating;

		bzero(rating, sizeof (unsigned int) * sip->si_nalloc);

		/*
		 * Rate each event queue on its global level of CPU
		 * contention.
		 */
		for (index = 0; index < sip->si_nalloc; index++) {
			sep = sp->s_sep[index];

			id = sep->se_cpu_id;
			rating[index] += sfxge_cpu[id];

#ifdef _USE_CPU_PHYSID
			id = sep->se_core_id;
			rating[index] += sfxge_core[id];

			id = sep->se_cache_id;
			rating[index] += sfxge_cache[id];

			id = sep->se_chip_id;
			rating[index] += sfxge_chip[id];
#endif
		}

		/* Choose the queue with the lowest CPU contention */
		choice = 0;
		choice_rating = rating[0];

		for (index = 1; index < sip->si_nalloc; index++) {
			if (rating[index] < choice_rating) {
				choice = index;
				choice_rating = rating[index];
			}
		}

		/* Add our choice to the condensed RSS table */
		tbl[count] = choice;

		/* Add information to the global contention tables */
		sep = sp->s_sep[choice];

		id = sep->se_cpu_id;
		srsp->srs_cpu[id]++;
		sfxge_cpu[id]++;

#ifdef _USE_CPU_PHYSID
		id = sep->se_core_id;
		srsp->srs_core[id]++;
		sfxge_core[id]++;

		id = sep->se_cache_id;
		srsp->srs_cache[id]++;
		sfxge_cache[id]++;

		id = sep->se_chip_id;
		srsp->srs_chip[id]++;
		sfxge_chip[id]++;
#endif
	}

	mutex_exit(&cpu_lock);

	/* Build the expanded RSS table */
	count = 0;
	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		unsigned int index;

		index = tbl[count];
		count = (count + 1) % srsp->srs_count;

		srsp->srs_tbl[entry] = index;
	}

	/* Program the expanded RSS table into the hardware */
	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
	kmem_free(rating, sizeof (unsigned int) * sip->si_nalloc);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
	return;

fail3:
	DTRACE_PROBE(fail3);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));
}
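/*
 * Worked example for the expanded-table loop in sfxge_rx_scale_update()
 * above (illustrative values only): if srs_count is 3 and the condensed
 * table came out as tbl[] = { 2, 5, 7 }, the loop fills the expanded
 * table round-robin as srs_tbl[] = { 2, 5, 7, 2, 5, 7, ... } across all
 * SFXGE_RX_SCALE_MAX entries, so the hardware spreads flows evenly over
 * event queues 2, 5 and 7.
 */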
static int
sfxge_rx_scale_start(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	const efx_nic_cfg_t *encp;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	/* Make sure the LFSR hash is selected */
	encp = efx_nic_cfg_get(sp->s_enp);
	if ((rc = efx_rx_scale_mode_set(sp->s_enp, EFX_RX_HASHALG_LFSR, 0,
	    (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT))) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_STARTED;

	mutex_exit(&(srsp->srs_lock));

	/* sfxge_t->s_state_lock held */
	(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
	    DDI_SLEEP);

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_get(sfxge_t *sp, unsigned int *countp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail1;
	}

	*countp = srsp->srs_count;

	mutex_exit(&(srsp->srs_lock));

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_set(sfxge_t *sp, unsigned int count)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int dispatch = 1;
	int rc;

	if (count < 1 || count > sip->si_nalloc) {
		rc = EINVAL;
		goto fail1;
	}

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail2;
	}

	srsp->srs_count = count;

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED)
		dispatch = 0;

	mutex_exit(&(srsp->srs_lock));

	if (dispatch) {
		/* no locks held */
		(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update,
		    sp, DDI_SLEEP);
	}

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	mutex_exit(&(srsp->srs_lock));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

static void
sfxge_rx_scale_stop(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	processorid_t id;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_STARTED);

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;

#ifdef _USE_CPU_PHYSID
		ASSERT3U(sfxge_core[id], >=, srsp->srs_core[id]);
		sfxge_core[id] -= srsp->srs_core[id];
		srsp->srs_core[id] = 0;

		ASSERT3U(sfxge_cache[id], >=, srsp->srs_cache[id]);
		sfxge_cache[id] -= srsp->srs_cache[id];
		srsp->srs_cache[id] = 0;

		ASSERT3U(sfxge_chip[id], >=, srsp->srs_chip[id]);
		sfxge_chip[id] -= srsp->srs_chip[id];
		srsp->srs_chip[id] = 0;
#endif
	}

	mutex_exit(&cpu_lock);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
}

static void
sfxge_rx_scale_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	srsp->srs_state = SFXGE_RX_SCALE_UNINITIALIZED;

	/* Tear down the kstats */
	sfxge_rx_scale_kstat_fini(sp);

	srsp->srs_count = 0;

	mutex_destroy(&(srsp->srs_lock));

	/* Destroy tables */
#ifdef _USE_CPU_PHYSID
	kmem_free(srsp->srs_chip, sizeof (unsigned int) * NCPU);
	srsp->srs_chip = NULL;

	kmem_free(srsp->srs_cache, sizeof (unsigned int) * NCPU);
	srsp->srs_cache = NULL;

	kmem_free(srsp->srs_core, sizeof (unsigned int) * NCPU);
	srsp->srs_core = NULL;
#endif
	kmem_free(srsp->srs_cpu, sizeof (unsigned int) * NCPU);
	srsp->srs_cpu = NULL;
}
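/*
 * The remainder of this file implements the RX module lifecycle:
 * sfxge_rx_init() and sfxge_rx_fini() create and destroy the packet and
 * RXQ kmem caches and the per-interrupt queues, while sfxge_rx_start()
 * and sfxge_rx_stop() bring the queues up and down around efx_rx_init()
 * and efx_rx_fini().  Queue teardown always runs in reverse index order.
 */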
int
sfxge_rx_init(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	int index;
	int rc;

	if (sip->si_state == SFXGE_INTR_UNINITIALIZED) {
		rc = EINVAL;
		goto fail1;
	}

	if ((rc = sfxge_rx_scale_init(sp)) != 0)
		goto fail2;

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rx_packet_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rpc = kmem_cache_create(name, sizeof (sfxge_rx_packet_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_packet_ctor, sfxge_rx_packet_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_rpc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rxq_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rqc = kmem_cache_create(name, sizeof (sfxge_rxq_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_qctor, sfxge_rx_qdtor, NULL, sp,
	    NULL, 0);
	ASSERT(sp->s_rqc != NULL);

	sp->s_rx_pkt_mem_max = ddi_prop_get_int64(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_pkt_mem_max", 0);	/* disabled */

	/* Initialize the receive queue(s) */
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qinit(sp, index)) != 0)
			goto fail3;
	}

	sp->s_rx_coalesce_mode = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_coalesce_mode", SFXGE_RX_COALESCE_OFF);

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Tear down the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}
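/*
 * A worked example of the buffer alignment computed in sfxge_rx_start()
 * below, assuming (illustratively, not taken from the headers) a 64 byte
 * SFXGE_CPU_CACHE_SIZE, SFXGE_IP_ALIGN of 2 and a 16 byte
 * EFX_RX_PREFIX_SIZE: align = 64 + 2 - 16 = 50, i.e. the buffer starts
 * 14 bytes (EFX_RX_PREFIX_SIZE - SFXGE_IP_ALIGN) before a cache line
 * boundary.  The last two bytes of the prefix (the LFSR hash) then land
 * at offsets 0-1 of that cache line, the 14 byte Ethernet header spans
 * offsets 2-15, and the IP header begins at offset 16, 32-bit aligned
 * and in the same cache line as the hash.
 */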
int
sfxge_rx_start(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip;
	const efx_nic_cfg_t *encp;
	int index;
	int rc;

	mutex_enter(&(smp->sm_lock));

	/* Calculate the receive packet buffer size and alignment */
	sp->s_rx_buffer_size = EFX_MAC_PDU(sp->s_mtu);

	encp = efx_nic_cfg_get(sp->s_enp);
	if (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT) {
		size_t align;

		sp->s_rx_prefix_size = EFX_RX_PREFIX_SIZE;

		/*
		 * Place the start of the buffer a prefix length minus 2
		 * before the start of a cache line. This ensures that the
		 * last two bytes of the prefix (which is where the LFSR hash
		 * is located) are in the same cache line as the headers, and
		 * the IP header is 32-bit aligned.
		 */
		align = SFXGE_CPU_CACHE_SIZE + SFXGE_IP_ALIGN -
		    EFX_RX_PREFIX_SIZE;

		sp->s_rx_buffer_align = align;
		sp->s_rx_buffer_size += align;
	} else {
		sp->s_rx_prefix_size = 0;

		/*
		 * Place the start of the buffer 2 bytes after a cache line
		 * boundary so that the headers fit into the cache line and
		 * the IP header is 32-bit aligned.
		 */
		sp->s_rx_buffer_align = SFXGE_IP_ALIGN;
		sp->s_rx_buffer_size += SFXGE_IP_ALIGN;
	}

	/* Initialize the receive module */
	if ((rc = efx_rx_init(sp->s_enp)) != 0)
		goto fail1;

	mutex_exit(&(smp->sm_lock));

	if ((rc = sfxge_rx_scale_start(sp)) != 0)
		goto fail2;

	/* Start the receive queue(s) */
	sip = &(sp->s_intr);
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qstart(sp, index)) != 0)
			goto fail3;
	}

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Stop the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qstop(sp, index);

	sfxge_rx_scale_stop(sp);

fail2:
	DTRACE_PROBE(fail2);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(sp->s_enp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(smp->sm_lock));

	return (rc);
}

void
sfxge_rx_coalesce_mode_get(sfxge_t *sp, sfxge_rx_coalesce_mode_t *modep)
{
	*modep = sp->s_rx_coalesce_mode;
}

int
sfxge_rx_coalesce_mode_set(sfxge_t *sp, sfxge_rx_coalesce_mode_t mode)
{
	int rc;

	switch (mode) {
	case SFXGE_RX_COALESCE_OFF:
	case SFXGE_RX_COALESCE_DISALLOW_PUSH:
	case SFXGE_RX_COALESCE_ALLOW_PUSH:
		break;

	default:
		rc = EINVAL;
		goto fail1;
	}

	sp->s_rx_coalesce_mode = mode;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_loopback(sfxge_t *sp, unsigned int *countp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	*countp = 0;
	for (index = 0; index < sip->si_nalloc; index++)
		*countp += sfxge_rx_qloopback(sp, index);
}

int
sfxge_rx_ioctl(sfxge_t *sp, sfxge_rx_ioc_t *srip)
{
	int rc;

	switch (srip->sri_op) {
	case SFXGE_RX_OP_LOOPBACK: {
		unsigned int count;

		sfxge_rx_loopback(sp, &count);

		srip->sri_data = count;

		break;
	}
	default:
		rc = ENOTSUP;
		goto fail1;
	}

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_stop(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip = &(sp->s_intr);
	efx_nic_t *enp = sp->s_enp;
	int index;

	/* Stop the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0) {
		/* TBD: Flush RXQs in parallel; HW has limit + may need retry */
		sfxge_rx_qstop(sp, index);
	}

	sfxge_rx_scale_stop(sp);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(enp);

	sp->s_rx_buffer_align = 0;
	sp->s_rx_prefix_size = 0;
	sp->s_rx_buffer_size = 0;

	mutex_exit(&(smp->sm_lock));
}
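/*
 * sfxge_rx_loaned() below reconciles packets still "on loan" to the
 * network stack: for each queue it takes the owning EVQ lock and lets
 * sfxge_rx_qfpp_swizzle() fold the per-CPU put lists back into the free
 * packet pool, accumulating the number of buffers not yet returned.  A
 * caller can use the result to decide whether teardown is safe.
 */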
unsigned int
sfxge_rx_loaned(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;
	unsigned int loaned;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	loaned = 0;
	for (index = 0; index < sip->si_nalloc; index++) {
		sfxge_rxq_t *srp = sp->s_srp[index];
		sfxge_evq_t *sep = sp->s_sep[srp->sr_index];

		mutex_enter(&(sep->se_lock));

		loaned += sfxge_rx_qfpp_swizzle(srp);

		mutex_exit(&(sep->se_lock));
	}

	return (loaned);
}

void
sfxge_rx_fini(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	sp->s_rx_coalesce_mode = SFXGE_RX_COALESCE_OFF;

	/* Tear down the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	ASSERT3U(sp->s_rx_pkt_mem_alloc, ==, 0);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);
}