1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008-2013 Solarflare Communications Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/atomic.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/strft.h>
36 #include <sys/ksynch.h>
37 #include <sys/ethernet.h>
38 #include <sys/crc32.h>
39 #include <sys/pattr.h>
40 #include <sys/cpu.h>
41
43 #include <inet/ip.h>
44
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/tcp.h>
48
49 #include "sfxge.h"
50
51 #include "efx.h"
52
53 /* RXQ flush response timeout (in microseconds) */
54 #define SFXGE_RX_QFLUSH_USEC (2000000)
55
56 /* RXQ default packet buffer preallocation (number of packet buffers) */
57 #define SFXGE_RX_QPREALLOC (0)
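/* May be overridden by the "rx_prealloc_pkt_buffers" driver property */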
58
59 /* Receive packet DMA attributes */
60 static ddi_device_acc_attr_t sfxge_rx_packet_devacc = {
61
62 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
63 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
64 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
65 };
66
67 static ddi_dma_attr_t sfxge_rx_packet_dma_attr = {
68 DMA_ATTR_V0, /* dma_attr_version */
69 0, /* dma_attr_addr_lo */
70 0xffffffffffffffffull, /* dma_attr_addr_hi */
71 0xffffffffffffffffull, /* dma_attr_count_max */
72 SFXGE_CPU_CACHE_SIZE, /* dma_attr_align */
73 0xffffffff, /* dma_attr_burstsizes */
74 1, /* dma_attr_minxfer */
75 0xffffffffffffffffull, /* dma_attr_maxxfer */
76 0xffffffffffffffffull, /* dma_attr_seg */
77 1, /* dma_attr_sgllen */
78 1, /* dma_attr_granular */
79 0 /* dma_attr_flags */
80 };
81
82 /* Receive queue DMA attributes */
83 static ddi_device_acc_attr_t sfxge_rxq_devacc = {
84
85 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
86 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
87 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
88 };
89
90 static ddi_dma_attr_t sfxge_rxq_dma_attr = {
91 DMA_ATTR_V0, /* dma_attr_version */
92 0, /* dma_attr_addr_lo */
93 0xffffffffffffffffull, /* dma_attr_addr_hi */
94 0xffffffffffffffffull, /* dma_attr_count_max */
95 EFX_BUF_SIZE, /* dma_attr_align */
96 0xffffffff, /* dma_attr_burstsizes */
97 1, /* dma_attr_minxfer */
98 0xffffffffffffffffull, /* dma_attr_maxxfer */
99 0xffffffffffffffffull, /* dma_attr_seg */
100 1, /* dma_attr_sgllen */
101 1, /* dma_attr_granular */
102 0 /* dma_attr_flags */
103 };
104
105 /* Forward declaration */
106 static int
107 sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc);
108
109 static int
110 sfxge_rx_packet_ctor(void *buf, void *arg, int kmflags)
111 {
112 sfxge_rx_packet_t *srpp = buf;
113 sfxge_t *sp = arg;
114 dev_info_t *dip = sp->s_dip;
115 int err;
116
117 ASSERT3U(sizeof (srpp->__srp_u1.__srp_s1), <=,
118 sizeof (srpp->__srp_u1.__srp_pad));
119 ASSERT3U(sizeof (srpp->__srp_u2.__srp_s2), <=,
120 sizeof (srpp->__srp_u2.__srp_pad));
121
122 bzero(buf, sizeof (sfxge_rx_packet_t));
123
124 /* Allocate a DMA handle */
125 err = ddi_dma_alloc_handle(dip, &sfxge_rx_packet_dma_attr,
126 (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
127 NULL, &(srpp->srp_dma_handle));
128 if (err != DDI_SUCCESS)
129 goto fail1;
130
131 return (0);
132
133 fail1:
134 DTRACE_PROBE1(fail1, int, err);
135
136 SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);
137
138 return (-1);
139 }
140
141 static void
142 sfxge_rx_packet_dtor(void *buf, void *arg)
143 {
144 sfxge_rx_packet_t *srpp = buf;
145
146 _NOTE(ARGUNUSED(arg))
147
148 /* Free the DMA handle */
149 ddi_dma_free_handle(&(srpp->srp_dma_handle));
150 srpp->srp_dma_handle = NULL;
151
152 SFXGE_OBJ_CHECK(srpp, sfxge_rx_packet_t);
153 }
154
155 static int
156 sfxge_rx_qctor(void *buf, void *arg, int kmflags)
157 {
158 sfxge_rxq_t *srp = buf;
159 efsys_mem_t *esmp = &(srp->sr_mem);
160 sfxge_t *sp = arg;
161 sfxge_dma_buffer_attr_t dma_attr;
162 sfxge_rx_fpp_t *srfppp;
163 int nprealloc;
164 unsigned int id;
165 int rc;
166
167 /* Compile-time structure layout checks */
168 EFX_STATIC_ASSERT(sizeof (srp->__sr_u1.__sr_s1) <=
169 sizeof (srp->__sr_u1.__sr_pad));
170 EFX_STATIC_ASSERT(sizeof (srp->__sr_u2.__sr_s2) <=
171 sizeof (srp->__sr_u2.__sr_pad));
172 EFX_STATIC_ASSERT(sizeof (srp->__sr_u3.__sr_s3) <=
173 sizeof (srp->__sr_u3.__sr_pad));
174
175 bzero(buf, sizeof (sfxge_rxq_t));
176
177 srp->sr_sp = sp;
178
179 dma_attr.sdba_dip = sp->s_dip;
180 dma_attr.sdba_dattrp = &sfxge_rxq_dma_attr;
181 dma_attr.sdba_callback = DDI_DMA_SLEEP;
182 dma_attr.sdba_length = EFX_RXQ_SIZE(sp->s_rxq_size);
183 dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
184 dma_attr.sdba_devaccp = &sfxge_rxq_devacc;
185 dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
186 dma_attr.sdba_maxcookies = 1;
187 dma_attr.sdba_zeroinit = B_FALSE;
188
189 if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
190 goto fail1;
191
192 /* Allocate some buffer table entries */
193 if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_RXQ_NBUFS(sp->s_rxq_size),
194 &(srp->sr_id))) != 0)
195 goto fail2;
196
197 /* Allocate the context array */
198 if ((srp->sr_srpp = kmem_zalloc(sizeof (sfxge_rx_packet_t *) *
199 sp->s_rxq_size, kmflags)) == NULL) {
200 rc = ENOMEM;
201 goto fail3;
202 }
203
204 /* Allocate the flow table */
205 if ((srp->sr_flow = kmem_zalloc(sizeof (sfxge_rx_flow_t) *
206 SFXGE_MAX_FLOW, kmflags)) == NULL) {
207 rc = ENOMEM;
208 goto fail4;
209 }
210
211 srp->sr_srfpp = &(srp->sr_srfp);
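/* Flow-aging timeout used by the TCP coalescer (see sfxge_rx_qflow_add()) */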
212 srp->sr_rto = drv_usectohz(200000);
213
214 srp->sr_mpp = &(srp->sr_mp);
215
216 /* Initialize the free packet pool */
217 srfppp = &(srp->sr_fpp);
218 if ((srfppp->srfpp_putp = kmem_zalloc(SFXGE_CPU_CACHE_SIZE *
219 SFXGE_RX_FPP_NSLOTS, kmflags)) == NULL) {
220 rc = ENOMEM;
221 goto fail5;
222 }
223 for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
224 sfxge_rx_fpp_putlist_t *putp;
225 size_t off;
226
227 off = id * SFXGE_CPU_CACHE_SIZE;
228 putp = (void *)(srfppp->srfpp_putp + off);
229
230 putp->srfpl_putp = NULL;
231 putp->srfpl_putpp = &(putp->srfpl_putp);
232 mutex_init(&(putp->srfpl_lock), NULL, MUTEX_DRIVER,
233 DDI_INTR_PRI(sp->s_intr.si_intr_pri));
234 }
235
236 cv_init(&(srp->sr_flush_kv), NULL, CV_DRIVER, NULL);
237
238 /* Preallocate some packets on the free packet pool */
239 nprealloc = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
240 DDI_PROP_DONTPASS, "rx_prealloc_pkt_buffers", SFXGE_RX_QPREALLOC);
241 (void) sfxge_rx_qpreallocate(srp, nprealloc);
242
243
244 return (0);
245
246 fail5:
247 DTRACE_PROBE(fail5);
248
249 srp->sr_mpp = NULL;
250
251 srp->sr_rto = 0;
252 srp->sr_srfpp = NULL;
253
254 /* Free the flow table */
255 kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
256 SFXGE_MAX_FLOW);
257 srp->sr_flow = NULL;
258
259 fail4:
260 DTRACE_PROBE(fail4);
261
262 /* Free the context array */
263 kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
264 sp->s_rxq_size);
265 srp->sr_srpp = NULL;
266
267 fail3:
268 DTRACE_PROBE(fail3);
269
270 /* Free the buffer table entries */
271 sfxge_sram_buf_tbl_free(sp, srp->sr_id,
272 EFX_RXQ_NBUFS(sp->s_rxq_size));
273 srp->sr_id = 0;
274
275 fail2:
276 DTRACE_PROBE(fail2);
277 /* Remove dma setup */
278 sfxge_dma_buffer_destroy(esmp);
279
280 fail1:
281 DTRACE_PROBE1(fail1, int, rc);
282
283 srp->sr_sp = NULL;
284
285 SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);
286
287 return (-1);
288 }
289
290 static void
291 sfxge_rx_qdtor(void *buf, void *arg)
292 {
293 sfxge_rxq_t *srp = buf;
294 efsys_mem_t *esmp = &(srp->sr_mem);
295 sfxge_t *sp = srp->sr_sp;
296 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
297 unsigned int id;
298
299 _NOTE(ARGUNUSED(arg))
300
301 cv_destroy(&(srp->sr_flush_kv));
302
303 /* Tear down the free packet pool */
304 for (id = 0; id < SFXGE_RX_FPP_NSLOTS; id++) {
305 sfxge_rx_fpp_putlist_t *putp;
306 size_t off;
307
308 off = id * SFXGE_CPU_CACHE_SIZE;
309 putp = (void *)(srfppp->srfpp_putp + off);
310
311 putp->srfpl_putpp = NULL;
312 mutex_destroy(&(putp->srfpl_lock));
313
314 SFXGE_OBJ_CHECK(putp, sfxge_rx_fpp_putlist_t);
315 }
316 kmem_free(srfppp->srfpp_putp, SFXGE_CPU_CACHE_SIZE *
317 SFXGE_RX_FPP_NSLOTS);
318 srfppp->srfpp_putp = NULL;
319
320 srp->sr_mpp = NULL;
321
322 srp->sr_rto = 0;
323 srp->sr_srfpp = NULL;
324
325 /* Free the flow table */
326 kmem_free(srp->sr_flow, sizeof (sfxge_rx_flow_t) *
327 SFXGE_MAX_FLOW);
328 srp->sr_flow = NULL;
329
330 /* Free the context array */
331 kmem_free(srp->sr_srpp, sizeof (sfxge_rx_packet_t *) *
332 sp->s_rxq_size);
333 srp->sr_srpp = NULL;
334
335 /* Free the buffer table entries */
336 sfxge_sram_buf_tbl_free(sp, srp->sr_id,
337 EFX_RXQ_NBUFS(sp->s_rxq_size));
338 srp->sr_id = 0;
339
340 /* Tear down dma setup */
341 sfxge_dma_buffer_destroy(esmp);
342
343 SFXGE_OBJ_CHECK(srp, sfxge_rxq_t);
344 }
345
346 /* Note: This function takes ownership of *srpp. */
347 static inline void
348 sfxge_rx_qfpp_put(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
349 {
350 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
351 mblk_t *mp = srpp->srp_mp;
352 unsigned int id;
353 size_t off;
354 sfxge_rx_fpp_putlist_t *putp;
355
356 ASSERT3P(mp->b_next, ==, NULL);
357 ASSERT3P(mp->b_prev, ==, NULL);
358
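/*
 * Hash the freeing CPU onto a put-list slot; the slots are spaced one
 * CPU cache line apart so that different CPUs do not contend for lines.
 */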
359 id = CPU->cpu_seqid & SFXGE_RX_FPP_MASK;
360 off = id * SFXGE_CPU_CACHE_SIZE;
361
362 ASSERT3P(srpp->srp_putp, ==, srfppp->srfpp_putp);
363 putp = (void *)(srpp->srp_putp + off);
364
365 mutex_enter(&(putp->srfpl_lock));
366 putp->srfpl_count++;
367 *putp->srfpl_putpp = mp;
368 putp->srfpl_putpp = &(mp->b_next);
369 mutex_exit(&(putp->srfpl_lock));
370 }
371
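/*
 * Drain every per-CPU put list into the get list. Called with the
 * event queue lock held; returns the number of packets still loaned
 * out to the stack.
 */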
372 static unsigned int
373 sfxge_rx_qfpp_swizzle(sfxge_rxq_t *srp)
374 {
375 sfxge_t *sp = srp->sr_sp;
376 unsigned int index = srp->sr_index;
377 sfxge_evq_t *sep = sp->s_sep[index];
378 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
379 unsigned int start;
380 unsigned int id;
381 mblk_t *p;
382 mblk_t **pp;
383 unsigned int count;
384 unsigned int loaned;
385
386 ASSERT(mutex_owned(&(sep->se_lock)));
387
388 /* We want to access the put list for the current CPU last */
389 id = start = (CPU->cpu_seqid + 1) & SFXGE_RX_FPP_MASK;
390
391 do {
392 sfxge_rx_fpp_putlist_t *putp;
393 size_t off;
394
395 off = id * SFXGE_CPU_CACHE_SIZE;
396 id = (id + 1) & SFXGE_RX_FPP_MASK;
397
398 putp = (void *)(srfppp->srfpp_putp + off);
399
400 /* Acquire the put list */
401 mutex_enter(&(putp->srfpl_lock));
402
403 p = putp->srfpl_putp;
404 pp = putp->srfpl_putpp;
405 count = putp->srfpl_count;
406
407 putp->srfpl_putp = NULL;
408 putp->srfpl_putpp = &(putp->srfpl_putp);
409 putp->srfpl_count = 0;
410
411 mutex_exit(&(putp->srfpl_lock));
412
413 if (p == NULL)
414 continue;
415
416 /* Add the list to the head of the get list */
417 *pp = srfppp->srfpp_get;
418 srfppp->srfpp_get = p;
419
420 /* Adjust the counters */
421 ASSERT3U(srfppp->srfpp_loaned, >=, count);
422 srfppp->srfpp_loaned -= count;
423 srfppp->srfpp_count += count;
424
425 #if 0
426 /* NOTE: this probe is disabled because it is expensive!! */
427 DTRACE_PROBE2(count,
428 unsigned int, (id - 1) & SFXGE_RX_FPP_MASK,
429 unsigned int, count);
430 #endif
431
432 } while (id != start);
433
434 /* Return the number of packets yet to appear in the put list */
435 loaned = srfppp->srfpp_loaned;
436
437
438 return (loaned);
439 }
440
441
442 #define DB_FRTNP(mp) ((mp)->b_datap->db_frtnp)
443
444 static void
445 sfxge_rx_qfpp_empty(sfxge_rxq_t *srp)
446 {
447 sfxge_t *sp = srp->sr_sp;
448 unsigned int index = srp->sr_index;
449 sfxge_evq_t *sep = sp->s_sep[index];
450 sfxge_rx_fpp_t *srfppp;
451 mblk_t *mp;
452
453 mutex_enter(&(sep->se_lock));
454 srfppp = &(srp->sr_fpp);
455
456 /* Swizzle put list to get list */
457 (void) sfxge_rx_qfpp_swizzle(srp);
458 ASSERT3U(srfppp->srfpp_loaned, ==, 0);
459
460 mp = srfppp->srfpp_get;
461 srfppp->srfpp_get = NULL;
462
463 /* Free the remainder */
464 while (mp != NULL) {
465 mblk_t *next;
466 frtn_t *freep;
467 sfxge_rx_packet_t *srpp;
468
469 next = mp->b_next;
470 mp->b_next = NULL;
471
472 ASSERT3U(srfppp->srfpp_count, >, 0);
473 srfppp->srfpp_count--;
474
475 freep = DB_FRTNP(mp);
476 /*
477 * ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
478 * is implied by srpp test below
479 */
480 /*LINTED*/
481 srpp = (sfxge_rx_packet_t *)(freep->free_arg);
482 ASSERT3P(srpp->srp_mp, ==, mp);
483 ASSERT3P(mp->b_cont, ==, NULL);
484 srpp->srp_recycle = B_FALSE;
485
486 freeb(mp);
487
488 mp = next;
489 }
490 ASSERT3U(srfppp->srfpp_count, ==, 0);
491
492 srfppp->srfpp_min = 0;
493
494 mutex_exit(&(sep->se_lock));
495 }
496
497 /*
498  * This is an estimate of all memory consumed per RX packet;
499  * it may be inaccurate, but sp->s_rx_pkt_mem_alloc must not drift.
500 */
501 static uint64_t
502 sfxge_rx_pkt_mem_approx(const sfxge_rx_packet_t *srpp)
503 {
504 return (srpp->srp_mblksize + sizeof (mblk_t) + sizeof (dblk_t) +
505 sizeof (sfxge_rx_packet_t));
506 }
507
508 static void
509 sfxge_rx_qpacket_destroy(sfxge_rxq_t *srp, sfxge_rx_packet_t *srpp)
510 {
511 sfxge_t *sp = srp->sr_sp;
512 int64_t delta = sfxge_rx_pkt_mem_approx(srpp);
513
514 ASSERT(!(srpp->srp_recycle));
515 ASSERT3P(srpp->srp_mp, ==, NULL);
516
517 srpp->srp_off = 0;
518 srpp->srp_thp = NULL;
519 srpp->srp_iphp = NULL;
520 srpp->srp_etherhp = NULL;
521 srpp->srp_size = 0;
522 srpp->srp_flags = 0;
523
524 bzero(&(srpp->srp_free), sizeof (frtn_t));
525
526 srpp->srp_mblksize = 0;
527 srpp->srp_base = NULL;
528
529 /* Unbind the DMA memory from the DMA handle */
530 srpp->srp_addr = 0;
531 (void) ddi_dma_unbind_handle(srpp->srp_dma_handle);
532
533 /* Free the DMA memory */
534 srpp->srp_base = NULL;
535 ddi_dma_mem_free(&(srpp->srp_acc_handle));
536 srpp->srp_acc_handle = NULL;
537
538 srpp->srp_putp = NULL;
539 srpp->srp_srp = NULL;
540
541 kmem_cache_free(sp->s_rpc, srpp);
542 if (sp->s_rx_pkt_mem_max)
543 atomic_add_64(&sp->s_rx_pkt_mem_alloc, -delta);
544 }
545
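/*
 * Two esballoc(9F)-family variants of the packet free callback follow:
 * with xesballoc() recycling is signalled via an out-parameter, whereas
 * with desballoc() a replacement mblk must be allocated before the old
 * one is freed in order to recycle the buffer.
 */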
546 #ifdef _USE_XESBALLOC
547 static void
548 sfxge_rx_qpacket_free(void *arg, mblk_t *mp, boolean_t *recyclep)
549 {
550 sfxge_rx_packet_t *srpp = arg;
551 sfxge_rxq_t *srp = srpp->srp_srp;
552
553 /*
554 * WARNING "man -s 9f esballoc" states:
555 * => runs async in a background context
556 * => must not sleep, or access data structures that could be freed
557 */
558 ASSERT3P(DB_BASE(mp), ==, srpp->srp_base);
559 ASSERT3P(MBLKSIZE(mp), ==, srpp->srp_mblksize);
560
561 /* Check whether we want to recycle the receive packets */
562 if (srpp->srp_recycle) {
563 ASSERT3P(DB_FRTNP(mp), ==, &(srpp->srp_free));
564
565 srpp->srp_mp = mp;
566
567 /* NORMAL recycled case */
568 sfxge_rx_qfpp_put(srp, srpp);
569 *recyclep = B_TRUE;
570 return;
571 }
572
573 srpp->srp_mp = NULL;
574
575 sfxge_rx_qpacket_destroy(srp, srpp);
576 *recyclep = B_FALSE;
577 }
578 #endif /* _USE_XESBALLOC */
579
580 #ifdef _USE_DESBALLOC
581 static void
582 sfxge_rx_qpacket_free(void *arg)
583 {
584 sfxge_rx_packet_t *srpp = arg;
585 sfxge_rxq_t *srp = srpp->srp_srp;
586
587 /*
588 * WARNING "man -s 9f esballoc" states:
589 * => runs sync from the thread calling freeb()
590 * => must not sleep, or access data structures that could be freed
591 */
592
593 /* Check whether we want to recycle the receive packets */
594 if (srpp->srp_recycle) {
595 frtn_t *freep;
596 mblk_t *mp;
597 size_t size;
598
599 freep = &(srpp->srp_free);
600 ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
601 ASSERT3P(freep->free_arg, ==, (caddr_t)srpp);
602
603 /*
604 * Allocate a matching mblk_t before the current one is
605 * freed.
606 */
607 size = srpp->srp_mblksize;
608
609 if ((mp = desballoc(srpp->srp_base, size, BPRI_HI,
610 freep)) != NULL) {
611 srpp->srp_mp = mp;
612
613 /* NORMAL recycled case */
614 sfxge_rx_qfpp_put(srp, srpp);
615 return;
616 }
617 }
618
619 srpp->srp_mp = NULL;
620
621 sfxge_rx_qpacket_destroy(srp, srpp);
622 }
623 #endif /* _USE_DESBALLOC */
624
625 static sfxge_rx_packet_t *
626 sfxge_rx_qpacket_create(sfxge_rxq_t *srp)
627 {
628 sfxge_t *sp = srp->sr_sp;
629 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
630 sfxge_rx_packet_t *srpp;
631 size_t size;
632 caddr_t base;
633 size_t unit;
634 ddi_dma_cookie_t dmac;
635 unsigned int ncookies;
636 frtn_t *freep;
637 mblk_t *mp;
638 int err;
639 int rc;
640
641 size = sp->s_rx_buffer_size;
642
643 if (sp->s_rx_pkt_mem_max &&
644 (sp->s_rx_pkt_mem_alloc + size >= sp->s_rx_pkt_mem_max)) {
645 DTRACE_PROBE(rx_pkt_mem_max);
646 srp->sr_kstat.srk_rx_pkt_mem_limit++;
647 return (NULL);
648 }
649
650 /* Allocate a new packet */
651 if ((srpp = kmem_cache_alloc(sp->s_rpc, KM_NOSLEEP)) == NULL) {
652 srp->sr_kstat.srk_kcache_alloc_nomem++;
653 rc = ENOMEM;
654 goto fail1;
655 }
656
657 srpp->srp_srp = srp;
658 srpp->srp_putp = srfppp->srfpp_putp;
659
660 /* Allocate some DMA memory */
661 err = ddi_dma_mem_alloc(srpp->srp_dma_handle, size,
662 &sfxge_rx_packet_devacc, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
663 NULL, &base, &unit, &(srpp->srp_acc_handle));
664 switch (err) {
665 case DDI_SUCCESS:
666 break;
667
668 case DDI_FAILURE:
669 srp->sr_kstat.srk_dma_alloc_nomem++;
670 rc = ENOMEM;
671 goto fail2;
672
673 default:
674 srp->sr_kstat.srk_dma_alloc_fail++;
675 rc = EFAULT;
676 goto fail2;
677 }
678
679 /* Adjust the buffer to align the start of the DMA area correctly */
680 base += sp->s_rx_buffer_align;
681 size -= sp->s_rx_buffer_align;
682
683 /* Bind the DMA memory to the DMA handle */
684 err = ddi_dma_addr_bind_handle(srpp->srp_dma_handle, NULL,
685 base, size, DDI_DMA_READ | DDI_DMA_STREAMING,
686 DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
687 switch (err) {
688 case DDI_DMA_MAPPED:
689 break;
690
691 case DDI_DMA_INUSE:
692 srp->sr_kstat.srk_dma_bind_fail++;
693 rc = EEXIST;
694 goto fail3;
695
696 case DDI_DMA_NORESOURCES:
697 srp->sr_kstat.srk_dma_bind_nomem++;
698 rc = ENOMEM;
699 goto fail3;
700
701 case DDI_DMA_NOMAPPING:
702 srp->sr_kstat.srk_dma_bind_fail++;
703 rc = ENOTSUP;
704 goto fail3;
705
706 case DDI_DMA_TOOBIG:
707 srp->sr_kstat.srk_dma_bind_fail++;
708 rc = EFBIG;
709 goto fail3;
710
711 default:
712 srp->sr_kstat.srk_dma_bind_fail++;
713 rc = EFAULT;
714 goto fail3;
715 }
716 ASSERT3U(ncookies, ==, 1);
717
718 srpp->srp_addr = dmac.dmac_laddress;
719
720 srpp->srp_base = (unsigned char *)base;
721 srpp->srp_mblksize = size;
722
723 /*
724  * Attach the DMA buffer to a STREAMS block, with a free callback so
725  * that the packet can be recycled when the stack frees it.
726 */
727 freep = &(srpp->srp_free);
728 freep->free_func = sfxge_rx_qpacket_free;
729 freep->free_arg = (caddr_t)srpp;
730
731 #ifdef _USE_XESBALLOC
732 if ((mp = xesballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
733 srp->sr_kstat.srk_xesballoc_fail++;
734 rc = ENOMEM;
735 goto fail4;
736 }
737 #endif /* _USE_XESBALLOC */
738
739 #ifdef _USE_DESBALLOC
740 if ((mp = desballoc(srpp->srp_base, size, BPRI_HI, freep)) == NULL) {
741 srp->sr_kstat.srk_desballoc_fail++;
742 rc = ENOMEM;
743 goto fail4;
744 }
745 #endif /* _USE_DESBALLOC */
746
747 srpp->srp_mp = mp;
748 srpp->srp_recycle = B_TRUE;
749
750 if (sp->s_rx_pkt_mem_max) {
751 int64_t delta = sfxge_rx_pkt_mem_approx(srpp);
752 atomic_add_64(&sp->s_rx_pkt_mem_alloc, delta);
753 }
754
755 return (srpp);
756
757 fail4:
758 DTRACE_PROBE(fail4);
759
760 bzero(&(srpp->srp_free), sizeof (frtn_t));
761
762 srpp->srp_mblksize = 0;
763 srpp->srp_base = NULL;
764
765 /* Unbind the DMA memory from the DMA handle */
766 srpp->srp_addr = 0;
767 (void) ddi_dma_unbind_handle(srpp->srp_dma_handle);
768
769 fail3:
770 DTRACE_PROBE(fail3);
771
772 /* Free the DMA memory */
773 ddi_dma_mem_free(&(srpp->srp_acc_handle));
774 srpp->srp_acc_handle = NULL;
775
776 fail2:
777 DTRACE_PROBE(fail2);
778
779 srpp->srp_putp = NULL;
780 srpp->srp_srp = NULL;
781
782 kmem_cache_free(sp->s_rpc, srpp);
783
784 fail1:
785 DTRACE_PROBE1(fail1, int, rc);
786
787 return (NULL);
788 }
789
790 #define SFXGE_REFILL_BATCH 64
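/* Number of DMA addresses passed to each efx_rx_qpost() call */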
791
792 /* Try to refill the RX descriptor ring from the associated free pkt pool */
793 static void
794 sfxge_rx_qrefill(sfxge_rxq_t *srp, unsigned int target)
795 {
796 sfxge_t *sp = srp->sr_sp;
797 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
798 unsigned int index = srp->sr_index;
799 sfxge_evq_t *sep = sp->s_sep[index];
800 efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
801 mblk_t *mp;
802 int ntodo;
803 unsigned int count;
804 unsigned int batch;
805 unsigned int rxfill;
806 unsigned int mblksize;
807
808 prefetch_read_many(sp->s_enp);
809 prefetch_read_many(srp->sr_erp);
810
811 ASSERT(mutex_owned(&(sep->se_lock)));
812
813 if (srp->sr_state != SFXGE_RXQ_STARTED)
814 return;
815
816 rxfill = srp->sr_added - srp->sr_completed;
817 ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
818 ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
819 ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
820
821 if (ntodo == 0)
822 goto out;
823
824 (void) sfxge_rx_qfpp_swizzle(srp);
825
826 mp = srfppp->srfpp_get;
827 count = srfppp->srfpp_count;
828 mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;
829
830 batch = 0;
831 while (ntodo-- > 0) {
832 mblk_t *next;
833 frtn_t *freep;
834 sfxge_rx_packet_t *srpp;
835 unsigned int id;
836
837 if (mp == NULL)
838 break;
839
840 next = mp->b_next;
841 mp->b_next = NULL;
842
843 if (next != NULL)
844 prefetch_read_many(next);
845
846 freep = DB_FRTNP(mp);
847 /*LINTED*/
848 srpp = (sfxge_rx_packet_t *)(freep->free_arg);
849 ASSERT3P(srpp->srp_mp, ==, mp);
850
851 /* The MTU may have changed since the packet was allocated */
852 if (MBLKSIZE(mp) != mblksize) {
853 srpp->srp_recycle = B_FALSE;
854
855 freeb(mp);
856
857 --count;
858 mp = next;
859 continue;
860 }
861
862 srpp->srp_off = 0;
863 srpp->srp_thp = NULL;
864 srpp->srp_iphp = NULL;
865 srpp->srp_etherhp = NULL;
866 srpp->srp_size = 0;
867 srpp->srp_flags = EFX_DISCARD;
868
869 id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
870 ASSERT(srp->sr_srpp[id] == NULL);
871 srp->sr_srpp[id] = srpp;
872
873 addr[batch++] = srpp->srp_addr;
874 if (batch == SFXGE_REFILL_BATCH) {
875 efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
876 srp->sr_completed, srp->sr_added);
877 srp->sr_added += batch;
878 batch = 0;
879 }
880
881 --count;
882 mp = next;
883 }
884
885 srfppp->srfpp_get = mp;
886 srfppp->srfpp_count = count;
887
888 if (batch != 0) {
889 efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
890 srp->sr_completed, srp->sr_added);
891 srp->sr_added += batch;
892 }
893
894 /* Make the descriptors visible to the hardware */
895 (void) ddi_dma_sync(srp->sr_mem.esm_dma_handle,
896 0,
897 EFX_RXQ_SIZE(sp->s_rxq_size),
898 DDI_DMA_SYNC_FORDEV);
899
900 efx_rx_qpush(srp->sr_erp, srp->sr_added);
901
902 out:
903 if (srfppp->srfpp_count < srfppp->srfpp_min)
904 srfppp->srfpp_min = srfppp->srfpp_count;
905 }
906
907 /* Preallocate packets and put them in the free packet pool */
908 static int
909 sfxge_rx_qpreallocate(sfxge_rxq_t *srp, int nprealloc)
910 {
911 sfxge_rx_fpp_t *srfppp = &((srp)->sr_fpp);
912 srfppp->srfpp_lowat = nprealloc;
913 while (nprealloc-- > 0) {
914 sfxge_rx_packet_t *srpp;
915
916 if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
917 break;
918 sfxge_rx_qfpp_put(srp, srpp);
919 }
920 return (0);
921 }
922
923 /* Try to refill the RX descriptor ring by allocating new packets */
924 static void
925 sfxge_rx_qfill(sfxge_rxq_t *srp, unsigned int target)
926 {
927 sfxge_t *sp = srp->sr_sp;
928 unsigned int index = srp->sr_index;
929 sfxge_evq_t *sep = sp->s_sep[index];
930 unsigned int batch;
931 unsigned int rxfill;
932 unsigned int mblksize;
933 int ntodo;
934 efsys_dma_addr_t addr[SFXGE_REFILL_BATCH];
935 mblk_t *mp = NULL;
936
937 prefetch_read_many(sp->s_enp);
938 prefetch_read_many(srp->sr_erp);
939
940 ASSERT(mutex_owned(&(sep->se_lock)));
941
942 if (srp->sr_state != SFXGE_RXQ_STARTED)
943 return;
944
945 rxfill = srp->sr_added - srp->sr_completed;
946 ASSERT3U(rxfill, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
947 ntodo = min(EFX_RXQ_LIMIT(sp->s_rxq_size) - rxfill, target);
948 ASSERT3U(ntodo, <=, EFX_RXQ_LIMIT(sp->s_rxq_size));
949
950 if (ntodo == 0)
951 return;
952
953 mblksize = sp->s_rx_buffer_size - sp->s_rx_buffer_align;
954
955 batch = 0;
956 while (ntodo-- > 0) {
957 sfxge_rx_packet_t *srpp;
958 unsigned int id;
959
960 if ((srpp = sfxge_rx_qpacket_create(srp)) == NULL)
961 break;
962
963 mp = srpp->srp_mp;
964
965 ASSERT3U(MBLKSIZE(mp), ==, mblksize);
966
967 ASSERT3U(srpp->srp_off, ==, 0);
968 ASSERT3P(srpp->srp_thp, ==, NULL);
969 ASSERT3P(srpp->srp_iphp, ==, NULL);
970 ASSERT3P(srpp->srp_etherhp, ==, NULL);
971 ASSERT3U(srpp->srp_size, ==, 0);
972
973 srpp->srp_flags = EFX_DISCARD;
974
975 id = (srp->sr_added + batch) & (sp->s_rxq_size - 1);
976 ASSERT(srp->sr_srpp[id] == NULL);
977 srp->sr_srpp[id] = srpp;
978
979 addr[batch++] = srpp->srp_addr;
980 if (batch == SFXGE_REFILL_BATCH) {
981 efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
982 srp->sr_completed, srp->sr_added);
983 srp->sr_added += batch;
984 batch = 0;
985 }
986 }
987
988 if (batch != 0) {
989 efx_rx_qpost(srp->sr_erp, addr, mblksize, batch,
990 srp->sr_completed, srp->sr_added);
991 srp->sr_added += batch;
992 }
993
994 /* Make the descriptors visible to the hardware */
995 (void) ddi_dma_sync(srp->sr_mem.esm_dma_handle,
996 0,
997 EFX_RXQ_SIZE(sp->s_rxq_size),
998 DDI_DMA_SYNC_FORDEV);
999
1000 efx_rx_qpush(srp->sr_erp, srp->sr_added);
1001 }
1002
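/*
 * Trim the free packet pool: first top up the RXQ, then release any
 * packets that have sat unused since the last trim (tracked by
 * srfpp_min), keeping at least srfpp_lowat packets in the pool.
 */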
1003 void
1004 sfxge_rx_qfpp_trim(sfxge_rxq_t *srp)
1005 {
1006 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
1007 sfxge_t *sp = srp->sr_sp;
1008 unsigned int index = srp->sr_index;
1009 sfxge_evq_t *sep = sp->s_sep[index];
1010 mblk_t *p;
1011 mblk_t **pp;
1012 int count;
1013
1014 ASSERT(mutex_owned(&(sep->se_lock)));
1015
1016 if (srp->sr_state != SFXGE_RXQ_STARTED)
1017 goto done;
1018
1019 /* Make sure the queue is full */
1020 sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
1021
1022 /* The refill may have emptied the pool */
1023 if (srfppp->srfpp_min == 0)
1024 goto done;
1025
1026 /* Don't trim below the pool's low water mark */
1027 if (srfppp->srfpp_count <= srfppp->srfpp_lowat)
1028 goto done;
1029
1030 ASSERT(srfppp->srfpp_min <= srfppp->srfpp_count);
1031
1032 /* Trim to the larger of srfppp->srfpp_min and srfppp->srfpp_lowat */
1033 if (srfppp->srfpp_lowat > srfppp->srfpp_min)
1034 count = srfppp->srfpp_count - srfppp->srfpp_lowat;
1035 else
1036 count = srfppp->srfpp_count - srfppp->srfpp_min;
1037
1038 /* Walk the get list */
1039 pp = &(srfppp->srfpp_get);
1040 while (--count >= 0) {
1041 ASSERT(pp);
1042 p = *pp;
1043 ASSERT(p != NULL);
1044
1045 pp = &(p->b_next);
1046 }
1047 ASSERT(pp);
1048 p = *pp;
1049
1050 /* Truncate the get list */
1051 *pp = NULL;
1052
1053 /* Free the remainder */
1054 while (p != NULL) {
1055 mblk_t *next;
1056 frtn_t *freep;
1057 sfxge_rx_packet_t *srpp;
1058
1059 next = p->b_next;
1060 p->b_next = NULL;
1061
1062 ASSERT3U(srfppp->srfpp_min, >, 0);
1063 srfppp->srfpp_min--;
1064 srfppp->srfpp_count--;
1065
1066 freep = DB_FRTNP(p);
1067 /*LINTED*/
1068 srpp = (sfxge_rx_packet_t *)(freep->free_arg);
1069 ASSERT3P(srpp->srp_mp, ==, p);
1070
1071 srpp->srp_recycle = B_FALSE;
1072
1073 freeb(p);
1074
1075 p = next;
1076 }
1077
1078 done:
1079 srfppp->srfpp_min = srfppp->srfpp_count;
1080 }
1081
1082 static void
1083 sfxge_rx_qpoll(void *arg)
1084 {
1085 sfxge_rxq_t *srp = arg;
1086 sfxge_t *sp = srp->sr_sp;
1087 unsigned int index = srp->sr_index;
1088 sfxge_evq_t *sep = sp->s_sep[index];
1089 uint16_t magic;
1090
1091 /*
1092 * man timeout(9f) states that this code should adhere to the
1093  * same requirements as a soft interrupt handler - DO NOT BLOCK
1094 */
1095
1096 /*
1097 * Post an event to the event queue to cause the free packet pool to be
1098 * trimmed if it is oversize.
1099 */
1100 magic = SFXGE_MAGIC_RX_QFPP_TRIM | index;
1101
1102 #if defined(DEBUG)
1103 /* This is guaranteed due to the start/stop order of rx and ev */
1104 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
1105 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
1106 #else
1107 /*
1108 * Bug22691 WORKAROUND:
1109 * This handler has been observed in the field to be invoked for a
1110 * queue in the INITIALIZED state, which should never happen.
1111 * Until the mechanism for this is properly understood, add defensive
1112 * checks.
1113 */
1114 if ((sep->se_state != SFXGE_EVQ_STARTED) ||
1115 (srp->sr_state != SFXGE_RXQ_STARTED) ||
1116 (!sep->se_eep)) {
1117 cmn_err(CE_WARN, SFXGE_CMN_ERR
1118 "[%s%d] RXQ[%d] bad state in sfxge_rx_qpoll %d %d %p",
1119 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip),
1120 index, sep->se_state, srp->sr_state, sep->se_eep);
1121 return;
1122 }
1123 #endif
1124 efx_ev_qpost(sep->se_eep, magic);
1125
1126 srp->sr_tid = timeout(sfxge_rx_qpoll, srp,
1127 drv_usectohz(sp->s_rxq_poll_usec));
1128 }
1129
1130 static void
1131 sfxge_rx_qpoll_start(sfxge_rxq_t *srp)
1132 {
1133 sfxge_t *sp = srp->sr_sp;
1134 unsigned int index = srp->sr_index;
1135 sfxge_evq_t *sep = sp->s_sep[index];
1136
1137 ASSERT(mutex_owned(&(sep->se_lock)));
1138 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
1139
1140 /* Schedule a poll */
1141 ASSERT3P(srp->sr_tid, ==, 0);
1142 srp->sr_tid = timeout(sfxge_rx_qpoll, srp, 0);
1143 }
1144
1145 static void
1146 sfxge_rx_qpoll_stop(sfxge_rxq_t *srp)
1147 {
1148 sfxge_t *sp = srp->sr_sp;
1149 unsigned int index = srp->sr_index;
1150 sfxge_evq_t *sep = sp->s_sep[index];
1151 timeout_id_t tid;
1152
1153 ASSERT(mutex_owned(&(sep->se_lock)));
1154 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
1155
1156 /*
1157 * Cancel the qpoll timer. Care is needed as this function
1158 * can race with sfxge_rx_qpoll() for timeout id updates.
1159 *
1160 * Do not hold locks used by any timeout(9f) handlers across
1161 * calls to untimeout(9f) as this will deadlock.
1162 */
1163 tid = 0;
1164 while ((srp->sr_tid != 0) && (srp->sr_tid != tid)) {
1165 tid = srp->sr_tid;
1166 (void) untimeout(tid);
1167 }
1168 srp->sr_tid = 0;
1169 }
1170
1171 static int
1172 sfxge_rx_kstat_update(kstat_t *ksp, int rw)
1173 {
1174 sfxge_rxq_t *srp = ksp->ks_private;
1175 sfxge_t *sp = srp->sr_sp;
1176 unsigned int index = srp->sr_index;
1177 sfxge_evq_t *sep = sp->s_sep[index];
1178 kstat_named_t *knp;
1179 int rc;
1180
1181 if (rw != KSTAT_READ) {
1182 rc = EACCES;
1183 goto fail1;
1184 }
1185
1186 ASSERT(mutex_owned(&(sep->se_lock)));
1187 if (srp->sr_state != SFXGE_RXQ_STARTED)
1188 goto done;
1189
1190 knp = ksp->ks_data;
1191 /* NB pointer post-increment below */
1192 knp++->value.ui32 = srp->sr_kstat.srk_rx_pkt_mem_limit;
1193 knp++->value.ui32 = srp->sr_kstat.srk_kcache_alloc_nomem;
1194 knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_nomem;
1195 knp++->value.ui32 = srp->sr_kstat.srk_dma_alloc_fail;
1196 knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_nomem;
1197 knp++->value.ui32 = srp->sr_kstat.srk_dma_bind_fail;
1198 #ifdef _USE_XESBALLOC
1199 knp++->value.ui32 = srp->sr_kstat.srk_xesballoc_fail;
1200 #endif
1201 #ifdef _USE_DESBALLOC
1202 knp++->value.ui32 = srp->sr_kstat.srk_desballoc_fail;
1203 #endif
1204 knp++->value.ui32 = srp->sr_kstat.srk_rxq_empty_discard;
1205
1206 done:
1207 return (0);
1208
1209 fail1:
1210 DTRACE_PROBE1(fail1, int, rc);
1211
1212 return (rc);
1213 }
1214
1215 static int
1216 sfxge_rx_kstat_init(sfxge_rxq_t *srp)
1217 {
1218 sfxge_t *sp = srp->sr_sp;
1219 unsigned int index = srp->sr_index;
1220 sfxge_evq_t *sep = sp->s_sep[index];
1221 dev_info_t *dip = sp->s_dip;
1222 char name[MAXNAMELEN];
1223 kstat_t *ksp;
1224 kstat_named_t *knp;
1225 int rc;
1226
1227 /* Create the set */
1228 (void) snprintf(name, MAXNAMELEN - 1, "%s_rxq%04d",
1229 ddi_driver_name(dip), index);
1230
1231 if ((ksp = kstat_create((char *)ddi_driver_name(dip),
1232 ddi_get_instance(dip), name, "rxq", KSTAT_TYPE_NAMED,
1233 SFXGE_RX_NSTATS, 0)) == NULL) {
1234 rc = ENOMEM;
1235 goto fail1;
1236 }
1237
1238 srp->sr_ksp = ksp;
1239
1240 ksp->ks_update = sfxge_rx_kstat_update;
1241 ksp->ks_private = srp;
1242 ksp->ks_lock = &(sep->se_lock);
1243
1244 /* Initialise the named stats */
1245 knp = ksp->ks_data;
1246 kstat_named_init(knp, "rx_pkt_mem_limit", KSTAT_DATA_UINT32);
1247 knp++;
1248 kstat_named_init(knp, "kcache_alloc_nomem", KSTAT_DATA_UINT32);
1249 knp++;
1250 kstat_named_init(knp, "dma_alloc_nomem", KSTAT_DATA_UINT32);
1251 knp++;
1252 kstat_named_init(knp, "dma_alloc_fail", KSTAT_DATA_UINT32);
1253 knp++;
1254 kstat_named_init(knp, "dma_bind_nomem", KSTAT_DATA_UINT32);
1255 knp++;
1256 kstat_named_init(knp, "dma_bind_fail", KSTAT_DATA_UINT32);
1257 knp++;
1258 #ifdef _USE_XESBALLOC
1259 kstat_named_init(knp, "xesballoc_fail", KSTAT_DATA_UINT32);
1260 #endif
1261 #ifdef _USE_DESBALLOC
1262 kstat_named_init(knp, "desballoc_fail", KSTAT_DATA_UINT32);
1263 #endif
1264 kstat_named_init(knp, "rxq_empty_discard", KSTAT_DATA_UINT32);
1265
1266 kstat_install(ksp);
1267 return (0);
1268
1269 fail1:
1270 DTRACE_PROBE1(fail1, int, rc);
1271
1272 return (rc);
1273 }
1274
1275 static int
1276 sfxge_rx_qinit(sfxge_t *sp, unsigned int index)
1277 {
1278 sfxge_rxq_t *srp;
1279 int rc;
1280
1281 ASSERT3U(index, <, SFXGE_RX_SCALE_MAX);
1282
1283 srp = kmem_cache_alloc(sp->s_rqc, KM_SLEEP);
1284
1285 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_UNINITIALIZED);
1286
1287 srp->sr_index = index;
1288 sp->s_srp[index] = srp;
1289
1290 if ((rc = sfxge_rx_kstat_init(srp)) != 0)
1291 goto fail1;
1292
1293 srp->sr_state = SFXGE_RXQ_INITIALIZED;
1294
1295 return (0);
1296 fail1:
1297 DTRACE_PROBE1(fail1, int, rc);
1298 kmem_cache_free(sp->s_rqc, srp);
1299
1300 return (rc);
1301 }
1302
1303 static int
1304 sfxge_rx_qstart(sfxge_t *sp, unsigned int index)
1305 {
1306 sfxge_evq_t *sep = sp->s_sep[index];
1307 sfxge_rxq_t *srp;
1308 efsys_mem_t *esmp;
1309 efx_nic_t *enp;
1310 unsigned int level;
1311 int rc;
1312
1313 mutex_enter(&(sep->se_lock));
1314 srp = sp->s_srp[index];
1315 enp = sp->s_enp;
1316 esmp = &(srp->sr_mem);
1317
1318 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
1319 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
1320
1321 /* Zero the memory */
1322 (void) memset(esmp->esm_base, 0, EFX_RXQ_SIZE(sp->s_rxq_size));
1323
1324 /* Program the buffer table */
1325 if ((rc = sfxge_sram_buf_tbl_set(sp, srp->sr_id, esmp,
1326 EFX_RXQ_NBUFS(sp->s_rxq_size))) != 0)
1327 goto fail1;
1328
1329 /* Create the receive queue */
1330 if ((rc = efx_rx_qcreate(enp, index, index, EFX_RXQ_TYPE_DEFAULT,
1331 esmp, sp->s_rxq_size, srp->sr_id, sep->se_eep, &(srp->sr_erp)))
1332 != 0)
1333 goto fail2;
1334
1335 /* Enable the receive queue */
1336 efx_rx_qenable(srp->sr_erp);
1337
1338 /* Set the water marks */
1339 srp->sr_hiwat = EFX_RXQ_LIMIT(sp->s_rxq_size) * 9 / 10;
1340 srp->sr_lowat = srp->sr_hiwat / 2;
1341
1342 srp->sr_state = SFXGE_RXQ_STARTED;
1343
1344 sfxge_rx_qpoll_start(srp);
1345
1346 /* Try to fill the queue from the pool */
1347 sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
1348
1349 /*
1350  * If there were insufficient buffers in the pool to reach at
1351  * least a batch then allocate some.
1352 */
1353 level = srp->sr_added - srp->sr_completed;
1354 if (level < SFXGE_RX_BATCH)
1355 sfxge_rx_qfill(srp, SFXGE_RX_BATCH);
1356
1357 mutex_exit(&(sep->se_lock));
1358
1359 return (0);
1360
1361 fail2:
1362 DTRACE_PROBE(fail2);
1363
1364 /* Clear entries from the buffer table */
1365 sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
1366 EFX_RXQ_NBUFS(sp->s_rxq_size));
1367
1368 fail1:
1369 DTRACE_PROBE1(fail1, int, rc);
1370
1371 mutex_exit(&(sep->se_lock));
1372
1373 return (rc);
1374 }
1375
1376 static void
1377 sfxge_rx_qflow_complete(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp)
1378 {
1379 mblk_t *mp;
1380 struct ether_header *etherhp;
1381 struct ip *iphp;
1382 struct tcphdr *thp;
1383
1384 if (srfp->srf_mp == NULL)
1385 return;
1386
1387 mp = srfp->srf_mp;
1388 etherhp = srfp->srf_etherhp;
1389 iphp = srfp->srf_iphp;
1390 thp = srfp->srf_last_thp;
1391
1392 ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
1393 sizeof (struct ether_vlan_header) :
1394 sizeof (struct ether_header)) +
1395 srfp->srf_len, ==, msgdsize(mp));
1396
1397 ASSERT3U(srfp->srf_len & 0xffff, ==, srfp->srf_len);
1398 iphp->ip_len = htons(srfp->srf_len);
1399
1400 srfp->srf_first_thp->th_ack = thp->th_ack;
1401 srfp->srf_first_thp->th_win = thp->th_win;
1402 srfp->srf_first_thp->th_flags = thp->th_flags;
1403
1404 DTRACE_PROBE2(flow_complete, uint32_t, srfp->srf_tag,
1405 size_t, srfp->srf_len);
1406
1407 srfp->srf_mp = NULL;
1408 srfp->srf_len = 0;
1409
1410 ASSERT(mp->b_next == NULL);
1411 *(srp->sr_mpp) = mp;
1412 srp->sr_mpp = &(mp->b_next);
1413 }
1414
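/*
 * Attempt to merge a TCP segment into the given flow. Returns B_TRUE
 * if the segment was queued on the flow, or B_FALSE (after completing
 * any partly assembled chain) if it must be passed up unmerged.
 */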
1415 static boolean_t
1416 sfxge_rx_qflow_add(sfxge_rxq_t *srp, sfxge_rx_flow_t *srfp,
1417 sfxge_rx_packet_t *srpp, clock_t now)
1418 {
1419 sfxge_t *sp = srp->sr_sp;
1420 struct ether_header *etherhp = srpp->srp_etherhp;
1421 struct ip *iphp = srpp->srp_iphp;
1422 struct tcphdr *thp = srpp->srp_thp;
1423 size_t off = srpp->srp_off;
1424 size_t size = (size_t)(srpp->srp_size);
1425 mblk_t *mp = srpp->srp_mp;
1426 uint32_t seq;
1427 unsigned int shift;
1428
1429 ASSERT3U(MBLKL(mp), ==, off + size);
1430 ASSERT3U(DB_CKSUMFLAGS(mp), ==,
1431 HCK_FULLCKSUM | HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM);
1432
1433 seq = ntohl(thp->th_seq);
1434
1435 /*
1436 * If the time between this segment and the last is greater than RTO
1437 * then consider this a new flow.
1438 */
1439 if (now - srfp->srf_lbolt > srp->sr_rto) {
1440 srfp->srf_count = 1;
1441 srfp->srf_seq = seq + size;
1442
1443 goto fail1;
1444 }
1445
1446 if (seq != srfp->srf_seq) {
1447 if (srfp->srf_count > SFXGE_SLOW_START)
1448 srfp->srf_count = SFXGE_SLOW_START;
1449
1450 srfp->srf_count >>= 1;
1451
1452 srfp->srf_count++;
1453 srfp->srf_seq = seq + size;
1454
1455 goto fail2;
1456 }
1457
1458 /* Update the in-order segment count and sequence number */
1459 srfp->srf_count++;
1460 srfp->srf_seq = seq + size;
1461
1462 /* Don't merge across pure ACK, URG, SYN or RST segments */
1463 if (size == 0 || thp->th_flags & (TH_URG | TH_SYN | TH_RST) ||
1464 thp->th_urp != 0)
1465 goto fail3;
1466
1467 /*
1468 * If the in-order segment count has not yet reached the slow-start
1469 * threshold then we cannot coalesce.
1470 */
1471 if (srfp->srf_count < SFXGE_SLOW_START)
1472 goto fail4;
1473
1474 /* Scale up the packet size from 4k (the maximum being 64k) */
1475 ASSERT3U(srfp->srf_count, >=, SFXGE_SLOW_START);
1476 shift = MIN(srfp->srf_count - SFXGE_SLOW_START + 12, 16);
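/*
 * e.g. srf_count == SFXGE_SLOW_START gives a 4KB (1 << 12) limit;
 * each further in-order segment doubles it, up to 64KB (1 << 16).
 */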
1477 if (srfp->srf_len + size >= (1 << shift))
1478 sfxge_rx_qflow_complete(srp, srfp);
1479
1480 ASSERT(mp->b_cont == NULL);
1481
1482 #ifdef _USE_GLD_V3_SOL10
1483 /*
1484 * The IP and UDP layers in Solaris 10 have slow paths for
1485 * handling mblks with more than 2 fragments.
1486 * UDP: see OpenSolaris CR 6305037
1487 * IP: see <http://www.mail-archive.com/networking-discuss@
1488 * opensolaris.org/msg07366.html>
1489 */
1490 if (srfp->srf_mp && srfp->srf_mp->b_cont) {
1491 sfxge_rx_qflow_complete(srp, srfp);
1492 }
1493 #endif
1494
1495 if (srfp->srf_mp == NULL) {
1496 /* First packet in this flow */
1497 srfp->srf_etherhp = etherhp;
1498 srfp->srf_iphp = iphp;
1499 srfp->srf_first_thp = srfp->srf_last_thp = thp;
1500
1501 ASSERT3P(mp->b_cont, ==, NULL);
1502 srfp->srf_mp = mp;
1503 srfp->srf_mpp = &(mp->b_cont);
1504
1505 srfp->srf_len = ntohs(iphp->ip_len);
1506
1507 /*
1508 * If the flow is not already in the list of occupied flows then
1509 * add it.
1510 */
1511 if (srfp->srf_next == NULL &&
1512 srp->sr_srfpp != &(srfp->srf_next)) {
1513 *(srp->sr_srfpp) = srfp;
1514 srp->sr_srfpp = &(srfp->srf_next);
1515 }
1516 } else {
1517 /* Later packet in this flow - skip TCP header */
1518 srfp->srf_last_thp = thp;
1519
1520 mp->b_rptr += off;
1521 ASSERT3U(MBLKL(mp), ==, size);
1522
1523 ASSERT3P(mp->b_cont, ==, NULL);
1524 *(srfp->srf_mpp) = mp;
1525 srfp->srf_mpp = &(mp->b_cont);
1526
1527 srfp->srf_len += size;
1528
1529 ASSERT(srfp->srf_next != NULL ||
1530 srp->sr_srfpp == &(srfp->srf_next));
1531 }
1532
1533 DTRACE_PROBE2(flow_add, uint32_t, srfp->srf_tag, size_t, size);
1534
1535 /*
1536 * Try to align coalesced segments on push boundaries, unless they
1537 * are too frequent.
1538 */
1539 if (sp->s_rx_coalesce_mode == SFXGE_RX_COALESCE_ALLOW_PUSH &&
1540 thp->th_flags & TH_PUSH)
1541 sfxge_rx_qflow_complete(srp, srfp);
1542
1543 srfp->srf_lbolt = now;
1544 return (B_TRUE);
1545
1546 fail4:
1547 fail3:
1548 fail2:
1549 fail1:
1550 sfxge_rx_qflow_complete(srp, srfp);
1551
1552 srfp->srf_lbolt = now;
1553 return (B_FALSE);
1554 }
1555
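/*
 * Walk the chain of received packets, merging in-order TCP segments
 * into per-flow chains keyed by the address/port 4-tuple and VLAN tag.
 * Packets that cannot be merged are passed through unmodified.
 */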
1556 void
1557 sfxge_rx_qpacket_coalesce(sfxge_rxq_t *srp)
1558 {
1559 sfxge_t *sp = srp->sr_sp;
1560 clock_t now;
1561 mblk_t *mp;
1562 sfxge_rx_flow_t *srfp;
1563
1564 ASSERT(sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF);
1565
1566 now = ddi_get_lbolt();
1567
1568 mp = srp->sr_mp;
1569
1570 srp->sr_mp = NULL;
1571 srp->sr_mpp = &(srp->sr_mp);
1572
1573 /* Start with the last flow to be appended to */
1574 srfp = *(srp->sr_srfpp);
1575
1576 while (mp != NULL) {
1577 frtn_t *freep;
1578 sfxge_rx_packet_t *srpp;
1579 struct ether_header *etherhp;
1580 struct ip *iphp;
1581 struct tcphdr *thp;
1582 size_t off;
1583 size_t size;
1584 uint16_t ether_tci;
1585 uint16_t hash;
1586 uint32_t tag;
1587 mblk_t *next;
1588
1589 next = mp->b_next;
1590 mp->b_next = NULL;
1591
1592 if (next != NULL)
1593 prefetch_read_many(next);
1594
1595 freep = DB_FRTNP(mp);
1596 /*LINTED*/
1597 srpp = (sfxge_rx_packet_t *)(freep->free_arg);
1598 ASSERT3P(srpp->srp_mp, ==, mp);
1599
1600 /* If the packet is not TCP then we cannot coalesce it */
1601 if (~(srpp->srp_flags) & EFX_PKT_TCP)
1602 goto reject;
1603
1604 /*
1605 * If the packet is not fully checksummed then we cannot
1606 * coalesce it.
1607 */
1608 if (~(srpp->srp_flags) & (EFX_CKSUM_TCPUDP | EFX_CKSUM_IPV4))
1609 goto reject;
1610
1611 /* Parse the TCP header */
1612 sfxge_tcp_parse(mp, ðerhp, &iphp, &thp, &off,
1613 &size);
1614 ASSERT(etherhp != NULL);
1615 ASSERT(iphp != NULL);
1616 ASSERT(thp != NULL);
1617 ASSERT(off != 0);
1618
1619 if ((iphp->ip_off & ~htons(IP_DF)) != 0)
1620 goto reject;
1621
1622 if (etherhp->ether_type == htons(ETHERTYPE_VLAN)) {
1623 struct ether_vlan_header *ethervhp;
1624
1625 ethervhp = (struct ether_vlan_header *)etherhp;
1626 ether_tci = ethervhp->ether_tci;
1627 } else {
1628 ether_tci = 0;
1629 }
1630
1631 /*
1632 * Make sure any minimum length padding is stripped
1633 * before we try to add the packet to a flow.
1634 */
1635 ASSERT3U(sp->s_rx_prefix_size + MBLKL(mp), ==,
1636 (size_t)(srpp->srp_size));
1637 ASSERT3U(sp->s_rx_prefix_size + off + size, <=,
1638 (size_t)(srpp->srp_size));
1639
1640 if (sp->s_rx_prefix_size + off + size <
1641 (size_t)(srpp->srp_size))
1642 mp->b_wptr = mp->b_rptr + off + size;
1643
1644 /*
1645 * If there is no current flow, or the segment does not match
1646 * the current flow then we must attempt to look up the
1647 * correct flow in the table.
1648 */
1649 if (srfp == NULL)
1650 goto lookup;
1651
1652 if (srfp->srf_saddr != iphp->ip_src.s_addr ||
1653 srfp->srf_daddr != iphp->ip_dst.s_addr)
1654 goto lookup;
1655
1656 if (srfp->srf_sport != thp->th_sport ||
1657 srfp->srf_dport != thp->th_dport)
1658 goto lookup;
1659
1660 if (srfp->srf_tci != ether_tci)
1661 goto lookup;
1662
1663 add:
1664 ASSERT(srfp != NULL);
1665
1666 srpp->srp_etherhp = etherhp;
1667 srpp->srp_iphp = iphp;
1668 srpp->srp_thp = thp;
1669 srpp->srp_off = off;
1670
1671 ASSERT3U(size, <, (1 << 16));
1672 srpp->srp_size = (uint16_t)size;
1673
1674 /* Try to append the packet to the flow */
1675 if (!sfxge_rx_qflow_add(srp, srfp, srpp, now))
1676 goto reject;
1677
1678 mp = next;
1679 continue;
1680
1681 lookup:
1682 /*
1683 * If there is a prefix area then read the hash from that,
1684 * otherwise calculate it.
1685 */
1686 if (sp->s_rx_prefix_size != 0) {
1687 hash = EFX_RX_HASH_VALUE(EFX_RX_HASHALG_LFSR,
1688 DB_BASE(mp));
1689 } else {
1690 SFXGE_TCP_HASH(
1691 ntohl(iphp->ip_src.s_addr),
1692 ntohs(thp->th_sport),
1693 ntohl(iphp->ip_dst.s_addr),
1694 ntohs(thp->th_dport),
1695 hash);
1696 }
1697
1698 srfp = &(srp->sr_flow[(hash >> 6) % SFXGE_MAX_FLOW]);
1699 tag = (uint32_t)hash + 1; /* Make sure it's not zero */
1700
1701 /*
1702 * If the flow we have found does not match the hash then
1703 * it may be an unused flow, or it may be stale.
1704 */
1705 if (tag != srfp->srf_tag) {
1706 if (srfp->srf_count != 0) {
1707 if (now - srfp->srf_lbolt <= srp->sr_rto)
1708 goto reject;
1709 }
1710
1711 if (srfp->srf_mp != NULL)
1712 goto reject;
1713
1714 /* Start a new flow */
1715 ASSERT(srfp->srf_next == NULL);
1716
1717 srfp->srf_tag = tag;
1718
1719 srfp->srf_saddr = iphp->ip_src.s_addr;
1720 srfp->srf_daddr = iphp->ip_dst.s_addr;
1721 srfp->srf_sport = thp->th_sport;
1722 srfp->srf_dport = thp->th_dport;
1723 srfp->srf_tci = ether_tci;
1724
1725 srfp->srf_count = 0;
1726 srfp->srf_seq = ntohl(thp->th_seq);
1727
1728 srfp->srf_lbolt = now;
1729 goto add;
1730 }
1731
1732 /*
1733 * If the flow we have found does match the hash then it could
1734 * still be an alias.
1735 */
1736 if (srfp->srf_saddr != iphp->ip_src.s_addr ||
1737 srfp->srf_daddr != iphp->ip_dst.s_addr)
1738 goto reject;
1739
1740 if (srfp->srf_sport != thp->th_sport ||
1741 srfp->srf_dport != thp->th_dport)
1742 goto reject;
1743
1744 if (srfp->srf_tci != ether_tci)
1745 goto reject;
1746
1747 goto add;
1748
1749 reject:
1750 *(srp->sr_mpp) = mp;
1751 srp->sr_mpp = &(mp->b_next);
1752
1753 mp = next;
1754 }
1755 }
1756
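/*
 * Process the descriptors completed since the last call, building a
 * chain of received packets, coalescing TCP segments where enabled,
 * passing the chain up the stack and topping the RXQ back up.
 */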
1757 void
1758 sfxge_rx_qcomplete(sfxge_rxq_t *srp, boolean_t eop)
1759 {
1760 sfxge_t *sp = srp->sr_sp;
1761 unsigned int index = srp->sr_index;
1762 sfxge_evq_t *sep = sp->s_sep[index];
1763 unsigned int completed;
1764 sfxge_rx_fpp_t *srfppp = &(srp->sr_fpp);
1765 unsigned int level;
1766
1767 ASSERT(mutex_owned(&(sep->se_lock)));
1768
1769 ASSERT(srp->sr_mp == NULL);
1770 ASSERT(srp->sr_mpp == &(srp->sr_mp));
1771
1772 completed = srp->sr_completed;
1773 while (completed != srp->sr_pending) {
1774 unsigned int id;
1775 sfxge_rx_packet_t *srpp;
1776 mblk_t *mp;
1777 size_t size;
1778 uint16_t flags;
1779
1780 id = completed++ & (sp->s_rxq_size - 1);
1781
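/* Prefetch the packet four entries ahead to hide memory latency */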
1782 if (srp->sr_pending - completed >= 4) {
1783 unsigned int prefetch;
1784
1785 prefetch = (id + 4) & (sp->s_rxq_size - 1);
1786
1787 srpp = srp->sr_srpp[prefetch];
1788 ASSERT(srpp != NULL);
1789
1790 mp = srpp->srp_mp;
1791 prefetch_read_many(mp->b_datap);
1792 } else if (completed == srp->sr_pending) {
1793 prefetch_read_many(srp->sr_mp);
1794 }
1795
1796 srpp = srp->sr_srpp[id];
1797 ASSERT(srpp != NULL);
1798
1799 srp->sr_srpp[id] = NULL;
1800
1801 mp = srpp->srp_mp;
1802 ASSERT(mp->b_cont == NULL);
1803
1804 /* when called from sfxge_rx_qstop() */
1805 if (srp->sr_state != SFXGE_RXQ_STARTED)
1806 goto discard;
1807
1808 if (srpp->srp_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
1809 goto discard;
1810
1811 /* Set up the packet length */
1812 ASSERT3P(mp->b_rptr, ==, DB_BASE(mp));
1813 mp->b_rptr += sp->s_rx_prefix_size;
1814
1815 prefetch_read_many(mp->b_rptr);
1816
1817 ASSERT3P(mp->b_wptr, ==, DB_BASE(mp));
1818 mp->b_wptr += (size_t)(srpp->srp_size);
1819 ASSERT3P(mp->b_wptr, <=, DB_LIM(mp));
1820
1821 /* Calculate the maximum packet size */
1822 size = sp->s_mtu;
1823 size += (srpp->srp_flags & EFX_PKT_VLAN_TAGGED) ?
1824 sizeof (struct ether_vlan_header) :
1825 sizeof (struct ether_header);
1826
1827 if (MBLKL(mp) > size)
1828 goto discard;
1829
1830 /* Make the data visible to the kernel */
1831 (void) ddi_dma_sync(srpp->srp_dma_handle, 0,
1832 (size_t)(srpp->srp_size), DDI_DMA_SYNC_FORKERNEL);
1833
1834 /* Check for loopback packets */
1835 if (!(srpp->srp_flags & EFX_PKT_IPV4) &&
1836 !(srpp->srp_flags & EFX_PKT_IPV6)) {
1837 struct ether_header *etherhp;
1838
1839 /*LINTED*/
1840 etherhp = (struct ether_header *)(mp->b_rptr);
1841
1842 if (etherhp->ether_type ==
1843 htons(SFXGE_ETHERTYPE_LOOPBACK)) {
1844 DTRACE_PROBE(loopback);
1845
1846 srp->sr_loopback++;
1847 goto discard;
1848 }
1849 }
1850
1851 /* Set up the checksum information */
1852 flags = 0;
1853
1854 if (srpp->srp_flags & EFX_CKSUM_IPV4) {
1855 ASSERT(srpp->srp_flags & EFX_PKT_IPV4);
1856 flags |= HCK_IPV4_HDRCKSUM;
1857 }
1858
1859 if (srpp->srp_flags & EFX_CKSUM_TCPUDP) {
1860 ASSERT(srpp->srp_flags & EFX_PKT_TCP ||
1861 srpp->srp_flags & EFX_PKT_UDP);
1862 flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
1863 }
1864
1865 DB_CKSUMSTART(mp) = 0;
1866 DB_CKSUMSTUFF(mp) = 0;
1867 DB_CKSUMEND(mp) = 0;
1868 DB_CKSUMFLAGS(mp) = flags;
1869 DB_CKSUM16(mp) = 0;
1870
1871 /* Add the packet to the tail of the chain */
1872 srfppp->srfpp_loaned++;
1873
1874 ASSERT(mp->b_next == NULL);
1875 *(srp->sr_mpp) = mp;
1876 srp->sr_mpp = &(mp->b_next);
1877
1878 continue;
1879
1880 discard:
1881 /* Return the packet to the pool */
1882 srfppp->srfpp_loaned++;
1883 freeb(mp); /* Equivalent to freemsg() as b_cont==0 */
1884 }
1885 srp->sr_completed = completed;
1886
1887 /* Attempt to coalesce any TCP packets */
1888 if (sp->s_rx_coalesce_mode != SFXGE_RX_COALESCE_OFF)
1889 sfxge_rx_qpacket_coalesce(srp);
1890
1891 /*
1892 * If there are any pending flows and this is the end of the
1893 * poll then they must be completed.
1894 */
1895 if (srp->sr_srfp != NULL && eop) {
1896 sfxge_rx_flow_t *srfp;
1897
1898 srfp = srp->sr_srfp;
1899
1900 srp->sr_srfp = NULL;
1901 srp->sr_srfpp = &(srp->sr_srfp);
1902
1903 do {
1904 sfxge_rx_flow_t *next;
1905
1906 next = srfp->srf_next;
1907 srfp->srf_next = NULL;
1908
1909 sfxge_rx_qflow_complete(srp, srfp);
1910
1911 srfp = next;
1912 } while (srfp != NULL);
1913 }
1914
1915 level = srp->sr_added - srp->sr_completed;
1916
1917 /* If there are any packets then pass them up the stack */
1918 if (srp->sr_mp != NULL) {
1919 mblk_t *mp;
1920
1921 mp = srp->sr_mp;
1922
1923 srp->sr_mp = NULL;
1924 srp->sr_mpp = &(srp->sr_mp);
1925
1926 if (level == 0) {
1927 /* Try to refill ASAP */
1928 sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
1929 level = srp->sr_added - srp->sr_completed;
1930 }
1931
1932 /*
1933 * If the RXQ is still empty, discard and recycle the
1934 * current entry to ensure that the ring always
1935 * contains at least one descriptor. This ensures that
1936 * the next hardware RX will trigger an event
1937 * (possibly delayed by interrupt moderation) and
1938 * trigger another refill/fill attempt.
1939 *
1940 * Note this drops a complete LRO fragment from the
1941 * start of the batch.
1942 *
1943 * Note also that copymsgchain() does not help with
1944 * resource starvation here, unless we are short of DMA
1945 * mappings.
1946 */
1947 if (level == 0) {
1948 mblk_t *nmp;
1949
1950 srp->sr_kstat.srk_rxq_empty_discard++;
1951 DTRACE_PROBE1(rxq_empty_discard, int, index);
1952 nmp = mp->b_next;
1953 if (nmp)
1954 sfxge_gld_rx_post(sp, index, nmp);
1955 /* level == 0, so the top-up below will swizzle and repost it */
1956 freemsg(mp);
1957 } else {
1958 sfxge_gld_rx_post(sp, index, mp);
1959 }
1960 }
1961
1962 /* Top up the queue if necessary */
1963 if (level < srp->sr_hiwat) {
1964 sfxge_rx_qrefill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
1965
1966 level = srp->sr_added - srp->sr_completed;
1967 if (level < srp->sr_lowat)
1968 sfxge_rx_qfill(srp, EFX_RXQ_LIMIT(sp->s_rxq_size));
1969 }
1970 }
1971
1972 static unsigned int
1973 sfxge_rx_qloopback(sfxge_t *sp, unsigned int index)
1974 {
1975 sfxge_evq_t *sep = sp->s_sep[index];
1976 sfxge_rxq_t *srp;
1977 unsigned int count;
1978
1979 mutex_enter(&(sep->se_lock));
1980 srp = sp->s_srp[index];
1981 count = srp->sr_loopback;
1982 srp->sr_loopback = 0;
1983 mutex_exit(&(sep->se_lock));
1984
1985 return (count);
1986 }
1987
1988 void
1989 sfxge_rx_qflush_done(sfxge_rxq_t *srp)
1990 {
1991 sfxge_t *sp = srp->sr_sp;
1992 unsigned int index = srp->sr_index;
1993 sfxge_evq_t *sep = sp->s_sep[index];
1994
1995 ASSERT(mutex_owned(&(sep->se_lock)));
1996
1997 /* SFCbug22989: events may be delayed. EVQs are stopped after RXQs */
1998 if ((srp->sr_state != SFXGE_RXQ_INITIALIZED) ||
1999 (srp->sr_flush == SFXGE_FLUSH_DONE))
2000 return;
2001
2002 /* Flush successful: wakeup sfxge_rx_qstop() */
2003 srp->sr_flush = SFXGE_FLUSH_DONE;
2004 cv_broadcast(&(srp->sr_flush_kv));
2005 }
2006
2007 void
2008 sfxge_rx_qflush_failed(sfxge_rxq_t *srp)
2009 {
2010 sfxge_t *sp = srp->sr_sp;
2011 unsigned int index = srp->sr_index;
2012 sfxge_evq_t *sep = sp->s_sep[index];
2013
2014 ASSERT(mutex_owned(&(sep->se_lock)));
2015
2016 /* SFCbug22989: events may be delayed. EVQs are stopped after RXQs */
2017 if ((srp->sr_state != SFXGE_RXQ_INITIALIZED) ||
2018 (srp->sr_flush == SFXGE_FLUSH_DONE))
2019 return;
2024
2025 /* Flush failed, so retry until timeout in sfxge_rx_qstop() */
2026 srp->sr_flush = SFXGE_FLUSH_FAILED;
2027 efx_rx_qflush(srp->sr_erp);
2028 }
2029
2030 static void
2031 sfxge_rx_qstop(sfxge_t *sp, unsigned int index)
2032 {
2033 sfxge_evq_t *sep = sp->s_sep[index];
2034 sfxge_rxq_t *srp;
2035 clock_t timeout;
2036
2037 mutex_enter(&(sep->se_lock));
2038
2039 srp = sp->s_srp[index];
2040 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_STARTED);
2041
2042 sfxge_rx_qpoll_stop(srp);
2043
2044 srp->sr_state = SFXGE_RXQ_INITIALIZED;
2045
2046 if (sp->s_hw_err == SFXGE_HW_OK) {
2047 /* Wait up to 2 seconds for queue flushing to complete */
2048 srp->sr_flush = SFXGE_FLUSH_PENDING;
2049 efx_rx_qflush(srp->sr_erp);
2050 } else {
2051 /* Do not attempt a flush if there is an indication of H/W failure */
2052 srp->sr_flush = SFXGE_FLUSH_DONE;
2053 }
2054
2055 timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_RX_QFLUSH_USEC);
2056
2057 while (srp->sr_flush != SFXGE_FLUSH_DONE) {
2058 if (cv_timedwait(&(srp->sr_flush_kv), &(sep->se_lock),
2059 timeout) < 0) {
2060 /* Timeout waiting for successful flush */
2061 dev_info_t *dip = sp->s_dip;
2062
2064 cmn_err(CE_NOTE,
2065 SFXGE_CMN_ERR "[%s%d] rxq[%d] flush timeout",
2066 ddi_driver_name(dip), ddi_get_instance(dip), index);
2067 break;
2068 }
2069 }
2070
2071 DTRACE_PROBE1(flush, sfxge_flush_state_t, srp->sr_flush);
2072 srp->sr_flush = SFXGE_FLUSH_DONE;
2073
2074 /* Destroy the receive queue */
2075 efx_rx_qdestroy(srp->sr_erp);
2076 srp->sr_erp = NULL;
2077
2078 /* Clear entries from the buffer table */
2079 sfxge_sram_buf_tbl_clear(sp, srp->sr_id,
2080 EFX_RXQ_NBUFS(sp->s_rxq_size));
2081
2082 /*
2083  * Free any unused RX packets which had descriptors on the RXQ.
2084  * Packets will be discarded as state != STARTED.
2085 */
2086 srp->sr_pending = srp->sr_added;
2087 sfxge_rx_qcomplete(srp, B_TRUE);
2088
2089 ASSERT3U(srp->sr_completed, ==, srp->sr_pending);
2090
2091 srp->sr_added = 0;
2092 srp->sr_pending = 0;
2093 srp->sr_completed = 0;
2094 srp->sr_loopback = 0;
2095
2096 srp->sr_lowat = 0;
2097 srp->sr_hiwat = 0;
2098
2099 mutex_exit(&(sep->se_lock));
2100 }
2101
2102 static void
2103 sfxge_rx_kstat_fini(sfxge_rxq_t *srp)
2104 {
2105 kstat_delete(srp->sr_ksp);
2106 srp->sr_ksp = NULL;
2107 }
2108
2109 static void
2110 sfxge_rx_qfini(sfxge_t *sp, unsigned int index)
2111 {
2112 sfxge_rxq_t *srp = sp->s_srp[index];
2113
2114 ASSERT3U(srp->sr_state, ==, SFXGE_RXQ_INITIALIZED);
2115
2116 sp->s_srp[index] = NULL;
2117 srp->sr_state = SFXGE_RXQ_UNINITIALIZED;
2118
2119 sfxge_rx_kstat_fini(srp);
2120
2121 /* Empty the pool */
2122 sfxge_rx_qfpp_empty(srp);
2123
2124 srp->sr_index = 0;
2125
2126 kmem_cache_free(sp->s_rqc, srp);
2127 }
2128
2129 static int
2130 sfxge_rx_scale_kstat_update(kstat_t *ksp, int rw)
2131 {
2132 sfxge_t *sp = ksp->ks_private;
2133 sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
2134 sfxge_intr_t *sip = &(sp->s_intr);
2135 kstat_named_t *knp;
2136 unsigned int index;
2137 unsigned int entry;
2138 unsigned int *freq;
2139 int rc;
2140
2141 ASSERT(mutex_owned(&(srsp->srs_lock)));
2142
2143 if (rw != KSTAT_READ) {
2144 rc = EACCES;
2145 goto fail1;
2146 }
2147
2148 if ((freq = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
2149 KM_NOSLEEP)) == NULL) {
2150 rc = ENOMEM;
2151 goto fail2;
2152 }
2153
2156
2157 for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
2158 index = srsp->srs_tbl[entry];
2159
2160 freq[index]++;
2161 }
2162
2163 knp = ksp->ks_data;
2164 for (index = 0; index < sip->si_nalloc; index++) {
2165 knp->value.ui64 = freq[index];
2166 knp++;
2167 }
2168
2169 knp->value.ui64 = srsp->srs_count;
2170
2171 kmem_free(freq, sizeof (unsigned int) * sip->si_nalloc);
2172
2173 return (0);
2174
2175 fail2:
2176 DTRACE_PROBE(fail2);
2177 fail1:
2178 DTRACE_PROBE1(fail1, int, rc);
2179 return (rc);
2180 }
2181
2182 static int
2183 sfxge_rx_scale_kstat_init(sfxge_t *sp)
2184 {
2185 dev_info_t *dip = sp->s_dip;
2186 sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
2187 sfxge_intr_t *sip = &(sp->s_intr);
2188 char name[MAXNAMELEN];
2189 kstat_t *ksp;
2190 kstat_named_t *knp;
2191 unsigned int index;
2192 int rc;
2193
2194 /* Create the set */
2195 (void) snprintf(name, MAXNAMELEN - 1, "%s_rss", ddi_driver_name(dip));
2196
2197 if ((ksp = kstat_create((char *)ddi_driver_name(dip),
2198 ddi_get_instance(dip), name, "rss", KSTAT_TYPE_NAMED,
2199 sip->si_nalloc + 1, 0)) == NULL) {
2200 rc = ENOMEM;
2201 goto fail1;
2202 }
2203
2204 srsp->srs_ksp = ksp;
2205
2206 ksp->ks_update = sfxge_rx_scale_kstat_update;
2207 ksp->ks_private = sp;
2208 ksp->ks_lock = &(srsp->srs_lock);
2209
2210 /* Initialise the named stats */
2211 knp = ksp->ks_data;
2212 for (index = 0; index < sip->si_nalloc; index++) {
2213 char name[MAXNAMELEN];
2214
2215 (void) snprintf(name, MAXNAMELEN - 1, "evq%04d_count", index);
2216 kstat_named_init(knp, name, KSTAT_DATA_UINT64);
2217 knp++;
2218 }
2219
2220 kstat_named_init(knp, "scale", KSTAT_DATA_UINT64);
2221
2222 kstat_install(ksp);
2223 return (0);
2224
2225 fail1:
2226 DTRACE_PROBE1(fail1, int, rc);
2227
2228 return (rc);
2229 }
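
/*
 * The named kstats created above can be read from userland with
 * kstat(1M); for instance 0 of the driver the set would be fetched as
 * shown below (values and queue count illustrative only):
 *
 *	# kstat -m sfxge -i 0 -n sfxge_rss
 *	evq0000_count	32
 *	evq0001_count	32
 *	evq0002_count	32
 *	evq0003_count	32
 *	scale		4
 *
 * where each evqNNNN_count reports how many RSS indirection table
 * entries currently map to that event queue, and "scale" is the number
 * of channels in use.
 */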

static void
sfxge_rx_scale_kstat_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	/* Destroy the set */
	kstat_delete(srsp->srs_ksp);
	srsp->srs_ksp = NULL;
}

unsigned int
sfxge_rx_scale_prop_get(sfxge_t *sp)
{
	int rx_scale;

	rx_scale = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_scale_count",
	    SFXGE_RX_SCALE_MAX);
	/* Zero and all negative values select the number of logical CPUs */
	if (rx_scale <= 0)
		rx_scale = ncpus;

	return (rx_scale);
}
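
/*
 * Since the lookup above uses DDI_PROP_DONTPASS, the "rx_scale_count"
 * property must be defined on the device node itself, typically from
 * the driver's configuration file.  A hypothetical sfxge.conf entry
 * capping receive-side scaling at four channels would be:
 *
 *	rx_scale_count=4;
 *
 * Leaving the property unset keeps the SFXGE_RX_SCALE_MAX default,
 * while zero or negative values select one channel per logical CPU.
 */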

static int
sfxge_rx_scale_init(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int rc;

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_UNINITIALIZED);

	/* Create tables for CPU, core, cache and chip counts */
	srsp->srs_cpu = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
#ifdef _USE_CPU_PHYSID
	srsp->srs_core = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
	srsp->srs_cache = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
	srsp->srs_chip = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
#endif

	mutex_init(&(srsp->srs_lock), NULL, MUTEX_DRIVER, NULL);

	/* Clamp to at least one event queue and at most the number allocated */
	srsp->srs_count = sfxge_rx_scale_prop_get(sp);
	if (srsp->srs_count > sip->si_nalloc)
		srsp->srs_count = sip->si_nalloc;
	if (srsp->srs_count < 1)
		srsp->srs_count = 1;

	/* Set up the kstats */
	if ((rc = sfxge_rx_scale_kstat_init(sp)) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);
	mutex_destroy(&(srsp->srs_lock));

	return (rc);
}

void
sfxge_rx_scale_update(void *arg)
{
	sfxge_t *sp = arg;
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip;
	processorid_t id;
	unsigned int count;
	unsigned int *tbl;
	unsigned int *rating;
	unsigned int entry;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = EFAULT;
		goto fail1;
	}

	if ((tbl = kmem_zalloc(sizeof (unsigned int) * SFXGE_RX_SCALE_MAX,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail2;
	}

	sip = &(sp->s_intr);
	if ((rating = kmem_zalloc(sizeof (unsigned int) * sip->si_nalloc,
	    KM_NOSLEEP)) == NULL) {
		rc = ENOMEM;
		goto fail3;
	}

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;

#ifdef _USE_CPU_PHYSID
		ASSERT3U(sfxge_core[id], >=, srsp->srs_core[id]);
		sfxge_core[id] -= srsp->srs_core[id];
		srsp->srs_core[id] = 0;

		ASSERT3U(sfxge_cache[id], >=, srsp->srs_cache[id]);
		sfxge_cache[id] -= srsp->srs_cache[id];
		srsp->srs_cache[id] = 0;

		ASSERT3U(sfxge_chip[id], >=, srsp->srs_chip[id]);
		sfxge_chip[id] -= srsp->srs_chip[id];
		srsp->srs_chip[id] = 0;
#endif
	}

	ASSERT(srsp->srs_count != 0);

	/* Choose as many event queues as we need */
	for (count = 0; count < srsp->srs_count; count++) {
		unsigned int index;
		sfxge_evq_t *sep;
		unsigned int choice;
		unsigned int choice_rating;

		bzero(rating, sizeof (unsigned int) * sip->si_nalloc);

		/*
		 * Rate each event queue on its global level of CPU
		 * contention.
		 */
		for (index = 0; index < sip->si_nalloc; index++) {
			sep = sp->s_sep[index];

			id = sep->se_cpu_id;
			rating[index] += sfxge_cpu[id];

#ifdef _USE_CPU_PHYSID
			id = sep->se_core_id;
			rating[index] += sfxge_core[id];

			id = sep->se_cache_id;
			rating[index] += sfxge_cache[id];

			id = sep->se_chip_id;
			rating[index] += sfxge_chip[id];
#endif
		}

		/* Choose the queue with the lowest CPU contention */
		choice = 0;
		choice_rating = rating[0];

		for (index = 1; index < sip->si_nalloc; index++) {
			if (rating[index] < choice_rating) {
				choice = index;
				choice_rating = rating[index];
			}
		}

		/* Add our choice to the condensed RSS table */
		tbl[count] = choice;

		/* Add information to the global contention tables */
		sep = sp->s_sep[choice];

		id = sep->se_cpu_id;
		srsp->srs_cpu[id]++;
		sfxge_cpu[id]++;

#ifdef _USE_CPU_PHYSID
		id = sep->se_core_id;
		srsp->srs_core[id]++;
		sfxge_core[id]++;

		id = sep->se_cache_id;
		srsp->srs_cache[id]++;
		sfxge_cache[id]++;

		id = sep->se_chip_id;
		srsp->srs_chip[id]++;
		sfxge_chip[id]++;
#endif
	}
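
	/*
	 * A worked example of the selection loop above (ignoring the
	 * _USE_CPU_PHYSID terms): assume three event queues bound to
	 * CPUs 2, 5 and 5, initial global counts sfxge_cpu[2] == 1 and
	 * sfxge_cpu[5] == 0, and srs_count == 2.  Pass 1 rates the
	 * queues {1, 0, 0} and picks EVQ 1 (ties go to the lowest
	 * index), bumping sfxge_cpu[5] to 1.  Pass 2 then rates them
	 * {1, 1, 1} and picks EVQ 0, so the condensed table becomes
	 * {1, 0}: the least contended CPUs are consumed first.
	 */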

	mutex_exit(&cpu_lock);

	/* Build the expanded RSS table */
	count = 0;
	for (entry = 0; entry < SFXGE_RX_SCALE_MAX; entry++) {
		unsigned int index;

		index = tbl[count];
		count = (count + 1) % srsp->srs_count;

		srsp->srs_tbl[entry] = index;
	}
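
	/*
	 * For example, with srs_count == 3 and a condensed table of
	 * {4, 1, 6}, the loop above fills every entry round-robin:
	 *
	 *	srs_tbl = { 4, 1, 6, 4, 1, 6, ... }
	 *
	 * so each chosen event queue receives an equal share of the
	 * hash buckets (up to rounding when SFXGE_RX_SCALE_MAX is not
	 * a multiple of srs_count).
	 */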

	/* Program the expanded RSS table into the hardware */
	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
	kmem_free(rating, sizeof (unsigned int) * sip->si_nalloc);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
	return;

fail3:
	DTRACE_PROBE(fail3);
	kmem_free(tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);
fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));
}

static int
sfxge_rx_scale_start(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	const efx_nic_cfg_t *encp;
	int rc;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	/* Make sure the LFSR hash is selected */
	encp = efx_nic_cfg_get(sp->s_enp);
	if ((rc = efx_rx_scale_mode_set(sp->s_enp, EFX_RX_HASHALG_LFSR, 0,
	    (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT))) != 0)
		goto fail1;

	srsp->srs_state = SFXGE_RX_SCALE_STARTED;

	mutex_exit(&(srsp->srs_lock));

	/* sfxge_t->s_state_lock held */
	(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
	    DDI_SLEEP);

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_get(sfxge_t *sp, unsigned int *countp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	int rc;

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail1;
	}

	*countp = srsp->srs_count;

	mutex_exit(&(srsp->srs_lock));

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(srsp->srs_lock));

	return (rc);
}

int
sfxge_rx_scale_count_set(sfxge_t *sp, unsigned int count)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	sfxge_intr_t *sip = &(sp->s_intr);
	int dispatch = 1;
	int rc;

	if (count < 1 || count > sip->si_nalloc) {
		rc = EINVAL;
		goto fail1;
	}

	mutex_enter(&(srsp->srs_lock));

	if (srsp->srs_state != SFXGE_RX_SCALE_INITIALIZED &&
	    srsp->srs_state != SFXGE_RX_SCALE_STARTED) {
		rc = ENOTSUP;
		goto fail2;
	}

	srsp->srs_count = count;

	if (srsp->srs_state != SFXGE_RX_SCALE_STARTED)
		dispatch = 0;

	mutex_exit(&(srsp->srs_lock));

	if (dispatch) {
		/* No locks held */
		(void) ddi_taskq_dispatch(sp->s_tqp, sfxge_rx_scale_update, sp,
		    DDI_SLEEP);
	}

	return (0);

fail2:
	DTRACE_PROBE(fail2);

	mutex_exit(&(srsp->srs_lock));

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}
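
/*
 * A hypothetical in-driver caller would pair the two accessors above as
 * follows (illustrative sketch only):
 *
 *	unsigned int count;
 *
 *	if (sfxge_rx_scale_count_set(sp, 4) == 0 &&
 *	    sfxge_rx_scale_count_get(sp, &count) == 0)
 *		ASSERT3U(count, ==, 4);
 *
 * The setter validates the requested count against the number of
 * allocated interrupts and, if the scaler is already started, rebalances
 * the RSS table asynchronously on the driver taskq.
 */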

static void
sfxge_rx_scale_stop(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
	processorid_t id;

	mutex_enter(&(srsp->srs_lock));

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_STARTED);

	srsp->srs_state = SFXGE_RX_SCALE_INITIALIZED;

	mutex_enter(&cpu_lock);

	/*
	 * Subtract any current CPU, core, cache and chip usage from the
	 * global contention tables.
	 */
	for (id = 0; id < NCPU; id++) {
		ASSERT3U(sfxge_cpu[id], >=, srsp->srs_cpu[id]);
		sfxge_cpu[id] -= srsp->srs_cpu[id];
		srsp->srs_cpu[id] = 0;

#ifdef _USE_CPU_PHYSID
		ASSERT3U(sfxge_core[id], >=, srsp->srs_core[id]);
		sfxge_core[id] -= srsp->srs_core[id];
		srsp->srs_core[id] = 0;

		ASSERT3U(sfxge_cache[id], >=, srsp->srs_cache[id]);
		sfxge_cache[id] -= srsp->srs_cache[id];
		srsp->srs_cache[id] = 0;

		ASSERT3U(sfxge_chip[id], >=, srsp->srs_chip[id]);
		sfxge_chip[id] -= srsp->srs_chip[id];
		srsp->srs_chip[id] = 0;
#endif
	}

	mutex_exit(&cpu_lock);

	/* Clear down the RSS table */
	bzero(srsp->srs_tbl, sizeof (unsigned int) * SFXGE_RX_SCALE_MAX);

	(void) efx_rx_scale_tbl_set(sp->s_enp, srsp->srs_tbl,
	    SFXGE_RX_SCALE_MAX);

	mutex_exit(&(srsp->srs_lock));
}

static void
sfxge_rx_scale_fini(sfxge_t *sp)
{
	sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);

	ASSERT3U(srsp->srs_state, ==, SFXGE_RX_SCALE_INITIALIZED);

	srsp->srs_state = SFXGE_RX_SCALE_UNINITIALIZED;

	/* Tear down the kstats */
	sfxge_rx_scale_kstat_fini(sp);

	srsp->srs_count = 0;

	mutex_destroy(&(srsp->srs_lock));

	/* Destroy tables */
#ifdef _USE_CPU_PHYSID
	kmem_free(srsp->srs_chip, sizeof (unsigned int) * NCPU);
	srsp->srs_chip = NULL;

	kmem_free(srsp->srs_cache, sizeof (unsigned int) * NCPU);
	srsp->srs_cache = NULL;

	kmem_free(srsp->srs_core, sizeof (unsigned int) * NCPU);
	srsp->srs_core = NULL;
#endif
	kmem_free(srsp->srs_cpu, sizeof (unsigned int) * NCPU);
	srsp->srs_cpu = NULL;
}

int
sfxge_rx_init(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	char name[MAXNAMELEN];
	int index;
	int rc;

	if (sip->si_state == SFXGE_INTR_UNINITIALIZED) {
		rc = EINVAL;
		goto fail1;
	}

	if ((rc = sfxge_rx_scale_init(sp)) != 0)
		goto fail2;

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rx_packet_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rpc = kmem_cache_create(name, sizeof (sfxge_rx_packet_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_packet_ctor, sfxge_rx_packet_dtor,
	    NULL, sp, NULL, 0);
	ASSERT(sp->s_rpc != NULL);

	(void) snprintf(name, MAXNAMELEN - 1, "%s%d_rxq_cache",
	    ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));

	sp->s_rqc = kmem_cache_create(name, sizeof (sfxge_rxq_t),
	    SFXGE_CPU_CACHE_SIZE, sfxge_rx_qctor, sfxge_rx_qdtor, NULL, sp,
	    NULL, 0);
	ASSERT(sp->s_rqc != NULL);

	sp->s_rx_pkt_mem_max = ddi_prop_get_int64(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_pkt_mem_max", 0); /* Disabled by default */

	/* Initialize the receive queue(s) */
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qinit(sp, index)) != 0)
			goto fail3;
	}

	sp->s_rx_coalesce_mode = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
	    DDI_PROP_DONTPASS, "rx_coalesce_mode", SFXGE_RX_COALESCE_OFF);

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Tear down the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);

fail2:
	DTRACE_PROBE(fail2);
fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

int
sfxge_rx_start(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip;
	const efx_nic_cfg_t *encp;
	int index;
	int rc;

	mutex_enter(&(smp->sm_lock));

	/* Calculate the receive packet buffer size and alignment */
	sp->s_rx_buffer_size = EFX_MAC_PDU(sp->s_mtu);

	encp = efx_nic_cfg_get(sp->s_enp);
	if (encp->enc_features & EFX_FEATURE_LFSR_HASH_INSERT) {
		size_t align;

		sp->s_rx_prefix_size = EFX_RX_PREFIX_SIZE;

		/*
		 * Place the start of the buffer a prefix length minus 2
		 * before the start of a cache line. This ensures that the
		 * last two bytes of the prefix (which is where the LFSR hash
		 * is located) are in the same cache line as the headers, and
		 * the IP header is 32-bit aligned.
		 */
		align = SFXGE_CPU_CACHE_SIZE + SFXGE_IP_ALIGN -
		    EFX_RX_PREFIX_SIZE;

		sp->s_rx_buffer_align = align;
		sp->s_rx_buffer_size += align;
	} else {
		sp->s_rx_prefix_size = 0;

		/*
		 * Place the start of the buffer 2 bytes after a cache line
		 * boundary so that the headers fit into the cache line and
		 * the IP header is 32-bit aligned.
		 */
		sp->s_rx_buffer_align = SFXGE_IP_ALIGN;
		sp->s_rx_buffer_size += SFXGE_IP_ALIGN;
	}
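
	/*
	 * To make the arithmetic above concrete, assume (hypothetically)
	 * a 64-byte SFXGE_CPU_CACHE_SIZE, a 16-byte EFX_RX_PREFIX_SIZE
	 * and SFXGE_IP_ALIGN == 2.  In the hash-insert case:
	 *
	 *	align = 64 + 2 - 16 = 50
	 *
	 * so the buffer begins 50 bytes into a cache-aligned allocation,
	 * the prefix occupies bytes 50..65, and its final two bytes (the
	 * LFSR hash) land in the cache line starting at byte 64 together
	 * with the packet headers.  The 14-byte Ethernet header then ends
	 * at byte 80, leaving the IP header 32-bit aligned.
	 */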

	/* Initialize the receive module */
	if ((rc = efx_rx_init(sp->s_enp)) != 0)
		goto fail1;

	mutex_exit(&(smp->sm_lock));

	if ((rc = sfxge_rx_scale_start(sp)) != 0)
		goto fail2;

	/* Start the receive queue(s) */
	sip = &(sp->s_intr);
	for (index = 0; index < sip->si_nalloc; index++) {
		if ((rc = sfxge_rx_qstart(sp, index)) != 0)
			goto fail3;
	}

	return (0);

fail3:
	DTRACE_PROBE(fail3);

	/* Stop the receive queue(s) */
	while (--index >= 0)
		sfxge_rx_qstop(sp, index);

	sfxge_rx_scale_stop(sp);

fail2:
	DTRACE_PROBE(fail2);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(sp->s_enp);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	mutex_exit(&(smp->sm_lock));

	return (rc);
}

void
sfxge_rx_coalesce_mode_get(sfxge_t *sp, sfxge_rx_coalesce_mode_t *modep)
{
	*modep = sp->s_rx_coalesce_mode;
}

int
sfxge_rx_coalesce_mode_set(sfxge_t *sp, sfxge_rx_coalesce_mode_t mode)
{
	int rc;

	switch (mode) {
	case SFXGE_RX_COALESCE_OFF:
	case SFXGE_RX_COALESCE_DISALLOW_PUSH:
	case SFXGE_RX_COALESCE_ALLOW_PUSH:
		break;

	default:
		rc = EINVAL;
		goto fail1;
	}

	sp->s_rx_coalesce_mode = mode;

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_loopback(sfxge_t *sp, unsigned int *countp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	*countp = 0;
	for (index = 0; index < sip->si_nalloc; index++)
		*countp += sfxge_rx_qloopback(sp, index);
}

int
sfxge_rx_ioctl(sfxge_t *sp, sfxge_rx_ioc_t *srip)
{
	int rc;

	switch (srip->sri_op) {
	case SFXGE_RX_OP_LOOPBACK: {
		unsigned int count;

		sfxge_rx_loopback(sp, &count);

		srip->sri_data = count;

		break;
	}
	default:
		rc = ENOTSUP;
		goto fail1;
	}

	return (0);

fail1:
	DTRACE_PROBE1(fail1, int, rc);

	return (rc);
}

void
sfxge_rx_stop(sfxge_t *sp)
{
	sfxge_mac_t *smp = &(sp->s_mac);
	sfxge_intr_t *sip = &(sp->s_intr);
	efx_nic_t *enp = sp->s_enp;
	int index;

	/* Stop the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0)
		/* TBD: Flush RXQs in parallel; HW has limit + may need retry */
		sfxge_rx_qstop(sp, index);

	sfxge_rx_scale_stop(sp);

	mutex_enter(&(smp->sm_lock));

	/* Tear down the receive module */
	efx_rx_fini(enp);

	sp->s_rx_buffer_align = 0;
	sp->s_rx_prefix_size = 0;
	sp->s_rx_buffer_size = 0;

	mutex_exit(&(smp->sm_lock));
}

unsigned int
sfxge_rx_loaned(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;
	unsigned int loaned;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	loaned = 0;
	for (index = 0; index < sip->si_nalloc; index++) {
		sfxge_rxq_t *srp = sp->s_srp[index];
		sfxge_evq_t *sep = sp->s_sep[srp->sr_index];

		mutex_enter(&(sep->se_lock));

		loaned += sfxge_rx_qfpp_swizzle(srp);

		mutex_exit(&(sep->se_lock));
	}

	return (loaned);
}

void
sfxge_rx_fini(sfxge_t *sp)
{
	sfxge_intr_t *sip = &(sp->s_intr);
	int index;

	ASSERT3U(sip->si_state, ==, SFXGE_INTR_INITIALIZED);

	sp->s_rx_coalesce_mode = SFXGE_RX_COALESCE_OFF;

	/* Tear down the receive queue(s) */
	index = sip->si_nalloc;
	while (--index >= 0)
		sfxge_rx_qfini(sp, index);

	ASSERT3U(sp->s_rx_pkt_mem_alloc, ==, 0);

	kmem_cache_destroy(sp->s_rqc);
	sp->s_rqc = NULL;

	kmem_cache_destroy(sp->s_rpc);
	sp->s_rpc = NULL;

	sfxge_rx_scale_fini(sp);
}