1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008-2013 Solarflare Communications Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/atomic.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/pattr.h>
36 #include <sys/cpu.h>
37
38 #include <sys/ethernet.h>
39 #include <inet/ip.h>
40
41 #include <netinet/in.h>
42 #include <netinet/ip.h>
43 #include <netinet/tcp.h>
44
45 #include "sfxge.h"
46
47 #include "efx.h"
48
49 /* TXQ flush response timeout (in microseconds) */
50 #define SFXGE_TX_QFLUSH_USEC (2000000)
51 #define EVQ_0 0
52
53 /* See sfxge.conf.private for descriptions */
54 #define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 4096
55 #define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 256
56
57
58 /* Transmit buffer DMA attributes */
59 static ddi_device_acc_attr_t sfxge_tx_buffer_devacc = {
60
61 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
62 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
63 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
64 };
65
66 static ddi_dma_attr_t sfxge_tx_buffer_dma_attr = {
67 DMA_ATTR_V0, /* dma_attr_version */
68 0, /* dma_attr_addr_lo */
69 0xffffffffffffffffull, /* dma_attr_addr_hi */
70 0xffffffffffffffffull, /* dma_attr_count_max */
71 SFXGE_TX_BUFFER_SIZE, /* dma_attr_align */
72 0xffffffff, /* dma_attr_burstsizes */
73 1, /* dma_attr_minxfer */
74 0xffffffffffffffffull, /* dma_attr_maxxfer */
75 0xffffffffffffffffull, /* dma_attr_seg */
76 1, /* dma_attr_sgllen */
77 1, /* dma_attr_granular */
78 0 /* dma_attr_flags */
79 };
80
81 /* Transmit mapping DMA attributes */
82 static ddi_dma_attr_t sfxge_tx_mapping_dma_attr = {
83 DMA_ATTR_V0, /* dma_attr_version */
84 0, /* dma_attr_addr_lo */
85 0xffffffffffffffffull, /* dma_attr_addr_hi */
86 0xffffffffffffffffull, /* dma_attr_count_max */
87 1, /* dma_attr_align */
88 0xffffffff, /* dma_attr_burstsizes */
89 1, /* dma_attr_minxfer */
90 0xffffffffffffffffull, /* dma_attr_maxxfer */
91 0xffffffffffffffffull, /* dma_attr_seg */
92 0x7fffffff, /* dma_attr_sgllen */
93 1, /* dma_attr_granular */
94 0 /* dma_attr_flags */
95 };
96
97 /* Transmit queue DMA attributes */
98 static ddi_device_acc_attr_t sfxge_txq_devacc = {
99
100 DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
101 DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
102 DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
103 };
104
105 static ddi_dma_attr_t sfxge_txq_dma_attr = {
106 DMA_ATTR_V0, /* dma_attr_version */
107 0, /* dma_attr_addr_lo */
108 0xffffffffffffffffull, /* dma_attr_addr_hi */
109 0xffffffffffffffffull, /* dma_attr_count_max */
110 EFX_BUF_SIZE, /* dma_attr_align */
111 0xffffffff, /* dma_attr_burstsizes */
112 1, /* dma_attr_minxfer */
113 0xffffffffffffffffull, /* dma_attr_maxxfer */
114 0xffffffffffffffffull, /* dma_attr_seg */
115 1, /* dma_attr_sgllen */
116 1, /* dma_attr_granular */
117 0 /* dma_attr_flags */
118 };
119
120
/*
 * An sfxge_tx_qdpl_swizzle() can happen when the DPL get list is one packet
 * under the limit, and must then move all packets from the DPL put list to
 * the get list. Hence this is the real maximum length of the TX DPL get list.
 */
126 static int
127 sfxge_tx_dpl_get_pkt_max(sfxge_txq_t *stp)
128 {
129 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
130 return (stdp->get_pkt_limit + stdp->put_pkt_limit - 1);
131 }
132
133
134 static int
135 sfxge_tx_packet_ctor(void *buf, void *arg, int kmflags)
136 {
137 _NOTE(ARGUNUSED(arg, kmflags))
138
139 bzero(buf, sizeof (sfxge_tx_packet_t));
140
141 return (0);
142 }
143
144 static void
145 sfxge_tx_packet_dtor(void *buf, void *arg)
146 {
147 sfxge_tx_packet_t *stpp = buf;
148
149 _NOTE(ARGUNUSED(arg))
150
151 SFXGE_OBJ_CHECK(stpp, sfxge_tx_packet_t);
152 }
153
154 static int
155 sfxge_tx_buffer_ctor(void *buf, void *arg, int kmflags)
156 {
157 sfxge_tx_buffer_t *stbp = buf;
158 sfxge_t *sp = arg;
159 sfxge_dma_buffer_attr_t dma_attr;
160 int rc;
161
162 bzero(buf, sizeof (sfxge_tx_buffer_t));
163
164 dma_attr.sdba_dip = sp->s_dip;
165 dma_attr.sdba_dattrp = &sfxge_tx_buffer_dma_attr;
166 dma_attr.sdba_callback = ((kmflags == KM_SLEEP) ?
167 DDI_DMA_SLEEP : DDI_DMA_DONTWAIT);
168 dma_attr.sdba_length = SFXGE_TX_BUFFER_SIZE;
169 dma_attr.sdba_memflags = DDI_DMA_STREAMING;
170 dma_attr.sdba_devaccp = &sfxge_tx_buffer_devacc;
171 dma_attr.sdba_bindflags = DDI_DMA_WRITE | DDI_DMA_STREAMING;
172 dma_attr.sdba_maxcookies = 1;
173 dma_attr.sdba_zeroinit = B_FALSE;
174
175 if ((rc = sfxge_dma_buffer_create(&(stbp->stb_esm), &dma_attr)) != 0)
176 goto fail1;
177
178 return (0);
179
180 fail1:
181 DTRACE_PROBE1(fail1, int, rc);
182
183 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t);
184
185 return (-1);
186 }
187
188 static void
189 sfxge_tx_buffer_dtor(void *buf, void *arg)
190 {
191 sfxge_tx_buffer_t *stbp = buf;
192
193 _NOTE(ARGUNUSED(arg))
194
195 sfxge_dma_buffer_destroy(&(stbp->stb_esm));
196
197 SFXGE_OBJ_CHECK(stbp, sfxge_tx_buffer_t);
198 }
199
200 static int
201 sfxge_tx_mapping_ctor(void *buf, void *arg, int kmflags)
202 {
203 sfxge_tx_mapping_t *stmp = buf;
204 sfxge_t *sp = arg;
205 dev_info_t *dip = sp->s_dip;
206 int rc;
207
208 bzero(buf, sizeof (sfxge_tx_mapping_t));
209
210 stmp->stm_sp = sp;
211
212 /* Allocate DMA handle */
213 rc = ddi_dma_alloc_handle(dip, &sfxge_tx_mapping_dma_attr,
214 (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
215 NULL, &(stmp->stm_dma_handle));
216 if (rc != DDI_SUCCESS)
217 goto fail1;
218
219 return (0);
220
221 fail1:
222 DTRACE_PROBE1(fail1, int, rc);
223
224 stmp->stm_sp = NULL;
225
226 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t);
227
228 return (-1);
229 }
230
231 static void
232 sfxge_tx_mapping_dtor(void *buf, void *arg)
233 {
234 sfxge_tx_mapping_t *stmp = buf;
235
236 _NOTE(ARGUNUSED(arg))
237
238 ASSERT3P(stmp->stm_sp, ==, arg);
239
240 /* Free the DMA handle */
241 ddi_dma_free_handle(&(stmp->stm_dma_handle));
242 stmp->stm_dma_handle = NULL;
243
244 stmp->stm_sp = NULL;
245
246 SFXGE_OBJ_CHECK(stmp, sfxge_tx_mapping_t);
247 }
248
249 static int
250 sfxge_tx_qctor(void *buf, void *arg, int kmflags)
251 {
252 sfxge_txq_t *stp = buf;
253 efsys_mem_t *esmp = &(stp->st_mem);
254 sfxge_t *sp = arg;
255 sfxge_dma_buffer_attr_t dma_attr;
256 sfxge_tx_dpl_t *stdp;
257 int rc;
258
259 /* Compile-time structure layout checks */
260 EFX_STATIC_ASSERT(sizeof (stp->__st_u1.__st_s1) <=
261 sizeof (stp->__st_u1.__st_pad));
262 EFX_STATIC_ASSERT(sizeof (stp->__st_u2.__st_s2) <=
263 sizeof (stp->__st_u2.__st_pad));
264 EFX_STATIC_ASSERT(sizeof (stp->__st_u3.__st_s3) <=
265 sizeof (stp->__st_u3.__st_pad));
266 EFX_STATIC_ASSERT(sizeof (stp->__st_u4.__st_s4) <=
267 sizeof (stp->__st_u4.__st_pad));
268
269 bzero(buf, sizeof (sfxge_txq_t));
270
271 stp->st_sp = sp;
272
273 dma_attr.sdba_dip = sp->s_dip;
274 dma_attr.sdba_dattrp = &sfxge_txq_dma_attr;
275 dma_attr.sdba_callback = DDI_DMA_SLEEP;
276 dma_attr.sdba_length = EFX_TXQ_SIZE(SFXGE_TX_NDESCS);
277 dma_attr.sdba_memflags = DDI_DMA_CONSISTENT;
278 dma_attr.sdba_devaccp = &sfxge_txq_devacc;
279 dma_attr.sdba_bindflags = DDI_DMA_READ | DDI_DMA_CONSISTENT;
280 dma_attr.sdba_maxcookies = EFX_TXQ_NBUFS(SFXGE_TX_NDESCS);
281 dma_attr.sdba_zeroinit = B_FALSE;
282
283 if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
284 goto fail1;
285
286 /* Allocate some buffer table entries */
287 if ((rc = sfxge_sram_buf_tbl_alloc(sp, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS),
288 &(stp->st_id))) != 0)
289 goto fail2;
290
291 /* Allocate the descriptor array */
292 if ((stp->st_eb = kmem_zalloc(sizeof (efx_buffer_t) *
293 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS), kmflags)) == NULL) {
294 rc = ENOMEM;
295 goto fail3;
296 }
297
298 /* Allocate the context arrays */
299 if ((stp->st_stmp = kmem_zalloc(sizeof (sfxge_tx_mapping_t *) *
300 SFXGE_TX_NDESCS, kmflags)) == NULL) {
301 rc = ENOMEM;
302 goto fail4;
303 }
304
305 if ((stp->st_stbp = kmem_zalloc(sizeof (sfxge_tx_buffer_t *) *
306 SFXGE_TX_NDESCS, kmflags)) == NULL) {
307 rc = ENOMEM;
308 goto fail5;
309 }
310
311 if ((stp->st_mp = kmem_zalloc(sizeof (mblk_t *) *
312 SFXGE_TX_NDESCS, kmflags)) == NULL) {
313 rc = ENOMEM;
314 goto fail6;
315 }
316
317 /* Initialize the deferred packet list */
318 stdp = &(stp->st_dpl);
319 stdp->std_getp = &(stdp->std_get);
320
321 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
322
323 return (0);
324
325 fail6:
326 DTRACE_PROBE(fail6);
327
328 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS);
329 stp->st_stbp = NULL;
330
331 fail5:
332 DTRACE_PROBE(fail5);
333
334 kmem_free(stp->st_stmp,
335 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
336 stp->st_stmp = NULL;
337
338 fail4:
339 DTRACE_PROBE(fail4);
340
341 /* Free the descriptor array */
342 kmem_free(stp->st_eb, sizeof (efx_buffer_t) *
343 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
344 stp->st_eb = NULL;
345
346 fail3:
347 DTRACE_PROBE(fail3);
348
349 /* Free the buffer table entries */
350 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
351 stp->st_id = 0;
352
353 fail2:
354 DTRACE_PROBE(fail2);
355
356 /* Tear down DMA setup */
357 sfxge_dma_buffer_destroy(esmp);
358
359 fail1:
360 DTRACE_PROBE1(fail1, int, rc);
361
362 stp->st_sp = NULL;
363
364 SFXGE_OBJ_CHECK(stp, sfxge_txq_t);
365
366 return (-1);
367 }
368
369 static void
370 sfxge_tx_qdtor(void *buf, void *arg)
371 {
372 sfxge_txq_t *stp = buf;
373 efsys_mem_t *esmp = &(stp->st_mem);
374 sfxge_t *sp = stp->st_sp;
375 sfxge_tx_dpl_t *stdp;
376
377 _NOTE(ARGUNUSED(arg))
378
379 stp->st_unblock = 0;
380
381 /* Tear down the deferred packet list */
382 stdp = &(stp->st_dpl);
383 ASSERT3P(stdp->std_getp, ==, &(stdp->std_get));
384 stdp->std_getp = NULL;
385
386 /* Free the context arrays */
387 kmem_free(stp->st_mp, sizeof (mblk_t *) * SFXGE_TX_NDESCS);
388 stp->st_mp = NULL;
389
390 kmem_free(stp->st_stbp, sizeof (sfxge_tx_buffer_t *) * SFXGE_TX_NDESCS);
391 stp->st_stbp = NULL;
392
393 kmem_free(stp->st_stmp,
394 sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
395 stp->st_stmp = NULL;
396
397 /* Free the descriptor array */
398 kmem_free(stp->st_eb, sizeof (efx_buffer_t) *
399 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
400 stp->st_eb = NULL;
401
402 /* Free the buffer table entries */
403 sfxge_sram_buf_tbl_free(sp, stp->st_id, EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
404 stp->st_id = 0;
405
	/* Tear down DMA setup */
407 sfxge_dma_buffer_destroy(esmp);
408
409 stp->st_sp = NULL;
410
411 SFXGE_OBJ_CHECK(stp, sfxge_txq_t);
412 }
413
414 static void
415 sfxge_tx_packet_destroy(sfxge_t *sp, sfxge_tx_packet_t *stpp)
416 {
417 kmem_cache_free(sp->s_tpc, stpp);
418 }
419
420 static sfxge_tx_packet_t *
421 sfxge_tx_packet_create(sfxge_t *sp)
422 {
423 sfxge_tx_packet_t *stpp;
424
425 stpp = kmem_cache_alloc(sp->s_tpc, KM_NOSLEEP);
426
427 return (stpp);
428 }
429
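/*
 * Return a packet structure to the TXQ's free packet pool (FPP), unless the
 * pool already holds SFXGE_TX_FPP_MAX entries, in which case ENOSPC is
 * returned and the caller should destroy the packet instead.
 */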
430 static inline int
431 sfxge_tx_qfpp_put(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp)
432 {
433 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
434
435 ASSERT(mutex_owned(&(stp->st_lock)));
436
437 ASSERT3P(stpp->stp_next, ==, NULL);
438 ASSERT3P(stpp->stp_mp, ==, NULL);
439 ASSERT3P(stpp->stp_etherhp, ==, NULL);
440 ASSERT3P(stpp->stp_iphp, ==, NULL);
441 ASSERT3P(stpp->stp_thp, ==, NULL);
442 ASSERT3U(stpp->stp_off, ==, 0);
443 ASSERT3U(stpp->stp_size, ==, 0);
444 ASSERT3U(stpp->stp_mss, ==, 0);
445 ASSERT3U(stpp->stp_dpl_put_len, ==, 0);
446
447 if (stfp->stf_count < SFXGE_TX_FPP_MAX) {
448 /* Add to the start of the list */
449 stpp->stp_next = stfp->stf_stpp;
450 stfp->stf_stpp = stpp;
451 stfp->stf_count++;
452
453 return (0);
454 }
455
456 DTRACE_PROBE(fpp_full);
457 return (ENOSPC);
458 }
459
460 static inline sfxge_tx_packet_t *
461 sfxge_tx_qfpp_get(sfxge_txq_t *stp)
462 {
463 sfxge_tx_packet_t *stpp;
464 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
465
466 ASSERT(mutex_owned(&(stp->st_lock)));
467
468 stpp = stfp->stf_stpp;
469 if (stpp == NULL) {
470 ASSERT3U(stfp->stf_count, ==, 0);
471 return (NULL);
472 }
473
474 /* Remove item from the head of the list */
475 stfp->stf_stpp = stpp->stp_next;
476 stpp->stp_next = NULL;
477
478 ASSERT3U(stfp->stf_count, >, 0);
479 stfp->stf_count--;
480
481 if (stfp->stf_count != 0) {
482 ASSERT(stfp->stf_stpp != NULL);
483 prefetch_read_many(stfp->stf_stpp);
484 }
485 return (stpp);
486 }
487
488 static void
489 sfxge_tx_qfpp_empty(sfxge_txq_t *stp)
490 {
491 sfxge_t *sp = stp->st_sp;
492 sfxge_tx_fpp_t *stfp = &(stp->st_fpp);
493 sfxge_tx_packet_t *stpp;
494
495 mutex_enter(&(stp->st_lock));
496
497 stpp = stfp->stf_stpp;
498 stfp->stf_stpp = NULL;
499
500 while (stpp != NULL) {
501 sfxge_tx_packet_t *next;
502
503 next = stpp->stp_next;
504 stpp->stp_next = NULL;
505
506 ASSERT3U(stfp->stf_count, >, 0);
507 stfp->stf_count--;
508
509 sfxge_tx_packet_destroy(sp, stpp);
510
511 stpp = next;
512 }
513 ASSERT3U(stfp->stf_count, ==, 0);
514
515 mutex_exit(&(stp->st_lock));
516 }
517
518 static inline void
519 sfxge_tx_qfbp_put(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp)
520 {
521 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
522
523 ASSERT3P(stbp->stb_next, ==, NULL);
524 ASSERT3U(stbp->stb_off, ==, 0);
525 ASSERT3U(stbp->stb_esm.esm_size, ==, 0);
526
527 stbp->stb_next = stfp->stf_stbp;
528 stfp->stf_stbp = stbp;
529 stfp->stf_count++;
530 }
531
532
533 static inline sfxge_tx_buffer_t *
534 sfxge_tx_qfbp_get(sfxge_txq_t *stp)
535 {
536 sfxge_tx_buffer_t *stbp;
537 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
538
539 stbp = stfp->stf_stbp;
540 if (stbp == NULL) {
541 ASSERT3U(stfp->stf_count, ==, 0);
542 return (NULL);
543 }
544
545 stfp->stf_stbp = stbp->stb_next;
546 stbp->stb_next = NULL;
547
548 ASSERT3U(stfp->stf_count, >, 0);
549 stfp->stf_count--;
550
551 if (stfp->stf_count != 0) {
552 ASSERT(stfp->stf_stbp != NULL);
553 prefetch_read_many(stfp->stf_stbp);
554 }
555
556 return (stbp);
557 }
558
559 static void
560 sfxge_tx_qfbp_empty(sfxge_txq_t *stp)
561 {
562 sfxge_t *sp = stp->st_sp;
563 sfxge_tx_fbp_t *stfp = &(stp->st_fbp);
564 sfxge_tx_buffer_t *stbp;
565
566 mutex_enter(&(stp->st_lock));
567
568 stbp = stfp->stf_stbp;
569 stfp->stf_stbp = NULL;
570
571 while (stbp != NULL) {
572 sfxge_tx_buffer_t *next;
573
574 next = stbp->stb_next;
575 stbp->stb_next = NULL;
576
577 ASSERT3U(stfp->stf_count, >, 0);
578 stfp->stf_count--;
579
580 kmem_cache_free(sp->s_tbc, stbp);
581
582 stbp = next;
583 }
584 ASSERT3U(stfp->stf_count, ==, 0);
585
586 mutex_exit(&(stp->st_lock));
587 }
588
589 static inline void
590 sfxge_tx_qfmp_put(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp)
591 {
592 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
593
594 ASSERT3P(stmp->stm_next, ==, NULL);
595 ASSERT3P(stmp->stm_mp, ==, NULL);
596 ASSERT3P(stmp->stm_base, ==, NULL);
597 ASSERT3U(stmp->stm_off, ==, 0);
598 ASSERT3U(stmp->stm_size, ==, 0);
599
600 stmp->stm_next = stfp->stf_stmp;
601 stfp->stf_stmp = stmp;
602 stfp->stf_count++;
603 }
604
605 static inline sfxge_tx_mapping_t *
606 sfxge_tx_qfmp_get(sfxge_txq_t *stp)
607 {
608 sfxge_tx_mapping_t *stmp;
609 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
610
611 stmp = stfp->stf_stmp;
612 if (stmp == NULL) {
613 ASSERT3U(stfp->stf_count, ==, 0);
614 return (NULL);
615 }
616
617 stfp->stf_stmp = stmp->stm_next;
618 stmp->stm_next = NULL;
619
620 ASSERT3U(stfp->stf_count, >, 0);
621 stfp->stf_count--;
622
623 if (stfp->stf_count != 0) {
624 ASSERT(stfp->stf_stmp != NULL);
625 prefetch_read_many(stfp->stf_stmp);
626 }
627 return (stmp);
628 }
629
630 static void
631 sfxge_tx_qfmp_empty(sfxge_txq_t *stp)
632 {
633 sfxge_t *sp = stp->st_sp;
634 sfxge_tx_fmp_t *stfp = &(stp->st_fmp);
635 sfxge_tx_mapping_t *stmp;
636
637 mutex_enter(&(stp->st_lock));
638
639 stmp = stfp->stf_stmp;
640 stfp->stf_stmp = NULL;
641
642 while (stmp != NULL) {
643 sfxge_tx_mapping_t *next;
644
645 next = stmp->stm_next;
646 stmp->stm_next = NULL;
647
648 ASSERT3U(stfp->stf_count, >, 0);
649 stfp->stf_count--;
650
651 kmem_cache_free(sp->s_tmc, stmp);
652
653 stmp = next;
654 }
655 ASSERT3U(stfp->stf_count, ==, 0);
656
657 mutex_exit(&(stp->st_lock));
658 }
659
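/* Undo the DMA binding of a data block and clear the mapping state */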
660 static void
661 sfxge_tx_msgb_unbind(sfxge_tx_mapping_t *stmp)
662 {
663 bzero(stmp->stm_addr, sizeof (uint64_t) * SFXGE_TX_MAPPING_NADDR);
664 stmp->stm_off = 0;
665
666 (void) ddi_dma_unbind_handle(stmp->stm_dma_handle);
667
668 stmp->stm_size = 0;
669 stmp->stm_base = NULL;
670
671 stmp->stm_mp = NULL;
672 }
673
674 #define SFXGE_TX_DESCSHIFT 12
#define	SFXGE_TX_DESCSIZE	(1 << SFXGE_TX_DESCSHIFT)
676
677 #define SFXGE_TX_DESCOFFSET (SFXGE_TX_DESCSIZE - 1)
678 #define SFXGE_TX_DESCMASK (~SFXGE_TX_DESCOFFSET)
679
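/*
 * Bind a STREAMS data block to a transmit mapping for DMA, recording the
 * 4KB-aligned addresses that cover it and the offset of the data within the
 * first page.
 */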
680 static int
681 sfxge_tx_msgb_bind(mblk_t *mp, sfxge_tx_mapping_t *stmp)
682 {
683 ddi_dma_cookie_t dmac;
684 unsigned int ncookies;
685 size_t size;
686 unsigned int n;
687 int rc;
688
689 ASSERT(mp != NULL);
690 ASSERT3U(DB_TYPE(mp), ==, M_DATA);
691
692 ASSERT(stmp->stm_mp == NULL);
693 stmp->stm_mp = mp;
694
695 stmp->stm_base = (caddr_t)(mp->b_rptr);
696 stmp->stm_size = MBLKL(mp);
697
698 /* Bind the STREAMS block to the mapping */
699 rc = ddi_dma_addr_bind_handle(stmp->stm_dma_handle, NULL,
700 stmp->stm_base, stmp->stm_size, DDI_DMA_WRITE | DDI_DMA_STREAMING,
701 DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);
702 if (rc != DDI_DMA_MAPPED)
703 goto fail1;
704
705 ASSERT3U(ncookies, <=, SFXGE_TX_MAPPING_NADDR);
706
707 /*
708 * Construct an array of addresses and an initial
709 * offset.
710 */
711 n = 0;
712 stmp->stm_addr[n++] = dmac.dmac_laddress & SFXGE_TX_DESCMASK;
713 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress & SFXGE_TX_DESCMASK);
714
715 stmp->stm_off = dmac.dmac_laddress & SFXGE_TX_DESCOFFSET;
716
717 size = MIN(SFXGE_TX_DESCSIZE - stmp->stm_off, dmac.dmac_size);
718 dmac.dmac_laddress += size;
719 dmac.dmac_size -= size;
720
721 for (;;) {
722 ASSERT3U(n, <, SFXGE_TX_MAPPING_NADDR);
723
724 if (dmac.dmac_size == 0) {
725 if (--ncookies == 0)
726 break;
727
728 ddi_dma_nextcookie(stmp->stm_dma_handle, &dmac);
729 }
730
731 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCMASK) != 0);
732 ASSERT((dmac.dmac_laddress & SFXGE_TX_DESCOFFSET) == 0);
733 stmp->stm_addr[n++] = dmac.dmac_laddress;
734 DTRACE_PROBE1(addr, uint64_t, dmac.dmac_laddress);
735
736 size = MIN(SFXGE_TX_DESCSIZE, dmac.dmac_size);
737 dmac.dmac_laddress += size;
738 dmac.dmac_size -= size;
739 }
740 ASSERT3U(n, <=, SFXGE_TX_MAPPING_NADDR);
741
742 return (0);
743
744 fail1:
745 DTRACE_PROBE1(fail1, int, rc);
746
747 stmp->stm_size = 0;
748 stmp->stm_base = NULL;
749
750 stmp->stm_mp = NULL;
751
752 return (-1);
753 }
754
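/*
 * Return the DMA mappings and copy buffers of completed descriptors to the
 * TXQ free pools. The associated mblks are unbound and freed separately in
 * sfxge_tx_qcomplete(); reaping only recycles driver state for descriptors
 * between st_reaped and st_completed.
 */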
755 static void
756 sfxge_tx_qreap(sfxge_txq_t *stp)
757 {
758 unsigned int reaped;
759
760 ASSERT(mutex_owned(&(stp->st_lock)));
761
762 reaped = stp->st_reaped;
763 while (reaped != stp->st_completed) {
764 unsigned int id;
765 sfxge_tx_mapping_t *stmp;
766 sfxge_tx_buffer_t *stbp;
767
768 id = reaped++ & (SFXGE_TX_NDESCS - 1);
769
770 ASSERT3P(stp->st_mp[id], ==, NULL);
771
772 if ((stmp = stp->st_stmp[id]) != NULL) {
773 stp->st_stmp[id] = NULL;
774
775 /* Free all the mappings */
776 do {
777 sfxge_tx_mapping_t *next;
778
779 next = stmp->stm_next;
780 stmp->stm_next = NULL;
781
782 sfxge_tx_qfmp_put(stp, stmp);
783
784 stmp = next;
785 } while (stmp != NULL);
786 }
787
788 if ((stbp = stp->st_stbp[id]) != NULL) {
789 stp->st_stbp[id] = NULL;
790
791 /* Free all the buffers */
792 do {
793 sfxge_tx_buffer_t *next;
794
795 next = stbp->stb_next;
796 stbp->stb_next = NULL;
797
798 stbp->stb_esm.esm_size = 0;
799 stbp->stb_off = 0;
800
801 sfxge_tx_qfbp_put(stp, stbp);
802
803 stbp = next;
804 } while (stbp != NULL);
805 }
806 }
807 stp->st_reaped = reaped;
808 }
809
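/*
 * Abort the fragment list currently being built: unbind any DMA mappings,
 * return mappings and buffers to the free pools, free the message and clear
 * the fragment count.
 */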
810 static void
811 sfxge_tx_qlist_abort(sfxge_txq_t *stp)
812 {
813 unsigned int id;
814 sfxge_tx_mapping_t *stmp;
815 sfxge_tx_buffer_t *stbp;
816 mblk_t *mp;
817
818 ASSERT(mutex_owned(&(stp->st_lock)));
819
820 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
821
822 /* Clear the completion information */
823 stmp = stp->st_stmp[id];
824 stp->st_stmp[id] = NULL;
825
826 /* Free any mappings that were used */
827 while (stmp != NULL) {
828 sfxge_tx_mapping_t *next;
829
830 next = stmp->stm_next;
831 stmp->stm_next = NULL;
832
833 if (stmp->stm_mp != NULL)
834 sfxge_tx_msgb_unbind(stmp);
835
836 sfxge_tx_qfmp_put(stp, stmp);
837
838 stmp = next;
839 }
840
841 stbp = stp->st_stbp[id];
842 stp->st_stbp[id] = NULL;
843
844 /* Free any buffers that were used */
845 while (stbp != NULL) {
846 sfxge_tx_buffer_t *next;
847
848 next = stbp->stb_next;
849 stbp->stb_next = NULL;
850
851 stbp->stb_off = 0;
852 stbp->stb_esm.esm_size = 0;
853
854 sfxge_tx_qfbp_put(stp, stbp);
855
856 stbp = next;
857 }
858
859 mp = stp->st_mp[id];
860 stp->st_mp[id] = NULL;
861
862 if (mp != NULL)
863 freemsg(mp);
864
865 /* Clear the fragment list */
866 stp->st_n = 0;
867 }
868
/* Push descriptors to the TX ring, blocking the queue if there is no space */
870 static void
871 sfxge_tx_qlist_post(sfxge_txq_t *stp)
872 {
873 unsigned int id;
874 unsigned int level;
875 unsigned int available;
876 int rc;
877
878 ASSERT(mutex_owned(&(stp->st_lock)));
879
880 ASSERT(stp->st_n != 0);
881
882 again:
883 level = stp->st_added - stp->st_reaped;
884 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;
885
886 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
887
888 if (available < stp->st_n) {
889 rc = ENOSPC;
890 goto fail1;
891 }
892
893 ASSERT3U(available, >=, stp->st_n);
894
895 /* Post the fragment list */
896 if ((rc = efx_tx_qpost(stp->st_etp, stp->st_eb, stp->st_n,
897 stp->st_reaped, &(stp->st_added))) != 0)
898 goto fail2;
899
900 /*
	 * If the list took more than a single descriptor then we need to move
	 * the completion information so that it is referenced by the last
903 * descriptor.
904 */
905 if (((stp->st_added - 1) & (SFXGE_TX_NDESCS - 1)) != id) {
906 sfxge_tx_mapping_t *stmp;
907 sfxge_tx_buffer_t *stbp;
908 mblk_t *mp;
909
910 stmp = stp->st_stmp[id];
911 stp->st_stmp[id] = NULL;
912
913 stbp = stp->st_stbp[id];
914 stp->st_stbp[id] = NULL;
915
916 mp = stp->st_mp[id];
917 stp->st_mp[id] = NULL;
918
919 id = (stp->st_added - 1) & (SFXGE_TX_NDESCS - 1);
920
921 ASSERT(stp->st_stmp[id] == NULL);
922 stp->st_stmp[id] = stmp;
923
924 ASSERT(stp->st_stbp[id] == NULL);
925 stp->st_stbp[id] = stbp;
926
927 ASSERT(stp->st_mp[id] == NULL);
928 stp->st_mp[id] = mp;
929 }
930
931 /* Make the descriptors visible to the hardware */
932 (void) ddi_dma_sync(stp->st_mem.esm_dma_handle,
933 0,
934 EFX_TXQ_SIZE(SFXGE_TX_NDESCS),
935 DDI_DMA_SYNC_FORDEV);
936
937 /* Clear the list */
938 stp->st_n = 0;
939
940 ASSERT3U(stp->st_unblock, ==, SFXGE_TXQ_NOT_BLOCKED);
941 return;
942
943 fail2:
944 DTRACE_PROBE(fail2);
945 fail1:
946 DTRACE_PROBE1(fail1, int, rc);
947
948 ASSERT(rc == ENOSPC);
949
950 level = stp->st_added - stp->st_completed;
951 available = EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) - level;
952
	/*
	 * If reaping completed mappings and buffers would free enough space
	 * for the list, and there is at least a batch's worth of descriptors
	 * waiting to be reaped, then reap now and try posting again.
	 */
958 if (stp->st_n <= available &&
959 stp->st_completed - stp->st_reaped >= SFXGE_TX_BATCH) {
960 sfxge_tx_qreap(stp);
961
962 goto again;
963 }
964
965 /* Set the unblock level */
966 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED) {
967 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL1;
968 } else {
969 ASSERT(stp->st_unblock == SFXGE_TXQ_UNBLOCK_LEVEL1);
970
971 stp->st_unblock = SFXGE_TXQ_UNBLOCK_LEVEL2;
972 }
973
974 /*
975 * Avoid a race with completion interrupt handling that could leave the
976 * queue blocked.
977 *
978 * NOTE: The use of st_pending rather than st_completed is intentional
979 * as st_pending is updated per-event rather than per-batch and
980 * therefore avoids needless deferring.
981 */
982 if (stp->st_pending == stp->st_added) {
983 sfxge_tx_qreap(stp);
984
985 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
986 goto again;
987 }
988
989 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED);
990 }
991
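/*
 * kstat update callback: refresh the hardware transmit queue statistics and
 * report the DPL limits and full counts.
 */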
992 static int
993 sfxge_tx_kstat_update(kstat_t *ksp, int rw)
994 {
995 sfxge_txq_t *stp = ksp->ks_private;
996 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
997 kstat_named_t *knp;
998 int rc;
999
1000 ASSERT(mutex_owned(&(stp->st_lock)));
1001
1002 if (rw != KSTAT_READ) {
1003 rc = EACCES;
1004 goto fail1;
1005 }
1006
1007 if (stp->st_state != SFXGE_TXQ_STARTED)
1008 goto done;
1009
1010 efx_tx_qstats_update(stp->st_etp, stp->st_stat);
1011 knp = (kstat_named_t *)ksp->ks_data + TX_NQSTATS;
1012 knp->value.ui64 = stdp->get_pkt_limit;
1013 knp++;
1014 knp->value.ui64 = stdp->put_pkt_limit;
1015 knp++;
1016 knp->value.ui64 = stdp->get_full_count;
1017 knp++;
1018 knp->value.ui64 = stdp->put_full_count;
1019
1020 done:
1021 return (0);
1022
1023 fail1:
1024 DTRACE_PROBE1(fail1, int, rc);
1025
1026 return (rc);
1027 }
1028
1029 static int
1030 sfxge_tx_kstat_init(sfxge_txq_t *stp)
1031 {
1032 sfxge_t *sp = stp->st_sp;
1033 unsigned int index = stp->st_index;
1034 dev_info_t *dip = sp->s_dip;
1035 kstat_t *ksp;
1036 kstat_named_t *knp;
1037 char name[MAXNAMELEN];
1038 unsigned int id;
1039 int rc;
1040
1041 /* Create the set */
1042 (void) snprintf(name, MAXNAMELEN - 1, "%s_txq%04d",
1043 ddi_driver_name(dip), index);
1044
1045 if ((ksp = kstat_create((char *)ddi_driver_name(dip),
1046 ddi_get_instance(dip), name, "queue", KSTAT_TYPE_NAMED,
1047 TX_NQSTATS + 4, 0)) == NULL) {
1048 rc = ENOMEM;
1049 goto fail1;
1050 }
1051
1052 stp->st_ksp = ksp;
1053
1054 ksp->ks_update = sfxge_tx_kstat_update;
1055 ksp->ks_private = stp;
1056 ksp->ks_lock = &(stp->st_lock);
1057
1058 /* Initialise the named stats */
1059 stp->st_stat = knp = ksp->ks_data;
1060 for (id = 0; id < TX_NQSTATS; id++) {
1061 kstat_named_init(knp, (char *)efx_tx_qstat_name(sp->s_enp, id),
1062 KSTAT_DATA_UINT64);
1063 knp++;
1064 }
1065 kstat_named_init(knp, "dpl_get_pkt_limit", KSTAT_DATA_UINT64);
1066 knp++;
1067 kstat_named_init(knp, "dpl_put_pkt_limit", KSTAT_DATA_UINT64);
1068 knp++;
1069 kstat_named_init(knp, "dpl_get_full_count", KSTAT_DATA_UINT64);
1070 knp++;
1071 kstat_named_init(knp, "dpl_put_full_count", KSTAT_DATA_UINT64);
1072
1073 kstat_install(ksp);
1074 return (0);
1075
1076 fail1:
1077 DTRACE_PROBE1(fail1, int, rc);
1078
1079 return (rc);
1080 }
1081
1082 static void
1083 sfxge_tx_kstat_fini(sfxge_txq_t *stp)
1084 {
1085 /* Destroy the set */
1086 kstat_delete(stp->st_ksp);
1087 stp->st_ksp = NULL;
1088 stp->st_stat = NULL;
1089 }
1090
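/*
 * Allocate a transmit queue of the given type, bind it to the given event
 * queue, initialize its lock, kstats and deferred packet list limits, and
 * attach it to the driver instance.
 */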
1091 static int
1092 sfxge_tx_qinit(sfxge_t *sp, unsigned int index, sfxge_txq_type_t type,
1093 unsigned int evq)
1094 {
1095 sfxge_txq_t *stp;
1096 sfxge_tx_dpl_t *stdp;
1097 int rc;
1098
1099 ASSERT3U(index, <, SFXGE_TXQ_NTYPES + SFXGE_RX_SCALE_MAX);
1100 ASSERT3U(type, <, SFXGE_TXQ_NTYPES);
1101 ASSERT3U(evq, <, SFXGE_RX_SCALE_MAX);
1102
1103 stp = kmem_cache_alloc(sp->s_tqc, KM_SLEEP);
1104 stdp = &(stp->st_dpl);
1105
1106 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_UNINITIALIZED);
1107
1108 stp->st_index = index;
1109 stp->st_type = type;
1110 stp->st_evq = evq;
1111
1112 mutex_init(&(stp->st_lock), NULL, MUTEX_DRIVER,
1113 DDI_INTR_PRI(sp->s_intr.si_intr_pri));
1114
1115 /* Initialize the statistics */
1116 if ((rc = sfxge_tx_kstat_init(stp)) != 0)
1117 goto fail1;
1118
1119 stdp->get_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
1120 DDI_PROP_DONTPASS, "tx_dpl_get_pkt_limit",
1121 SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT);
1122
1123 stdp->put_pkt_limit = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
1124 DDI_PROP_DONTPASS, "tx_dpl_put_pkt_limit",
1125 SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT);
1126
1127 stp->st_state = SFXGE_TXQ_INITIALIZED;
1128
1129 /* Attach the TXQ to the driver */
1130 ASSERT3P(sp->s_stp[index], ==, NULL);
1131 sp->s_stp[index] = stp;
1132 sp->s_tx_qcount++;
1133
1134 return (0);
1135
1136 fail1:
1137 DTRACE_PROBE1(fail1, int, rc);
1138
1139 stp->st_evq = 0;
1140 stp->st_type = 0;
1141 stp->st_index = 0;
1142
1143 mutex_destroy(&(stp->st_lock));
1144
1145 kmem_cache_free(sp->s_tqc, stp);
1146
1147 return (rc);
1148 }
1149
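/*
 * Program the buffer table, create and enable the hardware transmit queue
 * and mark the TXQ started.
 */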
1150 static int
1151 sfxge_tx_qstart(sfxge_t *sp, unsigned int index)
1152 {
1153 sfxge_txq_t *stp = sp->s_stp[index];
1154 efx_nic_t *enp = sp->s_enp;
1155 efsys_mem_t *esmp;
1156 sfxge_evq_t *sep;
1157 unsigned int evq;
1158 unsigned int flags;
1159 int rc;
1160
1161 mutex_enter(&(stp->st_lock));
1162
1163 esmp = &(stp->st_mem);
1164 evq = stp->st_evq;
1165 sep = sp->s_sep[evq];
1166
1167 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
1168 ASSERT3U(sep->se_state, ==, SFXGE_EVQ_STARTED);
1169
1170 /* Zero the memory */
1171 (void) memset(esmp->esm_base, 0, EFX_TXQ_SIZE(SFXGE_TX_NDESCS));
1172
1173 /* Program the buffer table */
1174 if ((rc = sfxge_sram_buf_tbl_set(sp, stp->st_id, esmp,
1175 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS))) != 0)
1176 goto fail1;
1177
1178 switch (stp->st_type) {
1179 case SFXGE_TXQ_NON_CKSUM:
1180 flags = 0;
1181 break;
1182
1183 case SFXGE_TXQ_IP_CKSUM:
1184 flags = EFX_CKSUM_IPV4;
1185 break;
1186
1187 case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
1188 flags = EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP;
1189 break;
1190
1191 default:
1192 ASSERT(B_FALSE);
1193
1194 flags = 0;
1195 break;
1196 }
1197
1198 /* Create the transmit queue */
1199 if ((rc = efx_tx_qcreate(enp, index, index, esmp, SFXGE_TX_NDESCS,
1200 stp->st_id, flags, sep->se_eep, &(stp->st_etp))) != 0)
1201 goto fail2;
1202
1203 /* Enable the transmit queue */
1204 efx_tx_qenable(stp->st_etp);
1205
1206 stp->st_state = SFXGE_TXQ_STARTED;
1207
1208 mutex_exit(&(stp->st_lock));
1209
1210 return (0);
1211
1212 fail2:
1213 DTRACE_PROBE(fail2);
1214
1215 /* Clear entries from the buffer table */
1216 sfxge_sram_buf_tbl_clear(sp, stp->st_id,
1217 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
1218
1219 fail1:
1220 DTRACE_PROBE1(fail1, int, rc);
1221
1222 mutex_exit(&(stp->st_lock));
1223
1224 return (rc);
1225 }
1226
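/*
 * Add fragments for a DMA-mapped data block to the TXQ fragment list,
 * splitting the mapping on 4KB descriptor page boundaries. *offp is advanced
 * and *limitp reduced by the amount of data consumed.
 */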
1227 static inline int
1228 sfxge_tx_qmapping_add(sfxge_txq_t *stp, sfxge_tx_mapping_t *stmp,
1229 size_t *offp, size_t *limitp)
1230 {
1231 mblk_t *mp;
1232 size_t mapping_off;
1233 size_t mapping_size;
1234 int rc;
1235
1236 ASSERT3U(*offp, <, stmp->stm_size);
1237 ASSERT(*limitp != 0);
1238
1239 mp = stmp->stm_mp;
1240
1241 ASSERT3P(stmp->stm_base, ==, mp->b_rptr);
1242 ASSERT3U(stmp->stm_size, ==, MBLKL(mp));
1243
1244 mapping_off = stmp->stm_off + *offp;
1245 mapping_size = stmp->stm_size - *offp;
1246
1247 while (mapping_size != 0 && *limitp != 0) {
1248 size_t page =
1249 mapping_off >> SFXGE_TX_DESCSHIFT;
1250 size_t page_off =
1251 mapping_off & SFXGE_TX_DESCOFFSET;
1252 size_t page_size =
1253 SFXGE_TX_DESCSIZE - page_off;
1254 efx_buffer_t *ebp;
1255
1256 ASSERT3U(page, <, SFXGE_TX_MAPPING_NADDR);
1257 ASSERT((stmp->stm_addr[page] &
1258 SFXGE_TX_DESCMASK) != 0);
1259
1260 page_size = MIN(page_size, mapping_size);
1261 page_size = MIN(page_size, *limitp);
1262
1263 ASSERT3U(stp->st_n, <=,
1264 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
1265 if (stp->st_n ==
1266 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) {
1267 rc = ENOSPC;
1268 goto fail1;
1269 }
1270
1271 ebp = &(stp->st_eb[stp->st_n++]);
1272 ebp->eb_addr = stmp->stm_addr[page] +
1273 page_off;
1274 ebp->eb_size = page_size;
1275
1276 *offp += page_size;
1277 *limitp -= page_size;
1278
1279 mapping_off += page_size;
1280 mapping_size -= page_size;
1281
1282 ebp->eb_eop = (*limitp == 0 ||
1283 (mapping_size == 0 && mp->b_cont == NULL));
1284
1285 DTRACE_PROBE5(tx_mapping_add,
1286 unsigned int, stp->st_index,
1287 unsigned int, stp->st_n - 1,
1288 uint64_t, ebp->eb_addr,
1289 size_t, ebp->eb_size,
1290 boolean_t, ebp->eb_eop);
1291 }
1292
1293 ASSERT3U(*offp, <=, stmp->stm_size);
1294
1295 return (0);
1296
1297 fail1:
1298 DTRACE_PROBE1(fail1, int, rc);
1299
1300 return (rc);
1301 }
1302
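/*
 * Add the unposted region of a copy buffer to the TXQ fragment list as a
 * single fragment, syncing it for device access and marking the whole buffer
 * as posted.
 */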
1303 static inline int
1304 sfxge_tx_qbuffer_add(sfxge_txq_t *stp, sfxge_tx_buffer_t *stbp, boolean_t eop)
1305 {
1306 efx_buffer_t *ebp;
1307 int rc;
1308
1309 ASSERT3U(stp->st_n, <=,
1310 EFX_TXQ_LIMIT(SFXGE_TX_NDESCS));
1311 if (stp->st_n == EFX_TXQ_LIMIT(SFXGE_TX_NDESCS)) {
1312 rc = ENOSPC;
1313 goto fail1;
1314 }
1315
1316 ebp = &(stp->st_eb[stp->st_n++]);
1317 ebp->eb_addr = stbp->stb_esm.esm_addr + stbp->stb_off;
1318 ebp->eb_size = stbp->stb_esm.esm_size - stbp->stb_off;
1319 ebp->eb_eop = eop;
1320
1321 (void) ddi_dma_sync(stbp->stb_esm.esm_dma_handle,
1322 stbp->stb_off, ebp->eb_size,
1323 DDI_DMA_SYNC_FORDEV);
1324
1325 stbp->stb_off = stbp->stb_esm.esm_size;
1326
1327 DTRACE_PROBE5(tx_buffer_add,
1328 unsigned int, stp->st_index,
1329 unsigned int, stp->st_n - 1,
1330 uint64_t, ebp->eb_addr, size_t, ebp->eb_size,
1331 boolean_t, ebp->eb_eop);
1332
1333 return (0);
1334
1335 fail1:
1336 DTRACE_PROBE1(fail1, int, rc);
1337
1338 return (rc);
1339 }
1340
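/*
 * Copy as much of the data block as will fit into the transmit buffer,
 * bounded by *limitp. Returns B_TRUE if this reaches the end of the packet
 * or exhausts the limit.
 */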
1341 static inline boolean_t
1342 sfxge_tx_msgb_copy(mblk_t *mp, sfxge_tx_buffer_t *stbp, size_t *offp,
1343 size_t *limitp)
1344 {
1345 size_t data_off;
1346 size_t data_size;
1347 size_t copy_off;
1348 size_t copy_size;
1349 boolean_t eop;
1350
1351 ASSERT3U(*offp, <=, MBLKL(mp));
1352 ASSERT(*limitp != 0);
1353
1354 data_off = *offp;
1355 data_size = MBLKL(mp) - *offp;
1356
1357 copy_off = stbp->stb_esm.esm_size;
1358 copy_size = SFXGE_TX_BUFFER_SIZE - copy_off;
1359
1360 copy_size = MIN(copy_size, data_size);
1361 copy_size = MIN(copy_size, *limitp);
1362
1363 bcopy(mp->b_rptr + data_off,
1364 stbp->stb_esm.esm_base + copy_off, copy_size);
1365
1366 stbp->stb_esm.esm_size += copy_size;
1367 ASSERT3U(stbp->stb_esm.esm_size, <=,
1368 SFXGE_TX_BUFFER_SIZE);
1369
1370 *offp += copy_size;
1371 *limitp -= copy_size;
1372
1373 data_off += copy_size;
1374 data_size -= copy_size;
1375
1376 eop = (*limitp == 0 ||
1377 (data_size == 0 && mp->b_cont == NULL));
1378
1379 ASSERT3U(*offp, <=, MBLKL(mp));
1380
1381 return (eop);
1382 }
1383
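/*
 * Add up to 'size' bytes of packet payload, starting at *offp within *mpp,
 * to the TXQ fragment list, binding data blocks for DMA where worthwhile and
 * copying short ones into transmit buffers. On return *mpp and *offp
 * identify the first unconsumed byte.
 */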
1384 static int
1385 sfxge_tx_qpayload_fragment(sfxge_txq_t *stp, unsigned int id, mblk_t **mpp,
1386 size_t *offp, size_t size, boolean_t copy)
1387 {
1388 sfxge_t *sp = stp->st_sp;
1389 mblk_t *mp = *mpp;
1390 size_t off = *offp;
1391 sfxge_tx_buffer_t *stbp;
1392 sfxge_tx_mapping_t *stmp;
1393 int rc;
1394
1395 stbp = stp->st_stbp[id];
1396 ASSERT(stbp == NULL || (stbp->stb_esm.esm_size == stbp->stb_off));
1397
1398 stmp = stp->st_stmp[id];
1399
1400 while (size != 0) {
1401 boolean_t eop;
1402
1403 ASSERT(mp != NULL);
1404
1405 if (mp->b_cont != NULL)
1406 prefetch_read_many(mp->b_cont);
1407
1408 ASSERT3U(off, <, MBLKL(mp));
1409
1410 if (copy)
1411 goto copy;
1412
1413 /*
1414 * Check whether we have already mapped this data block for
1415 * DMA.
1416 */
1417 if (stmp == NULL || stmp->stm_mp != mp) {
1418 /*
1419 * If we are part way through copying a data block then
1420 * there's no point in trying to map it for DMA.
1421 */
1422 if (off != 0)
1423 goto copy;
1424
1425 /*
1426 * If the data block is too short then the cost of
1427 * mapping it for DMA would outweigh the cost of
1428 * copying it.
1429 */
1430 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD)
1431 goto copy;
1432
1433 /* Try to grab a transmit mapping from the pool */
1434 stmp = sfxge_tx_qfmp_get(stp);
1435 if (stmp == NULL) {
1436 /*
1437 * The pool was empty so allocate a new
1438 * mapping.
1439 */
1440 if ((stmp = kmem_cache_alloc(sp->s_tmc,
1441 KM_NOSLEEP)) == NULL)
1442 goto copy;
1443 }
1444
1445 /* Add the DMA mapping to the list */
1446 stmp->stm_next = stp->st_stmp[id];
1447 stp->st_stmp[id] = stmp;
1448
1449 /* Try to bind the data block to the mapping */
1450 if (sfxge_tx_msgb_bind(mp, stmp) != 0)
1451 goto copy;
1452 }
1453 ASSERT3P(stmp->stm_mp, ==, mp);
1454
1455 /*
1456 * If we have a partially filled buffer then we must add it to
1457 * the fragment list before adding the mapping.
1458 */
1459 if (stbp != NULL && (stbp->stb_esm.esm_size > stbp->stb_off)) {
1460 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1461 if (rc != 0)
1462 goto fail1;
1463 }
1464
1465 /* Add the mapping to the fragment list */
1466 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size);
1467 if (rc != 0)
1468 goto fail2;
1469
1470 ASSERT(off == MBLKL(mp) || size == 0);
1471
1472 /*
		 * If the data block has been exhausted then skip over the
1474 * control block and advance to the next data block.
1475 */
1476 if (off == MBLKL(mp)) {
1477 mp = mp->b_cont;
1478 off = 0;
1479 }
1480
1481 continue;
1482
1483 copy:
1484 if (stbp == NULL ||
1485 stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE) {
1486 /* Try to grab a buffer from the pool */
1487 stbp = sfxge_tx_qfbp_get(stp);
1488 if (stbp == NULL) {
1489 /*
1490 * The pool was empty so allocate a new
1491 * buffer.
1492 */
1493 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1494 KM_NOSLEEP)) == NULL) {
1495 rc = ENOMEM;
1496 goto fail3;
1497 }
1498 }
1499
1500 /* Add it to the list */
1501 stbp->stb_next = stp->st_stbp[id];
1502 stp->st_stbp[id] = stbp;
1503 }
1504
1505 /* Copy as much of the data block as we can into the buffer */
1506 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);
1507
1508 ASSERT(off == MBLKL(mp) || size == 0 ||
1509 stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE);
1510
1511 /*
1512 * If we have reached the end of the packet, or the buffer is
1513 * full, then add the buffer to the fragment list.
1514 */
1515 if (stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE || eop) {
1516 rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
1517 if (rc != 0)
1518 goto fail4;
1519 }
1520
1521 /*
		 * If the data block has been exhausted then advance to the next
1523 * one.
1524 */
1525 if (off == MBLKL(mp)) {
1526 mp = mp->b_cont;
1527 off = 0;
1528 }
1529 }
1530
1531 *mpp = mp;
1532 *offp = off;
1533
1534 return (0);
1535
1536 fail4:
1537 DTRACE_PROBE(fail4);
1538 fail3:
1539 DTRACE_PROBE(fail3);
1540 fail2:
1541 DTRACE_PROBE(fail2);
1542 fail1:
1543 DTRACE_PROBE1(fail1, int, rc);
1544
1545 return (rc);
1546 }
1547
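/*
 * Fragment a large TCP packet into MSS-sized segments in software: the
 * Ethernet/IP/TCP headers are adjusted (IP id and length, TCP sequence
 * number and flags) and copied into a transmit buffer for each segment,
 * followed by that segment's payload fragments.
 */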
1548 static int
1549 sfxge_tx_qlso_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp,
1550 boolean_t copy)
1551 {
1552 sfxge_t *sp = stp->st_sp;
1553 mblk_t *mp = stpp->stp_mp;
1554 struct ether_header *etherhp = stpp->stp_etherhp;
1555 struct ip *iphp = stpp->stp_iphp;
1556 struct tcphdr *thp = stpp->stp_thp;
1557 size_t size = stpp->stp_size;
1558 size_t off = stpp->stp_off;
1559 size_t mss = stpp->stp_mss;
1560 unsigned int id;
1561 caddr_t hp;
1562 size_t ehs, hs;
1563 uint16_t start_len;
1564 uint16_t start_id;
1565 uint16_t ip_id;
1566 uint8_t start_flags;
1567 uint32_t start_seq;
1568 uint32_t th_seq;
1569 size_t lss;
1570 sfxge_tx_buffer_t *stbp;
1571 int rc;
1572
1573 ASSERT(mutex_owned(&(stp->st_lock)));
1574
1575 if ((DB_LSOFLAGS(mp) & HW_LSO) == 0) {
1576 rc = EINVAL;
1577 goto fail1;
1578 }
1579
1580 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
1581
1582 ASSERT(stp->st_n == 0);
1583 ASSERT(stp->st_stbp[id] == NULL);
1584 ASSERT(stp->st_stmp[id] == NULL);
1585
1586 ehs = (etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
1587 sizeof (struct ether_vlan_header) :
1588 sizeof (struct ether_header);
1589 if (msgdsize(mp) != ehs + ntohs(iphp->ip_len)) {
1590 rc = EINVAL;
1591 goto fail2;
1592 }
1593
1594 /* The payload offset is equivalent to the size of the headers */
1595 hp = (caddr_t)(mp->b_rptr);
1596 hs = off;
1597
	/*
	 * The headers must be wholly contained in the initial data block; if
	 * that block holds only the headers then advance to the next one.
	 */
1602 if (hs > MBLKL(mp)) {
1603 rc = EINVAL;
1604 goto fail3;
1605 }
1606 mp->b_rptr += hs;
1607
1608 if (MBLKL(mp) == 0)
1609 mp = mp->b_cont;
1610
1611 off = 0;
1612
1613 /* Check IP and TCP headers are suitable for LSO */
1614 if (((iphp->ip_off & ~htons(IP_DF)) != 0) ||
1615 ((thp->th_flags & (TH_URG | TH_SYN)) != 0) ||
1616 (thp->th_urp != 0)) {
1617 rc = EINVAL;
1618 goto fail4;
1619 }
1620
1621 if (size + (thp->th_off << 2) + (iphp->ip_hl << 2) !=
1622 ntohs(iphp->ip_len)) {
1623 rc = EINVAL;
1624 goto fail4;
1625 }
1626
1627 /*
	 * Get the base IP ID. The stack leaves enough of a gap in the ID
	 * space for us to increment it for each segment we send out.
1630 */
1631 start_len = ntohs(iphp->ip_len);
1632 start_id = ip_id = ntohs(iphp->ip_id);
1633
1634 /* Get the base TCP sequence number and flags */
1635 start_flags = thp->th_flags;
1636 start_seq = th_seq = ntohl(thp->th_seq);
1637
1638 /* Adjust the header for interim segments */
1639 iphp->ip_len = htons((iphp->ip_hl << 2) + (thp->th_off << 2) + mss);
1640 thp->th_flags = start_flags & ~(TH_PUSH | TH_FIN);
1641
1642 lss = size;
1643 if ((lss / mss) >= (EFX_TXQ_LIMIT(SFXGE_TX_NDESCS) / 2)) {
1644 rc = EINVAL;
1645 goto fail5;
1646 }
1647
1648 stbp = NULL;
1649 while (lss != 0) {
1650 size_t ss = MIN(lss, mss);
1651 boolean_t eol = (ss == lss);
1652
1653 /* Adjust the header for this segment */
1654 iphp->ip_id = htons(ip_id);
1655 ip_id++;
1656
1657 thp->th_seq = htonl(th_seq);
1658 th_seq += ss;
1659
		/* For the final segment, set its true length and restore the TCP flags */
1661 if (eol) {
1662 iphp->ip_len = htons((iphp->ip_hl << 2) +
1663 (thp->th_off << 2) + ss);
1664 thp->th_flags = start_flags;
1665 }
1666
1667 if (stbp == NULL ||
1668 stbp->stb_esm.esm_size + hs > SFXGE_TX_BUFFER_SIZE) {
1669 /* Try to grab a buffer from the pool */
1670 stbp = sfxge_tx_qfbp_get(stp);
1671 if (stbp == NULL) {
1672 /*
1673 * The pool was empty so allocate a new
1674 * buffer.
1675 */
1676 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1677 KM_NOSLEEP)) == NULL) {
1678 rc = ENOMEM;
1679 goto fail6;
1680 }
1681 }
1682
1683 /* Add it to the list */
1684 stbp->stb_next = stp->st_stbp[id];
1685 stp->st_stbp[id] = stbp;
1686 }
1687
1688 /* Copy in the headers */
1689 ASSERT3U(stbp->stb_off, ==, stbp->stb_esm.esm_size);
1690 bcopy(hp, stbp->stb_esm.esm_base + stbp->stb_off, hs);
1691 stbp->stb_esm.esm_size += hs;
1692
1693 /* Add the buffer to the fragment list */
1694 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1695 if (rc != 0)
1696 goto fail7;
1697
1698 /* Add the payload to the fragment list */
1699 if ((rc = sfxge_tx_qpayload_fragment(stp, id, &mp, &off,
1700 ss, copy)) != 0)
1701 goto fail8;
1702
1703 lss -= ss;
1704 }
1705 ASSERT3U(off, ==, 0);
1706 ASSERT3P(mp, ==, NULL);
1707
1708 ASSERT3U(th_seq - start_seq, ==, size);
1709
1710 /*
1711 * If no part of the packet has been mapped for DMA then we can free
1712 * it now, otherwise it can only be freed on completion.
1713 */
1714 if (stp->st_stmp[id] == NULL)
1715 freemsg(stpp->stp_mp);
1716 else
1717 stp->st_mp[id] = stpp->stp_mp;
1718
1719 stpp->stp_mp = NULL;
1720
1721 return (0);
1722
1723 fail8:
1724 DTRACE_PROBE(fail8);
1725 fail7:
1726 DTRACE_PROBE(fail7);
1727 fail6:
1728 DTRACE_PROBE(fail6);
1729 fail5:
1730 DTRACE_PROBE(fail5);
1731
1732 /* Restore the header */
1733 thp->th_seq = htonl(start_seq);
1734 thp->th_flags = start_flags;
1735
1736 iphp->ip_len = htons(start_len);
1737 iphp->ip_id = htons(start_id);
1738
1739 fail4:
1740 DTRACE_PROBE(fail4);
1741
1742 mp = stpp->stp_mp;
1743 mp->b_rptr -= hs;
1744
1745 ASSERT3U(((etherhp->ether_type == htons(ETHERTYPE_VLAN)) ?
1746 sizeof (struct ether_vlan_header) :
1747 sizeof (struct ether_header)) +
1748 ntohs(iphp->ip_len), ==, msgdsize(mp));
1749
1750 ASSERT(stp->st_mp[id] == NULL);
1751
1752 fail3:
1753 DTRACE_PROBE(fail3);
1754 fail2:
1755 DTRACE_PROBE(fail2);
1756 fail1:
1757 DTRACE_PROBE1(fail1, int, rc);
1758
1759 return (rc);
1760 }
1761
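/*
 * Build the fragment list for an ordinary (non-LSO) packet, binding data
 * blocks for DMA where worthwhile and copying short ones into transmit
 * buffers.
 */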
1762 static int
1763 sfxge_tx_qpacket_fragment(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp,
1764 boolean_t copy)
1765 {
1766 sfxge_t *sp = stp->st_sp;
1767 mblk_t *mp = stpp->stp_mp;
1768 unsigned int id;
1769 size_t off;
1770 size_t size;
1771 sfxge_tx_mapping_t *stmp;
1772 sfxge_tx_buffer_t *stbp;
1773 int rc;
1774
1775 ASSERT(mutex_owned(&(stp->st_lock)));
1776
1777 ASSERT(stp->st_n == 0);
1778
1779 id = stp->st_added & (SFXGE_TX_NDESCS - 1);
1780
1781 ASSERT(stp->st_stbp[id] == NULL);
1782 ASSERT(stp->st_stmp[id] == NULL);
1783
1784 off = 0;
1785 size = LONG_MAX; /* must be larger than the packet */
1786
1787 stbp = NULL;
1788 stmp = NULL;
1789
1790 while (mp != NULL) {
1791 boolean_t eop;
1792
1793 ASSERT(mp != NULL);
1794
1795 if (mp->b_cont != NULL)
1796 prefetch_read_many(mp->b_cont);
1797
1798 ASSERT(stmp == NULL || stmp->stm_mp != mp);
1799
1800 if (copy)
1801 goto copy;
1802
1803 /*
1804 * If we are part way through copying a data block then there's
1805 * no point in trying to map it for DMA.
1806 */
1807 if (off != 0)
1808 goto copy;
1809
1810 /*
1811 * If the data block is too short then the cost of mapping it
1812 * for DMA would outweigh the cost of copying it.
		 *
		 * This is the TX copy-break threshold.
1815 */
1816 if (MBLKL(mp) < SFXGE_TX_COPY_THRESHOLD)
1817 goto copy;
1818
1819 /* Try to grab a transmit mapping from the pool */
1820 stmp = sfxge_tx_qfmp_get(stp);
1821 if (stmp == NULL) {
1822 /*
1823 * The pool was empty so allocate a new
1824 * mapping.
1825 */
1826 if ((stmp = kmem_cache_alloc(sp->s_tmc,
1827 KM_NOSLEEP)) == NULL)
1828 goto copy;
1829 }
1830
1831 /* Add the DMA mapping to the list */
1832 stmp->stm_next = stp->st_stmp[id];
1833 stp->st_stmp[id] = stmp;
1834
1835 /* Try to bind the data block to the mapping */
1836 if (sfxge_tx_msgb_bind(mp, stmp) != 0)
1837 goto copy;
1838
1839 /*
1840 * If we have a partially filled buffer then we must add it to
1841 * the fragment list before adding the mapping.
1842 */
1843 if (stbp != NULL && (stbp->stb_esm.esm_size > stbp->stb_off)) {
1844 rc = sfxge_tx_qbuffer_add(stp, stbp, B_FALSE);
1845 if (rc != 0)
1846 goto fail1;
1847 }
1848
1849 /* Add the mapping to the fragment list */
1850 rc = sfxge_tx_qmapping_add(stp, stmp, &off, &size);
1851 if (rc != 0)
1852 goto fail2;
1853
1854 ASSERT3U(off, ==, MBLKL(mp));
1855
1856 /* Advance to the next data block */
1857 mp = mp->b_cont;
1858 off = 0;
1859 continue;
1860
1861 copy:
1862 if (stbp == NULL ||
1863 stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE) {
1864 /* Try to grab a buffer from the pool */
1865 stbp = sfxge_tx_qfbp_get(stp);
1866 if (stbp == NULL) {
1867 /*
1868 * The pool was empty so allocate a new
1869 * buffer.
1870 */
1871 if ((stbp = kmem_cache_alloc(sp->s_tbc,
1872 KM_NOSLEEP)) == NULL) {
1873 rc = ENOMEM;
1874 goto fail3;
1875 }
1876 }
1877
1878 /* Add it to the list */
1879 stbp->stb_next = stp->st_stbp[id];
1880 stp->st_stbp[id] = stbp;
1881 }
1882
1883 /* Copy as much of the data block as we can into the buffer */
1884 eop = sfxge_tx_msgb_copy(mp, stbp, &off, &size);
1885
1886 ASSERT(off == MBLKL(mp) ||
1887 stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE);
1888
1889 /*
1890 * If we have reached the end of the packet, or the buffer is
1891 * full, then add the buffer to the fragment list.
1892 */
1893 if (stbp->stb_esm.esm_size == SFXGE_TX_BUFFER_SIZE || eop) {
1894 rc = sfxge_tx_qbuffer_add(stp, stbp, eop);
1895 if (rc != 0)
1896 goto fail4;
1897 }
1898
1899 /*
		 * If the data block has been exhausted then advance to the next
1901 * one.
1902 */
1903 if (off == MBLKL(mp)) {
1904 mp = mp->b_cont;
1905 off = 0;
1906 }
1907 }
1908 ASSERT3U(off, ==, 0);
1909 ASSERT3P(mp, ==, NULL);
1910 ASSERT3U(size, !=, 0);
1911
1912 /*
1913 * If no part of the packet has been mapped for DMA then we can free
1914 * it now, otherwise it can only be freed on completion.
1915 */
1916 if (stp->st_stmp[id] == NULL)
1917 freemsg(stpp->stp_mp);
1918 else
1919 stp->st_mp[id] = stpp->stp_mp;
1920
1921 stpp->stp_mp = NULL;
1922
1923 return (0);
1924
1925 fail4:
1926 DTRACE_PROBE(fail4);
1927 fail3:
1928 DTRACE_PROBE(fail3);
1929 fail2:
1930 DTRACE_PROBE(fail2);
1931 fail1:
1932 DTRACE_PROBE1(fail1, int, rc);
1933
1934 ASSERT(stp->st_stmp[id] == NULL);
1935
1936 return (rc);
1937 }
1938
1939
1940 #define SFXGE_TX_QDPL_PUT_PENDING(_stp) \
1941 ((_stp)->st_dpl.std_put != 0)
1942
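/*
 * Atomically detach the DPL put list and append its packets, in arrival
 * order, to the tail of the DPL get list.
 */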
1943 static void
1944 sfxge_tx_qdpl_swizzle(sfxge_txq_t *stp)
1945 {
1946 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
1947 volatile uintptr_t *putp;
1948 uintptr_t put;
1949 sfxge_tx_packet_t *stpp;
1950 sfxge_tx_packet_t *p;
1951 sfxge_tx_packet_t **pp;
1952 unsigned int count;
1953
1954 ASSERT(mutex_owned(&(stp->st_lock)));
1955
1956 /*
	 * It is guaranteed that in-flight TX packets will cause more TX
	 * completions, hence more swizzles must happen.
1959 */
1960 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
1961 if (stdp->std_count >= stdp->get_pkt_limit)
1962 return;
1963
1964 /* Acquire the put list - replacing with an empty list */
1965 putp = &(stdp->std_put);
1966 put = atomic_swap_ulong(putp, 0);
1967 stpp = (void *)put;
1968
1969 if (stpp == NULL)
1970 return;
1971
1972 /* Reverse the list */
1973 pp = &(stpp->stp_next);
1974 p = NULL;
1975
1976 count = 0;
1977 do {
1978 sfxge_tx_packet_t *next;
1979
1980 next = stpp->stp_next;
1981
1982 stpp->stp_next = p;
1983 p = stpp;
1984
1985 count++;
1986 stpp = next;
1987 } while (stpp != NULL);
1988
1989 /* Add it to the tail of the get list */
1990 ASSERT3P(*pp, ==, NULL);
1991
1992 *(stdp->std_getp) = p;
1993 stdp->std_getp = pp;
1994 stdp->std_count += count;
1995 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
1996
1997 DTRACE_PROBE2(dpl_counts, int, stdp->std_count, int, count);
1998 }
1999
2000
/*
 * If the TXQ is locked, swizzle the TX DPL put list onto the get list and
 * append this packet to the TX DPL get list.
 * If the TXQ is unlocked, atomically add this packet to the TX DPL put list.
 *
 * The only possible error is ENOSPC (used for TX backpressure), returned when
 * either the TX DPL put or get list becomes full. In both cases there must be
 * future TX completions (as represented by the packets on the DPL get lists).
 *
 * This ensures that mac_tx_update() will subsequently be called from
 * sfxge_tx_qcomplete().
 */
2012 static inline int
2013 sfxge_tx_qdpl_add(sfxge_txq_t *stp, sfxge_tx_packet_t *stpp, int locked)
2014 {
2015 sfxge_tx_dpl_t *stdp = &stp->st_dpl;
2016
2017 ASSERT3P(stpp->stp_next, ==, NULL);
2018
2019 if (locked) {
2020 ASSERT(mutex_owned(&stp->st_lock));
2021
2022 if (stdp->std_count >= stdp->get_pkt_limit) {
2023 stdp->get_full_count++;
2024 return (ENOSPC);
2025 }
2026
2027 /* Reverse the put list onto the get list */
2028 sfxge_tx_qdpl_swizzle(stp);
2029
2030 /* Add to the tail of the get list */
2031 *(stdp->std_getp) = stpp;
2032 stdp->std_getp = &stpp->stp_next;
2033 stdp->std_count++;
2034 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
2035
2036 } else {
2037 volatile uintptr_t *putp;
2038 uintptr_t old;
2039 uintptr_t new;
2040 sfxge_tx_packet_t *old_pkt;
2041
2042 putp = &(stdp->std_put);
2043 new = (uintptr_t)stpp;
2044
2045 /* Add to the head of the put list, keeping a list length */
2046 do {
2047 old = *putp;
2048 old_pkt = (sfxge_tx_packet_t *)old;
2049
2050 stpp->stp_dpl_put_len = old ?
2051 old_pkt->stp_dpl_put_len + 1 : 1;
2052
2053 if (stpp->stp_dpl_put_len >= stdp->put_pkt_limit) {
2054 stpp->stp_next = 0;
2055 stpp->stp_dpl_put_len = 0;
2056 stdp->put_full_count++;
2057 return (ENOSPC);
2058 }
2059
2060 stpp->stp_next = (void *)old;
2061 } while (atomic_cas_ulong(putp, old, new) != old);
2062 }
2063 return (0);
2064 }
2065
2066
/* Take all packets from the DPL get list and try to send them to the HW */
2068 static void
2069 sfxge_tx_qdpl_drain(sfxge_txq_t *stp)
2070 {
2071 sfxge_t *sp = stp->st_sp;
2072 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2073 unsigned int pushed = stp->st_added;
2074 sfxge_tx_packet_t *stpp;
2075 unsigned int count;
2076
2077 ASSERT(mutex_owned(&(stp->st_lock)));
2078
2079 prefetch_read_many(sp->s_enp);
2080 prefetch_read_many(stp->st_etp);
2081
2082 stpp = stdp->std_get;
2083 count = stdp->std_count;
2084
2085 while (count != 0) {
2086 sfxge_tx_packet_t *next;
2087 boolean_t copy;
2088 int rc;
2089
2090 ASSERT(stpp != NULL);
2091
2092 /* Split stpp off */
2093 next = stpp->stp_next;
2094 stpp->stp_next = NULL;
2095
2096 if (next != NULL)
2097 prefetch_read_many(next);
2098
2099 if (stp->st_state != SFXGE_TXQ_STARTED)
2100 goto reject;
2101
2102 copy = B_FALSE;
2103
2104 again:
2105 /* Fragment the packet */
2106 if (stpp->stp_mss != 0) {
2107 rc = sfxge_tx_qlso_fragment(stp, stpp, copy);
2108 } else {
2109 rc = sfxge_tx_qpacket_fragment(stp, stpp, copy);
2110 }
2111
2112 switch (rc) {
2113 case 0:
2114 break;
2115
2116 case ENOSPC:
2117 if (!copy)
2118 goto copy;
2119
2120 /*FALLTHRU*/
2121 default:
2122 goto reject;
2123 }
2124
2125 /* Free the packet structure */
2126 stpp->stp_etherhp = NULL;
2127 stpp->stp_iphp = NULL;
2128 stpp->stp_thp = NULL;
2129 stpp->stp_off = 0;
2130 stpp->stp_size = 0;
2131 stpp->stp_mss = 0;
2132 stpp->stp_dpl_put_len = 0;
2133
2134 ASSERT3P(stpp->stp_mp, ==, NULL);
2135
2136 if (sfxge_tx_qfpp_put(stp, stpp) != 0) {
2137 sfxge_tx_packet_destroy(sp, stpp);
2138 stpp = NULL;
2139 }
2140
2141 --count;
2142 stpp = next;
2143
2144 /* Post the packet */
2145 sfxge_tx_qlist_post(stp);
2146
2147 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED)
2148 goto defer;
2149
2150 if (stp->st_added - pushed >= SFXGE_TX_BATCH) {
2151 efx_tx_qpush(stp->st_etp, stp->st_added);
2152 pushed = stp->st_added;
2153 }
2154
2155 continue;
2156
2157 copy:
2158 /* Abort the current fragment list */
2159 sfxge_tx_qlist_abort(stp);
2160
2161 /* Try copying the packet to flatten it */
2162 ASSERT(!copy);
2163 copy = B_TRUE;
2164
2165 goto again;
2166
2167 reject:
2168 /* Abort the current fragment list */
2169 sfxge_tx_qlist_abort(stp);
2170
2171 /* Discard the packet */
2172 freemsg(stpp->stp_mp);
2173 stpp->stp_mp = NULL;
2174
2175 /* Free the packet structure */
2176 stpp->stp_etherhp = NULL;
2177 stpp->stp_iphp = NULL;
2178 stpp->stp_thp = NULL;
2179 stpp->stp_off = 0;
2180 stpp->stp_size = 0;
2181 stpp->stp_mss = 0;
2182 stpp->stp_dpl_put_len = 0;
2183
2184 if (sfxge_tx_qfpp_put(stp, stpp) != 0) {
2185 sfxge_tx_packet_destroy(sp, stpp);
2186 stpp = NULL;
2187 }
2188
2189 --count;
2190 stpp = next;
2191 continue;
2192 defer:
2193 DTRACE_PROBE1(defer, unsigned int, stp->st_index);
2194 break;
2195 }
2196
2197 if (count == 0) {
2198 /* New empty get list */
2199 ASSERT3P(stpp, ==, NULL);
2200 stdp->std_get = NULL;
2201 stdp->std_count = 0;
2202
2203 stdp->std_getp = &(stdp->std_get);
2204 } else {
		/* Shorten the list by moving the head */
2206 stdp->std_get = stpp;
2207 stdp->std_count = count;
2208 ASSERT3U(stdp->std_count, <=, sfxge_tx_dpl_get_pkt_max(stp));
2209 }
2210
2211 if (stp->st_added != pushed)
2212 efx_tx_qpush(stp->st_etp, stp->st_added);
2213
2214 ASSERT(stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED ||
2215 stdp->std_count == 0);
2216 }
2217
/* Swizzle the deferred packet list and try to push it to the hardware */
2219 static inline void
2220 sfxge_tx_qdpl_service(sfxge_txq_t *stp)
2221 {
2222 do {
2223 ASSERT(mutex_owned(&(stp->st_lock)));
2224
2225 if (SFXGE_TX_QDPL_PUT_PENDING(stp))
2226 sfxge_tx_qdpl_swizzle(stp);
2227
2228 if (stp->st_unblock == SFXGE_TXQ_NOT_BLOCKED)
2229 sfxge_tx_qdpl_drain(stp);
2230
2231 mutex_exit(&(stp->st_lock));
2232
2233 if (!SFXGE_TX_QDPL_PUT_PENDING(stp))
2234 break;
2235 } while (mutex_tryenter(&(stp->st_lock)));
2236 }
2237
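/* Discard every packet on the deferred packet list (put and get) */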
2238 static void
2239 sfxge_tx_qdpl_flush_locked(sfxge_txq_t *stp)
2240 {
2241 sfxge_t *sp = stp->st_sp;
2242 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2243 sfxge_tx_packet_t *stpp;
2244 unsigned int count;
2245
2246 ASSERT(mutex_owned(&(stp->st_lock)));
2247
2248 /* Swizzle put list to the get list */
2249 sfxge_tx_qdpl_swizzle(stp);
2250
2251 stpp = stdp->std_get;
2252 count = stdp->std_count;
2253
2254 while (count != 0) {
2255 sfxge_tx_packet_t *next;
2256
2257 next = stpp->stp_next;
2258 stpp->stp_next = NULL;
2259
2260 /* Discard the packet */
2261 freemsg(stpp->stp_mp);
2262 stpp->stp_mp = NULL;
2263
2264 /* Free the packet structure */
2265 stpp->stp_etherhp = NULL;
2266 stpp->stp_iphp = NULL;
2267 stpp->stp_thp = NULL;
2268 stpp->stp_off = 0;
2269 stpp->stp_size = 0;
2270 stpp->stp_mss = 0;
2271 stpp->stp_dpl_put_len = 0;
2272
2273 sfxge_tx_packet_destroy(sp, stpp);
2274
2275 --count;
2276 stpp = next;
2277 }
2278
2279 ASSERT3P(stpp, ==, NULL);
2280
2281 /* Empty list */
2282 stdp->std_get = NULL;
2283 stdp->std_count = 0;
2284 stdp->std_getp = &(stdp->std_get);
2285 }
2286
2287
2288 void
2289 sfxge_tx_qdpl_flush(sfxge_txq_t *stp)
2290 {
2291 mutex_enter(&(stp->st_lock));
2292 sfxge_tx_qdpl_flush_locked(stp);
2293 mutex_exit(&(stp->st_lock));
2294 }
2295
2296
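/*
 * Called with the event queue lock held: if the descriptor ring has drained
 * to the unblock level, clear the blocked state, post any deferred fragment
 * list and then service the deferred packet list (dropping the TXQ lock).
 */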
2297 static void
2298 sfxge_tx_qunblock(sfxge_txq_t *stp)
2299 {
2300 sfxge_t *sp = stp->st_sp;
2301 unsigned int evq = stp->st_evq;
2302 sfxge_evq_t *sep = sp->s_sep[evq];
2303
2304 ASSERT(mutex_owned(&(sep->se_lock)));
2305
2306 if (stp->st_state != SFXGE_TXQ_STARTED)
2307 return;
2308
2309 mutex_enter(&(stp->st_lock));
2310
2311 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) {
2312 unsigned int level;
2313
2314 level = stp->st_added - stp->st_completed;
2315 if (level <= stp->st_unblock) {
2316 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
2317 sfxge_tx_qlist_post(stp);
2318 }
2319 }
2320
2321 sfxge_tx_qdpl_service(stp);
2322 /* lock has been dropped */
2323 }
2324
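/*
 * Process transmit completions: walk the descriptors from st_completed up
 * to st_pending, unbind any DMA mappings and free the associated mblks,
 * then unblock the queue and release MAC-layer backpressure if the
 * deferred packet list has drained below its limit.  Called with the
 * event queue lock held.
 */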
2325 void
2326 sfxge_tx_qcomplete(sfxge_txq_t *stp)
2327 {
2328 sfxge_t *sp = stp->st_sp;
2329 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2330 unsigned int evq = stp->st_evq;
2331 sfxge_evq_t *sep = sp->s_sep[evq];
2332 unsigned int completed;
2333
2334 ASSERT(mutex_owned(&(sep->se_lock)));
2335
2336 completed = stp->st_completed;
2337 while (completed != stp->st_pending) {
2338 unsigned int id;
2339 sfxge_tx_mapping_t *stmp;
2340
2341 id = completed++ & (SFXGE_TX_NDESCS - 1);
2342
2343 if ((stmp = stp->st_stmp[id]) != NULL) {
2344 mblk_t *mp;
2345
2346 /* Unbind all the mappings */
2347 do {
2348 ASSERT(stmp->stm_mp != NULL);
2349 sfxge_tx_msgb_unbind(stmp);
2350
2351 stmp = stmp->stm_next;
2352 } while (stmp != NULL);
2353
2354 /*
2355 * Now that the packet is no longer mapped for DMA it
2356 * can be freed.
2357 */
2358 mp = stp->st_mp[id];
2359 stp->st_mp[id] = NULL;
2360
2361 ASSERT(mp != NULL);
2362 freemsg(mp);
2363 }
2364 }
2365 stp->st_completed = completed;
2366
2367 /* Check whether we need to unblock the queue */
2368 if (stp->st_unblock != SFXGE_TXQ_NOT_BLOCKED) {
2369 unsigned int level;
2370
2371 level = stp->st_added - stp->st_completed;
2372 if (level <= stp->st_unblock)
2373 sfxge_tx_qunblock(stp);
2374 }
2375
2376 /* Release TX backpressure caused by the TX DPL put/get list being full */
2377 if (stdp->std_count < stdp->get_pkt_limit)
2378 mac_tx_update(sp->s_mh);
2379 }
2380
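/*
 * Handle a TXQ flush-done event: mark a pending flush as complete and,
 * once no flushes remain outstanding, wake the thread waiting in
 * sfxge_tx_stop().  Called with the event queue lock held.
 */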
2381 void
2382 sfxge_tx_qflush_done(sfxge_txq_t *stp)
2383 {
2384 sfxge_t *sp = stp->st_sp;
2385
2386 ASSERT(mutex_owned(&(sp->s_sep[stp->st_evq]->se_lock)));
2387
2388 mutex_enter(&(stp->st_lock));
2389
2390 if (stp->st_flush == SFXGE_FLUSH_PENDING)
2391 stp->st_flush = SFXGE_FLUSH_DONE;
2392
2393 mutex_exit(&(stp->st_lock));
2394
2395 mutex_enter(&(sp->s_tx_flush_lock));
2396 sp->s_tx_flush_pending--;
2397 if (sp->s_tx_flush_pending <= 0) {
2398 /* All queues flushed: wake up sfxge_tx_stop() */
2399 cv_signal(&(sp->s_tx_flush_kv));
2400 }
2401 mutex_exit(&(sp->s_tx_flush_lock));
2402 }
2403
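/*
 * Take a started TXQ back to the initialized state and, if requested,
 * ask the hardware to flush its descriptor ring.  A queue that is not
 * started has no hardware ring, so the flush is skipped.
 */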
2404 static void
2405 sfxge_tx_qflush(sfxge_t *sp, unsigned int index, boolean_t do_flush)
2406 {
2407 sfxge_txq_t *stp = sp->s_stp[index];
2408
2409 ASSERT(mutex_owned(&(sp->s_state_lock)));
2410
2411 mutex_enter(&(stp->st_lock));
2412
2413 /* Prepare to flush and stop the queue */
2414 if (stp->st_state == SFXGE_TXQ_STARTED)
2415 stp->st_state = SFXGE_TXQ_INITIALIZED;
2416 else
2417 do_flush = B_FALSE; /* No hardware ring, so don't flush */
2418
2419 if (do_flush)
2420 stp->st_flush = SFXGE_FLUSH_PENDING;
2421 else
2422 stp->st_flush = SFXGE_FLUSH_INACTIVE;
2423
2424 mutex_exit(&(stp->st_lock));
2425
2426 /* Flush the transmit queue */
2427 if (do_flush)
2428 efx_tx_qflush(stp->st_etp);
2429 }
2430
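/*
 * Stop a TXQ after it has been flushed: destroy the hardware queue,
 * clear its buffer table entries, complete and reap any outstanding
 * descriptors, discard the deferred packet list and reset the queue
 * counters.
 */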
2431 static void
2432 sfxge_tx_qstop(sfxge_t *sp, unsigned int index)
2433 {
2434 sfxge_txq_t *stp = sp->s_stp[index];
2435 unsigned int evq = stp->st_evq;
2436 sfxge_evq_t *sep = sp->s_sep[evq];
2437
2438 mutex_enter(&(sep->se_lock));
2439 mutex_enter(&(stp->st_lock));
2440 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
2441
2442 /* All queues should have been flushed */
2443 ASSERT3S(stp->st_sp->s_tx_flush_pending, ==, 0);
2444 ASSERT(stp->st_flush != SFXGE_FLUSH_FAILED);
2445
2446 /* Mark the flush done in case the TX flush timed out */
2447 stp->st_flush = SFXGE_FLUSH_DONE;
2448
2449 /* Destroy the transmit queue */
2450 efx_tx_qdestroy(stp->st_etp);
2451 stp->st_etp = NULL;
2452
2453 /* Clear entries from the buffer table */
2454 sfxge_sram_buf_tbl_clear(sp, stp->st_id,
2455 EFX_TXQ_NBUFS(SFXGE_TX_NDESCS));
2456
2457 sfxge_tx_qlist_abort(stp);
2458 ASSERT3U(stp->st_n, ==, 0);
2459
2460 stp->st_unblock = SFXGE_TXQ_NOT_BLOCKED;
2461
2462 stp->st_pending = stp->st_added;
2463
2464 sfxge_tx_qcomplete(stp);
2465 ASSERT3U(stp->st_completed, ==, stp->st_pending);
2466
2467 sfxge_tx_qreap(stp);
2468 ASSERT3U(stp->st_reaped, ==, stp->st_completed);
2469
2470 /*
2471 * Ensure the deferred packet list is cleared.
2472 * This can race with sfxge_tx_packet_add() adding to the put list.
2473 */
2474 sfxge_tx_qdpl_flush_locked(stp);
2475
2476 stp->st_added = 0;
2477 stp->st_pending = 0;
2478 stp->st_completed = 0;
2479 stp->st_reaped = 0;
2480
2481 mutex_exit(&(stp->st_lock));
2482 mutex_exit(&(sep->se_lock));
2483 }
2484
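/*
 * Tear down a TXQ: detach it from the driver instance, remove its
 * statistics, empty the free buffer, mapping and packet pools, and
 * return the queue structure to its kmem cache.
 */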
2485 static void
2486 sfxge_tx_qfini(sfxge_t *sp, unsigned int index)
2487 {
2488 sfxge_txq_t *stp = sp->s_stp[index];
2489 sfxge_tx_dpl_t *stdp = &(stp->st_dpl);
2490
2491 /* Detach the TXQ from the driver */
2492 sp->s_stp[index] = NULL;
2493 ASSERT(sp->s_tx_qcount > 0);
2494 sp->s_tx_qcount--;
2495
2496 ASSERT3U(stp->st_state, ==, SFXGE_TXQ_INITIALIZED);
2497 stp->st_state = SFXGE_TXQ_UNINITIALIZED;
2498
2499 /* Tear down the statistics */
2500 sfxge_tx_kstat_fini(stp);
2501
2502 /* Ensure the deferred packet list is empty */
2503 ASSERT3U(stdp->std_count, ==, 0);
2504 ASSERT3P(stdp->std_get, ==, NULL);
2505 ASSERT3U(stdp->std_put, ==, 0);
2506
2507 /* Clear the free buffer pool */
2508 sfxge_tx_qfbp_empty(stp);
2509
2510 /* Clear the free mapping pool */
2511 sfxge_tx_qfmp_empty(stp);
2512
2513 /* Clear the free packet pool */
2514 sfxge_tx_qfpp_empty(stp);
2515
2516 mutex_destroy(&(stp->st_lock));
2517
2518 stp->st_evq = 0;
2519 stp->st_type = 0;
2520 stp->st_index = 0;
2521
2522 kmem_cache_free(sp->s_tqc, stp);
2523 }
2524
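/*
 * Allocate the per-instance transmit kmem caches and initialize the
 * transmit queues: one queue with no checksum offload, one with IP
 * header checksum offload only, and one fully checksummed queue per
 * allocated interrupt.
 */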
2525 int
2526 sfxge_tx_init(sfxge_t *sp)
2527 {
2528 sfxge_intr_t *sip = &(sp->s_intr);
2529 char name[MAXNAMELEN];
2530 int index;
2531 int rc;
2532
2533 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_packet_cache",
2534 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2535
2536 sp->s_tpc = kmem_cache_create(name, sizeof (sfxge_tx_packet_t),
2537 SFXGE_CPU_CACHE_SIZE, sfxge_tx_packet_ctor, sfxge_tx_packet_dtor,
2538 NULL, sp, NULL, 0);
2539 ASSERT(sp->s_tpc != NULL);
2540
2541 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_buffer_cache",
2542 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2543
2544 sp->s_tbc = kmem_cache_create(name, sizeof (sfxge_tx_buffer_t),
2545 SFXGE_CPU_CACHE_SIZE, sfxge_tx_buffer_ctor, sfxge_tx_buffer_dtor,
2546 NULL, sp, NULL, 0);
2547 ASSERT(sp->s_tbc != NULL);
2548
2549 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_tx_mapping_cache",
2550 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2551
2552 sp->s_tmc = kmem_cache_create(name, sizeof (sfxge_tx_mapping_t),
2553 SFXGE_CPU_CACHE_SIZE, sfxge_tx_mapping_ctor, sfxge_tx_mapping_dtor,
2554 NULL, sp, NULL, 0);
2555 ASSERT(sp->s_tmc != NULL);
2556
2557 (void) snprintf(name, MAXNAMELEN - 1, "%s%d_txq_cache",
2558 ddi_driver_name(sp->s_dip), ddi_get_instance(sp->s_dip));
2559
2560 sp->s_tqc = kmem_cache_create(name, sizeof (sfxge_txq_t),
2561 SFXGE_CPU_CACHE_SIZE, sfxge_tx_qctor, sfxge_tx_qdtor, NULL, sp,
2562 NULL, 0);
2563 ASSERT(sp->s_tqc != NULL);
2564
2565 /* Initialize the special transmit queues (no checksum and IP-only checksum) */
2566
2567 /* NB sfxge_ev_qinit() is sensitive to using EVQ_0 */
2568 if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_NON_CKSUM,
2569 SFXGE_TXQ_NON_CKSUM, EVQ_0)) != 0)
2570 goto fail1;
2571
2572 /* NB sfxge_ev_qinit() is sensitive to using EVQ_0 */
2573 if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_IP_CKSUM,
2574 SFXGE_TXQ_IP_CKSUM, EVQ_0)) != 0)
2575 goto fail2;
2576
2577 /* Initialize the normal transmit queues */
2578 for (index = 0; index < sip->si_nalloc; index++) {
2579 if ((rc = sfxge_tx_qinit(sp, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index,
2580 SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
2581 goto fail3;
2582 }
2583
2584 return (0);
2585
2586 fail3:
2587 DTRACE_PROBE(fail3);
2588
2589 while (--index >= 0)
2590 sfxge_tx_qfini(sp, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);
2591
2592 sfxge_tx_qfini(sp, SFXGE_TXQ_IP_CKSUM);
2593
2594 fail2:
2595 DTRACE_PROBE(fail2);
2596 
2597 sfxge_tx_qfini(sp, SFXGE_TXQ_NON_CKSUM);
2598 
2599 fail1:
2600 DTRACE_PROBE1(fail1, int, rc);
2601
2602 kmem_cache_destroy(sp->s_tqc);
2603 sp->s_tqc = NULL;
2604
2605 kmem_cache_destroy(sp->s_tmc);
2606 sp->s_tmc = NULL;
2607
2608 kmem_cache_destroy(sp->s_tbc);
2609 sp->s_tbc = NULL;
2610
2611 kmem_cache_destroy(sp->s_tpc);
2612 sp->s_tpc = NULL;
2613
2614 return (rc);
2615 }
2616
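/*
 * Start the transmit path: initialize the common transmit module and
 * then start every transmit queue, unwinding on failure.
 */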
2617 int
2618 sfxge_tx_start(sfxge_t *sp)
2619 {
2620 efx_nic_t *enp = sp->s_enp;
2621 int index;
2622 int rc;
2623
2624 /* Initialize the transmit module */
2625 if ((rc = efx_tx_init(enp)) != 0)
2626 goto fail1;
2627
2628 for (index = 0; index < sp->s_tx_qcount; index++) {
2629 if ((rc = sfxge_tx_qstart(sp, index)) != 0)
2630 goto fail2;
2631 }
2632
2633 return (0);
2634
2635 fail2:
2636 DTRACE_PROBE(fail2);
2637
2638 while (--index >= 0)
2639 sfxge_tx_qstop(sp, index);
2640
2641 /* Tear down the transmit module */
2642 efx_tx_fini(enp);
2643
2644 fail1:
2645 DTRACE_PROBE1(fail1, int, rc);
2646
2647 return (rc);
2648 }
2649
2650
2651 /*
2652 * Add a packet to the TX deferred packet list (DPL) and, if the TX queue
2653 * lock can be acquired, call sfxge_tx_qdpl_service() to fragment the
2654 * packet and push it to the hardware transmit descriptor ring.
2655 *
2656 * If ENOSPC is returned then either the DPL is full or packet creation
2657 * failed; the mblk is not freed so that the caller can return it from
2658 * mc_tx() to back-pressure the OS stack.
2659 *
2660 * For all other errors the mblk is freed.
2661 */
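/*
 * Illustrative sketch only (not the driver's actual mc_tx() entry point):
 * a GLDv3 transmit handler built on this routine might consume an mblk
 * chain and return the unsent remainder on ENOSPC so that the stack is
 * back-pressured, along these lines:
 *
 *	while (mp != NULL) {
 *		mblk_t *next = mp->b_next;
 *
 *		mp->b_next = NULL;
 *		if (sfxge_tx_packet_add(sp, mp) == ENOSPC) {
 *			mp->b_next = next;
 *			return (mp);
 *		}
 *		mp = next;
 *	}
 *	return (NULL);
 */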
2662 int
2663 sfxge_tx_packet_add(sfxge_t *sp, mblk_t *mp)
2664 {
2665 struct ether_header *etherhp;
2666 struct ip *iphp;
2667 struct tcphdr *thp;
2668 size_t off;
2669 size_t size;
2670 size_t mss;
2671 sfxge_txq_t *stp;
2672 boolean_t locked;
2673 sfxge_tx_packet_t *stpp;
2674 int rc = 0;
2675
2676 ASSERT3P(mp->b_next, ==, NULL);
2677 ASSERT(!(DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM));
2678
2679 /*
2680 * Do not enqueue packets during startup or shutdown.
2681 *
2682 * NOTE: This access to the state is NOT protected by the state lock.
2683 * It is an imperfect test; anything that still reaches the get/put
2684 * deferred packet lists is cleaned up by (possibly repeated) calls to
2685 * sfxge_can_destroy().
2686 */
2687 if (sp->s_state != SFXGE_STARTED) {
2688 rc = EINVAL;
2689 goto fail1;
2690 }
2691
2692 etherhp = NULL;
2693 iphp = NULL;
2694 thp = NULL;
2695 off = 0;
2696 size = 0;
2697 mss = 0;
2698
2699 /* Check whether we need the header pointers for LSO segmentation */
2700 if (DB_LSOFLAGS(mp) & HW_LSO) {
2701 /* LSO segmentation relies on hardware checksum offload */
2702 DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
2703
2704 if ((mss = DB_LSOMSS(mp)) == 0) {
2705 rc = EINVAL;
2706 goto fail1;
2707 }
2708
2709 sfxge_tcp_parse(mp, &etherhp, &iphp, &thp, &off, &size);
2710
2711 if (etherhp == NULL ||
2712 iphp == NULL ||
2713 thp == NULL ||
2714 off == 0) {
2715 rc = EINVAL;
2716 goto fail2;
2717 }
2718 }
2719
2720 /* Choose the appropriate transmit queue */
2721 if (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) {
2722 sfxge_rx_scale_t *srsp = &(sp->s_rx_scale);
2723
2724 if (srsp->srs_state == SFXGE_RX_SCALE_STARTED) {
2725 uint16_t hash;
2726 int index;
2727
2728 if (srsp->srs_count > 1) {
2729 /*
2730 * If we have not already parsed the headers
2731 * for LSO segmentation then we need to do it
2732 * now so we can calculate the hash.
2733 */
2734 if (thp == NULL)
2735 sfxge_tcp_parse(mp, &etherhp, &iphp,
2736 &thp, &off, &size);
2737
2738 if (thp != NULL) {
2739 SFXGE_TCP_HASH(
2740 ntohl(iphp->ip_dst.s_addr),
2741 ntohs(thp->th_dport),
2742 ntohl(iphp->ip_src.s_addr),
2743 ntohs(thp->th_sport), hash);
2744
2745 index = srsp->srs_tbl[hash %
2746 SFXGE_RX_SCALE_MAX];
2747 } else {
2748 /*
2749 * Non-TCP traffic always goes to the
2750 * queue in the zeroth entry of
2751 * the RSS table.
2752 */
2753 index = srsp->srs_tbl[0];
2754 }
2755 } else {
2756 /*
2757 * It does not matter what the hash is
2758 * because all the RSS table entries will be
2759 * the same.
2760 */
2761 index = srsp->srs_tbl[0];
2762 }
2763
2764 /*
2765 * Find the event queue corresponding to the hash in
2766 * the RSS table.
2767 */
2768 stp = sp->s_stp[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
2769 ASSERT3U(stp->st_evq, ==, index);
2770 } else {
2771 stp = sp->s_stp[SFXGE_TXQ_IP_TCP_UDP_CKSUM];
2772 }
2773 } else if (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) {
2774 stp = sp->s_stp[SFXGE_TXQ_IP_CKSUM];
2775 } else {
2776 if ((stp = sp->s_stp[SFXGE_TXQ_NON_CKSUM]) == NULL)
2777 stp = sp->s_stp[SFXGE_TXQ_IP_CKSUM];
2778 }
2779 ASSERT(stp != NULL);
2780
2781 ASSERT(mss == 0 || (DB_LSOFLAGS(mp) & HW_LSO));
2782
2783 /* Try to grab the lock */
2784 locked = mutex_tryenter(&(stp->st_lock));
2785
2786 if (locked) {
2787 /* Try to grab a packet from the pool */
2788 stpp = sfxge_tx_qfpp_get(stp);
2789 } else {
2790 stpp = NULL;
2791 }
2792
2793 if (stpp == NULL) {
2794 /*
2795 * Either the pool was empty or we don't have the lock, so
2796 * allocate a new packet.
2797 */
2798 if ((stpp = sfxge_tx_packet_create(sp)) == NULL) {
2799 rc = ENOSPC;
2800 goto fail3;
2801 }
2802 }
2803
2804 stpp->stp_mp = mp;
2805 stpp->stp_etherhp = etherhp;
2806 stpp->stp_iphp = iphp;
2807 stpp->stp_thp = thp;
2808 stpp->stp_off = off;
2809 stpp->stp_size = size;
2810 stpp->stp_mss = mss;
2811 stpp->stp_dpl_put_len = 0;
2812
2813 rc = sfxge_tx_qdpl_add(stp, stpp, locked);
2814 if (rc != 0) {
2815 /* ENOSPC can happen if the DPL get or put list is full */
2816 ASSERT3U(rc, ==, ENOSPC);
2817
2818 /*
2819 * Note: if this is the unlocked case where the DPL put list is
2820 * full, there is no need to worry about a race with a locked
2821 * sfxge_tx_qdpl_swizzle(); the TX DPL put list was full and so
2822 * will have been swizzled onto the TX DPL get list, which
2823 * guarantees future TX completions and calls to
2824 * mac_tx_update() via sfxge_tx_qcomplete().
2825 */
2826 goto fail4;
2827 }
2828
2829 /* Try to grab the lock again */
2830 if (!locked)
2831 locked = mutex_tryenter(&(stp->st_lock));
2832
2833 if (locked) {
2834 /* Try to service the list */
2835 sfxge_tx_qdpl_service(stp);
2836 /* lock has been dropped */
2837 }
2838
2839 return (0);
2840
2841 fail4:
2842 DTRACE_PROBE(fail4);
2843 sfxge_tx_packet_destroy(sp, stpp);
2844 fail3:
2845 DTRACE_PROBE(fail3);
2846 if (locked)
2847 mutex_exit(&(stp->st_lock));
2848 fail2:
2849 DTRACE_PROBE(fail2);
2850 fail1:
2851 DTRACE_PROBE1(fail1, int, rc);
2852
2853 if (rc != ENOSPC)
2854 freemsg(mp);
2855 return (rc);
2856 }
2857
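/*
 * Transmit 'count' copies of a broadcast loopback test frame sourced from
 * the station's unicast address (LAA if set, otherwise BIA).  Used by the
 * SFXGE_TX_OP_LOOPBACK ioctl below.
 */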
2858 int
2859 sfxge_tx_loopback(sfxge_t *sp, unsigned int count)
2860 {
2861 uint8_t unicst[ETHERADDRL];
2862 size_t mtu;
2863 mblk_t *mp;
2864 struct ether_header *etherhp;
2865 unsigned int byte;
2866 int rc;
2867
2868 if (count == 0) {
2869 rc = EINVAL;
2870 goto fail1;
2871 }
2872
2873 rc = sfxge_mac_unicst_get(sp, SFXGE_UNICST_LAA, unicst);
2874
2875 if (rc == ENOENT)
2876 rc = sfxge_mac_unicst_get(sp, SFXGE_UNICST_BIA, unicst);
2877
2878 if (rc != 0)
2879 goto fail2;
2880
2881 mtu = sp->s_mtu;
2882
2883 if ((mp = allocb(sizeof (struct ether_header) + mtu,
2884 BPRI_HI)) == NULL) {
2885 rc = ENOMEM;
2886 goto fail3;
2887 }
2888
2889 mp->b_wptr = mp->b_rptr + sizeof (struct ether_header);
2890 bzero(mp->b_rptr, MBLKL(mp));
2891
2892 /*LINTED*/
2893 etherhp = (struct ether_header *)(mp->b_rptr);
2894 bcopy(sfxge_brdcst, &(etherhp->ether_dhost), ETHERADDRL);
2895 bcopy(unicst, &(etherhp->ether_shost), ETHERADDRL);
2896 etherhp->ether_type = htons(SFXGE_ETHERTYPE_LOOPBACK);
2897
2898 for (byte = 0; byte < 30; byte++)
2899 *(mp->b_wptr++) = (byte & 1) ? 0xaa : 0x55;
2900
2901 do {
2902 mblk_t *nmp;
2903
2904 if ((nmp = dupb(mp)) == NULL) {
2905 rc = ENOMEM;
2906 goto fail4;
2907 }
2908
2909 rc = sfxge_tx_packet_add(sp, nmp);
2910 if (rc != 0) {
2911 freeb(nmp);
2912 goto fail5;
2913 }
2914
2915 } while (--count != 0);
2916
2917 freeb(mp);
2918 return (0);
2919
2920 fail5:
2921 DTRACE_PROBE(fail5);
2922 fail4:
2923 DTRACE_PROBE(fail4);
2924
2925 freeb(mp);
2926
2927 fail3:
2928 DTRACE_PROBE(fail3);
2929 fail2:
2930 DTRACE_PROBE(fail2);
2931 fail1:
2932 DTRACE_PROBE1(fail1, int, rc);
2933
2934 return (rc);
2935 }
2936
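/*
 * Dispatch a transmit ioctl.  Only SFXGE_TX_OP_LOOPBACK is currently
 * supported; anything else returns ENOTSUP.
 */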
2937 int
2938 sfxge_tx_ioctl(sfxge_t *sp, sfxge_tx_ioc_t *stip)
2939 {
2940 int rc;
2941
2942 switch (stip->sti_op) {
2943 case SFXGE_TX_OP_LOOPBACK: {
2944 unsigned int count = stip->sti_data;
2945
2946 if ((rc = sfxge_tx_loopback(sp, count)) != 0)
2947 goto fail1;
2948
2949 break;
2950 }
2951 default:
2952 rc = ENOTSUP;
2953 goto fail1;
2954 }
2955
2956 return (0);
2957
2958 fail1:
2959 DTRACE_PROBE1(fail1, int, rc);
2960
2961 return (rc);
2962 }
2963
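/*
 * Stop the transmit path: request a flush of every queue (unless a
 * hardware error has been seen), wait for the flushes to complete or
 * time out, then stop each queue and tear down the common transmit
 * module.
 */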
2964 void
2965 sfxge_tx_stop(sfxge_t *sp)
2966 {
2967 efx_nic_t *enp = sp->s_enp;
2968 clock_t timeout;
2969 boolean_t do_flush;
2970 int index;
2971
2972 ASSERT(mutex_owned(&(sp->s_state_lock)));
2973
2974 mutex_enter(&(sp->s_tx_flush_lock));
2975
2976 /* Flush all the queues */
2977 if (sp->s_hw_err == SFXGE_HW_OK) {
2978 sp->s_tx_flush_pending = sp->s_tx_qcount;
2979 do_flush = B_TRUE;
2980 } else {
2981 sp->s_tx_flush_pending = 0;
2982 do_flush = B_FALSE;
2983 }
2984
2985 /* Prepare queues to stop and flush the hardware ring */
2986 for (index = 0; index < sp->s_tx_qcount; index++)
2987 sfxge_tx_qflush(sp, index, do_flush);
2988
2989 if (do_flush == B_FALSE)
2990 goto flush_done;
2991
2992 /* Wait up to 2 seconds (SFXGE_TX_QFLUSH_USEC) for queue flushing to complete */
2993 timeout = ddi_get_lbolt() + drv_usectohz(SFXGE_TX_QFLUSH_USEC);
2994
2995 while (sp->s_tx_flush_pending > 0) {
2996 if (cv_timedwait(&(sp->s_tx_flush_kv), &(sp->s_tx_flush_lock),
2997 timeout) < 0) {
2998 /* Timeout waiting for queues to flush */
2999 dev_info_t *dip = sp->s_dip;
3000
3001 DTRACE_PROBE(timeout);
3002 cmn_err(CE_NOTE,
3003 SFXGE_CMN_ERR "[%s%d] tx qflush timeout",
3004 ddi_driver_name(dip), ddi_get_instance(dip));
3005 break;
3006 }
3007 }
3008 sp->s_tx_flush_pending = 0;
3009
3010 flush_done:
3011 mutex_exit(&(sp->s_tx_flush_lock));
3012
3013 /* Stop all the queues */
3014 for (index = 0; index < sp->s_tx_qcount; index++)
3015 sfxge_tx_qstop(sp, index);
3016
3017 /* Tear down the transmit module */
3018 efx_tx_fini(enp);
3019 }
3020
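/*
 * Finalize the transmit path: tear down every transmit queue and destroy
 * the per-instance kmem caches created by sfxge_tx_init().
 */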
3021 void
3022 sfxge_tx_fini(sfxge_t *sp)
3023 {
3024 int index;
3025
3026 index = sp->s_tx_qcount;
3027 while (--index >= 0)
3028 sfxge_tx_qfini(sp, index);
3029
3030 kmem_cache_destroy(sp->s_tqc);
3031 sp->s_tqc = NULL;
3032
3033 kmem_cache_destroy(sp->s_tmc);
3034 sp->s_tmc = NULL;
3035
3036 kmem_cache_destroy(sp->s_tbc);
3037 sp->s_tbc = NULL;
3038
3039 kmem_cache_destroy(sp->s_tpc);
3040 sp->s_tpc = NULL;
3041 }