7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/xen/io/xnf.c
+++ new/usr/src/uts/common/xen/io/xnf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 *
29 29 * Copyright (c) 2004 Christian Limpach.
30 30 * All rights reserved.
31 31 *
32 32 * Redistribution and use in source and binary forms, with or without
33 33 * modification, are permitted provided that the following conditions
34 34 * are met:
35 35 * 1. Redistributions of source code must retain the above copyright
36 36 * notice, this list of conditions and the following disclaimer.
37 37 * 2. Redistributions in binary form must reproduce the above copyright
38 38 * notice, this list of conditions and the following disclaimer in the
39 39 * documentation and/or other materials provided with the distribution.
40 40 * 3. This section intentionally left blank.
41 41 * 4. The name of the author may not be used to endorse or promote products
42 42 * derived from this software without specific prior written permission.
43 43 *
44 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 54 */
55 55 /*
56 56 * Section 3 of the above license was updated in response to bug 6379571.
57 57 */
58 58
59 59 /*
60 60 * xnf.c - GLDv3 network driver for domU.
61 61 */
62 62
63 63 /*
64 64 * This driver uses four per-instance locks:
65 65 *
66 66 * xnf_gref_lock:
67 67 *
68 68 * Protects access to the grant reference list stored in
69 69 * xnf_gref_head. Grant references should be acquired and released
70 70 * using gref_get() and gref_put() respectively.
71 71 *
72 72 * xnf_schedlock:
73 73 *
74 74 * Protects:
75 75 * xnf_need_sched - used to record that a previous transmit attempt
76 76 * failed (and consequently it will be necessary to call
77 77 * mac_tx_update() when transmit resources are available).
78 78 * xnf_pending_multicast - the number of multicast requests that
79 79 * have been submitted to the backend for which we have not
80 80 * processed responses.
81 81 *
82 82 * xnf_txlock:
83 83 *
84 84 * Protects the transmit ring (xnf_tx_ring) and associated
85 85 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head).
86 86 *
87 87 * xnf_rxlock:
88 88 *
89 89 * Protects the receive ring (xnf_rx_ring) and associated
90 90 * structures (notably xnf_rx_pkt_info).
91 91 *
92 92 * If driver-global state that affects both the transmit and receive
93 93 * rings is manipulated, both xnf_txlock and xnf_rxlock should be
94 94 * held, in that order.
95 95 *
96 96 * xnf_schedlock is acquired both whilst holding xnf_txlock and
97 97 * without. It should always be acquired after xnf_txlock if both are
98 98 * held.
99 99 *
100 100 * Notes:
101 101 * - atomic_add_64() is used to manipulate counters where we require
102 102 * accuracy. For counters intended only for observation by humans,
103 103 * post increment/decrement are used instead.
104 104 */
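
   [Editor's aside, not part of xnf.c: a minimal sketch of the xnf_schedlock
   rule described in the comment above — when both locks are needed,
   xnf_schedlock is taken only while xnf_txlock is already held, as
   xnf_set_multicast() and xnf_intr() do later in this file. The snippet is a
   fragment and assumes an xnf_t *xnfp in scope.]

	/* Transmit-side work that may need to record a deferral. */
	mutex_enter(&xnfp->xnf_txlock);
	/* ... manipulate the transmit ring ... */
	mutex_enter(&xnfp->xnf_schedlock);	/* always after xnf_txlock */
	xnfp->xnf_need_sched = B_TRUE;		/* note the failed attempt */
	mutex_exit(&xnfp->xnf_schedlock);
	mutex_exit(&xnfp->xnf_txlock);
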
105 105
106 106 #include <sys/types.h>
107 107 #include <sys/errno.h>
108 108 #include <sys/param.h>
109 109 #include <sys/sysmacros.h>
110 110 #include <sys/systm.h>
111 111 #include <sys/stream.h>
112 112 #include <sys/strsubr.h>
113 113 #include <sys/strsun.h>
114 114 #include <sys/conf.h>
115 115 #include <sys/ddi.h>
116 116 #include <sys/devops.h>
117 117 #include <sys/sunddi.h>
118 118 #include <sys/sunndi.h>
119 119 #include <sys/dlpi.h>
120 120 #include <sys/ethernet.h>
121 121 #include <sys/strsun.h>
122 122 #include <sys/pattr.h>
123 123 #include <inet/ip.h>
124 124 #include <inet/ip_impl.h>
125 125 #include <sys/gld.h>
126 126 #include <sys/modctl.h>
127 127 #include <sys/mac_provider.h>
128 128 #include <sys/mac_ether.h>
129 129 #include <sys/bootinfo.h>
130 130 #include <sys/mach_mmu.h>
131 131 #ifdef XPV_HVM_DRIVER
132 132 #include <sys/xpv_support.h>
133 133 #include <sys/hypervisor.h>
134 134 #else
135 135 #include <sys/hypervisor.h>
136 136 #include <sys/evtchn_impl.h>
137 137 #include <sys/balloon_impl.h>
138 138 #endif
139 139 #include <xen/public/io/netif.h>
140 140 #include <sys/gnttab.h>
141 141 #include <xen/sys/xendev.h>
142 142 #include <sys/sdt.h>
143 143 #include <sys/note.h>
144 144 #include <sys/debug.h>
145 145
146 146 #include <io/xnf.h>
147 147
148 148 #if defined(DEBUG) || defined(__lint)
149 149 #define XNF_DEBUG
150 150 #endif
151 151
152 152 #ifdef XNF_DEBUG
153 153 int xnf_debug = 0;
154 154 xnf_t *xnf_debug_instance = NULL;
155 155 #endif
156 156
157 157 /*
158 158 * On a 32 bit PAE system physical and machine addresses are larger
 159 159 	 * than 32 bits. ddi_btop() on such systems takes an unsigned long
160 160 * argument, and so addresses above 4G are truncated before ddi_btop()
161 161 * gets to see them. To avoid this, code the shift operation here.
162 162 */
163 163 #define xnf_btop(addr) ((addr) >> PAGESHIFT)
164 164
165 165 unsigned int xnf_max_tx_frags = 1;
166 166
167 167 /*
168 168 * Should we use the multicast control feature if the backend provides
169 169 * it?
170 170 */
171 171 boolean_t xnf_multicast_control = B_TRUE;
172 172
173 173 /*
174 174 * Received packets below this size are copied to a new streams buffer
175 175 * rather than being desballoc'ed.
176 176 *
177 177 * This value is chosen to accommodate traffic where there are a large
178 178 * number of small packets. For data showing a typical distribution,
179 179 * see:
180 180 *
181 181 * Sinha07a:
182 182 * Rishi Sinha, Christos Papadopoulos, and John
183 183 * Heidemann. Internet Packet Size Distributions: Some
184 184 * Observations. Technical Report ISI-TR-2007-643,
 185 185 	 *	USC/Information Sciences Institute, May, 2007. Originally
186 186 * released October 2005 as web page
187 187 * http://netweb.usc.edu/~sinha/pkt-sizes/.
188 188 * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
189 189 */
190 190 size_t xnf_rx_copy_limit = 64;
191 191
192 192 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
193 193 #define INVALID_GRANT_REF ((grant_ref_t)-1)
194 194 #define INVALID_TX_ID ((uint16_t)-1)
195 195
196 196 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)]))
197 197 #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE))
198 198
199 199 /* Required system entry points */
200 200 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t);
201 201 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t);
202 202
203 203 /* Required driver entry points for Nemo */
204 204 static int xnf_start(void *);
205 205 static void xnf_stop(void *);
206 206 static int xnf_set_mac_addr(void *, const uint8_t *);
207 207 static int xnf_set_multicast(void *, boolean_t, const uint8_t *);
208 208 static int xnf_set_promiscuous(void *, boolean_t);
209 209 static mblk_t *xnf_send(void *, mblk_t *);
210 210 static uint_t xnf_intr(caddr_t);
211 211 static int xnf_stat(void *, uint_t, uint64_t *);
212 212 static boolean_t xnf_getcapab(void *, mac_capab_t, void *);
213 213
214 214 /* Driver private functions */
215 215 static int xnf_alloc_dma_resources(xnf_t *);
216 216 static void xnf_release_dma_resources(xnf_t *);
217 217 static void xnf_release_mblks(xnf_t *);
218 218
219 219 static int xnf_buf_constructor(void *, void *, int);
220 220 static void xnf_buf_destructor(void *, void *);
221 221 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t);
222 222 #pragma inline(xnf_buf_get)
223 223 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t);
224 224 #pragma inline(xnf_buf_put)
225 225 static void xnf_buf_refresh(xnf_buf_t *);
226 226 #pragma inline(xnf_buf_refresh)
227 227 static void xnf_buf_recycle(xnf_buf_t *);
228 228
229 229 static int xnf_tx_buf_constructor(void *, void *, int);
230 230 static void xnf_tx_buf_destructor(void *, void *);
231 231
232 232 static grant_ref_t gref_get(xnf_t *);
233 233 #pragma inline(gref_get)
234 234 static void gref_put(xnf_t *, grant_ref_t);
235 235 #pragma inline(gref_put)
236 236
237 237 static xnf_txid_t *txid_get(xnf_t *);
238 238 #pragma inline(txid_get)
239 239 static void txid_put(xnf_t *, xnf_txid_t *);
240 240 #pragma inline(txid_put)
241 241
242 242 void xnf_send_driver_status(int, int);
243 243 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *);
244 244 static int xnf_tx_clean_ring(xnf_t *);
245 245 static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
246 246 void *, void *);
247 247 static boolean_t xnf_kstat_init(xnf_t *);
248 248 static void xnf_rx_collect(xnf_t *);
249 249
250 250 static mac_callbacks_t xnf_callbacks = {
251 251 MC_GETCAPAB,
252 252 xnf_stat,
253 253 xnf_start,
254 254 xnf_stop,
255 255 xnf_set_promiscuous,
256 256 xnf_set_multicast,
257 257 xnf_set_mac_addr,
258 258 xnf_send,
259 259 NULL,
260 260 NULL,
261 261 xnf_getcapab
262 262 };
263 263
264 264 /* DMA attributes for network ring buffer */
265 265 static ddi_dma_attr_t ringbuf_dma_attr = {
266 266 DMA_ATTR_V0, /* version of this structure */
267 267 0, /* lowest usable address */
268 268 0xffffffffffffffffULL, /* highest usable address */
269 269 0x7fffffff, /* maximum DMAable byte count */
270 270 MMU_PAGESIZE, /* alignment in bytes */
271 271 0x7ff, /* bitmap of burst sizes */
272 272 1, /* minimum transfer */
273 273 0xffffffffU, /* maximum transfer */
274 274 0xffffffffffffffffULL, /* maximum segment length */
275 275 1, /* maximum number of segments */
276 276 1, /* granularity */
277 277 0, /* flags (reserved) */
278 278 };
279 279
280 280 /* DMA attributes for transmit and receive data */
281 281 static ddi_dma_attr_t buf_dma_attr = {
282 282 DMA_ATTR_V0, /* version of this structure */
283 283 0, /* lowest usable address */
284 284 0xffffffffffffffffULL, /* highest usable address */
285 285 0x7fffffff, /* maximum DMAable byte count */
286 286 MMU_PAGESIZE, /* alignment in bytes */
287 287 0x7ff, /* bitmap of burst sizes */
288 288 1, /* minimum transfer */
289 289 0xffffffffU, /* maximum transfer */
290 290 0xffffffffffffffffULL, /* maximum segment length */
291 291 1, /* maximum number of segments */
292 292 1, /* granularity */
293 293 0, /* flags (reserved) */
294 294 };
295 295
296 296 /* DMA access attributes for registers and descriptors */
297 297 static ddi_device_acc_attr_t accattr = {
298 298 DDI_DEVICE_ATTR_V0,
299 299 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */
300 300 DDI_STRICTORDER_ACC
301 301 };
302 302
303 303 /* DMA access attributes for data: NOT to be byte swapped. */
304 304 static ddi_device_acc_attr_t data_accattr = {
305 305 DDI_DEVICE_ATTR_V0,
306 306 DDI_NEVERSWAP_ACC,
307 307 DDI_STRICTORDER_ACC
308 308 };
309 309
 309 309
310 310 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
311 311 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);
312 312
313 313 static struct modldrv xnf_modldrv = {
314 314 &mod_driverops,
315 315 "Virtual Ethernet driver",
316 316 &xnf_dev_ops
317 317 };
318 318
319 319 static struct modlinkage modlinkage = {
320 - MODREV_1, &xnf_modldrv, NULL
320 + MODREV_1, { &xnf_modldrv, NULL }
321 321 };
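
   [Editor's aside, not part of xnf.c: the hunk above is the only change this
   review makes to xnf.c. With -Wno-missing-braces removed from Makefile.uts,
   GCC's -Wmissing-braces warning takes effect, and struct modlinkage keeps
   its module pointers in an array member (ml_linkage[] in sys/modctl.h), so
   the array initializer needs its own brace pair. A sketch of the pattern,
   using a made-up struct rather than the real modlinkage definition:]

	/* The initializer shape -Wmissing-braces flags, and the braced form. */
	struct example {
		int	rev;
		void	*linkage[4];
	};
	static int dummy;
	static struct example warns = { 1, &dummy, NULL };	/* warns */
	static struct example clean = { 1, { &dummy, NULL } };	/* fully braced */
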
322 322
323 323 int
324 324 _init(void)
325 325 {
326 326 int r;
327 327
328 328 mac_init_ops(&xnf_dev_ops, "xnf");
329 329 r = mod_install(&modlinkage);
330 330 if (r != DDI_SUCCESS)
331 331 mac_fini_ops(&xnf_dev_ops);
332 332
333 333 return (r);
334 334 }
335 335
336 336 int
337 337 _fini(void)
338 338 {
339 339 return (EBUSY); /* XXPV should be removable */
340 340 }
341 341
342 342 int
343 343 _info(struct modinfo *modinfop)
344 344 {
345 345 return (mod_info(&modlinkage, modinfop));
346 346 }
347 347
348 348 /*
349 349 * Acquire a grant reference.
350 350 */
351 351 static grant_ref_t
352 352 gref_get(xnf_t *xnfp)
353 353 {
354 354 grant_ref_t gref;
355 355
356 356 mutex_enter(&xnfp->xnf_gref_lock);
357 357
358 358 do {
359 359 gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head);
360 360
361 361 } while ((gref == INVALID_GRANT_REF) &&
362 362 (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0));
363 363
364 364 mutex_exit(&xnfp->xnf_gref_lock);
365 365
366 366 if (gref == INVALID_GRANT_REF) {
367 367 xnfp->xnf_stat_gref_failure++;
368 368 } else {
369 369 atomic_inc_64(&xnfp->xnf_stat_gref_outstanding);
370 370 if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak)
371 371 xnfp->xnf_stat_gref_peak =
372 372 xnfp->xnf_stat_gref_outstanding;
373 373 }
374 374
375 375 return (gref);
376 376 }
377 377
378 378 /*
379 379 * Release a grant reference.
380 380 */
381 381 static void
382 382 gref_put(xnf_t *xnfp, grant_ref_t gref)
383 383 {
384 384 ASSERT(gref != INVALID_GRANT_REF);
385 385
386 386 mutex_enter(&xnfp->xnf_gref_lock);
387 387 gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref);
388 388 mutex_exit(&xnfp->xnf_gref_lock);
389 389
390 390 atomic_dec_64(&xnfp->xnf_stat_gref_outstanding);
391 391 }
392 392
393 393 /*
394 394 * Acquire a transmit id.
395 395 */
396 396 static xnf_txid_t *
397 397 txid_get(xnf_t *xnfp)
398 398 {
399 399 xnf_txid_t *tidp;
400 400
401 401 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
402 402
403 403 if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID)
404 404 return (NULL);
405 405
406 406 ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head));
407 407
408 408 tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head);
409 409 xnfp->xnf_tx_pkt_id_head = tidp->next;
410 410 tidp->next = INVALID_TX_ID;
411 411
412 412 ASSERT(tidp->txbuf == NULL);
413 413
414 414 return (tidp);
415 415 }
416 416
417 417 /*
418 418 * Release a transmit id.
419 419 */
420 420 static void
421 421 txid_put(xnf_t *xnfp, xnf_txid_t *tidp)
422 422 {
423 423 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
424 424 ASSERT(TX_ID_VALID(tidp->id));
425 425 ASSERT(tidp->next == INVALID_TX_ID);
426 426
427 427 tidp->txbuf = NULL;
428 428 tidp->next = xnfp->xnf_tx_pkt_id_head;
429 429 xnfp->xnf_tx_pkt_id_head = tidp->id;
430 430 }
431 431
432 432 /*
433 433 * Get `wanted' slots in the transmit ring, waiting for at least that
434 434 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting
435 435 * `wanted' to zero.
436 436 *
437 437 * Return the number of slots available.
438 438 */
439 439 static int
440 440 tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
441 441 {
442 442 int slotsfree;
443 443 boolean_t forced_clean = (wanted == 0);
444 444
445 445 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
446 446
447 447 /* LINTED: constant in conditional context */
448 448 while (B_TRUE) {
449 449 slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);
450 450
451 451 if ((slotsfree < wanted) || forced_clean)
452 452 slotsfree = xnf_tx_clean_ring(xnfp);
453 453
454 454 /*
455 455 * If there are more than we need free, tell other
456 456 * people to come looking again. We hold txlock, so we
457 457 * are able to take our slots before anyone else runs.
458 458 */
459 459 if (slotsfree > wanted)
460 460 cv_broadcast(&xnfp->xnf_cv_tx_slots);
461 461
462 462 if (slotsfree >= wanted)
463 463 break;
464 464
465 465 if (!wait)
466 466 break;
467 467
468 468 cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
469 469 }
470 470
471 471 ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));
472 472
473 473 return (slotsfree);
474 474 }
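
   [Editor's aside, not part of xnf.c: the two calling conventions of
   tx_slots_get() described above, condensed from its callers in this file
   (xnf_set_multicast() and xnf_intr()). Illustrative only; assumes an
   xnf_t *xnfp and int n_slots, free_slots in scope. Both forms require
   xnf_txlock to be held.]

	/* Reserve two slots, blocking until at least two are free. */
	mutex_enter(&xnfp->xnf_txlock);
	n_slots = tx_slots_get(xnfp, 2, B_TRUE);
	ASSERT(n_slots >= 2);
	/* ... fill in and push the two requests ... */
	mutex_exit(&xnfp->xnf_txlock);

	/* Pass wanted == 0 simply to force a lazy clean of the ring. */
	mutex_enter(&xnfp->xnf_txlock);
	free_slots = tx_slots_get(xnfp, 0, B_FALSE);
	mutex_exit(&xnfp->xnf_txlock);
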
475 475
476 476 static int
477 477 xnf_setup_rings(xnf_t *xnfp)
478 478 {
479 479 domid_t oeid;
480 480 struct xenbus_device *xsd;
481 481 RING_IDX i;
482 482 int err;
483 483 xnf_txid_t *tidp;
484 484 xnf_buf_t **bdescp;
485 485
486 486 oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
487 487 xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
488 488
489 489 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
490 490 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
491 491
492 492 err = gnttab_grant_foreign_access(oeid,
493 493 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
494 494 if (err <= 0) {
495 495 err = -err;
496 496 xenbus_dev_error(xsd, err, "granting access to tx ring page");
497 497 goto out;
498 498 }
499 499 xnfp->xnf_tx_ring_ref = (grant_ref_t)err;
500 500
501 501 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
502 502 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
503 503
504 504 err = gnttab_grant_foreign_access(oeid,
505 505 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
506 506 if (err <= 0) {
507 507 err = -err;
508 508 xenbus_dev_error(xsd, err, "granting access to rx ring page");
509 509 goto out;
510 510 }
511 511 xnfp->xnf_rx_ring_ref = (grant_ref_t)err;
512 512
513 513 mutex_enter(&xnfp->xnf_txlock);
514 514
515 515 /*
516 516 * Setup/cleanup the TX ring. Note that this can lose packets
517 517 * after a resume, but we expect to stagger on.
518 518 */
 519 519 	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
520 520 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
521 521 i < NET_TX_RING_SIZE;
522 522 i++, tidp++) {
523 523 xnf_txbuf_t *txp;
524 524
525 525 tidp->id = i;
526 526
527 527 txp = tidp->txbuf;
528 528 if (txp == NULL) {
529 529 tidp->next = INVALID_TX_ID; /* Appease txid_put(). */
530 530 txid_put(xnfp, tidp);
531 531 continue;
532 532 }
533 533
534 534 ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF);
535 535 ASSERT(txp->tx_mp != NULL);
536 536
537 537 switch (txp->tx_type) {
538 538 case TX_DATA:
539 539 VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref)
540 540 == 0);
541 541
542 542 if (txp->tx_bdesc == NULL) {
543 543 (void) gnttab_end_foreign_access_ref(
544 544 txp->tx_txreq.gref, 1);
545 545 gref_put(xnfp, txp->tx_txreq.gref);
546 546 (void) ddi_dma_unbind_handle(
547 547 txp->tx_dma_handle);
548 548 } else {
549 549 xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE);
550 550 }
551 551
552 552 freemsg(txp->tx_mp);
553 553 txid_put(xnfp, tidp);
554 554 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
555 555
556 556 break;
557 557
558 558 case TX_MCAST_REQ:
559 559 txp->tx_type = TX_MCAST_RSP;
560 560 txp->tx_status = NETIF_RSP_DROPPED;
561 561 cv_broadcast(&xnfp->xnf_cv_multicast);
562 562
563 563 /*
564 564 * The request consumed two slots in the ring,
565 565 * yet only a single xnf_txid_t is used. Step
566 566 * over the empty slot.
567 567 */
568 568 i++;
569 569 ASSERT(i < NET_TX_RING_SIZE);
570 570
571 571 break;
572 572
573 573 case TX_MCAST_RSP:
574 574 break;
575 575 }
576 576 }
577 577
578 578 /* LINTED: constant in conditional context */
579 579 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);
580 580 /* LINTED: constant in conditional context */
581 581 FRONT_RING_INIT(&xnfp->xnf_tx_ring,
582 582 xnfp->xnf_tx_ring.sring, PAGESIZE);
583 583
584 584 mutex_exit(&xnfp->xnf_txlock);
585 585
586 586 mutex_enter(&xnfp->xnf_rxlock);
587 587
588 588 /*
589 589 * Clean out any buffers currently posted to the receive ring
590 590 * before we reset it.
591 591 */
592 592 for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0];
593 593 i < NET_RX_RING_SIZE;
594 594 i++, bdescp++) {
595 595 if (*bdescp != NULL) {
596 596 xnf_buf_put(xnfp, *bdescp, B_FALSE);
597 597 *bdescp = NULL;
598 598 }
599 599 }
600 600
601 601 /* LINTED: constant in conditional context */
602 602 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring);
603 603 /* LINTED: constant in conditional context */
604 604 FRONT_RING_INIT(&xnfp->xnf_rx_ring,
605 605 xnfp->xnf_rx_ring.sring, PAGESIZE);
606 606
607 607 /*
608 608 * Fill the ring with buffers.
609 609 */
610 610 for (i = 0; i < NET_RX_RING_SIZE; i++) {
611 611 xnf_buf_t *bdesc;
612 612
613 613 bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE);
614 614 VERIFY(bdesc != NULL);
615 615 xnf_rxbuf_hang(xnfp, bdesc);
616 616 }
617 617
618 618 /* LINTED: constant in conditional context */
619 619 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);
620 620
621 621 mutex_exit(&xnfp->xnf_rxlock);
622 622
623 623 return (0);
624 624
625 625 out:
626 626 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
627 627 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
628 628 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
629 629
630 630 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
631 631 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
632 632 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
633 633
634 634 return (err);
635 635 }
636 636
637 637 /*
638 638 * Connect driver to back end, called to set up communication with
639 639 * back end driver both initially and on resume after restore/migrate.
640 640 */
641 641 void
642 642 xnf_be_connect(xnf_t *xnfp)
643 643 {
644 644 const char *message;
645 645 xenbus_transaction_t xbt;
646 646 struct xenbus_device *xsd;
647 647 char *xsname;
648 648 int err;
649 649
650 650 ASSERT(!xnfp->xnf_connected);
651 651
652 652 xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
653 653 xsname = xvdi_get_xsname(xnfp->xnf_devinfo);
654 654
655 655 err = xnf_setup_rings(xnfp);
656 656 if (err != 0) {
657 657 cmn_err(CE_WARN, "failed to set up tx/rx rings");
658 658 xenbus_dev_error(xsd, err, "setting up ring");
659 659 return;
660 660 }
661 661
662 662 again:
663 663 err = xenbus_transaction_start(&xbt);
664 664 if (err != 0) {
665 665 xenbus_dev_error(xsd, EIO, "starting transaction");
666 666 return;
667 667 }
668 668
669 669 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
670 670 xnfp->xnf_tx_ring_ref);
671 671 if (err != 0) {
672 672 message = "writing tx ring-ref";
673 673 goto abort_transaction;
674 674 }
675 675
676 676 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
677 677 xnfp->xnf_rx_ring_ref);
678 678 if (err != 0) {
679 679 message = "writing rx ring-ref";
680 680 goto abort_transaction;
681 681 }
682 682
683 683 err = xenbus_printf(xbt, xsname, "event-channel", "%u",
684 684 xnfp->xnf_evtchn);
685 685 if (err != 0) {
686 686 message = "writing event-channel";
687 687 goto abort_transaction;
688 688 }
689 689
690 690 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1);
691 691 if (err != 0) {
692 692 message = "writing feature-rx-notify";
693 693 goto abort_transaction;
694 694 }
695 695
696 696 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1);
697 697 if (err != 0) {
698 698 message = "writing request-rx-copy";
699 699 goto abort_transaction;
700 700 }
701 701
702 702 if (xnfp->xnf_be_mcast_control) {
703 703 err = xenbus_printf(xbt, xsname, "request-multicast-control",
704 704 "%d", 1);
705 705 if (err != 0) {
706 706 message = "writing request-multicast-control";
707 707 goto abort_transaction;
708 708 }
709 709 }
710 710
711 711 err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected);
712 712 if (err != 0) {
713 713 message = "switching state to XenbusStateConnected";
714 714 goto abort_transaction;
715 715 }
716 716
717 717 err = xenbus_transaction_end(xbt, 0);
718 718 if (err != 0) {
719 719 if (err == EAGAIN)
720 720 goto again;
721 721 xenbus_dev_error(xsd, err, "completing transaction");
722 722 }
723 723
724 724 return;
725 725
726 726 abort_transaction:
727 727 (void) xenbus_transaction_end(xbt, 1);
728 728 xenbus_dev_error(xsd, err, "%s", message);
729 729 }
730 730
731 731 /*
732 732 * Read configuration information from xenstore.
733 733 */
734 734 void
735 735 xnf_read_config(xnf_t *xnfp)
736 736 {
737 737 int err, be_cap;
738 738 char mac[ETHERADDRL * 3];
739 739 char *oename = xvdi_get_oename(xnfp->xnf_devinfo);
740 740
741 741 err = xenbus_scanf(XBT_NULL, oename, "mac",
742 742 "%s", (char *)&mac[0]);
743 743 if (err != 0) {
744 744 /*
 745 745 		 * bad: we're supposed to be set up with a proper MAC
 746 746 		 * address at this point.
747 747 */
748 748 cmn_err(CE_WARN, "%s%d: no mac address",
749 749 ddi_driver_name(xnfp->xnf_devinfo),
750 750 ddi_get_instance(xnfp->xnf_devinfo));
751 751 return;
752 752 }
753 753 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
754 754 err = ENOENT;
755 755 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT,
756 756 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo));
757 757 return;
758 758 }
759 759
760 760 err = xenbus_scanf(XBT_NULL, oename,
761 761 "feature-rx-copy", "%d", &be_cap);
762 762 /*
763 763 * If we fail to read the store we assume that the key is
764 764 * absent, implying an older domain at the far end. Older
765 765 * domains cannot do HV copy.
766 766 */
767 767 if (err != 0)
768 768 be_cap = 0;
769 769 xnfp->xnf_be_rx_copy = (be_cap != 0);
770 770
771 771 err = xenbus_scanf(XBT_NULL, oename,
772 772 "feature-multicast-control", "%d", &be_cap);
773 773 /*
774 774 * If we fail to read the store we assume that the key is
775 775 * absent, implying an older domain at the far end. Older
776 776 * domains do not support multicast control.
777 777 */
778 778 if (err != 0)
779 779 be_cap = 0;
780 780 xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control;
781 781 }
782 782
783 783 /*
784 784 * attach(9E) -- Attach a device to the system
785 785 */
786 786 static int
787 787 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
788 788 {
789 789 mac_register_t *macp;
790 790 xnf_t *xnfp;
791 791 int err;
792 792 char cachename[32];
793 793
794 794 #ifdef XNF_DEBUG
795 795 if (xnf_debug & XNF_DEBUG_DDI)
796 796 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo),
797 797 (void *)devinfo);
798 798 #endif
799 799
800 800 switch (cmd) {
801 801 case DDI_RESUME:
802 802 xnfp = ddi_get_driver_private(devinfo);
803 803 xnfp->xnf_gen++;
804 804
805 805 (void) xvdi_resume(devinfo);
806 806 (void) xvdi_alloc_evtchn(devinfo);
807 807 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
808 808 #ifdef XPV_HVM_DRIVER
809 809 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
810 810 xnfp);
811 811 #else
812 812 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
813 813 (caddr_t)xnfp);
814 814 #endif
815 815 return (DDI_SUCCESS);
816 816
817 817 case DDI_ATTACH:
818 818 break;
819 819
820 820 default:
821 821 return (DDI_FAILURE);
822 822 }
823 823
824 824 /*
825 825 * Allocate gld_mac_info_t and xnf_instance structures
826 826 */
827 827 macp = mac_alloc(MAC_VERSION);
828 828 if (macp == NULL)
829 829 return (DDI_FAILURE);
830 830 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP);
831 831
832 832 macp->m_dip = devinfo;
833 833 macp->m_driver = xnfp;
834 834 xnfp->xnf_devinfo = devinfo;
835 835
836 836 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
837 837 macp->m_src_addr = xnfp->xnf_mac_addr;
838 838 macp->m_callbacks = &xnf_callbacks;
839 839 macp->m_min_sdu = 0;
840 840 macp->m_max_sdu = XNF_MAXPKT;
841 841
842 842 xnfp->xnf_running = B_FALSE;
843 843 xnfp->xnf_connected = B_FALSE;
844 844 xnfp->xnf_be_rx_copy = B_FALSE;
845 845 xnfp->xnf_be_mcast_control = B_FALSE;
846 846 xnfp->xnf_need_sched = B_FALSE;
847 847
848 848 xnfp->xnf_rx_head = NULL;
849 849 xnfp->xnf_rx_tail = NULL;
850 850 xnfp->xnf_rx_new_buffers_posted = B_FALSE;
851 851
852 852 #ifdef XPV_HVM_DRIVER
853 853 /*
854 854 * Report our version to dom0.
855 855 */
856 856 if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d",
857 857 HVMPV_XNF_VERS))
858 858 cmn_err(CE_WARN, "xnf: couldn't write version\n");
859 859 #endif
860 860
861 861 /*
862 862 * Get the iblock cookie with which to initialize the mutexes.
863 863 */
864 864 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
865 865 != DDI_SUCCESS)
866 866 goto failure;
867 867
868 868 mutex_init(&xnfp->xnf_txlock,
869 869 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
870 870 mutex_init(&xnfp->xnf_rxlock,
871 871 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
872 872 mutex_init(&xnfp->xnf_schedlock,
873 873 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
874 874 mutex_init(&xnfp->xnf_gref_lock,
875 875 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
876 876
877 877 cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL);
878 878 cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL);
879 879 cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL);
880 880
881 881 (void) sprintf(cachename, "xnf_buf_cache_%d",
882 882 ddi_get_instance(devinfo));
883 883 xnfp->xnf_buf_cache = kmem_cache_create(cachename,
884 884 sizeof (xnf_buf_t), 0,
885 885 xnf_buf_constructor, xnf_buf_destructor,
886 886 NULL, xnfp, NULL, 0);
887 887 if (xnfp->xnf_buf_cache == NULL)
888 888 goto failure_0;
889 889
890 890 (void) sprintf(cachename, "xnf_tx_buf_cache_%d",
891 891 ddi_get_instance(devinfo));
892 892 xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename,
893 893 sizeof (xnf_txbuf_t), 0,
894 894 xnf_tx_buf_constructor, xnf_tx_buf_destructor,
895 895 NULL, xnfp, NULL, 0);
896 896 if (xnfp->xnf_tx_buf_cache == NULL)
897 897 goto failure_1;
898 898
899 899 xnfp->xnf_gref_head = INVALID_GRANT_REF;
900 900
901 901 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
902 902 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
903 903 "driver data structures",
904 904 ddi_get_instance(xnfp->xnf_devinfo));
905 905 goto failure_2;
906 906 }
907 907
908 908 xnfp->xnf_rx_ring.sring->rsp_event =
909 909 xnfp->xnf_tx_ring.sring->rsp_event = 1;
910 910
911 911 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
912 912 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
913 913
914 914 /* set driver private pointer now */
915 915 ddi_set_driver_private(devinfo, xnfp);
916 916
917 917 if (!xnf_kstat_init(xnfp))
918 918 goto failure_3;
919 919
920 920 /*
921 921 * Allocate an event channel, add the interrupt handler and
922 922 * bind it to the event channel.
923 923 */
924 924 (void) xvdi_alloc_evtchn(devinfo);
925 925 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
926 926 #ifdef XPV_HVM_DRIVER
927 927 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
928 928 #else
929 929 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
930 930 #endif
931 931
932 932 err = mac_register(macp, &xnfp->xnf_mh);
933 933 mac_free(macp);
934 934 macp = NULL;
935 935 if (err != 0)
936 936 goto failure_4;
937 937
938 938 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL)
939 939 != DDI_SUCCESS)
940 940 goto failure_5;
941 941
942 942 #ifdef XPV_HVM_DRIVER
943 943 /*
944 944 * In the HVM case, this driver essentially replaces a driver for
945 945 * a 'real' PCI NIC. Without the "model" property set to
946 946 * "Ethernet controller", like the PCI code does, netbooting does
947 947 * not work correctly, as strplumb_get_netdev_path() will not find
948 948 * this interface.
949 949 */
950 950 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model",
951 951 "Ethernet controller");
952 952 #endif
953 953
954 954 #ifdef XNF_DEBUG
955 955 if (xnf_debug_instance == NULL)
956 956 xnf_debug_instance = xnfp;
957 957 #endif
958 958
959 959 return (DDI_SUCCESS);
960 960
961 961 failure_5:
962 962 (void) mac_unregister(xnfp->xnf_mh);
963 963
964 964 failure_4:
965 965 #ifdef XPV_HVM_DRIVER
966 966 ec_unbind_evtchn(xnfp->xnf_evtchn);
967 967 xvdi_free_evtchn(devinfo);
968 968 #else
969 969 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
970 970 #endif
971 971 xnfp->xnf_evtchn = INVALID_EVTCHN;
972 972 kstat_delete(xnfp->xnf_kstat_aux);
973 973
974 974 failure_3:
975 975 xnf_release_dma_resources(xnfp);
976 976
977 977 failure_2:
978 978 kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
979 979
980 980 failure_1:
981 981 kmem_cache_destroy(xnfp->xnf_buf_cache);
982 982
983 983 failure_0:
984 984 cv_destroy(&xnfp->xnf_cv_tx_slots);
985 985 cv_destroy(&xnfp->xnf_cv_multicast);
986 986 cv_destroy(&xnfp->xnf_cv_state);
987 987
988 988 mutex_destroy(&xnfp->xnf_gref_lock);
989 989 mutex_destroy(&xnfp->xnf_schedlock);
990 990 mutex_destroy(&xnfp->xnf_rxlock);
991 991 mutex_destroy(&xnfp->xnf_txlock);
992 992
993 993 failure:
994 994 kmem_free(xnfp, sizeof (*xnfp));
995 995 if (macp != NULL)
996 996 mac_free(macp);
997 997
998 998 return (DDI_FAILURE);
999 999 }
1000 1000
1001 1001 /* detach(9E) -- Detach a device from the system */
1002 1002 static int
1003 1003 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1004 1004 {
1005 1005 xnf_t *xnfp; /* Our private device info */
1006 1006
1007 1007 #ifdef XNF_DEBUG
1008 1008 if (xnf_debug & XNF_DEBUG_DDI)
1009 1009 printf("xnf_detach(0x%p)\n", (void *)devinfo);
1010 1010 #endif
1011 1011
1012 1012 xnfp = ddi_get_driver_private(devinfo);
1013 1013
1014 1014 switch (cmd) {
1015 1015 case DDI_SUSPEND:
1016 1016 #ifdef XPV_HVM_DRIVER
1017 1017 ec_unbind_evtchn(xnfp->xnf_evtchn);
1018 1018 xvdi_free_evtchn(devinfo);
1019 1019 #else
1020 1020 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1021 1021 #endif
1022 1022
1023 1023 xvdi_suspend(devinfo);
1024 1024
1025 1025 mutex_enter(&xnfp->xnf_rxlock);
1026 1026 mutex_enter(&xnfp->xnf_txlock);
1027 1027
1028 1028 xnfp->xnf_evtchn = INVALID_EVTCHN;
1029 1029 xnfp->xnf_connected = B_FALSE;
1030 1030 mutex_exit(&xnfp->xnf_txlock);
1031 1031 mutex_exit(&xnfp->xnf_rxlock);
1032 1032
1033 1033 /* claim link to be down after disconnect */
1034 1034 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN);
1035 1035 return (DDI_SUCCESS);
1036 1036
1037 1037 case DDI_DETACH:
1038 1038 break;
1039 1039
1040 1040 default:
1041 1041 return (DDI_FAILURE);
1042 1042 }
1043 1043
1044 1044 if (xnfp->xnf_connected)
1045 1045 return (DDI_FAILURE);
1046 1046
1047 1047 /*
1048 1048 * Cannot detach if we have xnf_buf_t outstanding.
1049 1049 */
1050 1050 if (xnfp->xnf_stat_buf_allocated > 0)
1051 1051 return (DDI_FAILURE);
1052 1052
1053 1053 if (mac_unregister(xnfp->xnf_mh) != 0)
1054 1054 return (DDI_FAILURE);
1055 1055
1056 1056 kstat_delete(xnfp->xnf_kstat_aux);
1057 1057
1058 1058 /* Stop the receiver */
1059 1059 xnf_stop(xnfp);
1060 1060
1061 1061 xvdi_remove_event_handler(devinfo, XS_OE_STATE);
1062 1062
1063 1063 /* Remove the interrupt */
1064 1064 #ifdef XPV_HVM_DRIVER
1065 1065 ec_unbind_evtchn(xnfp->xnf_evtchn);
1066 1066 xvdi_free_evtchn(devinfo);
1067 1067 #else
1068 1068 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1069 1069 #endif
1070 1070
1071 1071 /* Release any pending xmit mblks */
1072 1072 xnf_release_mblks(xnfp);
1073 1073
1074 1074 /* Release all DMA resources */
1075 1075 xnf_release_dma_resources(xnfp);
1076 1076
1077 1077 cv_destroy(&xnfp->xnf_cv_tx_slots);
1078 1078 cv_destroy(&xnfp->xnf_cv_multicast);
1079 1079 cv_destroy(&xnfp->xnf_cv_state);
1080 1080
1081 1081 kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
1082 1082 kmem_cache_destroy(xnfp->xnf_buf_cache);
1083 1083
1084 1084 mutex_destroy(&xnfp->xnf_gref_lock);
1085 1085 mutex_destroy(&xnfp->xnf_schedlock);
1086 1086 mutex_destroy(&xnfp->xnf_rxlock);
1087 1087 mutex_destroy(&xnfp->xnf_txlock);
1088 1088
1089 1089 kmem_free(xnfp, sizeof (*xnfp));
1090 1090
1091 1091 return (DDI_SUCCESS);
1092 1092 }
1093 1093
1094 1094 /*
1095 1095 * xnf_set_mac_addr() -- set the physical network address on the board.
1096 1096 */
1097 1097 static int
1098 1098 xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
1099 1099 {
1100 1100 _NOTE(ARGUNUSED(arg, macaddr));
1101 1101
1102 1102 /*
1103 1103 * We can't set our macaddr.
1104 1104 */
1105 1105 return (ENOTSUP);
1106 1106 }
1107 1107
1108 1108 /*
1109 1109 * xnf_set_multicast() -- set (enable) or disable a multicast address.
1110 1110 *
1111 1111 * Program the hardware to enable/disable the multicast address
1112 1112 * in "mca". Enable if "add" is true, disable if false.
1113 1113 */
1114 1114 static int
1115 1115 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
1116 1116 {
1117 1117 xnf_t *xnfp = arg;
1118 1118 xnf_txbuf_t *txp;
1119 1119 int n_slots;
1120 1120 RING_IDX slot;
1121 1121 xnf_txid_t *tidp;
1122 1122 netif_tx_request_t *txrp;
1123 1123 struct netif_extra_info *erp;
1124 1124 boolean_t notify, result;
1125 1125
1126 1126 /*
1127 1127 * If the backend does not support multicast control then we
1128 1128 * must assume that the right packets will just arrive.
1129 1129 */
1130 1130 if (!xnfp->xnf_be_mcast_control)
1131 1131 return (0);
1132 1132
1133 1133 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
1134 1134
1135 1135 mutex_enter(&xnfp->xnf_txlock);
1136 1136
1137 1137 /*
1138 1138 * If we're not yet connected then claim success. This is
1139 1139 * acceptable because we refresh the entire set of multicast
1140 1140 * addresses when we get connected.
1141 1141 *
1142 1142 * We can't wait around here because the MAC layer expects
1143 1143 * this to be a non-blocking operation - waiting ends up
1144 1144 * causing a deadlock during resume.
1145 1145 */
1146 1146 if (!xnfp->xnf_connected) {
1147 1147 mutex_exit(&xnfp->xnf_txlock);
1148 1148 return (0);
1149 1149 }
1150 1150
1151 1151 /*
1152 1152 * 1. Acquire two slots in the ring.
1153 1153 * 2. Fill in the slots.
1154 1154 * 3. Request notification when the operation is done.
1155 1155 * 4. Kick the peer.
1156 1156 * 5. Wait for the response via xnf_tx_clean_ring().
1157 1157 */
1158 1158
1159 1159 n_slots = tx_slots_get(xnfp, 2, B_TRUE);
1160 1160 ASSERT(n_slots >= 2);
1161 1161
1162 1162 slot = xnfp->xnf_tx_ring.req_prod_pvt;
1163 1163 tidp = txid_get(xnfp);
1164 1164 VERIFY(tidp != NULL);
1165 1165
1166 1166 txp->tx_type = TX_MCAST_REQ;
1167 1167 txp->tx_slot = slot;
1168 1168
1169 1169 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
1170 1170 erp = (struct netif_extra_info *)
1171 1171 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1);
1172 1172
1173 1173 txrp->gref = 0;
1174 1174 txrp->size = 0;
1175 1175 txrp->offset = 0;
1176 1176 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). */
1177 1177 txrp->id = txp->tx_txreq.id = tidp->id;
1178 1178 txrp->flags = NETTXF_extra_info;
1179 1179
1180 1180 erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD :
1181 1181 XEN_NETIF_EXTRA_TYPE_MCAST_DEL;
1182 1182 bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL);
1183 1183
1184 1184 tidp->txbuf = txp;
1185 1185
1186 1186 xnfp->xnf_tx_ring.req_prod_pvt = slot + 2;
1187 1187
1188 1188 mutex_enter(&xnfp->xnf_schedlock);
1189 1189 xnfp->xnf_pending_multicast++;
1190 1190 mutex_exit(&xnfp->xnf_schedlock);
1191 1191
1192 1192 /* LINTED: constant in conditional context */
1193 1193 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
1194 1194 notify);
1195 1195 if (notify)
1196 1196 ec_notify_via_evtchn(xnfp->xnf_evtchn);
1197 1197
1198 1198 while (txp->tx_type == TX_MCAST_REQ)
1199 1199 cv_wait(&xnfp->xnf_cv_multicast,
1200 1200 &xnfp->xnf_txlock);
1201 1201
1202 1202 ASSERT(txp->tx_type == TX_MCAST_RSP);
1203 1203
1204 1204 mutex_enter(&xnfp->xnf_schedlock);
1205 1205 xnfp->xnf_pending_multicast--;
1206 1206 mutex_exit(&xnfp->xnf_schedlock);
1207 1207
1208 1208 result = (txp->tx_status == NETIF_RSP_OKAY);
1209 1209
1210 1210 txid_put(xnfp, tidp);
1211 1211
1212 1212 mutex_exit(&xnfp->xnf_txlock);
1213 1213
1214 1214 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1215 1215
1216 1216 return (result ? 0 : 1);
1217 1217 }
1218 1218
1219 1219 /*
1220 1220 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
1221 1221 *
1222 1222 * Program the hardware to enable/disable promiscuous mode.
1223 1223 */
1224 1224 static int
1225 1225 xnf_set_promiscuous(void *arg, boolean_t on)
1226 1226 {
1227 1227 _NOTE(ARGUNUSED(arg, on));
1228 1228
1229 1229 /*
1230 1230 * We can't really do this, but we pretend that we can in
1231 1231 * order that snoop will work.
1232 1232 */
1233 1233 return (0);
1234 1234 }
1235 1235
1236 1236 /*
1237 1237 * Clean buffers that we have responses for from the transmit ring.
1238 1238 */
1239 1239 static int
1240 1240 xnf_tx_clean_ring(xnf_t *xnfp)
1241 1241 {
1242 1242 boolean_t work_to_do;
1243 1243
1244 1244 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
1245 1245
1246 1246 loop:
1247 1247 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) {
1248 1248 RING_IDX cons, prod, i;
1249 1249
1250 1250 cons = xnfp->xnf_tx_ring.rsp_cons;
1251 1251 prod = xnfp->xnf_tx_ring.sring->rsp_prod;
1252 1252 membar_consumer();
1253 1253 /*
1254 1254 * Clean tx requests from ring that we have responses
1255 1255 * for.
1256 1256 */
1257 1257 DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod);
1258 1258 for (i = cons; i != prod; i++) {
1259 1259 netif_tx_response_t *trp;
1260 1260 xnf_txid_t *tidp;
1261 1261 xnf_txbuf_t *txp;
1262 1262
1263 1263 trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i);
1264 1264 ASSERT(TX_ID_VALID(trp->id));
1265 1265
1266 1266 tidp = TX_ID_TO_TXID(xnfp, trp->id);
1267 1267 ASSERT(tidp->id == trp->id);
1268 1268 ASSERT(tidp->next == INVALID_TX_ID);
1269 1269
1270 1270 txp = tidp->txbuf;
1271 1271 ASSERT(txp != NULL);
1272 1272 ASSERT(txp->tx_txreq.id == trp->id);
1273 1273
1274 1274 switch (txp->tx_type) {
1275 1275 case TX_DATA:
1276 1276 if (gnttab_query_foreign_access(
1277 1277 txp->tx_txreq.gref) != 0)
1278 1278 cmn_err(CE_PANIC,
1279 1279 "tx grant %d still in use by "
1280 1280 "backend domain",
1281 1281 txp->tx_txreq.gref);
1282 1282
1283 1283 if (txp->tx_bdesc == NULL) {
1284 1284 (void) gnttab_end_foreign_access_ref(
1285 1285 txp->tx_txreq.gref, 1);
1286 1286 gref_put(xnfp, txp->tx_txreq.gref);
1287 1287 (void) ddi_dma_unbind_handle(
1288 1288 txp->tx_dma_handle);
1289 1289 } else {
1290 1290 xnf_buf_put(xnfp, txp->tx_bdesc,
1291 1291 B_TRUE);
1292 1292 }
1293 1293
1294 1294 freemsg(txp->tx_mp);
1295 1295 txid_put(xnfp, tidp);
1296 1296 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1297 1297
1298 1298 break;
1299 1299
1300 1300 case TX_MCAST_REQ:
1301 1301 txp->tx_type = TX_MCAST_RSP;
1302 1302 txp->tx_status = trp->status;
1303 1303 cv_broadcast(&xnfp->xnf_cv_multicast);
1304 1304
1305 1305 break;
1306 1306
1307 1307 case TX_MCAST_RSP:
1308 1308 break;
1309 1309
1310 1310 default:
1311 1311 cmn_err(CE_PANIC, "xnf_tx_clean_ring: "
1312 1312 "invalid xnf_txbuf_t type: %d",
1313 1313 txp->tx_type);
1314 1314 break;
1315 1315 }
1316 1316 }
1317 1317 /*
1318 1318 * Record the last response we dealt with so that we
1319 1319 * know where to start next time around.
1320 1320 */
1321 1321 xnfp->xnf_tx_ring.rsp_cons = prod;
1322 1322 membar_enter();
1323 1323 }
1324 1324
1325 1325 /* LINTED: constant in conditional context */
1326 1326 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do);
1327 1327 if (work_to_do)
1328 1328 goto loop;
1329 1329
1330 1330 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring));
1331 1331 }
1332 1332
1333 1333 /*
1334 1334 * Allocate and fill in a look-aside buffer for the packet `mp'. Used
1335 1335 * to ensure that the packet is physically contiguous and contained
1336 1336 * within a single page.
1337 1337 */
1338 1338 static xnf_buf_t *
1339 1339 xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp)
1340 1340 {
1341 1341 xnf_buf_t *bd;
1342 1342 caddr_t bp;
1343 1343
1344 1344 bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE);
1345 1345 if (bd == NULL)
1346 1346 return (NULL);
1347 1347
1348 1348 bp = bd->buf;
1349 1349 while (mp != NULL) {
1350 1350 size_t len = MBLKL(mp);
1351 1351
1352 1352 bcopy(mp->b_rptr, bp, len);
1353 1353 bp += len;
1354 1354
1355 1355 mp = mp->b_cont;
1356 1356 }
1357 1357
1358 1358 ASSERT((bp - bd->buf) <= PAGESIZE);
1359 1359
1360 1360 xnfp->xnf_stat_tx_pullup++;
1361 1361
1362 1362 return (bd);
1363 1363 }
1364 1364
1365 1365 /*
1366 1366 * Insert the pseudo-header checksum into the packet `buf'.
1367 1367 */
1368 1368 void
1369 1369 xnf_pseudo_cksum(caddr_t buf, int length)
1370 1370 {
1371 1371 struct ether_header *ehp;
1372 1372 uint16_t sap, len, *stuff;
1373 1373 uint32_t cksum;
1374 1374 size_t offset;
1375 1375 ipha_t *ipha;
1376 1376 ipaddr_t src, dst;
1377 1377
1378 1378 ASSERT(length >= sizeof (*ehp));
1379 1379 ehp = (struct ether_header *)buf;
1380 1380
1381 1381 if (ntohs(ehp->ether_type) == VLAN_TPID) {
1382 1382 struct ether_vlan_header *evhp;
1383 1383
1384 1384 ASSERT(length >= sizeof (*evhp));
1385 1385 evhp = (struct ether_vlan_header *)buf;
1386 1386 sap = ntohs(evhp->ether_type);
1387 1387 offset = sizeof (*evhp);
1388 1388 } else {
1389 1389 sap = ntohs(ehp->ether_type);
1390 1390 offset = sizeof (*ehp);
1391 1391 }
1392 1392
1393 1393 ASSERT(sap == ETHERTYPE_IP);
1394 1394
1395 1395 /* Packet should have been pulled up by the caller. */
1396 1396 if ((offset + sizeof (ipha_t)) > length) {
1397 1397 cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum");
1398 1398 return;
1399 1399 }
1400 1400
1401 1401 ipha = (ipha_t *)(buf + offset);
1402 1402
1403 1403 ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH);
1404 1404
1405 1405 len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH;
1406 1406
1407 1407 switch (ipha->ipha_protocol) {
1408 1408 case IPPROTO_TCP:
1409 1409 stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
1410 1410 cksum = IP_TCP_CSUM_COMP;
1411 1411 break;
1412 1412 case IPPROTO_UDP:
1413 1413 stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
1414 1414 cksum = IP_UDP_CSUM_COMP;
1415 1415 break;
1416 1416 default:
1417 1417 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d",
1418 1418 ipha->ipha_protocol);
1419 1419 return;
1420 1420 }
1421 1421
1422 1422 src = ipha->ipha_src;
1423 1423 dst = ipha->ipha_dst;
1424 1424
1425 1425 cksum += (dst >> 16) + (dst & 0xFFFF);
1426 1426 cksum += (src >> 16) + (src & 0xFFFF);
1427 1427 cksum += htons(len);
1428 1428
1429 1429 cksum = (cksum >> 16) + (cksum & 0xFFFF);
1430 1430 cksum = (cksum >> 16) + (cksum & 0xFFFF);
1431 1431
1432 1432 ASSERT(cksum <= 0xFFFF);
1433 1433
1434 1434 *stuff = (uint16_t)(cksum ? cksum : ~cksum);
1435 1435 }
1436 1436
1437 1437 /*
1438 1438 * Push a list of prepared packets (`txp') into the transmit ring.
1439 1439 */
1440 1440 static xnf_txbuf_t *
1441 1441 tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp)
1442 1442 {
1443 1443 int slots_free;
1444 1444 RING_IDX slot;
1445 1445 boolean_t notify;
1446 1446
1447 1447 mutex_enter(&xnfp->xnf_txlock);
1448 1448
1449 1449 ASSERT(xnfp->xnf_running);
1450 1450
1451 1451 /*
1452 1452 * Wait until we are connected to the backend.
1453 1453 */
1454 1454 while (!xnfp->xnf_connected)
1455 1455 cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock);
1456 1456
1457 1457 slots_free = tx_slots_get(xnfp, 1, B_FALSE);
1458 1458 DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free);
1459 1459
1460 1460 slot = xnfp->xnf_tx_ring.req_prod_pvt;
1461 1461
1462 1462 while ((txp != NULL) && (slots_free > 0)) {
1463 1463 xnf_txid_t *tidp;
1464 1464 netif_tx_request_t *txrp;
1465 1465
1466 1466 tidp = txid_get(xnfp);
1467 1467 VERIFY(tidp != NULL);
1468 1468
1469 1469 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
1470 1470
1471 1471 txp->tx_slot = slot;
1472 1472 txp->tx_txreq.id = tidp->id;
1473 1473 *txrp = txp->tx_txreq;
1474 1474
1475 1475 tidp->txbuf = txp;
1476 1476
1477 1477 xnfp->xnf_stat_opackets++;
1478 1478 xnfp->xnf_stat_obytes += txp->tx_txreq.size;
1479 1479
1480 1480 txp = txp->tx_next;
1481 1481 slots_free--;
1482 1482 slot++;
1483 1483
1484 1484 }
1485 1485
1486 1486 xnfp->xnf_tx_ring.req_prod_pvt = slot;
1487 1487
1488 1488 /*
1489 1489 * Tell the peer that we sent something, if it cares.
1490 1490 */
1491 1491 /* LINTED: constant in conditional context */
1492 1492 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
1493 1493 notify);
1494 1494 if (notify)
1495 1495 ec_notify_via_evtchn(xnfp->xnf_evtchn);
1496 1496
1497 1497 mutex_exit(&xnfp->xnf_txlock);
1498 1498
1499 1499 return (txp);
1500 1500 }
1501 1501
1502 1502 /*
1503 1503 * Send the chain of packets `mp'. Called by the MAC framework.
1504 1504 */
1505 1505 static mblk_t *
1506 1506 xnf_send(void *arg, mblk_t *mp)
1507 1507 {
1508 1508 xnf_t *xnfp = arg;
1509 1509 domid_t oeid;
1510 1510 xnf_txbuf_t *head, *tail;
1511 1511 mblk_t *ml;
1512 1512 int prepared;
1513 1513
1514 1514 oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
1515 1515
1516 1516 /*
1517 1517 * Prepare packets for transmission.
1518 1518 */
1519 1519 head = tail = NULL;
1520 1520 prepared = 0;
1521 1521 while (mp != NULL) {
1522 1522 xnf_txbuf_t *txp;
1523 1523 int n_chunks, length;
1524 1524 boolean_t page_oops;
1525 1525 uint32_t pflags;
1526 1526
1527 1527 for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE;
1528 1528 ml != NULL;
1529 1529 ml = ml->b_cont, n_chunks++) {
1530 1530
1531 1531 /*
1532 1532 * Test if this buffer includes a page
1533 1533 * boundary. The test assumes that the range
1534 1534 * b_rptr...b_wptr can include only a single
1535 1535 * boundary.
1536 1536 */
1537 1537 if (xnf_btop((size_t)ml->b_rptr) !=
1538 1538 xnf_btop((size_t)ml->b_wptr)) {
1539 1539 xnfp->xnf_stat_tx_pagebndry++;
1540 1540 page_oops = B_TRUE;
1541 1541 }
1542 1542
1543 1543 length += MBLKL(ml);
1544 1544 }
1545 1545 DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks);
1546 1546
1547 1547 /*
1548 1548 * Make sure packet isn't too large.
1549 1549 */
1550 1550 if (length > XNF_FRAMESIZE) {
1551 1551 cmn_err(CE_WARN,
1552 1552 "xnf%d: oversized packet (%d bytes) dropped",
1553 1553 ddi_get_instance(xnfp->xnf_devinfo), length);
1554 1554 freemsg(mp);
1555 1555 continue;
1556 1556 }
1557 1557
1558 1558 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
1559 1559
1560 1560 txp->tx_type = TX_DATA;
1561 1561
1562 1562 if ((n_chunks > xnf_max_tx_frags) || page_oops) {
1563 1563 /*
1564 1564 * Loan a side buffer rather than the mblk
1565 1565 * itself.
1566 1566 */
1567 1567 txp->tx_bdesc = xnf_tx_pullup(xnfp, mp);
1568 1568 if (txp->tx_bdesc == NULL) {
1569 1569 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1570 1570 break;
1571 1571 }
1572 1572
1573 1573 txp->tx_bufp = txp->tx_bdesc->buf;
1574 1574 txp->tx_mfn = txp->tx_bdesc->buf_mfn;
1575 1575 txp->tx_txreq.gref = txp->tx_bdesc->grant_ref;
1576 1576
1577 1577 } else {
1578 1578 int rc;
1579 1579 ddi_dma_cookie_t dma_cookie;
1580 1580 uint_t ncookies;
1581 1581
1582 1582 rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle,
1583 1583 NULL, (char *)mp->b_rptr, length,
1584 1584 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1585 1585 DDI_DMA_DONTWAIT, 0, &dma_cookie,
1586 1586 &ncookies);
1587 1587 if (rc != DDI_DMA_MAPPED) {
1588 1588 ASSERT(rc != DDI_DMA_INUSE);
1589 1589 ASSERT(rc != DDI_DMA_PARTIAL_MAP);
1590 1590
1591 1591 #ifdef XNF_DEBUG
1592 1592 if (rc != DDI_DMA_NORESOURCES)
1593 1593 cmn_err(CE_WARN,
1594 1594 "xnf%d: bind_handle failed (%x)",
1595 1595 ddi_get_instance(xnfp->xnf_devinfo),
1596 1596 rc);
1597 1597 #endif
1598 1598 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1599 1599 break;
1600 1600 }
1601 1601 ASSERT(ncookies == 1);
1602 1602
1603 1603 txp->tx_bdesc = NULL;
1604 1604 txp->tx_bufp = (caddr_t)mp->b_rptr;
1605 1605 txp->tx_mfn =
1606 1606 xnf_btop(pa_to_ma(dma_cookie.dmac_laddress));
1607 1607 txp->tx_txreq.gref = gref_get(xnfp);
1608 1608 if (txp->tx_txreq.gref == INVALID_GRANT_REF) {
1609 1609 (void) ddi_dma_unbind_handle(
1610 1610 txp->tx_dma_handle);
1611 1611 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1612 1612 break;
1613 1613 }
1614 1614 gnttab_grant_foreign_access_ref(txp->tx_txreq.gref,
1615 1615 oeid, txp->tx_mfn, 1);
1616 1616 }
1617 1617
1618 1618 txp->tx_next = NULL;
1619 1619 txp->tx_mp = mp;
1620 1620 txp->tx_txreq.size = length;
1621 1621 txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET;
1622 1622 txp->tx_txreq.flags = 0;
1623 1623 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
1624 1624 if (pflags != 0) {
1625 1625 /*
1626 1626 * If the local protocol stack requests checksum
1627 1627 * offload we set the 'checksum blank' flag,
1628 1628 * indicating to the peer that we need the checksum
1629 1629 * calculated for us.
1630 1630 *
1631 1631 * We _don't_ set the validated flag, because we haven't
1632 1632 * validated that the data and the checksum match.
1633 1633 */
1634 1634 xnf_pseudo_cksum(txp->tx_bufp, length);
1635 1635 txp->tx_txreq.flags |= NETTXF_csum_blank;
1636 1636
1637 1637 xnfp->xnf_stat_tx_cksum_deferred++;
1638 1638 }
1639 1639
1640 1640 if (head == NULL) {
1641 1641 ASSERT(tail == NULL);
1642 1642
1643 1643 head = txp;
1644 1644 } else {
1645 1645 ASSERT(tail != NULL);
1646 1646
1647 1647 tail->tx_next = txp;
1648 1648 }
1649 1649 tail = txp;
1650 1650
1651 1651 mp = mp->b_next;
1652 1652 prepared++;
1653 1653
1654 1654 /*
1655 1655 * There is no point in preparing more than
1656 1656 * NET_TX_RING_SIZE, as we won't be able to push them
1657 1657 * into the ring in one go and would hence have to
1658 1658 * un-prepare the extra.
1659 1659 */
1660 1660 if (prepared == NET_TX_RING_SIZE)
1661 1661 break;
1662 1662 }
1663 1663
1664 1664 DTRACE_PROBE1(xnf_send_prepared, int, prepared);
1665 1665
1666 1666 if (mp != NULL) {
1667 1667 #ifdef XNF_DEBUG
1668 1668 int notprepared = 0;
1669 1669 mblk_t *l = mp;
1670 1670
1671 1671 while (l != NULL) {
1672 1672 notprepared++;
1673 1673 l = l->b_next;
1674 1674 }
1675 1675
1676 1676 DTRACE_PROBE1(xnf_send_notprepared, int, notprepared);
1677 1677 #else /* !XNF_DEBUG */
1678 1678 DTRACE_PROBE1(xnf_send_notprepared, int, -1);
1679 1679 #endif /* XNF_DEBUG */
1680 1680 }
1681 1681
1682 1682 /*
1683 1683 * Push the packets we have prepared into the ring. They may
1684 1684 * not all go.
1685 1685 */
1686 1686 if (head != NULL)
1687 1687 head = tx_push_packets(xnfp, head);
1688 1688
1689 1689 /*
1690 1690 * If some packets that we prepared were not sent, unprepare
1691 1691 * them and add them back to the head of those we didn't
1692 1692 * prepare.
1693 1693 */
1694 1694 {
1695 1695 xnf_txbuf_t *loop;
1696 1696 mblk_t *mp_head, *mp_tail;
1697 1697 int unprepared = 0;
1698 1698
1699 1699 mp_head = mp_tail = NULL;
1700 1700 loop = head;
1701 1701
1702 1702 while (loop != NULL) {
1703 1703 xnf_txbuf_t *next = loop->tx_next;
1704 1704
1705 1705 if (loop->tx_bdesc == NULL) {
1706 1706 (void) gnttab_end_foreign_access_ref(
1707 1707 loop->tx_txreq.gref, 1);
1708 1708 gref_put(xnfp, loop->tx_txreq.gref);
1709 1709 (void) ddi_dma_unbind_handle(
1710 1710 loop->tx_dma_handle);
1711 1711 } else {
1712 1712 xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE);
1713 1713 }
1714 1714
1715 1715 ASSERT(loop->tx_mp != NULL);
1716 1716 if (mp_head == NULL)
1717 1717 mp_head = loop->tx_mp;
1718 1718 mp_tail = loop->tx_mp;
1719 1719
1720 1720 kmem_cache_free(xnfp->xnf_tx_buf_cache, loop);
1721 1721 loop = next;
1722 1722 unprepared++;
1723 1723 }
1724 1724
1725 1725 if (mp_tail == NULL) {
1726 1726 ASSERT(mp_head == NULL);
1727 1727 } else {
1728 1728 ASSERT(mp_head != NULL);
1729 1729
1730 1730 mp_tail->b_next = mp;
1731 1731 mp = mp_head;
1732 1732 }
1733 1733
1734 1734 DTRACE_PROBE1(xnf_send_unprepared, int, unprepared);
1735 1735 }
1736 1736
1737 1737 /*
1738 1738 * If any mblks are left then we have deferred for some reason
1739 1739 * and need to ask for a re-schedule later. This is typically
1740 1740 * due to the ring filling.
1741 1741 */
1742 1742 if (mp != NULL) {
1743 1743 mutex_enter(&xnfp->xnf_schedlock);
1744 1744 xnfp->xnf_need_sched = B_TRUE;
1745 1745 mutex_exit(&xnfp->xnf_schedlock);
1746 1746
1747 1747 xnfp->xnf_stat_tx_defer++;
1748 1748 }
1749 1749
1750 1750 return (mp);
1751 1751 }
1752 1752
1753 1753 /*
1754 1754 * Notification of RX packets. Currently no TX-complete interrupt is
1755 1755 * used, as we clean the TX ring lazily.
1756 1756 */
1757 1757 static uint_t
1758 1758 xnf_intr(caddr_t arg)
1759 1759 {
1760 1760 xnf_t *xnfp = (xnf_t *)arg;
1761 1761 mblk_t *mp;
1762 1762 boolean_t need_sched, clean_ring;
1763 1763
1764 1764 mutex_enter(&xnfp->xnf_rxlock);
1765 1765
1766 1766 /*
1767 1767 * Interrupts before we are connected are spurious.
1768 1768 */
1769 1769 if (!xnfp->xnf_connected) {
1770 1770 mutex_exit(&xnfp->xnf_rxlock);
1771 1771 xnfp->xnf_stat_unclaimed_interrupts++;
1772 1772 return (DDI_INTR_UNCLAIMED);
1773 1773 }
1774 1774
1775 1775 /*
1776 1776 * Receive side processing.
1777 1777 */
1778 1778 do {
1779 1779 /*
1780 1780 * Collect buffers from the ring.
1781 1781 */
1782 1782 xnf_rx_collect(xnfp);
1783 1783
1784 1784 /*
1785 1785 * Interrupt me when the next receive buffer is consumed.
1786 1786 */
1787 1787 xnfp->xnf_rx_ring.sring->rsp_event =
1788 1788 xnfp->xnf_rx_ring.rsp_cons + 1;
1789 1789 xen_mb();
1790 1790
1791 1791 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring));
1792 1792
1793 1793 if (xnfp->xnf_rx_new_buffers_posted) {
1794 1794 boolean_t notify;
1795 1795
1796 1796 /*
1797 1797 * Indicate to the peer that we have re-filled the
1798 1798 * receive ring, if it cares.
1799 1799 */
1800 1800 /* LINTED: constant in conditional context */
1801 1801 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
1802 1802 if (notify)
1803 1803 ec_notify_via_evtchn(xnfp->xnf_evtchn);
1804 1804 xnfp->xnf_rx_new_buffers_posted = B_FALSE;
1805 1805 }
1806 1806
1807 1807 mp = xnfp->xnf_rx_head;
1808 1808 xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL;
1809 1809
1810 1810 xnfp->xnf_stat_interrupts++;
1811 1811 mutex_exit(&xnfp->xnf_rxlock);
1812 1812
1813 1813 if (mp != NULL)
1814 1814 mac_rx(xnfp->xnf_mh, NULL, mp);
1815 1815
1816 1816 /*
1817 1817 * Transmit side processing.
1818 1818 *
1819 1819 * If a previous transmit attempt failed or we have pending
1820 1820 * multicast requests, clean the ring.
1821 1821 *
1822 1822 * If we previously stalled transmission and cleaning produces
1823 1823 * some free slots, tell upstream to attempt sending again.
1824 1824 *
1825 1825 * The odd style is to avoid acquiring xnf_txlock unless we
1826 1826 * will actually look inside the tx machinery.
1827 1827 */
1828 1828 mutex_enter(&xnfp->xnf_schedlock);
1829 1829 need_sched = xnfp->xnf_need_sched;
1830 1830 clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0);
1831 1831 mutex_exit(&xnfp->xnf_schedlock);
1832 1832
1833 1833 if (clean_ring) {
1834 1834 int free_slots;
1835 1835
1836 1836 mutex_enter(&xnfp->xnf_txlock);
1837 1837 free_slots = tx_slots_get(xnfp, 0, B_FALSE);
1838 1838
1839 1839 if (need_sched && (free_slots > 0)) {
1840 1840 mutex_enter(&xnfp->xnf_schedlock);
1841 1841 xnfp->xnf_need_sched = B_FALSE;
1842 1842 mutex_exit(&xnfp->xnf_schedlock);
1843 1843
1844 1844 mac_tx_update(xnfp->xnf_mh);
1845 1845 }
1846 1846 mutex_exit(&xnfp->xnf_txlock);
1847 1847 }
1848 1848
1849 1849 return (DDI_INTR_CLAIMED);
1850 1850 }
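
The comment in xnf_intr() explains why xnf_schedlock is consulted before xnf_txlock is taken: the cheap flag is sampled under the small lock so that the expensive transmit lock is only acquired when there is actually work to do. A minimal user-space sketch of that pattern, using pthread mutexes and illustrative names rather than the driver's locks, might look like this:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;
static bool need_sched;		/* set by the transmit path on deferral */

static void
interrupt_like_path(void)
{
	bool do_clean;

	/* Sample the flag under the cheap lock only. */
	pthread_mutex_lock(&sched_lock);
	do_clean = need_sched;
	pthread_mutex_unlock(&sched_lock);

	if (!do_clean)
		return;			/* never touch tx_lock */

	/* Heavy work: take the expensive lock, then clear the flag. */
	pthread_mutex_lock(&tx_lock);
	pthread_mutex_lock(&sched_lock);
	need_sched = false;
	pthread_mutex_unlock(&sched_lock);
	/* ... reclaim transmit resources, notify the upper layer ... */
	pthread_mutex_unlock(&tx_lock);
}
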
1851 1851
1852 1852 /*
1853 1853 * xnf_start() -- start the board receiving and enable interrupts.
1854 1854 */
1855 1855 static int
1856 1856 xnf_start(void *arg)
1857 1857 {
1858 1858 xnf_t *xnfp = arg;
1859 1859
1860 1860 #ifdef XNF_DEBUG
1861 1861 if (xnf_debug & XNF_DEBUG_TRACE)
1862 1862 printf("xnf%d start(0x%p)\n",
1863 1863 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
1864 1864 #endif
1865 1865
1866 1866 mutex_enter(&xnfp->xnf_rxlock);
1867 1867 mutex_enter(&xnfp->xnf_txlock);
1868 1868
1869 1869 /* Accept packets from above. */
1870 1870 xnfp->xnf_running = B_TRUE;
1871 1871
1872 1872 mutex_exit(&xnfp->xnf_txlock);
1873 1873 mutex_exit(&xnfp->xnf_rxlock);
1874 1874
1875 1875 return (0);
1876 1876 }
1877 1877
1878 1878 /* xnf_stop() - disable hardware */
1879 1879 static void
1880 1880 xnf_stop(void *arg)
1881 1881 {
1882 1882 xnf_t *xnfp = arg;
1883 1883
1884 1884 #ifdef XNF_DEBUG
1885 1885 if (xnf_debug & XNF_DEBUG_TRACE)
1886 1886 printf("xnf%d stop(0x%p)\n",
1887 1887 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
1888 1888 #endif
1889 1889
1890 1890 mutex_enter(&xnfp->xnf_rxlock);
1891 1891 mutex_enter(&xnfp->xnf_txlock);
1892 1892
1893 1893 xnfp->xnf_running = B_FALSE;
1894 1894
1895 1895 mutex_exit(&xnfp->xnf_txlock);
1896 1896 mutex_exit(&xnfp->xnf_rxlock);
1897 1897 }
1898 1898
1899 1899 /*
1900 1900 * Hang buffer `bdesc' on the RX ring.
1901 1901 */
1902 1902 static void
1903 1903 xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc)
1904 1904 {
1905 1905 netif_rx_request_t *reqp;
1906 1906 RING_IDX hang_ix;
1907 1907
1908 1908 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
1909 1909
1910 1910 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
1911 1911 xnfp->xnf_rx_ring.req_prod_pvt);
1912 1912 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
1913 1913 ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL);
1914 1914
1915 1915 reqp->id = bdesc->id = hang_ix;
1916 1916 reqp->gref = bdesc->grant_ref;
1917 1917
1918 1918 xnfp->xnf_rx_pkt_info[hang_ix] = bdesc;
1919 1919 xnfp->xnf_rx_ring.req_prod_pvt++;
1920 1920
1921 1921 xnfp->xnf_rx_new_buffers_posted = B_TRUE;
1922 1922 }
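
In xnf_rxbuf_hang() the slot index hang_ix is recovered by pointer arithmetic on the ring array, while req_prod_pvt itself is a free-running counter. The real macros come from Xen's public ring.h; the sketch below illustrates only the underlying index arithmetic, using a hypothetical ring layout.

#include <stdint.h>

#define	RING_SLOTS	256		/* must be a power of two */

struct toy_ring {
	uint32_t req_prod_pvt;		/* private producer index, never masked */
	int	 slots[RING_SLOTS];
};

/* Hand out the next request slot and report which array index it was. */
static int *
ring_next_request(struct toy_ring *r, uint32_t *ix)
{
	*ix = r->req_prod_pvt & (RING_SLOTS - 1);	/* masked slot index */
	r->req_prod_pvt++;				/* free-running counter */
	return (&r->slots[*ix]);
}
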
1923 1923
1924 1924 /*
1925 1925 * Collect packets from the RX ring, storing them in `xnfp' for later
1926 1926 * use.
1927 1927 */
1928 1928 static void
1929 1929 xnf_rx_collect(xnf_t *xnfp)
1930 1930 {
1931 1931 mblk_t *head, *tail;
1932 1932
1933 1933 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
1934 1934
1935 1935 /*
1936 1936 * Loop over unconsumed responses:
1937 1937 * 1. get a response
1938 1938 * 2. take corresponding buffer off recv. ring
1939 1939 * 3. indicate this by setting slot to NULL
1940 1940 * 4. create a new message and
1941 1941 * 5. copy data in, adjust ptr
1942 1942 */
1943 1943
1944 1944 head = tail = NULL;
1945 1945
1946 1946 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
1947 1947 netif_rx_response_t *rxpkt;
1948 1948 xnf_buf_t *bdesc;
1949 1949 ssize_t len;
1950 1950 size_t off;
1951 1951 mblk_t *mp = NULL;
1952 1952 boolean_t hwcsum = B_FALSE;
1953 1953 grant_ref_t ref;
1954 1954
1955 1955 /* 1. */
1956 1956 rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
1957 1957 xnfp->xnf_rx_ring.rsp_cons);
1958 1958
1959 1959 DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id,
1960 1960 int, (int)rxpkt->offset,
1961 1961 int, (int)rxpkt->flags,
1962 1962 int, (int)rxpkt->status);
1963 1963
1964 1964 /*
1965 1965 * 2.
1966 1966 */
1967 1967 bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id];
1968 1968
1969 1969 /*
1970 1970 * 3.
1971 1971 */
1972 1972 xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL;
1973 1973 ASSERT(bdesc->id == rxpkt->id);
1974 1974
1975 1975 ref = bdesc->grant_ref;
1976 1976 off = rxpkt->offset;
1977 1977 len = rxpkt->status;
1978 1978
1979 1979 if (!xnfp->xnf_running) {
1980 1980 DTRACE_PROBE4(xnf_rx_not_running,
1981 1981 int, rxpkt->status,
1982 1982 char *, bdesc->buf, int, rxpkt->offset,
1983 1983 char *, ((char *)bdesc->buf) + rxpkt->offset);
1984 1984
1985 1985 xnfp->xnf_stat_drop++;
1986 1986
1987 1987 } else if (len <= 0) {
1988 1988 DTRACE_PROBE4(xnf_rx_pkt_status_negative,
1989 1989 int, rxpkt->status,
1990 1990 char *, bdesc->buf, int, rxpkt->offset,
1991 1991 char *, ((char *)bdesc->buf) + rxpkt->offset);
1992 1992
1993 1993 xnfp->xnf_stat_errrx++;
1994 1994
1995 1995 switch (len) {
1996 1996 case 0:
1997 1997 xnfp->xnf_stat_runt++;
1998 1998 break;
1999 1999 case NETIF_RSP_ERROR:
2000 2000 xnfp->xnf_stat_mac_rcv_error++;
2001 2001 break;
2002 2002 case NETIF_RSP_DROPPED:
2003 2003 xnfp->xnf_stat_norxbuf++;
2004 2004 break;
2005 2005 }
2006 2006
2007 2007 } else if (bdesc->grant_ref == INVALID_GRANT_REF) {
2008 2008 cmn_err(CE_WARN, "Bad rx grant reference %d "
2009 2009 "from domain %d", ref,
2010 2010 xvdi_get_oeid(xnfp->xnf_devinfo));
2011 2011
2012 2012 } else if ((off + len) > PAGESIZE) {
2013 2013 cmn_err(CE_WARN, "Rx packet overflows page "
2014 2014 "(offset %ld, length %ld) from domain %d",
2015 2015 off, len, xvdi_get_oeid(xnfp->xnf_devinfo));
2016 2016 } else {
2017 2017 xnf_buf_t *nbuf = NULL;
2018 2018
2019 2019 DTRACE_PROBE4(xnf_rx_packet, int, len,
2020 2020 char *, bdesc->buf, int, off,
2021 2021 char *, ((char *)bdesc->buf) + off);
2022 2022
2023 2023 			ASSERT(off + len <= PAGESIZE);
2024 2024
2025 2025 if (rxpkt->flags & NETRXF_data_validated)
2026 2026 hwcsum = B_TRUE;
2027 2027
2028 2028 /*
2029 2029 * If the packet is below a pre-determined
2030 2030 * size we will copy data out rather than
2031 2031 * replace it.
2032 2032 */
2033 2033 if (len > xnf_rx_copy_limit)
2034 2034 nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
2035 2035
2036 2036 /*
2037 2037 * If we have a replacement buffer, attempt to
2038 2038 * wrap the existing one with an mblk_t in
2039 2039 * order that the upper layers of the stack
2040 2040 * might use it directly.
2041 2041 */
2042 2042 if (nbuf != NULL) {
2043 2043 mp = desballoc((unsigned char *)bdesc->buf,
2044 2044 bdesc->len, 0, &bdesc->free_rtn);
2045 2045 if (mp == NULL) {
2046 2046 xnfp->xnf_stat_rx_desballoc_fail++;
2047 2047 xnfp->xnf_stat_norxbuf++;
2048 2048
2049 2049 xnf_buf_put(xnfp, nbuf, B_FALSE);
2050 2050 nbuf = NULL;
2051 2051 } else {
2052 2052 mp->b_rptr = mp->b_rptr + off;
2053 2053 mp->b_wptr = mp->b_rptr + len;
2054 2054
2055 2055 /*
2056 2056 * Release the grant reference
2057 2057 * associated with this buffer
2058 2058 * - they are scarce and the
2059 2059 * upper layers of the stack
2060 2060 * don't need it.
2061 2061 */
2062 2062 (void) gnttab_end_foreign_access_ref(
2063 2063 bdesc->grant_ref, 0);
2064 2064 gref_put(xnfp, bdesc->grant_ref);
2065 2065 bdesc->grant_ref = INVALID_GRANT_REF;
2066 2066
2067 2067 bdesc = nbuf;
2068 2068 }
2069 2069 }
2070 2070
2071 2071 if (nbuf == NULL) {
2072 2072 /*
2073 2073 * No replacement buffer allocated -
2074 2074 * attempt to copy the data out and
2075 2075 * re-hang the existing buffer.
2076 2076 */
2077 2077
2078 2078 /* 4. */
2079 2079 mp = allocb(len, BPRI_MED);
2080 2080 if (mp == NULL) {
2081 2081 xnfp->xnf_stat_rx_allocb_fail++;
2082 2082 xnfp->xnf_stat_norxbuf++;
2083 2083 } else {
2084 2084 /* 5. */
2085 2085 bcopy(bdesc->buf + off, mp->b_wptr,
2086 2086 len);
2087 2087 mp->b_wptr += len;
2088 2088 }
2089 2089 }
2090 2090 }
2091 2091
2092 2092 /* Re-hang the buffer. */
2093 2093 xnf_rxbuf_hang(xnfp, bdesc);
2094 2094
2095 2095 if (mp != NULL) {
2096 2096 if (hwcsum) {
2097 2097 /*
2098 2098 * If the peer says that the data has
2099 2099 * been validated then we declare that
2100 2100 * the full checksum has been
2101 2101 * verified.
2102 2102 *
2103 2103 * We don't look at the "checksum
2104 2104 * blank" flag, and hence could have a
2105 2105 * packet here that we are asserting
2106 2106 * is good with a blank checksum.
2107 2107 */
2108 2108 mac_hcksum_set(mp, 0, 0, 0, 0,
2109 2109 HCK_FULLCKSUM_OK);
2110 2110 xnfp->xnf_stat_rx_cksum_no_need++;
2111 2111 }
2112 2112 if (head == NULL) {
2113 2113 ASSERT(tail == NULL);
2114 2114
2115 2115 head = mp;
2116 2116 } else {
2117 2117 ASSERT(tail != NULL);
2118 2118
2119 2119 tail->b_next = mp;
2120 2120 }
2121 2121 tail = mp;
2122 2122
2123 2123 ASSERT(mp->b_next == NULL);
2124 2124
2125 2125 xnfp->xnf_stat_ipackets++;
2126 2126 xnfp->xnf_stat_rbytes += len;
2127 2127 }
2128 2128
2129 2129 xnfp->xnf_rx_ring.rsp_cons++;
2130 2130 }
2131 2131
2132 2132 /*
2133 2133 * Store the mblks we have collected.
2134 2134 */
2135 2135 if (head != NULL) {
2136 2136 ASSERT(tail != NULL);
2137 2137
2138 2138 if (xnfp->xnf_rx_head == NULL) {
2139 2139 ASSERT(xnfp->xnf_rx_tail == NULL);
2140 2140
2141 2141 xnfp->xnf_rx_head = head;
2142 2142 } else {
2143 2143 ASSERT(xnfp->xnf_rx_tail != NULL);
2144 2144
2145 2145 xnfp->xnf_rx_tail->b_next = head;
2146 2146 }
2147 2147 xnfp->xnf_rx_tail = tail;
2148 2148 }
2149 2149 }
2150 2150
2151 2151 /*
2152 2152  * xnf_alloc_dma_resources() -- initialize the driver's structures
2153 2153 */
2154 2154 static int
2155 2155 xnf_alloc_dma_resources(xnf_t *xnfp)
2156 2156 {
2157 2157 dev_info_t *devinfo = xnfp->xnf_devinfo;
2158 2158 size_t len;
2159 2159 ddi_dma_cookie_t dma_cookie;
2160 2160 uint_t ncookies;
2161 2161 int rc;
2162 2162 caddr_t rptr;
2163 2163
2164 2164 /*
2165 2165 * The code below allocates all the DMA data structures that
2166 2166 * need to be released when the driver is detached.
2167 2167 *
2168 2168 * Allocate page for the transmit descriptor ring.
2169 2169 */
2170 2170 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2171 2171 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
2172 2172 goto alloc_error;
2173 2173
2174 2174 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
2175 2175 PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2176 2176 DDI_DMA_SLEEP, 0, &rptr, &len,
2177 2177 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
2178 2178 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2179 2179 xnfp->xnf_tx_ring_dma_handle = NULL;
2180 2180 goto alloc_error;
2181 2181 }
2182 2182
2183 2183 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
2184 2184 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2185 2185 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2186 2186 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2187 2187 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2188 2188 xnfp->xnf_tx_ring_dma_handle = NULL;
2189 2189 xnfp->xnf_tx_ring_dma_acchandle = NULL;
2190 2190 if (rc == DDI_DMA_NORESOURCES)
2191 2191 goto alloc_error;
2192 2192 else
2193 2193 goto error;
2194 2194 }
2195 2195
2196 2196 ASSERT(ncookies == 1);
2197 2197 bzero(rptr, PAGESIZE);
2198 2198 /* LINTED: constant in conditional context */
2199 2199 SHARED_RING_INIT((netif_tx_sring_t *)rptr);
2200 2200 /* LINTED: constant in conditional context */
2201 2201 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
2202 2202 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;
2203 2203
2204 2204 /*
2205 2205 * Allocate page for the receive descriptor ring.
2206 2206 */
2207 2207 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2208 2208 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
2209 2209 goto alloc_error;
2210 2210
2211 2211 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
2212 2212 PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2213 2213 DDI_DMA_SLEEP, 0, &rptr, &len,
2214 2214 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
2215 2215 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2216 2216 xnfp->xnf_rx_ring_dma_handle = NULL;
2217 2217 goto alloc_error;
2218 2218 }
2219 2219
2220 2220 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
2221 2221 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2222 2222 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2223 2223 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2224 2224 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2225 2225 xnfp->xnf_rx_ring_dma_handle = NULL;
2226 2226 xnfp->xnf_rx_ring_dma_acchandle = NULL;
2227 2227 if (rc == DDI_DMA_NORESOURCES)
2228 2228 goto alloc_error;
2229 2229 else
2230 2230 goto error;
2231 2231 }
2232 2232
2233 2233 ASSERT(ncookies == 1);
2234 2234 bzero(rptr, PAGESIZE);
2235 2235 /* LINTED: constant in conditional context */
2236 2236 SHARED_RING_INIT((netif_rx_sring_t *)rptr);
2237 2237 /* LINTED: constant in conditional context */
2238 2238 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
2239 2239 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;
2240 2240
2241 2241 return (DDI_SUCCESS);
2242 2242
2243 2243 alloc_error:
2244 2244 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
2245 2245 ddi_get_instance(xnfp->xnf_devinfo));
2246 2246 error:
2247 2247 xnf_release_dma_resources(xnfp);
2248 2248 return (DDI_FAILURE);
2249 2249 }
2250 2250
2251 2251 /*
2252 2252 * Release all DMA resources in the opposite order from acquisition
2253 2253 */
2254 2254 static void
2255 2255 xnf_release_dma_resources(xnf_t *xnfp)
2256 2256 {
2257 2257 int i;
2258 2258
2259 2259 /*
2260 2260 * Free receive buffers which are currently associated with
2261 2261 * descriptors.
2262 2262 */
2263 2263 mutex_enter(&xnfp->xnf_rxlock);
2264 2264 for (i = 0; i < NET_RX_RING_SIZE; i++) {
2265 2265 xnf_buf_t *bp;
2266 2266
2267 2267 if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL)
2268 2268 continue;
2269 2269 xnfp->xnf_rx_pkt_info[i] = NULL;
2270 2270 xnf_buf_put(xnfp, bp, B_FALSE);
2271 2271 }
2272 2272 mutex_exit(&xnfp->xnf_rxlock);
2273 2273
2274 2274 /* Free the receive ring buffer. */
2275 2275 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
2276 2276 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
2277 2277 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2278 2278 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2279 2279 xnfp->xnf_rx_ring_dma_acchandle = NULL;
2280 2280 }
2281 2281 /* Free the transmit ring buffer. */
2282 2282 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
2283 2283 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
2284 2284 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2285 2285 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2286 2286 xnfp->xnf_tx_ring_dma_acchandle = NULL;
2287 2287 }
2288 2288
2289 2289 }
2290 2290
2291 2291 /*
2292 2292 * Release any packets and associated structures used by the TX ring.
2293 2293 */
2294 2294 static void
2295 2295 xnf_release_mblks(xnf_t *xnfp)
2296 2296 {
2297 2297 RING_IDX i;
2298 2298 xnf_txid_t *tidp;
2299 2299
2300 2300 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
2301 2301 i < NET_TX_RING_SIZE;
2302 2302 i++, tidp++) {
2303 2303 xnf_txbuf_t *txp = tidp->txbuf;
2304 2304
2305 2305 if (txp != NULL) {
2306 2306 ASSERT(txp->tx_mp != NULL);
2307 2307 freemsg(txp->tx_mp);
2308 2308
2309 2309 txid_put(xnfp, tidp);
2310 2310 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
2311 2311 }
2312 2312 }
2313 2313 }
2314 2314
2315 2315 static int
2316 2316 xnf_buf_constructor(void *buf, void *arg, int kmflag)
2317 2317 {
2318 2318 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
2319 2319 xnf_buf_t *bdesc = buf;
2320 2320 xnf_t *xnfp = arg;
2321 2321 ddi_dma_cookie_t dma_cookie;
2322 2322 uint_t ncookies;
2323 2323 size_t len;
2324 2324
2325 2325 if (kmflag & KM_NOSLEEP)
2326 2326 ddiflags = DDI_DMA_DONTWAIT;
2327 2327
2328 2328 /* Allocate a DMA access handle for the buffer. */
2329 2329 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr,
2330 2330 ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS)
2331 2331 goto failure;
2332 2332
2333 2333 /* Allocate DMA-able memory for buffer. */
2334 2334 if (ddi_dma_mem_alloc(bdesc->dma_handle,
2335 2335 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0,
2336 2336 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
2337 2337 goto failure_1;
2338 2338
2339 2339 /* Bind to virtual address of buffer to get physical address. */
2340 2340 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
2341 2341 bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
2342 2342 ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
2343 2343 goto failure_2;
2344 2344 ASSERT(ncookies == 1);
2345 2345
2346 2346 bdesc->free_rtn.free_func = xnf_buf_recycle;
2347 2347 bdesc->free_rtn.free_arg = (caddr_t)bdesc;
2348 2348 bdesc->xnfp = xnfp;
2349 2349 bdesc->buf_phys = dma_cookie.dmac_laddress;
2350 2350 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
2351 2351 bdesc->len = dma_cookie.dmac_size;
2352 2352 bdesc->grant_ref = INVALID_GRANT_REF;
2353 2353 bdesc->gen = xnfp->xnf_gen;
2354 2354
2355 2355 atomic_inc_64(&xnfp->xnf_stat_buf_allocated);
2356 2356
2357 2357 return (0);
2358 2358
2359 2359 failure_2:
2360 2360 ddi_dma_mem_free(&bdesc->acc_handle);
2361 2361
2362 2362 failure_1:
2363 2363 ddi_dma_free_handle(&bdesc->dma_handle);
2364 2364
2365 2365 failure:
2366 2366
2367 2367 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
2368 2368 return (-1);
2369 2369 }
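
xnf_buf_constructor() unwinds partial initialisation through the failure_2/failure_1/failure labels, releasing resources in the reverse order of acquisition, and xnf_alloc_dma_resources() above does the same unwinding inline. The generic shape of that idiom, with purely hypothetical resources standing in for DMA handles and memory, is sketched below.

#include <stdlib.h>

/* Acquire three resources; on any failure, release those already held. */
int
acquire_all(void **a, void **b, void **c)
{
	if ((*a = malloc(32)) == NULL)
		goto failure;
	if ((*b = malloc(32)) == NULL)
		goto failure_1;
	if ((*c = malloc(32)) == NULL)
		goto failure_2;
	return (0);

failure_2:
	free(*b);			/* fall through: undo in reverse order */
failure_1:
	free(*a);
failure:
	return (-1);
}
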
2370 2370
2371 2371 static void
2372 2372 xnf_buf_destructor(void *buf, void *arg)
2373 2373 {
2374 2374 xnf_buf_t *bdesc = buf;
2375 2375 xnf_t *xnfp = arg;
2376 2376
2377 2377 (void) ddi_dma_unbind_handle(bdesc->dma_handle);
2378 2378 ddi_dma_mem_free(&bdesc->acc_handle);
2379 2379 ddi_dma_free_handle(&bdesc->dma_handle);
2380 2380
2381 2381 atomic_dec_64(&xnfp->xnf_stat_buf_allocated);
2382 2382 }
2383 2383
2384 2384 static xnf_buf_t *
2385 2385 xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly)
2386 2386 {
2387 2387 grant_ref_t gref;
2388 2388 xnf_buf_t *bufp;
2389 2389
2390 2390 /*
2391 2391 * Usually grant references are more scarce than memory, so we
2392 2392 * attempt to acquire a grant reference first.
2393 2393 */
2394 2394 gref = gref_get(xnfp);
2395 2395 if (gref == INVALID_GRANT_REF)
2396 2396 return (NULL);
2397 2397
2398 2398 bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags);
2399 2399 if (bufp == NULL) {
2400 2400 gref_put(xnfp, gref);
2401 2401 return (NULL);
2402 2402 }
2403 2403
2404 2404 ASSERT(bufp->grant_ref == INVALID_GRANT_REF);
2405 2405
2406 2406 bufp->grant_ref = gref;
2407 2407
2408 2408 if (bufp->gen != xnfp->xnf_gen)
2409 2409 xnf_buf_refresh(bufp);
2410 2410
2411 2411 gnttab_grant_foreign_access_ref(bufp->grant_ref,
2412 2412 xvdi_get_oeid(bufp->xnfp->xnf_devinfo),
2413 2413 bufp->buf_mfn, readonly ? 1 : 0);
2414 2414
2415 2415 atomic_inc_64(&xnfp->xnf_stat_buf_outstanding);
2416 2416
2417 2417 return (bufp);
2418 2418 }
2419 2419
2420 2420 static void
2421 2421 xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly)
2422 2422 {
2423 2423 if (bufp->grant_ref != INVALID_GRANT_REF) {
2424 2424 (void) gnttab_end_foreign_access_ref(
2425 2425 bufp->grant_ref, readonly ? 1 : 0);
2426 2426 gref_put(xnfp, bufp->grant_ref);
2427 2427 bufp->grant_ref = INVALID_GRANT_REF;
2428 2428 }
2429 2429
2430 2430 kmem_cache_free(xnfp->xnf_buf_cache, bufp);
2431 2431
2432 2432 atomic_dec_64(&xnfp->xnf_stat_buf_outstanding);
2433 2433 }
2434 2434
2435 2435 /*
2436 2436 * Refresh any cached data about a buffer after resume.
2437 2437 */
2438 2438 static void
2439 2439 xnf_buf_refresh(xnf_buf_t *bdesc)
2440 2440 {
2441 2441 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
2442 2442 bdesc->gen = bdesc->xnfp->xnf_gen;
2443 2443 }
2444 2444
2445 2445 /*
2446 2446 * Streams `freeb' routine for `xnf_buf_t' when used as transmit
2447 2447 * look-aside buffers.
2448 2448 */
2449 2449 static void
2450 2450 xnf_buf_recycle(xnf_buf_t *bdesc)
2451 2451 {
2452 2452 xnf_t *xnfp = bdesc->xnfp;
2453 2453
2454 2454 xnf_buf_put(xnfp, bdesc, B_TRUE);
2455 2455 }
2456 2456
2457 2457 static int
2458 2458 xnf_tx_buf_constructor(void *buf, void *arg, int kmflag)
2459 2459 {
2460 2460 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
2461 2461 xnf_txbuf_t *txp = buf;
2462 2462 xnf_t *xnfp = arg;
2463 2463
2464 2464 if (kmflag & KM_NOSLEEP)
2465 2465 ddiflags = DDI_DMA_DONTWAIT;
2466 2466
2467 2467 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr,
2468 2468 ddiflags, 0, &txp->tx_dma_handle) != DDI_SUCCESS) {
2469 2469 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
2470 2470 return (-1);
2471 2471 }
2472 2472
2473 2473 return (0);
2474 2474 }
2475 2475
2476 2476 static void
2477 2477 xnf_tx_buf_destructor(void *buf, void *arg)
2478 2478 {
2479 2479 _NOTE(ARGUNUSED(arg));
2480 2480 xnf_txbuf_t *txp = buf;
2481 2481
2482 2482 ddi_dma_free_handle(&txp->tx_dma_handle);
2483 2483 }
2484 2484
2485 2485 /*
2486 2486 * Statistics.
2487 2487 */
2488 2488 static char *xnf_aux_statistics[] = {
2489 2489 "tx_cksum_deferred",
2490 2490 "rx_cksum_no_need",
2491 2491 "interrupts",
2492 2492 "unclaimed_interrupts",
2493 2493 "tx_pullup",
2494 2494 "tx_pagebndry",
2495 2495 "tx_attempt",
2496 2496 "buf_allocated",
2497 2497 "buf_outstanding",
2498 2498 "gref_outstanding",
2499 2499 "gref_failure",
2500 2500 "gref_peak",
2501 2501 "rx_allocb_fail",
2502 2502 "rx_desballoc_fail",
2503 2503 };
2504 2504
2505 2505 static int
2506 2506 xnf_kstat_aux_update(kstat_t *ksp, int flag)
2507 2507 {
2508 2508 xnf_t *xnfp;
2509 2509 kstat_named_t *knp;
2510 2510
2511 2511 if (flag != KSTAT_READ)
2512 2512 return (EACCES);
2513 2513
2514 2514 xnfp = ksp->ks_private;
2515 2515 knp = ksp->ks_data;
2516 2516
2517 2517 /*
2518 2518 * Assignment order must match that of the names in
2519 2519 * xnf_aux_statistics.
2520 2520 */
2521 2521 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
2522 2522 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;
2523 2523
2524 2524 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
2525 2525 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
2526 2526 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
2527 2527 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
2528 2528 (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
2529 2529
2530 2530 (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated;
2531 2531 (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding;
2532 2532 (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding;
2533 2533 (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure;
2534 2534 (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak;
2535 2535 (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail;
2536 2536 (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail;
2537 2537
2538 2538 return (0);
2539 2539 }
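
The comment above notes that the assignment order in xnf_kstat_aux_update() must track the name order in xnf_aux_statistics by hand. One common way to make that coupling mechanical, shown purely as an illustration and not as the driver's approach, is a single table of name/offset pairs; the struct and field names below are invented for the sketch.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct stats {
	uint64_t interrupts;
	uint64_t tx_pullup;
	uint64_t rx_allocb_fail;
};

static const struct {
	const char *name;
	size_t off;
} stat_table[] = {
	{ "interrupts",		offsetof(struct stats, interrupts) },
	{ "tx_pullup",		offsetof(struct stats, tx_pullup) },
	{ "rx_allocb_fail",	offsetof(struct stats, rx_allocb_fail) },
};

/* Names and values cannot drift apart: both come from the same table. */
static void
dump_stats(const struct stats *sp)
{
	for (size_t i = 0; i < sizeof (stat_table) / sizeof (stat_table[0]); i++) {
		const uint64_t *vp = (const uint64_t *)
		    ((const char *)sp + stat_table[i].off);
		(void) printf("%s = %llu\n", stat_table[i].name,
		    (unsigned long long)*vp);
	}
}
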
2540 2540
2541 2541 static boolean_t
2542 2542 xnf_kstat_init(xnf_t *xnfp)
2543 2543 {
2544 2544 int nstat = sizeof (xnf_aux_statistics) /
2545 2545 sizeof (xnf_aux_statistics[0]);
2546 2546 char **cp = xnf_aux_statistics;
2547 2547 kstat_named_t *knp;
2548 2548
2549 2549 /*
2550 2550 * Create and initialise kstats.
2551 2551 */
2552 2552 if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
2553 2553 ddi_get_instance(xnfp->xnf_devinfo),
2554 2554 "aux_statistics", "net", KSTAT_TYPE_NAMED,
2555 2555 nstat, 0)) == NULL)
2556 2556 return (B_FALSE);
2557 2557
2558 2558 xnfp->xnf_kstat_aux->ks_private = xnfp;
2559 2559 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;
2560 2560
2561 2561 knp = xnfp->xnf_kstat_aux->ks_data;
2562 2562 while (nstat > 0) {
2563 2563 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
2564 2564
2565 2565 knp++;
2566 2566 cp++;
2567 2567 nstat--;
2568 2568 }
2569 2569
2570 2570 kstat_install(xnfp->xnf_kstat_aux);
2571 2571
2572 2572 return (B_TRUE);
2573 2573 }
2574 2574
2575 2575 static int
2576 2576 xnf_stat(void *arg, uint_t stat, uint64_t *val)
2577 2577 {
2578 2578 xnf_t *xnfp = arg;
2579 2579
2580 2580 mutex_enter(&xnfp->xnf_rxlock);
2581 2581 mutex_enter(&xnfp->xnf_txlock);
2582 2582
2583 2583 #define mac_stat(q, r) \
2584 2584 case (MAC_STAT_##q): \
2585 2585 *val = xnfp->xnf_stat_##r; \
2586 2586 break
2587 2587
2588 2588 #define ether_stat(q, r) \
2589 2589 case (ETHER_STAT_##q): \
2590 2590 *val = xnfp->xnf_stat_##r; \
2591 2591 break
2592 2592
2593 2593 switch (stat) {
2594 2594
2595 2595 mac_stat(IPACKETS, ipackets);
2596 2596 mac_stat(OPACKETS, opackets);
2597 2597 mac_stat(RBYTES, rbytes);
2598 2598 mac_stat(OBYTES, obytes);
2599 2599 mac_stat(NORCVBUF, norxbuf);
2600 2600 mac_stat(IERRORS, errrx);
2601 2601 mac_stat(NOXMTBUF, tx_defer);
2602 2602
2603 2603 ether_stat(MACRCV_ERRORS, mac_rcv_error);
2604 2604 ether_stat(TOOSHORT_ERRORS, runt);
2605 2605
2606 2606 /* always claim to be in full duplex mode */
2607 2607 case ETHER_STAT_LINK_DUPLEX:
2608 2608 *val = LINK_DUPLEX_FULL;
2609 2609 break;
2610 2610
2611 2611 /* always claim to be at 1Gb/s link speed */
2612 2612 case MAC_STAT_IFSPEED:
2613 2613 *val = 1000000000ull;
2614 2614 break;
2615 2615
2616 2616 default:
2617 2617 mutex_exit(&xnfp->xnf_txlock);
2618 2618 mutex_exit(&xnfp->xnf_rxlock);
2619 2619
2620 2620 return (ENOTSUP);
2621 2621 }
2622 2622
2623 2623 #undef mac_stat
2624 2624 #undef ether_stat
2625 2625
2626 2626 mutex_exit(&xnfp->xnf_txlock);
2627 2627 mutex_exit(&xnfp->xnf_rxlock);
2628 2628
2629 2629 return (0);
2630 2630 }
2631 2631
2632 2632 static boolean_t
2633 2633 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
2634 2634 {
2635 2635 _NOTE(ARGUNUSED(arg));
2636 2636
2637 2637 switch (cap) {
2638 2638 case MAC_CAPAB_HCKSUM: {
2639 2639 uint32_t *capab = cap_data;
2640 2640
2641 2641 /*
2642 2642 * Whilst the flag used to communicate with the IO
2643 2643 * domain is called "NETTXF_csum_blank", the checksum
2644 2644 * in the packet must contain the pseudo-header
2645 2645 * checksum and not zero.
2646 2646 *
2647 2647 * To help out the IO domain, we might use
2648 2648 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
2649 2649 * then use checksum offload for IPv6 packets, which
2650 2650 * the IO domain can't handle.
2651 2651 *
2652 2652 	 * As a result, we declare ourselves capable of
2653 2653 * HCKSUM_INET_FULL_V4. This means that we receive
2654 2654 * IPv4 packets from the stack with a blank checksum
2655 2655 * field and must insert the pseudo-header checksum
2656 2656 * before passing the packet to the IO domain.
2657 2657 */
2658 2658 *capab = HCKSUM_INET_FULL_V4;
2659 2659 break;
2660 2660 }
2661 2661 default:
2662 2662 return (B_FALSE);
2663 2663 }
2664 2664
2665 2665 return (B_TRUE);
2666 2666 }
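
The MAC_CAPAB_HCKSUM comment says that, because the driver advertises HCKSUM_INET_FULL_V4, it must seed the IPv4 pseudo-header checksum into the otherwise blank checksum field before handing the packet to the IO domain; that seeding is handled elsewhere in the driver, outside the lines shown here. As a reminder of what it involves, here is a hedged, user-space sketch of computing the folded (uncomplemented) pseudo-header sum for a TCP or UDP payload; all names are illustrative.

#include <stddef.h>
#include <stdint.h>

/* Fold a buffer into a 16-bit one's-complement sum (big-endian words). */
static uint16_t
ones_complement_sum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)((p[0] << 8) | p[1]);
		p += 2;
		len -= 2;
	}
	if (len == 1)
		sum += (uint32_t)(p[0] << 8);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

/*
 * Folded pseudo-header sum for IPv4 (src and dst in host order here).
 * This uncomplemented value is what gets written into the TCP/UDP
 * checksum field when the rest of the checksum is left to the peer.
 */
static uint16_t
ipv4_pseudo_cksum(uint32_t src, uint32_t dst, uint8_t proto, uint16_t paylen)
{
	uint8_t ph[12];

	ph[0] = src >> 24; ph[1] = src >> 16; ph[2] = src >> 8; ph[3] = src;
	ph[4] = dst >> 24; ph[5] = dst >> 16; ph[6] = dst >> 8; ph[7] = dst;
	ph[8] = 0;
	ph[9] = proto;			/* IPPROTO_TCP or IPPROTO_UDP */
	ph[10] = paylen >> 8;
	ph[11] = (uint8_t)paylen;

	return (ones_complement_sum(ph, sizeof (ph)));
}
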
2667 2667
2668 2668 /*
2669 2669 * The state of the peer has changed - react accordingly.
2670 2670 */
2671 2671 static void
2672 2672 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
2673 2673 void *arg, void *impl_data)
2674 2674 {
2675 2675 _NOTE(ARGUNUSED(id, arg));
2676 2676 xnf_t *xnfp = ddi_get_driver_private(dip);
2677 2677 XenbusState new_state = *(XenbusState *)impl_data;
2678 2678
2679 2679 ASSERT(xnfp != NULL);
2680 2680
2681 2681 switch (new_state) {
2682 2682 case XenbusStateUnknown:
2683 2683 case XenbusStateInitialising:
2684 2684 case XenbusStateInitialised:
2685 2685 case XenbusStateClosing:
2686 2686 case XenbusStateClosed:
2687 2687 case XenbusStateReconfiguring:
2688 2688 case XenbusStateReconfigured:
2689 2689 break;
2690 2690
2691 2691 case XenbusStateInitWait:
2692 2692 xnf_read_config(xnfp);
2693 2693
2694 2694 if (!xnfp->xnf_be_rx_copy) {
2695 2695 cmn_err(CE_WARN,
2696 2696 "The xnf driver requires a dom0 that "
2697 2697 "supports 'feature-rx-copy'.");
2698 2698 (void) xvdi_switch_state(xnfp->xnf_devinfo,
2699 2699 XBT_NULL, XenbusStateClosed);
2700 2700 break;
2701 2701 }
2702 2702
2703 2703 /*
2704 2704 * Connect to the backend.
2705 2705 */
2706 2706 xnf_be_connect(xnfp);
2707 2707
2708 2708 /*
2709 2709 * Our MAC address as discovered by xnf_read_config().
2710 2710 */
2711 2711 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
2712 2712
2713 2713 break;
2714 2714
2715 2715 case XenbusStateConnected:
2716 2716 mutex_enter(&xnfp->xnf_rxlock);
2717 2717 mutex_enter(&xnfp->xnf_txlock);
2718 2718
2719 2719 xnfp->xnf_connected = B_TRUE;
2720 2720 /*
2721 2721 * Wake up any threads waiting to send data to
2722 2722 		 * the backend.
2723 2723 */
2724 2724 cv_broadcast(&xnfp->xnf_cv_state);
2725 2725
2726 2726 mutex_exit(&xnfp->xnf_txlock);
2727 2727 mutex_exit(&xnfp->xnf_rxlock);
2728 2728
2729 2729 /*
2730 2730 	 * Kick the peer in case it missed any transmit
2731 2731 	 * requests in the TX ring.
2732 2732 */
2733 2733 ec_notify_via_evtchn(xnfp->xnf_evtchn);
2734 2734
2735 2735 /*
2736 2736 * There may already be completed receive requests in
2737 2737 	 * the ring sent by the backend after it gets connected
2738 2738 * but before we see its state change here, so we call
2739 2739 * xnf_intr() to handle them, if any.
2740 2740 */
2741 2741 (void) xnf_intr((caddr_t)xnfp);
2742 2742
2743 2743 /*
2744 2744 * Mark the link up now that we are connected.
2745 2745 */
2746 2746 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);
2747 2747
2748 2748 /*
2749 2749 * Tell the backend about the multicast addresses in
2750 2750 * which we are interested.
2751 2751 */
2752 2752 mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE);
2753 2753
2754 2754 break;
2755 2755
2756 2756 default:
2757 2757 break;
2758 2758 }
2759 2759 }