7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/xen/io/xnb.c
+++ new/usr/src/uts/common/xen/io/xnb.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 #ifdef DEBUG
28 28 #define XNB_DEBUG 1
29 29 #endif /* DEBUG */
30 30
31 31 #include "xnb.h"
32 32
33 33 #include <sys/sunddi.h>
34 34 #include <sys/sunndi.h>
35 35 #include <sys/modctl.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/mac.h>
38 38 #include <sys/mac_impl.h> /* For mac_fix_cksum(). */
39 39 #include <sys/dlpi.h>
40 40 #include <sys/strsubr.h>
41 41 #include <sys/strsun.h>
42 42 #include <sys/types.h>
43 43 #include <sys/pattr.h>
44 44 #include <vm/seg_kmem.h>
45 45 #include <vm/hat_i86.h>
46 46 #include <xen/sys/xenbus_impl.h>
47 47 #include <xen/sys/xendev.h>
48 48 #include <sys/balloon_impl.h>
49 49 #include <sys/evtchn_impl.h>
50 50 #include <sys/gnttab.h>
51 51 #include <vm/vm_dep.h>
52 52 #include <sys/note.h>
53 53 #include <sys/gld.h>
54 54 #include <inet/ip.h>
55 55 #include <inet/ip_impl.h>
56 56
57 57 /*
58 58 * The terms "transmit" and "receive" are used in alignment with domU:
59 59 * packets originating from the peer domU are "transmitted" to the rest
60 60 * of the system, and packets destined for the peer domU are "received" by it.
61 61 */
62 62
63 63 /*
64 64 * Should we allow guests to manipulate multicast group membership?
65 65 */
66 66 static boolean_t xnb_multicast_control = B_TRUE;
67 67
68 68 static boolean_t xnb_connect_rings(dev_info_t *);
69 69 static void xnb_disconnect_rings(dev_info_t *);
70 70 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
71 71 void *, void *);
72 72 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
73 73 void *, void *);
74 74
75 75 static int xnb_txbuf_constructor(void *, void *, int);
76 76 static void xnb_txbuf_destructor(void *, void *);
77 77 static void xnb_tx_notify_peer(xnb_t *, boolean_t);
78 78 static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);
79 79
80 80 mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
81 81 mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);
82 82
83 83 static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
84 84 size_t, size_t, size_t, grant_ref_t);
85 85 #pragma inline(setup_gop)
86 86 static boolean_t is_foreign(void *);
87 87 #pragma inline(is_foreign)
88 88
89 89 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
90 90 #define INVALID_GRANT_REF ((grant_ref_t)-1)
91 91
92 92 static kmutex_t xnb_alloc_page_lock;
93 93
94 94 /*
95 95 * On a 32 bit PAE system physical and machine addresses are larger
96 96 * than 32 bits. ddi_btop() on such systems takes an unsigned long
97 97 * argument, and so addresses above 4G are truncated before ddi_btop()
98 98 * gets to see them. To avoid this, code the shift operation here.
99 99 */
100 100 #define xnb_btop(addr) ((addr) >> PAGESHIFT)
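/*
 * Editor's illustration (not part of the driver): a minimal sketch of
 * the truncation described above, assuming a 32 bit PAE kernel where
 * an unsigned long is 32 bits wide and PAGESHIFT is 12.
 *
 *	uint64_t pa = 0x100000000ULL;		(a physical address at 4G)
 *	xnb_btop(pa)				(yields 0x100000, as intended)
 *	ddi_btop(dip, (ulong_t)pa)		(pa truncates to 0 first)
 */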
101 101
102 102 /* DMA attributes for transmit and receive data */
103 103 static ddi_dma_attr_t buf_dma_attr = {
104 104 DMA_ATTR_V0, /* version of this structure */
105 105 0, /* lowest usable address */
106 106 0xffffffffffffffffULL, /* highest usable address */
107 107 0x7fffffff, /* maximum DMAable byte count */
108 108 MMU_PAGESIZE, /* alignment in bytes */
109 109 0x7ff, /* bitmap of burst sizes */
110 110 1, /* minimum transfer */
111 111 0xffffffffU, /* maximum transfer */
112 112 0xffffffffffffffffULL, /* maximum segment length */
113 113 1, /* maximum number of segments */
114 114 1, /* granularity */
115 115 0, /* flags (reserved) */
116 116 };
117 117
118 118 /* DMA access attributes for data: NOT to be byte swapped. */
119 119 static ddi_device_acc_attr_t data_accattr = {
120 120 DDI_DEVICE_ATTR_V0,
121 121 DDI_NEVERSWAP_ACC,
122 122 DDI_STRICTORDER_ACC
123 123 };
124 124
125 125 /*
126 126 * Statistics.
127 127 */
128 128 static const char * const aux_statistics[] = {
129 129 "rx_cksum_deferred",
130 130 "tx_cksum_no_need",
131 131 "rx_rsp_notok",
132 132 "tx_notify_deferred",
133 133 "tx_notify_sent",
134 134 "rx_notify_deferred",
135 135 "rx_notify_sent",
136 136 "tx_too_early",
137 137 "rx_too_early",
138 138 "rx_allocb_failed",
139 139 "tx_allocb_failed",
140 140 "rx_foreign_page",
141 141 "mac_full",
142 142 "spurious_intr",
143 143 "allocation_success",
144 144 "allocation_failure",
145 145 "small_allocation_success",
146 146 "small_allocation_failure",
147 147 "other_allocation_failure",
148 148 "rx_pageboundary_crossed",
149 149 "rx_cpoparea_grown",
150 150 "csum_hardware",
151 151 "csum_software",
152 152 "tx_overflow_page",
153 153 "tx_unexpected_flags",
154 154 };
155 155
156 156 static int
157 157 xnb_ks_aux_update(kstat_t *ksp, int flag)
158 158 {
159 159 xnb_t *xnbp;
160 160 kstat_named_t *knp;
161 161
162 162 if (flag != KSTAT_READ)
163 163 return (EACCES);
164 164
165 165 xnbp = ksp->ks_private;
166 166 knp = ksp->ks_data;
167 167
168 168 /*
169 169 * Assignment order should match that of the names in
170 170 * aux_statistics.
171 171 */
172 172 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
173 173 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
174 174 (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
175 175 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
176 176 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
177 177 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
178 178 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
179 179 (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
180 180 (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
181 181 (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
182 182 (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
183 183 (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
184 184 (knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
185 185 (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
186 186 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
187 187 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
188 188 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
189 189 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
190 190 (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
191 191 (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
192 192 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
193 193 (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
194 194 (knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
195 195 (knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
196 196 (knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;
197 197
198 198 return (0);
199 199 }
200 200
201 201 static boolean_t
202 202 xnb_ks_init(xnb_t *xnbp)
203 203 {
204 204 int nstat = sizeof (aux_statistics) /
205 205 sizeof (aux_statistics[0]);
206 206 const char * const *cp = aux_statistics;
207 207 kstat_named_t *knp;
208 208
209 209 /*
210 210 * Create and initialise kstats.
211 211 */
212 212 xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
213 213 ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
214 214 KSTAT_TYPE_NAMED, nstat, 0);
215 215 if (xnbp->xnb_kstat_aux == NULL)
216 216 return (B_FALSE);
217 217
218 218 xnbp->xnb_kstat_aux->ks_private = xnbp;
219 219 xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;
220 220
221 221 knp = xnbp->xnb_kstat_aux->ks_data;
222 222 while (nstat > 0) {
223 223 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
224 224
225 225 knp++;
226 226 cp++;
227 227 nstat--;
228 228 }
229 229
230 230 kstat_install(xnbp->xnb_kstat_aux);
231 231
232 232 return (B_TRUE);
233 233 }
234 234
235 235 static void
236 236 xnb_ks_free(xnb_t *xnbp)
237 237 {
238 238 kstat_delete(xnbp->xnb_kstat_aux);
239 239 }
240 240
241 241 /*
242 242 * Calculate and insert the transport checksum for an arbitrary packet.
243 243 */
244 244 static mblk_t *
245 245 xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
246 246 {
247 247 _NOTE(ARGUNUSED(xnbp));
248 248
249 249 /*
250 250 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
251 251 * because it doesn't cover all of the interesting cases :-(
252 252 */
253 253 mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
254 254
255 255 return (mac_fix_cksum(mp));
256 256 }
257 257
258 258 mblk_t *
259 259 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
260 260 {
261 261 struct ether_header *ehp;
262 262 uint16_t sap;
263 263 uint32_t offset;
264 264 ipha_t *ipha;
265 265
266 266 ASSERT(mp->b_next == NULL);
267 267
268 268 /*
269 269 * Check that the packet is contained in a single mblk. In
270 270 * the "from peer" path this is true today, but may change
271 271 * when scatter gather support is added. In the "to peer"
272 272 * path we cannot be sure, but in most cases it will be true
273 273 * (in the xnbo case the packet has come from a MAC device
274 274 * which is unlikely to split packets).
275 275 */
276 276 if (mp->b_cont != NULL)
277 277 goto software;
278 278
279 279 /*
280 280 * If the MAC has no hardware capability don't do any further
281 281 * checking.
282 282 */
283 283 if (capab == 0)
284 284 goto software;
285 285
286 286 ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
287 287 ehp = (struct ether_header *)mp->b_rptr;
288 288
289 289 if (ntohs(ehp->ether_type) == VLAN_TPID) {
290 290 struct ether_vlan_header *evhp;
291 291
292 292 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
293 293 evhp = (struct ether_vlan_header *)mp->b_rptr;
294 294 sap = ntohs(evhp->ether_type);
295 295 offset = sizeof (struct ether_vlan_header);
296 296 } else {
297 297 sap = ntohs(ehp->ether_type);
298 298 offset = sizeof (struct ether_header);
299 299 }
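/*
 * (Editor's note) With the standard header sizes, "offset" is 18
 * bytes (sizeof (struct ether_vlan_header)) for a tagged frame and
 * 14 bytes (sizeof (struct ether_header)) for an untagged one; in
 * both cases the IP header starts at mp->b_rptr + offset.
 */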
300 300
301 301 /*
302 302 * We only attempt to do IPv4 packets in hardware.
303 303 */
304 304 if (sap != ETHERTYPE_IP)
305 305 goto software;
306 306
307 307 /*
308 308 * We know that this is an IPv4 packet.
309 309 */
310 310 ipha = (ipha_t *)(mp->b_rptr + offset);
311 311
312 312 switch (ipha->ipha_protocol) {
313 313 case IPPROTO_TCP:
314 314 case IPPROTO_UDP: {
315 315 uint32_t start, length, stuff, cksum;
316 316 uint16_t *stuffp;
317 317
318 318 /*
319 319 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
320 320 * can use full IPv4 and partial checksum offload.
321 321 */
322 322 if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
323 323 break;
324 324
325 325 start = IP_SIMPLE_HDR_LENGTH;
326 326 length = ntohs(ipha->ipha_length);
327 327 if (ipha->ipha_protocol == IPPROTO_TCP) {
328 328 stuff = start + TCP_CHECKSUM_OFFSET;
329 329 cksum = IP_TCP_CSUM_COMP;
330 330 } else {
331 331 stuff = start + UDP_CHECKSUM_OFFSET;
332 332 cksum = IP_UDP_CSUM_COMP;
333 333 }
334 334 stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);
335 335
336 336 if (capab & HCKSUM_INET_FULL_V4) {
337 337 /*
338 338 * Some devices require that the checksum
339 339 * field of the packet is zero for full
340 340 * offload.
341 341 */
342 342 *stuffp = 0;
343 343
344 344 mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
345 345
346 346 xnbp->xnb_stat_csum_hardware++;
347 347
348 348 return (mp);
349 349 }
350 350
351 351 if (capab & HCKSUM_INET_PARTIAL) {
352 352 if (*stuffp == 0) {
353 353 ipaddr_t src, dst;
354 354
355 355 /*
356 356 * Older Solaris guests don't insert
357 357 * the pseudo-header checksum, so we
358 358 * calculate it here.
359 359 */
360 360 src = ipha->ipha_src;
361 361 dst = ipha->ipha_dst;
362 362
363 363 cksum += (dst >> 16) + (dst & 0xFFFF);
364 364 cksum += (src >> 16) + (src & 0xFFFF);
365 365 cksum += length - IP_SIMPLE_HDR_LENGTH;
366 366
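/*
 * (Editor's worked example) The running sum can exceed
 * 16 bits, which is why it is folded twice below: for
 * cksum == 0x2FFFE the first fold gives
 * 2 + 0xFFFE == 0x10000 and the second gives
 * 1 + 0 == 1.  One fold is not always enough, as the
 * first fold can itself carry into bit 16.
 */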
367 367 cksum = (cksum >> 16) + (cksum & 0xFFFF);
368 368 cksum = (cksum >> 16) + (cksum & 0xFFFF);
369 369
370 370 ASSERT(cksum <= 0xFFFF);
371 371
372 372 *stuffp = (uint16_t)(cksum ? cksum : ~cksum);
373 373 }
374 374
375 375 mac_hcksum_set(mp, start, stuff, length, 0,
376 376 HCK_PARTIALCKSUM);
377 377
378 378 xnbp->xnb_stat_csum_hardware++;
379 379
380 380 return (mp);
381 381 }
382 382
383 383 /* NOTREACHED */
384 384 break;
385 385 }
386 386
387 387 default:
388 388 /* Use software. */
389 389 break;
390 390 }
391 391
392 392 software:
393 393 /*
394 394 * We are not able to use any offload so do the whole thing in
395 395 * software.
396 396 */
397 397 xnbp->xnb_stat_csum_software++;
398 398
399 399 return (xnb_software_csum(xnbp, mp));
400 400 }
401 401
402 402 int
403 403 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
404 404 {
405 405 xnb_t *xnbp;
406 406 char *xsname;
407 407 char cachename[32];
408 408
409 409 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);
410 410
411 411 xnbp->xnb_flavour = flavour;
412 412 xnbp->xnb_flavour_data = flavour_data;
413 413 xnbp->xnb_devinfo = dip;
414 414 xnbp->xnb_evtchn = INVALID_EVTCHN;
415 415 xnbp->xnb_irq = B_FALSE;
416 416 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
417 417 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
418 418 xnbp->xnb_connected = B_FALSE;
419 419 xnbp->xnb_hotplugged = B_FALSE;
420 420 xnbp->xnb_detachable = B_FALSE;
421 421 xnbp->xnb_peer = xvdi_get_oeid(dip);
422 422 xnbp->xnb_be_status = XNB_STATE_INIT;
423 423 xnbp->xnb_fe_status = XNB_STATE_INIT;
424 424
425 425 xnbp->xnb_tx_buf_count = 0;
426 426
427 427 xnbp->xnb_rx_hv_copy = B_FALSE;
428 428 xnbp->xnb_multicast_control = B_FALSE;
429 429
430 430 xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
431 431 ASSERT(xnbp->xnb_rx_va != NULL);
432 432
433 433 if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
434 434 != DDI_SUCCESS)
435 435 goto failure;
436 436
437 437 /* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
438 438 xnbp->xnb_rx_cpop = NULL;
439 439 xnbp->xnb_rx_cpop_count = 0;
440 440
441 441 mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
442 442 xnbp->xnb_icookie);
443 443 mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
444 444 xnbp->xnb_icookie);
445 445 mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
446 446 xnbp->xnb_icookie);
447 447
448 448 /* Set driver private pointer now. */
449 449 ddi_set_driver_private(dip, xnbp);
450 450
451 451 (void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
452 452 xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
453 453 sizeof (xnb_txbuf_t), 0,
454 454 xnb_txbuf_constructor, xnb_txbuf_destructor,
455 455 NULL, xnbp, NULL, 0);
456 456 if (xnbp->xnb_tx_buf_cache == NULL)
457 457 goto failure_0;
458 458
459 459 if (!xnb_ks_init(xnbp))
460 460 goto failure_1;
461 461
462 462 /*
463 463 * Receive notification of changes in the state of the
464 464 * driver in the guest domain.
465 465 */
466 466 if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
467 467 NULL) != DDI_SUCCESS)
468 468 goto failure_2;
469 469
470 470 /*
471 471 * Receive notification of hotplug events.
472 472 */
473 473 if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
474 474 NULL) != DDI_SUCCESS)
475 475 goto failure_2;
476 476
477 477 xsname = xvdi_get_xsname(dip);
478 478
479 479 if (xenbus_printf(XBT_NULL, xsname,
480 480 "feature-multicast-control", "%d",
481 481 xnb_multicast_control ? 1 : 0) != 0)
482 482 goto failure_3;
483 483
484 484 if (xenbus_printf(XBT_NULL, xsname,
485 485 "feature-rx-copy", "%d", 1) != 0)
486 486 goto failure_3;
487 487 /*
488 488 * Linux domUs seem to depend on "feature-rx-flip" being 0
489 489 * in addition to "feature-rx-copy" being 1. It seems strange
490 490 * to use four possible states to describe a binary decision,
491 491 * but we might as well play nice.
492 492 */
493 493 if (xenbus_printf(XBT_NULL, xsname,
494 494 "feature-rx-flip", "%d", 0) != 0)
495 495 goto failure_3;
496 496
497 497 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
498 498 (void) xvdi_post_event(dip, XEN_HP_ADD);
499 499
500 500 return (DDI_SUCCESS);
501 501
502 502 failure_3:
503 503 xvdi_remove_event_handler(dip, NULL);
504 504
505 505 failure_2:
506 506 xnb_ks_free(xnbp);
507 507
508 508 failure_1:
509 509 kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
510 510
511 511 failure_0:
512 512 mutex_destroy(&xnbp->xnb_state_lock);
513 513 mutex_destroy(&xnbp->xnb_rx_lock);
514 514 mutex_destroy(&xnbp->xnb_tx_lock);
515 515
516 516 failure:
517 517 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
518 518 kmem_free(xnbp, sizeof (*xnbp));
519 519 return (DDI_FAILURE);
520 520 }
521 521
522 522 void
523 523 xnb_detach(dev_info_t *dip)
524 524 {
525 525 xnb_t *xnbp = ddi_get_driver_private(dip);
526 526
527 527 ASSERT(xnbp != NULL);
528 528 ASSERT(!xnbp->xnb_connected);
529 529 ASSERT(xnbp->xnb_tx_buf_count == 0);
530 530
531 531 xnb_disconnect_rings(dip);
532 532
533 533 xvdi_remove_event_handler(dip, NULL);
534 534
535 535 xnb_ks_free(xnbp);
536 536
537 537 kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
538 538
539 539 ddi_set_driver_private(dip, NULL);
540 540
541 541 mutex_destroy(&xnbp->xnb_state_lock);
542 542 mutex_destroy(&xnbp->xnb_rx_lock);
543 543 mutex_destroy(&xnbp->xnb_tx_lock);
544 544
545 545 if (xnbp->xnb_rx_cpop_count > 0)
546 546 kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
547 547 * xnbp->xnb_rx_cpop_count);
548 548
549 549 ASSERT(xnbp->xnb_rx_va != NULL);
550 550 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
551 551
552 552 kmem_free(xnbp, sizeof (*xnbp));
553 553 }
554 554
555 555 /*
556 556 * Allocate a page from the hypervisor to be flipped to the peer.
557 557 *
558 558 * Try to get pages in batches to reduce the overhead of calls into
559 559 * the balloon driver.
560 560 */
561 561 static mfn_t
562 562 xnb_alloc_page(xnb_t *xnbp)
563 563 {
564 564 #define WARNING_RATE_LIMIT 100
565 565 #define BATCH_SIZE 256
566 566 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */
567 567 static int nth = BATCH_SIZE;
568 568 mfn_t mfn;
569 569
570 570 mutex_enter(&xnb_alloc_page_lock);
571 571 if (nth == BATCH_SIZE) {
572 572 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
573 573 xnbp->xnb_stat_allocation_failure++;
574 574 mutex_exit(&xnb_alloc_page_lock);
575 575
576 576 /*
577 577 * Try for a single page in low memory situations.
578 578 */
579 579 if (balloon_alloc_pages(1, &mfn) != 1) {
580 580 if ((xnbp->xnb_stat_small_allocation_failure++
581 581 % WARNING_RATE_LIMIT) == 0)
582 582 cmn_err(CE_WARN, "xnb_alloc_page: "
583 583 "Cannot allocate memory to "
584 584 "transfer packets to peer.");
585 585 return (0);
586 586 } else {
587 587 xnbp->xnb_stat_small_allocation_success++;
588 588 return (mfn);
589 589 }
590 590 }
591 591
592 592 nth = 0;
593 593 xnbp->xnb_stat_allocation_success++;
594 594 }
595 595
596 596 mfn = mfns[nth++];
597 597 mutex_exit(&xnb_alloc_page_lock);
598 598
599 599 ASSERT(mfn != 0);
600 600
601 601 return (mfn);
602 602 #undef BATCH_SIZE
603 603 #undef WARNING_RATE_LIMIT
604 604 }
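/*
 * (Editor's note) With BATCH_SIZE at 256, one successful
 * balloon_alloc_pages() hypercall satisfies the next 256 calls to
 * xnb_alloc_page(), so in the steady state the allocation cost per
 * packet flipped to the peer is 1/256th of a hypercall.
 */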
605 605
606 606 /*
607 607 * Free a page back to the hypervisor.
608 608 *
609 609 * This happens only in the error path, so batching is not worth the
610 610 * complication.
611 611 */
612 612 static void
613 613 xnb_free_page(xnb_t *xnbp, mfn_t mfn)
614 614 {
615 615 _NOTE(ARGUNUSED(xnbp));
616 616 int r;
617 617 pfn_t pfn;
618 618
619 619 pfn = xen_assign_pfn(mfn);
620 620 pfnzero(pfn, 0, PAGESIZE);
621 621 xen_release_pfn(pfn);
622 622
623 623 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
624 624 cmn_err(CE_WARN, "free_page: cannot decrease memory "
625 625 "reservation (%d): page kept but unusable (mfn = 0x%lx).",
626 626 r, mfn);
627 627 }
628 628 }
629 629
630 630 /*
631 631 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
632 632 * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
633 633 */
634 634 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
635 635 ((((_r)->sring->req_prod - loop) < \
636 636 (RING_SIZE(_r) - (loop - prod))) ? \
637 637 ((_r)->sring->req_prod - loop) : \
638 638 (RING_SIZE(_r) - (loop - prod)))
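/*
 * (Editor's worked example) The macro yields the smaller of "requests
 * posted but not yet consumed" and "response slots still free": with
 * RING_SIZE(_r) == 256, req_prod == 300, loop == 290 and prod == 60
 * there are 300 - 290 == 10 unconsumed requests and
 * 256 - (290 - 60) == 26 free response slots, so it evaluates to 10.
 */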
639 639
640 640 /*
641 641 * Pass packets to the peer using page flipping.
642 642 */
643 643 mblk_t *
644 644 xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
645 645 {
646 646 mblk_t *free = mp, *prev = NULL;
647 647 size_t len;
648 648 gnttab_transfer_t *gop;
649 649 boolean_t notify;
650 650 RING_IDX loop, prod, end;
651 651
652 652 /*
653 653 * For each packet the sequence of operations is:
654 654 *
655 655 * 1. get a new page from the hypervisor.
656 656 * 2. get a request slot from the ring.
657 657 * 3. copy the data into the new page.
658 658 * 4. transfer the page to the peer.
659 659 * 5. update the request slot.
660 660 * 6. kick the peer.
661 661 * 7. free mp.
662 662 *
663 663 * In order to reduce the number of hypercalls, we prepare
664 664 * several packets for the peer and perform a single hypercall
665 665 * to transfer them.
666 666 */
667 667
668 668 mutex_enter(&xnbp->xnb_rx_lock);
669 669
670 670 /*
671 671 * If we are not connected to the peer or have not yet
672 672 * finished hotplug it is too early to pass packets to the
673 673 * peer.
674 674 */
675 675 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
676 676 mutex_exit(&xnbp->xnb_rx_lock);
677 677 DTRACE_PROBE(flip_rx_too_early);
678 678 xnbp->xnb_stat_rx_too_early++;
679 679 return (mp);
680 680 }
681 681
682 682 loop = xnbp->xnb_rx_ring.req_cons;
683 683 prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
684 684 gop = xnbp->xnb_rx_top;
685 685
686 686 while ((mp != NULL) &&
687 687 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
688 688
689 689 mfn_t mfn;
690 690 pfn_t pfn;
691 691 netif_rx_request_t *rxreq;
692 692 netif_rx_response_t *rxresp;
693 693 char *valoop;
694 694 mblk_t *ml;
695 695 uint16_t cksum_flags;
696 696
697 697 /* 1 */
698 698 if ((mfn = xnb_alloc_page(xnbp)) == 0) {
699 699 xnbp->xnb_stat_rx_defer++;
700 700 break;
701 701 }
702 702
703 703 /* 2 */
704 704 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
705 705
706 706 #ifdef XNB_DEBUG
707 707 if (!(rxreq->id < NET_RX_RING_SIZE))
708 708 cmn_err(CE_PANIC, "xnb_to_peer: "
709 709 "id %d out of range in request 0x%p",
710 710 rxreq->id, (void *)rxreq);
711 711 #endif /* XNB_DEBUG */
712 712
713 713 /* Assign a pfn and map the new page at the allocated va. */
714 714 pfn = xen_assign_pfn(mfn);
715 715 hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
716 716 pfn, PROT_READ | PROT_WRITE, HAT_LOAD);
717 717
718 718 /* 3 */
719 719 len = 0;
720 720 valoop = xnbp->xnb_rx_va;
721 721 for (ml = mp; ml != NULL; ml = ml->b_cont) {
722 722 size_t chunk = ml->b_wptr - ml->b_rptr;
723 723
724 724 bcopy(ml->b_rptr, valoop, chunk);
725 725 valoop += chunk;
726 726 len += chunk;
727 727 }
728 728
729 729 ASSERT(len < PAGESIZE);
730 730
731 731 /* Release the pfn. */
732 732 hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
733 733 HAT_UNLOAD_UNMAP);
734 734 xen_release_pfn(pfn);
735 735
736 736 /* 4 */
737 737 gop->mfn = mfn;
738 738 gop->domid = xnbp->xnb_peer;
739 739 gop->ref = rxreq->gref;
740 740
741 741 /* 5.1 */
742 742 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
743 743 rxresp->offset = 0;
744 744 rxresp->flags = 0;
745 745
746 746 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
747 747 if (cksum_flags != 0)
748 748 xnbp->xnb_stat_rx_cksum_deferred++;
749 749 rxresp->flags |= cksum_flags;
750 750
751 751 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
752 752 rxresp->status = len;
753 753
754 754 loop++;
755 755 prod++;
756 756 gop++;
757 757 prev = mp;
758 758 mp = mp->b_next;
759 759 }
760 760
761 761 /*
762 762 * Did we actually do anything?
763 763 */
764 764 if (loop == xnbp->xnb_rx_ring.req_cons) {
765 765 mutex_exit(&xnbp->xnb_rx_lock);
766 766 return (mp);
767 767 }
768 768
769 769 end = loop;
770 770
771 771 /*
772 772 * Unlink the end of the 'done' list from the remainder.
773 773 */
774 774 ASSERT(prev != NULL);
775 775 prev->b_next = NULL;
776 776
777 777 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
778 778 loop - xnbp->xnb_rx_ring.req_cons) != 0) {
779 779 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
780 780 }
781 781
782 782 loop = xnbp->xnb_rx_ring.req_cons;
783 783 prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
784 784 gop = xnbp->xnb_rx_top;
785 785
786 786 while (loop < end) {
787 787 int16_t status = NETIF_RSP_OKAY;
788 788
789 789 if (gop->status != 0) {
790 790 status = NETIF_RSP_ERROR;
791 791
792 792 /*
793 793 * If the status is anything other than
794 794 * GNTST_bad_page then we don't own the page
795 795 * any more, so don't try to give it back.
796 796 */
797 797 if (gop->status != GNTST_bad_page)
798 798 gop->mfn = 0;
799 799 } else {
800 800 /* The page is no longer ours. */
801 801 gop->mfn = 0;
802 802 }
803 803
804 804 if (gop->mfn != 0)
805 805 /*
806 806 * Give back the page, as we won't be using
807 807 * it.
808 808 */
809 809 xnb_free_page(xnbp, gop->mfn);
810 810 else
811 811 /*
812 812 * We gave away a page, update our accounting
813 813 * now.
814 814 */
815 815 balloon_drv_subtracted(1);
816 816
817 817 /* 5.2 */
818 818 if (status != NETIF_RSP_OKAY) {
819 819 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
820 820 status;
821 821 } else {
822 822 xnbp->xnb_stat_ipackets++;
823 823 xnbp->xnb_stat_rbytes += len;
824 824 }
825 825
826 826 loop++;
827 827 prod++;
828 828 gop++;
829 829 }
830 830
831 831 xnbp->xnb_rx_ring.req_cons = loop;
832 832 xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
833 833
834 834 /* 6 */
835 835 /* LINTED: constant in conditional context */
836 836 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
837 837 if (notify) {
838 838 ec_notify_via_evtchn(xnbp->xnb_evtchn);
839 839 xnbp->xnb_stat_rx_notify_sent++;
840 840 } else {
841 841 xnbp->xnb_stat_rx_notify_deferred++;
842 842 }
843 843
844 844 if (mp != NULL)
845 845 xnbp->xnb_stat_rx_defer++;
846 846
847 847 mutex_exit(&xnbp->xnb_rx_lock);
848 848
849 849 /* Free mblk_t's that we consumed. */
850 850 freemsgchain(free);
851 851
852 852 return (mp);
853 853 }
854 854
855 855 /* Helper functions for xnb_copy_to_peer(). */
856 856
857 857 /*
858 858 * Grow the array of copy operation descriptors.
859 859 */
860 860 static boolean_t
861 861 grow_cpop_area(xnb_t *xnbp)
862 862 {
863 863 size_t count;
864 864 gnttab_copy_t *new;
865 865
866 866 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
867 867
868 868 count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;
869 869
870 870 if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
871 871 xnbp->xnb_stat_other_allocation_failure++;
872 872 return (B_FALSE);
873 873 }
874 874
875 875 bcopy(xnbp->xnb_rx_cpop, new,
876 876 sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
877 877
878 878 kmem_free(xnbp->xnb_rx_cpop,
879 879 sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
880 880
881 881 xnbp->xnb_rx_cpop = new;
882 882 xnbp->xnb_rx_cpop_count = count;
883 883
884 884 xnbp->xnb_stat_rx_cpoparea_grown++;
885 885
886 886 return (B_TRUE);
887 887 }
888 888
889 889 /*
890 890 * Check whether an address is on a page that's foreign to this domain.
891 891 */
892 892 static boolean_t
893 893 is_foreign(void *addr)
894 894 {
895 895 pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
896 896
897 897 return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
898 898 }
899 899
900 900 /*
901 901 * Insert a newly allocated mblk into a chain, replacing the old one.
902 902 */
903 903 static mblk_t *
904 904 replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
905 905 {
906 906 uint32_t start, stuff, end, value, flags;
907 907 mblk_t *new_mp;
908 908
909 909 new_mp = copyb(mp);
910 910 if (new_mp == NULL) {
911 911 cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
912 912 "for %p, len %lu", (void *)mp, len);
913 913 }
914 914
915 915 mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
916 916 mac_hcksum_set(new_mp, start, stuff, end, value, flags);
917 917
918 918 new_mp->b_next = mp->b_next;
919 919 new_mp->b_prev = mp->b_prev;
920 920 new_mp->b_cont = mp->b_cont;
921 921
922 922 /* Make sure we only overwrite pointers to the mblk being replaced. */
923 923 if (mp_prev != NULL && mp_prev->b_next == mp)
924 924 mp_prev->b_next = new_mp;
925 925
926 926 if (ml_prev != NULL && ml_prev->b_cont == mp)
927 927 ml_prev->b_cont = new_mp;
928 928
929 929 mp->b_next = mp->b_prev = mp->b_cont = NULL;
930 930 freemsg(mp);
931 931
932 932 return (new_mp);
933 933 }
934 934
935 935 /*
936 936 * Set all the fields in a gnttab_copy_t.
937 937 */
938 938 static void
939 939 setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
940 940 size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
941 941 {
942 942 ASSERT(xnbp != NULL && gp != NULL);
943 943
944 944 gp->source.offset = s_off;
945 945 gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
946 946 gp->source.domid = DOMID_SELF;
947 947
948 948 gp->len = (uint16_t)len;
949 949 gp->flags = GNTCOPY_dest_gref;
950 950 gp->status = 0;
951 951
952 952 gp->dest.u.ref = d_ref;
953 953 gp->dest.offset = d_off;
954 954 gp->dest.domid = xnbp->xnb_peer;
955 955 }
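/*
 * Hypothetical usage (editor's sketch): copying the first 64 bytes of
 * a message into the start of the page granted by a posted receive
 * request could be set up as
 *
 *	gnttab_copy_t gop;
 *	setup_gop(xnbp, &gop, mp->b_rptr, 0, 0, 64, rxreq->gref);
 *
 * assuming here that mp->b_rptr happens to be page aligned (hence
 * s_off == 0).  xnb_copy_to_peer() below builds one such descriptor
 * per sub-page chunk of each mblk and then issues a single
 * GNTTABOP_copy hypercall for the whole batch.
 */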
956 956
957 957 /*
958 958 * Pass packets to the peer using hypervisor copy operations.
959 959 */
960 960 mblk_t *
961 961 xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
962 962 {
963 963 mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
964 964 mblk_t *ml, *ml_prev;
965 965 boolean_t notify;
966 966 RING_IDX loop, prod;
967 967 int i;
968 968
969 969 /*
970 970 * If the peer does not pre-post buffers for received packets,
971 971 * use page flipping to pass packets to it.
972 972 */
973 973 if (!xnbp->xnb_rx_hv_copy)
974 974 return (xnb_to_peer(xnbp, mp));
975 975
976 976 /*
977 977 * For each packet the sequence of operations is:
978 978 *
979 979 * 1. get a request slot from the ring.
980 980 * 2. set up data for the hypercall (see NOTE below).
981 981 * 3. have the hypervisor copy the data.
982 982 * 4. update the request slot.
983 983 * 5. kick the peer.
984 984 *
985 985 * NOTE ad 2.
986 986 * In order to reduce the number of hypercalls, we prepare
987 987 * several mblks (mp->b_cont != NULL) for the peer and
988 988 * perform a single hypercall to transfer them. We also have
989 989 * to set up a separate copy operation for every page.
990 990 *
991 991 * If we have more than one packet (mp->b_next != NULL), we do
992 992 * this whole dance repeatedly.
993 993 */
994 994
995 995 mutex_enter(&xnbp->xnb_rx_lock);
996 996
997 997 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
998 998 mutex_exit(&xnbp->xnb_rx_lock);
999 999 DTRACE_PROBE(copy_rx_too_early);
1000 1000 xnbp->xnb_stat_rx_too_early++;
1001 1001 return (mp);
1002 1002 }
1003 1003
1004 1004 loop = xnbp->xnb_rx_ring.req_cons;
1005 1005 prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
1006 1006
1007 1007 while ((mp != NULL) &&
1008 1008 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
1009 1009 netif_rx_request_t *rxreq;
1010 1010 size_t d_offset, len;
1011 1011 int item_count;
1012 1012 gnttab_copy_t *gop_cp;
1013 1013 netif_rx_response_t *rxresp;
1014 1014 uint16_t cksum_flags;
1015 1015 int16_t status = NETIF_RSP_OKAY;
1016 1016
1017 1017 /* 1 */
1018 1018 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
1019 1019
1020 1020 #ifdef XNB_DEBUG
1021 1021 if (!(rxreq->id < NET_RX_RING_SIZE))
1022 1022 cmn_err(CE_PANIC, "xnb_copy_to_peer: "
1023 1023 "id %d out of range in request 0x%p",
1024 1024 rxreq->id, (void *)rxreq);
1025 1025 #endif /* XNB_DEBUG */
1026 1026
1027 1027 /* 2 */
1028 1028 d_offset = 0;
1029 1029 len = 0;
1030 1030 item_count = 0;
1031 1031
1032 1032 gop_cp = xnbp->xnb_rx_cpop;
1033 1033
1034 1034 /*
1035 1035 * We walk the b_cont pointers and set up a
1036 1036 * gnttab_copy_t for each sub-page chunk in each data
1037 1037 * block.
1038 1038 */
1039 1039 /* 2a */
1040 1040 for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
1041 1041 size_t chunk = ml->b_wptr - ml->b_rptr;
1042 1042 uchar_t *r_tmp, *rpt_align;
1043 1043 size_t r_offset;
1044 1044
1045 1045 /*
1046 1046 * The hypervisor will not allow us to
1047 1047 * reference a foreign page (e.g. one
1048 1048 * belonging to another domain) by mfn in the
1049 1049 * copy operation. If the data in this mblk is
1050 1050 * on such a page we must copy the data into a
1051 1051 * local page before initiating the hypervisor
1052 1052 * copy operation.
1053 1053 */
1054 1054 if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
1055 1055 mblk_t *ml_new = replace_msg(ml, chunk,
1056 1056 mp_prev, ml_prev);
1057 1057
1058 1058 /* We can still use old ml, but not *ml! */
1059 1059 if (free == ml)
1060 1060 free = ml_new;
1061 1061 if (mp == ml)
1062 1062 mp = ml_new;
1063 1063 ml = ml_new;
1064 1064
1065 1065 xnbp->xnb_stat_rx_foreign_page++;
1066 1066 }
1067 1067
1068 1068 rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
1069 1069 r_offset = (uint16_t)(ml->b_rptr - rpt_align);
1070 1070 r_tmp = ml->b_rptr;
1071 1071
1072 1072 if (d_offset + chunk > PAGESIZE)
1073 1073 cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
1074 1074 "(svd: %p), ml %p,rpt_alg. %p, d_offset "
1075 1075 "(%lu) + chunk (%lu) > PAGESIZE %d!",
1076 1076 (void *)mp, (void *)saved_mp, (void *)ml,
1077 1077 (void *)rpt_align,
1078 1078 d_offset, chunk, (int)PAGESIZE);
1079 1079
1080 1080 while (chunk > 0) {
1081 1081 size_t part_len;
1082 1082
1083 1083 if (item_count == xnbp->xnb_rx_cpop_count) {
1084 1084 if (!grow_cpop_area(xnbp))
1085 1085 goto failure;
1086 1086 gop_cp = &xnbp->xnb_rx_cpop[item_count];
1087 1087 }
1088 1088 /*
1089 1089 * If our mblk crosses a page boundary, we need
1090 1090 * to do a separate copy for each page.
1091 1091 */
1092 1092 if (r_offset + chunk > PAGESIZE) {
1093 1093 part_len = PAGESIZE - r_offset;
1094 1094
1095 1095 DTRACE_PROBE3(mblk_page_crossed,
1096 1096 (mblk_t *), ml, int, chunk, int,
1097 1097 (int)r_offset);
1098 1098
1099 1099 xnbp->xnb_stat_rx_pagebndry_crossed++;
1100 1100 } else {
1101 1101 part_len = chunk;
1102 1102 }
1103 1103
1104 1104 setup_gop(xnbp, gop_cp, r_tmp, r_offset,
1105 1105 d_offset, part_len, rxreq->gref);
1106 1106
1107 1107 chunk -= part_len;
1108 1108
1109 1109 len += part_len;
1110 1110 d_offset += part_len;
1111 1111 r_tmp += part_len;
1112 1112 /*
1113 1113 * The 2nd, 3rd ... last copies will always
1114 1114 * start at r_tmp, therefore r_offset is 0.
1115 1115 */
1116 1116 r_offset = 0;
1117 1117 gop_cp++;
1118 1118 item_count++;
1119 1119 }
1120 1120 ml_prev = ml;
1121 1121
1122 1122 DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
1123 1123 chunk, int, len, int, item_count);
1124 1124 }
1125 1125 /* 3 */
1126 1126 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
1127 1127 item_count) != 0) {
1128 1128 cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
1129 1129 DTRACE_PROBE(HV_granttableopfailed);
1130 1130 }
1131 1131
1132 1132 /* 4 */
1133 1133 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
1134 1134 rxresp->offset = 0;
1135 1135
1136 1136 rxresp->flags = 0;
1137 1137
1138 1138 DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
1139 1139 (int)rxresp->offset, int, (int)rxresp->flags, int,
1140 1140 (int)rxresp->status);
1141 1141
1142 1142 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
1143 1143 if (cksum_flags != 0)
1144 1144 xnbp->xnb_stat_rx_cksum_deferred++;
1145 1145 rxresp->flags |= cksum_flags;
1146 1146
1147 1147 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
1148 1148 rxresp->status = len;
1149 1149
1150 1150 DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
1151 1151 (int)rxresp->offset, int, (int)rxresp->flags, int,
1152 1152 (int)rxresp->status);
1153 1153
1154 1154 for (i = 0; i < item_count; i++) {
1155 1155 if (xnbp->xnb_rx_cpop[i].status != 0) {
1156 1156 DTRACE_PROBE2(cpop_status_nonnull, int,
1157 1157 (int)xnbp->xnb_rx_cpop[i].status,
1158 1158 int, i);
1159 1159 status = NETIF_RSP_ERROR;
1160 1160 }
1161 1161 }
1162 1162
1163 1163 /* 5.2 */
1164 1164 if (status != NETIF_RSP_OKAY) {
1165 1165 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
1166 1166 status;
1167 1167 xnbp->xnb_stat_rx_rsp_notok++;
1168 1168 } else {
1169 1169 xnbp->xnb_stat_ipackets++;
1170 1170 xnbp->xnb_stat_rbytes += len;
1171 1171 }
1172 1172
1173 1173 loop++;
1174 1174 prod++;
1175 1175 mp_prev = mp;
1176 1176 mp = mp->b_next;
1177 1177 }
1178 1178 failure:
1179 1179 /*
1180 1180 * Did we actually do anything?
1181 1181 */
1182 1182 if (loop == xnbp->xnb_rx_ring.req_cons) {
1183 1183 mutex_exit(&xnbp->xnb_rx_lock);
1184 1184 return (mp);
1185 1185 }
1186 1186
1187 1187 /*
1188 1188 * Unlink the end of the 'done' list from the remainder.
1189 1189 */
1190 1190 ASSERT(mp_prev != NULL);
1191 1191 mp_prev->b_next = NULL;
1192 1192
1193 1193 xnbp->xnb_rx_ring.req_cons = loop;
1194 1194 xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
1195 1195
1196 1196 /* 6 */
1197 1197 /* LINTED: constant in conditional context */
1198 1198 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
1199 1199 if (notify) {
1200 1200 ec_notify_via_evtchn(xnbp->xnb_evtchn);
1201 1201 xnbp->xnb_stat_rx_notify_sent++;
1202 1202 } else {
1203 1203 xnbp->xnb_stat_rx_notify_deferred++;
1204 1204 }
1205 1205
1206 1206 if (mp != NULL)
1207 1207 xnbp->xnb_stat_rx_defer++;
1208 1208
1209 1209 mutex_exit(&xnbp->xnb_rx_lock);
1210 1210
1211 1211 /* Free mblk_t structs we have consumed. */
1212 1212 freemsgchain(free);
1213 1213
1214 1214 return (mp);
1215 1215 }
1216 1216
1217 1217
1218 1218 static void
1219 1219 xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
1220 1220 {
1221 1221 boolean_t notify;
1222 1222
1223 1223 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1224 1224
1225 1225 /* LINTED: constant in conditional context */
1226 1226 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
1227 1227 if (notify || force) {
1228 1228 ec_notify_via_evtchn(xnbp->xnb_evtchn);
1229 1229 xnbp->xnb_stat_tx_notify_sent++;
1230 1230 } else {
1231 1231 xnbp->xnb_stat_tx_notify_deferred++;
1232 1232 }
1233 1233 }
1234 1234
1235 1235 static void
1236 1236 xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
1237 1237 {
1238 1238 RING_IDX i;
1239 1239 netif_tx_response_t *txresp;
1240 1240
1241 1241 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1242 1242
1243 1243 i = xnbp->xnb_tx_ring.rsp_prod_pvt;
1244 1244
1245 1245 txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
1246 1246 txresp->id = id;
1247 1247 txresp->status = status;
1248 1248
1249 1249 xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
1250 1250
1251 1251 /*
1252 1252 * Note that we don't push the change to the peer here - that
1253 1253 * is the caller's responsibility.
1254 1254 */
1255 1255 }
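/*
 * (Editor's note) The push is done by xnb_tx_notify_peer() above,
 * whose RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() makes the responses
 * visible and indicates whether the peer needs an event channel kick.
 */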
1256 1256
1257 1257 static void
1258 1258 xnb_txbuf_recycle(xnb_txbuf_t *txp)
1259 1259 {
1260 1260 xnb_t *xnbp = txp->xt_xnbp;
1261 1261
1262 1262 kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
1263 1263
1264 1264 xnbp->xnb_tx_buf_outstanding--;
1265 1265 }
1266 1266
1267 1267 static int
1268 1268 xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
1269 1269 {
1270 1270 _NOTE(ARGUNUSED(kmflag));
1271 1271 xnb_txbuf_t *txp = buf;
1272 1272 xnb_t *xnbp = arg;
1273 1273 size_t len;
1274 1274 ddi_dma_cookie_t dma_cookie;
1275 1275 uint_t ncookies;
1276 1276
1277 1277 txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
1278 1278 txp->xt_free_rtn.free_arg = (caddr_t)txp;
1279 1279 txp->xt_xnbp = xnbp;
1280 1280 txp->xt_next = NULL;
1281 1281
1282 1282 if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
1283 1283 0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
1284 1284 goto failure;
1285 1285
1286 1286 if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
1287 1287 DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
1288 1288 &txp->xt_acc_handle) != DDI_SUCCESS)
1289 1289 goto failure_1;
1290 1290
1291 1291 if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
1292 1292 len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
1293 1293 &dma_cookie, &ncookies)
1294 1294 != DDI_DMA_MAPPED)
1295 1295 goto failure_2;
1296 1296 ASSERT(ncookies == 1);
1297 1297
1298 1298 txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
1299 1299 txp->xt_buflen = dma_cookie.dmac_size;
1300 1300
1301 1301 DTRACE_PROBE(txbuf_allocated);
1302 1302
1303 1303 atomic_inc_32(&xnbp->xnb_tx_buf_count);
1304 1304 xnbp->xnb_tx_buf_outstanding++;
1305 1305
1306 1306 return (0);
1307 1307
1308 1308 failure_2:
1309 1309 ddi_dma_mem_free(&txp->xt_acc_handle);
1310 1310
1311 1311 failure_1:
1312 1312 ddi_dma_free_handle(&txp->xt_dma_handle);
1313 1313
1314 1314 failure:
1315 1315
1316 1316 return (-1);
1317 1317 }
1318 1318
1319 1319 static void
1320 1320 xnb_txbuf_destructor(void *buf, void *arg)
1321 1321 {
1322 1322 xnb_txbuf_t *txp = buf;
1323 1323 xnb_t *xnbp = arg;
1324 1324
1325 1325 (void) ddi_dma_unbind_handle(txp->xt_dma_handle);
1326 1326 ddi_dma_mem_free(&txp->xt_acc_handle);
1327 1327 ddi_dma_free_handle(&txp->xt_dma_handle);
1328 1328
1329 1329 atomic_dec_32(&xnbp->xnb_tx_buf_count);
1330 1330 }
1331 1331
1332 1332 /*
1333 1333 * Take packets from the peer and deliver them onward.
1334 1334 */
1335 1335 static mblk_t *
1336 1336 xnb_from_peer(xnb_t *xnbp)
1337 1337 {
1338 1338 RING_IDX start, end, loop;
1339 1339 gnttab_copy_t *cop;
1340 1340 xnb_txbuf_t **txpp;
1341 1341 netif_tx_request_t *txreq;
1342 1342 boolean_t work_to_do, need_notify = B_FALSE;
1343 1343 mblk_t *head, *tail;
1344 1344 int n_data_req, i;
1345 1345
1346 1346 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1347 1347
1348 1348 head = tail = NULL;
1349 1349 around:
1350 1350
1351 1351 /* LINTED: constant in conditional context */
1352 1352 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
1353 1353 if (!work_to_do) {
1354 1354 finished:
1355 1355 xnb_tx_notify_peer(xnbp, need_notify);
1356 1356
1357 1357 return (head);
1358 1358 }
1359 1359
1360 1360 start = xnbp->xnb_tx_ring.req_cons;
1361 1361 end = xnbp->xnb_tx_ring.sring->req_prod;
1362 1362
1363 1363 if ((end - start) > NET_TX_RING_SIZE) {
1364 1364 /*
1365 1365 * This usually indicates that the frontend driver is
1366 1366 * misbehaving, as it's not possible to have more than
1367 1367 * NET_TX_RING_SIZE ring elements in play at any one
1368 1368 * time.
1369 1369 *
1370 1370 * We reset the ring pointers to the state declared by
1371 1371 * the frontend and try to carry on.
1372 1372 */
1373 1373 cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
1374 1374 "items in the ring, resetting and trying to recover.",
1375 1375 xnbp->xnb_peer, (end - start));
1376 1376
1377 1377 /* LINTED: constant in conditional context */
1378 1378 BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
1379 1379 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1380 1380
1381 1381 goto around;
1382 1382 }
1383 1383
1384 1384 loop = start;
1385 1385 cop = xnbp->xnb_tx_cop;
1386 1386 txpp = xnbp->xnb_tx_bufp;
1387 1387 n_data_req = 0;
1388 1388
1389 1389 while (loop < end) {
1390 1390 static const uint16_t acceptable_flags =
1391 1391 NETTXF_csum_blank |
1392 1392 NETTXF_data_validated |
1393 1393 NETTXF_extra_info;
1394 1394 uint16_t unexpected_flags;
1395 1395
1396 1396 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
1397 1397
1398 1398 unexpected_flags = txreq->flags & ~acceptable_flags;
1399 1399 if (unexpected_flags != 0) {
1400 1400 /*
1401 1401 * The peer used flag bits that we do not
1402 1402 * recognize.
1403 1403 */
1404 1404 cmn_err(CE_WARN, "xnb_from_peer: "
1405 1405 "unexpected flag bits (0x%x) from peer "
1406 1406 "in transmit request",
1407 1407 unexpected_flags);
1408 1408 xnbp->xnb_stat_tx_unexpected_flags++;
1409 1409
1410 1410 /* Mark this entry as failed. */
1411 1411 xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1412 1412 need_notify = B_TRUE;
1413 1413
1414 1414 } else if (txreq->flags & NETTXF_extra_info) {
1415 1415 struct netif_extra_info *erp;
1416 1416 boolean_t status;
1417 1417
1418 1418 loop++; /* Consume another slot in the ring. */
1419 1419 ASSERT(loop <= end);
1420 1420
1421 1421 erp = (struct netif_extra_info *)
1422 1422 RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
1423 1423
1424 1424 switch (erp->type) {
1425 1425 case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
1426 1426 ASSERT(xnbp->xnb_multicast_control);
1427 1427 status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
1428 1428 &erp->u.mcast.addr);
1429 1429 break;
1430 1430 case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
1431 1431 ASSERT(xnbp->xnb_multicast_control);
1432 1432 status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
1433 1433 &erp->u.mcast.addr);
1434 1434 break;
1435 1435 default:
1436 1436 status = B_FALSE;
1437 1437 cmn_err(CE_WARN, "xnb_from_peer: "
1438 1438 "unknown extra type %d", erp->type);
1439 1439 break;
1440 1440 }
1441 1441
1442 1442 xnb_tx_mark_complete(xnbp, txreq->id,
1443 1443 status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
1444 1444 need_notify = B_TRUE;
1445 1445
1446 1446 } else if ((txreq->offset > PAGESIZE) ||
1447 1447 (txreq->offset + txreq->size > PAGESIZE)) {
1448 1448 /*
1449 1449 * Peer attempted to refer to data beyond the
1450 1450 * end of the granted page.
1451 1451 */
1452 1452 cmn_err(CE_WARN, "xnb_from_peer: "
1453 1453 "attempt to refer beyond the end of granted "
1454 1454 "page in txreq (offset %d, size %d).",
1455 1455 txreq->offset, txreq->size);
1456 1456 xnbp->xnb_stat_tx_overflow_page++;
1457 1457
1458 1458 /* Mark this entry as failed. */
1459 1459 xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1460 1460 need_notify = B_TRUE;
1461 1461
1462 1462 } else {
1463 1463 xnb_txbuf_t *txp;
1464 1464
1465 1465 txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
1466 1466 KM_NOSLEEP);
1467 1467 if (txp == NULL)
1468 1468 break;
1469 1469
1470 1470 txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
1471 1471 txp->xt_buflen, 0, &txp->xt_free_rtn);
1472 1472 if (txp->xt_mblk == NULL) {
1473 1473 kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
1474 1474 break;
1475 1475 }
1476 1476
1477 1477 txp->xt_idx = loop;
1478 1478 txp->xt_id = txreq->id;
1479 1479
1480 1480 cop->source.u.ref = txreq->gref;
1481 1481 cop->source.domid = xnbp->xnb_peer;
1482 1482 cop->source.offset = txreq->offset;
1483 1483
1484 1484 cop->dest.u.gmfn = txp->xt_mfn;
1485 1485 cop->dest.domid = DOMID_SELF;
1486 1486 cop->dest.offset = 0;
1487 1487
1488 1488 cop->len = txreq->size;
1489 1489 cop->flags = GNTCOPY_source_gref;
1490 1490 cop->status = 0;
1491 1491
1492 1492 *txpp = txp;
1493 1493
1494 1494 txpp++;
1495 1495 cop++;
1496 1496 n_data_req++;
1497 1497
1498 1498 ASSERT(n_data_req <= NET_TX_RING_SIZE);
1499 1499 }
1500 1500
1501 1501 loop++;
1502 1502 }
1503 1503
1504 1504 xnbp->xnb_tx_ring.req_cons = loop;
1505 1505
1506 1506 if (n_data_req == 0)
1507 1507 goto around;
1508 1508
1509 1509 if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
1510 1510 xnbp->xnb_tx_cop, n_data_req) != 0) {
1511 1511
1512 1512 cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
1513 1513
1514 1514 txpp = xnbp->xnb_tx_bufp;
1515 1515 i = n_data_req;
1516 1516 while (i > 0) {
1517 1517 kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
1518 1518 txpp++;
1519 1519 i--;
1520 1520 }
1521 1521
1522 1522 goto finished;
1523 1523 }
1524 1524
1525 1525 txpp = xnbp->xnb_tx_bufp;
1526 1526 cop = xnbp->xnb_tx_cop;
1527 1527 i = n_data_req;
1528 1528
1529 1529 while (i > 0) {
1530 1530 xnb_txbuf_t *txp = *txpp;
1531 1531
1532 1532 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
1533 1533
1534 1534 if (cop->status != 0) {
1535 1535 #ifdef XNB_DEBUG
1536 1536 cmn_err(CE_WARN, "xnb_from_peer: "
1537 1537 "txpp 0x%p failed (%d)",
1538 1538 (void *)*txpp, cop->status);
1539 1539 #endif /* XNB_DEBUG */
1540 1540 xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
1541 1541 freemsg(txp->xt_mblk);
1542 1542 } else {
1543 1543 mblk_t *mp;
1544 1544
1545 1545 mp = txp->xt_mblk;
1546 1546 mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
1547 1547 mp->b_wptr += txreq->size;
1548 1548 mp->b_next = NULL;
1549 1549
1550 1550 /*
1551 1551 * If there are checksum flags, process them
1552 1552 * appropriately.
1553 1553 */
1554 1554 if ((txreq->flags &
1555 1555 (NETTXF_csum_blank | NETTXF_data_validated))
1556 1556 != 0) {
1557 1557 mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
1558 1558 mp, txreq->flags);
1559 1559 xnbp->xnb_stat_tx_cksum_no_need++;
1560 1560
1561 1561 txp->xt_mblk = mp;
1562 1562 }
1563 1563
1564 1564 if (head == NULL) {
1565 1565 ASSERT(tail == NULL);
1566 1566 head = mp;
1567 1567 } else {
1568 1568 ASSERT(tail != NULL);
1569 1569 tail->b_next = mp;
1570 1570 }
1571 1571 tail = mp;
1572 1572
1573 1573 xnbp->xnb_stat_opackets++;
1574 1574 xnbp->xnb_stat_obytes += txreq->size;
1575 1575
1576 1576 xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
1577 1577 }
1578 1578
1579 1579 txpp++;
1580 1580 cop++;
1581 1581 i--;
1582 1582 }
1583 1583
1584 1584 goto around;
1585 1585 /* NOTREACHED */
1586 1586 }
1587 1587
1588 1588 static uint_t
1589 1589 xnb_intr(caddr_t arg)
1590 1590 {
1591 1591 xnb_t *xnbp = (xnb_t *)arg;
1592 1592 mblk_t *mp;
1593 1593
1594 1594 xnbp->xnb_stat_intr++;
1595 1595
1596 1596 mutex_enter(&xnbp->xnb_tx_lock);
1597 1597
1598 1598 ASSERT(xnbp->xnb_connected);
1599 1599
1600 1600 mp = xnb_from_peer(xnbp);
1601 1601
1602 1602 mutex_exit(&xnbp->xnb_tx_lock);
1603 1603
1604 1604 if (!xnbp->xnb_hotplugged) {
1605 1605 xnbp->xnb_stat_tx_too_early++;
1606 1606 goto fail;
1607 1607 }
1608 1608 if (mp == NULL) {
1609 1609 xnbp->xnb_stat_spurious_intr++;
1610 1610 goto fail;
1611 1611 }
1612 1612
1613 1613 xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
1614 1614
1615 1615 return (DDI_INTR_CLAIMED);
1616 1616
1617 1617 fail:
1618 1618 freemsgchain(mp);
1619 1619 return (DDI_INTR_CLAIMED);
1620 1620 }
1621 1621
1622 1622 /*
1623 1623 * Read our configuration from xenstore.
1624 1624 */
1625 1625 boolean_t
1626 1626 xnb_read_xs_config(xnb_t *xnbp)
1627 1627 {
1628 1628 char *xsname;
1629 1629 char mac[ETHERADDRL * 3];
1630 1630
1631 1631 xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
1632 1632
1633 1633 if (xenbus_scanf(XBT_NULL, xsname,
1634 1634 "mac", "%s", mac) != 0) {
1635 1635 cmn_err(CE_WARN, "xnb_read_xs_config: "
1636 1636 "cannot read mac address from %s",
1637 1637 xsname);
1638 1638 return (B_FALSE);
1639 1639 }
1640 1640
1641 1641 if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
1642 1642 cmn_err(CE_WARN,
1643 1643 "xnb_attach: cannot parse mac address %s",
1644 1644 mac);
1645 1645 return (B_FALSE);
1646 1646 }
1647 1647
1648 1648 return (B_TRUE);
1649 1649 }
1650 1650
1651 1651 /*
1652 1652 * Read the configuration of the peer from xenstore.
1653 1653 */
1654 1654 boolean_t
1655 1655 xnb_read_oe_config(xnb_t *xnbp)
1656 1656 {
1657 1657 char *oename;
1658 1658 int i;
1659 1659
1660 1660 oename = xvdi_get_oename(xnbp->xnb_devinfo);
1661 1661
1662 1662 if (xenbus_gather(XBT_NULL, oename,
1663 1663 "event-channel", "%u", &xnbp->xnb_fe_evtchn,
1664 1664 "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
1665 1665 "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
1666 1666 NULL) != 0) {
1667 1667 cmn_err(CE_WARN, "xnb_read_oe_config: "
1668 1668 "cannot read other-end details from %s",
1669 1669 oename);
1670 1670 return (B_FALSE);
1671 1671 }
1672 1672
1673 1673 /*
1674 1674 * Check whether our peer requests receive side hypervisor
1675 1675 * copy.
1676 1676 */
1677 1677 if (xenbus_scanf(XBT_NULL, oename,
1678 1678 "request-rx-copy", "%d", &i) != 0)
1679 1679 i = 0;
1680 1680 if (i != 0)
1681 1681 xnbp->xnb_rx_hv_copy = B_TRUE;
1682 1682
1683 1683 /*
1684 1684 * Check whether our peer requests multicast_control.
1685 1685 */
1686 1686 if (xenbus_scanf(XBT_NULL, oename,
1687 1687 "request-multicast-control", "%d", &i) != 0)
1688 1688 i = 0;
1689 1689 if (i != 0)
1690 1690 xnbp->xnb_multicast_control = B_TRUE;
1691 1691
1692 1692 /*
1693 1693 * The Linux backend driver here checks to see if the peer has
1694 1694 * set 'feature-no-csum-offload'. This is used to indicate
1695 1695 * that the guest cannot handle receiving packets without a
1696 1696 * valid checksum. We don't check here, because packets passed
1697 1697 * to the peer _always_ have a valid checksum.
1698 1698 *
1699 1699 * There are three cases:
1700 1700 *
1701 1701 * - the NIC is dedicated: packets from the wire should always
1702 1702 * have a valid checksum. If the hardware validates the
1703 1703 * checksum then the relevant bit will be set in the packet
1704 1704 * attributes and we will inform the peer. It can choose to
1705 1705 * ignore the hardware verification.
1706 1706 *
1707 1707 * - the NIC is shared (VNIC) and a packet originates from the
1708 1708 * wire: this is the same as the case above - the packets
1709 1709 * will have a valid checksum.
1710 1710 *
1711 1711 * - the NIC is shared (VNIC) and a packet originates from the
1712 1712 * host: the MAC layer ensures that all such packets have a
1713 1713 * valid checksum by calculating one if the stack did not.
1714 1714 */
1715 1715
1716 1716 return (B_TRUE);
1717 1717 }
1718 1718
1719 1719 void
1720 1720 xnb_start_connect(xnb_t *xnbp)
1721 1721 {
1722 1722 dev_info_t *dip = xnbp->xnb_devinfo;
1723 1723
1724 1724 if (!xnb_connect_rings(dip)) {
1725 1725 cmn_err(CE_WARN, "xnb_start_connect: "
1726 1726 "cannot connect rings");
1727 1727 goto failed;
1728 1728 }
1729 1729
1730 1730 if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
1731 1731 cmn_err(CE_WARN, "xnb_start_connect: "
1732 1732 "flavour failed to connect");
1733 1733 goto failed;
1734 1734 }
1735 1735
1736 1736 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
1737 1737 return;
1738 1738
1739 1739 failed:
1740 1740 xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
1741 1741 xnb_disconnect_rings(dip);
1742 1742 (void) xvdi_switch_state(dip, XBT_NULL,
1743 1743 XenbusStateClosed);
1744 1744 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
1745 1745 }
1746 1746
1747 1747 static boolean_t
1748 1748 xnb_connect_rings(dev_info_t *dip)
1749 1749 {
1750 1750 xnb_t *xnbp = ddi_get_driver_private(dip);
1751 1751 struct gnttab_map_grant_ref map_op;
1752 1752
1753 1753 /*
1754 1754 * Cannot attempt to connect the rings if already connected.
1755 1755 */
1756 1756 ASSERT(!xnbp->xnb_connected);
1757 1757
1758 1758 /*
1759 1759 * 1. allocate a vaddr for the tx page, one for the rx page.
1760 1760 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
1761 1761 * into the allocated vaddr (one for tx, one for rx).
1762 1762 * 3. call EVTCHNOP_bind_interdomain to have the event channel
1763 1763 * bound to this domain.
1764 1764 * 4. associate the event channel with an interrupt.
1765 1765 * 5. enable the interrupt.
1766 1766 */
1767 1767
1768 1768 /* 1.tx */
1769 1769 xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1770 1770 0, 0, 0, 0, VM_SLEEP);
1771 1771 ASSERT(xnbp->xnb_tx_ring_addr != NULL);
1772 1772
1773 1773 /* 2.tx */
1774 1774 map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
1775 1775 map_op.flags = GNTMAP_host_map;
1776 1776 map_op.ref = xnbp->xnb_tx_ring_ref;
1777 1777 map_op.dom = xnbp->xnb_peer;
1778 1778 hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
1779 1779 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
1780 1780 map_op.status != 0) {
1781 1781 cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
1782 1782 goto fail;
1783 1783 }
1784 1784 xnbp->xnb_tx_ring_handle = map_op.handle;
1785 1785
1786 1786 /* LINTED: constant in conditional context */
1787 1787 BACK_RING_INIT(&xnbp->xnb_tx_ring,
1788 1788 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1789 1789
1790 1790 /* 1.rx */
1791 1791 xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1792 1792 0, 0, 0, 0, VM_SLEEP);
1793 1793 ASSERT(xnbp->xnb_rx_ring_addr != NULL);
1794 1794
1795 1795 /* 2.rx */
1796 1796 map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
1797 1797 map_op.flags = GNTMAP_host_map;
1798 1798 map_op.ref = xnbp->xnb_rx_ring_ref;
1799 1799 map_op.dom = xnbp->xnb_peer;
1800 1800 hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
1801 1801 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
1802 1802 map_op.status != 0) {
1803 1803 cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
1804 1804 goto fail;
1805 1805 }
1806 1806 xnbp->xnb_rx_ring_handle = map_op.handle;
1807 1807
1808 1808 /* LINTED: constant in conditional context */
1809 1809 BACK_RING_INIT(&xnbp->xnb_rx_ring,
1810 1810 (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
1811 1811
1812 1812 /* 3 */
1813 1813 if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
1814 1814 cmn_err(CE_WARN, "xnb_connect_rings: "
1815 1815 "cannot bind event channel %d", xnbp->xnb_evtchn);
1816 1816 xnbp->xnb_evtchn = INVALID_EVTCHN;
1817 1817 goto fail;
1818 1818 }
1819 1819 xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
1820 1820
1821 1821 /*
1822 1822 * It would be good to set the state to XenbusStateConnected
1823 1823 * here as well, but then what if ddi_add_intr() failed?
1824 1824 * Changing the state in the store will be noticed by the peer
1825 1825 * and cannot be "taken back".
1826 1826 */
1827 1827 mutex_enter(&xnbp->xnb_tx_lock);
1828 1828 mutex_enter(&xnbp->xnb_rx_lock);
1829 1829
1830 1830 xnbp->xnb_connected = B_TRUE;
1831 1831
1832 1832 mutex_exit(&xnbp->xnb_rx_lock);
1833 1833 mutex_exit(&xnbp->xnb_tx_lock);
1834 1834
1835 1835 /* 4, 5 */
1836 1836 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
1837 1837 != DDI_SUCCESS) {
1838 1838 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
1839 1839 goto fail;
1840 1840 }
1841 1841 xnbp->xnb_irq = B_TRUE;
1842 1842
1843 1843 return (B_TRUE);
1844 1844
1845 1845 fail:
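	/*
	 * Grant mappings and allocations made above are deliberately
	 * left in place; the caller (xnb_start_connect()) responds to
	 * a B_FALSE return by calling xnb_disconnect_rings(), which
	 * releases whatever was set up.
	 */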
1846 1846 mutex_enter(&xnbp->xnb_tx_lock);
1847 1847 mutex_enter(&xnbp->xnb_rx_lock);
1848 1848
1849 1849 xnbp->xnb_connected = B_FALSE;
1850 1850
1851 1851 mutex_exit(&xnbp->xnb_rx_lock);
1852 1852 mutex_exit(&xnbp->xnb_tx_lock);
1853 1853
1854 1854 return (B_FALSE);
1855 1855 }
1856 1856
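/*
 * Undo the work of xnb_connect_rings().  Each step checks that its
 * resource is live before releasing it, so this is safe to call from
 * the connect failure path and on a transition to Closed alike.
 */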
1857 1857 static void
1858 1858 xnb_disconnect_rings(dev_info_t *dip)
1859 1859 {
1860 1860 xnb_t *xnbp = ddi_get_driver_private(dip);
1861 1861
1862 1862 if (xnbp->xnb_irq) {
1863 1863 ddi_remove_intr(dip, 0, NULL);
1864 1864 xnbp->xnb_irq = B_FALSE;
1865 1865 }
1866 1866
1867 1867 if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
1868 1868 xvdi_free_evtchn(dip);
1869 1869 xnbp->xnb_evtchn = INVALID_EVTCHN;
1870 1870 }
1871 1871
1872 1872 if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
1873 1873 struct gnttab_unmap_grant_ref unmap_op;
1874 1874
1875 1875 unmap_op.host_addr = (uint64_t)(uintptr_t)
1876 1876 xnbp->xnb_rx_ring_addr;
1877 1877 unmap_op.dev_bus_addr = 0;
1878 1878 unmap_op.handle = xnbp->xnb_rx_ring_handle;
1879 1879 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1880 1880 &unmap_op, 1) != 0)
1881 1881 cmn_err(CE_WARN, "xnb_disconnect_rings: "
1882 1882 "cannot unmap rx-ring page (%d)",
1883 1883 unmap_op.status);
1884 1884
1885 1885 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
1886 1886 }
1887 1887
1888 1888 if (xnbp->xnb_rx_ring_addr != NULL) {
1889 1889 hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
1890 1890 vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
1891 1891 xnbp->xnb_rx_ring_addr = NULL;
1892 1892 }
1893 1893
1894 1894 if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
1895 1895 struct gnttab_unmap_grant_ref unmap_op;
1896 1896
1897 1897 unmap_op.host_addr = (uint64_t)(uintptr_t)
1898 1898 xnbp->xnb_tx_ring_addr;
1899 1899 unmap_op.dev_bus_addr = 0;
1900 1900 unmap_op.handle = xnbp->xnb_tx_ring_handle;
1901 1901 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1902 1902 &unmap_op, 1) != 0)
1903 1903 cmn_err(CE_WARN, "xnb_disconnect_rings: "
1904 1904 "cannot unmap tx-ring page (%d)",
1905 1905 unmap_op.status);
1906 1906
1907 1907 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
1908 1908 }
1909 1909
1910 1910 if (xnbp->xnb_tx_ring_addr != NULL) {
1911 1911 hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
1912 1912 vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
1913 1913 xnbp->xnb_tx_ring_addr = NULL;
1914 1914 }
1915 1915 }
1916 1916
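/*
 * Respond to Xenbus state changes of the peer (frontend).  A
 * transition to Connected marks the frontend half of the handshake
 * ready; xnb_start_connect() runs once the hotplug half is ready too.
 */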
1917 1917 static void
1918 1918 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1919 1919 void *arg, void *impl_data)
1920 1920 {
1921 1921 _NOTE(ARGUNUSED(id, arg));
1922 1922 xnb_t *xnbp = ddi_get_driver_private(dip);
1923 1923 XenbusState new_state = *(XenbusState *)impl_data;
1924 1924
1925 1925 ASSERT(xnbp != NULL);
1926 1926
1927 1927 switch (new_state) {
1928 1928 case XenbusStateConnected:
1929 1929 /* spurious state change */
1930 1930 if (xnbp->xnb_connected)
1931 1931 return;
1932 1932
1933 1933 if (!xnb_read_oe_config(xnbp) ||
1934 1934 !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
1935 1935 cmn_err(CE_WARN, "xnb_oe_state_change: "
1936 1936 "read otherend config error");
1937 1937 (void) xvdi_switch_state(dip, XBT_NULL,
1938 1938 XenbusStateClosed);
1939 1939 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
1940 1940
1941 1941 break;
1942 1942 }
1943 1943
1945 1945 mutex_enter(&xnbp->xnb_state_lock);
1946 1946 xnbp->xnb_fe_status = XNB_STATE_READY;
1947 1947 if (xnbp->xnb_be_status == XNB_STATE_READY)
1948 1948 xnb_start_connect(xnbp);
1949 1949 mutex_exit(&xnbp->xnb_state_lock);
1950 1950
1951 1951 /*
1952 1952 * Now that we've attempted to connect, it's reasonable
1953 1953 * to allow an attempt to detach.
1954 1954 */
1955 1955 xnbp->xnb_detachable = B_TRUE;
1956 1956
1957 1957 break;
1958 1958
1959 1959 case XenbusStateClosing:
1960 1960 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1961 1961
1962 1962 break;
1963 1963
1964 1964 case XenbusStateClosed:
1965 1965 xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
1966 1966
1967 1967 mutex_enter(&xnbp->xnb_tx_lock);
1968 1968 mutex_enter(&xnbp->xnb_rx_lock);
1969 1969
1970 1970 xnb_disconnect_rings(dip);
1971 1971 xnbp->xnb_connected = B_FALSE;
1972 1972
1973 1973 mutex_exit(&xnbp->xnb_rx_lock);
1974 1974 mutex_exit(&xnbp->xnb_tx_lock);
1975 1975
1976 1976 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1977 1977 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
1978 1978 /*
1979 1979 * In all likelihood this is already set (in the above
1980 1980 * case), but if the peer never attempted to connect
1981 1981 * and the domain is destroyed we get here without
1982 1982 * having been through the case above, so we set it to
1983 1983 * be sure.
1984 1984 */
1985 1985 xnbp->xnb_detachable = B_TRUE;
1986 1986
1987 1987 break;
1988 1988
1989 1989 default:
1990 1990 break;
1991 1991 }
1992 1992 }
1993 1993
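/*
 * Respond to hotplug script state changes.  A transition to Connected
 * marks the backend half of the handshake ready; xnb_start_connect()
 * runs once the frontend half is ready too.
 */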
1994 1994 static void
1995 1995 xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1996 1996 void *arg, void *impl_data)
1997 1997 {
1998 1998 _NOTE(ARGUNUSED(id, arg));
1999 1999 xnb_t *xnbp = ddi_get_driver_private(dip);
2000 2000 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
2001 2001
2002 2002 ASSERT(xnbp != NULL);
2003 2003
2004 2004 switch (state) {
2005 2005 case Connected:
2006 2006 /* spurious hotplug event */
2007 2007 if (xnbp->xnb_hotplugged)
2008 2008 break;
2009 2009
2010 2010 if (!xnb_read_xs_config(xnbp))
2011 2011 break;
2012 2012
2013 2013 if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
2014 2014 break;
2015 2015
2016 2016 mutex_enter(&xnbp->xnb_tx_lock);
2017 2017 mutex_enter(&xnbp->xnb_rx_lock);
2018 2018
2019 2019 xnbp->xnb_hotplugged = B_TRUE;
2020 2020
2021 2021 mutex_exit(&xnbp->xnb_rx_lock);
2022 2022 mutex_exit(&xnbp->xnb_tx_lock);
2023 2023
2024 2024 mutex_enter(&xnbp->xnb_state_lock);
2025 2025 xnbp->xnb_be_status = XNB_STATE_READY;
2026 2026 if (xnbp->xnb_fe_status == XNB_STATE_READY)
2027 2027 xnb_start_connect(xnbp);
2028 2028 mutex_exit(&xnbp->xnb_state_lock);
2029 2029
2030 2030 break;
2031 2031
2032 2032 default:
2033 2033 break;
2034 2034 }
2035 2035 }
2036 2036
2037 2037 static struct modldrv modldrv = {
2038 2038 &mod_miscops, "xnb",
2039 2039 };
2040 2040
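/*
 * struct modlinkage embeds its linkage list as an array (roughly:
 *
 *	struct modlinkage {
 *		int	ml_rev;
 *		void	*ml_linkage[MODMAXLINK];
 *	};
 *
 * see <sys/modctl.h>), so the initializer below needs an inner set of
 * braces around { &modldrv, NULL }.  Without them, gcc's
 * -Wmissing-braces (no longer suppressed for uts) warns here.
 */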
2041 2041 static struct modlinkage modlinkage = {
2042 - MODREV_1, &modldrv, NULL
2042 + MODREV_1, { &modldrv, NULL }
2043 2043 };
2044 2044
2045 2045 int
2046 2046 _init(void)
2047 2047 {
2048 2048 int i;
2049 2049
2050 2050 mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
2051 2051
2052 2052 i = mod_install(&modlinkage);
2053 2053 if (i != DDI_SUCCESS)
2054 2054 mutex_destroy(&xnb_alloc_page_lock);
2055 2055
2056 2056 return (i);
2057 2057 }
2058 2058
2059 2059 int
2060 2060 _info(struct modinfo *modinfop)
2061 2061 {
2062 2062 return (mod_info(&modlinkage, modinfop));
2063 2063 }
2064 2064
2065 2065 int
2066 2066 _fini(void)
2067 2067 {
2068 2068 int i;
2069 2069
2070 2070 i = mod_remove(&modlinkage);
2071 2071 if (i == DDI_SUCCESS)
2072 2072 mutex_destroy(&xnb_alloc_page_lock);
2073 2073
2074 2074 return (i);
2075 2075 }