7127 remove -Wno-missing-braces from Makefile.uts
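This webrev shows the xpvtap.c part of the change: the initializer for xpvtap_modlinkage gains an inner set of braces around its linkage array, so the file compiles cleanly once -Wno-missing-braces is dropped from Makefile.uts. For context, below is a minimal sketch of the warning being fixed; the struct is a simplified stand-in, not the real <sys/modctl.h> definition (the member names mirror illumos, but the array size and types here are illustrative):

	/* Simplified stand-in for struct modlinkage (illustrative only). */
	struct modldrv_sketch { int dummy; };
	static struct modldrv_sketch some_modldrv;

	struct modlinkage_sketch {
		int	ml_rev;			/* interface revision */
		void	*ml_linkage[7];		/* NULL-terminated linkage pointers */
	};

	/* Old style: the array member is initialized without its own braces.
	 * Valid C, but gcc's -Wmissing-braces warns about it. */
	static struct modlinkage_sketch old_style = {
		1,
		(void *)&some_modldrv,
		NULL
	};

	/* New style (what the hunk at old lines 113-114 does): brace the
	 * array explicitly, which silences the warning. */
	static struct modlinkage_sketch new_style = {
		1,
		{ (void *)&some_modldrv, NULL }
	};

Both initializers produce the same object; only the braced form is warning-clean, which is what lets the blanket -Wno-missing-braces flag come out of Makefile.uts.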
--- old/usr/src/uts/common/xen/io/xpvtap.c
+++ new/usr/src/uts/common/xen/io/xpvtap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27
28 28 #include <sys/errno.h>
29 29 #include <sys/types.h>
30 30 #include <sys/conf.h>
31 31 #include <sys/kmem.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/stat.h>
34 34 #include <sys/sunddi.h>
35 35 #include <sys/file.h>
36 36 #include <sys/open.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/ddi_impldefs.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/ddidevmap.h>
41 41 #include <sys/policy.h>
42 42
43 43 #include <sys/vmsystm.h>
44 44 #include <vm/hat_i86.h>
45 45 #include <vm/hat_pte.h>
46 46 #include <vm/seg_kmem.h>
47 47 #include <vm/seg_mf.h>
48 48
49 49 #include <xen/io/blkif_impl.h>
50 50 #include <xen/io/blk_common.h>
51 51 #include <xen/io/xpvtap.h>
52 52
53 53
54 54 static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
55 55 static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
56 56 static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
57 57 cred_t *cred, int *rval);
58 58 static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
59 59 size_t len, size_t *maplen, uint_t model);
60 60 static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
61 61 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
62 62 cred_t *cred_p);
63 63 static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
64 64 struct pollhead **phpp);
65 65
66 66 static struct cb_ops xpvtap_cb_ops = {
67 67 xpvtap_open, /* cb_open */
68 68 xpvtap_close, /* cb_close */
69 69 nodev, /* cb_strategy */
70 70 nodev, /* cb_print */
71 71 nodev, /* cb_dump */
72 72 nodev, /* cb_read */
73 73 nodev, /* cb_write */
74 74 xpvtap_ioctl, /* cb_ioctl */
75 75 xpvtap_devmap, /* cb_devmap */
76 76 nodev, /* cb_mmap */
77 77 xpvtap_segmap, /* cb_segmap */
78 78 xpvtap_chpoll, /* cb_chpoll */
79 79 ddi_prop_op, /* cb_prop_op */
80 80 NULL, /* cb_stream */
81 81 D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */
82 82 CB_REV
83 83 };
84 84
85 85 static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
86 86 void **result);
87 87 static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
88 88 static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
89 89
90 90 static struct dev_ops xpvtap_dev_ops = {
91 91 DEVO_REV, /* devo_rev */
92 92 0, /* devo_refcnt */
93 93 xpvtap_getinfo, /* devo_getinfo */
94 94 nulldev, /* devo_identify */
95 95 nulldev, /* devo_probe */
96 96 xpvtap_attach, /* devo_attach */
97 97 xpvtap_detach, /* devo_detach */
98 98 nodev, /* devo_reset */
99 99 &xpvtap_cb_ops, /* devo_cb_ops */
100 100 NULL, /* devo_bus_ops */
101 101 NULL /* power */
102 102 };
103 103
104 104
105 105 static struct modldrv xpvtap_modldrv = {
106 106 &mod_driverops, /* Type of module. This one is a driver */
107 107 "xpvtap driver", /* Name of the module. */
108 108 &xpvtap_dev_ops, /* driver ops */
109 109 };
110 110
111 111 static struct modlinkage xpvtap_modlinkage = {
112 112 MODREV_1,
113 - (void *) &xpvtap_modldrv,
114 - NULL
113 + { (void *) &xpvtap_modldrv, NULL }
115 114 };
116 115
117 116
118 117 void *xpvtap_statep;
119 118
120 119
121 120 static xpvtap_state_t *xpvtap_drv_init(int instance);
122 121 static void xpvtap_drv_fini(xpvtap_state_t *state);
123 122 static uint_t xpvtap_intr(caddr_t arg);
124 123
125 124 typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
126 125 static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
127 126 xpvtap_rs_hdl_t *handle);
128 127 static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
129 128 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
130 129 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
131 130 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
132 131 xpvtap_rs_cleanup_t callback, void *arg);
133 132
134 133 static int xpvtap_segmf_register(xpvtap_state_t *state);
135 134 static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);
136 135
137 136 static int xpvtap_user_init(xpvtap_state_t *state);
138 137 static void xpvtap_user_fini(xpvtap_state_t *state);
139 138 static int xpvtap_user_ring_init(xpvtap_state_t *state);
140 139 static void xpvtap_user_ring_fini(xpvtap_state_t *state);
141 140 static int xpvtap_user_thread_init(xpvtap_state_t *state);
142 141 static void xpvtap_user_thread_fini(xpvtap_state_t *state);
143 142 static void xpvtap_user_thread_start(caddr_t arg);
144 143 static void xpvtap_user_thread_stop(xpvtap_state_t *state);
145 144 static void xpvtap_user_thread(void *arg);
146 145
147 146 static void xpvtap_user_app_stop(caddr_t arg);
148 147
149 148 static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
150 149 uint_t *uid);
151 150 static int xpvtap_user_request_push(xpvtap_state_t *state,
152 151 blkif_request_t *req, uint_t uid);
153 152 static int xpvtap_user_response_get(xpvtap_state_t *state,
154 153 blkif_response_t *resp, uint_t *uid);
155 154 static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
156 155
157 156
158 157 /*
159 158 * _init()
160 159 */
161 160 int
162 161 _init(void)
163 162 {
164 163 int e;
165 164
166 165 e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
167 166 if (e != 0) {
168 167 return (e);
169 168 }
170 169
171 170 e = mod_install(&xpvtap_modlinkage);
172 171 if (e != 0) {
173 172 ddi_soft_state_fini(&xpvtap_statep);
174 173 return (e);
175 174 }
176 175
177 176 return (0);
178 177 }
179 178
180 179
181 180 /*
182 181 * _info()
183 182 */
184 183 int
185 184 _info(struct modinfo *modinfop)
186 185 {
187 186 return (mod_info(&xpvtap_modlinkage, modinfop));
188 187 }
189 188
190 189
191 190 /*
192 191 * _fini()
193 192 */
194 193 int
195 194 _fini(void)
196 195 {
197 196 int e;
198 197
199 198 e = mod_remove(&xpvtap_modlinkage);
200 199 if (e != 0) {
201 200 return (e);
202 201 }
203 202
204 203 ddi_soft_state_fini(&xpvtap_statep);
205 204
206 205 return (0);
207 206 }
208 207
209 208
210 209 /*
211 210 * xpvtap_attach()
212 211 */
213 212 static int
214 213 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
215 214 {
216 215 blk_ringinit_args_t args;
217 216 xpvtap_state_t *state;
218 217 int instance;
219 218 int e;
220 219
221 220
222 221 switch (cmd) {
223 222 case DDI_ATTACH:
224 223 break;
225 224
226 225 case DDI_RESUME:
227 226 return (DDI_SUCCESS);
228 227
229 228 default:
230 229 return (DDI_FAILURE);
231 230 }
232 231
233 232 /* initialize our state info */
234 233 instance = ddi_get_instance(dip);
235 234 state = xpvtap_drv_init(instance);
236 235 if (state == NULL) {
237 236 return (DDI_FAILURE);
238 237 }
239 238 state->bt_dip = dip;
240 239
241 240 /* Initialize the guest ring */
242 241 args.ar_dip = state->bt_dip;
243 242 args.ar_intr = xpvtap_intr;
244 243 args.ar_intr_arg = (caddr_t)state;
245 244 args.ar_ringup = xpvtap_user_thread_start;
246 245 args.ar_ringup_arg = (caddr_t)state;
247 246 args.ar_ringdown = xpvtap_user_app_stop;
248 247 args.ar_ringdown_arg = (caddr_t)state;
249 248 e = blk_ring_init(&args, &state->bt_guest_ring);
250 249 if (e != DDI_SUCCESS) {
251 250 goto attachfail_ringinit;
252 251 }
253 252
254 253 /* create the minor node (for ioctl/mmap) */
255 254 e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
256 255 DDI_PSEUDO, 0);
257 256 if (e != DDI_SUCCESS) {
258 257 goto attachfail_minor_node;
259 258 }
260 259
261 260 /* Report that driver was loaded */
262 261 ddi_report_dev(dip);
263 262
264 263 return (DDI_SUCCESS);
265 264
266 265 attachfail_minor_node:
267 266 blk_ring_fini(&state->bt_guest_ring);
268 267 attachfail_ringinit:
269 268 xpvtap_drv_fini(state);
270 269 return (DDI_FAILURE);
271 270 }
272 271
273 272
274 273 /*
275 274 * xpvtap_detach()
276 275 */
277 276 static int
278 277 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
279 278 {
280 279 xpvtap_state_t *state;
281 280 int instance;
282 281
283 282
284 283 instance = ddi_get_instance(dip);
285 284 state = ddi_get_soft_state(xpvtap_statep, instance);
286 285 if (state == NULL) {
287 286 return (DDI_FAILURE);
288 287 }
289 288
290 289 switch (cmd) {
291 290 case DDI_DETACH:
292 291 break;
293 292
294 293 case DDI_SUSPEND:
295 294 default:
296 295 return (DDI_FAILURE);
297 296 }
298 297
299 298 xpvtap_user_thread_stop(state);
300 299 blk_ring_fini(&state->bt_guest_ring);
301 300 xpvtap_drv_fini(state);
302 301 ddi_remove_minor_node(dip, NULL);
303 302
304 303 return (DDI_SUCCESS);
305 304 }
306 305
307 306
308 307 /*
309 308 * xpvtap_getinfo()
310 309 */
311 310 /*ARGSUSED*/
312 311 static int
313 312 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
314 313 {
315 314 xpvtap_state_t *state;
316 315 int instance;
317 316 dev_t dev;
318 317 int e;
319 318
320 319
321 320 dev = (dev_t)arg;
322 321 instance = getminor(dev);
323 322
324 323 switch (cmd) {
325 324 case DDI_INFO_DEVT2DEVINFO:
326 325 state = ddi_get_soft_state(xpvtap_statep, instance);
327 326 if (state == NULL) {
328 327 return (DDI_FAILURE);
329 328 }
330 329 *result = (void *)state->bt_dip;
331 330 e = DDI_SUCCESS;
332 331 break;
333 332
334 333 case DDI_INFO_DEVT2INSTANCE:
335 334 *result = (void *)(uintptr_t)instance;
336 335 e = DDI_SUCCESS;
337 336 break;
338 337
339 338 default:
340 339 e = DDI_FAILURE;
341 340 break;
342 341 }
343 342
344 343 return (e);
345 344 }
346 345
347 346
348 347 /*
349 348 * xpvtap_open()
350 349 */
351 350 /*ARGSUSED*/
352 351 static int
353 352 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
354 353 {
355 354 xpvtap_state_t *state;
356 355 int instance;
357 356
358 357
359 358 if (secpolicy_xvm_control(cred)) {
360 359 return (EPERM);
361 360 }
362 361
363 362 instance = getminor(*devp);
364 363 state = ddi_get_soft_state(xpvtap_statep, instance);
365 364 if (state == NULL) {
366 365 return (ENXIO);
367 366 }
368 367
369 368 /* we should only be opened once */
370 369 mutex_enter(&state->bt_open.bo_mutex);
371 370 if (state->bt_open.bo_opened) {
372 371 mutex_exit(&state->bt_open.bo_mutex);
373 372 return (EBUSY);
374 373 }
375 374 state->bt_open.bo_opened = B_TRUE;
376 375 mutex_exit(&state->bt_open.bo_mutex);
377 376
378 377 /*
379 378	 * save the app's address space. need it for mapping/unmapping grefs
380 379	 * since we will be doing it in a separate kernel thread.
381 380 */
382 381 state->bt_map.um_as = curproc->p_as;
383 382
384 383 return (0);
385 384 }
386 385
387 386
388 387 /*
389 388 * xpvtap_close()
390 389 */
391 390 /*ARGSUSED*/
392 391 static int
393 392 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
394 393 {
395 394 xpvtap_state_t *state;
396 395 int instance;
397 396
398 397
399 398 instance = getminor(devp);
400 399 state = ddi_get_soft_state(xpvtap_statep, instance);
401 400 if (state == NULL) {
402 401 return (ENXIO);
403 402 }
404 403
405 404 /*
406 405 * wake thread so it can cleanup and wait for it to exit so we can
407 406 * be sure it's not in the middle of processing a request/response.
408 407 */
409 408 mutex_enter(&state->bt_thread.ut_mutex);
410 409 state->bt_thread.ut_wake = B_TRUE;
411 410 state->bt_thread.ut_exit = B_TRUE;
412 411 cv_signal(&state->bt_thread.ut_wake_cv);
413 412 if (!state->bt_thread.ut_exit_done) {
414 413 cv_wait(&state->bt_thread.ut_exit_done_cv,
415 414 &state->bt_thread.ut_mutex);
416 415 }
417 416 ASSERT(state->bt_thread.ut_exit_done);
418 417 mutex_exit(&state->bt_thread.ut_mutex);
419 418
420 419 state->bt_map.um_as = NULL;
421 420 state->bt_map.um_guest_pages = NULL;
422 421
423 422 /*
424 423 * when the ring is brought down, a userland hotplug script is run
425 424 * which tries to bring the userland app down. We'll wait for a bit
426 425 * for the user app to exit. Notify the thread waiting that the app
427 426 * has closed the driver.
428 427 */
429 428 mutex_enter(&state->bt_open.bo_mutex);
430 429 ASSERT(state->bt_open.bo_opened);
431 430 state->bt_open.bo_opened = B_FALSE;
432 431 cv_signal(&state->bt_open.bo_exit_cv);
433 432 mutex_exit(&state->bt_open.bo_mutex);
434 433
435 434 return (0);
436 435 }
437 436
438 437
439 438 /*
440 439 * xpvtap_ioctl()
441 440 */
442 441 /*ARGSUSED*/
443 442 static int
444 443 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
445 444 int *rval)
446 445 {
447 446 xpvtap_state_t *state;
448 447 int instance;
449 448
450 449
451 450 if (secpolicy_xvm_control(cred)) {
452 451 return (EPERM);
453 452 }
454 453
455 454 instance = getminor(dev);
456 455 if (instance == -1) {
457 456 return (EBADF);
458 457 }
459 458
460 459 state = ddi_get_soft_state(xpvtap_statep, instance);
461 460 if (state == NULL) {
462 461 return (EBADF);
463 462 }
464 463
465 464 switch (cmd) {
466 465 case XPVTAP_IOCTL_RESP_PUSH:
467 466 /*
468 467 * wake thread, thread handles guest requests and user app
469 468 * responses.
470 469 */
471 470 mutex_enter(&state->bt_thread.ut_mutex);
472 471 state->bt_thread.ut_wake = B_TRUE;
473 472 cv_signal(&state->bt_thread.ut_wake_cv);
474 473 mutex_exit(&state->bt_thread.ut_mutex);
475 474 break;
476 475
477 476 default:
478 477 cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
479 478 return (ENXIO);
480 479 }
481 480
482 481 return (0);
483 482 }
484 483
485 484
486 485 /*
487 486 * xpvtap_segmap()
488 487 */
489 488 /*ARGSUSED*/
490 489 static int
491 490 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
492 491 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
493 492 cred_t *cred_p)
494 493 {
495 494 struct segmf_crargs a;
496 495 xpvtap_state_t *state;
497 496 int instance;
498 497 int e;
499 498
500 499
501 500 if (secpolicy_xvm_control(cred_p)) {
502 501 return (EPERM);
503 502 }
504 503
505 504 instance = getminor(dev);
506 505 state = ddi_get_soft_state(xpvtap_statep, instance);
507 506 if (state == NULL) {
508 507 return (EBADF);
509 508 }
510 509
511 510 /* the user app should be doing a MAP_SHARED mapping */
512 511 if ((flags & MAP_TYPE) != MAP_SHARED) {
513 512 return (EINVAL);
514 513 }
515 514
516 515 /*
517 516 * if this is the user ring (offset = 0), devmap it (which ends up in
518 517 * xpvtap_devmap). devmap will alloc and map the ring into the
519 518 * app's VA space.
520 519 */
521 520 if (off == 0) {
522 521 e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
523 522 prot, maxprot, flags, cred_p);
524 523 return (e);
525 524 }
526 525
527 526 /* this should be the mmap for the gref pages (offset = PAGESIZE) */
528 527 if (off != PAGESIZE) {
529 528 return (EINVAL);
530 529 }
531 530
532 531 /* make sure we get the size we're expecting */
533 532 if (len != XPVTAP_GREF_BUFSIZE) {
534 533 return (EINVAL);
535 534 }
536 535
537 536 /*
538 537 * reserve user app VA space for the gref pages and use segmf to
539 538 * manage the backing store for the physical memory. segmf will
540 539 * map in/out the grefs and fault them in/out.
541 540 */
542 541 ASSERT(asp == state->bt_map.um_as);
543 542 as_rangelock(asp);
544 543 if ((flags & MAP_FIXED) == 0) {
545 544 map_addr(addrp, len, 0, 0, flags);
546 545 if (*addrp == NULL) {
547 546 as_rangeunlock(asp);
548 547 return (ENOMEM);
549 548 }
550 549 } else {
551 550 /* User specified address */
552 551 (void) as_unmap(asp, *addrp, len);
553 552 }
554 553 a.dev = dev;
555 554 a.prot = (uchar_t)prot;
556 555 a.maxprot = (uchar_t)maxprot;
557 556 e = as_map(asp, *addrp, len, segmf_create, &a);
558 557 if (e != 0) {
559 558 as_rangeunlock(asp);
560 559 return (e);
561 560 }
562 561 as_rangeunlock(asp);
563 562
564 563 /*
565 564 * Stash user base address, and compute address where the request
566 565 * array will end up.
567 566 */
568 567 state->bt_map.um_guest_pages = (caddr_t)*addrp;
569 568 state->bt_map.um_guest_size = (size_t)len;
570 569
571 570 /* register an as callback so we can cleanup when the app goes away */
572 571 e = as_add_callback(asp, xpvtap_segmf_unregister, state,
573 572 AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
574 573 if (e != 0) {
575 574 (void) as_unmap(asp, *addrp, len);
576 575 return (EINVAL);
577 576 }
578 577
579 578 /* wake thread to see if there are requests already queued up */
580 579 mutex_enter(&state->bt_thread.ut_mutex);
581 580 state->bt_thread.ut_wake = B_TRUE;
582 581 cv_signal(&state->bt_thread.ut_wake_cv);
583 582 mutex_exit(&state->bt_thread.ut_mutex);
584 583
585 584 return (0);
586 585 }
587 586
588 587
589 588 /*
590 589 * xpvtap_devmap()
591 590 */
592 591 /*ARGSUSED*/
593 592 static int
594 593 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
595 594 size_t *maplen, uint_t model)
596 595 {
597 596 xpvtap_user_ring_t *usring;
598 597 xpvtap_state_t *state;
599 598 int instance;
600 599 int e;
601 600
602 601
603 602 instance = getminor(dev);
604 603 state = ddi_get_soft_state(xpvtap_statep, instance);
605 604 if (state == NULL) {
606 605 return (EBADF);
607 606 }
608 607
609 608 /* we should only get here if the offset was == 0 */
610 609 if (off != 0) {
611 610 return (EINVAL);
612 611 }
613 612
614 613 /* we should only be mapping in one page */
615 614 if (len != PAGESIZE) {
616 615 return (EINVAL);
617 616 }
618 617
619 618 /*
620 619 * we already allocated the user ring during driver attach, all we
621 620 * need to do is map it into the user app's VA.
622 621 */
623 622 usring = &state->bt_user_ring;
624 623 e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
625 624 PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
626 625 if (e < 0) {
627 626 return (e);
628 627 }
629 628
630 629 	/* return the size to complete the devmap */
631 630 *maplen = PAGESIZE;
632 631
633 632 return (0);
634 633 }
635 634
636 635
637 636 /*
638 637 * xpvtap_chpoll()
639 638 */
640 639 static int
641 640 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
642 641 struct pollhead **phpp)
643 642 {
644 643 xpvtap_user_ring_t *usring;
645 644 xpvtap_state_t *state;
646 645 int instance;
647 646
648 647
649 648 instance = getminor(dev);
650 649 if (instance == -1) {
651 650 return (EBADF);
652 651 }
653 652 state = ddi_get_soft_state(xpvtap_statep, instance);
654 653 if (state == NULL) {
655 654 return (EBADF);
656 655 }
657 656
658 657 if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
659 658 *reventsp = 0;
660 659 return (EINVAL);
661 660 }
662 661
663 662 /*
664 663 * if we pushed requests on the user ring since the last poll, wakeup
665 664 * the user app
666 665 */
667 666 usring = &state->bt_user_ring;
668 667 if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
669 668
670 669 /*
671 670 * XXX - is this faster here or xpvtap_user_request_push??
672 671 * prelim data says here. Because less membars or because
673 672 * user thread will spin in poll requests before getting to
674 673 * responses?
675 674 */
676 675 RING_PUSH_REQUESTS(&usring->ur_ring);
677 676
678 677 usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
679 678 *reventsp = POLLIN | POLLRDNORM;
680 679
681 680 /* no new requests */
682 681 } else {
683 682 *reventsp = 0;
684 683 if (!anyyet) {
685 684 *phpp = &state->bt_pollhead;
686 685 }
687 686 }
688 687
689 688 return (0);
690 689 }
691 690
692 691
693 692 /*
694 693 * xpvtap_drv_init()
695 694 */
696 695 static xpvtap_state_t *
697 696 xpvtap_drv_init(int instance)
698 697 {
699 698 xpvtap_state_t *state;
700 699 int e;
701 700
702 701
703 702 e = ddi_soft_state_zalloc(xpvtap_statep, instance);
704 703 if (e != DDI_SUCCESS) {
705 704 return (NULL);
706 705 }
707 706 state = ddi_get_soft_state(xpvtap_statep, instance);
708 707 if (state == NULL) {
709 708 goto drvinitfail_get_soft_state;
710 709 }
711 710
712 711 state->bt_instance = instance;
713 712 mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
714 713 cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
715 714 state->bt_open.bo_opened = B_FALSE;
716 715 state->bt_map.um_registered = B_FALSE;
717 716
718 717 /* initialize user ring, thread, mapping state */
719 718 e = xpvtap_user_init(state);
720 719 if (e != DDI_SUCCESS) {
721 720 goto drvinitfail_userinit;
722 721 }
723 722
724 723 return (state);
725 724
726 725 drvinitfail_userinit:
727 726 cv_destroy(&state->bt_open.bo_exit_cv);
728 727 mutex_destroy(&state->bt_open.bo_mutex);
729 728 drvinitfail_get_soft_state:
730 729 (void) ddi_soft_state_free(xpvtap_statep, instance);
731 730 return (NULL);
732 731 }
733 732
734 733
735 734 /*
736 735 * xpvtap_drv_fini()
737 736 */
738 737 static void
739 738 xpvtap_drv_fini(xpvtap_state_t *state)
740 739 {
741 740 xpvtap_user_fini(state);
742 741 cv_destroy(&state->bt_open.bo_exit_cv);
743 742 mutex_destroy(&state->bt_open.bo_mutex);
744 743 (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
745 744 }
746 745
747 746
748 747 /*
749 748 * xpvtap_intr()
750 749 * this routine will be called when we have a request on the guest ring.
751 750 */
752 751 static uint_t
753 752 xpvtap_intr(caddr_t arg)
754 753 {
755 754 xpvtap_state_t *state;
756 755
757 756
758 757 state = (xpvtap_state_t *)arg;
759 758
760 759 /* wake thread, thread handles guest requests and user app responses */
761 760 mutex_enter(&state->bt_thread.ut_mutex);
762 761 state->bt_thread.ut_wake = B_TRUE;
763 762 cv_signal(&state->bt_thread.ut_wake_cv);
764 763 mutex_exit(&state->bt_thread.ut_mutex);
765 764
766 765 return (DDI_INTR_CLAIMED);
767 766 }
768 767
769 768
770 769 /*
771 770 * xpvtap_segmf_register()
772 771 */
773 772 static int
774 773 xpvtap_segmf_register(xpvtap_state_t *state)
775 774 {
776 775 struct seg *seg;
777 776 uint64_t pte_ma;
778 777 struct as *as;
779 778 caddr_t uaddr;
780 779 uint_t pgcnt;
781 780 int i;
782 781
783 782
784 783 as = state->bt_map.um_as;
785 784 pgcnt = btopr(state->bt_map.um_guest_size);
786 785 uaddr = state->bt_map.um_guest_pages;
787 786
788 787 if (pgcnt == 0) {
789 788 return (DDI_FAILURE);
790 789 }
791 790
792 791 AS_LOCK_ENTER(as, RW_READER);
793 792
794 793 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
795 794 if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
796 795 (seg->s_base + seg->s_size))) {
797 796 AS_LOCK_EXIT(as);
798 797 return (DDI_FAILURE);
799 798 }
800 799
801 800 /*
802 801 * lock down the htables so the HAT can't steal them. Register the
803 802 * PTE MA's for each gref page with seg_mf so we can do user space
804 803 * gref mappings.
805 804 */
806 805 for (i = 0; i < pgcnt; i++) {
807 806 hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
808 807 hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
809 808 PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
810 809 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
811 810 hat_release_mapping(as->a_hat, uaddr);
812 811 segmf_add_gref_pte(seg, uaddr, pte_ma);
813 812 uaddr += PAGESIZE;
814 813 }
815 814
816 815 state->bt_map.um_registered = B_TRUE;
817 816
818 817 AS_LOCK_EXIT(as);
819 818
820 819 return (DDI_SUCCESS);
821 820 }
822 821
823 822
824 823 /*
825 824 * xpvtap_segmf_unregister()
826 825 * as_callback routine
827 826 */
828 827 /*ARGSUSED*/
829 828 static void
830 829 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
831 830 {
832 831 xpvtap_state_t *state;
833 832 caddr_t uaddr;
834 833 uint_t pgcnt;
835 834 int i;
836 835
837 836
838 837 state = (xpvtap_state_t *)arg;
839 838 if (!state->bt_map.um_registered) {
840 839 /* remove the callback (which is this routine) */
841 840 (void) as_delete_callback(as, arg);
842 841 return;
843 842 }
844 843
845 844 pgcnt = btopr(state->bt_map.um_guest_size);
846 845 uaddr = state->bt_map.um_guest_pages;
847 846
848 847 /* unmap any outstanding req's grefs */
849 848 xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
850 849
851 850 /* Unlock the gref pages */
852 851 for (i = 0; i < pgcnt; i++) {
853 852 AS_LOCK_ENTER(as, RW_WRITER);
854 853 hat_prepare_mapping(as->a_hat, uaddr, NULL);
855 854 hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
856 855 hat_release_mapping(as->a_hat, uaddr);
857 856 AS_LOCK_EXIT(as);
858 857 uaddr += PAGESIZE;
859 858 }
860 859
861 860 /* remove the callback (which is this routine) */
862 861 (void) as_delete_callback(as, arg);
863 862
864 863 state->bt_map.um_registered = B_FALSE;
865 864 }
866 865
867 866
868 867 /*
869 868 * xpvtap_user_init()
870 869 */
871 870 static int
872 871 xpvtap_user_init(xpvtap_state_t *state)
873 872 {
874 873 xpvtap_user_map_t *map;
875 874 int e;
876 875
877 876
878 877 map = &state->bt_map;
879 878
880 879 /* Setup the ring between the driver and user app */
881 880 e = xpvtap_user_ring_init(state);
882 881 if (e != DDI_SUCCESS) {
883 882 return (DDI_FAILURE);
884 883 }
885 884
886 885 /*
887 886 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
888 887 * is the same number of requests as the guest ring. Initialize the
889 888 * state we use to track request IDs to the user app. These IDs will
890 889 * also identify which group of gref pages correspond with the
891 890 * request.
892 891 */
893 892 xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
894 893
895 894 /*
896 895	 * allocate the space to store a copy of each outstanding request. We
897 896 * will need to reference the ID and the number of segments when we
898 897 * get the response from the user app.
899 898 */
900 899 map->um_outstanding_reqs = kmem_zalloc(
901 900 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
902 901 KM_SLEEP);
903 902
904 903 /*
905 904 * initialize the thread we use to process guest requests and user
906 905 * responses.
907 906 */
908 907 e = xpvtap_user_thread_init(state);
909 908 if (e != DDI_SUCCESS) {
910 909 goto userinitfail_user_thread_init;
911 910 }
912 911
913 912 return (DDI_SUCCESS);
914 913
915 914 userinitfail_user_thread_init:
916 915 xpvtap_rs_fini(&map->um_rs);
917 916 kmem_free(map->um_outstanding_reqs,
918 917 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
919 918 xpvtap_user_ring_fini(state);
920 919 return (DDI_FAILURE);
921 920 }
922 921
923 922
924 923 /*
925 924 * xpvtap_user_ring_init()
926 925 */
927 926 static int
928 927 xpvtap_user_ring_init(xpvtap_state_t *state)
929 928 {
930 929 xpvtap_user_ring_t *usring;
931 930
932 931
933 932 usring = &state->bt_user_ring;
934 933
935 934 	/* allocate and initialize the page for the shared user ring */
936 935 usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
937 936 DDI_UMEM_SLEEP, &usring->ur_cookie);
938 937 SHARED_RING_INIT(usring->ur_sring);
939 938 FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
940 939 usring->ur_prod_polled = 0;
941 940
942 941 return (DDI_SUCCESS);
943 942 }
944 943
945 944
946 945 /*
947 946 * xpvtap_user_thread_init()
948 947 */
949 948 static int
950 949 xpvtap_user_thread_init(xpvtap_state_t *state)
951 950 {
952 951 xpvtap_user_thread_t *thread;
953 952 char taskqname[32];
954 953
955 954
956 955 thread = &state->bt_thread;
957 956
958 957 mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
959 958 cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
960 959 cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
961 960 thread->ut_wake = B_FALSE;
962 961 thread->ut_exit = B_FALSE;
963 962 thread->ut_exit_done = B_TRUE;
964 963
965 964 /* create but don't start the user thread */
966 965 (void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
967 966 thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
968 967 TASKQ_DEFAULTPRI, 0);
969 968 if (thread->ut_taskq == NULL) {
970 969 goto userinitthrfail_taskq_create;
971 970 }
972 971
973 972 return (DDI_SUCCESS);
974 973
975 974 userinitthrfail_taskq_dispatch:
976 975 ddi_taskq_destroy(thread->ut_taskq);
977 976 userinitthrfail_taskq_create:
978 977 cv_destroy(&thread->ut_exit_done_cv);
979 978 cv_destroy(&thread->ut_wake_cv);
980 979 mutex_destroy(&thread->ut_mutex);
981 980
982 981 return (DDI_FAILURE);
983 982 }
984 983
985 984
986 985 /*
987 986 * xpvtap_user_thread_start()
988 987 */
989 988 static void
990 989 xpvtap_user_thread_start(caddr_t arg)
991 990 {
992 991 xpvtap_user_thread_t *thread;
993 992 xpvtap_state_t *state;
994 993 int e;
995 994
996 995
997 996 state = (xpvtap_state_t *)arg;
998 997 thread = &state->bt_thread;
999 998
1000 999 /* start the user thread */
1001 1000 thread->ut_exit_done = B_FALSE;
1002 1001 e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
1003 1002 DDI_SLEEP);
1004 1003 if (e != DDI_SUCCESS) {
1005 1004 thread->ut_exit_done = B_TRUE;
1006 1005 cmn_err(CE_WARN, "Unable to start user thread\n");
1007 1006 }
1008 1007 }
1009 1008
1010 1009
1011 1010 /*
1012 1011 * xpvtap_user_thread_stop()
1013 1012 */
1014 1013 static void
1015 1014 xpvtap_user_thread_stop(xpvtap_state_t *state)
1016 1015 {
1017 1016 /* wake thread so it can exit */
1018 1017 mutex_enter(&state->bt_thread.ut_mutex);
1019 1018 state->bt_thread.ut_wake = B_TRUE;
1020 1019 state->bt_thread.ut_exit = B_TRUE;
1021 1020 cv_signal(&state->bt_thread.ut_wake_cv);
1022 1021 if (!state->bt_thread.ut_exit_done) {
1023 1022 cv_wait(&state->bt_thread.ut_exit_done_cv,
1024 1023 &state->bt_thread.ut_mutex);
1025 1024 }
1026 1025 mutex_exit(&state->bt_thread.ut_mutex);
1027 1026 ASSERT(state->bt_thread.ut_exit_done);
1028 1027 }
1029 1028
1030 1029
1031 1030 /*
1032 1031 * xpvtap_user_fini()
1033 1032 */
1034 1033 static void
1035 1034 xpvtap_user_fini(xpvtap_state_t *state)
1036 1035 {
1037 1036 xpvtap_user_map_t *map;
1038 1037
1039 1038
1040 1039 map = &state->bt_map;
1041 1040
1042 1041 xpvtap_user_thread_fini(state);
1043 1042 xpvtap_rs_fini(&map->um_rs);
1044 1043 kmem_free(map->um_outstanding_reqs,
1045 1044 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
1046 1045 xpvtap_user_ring_fini(state);
1047 1046 }
1048 1047
1049 1048
1050 1049 /*
1051 1050 * xpvtap_user_ring_fini()
1052 1051 */
1053 1052 static void
1054 1053 xpvtap_user_ring_fini(xpvtap_state_t *state)
1055 1054 {
1056 1055 ddi_umem_free(state->bt_user_ring.ur_cookie);
1057 1056 }
1058 1057
1059 1058
1060 1059 /*
1061 1060 * xpvtap_user_thread_fini()
1062 1061 */
1063 1062 static void
1064 1063 xpvtap_user_thread_fini(xpvtap_state_t *state)
1065 1064 {
1066 1065 ddi_taskq_destroy(state->bt_thread.ut_taskq);
1067 1066 cv_destroy(&state->bt_thread.ut_exit_done_cv);
1068 1067 cv_destroy(&state->bt_thread.ut_wake_cv);
1069 1068 mutex_destroy(&state->bt_thread.ut_mutex);
1070 1069 }
1071 1070
1072 1071
1073 1072 /*
1074 1073 * xpvtap_user_thread()
1075 1074 */
1076 1075 static void
1077 1076 xpvtap_user_thread(void *arg)
1078 1077 {
1079 1078 xpvtap_user_thread_t *thread;
1080 1079 blkif_response_t resp;
1081 1080 xpvtap_state_t *state;
1082 1081 blkif_request_t req;
1083 1082 boolean_t b;
1084 1083 uint_t uid;
1085 1084 int e;
1086 1085
1087 1086
1088 1087 state = (xpvtap_state_t *)arg;
1089 1088 thread = &state->bt_thread;
1090 1089
1091 1090 xpvtap_thread_start:
1092 1091 /* See if we are supposed to exit */
1093 1092 mutex_enter(&thread->ut_mutex);
1094 1093 if (thread->ut_exit) {
1095 1094 thread->ut_exit_done = B_TRUE;
1096 1095 cv_signal(&state->bt_thread.ut_exit_done_cv);
1097 1096 mutex_exit(&thread->ut_mutex);
1098 1097 return;
1099 1098 }
1100 1099
1101 1100 /*
1102 1101 * if we aren't supposed to be awake, wait until someone wakes us.
1103 1102 * when we wake up, check for a kill or someone telling us to exit.
1104 1103 */
1105 1104 if (!thread->ut_wake) {
1106 1105 e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
1107 1106 if ((e == 0) || (thread->ut_exit)) {
1108 1107 thread->ut_exit = B_TRUE;
1109 1108 mutex_exit(&thread->ut_mutex);
1110 1109 goto xpvtap_thread_start;
1111 1110 }
1112 1111 }
1113 1112
1114 1113 /* if someone didn't wake us, go back to the start of the thread */
1115 1114 if (!thread->ut_wake) {
1116 1115 mutex_exit(&thread->ut_mutex);
1117 1116 goto xpvtap_thread_start;
1118 1117 }
1119 1118
1120 1119 /* we are awake */
1121 1120 thread->ut_wake = B_FALSE;
1122 1121 mutex_exit(&thread->ut_mutex);
1123 1122
1124 1123 /* process requests from the guest */
1125 1124 do {
1126 1125 /*
1127 1126 * check for requests from the guest. if we don't have any,
1128 1127 * break out of the loop.
1129 1128 */
1130 1129 e = blk_ring_request_get(state->bt_guest_ring, &req);
1131 1130 if (e == B_FALSE) {
1132 1131 break;
1133 1132 }
1134 1133
1135 1134 /* we got a request, map the grefs into the user app's VA */
1136 1135 e = xpvtap_user_request_map(state, &req, &uid);
1137 1136 if (e != DDI_SUCCESS) {
1138 1137 /*
1139 1138 * If we couldn't map the request (e.g. user app hasn't
1140 1139 * opened the device yet), requeue it and try again
1141 1140 * later
1142 1141 */
1143 1142 blk_ring_request_requeue(state->bt_guest_ring);
1144 1143 break;
1145 1144 }
1146 1145
1147 1146 /* push the request to the user app */
1148 1147 e = xpvtap_user_request_push(state, &req, uid);
1149 1148 if (e != DDI_SUCCESS) {
1150 1149 resp.id = req.id;
1151 1150 resp.operation = req.operation;
1152 1151 resp.status = BLKIF_RSP_ERROR;
1153 1152 blk_ring_response_put(state->bt_guest_ring, &resp);
1154 1153 }
1155 1154 } while (!thread->ut_exit);
1156 1155
1157 1156 	/* process responses from the user app */
1158 1157 do {
1159 1158 /*
1160 1159 * check for responses from the user app. if we don't have any,
1161 1160 * break out of the loop.
1162 1161 */
1163 1162 b = xpvtap_user_response_get(state, &resp, &uid);
1164 1163 if (b != B_TRUE) {
1165 1164 break;
1166 1165 }
1167 1166
1168 1167 /*
1169 1168 * if we got a response, unmap the grefs from the matching
1170 1169 * request.
1171 1170 */
1172 1171 xpvtap_user_request_unmap(state, uid);
1173 1172
1174 1173 /* push the response to the guest */
1175 1174 blk_ring_response_put(state->bt_guest_ring, &resp);
1176 1175 } while (!thread->ut_exit);
1177 1176
1178 1177 goto xpvtap_thread_start;
1179 1178 }
1180 1179
1181 1180
1182 1181 /*
1183 1182 * xpvtap_user_request_map()
1184 1183 */
1185 1184 static int
1186 1185 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1187 1186 uint_t *uid)
1188 1187 {
1189 1188 grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1190 1189 struct seg *seg;
1191 1190 struct as *as;
1192 1191 domid_t domid;
1193 1192 caddr_t uaddr;
1194 1193 uint_t flags;
1195 1194 int i;
1196 1195 int e;
1197 1196
1198 1197
1199 1198 domid = xvdi_get_oeid(state->bt_dip);
1200 1199
1201 1200 as = state->bt_map.um_as;
1202 1201 if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
1203 1202 return (DDI_FAILURE);
1204 1203 }
1205 1204
1206 1205 /* has to happen after segmap returns */
1207 1206 if (!state->bt_map.um_registered) {
1208 1207 /* register the pte's with segmf */
1209 1208 e = xpvtap_segmf_register(state);
1210 1209 if (e != DDI_SUCCESS) {
1211 1210 return (DDI_FAILURE);
1212 1211 }
1213 1212 }
1214 1213
1215 1214 /* alloc an ID for the user ring */
1216 1215 e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
1217 1216 if (e != DDI_SUCCESS) {
1218 1217 return (DDI_FAILURE);
1219 1218 }
1220 1219
1221 1220 /* if we don't have any segments to map, we're done */
1222 1221 if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
1223 1222 (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
1224 1223 (req->nr_segments == 0)) {
1225 1224 return (DDI_SUCCESS);
1226 1225 }
1227 1226
1228 1227 /* get the apps gref address */
1229 1228 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
1230 1229
1231 1230 AS_LOCK_ENTER(as, RW_READER);
1232 1231 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1233 1232 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1234 1233 (seg->s_base + seg->s_size))) {
1235 1234 AS_LOCK_EXIT(as);
1236 1235 return (DDI_FAILURE);
1237 1236 }
1238 1237
1239 1238 /* if we are reading from disk, we are writing into memory */
1240 1239 flags = 0;
1241 1240 if (req->operation == BLKIF_OP_READ) {
1242 1241 flags |= SEGMF_GREF_WR;
1243 1242 }
1244 1243
1245 1244 /* Load the grefs into seg_mf */
1246 1245 for (i = 0; i < req->nr_segments; i++) {
1247 1246 gref[i] = req->seg[i].gref;
1248 1247 }
1249 1248 (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
1250 1249 domid);
1251 1250
1252 1251 AS_LOCK_EXIT(as);
1253 1252
1254 1253 return (DDI_SUCCESS);
1255 1254 }
1256 1255
1257 1256
1258 1257 /*
1259 1258 * xpvtap_user_request_push()
1260 1259 */
1261 1260 static int
1262 1261 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
1263 1262 uint_t uid)
1264 1263 {
1265 1264 blkif_request_t *outstanding_req;
1266 1265 blkif_front_ring_t *uring;
1267 1266 blkif_request_t *target;
1268 1267 xpvtap_user_map_t *map;
1269 1268
1270 1269
1271 1270 uring = &state->bt_user_ring.ur_ring;
1272 1271 map = &state->bt_map;
1273 1272
1274 1273 target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
1275 1274
1276 1275 /*
1277 1276 * Save request from the frontend. used for ID mapping and unmap
1278 1277 * on response/cleanup
1279 1278 */
1280 1279 outstanding_req = &map->um_outstanding_reqs[uid];
1281 1280 bcopy(req, outstanding_req, sizeof (*outstanding_req));
1282 1281
1283 1282 /* put the request on the user ring */
1284 1283 bcopy(req, target, sizeof (*req));
1285 1284 target->id = (uint64_t)uid;
1286 1285 uring->req_prod_pvt++;
1287 1286
1288 1287 pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
1289 1288
1290 1289 return (DDI_SUCCESS);
1291 1290 }
1292 1291
1293 1292
1294 1293 static void
1295 1294 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
1296 1295 {
1297 1296 blkif_request_t *req;
1298 1297 struct seg *seg;
1299 1298 struct as *as;
1300 1299 caddr_t uaddr;
1301 1300 int e;
1302 1301
1303 1302
1304 1303 as = state->bt_map.um_as;
1305 1304 if (as == NULL) {
1306 1305 return;
1307 1306 }
1308 1307
1309 1308 /* get a copy of the original request */
1310 1309 req = &state->bt_map.um_outstanding_reqs[uid];
1311 1310
1312 1311 /* unmap the grefs for this request */
1313 1312 if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
1314 1313 (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
1315 1314 (req->nr_segments != 0)) {
1316 1315 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
1317 1316 AS_LOCK_ENTER(as, RW_READER);
1318 1317 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1319 1318 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1320 1319 (seg->s_base + seg->s_size))) {
1321 1320 AS_LOCK_EXIT(as);
1322 1321 xpvtap_rs_free(state->bt_map.um_rs, uid);
1323 1322 return;
1324 1323 }
1325 1324
1326 1325 e = segmf_release_grefs(seg, uaddr, req->nr_segments);
1327 1326 if (e != 0) {
1328 1327 cmn_err(CE_WARN, "unable to release grefs");
1329 1328 }
1330 1329
1331 1330 AS_LOCK_EXIT(as);
1332 1331 }
1333 1332
1334 1333 /* free up the user ring id */
1335 1334 xpvtap_rs_free(state->bt_map.um_rs, uid);
1336 1335 }
1337 1336
1338 1337
1339 1338 static int
1340 1339 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
1341 1340 uint_t *uid)
1342 1341 {
1343 1342 blkif_front_ring_t *uring;
1344 1343 blkif_response_t *target;
1345 1344
1346 1345
1347 1346 uring = &state->bt_user_ring.ur_ring;
1348 1347
1349 1348 if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
1350 1349 return (B_FALSE);
1351 1350 }
1352 1351
1353 1352 target = NULL;
1354 1353 target = RING_GET_RESPONSE(uring, uring->rsp_cons);
1355 1354 if (target == NULL) {
1356 1355 return (B_FALSE);
1357 1356 }
1358 1357
1359 1358 /* copy out the user app response */
1360 1359 bcopy(target, resp, sizeof (*resp));
1361 1360 uring->rsp_cons++;
1362 1361
1363 1362 	/* restore the guest's id from the original request */
1364 1363 *uid = (uint_t)resp->id;
1365 1364 resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
1366 1365
1367 1366 return (B_TRUE);
1368 1367 }
1369 1368
1370 1369
1371 1370 /*
1372 1371 * xpvtap_user_app_stop()
1373 1372 */
1374 1373 static void xpvtap_user_app_stop(caddr_t arg)
1375 1374 {
1376 1375 xpvtap_state_t *state;
1377 1376 clock_t rc;
1378 1377
1379 1378 state = (xpvtap_state_t *)arg;
1380 1379
1381 1380 /*
1382 1381 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1383 1382 * problem, we just won't auto-detach the driver.
1384 1383 */
1385 1384 mutex_enter(&state->bt_open.bo_mutex);
1386 1385 if (state->bt_open.bo_opened) {
1387 1386 rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
1388 1387 &state->bt_open.bo_mutex, drv_usectohz(10000000),
1389 1388 TR_CLOCK_TICK);
1390 1389 if (rc <= 0) {
1391 1390 cmn_err(CE_NOTE, "!user process still has driver open, "
1392 1391 "deferring detach\n");
1393 1392 }
1394 1393 }
1395 1394 mutex_exit(&state->bt_open.bo_mutex);
1396 1395 }
1397 1396
1398 1397
1399 1398 /*
1400 1399 * xpvtap_rs_init()
1401 1400 * Initialize the resource structure. init() returns a handle to be used
1402 1401 * for the rest of the resource functions. This code is written assuming
1403 1402 * that min_val will be close to 0. Therefore, we will allocate the free
1404 1403 * buffer only taking max_val into account.
1405 1404 */
1406 1405 static void
1407 1406 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
1408 1407 {
1409 1408 xpvtap_rs_t *rstruct;
1410 1409 uint_t array_size;
1411 1410 uint_t index;
1412 1411
1413 1412
1414 1413 ASSERT(handle != NULL);
1415 1414 ASSERT(min_val < max_val);
1416 1415
1417 1416 /* alloc space for resource structure */
1418 1417 rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
1419 1418
1420 1419 /*
1421 1420	 * Test to see if the max value is a multiple of 64. If so, we don't
1422 1421	 * need to allocate an extra 64-bit word. Allocate space for the free
1423 1422	 * bitmap (8 bytes per uint64_t).
1424 1423 */
1425 1424 if ((max_val & 0x3F) == 0) {
1426 1425 rstruct->rs_free_size = (max_val >> 6) * 8;
1427 1426 } else {
1428 1427 rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
1429 1428 }
1430 1429 rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
1431 1430
1432 1431 /* Initialize resource structure */
1433 1432 rstruct->rs_min = min_val;
1434 1433 rstruct->rs_last = min_val;
1435 1434 rstruct->rs_max = max_val;
1436 1435 mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
1437 1436 rstruct->rs_flushing = B_FALSE;
1438 1437
1439 1438 /* Mark all resources as free */
1440 1439 array_size = rstruct->rs_free_size >> 3;
1441 1440 for (index = 0; index < array_size; index++) {
1442 1441 rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1443 1442 }
1444 1443
1445 1444 /* setup handle which is returned from this function */
1446 1445 *handle = rstruct;
1447 1446 }
1448 1447
1449 1448
1450 1449 /*
1451 1450 * xpvtap_rs_fini()
1452 1451 * Frees up the space allocated in init(). Notice that a pointer to the
1453 1452 * handle is used for the parameter. fini() will set the handle to NULL
1454 1453 * before returning.
1455 1454 */
1456 1455 static void
1457 1456 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
1458 1457 {
1459 1458 xpvtap_rs_t *rstruct;
1460 1459
1461 1460
1462 1461 ASSERT(handle != NULL);
1463 1462
1464 1463 rstruct = (xpvtap_rs_t *)*handle;
1465 1464
1466 1465 mutex_destroy(&rstruct->rs_mutex);
1467 1466 kmem_free(rstruct->rs_free, rstruct->rs_free_size);
1468 1467 kmem_free(rstruct, sizeof (xpvtap_rs_t));
1469 1468
1470 1469 /* set handle to null. This helps catch bugs. */
1471 1470 *handle = NULL;
1472 1471 }
1473 1472
1474 1473
1475 1474 /*
1476 1475 * xpvtap_rs_alloc()
1477 1476 * alloc a resource. If alloc fails, we are out of resources.
1478 1477 */
1479 1478 static int
1480 1479 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
1481 1480 {
1482 1481 xpvtap_rs_t *rstruct;
1483 1482 uint_t array_idx;
1484 1483 uint64_t free;
1485 1484 uint_t index;
1486 1485 uint_t last;
1487 1486 uint_t min;
1488 1487 uint_t max;
1489 1488
1490 1489
1491 1490 ASSERT(handle != NULL);
1492 1491 ASSERT(resource != NULL);
1493 1492
1494 1493 rstruct = (xpvtap_rs_t *)handle;
1495 1494
1496 1495 mutex_enter(&rstruct->rs_mutex);
1497 1496 min = rstruct->rs_min;
1498 1497 max = rstruct->rs_max;
1499 1498
1500 1499 /*
1501 1500 * Find a free resource. This will return out of the loop once it finds
1502 1501 * a free resource. There are a total of 'max'-'min'+1 resources.
1503 1502 * Performs a round robin allocation.
1504 1503 */
1505 1504 for (index = min; index <= max; index++) {
1506 1505
1507 1506 array_idx = rstruct->rs_last >> 6;
1508 1507 free = rstruct->rs_free[array_idx];
1509 1508 last = rstruct->rs_last & 0x3F;
1510 1509
1511 1510 /* if the next resource to check is free */
1512 1511 if ((free & ((uint64_t)1 << last)) != 0) {
1513 1512 /* we are using this resource */
1514 1513 *resource = rstruct->rs_last;
1515 1514
1516 1515 /* take it out of the free list */
1517 1516 rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
1518 1517
1519 1518 /*
1520 1519 * increment the last count so we start checking the
1521 1520 * next resource on the next alloc(). Note the rollover
1522 1521 * at 'max'+1.
1523 1522 */
1524 1523 rstruct->rs_last++;
1525 1524 if (rstruct->rs_last > max) {
1526 1525 rstruct->rs_last = rstruct->rs_min;
1527 1526 }
1528 1527
1529 1528 /* unlock the resource structure */
1530 1529 mutex_exit(&rstruct->rs_mutex);
1531 1530
1532 1531 return (DDI_SUCCESS);
1533 1532 }
1534 1533
1535 1534 /*
1536 1535 * This resource is not free, lets go to the next one. Note the
1537 1536 * rollover at 'max'.
1538 1537 */
1539 1538 rstruct->rs_last++;
1540 1539 if (rstruct->rs_last > max) {
1541 1540 rstruct->rs_last = rstruct->rs_min;
1542 1541 }
1543 1542 }
1544 1543
1545 1544 mutex_exit(&rstruct->rs_mutex);
1546 1545
1547 1546 return (DDI_FAILURE);
1548 1547 }
1549 1548
1550 1549
1551 1550 /*
1552 1551 * xpvtap_rs_free()
1553 1552 * Free the previously alloc'd resource. Once a resource has been free'd,
1554 1553 * it can be used again when alloc is called.
1555 1554 */
1556 1555 static void
1557 1556 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
1558 1557 {
1559 1558 xpvtap_rs_t *rstruct;
1560 1559 uint_t array_idx;
1561 1560 uint_t offset;
1562 1561
1563 1562
1564 1563 ASSERT(handle != NULL);
1565 1564
1566 1565 rstruct = (xpvtap_rs_t *)handle;
1567 1566 ASSERT(resource >= rstruct->rs_min);
1568 1567 ASSERT(resource <= rstruct->rs_max);
1569 1568
1570 1569 if (!rstruct->rs_flushing) {
1571 1570 mutex_enter(&rstruct->rs_mutex);
1572 1571 }
1573 1572
1574 1573 /* Put the resource back in the free list */
1575 1574 array_idx = resource >> 6;
1576 1575 offset = resource & 0x3F;
1577 1576 rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
1578 1577
1579 1578 if (!rstruct->rs_flushing) {
1580 1579 mutex_exit(&rstruct->rs_mutex);
1581 1580 }
1582 1581 }
1583 1582
1584 1583
1585 1584 /*
1586 1585 * xpvtap_rs_flush()
1587 1586 */
1588 1587 static void
1589 1588 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
1590 1589 void *arg)
1591 1590 {
1592 1591 xpvtap_rs_t *rstruct;
1593 1592 uint_t array_idx;
1594 1593 uint64_t free;
1595 1594 uint_t index;
1596 1595 uint_t last;
1597 1596 uint_t min;
1598 1597 uint_t max;
1599 1598
1600 1599
1601 1600 ASSERT(handle != NULL);
1602 1601
1603 1602 rstruct = (xpvtap_rs_t *)handle;
1604 1603
1605 1604 mutex_enter(&rstruct->rs_mutex);
1606 1605 min = rstruct->rs_min;
1607 1606 max = rstruct->rs_max;
1608 1607
1609 1608 rstruct->rs_flushing = B_TRUE;
1610 1609
1611 1610 /*
1612 1611 * for all resources not free, call the callback routine to clean it
1613 1612 * up.
1614 1613 */
1615 1614 for (index = min; index <= max; index++) {
1616 1615
1617 1616 array_idx = rstruct->rs_last >> 6;
1618 1617 free = rstruct->rs_free[array_idx];
1619 1618 last = rstruct->rs_last & 0x3F;
1620 1619
1621 1620 /* if the next resource to check is not free */
1622 1621 if ((free & ((uint64_t)1 << last)) == 0) {
1623 1622 /* call the callback to cleanup */
1624 1623 (*callback)(arg, rstruct->rs_last);
1625 1624
1626 1625 /* put it back in the free list */
1627 1626 rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
1628 1627 }
1629 1628
1630 1629 /* go to the next one. Note the rollover at 'max' */
1631 1630 rstruct->rs_last++;
1632 1631 if (rstruct->rs_last > max) {
1633 1632 rstruct->rs_last = rstruct->rs_min;
1634 1633 }
1635 1634 }
1636 1635
1637 1636 mutex_exit(&rstruct->rs_mutex);
1638 1637 }