/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	{ (void *) &xpvtap_modldrv, NULL }
};


void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


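/*
 * Autoconfiguration overview (a summary of the entry points below):
 * xpvtap_attach() allocates the per-instance soft state, sets up the shared
 * ring to the guest domain via blk_ring_init() (registering xpvtap_intr()
 * and the ring-up/ring-down callbacks), and creates the minor node that the
 * user app opens and mmaps.  xpvtap_detach() stops the user thread and tears
 * all of this down again.
 */

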
/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. We need it for mapping/unmapping
	 * grefs since we will be doing it in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake the thread so it can clean up, and wait for it to exit so we
	 * can be sure it's not in the middle of processing a
	 * request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


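/*
 * User mapping layout (a summary of xpvtap_segmap()/xpvtap_devmap() below):
 * the user app mmaps the device twice.  Offset 0 maps the single shared-ring
 * page allocated at attach time (via devmap).  Offset PAGESIZE maps an
 * XPVTAP_GREF_BUFSIZE window of VA which segmf backs with the guest's
 * granted pages on demand, one group of gref pages per outstanding request
 * ID (the per-ID layout comes from the XPVTAP_GREF_REQADDR() macro in the
 * xpvtap header).
 */

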
/*
 * xpvtap_segmap()
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash user base address, and compute address where the request
	 * array will end up.
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can clean up when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


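/*
 * User app notification protocol (a summary of the code around this point):
 * when the kernel thread pushes requests onto the user ring it calls
 * pollwakeup(), and xpvtap_chpoll() below reports POLLIN | POLLRDNORM once
 * there are unpolled requests.  After writing its responses onto the ring,
 * the app issues the XPVTAP_IOCTL_RESP_PUSH ioctl to wake the kernel thread
 * so the responses get pushed back to the guest.
 */

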
/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		*reventsp = 0;
		return (EINVAL);
	}

	/*
	 * if we pushed requests on the user ring since the last poll, wakeup
	 * the user app
	 */
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here.  Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;

	/* no new requests */
	} else {
		*reventsp = 0;
		if (!anyyet) {
			*phpp = &state->bt_pollhead;
		}
	}

	return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


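/*
 * Gref mapping lifecycle (a summary of the segmf code below):
 * xpvtap_segmap() reserves the user VA range; the first request to be mapped
 * triggers xpvtap_segmf_register(), which locks down the app's page tables
 * and hands the PTE MAs to seg_mf; xpvtap_user_request_map() then loads each
 * request's grant references with segmf_add_grefs() and
 * xpvtap_user_request_unmap() releases them with segmf_release_grefs();
 * xpvtap_segmf_unregister() runs as an as callback when the app unmaps or
 * exits and undoes the whole setup.
 */

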
/*
 * xpvtap_segmf_register()
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* remove the callback (which is this routine) */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


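/*
 * Request/response flow handled by the thread below: each time it is woken
 * (by the guest-ring interrupt, by the segmap of the gref buffer, or by the
 * XPVTAP_IOCTL_RESP_PUSH ioctl) it first drains the guest ring, mapping each
 * request's grefs into the user app's VA and pushing a copy of the request
 * onto the user ring, and then drains the user ring's responses, unmapping
 * the grefs and pushing each response back onto the guest ring.
 */

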
/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app hasn't
			 * opened the device yet), requeue it and try again
			 * later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have any,
		 * break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the app's gref address */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save request from the frontend. used for ID mapping and unmap
	 * on response/cleanup
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_unmap()
 */
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}


/*
 * xpvtap_user_response_get()
 */
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = NULL;
	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's id from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
	 * problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver "
			    "open, deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


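/*
 * The remainder of this file is a small bitmap-based ID allocator used to
 * hand out user-ring request IDs.  Typical usage (a sketch based on how
 * xpvtap_user_init() and the request path use it):
 *
 *	xpvtap_rs_init(0, BLKIF_RING_SIZE - 1, &hdl);
 *	if (xpvtap_rs_alloc(hdl, &uid) == DDI_SUCCESS) {
 *		... use uid, then ...
 *		xpvtap_rs_free(hdl, uid);
 *	}
 *	xpvtap_rs_flush(hdl, callback, arg);	 releases all outstanding IDs
 *	xpvtap_rs_fini(&hdl);
 */

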
/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if the max value is 64-bit aligned. If so, we don't need
	 * to allocate an extra 64-bit word. alloc space for free buffer
	 * (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init().  Notice that a pointer to the
 *    handle is used for the parameter.  fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null.  This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it finds
	 * a free resource. There are a total of 'max'-'min'+1 resources.
	 * Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the rollover
			 * at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, lets go to the next one. Note the
		 * rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource.  Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}