/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
        xpvtap_open,            /* cb_open */
        xpvtap_close,           /* cb_close */
        nodev,                  /* cb_strategy */
        nodev,                  /* cb_print */
        nodev,                  /* cb_dump */
        nodev,                  /* cb_read */
        nodev,                  /* cb_write */
        xpvtap_ioctl,           /* cb_ioctl */
        xpvtap_devmap,          /* cb_devmap */
        nodev,                  /* cb_mmap */
        xpvtap_segmap,          /* cb_segmap */
        xpvtap_chpoll,          /* cb_chpoll */
        ddi_prop_op,            /* cb_prop_op */
        NULL,                   /* cb_stream */
        D_NEW | D_MP | D_64BIT | D_DEVMAP,      /* cb_flag */
        CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* devo_refcnt */
        xpvtap_getinfo,         /* devo_getinfo */
        nulldev,                /* devo_identify */
        nulldev,                /* devo_probe */
        xpvtap_attach,          /* devo_attach */
        xpvtap_detach,          /* devo_detach */
        nodev,                  /* devo_reset */
        &xpvtap_cb_ops,         /* devo_cb_ops */
        NULL,                   /* devo_bus_ops */
        NULL                    /* devo_power */
};


static struct modldrv xpvtap_modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "xpvtap driver",        /* Name of the module. */
        &xpvtap_dev_ops,        /* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
        MODREV_1,
        { (void *) &xpvtap_modldrv, NULL }
};


void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
        int e;

        e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
        if (e != 0) {
                return (e);
        }

        e = mod_install(&xpvtap_modlinkage);
        if (e != 0) {
                ddi_soft_state_fini(&xpvtap_statep);
                return (e);
        }

        return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
        int e;

        e = mod_remove(&xpvtap_modlinkage);
        if (e != 0) {
                return (e);
        }

        ddi_soft_state_fini(&xpvtap_statep);

        return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        blk_ringinit_args_t args;
        xpvtap_state_t *state;
        int instance;
        int e;


        switch (cmd) {
        case DDI_ATTACH:
                break;

        case DDI_RESUME:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

        /* initialize our state info */
        instance = ddi_get_instance(dip);
        state = xpvtap_drv_init(instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }
        state->bt_dip = dip;

        /* Initialize the guest ring */
        args.ar_dip = state->bt_dip;
        args.ar_intr = xpvtap_intr;
        args.ar_intr_arg = (caddr_t)state;
        args.ar_ringup = xpvtap_user_thread_start;
        args.ar_ringup_arg = (caddr_t)state;
        args.ar_ringdown = xpvtap_user_app_stop;
        args.ar_ringdown_arg = (caddr_t)state;
        e = blk_ring_init(&args, &state->bt_guest_ring);
        if (e != DDI_SUCCESS) {
                goto attachfail_ringinit;
        }

        /* create the minor node (for ioctl/mmap) */
        e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
            DDI_PSEUDO, 0);
        if (e != DDI_SUCCESS) {
                goto attachfail_minor_node;
        }

        /* Report that driver was loaded */
        ddi_report_dev(dip);

        return (DDI_SUCCESS);

attachfail_minor_node:
        blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
        xpvtap_drv_fini(state);
        return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        xpvtap_state_t *state;
        int instance;


        instance = ddi_get_instance(dip);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }

        switch (cmd) {
        case DDI_DETACH:
                break;

        case DDI_SUSPEND:
        default:
                return (DDI_FAILURE);
        }

        xpvtap_user_thread_stop(state);
        blk_ring_fini(&state->bt_guest_ring);
        xpvtap_drv_fini(state);
        ddi_remove_minor_node(dip, NULL);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
        xpvtap_state_t *state;
        int instance;
        dev_t dev;
        int e;


        dev = (dev_t)arg;
        instance = getminor(dev);

        switch (cmd) {
        case DDI_INFO_DEVT2DEVINFO:
                state = ddi_get_soft_state(xpvtap_statep, instance);
                if (state == NULL) {
                        return (DDI_FAILURE);
                }
                *result = (void *)state->bt_dip;
                e = DDI_SUCCESS;
                break;

        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)(uintptr_t)instance;
                e = DDI_SUCCESS;
                break;

        default:
                e = DDI_FAILURE;
                break;
        }

        return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
        xpvtap_state_t *state;
        int instance;


        if (secpolicy_xvm_control(cred)) {
                return (EPERM);
        }

        instance = getminor(*devp);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /* we should only be opened once */
        mutex_enter(&state->bt_open.bo_mutex);
        if (state->bt_open.bo_opened) {
                mutex_exit(&state->bt_open.bo_mutex);
                return (EBUSY);
        }
        state->bt_open.bo_opened = B_TRUE;
        mutex_exit(&state->bt_open.bo_mutex);

        /*
         * save the app's address space. we need it for mapping/unmapping
         * grefs since we will be doing it in a separate kernel thread.
         */
        state->bt_map.um_as = curproc->p_as;

        return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
        xpvtap_state_t *state;
        int instance;


        instance = getminor(devp);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * wake the thread so it can clean up, and wait for it to exit so we
         * can be sure it's not in the middle of processing a request/response.
         */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        state->bt_thread.ut_exit = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        if (!state->bt_thread.ut_exit_done) {
                cv_wait(&state->bt_thread.ut_exit_done_cv,
                    &state->bt_thread.ut_mutex);
        }
        ASSERT(state->bt_thread.ut_exit_done);
        mutex_exit(&state->bt_thread.ut_mutex);

        state->bt_map.um_as = NULL;
        state->bt_map.um_guest_pages = NULL;

        /*
         * when the ring is brought down, a userland hotplug script is run
         * which tries to bring the userland app down. We'll wait for a bit
         * for the user app to exit. Notify the thread waiting that the app
         * has closed the driver.
         */
        mutex_enter(&state->bt_open.bo_mutex);
        ASSERT(state->bt_open.bo_opened);
        state->bt_open.bo_opened = B_FALSE;
        cv_signal(&state->bt_open.bo_exit_cv);
        mutex_exit(&state->bt_open.bo_mutex);

        return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
        xpvtap_state_t *state;
        int instance;


        if (secpolicy_xvm_control(cred)) {
                return (EPERM);
        }

        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }

        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        switch (cmd) {
        case XPVTAP_IOCTL_RESP_PUSH:
                /*
                 * wake thread, thread handles guest requests and user app
                 * responses.
                 */
                mutex_enter(&state->bt_thread.ut_mutex);
                state->bt_thread.ut_wake = B_TRUE;
                cv_signal(&state->bt_thread.ut_wake_cv);
                mutex_exit(&state->bt_thread.ut_mutex);
                break;

        default:
                cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
                return (ENXIO);
        }

        return (0);
}


/*
 * xpvtap_segmap()
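 *    The user app mmaps the device at two offsets: offset 0 maps the
 *    shared user ring (routed through devmap_setup() into xpvtap_devmap),
 *    and offset PAGESIZE reserves VA space for the gref pages, which are
 *    backed by seg_mf.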
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
        struct segmf_crargs a;
        xpvtap_state_t *state;
        int instance;
        int e;


        if (secpolicy_xvm_control(cred_p)) {
                return (EPERM);
        }

        instance = getminor(dev);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        /* the user app should be doing a MAP_SHARED mapping */
        if ((flags & MAP_TYPE) != MAP_SHARED) {
                return (EINVAL);
        }

        /*
         * if this is the user ring (offset = 0), devmap it (which ends up in
         * xpvtap_devmap). devmap will alloc and map the ring into the
         * app's VA space.
         */
        if (off == 0) {
                e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
                    prot, maxprot, flags, cred_p);
                return (e);
        }

        /* this should be the mmap for the gref pages (offset = PAGESIZE) */
        if (off != PAGESIZE) {
                return (EINVAL);
        }

        /* make sure we get the size we're expecting */
        if (len != XPVTAP_GREF_BUFSIZE) {
                return (EINVAL);
        }

        /*
         * reserve user app VA space for the gref pages and use segmf to
         * manage the backing store for the physical memory. segmf will
         * map in/out the grefs and fault them in/out.
         */
        ASSERT(asp == state->bt_map.um_as);
        as_rangelock(asp);
        if ((flags & MAP_FIXED) == 0) {
                map_addr(addrp, len, 0, 0, flags);
                if (*addrp == NULL) {
                        as_rangeunlock(asp);
                        return (ENOMEM);
                }
        } else {
                /* User specified address */
                (void) as_unmap(asp, *addrp, len);
        }
        a.dev = dev;
        a.prot = (uchar_t)prot;
        a.maxprot = (uchar_t)maxprot;
        e = as_map(asp, *addrp, len, segmf_create, &a);
        if (e != 0) {
                as_rangeunlock(asp);
                return (e);
        }
        as_rangeunlock(asp);

        /* stash the user base address and size of the gref buffer */
        state->bt_map.um_guest_pages = (caddr_t)*addrp;
        state->bt_map.um_guest_size = (size_t)len;

        /* register an as callback so we can cleanup when the app goes away */
        e = as_add_callback(asp, xpvtap_segmf_unregister, state,
            AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
        if (e != 0) {
                (void) as_unmap(asp, *addrp, len);
                return (EINVAL);
        }

        /* wake thread to see if there are requests already queued up */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        mutex_exit(&state->bt_thread.ut_mutex);

        return (0);
}


/*
 * xpvtap_devmap()
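 *    Reached via devmap_setup() from xpvtap_segmap() for offset 0. Maps
 *    the user ring page (allocated during driver attach) into the user
 *    app's VA space.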
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
        xpvtap_user_ring_t *usring;
        xpvtap_state_t *state;
        int instance;
        int e;


        instance = getminor(dev);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        /* we should only get here if the offset was == 0 */
        if (off != 0) {
                return (EINVAL);
        }

        /* we should only be mapping in one page */
        if (len != PAGESIZE) {
                return (EINVAL);
        }

        /*
         * we already allocated the user ring during driver attach, all we
         * need to do is map it into the user app's VA.
         */
        usring = &state->bt_user_ring;
        e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
            PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
        if (e < 0) {
                return (e);
        }

        /* return the size to complete the devmap */
        *maplen = PAGESIZE;

        return (0);
}


/*
 * xpvtap_chpoll()
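 *    Returns POLLIN | POLLRDNORM when there are unconsumed requests on
 *    the user ring. Requests produced since the last poll are pushed to
 *    the shared ring here so the user app can see them.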
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        xpvtap_user_ring_t *usring;
        xpvtap_state_t *state;
        int instance;


        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
                *reventsp = 0;
                return (EINVAL);
        }

        /*
         * if we pushed requests on the user ring since the last poll, wake
         * up the user app
         */
        usring = &state->bt_user_ring;
        if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

                /*
                 * XXX - is this faster here or xpvtap_user_request_push??
                 * prelim data says here.  Because less membars or because
                 * user thread will spin in poll requests before getting to
                 * responses?
                 */
                RING_PUSH_REQUESTS(&usring->ur_ring);

                usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
                *reventsp = POLLIN | POLLRDNORM;

        /* no new requests */
        } else {
                *reventsp = 0;
                if (!anyyet) {
                        *phpp = &state->bt_pollhead;
                }
        }

        return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
        xpvtap_state_t *state;
        int e;


        e = ddi_soft_state_zalloc(xpvtap_statep, instance);
        if (e != DDI_SUCCESS) {
                return (NULL);
        }
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                goto drvinitfail_get_soft_state;
        }

        state->bt_instance = instance;
        mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
        state->bt_open.bo_opened = B_FALSE;
        state->bt_map.um_registered = B_FALSE;

        /* initialize user ring, thread, mapping state */
        e = xpvtap_user_init(state);
        if (e != DDI_SUCCESS) {
                goto drvinitfail_userinit;
        }

        return (state);

drvinitfail_userinit:
        cv_destroy(&state->bt_open.bo_exit_cv);
        mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
        (void) ddi_soft_state_free(xpvtap_statep, instance);
        return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
        xpvtap_user_fini(state);
        cv_destroy(&state->bt_open.bo_exit_cv);
        mutex_destroy(&state->bt_open.bo_mutex);
        (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
        xpvtap_state_t *state;


        state = (xpvtap_state_t *)arg;

        /* wake thread, thread handles guest requests and user app responses */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        mutex_exit(&state->bt_thread.ut_mutex);

        return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
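 *    Called when the first request arrives after the app has mmapped the
 *    gref buffer. Locks down the htables backing the buffer and registers
 *    the PTE MA of each gref page with seg_mf so grefs can later be
 *    mapped into and out of user space.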
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
        struct seg *seg;
        uint64_t pte_ma;
        struct as *as;
        caddr_t uaddr;
        uint_t pgcnt;
        int i;


        as = state->bt_map.um_as;
        pgcnt = btopr(state->bt_map.um_guest_size);
        uaddr = state->bt_map.um_guest_pages;

        if (pgcnt == 0) {
                return (DDI_FAILURE);
        }

        AS_LOCK_ENTER(as, RW_READER);

        seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
        if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
            (seg->s_base + seg->s_size))) {
                AS_LOCK_EXIT(as);
                return (DDI_FAILURE);
        }

        /*
         * lock down the htables so the HAT can't steal them. Register the
         * PTE MA's for each gref page with seg_mf so we can do user space
         * gref mappings.
         */
        for (i = 0; i < pgcnt; i++) {
                hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
                hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
                    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
                    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
                hat_release_mapping(as->a_hat, uaddr);
                segmf_add_gref_pte(seg, uaddr, pte_ma);
                uaddr += PAGESIZE;
        }

        state->bt_map.um_registered = B_TRUE;

        AS_LOCK_EXIT(as);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
        xpvtap_state_t *state;
        caddr_t uaddr;
        uint_t pgcnt;
        int i;


        state = (xpvtap_state_t *)arg;
        if (!state->bt_map.um_registered) {
                /* remove the callback (which is this routine) */
                (void) as_delete_callback(as, arg);
                return;
        }

        pgcnt = btopr(state->bt_map.um_guest_size);
        uaddr = state->bt_map.um_guest_pages;

        /* unmap any outstanding req's grefs */
        xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

        /* Unlock the gref pages */
        for (i = 0; i < pgcnt; i++) {
                AS_LOCK_ENTER(as, RW_WRITER);
                hat_prepare_mapping(as->a_hat, uaddr, NULL);
                hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
                hat_release_mapping(as->a_hat, uaddr);
                AS_LOCK_EXIT(as);
                uaddr += PAGESIZE;
        }

        /* remove the callback (which is this routine) */
        (void) as_delete_callback(as, arg);

        state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
        xpvtap_user_map_t *map;
        int e;


        map = &state->bt_map;

        /* Setup the ring between the driver and user app */
        e = xpvtap_user_ring_init(state);
        if (e != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /*
         * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
         * is the same number of requests as the guest ring. Initialize the
         * state we use to track request IDs to the user app. These IDs will
         * also identify which group of gref pages correspond with the
         * request.
         */
        xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

        /*
         * allocate the space to store a copy of each outstanding request. We
         * will need to reference the ID and the number of segments when we
         * get the response from the user app.
         */
        map->um_outstanding_reqs = kmem_zalloc(
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
            KM_SLEEP);

        /*
         * initialize the thread we use to process guest requests and user
         * responses.
         */
        e = xpvtap_user_thread_init(state);
        if (e != DDI_SUCCESS) {
                goto userinitfail_user_thread_init;
        }

        return (DDI_SUCCESS);

userinitfail_user_thread_init:
        xpvtap_rs_fini(&map->um_rs);
        kmem_free(map->um_outstanding_reqs,
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
        xpvtap_user_ring_fini(state);
        return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
        xpvtap_user_ring_t *usring;


        usring = &state->bt_user_ring;

        /* allocate and initialize the page for the shared user ring */
        usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
            DDI_UMEM_SLEEP, &usring->ur_cookie);
        SHARED_RING_INIT(usring->ur_sring);
        FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
        usring->ur_prod_polled = 0;

        return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
        xpvtap_user_thread_t *thread;
        char taskqname[32];


        thread = &state->bt_thread;

        mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
        cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
        thread->ut_wake = B_FALSE;
        thread->ut_exit = B_FALSE;
        thread->ut_exit_done = B_TRUE;

        /* create but don't start the user thread */
        (void) sprintf(taskqname, "xpvtap_%d", state->bt_instance);
        thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
            TASKQ_DEFAULTPRI, 0);
        if (thread->ut_taskq == NULL) {
                goto userinitthrfail_taskq_create;
        }

        return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
        ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
        cv_destroy(&thread->ut_exit_done_cv);
        cv_destroy(&thread->ut_wake_cv);
        mutex_destroy(&thread->ut_mutex);

        return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
        xpvtap_user_thread_t *thread;
        xpvtap_state_t *state;
        int e;


        state = (xpvtap_state_t *)arg;
        thread = &state->bt_thread;

        /* start the user thread */
        thread->ut_exit_done = B_FALSE;
        e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
            DDI_SLEEP);
        if (e != DDI_SUCCESS) {
                thread->ut_exit_done = B_TRUE;
                cmn_err(CE_WARN, "Unable to start user thread\n");
        }
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
        /* wake thread so it can exit */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        state->bt_thread.ut_exit = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        if (!state->bt_thread.ut_exit_done) {
                cv_wait(&state->bt_thread.ut_exit_done_cv,
                    &state->bt_thread.ut_mutex);
        }
        mutex_exit(&state->bt_thread.ut_mutex);
        ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
        xpvtap_user_map_t *map;


        map = &state->bt_map;

        xpvtap_user_thread_fini(state);
        xpvtap_rs_fini(&map->um_rs);
        kmem_free(map->um_outstanding_reqs,
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
        xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
        ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
        ddi_taskq_destroy(state->bt_thread.ut_taskq);
        cv_destroy(&state->bt_thread.ut_exit_done_cv);
        cv_destroy(&state->bt_thread.ut_wake_cv);
        mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
        xpvtap_user_thread_t *thread;
        blkif_response_t resp;
        xpvtap_state_t *state;
        blkif_request_t req;
        boolean_t b;
        uint_t uid;
        int e;


        state = (xpvtap_state_t *)arg;
        thread = &state->bt_thread;

xpvtap_thread_start:
        /* See if we are supposed to exit */
        mutex_enter(&thread->ut_mutex);
        if (thread->ut_exit) {
                thread->ut_exit_done = B_TRUE;
                cv_signal(&state->bt_thread.ut_exit_done_cv);
                mutex_exit(&thread->ut_mutex);
                return;
        }

        /*
         * if we aren't supposed to be awake, wait until someone wakes us.
         * when we wake up, check for a kill or someone telling us to exit.
         */
        if (!thread->ut_wake) {
                e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
                if ((e == 0) || (thread->ut_exit)) {
                        thread->ut_exit = B_TRUE;
                        mutex_exit(&thread->ut_mutex);
                        goto xpvtap_thread_start;
                }
        }

        /* if someone didn't wake us, go back to the start of the thread */
        if (!thread->ut_wake) {
                mutex_exit(&thread->ut_mutex);
                goto xpvtap_thread_start;
        }

        /* we are awake */
        thread->ut_wake = B_FALSE;
        mutex_exit(&thread->ut_mutex);

        /* process requests from the guest */
        do {
                /*
                 * check for requests from the guest. if we don't have any,
                 * break out of the loop.
                 */
                e = blk_ring_request_get(state->bt_guest_ring, &req);
                if (e == B_FALSE) {
                        break;
                }

                /* we got a request, map the grefs into the user app's VA */
                e = xpvtap_user_request_map(state, &req, &uid);
                if (e != DDI_SUCCESS) {
                        /*
                         * If we couldn't map the request (e.g. user app hasn't
                         * opened the device yet), requeue it and try again
                         * later
                         */
                        blk_ring_request_requeue(state->bt_guest_ring);
                        break;
                }

                /* push the request to the user app */
                e = xpvtap_user_request_push(state, &req, uid);
                if (e != DDI_SUCCESS) {
                        resp.id = req.id;
                        resp.operation = req.operation;
                        resp.status = BLKIF_RSP_ERROR;
                        blk_ring_response_put(state->bt_guest_ring, &resp);
                }
        } while (!thread->ut_exit);

        /* process responses from the user app */
        do {
                /*
                 * check for responses from the user app. if we don't have any,
                 * break out of the loop.
                 */
                b = xpvtap_user_response_get(state, &resp, &uid);
                if (b != B_TRUE) {
                        break;
                }

                /*
                 * if we got a response, unmap the grefs from the matching
                 * request.
                 */
                xpvtap_user_request_unmap(state, uid);

                /* push the response to the guest */
                blk_ring_response_put(state->bt_guest_ring, &resp);
        } while (!thread->ut_exit);

        goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
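 *    Map the grefs from a guest request into the user app's VA space. The
 *    returned uid identifies both the user ring slot and the group of
 *    gref pages used for this request.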
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
        grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct seg *seg;
        struct as *as;
        domid_t domid;
        caddr_t uaddr;
        uint_t flags;
        int i;
        int e;


        domid = xvdi_get_oeid(state->bt_dip);

        as = state->bt_map.um_as;
        if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
                return (DDI_FAILURE);
        }

        /* this has to happen after segmap returns */
        if (!state->bt_map.um_registered) {
                /* register the pte's with segmf */
                e = xpvtap_segmf_register(state);
                if (e != DDI_SUCCESS) {
                        return (DDI_FAILURE);
                }
        }

        /* alloc an ID for the user ring */
        e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
        if (e != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /* if we don't have any segments to map, we're done */
        if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
            (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
            (req->nr_segments == 0)) {
                return (DDI_SUCCESS);
        }

        /* get the app's gref address */
        uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

        AS_LOCK_ENTER(as, RW_READER);
        seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
        if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
            (seg->s_base + seg->s_size))) {
                AS_LOCK_EXIT(as);
                return (DDI_FAILURE);
        }

        /* if we are reading from disk, we are writing into memory */
        flags = 0;
        if (req->operation == BLKIF_OP_READ) {
                flags |= SEGMF_GREF_WR;
        }

        /* Load the grefs into seg_mf */
        for (i = 0; i < req->nr_segments; i++) {
                gref[i] = req->seg[i].gref;
        }
        (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
            domid);

        AS_LOCK_EXIT(as);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
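 *    Queue a guest request on the user ring, stashing a copy of the
 *    original request so the grefs can be unmapped and the guest's id
 *    restored when the matching response comes back.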
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
        blkif_request_t *outstanding_req;
        blkif_front_ring_t *uring;
        blkif_request_t *target;
        xpvtap_user_map_t *map;


        uring = &state->bt_user_ring.ur_ring;
        map = &state->bt_map;

        target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

        /*
         * Save the request from the frontend. Used for ID mapping and unmap
         * on response/cleanup.
         */
        outstanding_req = &map->um_outstanding_reqs[uid];
        bcopy(req, outstanding_req, sizeof (*outstanding_req));

        /* put the request on the user ring */
        bcopy(req, target, sizeof (*req));
        target->id = (uint64_t)uid;
        uring->req_prod_pvt++;

        pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

        return (DDI_SUCCESS);
}


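/*
 * xpvtap_user_request_unmap()
 *    Unmap the grefs belonging to the original request identified by uid,
 *    then release uid back to the free pool.
 */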
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
        blkif_request_t *req;
        struct seg *seg;
        struct as *as;
        caddr_t uaddr;
        int e;


        as = state->bt_map.um_as;
        if (as == NULL) {
                return;
        }

        /* get the saved copy of the original request */
        req = &state->bt_map.um_outstanding_reqs[uid];

        /* unmap the grefs for this request */
        if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
            (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
            (req->nr_segments != 0)) {
                uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
                AS_LOCK_ENTER(as, RW_READER);
                seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
                if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
                    (seg->s_base + seg->s_size))) {
                        AS_LOCK_EXIT(as);
                        xpvtap_rs_free(state->bt_map.um_rs, uid);
                        return;
                }

                e = segmf_release_grefs(seg, uaddr, req->nr_segments);
                if (e != 0) {
                        cmn_err(CE_WARN, "unable to release grefs");
                }

                AS_LOCK_EXIT(as);
        }

        /* free up the user ring id */
        xpvtap_rs_free(state->bt_map.um_rs, uid);
}


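/*
 * xpvtap_user_response_get()
 *    Pull the next response off the user ring, if there is one, and
 *    restore the guest's original request id.
 */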
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
        blkif_front_ring_t *uring;
        blkif_response_t *target;


        uring = &state->bt_user_ring.ur_ring;

        if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
                return (B_FALSE);
        }

        target = RING_GET_RESPONSE(uring, uring->rsp_cons);
        if (target == NULL) {
                return (B_FALSE);
        }

        /* copy out the user app response */
        bcopy(target, resp, sizeof (*resp));
        uring->rsp_cons++;

        /* restore the guest's id from the original request */
        *uid = (uint_t)resp->id;
        resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

        return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
        xpvtap_state_t *state;
        clock_t rc;

        state = (xpvtap_state_t *)arg;

        /*
         * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
         * problem, we just won't auto-detach the driver.
         */
        mutex_enter(&state->bt_open.bo_mutex);
        if (state->bt_open.bo_opened) {
                rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
                    &state->bt_open.bo_mutex, drv_usectohz(10000000),
                    TR_CLOCK_TICK);
                if (rc <= 0) {
                        cmn_err(CE_NOTE, "!user process still has driver open, "
                            "deferring detach\n");
                }
        }
        mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
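 *    The free list is kept as a bitmap: each uint64_t word tracks 64
 *    resource IDs, and a set bit means the corresponding ID is free.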
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
        xpvtap_rs_t *rstruct;
        uint_t array_size;
        uint_t index;


        ASSERT(handle != NULL);
        ASSERT(min_val < max_val);

        /* alloc space for resource structure */
        rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

        /*
         * Test to see if the max value is a multiple of 64. If so, we don't
         * need to allocate an extra 64-bit word. alloc space for free buffer
         * (8 bytes per uint64_t).
         */
        if ((max_val & 0x3F) == 0) {
                rstruct->rs_free_size = (max_val >> 6) * 8;
        } else {
                rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
        }
        rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

        /* Initialize resource structure */
        rstruct->rs_min = min_val;
        rstruct->rs_last = min_val;
        rstruct->rs_max = max_val;
        mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
        rstruct->rs_flushing = B_FALSE;

        /* Mark all resources as free */
        array_size = rstruct->rs_free_size >> 3;
        for (index = 0; index < array_size; index++) {
                rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
        }

        /* setup handle which is returned from this function */
        *handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init().  Notice that a pointer to the
 *    handle is used for the parameter.  fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
        xpvtap_rs_t *rstruct;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)*handle;

        mutex_destroy(&rstruct->rs_mutex);
        kmem_free(rstruct->rs_free, rstruct->rs_free_size);
        kmem_free(rstruct, sizeof (xpvtap_rs_t));

        /* set handle to null.  This helps catch bugs. */
        *handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
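 *    Scans round robin starting at the slot after the last allocation so
 *    IDs are recycled evenly.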
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint64_t free;
        uint_t index;
        uint_t last;
        uint_t min;
        uint_t max;


        ASSERT(handle != NULL);
        ASSERT(resource != NULL);

        rstruct = (xpvtap_rs_t *)handle;

        mutex_enter(&rstruct->rs_mutex);
        min = rstruct->rs_min;
        max = rstruct->rs_max;

        /*
         * Find a free resource. This will return out of the loop once it finds
         * a free resource. There are a total of 'max'-'min'+1 resources.
         * Performs a round robin allocation.
         */
        for (index = min; index <= max; index++) {

                array_idx = rstruct->rs_last >> 6;
                free = rstruct->rs_free[array_idx];
                last = rstruct->rs_last & 0x3F;

                /* if the next resource to check is free */
                if ((free & ((uint64_t)1 << last)) != 0) {
                        /* we are using this resource */
                        *resource = rstruct->rs_last;

                        /* take it out of the free list */
                        rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

                        /*
                         * increment the last count so we start checking the
                         * next resource on the next alloc().  Note the rollover
                         * at 'max'+1.
                         */
                        rstruct->rs_last++;
                        if (rstruct->rs_last > max) {
                                rstruct->rs_last = rstruct->rs_min;
                        }

                        /* unlock the resource structure */
                        mutex_exit(&rstruct->rs_mutex);

                        return (DDI_SUCCESS);
                }

                /*
                 * This resource is not free, let's go to the next one. Note
                 * the rollover at 'max'.
                 */
                rstruct->rs_last++;
                if (rstruct->rs_last > max) {
                        rstruct->rs_last = rstruct->rs_min;
                }
        }

        mutex_exit(&rstruct->rs_mutex);

        return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource.  Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint_t offset;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)handle;
        ASSERT(resource >= rstruct->rs_min);
        ASSERT(resource <= rstruct->rs_max);

        if (!rstruct->rs_flushing) {
                mutex_enter(&rstruct->rs_mutex);
        }

        /* Put the resource back in the free list */
        array_idx = resource >> 6;
        offset = resource & 0x3F;
        rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

        if (!rstruct->rs_flushing) {
                mutex_exit(&rstruct->rs_mutex);
        }
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint64_t free;
        uint_t index;
        uint_t last;
        uint_t min;
        uint_t max;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)handle;

        mutex_enter(&rstruct->rs_mutex);
        min = rstruct->rs_min;
        max = rstruct->rs_max;

        rstruct->rs_flushing = B_TRUE;

        /*
         * for all resources not free, call the callback routine to clean
         * them up.
         */
        for (index = min; index <= max; index++) {

                array_idx = rstruct->rs_last >> 6;
                free = rstruct->rs_free[array_idx];
                last = rstruct->rs_last & 0x3F;

                /* if the next resource to check is not free */
                if ((free & ((uint64_t)1 << last)) == 0) {
                        /* call the callback to cleanup */
                        (*callback)(arg, rstruct->rs_last);

                        /* put it back in the free list */
                        rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
                }

                /* go to the next one. Note the rollover at 'max' */
                rstruct->rs_last++;
                if (rstruct->rs_last > max) {
                        rstruct->rs_last = rstruct->rs_min;
                }
        }

        mutex_exit(&rstruct->rs_mutex);
}