/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <vm/seg_kmem.h>
#include <sys/vmsystm.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/avl.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif

#include <sys/xsvc.h>

/* maximum total memory that can be allocated through the ioctl interface */
uint64_t xsvc_max_memory = 10 * 1024 * 1024;
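
/*
 * Sketch (hedged): xsvc_max_memory can be tuned like any other driver
 * global, e.g. via /etc/system; the value below is only an example:
 *
 *	set xsvc:xsvc_max_memory = 0x1400000
 *
 * The "maxallocmem" property read in xsvc_attach() (in kilobytes) also
 * overrides this default at attach time.
 */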

extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);


static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval);
static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model);
static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);

static struct cb_ops xsvc_cb_ops = {
        xsvc_open,              /* cb_open */
        xsvc_close,             /* cb_close */
        nodev,                  /* cb_strategy */
        nodev,                  /* cb_print */
        nodev,                  /* cb_dump */
        nodev,                  /* cb_read */
        nodev,                  /* cb_write */
        xsvc_ioctl,             /* cb_ioctl */
        xsvc_devmap,            /* cb_devmap */
        NULL,                   /* cb_mmap */
        NULL,                   /* cb_segmap */
        nochpoll,               /* cb_chpoll */
        ddi_prop_op,            /* cb_prop_op */
        NULL,                   /* cb_stream */
        D_NEW | D_MP | D_64BIT | D_DEVMAP,      /* cb_flag */
        CB_REV
};

static struct dev_ops xsvc_dev_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* devo_refcnt */
        xsvc_getinfo,           /* devo_getinfo */
        nulldev,                /* devo_identify */
        nulldev,                /* devo_probe */
        xsvc_attach,            /* devo_attach */
        xsvc_detach,            /* devo_detach */
        nodev,                  /* devo_reset */
        &xsvc_cb_ops,           /* devo_cb_ops */
        NULL,                   /* devo_bus_ops */
        NULL,                   /* power */
        ddi_quiesce_not_needed, /* quiesce */
};

static struct modldrv xsvc_modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "xsvc driver",          /* Name of the module. */
        &xsvc_dev_ops,          /* driver ops */
};

static struct modlinkage xsvc_modlinkage = {
        MODREV_1,
        (void *)&xsvc_modldrv,
        NULL
};


static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
    xsvc_mem_t **mp);
static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
    uint64_t key);
static int xsvc_mnode_key_compare(const void *q, const void *e);
static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep);
static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);


void *xsvc_statep;

static ddi_device_acc_attr_t xsvc_device_attr = {
        DDI_DEVICE_ATTR_V0,
        DDI_NEVERSWAP_ACC,
        DDI_STRICTORDER_ACC
};

static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp);
static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp);
static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
    devmap_cookie_t new_dhp2, void **new_pvtp2);


static struct devmap_callback_ctl xsvc_callbk = {
        DEVMAP_OPS_REV,
        xsvc_devmap_map,
        NULL,
        xsvc_devmap_dup,
        xsvc_devmap_unmap
};


/*
 * _init()
 *
 */
int
_init(void)
{
        int err;

        err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
        if (err != 0) {
                return (err);
        }

        err = mod_install(&xsvc_modlinkage);
        if (err != 0) {
                ddi_soft_state_fini(&xsvc_statep);
                return (err);
        }

        return (0);
}

/*
 * _info()
 *
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&xsvc_modlinkage, modinfop));
}

/*
 * _fini()
 *
 */
int
_fini(void)
{
        int err;

        err = mod_remove(&xsvc_modlinkage);
        if (err != 0) {
                return (err);
        }

        ddi_soft_state_fini(&xsvc_statep);

        return (0);
}

/*
 * xsvc_attach()
 *
 */
static int
xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        xsvc_state_t *state;
        int maxallocmem;
        int instance;
        int err;


        switch (cmd) {
        case DDI_ATTACH:
                break;

        case DDI_RESUME:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

        instance = ddi_get_instance(dip);
        err = ddi_soft_state_zalloc(xsvc_statep, instance);
        if (err != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                /* make sure the failure path doesn't return stale success */
                err = DDI_FAILURE;
                goto attachfail_get_soft_state;
        }

        state->xs_dip = dip;
        state->xs_instance = instance;

        /* Initialize allocation count */
        mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
        state->xs_currently_alloced = 0;

        mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);

        /* create the minor node (for the ioctl) */
        err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
            0);
        if (err != DDI_SUCCESS) {
                goto attachfail_minor_node;
        }

        /*
         * The maxallocmem property (specified in kilobytes) overrides the
         * default (xsvc_max_memory). It is the maximum total memory the
         * ioctl interface will allow to be allocated.
         */
        maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
            DDI_PROP_DONTPASS, "maxallocmem", -1);
        if (maxallocmem >= 0) {
                xsvc_max_memory = maxallocmem * 1024;
        }
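
        /*
         * Sketch (hedged): the property would typically be supplied by the
         * driver's .conf file; a hypothetical xsvc.conf entry capping ioctl
         * allocations at 20 MB might read:
         *
         *	maxallocmem=20480;
         */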

        /* Initialize list of memory allocs */
        mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
        avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
            sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));

        /* Report that driver was loaded */
        ddi_report_dev(dip);

        return (DDI_SUCCESS);

attachfail_minor_node:
        mutex_destroy(&state->xs_cookie_mutex);
        mutex_destroy(&state->xs_mutex);
attachfail_get_soft_state:
        (void) ddi_soft_state_free(xsvc_statep, instance);

        return (err);
}

/*
 * xsvc_detach()
 *
 */
static int
xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        xsvc_state_t *state;
        xsvc_mnode_t *mnode;
        xsvc_mem_t *mp;
        int instance;


        instance = ddi_get_instance(dip);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }

        switch (cmd) {
        case DDI_DETACH:
                break;

        case DDI_SUSPEND:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

        ddi_remove_minor_node(dip, NULL);

        /* Free any memory on list */
        while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
                mp = mnode->mn_home;
                xsvc_mem_free(state, mp);
        }

        /* remove list */
        avl_destroy(&state->xs_mlist.ml_avl);
        mutex_destroy(&state->xs_mlist.ml_mutex);

        mutex_destroy(&state->xs_cookie_mutex);
        mutex_destroy(&state->xs_mutex);
        (void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
        return (DDI_SUCCESS);
}

/*
 * xsvc_getinfo()
 *
 */
/*ARGSUSED*/
static int
xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
        xsvc_state_t *state;
        int instance;
        dev_t dev;
        int err;


        dev = (dev_t)arg;
        instance = getminor(dev);

        switch (cmd) {
        case DDI_INFO_DEVT2DEVINFO:
                state = ddi_get_soft_state(xsvc_statep, instance);
                if (state == NULL) {
                        return (DDI_FAILURE);
                }
                *result = (void *)state->xs_dip;
                err = DDI_SUCCESS;
                break;

        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)(uintptr_t)instance;
                err = DDI_SUCCESS;
                break;

        default:
                err = DDI_FAILURE;
                break;
        }

        return (err);
}


/*
 * xsvc_open()
 *
 */
/*ARGSUSED*/
static int
xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
        xsvc_state_t *state;
        int instance;

        instance = getminor(*devp);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        return (0);
}

/*
 * xsvc_close()
 *
 */
/*ARGSUSED*/
static int
xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
        return (0);
}

/*
 * xsvc_ioctl()
 *
 */
/*ARGSUSED*/
static int
xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
{
        xsvc_state_t *state;
        int instance;
        int err;


        err = drv_priv(cred);
        if (err != 0) {
                return (EPERM);
        }
        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        switch (cmd) {
        case XSVC_ALLOC_MEM:
                err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
                break;

        case XSVC_FREE_MEM:
                err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
                break;

        case XSVC_FLUSH_MEM:
                err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
                break;

        default:
                err = ENXIO;
        }

        return (err);
}
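
/*
 * Usage sketch (hedged; user-space, not part of this driver): a privileged
 * process might drive the alloc ioctl roughly as follows. The field names
 * come from the xsvc_mem_req structure used above; the buffer sizing and
 * the zero-terminated SGL convention mirror xsvc_ioctl_alloc_memory()
 * below.
 *
 *	int fd = open("/dev/xsvc", O_RDWR);
 *	xsvc_mloc sgl[8 + 1];	// sgllen entries plus a zeroed terminator
 *	xsvc_mem_req req;
 *
 *	req.xsvc_mem_reqid = 1;			// caller-chosen lookup key
 *	req.xsvc_mem_size = 64 * 1024;		// rounded up to PAGESIZE
 *	req.xsvc_mem_align = 0;			// rounded up to PAGESIZE
 *	req.xsvc_mem_addr_lo = 0;
 *	req.xsvc_mem_addr_hi = 0xFFFFFFFFULL;	// keep it below 4 GB
 *	req.xsvc_mem_sgllen = 8;
 *	req.xsvc_sg_list = (uint64_t)(uintptr_t)sgl;
 *	if (ioctl(fd, XSVC_ALLOC_MEM, &req) == 0) {
 *		// walk sgl[] until an entry with mloc_size == 0
 *	}
 */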

/*
 * xsvc_ioctl_alloc_memory()
 *
 */
static int
xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mloc_32 *usgl32;
        xsvc_mem_req params;
        xsvc_mloc_32 sgl32;
        xsvc_mloc *usgl;
        xsvc_mem_t *mp;
        xsvc_mloc sgl;
        uint64_t key;
        size_t size;
        int err;
        int i;


        /* Copy in the params, then get the size and key */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }

                key = (uint64_t)params32.xsvc_mem_reqid;
                size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
                size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
        }

        /*
         * make sure this doesn't put us over the maximum allowed to be
         * allocated
         */
        mutex_enter(&state->xs_mutex);
        if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
                mutex_exit(&state->xs_mutex);
                return (EAGAIN);
        }
        state->xs_currently_alloced += size;
        mutex_exit(&state->xs_mutex);

        /* get state to track this memory */
        err = xsvc_mem_alloc(state, key, &mp);
        if (err != 0) {
                return (err);
        }
        mp->xm_size = size;

        /* allocate and bind the memory */
        mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
        mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_burstsizes = 1;
        mp->xm_dma_attr.dma_attr_minxfer = 1;
        mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_granular = 1;
        mp->xm_dma_attr.dma_attr_flags = 0;

        /* Finish converting params */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
                mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
                mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
                usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
                mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
                    params32.xsvc_mem_align, PAGESIZE);
        } else {
                mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
                mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
                mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
                usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
                mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
                    params.xsvc_mem_align, PAGESIZE);
        }

        mp->xm_device_attr = xsvc_device_attr;

        err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
            DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
        if (err != DDI_SUCCESS) {
                err = EINVAL;
                goto allocfail_alloc_handle;
        }

        /* don't sleep here so we don't get stuck in contig alloc */
        err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
            &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
            &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
        if (err != DDI_SUCCESS) {
                err = EINVAL;
                goto allocfail_alloc_mem;
        }

        err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
            mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
            NULL, &mp->xm_cookie, &mp->xm_cookie_count);
        if (err != DDI_DMA_MAPPED) {
                err = EFAULT;
                goto allocfail_bind;
        }

        /* copy the scatter/gather list out to the caller */
        for (i = 0; i < mp->xm_cookie_count; i++) {
                if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                        sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
                        sgl32.mloc_size = mp->xm_cookie.dmac_size;
                        err = ddi_copyout(&sgl32, &usgl32[i],
                            sizeof (xsvc_mloc_32), mode);
                        if (err != 0) {
                                err = EFAULT;
                                goto allocfail_copyout;
                        }
                } else {
                        sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
                        sgl.mloc_size = mp->xm_cookie.dmac_size;
                        err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
                            mode);
                        if (err != 0) {
                                err = EFAULT;
                                goto allocfail_copyout;
                        }
                }

                /*
                 * ddi_dma_nextcookie() may only be called cookie_count - 1
                 * times, so don't advance past the last cookie.
                 */
                if ((i + 1) < mp->xm_cookie_count) {
                        ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
                }
        }

        /* zero the last sgl entry to mark the end of the list */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                sgl32.mloc_addr = 0;
                sgl32.mloc_size = 0;
                err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
                    mode);
                if (err != 0) {
                        err = EFAULT;
                        goto allocfail_copyout;
                }
        } else {
                sgl.mloc_addr = 0;
                sgl.mloc_size = 0;
                err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
                if (err != 0) {
                        err = EFAULT;
                        goto allocfail_copyout;
                }
        }

        return (0);

allocfail_copyout:
        (void) ddi_dma_unbind_handle(mp->xm_dma_handle);
allocfail_bind:
        ddi_dma_mem_free(&mp->xm_mem_handle);
allocfail_alloc_mem:
        ddi_dma_free_handle(&mp->xm_dma_handle);
allocfail_alloc_handle:
        mp->xm_dma_handle = NULL;
        xsvc_mem_free(state, mp);

        mutex_enter(&state->xs_mutex);
        state->xs_currently_alloced = state->xs_currently_alloced - size;
        mutex_exit(&state->xs_mutex);

        return (err);
}

/*
 * xsvc_ioctl_flush_memory()
 *
 */
static int
xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mem_req params;
        xsvc_mem_t *mp;
        uint64_t key;
        int err;


        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params32.xsvc_mem_reqid;
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
        }

        /* find the memory */
        mp = xsvc_mem_lookup(state, key);
        if (mp == NULL) {
                return (EINVAL);
        }

        (void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);

        return (0);
}


/*
 * xsvc_ioctl_free_memory()
 *
 */
static int
xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mem_req params;
        xsvc_mem_t *mp;
        uint64_t key;
        int err;


        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params32.xsvc_mem_reqid;
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
        }

        /* find the memory */
        mp = xsvc_mem_lookup(state, key);
        if (mp == NULL) {
                return (EINVAL);
        }

        xsvc_mem_free(state, mp);

        return (0);
}

/*
 * xsvc_mem_alloc()
 *
 */
static int
xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
{
        xsvc_mem_t *mem;

        /* an alloc with a key already in use replaces the old allocation */
        mem = xsvc_mem_lookup(state, key);
        if (mem != NULL) {
                xsvc_mem_free(state, mem);
        }

        *mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
        (*mp)->xm_mnode.mn_home = *mp;
        (*mp)->xm_mnode.mn_key = key;

        mutex_enter(&state->xs_mlist.ml_mutex);
        avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
        mutex_exit(&state->xs_mlist.ml_mutex);

        return (0);
}

/*
 * xsvc_mem_free()
 *
 */
static void
xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
{
        if (mp->xm_dma_handle != NULL) {
                (void) ddi_dma_unbind_handle(mp->xm_dma_handle);
                ddi_dma_mem_free(&mp->xm_mem_handle);
                ddi_dma_free_handle(&mp->xm_dma_handle);

                mutex_enter(&state->xs_mutex);
                state->xs_currently_alloced = state->xs_currently_alloced -
                    mp->xm_size;
                mutex_exit(&state->xs_mutex);
        }

        mutex_enter(&state->xs_mlist.ml_mutex);
        avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
        mutex_exit(&state->xs_mlist.ml_mutex);

        kmem_free(mp, sizeof (*mp));
}

/*
 * xsvc_mem_lookup()
 *
 */
static xsvc_mem_t *
xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
{
        xsvc_mnode_t mnode;
        xsvc_mnode_t *mnp;
        avl_index_t where;
        xsvc_mem_t *mp;

        mnode.mn_key = key;
        mutex_enter(&state->xs_mlist.ml_mutex);
        mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
        mutex_exit(&state->xs_mlist.ml_mutex);

        if (mnp != NULL) {
                mp = mnp->mn_home;
        } else {
                mp = NULL;
        }

        return (mp);
}

/*
 * xsvc_mnode_key_compare()
 *
 */
static int
xsvc_mnode_key_compare(const void *q, const void *e)
{
        xsvc_mnode_t *n1;
        xsvc_mnode_t *n2;

        n1 = (xsvc_mnode_t *)q;
        n2 = (xsvc_mnode_t *)e;

        if (n1->mn_key < n2->mn_key) {
                return (-1);
        } else if (n1->mn_key > n2->mn_key) {
                return (1);
        } else {
                return (0);
        }
}

/*
 * xsvc_devmap()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
        ddi_umem_cookie_t cookie;
        xsvc_state_t *state;
        offset_t off_align;
        size_t npages;
        caddr_t kvai;
        size_t psize;
        int instance;
        caddr_t kva;
        pfn_t pfn;
        int err;
        int i;


        instance = getminor(dev);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * On 64-bit kernels, if a 32-bit application does an mmap(),
         * smmap32 will sign extend the offset. We need to undo that since
         * we are passed a physical address in off, not an offset.
         */
#if defined(__amd64)
        if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
            ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
                off = off & 0xFFFFFFFF;
        }
#endif

#ifdef __xpv
        /*
         * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
         * this later if there is a good reason.
         */
        if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
                return (-1);
        }

        /* we will always treat this as a foreign MFN */
        pfn = xen_assign_pfn(btop(off));
#else
        pfn = btop(off);
#endif

        /* always work with whole pages */
        off_align = P2ALIGN(off, PAGESIZE);
        psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;

        /*
         * if this is memory we're trying to map into user space, we first
         * need to map the PFNs into KVA, then build up a umem cookie, and
         * finally do a umem_setup to map it in.
         */
        if (pf_is_memory(pfn)) {
                npages = btop(psize);

                kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
                if (kva == NULL) {
                        return (-1);
                }

                kvai = kva;
                for (i = 0; i < npages; i++) {
                        hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
                            PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
                        pfn++;
                        kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
                }

                err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
                if (err != 0) {
                        goto devmapfail_cookie_alloc;
                }

                if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
                    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
                        goto devmapfail_umem_setup;
                }
                *maplen = psize;

        /*
         * If this is not memory (or a foreign MFN in i86xpv), go through
         * devmem_setup.
         */
        } else {
                if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
                    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
                        return (err);
                }
                *maplen = psize;
        }

        return (0);

devmapfail_umem_setup:
        xsvc_umem_cookie_free(&cookie);

devmapfail_cookie_alloc:
        kvai = kva;
        for (i = 0; i < npages; i++) {
                hat_unload(kas.a_hat, kvai, PAGESIZE,
                    HAT_UNLOAD_UNLOCK);
                kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
        }
        vmem_free(heap_arena, kva, psize);

        return (err);
}
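
/*
 * Usage sketch (hedged; user-space): xsvc_devmap() interprets the mmap(2)
 * offset as a physical address rather than a file offset, so mapping the
 * physical range starting at 'pa' might look like:
 *
 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, (off_t)pa);
 *
 * Note that MAP_PRIVATE is rejected by xsvc_devmap_map() below.
 */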

/*
 * xsvc_umem_cookie_alloc()
 *
 *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
 *   allocated.
 */
static int
xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep)
{
        struct ddi_umem_cookie *umem_cookiep;

        umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
        if (umem_cookiep == NULL) {
                *cookiep = NULL;
                return (-1);
        }

        umem_cookiep->cvaddr = kva;
        umem_cookiep->type = KMEM_NON_PAGEABLE;
        umem_cookiep->size = size;
        *cookiep = (ddi_umem_cookie_t)umem_cookiep;

        return (0);
}

/*
 * xsvc_umem_cookie_free()
 *
 */
static void
xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
{
        kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
        *cookiep = NULL;
}


/*
 * xsvc_devmap_map()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
        struct ddi_umem_cookie *cp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;
        int instance;


        instance = getminor(dev);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        dhp = (devmap_handle_t *)dhc;
        /* This driver only supports MAP_SHARED, not MAP_PRIVATE */
        if (flags & MAP_PRIVATE) {
                cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
                return (EINVAL);
        }

        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        cp->cook_refcnt = 1;

        *pvtp = state;
        return (0);
}


/*
 * xsvc_devmap_dup()
 *
 *   keep a reference count for forks so we don't unmap if we have multiple
 *   mappings.
 */
/*ARGSUSED*/
static int
xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
    void **new_pvtp)
{
        struct ddi_umem_cookie *cp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;


        state = (xsvc_state_t *)pvtp;
        dhp = (devmap_handle_t *)dhc;

        mutex_enter(&state->xs_cookie_mutex);
        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        if (cp == NULL) {
                mutex_exit(&state->xs_cookie_mutex);
                return (ENOMEM);
        }

        cp->cook_refcnt++;
        mutex_exit(&state->xs_cookie_mutex);

        *new_pvtp = state;
        return (0);
}


/*
 * xsvc_devmap_unmap()
 *
 *   This routine is only called if we were mapping in memory in
 *   xsvc_devmap(), i.e. we only passed xsvc_callbk to devmap_umem_setup()
 *   when pf_is_memory() was true. It would have been nice if
 *   devmap_callback_ctl had an args parameter; then we would not have to
 *   dig into the devmap_handle and the umem cookie.
 */
/*ARGSUSED*/
static void
xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
    void **new_pvtp2)
{
        struct ddi_umem_cookie *ncp;
        struct ddi_umem_cookie *cp;
        devmap_handle_t *ndhp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;
        size_t npages;
        caddr_t kvai;
        caddr_t kva;
        size_t size;
        int i;


        state = (xsvc_state_t *)pvtp;
        mutex_enter(&state->xs_cookie_mutex);

        /* peek into the umem cookie to figure out what we need to free up */
        dhp = (devmap_handle_t *)dhc;
        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        ASSERT(cp != NULL);

        if (new_dhp1 != NULL) {
                ndhp = (devmap_handle_t *)new_dhp1;
                ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
                ncp->cook_refcnt++;
                *new_pvtp1 = state;
        }
        if (new_dhp2 != NULL) {
                ndhp = (devmap_handle_t *)new_dhp2;
                ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
                ncp->cook_refcnt++;
                *new_pvtp2 = state;
        }

        cp->cook_refcnt--;
        if (cp->cook_refcnt == 0) {
                kva = cp->cvaddr;
                size = cp->size;

                /*
                 * free up the umem cookie, then unmap all the pages that we
                 * mapped in during devmap, then free up the kva space.
                 */
                npages = btop(size);
                xsvc_umem_cookie_free(&dhp->dh_cookie);
                kvai = kva;
                for (i = 0; i < npages; i++) {
                        hat_unload(kas.a_hat, kvai, PAGESIZE,
                            HAT_UNLOAD_UNLOCK);
                        kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
                }
                vmem_free(heap_arena, kva, size);
        }

        mutex_exit(&state->xs_cookie_mutex);
}