/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <vm/seg_kmem.h>
#include <sys/vmsystm.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/avl.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif

#include <sys/xsvc.h>

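/*
 * xsvc is a pseudo-driver with two jobs: its ioctl interface lets a
 * privileged caller allocate (and later flush/free) DMA-able kernel memory,
 * handing back the resulting scatter/gather list, and its devmap entry
 * point lets a caller mmap() physical address ranges directly.
 */
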
/* total max memory which can be allocated with the ioctl interface */
uint64_t xsvc_max_memory = 10 * 1024 * 1024;

extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);


static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval);
static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model);
static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);

static struct cb_ops xsvc_cb_ops = {
        xsvc_open,              /* cb_open */
        xsvc_close,             /* cb_close */
        nodev,                  /* cb_strategy */
        nodev,                  /* cb_print */
        nodev,                  /* cb_dump */
        nodev,                  /* cb_read */
        nodev,                  /* cb_write */
        xsvc_ioctl,             /* cb_ioctl */
        xsvc_devmap,            /* cb_devmap */
        NULL,                   /* cb_mmap */
        NULL,                   /* cb_segmap */
        nochpoll,               /* cb_chpoll */
        ddi_prop_op,            /* cb_prop_op */
        NULL,                   /* cb_stream */
        D_NEW | D_MP | D_64BIT | D_DEVMAP,      /* cb_flag */
        CB_REV
};

static struct dev_ops xsvc_dev_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* devo_refcnt */
        xsvc_getinfo,           /* devo_getinfo */
        nulldev,                /* devo_identify */
        nulldev,                /* devo_probe */
        xsvc_attach,            /* devo_attach */
        xsvc_detach,            /* devo_detach */
        nodev,                  /* devo_reset */
        &xsvc_cb_ops,           /* devo_cb_ops */
        NULL,                   /* devo_bus_ops */
        NULL,                   /* devo_power */
        ddi_quiesce_not_needed, /* devo_quiesce */
};

static struct modldrv xsvc_modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "xsvc driver",          /* Name of the module. */
        &xsvc_dev_ops,          /* driver ops */
};

static struct modlinkage xsvc_modlinkage = {
        MODREV_1,
        { (void *) &xsvc_modldrv, NULL }
};


static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
    xsvc_mem_t **mp);
static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
    uint64_t key);
static int xsvc_mnode_key_compare(const void *q, const void *e);
static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep);
static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);


void *xsvc_statep;

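/* device access attributes: never byte-swap, strict ordering */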
static ddi_device_acc_attr_t xsvc_device_attr = {
        DDI_DEVICE_ATTR_V0,
        DDI_NEVERSWAP_ACC,
        DDI_STRICTORDER_ACC
};

static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp);
static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp);
static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
    devmap_cookie_t new_dhp2, void **new_pvtp2);


static struct devmap_callback_ctl xsvc_callbk = {
        DEVMAP_OPS_REV,
        xsvc_devmap_map,
        NULL,
        xsvc_devmap_dup,
        xsvc_devmap_unmap
};
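
/*
 * These devmap callbacks keep a reference count in the umem cookie so the
 * KVA and page mappings backing a umem segment are only torn down once the
 * last mapping (including any created by fork()) goes away.
 */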


/*
 * _init()
 *
 */
int
_init(void)
{
        int err;

        err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
        if (err != 0) {
                return (err);
        }

        err = mod_install(&xsvc_modlinkage);
        if (err != 0) {
                ddi_soft_state_fini(&xsvc_statep);
                return (err);
        }

        return (0);
}

/*
 * _info()
 *
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&xsvc_modlinkage, modinfop));
}

/*
 * _fini()
 *
 */
int
_fini(void)
{
        int err;

        err = mod_remove(&xsvc_modlinkage);
        if (err != 0) {
                return (err);
        }

        ddi_soft_state_fini(&xsvc_statep);

        return (0);
}

/*
 * xsvc_attach()
 *
 */
static int
xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        xsvc_state_t *state;
        int maxallocmem;
        int instance;
        int err;


        switch (cmd) {
        case DDI_ATTACH:
                break;

        case DDI_RESUME:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

        instance = ddi_get_instance(dip);
        err = ddi_soft_state_zalloc(xsvc_statep, instance);
        if (err != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                err = DDI_FAILURE;
                goto attachfail_get_soft_state;
        }

        state->xs_dip = dip;
        state->xs_instance = instance;

        /* Initialize allocation count */
        mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
        state->xs_currently_alloced = 0;

        mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);

        /* create the minor node (for the ioctl) */
        err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
            0);
        if (err != DDI_SUCCESS) {
                goto attachfail_minor_node;
        }

        /*
         * The "maxallocmem" property (in KB) overrides the default limit
         * (xsvc_max_memory) on the total memory the ioctl will allocate.
         */
        maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
            DDI_PROP_DONTPASS, "maxallocmem", -1);
        if (maxallocmem >= 0) {
                xsvc_max_memory = (uint64_t)maxallocmem * 1024;
        }

        /* Initialize list of memory allocs */
        mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
        avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
            sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));

        /* Report that driver was loaded */
        ddi_report_dev(dip);

        return (DDI_SUCCESS);

attachfail_minor_node:
        mutex_destroy(&state->xs_cookie_mutex);
        mutex_destroy(&state->xs_mutex);
attachfail_get_soft_state:
        (void) ddi_soft_state_free(xsvc_statep, instance);

        return (err);
}

/*
 * xsvc_detach()
 *
 */
static int
xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        xsvc_state_t *state;
        xsvc_mnode_t *mnode;
        xsvc_mem_t *mp;
        int instance;


        instance = ddi_get_instance(dip);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }

        switch (cmd) {
        case DDI_DETACH:
                break;

        case DDI_SUSPEND:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

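        /* passing a NULL name removes all of this dip's minor nodes */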
        ddi_remove_minor_node(dip, NULL);

        /* Free any memory on list */
        while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
                mp = mnode->mn_home;
                xsvc_mem_free(state, mp);
        }

        /* remove list */
        avl_destroy(&state->xs_mlist.ml_avl);
        mutex_destroy(&state->xs_mlist.ml_mutex);

        mutex_destroy(&state->xs_cookie_mutex);
        mutex_destroy(&state->xs_mutex);
        (void) ddi_soft_state_free(xsvc_statep, state->xs_instance);

        return (DDI_SUCCESS);
}

/*
 * xsvc_getinfo()
 *
 */
/*ARGSUSED*/
static int
xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
        xsvc_state_t *state;
        int instance;
        dev_t dev;
        int err;


        dev = (dev_t)arg;
        instance = getminor(dev);

        switch (cmd) {
        case DDI_INFO_DEVT2DEVINFO:
                state = ddi_get_soft_state(xsvc_statep, instance);
                if (state == NULL) {
                        return (DDI_FAILURE);
                }
                *result = (void *)state->xs_dip;
                err = DDI_SUCCESS;
                break;

        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)(uintptr_t)instance;
                err = DDI_SUCCESS;
                break;

        default:
                err = DDI_FAILURE;
                break;
        }

        return (err);
}


/*
 * xsvc_open()
 *
 */
/*ARGSUSED*/
static int
xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
        xsvc_state_t *state;
        int instance;

        instance = getminor(*devp);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        return (0);
}

/*
 * xsvc_close()
 *
 */
/*ARGSUSED*/
static int
xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
        return (0);
}

/*
 * xsvc_ioctl()
 *
 */
/*ARGSUSED*/
static int
xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
{
        xsvc_state_t *state;
        int instance;
        int err;


        err = drv_priv(cred);
        if (err != 0) {
                return (EPERM);
        }
        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        switch (cmd) {
        case XSVC_ALLOC_MEM:
                err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
                break;

        case XSVC_FREE_MEM:
                err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
                break;

        case XSVC_FLUSH_MEM:
                err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
                break;

        default:
                err = ENXIO;
        }

        return (err);
}

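/*
 * Illustrative only: a privileged userland consumer might drive
 * XSVC_ALLOC_MEM roughly as sketched below (field names assume the
 * definitions in <sys/xsvc.h>, fd assumes an open xsvc node, and the sgl
 * must leave room for the zero terminator written after the last cookie):
 *
 *      xsvc_mloc sgl[SGLLEN + 1];
 *      xsvc_mem_req req;
 *
 *      req.xsvc_mem_reqid = key;               caller-chosen lookup key
 *      req.xsvc_mem_size = size;               rounded up to PAGESIZE
 *      req.xsvc_mem_addr_lo = 0x0;
 *      req.xsvc_mem_addr_hi = 0xFFFFFFFF;      addressable range
 *      req.xsvc_mem_align = PAGESIZE;
 *      req.xsvc_mem_sgllen = SGLLEN;
 *      req.xsvc_sg_list = (uintptr_t)sgl;
 *      err = ioctl(fd, XSVC_ALLOC_MEM, &req);
 *
 * On success the caller walks sgl[] until it hits the entry with
 * mloc_size == 0; the same key is later handed to XSVC_FLUSH_MEM and
 * XSVC_FREE_MEM.
 */
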
/*
 * xsvc_ioctl_alloc_memory()
 *
 */
static int
xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mloc_32 *usgl32;
        xsvc_mem_req params;
        xsvc_mloc_32 sgl32;
        xsvc_mloc *usgl;
        xsvc_mem_t *mp;
        xsvc_mloc sgl;
        uint64_t key;
        size_t size;
        int err;
        int i;


        /* Copy in the params, then get the size and key */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }

                key = (uint64_t)params32.xsvc_mem_reqid;
                size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
                size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
        }

        /*
         * make sure this doesn't put us over the maximum allowed to be
         * allocated
         */
        mutex_enter(&state->xs_mutex);
        if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
                mutex_exit(&state->xs_mutex);
                return (EAGAIN);
        }
        state->xs_currently_alloced += size;
        mutex_exit(&state->xs_mutex);

        /* get state to track this memory */
        err = xsvc_mem_alloc(state, key, &mp);
        if (err != 0) {
                return (err);
        }
        mp->xm_size = size;

        /* allocate and bind the memory */
        mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
        mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_burstsizes = 1;
        mp->xm_dma_attr.dma_attr_minxfer = 1;
        mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
        mp->xm_dma_attr.dma_attr_granular = 1;
        mp->xm_dma_attr.dma_attr_flags = 0;

        /* Finish converting params */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
                mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
                mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
                usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
                mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
                    params32.xsvc_mem_align, PAGESIZE);
        } else {
                mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
                mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
                mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
                usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
                mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
                    params.xsvc_mem_align, PAGESIZE);
        }

        mp->xm_device_attr = xsvc_device_attr;

        err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
            DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
        if (err != DDI_SUCCESS) {
                err = EINVAL;
                goto allocfail_alloc_handle;
        }

        /* don't sleep here so we don't get stuck in contig alloc */
        err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
            &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
            &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
        if (err != DDI_SUCCESS) {
                err = EINVAL;
                goto allocfail_alloc_mem;
        }

        err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
            mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
            NULL, &mp->xm_cookie, &mp->xm_cookie_count);
        if (err != DDI_DMA_MAPPED) {
                err = EFAULT;
                goto allocfail_bind;
        }

        /* return sgl */
        for (i = 0; i < mp->xm_cookie_count; i++) {
                if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                        sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
                        sgl32.mloc_size = mp->xm_cookie.dmac_size;
                        err = ddi_copyout(&sgl32, &usgl32[i],
                            sizeof (xsvc_mloc_32), mode);
                        if (err != 0) {
                                err = EFAULT;
                                goto allocfail_copyout;
                        }
                } else {
                        sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
                        sgl.mloc_size = mp->xm_cookie.dmac_size;
                        err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
                            mode);
                        if (err != 0) {
                                err = EFAULT;
                                goto allocfail_copyout;
                        }
                }
                ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
        }

        /* zero the last sgl entry to mark the end of the cookie list */
        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                sgl32.mloc_addr = 0;
                sgl32.mloc_size = 0;
                err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
                    mode);
                if (err != 0) {
                        err = EFAULT;
                        goto allocfail_copyout;
                }
        } else {
                sgl.mloc_addr = 0;
                sgl.mloc_size = 0;
                err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
                if (err != 0) {
                        err = EFAULT;
                        goto allocfail_copyout;
                }
        }

        return (0);

allocfail_copyout:
        (void) ddi_dma_unbind_handle(mp->xm_dma_handle);
allocfail_bind:
        ddi_dma_mem_free(&mp->xm_mem_handle);
allocfail_alloc_mem:
        ddi_dma_free_handle(&mp->xm_dma_handle);
allocfail_alloc_handle:
        mp->xm_dma_handle = NULL;
        xsvc_mem_free(state, mp);

        mutex_enter(&state->xs_mutex);
        state->xs_currently_alloced -= size;
        mutex_exit(&state->xs_mutex);

        return (err);
}

/*
 * xsvc_ioctl_flush_memory()
 *
 */
static int
xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mem_req params;
        xsvc_mem_t *mp;
        uint64_t key;
        int err;


        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params32.xsvc_mem_reqid;
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
        }

        /* find the memory */
        mp = xsvc_mem_lookup(state, key);
        if (mp == NULL) {
                return (EINVAL);
        }

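        /* sync the whole buffer so the CPU sees any pending device writes */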
        (void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);

        return (0);
}


/*
 * xsvc_ioctl_free_memory()
 *
 */
static int
xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
{
        xsvc_mem_req_32 params32;
        xsvc_mem_req params;
        xsvc_mem_t *mp;
        uint64_t key;
        int err;


        if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
                err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
                    mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params32.xsvc_mem_reqid;
        } else {
                err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
                if (err != 0) {
                        return (EFAULT);
                }
                key = (uint64_t)params.xsvc_mem_reqid;
        }

        /* find the memory */
        mp = xsvc_mem_lookup(state, key);
        if (mp == NULL) {
                return (EINVAL);
        }

        xsvc_mem_free(state, mp);

        return (0);
}

/*
 * xsvc_mem_alloc()
 *
 */
static int
xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
{
        xsvc_mem_t *mem;

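        /* an existing allocation under the same key is replaced */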
        mem = xsvc_mem_lookup(state, key);
        if (mem != NULL) {
                xsvc_mem_free(state, mem);
        }

        *mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
        (*mp)->xm_mnode.mn_home = *mp;
        (*mp)->xm_mnode.mn_key = key;

        mutex_enter(&state->xs_mlist.ml_mutex);
        avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
        mutex_exit(&state->xs_mlist.ml_mutex);

        return (0);
}

/*
 * xsvc_mem_free()
 *
 */
static void
xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
{
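        /* only tear down DMA state if the alloc/bind actually completed */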
        if (mp->xm_dma_handle != NULL) {
                (void) ddi_dma_unbind_handle(mp->xm_dma_handle);
                ddi_dma_mem_free(&mp->xm_mem_handle);
                ddi_dma_free_handle(&mp->xm_dma_handle);

                mutex_enter(&state->xs_mutex);
                state->xs_currently_alloced -= mp->xm_size;
                mutex_exit(&state->xs_mutex);
        }

        mutex_enter(&state->xs_mlist.ml_mutex);
        avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
        mutex_exit(&state->xs_mlist.ml_mutex);

        kmem_free(mp, sizeof (*mp));
}

/*
 * xsvc_mem_lookup()
 *
 */
static xsvc_mem_t *
xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
{
        xsvc_mnode_t mnode;
        xsvc_mnode_t *mnp;
        avl_index_t where;
        xsvc_mem_t *mp;

        mnode.mn_key = key;
        mutex_enter(&state->xs_mlist.ml_mutex);
        mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
        mutex_exit(&state->xs_mlist.ml_mutex);

        if (mnp != NULL) {
                mp = mnp->mn_home;
        } else {
                mp = NULL;
        }

        return (mp);
}

/*
 * xsvc_mnode_key_compare()
 *
 */
static int
xsvc_mnode_key_compare(const void *q, const void *e)
{
        const xsvc_mnode_t *n1;
        const xsvc_mnode_t *n2;

        n1 = (const xsvc_mnode_t *)q;
        n2 = (const xsvc_mnode_t *)e;

        if (n1->mn_key < n2->mn_key) {
                return (-1);
        } else if (n1->mn_key > n2->mn_key) {
                return (1);
        } else {
                return (0);
        }
}

/*
 * xsvc_devmap()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
                size_t *maplen, uint_t model)
{
        ddi_umem_cookie_t cookie;
        xsvc_state_t *state;
        offset_t off_align;
        size_t npages;
        caddr_t kvai;
        size_t psize;
        int instance;
        caddr_t kva;
        pfn_t pfn;
        int err;
        int i;


        instance = getminor(dev);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
         * smmap32 will sign extend the offset. We need to undo that since
         * we are passed a physical address in off, not an offset.
         */
#if defined(__amd64)
        if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
            ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
                off = off & 0xFFFFFFFF;
        }
#endif

#ifdef __xpv
        /*
         * We won't allow guest OSes to devmap mfn/pfns. We may relax this
         * later if there is a good reason to.
         */
        if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
                return (-1);
        }

        /* we will always treat this as a foreign MFN */
        pfn = xen_assign_pfn(btop(off));
#else
        pfn = btop(off);
#endif

        /* always work with whole pages */
        off_align = P2ALIGN(off, PAGESIZE);
        psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;

        /*
         * if this is memory we're trying to map into user space, we first
         * need to map the PFNs into KVA, then build up a umem cookie, and
         * finally do a umem_setup to map it in.
         */
        if (pf_is_memory(pfn)) {
                npages = btop(psize);

                kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
                if (kva == NULL) {
                        return (-1);
                }

                kvai = kva;
                for (i = 0; i < npages; i++) {
                        hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
                            PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
                        pfn++;
                        kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
                }

                err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
                if (err != 0) {
                        goto devmapfail_cookie_alloc;
                }

                if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
                    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
                        goto devmapfail_umem_setup;
                }
                *maplen = psize;

        /*
         * If this is not memory (or a foreign MFN in i86xpv), go through
         * devmem_setup.
         */
        } else {
                if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
                    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
                        return (err);
                }
                *maplen = psize;
        }

        return (0);

devmapfail_umem_setup:
        xsvc_umem_cookie_free(&cookie);

devmapfail_cookie_alloc:
        kvai = kva;
        for (i = 0; i < npages; i++) {
                hat_unload(kas.a_hat, kvai, PAGESIZE,
                    HAT_UNLOAD_UNLOCK);
                kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
        }
        vmem_free(heap_arena, kva, psize);

        return (err);
}

/*
 * xsvc_umem_cookie_alloc()
 *
 *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
 *   allocated.
 */
static int
xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep)
{
        struct ddi_umem_cookie *umem_cookiep;

        umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
        if (umem_cookiep == NULL) {
                *cookiep = NULL;
                return (-1);
        }

        umem_cookiep->cvaddr = kva;
        umem_cookiep->type = KMEM_NON_PAGEABLE;
        umem_cookiep->size = size;
        *cookiep = (ddi_umem_cookie_t)umem_cookiep;

        return (0);
}

/*
 * xsvc_umem_cookie_free()
 *
 */
static void
xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
{
        kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
        *cookiep = NULL;
}


/*
 * xsvc_devmap_map()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
        struct ddi_umem_cookie *cp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;
        int instance;


        instance = getminor(dev);
        state = ddi_get_soft_state(xsvc_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        dhp = (devmap_handle_t *)dhc;

        /* This driver only supports MAP_SHARED, not MAP_PRIVATE */
        if (flags & MAP_PRIVATE) {
                cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
                return (EINVAL);
        }

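        /* we own the cookie; start its refcount at one for this mapping */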
        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        cp->cook_refcnt = 1;

        *pvtp = state;
        return (0);
}


/*
 * xsvc_devmap_dup()
 *
 *   keep a reference count for forks so we don't unmap if we have multiple
 *   mappings.
 */
/*ARGSUSED*/
static int
xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
    void **new_pvtp)
{
        struct ddi_umem_cookie *cp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;


        state = (xsvc_state_t *)pvtp;
        dhp = (devmap_handle_t *)dhc;

        mutex_enter(&state->xs_cookie_mutex);
        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        if (cp == NULL) {
                mutex_exit(&state->xs_cookie_mutex);
                return (ENOMEM);
        }

        cp->cook_refcnt++;
        mutex_exit(&state->xs_cookie_mutex);

        *new_pvtp = state;
        return (0);
}


/*
 * xsvc_devmap_unmap()
 *
 *   This routine is only called if we were mapping in memory in xsvc_devmap(),
 *   i.e. we only pass xsvc_callbk to devmap_umem_setup if pf_is_memory()
 *   was true. It would have been nice if devmap_callback_ctl had an args
 *   param; then we wouldn't have to dig into the devmap_handle and the umem
 *   cookie.
 */
/*ARGSUSED*/
static void
xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
    void **new_pvtp2)
{
        struct ddi_umem_cookie *ncp;
        struct ddi_umem_cookie *cp;
        devmap_handle_t *ndhp;
        devmap_handle_t *dhp;
        xsvc_state_t *state;
        size_t npages;
        caddr_t kvai;
        caddr_t kva;
        size_t size;
        int i;


        state = (xsvc_state_t *)pvtp;
        mutex_enter(&state->xs_cookie_mutex);

        /* peek into the umem cookie to figure out what we need to free up */
        dhp = (devmap_handle_t *)dhc;
        cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
        ASSERT(cp != NULL);

        if (new_dhp1 != NULL) {
                ndhp = (devmap_handle_t *)new_dhp1;
                ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
                ncp->cook_refcnt++;
                *new_pvtp1 = state;
        }
        if (new_dhp2 != NULL) {
                ndhp = (devmap_handle_t *)new_dhp2;
                ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
                ncp->cook_refcnt++;
                *new_pvtp2 = state;
        }

        cp->cook_refcnt--;
        if (cp->cook_refcnt == 0) {
                kva = cp->cvaddr;
                size = cp->size;

                /*
                 * free up the umem cookie, then unmap all the pages that we
                 * mapped in during devmap, then free up the kva space.
                 */
                npages = btop(size);
                xsvc_umem_cookie_free(&dhp->dh_cookie);
                kvai = kva;
                for (i = 0; i < npages; i++) {
                        hat_unload(kas.a_hat, kvai, PAGESIZE,
                            HAT_UNLOAD_UNLOCK);
                        kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
                }
                vmem_free(heap_arena, kva, size);
        }

        mutex_exit(&state->xs_cookie_mutex);
}