1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
  23  * All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2009, Intel Corporation.
  27  * All rights reserved.
  28  */
  29 /*
  30  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  31  */
  32 
  33 /*
  34  * DVMA code
  35  * This file contains Intel IOMMU code that deals with DVMA
  36  * i.e. DMA remapping.
  37  */
  38 
  39 #include <sys/sysmacros.h>
  40 #include <sys/pcie.h>
  41 #include <sys/pci_cfgspace.h>
  42 #include <vm/hat_i86.h>
  43 #include <sys/memlist.h>
  44 #include <sys/acpi/acpi.h>
  45 #include <sys/acpica.h>
  46 #include <sys/modhash.h>
  47 #include <sys/immu.h>
  48 #include <sys/x86_archext.h>
  49 #include <sys/archsystm.h>
  50 
  51 #undef  TEST
  52 
  53 /*
  54  * Macros based on PCI spec
  55  */
  56 #define IMMU_PCI_REV2CLASS(r)   ((r) >> 8)  /* classcode from revid */
  57 #define IMMU_PCI_CLASS2BASE(c)  ((c) >> 16) /* baseclass from classcode */
  58 #define IMMU_PCI_CLASS2SUB(c)   (((c) >> 8) & 0xff); /* classcode */
  59 
  60 #define IMMU_CONTIG_PADDR(d, p) \
  61         ((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p))
  62 
  63 typedef struct dvma_arg {
  64         immu_t *dva_immu;
  65         dev_info_t *dva_rdip;
  66         dev_info_t *dva_ddip;
  67         domain_t *dva_domain;
  68         int dva_level;
  69         immu_flags_t dva_flags;
  70         list_t *dva_list;
  71         int dva_error;
  72 } dvma_arg_t;
  73 
  74 static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
  75     dev_info_t *rdip, immu_flags_t immu_flags);
  76 static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
  77     int dev, int func, immu_flags_t immu_flags);
  78 static void destroy_immu_devi(immu_devi_t *immu_devi);
  79 static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
  80     uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
  81     immu_flags_t immu_flags);
  82 
  83 /* Extern globals */
  84 extern struct memlist  *phys_install;
  85 
  86 /*
  87  * iommulib interface functions.
  88  */
  89 static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
  90 static int immu_allochdl(iommulib_handle_t handle,
  91     dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
  92     int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
  93 static int immu_freehdl(iommulib_handle_t handle,
  94     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
  95 static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
  96     dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
  97     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
  98 static int immu_unbindhdl(iommulib_handle_t handle,
  99     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
 100 static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
 101     dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
 102     uint_t cachefl);
 103 static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
 104     dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
 105     off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
 106 static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
 107     dev_info_t *rdip, ddi_dma_handle_t dma_handle,
 108     struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
 109 static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
 110     dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
 111 
 112 /* static Globals */
 113 
 114 /*
 115  * Used to setup DMA objects (memory regions)
 116  * for DMA reads by IOMMU units
 117  */
 118 static ddi_dma_attr_t immu_dma_attr = {
 119         DMA_ATTR_V0,
 120         0U,
 121         0xffffffffffffffffULL,
 122         0xffffffffU,
 123         MMU_PAGESIZE, /* MMU page aligned */
 124         0x1,
 125         0x1,
 126         0xffffffffU,
 127         0xffffffffffffffffULL,
 128         1,
 129         4,
 130         0
 131 };
 132 
 133 static ddi_device_acc_attr_t immu_acc_attr = {
 134         DDI_DEVICE_ATTR_V0,
 135         DDI_NEVERSWAP_ACC,
 136         DDI_STRICTORDER_ACC
 137 };
 138 
 139 struct iommulib_ops immulib_ops = {
 140         IOMMU_OPS_VERSION,
 141         INTEL_IOMMU,
 142         "Intel IOMMU",
 143         NULL,
 144         immu_probe,
 145         immu_allochdl,
 146         immu_freehdl,
 147         immu_bindhdl,
 148         immu_unbindhdl,
 149         immu_sync,
 150         immu_win,
 151         immu_mapobject,
 152         immu_unmapobject,
 153 };
 154 
 155 /*
 156  * Fake physical address range used to set up initial prealloc mappings.
 157  * This memory is never actually accessed. It is mapped read-only,
 158  * and is overwritten as soon as the first DMA bind operation is
 159  * performed. Since 0 is a special case, just start at the 2nd
 160  * physical page.
 161  */
 162 
 163 static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
 164 
 165 /* globals private to this file */
 166 static kmutex_t immu_domain_lock;
 167 static list_t immu_unity_domain_list;
 168 static list_t immu_xlate_domain_list;
 169 
 170 /* structure used to store idx into each level of the page tables */
 171 typedef struct xlate {
 172         int xlt_level;
 173         uint_t xlt_idx;
 174         pgtable_t *xlt_pgtable;
 175 } xlate_t;
 176 
/* Domain-id 0 is reserved by the VT-d spec. Solaris reserves 1 (unity). */
 178 #define IMMU_UNITY_DID   1
 179 
 180 static mod_hash_t *bdf_domain_hash;
 181 
 182 int immu_use_alh;
 183 int immu_use_tm;
 184 
 185 static domain_t *
 186 bdf_domain_lookup(immu_devi_t *immu_devi)
 187 {
 188         domain_t *domain;
 189         int16_t seg = immu_devi->imd_seg;
 190         int16_t bus = immu_devi->imd_bus;
 191         int16_t devfunc = immu_devi->imd_devfunc;
 192         uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 193 
 194         if (seg < 0 || bus < 0 || devfunc < 0) {
 195                 return (NULL);
 196         }
 197 
 198         domain = NULL;
 199         if (mod_hash_find(bdf_domain_hash,
 200             (void *)bdf, (void *)&domain) == 0) {
 201                 ASSERT(domain);
 202                 ASSERT(domain->dom_did > 0);
 203                 return (domain);
 204         } else {
 205                 return (NULL);
 206         }
 207 }
 208 
 209 static void
 210 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
 211 {
 212         int16_t seg = immu_devi->imd_seg;
 213         int16_t bus = immu_devi->imd_bus;
 214         int16_t devfunc = immu_devi->imd_devfunc;
 215         uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 216 
 217         if (seg < 0 || bus < 0 || devfunc < 0) {
 218                 return;
 219         }
 220 
 221         (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
 222 }
 223 
 224 static int
 225 match_lpc(dev_info_t *pdip, void *arg)
 226 {
 227         immu_devi_t *immu_devi;
 228         dvma_arg_t *dvap = (dvma_arg_t *)arg;
 229 
 230         if (list_is_empty(dvap->dva_list)) {
 231                 return (DDI_WALK_TERMINATE);
 232         }
 233 
 234         immu_devi = list_head(dvap->dva_list);
 235         for (; immu_devi; immu_devi = list_next(dvap->dva_list,
 236             immu_devi)) {
 237                 if (immu_devi->imd_dip == pdip) {
 238                         dvap->dva_ddip = pdip;
 239                         dvap->dva_error = DDI_SUCCESS;
 240                         return (DDI_WALK_TERMINATE);
 241                 }
 242         }
 243 
 244         return (DDI_WALK_CONTINUE);
 245 }
 246 
 247 static void
 248 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
 249 {
 250         list_t *spclist = NULL;
 251         immu_devi_t *immu_devi;
 252 
 253         immu_devi = IMMU_DEVI(dip);
 254         if (immu_devi->imd_display == B_TRUE) {
 255                 spclist = &(immu->immu_dvma_gfx_list);
 256         } else if (immu_devi->imd_lpc == B_TRUE) {
 257                 spclist = &(immu->immu_dvma_lpc_list);
 258         }
 259 
 260         if (spclist) {
 261                 mutex_enter(&(immu->immu_lock));
 262                 list_insert_head(spclist, immu_devi);
 263                 mutex_exit(&(immu->immu_lock));
 264         }
 265 }
 266 
 267 /*
 268  * Set the immu_devi struct in the immu_devi field of a devinfo node
 269  */
 270 int
 271 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
 272 {
 273         int bus, dev, func;
 274         immu_devi_t *new_imd;
 275         immu_devi_t *immu_devi;
 276 
 277         immu_devi = immu_devi_get(dip);
 278         if (immu_devi != NULL) {
 279                 return (DDI_SUCCESS);
 280         }
 281 
 282         bus = dev = func = -1;
 283 
 284         /*
 285          * Assume a new immu_devi struct is needed
 286          */
 287         if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
 288                 /*
 289                  * No BDF. Set bus = -1 to indicate this.
 290                  * We still need to create a immu_devi struct
 291                  * though
 292                  */
 293                 bus = -1;
 294                 dev = 0;
 295                 func = 0;
 296         }
 297 
 298         new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
 299         if (new_imd  == NULL) {
 300                 ddi_err(DER_WARN, dip, "Failed to create immu_devi "
 301                     "structure");
 302                 return (DDI_FAILURE);
 303         }
 304 
 305         /*
 306          * Check if some other thread allocated a immu_devi while we
 307          * didn't own the lock.
 308          */
 309         mutex_enter(&(DEVI(dip)->devi_lock));
 310         if (IMMU_DEVI(dip) == NULL) {
 311                 IMMU_DEVI_SET(dip, new_imd);
 312         } else {
 313                 destroy_immu_devi(new_imd);
 314         }
 315         mutex_exit(&(DEVI(dip)->devi_lock));
 316 
 317         return (DDI_SUCCESS);
 318 }
 319 
 320 static dev_info_t *
 321 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
 322 {
 323         dvma_arg_t dvarg = {0};
 324         dvarg.dva_list = &(immu->immu_dvma_lpc_list);
 325         dvarg.dva_rdip = rdip;
 326         dvarg.dva_error = DDI_FAILURE;
 327 
 328         if (immu_walk_ancestor(rdip, NULL, match_lpc,
 329             &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
 330                 ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
 331                     "find lpc_devinfo for ISA device");
 332                 return (NULL);
 333         }
 334 
 335         if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
 336                 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
 337                     "ISA device");
 338                 return (NULL);
 339         }
 340 
 341         return (dvarg.dva_ddip);
 342 }
 343 
 344 static dev_info_t *
 345 get_gfx_devinfo(dev_info_t *rdip)
 346 {
 347         immu_t *immu;
 348         immu_devi_t *immu_devi;
 349         list_t *list_gfx;
 350 
 351         /*
 352          * The GFX device may not be on the same iommu unit as "agpgart"
 353          * so search globally
 354          */
 355         immu_devi = NULL;
 356         immu = list_head(&immu_list);
 357         for (; immu; immu = list_next(&immu_list, immu)) {
 358                 list_gfx = &(immu->immu_dvma_gfx_list);
 359                 if (!list_is_empty(list_gfx)) {
 360                         immu_devi = list_head(list_gfx);
 361                         break;
 362                 }
 363         }
 364 
 365         if (immu_devi == NULL) {
 366                 ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
 367                     "Cannot redirect agpgart");
 368                 return (NULL);
 369         }
 370 
 371         ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
 372             ddi_node_name(immu_devi->imd_dip));
 373 
 374         return (immu_devi->imd_dip);
 375 }
 376 
 377 static immu_flags_t
 378 dma_to_immu_flags(struct ddi_dma_req *dmareq)
 379 {
 380         immu_flags_t flags = 0;
 381 
 382         if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
 383                 flags |= IMMU_FLAGS_SLEEP;
 384         } else {
 385                 flags |= IMMU_FLAGS_NOSLEEP;
 386         }
 387 
 388 #ifdef BUGGY_DRIVERS
 389 
 390         flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 391 
 392 #else
 393         /*
 394          * Read and write flags need to be reversed.
 395          * DMA_READ means read from device and write
 396          * to memory. So DMA read means DVMA write.
 397          */
 398         if (dmareq->dmar_flags & DDI_DMA_READ)
 399                 flags |= IMMU_FLAGS_WRITE;
 400 
 401         if (dmareq->dmar_flags & DDI_DMA_WRITE)
 402                 flags |= IMMU_FLAGS_READ;
 403 
 404         /*
 405          * Some buggy drivers specify neither READ or WRITE
 406          * For such drivers set both read and write permissions
 407          */
 408         if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
 409                 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 410         }
 411 #endif
 412 
 413         return (flags);
 414 }
 415 
 416 /*ARGSUSED*/
 417 int
 418 pgtable_ctor(void *buf, void *arg, int kmflag)
 419 {
 420         size_t actual_size = 0;
 421         pgtable_t *pgtable;
 422         int (*dmafp)(caddr_t);
 423         caddr_t vaddr;
 424         void *next;
 425         uint_t flags;
 426         immu_t *immu = arg;
 427 
 428         pgtable = (pgtable_t *)buf;
 429 
 430         dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
 431 
 432         next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
 433         if (next == NULL) {
 434                 return (-1);
 435         }
 436 
 437         if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
 438             dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
 439                 kmem_free(next, IMMU_PAGESIZE);
 440                 return (-1);
 441         }
 442 
 443         flags = DDI_DMA_CONSISTENT;
 444         if (!immu->immu_dvma_coherent)
 445                 flags |= IOMEM_DATA_UC_WR_COMBINE;
 446 
 447         if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
 448             &immu_acc_attr, flags,
 449             dmafp, NULL, &vaddr, &actual_size,
 450             &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
 451                 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 452                 kmem_free(next, IMMU_PAGESIZE);
 453                 return (-1);
 454         }
 455 
 456         /*
 457          * Memory allocation failure. Maybe a temporary condition
 458          * so return error rather than panic, so we can try again
 459          */
 460         if (actual_size < IMMU_PAGESIZE) {
 461                 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 462                 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 463                 kmem_free(next, IMMU_PAGESIZE);
 464                 return (-1);
 465         }
 466 
 467         pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
 468         pgtable->hwpg_vaddr = vaddr;
 469         pgtable->swpg_next_array = next;
 470 
 471         rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
 472 
 473         return (0);
 474 }
 475 
 476 /*ARGSUSED*/
 477 void
 478 pgtable_dtor(void *buf, void *arg)
 479 {
 480         pgtable_t *pgtable;
 481 
 482         pgtable = (pgtable_t *)buf;
 483 
 484         /* destroy will panic if lock is held. */
 485         rw_destroy(&(pgtable->swpg_rwlock));
 486 
 487         ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 488         ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 489         kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
 490 }
 491 
 492 /*
 493  * pgtable_alloc()
 494  *      alloc a IOMMU pgtable structure.
 495  *      This same struct is used for root and context tables as well.
 496  *      This routine allocs the f/ollowing:
 497  *      - a pgtable_t struct
 498  *      - a HW page which holds PTEs/entries which is accesssed by HW
 499  *        so we set up DMA for this page
 500  *      - a SW page which is only for our bookeeping
 501  *        (for example to  hold pointers to the next level pgtable).
 502  *        So a simple kmem_alloc suffices
 503  */
 504 static pgtable_t *
 505 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
 506 {
 507         pgtable_t *pgtable;
 508         int kmflags;
 509 
 510         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 511 
 512         pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
 513         if (pgtable == NULL) {
 514                 return (NULL);
 515         }
 516         return (pgtable);
 517 }
 518 
 519 static void
 520 pgtable_zero(pgtable_t *pgtable)
 521 {
 522         bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
 523         bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
 524 }
 525 
 526 static void
 527 pgtable_free(immu_t *immu, pgtable_t *pgtable)
 528 {
 529         kmem_cache_free(immu->immu_pgtable_cache, pgtable);
 530 }
 531 
 532 /*
 533  * Function to identify a display device from the PCI class code
 534  */
 535 static boolean_t
 536 device_is_display(uint_t classcode)
 537 {
 538         static uint_t disp_classes[] = {
 539                 0x000100,
 540                 0x030000,
 541                 0x030001
 542         };
 543         int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
 544 
 545         for (i = 0; i < nclasses; i++) {
 546                 if (classcode == disp_classes[i])
 547                         return (B_TRUE);
 548         }
 549         return (B_FALSE);
 550 }
 551 
 552 /*
 553  * Function that determines if device is PCIEX and/or PCIEX bridge
 554  */
 555 static boolean_t
 556 device_is_pciex(
 557         uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
 558 {
 559         ushort_t cap;
 560         ushort_t capsp;
 561         ushort_t cap_count = PCI_CAP_MAX_PTR;
 562         ushort_t status;
 563         boolean_t is_pciex = B_FALSE;
 564 
 565         *is_pcib = B_FALSE;
 566 
 567         status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
 568         if (!(status & PCI_STAT_CAP))
 569                 return (B_FALSE);
 570 
 571         capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
 572         while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
 573                 capsp &= PCI_CAP_PTR_MASK;
 574                 cap = pci_getb_func(bus, dev, func, capsp);
 575 
 576                 if (cap == PCI_CAP_ID_PCI_E) {
 577                         status = pci_getw_func(bus, dev, func, capsp + 2);
 578                         /*
 579                          * See section 7.8.2 of PCI-Express Base Spec v1.0a
 580                          * for Device/Port Type.
 581                          * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
 582                          * device is a PCIE2PCI bridge
 583                          */
 584                         *is_pcib =
 585                             ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
 586                             PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
 587                         is_pciex = B_TRUE;
 588                 }
 589 
 590                 capsp = (*pci_getb_func)(bus, dev, func,
 591                     capsp + PCI_CAP_NEXT_PTR);
 592         }
 593 
 594         return (is_pciex);
 595 }
 596 
 597 static boolean_t
 598 device_use_premap(uint_t classcode)
 599 {
 600         if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
 601                 return (B_TRUE);
 602         return (B_FALSE);
 603 }
 604 
 605 
 606 /*
 607  * immu_dvma_get_immu()
 608  *   get the immu unit structure for a dev_info node
 609  */
 610 immu_t *
 611 immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
 612 {
 613         immu_devi_t *immu_devi;
 614         immu_t *immu;
 615 
 616         /*
 617          * check if immu unit was already found earlier.
 618          * If yes, then it will be stashed in immu_devi struct.
 619          */
 620         immu_devi = immu_devi_get(dip);
 621         if (immu_devi == NULL) {
 622                 if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
 623                         /*
 624                          * May fail because of low memory. Return error rather
 625                          * than panic as we want driver to rey again later
 626                          */
 627                         ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
 628                             "No immu_devi structure");
 629                         /*NOTREACHED*/
 630                 }
 631                 immu_devi = immu_devi_get(dip);
 632         }
 633 
 634         mutex_enter(&(DEVI(dip)->devi_lock));
 635         if (immu_devi->imd_immu) {
 636                 immu = immu_devi->imd_immu;
 637                 mutex_exit(&(DEVI(dip)->devi_lock));
 638                 return (immu);
 639         }
 640         mutex_exit(&(DEVI(dip)->devi_lock));
 641 
 642         immu = immu_dmar_get_immu(dip);
 643         if (immu == NULL) {
 644                 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
 645                     "Cannot find immu_t for device");
 646                 /*NOTREACHED*/
 647         }
 648 
 649         /*
 650          * Check if some other thread found immu
 651          * while lock was not held
 652          */
 653         immu_devi = immu_devi_get(dip);
 654         /* immu_devi should be present as we found it earlier */
 655         if (immu_devi == NULL) {
 656                 ddi_err(DER_PANIC, dip,
 657                     "immu_dvma_get_immu: No immu_devi structure");
 658                 /*NOTREACHED*/
 659         }
 660 
 661         mutex_enter(&(DEVI(dip)->devi_lock));
 662         if (immu_devi->imd_immu == NULL) {
 663                 /* nobody else set it, so we should do it */
 664                 immu_devi->imd_immu = immu;
 665                 immu_devi_set_spclist(dip, immu);
 666         } else {
 667                 /*
 668                  * if some other thread got immu before
 669                  * us, it should get the same results
 670                  */
 671                 if (immu_devi->imd_immu != immu) {
 672                         ddi_err(DER_PANIC, dip, "Multiple "
 673                             "immu units found for device. Expected (%p), "
 674                             "actual (%p)", (void *)immu,
 675                             (void *)immu_devi->imd_immu);
 676                         mutex_exit(&(DEVI(dip)->devi_lock));
 677                         /*NOTREACHED*/
 678                 }
 679         }
 680         mutex_exit(&(DEVI(dip)->devi_lock));
 681 
 682         return (immu);
 683 }
 684 
 685 
 686 /* ############################# IMMU_DEVI code ############################ */
 687 
 688 /*
 689  * Allocate a immu_devi structure and initialize it
 690  */
 691 static immu_devi_t *
 692 create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
 693     immu_flags_t immu_flags)
 694 {
 695         uchar_t baseclass, subclass;
 696         uint_t classcode, revclass;
 697         immu_devi_t *immu_devi;
 698         boolean_t pciex = B_FALSE;
 699         int kmflags;
 700         boolean_t is_pcib = B_FALSE;
 701 
 702         /* bus ==  -1 indicate non-PCI device (no BDF) */
 703         ASSERT(bus == -1 || bus >= 0);
 704         ASSERT(dev >= 0);
 705         ASSERT(func >= 0);
 706 
 707         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 708         immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
 709         if (immu_devi == NULL) {
 710                 ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
 711                     "Intel IOMMU immu_devi structure");
 712                 return (NULL);
 713         }
 714         immu_devi->imd_dip = rdip;
 715         immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
 716         immu_devi->imd_bus = bus;
 717         immu_devi->imd_pcib_type = IMMU_PCIB_BAD;
 718 
 719         if (bus == -1) {
 720                 immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
 721                 return (immu_devi);
 722         }
 723 
 724         immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
 725         immu_devi->imd_sec = 0;
 726         immu_devi->imd_sub = 0;
 727 
 728         revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);
 729 
 730         classcode = IMMU_PCI_REV2CLASS(revclass);
 731         baseclass = IMMU_PCI_CLASS2BASE(classcode);
 732         subclass = IMMU_PCI_CLASS2SUB(classcode);
 733 
 734         if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
 735 
 736                 immu_devi->imd_sec = pci_getb_func(bus, dev, func,
 737                     PCI_BCNF_SECBUS);
 738                 immu_devi->imd_sub = pci_getb_func(bus, dev, func,
 739                     PCI_BCNF_SUBBUS);
 740 
 741                 pciex = device_is_pciex(bus, dev, func, &is_pcib);
 742                 if (pciex  == B_TRUE && is_pcib == B_TRUE) {
 743                         immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
 744                 } else if (pciex == B_TRUE) {
 745                         immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
 746                 } else {
 747                         immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
 748                 }
 749         } else {
 750                 immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
 751         }
 752 
 753         /* check for certain special devices */
 754         immu_devi->imd_display = device_is_display(classcode);
 755         immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
 756             (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
 757         immu_devi->imd_use_premap = device_use_premap(classcode);
 758 
 759         immu_devi->imd_domain = NULL;
 760 
 761         immu_devi->imd_dvma_flags = immu_global_dvma_flags;
 762 
 763         return (immu_devi);
 764 }
 765 
 766 static void
 767 destroy_immu_devi(immu_devi_t *immu_devi)
 768 {
 769         kmem_free(immu_devi, sizeof (immu_devi_t));
 770 }
 771 
 772 static domain_t *
 773 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
 774 {
 775         immu_devi_t *immu_devi;
 776         domain_t *domain;
 777         dev_info_t *ddip;
 778 
 779         *ddipp = NULL;
 780 
 781         immu_devi = immu_devi_get(rdip);
 782         if (immu_devi == NULL) {
 783                 return (NULL);
 784         }
 785 
 786         mutex_enter(&(DEVI(rdip)->devi_lock));
 787         domain = immu_devi->imd_domain;
 788         ddip = immu_devi->imd_ddip;
 789         mutex_exit(&(DEVI(rdip)->devi_lock));
 790 
 791         if (domain)
 792                 *ddipp = ddip;
 793 
 794         return (domain);
 795 
 796 }
 797 
 798 /* ############################# END IMMU_DEVI code ######################## */
 799 /* ############################# DOMAIN code ############################### */
 800 
 801 /*
 802  * This routine always succeeds
 803  */
 804 static int
 805 did_alloc(immu_t *immu, dev_info_t *rdip,
 806     dev_info_t *ddip, immu_flags_t immu_flags)
 807 {
 808         int did;
 809 
 810         did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
 811             (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
 812 
 813         if (did == 0) {
 814                 ddi_err(DER_WARN, rdip, "device domain-id alloc error"
 815                     " domain-device: %s%d. immu unit is %s. Using "
 816                     "unity domain with domain-id (%d)",
 817                     ddi_driver_name(ddip), ddi_get_instance(ddip),
 818                     immu->immu_name, immu->immu_unity_domain->dom_did);
 819                 did = immu->immu_unity_domain->dom_did;
 820         }
 821 
 822         return (did);
 823 }
 824 
 825 static int
 826 get_branch_domain(dev_info_t *pdip, void *arg)
 827 {
 828         immu_devi_t *immu_devi;
 829         domain_t *domain;
 830         dev_info_t *ddip;
 831         immu_t *immu;
 832         dvma_arg_t *dvp = (dvma_arg_t *)arg;
 833 
 834         /*
 835          * The field dvp->dva_rdip is a work-in-progress
 836          * and gets updated as we walk up the ancestor
 837          * tree. The final ddip is set only when we reach
 838          * the top of the tree. So the dvp->dva_ddip field cannot
 839          * be relied on until we reach the top of the field.
 840          */
 841 
 842         /* immu_devi may not be set. */
 843         immu_devi = immu_devi_get(pdip);
 844         if (immu_devi == NULL) {
 845                 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
 846                         dvp->dva_error = DDI_FAILURE;
 847                         return (DDI_WALK_TERMINATE);
 848                 }
 849         }
 850 
 851         immu_devi = immu_devi_get(pdip);
 852         immu = immu_devi->imd_immu;
 853         if (immu == NULL)
 854                 immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
 855 
 856         /*
 857          * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
 858          * terminate the walk (since the device under the PCIE bridge
 859          * is a PCIE device and has an independent entry in the
 860          * root/context table)
 861          */
 862         if (dvp->dva_rdip != pdip &&
 863             immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
 864                 return (DDI_WALK_TERMINATE);
 865         }
 866 
 867         /*
 868          * In order to be a domain-dim, it must be a PCI device i.e.
 869          * must have valid BDF. This also eliminates the root complex.
 870          */
 871         if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
 872             immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
 873                 ASSERT(immu_devi->imd_bus >= 0);
 874                 ASSERT(immu_devi->imd_devfunc >= 0);
 875                 dvp->dva_ddip = pdip;
 876         }
 877 
 878         if (immu_devi->imd_display == B_TRUE ||
 879             (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
 880                 dvp->dva_domain = immu->immu_unity_domain;
 881                 /* continue walking to find ddip */
 882                 return (DDI_WALK_CONTINUE);
 883         }
 884 
 885         mutex_enter(&(DEVI(pdip)->devi_lock));
 886         domain = immu_devi->imd_domain;
 887         ddip = immu_devi->imd_ddip;
 888         mutex_exit(&(DEVI(pdip)->devi_lock));
 889 
 890         if (domain && ddip) {
 891                 /* if domain is set, it must be the same */
 892                 if (dvp->dva_domain) {
 893                         ASSERT(domain == dvp->dva_domain);
 894                 }
 895                 dvp->dva_domain = domain;
 896                 dvp->dva_ddip = ddip;
 897                 return (DDI_WALK_TERMINATE);
 898         }
 899 
 900         /* Domain may already be set, continue walking so that ddip gets set */
 901         if (dvp->dva_domain) {
 902                 return (DDI_WALK_CONTINUE);
 903         }
 904 
 905         /* domain is not set in either immu_devi or dvp */
 906         domain = bdf_domain_lookup(immu_devi);
 907         if (domain == NULL) {
 908                 return (DDI_WALK_CONTINUE);
 909         }
 910 
 911         /* ok, the BDF hash had a domain for this BDF. */
 912 
 913         /* Grab lock again to check if something else set immu_devi fields */
 914         mutex_enter(&(DEVI(pdip)->devi_lock));
 915         if (immu_devi->imd_domain != NULL) {
 916                 dvp->dva_domain = domain;
 917         } else {
 918                 dvp->dva_domain = domain;
 919         }
 920         mutex_exit(&(DEVI(pdip)->devi_lock));
 921 
 922         /*
 923          * walk upwards until the topmost PCI bridge is found
 924          */
 925         return (DDI_WALK_CONTINUE);
 926 
 927 }
 928 
 929 static void
 930 map_unity_domain(domain_t *domain)
 931 {
 932         struct memlist *mp;
 933         uint64_t start;
 934         uint64_t npages;
 935         immu_dcookie_t dcookies[1] = {0};
 936         int dcount = 0;
 937 
 938         /*
 939          * UNITY arenas are a mirror of the physical memory
 940          * installed on the system.
 941          */
 942 
 943 #ifdef BUGGY_DRIVERS
 944         /*
 945          * Dont skip page0. Some broken HW/FW access it.
 946          */
 947         dcookies[0].dck_paddr = 0;
 948         dcookies[0].dck_npages = 1;
 949         dcount = 1;
 950         (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
 951             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
 952 #endif
 953 
 954         memlist_read_lock();
 955 
 956         mp = phys_install;
 957 
 958         if (mp->ml_address == 0) {
 959                 /* since we already mapped page1 above */
 960                 start = IMMU_PAGESIZE;
 961         } else {
 962                 start = mp->ml_address;
 963         }
 964         npages = mp->ml_size/IMMU_PAGESIZE + 1;
 965 
 966         dcookies[0].dck_paddr = start;
 967         dcookies[0].dck_npages = npages;
 968         dcount = 1;
 969         (void) dvma_map(domain, start, npages, dcookies,
 970             dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 971 
 972         ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
 973             " - 0x%" PRIx64 "]", start, start + mp->ml_size);
 974 
 975         mp = mp->ml_next;
 976         while (mp) {
 977                 ddi_err(DER_LOG, domain->dom_dip,
 978                     "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 979                     mp->ml_address, mp->ml_address + mp->ml_size);
 980 
 981                 start = mp->ml_address;
 982                 npages = mp->ml_size/IMMU_PAGESIZE + 1;
 983 
 984                 dcookies[0].dck_paddr = start;
 985                 dcookies[0].dck_npages = npages;
 986                 dcount = 1;
 987                 (void) dvma_map(domain, start, npages,
 988                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 989                 mp = mp->ml_next;
 990         }
 991 
 992         mp = bios_rsvd;
 993         while (mp) {
 994                 ddi_err(DER_LOG, domain->dom_dip,
 995                     "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 996                     mp->ml_address, mp->ml_address + mp->ml_size);
 997 
 998                 start = mp->ml_address;
 999                 npages = mp->ml_size/IMMU_PAGESIZE + 1;
1000 
1001                 dcookies[0].dck_paddr = start;
1002                 dcookies[0].dck_npages = npages;
1003                 dcount = 1;
1004                 (void) dvma_map(domain, start, npages,
1005                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
1006 
1007                 mp = mp->ml_next;
1008         }
1009 
1010         memlist_read_unlock();
1011 }
1012 
1013 /*
1014  * create_xlate_arena()
1015  *      Create the dvma arena for a domain with translation
1016  *      mapping
1017  */
1018 static void
1019 create_xlate_arena(immu_t *immu, domain_t *domain,
1020     dev_info_t *rdip, immu_flags_t immu_flags)
1021 {
1022         char *arena_name;
1023         struct memlist *mp;
1024         int vmem_flags;
1025         uint64_t start;
1026         uint_t mgaw;
1027         uint64_t size;
1028         uint64_t maxaddr;
1029         void *vmem_ret;
1030 
1031         arena_name = domain->dom_dvma_arena_name;
1032 
1033         /* Note, don't do sizeof (arena_name) - it is just a pointer */
1034         (void) snprintf(arena_name,
1035             sizeof (domain->dom_dvma_arena_name),
1036             "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
1037             domain->dom_did);
1038 
1039         vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
1040 
1041         /* Restrict mgaddr (max guest addr) to MGAW */
1042         mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
1043 
1044         /*
1045          * To ensure we avoid ioapic and PCI MMIO ranges we just
1046          * use the physical memory address range of the system as the
1047          * range
1048          */
1049         maxaddr = ((uint64_t)1 << mgaw);
1050 
1051         memlist_read_lock();
1052 
1053         mp = phys_install;
1054 
1055         if (mp->ml_address == 0)
1056                 start = MMU_PAGESIZE;
1057         else
1058                 start = mp->ml_address;
1059 
1060         if (start + mp->ml_size > maxaddr)
1061                 size = maxaddr - start;
1062         else
1063                 size = mp->ml_size;
1064 
1065         ddi_err(DER_VERB, rdip,
1066             "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
1067             " - 0x%" PRIx64 "]", arena_name, start, start + size);
1068 
1069         /*
1070          * We always allocate in quanta of IMMU_PAGESIZE
1071          */
1072         domain->dom_dvma_arena = vmem_create(arena_name,
1073             (void *)(uintptr_t)start,   /* start addr */
1074             size,                       /* size */
1075             IMMU_PAGESIZE,              /* quantum */
1076             NULL,                       /* afunc */
1077             NULL,                       /* ffunc */
1078             NULL,                       /* source */
1079             0,                          /* qcache_max */
1080             vmem_flags);
1081 
1082         if (domain->dom_dvma_arena == NULL) {
1083                 ddi_err(DER_PANIC, rdip,
1084                     "Failed to allocate DVMA arena(%s) "
1085                     "for domain ID (%d)", arena_name, domain->dom_did);
1086                 /*NOTREACHED*/
1087         }
1088 
1089         mp = mp->ml_next;
1090         while (mp) {
1091 
1092                 if (mp->ml_address == 0)
1093                         start = MMU_PAGESIZE;
1094                 else
1095                         start = mp->ml_address;
1096 
1097                 if (start + mp->ml_size > maxaddr)
1098                         size = maxaddr - start;
1099                 else
1100                         size = mp->ml_size;
1101 
1102                 ddi_err(DER_VERB, rdip,
1103                     "iommu: %s: Adding dvma vmem span [0x%" PRIx64
1104                     " - 0x%" PRIx64 "]", arena_name, start,
1105                     start + size);
1106 
1107                 vmem_ret = vmem_add(domain->dom_dvma_arena,
1108                     (void *)(uintptr_t)start, size,  vmem_flags);
1109 
1110                 if (vmem_ret == NULL) {
1111                         ddi_err(DER_PANIC, rdip,
1112                             "Failed to allocate DVMA arena(%s) "
1113                             "for domain ID (%d)",
1114                             arena_name, domain->dom_did);
1115                         /*NOTREACHED*/
1116                 }
1117                 mp = mp->ml_next;
1118         }
1119         memlist_read_unlock();
1120 }
1121 
1122 /* ################################### DOMAIN CODE ######################### */
1123 
1124 /*
1125  * Set the domain and domain-dip for a dip
1126  */
1127 static void
1128 set_domain(
1129         dev_info_t *dip,
1130         dev_info_t *ddip,
1131         domain_t *domain)
1132 {
1133         immu_devi_t *immu_devi;
1134         domain_t *fdomain;
1135         dev_info_t *fddip;
1136 
1137         immu_devi = immu_devi_get(dip);
1138 
1139         mutex_enter(&(DEVI(dip)->devi_lock));
1140         fddip = immu_devi->imd_ddip;
1141         fdomain = immu_devi->imd_domain;
1142 
1143         if (fddip) {
1144                 ASSERT(fddip == ddip);
1145         } else {
1146                 immu_devi->imd_ddip = ddip;
1147         }
1148 
1149         if (fdomain) {
1150                 ASSERT(fdomain == domain);
1151         } else {
1152                 immu_devi->imd_domain = domain;
1153         }
1154         mutex_exit(&(DEVI(dip)->devi_lock));
1155 }
1156 
1157 /*
1158  * device_domain()
1159  *      Get domain for a device. The domain may be global in which case it
1160  *      is shared between all IOMMU units. Due to potential AGAW differences
1161  *      between IOMMU units, such global domains *have to be* UNITY mapping
1162  *      domains. Alternatively, the domain may be local to a IOMMU unit.
1163  *      Local domains may be shared or immu_devi, although the
1164  *      scope of sharing
1165  *      is restricted to devices controlled by the IOMMU unit to
1166  *      which the domain
1167  *      belongs. If shared, they (currently) have to be UNITY domains. If
1168  *      immu_devi a domain may be either UNITY or translation (XLATE) domain.
1169  */
1170 static domain_t *
1171 device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
1172 {
1173         dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
1174         immu_t *immu;
1175         domain_t *domain;
1176         dvma_arg_t dvarg = {0};
1177         int level;
1178 
1179         *ddipp = NULL;
1180 
1181         /*
1182          * Check if the domain is already set. This is usually true
1183          * if this is not the first DVMA transaction.
1184          */
1185         ddip = NULL;
1186         domain = immu_devi_domain(rdip, &ddip);
1187         if (domain) {
1188                 *ddipp = ddip;
1189                 return (domain);
1190         }
1191 
1192         immu = immu_dvma_get_immu(rdip, immu_flags);
1193         if (immu == NULL) {
1194                 /*
1195                  * possible that there is no IOMMU unit for this device
1196                  * - BIOS bugs are one example.
1197                  */
1198                 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
1199                 return (NULL);
1200         }
1201 
1202         immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;
1203 
1204         dvarg.dva_rdip = rdip;
1205         dvarg.dva_ddip = NULL;
1206         dvarg.dva_domain = NULL;
1207         dvarg.dva_flags = immu_flags;
1208         level = 0;
1209         if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
1210             &dvarg, &level, immu_flags) != DDI_SUCCESS) {
1211                 /*
1212                  * maybe low memory. return error,
1213                  * so driver tries again later
1214                  */
1215                 return (NULL);
1216         }
1217 
1218         /* should have walked at least 1 dip (i.e. edip) */
1219         ASSERT(level > 0);
1220 
1221         ddip = dvarg.dva_ddip;  /* must be present */
1222         domain = dvarg.dva_domain;      /* may be NULL */
1223 
1224         /*
1225          * We may find the domain during our ancestor walk on any one of our
1226          * ancestor dips, If the domain is found then the domain-dip
1227          * (i.e. ddip) will also be found in the same immu_devi struct.
1228          * The domain-dip is the highest ancestor dip which shares the
1229          * same domain with edip.
1230          * The domain may or may not be found, but the domain dip must
1231          * be found.
1232          */
1233         if (ddip == NULL) {
1234                 ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
1235                 return (NULL);
1236         }
1237 
1238         /*
1239          * Did we find a domain ?
1240          */
1241         if (domain) {
1242                 goto found;
1243         }
1244 
1245         /* nope, so allocate */
1246         domain = domain_create(immu, ddip, rdip, immu_flags);
1247         if (domain == NULL) {
1248                 return (NULL);
1249         }
1250 
1251         /*FALLTHROUGH*/
1252 found:
1253         /*
1254          * We know *domain *is* the right domain, so panic if
1255          * another domain is set for either the request-dip or
1256          * effective dip.
1257          */
1258         set_domain(ddip, ddip, domain);
1259         set_domain(rdip, ddip, domain);
1260 
1261         *ddipp = ddip;
1262         return (domain);
1263 }
1264 
1265 static void
1266 create_unity_domain(immu_t *immu)
1267 {
1268         domain_t *domain;
1269 
1270         /* domain created during boot and always use sleep flag */
1271         domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1272 
1273         rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1274 
1275         domain->dom_did = IMMU_UNITY_DID;
1276         domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1277 
1278         domain->dom_immu = immu;
1279         immu->immu_unity_domain = domain;
1280 
1281         /*
1282          * Setup the domain's initial page table
1283          * should never fail.
1284          */
1285         domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1286         pgtable_zero(domain->dom_pgtable_root);
1287 
1288         /*
1289          * Only map all physical memory in to the unity domain
1290          * if passthrough is not supported. If it is supported,
1291          * passthrough is set in the context entry instead.
1292          */
1293         if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1294                 map_unity_domain(domain);
1295 
1296 
1297         /*
1298          * put it on the system-wide UNITY domain list
1299          */
1300         mutex_enter(&(immu_domain_lock));
1301         list_insert_tail(&immu_unity_domain_list, domain);
1302         mutex_exit(&(immu_domain_lock));
1303 }
1304 
1305 /*
1306  * ddip is the domain-dip - the topmost dip in a domain
1307  * rdip is the requesting-dip - the device which is
1308  * requesting DVMA setup
1309  * if domain is a non-shared domain rdip == ddip
1310  */
1311 static domain_t *
1312 domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
1313     immu_flags_t immu_flags)
1314 {
1315         int kmflags;
1316         domain_t *domain;
1317         char mod_hash_name[128];
1318         immu_devi_t *immu_devi;
1319         int did;
1320         immu_dcookie_t dcookies[1] = {0};
1321         int dcount = 0;
1322 
1323         immu_devi = immu_devi_get(rdip);
1324 
1325         /*
1326          * First allocate a domainid.
1327          * This routine will never fail, since if we run out
1328          * of domains the unity domain will be allocated.
1329          */
1330         did = did_alloc(immu, rdip, ddip, immu_flags);
1331         if (did == IMMU_UNITY_DID) {
1332                 /* domain overflow */
1333                 ASSERT(immu->immu_unity_domain);
1334                 return (immu->immu_unity_domain);
1335         }
1336 
1337         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
1338         domain = kmem_zalloc(sizeof (domain_t), kmflags);
1339         if (domain == NULL) {
1340                 ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
1341                     "structure for device. IOMMU unit: %s", immu->immu_name);
1342                 /*NOTREACHED*/
1343         }
1344 
1345         rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1346 
1347         (void) snprintf(mod_hash_name, sizeof (mod_hash_name),
1348             "immu%s-domain%d-pava-hash", immu->immu_name, did);
1349 
1350         domain->dom_did = did;
1351         domain->dom_immu = immu;
1352         domain->dom_maptype = IMMU_MAPTYPE_XLATE;
1353         domain->dom_dip = ddip;
1354 
1355         /*
1356          * Create xlate DVMA arena for this domain.
1357          */
1358         create_xlate_arena(immu, domain, rdip, immu_flags);
1359 
1360         /*
1361          * Setup the domain's initial page table
1362          */
1363         domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
1364         if (domain->dom_pgtable_root == NULL) {
1365                 ddi_err(DER_PANIC, rdip, "Failed to alloc root "
1366                     "pgtable for domain (%d). IOMMU unit: %s",
1367                     domain->dom_did, immu->immu_name);
1368                 /*NOTREACHED*/
1369         }
1370         pgtable_zero(domain->dom_pgtable_root);
1371 
1372         /*
1373          * Since this is a immu unit-specific domain, put it on
1374          * the per-immu domain list.
1375          */
1376         mutex_enter(&(immu->immu_lock));
1377         list_insert_head(&immu->immu_domain_list, domain);
1378         mutex_exit(&(immu->immu_lock));
1379 
1380         /*
1381          * Also put it on the system-wide xlate domain list
1382          */
1383         mutex_enter(&(immu_domain_lock));
1384         list_insert_head(&immu_xlate_domain_list, domain);
1385         mutex_exit(&(immu_domain_lock));
1386 
1387         bdf_domain_insert(immu_devi, domain);
1388 
1389 #ifdef BUGGY_DRIVERS
1390         /*
1391          * Map page0. Some broken HW/FW access it.
1392          */
1393         dcookies[0].dck_paddr = 0;
1394         dcookies[0].dck_npages = 1;
1395         dcount = 1;
1396         (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
1397             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
1398 #endif
1399         return (domain);
1400 }
1401 
1402 /*
1403  * Create domainid arena.
1404  * Domainid 0 is reserved by Vt-d spec and cannot be used by
1405  * system software.
1406  * Domainid 1 is reserved by solaris and used for *all* of the following:
1407  *      as the "uninitialized" domain - For devices not yet controlled
1408  *      by Solaris
1409  *      as the "unity" domain - For devices that will always belong
1410  *      to the unity domain
1411  *      as the "overflow" domain - Used for any new device after we
1412  *      run out of domains
1413  * All of the above domains map into a single domain with
1414  * domainid 1 and UNITY DVMA mapping
1415  * Each IMMU unity has its own unity/uninit/overflow domain
1416  */
1417 static void
1418 did_init(immu_t *immu)
1419 {
1420         (void) snprintf(immu->immu_did_arena_name,
1421             sizeof (immu->immu_did_arena_name),
1422             "%s_domainid_arena", immu->immu_name);
1423 
1424         ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
1425             immu->immu_did_arena_name);
1426 
1427         immu->immu_did_arena = vmem_create(
1428             immu->immu_did_arena_name,
1429             (void *)(uintptr_t)(IMMU_UNITY_DID + 1),   /* start addr */
1430             immu->immu_max_domains - IMMU_UNITY_DID,
1431             1,                          /* quantum */
1432             NULL,                       /* afunc */
1433             NULL,                       /* ffunc */
1434             NULL,                       /* source */
1435             0,                          /* qcache_max */
1436             VM_SLEEP);
1437 
1438         /* Even with SLEEP flag, vmem_create() can fail */
1439         if (immu->immu_did_arena == NULL) {
1440                 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
1441                     "IOMMU domainid allocator: %s", immu->immu_name,
1442                     immu->immu_did_arena_name);
1443         }
1444 }
1445 
1446 /* #########################  CONTEXT CODE ################################# */
1447 
1448 static void
1449 context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
1450     int bus, int devfunc)
1451 {
1452         pgtable_t *context;
1453         pgtable_t *pgtable_root;
1454         hw_rce_t *hw_rent;
1455         hw_rce_t *hw_cent;
1456         hw_rce_t *ctxp;
1457         int sid;
1458         krw_t rwtype;
1459         boolean_t fill_root;
1460         boolean_t fill_ctx;
1461 
1462         pgtable_root = domain->dom_pgtable_root;
1463 
1464         ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1465         context = *(pgtable_t **)(ctxp + bus);
1466         hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
1467 
1468         fill_root = B_FALSE;
1469         fill_ctx = B_FALSE;
1470 
1471         /* Check the most common case first with reader lock */
1472         rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
1473         rwtype = RW_READER;
1474 again:
1475         if (ROOT_GET_P(hw_rent)) {
1476                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1477                 if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
1478                         rw_exit(&(immu->immu_ctx_rwlock));
1479                         return;
1480                 } else {
1481                         fill_ctx = B_TRUE;
1482                 }
1483         } else {
1484                 fill_root = B_TRUE;
1485                 fill_ctx = B_TRUE;
1486         }
1487 
1488         if (rwtype == RW_READER &&
1489             rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
1490                 rw_exit(&(immu->immu_ctx_rwlock));
1491                 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1492                 rwtype = RW_WRITER;
1493                 goto again;
1494         }
1495         rwtype = RW_WRITER;
1496 
1497         if (fill_root == B_TRUE) {
1498                 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1499                 ROOT_SET_P(hw_rent);
1500                 immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
1501         }
1502 
1503         if (fill_ctx == B_TRUE) {
1504                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1505                 /* need to disable context entry before reprogramming it */
1506                 bzero(hw_cent, sizeof (hw_rce_t));
1507 
1508                 /* flush caches */
1509                 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1510 
1511                 sid = ((bus << 8) | devfunc);
1512                 immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
1513                     &immu->immu_ctx_inv_wait);
1514 
1515                 CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
1516                 CONT_SET_DID(hw_cent, domain->dom_did);
1517                 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1518                 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1519                 if (domain->dom_did == IMMU_UNITY_DID &&
1520                     IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1521                         CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1522                 else
1523                         /*LINTED*/
1524                         CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1525                 CONT_SET_P(hw_cent);
1526                 if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
1527                         CONT_SET_EH(hw_cent);
1528                         if (immu_use_alh)
1529                                 CONT_SET_ALH(hw_cent);
1530                 }
1531                 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1532         }
1533         rw_exit(&(immu->immu_ctx_rwlock));
1534 }
1535 
1536 static pgtable_t *
1537 context_create(immu_t *immu)
1538 {
1539         int     bus;
1540         int     devfunc;
1541         pgtable_t *root_table;
1542         pgtable_t *context;
1543         pgtable_t *pgtable_root;
1544         hw_rce_t *ctxp;
1545         hw_rce_t *hw_rent;
1546         hw_rce_t *hw_cent;
1547 
1548         /* Allocate a zeroed root table (4K 256b entries) */
1549         root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1550         pgtable_zero(root_table);
1551 
1552         /*
1553          * Setup context tables for all possible root table entries.
1554          * Start out with unity domains for all entries.
1555          */
1556         ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1557         hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
1558         for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
1559                 context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1560                 pgtable_zero(context);
1561                 ROOT_SET_P(hw_rent);
1562                 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1563                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
1564                 for (devfunc = 0; devfunc < IMMU_CONT_NUM;
1565                     devfunc++, hw_cent++) {
1566                         pgtable_root =
1567                             immu->immu_unity_domain->dom_pgtable_root;
1568                         CONT_SET_DID(hw_cent,
1569                             immu->immu_unity_domain->dom_did);
1570                         CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1571                         CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1572                         if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1573                                 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1574                         else
1575                                 /*LINTED*/
1576                                 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1577                         CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
1578                         CONT_SET_P(hw_cent);
1579                 }
1580                 immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
1581                 *((pgtable_t **)ctxp) = context;
1582         }
1583 
1584         return (root_table);
1585 }
1586 
1587 /*
1588  * Called during rootnex attach, so no locks needed
1589  */
1590 static void
1591 context_init(immu_t *immu)
1592 {
1593         rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
1594 
1595         immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);
1596 
1597         immu_regs_wbf_flush(immu);
1598 
1599         immu->immu_ctx_root = context_create(immu);
1600 
1601         immu_regs_set_root_table(immu);
1602 
1603         rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1604         immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1605         immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1606         rw_exit(&(immu->immu_ctx_rwlock));
1607 }
1608 
1609 
1610 /*
1611  * Find top pcib
1612  */
1613 static int
1614 find_top_pcib(dev_info_t *dip, void *arg)
1615 {
1616         immu_devi_t *immu_devi;
1617         dev_info_t **pcibdipp = (dev_info_t **)arg;
1618 
1619         immu_devi = immu_devi_get(dip);
1620 
1621         if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1622                 *pcibdipp = dip;
1623         }
1624 
1625         return (DDI_WALK_CONTINUE);
1626 }
1627 
1628 static int
1629 immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
1630     dev_info_t *rdip, immu_flags_t immu_flags)
1631 {
1632         immu_devi_t *r_immu_devi;
1633         immu_devi_t *d_immu_devi;
1634         int r_bus;
1635         int d_bus;
1636         int r_devfunc;
1637         int d_devfunc;
1638         immu_pcib_t d_pcib_type;
1639         dev_info_t *pcibdip;
1640 
1641         if (ddip == NULL || rdip == NULL ||
1642             ddip == root_devinfo || rdip == root_devinfo) {
1643                 ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
1644                     "request-dip are NULL or are root devinfo");
1645                 return (DDI_FAILURE);
1646         }
1647 
1648         /*
1649          * We need to set the context fields
1650          * based on what type of device rdip and ddip are.
1651          * To do that we need the immu_devi field.
1652          * Set the immu_devi field (if not already set)
1653          */
1654         if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
1655                 ddi_err(DER_MODE, rdip,
1656                     "immu_context_update: failed to set immu_devi for ddip");
1657                 return (DDI_FAILURE);
1658         }
1659 
1660         if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
1661                 ddi_err(DER_MODE, rdip,
1662                     "immu_context_update: failed to set immu_devi for rdip");
1663                 return (DDI_FAILURE);
1664         }
1665 
1666         d_immu_devi = immu_devi_get(ddip);
1667         r_immu_devi = immu_devi_get(rdip);
1668 
1669         d_bus = d_immu_devi->imd_bus;
1670         d_devfunc = d_immu_devi->imd_devfunc;
1671         d_pcib_type = d_immu_devi->imd_pcib_type;
1672         r_bus = r_immu_devi->imd_bus;
1673         r_devfunc = r_immu_devi->imd_devfunc;
1674 
1675         if (rdip == ddip) {
1676                 /* rdip is a PCIE device. set context for it only */
1677                 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1678                     r_devfunc);
1679 #ifdef BUGGY_DRIVERS
1680         } else if (r_immu_devi == d_immu_devi) {
1681 #ifdef TEST
1682                 ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
1683                     "0x%lx are identical", rdip, ddip);
1684 #endif
1685                 /* rdip is a PCIE device. set context for it only */
1686                 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1687                     r_devfunc);
1688 #endif
1689         } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
1690                 /*
1691                  * ddip is a PCIE_PCI bridge. Set context for ddip's
1692                  * secondary bus. If rdip is on ddip's secondary
1693                  * bus, set context for rdip. Else, set context
1694                  * for rdip's PCI bridge on ddip's secondary bus.
1695                  */
1696                 context_set(immu, domain, immu->immu_ctx_root,
1697                     d_immu_devi->imd_sec, 0);
1698                 if (d_immu_devi->imd_sec == r_bus) {
1699                         context_set(immu, domain, immu->immu_ctx_root,
1700                             r_bus, r_devfunc);
1701                 } else {
1702                         pcibdip = NULL;
1703                         if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
1704                             &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
1705                             pcibdip != NULL) {
1706                                 r_immu_devi = immu_devi_get(pcibdip);
1707                                 r_bus = r_immu_devi->imd_bus;
1708                                 r_devfunc = r_immu_devi->imd_devfunc;
1709                                 context_set(immu, domain, immu->immu_ctx_root,
1710                                     r_bus, r_devfunc);
1711                         } else {
1712                                 ddi_err(DER_PANIC, rdip, "Failed to find PCI "
1713                                     " bridge for PCI device");
1714                                 /*NOTREACHED*/
1715                         }
1716                 }
1717         } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
1718                 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1719                     d_devfunc);
1720         } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
1721                 /*
1722                  * ddip is a PCIE device which has a non-PCI device under it
1723                  * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
1724                  */
1725                 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1726                     d_devfunc);
1727         } else {
1728                 ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
1729                     "set iommu context.");
1730                 /*NOTREACHED*/
1731         }
1732 
1733         /* XXX do we need a membar_producer() here */
1734         return (DDI_SUCCESS);
1735 }
1736 
1737 /* ##################### END CONTEXT CODE ################################## */
1738 /* ##################### MAPPING CODE ################################## */
1739 
1740 
1741 #ifdef DEBUG
1742 static boolean_t
1743 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
1744     dev_info_t *rdip, immu_flags_t immu_flags)
1745 {
1746         /* The PDTE must be set i.e. present bit is set */
1747         if (!PDTE_P(pdte)) {
1748                 ddi_err(DER_MODE, rdip, "No present flag");
1749                 return (B_FALSE);
1750         }
1751 
1752         /*
1753          * Just assert to check most significant system software field
1754          * (PDTE_SW4) as it is same as present bit and we
1755          * checked that above
1756          */
1757         ASSERT(PDTE_SW4(pdte));
1758 
1759         /*
1760          * TM field should be clear if not reserved.
1761          * non-leaf is always reserved
1762          */
1763         if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
1764                 if (PDTE_TM(pdte)) {
1765                         ddi_err(DER_MODE, rdip, "TM flag set");
1766                         return (B_FALSE);
1767                 }
1768         }
1769 
1770         /*
1771          * The SW3 field is not used and must be clear
1772          */
1773         if (PDTE_SW3(pdte)) {
1774                 ddi_err(DER_MODE, rdip, "SW3 set");
1775                 return (B_FALSE);
1776         }
1777 
1778         /*
1779          * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
1780          */
1781         if (next == NULL) {
1782                 ASSERT(paddr % IMMU_PAGESIZE == 0);
1783                 if (PDTE_PADDR(pdte) != paddr) {
1784                         ddi_err(DER_MODE, rdip,
1785                             "PTE paddr mismatch: %lx != %lx",
1786                             PDTE_PADDR(pdte), paddr);
1787                         return (B_FALSE);
1788                 }
1789         } else {
1790                 if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
1791                         ddi_err(DER_MODE, rdip,
1792                             "PDE paddr mismatch: %lx != %lx",
1793                             PDTE_PADDR(pdte), next->hwpg_paddr);
1794                         return (B_FALSE);
1795                 }
1796         }
1797 
1798         /*
1799          * SNP field should be clear if not reserved.
1800          * non-leaf is always reserved
1801          */
1802         if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
1803                 if (PDTE_SNP(pdte)) {
1804                         ddi_err(DER_MODE, rdip, "SNP set");
1805                         return (B_FALSE);
1806                 }
1807         }
1808 
1809         /* second field available for system software should be clear */
1810         if (PDTE_SW2(pdte)) {
1811                 ddi_err(DER_MODE, rdip, "SW2 set");
1812                 return (B_FALSE);
1813         }
1814 
1815         /* Super pages field should be clear */
1816         if (PDTE_SP(pdte)) {
1817                 ddi_err(DER_MODE, rdip, "SP set");
1818                 return (B_FALSE);
1819         }
1820 
1821         /*
1822          * least significant field available for
1823          * system software should be clear
1824          */
1825         if (PDTE_SW1(pdte)) {
1826                 ddi_err(DER_MODE, rdip, "SW1 set");
1827                 return (B_FALSE);
1828         }
1829 
1830         if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
1831                 ddi_err(DER_MODE, rdip, "READ not set");
1832                 return (B_FALSE);
1833         }
1834 
1835         if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
1836                 ddi_err(DER_MODE, rdip, "WRITE not set");
1837                 return (B_FALSE);
1838         }
1839 
1840         return (B_TRUE);
1841 }
1842 #endif
1843 
1844 /*ARGSUSED*/
1845 static void
1846 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1847     uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1848 {
1849         uint64_t npages;
1850         uint64_t dvma;
1851         pgtable_t *pgtable;
1852         hw_pdte_t *hwp;
1853         hw_pdte_t *shwp;
1854         int idx;
1855 
1856         pgtable = xlate->xlt_pgtable;
1857         idx = xlate->xlt_idx;
1858 
1859         dvma = *dvma_ptr;
1860         npages = *npages_ptr;
1861 
1862         /*
1863          * since a caller gets a unique dvma for a physical address,
1864          * no other concurrent thread will be writing to the same
1865          * PTE even if it has the same paddr. So no locks needed.
1866          */
1867         shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1868 
1869         hwp = shwp;
1870         for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1871                 PDTE_CLEAR_P(*hwp);
1872                 dvma += IMMU_PAGESIZE;
1873                 npages--;
1874         }
1875 
1876         *dvma_ptr = dvma;
1877         *npages_ptr = npages;
1878 
1879         xlate->xlt_idx = idx;
1880 }
1881 
1882 static void
1883 xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1884 {
1885         int level;
1886         uint64_t offbits;
1887 
1888         /*
1889          * Skip the first 12 bits which is the offset into
1890          * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1891          */
1892         offbits = dvma >> IMMU_PAGESHIFT;
1893 
1894         /* skip to level 1 i.e. leaf PTE */
1895         for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1896                 xlate->xlt_level = level;
1897                 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1898                 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1899                 xlate->xlt_pgtable = NULL;
1900                 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1901         }
1902 }
1903 
1904 /*
1905  * Read the pgtables
1906  */
1907 static boolean_t
1908 PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
1909 {
1910         pgtable_t *pgtable;
1911         pgtable_t *next;
1912         uint_t idx;
1913 
1914         /* start with highest level pgtable i.e. root */
1915         xlate += nlevels;
1916 
1917         if (xlate->xlt_pgtable == NULL) {
1918                 xlate->xlt_pgtable = domain->dom_pgtable_root;
1919         }
1920 
1921         for (; xlate->xlt_level > 1; xlate--) {
1922                 idx = xlate->xlt_idx;
1923                 pgtable = xlate->xlt_pgtable;
1924 
1925                 if ((xlate - 1)->xlt_pgtable) {
1926                         continue;
1927                 }
1928 
1929                 /* Lock the pgtable in read mode */
1930                 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
1931 
1932                 /*
1933                  * since we are unmapping, the pgtable should
1934                  * already point to a leafier pgtable.
1935                  */
1936                 next = *(pgtable->swpg_next_array + idx);
1937                 (xlate - 1)->xlt_pgtable = next;
1938                 rw_exit(&(pgtable->swpg_rwlock));
1939                 if (next == NULL)
1940                         return (B_FALSE);
1941         }
1942 
1943         return (B_TRUE);
1944 }
1945 
1946 static void
1947 immu_fault_walk(void *arg, void *base, size_t len)
1948 {
1949         uint64_t dvma, start;
1950 
1951         dvma = *(uint64_t *)arg;
1952         start = (uint64_t)(uintptr_t)base;
1953 
1954         if (dvma >= start && dvma < (start + len)) {
1955                 ddi_err(DER_WARN, NULL,
1956                     "faulting DVMA address is in vmem arena "
1957                     "(%" PRIx64 "-%" PRIx64 ")",
1958                     start, start + len);
1959                 *(uint64_t *)arg = ~0ULL;
1960         }
1961 }
1962 
1963 void
1964 immu_print_fault_info(uint_t sid, uint64_t dvma)
1965 {
1966         int nlevels;
1967         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1968         xlate_t *xlatep;
1969         hw_pdte_t pte;
1970         domain_t *domain;
1971         immu_t *immu;
1972         uint64_t dvma_arg;
1973 
1974         if (mod_hash_find(bdf_domain_hash,
1975             (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1976                 ddi_err(DER_WARN, NULL,
1977                     "no domain for faulting SID %08x", sid);
1978                 return;
1979         }
1980 
1981         immu = domain->dom_immu;
1982 
1983         dvma_arg = dvma;
1984         vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1985             (void *)&dvma_arg);
1986         if (dvma_arg != ~0ULL)
1987                 ddi_err(DER_WARN, domain->dom_dip,
1988                     "faulting DVMA address is not in vmem arena");
1989 
1990         nlevels = immu->immu_dvma_nlevels;
1991         xlate_setup(dvma, xlate, nlevels);
1992 
1993         if (!PDE_lookup(domain, xlate, nlevels)) {
1994                 ddi_err(DER_WARN, domain->dom_dip,
1995                     "pte not found in domid %d for faulting addr %" PRIx64,
1996                     domain->dom_did, dvma);
1997                 return;
1998         }
1999 
2000         xlatep = &xlate[1];
2001         pte = *((hw_pdte_t *)
2002             (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2003 
2004         ddi_err(DER_WARN, domain->dom_dip,
2005             "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2006             (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2007 }
2008 
2009 /*ARGSUSED*/
2010 static void
2011 PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
2012     dev_info_t *rdip, immu_flags_t immu_flags)
2013 {
2014         hw_pdte_t pte;
2015 
2016 #ifndef DEBUG
2017         pte = immu->immu_ptemask;
2018         PDTE_SET_PADDR(pte, paddr);
2019 #else
2020         pte = *hwp;
2021 
2022         if (PDTE_P(pte)) {
2023                 if (PDTE_PADDR(pte) != paddr) {
2024                         ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
2025                             PDTE_PADDR(pte), paddr);
2026                 }
2027 #ifdef BUGGY_DRIVERS
2028                 return;
2029 #else
2030                 goto out;
2031 #endif
2032         }
2033 
2034         /* clear TM field if not reserved */
2035         if (immu->immu_TM_reserved == B_FALSE) {
2036                 PDTE_CLEAR_TM(pte);
2037         }
2038 
2039         /* Clear 3rd field for system software  - not used */
2040         PDTE_CLEAR_SW3(pte);
2041 
2042         /* Set paddr */
2043         ASSERT(paddr % IMMU_PAGESIZE == 0);
2044         PDTE_CLEAR_PADDR(pte);
2045         PDTE_SET_PADDR(pte, paddr);
2046 
2047         /*  clear SNP field if not reserved. */
2048         if (immu->immu_SNP_reserved == B_FALSE) {
2049                 PDTE_CLEAR_SNP(pte);
2050         }
2051 
2052         /* Clear SW2 field available for software */
2053         PDTE_CLEAR_SW2(pte);
2054 
2055 
2056         /* SP is don't care for PTEs. Clear it for cleanliness */
2057         PDTE_CLEAR_SP(pte);
2058 
2059         /* Clear SW1 field available for software */
2060         PDTE_CLEAR_SW1(pte);
2061 
2062         /*
2063          * Now that we are done writing the PTE
2064          * set the "present" flag. Note this present
2065          * flag is a bit in the PDE/PTE that the
2066          * spec says is available for system software.
2067          * This is an implementation detail of Solaris
2068          * bare-metal Intel IOMMU.
2069          * The present field in a PDE/PTE is not defined
2070          * by the Vt-d spec
2071          */
2072 
2073         PDTE_SET_P(pte);
2074 
2075         pte |= immu->immu_ptemask;
2076 
2077 out:
2078 #endif /* DEBUG */
2079 #ifdef BUGGY_DRIVERS
2080         PDTE_SET_READ(pte);
2081         PDTE_SET_WRITE(pte);
2082 #else
2083         if (immu_flags & IMMU_FLAGS_READ)
2084                 PDTE_SET_READ(pte);
2085         if (immu_flags & IMMU_FLAGS_WRITE)
2086                 PDTE_SET_WRITE(pte);
2087 #endif /* BUGGY_DRIVERS */
2088 
2089         *hwp = pte;
2090 }
2091 
2092 /*ARGSUSED*/
2093 static void
2094 PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
2095     uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
2096     int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
2097 {
2098         paddr_t paddr;
2099         uint64_t nvpages;
2100         uint64_t nppages;
2101         uint64_t dvma;
2102         pgtable_t *pgtable;
2103         hw_pdte_t *hwp;
2104         hw_pdte_t *shwp;
2105         int idx, nset;
2106         int j;
2107 
2108         pgtable = xlate->xlt_pgtable;
2109         idx = xlate->xlt_idx;
2110 
2111         dvma = *dvma_ptr;
2112         nvpages = *nvpages_ptr;
2113 
2114         /*
2115          * since a caller gets a unique dvma for a physical address,
2116          * no other concurrent thread will be writing to the same
2117          * PTE even if it has the same paddr. So no locks needed.
2118          */
2119         shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2120 
2121         hwp = shwp;
2122         for (j = dcount - 1; j >= 0; j--) {
2123                 if (nvpages <= dcookies[j].dck_npages)
2124                         break;
2125                 nvpages -= dcookies[j].dck_npages;
2126         }
2127 
2128         nppages = nvpages;
2129         paddr = dcookies[j].dck_paddr +
2130             (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
2131 
2132         nvpages = *nvpages_ptr;
2133         nset = 0;
2134         for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
2135                 PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
2136                 nset++;
2137 
2138                 ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
2139                     == B_TRUE);
2140                 nppages--;
2141                 nvpages--;
2142                 paddr += IMMU_PAGESIZE;
2143                 dvma += IMMU_PAGESIZE;
2144 
2145                 if (nppages == 0) {
2146                         j++;
2147                 }
2148 
2149                 if (j == dcount)
2150                         break;
2151 
2152                 if (nppages == 0) {
2153                         nppages = dcookies[j].dck_npages;
2154                         paddr = dcookies[j].dck_paddr;
2155                 }
2156         }
2157 
2158         if (nvpages) {
2159                 *dvma_ptr = dvma;
2160                 *nvpages_ptr = nvpages;
2161         } else {
2162                 *dvma_ptr = 0;
2163                 *nvpages_ptr = 0;
2164         }
2165 
2166         xlate->xlt_idx = idx;
2167 }
2168 
2169 /*ARGSUSED*/
2170 static void
2171 PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
2172     dev_info_t *rdip, immu_flags_t immu_flags)
2173 {
2174         hw_pdte_t pde;
2175 
2176         pde = *hwp;
2177 
2178         /* if PDE is already set, make sure it is correct */
2179         if (PDTE_P(pde)) {
2180                 ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
2181 #ifdef BUGGY_DRIVERS
2182                 return;
2183 #else
2184                 goto out;
2185 #endif
2186         }
2187 
2188         /* Dont touch SW4, it is the present bit */
2189 
2190         /* don't touch TM field it is reserved for PDEs */
2191 
2192         /* 3rd field available for system software is not used */
2193         PDTE_CLEAR_SW3(pde);
2194 
2195         /* Set next level pgtable-paddr for PDE */
2196         PDTE_CLEAR_PADDR(pde);
2197         PDTE_SET_PADDR(pde, next->hwpg_paddr);
2198 
2199         /* don't touch SNP field it is reserved for PDEs */
2200 
2201         /* Clear second field available for system software */
2202         PDTE_CLEAR_SW2(pde);
2203 
2204         /* No super pages for PDEs */
2205         PDTE_CLEAR_SP(pde);
2206 
2207         /* Clear SW1 for software */
2208         PDTE_CLEAR_SW1(pde);
2209 
2210         /*
2211          * Now that we are done writing the PDE
2212          * set the "present" flag. Note this present
2213          * flag is a bit in the PDE/PTE that the
2214          * spec says is available for system software.
2215          * This is an implementation detail of Solaris
2216          * base-metal Intel IOMMU.
2217          * The present field in a PDE/PTE is not defined
2218          * by the Vt-d spec
2219          */
2220 
2221 out:
2222 #ifdef  BUGGY_DRIVERS
2223         PDTE_SET_READ(pde);
2224         PDTE_SET_WRITE(pde);
2225 #else
2226         if (immu_flags & IMMU_FLAGS_READ)
2227                 PDTE_SET_READ(pde);
2228         if (immu_flags & IMMU_FLAGS_WRITE)
2229                 PDTE_SET_WRITE(pde);
2230 #endif
2231 
2232         PDTE_SET_P(pde);
2233 
2234         *hwp = pde;
2235 }
2236 
2237 /*
2238  * Used to set PDEs
2239  */
2240 static boolean_t
2241 PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
2242     dev_info_t *rdip, immu_flags_t immu_flags)
2243 {
2244         pgtable_t *pgtable;
2245         pgtable_t *new;
2246         pgtable_t *next;
2247         hw_pdte_t *hwp;
2248         int level;
2249         uint_t idx;
2250         krw_t rwtype;
2251         boolean_t set = B_FALSE;
2252 
2253         /* start with highest level pgtable i.e. root */
2254         xlate += nlevels;
2255 
2256         new = NULL;
2257         xlate->xlt_pgtable = domain->dom_pgtable_root;
2258         for (level = nlevels; level > 1; level--, xlate--) {
2259                 idx = xlate->xlt_idx;
2260                 pgtable = xlate->xlt_pgtable;
2261 
2262                 /* Lock the pgtable in READ mode first */
2263                 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
2264                 rwtype = RW_READER;
2265 again:
2266                 hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2267                 next = (pgtable->swpg_next_array)[idx];
2268 
2269                 /*
2270                  * check if leafier level already has a pgtable
2271                  * if yes, verify
2272                  */
2273                 if (next == NULL) {
2274                         if (new == NULL) {
2275 
2276                                 IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
2277                                     rdip, int, level);
2278 
2279                                 new = pgtable_alloc(immu, immu_flags);
2280                                 if (new == NULL) {
2281                                         ddi_err(DER_PANIC, rdip,
2282                                             "pgtable alloc err");
2283                                 }
2284                                 pgtable_zero(new);
2285                         }
2286 
2287                         /* Change to a write lock */
2288                         if (rwtype == RW_READER &&
2289                             rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
2290                                 rw_exit(&(pgtable->swpg_rwlock));
2291                                 rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
2292                                 rwtype = RW_WRITER;
2293                                 goto again;
2294                         }
2295                         rwtype = RW_WRITER;
2296                         next = new;
2297                         (pgtable->swpg_next_array)[idx] = next;
2298                         new = NULL;
2299                         PDE_set_one(immu, hwp, next, rdip, immu_flags);
2300                         set = B_TRUE;
2301                         rw_downgrade(&(pgtable->swpg_rwlock));
2302                         rwtype = RW_READER;
2303                 }
2304 #ifndef  BUGGY_DRIVERS
2305                 else {
2306                         hw_pdte_t pde = *hwp;
2307 
2308                         /*
2309                          * If buggy driver we already set permission
2310                          * READ+WRITE so nothing to do for that case
2311                          * XXX Check that read writer perms change before
2312                          * actually setting perms. Also need to hold lock
2313                          */
2314                         if (immu_flags & IMMU_FLAGS_READ)
2315                                 PDTE_SET_READ(pde);
2316                         if (immu_flags & IMMU_FLAGS_WRITE)
2317                                 PDTE_SET_WRITE(pde);
2318 
2319                         *hwp = pde;
2320                 }
2321 #endif
2322 
2323                 ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
2324                     == B_TRUE);
2325 
2326                 (xlate - 1)->xlt_pgtable = next;
2327                 rw_exit(&(pgtable->swpg_rwlock));
2328         }
2329 
2330         if (new) {
2331                 pgtable_free(immu, new);
2332         }
2333 
2334         return (set);
2335 }
2336 
2337 /*
2338  * dvma_map()
2339  *     map a contiguous range of DVMA pages
2340  *
2341  *     immu: IOMMU unit for which we are generating DVMA cookies
2342  *   domain: domain
2343  *    sdvma: Starting dvma
2344  *   spaddr: Starting paddr
2345  *   npages: Number of pages
2346  *     rdip: requesting device
2347  *     immu_flags: flags
2348  */
2349 static boolean_t
2350 dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2351     immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2352     immu_flags_t immu_flags)
2353 {
2354         uint64_t dvma;
2355         uint64_t n;
2356         immu_t *immu = domain->dom_immu;
2357         int nlevels = immu->immu_dvma_nlevels;
2358         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2359         boolean_t pde_set = B_FALSE;
2360 
2361         n = snvpages;
2362         dvma = sdvma;
2363 
2364         while (n > 0) {
2365                 xlate_setup(dvma, xlate, nlevels);
2366 
2367                 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2368                 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2369                     == B_TRUE) {
2370                         pde_set = B_TRUE;
2371                 }
2372 
2373                 /* set all matching ptes that fit into this leaf pgtable */
2374                 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2375                     dcount, rdip, immu_flags);
2376         }
2377 
2378         return (pde_set);
2379 }
2380 
2381 /*
2382  * dvma_unmap()
2383  *   unmap a range of DVMAs
2384  *
2385  * immu: IOMMU unit state
2386  * domain: domain for requesting device
2387  * ddip: domain-dip
2388  * dvma: starting DVMA
2389  * npages: Number of IMMU pages to be unmapped
2390  * rdip: requesting device
2391  */
2392 static void
2393 dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2394     dev_info_t *rdip)
2395 {
2396         immu_t *immu = domain->dom_immu;
2397         int nlevels = immu->immu_dvma_nlevels;
2398         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2399         uint64_t n;
2400         uint64_t dvma;
2401 
2402         dvma = sdvma;
2403         n = snpages;
2404 
2405         while (n > 0) {
2406                 /* setup the xlate array */
2407                 xlate_setup(dvma, xlate, nlevels);
2408 
2409                 /* just lookup existing pgtables. Should never fail */
2410                 if (!PDE_lookup(domain, xlate, nlevels))
2411                         ddi_err(DER_PANIC, rdip,
2412                             "PTE not found for addr %" PRIx64,
2413                             (unsigned long long)dvma);
2414 
2415                 /* clear all matching ptes that fit into this leaf pgtable */
2416                 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2417         }
2418 
2419         /* No need to flush IOTLB after unmap */
2420 }
2421 
2422 static uint64_t
2423 dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2424 {
2425         uint64_t dvma;
2426         size_t xsize, align;
2427         uint64_t minaddr, maxaddr;
2428 
2429         /* parameters */
2430         xsize = npages * IMMU_PAGESIZE;
2431         align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2432         minaddr = dma_attr->dma_attr_addr_lo;
2433         maxaddr = dma_attr->dma_attr_addr_hi + 1;
2434 
2435         /* handle the rollover cases */
2436         if (maxaddr < dma_attr->dma_attr_addr_hi) {
2437                 maxaddr = dma_attr->dma_attr_addr_hi;
2438         }
2439 
2440         /*
2441          * allocate from vmem arena.
2442          */
2443         dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2444             xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2445             (void *)(uintptr_t)maxaddr, kmf);
2446 
2447         return (dvma);
2448 }
2449 
2450 static void
2451 dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
2452 {
2453         int nlevels;
2454         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
2455         uint64_t dvma, n;
2456         size_t xsize, align;
2457         uint64_t minaddr, maxaddr, dmamax;
2458         int on, npte, pindex;
2459         hw_pdte_t *shwp;
2460         immu_t *immu;
2461         domain_t *domain;
2462 
2463         /* parameters */
2464         domain = IMMU_DEVI(rdip)->imd_domain;
2465         immu = domain->dom_immu;
2466         nlevels = immu->immu_dvma_nlevels;
2467         xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
2468         align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2469         minaddr = dma_attr->dma_attr_addr_lo;
2470         if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
2471                 dmamax = dma_attr->dma_attr_seg;
2472         else
2473                 dmamax = dma_attr->dma_attr_addr_hi;
2474         maxaddr = dmamax + 1;
2475 
2476         if (maxaddr < dmamax)
2477                 maxaddr = dmamax;
2478 
2479         dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2480             xsize, align, 0, dma_attr->dma_attr_seg + 1,
2481             (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
2482 
2483         ihp->ihp_predvma = dvma;
2484         ihp->ihp_npremapped = 0;
2485         if (dvma == 0)
2486                 return;
2487 
2488         n = IMMU_NPREPTES;
2489         pindex = 0;
2490 
2491         /*
2492          * Set up a mapping at address 0, just so that all PDPs get allocated
2493          * now. Although this initial mapping should never be used,
2494          * explicitly set it to read-only, just to be safe.
2495          */
2496         while (n > 0) {
2497                 xlate_setup(dvma, xlate, nlevels);
2498 
2499                 (void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
2500                     IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2501 
2502                 xlp = &xlate[1];
2503                 shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
2504                     + xlp->xlt_idx;
2505                 on = n;
2506 
2507                 PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
2508                     1, rdip, IMMU_FLAGS_READ);
2509 
2510                 npte = on - n;
2511 
2512                 while (npte > 0) {
2513                         ihp->ihp_preptes[pindex++] = shwp;
2514 #ifdef BUGGY_DRIVERS
2515                         PDTE_CLEAR_WRITE(*shwp);
2516 #endif
2517                         shwp++;
2518                         npte--;
2519                 }
2520         }
2521 }
2522 
2523 static void
2524 dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2525 {
2526         domain_t *domain;
2527 
2528         domain = IMMU_DEVI(rdip)->imd_domain;
2529 
2530         if (ihp->ihp_predvma != 0) {
2531                 dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2532                 vmem_free(domain->dom_dvma_arena,
2533                     (void *)(uintptr_t)ihp->ihp_predvma,
2534                     IMMU_NPREPTES * IMMU_PAGESIZE);
2535         }
2536 }
2537 
2538 static void
2539 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2540 {
2541         uint64_t size = npages * IMMU_PAGESIZE;
2542 
2543         if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2544                 return;
2545 
2546         vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2547 }
2548 
2549 static int
2550 immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2551     immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2552     ddi_dma_obj_t *dma_out)
2553 {
2554         domain_t *domain;
2555         immu_t *immu;
2556         immu_flags_t immu_flags;
2557         ddi_dma_atyp_t buftype;
2558         ddi_dma_obj_t *dmar_object;
2559         ddi_dma_attr_t *attrp;
2560         uint64_t offset, paddr, dvma, sdvma, rwmask;
2561         size_t npages, npgalloc;
2562         uint_t psize, size, pcnt, dmax;
2563         page_t **pparray;
2564         caddr_t vaddr;
2565         page_t *page;
2566         struct as *vas;
2567         immu_dcookie_t *dcookies;
2568         int pde_set;
2569 
2570         domain = IMMU_DEVI(rdip)->imd_domain;
2571         immu = domain->dom_immu;
2572         immu_flags = dma_to_immu_flags(dmareq);
2573 
2574         attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2575 
2576         dmar_object = &dmareq->dmar_object;
2577         pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2578         vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2579         buftype = dmar_object->dmao_type;
2580         size = dmar_object->dmao_size;
2581 
2582         IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2583             buftype, uint_t, size);
2584 
2585         dcookies = &ihp->ihp_dcookies[0];
2586 
2587         pcnt = dmax = 0;
2588 
2589         /* retrieve paddr, psize, offset from dmareq */
2590         if (buftype == DMA_OTYP_PAGES) {
2591                 page = dmar_object->dmao_obj.pp_obj.pp_pp;
2592                 offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
2593                     MMU_PAGEOFFSET;
2594                 paddr = pfn_to_pa(page->p_pagenum) + offset;
2595                 psize = MIN((MMU_PAGESIZE - offset), size);
2596                 page = page->p_next;
2597                 vas = dmar_object->dmao_obj.virt_obj.v_as;
2598         } else {
2599                 if (vas == NULL) {
2600                         vas = &kas;
2601                 }
2602                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2603                 if (pparray != NULL) {
2604                         paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2605                         psize = MIN((MMU_PAGESIZE - offset), size);
2606                         pcnt++;
2607                 } else {
2608                         paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2609                             vaddr)) + offset;
2610                         psize = MIN(size, (MMU_PAGESIZE - offset));
2611                         vaddr += psize;
2612                 }
2613         }
2614 
2615         npgalloc = IMMU_BTOPR(size + offset);
2616 
2617         if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2618 #ifdef BUGGY_DRIVERS
2619                 rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2620 #else
2621                 rwmask = immu->immu_ptemask;
2622                 if (immu_flags & IMMU_FLAGS_READ)
2623                         rwmask |= PDTE_MASK_R;
2624                 if (immu_flags & IMMU_FLAGS_WRITE)
2625                         rwmask |= PDTE_MASK_W;
2626 #endif
2627 #ifdef DEBUG
2628                 rwmask |= PDTE_MASK_P;
2629 #endif
2630                 sdvma = ihp->ihp_predvma;
2631                 ihp->ihp_npremapped = npgalloc;
2632                 *ihp->ihp_preptes[0] =
2633                     PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2634         } else {
2635                 ihp->ihp_npremapped = 0;
2636                 sdvma = dvma_alloc(domain, attrp, npgalloc,
2637                     dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2638                 if (sdvma == 0)
2639                         return (DDI_DMA_NORESOURCES);
2640 
2641                 dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2642                 dcookies[0].dck_npages = 1;
2643         }
2644 
2645         IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2646             uint64_t, sdvma);
2647 
2648         dvma = sdvma;
2649         pde_set = 0;
2650         npages = 1;
2651         size -= psize;
2652         while (size > 0) {
2653                 /* get the size for this page (i.e. partial or full page) */
2654                 psize = MIN(size, MMU_PAGESIZE);
2655                 if (buftype == DMA_OTYP_PAGES) {
2656                         /* get the paddr from the page_t */
2657                         paddr = pfn_to_pa(page->p_pagenum);
2658                         page = page->p_next;
2659                 } else if (pparray != NULL) {
2660                         /* index into the array of page_t's to get the paddr */
2661                         paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2662                         pcnt++;
2663                 } else {
2664                         /* call into the VM to get the paddr */
2665                         paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2666                         vaddr += psize;
2667                 }
2668 
2669                 npages++;
2670 
2671                 if (ihp->ihp_npremapped > 0) {
2672                         *ihp->ihp_preptes[npages - 1] =
2673                             PDTE_PADDR(paddr) | rwmask;
2674                 } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2675                         dcookies[dmax].dck_npages++;
2676                 } else {
2677                         /* No, we need a new dcookie */
2678                         if (dmax == (IMMU_NDCK - 1)) {
2679                                 /*
2680                                  * Ran out of dcookies. Map them now.
2681                                  */
2682                                 if (dvma_map(domain, dvma,
2683                                     npages, dcookies, dmax + 1, rdip,
2684                                     immu_flags))
2685                                         pde_set++;
2686 
2687                                 IMMU_DPROBE4(immu__dvmamap__early,
2688                                     dev_info_t *, rdip, uint64_t, dvma,
2689                                     uint_t, npages, uint_t, dmax+1);
2690 
2691                                 dvma += (npages << IMMU_PAGESHIFT);
2692                                 npages = 0;
2693                                 dmax = 0;
2694                         } else
2695                                 dmax++;
2696                         dcookies[dmax].dck_paddr = paddr;
2697                         dcookies[dmax].dck_npages = 1;
2698                 }
2699                 size -= psize;
2700         }
2701 
2702         /*
2703          * Finish up, mapping all, or all of the remaining,
2704          * physical memory ranges.
2705          */
2706         if (ihp->ihp_npremapped == 0 && npages > 0) {
2707                 IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2708                     uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2709 
2710                 if (dvma_map(domain, dvma, npages, dcookies,
2711                     dmax + 1, rdip, immu_flags))
2712                         pde_set++;
2713         }
2714 
2715         /* Invalidate the IOTLB */
2716         immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2717             pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2718             &ihp->ihp_inv_wait);
2719 
2720         ihp->ihp_ndvseg = 1;
2721         ihp->ihp_dvseg[0].dvs_start = sdvma;
2722         ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2723 
2724         dma_out->dmao_size = dmar_object->dmao_size;
2725         dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2726         dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2727         dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2728         dma_out->dmao_type = DMA_OTYP_DVADDR;
2729 
2730         return (DDI_DMA_MAPPED);
2731 }
2732 
2733 static int
2734 immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2735 {
2736         uint64_t dvma, npages;
2737         domain_t *domain;
2738         struct dvmaseg *dvs;
2739 
2740         domain = IMMU_DEVI(rdip)->imd_domain;
2741         dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2742 
2743         dvma = dvs[0].dvs_start;
2744         npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2745 
2746 #ifdef DEBUG
2747         /* Unmap only in DEBUG mode */
2748         dvma_unmap(domain, dvma, npages, rdip);
2749 #endif
2750         dvma_free(domain, dvma, npages);
2751 
2752         IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2753             uint64_t, dvma);
2754 
2755 #ifdef DEBUG
2756         /*
2757          * In the DEBUG case, the unmap was actually done,
2758          * but an IOTLB flush was not done. So, an explicit
2759          * write back flush is needed.
2760          */
2761         immu_regs_wbf_flush(domain->dom_immu);
2762 #endif
2763 
2764         return (DDI_SUCCESS);
2765 }
2766 
2767 /* ############################# Functions exported ######################## */
2768 
2769 /*
2770  * setup the DVMA subsystem
2771  * this code runs only for the first IOMMU unit
2772  */
2773 void
2774 immu_dvma_setup(list_t *listp)
2775 {
2776         immu_t *immu;
2777         uint_t kval;
2778         size_t nchains;
2779 
2780         /* locks */
2781         mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
2782 
2783         /* Create lists */
2784         list_create(&immu_unity_domain_list, sizeof (domain_t),
2785             offsetof(domain_t, dom_maptype_node));
2786         list_create(&immu_xlate_domain_list, sizeof (domain_t),
2787             offsetof(domain_t, dom_maptype_node));
2788 
2789         /* Setup BDF domain hash */
2790         nchains = 0xff;
2791         kval = mod_hash_iddata_gen(nchains);
2792 
2793         bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
2794             nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
2795             mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
2796             KM_NOSLEEP);
2797 
2798         immu = list_head(listp);
2799         for (; immu; immu = list_next(listp, immu)) {
2800                 create_unity_domain(immu);
2801                 did_init(immu);
2802                 context_init(immu);
2803                 immu->immu_dvma_setup = B_TRUE;
2804         }
2805 }
2806 
2807 /*
2808  * Startup up one DVMA unit
2809  */
2810 void
2811 immu_dvma_startup(immu_t *immu)
2812 {
2813         if (immu_gfxdvma_enable == B_FALSE &&
2814             immu->immu_dvma_gfx_only == B_TRUE) {
2815                 return;
2816         }
2817 
2818         /*
2819          * DVMA will start once IOMMU is "running"
2820          */
2821         immu->immu_dvma_running = B_TRUE;
2822 }
2823 
2824 /*
2825  * immu_dvma_physmem_update()
2826  *       called when the installed memory on a
2827  *       system increases, to expand domain DVMA
2828  *       for domains with UNITY mapping
2829  */
2830 void
2831 immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2832 {
2833         uint64_t start;
2834         uint64_t npages;
2835         int dcount;
2836         immu_dcookie_t dcookies[1] = {0};
2837         domain_t *domain;
2838 
2839         /*
2840          * Just walk the system-wide list of domains with
2841          * UNITY mapping. Both the list of *all* domains
2842          * and *UNITY* domains is protected by the same
2843          * single lock
2844          */
2845         mutex_enter(&immu_domain_lock);
2846         domain = list_head(&immu_unity_domain_list);
2847         for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2848                 /*
2849                  * Nothing to do if the IOMMU supports passthrough.
2850                  */
2851                 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2852                         continue;
2853 
2854                 /* There is no vmem_arena for unity domains. Just map it */
2855                 ddi_err(DER_LOG, domain->dom_dip,
2856                     "iommu: unity-domain: Adding map "
2857                     "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2858 
2859                 start = IMMU_ROUNDOWN(addr);
2860                 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2861 
2862                 dcookies[0].dck_paddr = start;
2863                 dcookies[0].dck_npages = npages;
2864                 dcount = 1;
2865                 (void) dvma_map(domain, start, npages,
2866                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2867 
2868         }
2869         mutex_exit(&immu_domain_lock);
2870 }
2871 
2872 int
2873 immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
2874 {
2875         dev_info_t *ddip, *odip;
2876         immu_t *immu;
2877         domain_t *domain;
2878 
2879         odip = rdip;
2880 
2881         immu = immu_dvma_get_immu(rdip, immu_flags);
2882         if (immu == NULL) {
2883                 /*
2884                  * possible that there is no IOMMU unit for this device
2885                  * - BIOS bugs are one example.
2886                  */
2887                 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
2888                 return (DDI_DMA_NORESOURCES);
2889         }
2890 
2891         /*
2892          * redirect isa devices attached under lpc to lpc dip
2893          */
2894         if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
2895                 rdip = get_lpc_devinfo(immu, rdip, immu_flags);
2896                 if (rdip == NULL) {
2897                         ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2898                         /*NOTREACHED*/
2899                 }
2900         }
2901 
2902         /* Reset immu, as redirection can change IMMU */
2903         immu = NULL;
2904 
2905         /*
2906          * for gart, redirect to the real graphic devinfo
2907          */
2908         if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
2909                 rdip = get_gfx_devinfo(rdip);
2910                 if (rdip == NULL) {
2911                         ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2912                         /*NOTREACHED*/
2913                 }
2914         }
2915 
2916         /*
2917          * Setup DVMA domain for the device. This does
2918          * work only the first time we do DVMA for a
2919          * device.
2920          */
2921         ddip = NULL;
2922         domain = device_domain(rdip, &ddip, immu_flags);
2923         if (domain == NULL) {
2924                 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
2925                 return (DDI_DMA_NORESOURCES);
2926         }
2927 
2928         immu = domain->dom_immu;
2929 
2930         /*
2931          * If a domain is found, we must also have a domain dip
2932          * which is the topmost ancestor dip of rdip that shares
2933          * the same domain with rdip.
2934          */
2935         if (domain->dom_did == 0 || ddip == NULL) {
2936                 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
2937                     domain->dom_did, ddip);
2938                 return (DDI_DMA_NORESOURCES);
2939         }
2940 
2941         if (odip != rdip)
2942                 set_domain(odip, ddip, domain);
2943 
2944         /*
2945          * Update the root and context entries
2946          */
2947         if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
2948             != DDI_SUCCESS) {
2949                 ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
2950                 return (DDI_DMA_NORESOURCES);
2951         }
2952 
2953         return (DDI_SUCCESS);
2954 }
2955 
2956 int
2957 immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2958 {
2959         immu_dcookie_t dcookies[1] = {0};
2960         boolean_t pde_set;
2961         immu_t *immu;
2962         domain_t *domain;
2963         immu_inv_wait_t iw;
2964 
2965         dcookies[0].dck_paddr = mrng->mrng_start;
2966         dcookies[0].dck_npages = mrng->mrng_npages;
2967 
2968         domain = IMMU_DEVI(rdip)->imd_domain;
2969         immu = domain->dom_immu;
2970 
2971         pde_set = dvma_map(domain, mrng->mrng_start,
2972             mrng->mrng_npages, dcookies, 1, rdip,
2973             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2974 
2975         immu_init_inv_wait(&iw, "memrange", B_TRUE);
2976 
2977         immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2978             mrng->mrng_npages, pde_set == B_TRUE ?
2979             TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2980 
2981         return (DDI_SUCCESS);
2982 }
2983 
2984 immu_devi_t *
2985 immu_devi_get(dev_info_t *rdip)
2986 {
2987         immu_devi_t *immu_devi;
2988         volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
2989 
2990         /* Just want atomic reads. No need for lock */
2991         immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
2992             0);
2993         return (immu_devi);
2994 }
2995 
2996 /*ARGSUSED*/
2997 int
2998 immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
2999 {
3000         immu_hdl_priv_t *ihp;
3001 
3002         ihp = buf;
3003         immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3004 
3005         return (0);
3006 }
3007 
3008 /*
3009  * iommulib interface functions
3010  */
3011 static int
3012 immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3013 {
3014         immu_devi_t *immu_devi;
3015         int ret;
3016 
3017         if (!immu_enable)
3018                 return (DDI_FAILURE);
3019 
3020         /*
3021          * Make sure the device has all the IOMMU structures
3022          * initialized. If this device goes through an IOMMU
3023          * unit (e.g. this probe function returns success),
3024          * this will be called at most N times, with N being
3025          * the number of IOMMUs in the system.
3026          *
3027          * After that, when iommulib_nex_open succeeds,
3028          * we can always assume that this device has all
3029          * the structures initialized. IOMMU_USED(dip) will
3030          * be true. There is no need to find the controlling
3031          * IOMMU/domain again.
3032          */
3033         ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3034         if (ret != DDI_SUCCESS)
3035                 return (ret);
3036 
3037         immu_devi = IMMU_DEVI(dip);
3038 
3039         /*
3040          * For unity domains, there is no need to call in to
3041          * the IOMMU code.
3042          */
3043         if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3044                 return (DDI_FAILURE);
3045 
3046         if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3047                 return (DDI_SUCCESS);
3048 
3049         return (DDI_FAILURE);
3050 }
3051 
3052 /*ARGSUSED*/
3053 static int
3054 immu_allochdl(iommulib_handle_t handle,
3055     dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3056     int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3057 {
3058         int ret;
3059         immu_hdl_priv_t *ihp;
3060         immu_t *immu;
3061 
3062         ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3063             arg, dma_handlep);
3064         if (ret == DDI_SUCCESS) {
3065                 immu = IMMU_DEVI(rdip)->imd_immu;
3066 
3067                 ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3068                     waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3069                 if (ihp == NULL) {
3070                         (void) iommulib_iommu_dma_freehdl(dip, rdip,
3071                             *dma_handlep);
3072                         return (DDI_DMA_NORESOURCES);
3073                 }
3074 
3075                 if (IMMU_DEVI(rdip)->imd_use_premap)
3076                         dvma_prealloc(rdip, ihp, attr);
3077                 else {
3078                         ihp->ihp_npremapped = 0;
3079                         ihp->ihp_predvma = 0;
3080                 }
3081                 ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3082                     ihp);
3083         }
3084         return (ret);
3085 }
3086 
3087 /*ARGSUSED*/
3088 static int
3089 immu_freehdl(iommulib_handle_t handle,
3090     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3091 {
3092         immu_hdl_priv_t *ihp;
3093 
3094         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3095         if (ihp != NULL) {
3096                 if (IMMU_DEVI(rdip)->imd_use_premap)
3097                         dvma_prefree(rdip, ihp);
3098                 kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3099         }
3100 
3101         return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3102 }
3103 
3104 
3105 /*ARGSUSED*/
3106 static int
3107 immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3108     dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3109     struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3110     uint_t *ccountp)
3111 {
3112         int ret;
3113         immu_hdl_priv_t *ihp;
3114 
3115         ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3116             dma_req, cookiep, ccountp);
3117 
3118         if (ret == DDI_DMA_MAPPED) {
3119                 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3120                 immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3121         }
3122 
3123         return (ret);
3124 }
3125 
3126 /*ARGSUSED*/
3127 static int
3128 immu_unbindhdl(iommulib_handle_t handle,
3129     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3130 {
3131         return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3132 }
3133 
3134 /*ARGSUSED*/
3135 static int
3136 immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3137     dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3138     size_t len, uint_t cachefl)
3139 {
3140         return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3141             cachefl));
3142 }
3143 
3144 /*ARGSUSED*/
3145 static int
3146 immu_win(iommulib_handle_t handle, dev_info_t *dip,
3147     dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3148     off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3149     uint_t *ccountp)
3150 {
3151         return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3152             lenp, cookiep, ccountp));
3153 }
3154 
3155 /*ARGSUSED*/
3156 static int
3157 immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3158     dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3159     struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3160 {
3161         immu_hdl_priv_t *ihp;
3162 
3163         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3164 
3165         return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3166 }
3167 
3168 /*ARGSUSED*/
3169 static int
3170 immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3171     dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3172 {
3173         immu_hdl_priv_t *ihp;
3174 
3175         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3176         if (ihp->ihp_npremapped > 0)
3177                 return (DDI_SUCCESS);
3178         return (immu_unmap_dvmaseg(rdip, dmao));
3179 }