Print this page
PANKOVs restructure
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/io/immu_dvma.c
+++ new/usr/src/uts/i86pc/io/immu_dvma.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23 23 * All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2009, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
31 31 */
32 32
33 33 /*
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
34 34 * DVMA code
35 35 * This file contains Intel IOMMU code that deals with DVMA
36 36 * i.e. DMA remapping.
37 37 */
38 38
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/pcie.h>
41 41 #include <sys/pci_cfgspace.h>
42 42 #include <vm/hat_i86.h>
43 43 #include <sys/memlist.h>
44 -#include <sys/acpi/acpi.h>
44 +#include <acpica/include/acpi.h>
45 45 #include <sys/acpica.h>
46 46 #include <sys/modhash.h>
47 47 #include <sys/immu.h>
48 48 #include <sys/x86_archext.h>
49 49 #include <sys/archsystm.h>
50 50
51 51 #undef TEST
52 52
53 53 /*
54 54 * Macros based on PCI spec
55 55 */
#define	IMMU_PCI_REV2CLASS(r)	((r) >> 8)  /* classcode from revid */
#define	IMMU_PCI_CLASS2BASE(c)	((c) >> 16) /* baseclass from classcode */
/*
 * Subclass from classcode.  The expansion deliberately carries no trailing
 * semicolon so the macro is a plain expression and can be used inside
 * larger expressions, not only as the right-hand side of a statement.
 */
#define	IMMU_PCI_CLASS2SUB(c)	(((c) >> 8) & 0xff)

/*
 * True when cookie (d) has a non-zero dck_paddr and (p) lies exactly
 * IMMU_PAGESIZE beyond it.
 */
#define	IMMU_CONTIG_PADDR(d, p) \
	((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p))
62 62
/*
 * Argument bundle handed to the ancestor-walk callbacks in this file
 * (match_lpc(), get_branch_domain()) through their void *arg parameter.
 */
typedef struct dvma_arg {
	immu_t *dva_immu;
	dev_info_t *dva_rdip;	/* dip the walk was started for */
	dev_info_t *dva_ddip;	/* domain dip, filled in by the callbacks */
	domain_t *dva_domain;	/* domain found so far, NULL if none */
	int dva_level;
	immu_flags_t dva_flags;	/* IMMU_FLAGS_* forwarded to helpers */
	list_t *dva_list;	/* immu_devi list scanned by match_lpc() */
	int dva_error;		/* DDI_SUCCESS / DDI_FAILURE from callback */
} dvma_arg_t;
73 73
74 74 static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
75 75 dev_info_t *rdip, immu_flags_t immu_flags);
76 76 static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
77 77 int dev, int func, immu_flags_t immu_flags);
78 78 static void destroy_immu_devi(immu_devi_t *immu_devi);
79 79 static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
80 80 uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
81 81 immu_flags_t immu_flags);
82 82
83 83 /* Extern globals */
84 84 extern struct memlist *phys_install;
85 85
86 86 /*
87 87 * iommulib interface functions.
88 88 */
89 89 static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
90 90 static int immu_allochdl(iommulib_handle_t handle,
91 91 dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
92 92 int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
93 93 static int immu_freehdl(iommulib_handle_t handle,
94 94 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
95 95 static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
96 96 dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
97 97 ddi_dma_cookie_t *cookiep, uint_t *ccountp);
98 98 static int immu_unbindhdl(iommulib_handle_t handle,
99 99 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
100 100 static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
101 101 dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
102 102 uint_t cachefl);
103 103 static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
104 104 dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
105 105 off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
106 106 static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
107 107 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
108 108 struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
109 109 static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
110 110 dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
111 111
112 112 /* static Globals */
113 113
114 114 /*
115 115 * Used to setup DMA objects (memory regions)
116 116 * for DMA reads by IOMMU units
117 117 */
118 118 static ddi_dma_attr_t immu_dma_attr = {
119 119 DMA_ATTR_V0,
120 120 0U,
121 121 0xffffffffffffffffULL,
122 122 0xffffffffU,
123 123 MMU_PAGESIZE, /* MMU page aligned */
124 124 0x1,
125 125 0x1,
126 126 0xffffffffU,
127 127 0xffffffffffffffffULL,
128 128 1,
129 129 4,
130 130 0
131 131 };
132 132
133 133 static ddi_device_acc_attr_t immu_acc_attr = {
134 134 DDI_DEVICE_ATTR_V0,
135 135 DDI_NEVERSWAP_ACC,
136 136 DDI_STRICTORDER_ACC
137 137 };
138 138
139 139 struct iommulib_ops immulib_ops = {
140 140 IOMMU_OPS_VERSION,
141 141 INTEL_IOMMU,
142 142 "Intel IOMMU",
143 143 NULL,
144 144 immu_probe,
145 145 immu_allochdl,
146 146 immu_freehdl,
147 147 immu_bindhdl,
148 148 immu_unbindhdl,
149 149 immu_sync,
150 150 immu_win,
151 151 immu_mapobject,
152 152 immu_unmapobject,
153 153 };
154 154
155 155 /*
156 156 * Fake physical address range used to set up initial prealloc mappings.
157 157 * This memory is never actually accessed. It is mapped read-only,
158 158 * and is overwritten as soon as the first DMA bind operation is
159 159 * performed. Since 0 is a special case, just start at the 2nd
160 160 * physical page.
161 161 */
162 162
163 163 static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
164 164
165 165 /* globals private to this file */
166 166 static kmutex_t immu_domain_lock;
167 167 static list_t immu_unity_domain_list;
168 168 static list_t immu_xlate_domain_list;
169 169
/*
 * structure used to store idx into each level of the page tables
 * NOTE(review): field meanings below are read off the names only; confirm
 * against the code that fills in xlate_t (not visible in this part of the
 * file).
 */
typedef struct xlate {
	int xlt_level;		/* presumably the page-table level */
	uint_t xlt_idx;		/* presumably index within that level */
	pgtable_t *xlt_pgtable;	/* presumably the pgtable at that level */
} xlate_t;
176 176
177 177 /* 0 is reserved by Vt-d spec. Solaris reserves 1 */
178 178 #define IMMU_UNITY_DID 1
179 179
180 180 static mod_hash_t *bdf_domain_hash;
181 181
182 182 int immu_use_alh;
183 183 int immu_use_tm;
184 184
185 185 static domain_t *
186 186 bdf_domain_lookup(immu_devi_t *immu_devi)
187 187 {
188 188 domain_t *domain;
189 189 int16_t seg = immu_devi->imd_seg;
190 190 int16_t bus = immu_devi->imd_bus;
191 191 int16_t devfunc = immu_devi->imd_devfunc;
192 192 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
193 193
194 194 if (seg < 0 || bus < 0 || devfunc < 0) {
195 195 return (NULL);
196 196 }
197 197
198 198 domain = NULL;
199 199 if (mod_hash_find(bdf_domain_hash,
200 200 (void *)bdf, (void *)&domain) == 0) {
201 201 ASSERT(domain);
202 202 ASSERT(domain->dom_did > 0);
203 203 return (domain);
204 204 } else {
205 205 return (NULL);
206 206 }
207 207 }
208 208
209 209 static void
210 210 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
211 211 {
212 212 int16_t seg = immu_devi->imd_seg;
213 213 int16_t bus = immu_devi->imd_bus;
214 214 int16_t devfunc = immu_devi->imd_devfunc;
215 215 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
216 216
217 217 if (seg < 0 || bus < 0 || devfunc < 0) {
218 218 return;
219 219 }
220 220
221 221 (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
222 222 }
223 223
224 224 static int
225 225 match_lpc(dev_info_t *pdip, void *arg)
226 226 {
227 227 immu_devi_t *immu_devi;
228 228 dvma_arg_t *dvap = (dvma_arg_t *)arg;
229 229
230 230 if (list_is_empty(dvap->dva_list)) {
231 231 return (DDI_WALK_TERMINATE);
232 232 }
233 233
234 234 immu_devi = list_head(dvap->dva_list);
235 235 for (; immu_devi; immu_devi = list_next(dvap->dva_list,
236 236 immu_devi)) {
237 237 if (immu_devi->imd_dip == pdip) {
238 238 dvap->dva_ddip = pdip;
239 239 dvap->dva_error = DDI_SUCCESS;
240 240 return (DDI_WALK_TERMINATE);
241 241 }
242 242 }
243 243
244 244 return (DDI_WALK_CONTINUE);
245 245 }
246 246
247 247 static void
248 248 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
249 249 {
250 250 list_t *spclist = NULL;
251 251 immu_devi_t *immu_devi;
252 252
253 253 immu_devi = IMMU_DEVI(dip);
254 254 if (immu_devi->imd_display == B_TRUE) {
255 255 spclist = &(immu->immu_dvma_gfx_list);
256 256 } else if (immu_devi->imd_lpc == B_TRUE) {
257 257 spclist = &(immu->immu_dvma_lpc_list);
258 258 }
259 259
260 260 if (spclist) {
261 261 mutex_enter(&(immu->immu_lock));
262 262 list_insert_head(spclist, immu_devi);
263 263 mutex_exit(&(immu->immu_lock));
264 264 }
265 265 }
266 266
267 267 /*
268 268 * Set the immu_devi struct in the immu_devi field of a devinfo node
269 269 */
270 270 int
271 271 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
272 272 {
273 273 int bus, dev, func;
274 274 immu_devi_t *new_imd;
275 275 immu_devi_t *immu_devi;
276 276
277 277 immu_devi = immu_devi_get(dip);
278 278 if (immu_devi != NULL) {
279 279 return (DDI_SUCCESS);
280 280 }
281 281
282 282 bus = dev = func = -1;
283 283
284 284 /*
285 285 * Assume a new immu_devi struct is needed
286 286 */
287 287 if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
288 288 /*
289 289 * No BDF. Set bus = -1 to indicate this.
290 290 * We still need to create a immu_devi struct
291 291 * though
292 292 */
293 293 bus = -1;
294 294 dev = 0;
295 295 func = 0;
296 296 }
297 297
298 298 new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
299 299 if (new_imd == NULL) {
300 300 ddi_err(DER_WARN, dip, "Failed to create immu_devi "
301 301 "structure");
302 302 return (DDI_FAILURE);
303 303 }
304 304
305 305 /*
306 306 * Check if some other thread allocated a immu_devi while we
307 307 * didn't own the lock.
308 308 */
309 309 mutex_enter(&(DEVI(dip)->devi_lock));
310 310 if (IMMU_DEVI(dip) == NULL) {
311 311 IMMU_DEVI_SET(dip, new_imd);
312 312 } else {
313 313 destroy_immu_devi(new_imd);
314 314 }
315 315 mutex_exit(&(DEVI(dip)->devi_lock));
316 316
317 317 return (DDI_SUCCESS);
318 318 }
319 319
320 320 static dev_info_t *
321 321 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
322 322 {
323 323 dvma_arg_t dvarg = {0};
324 324 dvarg.dva_list = &(immu->immu_dvma_lpc_list);
325 325 dvarg.dva_rdip = rdip;
326 326 dvarg.dva_error = DDI_FAILURE;
327 327
328 328 if (immu_walk_ancestor(rdip, NULL, match_lpc,
329 329 &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
330 330 ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
331 331 "find lpc_devinfo for ISA device");
332 332 return (NULL);
333 333 }
334 334
335 335 if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
336 336 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
337 337 "ISA device");
338 338 return (NULL);
339 339 }
340 340
341 341 return (dvarg.dva_ddip);
342 342 }
343 343
344 344 static dev_info_t *
345 345 get_gfx_devinfo(dev_info_t *rdip)
346 346 {
347 347 immu_t *immu;
348 348 immu_devi_t *immu_devi;
349 349 list_t *list_gfx;
350 350
351 351 /*
352 352 * The GFX device may not be on the same iommu unit as "agpgart"
353 353 * so search globally
354 354 */
355 355 immu_devi = NULL;
356 356 immu = list_head(&immu_list);
357 357 for (; immu; immu = list_next(&immu_list, immu)) {
358 358 list_gfx = &(immu->immu_dvma_gfx_list);
359 359 if (!list_is_empty(list_gfx)) {
360 360 immu_devi = list_head(list_gfx);
361 361 break;
362 362 }
363 363 }
364 364
365 365 if (immu_devi == NULL) {
366 366 ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
367 367 "Cannot redirect agpgart");
368 368 return (NULL);
369 369 }
370 370
371 371 ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
372 372 ddi_node_name(immu_devi->imd_dip));
373 373
374 374 return (immu_devi->imd_dip);
375 375 }
376 376
377 377 static immu_flags_t
378 378 dma_to_immu_flags(struct ddi_dma_req *dmareq)
379 379 {
380 380 immu_flags_t flags = 0;
381 381
382 382 if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
383 383 flags |= IMMU_FLAGS_SLEEP;
384 384 } else {
385 385 flags |= IMMU_FLAGS_NOSLEEP;
386 386 }
387 387
388 388 #ifdef BUGGY_DRIVERS
389 389
390 390 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
391 391
392 392 #else
393 393 /*
394 394 * Read and write flags need to be reversed.
395 395 * DMA_READ means read from device and write
396 396 * to memory. So DMA read means DVMA write.
397 397 */
398 398 if (dmareq->dmar_flags & DDI_DMA_READ)
399 399 flags |= IMMU_FLAGS_WRITE;
400 400
401 401 if (dmareq->dmar_flags & DDI_DMA_WRITE)
402 402 flags |= IMMU_FLAGS_READ;
403 403
404 404 /*
405 405 * Some buggy drivers specify neither READ or WRITE
406 406 * For such drivers set both read and write permissions
407 407 */
408 408 if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
409 409 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
410 410 }
411 411 #endif
412 412
413 413 return (flags);
414 414 }
415 415
416 416 /*ARGSUSED*/
417 417 int
418 418 pgtable_ctor(void *buf, void *arg, int kmflag)
419 419 {
420 420 size_t actual_size = 0;
421 421 pgtable_t *pgtable;
422 422 int (*dmafp)(caddr_t);
423 423 caddr_t vaddr;
424 424 void *next;
425 425 uint_t flags;
426 426 immu_t *immu = arg;
427 427
428 428 pgtable = (pgtable_t *)buf;
429 429
430 430 dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
431 431
432 432 next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
433 433 if (next == NULL) {
434 434 return (-1);
435 435 }
436 436
437 437 if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
438 438 dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
439 439 kmem_free(next, IMMU_PAGESIZE);
440 440 return (-1);
441 441 }
442 442
443 443 flags = DDI_DMA_CONSISTENT;
444 444 if (!immu->immu_dvma_coherent)
445 445 flags |= IOMEM_DATA_UC_WR_COMBINE;
446 446
447 447 if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
448 448 &immu_acc_attr, flags,
449 449 dmafp, NULL, &vaddr, &actual_size,
450 450 &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
451 451 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
452 452 kmem_free(next, IMMU_PAGESIZE);
453 453 return (-1);
454 454 }
455 455
456 456 /*
457 457 * Memory allocation failure. Maybe a temporary condition
458 458 * so return error rather than panic, so we can try again
459 459 */
460 460 if (actual_size < IMMU_PAGESIZE) {
461 461 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
462 462 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
463 463 kmem_free(next, IMMU_PAGESIZE);
464 464 return (-1);
465 465 }
466 466
467 467 pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
468 468 pgtable->hwpg_vaddr = vaddr;
469 469 pgtable->swpg_next_array = next;
470 470
471 471 rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
472 472
473 473 return (0);
474 474 }
475 475
476 476 /*ARGSUSED*/
477 477 void
478 478 pgtable_dtor(void *buf, void *arg)
479 479 {
480 480 pgtable_t *pgtable;
481 481
482 482 pgtable = (pgtable_t *)buf;
483 483
484 484 /* destroy will panic if lock is held. */
485 485 rw_destroy(&(pgtable->swpg_rwlock));
486 486
487 487 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
488 488 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
489 489 kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
490 490 }
491 491
492 492 /*
493 493 * pgtable_alloc()
494 494 * alloc a IOMMU pgtable structure.
495 495 * This same struct is used for root and context tables as well.
496 496 * This routine allocs the following:
497 497 * - a pgtable_t struct
498 498 * - a HW page which holds PTEs/entries which is accessed by HW
499 499 * so we set up DMA for this page
500 500 * - a SW page which is only for our bookkeeping
501 501 * (for example to hold pointers to the next level pgtable).
502 502 * So a simple kmem_alloc suffices
503 503 */
504 504 static pgtable_t *
505 505 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
506 506 {
507 507 pgtable_t *pgtable;
508 508 int kmflags;
509 509
510 510 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
511 511
512 512 pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
513 513 if (pgtable == NULL) {
514 514 return (NULL);
515 515 }
516 516 return (pgtable);
517 517 }
518 518
519 519 static void
520 520 pgtable_zero(pgtable_t *pgtable)
521 521 {
522 522 bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
523 523 bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
524 524 }
525 525
/*
 * pgtable_free()
 *	Hand a pgtable back to the unit's pgtable kmem cache.
 */
static void
pgtable_free(immu_t *immu, pgtable_t *pgtable)
{
	kmem_cache_free(immu->immu_pgtable_cache, pgtable);
}
531 531
532 532 /*
533 533 * Function to identify a display device from the PCI class code
534 534 */
535 535 static boolean_t
536 536 device_is_display(uint_t classcode)
537 537 {
538 538 static uint_t disp_classes[] = {
539 539 0x000100,
540 540 0x030000,
541 541 0x030001
542 542 };
543 543 int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
544 544
545 545 for (i = 0; i < nclasses; i++) {
546 546 if (classcode == disp_classes[i])
547 547 return (B_TRUE);
548 548 }
549 549 return (B_FALSE);
550 550 }
551 551
552 552 /*
553 553 * Function that determines if device is PCIEX and/or PCIEX bridge
554 554 */
555 555 static boolean_t
556 556 device_is_pciex(
557 557 uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
558 558 {
559 559 ushort_t cap;
560 560 ushort_t capsp;
561 561 ushort_t cap_count = PCI_CAP_MAX_PTR;
562 562 ushort_t status;
563 563 boolean_t is_pciex = B_FALSE;
564 564
565 565 *is_pcib = B_FALSE;
566 566
567 567 status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
568 568 if (!(status & PCI_STAT_CAP))
569 569 return (B_FALSE);
570 570
571 571 capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
572 572 while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
573 573 capsp &= PCI_CAP_PTR_MASK;
574 574 cap = pci_getb_func(bus, dev, func, capsp);
575 575
576 576 if (cap == PCI_CAP_ID_PCI_E) {
577 577 status = pci_getw_func(bus, dev, func, capsp + 2);
578 578 /*
579 579 * See section 7.8.2 of PCI-Express Base Spec v1.0a
580 580 * for Device/Port Type.
581 581 * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
582 582 * device is a PCIE2PCI bridge
583 583 */
584 584 *is_pcib =
585 585 ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
586 586 PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
587 587 is_pciex = B_TRUE;
588 588 }
589 589
590 590 capsp = (*pci_getb_func)(bus, dev, func,
591 591 capsp + PCI_CAP_NEXT_PTR);
592 592 }
593 593
594 594 return (is_pciex);
595 595 }
596 596
597 597 static boolean_t
598 598 device_use_premap(uint_t classcode)
599 599 {
600 600 if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
601 601 return (B_TRUE);
602 602 return (B_FALSE);
603 603 }
604 604
605 605
606 606 /*
607 607 * immu_dvma_get_immu()
608 608 * get the immu unit structure for a dev_info node
609 609 */
610 610 immu_t *
611 611 immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
612 612 {
613 613 immu_devi_t *immu_devi;
614 614 immu_t *immu;
615 615
616 616 /*
617 617 * check if immu unit was already found earlier.
618 618 * If yes, then it will be stashed in immu_devi struct.
619 619 */
620 620 immu_devi = immu_devi_get(dip);
621 621 if (immu_devi == NULL) {
622 622 if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
623 623 /*
624 624 * May fail because of low memory. Return error rather
625 625 * than panic as we want driver to rey again later
626 626 */
627 627 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
628 628 "No immu_devi structure");
629 629 /*NOTREACHED*/
630 630 }
631 631 immu_devi = immu_devi_get(dip);
632 632 }
633 633
634 634 mutex_enter(&(DEVI(dip)->devi_lock));
635 635 if (immu_devi->imd_immu) {
636 636 immu = immu_devi->imd_immu;
637 637 mutex_exit(&(DEVI(dip)->devi_lock));
638 638 return (immu);
639 639 }
640 640 mutex_exit(&(DEVI(dip)->devi_lock));
641 641
642 642 immu = immu_dmar_get_immu(dip);
643 643 if (immu == NULL) {
644 644 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
645 645 "Cannot find immu_t for device");
646 646 /*NOTREACHED*/
647 647 }
648 648
649 649 /*
650 650 * Check if some other thread found immu
651 651 * while lock was not held
652 652 */
653 653 immu_devi = immu_devi_get(dip);
654 654 /* immu_devi should be present as we found it earlier */
655 655 if (immu_devi == NULL) {
656 656 ddi_err(DER_PANIC, dip,
657 657 "immu_dvma_get_immu: No immu_devi structure");
658 658 /*NOTREACHED*/
659 659 }
660 660
661 661 mutex_enter(&(DEVI(dip)->devi_lock));
662 662 if (immu_devi->imd_immu == NULL) {
663 663 /* nobody else set it, so we should do it */
664 664 immu_devi->imd_immu = immu;
665 665 immu_devi_set_spclist(dip, immu);
666 666 } else {
667 667 /*
668 668 * if some other thread got immu before
669 669 * us, it should get the same results
670 670 */
671 671 if (immu_devi->imd_immu != immu) {
672 672 ddi_err(DER_PANIC, dip, "Multiple "
673 673 "immu units found for device. Expected (%p), "
674 674 "actual (%p)", (void *)immu,
675 675 (void *)immu_devi->imd_immu);
676 676 mutex_exit(&(DEVI(dip)->devi_lock));
677 677 /*NOTREACHED*/
678 678 }
679 679 }
680 680 mutex_exit(&(DEVI(dip)->devi_lock));
681 681
682 682 return (immu);
683 683 }
684 684
685 685
686 686 /* ############################# IMMU_DEVI code ############################ */
687 687
688 688 /*
689 689 * Allocate a immu_devi structure and initialize it
690 690 */
691 691 static immu_devi_t *
692 692 create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
693 693 immu_flags_t immu_flags)
694 694 {
695 695 uchar_t baseclass, subclass;
696 696 uint_t classcode, revclass;
697 697 immu_devi_t *immu_devi;
698 698 boolean_t pciex = B_FALSE;
699 699 int kmflags;
700 700 boolean_t is_pcib = B_FALSE;
701 701
702 702 /* bus == -1 indicate non-PCI device (no BDF) */
703 703 ASSERT(bus == -1 || bus >= 0);
704 704 ASSERT(dev >= 0);
705 705 ASSERT(func >= 0);
706 706
707 707 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
708 708 immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
709 709 if (immu_devi == NULL) {
710 710 ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
711 711 "Intel IOMMU immu_devi structure");
712 712 return (NULL);
713 713 }
714 714 immu_devi->imd_dip = rdip;
715 715 immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
716 716 immu_devi->imd_bus = bus;
717 717 immu_devi->imd_pcib_type = IMMU_PCIB_BAD;
718 718
719 719 if (bus == -1) {
720 720 immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
721 721 return (immu_devi);
722 722 }
723 723
724 724 immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
725 725 immu_devi->imd_sec = 0;
726 726 immu_devi->imd_sub = 0;
727 727
728 728 revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);
729 729
730 730 classcode = IMMU_PCI_REV2CLASS(revclass);
731 731 baseclass = IMMU_PCI_CLASS2BASE(classcode);
732 732 subclass = IMMU_PCI_CLASS2SUB(classcode);
733 733
734 734 if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
735 735
736 736 immu_devi->imd_sec = pci_getb_func(bus, dev, func,
737 737 PCI_BCNF_SECBUS);
738 738 immu_devi->imd_sub = pci_getb_func(bus, dev, func,
739 739 PCI_BCNF_SUBBUS);
740 740
741 741 pciex = device_is_pciex(bus, dev, func, &is_pcib);
742 742 if (pciex == B_TRUE && is_pcib == B_TRUE) {
743 743 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
744 744 } else if (pciex == B_TRUE) {
745 745 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
746 746 } else {
747 747 immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
748 748 }
749 749 } else {
750 750 immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
751 751 }
752 752
753 753 /* check for certain special devices */
754 754 immu_devi->imd_display = device_is_display(classcode);
755 755 immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
756 756 (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
757 757 immu_devi->imd_use_premap = device_use_premap(classcode);
758 758
759 759 immu_devi->imd_domain = NULL;
760 760
761 761 immu_devi->imd_dvma_flags = immu_global_dvma_flags;
762 762
763 763 return (immu_devi);
764 764 }
765 765
766 766 static void
767 767 destroy_immu_devi(immu_devi_t *immu_devi)
768 768 {
769 769 kmem_free(immu_devi, sizeof (immu_devi_t));
770 770 }
771 771
772 772 static domain_t *
773 773 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
774 774 {
775 775 immu_devi_t *immu_devi;
776 776 domain_t *domain;
777 777 dev_info_t *ddip;
778 778
779 779 *ddipp = NULL;
780 780
781 781 immu_devi = immu_devi_get(rdip);
782 782 if (immu_devi == NULL) {
783 783 return (NULL);
784 784 }
785 785
786 786 mutex_enter(&(DEVI(rdip)->devi_lock));
787 787 domain = immu_devi->imd_domain;
788 788 ddip = immu_devi->imd_ddip;
789 789 mutex_exit(&(DEVI(rdip)->devi_lock));
790 790
791 791 if (domain)
792 792 *ddipp = ddip;
793 793
794 794 return (domain);
795 795
796 796 }
797 797
798 798 /* ############################# END IMMU_DEVI code ######################## */
799 799 /* ############################# DOMAIN code ############################### */
800 800
801 801 /*
802 802 * This routine always succeeds
803 803 */
804 804 static int
805 805 did_alloc(immu_t *immu, dev_info_t *rdip,
806 806 dev_info_t *ddip, immu_flags_t immu_flags)
807 807 {
808 808 int did;
809 809
810 810 did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
811 811 (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
812 812
813 813 if (did == 0) {
814 814 ddi_err(DER_WARN, rdip, "device domain-id alloc error"
815 815 " domain-device: %s%d. immu unit is %s. Using "
816 816 "unity domain with domain-id (%d)",
817 817 ddi_driver_name(ddip), ddi_get_instance(ddip),
818 818 immu->immu_name, immu->immu_unity_domain->dom_did);
819 819 did = immu->immu_unity_domain->dom_did;
820 820 }
821 821
822 822 return (did);
823 823 }
824 824
825 825 static int
826 826 get_branch_domain(dev_info_t *pdip, void *arg)
827 827 {
828 828 immu_devi_t *immu_devi;
829 829 domain_t *domain;
830 830 dev_info_t *ddip;
831 831 immu_t *immu;
832 832 dvma_arg_t *dvp = (dvma_arg_t *)arg;
833 833
834 834 /*
835 835 * The field dvp->dva_rdip is a work-in-progress
836 836 * and gets updated as we walk up the ancestor
837 837 * tree. The final ddip is set only when we reach
838 838 * the top of the tree. So the dvp->dva_ddip field cannot
839 839 * be relied on until we reach the top of the field.
840 840 */
841 841
842 842 /* immu_devi may not be set. */
843 843 immu_devi = immu_devi_get(pdip);
844 844 if (immu_devi == NULL) {
845 845 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
846 846 dvp->dva_error = DDI_FAILURE;
847 847 return (DDI_WALK_TERMINATE);
848 848 }
849 849 }
850 850
851 851 immu_devi = immu_devi_get(pdip);
852 852 immu = immu_devi->imd_immu;
853 853 if (immu == NULL)
854 854 immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
855 855
856 856 /*
857 857 * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
858 858 * terminate the walk (since the device under the PCIE bridge
859 859 * is a PCIE device and has an independent entry in the
860 860 * root/context table)
861 861 */
862 862 if (dvp->dva_rdip != pdip &&
863 863 immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
864 864 return (DDI_WALK_TERMINATE);
865 865 }
866 866
867 867 /*
868 868 * In order to be a domain-dim, it must be a PCI device i.e.
869 869 * must have valid BDF. This also eliminates the root complex.
870 870 */
871 871 if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
872 872 immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
873 873 ASSERT(immu_devi->imd_bus >= 0);
874 874 ASSERT(immu_devi->imd_devfunc >= 0);
875 875 dvp->dva_ddip = pdip;
876 876 }
877 877
878 878 if (immu_devi->imd_display == B_TRUE ||
879 879 (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
880 880 dvp->dva_domain = immu->immu_unity_domain;
881 881 /* continue walking to find ddip */
882 882 return (DDI_WALK_CONTINUE);
883 883 }
884 884
885 885 mutex_enter(&(DEVI(pdip)->devi_lock));
886 886 domain = immu_devi->imd_domain;
887 887 ddip = immu_devi->imd_ddip;
888 888 mutex_exit(&(DEVI(pdip)->devi_lock));
889 889
890 890 if (domain && ddip) {
891 891 /* if domain is set, it must be the same */
892 892 if (dvp->dva_domain) {
893 893 ASSERT(domain == dvp->dva_domain);
894 894 }
895 895 dvp->dva_domain = domain;
896 896 dvp->dva_ddip = ddip;
897 897 return (DDI_WALK_TERMINATE);
898 898 }
899 899
900 900 /* Domain may already be set, continue walking so that ddip gets set */
901 901 if (dvp->dva_domain) {
902 902 return (DDI_WALK_CONTINUE);
903 903 }
904 904
905 905 /* domain is not set in either immu_devi or dvp */
906 906 domain = bdf_domain_lookup(immu_devi);
907 907 if (domain == NULL) {
908 908 return (DDI_WALK_CONTINUE);
909 909 }
910 910
911 911 /* ok, the BDF hash had a domain for this BDF. */
912 912
913 913 /* Grab lock again to check if something else set immu_devi fields */
914 914 mutex_enter(&(DEVI(pdip)->devi_lock));
915 915 if (immu_devi->imd_domain != NULL) {
916 916 dvp->dva_domain = domain;
917 917 } else {
918 918 dvp->dva_domain = domain;
919 919 }
920 920 mutex_exit(&(DEVI(pdip)->devi_lock));
921 921
922 922 /*
923 923 * walk upwards until the topmost PCI bridge is found
924 924 */
925 925 return (DDI_WALK_CONTINUE);
926 926
927 927 }
928 928
929 929 static void
930 930 map_unity_domain(domain_t *domain)
931 931 {
932 932 struct memlist *mp;
933 933 uint64_t start;
934 934 uint64_t npages;
935 935 immu_dcookie_t dcookies[1] = {0};
936 936 int dcount = 0;
937 937
938 938 /*
939 939 * UNITY arenas are a mirror of the physical memory
940 940 * installed on the system.
941 941 */
942 942
943 943 #ifdef BUGGY_DRIVERS
944 944 /*
945 945 * Dont skip page0. Some broken HW/FW access it.
946 946 */
947 947 dcookies[0].dck_paddr = 0;
948 948 dcookies[0].dck_npages = 1;
949 949 dcount = 1;
950 950 (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
951 951 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
952 952 #endif
953 953
954 954 memlist_read_lock();
955 955
956 956 mp = phys_install;
957 957
958 958 if (mp->ml_address == 0) {
959 959 /* since we already mapped page1 above */
960 960 start = IMMU_PAGESIZE;
961 961 } else {
962 962 start = mp->ml_address;
963 963 }
964 964 npages = mp->ml_size/IMMU_PAGESIZE + 1;
965 965
966 966 dcookies[0].dck_paddr = start;
967 967 dcookies[0].dck_npages = npages;
968 968 dcount = 1;
969 969 (void) dvma_map(domain, start, npages, dcookies,
970 970 dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
971 971
972 972 ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
973 973 " - 0x%" PRIx64 "]", start, start + mp->ml_size);
974 974
975 975 mp = mp->ml_next;
976 976 while (mp) {
977 977 ddi_err(DER_LOG, domain->dom_dip,
978 978 "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
979 979 mp->ml_address, mp->ml_address + mp->ml_size);
980 980
981 981 start = mp->ml_address;
982 982 npages = mp->ml_size/IMMU_PAGESIZE + 1;
983 983
984 984 dcookies[0].dck_paddr = start;
985 985 dcookies[0].dck_npages = npages;
986 986 dcount = 1;
987 987 (void) dvma_map(domain, start, npages,
988 988 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
989 989 mp = mp->ml_next;
990 990 }
991 991
992 992 mp = bios_rsvd;
993 993 while (mp) {
994 994 ddi_err(DER_LOG, domain->dom_dip,
995 995 "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
996 996 mp->ml_address, mp->ml_address + mp->ml_size);
997 997
998 998 start = mp->ml_address;
999 999 npages = mp->ml_size/IMMU_PAGESIZE + 1;
1000 1000
1001 1001 dcookies[0].dck_paddr = start;
1002 1002 dcookies[0].dck_npages = npages;
1003 1003 dcount = 1;
1004 1004 (void) dvma_map(domain, start, npages,
1005 1005 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
1006 1006
1007 1007 mp = mp->ml_next;
1008 1008 }
1009 1009
1010 1010 memlist_read_unlock();
1011 1011 }
1012 1012
1013 1013 /*
1014 1014 * create_xlate_arena()
1015 1015 * Create the dvma arena for a domain with translation
1016 1016 * mapping
1017 1017 */
1018 1018 static void
1019 1019 create_xlate_arena(immu_t *immu, domain_t *domain,
1020 1020 dev_info_t *rdip, immu_flags_t immu_flags)
1021 1021 {
1022 1022 char *arena_name;
1023 1023 struct memlist *mp;
1024 1024 int vmem_flags;
1025 1025 uint64_t start;
1026 1026 uint_t mgaw;
1027 1027 uint64_t size;
1028 1028 uint64_t maxaddr;
1029 1029 void *vmem_ret;
1030 1030
1031 1031 arena_name = domain->dom_dvma_arena_name;
1032 1032
1033 1033 /* Note, don't do sizeof (arena_name) - it is just a pointer */
1034 1034 (void) snprintf(arena_name,
1035 1035 sizeof (domain->dom_dvma_arena_name),
1036 1036 "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
1037 1037 domain->dom_did);
1038 1038
1039 1039 vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
1040 1040
1041 1041 /* Restrict mgaddr (max guest addr) to MGAW */
1042 1042 mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
1043 1043
1044 1044 /*
1045 1045 * To ensure we avoid ioapic and PCI MMIO ranges we just
1046 1046 * use the physical memory address range of the system as the
1047 1047 * range
1048 1048 */
1049 1049 maxaddr = ((uint64_t)1 << mgaw);
1050 1050
1051 1051 memlist_read_lock();
1052 1052
1053 1053 mp = phys_install;
1054 1054
1055 1055 if (mp->ml_address == 0)
1056 1056 start = MMU_PAGESIZE;
1057 1057 else
1058 1058 start = mp->ml_address;
1059 1059
1060 1060 if (start + mp->ml_size > maxaddr)
1061 1061 size = maxaddr - start;
1062 1062 else
1063 1063 size = mp->ml_size;
1064 1064
1065 1065 ddi_err(DER_VERB, rdip,
1066 1066 "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
1067 1067 " - 0x%" PRIx64 "]", arena_name, start, start + size);
1068 1068
1069 1069 /*
1070 1070 * We always allocate in quanta of IMMU_PAGESIZE
1071 1071 */
1072 1072 domain->dom_dvma_arena = vmem_create(arena_name,
1073 1073 (void *)(uintptr_t)start, /* start addr */
1074 1074 size, /* size */
1075 1075 IMMU_PAGESIZE, /* quantum */
1076 1076 NULL, /* afunc */
1077 1077 NULL, /* ffunc */
1078 1078 NULL, /* source */
1079 1079 0, /* qcache_max */
1080 1080 vmem_flags);
1081 1081
1082 1082 if (domain->dom_dvma_arena == NULL) {
1083 1083 ddi_err(DER_PANIC, rdip,
1084 1084 "Failed to allocate DVMA arena(%s) "
1085 1085 "for domain ID (%d)", arena_name, domain->dom_did);
1086 1086 /*NOTREACHED*/
1087 1087 }
1088 1088
1089 1089 mp = mp->ml_next;
1090 1090 while (mp) {
1091 1091
1092 1092 if (mp->ml_address == 0)
1093 1093 start = MMU_PAGESIZE;
1094 1094 else
1095 1095 start = mp->ml_address;
1096 1096
1097 1097 if (start + mp->ml_size > maxaddr)
1098 1098 size = maxaddr - start;
1099 1099 else
1100 1100 size = mp->ml_size;
1101 1101
1102 1102 ddi_err(DER_VERB, rdip,
1103 1103 "iommu: %s: Adding dvma vmem span [0x%" PRIx64
1104 1104 " - 0x%" PRIx64 "]", arena_name, start,
1105 1105 start + size);
1106 1106
1107 1107 vmem_ret = vmem_add(domain->dom_dvma_arena,
1108 1108 (void *)(uintptr_t)start, size, vmem_flags);
1109 1109
1110 1110 if (vmem_ret == NULL) {
1111 1111 ddi_err(DER_PANIC, rdip,
1112 1112 "Failed to allocate DVMA arena(%s) "
1113 1113 "for domain ID (%d)",
1114 1114 arena_name, domain->dom_did);
1115 1115 /*NOTREACHED*/
1116 1116 }
1117 1117 mp = mp->ml_next;
1118 1118 }
1119 1119 memlist_read_unlock();
1120 1120 }
1121 1121
1122 1122 /* ################################### DOMAIN CODE ######################### */
1123 1123
1124 1124 /*
1125 1125 * Set the domain and domain-dip for a dip
1126 1126 */
1127 1127 static void
1128 1128 set_domain(
1129 1129 dev_info_t *dip,
1130 1130 dev_info_t *ddip,
1131 1131 domain_t *domain)
1132 1132 {
1133 1133 immu_devi_t *immu_devi;
1134 1134 domain_t *fdomain;
1135 1135 dev_info_t *fddip;
1136 1136
1137 1137 immu_devi = immu_devi_get(dip);
1138 1138
1139 1139 mutex_enter(&(DEVI(dip)->devi_lock));
1140 1140 fddip = immu_devi->imd_ddip;
1141 1141 fdomain = immu_devi->imd_domain;
1142 1142
1143 1143 if (fddip) {
1144 1144 ASSERT(fddip == ddip);
1145 1145 } else {
1146 1146 immu_devi->imd_ddip = ddip;
1147 1147 }
1148 1148
1149 1149 if (fdomain) {
1150 1150 ASSERT(fdomain == domain);
1151 1151 } else {
1152 1152 immu_devi->imd_domain = domain;
1153 1153 }
1154 1154 mutex_exit(&(DEVI(dip)->devi_lock));
1155 1155 }
1156 1156
1157 1157 /*
1158 1158 * device_domain()
1159 1159 * Get domain for a device. The domain may be global in which case it
1160 1160 * is shared between all IOMMU units. Due to potential AGAW differences
1161 1161 * between IOMMU units, such global domains *have to be* UNITY mapping
1162 1162 * domains. Alternatively, the domain may be local to a IOMMU unit.
1163 1163 * Local domains may be shared or immu_devi, although the
1164 1164 * scope of sharing
1165 1165 * is restricted to devices controlled by the IOMMU unit to
1166 1166 * which the domain
1167 1167 * belongs. If shared, they (currently) have to be UNITY domains. If
1168 1168 * immu_devi a domain may be either UNITY or translation (XLATE) domain.
1169 1169 */
1170 1170 static domain_t *
1171 1171 device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
1172 1172 {
1173 1173 dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
1174 1174 immu_t *immu;
1175 1175 domain_t *domain;
1176 1176 dvma_arg_t dvarg = {0};
1177 1177 int level;
1178 1178
1179 1179 *ddipp = NULL;
1180 1180
1181 1181 /*
1182 1182 * Check if the domain is already set. This is usually true
1183 1183 * if this is not the first DVMA transaction.
1184 1184 */
1185 1185 ddip = NULL;
1186 1186 domain = immu_devi_domain(rdip, &ddip);
1187 1187 if (domain) {
1188 1188 *ddipp = ddip;
1189 1189 return (domain);
1190 1190 }
1191 1191
1192 1192 immu = immu_dvma_get_immu(rdip, immu_flags);
1193 1193 if (immu == NULL) {
1194 1194 /*
1195 1195 * possible that there is no IOMMU unit for this device
1196 1196 * - BIOS bugs are one example.
1197 1197 */
1198 1198 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
1199 1199 return (NULL);
1200 1200 }
1201 1201
1202 1202 immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;
1203 1203
1204 1204 dvarg.dva_rdip = rdip;
1205 1205 dvarg.dva_ddip = NULL;
1206 1206 dvarg.dva_domain = NULL;
1207 1207 dvarg.dva_flags = immu_flags;
1208 1208 level = 0;
1209 1209 if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
1210 1210 &dvarg, &level, immu_flags) != DDI_SUCCESS) {
1211 1211 /*
1212 1212 * maybe low memory. return error,
1213 1213 * so driver tries again later
1214 1214 */
1215 1215 return (NULL);
1216 1216 }
1217 1217
1218 1218 /* should have walked at least 1 dip (i.e. edip) */
1219 1219 ASSERT(level > 0);
1220 1220
1221 1221 ddip = dvarg.dva_ddip; /* must be present */
1222 1222 domain = dvarg.dva_domain; /* may be NULL */
1223 1223
1224 1224 /*
1225 1225 * We may find the domain during our ancestor walk on any one of our
1226 1226 * ancestor dips, If the domain is found then the domain-dip
1227 1227 * (i.e. ddip) will also be found in the same immu_devi struct.
1228 1228 * The domain-dip is the highest ancestor dip which shares the
1229 1229 * same domain with edip.
1230 1230 * The domain may or may not be found, but the domain dip must
1231 1231 * be found.
1232 1232 */
1233 1233 if (ddip == NULL) {
1234 1234 ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
1235 1235 return (NULL);
1236 1236 }
1237 1237
1238 1238 /*
1239 1239 * Did we find a domain ?
1240 1240 */
1241 1241 if (domain) {
1242 1242 goto found;
1243 1243 }
1244 1244
1245 1245 /* nope, so allocate */
1246 1246 domain = domain_create(immu, ddip, rdip, immu_flags);
1247 1247 if (domain == NULL) {
1248 1248 return (NULL);
1249 1249 }
1250 1250
1251 1251 /*FALLTHROUGH*/
1252 1252 found:
1253 1253 /*
1254 1254 * We know *domain *is* the right domain, so panic if
1255 1255 * another domain is set for either the request-dip or
1256 1256 * effective dip.
1257 1257 */
1258 1258 set_domain(ddip, ddip, domain);
1259 1259 set_domain(rdip, ddip, domain);
1260 1260
1261 1261 *ddipp = ddip;
1262 1262 return (domain);
1263 1263 }
1264 1264
1265 1265 static void
1266 1266 create_unity_domain(immu_t *immu)
1267 1267 {
1268 1268 domain_t *domain;
1269 1269
1270 1270 /* domain created during boot and always use sleep flag */
1271 1271 domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1272 1272
1273 1273 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1274 1274
1275 1275 domain->dom_did = IMMU_UNITY_DID;
1276 1276 domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1277 1277
1278 1278 domain->dom_immu = immu;
1279 1279 immu->immu_unity_domain = domain;
1280 1280
1281 1281 /*
1282 1282 * Setup the domain's initial page table
1283 1283 * should never fail.
1284 1284 */
1285 1285 domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1286 1286 pgtable_zero(domain->dom_pgtable_root);
1287 1287
1288 1288 /*
1289 1289 * Only map all physical memory in to the unity domain
1290 1290 * if passthrough is not supported. If it is supported,
1291 1291 * passthrough is set in the context entry instead.
1292 1292 */
1293 1293 if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1294 1294 map_unity_domain(domain);
1295 1295
1296 1296
1297 1297 /*
1298 1298 * put it on the system-wide UNITY domain list
1299 1299 */
1300 1300 mutex_enter(&(immu_domain_lock));
1301 1301 list_insert_tail(&immu_unity_domain_list, domain);
1302 1302 mutex_exit(&(immu_domain_lock));
1303 1303 }
1304 1304
1305 1305 /*
1306 1306 * ddip is the domain-dip - the topmost dip in a domain
1307 1307 * rdip is the requesting-dip - the device which is
1308 1308 * requesting DVMA setup
1309 1309 * if domain is a non-shared domain rdip == ddip
1310 1310 */
1311 1311 static domain_t *
1312 1312 domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
1313 1313 immu_flags_t immu_flags)
1314 1314 {
1315 1315 int kmflags;
1316 1316 domain_t *domain;
1317 1317 char mod_hash_name[128];
1318 1318 immu_devi_t *immu_devi;
1319 1319 int did;
1320 1320 immu_dcookie_t dcookies[1] = {0};
1321 1321 int dcount = 0;
1322 1322
1323 1323 immu_devi = immu_devi_get(rdip);
1324 1324
1325 1325 /*
1326 1326 * First allocate a domainid.
1327 1327 * This routine will never fail, since if we run out
1328 1328 * of domains the unity domain will be allocated.
1329 1329 */
1330 1330 did = did_alloc(immu, rdip, ddip, immu_flags);
1331 1331 if (did == IMMU_UNITY_DID) {
1332 1332 /* domain overflow */
1333 1333 ASSERT(immu->immu_unity_domain);
1334 1334 return (immu->immu_unity_domain);
1335 1335 }
1336 1336
1337 1337 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
1338 1338 domain = kmem_zalloc(sizeof (domain_t), kmflags);
1339 1339 if (domain == NULL) {
1340 1340 ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
1341 1341 "structure for device. IOMMU unit: %s", immu->immu_name);
1342 1342 /*NOTREACHED*/
1343 1343 }
1344 1344
1345 1345 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1346 1346
1347 1347 (void) snprintf(mod_hash_name, sizeof (mod_hash_name),
1348 1348 "immu%s-domain%d-pava-hash", immu->immu_name, did);
1349 1349
1350 1350 domain->dom_did = did;
1351 1351 domain->dom_immu = immu;
1352 1352 domain->dom_maptype = IMMU_MAPTYPE_XLATE;
1353 1353 domain->dom_dip = ddip;
1354 1354
1355 1355 /*
1356 1356 * Create xlate DVMA arena for this domain.
1357 1357 */
1358 1358 create_xlate_arena(immu, domain, rdip, immu_flags);
1359 1359
1360 1360 /*
1361 1361 * Setup the domain's initial page table
1362 1362 */
1363 1363 domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
1364 1364 if (domain->dom_pgtable_root == NULL) {
1365 1365 ddi_err(DER_PANIC, rdip, "Failed to alloc root "
1366 1366 "pgtable for domain (%d). IOMMU unit: %s",
1367 1367 domain->dom_did, immu->immu_name);
1368 1368 /*NOTREACHED*/
1369 1369 }
1370 1370 pgtable_zero(domain->dom_pgtable_root);
1371 1371
1372 1372 /*
1373 1373 * Since this is a immu unit-specific domain, put it on
1374 1374 * the per-immu domain list.
1375 1375 */
1376 1376 mutex_enter(&(immu->immu_lock));
1377 1377 list_insert_head(&immu->immu_domain_list, domain);
1378 1378 mutex_exit(&(immu->immu_lock));
1379 1379
1380 1380 /*
1381 1381 * Also put it on the system-wide xlate domain list
1382 1382 */
1383 1383 mutex_enter(&(immu_domain_lock));
1384 1384 list_insert_head(&immu_xlate_domain_list, domain);
1385 1385 mutex_exit(&(immu_domain_lock));
1386 1386
1387 1387 bdf_domain_insert(immu_devi, domain);
1388 1388
1389 1389 #ifdef BUGGY_DRIVERS
1390 1390 /*
1391 1391 * Map page0. Some broken HW/FW access it.
1392 1392 */
1393 1393 dcookies[0].dck_paddr = 0;
1394 1394 dcookies[0].dck_npages = 1;
1395 1395 dcount = 1;
1396 1396 (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
1397 1397 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
1398 1398 #endif
1399 1399 return (domain);
1400 1400 }
1401 1401
1402 1402 /*
1403 1403 * Create domainid arena.
1404 1404 * Domainid 0 is reserved by Vt-d spec and cannot be used by
1405 1405 * system software.
1406 1406 * Domainid 1 is reserved by solaris and used for *all* of the following:
1407 1407 * as the "uninitialized" domain - For devices not yet controlled
1408 1408 * by Solaris
1409 1409 * as the "unity" domain - For devices that will always belong
1410 1410 * to the unity domain
1411 1411 * as the "overflow" domain - Used for any new device after we
1412 1412 * run out of domains
1413 1413 * All of the above domains map into a single domain with
1414 1414 * domainid 1 and UNITY DVMA mapping
1415 1415 * Each IMMU unity has its own unity/uninit/overflow domain
1416 1416 */
1417 1417 static void
1418 1418 did_init(immu_t *immu)
1419 1419 {
1420 1420 (void) snprintf(immu->immu_did_arena_name,
1421 1421 sizeof (immu->immu_did_arena_name),
1422 1422 "%s_domainid_arena", immu->immu_name);
1423 1423
1424 1424 ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
1425 1425 immu->immu_did_arena_name);
1426 1426
1427 1427 immu->immu_did_arena = vmem_create(
1428 1428 immu->immu_did_arena_name,
1429 1429 (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */
1430 1430 immu->immu_max_domains - IMMU_UNITY_DID,
1431 1431 1, /* quantum */
1432 1432 NULL, /* afunc */
1433 1433 NULL, /* ffunc */
1434 1434 NULL, /* source */
1435 1435 0, /* qcache_max */
1436 1436 VM_SLEEP);
1437 1437
1438 1438 /* Even with SLEEP flag, vmem_create() can fail */
1439 1439 if (immu->immu_did_arena == NULL) {
1440 1440 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
1441 1441 "IOMMU domainid allocator: %s", immu->immu_name,
1442 1442 immu->immu_did_arena_name);
1443 1443 }
1444 1444 }
1445 1445
1446 1446 /* ######################### CONTEXT CODE ################################# */
1447 1447
1448 1448 static void
1449 1449 context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
1450 1450 int bus, int devfunc)
1451 1451 {
1452 1452 pgtable_t *context;
1453 1453 pgtable_t *pgtable_root;
1454 1454 hw_rce_t *hw_rent;
1455 1455 hw_rce_t *hw_cent;
1456 1456 hw_rce_t *ctxp;
1457 1457 int sid;
1458 1458 krw_t rwtype;
1459 1459 boolean_t fill_root;
1460 1460 boolean_t fill_ctx;
1461 1461
1462 1462 pgtable_root = domain->dom_pgtable_root;
1463 1463
1464 1464 ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1465 1465 context = *(pgtable_t **)(ctxp + bus);
1466 1466 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
1467 1467
1468 1468 fill_root = B_FALSE;
1469 1469 fill_ctx = B_FALSE;
1470 1470
1471 1471 /* Check the most common case first with reader lock */
1472 1472 rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
1473 1473 rwtype = RW_READER;
1474 1474 again:
1475 1475 if (ROOT_GET_P(hw_rent)) {
1476 1476 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1477 1477 if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
1478 1478 rw_exit(&(immu->immu_ctx_rwlock));
1479 1479 return;
1480 1480 } else {
1481 1481 fill_ctx = B_TRUE;
1482 1482 }
1483 1483 } else {
1484 1484 fill_root = B_TRUE;
1485 1485 fill_ctx = B_TRUE;
1486 1486 }
1487 1487
1488 1488 if (rwtype == RW_READER &&
1489 1489 rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
1490 1490 rw_exit(&(immu->immu_ctx_rwlock));
1491 1491 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1492 1492 rwtype = RW_WRITER;
1493 1493 goto again;
1494 1494 }
1495 1495 rwtype = RW_WRITER;
1496 1496
1497 1497 if (fill_root == B_TRUE) {
1498 1498 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1499 1499 ROOT_SET_P(hw_rent);
1500 1500 immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
1501 1501 }
1502 1502
1503 1503 if (fill_ctx == B_TRUE) {
1504 1504 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1505 1505 /* need to disable context entry before reprogramming it */
1506 1506 bzero(hw_cent, sizeof (hw_rce_t));
1507 1507
1508 1508 /* flush caches */
1509 1509 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1510 1510
1511 1511 sid = ((bus << 8) | devfunc);
1512 1512 immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
1513 1513 &immu->immu_ctx_inv_wait);
1514 1514
1515 1515 CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
1516 1516 CONT_SET_DID(hw_cent, domain->dom_did);
1517 1517 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1518 1518 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1519 1519 if (domain->dom_did == IMMU_UNITY_DID &&
1520 1520 IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1521 1521 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1522 1522 else
1523 1523 /*LINTED*/
1524 1524 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1525 1525 CONT_SET_P(hw_cent);
1526 1526 if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
1527 1527 CONT_SET_EH(hw_cent);
1528 1528 if (immu_use_alh)
1529 1529 CONT_SET_ALH(hw_cent);
1530 1530 }
1531 1531 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1532 1532 }
1533 1533 rw_exit(&(immu->immu_ctx_rwlock));
1534 1534 }
1535 1535
1536 1536 static pgtable_t *
1537 1537 context_create(immu_t *immu)
1538 1538 {
1539 1539 int bus;
1540 1540 int devfunc;
1541 1541 pgtable_t *root_table;
1542 1542 pgtable_t *context;
1543 1543 pgtable_t *pgtable_root;
1544 1544 hw_rce_t *ctxp;
1545 1545 hw_rce_t *hw_rent;
1546 1546 hw_rce_t *hw_cent;
1547 1547
1548 1548 /* Allocate a zeroed root table (4K 256b entries) */
1549 1549 root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1550 1550 pgtable_zero(root_table);
1551 1551
1552 1552 /*
1553 1553 * Setup context tables for all possible root table entries.
1554 1554 * Start out with unity domains for all entries.
1555 1555 */
1556 1556 ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1557 1557 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
1558 1558 for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
1559 1559 context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1560 1560 pgtable_zero(context);
1561 1561 ROOT_SET_P(hw_rent);
1562 1562 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1563 1563 hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
1564 1564 for (devfunc = 0; devfunc < IMMU_CONT_NUM;
1565 1565 devfunc++, hw_cent++) {
1566 1566 pgtable_root =
1567 1567 immu->immu_unity_domain->dom_pgtable_root;
1568 1568 CONT_SET_DID(hw_cent,
1569 1569 immu->immu_unity_domain->dom_did);
1570 1570 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1571 1571 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1572 1572 if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1573 1573 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1574 1574 else
1575 1575 /*LINTED*/
1576 1576 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1577 1577 CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
1578 1578 CONT_SET_P(hw_cent);
1579 1579 }
1580 1580 immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
1581 1581 *((pgtable_t **)ctxp) = context;
1582 1582 }
1583 1583
1584 1584 return (root_table);
1585 1585 }
1586 1586
1587 1587 /*
1588 1588 * Called during rootnex attach, so no locks needed
1589 1589 */
1590 1590 static void
1591 1591 context_init(immu_t *immu)
1592 1592 {
1593 1593 rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
1594 1594
1595 1595 immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);
1596 1596
1597 1597 immu_regs_wbf_flush(immu);
1598 1598
1599 1599 immu->immu_ctx_root = context_create(immu);
1600 1600
1601 1601 immu_regs_set_root_table(immu);
1602 1602
1603 1603 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1604 1604 immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1605 1605 immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1606 1606 rw_exit(&(immu->immu_ctx_rwlock));
1607 1607 }
1608 1608
1609 1609
1610 1610 /*
1611 1611 * Find top pcib
1612 1612 */
1613 1613 static int
1614 1614 find_top_pcib(dev_info_t *dip, void *arg)
1615 1615 {
1616 1616 immu_devi_t *immu_devi;
1617 1617 dev_info_t **pcibdipp = (dev_info_t **)arg;
1618 1618
1619 1619 immu_devi = immu_devi_get(dip);
1620 1620
1621 1621 if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1622 1622 *pcibdipp = dip;
1623 1623 }
1624 1624
1625 1625 return (DDI_WALK_CONTINUE);
1626 1626 }
1627 1627
1628 1628 static int
1629 1629 immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
1630 1630 dev_info_t *rdip, immu_flags_t immu_flags)
1631 1631 {
1632 1632 immu_devi_t *r_immu_devi;
1633 1633 immu_devi_t *d_immu_devi;
1634 1634 int r_bus;
1635 1635 int d_bus;
1636 1636 int r_devfunc;
1637 1637 int d_devfunc;
1638 1638 immu_pcib_t d_pcib_type;
1639 1639 dev_info_t *pcibdip;
1640 1640
1641 1641 if (ddip == NULL || rdip == NULL ||
1642 1642 ddip == root_devinfo || rdip == root_devinfo) {
1643 1643 ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
1644 1644 "request-dip are NULL or are root devinfo");
1645 1645 return (DDI_FAILURE);
1646 1646 }
1647 1647
1648 1648 /*
1649 1649 * We need to set the context fields
1650 1650 * based on what type of device rdip and ddip are.
1651 1651 * To do that we need the immu_devi field.
1652 1652 * Set the immu_devi field (if not already set)
1653 1653 */
1654 1654 if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
1655 1655 ddi_err(DER_MODE, rdip,
1656 1656 "immu_context_update: failed to set immu_devi for ddip");
1657 1657 return (DDI_FAILURE);
1658 1658 }
1659 1659
1660 1660 if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
1661 1661 ddi_err(DER_MODE, rdip,
1662 1662 "immu_context_update: failed to set immu_devi for rdip");
1663 1663 return (DDI_FAILURE);
1664 1664 }
1665 1665
1666 1666 d_immu_devi = immu_devi_get(ddip);
1667 1667 r_immu_devi = immu_devi_get(rdip);
1668 1668
1669 1669 d_bus = d_immu_devi->imd_bus;
1670 1670 d_devfunc = d_immu_devi->imd_devfunc;
1671 1671 d_pcib_type = d_immu_devi->imd_pcib_type;
1672 1672 r_bus = r_immu_devi->imd_bus;
1673 1673 r_devfunc = r_immu_devi->imd_devfunc;
1674 1674
1675 1675 if (rdip == ddip) {
1676 1676 /* rdip is a PCIE device. set context for it only */
1677 1677 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1678 1678 r_devfunc);
1679 1679 #ifdef BUGGY_DRIVERS
1680 1680 } else if (r_immu_devi == d_immu_devi) {
1681 1681 #ifdef TEST
1682 1682 ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
1683 1683 "0x%lx are identical", rdip, ddip);
1684 1684 #endif
1685 1685 /* rdip is a PCIE device. set context for it only */
1686 1686 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1687 1687 r_devfunc);
1688 1688 #endif
1689 1689 } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
1690 1690 /*
1691 1691 * ddip is a PCIE_PCI bridge. Set context for ddip's
1692 1692 * secondary bus. If rdip is on ddip's secondary
1693 1693 * bus, set context for rdip. Else, set context
1694 1694 * for rdip's PCI bridge on ddip's secondary bus.
1695 1695 */
1696 1696 context_set(immu, domain, immu->immu_ctx_root,
1697 1697 d_immu_devi->imd_sec, 0);
1698 1698 if (d_immu_devi->imd_sec == r_bus) {
1699 1699 context_set(immu, domain, immu->immu_ctx_root,
1700 1700 r_bus, r_devfunc);
1701 1701 } else {
1702 1702 pcibdip = NULL;
1703 1703 if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
1704 1704 &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
1705 1705 pcibdip != NULL) {
1706 1706 r_immu_devi = immu_devi_get(pcibdip);
1707 1707 r_bus = r_immu_devi->imd_bus;
1708 1708 r_devfunc = r_immu_devi->imd_devfunc;
1709 1709 context_set(immu, domain, immu->immu_ctx_root,
1710 1710 r_bus, r_devfunc);
1711 1711 } else {
1712 1712 ddi_err(DER_PANIC, rdip, "Failed to find PCI "
1713 1713 " bridge for PCI device");
1714 1714 /*NOTREACHED*/
1715 1715 }
1716 1716 }
1717 1717 } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
1718 1718 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1719 1719 d_devfunc);
1720 1720 } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
1721 1721 /*
1722 1722 * ddip is a PCIE device which has a non-PCI device under it
1723 1723 * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
1724 1724 */
1725 1725 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1726 1726 d_devfunc);
1727 1727 } else {
1728 1728 ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
1729 1729 "set iommu context.");
1730 1730 /*NOTREACHED*/
1731 1731 }
1732 1732
1733 1733 /* XXX do we need a membar_producer() here */
1734 1734 return (DDI_SUCCESS);
1735 1735 }
1736 1736
1737 1737 /* ##################### END CONTEXT CODE ################################## */
1738 1738 /* ##################### MAPPING CODE ################################## */
1739 1739
1740 1740
1741 1741 #ifdef DEBUG
1742 1742 static boolean_t
1743 1743 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
1744 1744 dev_info_t *rdip, immu_flags_t immu_flags)
1745 1745 {
1746 1746 /* The PDTE must be set i.e. present bit is set */
1747 1747 if (!PDTE_P(pdte)) {
1748 1748 ddi_err(DER_MODE, rdip, "No present flag");
1749 1749 return (B_FALSE);
1750 1750 }
1751 1751
1752 1752 /*
1753 1753 * Just assert to check most significant system software field
1754 1754 * (PDTE_SW4) as it is same as present bit and we
1755 1755 * checked that above
1756 1756 */
1757 1757 ASSERT(PDTE_SW4(pdte));
1758 1758
1759 1759 /*
1760 1760 * TM field should be clear if not reserved.
1761 1761 * non-leaf is always reserved
1762 1762 */
1763 1763 if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
1764 1764 if (PDTE_TM(pdte)) {
1765 1765 ddi_err(DER_MODE, rdip, "TM flag set");
1766 1766 return (B_FALSE);
1767 1767 }
1768 1768 }
1769 1769
1770 1770 /*
1771 1771 * The SW3 field is not used and must be clear
1772 1772 */
1773 1773 if (PDTE_SW3(pdte)) {
1774 1774 ddi_err(DER_MODE, rdip, "SW3 set");
1775 1775 return (B_FALSE);
1776 1776 }
1777 1777
1778 1778 /*
1779 1779 * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
1780 1780 */
1781 1781 if (next == NULL) {
1782 1782 ASSERT(paddr % IMMU_PAGESIZE == 0);
1783 1783 if (PDTE_PADDR(pdte) != paddr) {
1784 1784 ddi_err(DER_MODE, rdip,
1785 1785 "PTE paddr mismatch: %lx != %lx",
1786 1786 PDTE_PADDR(pdte), paddr);
1787 1787 return (B_FALSE);
1788 1788 }
1789 1789 } else {
1790 1790 if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
1791 1791 ddi_err(DER_MODE, rdip,
1792 1792 "PDE paddr mismatch: %lx != %lx",
1793 1793 PDTE_PADDR(pdte), next->hwpg_paddr);
1794 1794 return (B_FALSE);
1795 1795 }
1796 1796 }
1797 1797
1798 1798 /*
1799 1799 * SNP field should be clear if not reserved.
1800 1800 * non-leaf is always reserved
1801 1801 */
1802 1802 if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
1803 1803 if (PDTE_SNP(pdte)) {
1804 1804 ddi_err(DER_MODE, rdip, "SNP set");
1805 1805 return (B_FALSE);
1806 1806 }
1807 1807 }
1808 1808
1809 1809 /* second field available for system software should be clear */
1810 1810 if (PDTE_SW2(pdte)) {
1811 1811 ddi_err(DER_MODE, rdip, "SW2 set");
1812 1812 return (B_FALSE);
1813 1813 }
1814 1814
1815 1815 /* Super pages field should be clear */
1816 1816 if (PDTE_SP(pdte)) {
1817 1817 ddi_err(DER_MODE, rdip, "SP set");
1818 1818 return (B_FALSE);
1819 1819 }
1820 1820
1821 1821 /*
1822 1822 * least significant field available for
1823 1823 * system software should be clear
1824 1824 */
1825 1825 if (PDTE_SW1(pdte)) {
1826 1826 ddi_err(DER_MODE, rdip, "SW1 set");
1827 1827 return (B_FALSE);
1828 1828 }
1829 1829
1830 1830 if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
1831 1831 ddi_err(DER_MODE, rdip, "READ not set");
1832 1832 return (B_FALSE);
1833 1833 }
1834 1834
1835 1835 if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
1836 1836 ddi_err(DER_MODE, rdip, "WRITE not set");
1837 1837 return (B_FALSE);
1838 1838 }
1839 1839
1840 1840 return (B_TRUE);
1841 1841 }
1842 1842 #endif
1843 1843
1844 1844 /*ARGSUSED*/
1845 1845 static void
1846 1846 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1847 1847 uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1848 1848 {
1849 1849 uint64_t npages;
1850 1850 uint64_t dvma;
1851 1851 pgtable_t *pgtable;
1852 1852 hw_pdte_t *hwp;
1853 1853 hw_pdte_t *shwp;
1854 1854 int idx;
1855 1855
1856 1856 pgtable = xlate->xlt_pgtable;
1857 1857 idx = xlate->xlt_idx;
1858 1858
1859 1859 dvma = *dvma_ptr;
1860 1860 npages = *npages_ptr;
1861 1861
1862 1862 /*
1863 1863 * since a caller gets a unique dvma for a physical address,
1864 1864 * no other concurrent thread will be writing to the same
1865 1865 * PTE even if it has the same paddr. So no locks needed.
1866 1866 */
1867 1867 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1868 1868
1869 1869 hwp = shwp;
1870 1870 for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1871 1871 PDTE_CLEAR_P(*hwp);
1872 1872 dvma += IMMU_PAGESIZE;
1873 1873 npages--;
1874 1874 }
1875 1875
1876 1876 *dvma_ptr = dvma;
1877 1877 *npages_ptr = npages;
1878 1878
1879 1879 xlate->xlt_idx = idx;
1880 1880 }
1881 1881
1882 1882 static void
1883 1883 xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1884 1884 {
1885 1885 int level;
1886 1886 uint64_t offbits;
1887 1887
1888 1888 /*
1889 1889 * Skip the first 12 bits which is the offset into
1890 1890 * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1891 1891 */
1892 1892 offbits = dvma >> IMMU_PAGESHIFT;
1893 1893
1894 1894 /* skip to level 1 i.e. leaf PTE */
1895 1895 for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1896 1896 xlate->xlt_level = level;
1897 1897 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1898 1898 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1899 1899 xlate->xlt_pgtable = NULL;
1900 1900 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1901 1901 }
1902 1902 }
1903 1903
1904 1904 /*
1905 1905 * Read the pgtables
1906 1906 */
1907 1907 static boolean_t
1908 1908 PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
1909 1909 {
1910 1910 pgtable_t *pgtable;
1911 1911 pgtable_t *next;
1912 1912 uint_t idx;
1913 1913
1914 1914 /* start with highest level pgtable i.e. root */
1915 1915 xlate += nlevels;
1916 1916
1917 1917 if (xlate->xlt_pgtable == NULL) {
1918 1918 xlate->xlt_pgtable = domain->dom_pgtable_root;
1919 1919 }
1920 1920
1921 1921 for (; xlate->xlt_level > 1; xlate--) {
1922 1922 idx = xlate->xlt_idx;
1923 1923 pgtable = xlate->xlt_pgtable;
1924 1924
1925 1925 if ((xlate - 1)->xlt_pgtable) {
1926 1926 continue;
1927 1927 }
1928 1928
1929 1929 /* Lock the pgtable in read mode */
1930 1930 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
1931 1931
1932 1932 /*
1933 1933 * since we are unmapping, the pgtable should
1934 1934 * already point to a leafier pgtable.
1935 1935 */
1936 1936 next = *(pgtable->swpg_next_array + idx);
1937 1937 (xlate - 1)->xlt_pgtable = next;
1938 1938 rw_exit(&(pgtable->swpg_rwlock));
1939 1939 if (next == NULL)
1940 1940 return (B_FALSE);
1941 1941 }
1942 1942
1943 1943 return (B_TRUE);
1944 1944 }
1945 1945
1946 1946 static void
1947 1947 immu_fault_walk(void *arg, void *base, size_t len)
1948 1948 {
1949 1949 uint64_t dvma, start;
1950 1950
1951 1951 dvma = *(uint64_t *)arg;
1952 1952 start = (uint64_t)(uintptr_t)base;
1953 1953
1954 1954 if (dvma >= start && dvma < (start + len)) {
1955 1955 ddi_err(DER_WARN, NULL,
1956 1956 "faulting DVMA address is in vmem arena "
1957 1957 "(%" PRIx64 "-%" PRIx64 ")",
1958 1958 start, start + len);
1959 1959 *(uint64_t *)arg = ~0ULL;
1960 1960 }
1961 1961 }
1962 1962
1963 1963 void
1964 1964 immu_print_fault_info(uint_t sid, uint64_t dvma)
1965 1965 {
1966 1966 int nlevels;
1967 1967 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1968 1968 xlate_t *xlatep;
1969 1969 hw_pdte_t pte;
1970 1970 domain_t *domain;
1971 1971 immu_t *immu;
1972 1972 uint64_t dvma_arg;
1973 1973
1974 1974 if (mod_hash_find(bdf_domain_hash,
1975 1975 (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1976 1976 ddi_err(DER_WARN, NULL,
1977 1977 "no domain for faulting SID %08x", sid);
1978 1978 return;
1979 1979 }
1980 1980
1981 1981 immu = domain->dom_immu;
1982 1982
1983 1983 dvma_arg = dvma;
1984 1984 vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1985 1985 (void *)&dvma_arg);
1986 1986 if (dvma_arg != ~0ULL)
1987 1987 ddi_err(DER_WARN, domain->dom_dip,
1988 1988 "faulting DVMA address is not in vmem arena");
1989 1989
1990 1990 nlevels = immu->immu_dvma_nlevels;
1991 1991 xlate_setup(dvma, xlate, nlevels);
1992 1992
1993 1993 if (!PDE_lookup(domain, xlate, nlevels)) {
1994 1994 ddi_err(DER_WARN, domain->dom_dip,
1995 1995 "pte not found in domid %d for faulting addr %" PRIx64,
1996 1996 domain->dom_did, dvma);
1997 1997 return;
1998 1998 }
1999 1999
2000 2000 xlatep = &xlate[1];
2001 2001 pte = *((hw_pdte_t *)
2002 2002 (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2003 2003
2004 2004 ddi_err(DER_WARN, domain->dom_dip,
2005 2005 "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2006 2006 (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2007 2007 }
2008 2008
2009 2009 /*ARGSUSED*/
2010 2010 static void
2011 2011 PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
2012 2012 dev_info_t *rdip, immu_flags_t immu_flags)
2013 2013 {
2014 2014 hw_pdte_t pte;
2015 2015
2016 2016 #ifndef DEBUG
2017 2017 pte = immu->immu_ptemask;
2018 2018 PDTE_SET_PADDR(pte, paddr);
2019 2019 #else
2020 2020 pte = *hwp;
2021 2021
2022 2022 if (PDTE_P(pte)) {
2023 2023 if (PDTE_PADDR(pte) != paddr) {
2024 2024 ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
2025 2025 PDTE_PADDR(pte), paddr);
2026 2026 }
2027 2027 #ifdef BUGGY_DRIVERS
2028 2028 return;
2029 2029 #else
2030 2030 goto out;
2031 2031 #endif
2032 2032 }
2033 2033
2034 2034 /* clear TM field if not reserved */
2035 2035 if (immu->immu_TM_reserved == B_FALSE) {
2036 2036 PDTE_CLEAR_TM(pte);
2037 2037 }
2038 2038
2039 2039 /* Clear 3rd field for system software - not used */
2040 2040 PDTE_CLEAR_SW3(pte);
2041 2041
2042 2042 /* Set paddr */
2043 2043 ASSERT(paddr % IMMU_PAGESIZE == 0);
2044 2044 PDTE_CLEAR_PADDR(pte);
2045 2045 PDTE_SET_PADDR(pte, paddr);
2046 2046
2047 2047 /* clear SNP field if not reserved. */
2048 2048 if (immu->immu_SNP_reserved == B_FALSE) {
2049 2049 PDTE_CLEAR_SNP(pte);
2050 2050 }
2051 2051
2052 2052 /* Clear SW2 field available for software */
2053 2053 PDTE_CLEAR_SW2(pte);
2054 2054
2055 2055
2056 2056 /* SP is don't care for PTEs. Clear it for cleanliness */
2057 2057 PDTE_CLEAR_SP(pte);
2058 2058
2059 2059 /* Clear SW1 field available for software */
2060 2060 PDTE_CLEAR_SW1(pte);
2061 2061
2062 2062 /*
2063 2063 * Now that we are done writing the PTE
2064 2064 * set the "present" flag. Note this present
2065 2065 * flag is a bit in the PDE/PTE that the
2066 2066 * spec says is available for system software.
2067 2067 * This is an implementation detail of Solaris
2068 2068 * bare-metal Intel IOMMU.
2069 2069 * The present field in a PDE/PTE is not defined
2070 2070 * by the Vt-d spec
2071 2071 */
2072 2072
2073 2073 PDTE_SET_P(pte);
2074 2074
2075 2075 pte |= immu->immu_ptemask;
2076 2076
2077 2077 out:
2078 2078 #endif /* DEBUG */
2079 2079 #ifdef BUGGY_DRIVERS
2080 2080 PDTE_SET_READ(pte);
2081 2081 PDTE_SET_WRITE(pte);
2082 2082 #else
2083 2083 if (immu_flags & IMMU_FLAGS_READ)
2084 2084 PDTE_SET_READ(pte);
2085 2085 if (immu_flags & IMMU_FLAGS_WRITE)
2086 2086 PDTE_SET_WRITE(pte);
2087 2087 #endif /* BUGGY_DRIVERS */
2088 2088
2089 2089 *hwp = pte;
2090 2090 }
2091 2091
2092 2092 /*ARGSUSED*/
2093 2093 static void
2094 2094 PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
2095 2095 uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
2096 2096 int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
2097 2097 {
2098 2098 paddr_t paddr;
2099 2099 uint64_t nvpages;
2100 2100 uint64_t nppages;
2101 2101 uint64_t dvma;
2102 2102 pgtable_t *pgtable;
2103 2103 hw_pdte_t *hwp;
2104 2104 hw_pdte_t *shwp;
2105 2105 int idx, nset;
2106 2106 int j;
2107 2107
2108 2108 pgtable = xlate->xlt_pgtable;
2109 2109 idx = xlate->xlt_idx;
2110 2110
2111 2111 dvma = *dvma_ptr;
2112 2112 nvpages = *nvpages_ptr;
2113 2113
2114 2114 /*
2115 2115 * since a caller gets a unique dvma for a physical address,
2116 2116 * no other concurrent thread will be writing to the same
2117 2117 * PTE even if it has the same paddr. So no locks needed.
2118 2118 */
2119 2119 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2120 2120
2121 2121 hwp = shwp;
2122 2122 for (j = dcount - 1; j >= 0; j--) {
2123 2123 if (nvpages <= dcookies[j].dck_npages)
2124 2124 break;
2125 2125 nvpages -= dcookies[j].dck_npages;
2126 2126 }
2127 2127
2128 2128 nppages = nvpages;
2129 2129 paddr = dcookies[j].dck_paddr +
2130 2130 (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
2131 2131
2132 2132 nvpages = *nvpages_ptr;
2133 2133 nset = 0;
2134 2134 for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
2135 2135 PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
2136 2136 nset++;
2137 2137
2138 2138 ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
2139 2139 == B_TRUE);
2140 2140 nppages--;
2141 2141 nvpages--;
2142 2142 paddr += IMMU_PAGESIZE;
2143 2143 dvma += IMMU_PAGESIZE;
2144 2144
2145 2145 if (nppages == 0) {
2146 2146 j++;
2147 2147 }
2148 2148
2149 2149 if (j == dcount)
2150 2150 break;
2151 2151
2152 2152 if (nppages == 0) {
2153 2153 nppages = dcookies[j].dck_npages;
2154 2154 paddr = dcookies[j].dck_paddr;
2155 2155 }
2156 2156 }
2157 2157
2158 2158 if (nvpages) {
2159 2159 *dvma_ptr = dvma;
2160 2160 *nvpages_ptr = nvpages;
2161 2161 } else {
2162 2162 *dvma_ptr = 0;
2163 2163 *nvpages_ptr = 0;
2164 2164 }
2165 2165
2166 2166 xlate->xlt_idx = idx;
2167 2167 }
2168 2168
2169 2169 /*ARGSUSED*/
2170 2170 static void
2171 2171 PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
2172 2172 dev_info_t *rdip, immu_flags_t immu_flags)
2173 2173 {
2174 2174 hw_pdte_t pde;
2175 2175
2176 2176 pde = *hwp;
2177 2177
2178 2178 /* if PDE is already set, make sure it is correct */
2179 2179 if (PDTE_P(pde)) {
2180 2180 ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
2181 2181 #ifdef BUGGY_DRIVERS
2182 2182 return;
2183 2183 #else
2184 2184 goto out;
2185 2185 #endif
2186 2186 }
2187 2187
2188 2188 /* Dont touch SW4, it is the present bit */
2189 2189
2190 2190 /* don't touch TM field it is reserved for PDEs */
2191 2191
2192 2192 /* 3rd field available for system software is not used */
2193 2193 PDTE_CLEAR_SW3(pde);
2194 2194
2195 2195 /* Set next level pgtable-paddr for PDE */
2196 2196 PDTE_CLEAR_PADDR(pde);
2197 2197 PDTE_SET_PADDR(pde, next->hwpg_paddr);
2198 2198
2199 2199 /* don't touch SNP field it is reserved for PDEs */
2200 2200
2201 2201 /* Clear second field available for system software */
2202 2202 PDTE_CLEAR_SW2(pde);
2203 2203
2204 2204 /* No super pages for PDEs */
2205 2205 PDTE_CLEAR_SP(pde);
2206 2206
2207 2207 /* Clear SW1 for software */
2208 2208 PDTE_CLEAR_SW1(pde);
2209 2209
2210 2210 /*
2211 2211 * Now that we are done writing the PDE
2212 2212 * set the "present" flag. Note this present
2213 2213 * flag is a bit in the PDE/PTE that the
2214 2214 * spec says is available for system software.
2215 2215 * This is an implementation detail of Solaris
2216 2216 * base-metal Intel IOMMU.
2217 2217 * The present field in a PDE/PTE is not defined
2218 2218 * by the Vt-d spec
2219 2219 */
2220 2220
2221 2221 out:
2222 2222 #ifdef BUGGY_DRIVERS
2223 2223 PDTE_SET_READ(pde);
2224 2224 PDTE_SET_WRITE(pde);
2225 2225 #else
2226 2226 if (immu_flags & IMMU_FLAGS_READ)
2227 2227 PDTE_SET_READ(pde);
2228 2228 if (immu_flags & IMMU_FLAGS_WRITE)
2229 2229 PDTE_SET_WRITE(pde);
2230 2230 #endif
2231 2231
2232 2232 PDTE_SET_P(pde);
2233 2233
2234 2234 *hwp = pde;
2235 2235 }
2236 2236
2237 2237 /*
2238 2238 * Used to set PDEs
2239 2239 */
2240 2240 static boolean_t
2241 2241 PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
2242 2242 dev_info_t *rdip, immu_flags_t immu_flags)
2243 2243 {
2244 2244 pgtable_t *pgtable;
2245 2245 pgtable_t *new;
2246 2246 pgtable_t *next;
2247 2247 hw_pdte_t *hwp;
2248 2248 int level;
2249 2249 uint_t idx;
2250 2250 krw_t rwtype;
2251 2251 boolean_t set = B_FALSE;
2252 2252
2253 2253 /* start with highest level pgtable i.e. root */
2254 2254 xlate += nlevels;
2255 2255
2256 2256 new = NULL;
2257 2257 xlate->xlt_pgtable = domain->dom_pgtable_root;
2258 2258 for (level = nlevels; level > 1; level--, xlate--) {
2259 2259 idx = xlate->xlt_idx;
2260 2260 pgtable = xlate->xlt_pgtable;
2261 2261
2262 2262 /* Lock the pgtable in READ mode first */
2263 2263 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
2264 2264 rwtype = RW_READER;
2265 2265 again:
2266 2266 hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2267 2267 next = (pgtable->swpg_next_array)[idx];
2268 2268
2269 2269 /*
2270 2270 * check if leafier level already has a pgtable
2271 2271 * if yes, verify
2272 2272 */
2273 2273 if (next == NULL) {
2274 2274 if (new == NULL) {
2275 2275
2276 2276 IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
2277 2277 rdip, int, level);
2278 2278
2279 2279 new = pgtable_alloc(immu, immu_flags);
2280 2280 if (new == NULL) {
2281 2281 ddi_err(DER_PANIC, rdip,
2282 2282 "pgtable alloc err");
2283 2283 }
2284 2284 pgtable_zero(new);
2285 2285 }
2286 2286
2287 2287 /* Change to a write lock */
2288 2288 if (rwtype == RW_READER &&
2289 2289 rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
2290 2290 rw_exit(&(pgtable->swpg_rwlock));
2291 2291 rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
2292 2292 rwtype = RW_WRITER;
2293 2293 goto again;
2294 2294 }
2295 2295 rwtype = RW_WRITER;
2296 2296 next = new;
2297 2297 (pgtable->swpg_next_array)[idx] = next;
2298 2298 new = NULL;
2299 2299 PDE_set_one(immu, hwp, next, rdip, immu_flags);
2300 2300 set = B_TRUE;
2301 2301 rw_downgrade(&(pgtable->swpg_rwlock));
2302 2302 rwtype = RW_READER;
2303 2303 }
2304 2304 #ifndef BUGGY_DRIVERS
2305 2305 else {
2306 2306 hw_pdte_t pde = *hwp;
2307 2307
2308 2308 /*
2309 2309 * If buggy driver we already set permission
2310 2310 * READ+WRITE so nothing to do for that case
2311 2311 * XXX Check that read writer perms change before
2312 2312 * actually setting perms. Also need to hold lock
2313 2313 */
2314 2314 if (immu_flags & IMMU_FLAGS_READ)
2315 2315 PDTE_SET_READ(pde);
2316 2316 if (immu_flags & IMMU_FLAGS_WRITE)
2317 2317 PDTE_SET_WRITE(pde);
2318 2318
2319 2319 *hwp = pde;
2320 2320 }
2321 2321 #endif
2322 2322
2323 2323 ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
2324 2324 == B_TRUE);
2325 2325
2326 2326 (xlate - 1)->xlt_pgtable = next;
2327 2327 rw_exit(&(pgtable->swpg_rwlock));
2328 2328 }
2329 2329
2330 2330 if (new) {
2331 2331 pgtable_free(immu, new);
2332 2332 }
2333 2333
2334 2334 return (set);
2335 2335 }
2336 2336
2337 2337 /*
2338 2338 * dvma_map()
2339 2339 * map a contiguous range of DVMA pages
2340 2340 *
2341 2341 * immu: IOMMU unit for which we are generating DVMA cookies
2342 2342 * domain: domain
2343 2343 * sdvma: Starting dvma
2344 2344 * spaddr: Starting paddr
2345 2345 * npages: Number of pages
2346 2346 * rdip: requesting device
2347 2347 * immu_flags: flags
2348 2348 */
2349 2349 static boolean_t
2350 2350 dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2351 2351 immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2352 2352 immu_flags_t immu_flags)
2353 2353 {
2354 2354 uint64_t dvma;
2355 2355 uint64_t n;
2356 2356 immu_t *immu = domain->dom_immu;
2357 2357 int nlevels = immu->immu_dvma_nlevels;
2358 2358 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2359 2359 boolean_t pde_set = B_FALSE;
2360 2360
2361 2361 n = snvpages;
2362 2362 dvma = sdvma;
2363 2363
2364 2364 while (n > 0) {
2365 2365 xlate_setup(dvma, xlate, nlevels);
2366 2366
2367 2367 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2368 2368 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2369 2369 == B_TRUE) {
2370 2370 pde_set = B_TRUE;
2371 2371 }
2372 2372
2373 2373 /* set all matching ptes that fit into this leaf pgtable */
2374 2374 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2375 2375 dcount, rdip, immu_flags);
2376 2376 }
2377 2377
2378 2378 return (pde_set);
2379 2379 }
2380 2380
2381 2381 /*
2382 2382 * dvma_unmap()
2383 2383 * unmap a range of DVMAs
2384 2384 *
2385 2385 * immu: IOMMU unit state
2386 2386 * domain: domain for requesting device
2387 2387 * ddip: domain-dip
2388 2388 * dvma: starting DVMA
2389 2389 * npages: Number of IMMU pages to be unmapped
2390 2390 * rdip: requesting device
2391 2391 */
2392 2392 static void
2393 2393 dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2394 2394 dev_info_t *rdip)
2395 2395 {
2396 2396 immu_t *immu = domain->dom_immu;
2397 2397 int nlevels = immu->immu_dvma_nlevels;
2398 2398 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2399 2399 uint64_t n;
2400 2400 uint64_t dvma;
2401 2401
2402 2402 dvma = sdvma;
2403 2403 n = snpages;
2404 2404
2405 2405 while (n > 0) {
2406 2406 /* setup the xlate array */
2407 2407 xlate_setup(dvma, xlate, nlevels);
2408 2408
2409 2409 /* just lookup existing pgtables. Should never fail */
2410 2410 if (!PDE_lookup(domain, xlate, nlevels))
2411 2411 ddi_err(DER_PANIC, rdip,
2412 2412 "PTE not found for addr %" PRIx64,
2413 2413 (unsigned long long)dvma);
2414 2414
2415 2415 /* clear all matching ptes that fit into this leaf pgtable */
2416 2416 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2417 2417 }
2418 2418
2419 2419 /* No need to flush IOTLB after unmap */
2420 2420 }
2421 2421
2422 2422 static uint64_t
2423 2423 dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2424 2424 {
2425 2425 uint64_t dvma;
2426 2426 size_t xsize, align;
2427 2427 uint64_t minaddr, maxaddr;
2428 2428
2429 2429 /* parameters */
2430 2430 xsize = npages * IMMU_PAGESIZE;
2431 2431 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2432 2432 minaddr = dma_attr->dma_attr_addr_lo;
2433 2433 maxaddr = dma_attr->dma_attr_addr_hi + 1;
2434 2434
2435 2435 /* handle the rollover cases */
2436 2436 if (maxaddr < dma_attr->dma_attr_addr_hi) {
2437 2437 maxaddr = dma_attr->dma_attr_addr_hi;
2438 2438 }
2439 2439
2440 2440 /*
2441 2441 * allocate from vmem arena.
2442 2442 */
2443 2443 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2444 2444 xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2445 2445 (void *)(uintptr_t)maxaddr, kmf);
2446 2446
2447 2447 return (dvma);
2448 2448 }
2449 2449
2450 2450 static void
2451 2451 dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
2452 2452 {
2453 2453 int nlevels;
2454 2454 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
2455 2455 uint64_t dvma, n;
2456 2456 size_t xsize, align;
2457 2457 uint64_t minaddr, maxaddr, dmamax;
2458 2458 int on, npte, pindex;
2459 2459 hw_pdte_t *shwp;
2460 2460 immu_t *immu;
2461 2461 domain_t *domain;
2462 2462
2463 2463 /* parameters */
2464 2464 domain = IMMU_DEVI(rdip)->imd_domain;
2465 2465 immu = domain->dom_immu;
2466 2466 nlevels = immu->immu_dvma_nlevels;
2467 2467 xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
2468 2468 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2469 2469 minaddr = dma_attr->dma_attr_addr_lo;
2470 2470 if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
2471 2471 dmamax = dma_attr->dma_attr_seg;
2472 2472 else
2473 2473 dmamax = dma_attr->dma_attr_addr_hi;
2474 2474 maxaddr = dmamax + 1;
2475 2475
2476 2476 if (maxaddr < dmamax)
2477 2477 maxaddr = dmamax;
2478 2478
2479 2479 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2480 2480 xsize, align, 0, dma_attr->dma_attr_seg + 1,
2481 2481 (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
2482 2482
2483 2483 ihp->ihp_predvma = dvma;
2484 2484 ihp->ihp_npremapped = 0;
2485 2485 if (dvma == 0)
2486 2486 return;
2487 2487
2488 2488 n = IMMU_NPREPTES;
2489 2489 pindex = 0;
2490 2490
2491 2491 /*
2492 2492 * Set up a mapping at address 0, just so that all PDPs get allocated
2493 2493 * now. Although this initial mapping should never be used,
2494 2494 * explicitly set it to read-only, just to be safe.
2495 2495 */
2496 2496 while (n > 0) {
2497 2497 xlate_setup(dvma, xlate, nlevels);
2498 2498
2499 2499 (void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
2500 2500 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2501 2501
2502 2502 xlp = &xlate[1];
2503 2503 shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
2504 2504 + xlp->xlt_idx;
2505 2505 on = n;
2506 2506
2507 2507 PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
2508 2508 1, rdip, IMMU_FLAGS_READ);
2509 2509
2510 2510 npte = on - n;
2511 2511
2512 2512 while (npte > 0) {
2513 2513 ihp->ihp_preptes[pindex++] = shwp;
2514 2514 #ifdef BUGGY_DRIVERS
2515 2515 PDTE_CLEAR_WRITE(*shwp);
2516 2516 #endif
2517 2517 shwp++;
2518 2518 npte--;
2519 2519 }
2520 2520 }
2521 2521 }
2522 2522
2523 2523 static void
2524 2524 dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2525 2525 {
2526 2526 domain_t *domain;
2527 2527
2528 2528 domain = IMMU_DEVI(rdip)->imd_domain;
2529 2529
2530 2530 if (ihp->ihp_predvma != 0) {
2531 2531 dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2532 2532 vmem_free(domain->dom_dvma_arena,
2533 2533 (void *)(uintptr_t)ihp->ihp_predvma,
2534 2534 IMMU_NPREPTES * IMMU_PAGESIZE);
2535 2535 }
2536 2536 }
2537 2537
2538 2538 static void
2539 2539 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2540 2540 {
2541 2541 uint64_t size = npages * IMMU_PAGESIZE;
2542 2542
2543 2543 if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2544 2544 return;
2545 2545
2546 2546 vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2547 2547 }
2548 2548
2549 2549 static int
2550 2550 immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2551 2551 immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2552 2552 ddi_dma_obj_t *dma_out)
2553 2553 {
2554 2554 domain_t *domain;
2555 2555 immu_t *immu;
2556 2556 immu_flags_t immu_flags;
2557 2557 ddi_dma_atyp_t buftype;
2558 2558 ddi_dma_obj_t *dmar_object;
2559 2559 ddi_dma_attr_t *attrp;
2560 2560 uint64_t offset, paddr, dvma, sdvma, rwmask;
2561 2561 size_t npages, npgalloc;
2562 2562 uint_t psize, size, pcnt, dmax;
2563 2563 page_t **pparray;
2564 2564 caddr_t vaddr;
2565 2565 page_t *page;
2566 2566 struct as *vas;
2567 2567 immu_dcookie_t *dcookies;
2568 2568 int pde_set;
2569 2569
2570 2570 domain = IMMU_DEVI(rdip)->imd_domain;
2571 2571 immu = domain->dom_immu;
2572 2572 immu_flags = dma_to_immu_flags(dmareq);
2573 2573
2574 2574 attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2575 2575
2576 2576 dmar_object = &dmareq->dmar_object;
2577 2577 pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2578 2578 vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2579 2579 buftype = dmar_object->dmao_type;
2580 2580 size = dmar_object->dmao_size;
2581 2581
2582 2582 IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2583 2583 buftype, uint_t, size);
2584 2584
2585 2585 dcookies = &ihp->ihp_dcookies[0];
2586 2586
2587 2587 pcnt = dmax = 0;
2588 2588
2589 2589 /* retrieve paddr, psize, offset from dmareq */
2590 2590 if (buftype == DMA_OTYP_PAGES) {
2591 2591 page = dmar_object->dmao_obj.pp_obj.pp_pp;
2592 2592 offset = dmar_object->dmao_obj.pp_obj.pp_offset &
2593 2593 MMU_PAGEOFFSET;
2594 2594 paddr = pfn_to_pa(page->p_pagenum) + offset;
2595 2595 psize = MIN((MMU_PAGESIZE - offset), size);
2596 2596 page = page->p_next;
2597 2597 vas = dmar_object->dmao_obj.virt_obj.v_as;
2598 2598 } else {
2599 2599 if (vas == NULL) {
2600 2600 vas = &kas;
2601 2601 }
2602 2602 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2603 2603 if (pparray != NULL) {
2604 2604 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2605 2605 psize = MIN((MMU_PAGESIZE - offset), size);
2606 2606 pcnt++;
2607 2607 } else {
2608 2608 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2609 2609 vaddr)) + offset;
2610 2610 psize = MIN(size, (MMU_PAGESIZE - offset));
2611 2611 vaddr += psize;
2612 2612 }
2613 2613 }
2614 2614
2615 2615 npgalloc = IMMU_BTOPR(size + offset);
2616 2616
2617 2617 if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2618 2618 #ifdef BUGGY_DRIVERS
2619 2619 rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2620 2620 #else
2621 2621 rwmask = immu->immu_ptemask;
2622 2622 if (immu_flags & IMMU_FLAGS_READ)
2623 2623 rwmask |= PDTE_MASK_R;
2624 2624 if (immu_flags & IMMU_FLAGS_WRITE)
2625 2625 rwmask |= PDTE_MASK_W;
2626 2626 #endif
2627 2627 #ifdef DEBUG
2628 2628 rwmask |= PDTE_MASK_P;
2629 2629 #endif
2630 2630 sdvma = ihp->ihp_predvma;
2631 2631 ihp->ihp_npremapped = npgalloc;
2632 2632 *ihp->ihp_preptes[0] =
2633 2633 PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2634 2634 } else {
2635 2635 ihp->ihp_npremapped = 0;
2636 2636 sdvma = dvma_alloc(domain, attrp, npgalloc,
2637 2637 dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2638 2638 if (sdvma == 0)
2639 2639 return (DDI_DMA_NORESOURCES);
2640 2640
2641 2641 dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2642 2642 dcookies[0].dck_npages = 1;
2643 2643 }
2644 2644
2645 2645 IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2646 2646 uint64_t, sdvma);
2647 2647
2648 2648 dvma = sdvma;
2649 2649 pde_set = 0;
2650 2650 npages = 1;
2651 2651 size -= psize;
2652 2652 while (size > 0) {
2653 2653 /* get the size for this page (i.e. partial or full page) */
2654 2654 psize = MIN(size, MMU_PAGESIZE);
2655 2655 if (buftype == DMA_OTYP_PAGES) {
2656 2656 /* get the paddr from the page_t */
2657 2657 paddr = pfn_to_pa(page->p_pagenum);
2658 2658 page = page->p_next;
2659 2659 } else if (pparray != NULL) {
2660 2660 /* index into the array of page_t's to get the paddr */
2661 2661 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2662 2662 pcnt++;
2663 2663 } else {
2664 2664 /* call into the VM to get the paddr */
2665 2665 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2666 2666 vaddr += psize;
2667 2667 }
2668 2668
2669 2669 npages++;
2670 2670
2671 2671 if (ihp->ihp_npremapped > 0) {
2672 2672 *ihp->ihp_preptes[npages - 1] =
2673 2673 PDTE_PADDR(paddr) | rwmask;
2674 2674 } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2675 2675 dcookies[dmax].dck_npages++;
2676 2676 } else {
2677 2677 /* No, we need a new dcookie */
2678 2678 if (dmax == (IMMU_NDCK - 1)) {
2679 2679 /*
2680 2680 * Ran out of dcookies. Map them now.
2681 2681 */
2682 2682 if (dvma_map(domain, dvma,
2683 2683 npages, dcookies, dmax + 1, rdip,
2684 2684 immu_flags))
2685 2685 pde_set++;
2686 2686
2687 2687 IMMU_DPROBE4(immu__dvmamap__early,
2688 2688 dev_info_t *, rdip, uint64_t, dvma,
2689 2689 uint_t, npages, uint_t, dmax+1);
2690 2690
2691 2691 dvma += (npages << IMMU_PAGESHIFT);
2692 2692 npages = 0;
2693 2693 dmax = 0;
2694 2694 } else
2695 2695 dmax++;
2696 2696 dcookies[dmax].dck_paddr = paddr;
2697 2697 dcookies[dmax].dck_npages = 1;
2698 2698 }
2699 2699 size -= psize;
2700 2700 }
2701 2701
2702 2702 /*
2703 2703 * Finish up, mapping all, or all of the remaining,
2704 2704 * physical memory ranges.
2705 2705 */
2706 2706 if (ihp->ihp_npremapped == 0 && npages > 0) {
2707 2707 IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2708 2708 uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2709 2709
2710 2710 if (dvma_map(domain, dvma, npages, dcookies,
2711 2711 dmax + 1, rdip, immu_flags))
2712 2712 pde_set++;
2713 2713 }
2714 2714
2715 2715 /* Invalidate the IOTLB */
2716 2716 immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2717 2717 pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2718 2718 &ihp->ihp_inv_wait);
2719 2719
2720 2720 ihp->ihp_ndvseg = 1;
2721 2721 ihp->ihp_dvseg[0].dvs_start = sdvma;
2722 2722 ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2723 2723
2724 2724 dma_out->dmao_size = dmar_object->dmao_size;
2725 2725 dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2726 2726 dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2727 2727 dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2728 2728 dma_out->dmao_type = DMA_OTYP_DVADDR;
2729 2729
2730 2730 return (DDI_DMA_MAPPED);
2731 2731 }
2732 2732
2733 2733 static int
2734 2734 immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2735 2735 {
2736 2736 uint64_t dvma, npages;
2737 2737 domain_t *domain;
2738 2738 struct dvmaseg *dvs;
2739 2739
2740 2740 domain = IMMU_DEVI(rdip)->imd_domain;
2741 2741 dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2742 2742
2743 2743 dvma = dvs[0].dvs_start;
2744 2744 npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2745 2745
2746 2746 #ifdef DEBUG
2747 2747 /* Unmap only in DEBUG mode */
2748 2748 dvma_unmap(domain, dvma, npages, rdip);
2749 2749 #endif
2750 2750 dvma_free(domain, dvma, npages);
2751 2751
2752 2752 IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2753 2753 uint64_t, dvma);
2754 2754
2755 2755 #ifdef DEBUG
2756 2756 /*
2757 2757 * In the DEBUG case, the unmap was actually done,
2758 2758 * but an IOTLB flush was not done. So, an explicit
2759 2759 * write back flush is needed.
2760 2760 */
2761 2761 immu_regs_wbf_flush(domain->dom_immu);
2762 2762 #endif
2763 2763
2764 2764 return (DDI_SUCCESS);
2765 2765 }
2766 2766
2767 2767 /* ############################# Functions exported ######################## */
2768 2768
2769 2769 /*
2770 2770 * setup the DVMA subsystem
2771 2771 * this code runs only for the first IOMMU unit
2772 2772 */
2773 2773 void
2774 2774 immu_dvma_setup(list_t *listp)
2775 2775 {
2776 2776 immu_t *immu;
2777 2777 uint_t kval;
2778 2778 size_t nchains;
2779 2779
2780 2780 /* locks */
2781 2781 mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
2782 2782
2783 2783 /* Create lists */
2784 2784 list_create(&immu_unity_domain_list, sizeof (domain_t),
2785 2785 offsetof(domain_t, dom_maptype_node));
2786 2786 list_create(&immu_xlate_domain_list, sizeof (domain_t),
2787 2787 offsetof(domain_t, dom_maptype_node));
2788 2788
2789 2789 /* Setup BDF domain hash */
2790 2790 nchains = 0xff;
2791 2791 kval = mod_hash_iddata_gen(nchains);
2792 2792
2793 2793 bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
2794 2794 nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
2795 2795 mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
2796 2796 KM_NOSLEEP);
2797 2797
2798 2798 immu = list_head(listp);
2799 2799 for (; immu; immu = list_next(listp, immu)) {
2800 2800 create_unity_domain(immu);
2801 2801 did_init(immu);
2802 2802 context_init(immu);
2803 2803 immu->immu_dvma_setup = B_TRUE;
2804 2804 }
2805 2805 }
2806 2806
2807 2807 /*
2808 2808 * Startup up one DVMA unit
2809 2809 */
2810 2810 void
2811 2811 immu_dvma_startup(immu_t *immu)
2812 2812 {
2813 2813 if (immu_gfxdvma_enable == B_FALSE &&
2814 2814 immu->immu_dvma_gfx_only == B_TRUE) {
2815 2815 return;
2816 2816 }
2817 2817
2818 2818 /*
2819 2819 * DVMA will start once IOMMU is "running"
2820 2820 */
2821 2821 immu->immu_dvma_running = B_TRUE;
2822 2822 }
2823 2823
2824 2824 /*
2825 2825 * immu_dvma_physmem_update()
2826 2826 * called when the installed memory on a
2827 2827 * system increases, to expand domain DVMA
2828 2828 * for domains with UNITY mapping
2829 2829 */
2830 2830 void
2831 2831 immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2832 2832 {
2833 2833 uint64_t start;
2834 2834 uint64_t npages;
2835 2835 int dcount;
2836 2836 immu_dcookie_t dcookies[1] = {0};
2837 2837 domain_t *domain;
2838 2838
2839 2839 /*
2840 2840 * Just walk the system-wide list of domains with
2841 2841 * UNITY mapping. Both the list of *all* domains
2842 2842 * and *UNITY* domains is protected by the same
2843 2843 * single lock
2844 2844 */
2845 2845 mutex_enter(&immu_domain_lock);
2846 2846 domain = list_head(&immu_unity_domain_list);
2847 2847 for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2848 2848 /*
2849 2849 * Nothing to do if the IOMMU supports passthrough.
2850 2850 */
2851 2851 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2852 2852 continue;
2853 2853
2854 2854 /* There is no vmem_arena for unity domains. Just map it */
2855 2855 ddi_err(DER_LOG, domain->dom_dip,
2856 2856 "iommu: unity-domain: Adding map "
2857 2857 "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2858 2858
2859 2859 start = IMMU_ROUNDOWN(addr);
2860 2860 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2861 2861
2862 2862 dcookies[0].dck_paddr = start;
2863 2863 dcookies[0].dck_npages = npages;
2864 2864 dcount = 1;
2865 2865 (void) dvma_map(domain, start, npages,
2866 2866 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2867 2867
2868 2868 }
2869 2869 mutex_exit(&immu_domain_lock);
2870 2870 }
2871 2871
2872 2872 int
2873 2873 immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
2874 2874 {
2875 2875 dev_info_t *ddip, *odip;
2876 2876 immu_t *immu;
2877 2877 domain_t *domain;
2878 2878
2879 2879 odip = rdip;
2880 2880
2881 2881 immu = immu_dvma_get_immu(rdip, immu_flags);
2882 2882 if (immu == NULL) {
2883 2883 /*
2884 2884 * possible that there is no IOMMU unit for this device
2885 2885 * - BIOS bugs are one example.
2886 2886 */
2887 2887 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
2888 2888 return (DDI_DMA_NORESOURCES);
2889 2889 }
2890 2890
2891 2891 /*
2892 2892 * redirect isa devices attached under lpc to lpc dip
2893 2893 */
2894 2894 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
2895 2895 rdip = get_lpc_devinfo(immu, rdip, immu_flags);
2896 2896 if (rdip == NULL) {
2897 2897 ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2898 2898 /*NOTREACHED*/
2899 2899 }
2900 2900 }
2901 2901
2902 2902 /* Reset immu, as redirection can change IMMU */
2903 2903 immu = NULL;
2904 2904
2905 2905 /*
2906 2906 * for gart, redirect to the real graphic devinfo
2907 2907 */
2908 2908 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
2909 2909 rdip = get_gfx_devinfo(rdip);
2910 2910 if (rdip == NULL) {
2911 2911 ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2912 2912 /*NOTREACHED*/
2913 2913 }
2914 2914 }
2915 2915
2916 2916 /*
2917 2917 * Setup DVMA domain for the device. This does
2918 2918 * work only the first time we do DVMA for a
2919 2919 * device.
2920 2920 */
2921 2921 ddip = NULL;
2922 2922 domain = device_domain(rdip, &ddip, immu_flags);
2923 2923 if (domain == NULL) {
2924 2924 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
2925 2925 return (DDI_DMA_NORESOURCES);
2926 2926 }
2927 2927
2928 2928 immu = domain->dom_immu;
2929 2929
2930 2930 /*
2931 2931 * If a domain is found, we must also have a domain dip
2932 2932 * which is the topmost ancestor dip of rdip that shares
2933 2933 * the same domain with rdip.
2934 2934 */
2935 2935 if (domain->dom_did == 0 || ddip == NULL) {
2936 2936 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
2937 2937 domain->dom_did, ddip);
2938 2938 return (DDI_DMA_NORESOURCES);
2939 2939 }
2940 2940
2941 2941 if (odip != rdip)
2942 2942 set_domain(odip, ddip, domain);
2943 2943
2944 2944 /*
2945 2945 * Update the root and context entries
2946 2946 */
2947 2947 if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
2948 2948 != DDI_SUCCESS) {
2949 2949 ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
2950 2950 return (DDI_DMA_NORESOURCES);
2951 2951 }
2952 2952
2953 2953 return (DDI_SUCCESS);
2954 2954 }
2955 2955
2956 2956 int
2957 2957 immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2958 2958 {
2959 2959 immu_dcookie_t dcookies[1] = {0};
2960 2960 boolean_t pde_set;
2961 2961 immu_t *immu;
2962 2962 domain_t *domain;
2963 2963 immu_inv_wait_t iw;
2964 2964
2965 2965 dcookies[0].dck_paddr = mrng->mrng_start;
2966 2966 dcookies[0].dck_npages = mrng->mrng_npages;
2967 2967
2968 2968 domain = IMMU_DEVI(rdip)->imd_domain;
2969 2969 immu = domain->dom_immu;
2970 2970
2971 2971 pde_set = dvma_map(domain, mrng->mrng_start,
2972 2972 mrng->mrng_npages, dcookies, 1, rdip,
2973 2973 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2974 2974
2975 2975 immu_init_inv_wait(&iw, "memrange", B_TRUE);
2976 2976
2977 2977 immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2978 2978 mrng->mrng_npages, pde_set == B_TRUE ?
2979 2979 TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2980 2980
2981 2981 return (DDI_SUCCESS);
2982 2982 }
2983 2983
2984 2984 immu_devi_t *
2985 2985 immu_devi_get(dev_info_t *rdip)
2986 2986 {
2987 2987 immu_devi_t *immu_devi;
2988 2988 volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
2989 2989
2990 2990 /* Just want atomic reads. No need for lock */
2991 2991 immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
2992 2992 0);
2993 2993 return (immu_devi);
2994 2994 }
2995 2995
2996 2996 /*ARGSUSED*/
2997 2997 int
2998 2998 immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
2999 2999 {
3000 3000 immu_hdl_priv_t *ihp;
3001 3001
3002 3002 ihp = buf;
3003 3003 immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3004 3004
3005 3005 return (0);
3006 3006 }
3007 3007
3008 3008 /*
3009 3009 * iommulib interface functions
3010 3010 */
3011 3011 static int
3012 3012 immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3013 3013 {
3014 3014 immu_devi_t *immu_devi;
3015 3015 int ret;
3016 3016
3017 3017 if (!immu_enable)
3018 3018 return (DDI_FAILURE);
3019 3019
3020 3020 /*
3021 3021 * Make sure the device has all the IOMMU structures
3022 3022 * initialized. If this device goes through an IOMMU
3023 3023 * unit (e.g. this probe function returns success),
3024 3024 * this will be called at most N times, with N being
3025 3025 * the number of IOMMUs in the system.
3026 3026 *
3027 3027 * After that, when iommulib_nex_open succeeds,
3028 3028 * we can always assume that this device has all
3029 3029 * the structures initialized. IOMMU_USED(dip) will
3030 3030 * be true. There is no need to find the controlling
3031 3031 * IOMMU/domain again.
3032 3032 */
3033 3033 ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3034 3034 if (ret != DDI_SUCCESS)
3035 3035 return (ret);
3036 3036
3037 3037 immu_devi = IMMU_DEVI(dip);
3038 3038
3039 3039 /*
3040 3040 * For unity domains, there is no need to call in to
3041 3041 * the IOMMU code.
3042 3042 */
3043 3043 if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3044 3044 return (DDI_FAILURE);
3045 3045
3046 3046 if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3047 3047 return (DDI_SUCCESS);
3048 3048
3049 3049 return (DDI_FAILURE);
3050 3050 }
3051 3051
3052 3052 /*ARGSUSED*/
3053 3053 static int
3054 3054 immu_allochdl(iommulib_handle_t handle,
3055 3055 dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3056 3056 int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3057 3057 {
3058 3058 int ret;
3059 3059 immu_hdl_priv_t *ihp;
3060 3060 immu_t *immu;
3061 3061
3062 3062 ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3063 3063 arg, dma_handlep);
3064 3064 if (ret == DDI_SUCCESS) {
3065 3065 immu = IMMU_DEVI(rdip)->imd_immu;
3066 3066
3067 3067 ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3068 3068 waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3069 3069 if (ihp == NULL) {
3070 3070 (void) iommulib_iommu_dma_freehdl(dip, rdip,
3071 3071 *dma_handlep);
3072 3072 return (DDI_DMA_NORESOURCES);
3073 3073 }
3074 3074
3075 3075 if (IMMU_DEVI(rdip)->imd_use_premap)
3076 3076 dvma_prealloc(rdip, ihp, attr);
3077 3077 else {
3078 3078 ihp->ihp_npremapped = 0;
3079 3079 ihp->ihp_predvma = 0;
3080 3080 }
3081 3081 ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3082 3082 ihp);
3083 3083 }
3084 3084 return (ret);
3085 3085 }
3086 3086
3087 3087 /*ARGSUSED*/
3088 3088 static int
3089 3089 immu_freehdl(iommulib_handle_t handle,
3090 3090 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3091 3091 {
3092 3092 immu_hdl_priv_t *ihp;
3093 3093
3094 3094 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3095 3095 if (ihp != NULL) {
3096 3096 if (IMMU_DEVI(rdip)->imd_use_premap)
3097 3097 dvma_prefree(rdip, ihp);
3098 3098 kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3099 3099 }
3100 3100
3101 3101 return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3102 3102 }
3103 3103
3104 3104
3105 3105 /*ARGSUSED*/
3106 3106 static int
3107 3107 immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3108 3108 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3109 3109 struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3110 3110 uint_t *ccountp)
3111 3111 {
3112 3112 int ret;
3113 3113 immu_hdl_priv_t *ihp;
3114 3114
3115 3115 ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3116 3116 dma_req, cookiep, ccountp);
3117 3117
3118 3118 if (ret == DDI_DMA_MAPPED) {
3119 3119 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3120 3120 immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3121 3121 }
3122 3122
3123 3123 return (ret);
3124 3124 }
3125 3125
3126 3126 /*ARGSUSED*/
3127 3127 static int
3128 3128 immu_unbindhdl(iommulib_handle_t handle,
3129 3129 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3130 3130 {
3131 3131 return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3132 3132 }
3133 3133
3134 3134 /*ARGSUSED*/
3135 3135 static int
3136 3136 immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3137 3137 dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3138 3138 size_t len, uint_t cachefl)
3139 3139 {
3140 3140 return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3141 3141 cachefl));
3142 3142 }
3143 3143
3144 3144 /*ARGSUSED*/
3145 3145 static int
3146 3146 immu_win(iommulib_handle_t handle, dev_info_t *dip,
3147 3147 dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3148 3148 off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3149 3149 uint_t *ccountp)
3150 3150 {
3151 3151 return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3152 3152 lenp, cookiep, ccountp));
3153 3153 }
3154 3154
3155 3155 /*ARGSUSED*/
3156 3156 static int
3157 3157 immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3158 3158 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3159 3159 struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3160 3160 {
3161 3161 immu_hdl_priv_t *ihp;
3162 3162
3163 3163 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3164 3164
3165 3165 return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3166 3166 }
3167 3167
3168 3168 /*ARGSUSED*/
3169 3169 static int
3170 3170 immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3171 3171 dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3172 3172 {
3173 3173 immu_hdl_priv_t *ihp;
3174 3174
3175 3175 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3176 3176 if (ihp->ihp_npremapped > 0)
3177 3177 return (DDI_SUCCESS);
3178 3178 return (immu_unmap_dvmaseg(rdip, dmao));
3179 3179 }
↓ open down ↓ |
3125 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX