1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 
  28 #include <sys/mach_mmu.h>
  29 #include <sys/machsystm.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/promif.h>
  32 #include <sys/hypervisor.h>
  33 #include <sys/bootconf.h>
  34 #include <sys/ontrap.h>
  35 #include <sys/rwlock.h>
  36 #include <sys/sysmacros.h>
  37 #include <vm/seg_kmem.h>
  38 #include <vm/kboot_mmu.h>
  39 #include <vm/hat_pte.h>
  40 #include <vm/hat.h>
  41 #include <vm/htable.h>
  42 #include <vm/hat_i86.h>
  43 
/*
 * Global state describing this domain's view of machine frames (MFNs)
 * vs. pseudo-physical frames (PFNs), initialized from Xen's start_info.
 */
start_info_t *xen_info;		/* Xen-provided boot parameters for this domain */
ulong_t mfn_count;		/* number of valid entries in mfn_list[] */
mfn_t *mfn_list;		/* pfn -> mfn translation table */
mfn_t *mfn_list_pages;          /* pages that make a table of mfn's */
                                /* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;     /* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;		/* cached XENMEM_maximum_ram_page; 0 = stale */
uintptr_t xen_virt_start;	/* NOTE(review): not referenced in this file */
pfn_t *mfn_to_pfn_mapping;	/* sparse mfn -> pfn table; lookups may fault */
caddr_t xb_addr;                /* virtual addr for the store_mfn page */
  54 
  55 
/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change. The next 4 routines provide a mechanism for that.
 * The basic idea is to use reader/writer mutex, readers are any thread
 * that is manipulating MFNs. Only the thread which is going to actually call
 * HYPERVISOR_suspend() will become a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track if a thread acquires reader status and allow it to recursively
 * do so again. This prevents deadlocks if a migration request
 * is started and waits for some reader, but then the previous reader needs
 * to call into the HAT.
 */
#define NUM_M2P_LOCKS 128

/*
 * Array of rwlocks, padded so that each lock sits on its own cache line.
 */
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

/* Select a lock bucket for the current thread by hashing its thread id. */
#define XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))
  77 
/*
 * Become a "reader": prevent migration/suspension while this thread is
 * manipulating MFN values.  The per-thread t_xpvcntr count makes the hold
 * recursive; only the outermost call actually takes the rwlock.  dom0
 * can't suspend/resume, so it skips the lock entirely.
 */
void
xen_block_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    ++curthread->t_xpvcntr == 1)
		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}
  85 
/*
 * Undo one xen_block_migrate() hold.  The reader lock is only dropped
 * when the recursion count returns to zero.
 */
void
xen_allow_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    --curthread->t_xpvcntr == 0)
		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}
  93 
/*
 * Called by the thread that will actually suspend/migrate the domain.
 * Acquiring every bucket's lock as writer waits out all current readers
 * and keeps new ones from starting.  t_xpvcntr is raised first so that
 * this thread's own subsequent xen_block_migrate() calls (e.g. from the
 * HAT) are treated as recursive holds rather than deadlocking against
 * the writer locks we hold.
 */
void
xen_start_migrate(void)
{
	int i;

	ASSERT(curthread->t_xpvcntr == 0);
	++curthread->t_xpvcntr; /* this allows calls into HAT */
	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}
 104 
/*
 * Migration/suspension is complete: release all the writer locks taken by
 * xen_start_migrate() and drop the recursion count raised there.
 */
void
xen_end_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_exit(&m2p_lock[i].m2p_rwlock);
	ASSERT(curthread->t_xpvcntr == 1);
	--curthread->t_xpvcntr;
}
 115 
/*
 * Plug a new value into a page table entry via the hypervisor's
 * MMU-update interface.
 *
 * table	pseudo-physical address of the page table page
 * index	entry number within that page
 * level	page table level (unused; hence ARGSUSED)
 * pteval	new PTE value to install
 *
 * bop_panic()s on failure (this is used during boot-time mapping setup).
 */
/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);	/* hypervisor wants a machine addr */
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		bop_panic("HYPERVISOR_mmu_update() failed");
}
 129 
 130 /*
 131  * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 132  * Each has a page aligned address and size. We relocate them up into the
 133  * kernel's normal address space at this point in time. We also create
 134  * the arrays that let the hypervisor suspend/resume a domain.
 135  */
 136 void
 137 xen_relocate_start_info(void)
 138 {
 139         maddr_t mach_addr;
 140         size_t sz;
 141         size_t sz2;
 142         offset_t off;
 143         uintptr_t addr;
 144         uintptr_t old;
 145         int i, j;
 146 
 147         /*
 148          * In dom0, we have to account for the console_info structure
 149          * which might immediately follow the start_info in memory.
 150          */
 151         sz = sizeof (start_info_t);
 152         if (DOMAIN_IS_INITDOMAIN(xen_info) &&
 153             xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
 154                 sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
 155                     xen_info->console.dom0.info_size;
 156         }
 157         sz = P2ROUNDUP(sz, MMU_PAGESIZE);
 158         addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
 159         for (off = 0; off < sz; off += MMU_PAGESIZE) {
 160                 mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
 161                     (caddr_t)xen_info + off)));
 162                 kbm_map_ma(mach_addr + off, addr + off, 0);
 163         }
 164         boot_mapin((caddr_t)addr, sz);
 165         old = (uintptr_t)xen_info;
 166         xen_info = (start_info_t *)addr;
 167         for (off = 0; off < sz; off += MMU_PAGESIZE)
 168                 kbm_unmap(old + off);
 169 
 170         /*
 171          * Relocate the mfn_list, any number of pages.
 172          */
 173         sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
 174         addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
 175             0, 0, 0, VM_SLEEP);
 176         for (off = 0; off < sz; off += MMU_PAGESIZE) {
 177                 mach_addr =
 178                     pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
 179                 kbm_map_ma(mach_addr, addr + off, 0);
 180         }
 181         boot_mapin((caddr_t)addr, sz);
 182         old = (uintptr_t)mfn_list;
 183         mfn_list = (mfn_t *)addr;
 184         xen_info->mfn_list = (mfn_t)addr;
 185         for (off = 0; off < sz; off += MMU_PAGESIZE)
 186                 kbm_unmap(old + off);
 187 
 188         /*
 189          * Create the lists of mfn_list pages needed by suspend/resume.
 190          * Note we skip this for domain 0 as it can't suspend/resume.
 191          */
 192         if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
 193                 sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
 194                 mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
 195                 mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
 196                 i = 0;
 197                 for (off = 0; off < sz; off += MMU_PAGESIZE) {
 198                         j = mmu_btop(off);
 199                         if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
 200                                 mfn_list_pages_page[i++] =
 201                                     pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
 202                         }
 203                         mfn_list_pages[j] =
 204                             pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
 205                 }
 206                 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
 207                     pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
 208                 HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
 209         }
 210 
 211         /*
 212          * Remap the shared info (for I/O) into high memory, too.
 213          */
 214         sz = MMU_PAGESIZE;
 215         addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
 216         kbm_map_ma(xen_info->shared_info, addr, 0);
 217         /* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
 218         old = (uintptr_t)HYPERVISOR_shared_info;
 219         HYPERVISOR_shared_info = (void *)addr;
 220         kbm_unmap(old);
 221 
 222         /*
 223          * Remap the console info into high memory, too.
 224          */
 225         if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
 226                 sz = MMU_PAGESIZE;
 227                 addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
 228                 kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
 229                 boot_mapin((caddr_t)addr, sz);
 230                 old = (uintptr_t)HYPERVISOR_console_page;
 231                 HYPERVISOR_console_page = (void *)addr;
 232                 kbm_unmap(old);
 233         } else {
 234                 HYPERVISOR_console_page = NULL;
 235         }
 236 
 237         /*
 238          * On domUs we need to have the xenbus page (store_mfn) mapped into
 239          * the kernel. This is referenced as xb_addr.
 240          */
 241         if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
 242                 xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
 243                 kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
 244                     (uintptr_t)xb_addr, 0);
 245                 boot_mapin(xb_addr, MMU_PAGESIZE);
 246         }
 247 }
 248 
/*
 * Generate the pfn value to use for a foreign mfn.
 * A foreign MFN (one not belonging to this domain) has no mfn_list[]
 * entry, so it is represented as the MFN value itself with the
 * PFN_IS_FOREIGN_MFN bit set.
 */
pfn_t
xen_assign_pfn(mfn_t mfn)
{
	pfn_t pfn;

#ifdef DEBUG
	/*
	 * make sure this MFN isn't in our list of MFNs
	 */
	on_trap_data_t otd;
	uint_t	on_trap_ready = (t0.t_stk != NULL);	/* too early in boot otherwise */

	if (on_trap_ready) {
		/*
		 * The lookup runs under on_trap() because the sparse
		 * mfn_to_pfn_mapping[] table may fault on access.
		 */
		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
			pfn = mfn_to_pfn_mapping[mfn];
			if (pfn < mfn_count && mfn_list[pfn] == mfn)
				panic("xen_assign_pfn() mfn belongs to us");
		}
		no_trap();
	}
#endif /* DEBUG */

	if (mfn == MFN_INVALID)
		panic("xen_assign_pfn(MFN_INVALID) not allowed");
	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	/* if the bit was already set, the tagged value is ambiguous */
	if (pfn == mfn)
		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
	return (pfn);
}
 281 
 282 void
 283 xen_release_pfn(pfn_t pfn)
 284 {
 285         if (pfn == PFN_INVALID)
 286                 panic("xen_release_pfn(PFN_INVALID) not allowed");
 287         if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
 288                 panic("mfn high bit not set");
 289 }
 290 
 291 uint_t
 292 pfn_is_foreign(pfn_t pfn)
 293 {
 294         if (pfn == PFN_INVALID)
 295                 return (0);
 296         return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
 297 }
 298 
 299 pfn_t
 300 pte2pfn(x86pte_t pte, level_t l)
 301 {
 302         mfn_t mfn = PTE2MFN(pte, l);
 303 
 304         if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
 305                 return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
 306         return (mfn_to_pfn(mfn));
 307 }
 308 
 309 mfn_t
 310 pfn_to_mfn(pfn_t pfn)
 311 {
 312         if (pfn == PFN_INVALID)
 313                 panic("pfn_to_mfn(PFN_INVALID) not allowed");
 314 
 315         if (pfn & PFN_IS_FOREIGN_MFN)
 316                 return (pfn & ~PFN_IS_FOREIGN_MFN);
 317 
 318         if (pfn >= mfn_count)
 319                 panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);
 320 
 321         return (mfn_list[pfn]);
 322 }
 323 
/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful since the mfn_to_pfn_mapping[] might fault
 * as that table is sparse. It also has to check for non-faulting, but out of
 * range that exceed the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	uint_t	on_trap_ready = (t0.t_stk != NULL);	/* too early in boot otherwise */

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	/* anything past the last RAM page can't be in our tables */
	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	/*
	 * on_trap() catches a fault from the sparse table; a faulting
	 * entry is treated as foreign.
	 */
	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		/*
		 * Cross-check the reverse mapping against mfn_list[]; a
		 * mismatch means the mfn doesn't belong to this domain.
		 */
		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}
 370 
 371 /*
 372  * From a pseudo-physical address, find the corresponding machine address.
 373  */
 374 maddr_t
 375 pa_to_ma(paddr_t pa)
 376 {
 377         mfn_t mfn = pfn_to_mfn(mmu_btop(pa));
 378 
 379         if (mfn == MFN_INVALID)
 380                 panic("pa_to_ma() got MFN_INVALID");
 381         return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
 382 }
 383 
 384 /*
 385  * From a machine address, find the corresponding pseudo-physical address.
 386  */
 387 paddr_t
 388 ma_to_pa(maddr_t ma)
 389 {
 390         pfn_t pfn = mfn_to_pfn(mmu_btop(ma));
 391 
 392         if (pfn == PFN_INVALID)
 393                 panic("ma_to_pa() got PFN_INVALID");
 394         return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
 395 }
 396 
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
/* Debug-only: panic if the pfn has a page_t and that page isn't locked. */
#define CHECK_PAGE_LOCK(pfn)    {                       \
	page_t *pp = page_numtopp_nolock(pfn);          \
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {       \
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);                               \
	}                                               \
}
#else	/* DEBUG */
#define CHECK_PAGE_LOCK(pfn)
#endif	/* DEBUG */
 412 
/*
 * Reassign a new machine page to back a physical address.
 * Updates our pfn -> mfn table (mfn_list[]), the hypervisor's
 * machine-to-physical table, and the kpm mapping (if kpm is in use).
 * Passing mfn == MFN_INVALID marks the pfn as having no machine
 * backing (i.e. the page is being given away).
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	/* keep the contiguous-pfn bookkeeping in sync with the new mfn */
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		/*
		 * Page is going away: tear down any kpm mapping and
		 * record the loss of backing; no hypervisor update is
		 * made for an invalid mfn.
		 */
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	/* tell the hypervisor about the new mfn -> pfn binding */
	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	/* re-establish a writable kpm mapping to the new backing page */
	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}