1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/kstat.h>
  28 #include <sys/param.h>
  29 #include <sys/stack.h>
  30 #include <sys/regset.h>
  31 #include <sys/thread.h>
  32 #include <sys/proc.h>
  33 #include <sys/procfs_isa.h>
  34 #include <sys/kmem.h>
  35 #include <sys/cpuvar.h>
  36 #include <sys/systm.h>
  37 #include <sys/machpcb.h>
  38 #include <sys/machasi.h>
  39 #include <sys/vis.h>
  40 #include <sys/fpu/fpusystm.h>
  41 #include <sys/cpu_module.h>
  42 #include <sys/privregs.h>
  43 #include <sys/archsystm.h>
  44 #include <sys/atomic.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/time.h>
  47 #include <sys/clock.h>
  48 #include <sys/cmp.h>
  49 #include <sys/platform_module.h>
  50 #include <sys/bl.h>
  51 #include <sys/nvpair.h>
  52 #include <sys/kdi_impl.h>
  53 #include <sys/machsystm.h>
  54 #include <sys/sysmacros.h>
  55 #include <sys/promif.h>
  56 #include <sys/pool_pset.h>
  57 #include <sys/mem.h>
  58 #include <sys/dumphdr.h>
  59 #include <vm/seg_kmem.h>
  60 #include <sys/hold_page.h>
  61 #include <sys/cpu.h>
  62 #include <sys/ivintr.h>
  63 #include <sys/clock_impl.h>
  64 #include <sys/machclock.h>
  65 
  66 int maxphys = MMU_PAGESIZE * 16;        /* 128k */
  67 int klustsize = MMU_PAGESIZE * 16;      /* 128k */
  68 
  69 /*
  70  * Initialize kernel thread's stack.
  71  */
  72 caddr_t
  73 thread_stk_init(caddr_t stk)
  74 {
  75         kfpu_t *fp;
  76         ulong_t align;
  77 
  78         /* allocate extra space for floating point state */
  79         stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
  80         align = (uintptr_t)stk & 0x3f;
  81         stk -= align;           /* force v9_fpu to be 16 byte aligned */
  82         fp = (kfpu_t *)stk;
  83         fp->fpu_fprs = 0;
  84 
  85         stk -= SA(MINFRAME);
  86         return (stk);
  87 }
  88 
  89 #define WIN32_SIZE      (MAXWIN * sizeof (struct rwindow32))
  90 #define WIN64_SIZE      (MAXWIN * sizeof (struct rwindow64))
  91 
  92 kmem_cache_t    *wbuf32_cache;
  93 kmem_cache_t    *wbuf64_cache;
  94 
  95 void
  96 lwp_stk_cache_init(void)
  97 {
  98         /*
  99          * Window buffers are allocated from the static arena
 100          * because they are accessed at TL>0. We also must use
 101          * KMC_NOHASH to prevent them from straddling page
 102          * boundaries as they are accessed by physical address.
 103          */
 104         wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
 105             0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
 106         wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
 107             0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
 108 }
 109 
 110 /*
 111  * Initialize lwp's kernel stack.
 112  * Note that now that the floating point register save area (kfpu_t)
 113  * has been broken out from machpcb and aligned on a 64 byte boundary so that
 114  * we can do block load/stores to/from it, there are a couple of potential
 115  * optimizations to save stack space. 1. The floating point register save
 116  * area could be aligned on a 16 byte boundary, and the floating point code
 117  * changed to (a) check the alignment and (b) use different save/restore
 118  * macros depending upon the alignment. 2. The lwp_stk_init code below
 119  * could be changed to calculate if less space would be wasted if machpcb
 120  * was first instead of second. However there is a REGOFF macro used in
 121  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
 122  * register area is a fixed distance from the %sp, and would have to be
 123  * changed to a pointer or something...JJ said later.
 124  */
 125 caddr_t
 126 lwp_stk_init(klwp_t *lwp, caddr_t stk)
 127 {
 128         struct machpcb *mpcb;
 129         kfpu_t *fp;
 130         uintptr_t aln;
 131 
 132         stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
 133         aln = (uintptr_t)stk & 0x3F;
 134         stk -= aln;
 135         fp = (kfpu_t *)stk;
 136         stk -= SA(sizeof (struct machpcb));
 137         mpcb = (struct machpcb *)stk;
 138         bzero(mpcb, sizeof (struct machpcb));
 139         bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
 140         lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
 141         lwp->lwp_fpu = (void *)fp;
 142         mpcb->mpcb_fpu = fp;
 143         mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
 144         mpcb->mpcb_thread = lwp->lwp_thread;
 145         mpcb->mpcb_wbcnt = 0;
 146         if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
 147                 mpcb->mpcb_wstate = WSTATE_USER32;
 148                 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
 149         } else {
 150                 mpcb->mpcb_wstate = WSTATE_USER64;
 151                 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
 152         }
 153         ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
 154         mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
 155         mpcb->mpcb_pa = va_to_pa(mpcb);
 156         return (stk);
 157 }
 158 
 159 void
 160 lwp_stk_fini(klwp_t *lwp)
 161 {
 162         struct machpcb *mpcb = lwptompcb(lwp);
 163 
 164         /*
 165          * there might be windows still in the wbuf due to unmapped
 166          * stack, misaligned stack pointer, etc.  We just free it.
 167          */
 168         mpcb->mpcb_wbcnt = 0;
 169         if (mpcb->mpcb_wstate == WSTATE_USER32)
 170                 kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
 171         else
 172                 kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
 173         mpcb->mpcb_wbuf = NULL;
 174         mpcb->mpcb_wbuf_pa = -1;
 175 }
 176 
 177 /*ARGSUSED*/
 178 void
 179 lwp_fp_init(klwp_t *lwp)
 180 {
 181 }
 182 
 183 /*
 184  * Copy regs from parent to child.
 185  */
 186 void
 187 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
 188 {
 189         kthread_t *t, *pt = lwptot(lwp);
 190         struct machpcb *mpcb = lwptompcb(clwp);
 191         struct machpcb *pmpcb = lwptompcb(lwp);
 192         kfpu_t *fp, *pfp = lwptofpu(lwp);
 193         caddr_t wbuf;
 194         uint_t wstate;
 195 
 196         t = mpcb->mpcb_thread;
 197         /*
 198          * remember child's fp and wbuf since they will get erased during
 199          * the bcopy.
 200          */
 201         fp = mpcb->mpcb_fpu;
 202         wbuf = mpcb->mpcb_wbuf;
 203         wstate = mpcb->mpcb_wstate;
 204         /*
 205          * Don't copy mpcb_frame since we hand-crafted it
 206          * in thread_load().
 207          */
 208         bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
 209         mpcb->mpcb_thread = t;
 210         mpcb->mpcb_fpu = fp;
 211         fp->fpu_q = mpcb->mpcb_fpu_q;
 212 
 213         /*
 214          * It is theoretically possibly for the lwp's wstate to
 215          * be different from its value assigned in lwp_stk_init,
 216          * since lwp_stk_init assumed the data model of the process.
 217          * Here, we took on the data model of the cloned lwp.
 218          */
 219         if (mpcb->mpcb_wstate != wstate) {
 220                 if (wstate == WSTATE_USER32) {
 221                         kmem_cache_free(wbuf32_cache, wbuf);
 222                         wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
 223                         wstate = WSTATE_USER64;
 224                 } else {
 225                         kmem_cache_free(wbuf64_cache, wbuf);
 226                         wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
 227                         wstate = WSTATE_USER32;
 228                 }
 229         }
 230 
 231         mpcb->mpcb_pa = va_to_pa(mpcb);
 232         mpcb->mpcb_wbuf = wbuf;
 233         mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
 234 
 235         ASSERT(mpcb->mpcb_wstate == wstate);
 236 
 237         if (mpcb->mpcb_wbcnt != 0) {
 238                 bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
 239                     mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
 240                     sizeof (struct rwindow32) : sizeof (struct rwindow64)));
 241         }
 242 
 243         if (pt == curthread)
 244                 pfp->fpu_fprs = _fp_read_fprs();
 245         if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
 246                 if (pt == curthread && fpu_exists) {
 247                         save_gsr(clwp->lwp_fpu);
 248                 } else {
 249                         uint64_t gsr;
 250                         gsr = get_gsr(lwp->lwp_fpu);
 251                         set_gsr(gsr, clwp->lwp_fpu);
 252                 }
 253                 fp_fork(lwp, clwp);
 254         }
 255 }
 256 
 257 /*
 258  * Free lwp fpu regs.
 259  */
 260 void
 261 lwp_freeregs(klwp_t *lwp, int isexec)
 262 {
 263         kfpu_t *fp = lwptofpu(lwp);
 264 
 265         if (lwptot(lwp) == curthread)
 266                 fp->fpu_fprs = _fp_read_fprs();
 267         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
 268                 fp_free(fp, isexec);
 269 }
 270 
 271 /*
 272  * These function are currently unused on sparc.
 273  */
 274 /*ARGSUSED*/
 275 void
 276 lwp_attach_brand_hdlrs(klwp_t *lwp)
 277 {}
 278 
 279 /*ARGSUSED*/
 280 void
 281 lwp_detach_brand_hdlrs(klwp_t *lwp)
 282 {}
 283 
 284 /*
 285  * fill in the extra register state area specified with the
 286  * specified lwp's platform-dependent non-floating-point extra
 287  * register state information
 288  */
 289 /* ARGSUSED */
 290 void
 291 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
 292 {
 293         /* for sun4u nothing to do here, added for symmetry */
 294 }
 295 
 296 /*
 297  * fill in the extra register state area specified with the specified lwp's
 298  * platform-dependent floating-point extra register state information.
 299  * NOTE:  'lwp' might not correspond to 'curthread' since this is
 300  * called from code in /proc to get the registers of another lwp.
 301  */
 302 void
 303 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
 304 {
 305         prxregset_t *xregs = (prxregset_t *)xrp;
 306         kfpu_t *fp = lwptofpu(lwp);
 307         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 308         uint64_t gsr;
 309 
 310         /*
 311          * fp_fksave() does not flush the GSR register into
 312          * the lwp area, so do it now
 313          */
 314         kpreempt_disable();
 315         if (ttolwp(curthread) == lwp && fpu_exists) {
 316                 fp->fpu_fprs = _fp_read_fprs();
 317                 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 318                         _fp_write_fprs(fprs);
 319                         fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 320                 }
 321                 save_gsr(fp);
 322         }
 323         gsr = get_gsr(fp);
 324         kpreempt_enable();
 325         PRXREG_GSR(xregs) = gsr;
 326 }
 327 
 328 /*
 329  * set the specified lwp's platform-dependent non-floating-point
 330  * extra register state based on the specified input
 331  */
 332 /* ARGSUSED */
 333 void
 334 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
 335 {
 336         /* for sun4u nothing to do here, added for symmetry */
 337 }
 338 
 339 /*
 340  * set the specified lwp's platform-dependent floating-point
 341  * extra register state based on the specified input
 342  */
 343 void
 344 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
 345 {
 346         prxregset_t *xregs = (prxregset_t *)xrp;
 347         kfpu_t *fp = lwptofpu(lwp);
 348         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 349         uint64_t gsr = PRXREG_GSR(xregs);
 350 
 351         kpreempt_disable();
 352         set_gsr(gsr, lwptofpu(lwp));
 353 
 354         if ((lwp == ttolwp(curthread)) && fpu_exists) {
 355                 fp->fpu_fprs = _fp_read_fprs();
 356                 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 357                         _fp_write_fprs(fprs);
 358                         fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 359                 }
 360                 restore_gsr(lwptofpu(lwp));
 361         }
 362         kpreempt_enable();
 363 }
 364 
 365 /*
 366  * fill in the sun4u asrs, ie, the lwp's platform-dependent
 367  * non-floating-point extra register state information
 368  */
 369 /* ARGSUSED */
 370 void
 371 getasrs(klwp_t *lwp, asrset_t asr)
 372 {
 373         /* for sun4u nothing to do here, added for symmetry */
 374 }
 375 
 376 /*
 377  * fill in the sun4u asrs, ie, the lwp's platform-dependent
 378  * floating-point extra register state information
 379  */
 380 void
 381 getfpasrs(klwp_t *lwp, asrset_t asr)
 382 {
 383         kfpu_t *fp = lwptofpu(lwp);
 384         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 385 
 386         kpreempt_disable();
 387         if (ttolwp(curthread) == lwp)
 388                 fp->fpu_fprs = _fp_read_fprs();
 389         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
 390                 if (fpu_exists && ttolwp(curthread) == lwp) {
 391                         if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 392                                 _fp_write_fprs(fprs);
 393                                 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 394                         }
 395                         save_gsr(fp);
 396                 }
 397                 asr[ASR_GSR] = (int64_t)get_gsr(fp);
 398         }
 399         kpreempt_enable();
 400 }
 401 
 402 /*
 403  * set the sun4u asrs, ie, the lwp's platform-dependent
 404  * non-floating-point extra register state information
 405  */
 406 /* ARGSUSED */
 407 void
 408 setasrs(klwp_t *lwp, asrset_t asr)
 409 {
 410         /* for sun4u nothing to do here, added for symmetry */
 411 }
 412 
 413 void
 414 setfpasrs(klwp_t *lwp, asrset_t asr)
 415 {
 416         kfpu_t *fp = lwptofpu(lwp);
 417         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 418 
 419         kpreempt_disable();
 420         if (ttolwp(curthread) == lwp)
 421                 fp->fpu_fprs = _fp_read_fprs();
 422         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
 423                 set_gsr(asr[ASR_GSR], fp);
 424                 if (fpu_exists && ttolwp(curthread) == lwp) {
 425                         if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 426                                 _fp_write_fprs(fprs);
 427                                 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 428                         }
 429                         restore_gsr(fp);
 430                 }
 431         }
 432         kpreempt_enable();
 433 }
 434 
 435 /*
 436  * Create interrupt kstats for this CPU.
 437  */
 438 void
 439 cpu_create_intrstat(cpu_t *cp)
 440 {
 441         int             i;
 442         kstat_t         *intr_ksp;
 443         kstat_named_t   *knp;
 444         char            name[KSTAT_STRLEN];
 445         zoneid_t        zoneid;
 446 
 447         ASSERT(MUTEX_HELD(&cpu_lock));
 448 
 449         if (pool_pset_enabled())
 450                 zoneid = GLOBAL_ZONEID;
 451         else
 452                 zoneid = ALL_ZONES;
 453 
 454         intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
 455             KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
 456 
 457         /*
 458          * Initialize each PIL's named kstat
 459          */
 460         if (intr_ksp != NULL) {
 461                 intr_ksp->ks_update = cpu_kstat_intrstat_update;
 462                 knp = (kstat_named_t *)intr_ksp->ks_data;
 463                 intr_ksp->ks_private = cp;
 464                 for (i = 0; i < PIL_MAX; i++) {
 465                         (void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
 466                             i + 1);
 467                         kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
 468                         (void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
 469                             i + 1);
 470                         kstat_named_init(&knp[(i * 2) + 1], name,
 471                             KSTAT_DATA_UINT64);
 472                 }
 473                 kstat_install(intr_ksp);
 474         }
 475 }
 476 
 477 /*
 478  * Delete interrupt kstats for this CPU.
 479  */
 480 void
 481 cpu_delete_intrstat(cpu_t *cp)
 482 {
 483         kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
 484 }
 485 
 486 /*
 487  * Convert interrupt statistics from CPU ticks to nanoseconds and
 488  * update kstat.
 489  */
 490 int
 491 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
 492 {
 493         kstat_named_t   *knp = ksp->ks_data;
 494         cpu_t           *cpup = (cpu_t *)ksp->ks_private;
 495         int             i;
 496 
 497         if (rw == KSTAT_WRITE)
 498                 return (EACCES);
 499 
 500         /*
 501          * We use separate passes to copy and convert the statistics to
 502          * nanoseconds. This assures that the snapshot of the data is as
 503          * self-consistent as possible.
 504          */
 505 
 506         for (i = 0; i < PIL_MAX; i++) {
 507                 knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
 508                 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
 509         }
 510 
 511         for (i = 0; i < PIL_MAX; i++) {
 512                 knp[i * 2].value.ui64 =
 513                     (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
 514                     cpup->cpu_id);
 515         }
 516 
 517         return (0);
 518 }
 519 
 520 /*
 521  * Called by common/os/cpu.c for psrinfo(1m) kstats
 522  */
 523 char *
 524 cpu_fru_fmri(cpu_t *cp)
 525 {
 526         return (cpunodes[cp->cpu_id].fru_fmri);
 527 }
 528 
 529 /*
 530  * An interrupt thread is ending a time slice, so compute the interval it
 531  * ran for and update the statistic for its PIL.
 532  */
 533 void
 534 cpu_intr_swtch_enter(kthread_id_t t)
 535 {
 536         uint64_t        interval;
 537         uint64_t        start;
 538         cpu_t           *cpu;
 539 
 540         ASSERT((t->t_flag & T_INTR_THREAD) != 0);
 541         ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
 542 
 543         /*
 544          * We could be here with a zero timestamp. This could happen if:
 545          * an interrupt thread which no longer has a pinned thread underneath
 546          * it (i.e. it blocked at some point in its past) has finished running
 547          * its handler. intr_thread() updated the interrupt statistic for its
 548          * PIL and zeroed its timestamp. Since there was no pinned thread to
 549          * return to, swtch() gets called and we end up here.
 550          *
 551          * It can also happen if an interrupt thread in intr_thread() calls
 552          * preempt. It will have already taken care of updating stats. In
 553          * this event, the interrupt thread will be runnable.
 554          */
 555         if (t->t_intr_start) {
 556                 do {
 557                         start = t->t_intr_start;
 558                         interval = CLOCK_TICK_COUNTER() - start;
 559                 } while (atomic_cas_64(&t->t_intr_start, start, 0) != start);
 560                 cpu = CPU;
 561                 if (cpu->cpu_m.divisor > 1)
 562                         interval *= cpu->cpu_m.divisor;
 563                 cpu->cpu_m.intrstat[t->t_pil][0] += interval;
 564 
 565                 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
 566                     interval);
 567         } else
 568                 ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
 569 }
 570 
 571 
 572 /*
 573  * An interrupt thread is returning from swtch(). Place a starting timestamp
 574  * in its thread structure.
 575  */
 576 void
 577 cpu_intr_swtch_exit(kthread_id_t t)
 578 {
 579         uint64_t ts;
 580 
 581         ASSERT((t->t_flag & T_INTR_THREAD) != 0);
 582         ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
 583 
 584         do {
 585                 ts = t->t_intr_start;
 586         } while (atomic_cas_64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) !=
 587             ts);
 588 }
 589 
 590 
 591 int
 592 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
 593 {
 594         if (&plat_blacklist)
 595                 return (plat_blacklist(cmd, scheme, fmri, class));
 596 
 597         return (ENOTSUP);
 598 }
 599 
 600 int
 601 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 602 {
 603         extern void kdi_flush_caches(void);
 604         size_t nread = 0;
 605         uint32_t word;
 606         int slop, i;
 607 
 608         kdi_flush_caches();
 609         membar_enter();
 610 
 611         /* We might not begin on a word boundary. */
 612         if ((slop = addr & 3) != 0) {
 613                 word = ldphys(addr & ~3);
 614                 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
 615                         *buf++ = ((uchar_t *)&word)[i];
 616                 addr = roundup(addr, 4);
 617         }
 618 
 619         while (nbytes > 0) {
 620                 word = ldphys(addr);
 621                 for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
 622                         *buf++ = ((uchar_t *)&word)[i];
 623         }
 624 
 625         kdi_flush_caches();
 626 
 627         *ncopiedp = nread;
 628         return (0);
 629 }
 630 
 631 int
 632 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 633 {
 634         extern void kdi_flush_caches(void);
 635         size_t nwritten = 0;
 636         uint32_t word;
 637         int slop, i;
 638 
 639         kdi_flush_caches();
 640 
 641         /* We might not begin on a word boundary. */
 642         if ((slop = addr & 3) != 0) {
 643                 word = ldphys(addr & ~3);
 644                 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
 645                         ((uchar_t *)&word)[i] = *buf++;
 646                 stphys(addr & ~3, word);
 647                 addr = roundup(addr, 4);
 648         }
 649 
 650         while (nbytes > 3) {
 651                 for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
 652                         ((uchar_t *)&word)[i] = *buf++;
 653                 stphys(addr, word);
 654                 addr += 4;
 655         }
 656 
 657         /* We might not end with a whole word. */
 658         if (nbytes > 0) {
 659                 word = ldphys(addr);
 660                 for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
 661                         ((uchar_t *)&word)[i] = *buf++;
 662                 stphys(addr, word);
 663         }
 664 
 665         membar_enter();
 666         kdi_flush_caches();
 667 
 668         *ncopiedp = nwritten;
 669         return (0);
 670 }
 671 
 672 static void
 673 kdi_kernpanic(struct regs *regs, uint_t tt)
 674 {
 675         sync_reg_buf = *regs;
 676         sync_tt = tt;
 677 
 678         sync_handler();
 679 }
 680 
 681 static void
 682 kdi_plat_call(void (*platfn)(void))
 683 {
 684         if (platfn != NULL) {
 685                 prom_suspend_prepost();
 686                 platfn();
 687                 prom_resume_prepost();
 688         }
 689 }
 690 
 691 /*
 692  * kdi_system_claim and release are defined here for all sun4 platforms and
 693  * pointed to by mach_kdi_init() to provide default callbacks for such systems.
 694  * Specific sun4u or sun4v platforms may implement their own claim and release
 695  * routines, at which point their respective callbacks will be updated.
 696  */
 697 static void
 698 kdi_system_claim(void)
 699 {
 700         lbolt_debug_entry();
 701 }
 702 
 703 static void
 704 kdi_system_release(void)
 705 {
 706         lbolt_debug_return();
 707 }
 708 
 709 void
 710 mach_kdi_init(kdi_t *kdi)
 711 {
 712         kdi->kdi_plat_call = kdi_plat_call;
 713         kdi->kdi_kmdb_enter = kmdb_enter;
 714         kdi->pkdi_system_claim = kdi_system_claim;
 715         kdi->pkdi_system_release = kdi_system_release;
 716         kdi->mkdi_cpu_index = kdi_cpu_index;
 717         kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
 718         kdi->mkdi_kernpanic = kdi_kernpanic;
 719 }
 720 
 721 
 722 /*
 723  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 724  * long, and it fills in the array with the time spent on cpu in
 725  * each of the mstates, where time is returned in nsec.
 726  *
 727  * No guarantee is made that the returned values in times[] will
 728  * monotonically increase on sequential calls, although this will
 729  * be true in the long run. Any such guarantee must be handled by
 730  * the caller, if needed. This can happen if we fail to account
 731  * for elapsed time due to a generation counter conflict, yet we
 732  * did account for it on a prior call (see below).
 733  *
 734  * The complication is that the cpu in question may be updating
 735  * its microstate at the same time that we are reading it.
 736  * Because the microstate is only updated when the CPU's state
 737  * changes, the values in cpu_intracct[] can be indefinitely out
 738  * of date. To determine true current values, it is necessary to
 739  * compare the current time with cpu_mstate_start, and add the
 740  * difference to times[cpu_mstate].
 741  *
 742  * This can be a problem if those values are changing out from
 743  * under us. Because the code path in new_cpu_mstate() is
 744  * performance critical, we have not added a lock to it. Instead,
 745  * we have added a generation counter. Before beginning
 746  * modifications, the counter is set to 0. After modifications,
 747  * it is set to the old value plus one.
 748  *
 749  * get_cpu_mstate() will not consider the values of cpu_mstate
 750  * and cpu_mstate_start to be usable unless the value of
 751  * cpu_mstate_gen is both non-zero and unchanged, both before and
 752  * after reading the mstate information. Note that we must
 753  * protect against out-of-order loads around accesses to the
 754  * generation counter. Also, this is a best effort approach in
 755  * that we do not retry should the counter be found to have
 756  * changed.
 757  *
 758  * cpu_intracct[] is used to identify time spent in each CPU
 759  * mstate while handling interrupts. Such time should be reported
 760  * against system time, and so is subtracted out from its
 761  * corresponding cpu_acct[] time and added to
 762  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 763  * %ticks, but acct time may be stored as %sticks, thus requiring
 764  * different conversions before they can be compared.
 765  */
 766 
 767 void
 768 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
 769 {
 770         int i;
 771         hrtime_t now, start;
 772         uint16_t gen;
 773         uint16_t state;
 774         hrtime_t intracct[NCMSTATES];
 775 
 776         /*
 777          * Load all volatile state under the protection of membar.
 778          * cpu_acct[cpu_mstate] must be loaded to avoid double counting
 779          * of (now - cpu_mstate_start) by a change in CPU mstate that
 780          * arrives after we make our last check of cpu_mstate_gen.
 781          */
 782 
 783         now = gethrtime_unscaled();
 784         gen = cpu->cpu_mstate_gen;
 785 
 786         membar_consumer();      /* guarantee load ordering */
 787         start = cpu->cpu_mstate_start;
 788         state = cpu->cpu_mstate;
 789         for (i = 0; i < NCMSTATES; i++) {
 790                 intracct[i] = cpu->cpu_intracct[i];
 791                 times[i] = cpu->cpu_acct[i];
 792         }
 793         membar_consumer();      /* guarantee load ordering */
 794 
 795         if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
 796                 times[state] += now - start;
 797 
 798         for (i = 0; i < NCMSTATES; i++) {
 799                 scalehrtime(&times[i]);
 800                 intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
 801         }
 802 
 803         for (i = 0; i < NCMSTATES; i++) {
 804                 if (i == CMS_SYSTEM)
 805                         continue;
 806                 times[i] -= intracct[i];
 807                 if (times[i] < 0) {
 808                         intracct[i] += times[i];
 809                         times[i] = 0;
 810                 }
 811                 times[CMS_SYSTEM] += intracct[i];
 812         }
 813 }
 814 
 815 void
 816 mach_cpu_pause(volatile char *safe)
 817 {
 818         /*
 819          * This cpu is now safe.
 820          */
 821         *safe = PAUSE_WAIT;
 822         membar_enter(); /* make sure stores are flushed */
 823 
 824         /*
 825          * Now we wait.  When we are allowed to continue, safe
 826          * will be set to PAUSE_IDLE.
 827          */
 828         while (*safe != PAUSE_IDLE)
 829                 SMT_PAUSE();
 830 }
 831 
 832 /*ARGSUSED*/
 833 int
 834 plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
 835 {
 836         return (ENOTSUP);
 837 }
 838 
 839 /* cpu threshold for compressed dumps */
 840 #ifdef sun4v
 841 uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4V_MINCPU;
 842 #else
 843 uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4U_MINCPU;
 844 #endif
 845 
 846 int
 847 dump_plat_addr()
 848 {
 849         return (0);
 850 }
 851 
 852 void
 853 dump_plat_pfn()
 854 {
 855 }
 856 
 857 /* ARGSUSED */
 858 int
 859 dump_plat_data(void *dump_cdata)
 860 {
 861         return (0);
 862 }
 863 
 864 /* ARGSUSED */
 865 int
 866 plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
 867 {
 868         return (PLAT_HOLD_OK);
 869 }
 870 
 871 /* ARGSUSED */
 872 void
 873 plat_release_page(page_t *pp)
 874 {
 875 }
 876 
 877 /* ARGSUSED */
 878 void
 879 progressbar_key_abort(ldi_ident_t li)
 880 {
 881 }
 882 
 883 /*
 884  * We need to post a soft interrupt to reprogram the lbolt cyclic when
 885  * switching from event to cyclic driven lbolt. The following code adds
 886  * and posts the softint for sun4 platforms.
 887  */
 888 static uint64_t lbolt_softint_inum;
 889 
 890 void
 891 lbolt_softint_add(void)
 892 {
 893         lbolt_softint_inum = add_softintr(LOCK_LEVEL,
 894             (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
 895 }
 896 
 897 void
 898 lbolt_softint_post(void)
 899 {
 900         setsoftint(lbolt_softint_inum);
 901 }