1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/kstat.h>
  29 #include <sys/param.h>
  30 #include <sys/stack.h>
  31 #include <sys/regset.h>
  32 #include <sys/thread.h>
  33 #include <sys/proc.h>
  34 #include <sys/procfs_isa.h>
  35 #include <sys/kmem.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/systm.h>
  38 #include <sys/machpcb.h>
  39 #include <sys/machasi.h>
  40 #include <sys/vis.h>
  41 #include <sys/fpu/fpusystm.h>
  42 #include <sys/cpu_module.h>
  43 #include <sys/privregs.h>
  44 #include <sys/archsystm.h>
  45 #include <sys/atomic.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/time.h>
  48 #include <sys/clock.h>
  49 #include <sys/cmp.h>
  50 #include <sys/platform_module.h>
  51 #include <sys/bl.h>
  52 #include <sys/nvpair.h>
  53 #include <sys/kdi_impl.h>
  54 #include <sys/machsystm.h>
  55 #include <sys/sysmacros.h>
  56 #include <sys/promif.h>
  57 #include <sys/pool_pset.h>
  58 #include <sys/mem.h>
  59 #include <sys/dumphdr.h>
  60 #include <vm/seg_kmem.h>
  61 #include <sys/hold_page.h>
  62 #include <sys/cpu.h>
  63 #include <sys/ivintr.h>
  64 #include <sys/clock_impl.h>
  65 #include <sys/machclock.h>
  66 
  67 int maxphys = MMU_PAGESIZE * 16;        /* 128k */
  68 int klustsize = MMU_PAGESIZE * 16;      /* 128k */
  69 
  70 /*
  71  * Initialize kernel thread's stack.
  72  */
  73 caddr_t
  74 thread_stk_init(caddr_t stk)
  75 {
  76         kfpu_t *fp;
  77         ulong_t align;
  78 
  79         /* allocate extra space for floating point state */
  80         stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
  81         align = (uintptr_t)stk & 0x3f;
  82         stk -= align;           /* force v9_fpu to be 16 byte aligned */
  83         fp = (kfpu_t *)stk;
  84         fp->fpu_fprs = 0;
  85 
  86         stk -= SA(MINFRAME);
  87         return (stk);
  88 }
  89 
  90 #define WIN32_SIZE      (MAXWIN * sizeof (struct rwindow32))
  91 #define WIN64_SIZE      (MAXWIN * sizeof (struct rwindow64))
  92 
  93 kmem_cache_t    *wbuf32_cache;
  94 kmem_cache_t    *wbuf64_cache;
  95 
  96 void
  97 lwp_stk_cache_init(void)
  98 {
  99         /*
 100          * Window buffers are allocated from the static arena
 101          * because they are accessed at TL>0. We also must use
 102          * KMC_NOHASH to prevent them from straddling page
 103          * boundaries as they are accessed by physical address.
 104          */
 105         wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
 106             0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
 107         wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
 108             0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
 109 }
 110 
 111 /*
 112  * Initialize lwp's kernel stack.
 113  * Note that now that the floating point register save area (kfpu_t)
 114  * has been broken out from machpcb and aligned on a 64 byte boundary so that
 115  * we can do block load/stores to/from it, there are a couple of potential
 116  * optimizations to save stack space. 1. The floating point register save
 117  * area could be aligned on a 16 byte boundary, and the floating point code
 118  * changed to (a) check the alignment and (b) use different save/restore
 119  * macros depending upon the alignment. 2. The lwp_stk_init code below
 120  * could be changed to calculate if less space would be wasted if machpcb
 121  * was first instead of second. However there is a REGOFF macro used in
 122  * locore, syscall_trap, machdep and mlsetup that assumes that the saved
 123  * register area is a fixed distance from the %sp, and would have to be
 124  * changed to a pointer or something...JJ said later.
 125  */
 126 caddr_t
 127 lwp_stk_init(klwp_t *lwp, caddr_t stk)
 128 {
 129         struct machpcb *mpcb;
 130         kfpu_t *fp;
 131         uintptr_t aln;
 132 
 133         stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
 134         aln = (uintptr_t)stk & 0x3F;
 135         stk -= aln;
 136         fp = (kfpu_t *)stk;
 137         stk -= SA(sizeof (struct machpcb));
 138         mpcb = (struct machpcb *)stk;
 139         bzero(mpcb, sizeof (struct machpcb));
 140         bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
 141         lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
 142         lwp->lwp_fpu = (void *)fp;
 143         mpcb->mpcb_fpu = fp;
 144         mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
 145         mpcb->mpcb_thread = lwp->lwp_thread;
 146         mpcb->mpcb_wbcnt = 0;
 147         if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
 148                 mpcb->mpcb_wstate = WSTATE_USER32;
 149                 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
 150         } else {
 151                 mpcb->mpcb_wstate = WSTATE_USER64;
 152                 mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
 153         }
 154         ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
 155         mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
 156         mpcb->mpcb_pa = va_to_pa(mpcb);
 157         return (stk);
 158 }
 159 
 160 void
 161 lwp_stk_fini(klwp_t *lwp)
 162 {
 163         struct machpcb *mpcb = lwptompcb(lwp);
 164 
 165         /*
 166          * there might be windows still in the wbuf due to unmapped
 167          * stack, misaligned stack pointer, etc.  We just free it.
 168          */
 169         mpcb->mpcb_wbcnt = 0;
 170         if (mpcb->mpcb_wstate == WSTATE_USER32)
 171                 kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
 172         else
 173                 kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
 174         mpcb->mpcb_wbuf = NULL;
 175         mpcb->mpcb_wbuf_pa = -1;
 176 }
 177 
 178 /*ARGSUSED*/
 179 void
 180 lwp_fp_init(klwp_t *lwp)
 181 {
 182 }
 183 
 184 /*
 185  * Copy regs from parent to child.
 186  */
 187 void
 188 lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
 189 {
 190         kthread_t *t, *pt = lwptot(lwp);
 191         struct machpcb *mpcb = lwptompcb(clwp);
 192         struct machpcb *pmpcb = lwptompcb(lwp);
 193         kfpu_t *fp, *pfp = lwptofpu(lwp);
 194         caddr_t wbuf;
 195         uint_t wstate;
 196 
 197         t = mpcb->mpcb_thread;
 198         /*
 199          * remember child's fp and wbuf since they will get erased during
 200          * the bcopy.
 201          */
 202         fp = mpcb->mpcb_fpu;
 203         wbuf = mpcb->mpcb_wbuf;
 204         wstate = mpcb->mpcb_wstate;
 205         /*
 206          * Don't copy mpcb_frame since we hand-crafted it
 207          * in thread_load().
 208          */
 209         bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
 210         mpcb->mpcb_thread = t;
 211         mpcb->mpcb_fpu = fp;
 212         fp->fpu_q = mpcb->mpcb_fpu_q;
 213 
 214         /*
 215          * It is theoretically possibly for the lwp's wstate to
 216          * be different from its value assigned in lwp_stk_init,
 217          * since lwp_stk_init assumed the data model of the process.
 218          * Here, we took on the data model of the cloned lwp.
 219          */
 220         if (mpcb->mpcb_wstate != wstate) {
 221                 if (wstate == WSTATE_USER32) {
 222                         kmem_cache_free(wbuf32_cache, wbuf);
 223                         wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
 224                         wstate = WSTATE_USER64;
 225                 } else {
 226                         kmem_cache_free(wbuf64_cache, wbuf);
 227                         wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
 228                         wstate = WSTATE_USER32;
 229                 }
 230         }
 231 
 232         mpcb->mpcb_pa = va_to_pa(mpcb);
 233         mpcb->mpcb_wbuf = wbuf;
 234         mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);
 235 
 236         ASSERT(mpcb->mpcb_wstate == wstate);
 237 
 238         if (mpcb->mpcb_wbcnt != 0) {
 239                 bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
 240                     mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
 241                     sizeof (struct rwindow32) : sizeof (struct rwindow64)));
 242         }
 243 
 244         if (pt == curthread)
 245                 pfp->fpu_fprs = _fp_read_fprs();
 246         if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
 247                 if (pt == curthread && fpu_exists) {
 248                         save_gsr(clwp->lwp_fpu);
 249                 } else {
 250                         uint64_t gsr;
 251                         gsr = get_gsr(lwp->lwp_fpu);
 252                         set_gsr(gsr, clwp->lwp_fpu);
 253                 }
 254                 fp_fork(lwp, clwp);
 255         }
 256 }
 257 
 258 /*
 259  * Free lwp fpu regs.
 260  */
 261 void
 262 lwp_freeregs(klwp_t *lwp, int isexec)
 263 {
 264         kfpu_t *fp = lwptofpu(lwp);
 265 
 266         if (lwptot(lwp) == curthread)
 267                 fp->fpu_fprs = _fp_read_fprs();
 268         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
 269                 fp_free(fp, isexec);
 270 }
 271 
 272 /*
 273  * These function are currently unused on sparc.
 274  */
 275 /*ARGSUSED*/
 276 void
 277 lwp_attach_brand_hdlrs(klwp_t *lwp)
 278 {}
 279 
 280 /*ARGSUSED*/
 281 void
 282 lwp_detach_brand_hdlrs(klwp_t *lwp)
 283 {}
 284 
 285 /*
 286  * fill in the extra register state area specified with the
 287  * specified lwp's platform-dependent non-floating-point extra
 288  * register state information
 289  */
 290 /* ARGSUSED */
 291 void
 292 xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
 293 {
 294         /* for sun4u nothing to do here, added for symmetry */
 295 }
 296 
 297 /*
 298  * fill in the extra register state area specified with the specified lwp's
 299  * platform-dependent floating-point extra register state information.
 300  * NOTE:  'lwp' might not correspond to 'curthread' since this is
 301  * called from code in /proc to get the registers of another lwp.
 302  */
 303 void
 304 xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
 305 {
 306         prxregset_t *xregs = (prxregset_t *)xrp;
 307         kfpu_t *fp = lwptofpu(lwp);
 308         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 309         uint64_t gsr;
 310 
 311         /*
 312          * fp_fksave() does not flush the GSR register into
 313          * the lwp area, so do it now
 314          */
 315         kpreempt_disable();
 316         if (ttolwp(curthread) == lwp && fpu_exists) {
 317                 fp->fpu_fprs = _fp_read_fprs();
 318                 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 319                         _fp_write_fprs(fprs);
 320                         fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 321                 }
 322                 save_gsr(fp);
 323         }
 324         gsr = get_gsr(fp);
 325         kpreempt_enable();
 326         PRXREG_GSR(xregs) = gsr;
 327 }
 328 
 329 /*
 330  * set the specified lwp's platform-dependent non-floating-point
 331  * extra register state based on the specified input
 332  */
 333 /* ARGSUSED */
 334 void
 335 xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
 336 {
 337         /* for sun4u nothing to do here, added for symmetry */
 338 }
 339 
 340 /*
 341  * set the specified lwp's platform-dependent floating-point
 342  * extra register state based on the specified input
 343  */
 344 void
 345 xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
 346 {
 347         prxregset_t *xregs = (prxregset_t *)xrp;
 348         kfpu_t *fp = lwptofpu(lwp);
 349         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 350         uint64_t gsr = PRXREG_GSR(xregs);
 351 
 352         kpreempt_disable();
 353         set_gsr(gsr, lwptofpu(lwp));
 354 
 355         if ((lwp == ttolwp(curthread)) && fpu_exists) {
 356                 fp->fpu_fprs = _fp_read_fprs();
 357                 if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 358                         _fp_write_fprs(fprs);
 359                         fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 360                 }
 361                 restore_gsr(lwptofpu(lwp));
 362         }
 363         kpreempt_enable();
 364 }
 365 
 366 /*
 367  * fill in the sun4u asrs, ie, the lwp's platform-dependent
 368  * non-floating-point extra register state information
 369  */
 370 /* ARGSUSED */
 371 void
 372 getasrs(klwp_t *lwp, asrset_t asr)
 373 {
 374         /* for sun4u nothing to do here, added for symmetry */
 375 }
 376 
 377 /*
 378  * fill in the sun4u asrs, ie, the lwp's platform-dependent
 379  * floating-point extra register state information
 380  */
 381 void
 382 getfpasrs(klwp_t *lwp, asrset_t asr)
 383 {
 384         kfpu_t *fp = lwptofpu(lwp);
 385         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 386 
 387         kpreempt_disable();
 388         if (ttolwp(curthread) == lwp)
 389                 fp->fpu_fprs = _fp_read_fprs();
 390         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
 391                 if (fpu_exists && ttolwp(curthread) == lwp) {
 392                         if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 393                                 _fp_write_fprs(fprs);
 394                                 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 395                         }
 396                         save_gsr(fp);
 397                 }
 398                 asr[ASR_GSR] = (int64_t)get_gsr(fp);
 399         }
 400         kpreempt_enable();
 401 }
 402 
 403 /*
 404  * set the sun4u asrs, ie, the lwp's platform-dependent
 405  * non-floating-point extra register state information
 406  */
 407 /* ARGSUSED */
 408 void
 409 setasrs(klwp_t *lwp, asrset_t asr)
 410 {
 411         /* for sun4u nothing to do here, added for symmetry */
 412 }
 413 
 414 void
 415 setfpasrs(klwp_t *lwp, asrset_t asr)
 416 {
 417         kfpu_t *fp = lwptofpu(lwp);
 418         uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
 419 
 420         kpreempt_disable();
 421         if (ttolwp(curthread) == lwp)
 422                 fp->fpu_fprs = _fp_read_fprs();
 423         if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
 424                 set_gsr(asr[ASR_GSR], fp);
 425                 if (fpu_exists && ttolwp(curthread) == lwp) {
 426                         if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
 427                                 _fp_write_fprs(fprs);
 428                                 fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
 429                         }
 430                         restore_gsr(fp);
 431                 }
 432         }
 433         kpreempt_enable();
 434 }
 435 
 436 /*
 437  * Create interrupt kstats for this CPU.
 438  */
 439 void
 440 cpu_create_intrstat(cpu_t *cp)
 441 {
 442         int             i;
 443         kstat_t         *intr_ksp;
 444         kstat_named_t   *knp;
 445         char            name[KSTAT_STRLEN];
 446         zoneid_t        zoneid;
 447 
 448         ASSERT(MUTEX_HELD(&cpu_lock));
 449 
 450         if (pool_pset_enabled())
 451                 zoneid = GLOBAL_ZONEID;
 452         else
 453                 zoneid = ALL_ZONES;
 454 
 455         intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
 456             KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
 457 
 458         /*
 459          * Initialize each PIL's named kstat
 460          */
 461         if (intr_ksp != NULL) {
 462                 intr_ksp->ks_update = cpu_kstat_intrstat_update;
 463                 knp = (kstat_named_t *)intr_ksp->ks_data;
 464                 intr_ksp->ks_private = cp;
 465                 for (i = 0; i < PIL_MAX; i++) {
 466                         (void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
 467                             i + 1);
 468                         kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
 469                         (void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
 470                             i + 1);
 471                         kstat_named_init(&knp[(i * 2) + 1], name,
 472                             KSTAT_DATA_UINT64);
 473                 }
 474                 kstat_install(intr_ksp);
 475         }
 476 }
 477 
 478 /*
 479  * Delete interrupt kstats for this CPU.
 480  */
 481 void
 482 cpu_delete_intrstat(cpu_t *cp)
 483 {
 484         kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
 485 }
 486 
 487 /*
 488  * Convert interrupt statistics from CPU ticks to nanoseconds and
 489  * update kstat.
 490  */
 491 int
 492 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
 493 {
 494         kstat_named_t   *knp = ksp->ks_data;
 495         cpu_t           *cpup = (cpu_t *)ksp->ks_private;
 496         int             i;
 497 
 498         if (rw == KSTAT_WRITE)
 499                 return (EACCES);
 500 
 501         /*
 502          * We use separate passes to copy and convert the statistics to
 503          * nanoseconds. This assures that the snapshot of the data is as
 504          * self-consistent as possible.
 505          */
 506 
 507         for (i = 0; i < PIL_MAX; i++) {
 508                 knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
 509                 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
 510         }
 511 
 512         for (i = 0; i < PIL_MAX; i++) {
 513                 knp[i * 2].value.ui64 =
 514                     (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
 515                     cpup->cpu_id);
 516         }
 517 
 518         return (0);
 519 }
 520 
 521 /*
 522  * Called by common/os/cpu.c for psrinfo(1m) kstats
 523  */
 524 char *
 525 cpu_fru_fmri(cpu_t *cp)
 526 {
 527         return (cpunodes[cp->cpu_id].fru_fmri);
 528 }
 529 
 530 /*
 531  * An interrupt thread is ending a time slice, so compute the interval it
 532  * ran for and update the statistic for its PIL.
 533  */
 534 void
 535 cpu_intr_swtch_enter(kthread_id_t t)
 536 {
 537         uint64_t        interval;
 538         uint64_t        start;
 539         cpu_t           *cpu;
 540 
 541         ASSERT((t->t_flag & T_INTR_THREAD) != 0);
 542         ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
 543 
 544         /*
 545          * We could be here with a zero timestamp. This could happen if:
 546          * an interrupt thread which no longer has a pinned thread underneath
 547          * it (i.e. it blocked at some point in its past) has finished running
 548          * its handler. intr_thread() updated the interrupt statistic for its
 549          * PIL and zeroed its timestamp. Since there was no pinned thread to
 550          * return to, swtch() gets called and we end up here.
 551          *
 552          * It can also happen if an interrupt thread in intr_thread() calls
 553          * preempt. It will have already taken care of updating stats. In
 554          * this event, the interrupt thread will be runnable.
 555          */
 556         if (t->t_intr_start) {
 557                 do {
 558                         start = t->t_intr_start;
 559                         interval = CLOCK_TICK_COUNTER() - start;
 560                 } while (atomic_cas_64(&t->t_intr_start, start, 0) != start);
 561                 cpu = CPU;
 562                 if (cpu->cpu_m.divisor > 1)
 563                         interval *= cpu->cpu_m.divisor;
 564                 cpu->cpu_m.intrstat[t->t_pil][0] += interval;
 565 
 566                 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
 567                     interval);
 568         } else
 569                 ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
 570 }
 571 
 572 
 573 /*
 574  * An interrupt thread is returning from swtch(). Place a starting timestamp
 575  * in its thread structure.
 576  */
 577 void
 578 cpu_intr_swtch_exit(kthread_id_t t)
 579 {
 580         uint64_t ts;
 581 
 582         ASSERT((t->t_flag & T_INTR_THREAD) != 0);
 583         ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
 584 
 585         do {
 586                 ts = t->t_intr_start;
 587         } while (atomic_cas_64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) !=
 588             ts);
 589 }
 590 
 591 
 592 int
 593 blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
 594 {
 595         if (&plat_blacklist)
 596                 return (plat_blacklist(cmd, scheme, fmri, class));
 597 
 598         return (ENOTSUP);
 599 }
 600 
 601 int
 602 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 603 {
 604         extern void kdi_flush_caches(void);
 605         size_t nread = 0;
 606         uint32_t word;
 607         int slop, i;
 608 
 609         kdi_flush_caches();
 610         membar_enter();
 611 
 612         /* We might not begin on a word boundary. */
 613         if ((slop = addr & 3) != 0) {
 614                 word = ldphys(addr & ~3);
 615                 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
 616                         *buf++ = ((uchar_t *)&word)[i];
 617                 addr = roundup(addr, 4);
 618         }
 619 
 620         while (nbytes > 0) {
 621                 word = ldphys(addr);
 622                 for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
 623                         *buf++ = ((uchar_t *)&word)[i];
 624         }
 625 
 626         kdi_flush_caches();
 627 
 628         *ncopiedp = nread;
 629         return (0);
 630 }
 631 
 632 int
 633 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 634 {
 635         extern void kdi_flush_caches(void);
 636         size_t nwritten = 0;
 637         uint32_t word;
 638         int slop, i;
 639 
 640         kdi_flush_caches();
 641 
 642         /* We might not begin on a word boundary. */
 643         if ((slop = addr & 3) != 0) {
 644                 word = ldphys(addr & ~3);
 645                 for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
 646                         ((uchar_t *)&word)[i] = *buf++;
 647                 stphys(addr & ~3, word);
 648                 addr = roundup(addr, 4);
 649         }
 650 
 651         while (nbytes > 3) {
 652                 for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
 653                         ((uchar_t *)&word)[i] = *buf++;
 654                 stphys(addr, word);
 655                 addr += 4;
 656         }
 657 
 658         /* We might not end with a whole word. */
 659         if (nbytes > 0) {
 660                 word = ldphys(addr);
 661                 for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
 662                         ((uchar_t *)&word)[i] = *buf++;
 663                 stphys(addr, word);
 664         }
 665 
 666         membar_enter();
 667         kdi_flush_caches();
 668 
 669         *ncopiedp = nwritten;
 670         return (0);
 671 }
 672 
 673 static void
 674 kdi_kernpanic(struct regs *regs, uint_t tt)
 675 {
 676         sync_reg_buf = *regs;
 677         sync_tt = tt;
 678 
 679         sync_handler();
 680 }
 681 
 682 static void
 683 kdi_plat_call(void (*platfn)(void))
 684 {
 685         if (platfn != NULL) {
 686                 prom_suspend_prepost();
 687                 platfn();
 688                 prom_resume_prepost();
 689         }
 690 }
 691 
 692 /*
 693  * kdi_system_claim and release are defined here for all sun4 platforms and
 694  * pointed to by mach_kdi_init() to provide default callbacks for such systems.
 695  * Specific sun4u or sun4v platforms may implement their own claim and release
 696  * routines, at which point their respective callbacks will be updated.
 697  */
 698 static void
 699 kdi_system_claim(void)
 700 {
 701         lbolt_debug_entry();
 702 }
 703 
 704 static void
 705 kdi_system_release(void)
 706 {
 707         lbolt_debug_return();
 708 }
 709 
 710 void
 711 mach_kdi_init(kdi_t *kdi)
 712 {
 713         kdi->kdi_plat_call = kdi_plat_call;
 714         kdi->kdi_kmdb_enter = kmdb_enter;
 715         kdi->pkdi_system_claim = kdi_system_claim;
 716         kdi->pkdi_system_release = kdi_system_release;
 717         kdi->mkdi_cpu_index = kdi_cpu_index;
 718         kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
 719         kdi->mkdi_kernpanic = kdi_kernpanic;
 720 }
 721 
 722 
 723 /*
 724  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 725  * long, and it fills in the array with the time spent on cpu in
 726  * each of the mstates, where time is returned in nsec.
 727  *
 728  * No guarantee is made that the returned values in times[] will
 729  * monotonically increase on sequential calls, although this will
 730  * be true in the long run. Any such guarantee must be handled by
 731  * the caller, if needed. This can happen if we fail to account
 732  * for elapsed time due to a generation counter conflict, yet we
 733  * did account for it on a prior call (see below).
 734  *
 735  * The complication is that the cpu in question may be updating
 736  * its microstate at the same time that we are reading it.
 737  * Because the microstate is only updated when the CPU's state
 738  * changes, the values in cpu_intracct[] can be indefinitely out
 739  * of date. To determine true current values, it is necessary to
 740  * compare the current time with cpu_mstate_start, and add the
 741  * difference to times[cpu_mstate].
 742  *
 743  * This can be a problem if those values are changing out from
 744  * under us. Because the code path in new_cpu_mstate() is
 745  * performance critical, we have not added a lock to it. Instead,
 746  * we have added a generation counter. Before beginning
 747  * modifications, the counter is set to 0. After modifications,
 748  * it is set to the old value plus one.
 749  *
 750  * get_cpu_mstate() will not consider the values of cpu_mstate
 751  * and cpu_mstate_start to be usable unless the value of
 752  * cpu_mstate_gen is both non-zero and unchanged, both before and
 753  * after reading the mstate information. Note that we must
 754  * protect against out-of-order loads around accesses to the
 755  * generation counter. Also, this is a best effort approach in
 756  * that we do not retry should the counter be found to have
 757  * changed.
 758  *
 759  * cpu_intracct[] is used to identify time spent in each CPU
 760  * mstate while handling interrupts. Such time should be reported
 761  * against system time, and so is subtracted out from its
 762  * corresponding cpu_acct[] time and added to
 763  * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 764  * %ticks, but acct time may be stored as %sticks, thus requiring
 765  * different conversions before they can be compared.
 766  */
 767 
 768 void
 769 get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
 770 {
 771         int i;
 772         hrtime_t now, start;
 773         uint16_t gen;
 774         uint16_t state;
 775         hrtime_t intracct[NCMSTATES];
 776 
 777         /*
 778          * Load all volatile state under the protection of membar.
 779          * cpu_acct[cpu_mstate] must be loaded to avoid double counting
 780          * of (now - cpu_mstate_start) by a change in CPU mstate that
 781          * arrives after we make our last check of cpu_mstate_gen.
 782          */
 783 
 784         now = gethrtime_unscaled();
 785         gen = cpu->cpu_mstate_gen;
 786 
 787         membar_consumer();      /* guarantee load ordering */
 788         start = cpu->cpu_mstate_start;
 789         state = cpu->cpu_mstate;
 790         for (i = 0; i < NCMSTATES; i++) {
 791                 intracct[i] = cpu->cpu_intracct[i];
 792                 times[i] = cpu->cpu_acct[i];
 793         }
 794         membar_consumer();      /* guarantee load ordering */
 795 
 796         if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
 797                 times[state] += now - start;
 798 
 799         for (i = 0; i < NCMSTATES; i++) {
 800                 scalehrtime(&times[i]);
 801                 intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
 802         }
 803 
 804         for (i = 0; i < NCMSTATES; i++) {
 805                 if (i == CMS_SYSTEM)
 806                         continue;
 807                 times[i] -= intracct[i];
 808                 if (times[i] < 0) {
 809                         intracct[i] += times[i];
 810                         times[i] = 0;
 811                 }
 812                 times[CMS_SYSTEM] += intracct[i];
 813         }
 814 }
 815 
 816 void
 817 mach_cpu_pause(volatile char *safe)
 818 {
 819         /*
 820          * This cpu is now safe.
 821          */
 822         *safe = PAUSE_WAIT;
 823         membar_enter(); /* make sure stores are flushed */
 824 
 825         /*
 826          * Now we wait.  When we are allowed to continue, safe
 827          * will be set to PAUSE_IDLE.
 828          */
 829         while (*safe != PAUSE_IDLE)
 830                 SMT_PAUSE();
 831 }
 832 
 833 /*ARGSUSED*/
 834 int
 835 plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
 836 {
 837         return (ENOTSUP);
 838 }
 839 
 840 int
 841 dump_plat_addr()
 842 {
 843         return (0);
 844 }
 845 
 846 void
 847 dump_plat_pfn()
 848 {
 849 }
 850 
 851 /* ARGSUSED */
 852 int
 853 dump_plat_data(void *dump_cdata)
 854 {
 855         return (0);
 856 }
 857 
 858 /* ARGSUSED */
 859 int
 860 plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
 861 {
 862         return (PLAT_HOLD_OK);
 863 }
 864 
 865 /* ARGSUSED */
 866 void
 867 plat_release_page(page_t *pp)
 868 {
 869 }
 870 
 871 /* ARGSUSED */
 872 void
 873 progressbar_key_abort(ldi_ident_t li)
 874 {
 875 }
 876 
 877 /*
 878  * We need to post a soft interrupt to reprogram the lbolt cyclic when
 879  * switching from event to cyclic driven lbolt. The following code adds
 880  * and posts the softint for sun4 platforms.
 881  */
 882 static uint64_t lbolt_softint_inum;
 883 
 884 void
 885 lbolt_softint_add(void)
 886 {
 887         lbolt_softint_inum = add_softintr(LOCK_LEVEL,
 888             (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
 889 }
 890 
 891 void
 892 lbolt_softint_post(void)
 893 {
 894         setsoftint(lbolt_softint_inum);
 895 }