1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2016 Joyent, Inc.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/clock.h>
  30 #include <sys/panic.h>
  31 #include <sys/atomic.h>
  32 #include <sys/hypervisor.h>
  33 
  34 #include <sys/archsystm.h>
  35 
  36 /*
  37  * On the hypervisor, we have a virtualized system time based upon the
  38  * information provided for each VCPU, which is updated every time it is
  39  * scheduled onto a real CPU.  Thus, none of the traditional code in
  40  * i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
  41  * the PSM, and there is no scaling step to apply.
  42  *
  43  * However, the platform does not guarantee monotonicity; thus we have to fake
  44  * this up, which is a deeply unpleasant thing to have to do.
  45  *
  46  * Note that the virtualized interface still relies on the current TSC to
  47  * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
  48  * subject to all the problems with that.  For the most part, the hypervisor is
  49  * supposed to deal with them.
  50  *
  51  * Another wrinkle involves suspend/resume/migration.  If we come back and time
  52  * is apparently less, we may have resumed on a different machine or on the
  53  * same machine after a reboot.  In this case we need to maintain an addend to
  54  * ensure time continues reasonably.  Otherwise we could end up taking a very
  55  * long time to expire cyclics in the heap.  Thus we have two functions:
  56  *
  57  * xpv_getsystime()
  58  *
  59  *      The unadulterated system time from the hypervisor.  This is only to be
  60  *      used when programming the hypervisor (setting a timer or calculating
  61  *      the TOD).
  62  *
  63  * xpv_gethrtime()
  64  *
  65  *      This is the monotonic hrtime counter to be used by everything else such
  66  *      as the cyclic subsystem.  We should never pass an hrtime directly into
  67  *      a hypervisor interface, as hrtime_addend may well be non-zero.
  68  */
  69 
/*
 * When non-zero, dtrace_xpv_gethrtime() clamps its result against
 * hrtime_last so that the value it returns never goes backwards.
 */
int hrtime_fake_mt = 1;
/* Last value published by dtrace_xpv_gethrtime(); updated with a CAS. */
static volatile hrtime_t hrtime_last;
/* Raw hypervisor system time recorded by xpv_time_suspend(). */
static hrtime_t hrtime_suspend_time;
/*
 * Offset added to the raw system time by dtrace_xpv_gethrtime().  It is
 * increased by xpv_time_resume() whenever the raw time read on resume is
 * lower than the value recorded at suspend.
 */
static hrtime_t hrtime_addend;

/*
 * High-resolution time-of-day state.  None of these are referenced in this
 * file; NOTE(review): presumably they are consumed by the common clock code
 * -- confirm against the users of each symbol.
 */
volatile uint32_t hres_lock;
hrtime_t hres_last_tick;
int64_t hrestime_adj;
volatile timestruc_t hrestime;
  79 
/*
 * These functions are used in DTrace probe context, and must be removed from
 * fbt consideration.  Currently fbt ignores all weak symbols, so this will
 * achieve that.
 *
 * Each pragma publishes the name on the left as a weak alias for the
 * dtrace_-prefixed implementation on the right.  Note that xpv_gethrtime,
 * dtrace_gethrtime and tsc_read all resolve to dtrace_xpv_gethrtime(), so
 * callers of any of the three receive the adjusted hrtime computed there.
 */
#pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
#pragma weak xpv_getsystime = dtrace_xpv_getsystime
#pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
#pragma weak tsc_read = dtrace_xpv_gethrtime
  89 
  90 hrtime_t
  91 dtrace_xpv_getsystime(void)
  92 {
  93         vcpu_time_info_t *src;
  94         vcpu_time_info_t __vti, *dst = &__vti;
  95         uint64_t tsc_delta;
  96         uint64_t tsc;
  97         hrtime_t result;
  98         uint32_t stamp;
  99 
 100         src = &CPU->cpu_m.mcpu_vcpu_info->time;
 101 
 102         /*
 103          * Loop until version has not been changed during our update, and a Xen
 104          * update is not under way (lowest bit is set).
 105          */
 106         do {
 107                 dst->version = src->version;
 108                 stamp = CPU->cpu_m.mcpu_istamp;
 109 
 110                 membar_consumer();
 111 
 112                 dst->tsc_timestamp = src->tsc_timestamp;
 113                 dst->system_time = src->system_time;
 114                 dst->tsc_to_system_mul = src->tsc_to_system_mul;
 115                 dst->tsc_shift = src->tsc_shift;
 116 
 117                 /*
 118                  * Note that this use of the -actual- TSC register
 119                  * should probably be the SOLE one in the system on this
 120                  * paravirtualized platform.
 121                  */
 122                 tsc = __rdtsc_insn();
 123                 tsc_delta = tsc - dst->tsc_timestamp;
 124 
 125                 membar_consumer();
 126 
 127         } while (((src->version & 1) | (dst->version ^ src->version)) ||
 128             CPU->cpu_m.mcpu_istamp != stamp);
 129 
 130         if (dst->tsc_shift >= 0)
 131                 tsc_delta <<= dst->tsc_shift;
 132         else if (dst->tsc_shift < 0)
 133                 tsc_delta >>= -dst->tsc_shift;
 134 
 135         result = dst->system_time +
 136             ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
 137 
 138         return (result);
 139 }
 140 
 141 hrtime_t
 142 dtrace_xpv_gethrtime(void)
 143 {
 144         hrtime_t result = xpv_getsystime() + hrtime_addend;
 145 
 146         if (hrtime_fake_mt) {
 147                 hrtime_t last;
 148                 do {
 149                         last = hrtime_last;
 150                         if (result < last)
 151                                 result = last + 1;
 152                 } while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
 153                     last, result) != last);
 154         }
 155 
 156         return (result);
 157 }
 158 
 159 void
 160 xpv_time_suspend(void)
 161 {
 162         hrtime_suspend_time = xpv_getsystime();
 163 }
 164 
 165 void
 166 xpv_time_resume(void)
 167 {
 168         hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
 169 
 170         if (delta < 0)
 171                 hrtime_addend += -delta;
 172 }