1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #ifndef __xpv
  28 #error "This file is for i86xpv only"
  29 #endif
  30 
  31 #include <sys/types.h>
  32 #include <sys/mca_x86.h>
  33 #include <sys/archsystm.h>
  34 #include <sys/hypervisor.h>
  35 
  36 #include "../../i86pc/cpu/generic_cpu/gcpu.h"
  37 
/*
 * Descriptor for panic-time telemetry; defined outside this file.  It is
 * consulted by mctelem_traverse() and gcpu_xpv_panic_callback() below.
 */
extern xpv_mca_panic_data_t *xpv_mca_panic_data;

/*
 * Buffer that gcpu_xpv_panic_callback() hands to gcpu_xpv_telem_read()
 * and then to gcpu_xpv_mci_process() when not in a hypervisor panic.
 */
mc_info_t gcpu_mce_data;

/* Selects which link offset mctelem_traverse() follows between entries. */
enum mctelem_direction {
        MCTELEM_FORWARD,
        MCTELEM_REVERSE
};

/*
 * Event counters.  They are only ever incremented in this file, never
 * read; presumably they exist for inspection with a debugger.
 */
/* cmi_hdl_lookup() returned NULL in gcpu_xpv_proxy_logout() */
static uint32_t gcpu_xpv_hdl_lookupfails;
/* MC_TYPE_BANK records encountered */
static uint32_t gcpu_xpv_bankhdr_found;
/* MC_TYPE_EXTENDED records encountered (and skipped) */
static uint32_t gcpu_xpv_spechdr_found;

/*
 * Failed XEN_MC_fetch hypercalls, bucketed by the hypercall's return
 * value; see gcpu_xpv_telem_read().
 */
static uint32_t gcpu_xpv_mca_hcall_fails[16];
/* MC_TYPE_GLOBAL records encountered */
static uint32_t gcpu_xpv_globalhdr_found;

/*
 * Scratch array of MSR number/value pairs, allocated once by
 * gcpu_xpv_mca_init() and passed to gcpu_xpv_mci_process() by the
 * panic-time code below.  gcpu_xpv_bankregs_sz is its size in bytes.
 */
static cmi_mca_regs_t *gcpu_xpv_bankregs;
size_t gcpu_xpv_bankregs_sz;

/*
 * Number of registers staged per MCA bank by gcpu_xpv_proxy_logout():
 * STATUS, ADDR and MISC.
 */
#define GCPU_XPV_ARCH_NREGS     3
  58 
  59 void
  60 gcpu_xpv_mca_init(int nbanks)
  61 {
  62         if (gcpu_xpv_bankregs == NULL) {
  63                 gcpu_xpv_bankregs_sz = nbanks * GCPU_XPV_ARCH_NREGS *
  64                     sizeof (cmi_mca_regs_t);
  65 
  66                 gcpu_xpv_bankregs = kmem_zalloc(gcpu_xpv_bankregs_sz, KM_SLEEP);
  67         }
  68 }
  69 
  70 static void
  71 gcpu_xpv_proxy_logout(int what, struct mc_info *mi, struct mcinfo_common **micp,
  72     int *idxp, cmi_mca_regs_t *bankregs, size_t bankregs_sz)
  73 {
  74         struct mcinfo_global *mgi = (struct mcinfo_global *)(uintptr_t)*micp;
  75         struct mcinfo_common *mic;
  76         struct mcinfo_bank *mib;
  77         cmi_hdl_t hdl = NULL;
  78         cmi_mca_regs_t *mcrp;
  79         int idx = *idxp;
  80         int tried = 0;
  81         int j;
  82 
  83         /* Skip over the MC_TYPE_GLOBAL record */
  84         ASSERT(mgi->common.type == MC_TYPE_GLOBAL);
  85         mic = x86_mcinfo_next((struct mcinfo_common *)(uintptr_t)mgi);
  86         idx++;
  87 
  88         /*
  89          * Process all MC_TYPE_BANK and MC_TYPE_EXTENDED records that
  90          * follow the MC_TYPE_GLOBAL record, ending when we reach any
  91          * other record type or when we're out of record.
  92          *
  93          * We skip over MC_TYPE_EXTENDED for now - nothing consumes
  94          * the extended MSR data even in native Solaris.
  95          */
  96         while (idx < x86_mcinfo_nentries(mi) &&
  97             (mic->type == MC_TYPE_BANK || mic->type == MC_TYPE_EXTENDED)) {
  98                 if (mic->type == MC_TYPE_EXTENDED) {
  99                         gcpu_xpv_spechdr_found++;
 100                         goto next_record;
 101                 } else {
 102                         gcpu_xpv_bankhdr_found++;
 103                 }
 104 
 105                 if (hdl == NULL && !tried++) {
 106                         if ((hdl = cmi_hdl_lookup(CMI_HDL_SOLARIS_xVM_MCA,
 107                             mgi->mc_socketid, mgi->mc_coreid,
 108                             mgi->mc_core_threadid)) == NULL) {
 109                                 gcpu_xpv_hdl_lookupfails++;
 110                                 goto next_record;
 111                         } else {
 112                                 bzero(bankregs, bankregs_sz);
 113                                 mcrp = bankregs;
 114                         }
 115                 }
 116 
 117                 mib = (struct mcinfo_bank *)(uintptr_t)mic;
 118 
 119                 mcrp->cmr_msrnum = IA32_MSR_MC(mib->mc_bank, STATUS);
 120                 mcrp->cmr_msrval = mib->mc_status;
 121                 mcrp++;
 122 
 123                 mcrp->cmr_msrnum = IA32_MSR_MC(mib->mc_bank, ADDR);
 124                 mcrp->cmr_msrval = mib->mc_addr;
 125                 mcrp++;
 126 
 127                 mcrp->cmr_msrnum = IA32_MSR_MC(mib->mc_bank, MISC);
 128                 mcrp->cmr_msrval = mib->mc_misc;
 129                 mcrp++;
 130 
 131 next_record:
 132                 idx++;
 133                 mic = x86_mcinfo_next(mic);
 134         }
 135 
 136         /*
 137          * If we found some telemetry and a handle to associate it with
 138          * then "forward" that telemetry into the MSR interpose layer
 139          * and then request logout which will find that interposed
 140          * telemetry.  Indicate that logout code should clear bank
 141          * status registers so that it can invalidate them in the interpose
 142          * layer - they won't actually make it as far as real MSR writes.
 143          */
 144         if (hdl != NULL) {
 145                 cmi_mca_regs_t gsr;
 146                 gcpu_mce_status_t mce;
 147 
 148                 gsr.cmr_msrnum = IA32_MSR_MCG_STATUS;
 149                 gsr.cmr_msrval = mgi->mc_gstatus;
 150                 cmi_hdl_msrforward(hdl, &gsr, 1);
 151 
 152                 cmi_hdl_msrforward(hdl, bankregs, mcrp - bankregs);
 153                 gcpu_mca_logout(hdl, NULL, (uint64_t)-1, &mce, B_TRUE, what);
 154                 cmi_hdl_rele(hdl);
 155         }
 156 
 157         /*
 158          * We must move the index on at least one record or our caller
 159          * may loop forever;  our initial increment over the global
 160          * record assures this.
 161          */
 162         ASSERT(idx > *idxp);
 163         *idxp = idx;
 164         *micp = mic;
 165 }
 166 
 167 /*
 168  * Process a struct mc_info.
 169  *
 170  * There are x86_mcinfo_nentries(mi) entries.  An entry of type
 171  * MC_TYPE_GLOBAL precedes a number (potentially zero) of
 172  * entries of type MC_TYPE_BANK for telemetry from MCA banks
 173  * of the resource identified in the MC_TYPE_GLOBAL entry.
 174  * I think there can be multiple MC_TYPE_GLOBAL entries per buffer.
 175  */
 176 void
 177 gcpu_xpv_mci_process(mc_info_t *mi, int type,
 178     cmi_mca_regs_t *bankregs, size_t bankregs_sz)
 179 {
 180         struct mcinfo_common *mic;
 181         int idx;
 182 
 183         mic = x86_mcinfo_first(mi);
 184 
 185         idx = 0;
 186         while (idx < x86_mcinfo_nentries(mi)) {
 187                 if (mic->type == MC_TYPE_GLOBAL) {
 188                         gcpu_xpv_globalhdr_found++;
 189                         gcpu_xpv_proxy_logout(type == XEN_MC_URGENT ?
 190                             GCPU_MPT_WHAT_MC_ERR : GCPU_MPT_WHAT_XPV_VIRQ,
 191                             mi, &mic, &idx, bankregs, bankregs_sz);
 192                 } else {
 193                         idx++;
 194                         mic = x86_mcinfo_next(mic);
 195                 }
 196         }
 197 }
 198 
 199 int
 200 gcpu_xpv_telem_read(mc_info_t *mci, int type, uint64_t *idp)
 201 {
 202         xen_mc_t xmc;
 203         xen_mc_fetch_t *mcf = &xmc.u.mc_fetch;
 204         long err;
 205 
 206         mcf->flags = type;
 207         set_xen_guest_handle(mcf->data, mci);
 208 
 209         if ((err = HYPERVISOR_mca(XEN_MC_fetch, &xmc)) != 0) {
 210                 gcpu_xpv_mca_hcall_fails[err < 16 ? err : 0]++;
 211                 return (0);
 212         }
 213 
 214         if (mcf->flags == XEN_MC_OK) {
 215                 *idp = mcf->fetch_id;
 216                 return (1);
 217         } else {
 218                 *idp = 0;
 219                 return (0);
 220         }
 221 }
 222 
 223 void
 224 gcpu_xpv_telem_ack(int type, uint64_t fetch_id)
 225 {
 226         xen_mc_t xmc;
 227         struct xen_mc_fetch *mcf = &xmc.u.mc_fetch;
 228 
 229         mcf->flags = type | XEN_MC_ACK;
 230         mcf->fetch_id = fetch_id;
 231         (void) HYPERVISOR_mca(XEN_MC_fetch, &xmc);
 232 }
 233 
 234 static void
 235 mctelem_traverse(void *head, enum mctelem_direction direction,
 236     boolean_t urgent)
 237 {
 238         char *tep = head, **ntepp;
 239         int noff = (direction == MCTELEM_FORWARD) ?
 240             xpv_mca_panic_data->mpd_fwdptr_offset :
 241             xpv_mca_panic_data->mpd_revptr_offset;
 242 
 243 
 244         while (tep != NULL) {
 245                 struct mc_info **mcip = (struct mc_info **)
 246                     (tep + xpv_mca_panic_data->mpd_dataptr_offset);
 247 
 248                 gcpu_xpv_mci_process(*mcip,
 249                     urgent ? XEN_MC_URGENT : XEN_MC_NONURGENT,
 250                     gcpu_xpv_bankregs, gcpu_xpv_bankregs_sz);
 251 
 252                 ntepp = (char **)(tep + noff);
 253                 tep = *ntepp;
 254         }
 255 }
 256 
 257 /*
 258  * Callback made from panicsys.  We may have reached panicsys from a
 259  * Solaris-initiated panic or a hypervisor-initiated panic;  for the
 260  * latter we may not perform any hypercalls.  Our task is to retrieve
 261  * unprocessed MCA telemetry from the hypervisor and shovel it into
 262  * errorqs for later processing during panic.
 263  */
 264 void
 265 gcpu_xpv_panic_callback(void)
 266 {
 267         if (IN_XPV_PANIC()) {
 268                 xpv_mca_panic_data_t *ti = xpv_mca_panic_data;
 269 
 270                 if (ti == NULL ||
 271                     ti->mpd_magic != MCA_PANICDATA_MAGIC ||
 272                     ti->mpd_version != MCA_PANICDATA_VERS)
 273                         return;
 274 
 275                 mctelem_traverse(ti->mpd_urgent_processing, MCTELEM_FORWARD,
 276                     B_TRUE);
 277                 mctelem_traverse(ti->mpd_urgent_dangling, MCTELEM_REVERSE,
 278                     B_TRUE);
 279                 mctelem_traverse(ti->mpd_urgent_committed, MCTELEM_REVERSE,
 280                     B_TRUE);
 281 
 282                 mctelem_traverse(ti->mpd_nonurgent_processing, MCTELEM_FORWARD,
 283                     B_FALSE);
 284                 mctelem_traverse(ti->mpd_nonurgent_dangling, MCTELEM_REVERSE,
 285                     B_FALSE);
 286                 mctelem_traverse(ti->mpd_nonurgent_committed, MCTELEM_REVERSE,
 287                     B_FALSE);
 288         } else {
 289                 int types[] = { XEN_MC_URGENT, XEN_MC_NONURGENT };
 290                 uint64_t fetch_id;
 291                 int i;
 292 
 293                 for (i = 0; i < sizeof (types) / sizeof (types[0]); i++) {
 294                         while (gcpu_xpv_telem_read(&gcpu_mce_data,
 295                             types[i], &fetch_id)) {
 296                                 gcpu_xpv_mci_process(&gcpu_mce_data, types[i],
 297                                     gcpu_xpv_bankregs, gcpu_xpv_bankregs_sz);
 298                                 gcpu_xpv_telem_ack(types[i], fetch_id);
 299                         }
 300                 }
 301         }
 302 }