Print this page
cstyle sort of updates
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/intel/io/mc-amd/mcamd_drv.c
+++ new/usr/src/uts/intel/io/mc-amd/mcamd_drv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 #include <sys/conf.h>
27 27 #include <sys/ddi.h>
28 28 #include <sys/ddifm.h>
29 29 #include <sys/sunddi.h>
30 30 #include <sys/sunndi.h>
31 31 #include <sys/stat.h>
32 32 #include <sys/modctl.h>
33 33 #include <sys/types.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/kmem.h>
37 37 #include <sys/cred.h>
38 38 #include <sys/ksynch.h>
39 39 #include <sys/rwlock.h>
40 40 #include <sys/pghw.h>
41 41 #include <sys/open.h>
42 42 #include <sys/policy.h>
43 43 #include <sys/x86_archext.h>
44 44 #include <sys/cpu_module.h>
45 45 #include <qsort.h>
46 46 #include <sys/pci_cfgspace.h>
47 47 #include <sys/mc.h>
48 48 #include <sys/mc_amd.h>
49 49 #include <sys/smbios.h>
50 50 #include <sys/pci.h>
51 51 #include <mcamd.h>
52 52 #include <mcamd_dimmcfg.h>
53 53 #include <mcamd_pcicfg.h>
54 54 #include <mcamd_api.h>
55 55 #include <sys/fm/cpu/AMD.h>
56 56 #include <sys/fm/smb/fmsmb.h>
57 57 #include <sys/fm/protocol.h>
58 58 #include <sys/fm/util.h>
59 59
/*
 * Set to nonzero (e.g., via /etc/system) to prevent mc-amd from attaching.
 */
int mc_no_attach = 0;

/*
 * Of the 754/939/940 packages, only socket 940 supports quadrank registered
 * dimms. Unfortunately, no memory-controller register indicates the
 * presence of quadrank dimm support or presence (i.e., in terms of number
 * of slots per cpu, and chip-select lines per slot). The following may be set
 * in /etc/system to indicate the presence of quadrank support on a motherboard.
 *
 * There is no need to set this for F(1207) and S1g1.
 */
int mc_quadranksupport = 0;

/* Global list of attached memory controllers; protected by mc_lock. */
mc_t *mc_list, *mc_last;
krwlock_t mc_lock;
/* NOTE(review): presumably keeps mc state held once attached - confirm */
int mc_hold_attached = 1;

#define	MAX(m, n) ((m) >= (n) ? (m) : (n))
#define	MIN(m, n) ((m) <= (n) ? (m) : (n))

/*
 * The following tuneable is used to determine the DRAM scrubbing rate.
 * The values range from 0x00-0x16 as described in the BKDG. Zero
 * disables DRAM scrubbing. Values above zero indicate rates in descending
 * order.
 *
 * The default value below is used on several Sun systems. In the future
 * this code should assign values dynamically based on memory sizing.
 */
uint32_t mc_scrub_rate_dram = 0xd;	/* 64B every 163.8 us; 1GB per 45 min */

enum {
	MC_SCRUB_BIOSDEFAULT,	/* retain system default value */
	MC_SCRUB_FIXED,		/* assign mc_scrub_rate_* values */
	MC_SCRUB_MAX		/* assign max of system and tunables */
} mc_scrub_policy = MC_SCRUB_MAX;
99 99
100 100 static void
101 101 mc_snapshot_destroy(mc_t *mc)
102 102 {
103 103 ASSERT(RW_LOCK_HELD(&mc_lock));
104 104
105 105 if (mc->mc_snapshot == NULL)
106 106 return;
107 107
108 108 kmem_free(mc->mc_snapshot, mc->mc_snapshotsz);
109 109 mc->mc_snapshot = NULL;
110 110 mc->mc_snapshotsz = 0;
111 111 mc->mc_snapshotgen++;
112 112 }
113 113
114 114 static int
115 115 mc_snapshot_update(mc_t *mc)
116 116 {
117 117 ASSERT(RW_LOCK_HELD(&mc_lock));
118 118
119 119 if (mc->mc_snapshot != NULL)
120 120 return (0);
121 121
122 122 if (nvlist_pack(mc->mc_nvl, &mc->mc_snapshot, &mc->mc_snapshotsz,
123 123 NV_ENCODE_XDR, KM_SLEEP) != 0)
124 124 return (-1);
125 125
126 126 return (0);
127 127 }
128 128
129 129 static mc_t *
130 130 mc_lookup_by_chipid(int chipid)
131 131 {
132 132 mc_t *mc;
133 133
134 134 ASSERT(RW_LOCK_HELD(&mc_lock));
135 135
136 136 for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
137 137 if (mc->mc_props.mcp_num == chipid)
138 138 return (mc);
139 139 }
140 140
141 141 return (NULL);
142 142 }
143 143
144 144 /*
145 145 * Read config register pairs into the two arrays provided on the given
146 146 * handle and at offsets as follows:
147 147 *
148 148 * Index Array r1 offset Array r2 offset
149 149 * 0 r1addr r2addr
150 150 * 1 r1addr + incr r2addr + incr
151 151 * 2 r1addr + 2 * incr r2addr + 2 * incr
152 152 * ...
153 153 * n - 1 r1addr + (n - 1) * incr r2addr + (n - 1) * incr
154 154 *
155 155 * The number of registers to read into the r1 array is r1n; the number
156 156 * for the r2 array is r2n.
157 157 */
158 158 static void
159 159 mc_prop_read_pair(mc_pcicfg_hdl_t cfghdl, uint32_t *r1, off_t r1addr,
160 160 int r1n, uint32_t *r2, off_t r2addr, int r2n, off_t incr)
161 161 {
162 162 int i;
163 163
164 164 for (i = 0; i < MAX(r1n, r2n); i++, r1addr += incr, r2addr += incr) {
165 165 if (i < r1n)
166 166 r1[i] = mc_pcicfg_get32(cfghdl, r1addr);
167 167 if (i < r2n)
168 168 r2[i] = mc_pcicfg_get32(cfghdl, r2addr);
169 169 }
170 170 }
171 171
172 172 /*ARGSUSED*/
173 173 static int
174 174 mc_nvl_add_socket_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
175 175 {
176 176 uint32_t skt = *((uint32_t *)arg1);
177 177 cmi_hdl_t *hdlp = (cmi_hdl_t *)arg2;
178 178
179 179 if (cmi_hdl_getsockettype(whdl) == skt) {
180 180 cmi_hdl_hold(whdl); /* short-term hold */
181 181 *hdlp = whdl;
182 182 return (CMI_HDL_WALK_DONE);
183 183 } else {
184 184 return (CMI_HDL_WALK_NEXT);
185 185 }
186 186 }
187 187
188 188 static void
189 189 mc_nvl_add_socket(nvlist_t *nvl, mc_t *mc)
190 190 {
191 191 cmi_hdl_t hdl = NULL;
192 192 const char *s;
193 193
194 194 cmi_hdl_walk(mc_nvl_add_socket_cb, (void *)&mc->mc_socket,
195 195 (void *)&hdl, NULL);
196 196 if (hdl == NULL)
197 197 s = "Unknown"; /* no cpu for this chipid found */
198 198 else
199 199 s = cmi_hdl_getsocketstr(hdl);
200 200
201 201 (void) nvlist_add_string(nvl, "socket", s);
202 202
203 203 if (hdl != NULL)
204 204 cmi_hdl_rele(hdl);
205 205 }
206 206
/*
 * Return the EccEn bit from the cached copy of the NB Configuration
 * register, extracting it with the pre-rev-F field layout for revs B-E
 * and the rev F/G layout otherwise.
 */
static uint32_t
mc_ecc_enabled(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;

	MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;

	return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
	    MCREG_FIELD_F_preF(&nbcfg, EccEn) :
	    MCREG_FIELD_F_revFG(&nbcfg, EccEn));
}
219 219
/*
 * Return the ChipKillEccEn bit from the cached copy of the NB
 * Configuration register, using the field layout appropriate to the
 * chip revision (pre-F vs F/G).
 */
static uint32_t
mc_ck_enabled(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;

	MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;

	return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
	    MCREG_FIELD_F_preF(&nbcfg, ChipKillEccEn) :
	    MCREG_FIELD_F_revFG(&nbcfg, ChipKillEccEn));
}
232 232
233 233 static void
234 234 mc_nvl_add_ecctype(nvlist_t *nvl, mc_t *mc)
235 235 {
236 236 (void) nvlist_add_string(nvl, "ecc-type", mc_ecc_enabled(mc) ?
237 237 (mc_ck_enabled(mc) ? "ChipKill 128/16" : "Normal 64/8") : "None");
238 238 }
239 239
/*
 * Look up the numeric property identified by 'code' on the given mcamd
 * node and add it to nvl under its canonical property name. If reqval
 * is nonzero the property is added only when the value is valid (i.e.,
 * not MC_INVALNUM).
 */
static void
mc_nvl_add_prop(nvlist_t *nvl, void *node, mcamd_propcode_t code, int reqval)
{
	int valfound;
	uint64_t value;
	const char *name = mcamd_get_propname(code);

	valfound = mcamd_get_numprop(NULL, (mcamd_node_t *)node, code, &value);

	/* Both name and value lookups are expected to succeed. */
	ASSERT(name != NULL && valfound);
	if (name != NULL && valfound && (!reqval || value != MC_INVALNUM))
		(void) nvlist_add_uint64(nvl, name, value);
}
253 253
/*
 * Add a "cslist" nvlist array to the mc nvlist, one member per
 * chip-select on this controller, carrying the cs number, base address,
 * mask and size plus the associated dimm numbers and csnames where
 * dimm configuration info is available.
 */
static void
mc_nvl_add_cslist(nvlist_t *mcnvl, mc_t *mc)
{
	mc_cs_t *mccs = mc->mc_cslist;
	nvlist_t *cslist[MC_CHIP_NCS];
	int nelem, i;

	for (nelem = 0; mccs != NULL; mccs = mccs->mccs_next, nelem++) {
		nvlist_t **csp = &cslist[nelem];
		char csname[MCDCFG_CSNAMELEN];

		(void) nvlist_alloc(csp, NV_UNIQUE_NAME, KM_SLEEP);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_NUM, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_BASE_ADDR, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_MASK, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_SIZE, 0);

		/*
		 * It is possible for an mc_cs_t not to have associated
		 * DIMM info if mcdcfg_lookup failed.
		 */
		if (mccs->mccs_csl[0] != NULL) {
			mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM1, 1);
			mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[0], csname,
			    sizeof (csname));
			(void) nvlist_add_string(*csp, "dimm1-csname", csname);
		}

		if (mccs->mccs_csl[1] != NULL) {
			mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM2, 1);
			mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[1], csname,
			    sizeof (csname));
			(void) nvlist_add_string(*csp, "dimm2-csname", csname);
		}
	}

	/* Add cslist nvlist array even if zero members */
	(void) nvlist_add_nvlist_array(mcnvl, "cslist", cslist, nelem);
	for (i = 0; i < nelem; i++)
		nvlist_free(cslist[i]);
}
295 295
/*
 * Add a "dimmlist" nvlist array to the mc nvlist, one member per dimm,
 * carrying the dimm number and size together with the numbers and
 * csnames of the chip-selects linked to that dimm.
 */
static void
mc_nvl_add_dimmlist(nvlist_t *mcnvl, mc_t *mc)
{
	nvlist_t *dimmlist[MC_CHIP_NDIMM];
	mc_dimm_t *mcd;
	int nelem, i;

	for (nelem = 0, mcd = mc->mc_dimmlist; mcd != NULL;
	    mcd = mcd->mcd_next, nelem++) {
		nvlist_t **dimmp = &dimmlist[nelem];
		uint64_t csnums[MC_CHIP_DIMMRANKMAX];
		char csname[4][MCDCFG_CSNAMELEN];
		char *csnamep[4];
		int ncs = 0;

		(void) nvlist_alloc(dimmp, NV_UNIQUE_NAME, KM_SLEEP);

		mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_NUM, 1);
		mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_SIZE, 1);

		/* Collect cs number and name for each populated rank slot. */
		for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
			if (mcd->mcd_cs[i] != NULL) {
				csnums[ncs] =
				    mcd->mcd_cs[i]->mccs_props.csp_num;
				mcdcfg_csname(mc->mc_socket, mcd->mcd_csl[i],
				    csname[ncs], MCDCFG_CSNAMELEN);
				csnamep[ncs] = csname[ncs];
				ncs++;
			}
		}

		(void) nvlist_add_uint64_array(*dimmp, "csnums", csnums, ncs);
		(void) nvlist_add_string_array(*dimmp, "csnames", csnamep, ncs);
	}

	/* Add dimmlist nvlist array even if zero members */
	(void) nvlist_add_nvlist_array(mcnvl, "dimmlist", dimmlist, nelem);
	for (i = 0; i < nelem; i++)
		nvlist_free(dimmlist[i]);
}
336 336
/*
 * Add an "htconfig" nvlist to the mc nvlist describing the
 * HyperTransport node id, unit ids and - when the coherent node count
 * fits our local arrays - the broadcast/response/request routing
 * tables captured from the cached config registers.
 */
static void
mc_nvl_add_htconfig(nvlist_t *mcnvl, mc_t *mc)
{
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	union mcreg_htroute *htrp = (union mcreg_htroute *)&mcr->mcr_htroute[0];
	union mcreg_nodeid *nip = (union mcreg_nodeid *)&mcr->mcr_htnodeid;
	union mcreg_unitid *uip = (union mcreg_unitid *)&mcr->mcr_htunitid;
	int ndcnt = HT_COHERENTNODES(nip);
	uint32_t BCRte[MC_CHIP_MAXNODES];
	uint32_t RPRte[MC_CHIP_MAXNODES];
	uint32_t RQRte[MC_CHIP_MAXNODES];
	nvlist_t *nvl;
	int i;

	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);

	(void) nvlist_add_uint32(nvl, "NodeId", MCREG_FIELD_CMN(nip, NodeId));
	(void) nvlist_add_uint32(nvl, "CoherentNodes", HT_COHERENTNODES(nip));
	(void) nvlist_add_uint32(nvl, "SbNode", MCREG_FIELD_CMN(nip, SbNode));
	(void) nvlist_add_uint32(nvl, "LkNode", MCREG_FIELD_CMN(nip, LkNode));
	(void) nvlist_add_uint32(nvl, "SystemCoreCount",
	    HT_SYSTEMCORECOUNT(nip));

	(void) nvlist_add_uint32(nvl, "C0Unit", MCREG_FIELD_CMN(uip, C0Unit));
	(void) nvlist_add_uint32(nvl, "C1Unit", MCREG_FIELD_CMN(uip, C1Unit));
	(void) nvlist_add_uint32(nvl, "McUnit", MCREG_FIELD_CMN(uip, McUnit));
	(void) nvlist_add_uint32(nvl, "HbUnit", MCREG_FIELD_CMN(uip, HbUnit));
	(void) nvlist_add_uint32(nvl, "SbLink", MCREG_FIELD_CMN(uip, SbLink));

	/* Only emit routing tables if they fit the local arrays. */
	if (ndcnt <= MC_CHIP_MAXNODES) {
		for (i = 0; i < ndcnt; i++, htrp++) {
			BCRte[i] = MCREG_FIELD_CMN(htrp, BCRte);
			RPRte[i] = MCREG_FIELD_CMN(htrp, RPRte);
			RQRte[i] = MCREG_FIELD_CMN(htrp, RQRte);
		}

		(void) nvlist_add_uint32_array(nvl, "BroadcastRoutes",
		    &BCRte[0], ndcnt);
		(void) nvlist_add_uint32_array(nvl, "ResponseRoutes",
		    &RPRte[0], ndcnt);
		(void) nvlist_add_uint32_array(nvl, "RequestRoutes",
		    &RQRte[0], ndcnt);
	}

	(void) nvlist_add_nvlist(mcnvl, "htconfig", nvl);
	nvlist_free(nvl);
}
384 384
/*
 * Build the full nvlist representation of a memory controller: version,
 * identity/socket/ecc information, address-map and dram-controller
 * properties, plus the cslist, dimmlist and htconfig sub-lists.
 * The returned nvlist is owned by the caller.
 */
static nvlist_t *
mc_nvl_create(mc_t *mc)
{
	nvlist_t *mcnvl;

	(void) nvlist_alloc(&mcnvl, NV_UNIQUE_NAME, KM_SLEEP);

	/*
	 * Since this nvlist is used in populating the topo tree changes
	 * made here may propogate through to changed property names etc
	 * in the topo tree. Some properties in the topo tree will be
	 * contracted via ARC, so be careful what you change here.
	 */
	(void) nvlist_add_uint8(mcnvl, MC_NVLIST_VERSTR, MC_NVLIST_VERS1);

	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_NUM, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_REV, 0);
	(void) nvlist_add_string(mcnvl, "revname", mc->mc_revname);
	mc_nvl_add_socket(mcnvl, mc);
	mc_nvl_add_ecctype(mcnvl, mc);

	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BASE_ADDR, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_LIM_ADDR, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILEN, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILSEL, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSINTLVFCTR, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_DRAMHOLE_SIZE, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ACCESS_WIDTH, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSBANKMAPREG, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BANKSWZL, 0);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_MOD64MUX, 0);
	/* Spare and bad cs numbers may be MC_INVALNUM; only add if valid. */
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_SPARECS, 1);
	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BADCS, 1);

	mc_nvl_add_cslist(mcnvl, mc);
	mc_nvl_add_dimmlist(mcnvl, mc);
	mc_nvl_add_htconfig(mcnvl, mc);

	return (mcnvl);
}
425 425
/*
 * Link a dimm to its associated chip-selects and chip-select lines.
 * Total the size of all ranks of this dimm.
 */
static void
mc_dimm_csadd(mc_t *mc, mc_dimm_t *mcd, mc_cs_t *mccs, const mcdcfg_csl_t *csl)
{
	/*
	 * In 128-bit mode a chip-select spans a dimm pair, so only half
	 * of each chip-select's size belongs to this dimm.
	 */
	int factor = (mc->mc_props.mcp_accwidth == 128) ? 2 : 1;
	uint64_t sz = 0;
	int i;

	/* Skip to first unused rank slot */
	for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
		if (mcd->mcd_cs[i] == NULL) {
			mcd->mcd_cs[i] = mccs;
			mcd->mcd_csl[i] = csl;
			sz += mccs->mccs_props.csp_size / factor;
			break;
		} else {
			/* Accumulate sizes of ranks linked earlier. */
			sz += mcd->mcd_cs[i]->mccs_props.csp_size / factor;
		}
	}

	/* A free rank slot must have existed for the new chip-select. */
	ASSERT(i != MC_CHIP_DIMMRANKMAX);

	mcd->mcd_size = sz;
}
453 453
454 454 /*
455 455 * Create a dimm structure and call to link it to its associated chip-selects.
456 456 */
457 457 static mc_dimm_t *
458 458 mc_dimm_create(mc_t *mc, uint_t num)
459 459 {
460 460 mc_dimm_t *mcd = kmem_zalloc(sizeof (mc_dimm_t), KM_SLEEP);
461 461
462 462 mcd->mcd_hdr.mch_type = MC_NT_DIMM;
463 463 mcd->mcd_mc = mc;
464 464 mcd->mcd_num = num;
465 465
466 466 return (mcd);
467 467 }
468 468
/*
 * The chip-select structure includes an array of dimms associated with
 * that chip-select. This function fills that array, and also builds
 * the list of all dimms on this memory controller mc_dimmlist. The
 * caller has filled a structure with all there is to know about the
 * associated dimm(s).
 */
static void
mc_csdimms_create(mc_t *mc, mc_cs_t *mccs, mcdcfg_rslt_t *rsltp)
{
	mc_dimm_t *found[MC_CHIP_DIMMPERCS];
	mc_dimm_t *mcd;
	int nfound = 0;
	int i;

	/*
	 * Has some other chip-select already created this dimm or dimms?
	 * If so then link to the dimm(s) from the mccs_dimm array,
	 * record their topo numbers in the csp_dimmnums array, and link
	 * the dimm(s) to the additional chip-select.
	 */
	for (mcd = mc->mc_dimmlist; mcd != NULL; mcd = mcd->mcd_next) {
		for (i = 0; i < rsltp->ndimm; i++) {
			if (mcd->mcd_num == rsltp->dimm[i].toponum)
				found[nfound++] = mcd;
		}
	}
	/* Either all constituent dimms exist already or none do. */
	ASSERT(nfound == 0 || nfound == rsltp->ndimm);

	for (i = 0; i < rsltp->ndimm; i++) {
		if (nfound == 0) {
			/* New dimm: create it and append to mc_dimmlist. */
			mcd = mc_dimm_create(mc, rsltp->dimm[i].toponum);
			if (mc->mc_dimmlist == NULL)
				mc->mc_dimmlist = mcd;
			else
				mc->mc_dimmlast->mcd_next = mcd;
			mc->mc_dimmlast = mcd;
		} else {
			mcd = found[i];
		}

		mccs->mccs_dimm[i] = mcd;
		mccs->mccs_csl[i] = rsltp->dimm[i].cslp;
		mccs->mccs_props.csp_dimmnums[i] = mcd->mcd_num;
		mc_dimm_csadd(mc, mcd, mccs, rsltp->dimm[i].cslp);

	}

	/* The rank number is constant across all constituent dimm(s) */
	mccs->mccs_props.csp_dimmrank = rsltp->dimm[0].cslp->csl_rank;
}
520 520
/*
 * mc_dimmlist_create is called after we have discovered all enabled
 * (and spare or testfailed on revs F and G) chip-selects on the
 * given memory controller. For each chip-select we must derive
 * the associated dimms, remembering that a chip-select csbase/csmask
 * pair may be associated with up to 2 chip-select lines (in 128 bit mode)
 * and that any one dimm may be associated with 1, 2, or 4 chip-selects
 * depending on whether it is single, dual or quadrank.
 */
static void
mc_dimmlist_create(mc_t *mc)
{
	union mcreg_dramcfg_hi *drcfghip =
	    (union mcreg_dramcfg_hi *)(&mc->mc_cfgregs.mcr_dramcfghi);
	mc_props_t *mcp = &mc->mc_props;
	uint32_t rev = mcp->mcp_rev;
	mc_cs_t *mccs;
	int r4 = 0, s4 = 0;	/* quadrank registered / quadrank SO-DIMM */

	/*
	 * Are we dealing with quadrank registered dimms?
	 *
	 * For socket 940 we can't tell and we'll assume we're not.
	 * This can be over-ridden by the admin in /etc/system by setting
	 * mc_quadranksupport nonzero. A possible optimisation in systems
	 * that export an SMBIOS table would be to count the number of
	 * dimm slots per cpu - more than 4 would indicate no quadrank support
	 * and 4 or fewer would indicate that if we see any of the upper
	 * chip-selects enabled then a quadrank dimm is present.
	 *
	 * For socket F(1207) we can check a bit in the dram config high reg.
	 *
	 * Other socket types do not support registered dimms.
	 */
	if (mc->mc_socket == X86_SOCKET_940)
		r4 = mc_quadranksupport != 0;
	else if (mc->mc_socket == X86_SOCKET_F1207)
		r4 = MCREG_FIELD_F_revFG(drcfghip, FourRankRDimm);

	/*
	 * Are we dealing with quadrank SO-DIMMs? These are supported
	 * in AM2 and S1g1 packages only, but in all rev F/G cases we
	 * can detect their presence via a bit in the dram config high reg.
	 */
	if (MC_REV_MATCH(rev, MC_F_REVS_FG))
		s4 = MCREG_FIELD_F_revFG(drcfghip, FourRankSODimm);

	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
		mcdcfg_rslt_t rslt;

		/*
		 * If lookup fails we will not create dimm structures for
		 * this chip-select. In the mc_cs_t we will have both
		 * csp_dimmnum members set to MC_INVALNUM and patounum
		 * code will see from those that we do not have dimm info
		 * for this chip-select.
		 */
		if (mcdcfg_lookup(rev, mcp->mcp_mod64mux, mcp->mcp_accwidth,
		    mccs->mccs_props.csp_num, mc->mc_socket,
		    r4, s4, &rslt) < 0)
			continue;

		mc_csdimms_create(mc, mccs, &rslt);
	}
}
586 586
587 587 static mc_cs_t *
588 588 mc_cs_create(mc_t *mc, uint_t num, uint64_t base, uint64_t mask, size_t sz,
589 589 int csbe, int spare, int testfail)
590 590 {
591 591 mc_cs_t *mccs = kmem_zalloc(sizeof (mc_cs_t), KM_SLEEP);
592 592 mccs_props_t *csp = &mccs->mccs_props;
593 593 int i;
594 594
595 595 mccs->mccs_hdr.mch_type = MC_NT_CS;
596 596 mccs->mccs_mc = mc;
597 597 csp->csp_num = num;
598 598 csp->csp_base = base;
599 599 csp->csp_mask = mask;
600 600 csp->csp_size = sz;
601 601 csp->csp_csbe = csbe;
602 602 csp->csp_spare = spare;
603 603 csp->csp_testfail = testfail;
604 604
605 605 for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
606 606 csp->csp_dimmnums[i] = MC_INVALNUM;
607 607
608 608 if (spare)
609 609 mc->mc_props.mcp_sparecs = num;
610 610
611 611 return (mccs);
612 612 }
613 613
/*
 * For any cs# of this mc marked TestFail generate an ereport with
 * resource identifying the associated dimm(s).
 */
static void
mc_report_testfails(mc_t *mc)
{
	mc_unum_t unum;
	mc_cs_t *mccs;
	int i;

	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
		if (mccs->mccs_props.csp_testfail) {
			/*
			 * Identify the chip/cs/rank of the failed
			 * chip-select; channel, offset and dimm numbers
			 * are filled with invalid/unknown markers.
			 */
			unum.unum_board = 0;
			unum.unum_chip = mc->mc_props.mcp_num;
			unum.unum_mc = 0;
			unum.unum_chan = MC_INVALNUM;
			unum.unum_cs = mccs->mccs_props.csp_num;
			unum.unum_rank = mccs->mccs_props.csp_dimmrank;
			unum.unum_offset = MCAMD_RC_INVALID_OFFSET;
			for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
				unum.unum_dimms[i] = MC_INVALNUM;

			mcamd_ereport_post(mc, FM_EREPORT_CPU_AMD_MC_TESTFAIL,
			    &unum,
			    FM_EREPORT_PAYLOAD_FLAGS_CPU_AMD_MC_TESTFAIL);
		}
	}
}
643 643
644 644 /*
645 645 * Function 0 - HyperTransport Technology Configuration
646 646 */
647 647 static void
648 648 mc_mkprops_htcfg(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
649 649 {
650 650 union mcreg_nodeid nodeid;
651 651 off_t offset;
652 652 int i;
653 653
654 654 mc->mc_cfgregs.mcr_htnodeid = MCREG_VAL32(&nodeid) =
655 655 mc_pcicfg_get32(cfghdl, MC_HT_REG_NODEID);
656 656
657 657 mc->mc_cfgregs.mcr_htunitid = mc_pcicfg_get32(cfghdl, MC_HT_REG_UNITID);
658 658
659 659 for (i = 0, offset = MC_HT_REG_RTBL_NODE_0;
660 660 i < HT_COHERENTNODES(&nodeid);
661 661 i++, offset += MC_HT_REG_RTBL_INCR)
662 662 mc->mc_cfgregs.mcr_htroute[i] = mc_pcicfg_get32(cfghdl, offset);
663 663 }
664 664
/*
 * Function 1 Configuration - Address Map (see BKDG 3.4.4 DRAM Address Map)
 *
 * Read the Function 1 Address Map for each potential DRAM node. The Base
 * Address for a node gives the starting system address mapped at that node,
 * and the limit gives the last valid address mapped at that node. Regions for
 * different nodes should not overlap, unless node-interleaving is enabled.
 * The base register also indicates the node-interleaving settings (IntlvEn).
 * The limit register includes IntlvSel which determines which 4K blocks will
 * be routed to this node and the destination node ID for addresses that fall
 * within the [base, limit] range - this must match the pair number.
 */
static void
mc_mkprops_addrmap(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
{
	union mcreg_drambase basereg;
	union mcreg_dramlimit limreg;
	mc_props_t *mcp = &mc->mc_props;
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	union mcreg_dramhole hole;
	int nodeid = mc->mc_props.mcp_num;

	/* Raw base and limit registers for this node are always cached. */
	mcr->mcr_drambase = MCREG_VAL32(&basereg) = mc_pcicfg_get32(cfghdl,
	    MC_AM_REG_DRAMBASE_0 + nodeid * MC_AM_REG_DRAM_INCR);

	mcr->mcr_dramlimit = MCREG_VAL32(&limreg) = mc_pcicfg_get32(cfghdl,
	    MC_AM_REG_DRAMLIM_0 + nodeid * MC_AM_REG_DRAM_INCR);

	/*
	 * Derive some "cooked" properties for nodes that have a range of
	 * physical addresses that are read or write enabled and for which
	 * the DstNode matches the node we are attaching.
	 */
	if (MCREG_FIELD_CMN(&limreg, DRAMLimiti) != 0 &&
	    MCREG_FIELD_CMN(&limreg, DstNode) == nodeid &&
	    (MCREG_FIELD_CMN(&basereg, WE) || MCREG_FIELD_CMN(&basereg, RE))) {
		mcp->mcp_base = MC_DRAMBASE(&basereg);
		mcp->mcp_lim = MC_DRAMLIM(&limreg);
		mcp->mcp_ilen = MCREG_FIELD_CMN(&basereg, IntlvEn);
		mcp->mcp_ilsel = MCREG_FIELD_CMN(&limreg, IntlvSel);
	}

	/*
	 * The Function 1 DRAM Hole Address Register tells us which node(s)
	 * own the DRAM space that is hoisted above 4GB, together with the
	 * hole base and offset for this node. This was introduced in
	 * revision E.
	 */
	if (MC_REV_ATLEAST(mc->mc_props.mcp_rev, MC_F_REV_E)) {
		mcr->mcr_dramhole = MCREG_VAL32(&hole) =
		    mc_pcicfg_get32(cfghdl, MC_AM_REG_HOLEADDR);

		if (MCREG_FIELD_CMN(&hole, DramHoleValid))
			mcp->mcp_dramhole_size = MC_DRAMHOLE_SIZE(&hole);
	}
}
721 721
/*
 * Read some function 3 parameters via PCI Mechanism 1 accesses (which
 * will serialize any NB accesses).
 */
static void
mc_getmiscctl(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;
	union mcreg_sparectl sparectl;

	/* Cache the NB Configuration register. */
	mc->mc_cfgregs.mcr_nbcfg = MCREG_VAL32(&nbcfg) =
	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_NBCFG);

	if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
		/* Online-spare control register is read on revs F/G only. */
		mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl) =
		    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
		    MC_CTL_REG_SPARECTL);

		if (MCREG_FIELD_F_revFG(&sparectl, SwapDone)) {
			/* A spare swap has occurred; record the bad cs#. */
			mc->mc_props.mcp_badcs =
			    MCREG_FIELD_F_revFG(&sparectl, BadDramCs);
		}
	}
}
747 747
748 748 static int
749 749 csbasecmp(mc_cs_t **csapp, mc_cs_t **csbpp)
750 750 {
751 751 uint64_t basea = (*csapp)->mccs_props.csp_base;
752 752 uint64_t baseb = (*csbpp)->mccs_props.csp_base;
753 753
754 754 if (basea == baseb)
755 755 return (0);
756 756 else if (basea < baseb)
757 757 return (-1);
758 758 else
759 759 return (1);
760 760 }
761 761
/*
 * The following are for use in simulating TestFail for a chip-select
 * without poking at the hardware (which tends to get upset if you do
 * since the BIOS needs to restart to map a failed cs out). For internal
 * testing only! Note that setting these does not give the full experience -
 * the selected chip-select *is* enabled and can give errors etc and the
 * patounum logic will get confused.
 */
int testfail_mcnum = -1;	/* mc number on which to fake a TestFail */
int testfail_csnum = -1;	/* cs number on which to fake a TestFail */
772 772
773 773 /*
774 774 * Function 2 configuration - DRAM Controller
775 775 */
776 776 static void
777 777 mc_mkprops_dramctl(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
778 778 {
779 779 union mcreg_csbase base[MC_CHIP_NCS];
780 780 union mcreg_csmask mask[MC_CHIP_NCS];
781 781 union mcreg_dramcfg_lo drcfg_lo;
782 782 union mcreg_dramcfg_hi drcfg_hi;
783 783 union mcreg_drammisc drmisc;
784 784 union mcreg_bankaddrmap baddrmap;
785 785 mc_props_t *mcp = &mc->mc_props;
786 786 mc_cfgregs_t *mcr = &mc->mc_cfgregs;
787 787 int maskdivisor;
788 788 int wide = 0;
789 789 uint32_t rev = mc->mc_props.mcp_rev;
790 790 int i;
791 791 mcamd_hdl_t hdl;
792 792
793 793 mcamd_mkhdl(&hdl); /* to call into common code */
794 794
795 795 /*
796 796 * Read Function 2 DRAM Configuration High and Low registers. The High
797 797 * part is mostly concerned with memory clocks etc and we'll not have
798 798 * any use for that. The Low component tells us if ECC is enabled,
799 799 * if we're in 64- or 128-bit MC mode, how the upper chip-selects
800 800 * are mapped, which chip-select pairs are using x4 parts, etc.
801 801 */
802 802 MCREG_VAL32(&drcfg_lo) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGLO);
803 803 MCREG_VAL32(&drcfg_hi) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGHI);
804 804 mcr->mcr_dramcfglo = MCREG_VAL32(&drcfg_lo);
805 805 mcr->mcr_dramcfghi = MCREG_VAL32(&drcfg_hi);
806 806
807 807 /*
808 808 * Note the DRAM controller width. The 64/128 bit is in a different
809 809 * bit position for revision F and G.
810 810 */
811 811 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
812 812 wide = MCREG_FIELD_F_revFG(&drcfg_lo, Width128);
813 813 } else {
814 814 wide = MCREG_FIELD_F_preF(&drcfg_lo, Width128);
815 815 }
816 816 mcp->mcp_accwidth = wide ? 128 : 64;
817 817
818 818 /*
819 819 * Read Function 2 DRAM Controller Miscellaenous Regsiter for those
820 820 * revs that support it. This include the Mod64Mux indication on
821 821 * these revs - for rev E it is in DRAM config low.
822 822 */
823 823 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
824 824 mcr->mcr_drammisc = MCREG_VAL32(&drmisc) =
825 825 mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMMISC);
826 826 mcp->mcp_mod64mux = MCREG_FIELD_F_revFG(&drmisc, Mod64Mux);
827 827 } else if (MC_REV_MATCH(rev, MC_F_REV_E)) {
828 828 mcp->mcp_mod64mux = MCREG_FIELD_F_preF(&drcfg_lo, Mod64BitMux);
829 829 }
830 830
831 831 /*
832 832 * Read Function 2 DRAM Bank Address Mapping. This encodes the
833 833 * type of DIMM module in use for each chip-select pair.
834 834 * Prior ro revision F it also tells us whether BankSwizzle mode
835 835 * is enabled - in rev F that has moved to dram config hi register.
836 836 */
837 837 mcp->mcp_csbankmapreg = MCREG_VAL32(&baddrmap) =
838 838 mc_pcicfg_get32(cfghdl, MC_DC_REG_BANKADDRMAP);
839 839
840 840 /*
841 841 * Determine whether bank swizzle mode is active. Bank swizzling was
842 842 * introduced as an option in rev E, but the bit that indicates it
843 843 * is enabled has moved in revs F/G.
844 844 */
845 845 if (MC_REV_MATCH(rev, MC_F_REV_E)) {
846 846 mcp->mcp_bnkswzl =
847 847 MCREG_FIELD_F_preF(&baddrmap, BankSwizzleMode);
848 848 } else if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
849 849 mcp->mcp_bnkswzl = MCREG_FIELD_F_revFG(&drcfg_hi,
850 850 BankSwizzleMode);
851 851 }
852 852
853 853 /*
854 854 * Read the DRAM CS Base and DRAM CS Mask registers. Revisions prior
855 855 * to F have an equal number of base and mask registers; revision F
856 856 * has twice as many base registers as masks.
857 857 */
858 858 maskdivisor = MC_REV_MATCH(rev, MC_F_REVS_FG) ? 2 : 1;
859 859
860 860 mc_prop_read_pair(cfghdl,
861 861 (uint32_t *)base, MC_DC_REG_CSBASE_0, MC_CHIP_NCS,
862 862 (uint32_t *)mask, MC_DC_REG_CSMASK_0, MC_CHIP_NCS / maskdivisor,
863 863 MC_DC_REG_CS_INCR);
864 864
865 865 /*
866 866 * Create a cs node for each enabled chip-select as well as
867 867 * any appointed online spare chip-selects and for any that have
868 868 * failed test.
869 869 */
870 870 for (i = 0; i < MC_CHIP_NCS; i++) {
871 871 mc_cs_t *mccs;
872 872 uint64_t csbase, csmask;
873 873 size_t sz;
874 874 int csbe, spare, testfail;
875 875
876 876 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
877 877 csbe = MCREG_FIELD_F_revFG(&base[i], CSEnable);
878 878 spare = MCREG_FIELD_F_revFG(&base[i], Spare);
879 879 testfail = MCREG_FIELD_F_revFG(&base[i], TestFail);
880 880 } else {
881 881 csbe = MCREG_FIELD_F_preF(&base[i], CSEnable);
882 882 spare = 0;
883 883 testfail = 0;
884 884 }
885 885
886 886 /* Testing hook */
887 887 if (testfail_mcnum != -1 && testfail_csnum != -1 &&
888 888 mcp->mcp_num == testfail_mcnum && i == testfail_csnum) {
889 889 csbe = spare = 0;
890 890 testfail = 1;
891 891 cmn_err(CE_NOTE, "Pretending MC %d CS %d failed test",
892 892 testfail_mcnum, testfail_csnum);
893 893 }
894 894
895 895 /*
896 896 * If the chip-select is not enabled then skip it unless
897 897 * it is a designated online spare or is marked with TestFail.
898 898 */
899 899 if (!csbe && !(spare || testfail))
900 900 continue;
901 901
902 902 /*
903 903 * For an enabled or spare chip-select the Bank Address Mapping
904 904 * register will be valid as will the chip-select mask. The
905 905 * base will not be valid but we'll read and store it anyway.
906 906 * We will not know whether the spare is already swapped in
907 907 * until MC function 3 attaches.
908 908 */
909 909 if (csbe || spare) {
910 910 if (mcamd_cs_size(&hdl, (mcamd_node_t *)mc, i, &sz) < 0)
911 911 continue;
912 912 csbase = MC_CSBASE(&base[i], rev);
913 913 csmask = MC_CSMASK(&mask[i / maskdivisor], rev);
914 914 } else {
915 915 sz = 0;
916 916 csbase = csmask = 0;
917 917 }
918 918
919 919 mccs = mc_cs_create(mc, i, csbase, csmask, sz,
920 920 csbe, spare, testfail);
921 921
922 922 if (mc->mc_cslist == NULL)
923 923 mc->mc_cslist = mccs;
924 924 else
925 925 mc->mc_cslast->mccs_next = mccs;
926 926 mc->mc_cslast = mccs;
927 927
928 928 mccs->mccs_cfgregs.csr_csbase = MCREG_VAL32(&base[i]);
929 929 mccs->mccs_cfgregs.csr_csmask =
930 930 MCREG_VAL32(&mask[i / maskdivisor]);
931 931
932 932 /*
933 933 * Check for cs bank interleaving - some bits clear in the
934 934 * lower mask. All banks must/will have the same lomask bits
935 935 * if cs interleaving is active.
936 936 */
937 937 if (csbe && !mcp->mcp_csintlvfctr) {
938 938 int bitno, ibits = 0;
939 939 for (bitno = MC_CSMASKLO_LOBIT(rev);
940 940 bitno <= MC_CSMASKLO_HIBIT(rev); bitno++) {
941 941 if (!(csmask & (1 << bitno)))
942 942 ibits++;
943 943 }
944 944 mcp->mcp_csintlvfctr = 1 << ibits;
945 945 }
946 946 }
947 947
948 948 /*
949 949 * If there is no chip-select interleave on this node determine
950 950 * whether the chip-select ranks are contiguous or if there
951 951 * is a hole.
952 952 */
953 953 if (mcp->mcp_csintlvfctr == 1) {
954 954 mc_cs_t *csp[MC_CHIP_NCS];
955 955 mc_cs_t *mccs;
956 956 int ncsbe = 0;
957 957
958 958 for (mccs = mc->mc_cslist; mccs != NULL;
959 959 mccs = mccs->mccs_next) {
960 960 if (mccs->mccs_props.csp_csbe)
961 961 csp[ncsbe++] = mccs;
962 962 }
963 963
964 964 if (ncsbe != 0) {
965 965 qsort((void *)csp, ncsbe, sizeof (mc_cs_t *),
966 966 (int (*)(const void *, const void *))csbasecmp);
967 967
968 968 for (i = 1; i < ncsbe; i++) {
969 969 if (csp[i]->mccs_props.csp_base !=
970 970 csp[i - 1]->mccs_props.csp_base +
971 971 csp[i - 1]->mccs_props.csp_size)
972 972 mc->mc_csdiscontig = 1;
973 973 }
974 974 }
975 975 }
976 976
977 977
978 978 /*
979 979 * Since we do not attach to MC function 3 go ahead and read some
980 980 * config parameters from it now.
981 981 */
982 982 mc_getmiscctl(mc);
983 983
984 984 /*
985 985 * Now that we have discovered all enabled/spare/testfail chip-selects
986 986 * we divine the associated DIMM configuration.
987 987 */
988 988 mc_dimmlist_create(mc);
989 989 }
990 990
/*
 * Describes one PCI config-space binding we attach to: the binding name,
 * the memory-controller function it corresponds to, the model string to
 * publish on the device node, and an optional callback that creates the
 * function-specific properties at attach time.
 */
typedef struct mc_bind_map {
	const char *bm_bindnm;	/* attachment binding name */
	enum mc_funcnum bm_func; /* PCI config space function number for bind */
	const char *bm_model;	/* value for device node model property */
	void (*bm_mkprops)(mc_pcicfg_hdl_t, mc_t *); /* prop creation callback */
} mc_bind_map_t;
997 997
/*
 * Do not attach to MC function 3 - agpgart already attaches to that.
 * Function 3 may be a good candidate for a nexus driver to fan it out
 * into virtual devices by functionality. We will use pci_mech1_getl
 * to retrieve the function 3 parameters we require.
 */

static const mc_bind_map_t mc_bind_map[] = {
	{ MC_FUNC_HTCONFIG_BINDNM, MC_FUNC_HTCONFIG,
	    "AMD Memory Controller (HT Configuration)", mc_mkprops_htcfg },
	{ MC_FUNC_ADDRMAP_BINDNM, MC_FUNC_ADDRMAP,
	    "AMD Memory Controller (Address Map)", mc_mkprops_addrmap },
	{ MC_FUNC_DRAMCTL_BINDNM, MC_FUNC_DRAMCTL,
	    "AMD Memory Controller (DRAM Controller & HT Trace)",
	    mc_mkprops_dramctl },
	{ NULL }	/* terminator; walkers stop on bm_bindnm == NULL */
};
1015 1015
1016 1016 /*ARGSUSED*/
1017 1017 static int
1018 1018 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1019 1019 {
1020 1020 if (otyp != OTYP_CHR)
1021 1021 return (EINVAL);
1022 1022
1023 1023 rw_enter(&mc_lock, RW_READER);
1024 1024 if (mc_lookup_by_chipid(getminor(*devp)) == NULL) {
1025 1025 rw_exit(&mc_lock);
1026 1026 return (EINVAL);
1027 1027 }
1028 1028 rw_exit(&mc_lock);
1029 1029
1030 1030 return (0);
1031 1031 }
1032 1032
1033 1033 /*ARGSUSED*/
1034 1034 static int
1035 1035 mc_close(dev_t dev, int flag, int otyp, cred_t *credp)
1036 1036 {
1037 1037 return (0);
1038 1038 }
1039 1039
/*
 * Enable swap from chip-select csnum to the spare chip-select on this
 * memory controller (if any).
 */

/* Tunable; mc_onlinespare() further caps the wait at 120 seconds. */
int mc_swapdonetime = 30; /* max number of seconds to wait for SwapDone */
1046 1046
static int
mc_onlinespare(mc_t *mc, int csnum)
{
	mc_props_t *mcp = &mc->mc_props;
	union mcreg_sparectl sparectl;
	union mcreg_scrubctl scrubctl;
	mc_cs_t *mccs;
	hrtime_t tmax;
	int i = 0;

	ASSERT(RW_WRITE_HELD(&mc_lock));

	/* Validate that a swap is possible and sensible on this controller. */
	if (!MC_REV_MATCH(mcp->mcp_rev, MC_F_REVS_FG))
		return (ENOTSUP);	/* MC rev does not offer online spare */
	else if (mcp->mcp_sparecs == MC_INVALNUM)
		return (ENODEV);	/* Supported, but no spare configured */
	else if (mcp->mcp_badcs != MC_INVALNUM)
		return (EBUSY);		/* Spare already swapped in */
	else if (csnum == mcp->mcp_sparecs)
		return (EINVAL);	/* Can't spare the spare! */

	/* The nominated chip-select must exist on this node. */
	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
		if (mccs->mccs_props.csp_num == csnum)
			break;
	}
	if (mccs == NULL)
		return (EINVAL);	/* nominated bad CS does not exist */

	/*
	 * If the DRAM Scrubber is not enabled then the swap cannot succeed.
	 */
	MCREG_VAL32(&scrubctl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
	    MC_CTL_REG_SCRUBCTL);
	if (MCREG_FIELD_CMN(&scrubctl, DramScrub) == 0)
		return (ENODEV);	/* DRAM scrubber not enabled */

	/*
	 * Read Online Spare Control Register again, just in case our
	 * state does not reflect reality.
	 */
	MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
	    MC_CTL_REG_SPARECTL);

	if (MCREG_FIELD_F_revFG(&sparectl, SwapDone))
		return (EBUSY);

	/* Write to the BadDramCs field */
	MCREG_FIELD_F_revFG(&sparectl, BadDramCs) = csnum;
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
	    MCREG_VAL32(&sparectl));

	/* And request that the swap to the spare start */
	MCREG_FIELD_F_revFG(&sparectl, SwapEn) = 1;
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
	    MCREG_VAL32(&sparectl));

	/*
	 * Poll for SwapDone - we have disabled notification by interrupt.
	 * Swap takes "several CPU cycles, depending on the DRAM speed, but
	 * is performed in the background" (Family 0Fh Bios Porting Guide).
	 * We're in a slow ioctl path so there is no harm in waiting around
	 * a bit - consumers of the ioctl must be aware that it may take
	 * a moment. We will poll for up to mc_swapdonetime seconds,
	 * limiting that to 120s.
	 *
	 * The swap is performed by the DRAM scrubber (which must be enabled)
	 * whose scrub rate is accelerated for the duration of the swap.
	 * The maximum swap rate is 40.0ns per 64 bytes, so the maximum
	 * supported cs size of 16GB would take 10.7s at that max rate
	 * of 25000000 scrubs/second.
	 */
	tmax = gethrtime() + MIN(mc_swapdonetime, 120) * 1000000000ULL;
	do {
		if (i++ < 20)
			delay(drv_usectohz(100000)); /* 0.1s for up to 2s */
		else
			delay(drv_usectohz(500000)); /* 0.5s */

		MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc,
		    MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
	} while (!MCREG_FIELD_F_revFG(&sparectl, SwapDone) &&
	    gethrtime() < tmax);

	if (!MCREG_FIELD_F_revFG(&sparectl, SwapDone))
		return (ETIME);		/* Operation timed out */

	/* Record the swap so our cached state matches the hardware. */
	mcp->mcp_badcs = csnum;
	mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl);
	mc->mc_spareswaptime = gethrtime();

	return (0);
}
1139 1139
/*
 * ioctl handler.  MC_IOC_SNAPSHOT_INFO and MC_IOC_SNAPSHOT export the
 * configuration snapshot (size/generation, then the data) to userland;
 * MC_IOC_ONLINESPARE_EN (privileged) swaps in the online spare.
 */
/*ARGSUSED*/
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int rc = 0;
	mc_t *mc;

	if (cmd != MC_IOC_SNAPSHOT_INFO && cmd != MC_IOC_SNAPSHOT &&
	    cmd != MC_IOC_ONLINESPARE_EN)
		return (EINVAL);

	rw_enter(&mc_lock, RW_READER);

	if ((mc = mc_lookup_by_chipid(getminor(dev))) == NULL) {
		rw_exit(&mc_lock);
		return (EINVAL);
	}

	switch (cmd) {
	case MC_IOC_SNAPSHOT_INFO: {
		mc_snapshot_info_t mcs;

		/* Refresh the snapshot so size/generation are current. */
		if (mc_snapshot_update(mc) < 0) {
			rw_exit(&mc_lock);
			return (EIO);
		}

		mcs.mcs_size = mc->mc_snapshotsz;
		mcs.mcs_gen = mc->mc_snapshotgen;

		if (ddi_copyout(&mcs, (void *)arg, sizeof (mc_snapshot_info_t),
		    mode) < 0)
			rc = EFAULT;
		break;
	}

	case MC_IOC_SNAPSHOT:
		if (mc_snapshot_update(mc) < 0) {
			rw_exit(&mc_lock);
			return (EIO);
		}

		if (ddi_copyout(mc->mc_snapshot, (void *)arg, mc->mc_snapshotsz,
		    mode) < 0)
			rc = EFAULT;
		break;

	case MC_IOC_ONLINESPARE_EN:
		/* Swapping in the spare is a privileged operation. */
		if (drv_priv(credp) != 0) {
			rw_exit(&mc_lock);
			return (EPERM);
		}

		/*
		 * mc_onlinespare() requires the writer lock; fail with
		 * EAGAIN rather than block if the upgrade would wait.
		 */
		if (!rw_tryupgrade(&mc_lock)) {
			rw_exit(&mc_lock);
			return (EAGAIN);
		}

		/* On success, invalidate the cached snapshot and nvlist. */
		if ((rc = mc_onlinespare(mc, (int)arg)) == 0) {
			mc_snapshot_destroy(mc);
			nvlist_free(mc->mc_nvl);
			mc->mc_nvl = mc_nvl_create(mc);
		}

		break;
	}

	rw_exit(&mc_lock);

	return (rc);
}
1211 1211
/*
 * Character/block entry points: only open, close and ioctl are supported.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,
	mc_close,
	nodev,		/* not a block driver */
	nodev,		/* no print routine */
	nodev,		/* no dump routine */
	nodev,		/* no read routine */
	nodev,		/* no write routine */
	mc_ioctl,
	nodev,		/* no devmap routine */
	nodev,		/* no mmap routine */
	nodev,		/* no segmap routine */
	nochpoll,	/* no chpoll routine */
	ddi_prop_op,
	0,		/* not a STREAMS driver */
	D_NEW | D_MP,	/* safe for multi-thread/multi-processor */
};
1229 1229
1230 1230 /*ARGSUSED*/
1231 1231 static int
1232 1232 mc_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1233 1233 {
1234 1234 int rc = DDI_SUCCESS;
1235 1235 mc_t *mc;
1236 1236
1237 1237 if (infocmd != DDI_INFO_DEVT2DEVINFO &&
1238 1238 infocmd != DDI_INFO_DEVT2INSTANCE) {
1239 1239 *result = NULL;
1240 1240 return (DDI_FAILURE);
1241 1241 }
1242 1242
1243 1243 rw_enter(&mc_lock, RW_READER);
1244 1244
1245 1245 if ((mc = mc_lookup_by_chipid(getminor((dev_t)arg))) == NULL ||
1246 1246 mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi == NULL) {
1247 1247 rc = DDI_FAILURE;
1248 1248 } else if (infocmd == DDI_INFO_DEVT2DEVINFO) {
1249 1249 *result = mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi;
1250 1250 } else {
1251 1251 *result = (void *)(uintptr_t)
1252 1252 mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_instance;
1253 1253 }
1254 1254
1255 1255 rw_exit(&mc_lock);
1256 1256
1257 1257 return (rc);
1258 1258 }
1259 1259
1260 1260 /*ARGSUSED2*/
1261 1261 static int
1262 1262 mc_fm_handle(dev_info_t *dip, ddi_fm_error_t *fmerr, const void *arg)
1263 1263 {
1264 1264 pci_ereport_post(dip, fmerr, NULL);
1265 1265 return (fmerr->fme_status);
1266 1266 }
1267 1267
1268 1268 static void
1269 1269 mc_fm_init(dev_info_t *dip)
1270 1270 {
1271 1271 int fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE;
1272 1272 ddi_fm_init(dip, &fmcap, NULL);
1273 1273 pci_ereport_setup(dip);
1274 1274 ddi_fm_handler_register(dip, mc_fm_handle, NULL);
1275 1275 }
1276 1276
/*
 * Look up this controller's SMBIOS chip instance and base-board list via
 * its PCI bus/device (taken from the node's "reg" property) and cache
 * them on the mc_t.  Failures are non-fatal; on DEBUG kernels they are
 * noted on the console.
 */
static void
mc_read_smbios(mc_t *mc, dev_info_t *dip)
{

	uint16_t bdf;
	pci_regspec_t *pci_rp = NULL;
	uint32_t phys_hi;
	int m = 0;
	uint_t chip_inst;
	int rc = 0;

	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&pci_rp, &m) == DDI_SUCCESS) {
		/* Extract bus/device; the function bits are shifted away. */
		phys_hi = pci_rp->pci_phys_hi;
		bdf = (uint16_t)(PCI_REG_BDFR_G(phys_hi) >>
		    PCI_REG_FUNC_SHIFT);
		kmem_free(pci_rp, m);
		pci_rp = NULL;

		rc = fm_smb_mc_chipinst(bdf, &chip_inst);
		if (rc == 0) {
			mc->smb_chipid = chip_inst;
		} else {
#ifdef DEBUG
			cmn_err(CE_NOTE, "!mc read smbios chip info failed");
#endif /* DEBUG */
			return;
		}
		mc->smb_bboard = fm_smb_mc_bboards(bdf);
#ifdef DEBUG
		if (mc->smb_bboard == NULL)
			cmn_err(CE_NOTE,
			    "!mc read smbios base boards info failed");
#endif /* DEBUG */
	}

	/* Defensive only: pci_rp is NULL on every path that reaches here. */
	if (pci_rp != NULL)
		kmem_free(pci_rp, m);
}
1316 1316
1317 1317 /*ARGSUSED*/
1318 1318 static int
1319 1319 mc_create_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1320 1320 {
1321 1321 chipid_t chipid = *((chipid_t *)arg1);
1322 1322 cmi_hdl_t *hdlp = (cmi_hdl_t *)arg2;
1323 1323
1324 1324 if (cmi_hdl_chipid(whdl) == chipid) {
1325 1325 cmi_hdl_hold(whdl); /* short-term hold */
1326 1326 *hdlp = whdl;
1327 1327 return (CMI_HDL_WALK_DONE);
1328 1328 } else {
1329 1329 return (CMI_HDL_WALK_NEXT);
1330 1330 }
1331 1331 }
1332 1332
1333 1333 static mc_t *
1334 1334 mc_create(chipid_t chipid, dev_info_t *dip)
1335 1335 {
1336 1336 mc_t *mc;
1337 1337 cmi_hdl_t hdl = NULL;
1338 1338
1339 1339 ASSERT(RW_WRITE_HELD(&mc_lock));
1340 1340
1341 1341 /*
1342 1342 * Find a handle for one of a chip's CPU.
1343 1343 *
1344 1344 * We can use one of the chip's CPUs since all cores
1345 1345 * of a chip share the same revision and socket type.
1346 1346 */
1347 1347 cmi_hdl_walk(mc_create_cb, (void *)&chipid, (void *)&hdl, NULL);
1348 1348 if (hdl == NULL)
1349 1349 return (NULL); /* no cpu for this chipid found! */
1350 1350
1351 1351 mc = kmem_zalloc(sizeof (mc_t), KM_SLEEP);
1352 1352
1353 1353 mc->mc_hdr.mch_type = MC_NT_MC;
1354 1354 mc->mc_props.mcp_num = chipid;
1355 1355 mc->mc_props.mcp_sparecs = MC_INVALNUM;
1356 1356 mc->mc_props.mcp_badcs = MC_INVALNUM;
1357 1357
1358 1358 mc->mc_props.mcp_rev = cmi_hdl_chiprev(hdl);
1359 1359 mc->mc_revname = cmi_hdl_chiprevstr(hdl);
1360 1360 mc->mc_socket = cmi_hdl_getsockettype(hdl);
1361 1361
1362 1362 mc_read_smbios(mc, dip);
1363 1363
1364 1364 if (mc_list == NULL)
1365 1365 mc_list = mc;
1366 1366 if (mc_last != NULL)
1367 1367 mc_last->mc_next = mc;
1368 1368
1369 1369 mc->mc_next = NULL;
1370 1370 mc_last = mc;
1371 1371
1372 1372 cmi_hdl_rele(hdl);
1373 1373
1374 1374 return (mc);
1375 1375 }
1376 1376
/*
 * Return the maximum scrubbing rate between r1 and r2, where r2 is extracted
 * from the specified 'cfg' register value using 'mask' and 'shift'.  A zero
 * rate means scrubbing is off, so if either value is zero the other is
 * returned; otherwise the maximum rate is the smaller non-zero encoding.
 */
static uint32_t
mc_scrubber_max(uint32_t r1, uint32_t cfg, uint32_t mask, uint32_t shift)
{
	uint32_t r2 = (cfg & mask) >> shift;

	if (r1 == 0)
		return (r2);
	if (r2 == 0)
		return (r1);

	return (r1 < r2 ? r1 : r2);
}
1393 1393
1394 1394
/*
 * Enable the memory scrubber. We must use the mc_pcicfg_{get32,put32}_nohdl
 * interfaces since we do not bind to function 3.
 *
 * Returns CMI_SUCCESS if the DRAM scrubber ends up enabled, otherwise
 * CMIERR_MC_NOMEMSCRUB (so the caller may fall back to s/w scrubbing).
 */
cmi_errno_t
mc_scrubber_enable(mc_t *mc)
{
	mc_props_t *mcp = &mc->mc_props;
	chipid_t chipid = (chipid_t)mcp->mcp_num;
	uint32_t rev = (uint32_t)mcp->mcp_rev;
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	union mcreg_scrubctl scrubctl;
	union mcreg_dramscrublo dalo;
	union mcreg_dramscrubhi dahi;

	/* Capture current scrub control and scrub address register values. */
	mcr->mcr_scrubctl = MCREG_VAL32(&scrubctl) =
	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL);

	mcr->mcr_scrubaddrlo = MCREG_VAL32(&dalo) =
	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO);

	mcr->mcr_scrubaddrhi = MCREG_VAL32(&dahi) =
	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI);

	/* Honor whatever rate the BIOS configured, if so directed. */
	if (mc_scrub_policy == MC_SCRUB_BIOSDEFAULT)
		return (MCREG_FIELD_CMN(&scrubctl, DramScrub) !=
		    AMD_NB_SCRUBCTL_RATE_NONE ?
		    CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);

	/*
	 * Disable DRAM scrubbing while we fiddle.
	 */
	MCREG_FIELD_CMN(&scrubctl, DramScrub) = AMD_NB_SCRUBCTL_RATE_NONE;
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
	    MCREG_VAL32(&scrubctl));

	/*
	 * Setup DRAM Scrub Address Low and High registers for the
	 * base address of this node, and to select scrubber redirect.
	 */
	MCREG_FIELD_CMN(&dalo, ScrubReDirEn) = 1;
	MCREG_FIELD_CMN(&dalo, ScrubAddrLo) =
	    AMD_NB_SCRUBADDR_MKLO(mcp->mcp_base);

	MCREG_FIELD_CMN(&dahi, ScrubAddrHi) =
	    AMD_NB_SCRUBADDR_MKHI(mcp->mcp_base);

	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO,
	    MCREG_VAL32(&dalo));
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI,
	    MCREG_VAL32(&dahi));

	/* Clamp the tunable rate to the maximum encodable value. */
	if (mc_scrub_rate_dram > AMD_NB_SCRUBCTL_RATE_MAX) {
		cmn_err(CE_WARN, "mc_scrub_rate_dram is too large; "
		    "resetting to 0x%x\n", AMD_NB_SCRUBCTL_RATE_MAX);
		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_MAX;
	}

	switch (mc_scrub_policy) {
	case MC_SCRUB_FIXED:
		/* Use the system value checked above */
		break;

	default:
		cmn_err(CE_WARN, "Unknown mc_scrub_policy value %d - "
		    "using default policy of MC_SCRUB_MAX", mc_scrub_policy);
		/*FALLTHRU*/

	case MC_SCRUB_MAX:
		mc_scrub_rate_dram = mc_scrubber_max(mc_scrub_rate_dram,
		    mcr->mcr_scrubctl, AMD_NB_SCRUBCTL_DRAM_MASK,
		    AMD_NB_SCRUBCTL_DRAM_SHIFT);
		break;
	}

	/*
	 * OPTERON_ERRATUM_99:
	 * This erratum applies on revisions D and earlier.
	 * This erratum also applies on revisions E and later,
	 * if BIOS uses chip-select hoisting instead of DRAM hole
	 * mapping.
	 *
	 * Do not enable the dram scrubber if the chip-select ranges
	 * for the node are not contiguous.
	 */
	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
	    mc->mc_csdiscontig) {
		cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
		    "%s chip %d because DRAM hole is present on this node",
		    mc->mc_revname, chipid);
		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
	}

	/*
	 * OPTERON_ERRATUM_101:
	 * This erratum applies on revisions D and earlier.
	 *
	 * If the DRAM Base Address register's IntlvEn field indicates that
	 * node interleaving is enabled, we must disable the DRAM scrubber
	 * and return zero to indicate that Solaris should use s/w instead.
	 */
	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
	    mcp->mcp_ilen != 0 &&
	    !X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_E)) {
		cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
		    "%s chip %d because DRAM memory is node-interleaved",
		    mc->mc_revname, chipid);
		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
	}

	/* Re-enable scrubbing at the rate we settled on, if any. */
	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE) {
		MCREG_FIELD_CMN(&scrubctl, DramScrub) = mc_scrub_rate_dram;
		mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
		    MCREG_VAL32(&scrubctl));
	}

	return (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE ?
	    CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);
}
1514 1514
1515 1515 /*ARGSUSED*/
1516 1516 static int
1517 1517 mc_attach_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1518 1518 {
1519 1519 mc_t *mc = (mc_t *)arg1;
1520 1520 mcamd_prop_t chipid = *((mcamd_prop_t *)arg2);
1521 1521
1522 1522 if (cmi_hdl_chipid(whdl) == chipid) {
1523 1523 mcamd_mc_register(whdl, mc);
1524 1524 }
1525 1525
1526 1526 return (CMI_HDL_WALK_NEXT);
1527 1527 }
1528 1528
/*
 * Incremented in mc_attach() when a node's h/w scrubber setup succeeds;
 * the first such success disables the s/w memscrub thread.
 */
static int mc_sw_scrub_disabled = 0;
1530 1530
/*
 * Attach entry point, called once per MC PCI config-space function we
 * bind to.  State for all functions of a chip's controller is gathered
 * into a single mc_t; when the final function (MC_FUNC_DEVIMAP) attaches
 * we create the minor node, enable scrubbers and register with the cpu
 * module(s) for the chip.
 */
static int
mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	mc_pcicfg_hdl_t cfghdl;
	const mc_bind_map_t *bm;
	const char *bindnm;
	char *unitstr = NULL;
	enum mc_funcnum func;
	long unitaddr;
	int chipid, rc;
	mc_t *mc;

	/*
	 * This driver has no hardware state, but does
	 * claim to have a reg property, so it will be
	 * called on suspend.  It is probably better to
	 * make sure it doesn't get called on suspend,
	 * but it is just as easy to make sure we just
	 * return DDI_SUCCESS if called.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH || mc_no_attach != 0)
		return (DDI_FAILURE);

	/* Identify which MC PCI function this binding corresponds to. */
	bindnm = ddi_binding_name(dip);
	for (bm = mc_bind_map; bm->bm_bindnm != NULL; bm++) {
		if (strcmp(bindnm, bm->bm_bindnm) == 0) {
			func = bm->bm_func;
			break;
		}
	}

	if (bm->bm_bindnm == NULL)
		return (DDI_FAILURE);

	/*
	 * We need the device number, which corresponds to the processor node
	 * number plus 24.  The node number can then be used to associate this
	 * memory controller device with a given processor chip.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "unit-address", &unitstr) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "failed to find unit-address for %s", bindnm);
		return (DDI_FAILURE);
	}

	rc = ddi_strtol(unitstr, NULL, 16, &unitaddr);
	/* DEBUG kernels assert; non-DEBUG kernels fail gracefully below. */
	ASSERT(rc == 0 && unitaddr >= MC_AMD_DEV_OFFSET);

	if (rc != 0 || unitaddr < MC_AMD_DEV_OFFSET) {
		cmn_err(CE_WARN, "failed to parse unit address %s for %s\n",
		    unitstr, bindnm);
		ddi_prop_free(unitstr);
		return (DDI_FAILURE);
	}
	ddi_prop_free(unitstr);

	chipid = unitaddr - MC_AMD_DEV_OFFSET;

	rw_enter(&mc_lock, RW_WRITER);

	/* Look for an mc_t created by an earlier function's attach. */
	for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
		if (mc->mc_props.mcp_num == chipid)
			break;
	}

	/* Integrate this memory controller device into existing set */
	if (mc == NULL) {
		mc = mc_create(chipid, dip);

		if (mc == NULL) {
			/*
			 * We don't complain here because this is a legitimate
			 * path for MP systems.  On those machines, we'll attach
			 * before all CPUs have been initialized, and thus the
			 * chip verification in mc_create will fail.  We'll be
			 * reattached later for those CPUs.
			 */
			rw_exit(&mc_lock);
			return (DDI_FAILURE);
		}
	} else {
		/* Existing controller: stale snapshot must be rebuilt. */
		mc_snapshot_destroy(mc);
	}

	/* Beyond this point, we're committed to creating this node */

	mc_fm_init(dip);

	ASSERT(mc->mc_funcs[func].mcf_devi == NULL);
	mc->mc_funcs[func].mcf_devi = dip;
	mc->mc_funcs[func].mcf_instance = ddi_get_instance(dip);

	mc->mc_ref++;

	/*
	 * Add the common properties to this node, and then add any properties
	 * that are specific to this node based upon its configuration space.
	 */
	(void) ddi_prop_update_string(DDI_DEV_T_NONE,
	    dip, "model", (char *)bm->bm_model);

	(void) ddi_prop_update_int(DDI_DEV_T_NONE,
	    dip, "chip-id", mc->mc_props.mcp_num);

	if (bm->bm_mkprops != NULL &&
	    mc_pcicfg_setup(mc, bm->bm_func, &cfghdl) == DDI_SUCCESS) {
		bm->bm_mkprops(cfghdl, mc);
		mc_pcicfg_teardown(cfghdl);
	}

	/*
	 * If this is the last node to be attached for this memory controller,
	 * then create the minor node, enable scrubbers, and register with
	 * cpu module(s) for this chip.
	 */
	if (func == MC_FUNC_DEVIMAP) {
		mc_props_t *mcp = &mc->mc_props;
		int dram_present = 0;

		if (ddi_create_minor_node(dip, "mc-amd", S_IFCHR,
		    mcp->mcp_num, "ddi_mem_ctrl",
		    0) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "failed to create minor node for chip "
			    "%d memory controller\n",
			    (chipid_t)mcp->mcp_num);
		}

		/*
		 * Register the memory controller for every CPU of this chip.
		 *
		 * If there is memory present on this node and ECC is enabled
		 * attempt to enable h/w memory scrubbers for this node.
		 * If we are successful in enabling *any* hardware scrubbers,
		 * disable the software memory scrubber.
		 */
		cmi_hdl_walk(mc_attach_cb, (void *)mc, (void *)&mcp->mcp_num,
		    NULL);

		if (mcp->mcp_lim != mcp->mcp_base) {
			/*
			 * This node may map non-dram memory alone, so we
			 * must check for an enabled chip-select to be
			 * sure there is dram present.
			 */
			mc_cs_t *mccs;

			for (mccs = mc->mc_cslist; mccs != NULL;
			    mccs = mccs->mccs_next) {
				if (mccs->mccs_props.csp_csbe) {
					dram_present = 1;
					break;
				}
			}
		}

		if (dram_present && !mc_ecc_enabled(mc)) {
			/*
			 * On a single chip system there is no point in
			 * scrubbing if there is no ECC on the single node.
			 * On a multichip system, necessarily Opteron using
			 * registered ECC-capable DIMMs, if there is memory
			 * present on a node but no ECC there then we'll assume
			 * ECC is disabled for all nodes and we will not enable
			 * the scrubber and will also disable the software
			 * memscrub thread.
			 */
			rc = 1;
		} else if (!dram_present) {
			/* No memory on this node - others decide memscrub */
			rc = 0;
		} else {
			/*
			 * There is memory on this node and ECC is enabled.
			 * Call via the cpu module to enable memory scrubbing
			 * on this node - we could call directly but then
			 * we may overlap with a request to enable chip-cache
			 * scrubbing.
			 */
			rc = mc_scrubber_enable(mc);
		}

		/* Disable s/w memscrub once, on the first h/w success. */
		if (rc == CMI_SUCCESS && !mc_sw_scrub_disabled++)
			cmi_mc_sw_memscrub_disable();

		mc_report_testfails(mc);
	}

	/*
	 * Update nvlist for as far as we have gotten in attach/init.
	 */
	nvlist_free(mc->mc_nvl);
	mc->mc_nvl = mc_nvl_create(mc);

	rw_exit(&mc_lock);
	return (DDI_SUCCESS);
}
1730 1730
1731 1731 /*ARGSUSED*/
1732 1732 static int
1733 1733 mc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1734 1734 {
1735 1735 /*
1736 1736 * See the comment about suspend in
1737 1737 * mc_attach().
1738 1738 */
1739 1739 if (cmd == DDI_SUSPEND)
1740 1740 return (DDI_SUCCESS);
1741 1741 else
1742 1742 return (DDI_FAILURE);
1743 1743 }
1744 1744
1745 1745
/* Device operations vector; no reset/power support, quiesce not needed. */
static struct dev_ops mc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	mc_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	mc_attach,		/* devo_attach */
	mc_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&mc_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};
1760 1760
/* Module linkage: driver-type module with the dev_ops vector above. */
static struct modldrv modldrv = {
	&mod_driverops,
	"Memory Controller for AMD processors",
	&mc_ops
};
1766 1766
static struct modlinkage modlinkage = {
	MODREV_1,
	/* ml_linkage array: single driver entry, NULL-terminated. */
	{ (void *)&modldrv, NULL }
};
1772 1771
1773 1772 int
1774 1773 _init(void)
1775 1774 {
1776 1775 /*
1777 1776 * Refuse to load if there is no PCI config space support.
1778 1777 */
1779 1778 if (pci_getl_func == NULL)
1780 1779 return (ENOTSUP);
1781 1780
1782 1781 rw_init(&mc_lock, NULL, RW_DRIVER, NULL);
1783 1782 return (mod_install(&modlinkage));
1784 1783 }
1785 1784
1786 1785 int
1787 1786 _info(struct modinfo *modinfop)
1788 1787 {
1789 1788 return (mod_info(&modlinkage, modinfop));
1790 1789 }
1791 1790
1792 1791 int
1793 1792 _fini(void)
1794 1793 {
1795 1794 int rc;
1796 1795
1797 1796 if ((rc = mod_remove(&modlinkage)) != 0)
1798 1797 return (rc);
1799 1798
1800 1799 rw_destroy(&mc_lock);
1801 1800 return (0);
1802 1801 }
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX