5461 #pragma align before the declaration
--- old/usr/src/uts/sun4/vm/vm_dep.h
+++ new/usr/src/uts/sun4/vm/vm_dep.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * UNIX machine dependent virtual memory support.
27 27 */
28 28
29 29 #ifndef _VM_DEP_H
30 30 #define _VM_DEP_H
31 31
32 32 #ifdef __cplusplus
33 33 extern "C" {
34 34 #endif
35 35
36 36 #include <vm/hat_sfmmu.h>
37 37 #include <sys/archsystm.h>
38 38 #include <sys/memnode.h>
39 39
40 40 #define GETTICK() gettick()
41 41
42 42 /* tick value that should be used for random values */
43 43 extern u_longlong_t randtick(void);
44 44
45 45 /*
46 46 * Per page size free lists. Allocated dynamically.
47 47 */
48 48 #define MAX_MEM_TYPES 2 /* 0 = reloc, 1 = noreloc */
49 49 #define MTYPE_RELOC 0
50 50 #define MTYPE_NORELOC 1
51 51
52 52 #define PP_2_MTYPE(pp) (PP_ISNORELOC(pp) ? MTYPE_NORELOC : MTYPE_RELOC)
53 53
54 54 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) \
55 55 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;
56 56
57 57 /* mtype init for page_get_replacement_page */
58 58 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) \
59 59 mtype = (flags & PG_NORELOC) ? MTYPE_NORELOC : MTYPE_RELOC;
60 60
61 61 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \
62 62 pfnlo = mem_node_config[mnode].physbase; \
63 63 pfnhi = mem_node_config[mnode].physmax;
64 64
65 65 /*
66 66 * candidate counters in vm_pagelist.c are indexed by color and range
67 67 */
68 68 #define MAX_MNODE_MRANGES MAX_MEM_TYPES
69 69 #define MNODE_RANGE_CNT(mnode) MAX_MNODE_MRANGES
70 70 #define MNODE_MAX_MRANGE(mnode) (MAX_MEM_TYPES - 1)
71 71 #define MTYPE_2_MRANGE(mnode, mtype) (mtype)
72 72
73 73 /*
74 74 * Internal PG_ flags.
75 75 */
76 76 #define PGI_RELOCONLY 0x10000 /* acts in the opposite sense to PG_NORELOC */
77 77 #define PGI_NOCAGE 0x20000 /* indicates Cage is disabled */
78 78 #define PGI_PGCPHIPRI 0x40000 /* page_get_contig_page priority allocation */
79 79 #define PGI_PGCPSZC0 0x80000 /* relocate base pagesize page */
80 80
81 81 /*
82 82 * PGI mtype flags - should not overlap PGI flags
83 83 */
84 84 #define PGI_MT_RANGE 0x1000000 /* mtype range */
85 85 #define PGI_MT_NEXT 0x2000000 /* get next mtype */
86 86
87 87 extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
88 88 extern page_t ***page_cachelists[MAX_MEM_TYPES];
89 89
90 90 #define PAGE_FREELISTS(mnode, szc, color, mtype) \
91 91 (*(page_freelists[szc][mtype][mnode] + (color)))
92 92
93 93 #define PAGE_CACHELISTS(mnode, color, mtype) \
94 94 (*(page_cachelists[mtype][mnode] + (color)))
95 95
96 96 /*
97 97 * There are 'page_colors' colors/bins. Spread them out under a
98 98 * couple of locks. There are mutexes for both the page freelist
99 99 * and the page cachelist. We want enough locks to make contention
100 100 * reasonable, but not too many -- otherwise page_freelist_lock() gets
101 101 * so expensive that it becomes the bottleneck!
102 102 */
103 103 #define NPC_MUTEX 16
104 104
105 105 extern kmutex_t *fpc_mutex[NPC_MUTEX];
106 106 extern kmutex_t *cpc_mutex[NPC_MUTEX];
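
As a sketch of how bins hash onto these locks (this mirrors the
PC_BIN_MUTEX macro defined further below; the helper name is
hypothetical):

	static kmutex_t *
	example_bin_lock(int mnode, uint_t bin, uint_t flags)
	{
		/* the low four bits of the bin pick one of the 16 lock columns */
		if (flags & PG_FREE_LIST)
			return (&fpc_mutex[bin & (NPC_MUTEX - 1)][mnode]);
		return (&cpc_mutex[bin & (NPC_MUTEX - 1)][mnode]);
	}
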
107 107
108 108 /*
109 109 * Iterator provides the info needed to convert RA to PA.
110 110 * MEM_NODE_ITERATOR_INIT() should be called before
111 111 * PAGE_NEXT_PFN_FOR_COLOR() if pfn was not obtained via a previous
112 112 * PAGE_NEXT_PFN_FOR_COLOR() call. The iterator caches color-to-hash
113 113 * translations, requiring an initializer call if color or ceq_mask
114 114 * changes, even if pfn doesn't. MEM_NODE_ITERATOR_INIT() must also be
115 115 * called before any PFN_2_COLOR() call that uses a valid iterator argument.
116 116 *
117 117 * plat_mem_node_iterator_init() starts from the last mblock in the
118 118 * continuation case, which may be invalid because of memory DR. To detect
119 119 * this, mi_genid is checked against mpo_genid, which is incremented after
120 120 * a memory DR operation. See also plat_slice_add()/plat_slice_del().
121 121 */
122 122 #ifdef sun4v
123 123
124 124 typedef struct mem_node_iterator {
125 125 uint_t mi_mnode; /* mnode in which to iterate */
126 126 int mi_init; /* set to 1 when first init */
127 127 int mi_genid; /* set/checked against mpo_genid */
128 128 int mi_last_mblock; /* last mblock visited */
129 129 uint_t mi_hash_ceq_mask; /* cached copy of ceq_mask */
130 130 uint_t mi_hash_color; /* cached copy of color */
131 131 uint_t mi_mnode_mask; /* number of mask bits */
132 132 uint_t mi_mnode_pfn_shift; /* mnode position in pfn */
133 133 pfn_t mi_mblock_base; /* first valid pfn in current mblock */
134 134 pfn_t mi_mblock_end; /* last valid pfn in current mblock */
135 135 pfn_t mi_ra_to_pa; /* ra adjustment for current mblock */
136 136 pfn_t mi_mnode_pfn_mask; /* mask to obtain mnode id bits */
137 137 } mem_node_iterator_t;
138 138
139 139 #define MEM_NODE_ITERATOR_DECL(it) \
140 140 mem_node_iterator_t it
141 141 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) \
142 142 (pfn) = plat_mem_node_iterator_init((pfn), (mnode), (szc), (it), 1)
143 143
144 144 extern pfn_t plat_mem_node_iterator_init(pfn_t, int, uchar_t,
145 145 mem_node_iterator_t *, int);
146 146 extern pfn_t plat_rapfn_to_papfn(pfn_t);
147 147 extern int interleaved_mnodes;
148 148
149 149 #else /* sun4v */
150 150
151 151 #define MEM_NODE_ITERATOR_DECL(it) \
152 152 void *it = NULL
153 153 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it)
154 154
155 155 #endif /* sun4v */
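
A minimal usage sketch of the protocol described above, assuming kernel
context (mnode, szc, color, ceq_mask and color_mask are caller-supplied):

	MEM_NODE_ITERATOR_DECL(it);
	pfn_t pfn = mem_node_config[mnode].physbase;

	/* init before the first PAGE_NEXT_PFN_FOR_COLOR()/PFN_2_COLOR() */
	MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, &it);
	while (pfn != (pfn_t)-1 && pfn <= mem_node_config[mnode].physmax) {
		uint_t bin = PFN_2_COLOR(pfn, szc, &it);
		/* ... use pfn/bin ... */
		PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask,
		    color_mask, &it);
	}
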
156 156
157 157 /*
158 158 * Return the mnode limits so that hpm_counters length and base
159 159 * index can be determined. When interleaved_mnodes is set, we
160 160 * create an array only for the first mnode that exists. All other
161 161 * mnodes will share the array in this case.
162 162 * If interleaved_mnodes is not set, simply return the limits for
163 163 * the given mnode.
164 164 */
165 165 #define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \
166 166 if (!interleaved_mnodes) { \
167 167 (physbase) = mem_node_config[(mnode)].physbase; \
168 168 (physmax) = mem_node_config[(mnode)].physmax; \
169 169 (first) = (mnode); \
170 170 } else if ((first) < 0) { \
171 171 mem_node_max_range(&(physbase), &(physmax)); \
172 172 (first) = (mnode); \
173 173 }
174 174
175 175 #define PAGE_CTRS_WRITE_LOCK(mnode) \
176 176 if (!interleaved_mnodes) { \
177 177 rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER); \
178 178 page_freelist_lock(mnode); \
179 179 } else { \
180 180 /* changing shared hpm_counters */ \
181 181 int _i; \
182 182 for (_i = 0; _i < max_mem_nodes; _i++) { \
183 183 rw_enter(&page_ctrs_rwlock[_i], RW_WRITER); \
184 184 page_freelist_lock(_i); \
185 185 } \
186 186 }
187 187
188 188 #define PAGE_CTRS_WRITE_UNLOCK(mnode) \
189 189 if (!interleaved_mnodes) { \
190 190 page_freelist_unlock(mnode); \
191 191 rw_exit(&page_ctrs_rwlock[(mnode)]); \
192 192 } else { \
193 193 int _i; \
194 194 for (_i = 0; _i < max_mem_nodes; _i++) { \
195 195 page_freelist_unlock(_i); \
196 196 rw_exit(&page_ctrs_rwlock[_i]); \
197 197 } \
198 198 }
199 199
200 200 /*
201 201 * cpu specific color conversion functions
202 202 */
203 203 extern uint_t page_get_nsz_color_mask_cpu(uchar_t, uint_t);
204 204 #pragma weak page_get_nsz_color_mask_cpu
205 205
206 206 extern uint_t page_get_nsz_color_cpu(uchar_t, uint_t);
207 207 #pragma weak page_get_nsz_color_cpu
208 208
209 209 extern uint_t page_get_color_shift_cpu(uchar_t, uchar_t);
210 210 #pragma weak page_get_color_shift_cpu
211 211
212 212 extern uint_t page_convert_color_cpu(uint_t, uchar_t, uchar_t);
213 213 #pragma weak page_convert_color_cpu
214 214
215 215 extern pfn_t page_next_pfn_for_color_cpu(pfn_t,
216 216 uchar_t, uint_t, uint_t, uint_t, void *);
217 217 #pragma weak page_next_pfn_for_color_cpu
218 218
219 219 extern uint_t page_pfn_2_color_cpu(pfn_t, uchar_t, void *);
220 220 #pragma weak page_pfn_2_color_cpu
221 221
222 222 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \
223 223 ((&page_get_color_shift_cpu != NULL) ? \
224 224 page_get_color_shift_cpu(szc, nszc) : \
225 225 (hw_page_array[(nszc)].hp_shift - \
226 226 hw_page_array[(szc)].hp_shift))
227 227
228 228 #define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \
229 229 ((&page_convert_color_cpu != NULL) ? \
230 230 page_convert_color_cpu(ncolor, szc, nszc) : \
231 231 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc))))
232 232
233 233 #define PFN_2_COLOR(pfn, szc, it) \
234 234 ((&page_pfn_2_color_cpu != NULL) ? \
235 235 page_pfn_2_color_cpu(pfn, szc, it) : \
236 236 ((pfn & (hw_page_array[0].hp_colors - 1)) >> \
237 237 (hw_page_array[szc].hp_shift - \
238 238 hw_page_array[0].hp_shift)))
239 239
240 240 #define PNUM_SIZE(szc) \
241 241 (hw_page_array[(szc)].hp_pgcnt)
242 242 #define PNUM_SHIFT(szc) \
243 243 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift)
244 244 #define PAGE_GET_SHIFT(szc) \
245 245 (hw_page_array[(szc)].hp_shift)
246 246 #define PAGE_GET_PAGECOLORS(szc) \
247 247 (hw_page_array[(szc)].hp_colors)
248 248
249 249 /*
250 250 * This macro calculates the next sequential pfn with the specified
251 251 * color, using the color equivalency mask.
252 252 */
253 253 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \
254 254 { \
255 255 ASSERT(((color) & ~(ceq_mask)) == 0); \
256 256 if (&page_next_pfn_for_color_cpu == NULL) { \
257 257 uint_t pfn_shift = PAGE_BSZS_SHIFT(szc); \
258 258 pfn_t spfn = pfn >> pfn_shift; \
259 259 pfn_t stride = (ceq_mask) + 1; \
260 260 ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0); \
261 261 if (((spfn ^ (color)) & (ceq_mask)) == 0) { \
262 262 pfn += stride << pfn_shift; \
263 263 } else { \
264 264 pfn = (spfn & ~(pfn_t)(ceq_mask)) | (color); \
265 265 pfn = (pfn > spfn ? pfn : pfn + stride) << \
266 266 pfn_shift; \
267 267 } \
268 268 } else { \
269 269 pfn = page_next_pfn_for_color_cpu(pfn, szc, color, \
270 270 ceq_mask, color_mask, it); \
271 271 } \
272 272 }
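
For illustration, take szc == 0 (so pfn_shift == 0), color == 2 and
ceq_mask == 0x3, giving stride == 4:

	pfn = 5:  (5 ^ 2) & 3 != 0,  so pfn = (5 & ~3) | 2 = 6   (6 > 5, keep)
	pfn = 6:  (6 ^ 2) & 3 == 0,  so pfn = 6 + 4 = 10

that is, the macro steps through 2, 6, 10, ..., the pfns whose low bits
match the requested color under the equivalency mask.
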
273 273
274 274 /* get the color equivalency mask for the next szc */
275 275 #define PAGE_GET_NSZ_MASK(szc, mask) \
276 276 ((&page_get_nsz_color_mask_cpu == NULL) ? \
277 277 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \
278 278 page_get_nsz_color_mask_cpu(szc, mask))
279 279
280 280 /* get the color of the next szc */
281 281 #define PAGE_GET_NSZ_COLOR(szc, color) \
282 282 ((&page_get_nsz_color_cpu == NULL) ? \
283 283 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) : \
284 284 page_get_nsz_color_cpu(szc, color))
285 285
286 286 /* Find the bin for the given page if it was of size szc */
287 287 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, (void *)(-1)))
288 288
289 289 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc))
290 290
291 291 #define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum))
292 292
293 293 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \
294 294 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \
295 295 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])
296 296
297 297 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode])
298 298 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode])
299 299
300 300 #define PFN_BASE(pfnum, szc) (pfnum & ~((1 << PAGE_BSZS_SHIFT(szc)) - 1))
301 301
302 302 /*
303 303 * this structure is used for walking free page lists; it controls
304 304 * when to split large pages into smaller pages, and when to
305 305 * coalesce smaller pages into larger pages
306 306 */
307 307 typedef struct page_list_walker {
308 308 uint_t plw_colors; /* num of colors for szc */
309 309 uint_t plw_color_mask; /* colors-1 */
310 310 uint_t plw_bin_step; /* next bin: 1 or 2 */
311 311 uint_t plw_count; /* loop count */
312 312 uint_t plw_bin0; /* starting bin */
313 313 uint_t plw_bin_marker; /* bin after initial jump */
314 314 uint_t plw_bin_split_prev; /* last bin we tried to split */
315 315 uint_t plw_do_split; /* set if OK to split */
316 316 uint_t plw_split_next; /* next bin to split */
317 317 uint_t plw_ceq_dif; /* number of different color groups */
318 318 /* to check */
319 319 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
320 320 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */
321 321 } page_list_walker_t;
322 322
323 323 void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
324 324 int can_split, int use_ceq, page_list_walker_t *plw);
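
A hedged setup sketch (the walk itself lives in vm_pagelist.c):

	page_list_walker_t plw;

	/* can_split/use_ceq enable large-page splits and color equivalency */
	page_list_walk_init(szc, flags, bin, 1, 1, &plw);
	/* plw.plw_bin0, plw.plw_bin_step and plw.plw_count drive the scan */
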
325 325
326 326 typedef char hpmctr_t;
327 327
328 328 #ifdef DEBUG
329 329 #define CHK_LPG(pp, szc) chk_lpg(pp, szc)
330 330 extern void chk_lpg(page_t *, uchar_t);
331 331 #else
332 332 #define CHK_LPG(pp, szc)
333 333 #endif
334 334
335 335 /*
336 336 * page list count per mnode and type.
337 337 */
338 338 typedef struct {
339 339 pgcnt_t plc_mt_pgmax; /* max page cnt */
340 340 pgcnt_t plc_mt_clpgcnt; /* cache list cnt */
341 341 pgcnt_t plc_mt_flpgcnt; /* free list cnt - small pages */
342 342 pgcnt_t plc_mt_lgpgcnt; /* free list cnt - large pages */
343 343 #ifdef DEBUG
344 344 struct {
345 345 pgcnt_t plc_mts_pgcnt; /* per page size count */
346 346 int plc_mts_colors;
347 347 pgcnt_t *plc_mtsc_pgcnt; /* per color bin count */
348 348 } plc_mts[MMU_PAGE_SIZES];
349 349 #endif
350 350 } plcnt_t[MAX_MEM_NODES][MAX_MEM_TYPES];
351 351
352 352 #ifdef DEBUG
353 353
354 354 #define PLCNT_SZ(ctrs_sz) { \
355 355 int szc; \
356 356 for (szc = 0; szc < mmu_page_sizes; szc++) { \
357 357 int colors = page_get_pagecolors(szc); \
358 358 ctrs_sz += (max_mem_nodes * MAX_MEM_TYPES * \
359 359 colors * sizeof (pgcnt_t)); \
360 360 } \
361 361 }
362 362
363 363 #define PLCNT_INIT(base) { \
364 364 int mn, mt, szc, colors; \
365 365 for (szc = 0; szc < mmu_page_sizes; szc++) { \
366 366 colors = page_get_pagecolors(szc); \
367 367 for (mn = 0; mn < max_mem_nodes; mn++) { \
368 368 for (mt = 0; mt < MAX_MEM_TYPES; mt++) { \
369 369 plcnt[mn][mt].plc_mts[szc]. \
370 370 plc_mts_colors = colors; \
371 371 plcnt[mn][mt].plc_mts[szc]. \
372 372 plc_mtsc_pgcnt = (pgcnt_t *)base; \
373 373 base += (colors * sizeof (pgcnt_t)); \
374 374 } \
375 375 } \
376 376 } \
377 377 }
378 378
379 379 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \
380 380 int bin = PP_2_BIN(pp); \
381 381 if (flags & PG_CACHE_LIST) \
382 382 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \
383 383 else if (szc) \
384 384 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \
385 385 else \
386 386 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \
387 387 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc].plc_mts_pgcnt, \
388 388 cnt); \
389 389 atomic_add_long(&plcnt[mn][mtype].plc_mts[szc]. \
390 390 plc_mtsc_pgcnt[bin], cnt); \
391 391 }
392 392
393 393 #else
394 394
395 395 #define PLCNT_SZ(ctrs_sz)
396 396
397 397 #define PLCNT_INIT(base)
398 398
399 399 /* PG_FREE_LIST may not be explicitly set in flags for large pages */
400 400
401 401 #define PLCNT_DO(pp, mn, mtype, szc, cnt, flags) { \
402 402 if (flags & PG_CACHE_LIST) \
403 403 atomic_add_long(&plcnt[mn][mtype].plc_mt_clpgcnt, cnt); \
404 404 else if (szc) \
405 405 atomic_add_long(&plcnt[mn][mtype].plc_mt_lgpgcnt, cnt); \
406 406 else \
407 407 atomic_add_long(&plcnt[mn][mtype].plc_mt_flpgcnt, cnt); \
408 408 }
409 409
410 410 #endif
411 411
412 412 #define PLCNT_INCR(pp, mn, mtype, szc, flags) { \
413 413 long cnt = (1 << PAGE_BSZS_SHIFT(szc)); \
414 414 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \
415 415 }
416 416
417 417 #define PLCNT_DECR(pp, mn, mtype, szc, flags) { \
418 418 long cnt = ((-1) << PAGE_BSZS_SHIFT(szc)); \
419 419 PLCNT_DO(pp, mn, mtype, szc, cnt, flags); \
420 420 }
421 421
422 422 /*
423 423 * macros to update page list max counts - done when pages are transferred
424 424 * from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page).
425 425 */
426 426
427 427 #define PLCNT_XFER_NORELOC(pp) { \
428 428 long cnt = (1 << PAGE_BSZS_SHIFT((pp)->p_szc)); \
429 429 int mn = PP_2_MEM_NODE(pp); \
430 430 atomic_add_long(&plcnt[mn][MTYPE_NORELOC].plc_mt_pgmax, cnt); \
431 431 atomic_add_long(&plcnt[mn][MTYPE_RELOC].plc_mt_pgmax, -cnt); \
432 432 }
433 433
434 434 /*
435 435 * macro to modify the page list max counts when memory is added to
436 436 * the page lists during startup (add_physmem) or during a DR operation
437 437 * when memory is added (kphysm_add_memory_dynamic) or deleted
438 438 * (kphysm_del_cleanup).
439 439 */
440 440 #define PLCNT_MODIFY_MAX(pfn, cnt) { \
441 441 spgcnt_t _cnt = (spgcnt_t)(cnt); \
442 442 pgcnt_t _acnt = ABS(_cnt); \
443 443 int _mn; \
444 444 pgcnt_t _np; \
445 445 if (&plat_mem_node_intersect_range != NULL) { \
446 446 for (_mn = 0; _mn < max_mem_nodes; _mn++) { \
447 447 plat_mem_node_intersect_range((pfn), _acnt, _mn, &_np);\
448 448 if (_np == 0) \
449 449 continue; \
450 450 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \
451 451 (_cnt < 0) ? -_np : _np); \
452 452 } \
453 453 } else { \
454 454 pfn_t _pfn = (pfn); \
455 455 pfn_t _endpfn = _pfn + _acnt; \
456 456 while (_pfn < _endpfn) { \
457 457 _mn = PFN_2_MEM_NODE(_pfn); \
458 458 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \
459 459 _pfn; \
460 460 _pfn += _np; \
461 461 atomic_add_long(&plcnt[_mn][MTYPE_RELOC].plc_mt_pgmax, \
462 462 (_cnt < 0) ? -_np : _np); \
463 463 } \
464 464 } \
465 465 }
466 466
467 467 /*
468 468 * macro to call page_ctrs_adjust() when memory is added
469 469 * during a DR operation.
470 470 */
471 471 #define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \
472 472 spgcnt_t _cnt = (spgcnt_t)(cnt); \
473 473 int _mn; \
474 474 pgcnt_t _np; \
475 475 if (&plat_mem_node_intersect_range != NULL) { \
476 476 for (_mn = 0; _mn < max_mem_nodes; _mn++) { \
477 477 plat_mem_node_intersect_range((pfn), _cnt, _mn, &_np); \
478 478 if (_np == 0) \
479 479 continue; \
480 480 if ((rv = page_ctrs_adjust(_mn)) != 0) \
481 481 break; \
482 482 } \
483 483 } else { \
484 484 pfn_t _pfn = (pfn); \
485 485 pfn_t _endpfn = _pfn + _cnt; \
486 486 while (_pfn < _endpfn) { \
487 487 _mn = PFN_2_MEM_NODE(_pfn); \
488 488 _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - \
489 489 _pfn; \
490 490 _pfn += _np; \
491 491 if ((rv = page_ctrs_adjust(_mn)) != 0) \
492 492 break; \
493 493 } \
494 494 } \
495 495 }
496 496
497 497 extern plcnt_t plcnt;
498 498
499 499 #define MNODE_PGCNT(mn) \
500 500 (plcnt[mn][MTYPE_RELOC].plc_mt_clpgcnt + \
501 501 plcnt[mn][MTYPE_NORELOC].plc_mt_clpgcnt + \
502 502 plcnt[mn][MTYPE_RELOC].plc_mt_flpgcnt + \
503 503 plcnt[mn][MTYPE_NORELOC].plc_mt_flpgcnt + \
504 504 plcnt[mn][MTYPE_RELOC].plc_mt_lgpgcnt + \
505 505 plcnt[mn][MTYPE_NORELOC].plc_mt_lgpgcnt)
506 506
507 507 #define MNODETYPE_PGCNT(mn, mtype) \
508 508 (plcnt[mn][mtype].plc_mt_clpgcnt + \
509 509 plcnt[mn][mtype].plc_mt_flpgcnt + \
510 510 plcnt[mn][mtype].plc_mt_lgpgcnt)
511 511
512 512 /*
513 513 * macros to loop through the mtype range - MTYPE_START (and possibly
514 514 * MTYPE_NEXT) returns -1 in mtype if there are no pages in mnode/mtype.
515 515 */
516 516 #define MTYPE_START(mnode, mtype, flags) { \
517 517 if (plcnt[mnode][mtype].plc_mt_pgmax == 0) { \
518 518 ASSERT(mtype == MTYPE_RELOC || \
519 519 MNODETYPE_PGCNT(mnode, mtype) == 0 || \
520 520 plcnt[mnode][mtype].plc_mt_pgmax != 0); \
521 521 MTYPE_NEXT(mnode, mtype, flags); \
522 522 } \
523 523 }
524 524
525 525 /*
526 526 * if allocation from the RELOC pool failed and there is sufficient cage
527 527 * memory, attempt to allocate from the NORELOC pool.
528 528 */
529 529 #define MTYPE_NEXT(mnode, mtype, flags) { \
530 530 if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) && \
531 531 (kcage_freemem >= kcage_lotsfree)) { \
532 532 if (plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax == 0) { \
533 533 ASSERT(MNODETYPE_PGCNT(mnode, MTYPE_NORELOC) == 0 || \
534 534 plcnt[mnode][MTYPE_NORELOC].plc_mt_pgmax != 0); \
535 535 mtype = -1; \
536 536 } else { \
537 537 mtype = MTYPE_NORELOC; \
538 538 flags |= PG_NORELOC; \
539 539 } \
540 540 } else { \
541 541 mtype = -1; \
542 542 } \
543 543 }
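
Taken together, the mtype macros are typically used in a loop of this
shape (a sketch, assuming kernel context):

	int mtype;

	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz);
	MTYPE_START(mnode, mtype, flags);	/* may leave mtype == -1 */
	while (mtype >= 0) {
		/* ... attempt allocation from (mnode, mtype) ... */
		MTYPE_NEXT(mnode, mtype, flags);
	}
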
544 544
545 545 /*
546 546 * get the ecache setsize for the current cpu.
547 547 */
548 548 #define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize)
549 549
550 550 extern struct cpu cpu0;
551 551 #define CPU0 &cpu0
552 552
553 553 #define PAGE_BSZS_SHIFT(szc) TTE_BSZS_SHIFT(szc)
554 554 /*
555 555 * For sfmmu each larger page is 8 times the size of the previous
556 556 * size page.
557 557 */
558 558 #define FULL_REGION_CNT(rg_szc) (8)
559 559
560 560 /*
561 561 * The counter base must be per page_counter element to prevent
562 562 * races when re-indexing, and the base page size element should
563 563 * be aligned on a boundary of the given region size.
564 564 *
565 565 * We also round up the number of pages spanned by the counters
566 566 * for a given region to PC_BASE_ALIGN in certain situations to simplify
567 567 * the coding for some non-performance critical routines.
568 568 */
569 569 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1))
570 570 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1)
571 571
572 572 extern int ecache_alignsize;
573 573 #define L2CACHE_ALIGN ecache_alignsize
574 574 #define L2CACHE_ALIGN_MAX 512
575 575
576 576 extern int update_proc_pgcolorbase_after_fork;
577 577 extern int consistent_coloring;
578 578 extern uint_t vac_colors_mask;
579 579 extern int vac_size;
580 580 extern int vac_shift;
581 581
582 582 /*
583 583 * Kernel mem segment in 64-bit space
584 584 */
585 585 extern caddr_t kmem64_base, kmem64_end, kmem64_aligned_end;
586 586 extern int kmem64_alignsize, kmem64_szc;
587 587 extern uint64_t kmem64_pabase;
588 588 extern int max_bootlp_tteszc;
589 589
590 590 /*
591 591 * Maximum and default values for user heap, stack, private and shared
592 592 * anonymous memory, and user text and initialized data.
593 593 *
594 594 * Initial values are defined in architecture specific mach_vm_dep.c file.
595 595 * Used by map_pgsz*() routines.
596 596 */
597 597 extern size_t max_uheap_lpsize;
598 598 extern size_t default_uheap_lpsize;
599 599 extern size_t max_ustack_lpsize;
600 600 extern size_t default_ustack_lpsize;
601 601 extern size_t max_privmap_lpsize;
602 602 extern size_t max_uidata_lpsize;
603 603 extern size_t max_utext_lpsize;
604 604 extern size_t max_shm_lpsize;
605 605
606 606 /*
607 607 * For adjusting the default lpsize, for DTLB-limited page sizes.
608 608 */
609 609 extern void adjust_data_maxlpsize(size_t ismpagesize);
610 610
611 611 /*
612 612 * Sanity control. Don't use large pages regardless of user
613 613 * settings if less than privm_lpg_min_physmem or shm_lpg_min_physmem
614 614 * memory is installed. The units for these variables are 8K pages.
615 615 */
616 616 extern pgcnt_t privm_lpg_min_physmem;
617 617 extern pgcnt_t shm_lpg_min_physmem;
618 618
619 619 /*
620 620 * AS_2_BIN macro controls the page coloring policy.
621 621 * 0 (default) uses various vaddr bits
622 622 * 1 virtual=paddr
623 623 * 2 bin hopping
624 624 */
625 625 #define AS_2_BIN(as, seg, vp, addr, bin, szc) \
626 626 switch (consistent_coloring) { \
627 627 default: \
628 628 cmn_err(CE_WARN, \
629 629 "AS_2_BIN: bad consistent coloring value"); \
630 630 /* assume default algorithm -> continue */ \
631 631 case 0: { \
632 632 uint32_t ndx, new; \
633 633 int slew = 0; \
634 634 pfn_t pfn; \
635 635 \
636 636 if (vp != NULL && IS_SWAPVP(vp) && \
637 637 seg->s_ops == &segvn_ops) \
638 638 slew = as_color_bin(as); \
639 639 \
640 640 pfn = ((uintptr_t)addr >> MMU_PAGESHIFT) + \
641 641 (((uintptr_t)addr >> page_coloring_shift) << \
642 642 (vac_shift - MMU_PAGESHIFT)); \
643 643 if ((szc) == 0 || &page_pfn_2_color_cpu == NULL) { \
644 644 pfn += slew; \
645 645 bin = PFN_2_COLOR(pfn, szc, NULL); \
646 646 } else { \
647 647 bin = PFN_2_COLOR(pfn, szc, NULL); \
648 648 bin += slew >> (vac_shift - MMU_PAGESHIFT); \
649 649 bin &= hw_page_array[(szc)].hp_colors - 1; \
650 650 } \
651 651 break; \
652 652 } \
653 653 case 1: \
654 654 bin = PFN_2_COLOR(((uintptr_t)addr >> MMU_PAGESHIFT), \
655 655 szc, NULL); \
656 656 break; \
657 657 case 2: { \
658 658 int cnt = as_color_bin(as); \
659 659 uint_t color_mask = page_get_pagecolors(0) - 1; \
660 660 \
661 661 /* make sure physical color aligns with vac color */ \
662 662 while ((cnt & vac_colors_mask) != \
663 663 addr_to_vcolor(addr)) { \
664 664 cnt++; \
665 665 } \
666 666 bin = cnt = cnt & color_mask; \
667 667 bin >>= PAGE_GET_COLOR_SHIFT(0, szc); \
668 668 /* update per as page coloring fields */ \
669 669 cnt = (cnt + 1) & color_mask; \
670 670 if (cnt == (as_color_start(as) & color_mask)) { \
671 671 cnt = as_color_start(as) = as_color_start(as) + \
672 672 PGCLR_LOOPFACTOR; \
673 673 } \
674 674 as_color_bin(as) = cnt & color_mask; \
675 675 break; \
676 676 } \
677 677 } \
678 678 ASSERT(bin < page_get_pagecolors(szc));
679 679
680 680 /*
681 681 * cpu private vm data - accessed thru CPU->cpu_vm_data
682 682 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
683 683 * vc_pnext_memseg: tracks last memseg visited in page_nextn()
684 684 * vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t
685 685 * vc_kmsize: orignal kmem size for this vm_cpu_data_t
686 686 */
687 687
688 688 typedef struct {
689 689 struct memseg *vc_pnum_memseg;
690 690 struct memseg *vc_pnext_memseg;
691 691 void *vc_kmptr;
692 692 size_t vc_kmsize;
693 693 } vm_cpu_data_t;
694 694
695 695 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */
696 696 #define VM_CPU_DATA_PADSIZE \
697 697 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))
698 698
699 -/* for boot cpu before kmem is initialized */
700 -extern char vm_cpu_data0[];
701 -
702 699 /*
703 700 * Function to get an ecache color bin: F(as, cnt, vcolor).
704 701 * The goals of this function are:
705 702 * - to spread a process's physical pages across the entire ecache to
706 703 *   maximize its use.
707 704 * - to minimize vac flushes caused when we reuse a physical page on a
708 705 *   different vac color than it was previously used.
709 706 * - to prevent all processes from using the exact same colors and
710 707 *   trashing each other.
711 708 *
712 709 * cnt is a bin ptr kept on a per as basis. As we page_create we increment
713 710 * the ptr so we spread out the physical pages to cover the entire ecache.
714 711 * The virtual color is made a subset of the physical color in order to
715 712 * minimize virtual cache flushing.
716 713 * We add in the as to spread out different address spaces. This happens
717 714 * when we initialize the start count value.
718 715 * sizeof (struct as) is 60, so we shift by 3 to get into the bit range
719 716 * that will tend to change. For example, on spitfire based machines
720 717 * (vcshft == 1) contiguous as'es are spread by ~6 bins.
721 718 * vcshft provides for proper virtual color alignment.
722 719 * In theory cnt should be updated using cas only, but if we are off by
723 720 * one or two it is no big deal.
724 721 * We also keep a start value which is used to randomize the bin on which
725 722 * we start counting when it is time to start another loop. This avoids
726 723 * contiguous ecache-sized allocations all pointing to the same bin.
727 724 * Why 3? It seems to work ok. Better than 7 or anything larger.
728 725 */
729 726 #define PGCLR_LOOPFACTOR 3
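
For example, with 32 colors a pass that starts at bin s visits s, s+1,
..., s+31 (mod 32); when the count wraps back to the starting bin, the
start is advanced by PGCLR_LOOPFACTOR, so successive passes begin at s,
s+3, s+6, ... rather than replaying the same sequence.
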
730 727
731 728 /*
732 729 * When a bin is empty, and we can't satisfy a color request correctly,
733 730 * we scan. If we assume that the programs have reasonable spatial
734 731 * behavior, then it will not be a good idea to use the adjacent color.
735 732 * Using the adjacent color would result in virtually adjacent addresses
736 733 * mapping into the same spot in the cache. So, if we stumble across
737 734 * an empty bin, skip a bunch before looking. After the first skip,
738 735 * then just look one bin at a time so we don't miss our cache on
739 736 * every look. Be sure to check every bin. Page_create() will panic
740 737 * if we miss a page.
741 738 *
742 739 * This also explains the `<=' in the for loops in both page_get_freelist()
743 740 * and page_get_cachelist(). Since we checked the target bin, skipped
744 741 * a bunch, then continued one a time, we wind up checking the target bin
745 742 * a bunch, then continued one at a time, we wind up checking the target
746 743 * bin twice to make sure we get all of the bins.
747 744 #define BIN_STEP 20
748 745
749 746 #ifdef VM_STATS
750 747 struct vmm_vmstats_str {
751 748 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */
752 749 ulong_t pgf_allocok[MMU_PAGE_SIZES];
753 750 ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
754 751 ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
755 752 ulong_t pgf_allocdeferred;
756 753 ulong_t pgf_allocretry[MMU_PAGE_SIZES];
757 754 ulong_t pgc_alloc; /* page_get_cachelist */
758 755 ulong_t pgc_allocok;
759 756 ulong_t pgc_allocokrem;
760 757 ulong_t pgc_allocokdeferred;
761 758 ulong_t pgc_allocfailed;
762 759 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */
763 760 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES];
764 761 ulong_t pgcp_allocempty[MMU_PAGE_SIZES];
765 762 ulong_t pgcp_allocok[MMU_PAGE_SIZES];
766 763 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */
767 764 ulong_t ptcpfreethresh[MMU_PAGE_SIZES];
768 765 ulong_t ptcpfailexcl[MMU_PAGE_SIZES];
769 766 ulong_t ptcpfailszc[MMU_PAGE_SIZES];
770 767 ulong_t ptcpfailcage[MMU_PAGE_SIZES];
771 768 ulong_t ptcpok[MMU_PAGE_SIZES];
772 769 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */
773 770 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES];
774 771 ulong_t pgmf_allocempty[MMU_PAGE_SIZES];
775 772 ulong_t pgmf_allocok[MMU_PAGE_SIZES];
776 773 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */
777 774 ulong_t pgmc_allocfailed;
778 775 ulong_t pgmc_allocempty;
779 776 ulong_t pgmc_allocok;
780 777 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */
781 778 ulong_t plsub_free[MMU_PAGE_SIZES];
782 779 ulong_t pladd_cache;
783 780 ulong_t plsub_cache;
784 781 ulong_t plsubpages_szcbig;
785 782 ulong_t plsubpages_szc0;
786 783 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */
787 784 ulong_t pfs_demote[MMU_PAGE_SIZES];
788 785 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
789 786 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */
790 787 ulong_t ppr_relocok[MMU_PAGE_SIZES];
791 788 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
792 789 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
793 790 ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
794 791 ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
795 792 ulong_t ppr_krelocfail[MMU_PAGE_SIZES];
796 793 ulong_t ppr_copyfail;
797 794 /* page coalesce counter */
798 795 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
799 796 /* candidates useful */
800 797 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
801 798 /* ctrs changed after locking */
802 799 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
803 800 /* page_freelist_coalesce failed */
804 801 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
805 802 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */
806 803 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */
807 804 };
808 805 extern struct vmm_vmstats_str vmm_vmstats;
809 806 #endif /* VM_STATS */
810 807
811 808 /*
812 809 * Used to hold off page relocations into the cage until OBP has completed
813 810 * its boot-time handoff of its resources to the kernel.
814 811 */
815 812 extern int page_relocate_ready;
816 813
817 814 /*
818 815 * cpu/mmu-dependent vm variables may be reset at bootup.
819 816 */
820 817 extern uint_t mmu_page_sizes;
821 818 extern uint_t max_mmu_page_sizes;
822 819 extern uint_t mmu_hashcnt;
823 820 extern uint_t max_mmu_hashcnt;
824 821 extern size_t mmu_ism_pagesize;
825 822 extern int mmu_exported_pagesize_mask;
826 823 extern uint_t mmu_exported_page_sizes;
827 824 extern uint_t szc_2_userszc[];
828 825 extern uint_t userszc_2_szc[];
829 826
830 827 #define mmu_legacy_page_sizes mmu_exported_page_sizes
831 828 #define USERSZC_2_SZC(userszc) (userszc_2_szc[userszc])
832 829 #define SZC_2_USERSZC(szc) (szc_2_userszc[szc])
833 830
834 831 /*
835 832 * Platform specific page routines
836 833 */
837 834 extern void mach_page_add(page_t **, page_t *);
838 835 extern void mach_page_sub(page_t **, page_t *);
839 836 extern uint_t page_get_pagecolors(uint_t);
840 837 extern void ppcopy_kernel__relocatable(page_t *, page_t *);
841 838 #define ppcopy_kernel(p1, p2) ppcopy_kernel__relocatable(p1, p2)
842 839
843 840 /*
844 841 * platform specific large pages for kernel heap support
845 842 */
846 843 extern size_t get_segkmem_lpsize(size_t lpsize);
847 844 extern size_t mmu_get_kernel_lpsize(size_t lpsize);
848 845 extern void mmu_init_kernel_pgsz(struct hat *hat);
849 846 extern void mmu_init_kcontext();
850 847 extern uint64_t kcontextreg;
851 848
852 849 /*
853 850 * Nucleus data page allocator routines
854 851 */
855 852 extern void ndata_alloc_init(struct memlist *, uintptr_t, uintptr_t);
856 853 extern void *ndata_alloc(struct memlist *, size_t, size_t);
857 854 extern void *ndata_extra_base(struct memlist *, size_t, caddr_t);
858 855 extern size_t ndata_maxsize(struct memlist *);
859 856 extern size_t ndata_spare(struct memlist *, size_t, size_t);
860 857
861 858 #ifdef __cplusplus
862 859 }
863 860 #endif
864 861
865 862 #endif /* _VM_DEP_H */