7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 * Copyright 2018 Joyent, Inc.
28 */
29
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32
33 /*
34 * Portions of this source code were derived from Berkeley 4.3 BSD
35 * under license from the Regents of the University of California.
36 */
37
38 /*
39 * UNIX machine dependent virtual memory support.
40 */
41
42 #include <sys/types.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/user.h>
46 #include <sys/proc.h>
47 #include <sys/kmem.h>
185 #define PFN_4GIG 0x100000
186 #define PFN_16MEG 0x1000
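/*
 * With the 4 KB base page size used on x86, PFN_4GIG (0x100000 pages) marks
 * the 4 GB physical address boundary and PFN_16MEG (0x1000 pages) marks the
 * 16 MB boundary: 0x100000 * 4096 bytes = 4 GB, 0x1000 * 4096 bytes = 16 MB.
 */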
187 /* Indices into the memory range (arch_memranges) array. */
188 #define MRI_4G 0
189 #define MRI_2G 1
190 #define MRI_16M 2
191 #define MRI_0 3
192 static pfn_t arch_memranges[NUM_MEM_RANGES] = {
193 PFN_4GIG, /* pfn range for 4G and above */
194 0x80000, /* pfn range for 2G-4G */
195 PFN_16MEG, /* pfn range for 16M-2G */
196 0x00000, /* pfn range for 0-16M */
197 };
198 pfn_t *memranges = &arch_memranges[0];
199 int nranges = NUM_MEM_RANGES;
200
201 /*
202 * This combines mem_node_config and memranges into one data
203 * structure to be used for page list management.
204 */
205 mnoderange_t *mnoderanges;
206 int mnoderangecnt;
207 int mtype4g;
208 int mtype16m;
209 int mtypetop; /* index of highest pfn'ed mnoderange */
210
211 /*
212 * 4g memory management variables for systems with more than 4g of memory:
213 *
214 * physical memory below 4g is required for 32bit dma devices and, currently,
215 * for kmem memory. On systems with more than 4g of memory, the pool of memory
216 * below 4g can be depleted without any paging activity given that there is
217 * likely to be sufficient memory above 4g.
218 *
219 * physmax4g is set true if the largest pfn is over 4g. The rest of the
220 * 4g memory management code is enabled only when physmax4g is true.
221 *
222 * maxmem4g is the count of the maximum number of pages on the page lists
223 * with physical addresses below 4g. It can be a lot less than 4g given that
224 * BIOS may reserve large chunks of space below 4g for hot plug pci devices,
225 * agp aperture etc.
226 *
227 * freemem4g maintains the count of the number of available pages on the
228 * page lists with physical addresses below 4g.
229 *
246 static int physmax4g;
247 static int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */
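/*
 * With the default shift of 4, the DESFREE4G threshold derived from
 * maxmem4g works out to maxmem4g / 16.
 */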
248
249 /*
250 * 16m memory management:
251 *
252 * reserve some amount of physical memory below 16m for legacy devices.
253 *
254 * RESTRICT16M_ALLOC returns true if there are sufficient free pages above
255 * 16m or if the 16m pool would drop below DESFREE16M.
256 *
257 * In this case, general page allocations via page_get_{free,cache}list
258 * routines will be restricted from allocating from the 16m pool. Allocations
259 * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations
260 * are not restricted.
261 */
262
263 #define FREEMEM16M MTYPE_FREEMEM(mtype16m)
264 #define DESFREE16M desfree16m
265 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \
266 ((freemem != 0) && ((flags & PG_PANIC) == 0) && \
267 ((freemem >= (FREEMEM16M)) || \
268 (FREEMEM16M < (DESFREE16M + pgcnt))))
269
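/*
 * 0x380 is 896 pages: with the 4 KB base page size this keeps about
 * 3.5 MB of headroom in the below-16m pool for general allocations.
 */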
270 static pgcnt_t desfree16m = 0x380;
271
272 /*
273 * This can be patched via /etc/system to allow old non-PAE aware device
274 * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM.
275 */
276 int restricted_kmemalloc = 0;
277
278 #ifdef VM_STATS
279 struct {
280 ulong_t pga_alloc;
281 ulong_t pga_notfullrange;
282 ulong_t pga_nulldmaattr;
283 ulong_t pga_allocok;
284 ulong_t pga_allocfailed;
285 ulong_t pgma_alloc;
286 ulong_t pgma_allocok;
1372 mri--;
1373
1374 /*
1375 * increment mnode range counter when memranges or mnode
1376 * boundary is reached.
1377 */
1378 while (mri >= 0 &&
1379 mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
1380 mnrcnt++;
1381 if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
1382 mri--;
1383 else
1384 break;
1385 }
1386 }
1387 ASSERT(mnrcnt <= MAX_MNODE_MRANGES);
1388 return (mnrcnt);
1389 #endif /* __xpv */
1390 }
1391
1392 /*
1393 * mnode_range_setup() initializes mnoderanges.
1394 */
1395 void
1396 mnode_range_setup(mnoderange_t *mnoderanges)
1397 {
1398 mnoderange_t *mp = mnoderanges;
1399 int mnode, mri;
1400 int mindex = 0; /* current index into mnoderanges array */
1401 int i, j;
1402 pfn_t hipfn;
1403 int last, hi;
1404
1405 for (mnode = 0; mnode < max_mem_nodes; mnode++) {
1406 if (mem_node_config[mnode].exists == 0)
1407 continue;
1408
1409 mri = nranges - 1;
1410
1411 while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
1412 mri--;
1413
1414 while (mri >= 0 && mem_node_config[mnode].physmax >=
1415 MEMRANGELO(mri)) {
1416 mnoderanges->mnr_pfnlo = MAX(MEMRANGELO(mri),
1417 mem_node_config[mnode].physbase);
1418 mnoderanges->mnr_pfnhi = MIN(MEMRANGEHI(mri),
1419 mem_node_config[mnode].physmax);
1420 mnoderanges->mnr_mnode = mnode;
1421 mnoderanges->mnr_memrange = mri;
1422 mnoderanges->mnr_exists = 1;
1423 mnoderanges++;
1424 mindex++;
1425 if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
1426 mri--;
1427 else
1428 break;
1429 }
1430 }
1431
1432 /*
1433 * For now do a simple sort of the mnoderanges array to fill in
1434 * the mnr_next fields. Since mindex is expected to be relatively
1435 * small, an O(N^2) algorithm is sufficient.
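 *
 * The result is a list threaded through the mnr_next fields: mtypetop names
 * the range with the highest pfns, each entry's mnr_next points at the range
 * with the next lower mnr_pfnlo, and the chain ends at the range that starts
 * at pfn 0 (recorded in mtype16m), whose mnr_next is -1.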
1436 */
1437 for (i = 0; i < mindex; i++) {
1438 if (mp[i].mnr_pfnlo == 0) /* find lowest */
1439 break;
1440 }
1441 ASSERT(i < mindex);
1442 last = i;
1443 mtype16m = last;
1444 mp[last].mnr_next = -1;
1445 for (i = 0; i < mindex - 1; i++) {
1446 hipfn = (pfn_t)(-1);
1447 hi = -1;
1448 /* find next highest mnode range */
1449 for (j = 0; j < mindex; j++) {
1450 if (mp[j].mnr_pfnlo > mp[last].mnr_pfnlo &&
1451 mp[j].mnr_pfnlo < hipfn) {
1452 hipfn = mp[j].mnr_pfnlo;
1453 hi = j;
1454 }
1455 }
1456 mp[hi].mnr_next = last;
1457 last = hi;
1458 }
1459 mtypetop = last;
1460 }
1461
1462 #ifndef __xpv
1463 /*
1464 * Update mnoderanges for memory hot-add DR operations.
1465 */
1466 static void
1467 mnode_range_add(int mnode)
1468 {
1469 int *prev;
1470 int n, mri;
1471 pfn_t start, end;
1472 extern void membar_sync(void);
1473
1474 ASSERT(0 <= mnode && mnode < max_mem_nodes);
1475 ASSERT(mem_node_config[mnode].exists);
1476 start = mem_node_config[mnode].physbase;
1477 end = mem_node_config[mnode].physmax;
1478 ASSERT(start <= end);
1479 mutex_enter(&mnoderange_lock);
1961 */
1962 void
1963 page_coloring_setup(caddr_t pcmemaddr)
1964 {
1965 int i;
1966 int j;
1967 int k;
1968 caddr_t addr;
1969 int colors;
1970
1971 /*
1972 * do page coloring setup
1973 */
1974 addr = pcmemaddr;
1975
1976 mnoderanges = (mnoderange_t *)addr;
1977 addr += (mnoderangecnt * sizeof (mnoderange_t));
1978
1979 mnode_range_setup(mnoderanges);
1980
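/*
 * 0xfffff is PFN_4GIG - 1, the last pfn below 4 GB, so mtype4g caches the
 * mtype containing the top of the below-4g pool.
 */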
1981 if (physmax4g)
1982 mtype4g = pfn_2_mtype(0xfffff);
1983
1984 for (k = 0; k < NPC_MUTEX; k++) {
1985 fpc_mutex[k] = (kmutex_t *)addr;
1986 addr += (max_mem_nodes * sizeof (kmutex_t));
1987 }
1988 for (k = 0; k < NPC_MUTEX; k++) {
1989 cpc_mutex[k] = (kmutex_t *)addr;
1990 addr += (max_mem_nodes * sizeof (kmutex_t));
1991 }
1992 page_freelists = (page_t ****)addr;
1993 addr += (mnoderangecnt * sizeof (page_t ***));
1994
1995 page_cachelists = (page_t ***)addr;
1996 addr += (mnoderangecnt * sizeof (page_t **));
1997
1998 for (i = 0; i < mnoderangecnt; i++) {
1999 page_freelists[i] = (page_t ***)addr;
2000 addr += (mmu_page_sizes * sizeof (page_t **));
2001
2002 for (j = 0; j < mmu_page_sizes; j++) {
2003 colors = page_get_pagecolors(j);
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 * Copyright 2019, Joyent, Inc.
28 */
29
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32
33 /*
34 * Portions of this source code were derived from Berkeley 4.3 BSD
35 * under license from the Regents of the University of California.
36 */
37
38 /*
39 * UNIX machine dependent virtual memory support.
40 */
41
42 #include <sys/types.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/user.h>
46 #include <sys/proc.h>
47 #include <sys/kmem.h>
185 #define PFN_4GIG 0x100000
186 #define PFN_16MEG 0x1000
187 /* Indices into the memory range (arch_memranges) array. */
188 #define MRI_4G 0
189 #define MRI_2G 1
190 #define MRI_16M 2
191 #define MRI_0 3
192 static pfn_t arch_memranges[NUM_MEM_RANGES] = {
193 PFN_4GIG, /* pfn range for 4G and above */
194 0x80000, /* pfn range for 2G-4G */
195 PFN_16MEG, /* pfn range for 16M-2G */
196 0x00000, /* pfn range for 0-16M */
197 };
198 pfn_t *memranges = &arch_memranges[0];
199 int nranges = NUM_MEM_RANGES;
200
201 /*
202 * This combines mem_node_config and memranges into one data
203 * structure to be used for page list management.
204 */
205 static mnoderange_t *mnoderanges;
206 static int mnoderangecnt;
207 static int mtype4g;
208 static int mtype16m;
209 static int mtypetop;
210
211 /*
212 * 4g memory management variables for systems with more than 4g of memory:
213 *
214 * physical memory below 4g is required for 32bit dma devices and, currently,
215 * for kmem memory. On systems with more than 4g of memory, the pool of memory
216 * below 4g can be depleted without any paging activity given that there is
217 * likely to be sufficient memory above 4g.
218 *
219 * physmax4g is set true if the largest pfn is over 4g. The rest of the
220 * 4g memory management code is enabled only when physmax4g is true.
221 *
222 * maxmem4g is the count of the maximum number of pages on the page lists
223 * with physical addresses below 4g. It can be a lot less than 4g given that
224 * BIOS may reserve large chunks of space below 4g for hot plug pci devices,
225 * agp aperture etc.
226 *
227 * freemem4g maintains the count of the number of available pages on the
228 * page lists with physical addresses below 4g.
229 *
246 static int physmax4g;
247 static int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */
248
249 /*
250 * 16m memory management:
251 *
252 * reserve some amount of physical memory below 16m for legacy devices.
253 *
254 * RESTRICT16M_ALLOC returns true if there are sufficient free pages above
255 * 16m or if the 16m pool would drop below DESFREE16M.
256 *
257 * In this case, general page allocations via page_get_{free,cache}list
258 * routines will be restricted from allocating from the 16m pool. Allocations
259 * that require specific pfn ranges (page_get_anylist) and PG_PANIC allocations
260 * are not restricted.
261 */
262
263 #define FREEMEM16M MTYPE_FREEMEM(mtype16m)
264 #define DESFREE16M desfree16m
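/*
 * mtype16m is -1 when there is no memory below 16m (see mnode_range_setup());
 * the leading check below then makes RESTRICT16M_ALLOC evaluate to false,
 * since there is no 16m pool to protect.
 */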
265 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \
266 (mtype16m != -1 && (freemem != 0) && ((flags & PG_PANIC) == 0) && \
267 ((freemem >= (FREEMEM16M)) || \
268 (FREEMEM16M < (DESFREE16M + pgcnt))))
269
270 static pgcnt_t desfree16m = 0x380;
271
272 /*
273 * This can be patched via /etc/system to allow old non-PAE aware device
274 * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM.
275 */
276 int restricted_kmemalloc = 0;
277
278 #ifdef VM_STATS
279 struct {
280 ulong_t pga_alloc;
281 ulong_t pga_notfullrange;
282 ulong_t pga_nulldmaattr;
283 ulong_t pga_allocok;
284 ulong_t pga_allocfailed;
285 ulong_t pgma_alloc;
286 ulong_t pgma_allocok;
1372 mri--;
1373
1374 /*
1375 * increment mnode range counter when memranges or mnode
1376 * boundary is reached.
1377 */
1378 while (mri >= 0 &&
1379 mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
1380 mnrcnt++;
1381 if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
1382 mri--;
1383 else
1384 break;
1385 }
1386 }
1387 ASSERT(mnrcnt <= MAX_MNODE_MRANGES);
1388 return (mnrcnt);
1389 #endif /* __xpv */
1390 }
1391
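/*
 * qsort() comparator: order mnoderange_t entries by ascending mnr_pfnlo.
 */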
1392 static int
1393 mnoderange_cmp(const void *v1, const void *v2)
1394 {
1395 const mnoderange_t *m1 = v1;
1396 const mnoderange_t *m2 = v2;
1397
1398 if (m1->mnr_pfnlo < m2->mnr_pfnlo)
1399 return (-1);
1400 return (m1->mnr_pfnlo > m2->mnr_pfnlo);
1401 }
1402
1403 void
1404 mnode_range_setup(mnoderange_t *mnoderanges)
1405 {
1406 mnoderange_t *mp;
1407 size_t nr_ranges;
1408 size_t mnode;
1409
1410 for (mnode = 0, nr_ranges = 0, mp = mnoderanges;
1411 mnode < max_mem_nodes; mnode++) {
1412 size_t mri = nranges - 1;
1413
1414 if (mem_node_config[mnode].exists == 0)
1415 continue;
1416
1417 while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
1418 mri--;
1419
1420 while (mri >= 0 && mem_node_config[mnode].physmax >=
1421 MEMRANGELO(mri)) {
1422 mp->mnr_pfnlo = MAX(MEMRANGELO(mri),
1423 mem_node_config[mnode].physbase);
1424 mp->mnr_pfnhi = MIN(MEMRANGEHI(mri),
1425 mem_node_config[mnode].physmax);
1426 mp->mnr_mnode = mnode;
1427 mp->mnr_memrange = mri;
1428 mp->mnr_next = -1;
1429 mp->mnr_exists = 1;
1430 mp++;
1431 nr_ranges++;
1432 if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
1433 mri--;
1434 else
1435 break;
1436 }
1437 }
1438
1439 /*
1440 * mnoderangecnt can be larger than nr_ranges when memory DR is
1441 * supposedly supported.
1442 */
1443 VERIFY3U(nr_ranges, <=, mnoderangecnt);
1444
1445 qsort(mnoderanges, nr_ranges, sizeof (mnoderange_t), mnoderange_cmp);
1446
1447 /*
1448 * If some intrepid soul takes the axe to the memory DR code, we can
1449 * remove ->mnr_next altogether, as we just sorted by ->mnr_pfnlo order.
1450 *
1451 * The VERIFY3U() above can be "==" then too.
1452 */
1453 for (size_t i = 1; i < nr_ranges; i++)
1454 mnoderanges[i].mnr_next = i - 1;
1455
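/*
 * The sorted entries ascend by mnr_pfnlo, so the last one is the highest
 * range and becomes mtypetop.  mtype16m caches the mtype covering the last
 * pfn below 16 MB and, on systems with memory above 4 GB, mtype4g caches
 * the mtype covering 0xfffff, the last pfn below 4 GB.
 */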
1456 mtypetop = nr_ranges - 1;
1457 mtype16m = pfn_2_mtype(PFN_16MEG - 1); /* Can be -1 ... */
1458 if (physmax4g)
1459 mtype4g = pfn_2_mtype(0xfffff);
1460 }
1461
1462 #ifndef __xpv
1463 /*
1464 * Update mnoderanges for memory hot-add DR operations.
1465 */
1466 static void
1467 mnode_range_add(int mnode)
1468 {
1469 int *prev;
1470 int n, mri;
1471 pfn_t start, end;
1472 extern void membar_sync(void);
1473
1474 ASSERT(0 <= mnode && mnode < max_mem_nodes);
1475 ASSERT(mem_node_config[mnode].exists);
1476 start = mem_node_config[mnode].physbase;
1477 end = mem_node_config[mnode].physmax;
1478 ASSERT(start <= end);
1479 mutex_enter(&mnoderange_lock);
1961 */
1962 void
1963 page_coloring_setup(caddr_t pcmemaddr)
1964 {
1965 int i;
1966 int j;
1967 int k;
1968 caddr_t addr;
1969 int colors;
1970
1971 /*
1972 * do page coloring setup
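 *
 * pcmemaddr points at a single pre-allocated chunk of memory; the code below
 * carves it up, in order, into the mnoderanges array, the NPC_MUTEX arrays
 * of free- and cache-list page mutexes, the page_freelists and
 * page_cachelists pointer tables, and then the per-mnoderange, per-page-size
 * lists, with page_get_pagecolors() supplying the color count for each page
 * size.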
1973 */
1974 addr = pcmemaddr;
1975
1976 mnoderanges = (mnoderange_t *)addr;
1977 addr += (mnoderangecnt * sizeof (mnoderange_t));
1978
1979 mnode_range_setup(mnoderanges);
1980
1981 for (k = 0; k < NPC_MUTEX; k++) {
1982 fpc_mutex[k] = (kmutex_t *)addr;
1983 addr += (max_mem_nodes * sizeof (kmutex_t));
1984 }
1985 for (k = 0; k < NPC_MUTEX; k++) {
1986 cpc_mutex[k] = (kmutex_t *)addr;
1987 addr += (max_mem_nodes * sizeof (kmutex_t));
1988 }
1989 page_freelists = (page_t ****)addr;
1990 addr += (mnoderangecnt * sizeof (page_t ***));
1991
1992 page_cachelists = (page_t ***)addr;
1993 addr += (mnoderangecnt * sizeof (page_t **));
1994
1995 for (i = 0; i < mnoderangecnt; i++) {
1996 page_freelists[i] = (page_t ***)addr;
1997 addr += (mmu_page_sizes * sizeof (page_t **));
1998
1999 for (j = 0; j < mmu_page_sizes; j++) {
2000 colors = page_get_pagecolors(j);