1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/errno.h> 30 #include <sys/kmem.h> 31 #include <sys/vnode.h> 32 #include <sys/vfs_opreg.h> 33 #include <sys/swap.h> 34 #include <sys/sysmacros.h> 35 #include <sys/buf.h> 36 #include <sys/callb.h> 37 #include <sys/debug.h> 38 #include <vm/seg.h> 39 #include <sys/fs/swapnode.h> 40 #include <fs/fs_subr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/mem_config.h> 43 #include <sys/atomic.h> 44 45 extern const fs_operation_def_t swap_vnodeops_template[]; 46 47 /* 48 * swapfs_minfree is the amount of physical memory (actually remaining 49 * availrmem) that we want to keep free for the rest of the system. This 50 * means that swapfs can only grow to availrmem - swapfs_minfree. This 51 * can be set as just constant value or a certain percentage of installed 52 * physical memory. It is set in swapinit(). 53 * 54 * Users who want to change the amount of memory that can be used as swap 55 * space should do so by setting swapfs_desfree at boot time, 56 * not swapfs_minfree. 57 */ 58 59 pgcnt_t swapfs_desfree = 0; 60 pgcnt_t swapfs_minfree = 0; 61 pgcnt_t swapfs_reserve = 0; 62 63 #ifdef SWAPFS_DEBUG 64 int swapfs_debug; 65 #endif /* SWAPFS_DEBUG */ 66 67 68 static int swapfs_vpcount; 69 static kmutex_t swapfs_lock; 70 static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist; 71 72 static struct vnode **swap_vnodes; /* ptr's to swap vnodes */ 73 74 static void swap_init_mem_config(void); 75 76 static pgcnt_t initial_swapfs_desfree; 77 static pgcnt_t initial_swapfs_minfree; 78 static pgcnt_t initial_swapfs_reserve; 79 80 static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr); 81 82 static void 83 swapfs_recalc_save_initial(void) 84 { 85 initial_swapfs_desfree = swapfs_desfree; 86 initial_swapfs_minfree = swapfs_minfree; 87 initial_swapfs_reserve = swapfs_reserve; 88 } 89 90 static int 91 swapfs_recalc(pgcnt_t pgs) 92 { 93 pgcnt_t new_swapfs_desfree; 94 pgcnt_t new_swapfs_minfree; 95 pgcnt_t new_swapfs_reserve; 96 97 new_swapfs_desfree = initial_swapfs_desfree; 98 new_swapfs_minfree = initial_swapfs_minfree; 99 new_swapfs_reserve = initial_swapfs_reserve; 100 101 if (new_swapfs_desfree == 0) 102 new_swapfs_desfree = btopr(7 * 512 * 1024); /* 3-1/2Mb */; 103 104 if (new_swapfs_minfree == 0) { 105 /* 106 * We set this lower than we'd like here, 2Mb, because we 107 * always boot on swapfs. It's up to a safer value, 108 * swapfs_desfree, when/if we add physical swap devices 109 * in swapadd(). Users who want to change the amount of 110 * memory that can be used as swap space should do so by 111 * setting swapfs_desfree at boot time, not swapfs_minfree. 112 * However, swapfs_minfree is tunable by install as a 113 * workaround for bugid 1147463. 114 */ 115 new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3); 116 } 117 118 /* 119 * priv processes can reserve memory as swap as long as availrmem 120 * remains greater than swapfs_minfree; in the case of non-priv 121 * processes, memory can be reserved as swap only if availrmem 122 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus, 123 * swapfs_reserve amount of memswap is not available to non-priv 124 * processes. This protects daemons such as automounter dying 125 * as a result of application processes eating away almost entire 126 * membased swap. This safeguard becomes useless if apps are run 127 * with root access. 128 * 129 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever 130 * is greater up to the limit of 128 MB. 131 */ 132 if (new_swapfs_reserve == 0) 133 new_swapfs_reserve = MIN(btopr(128 * 1024 * 1024), 134 MAX(btopr(4 * 1024 * 1024), pgs >> 7)); 135 136 /* Test basic numeric viability. */ 137 if (new_swapfs_minfree > pgs) 138 return (0); 139 140 /* Equivalent test to anon_resvmem() check. */ 141 if (availrmem < new_swapfs_minfree) { 142 /* 143 * If ism pages are being used, then there must be agreement 144 * between these two policies. 145 */ 146 if ((availrmem > segspt_minfree) && (segspt_minfree > 0)) { 147 new_swapfs_minfree = segspt_minfree; 148 } else { 149 return (0); 150 } 151 } 152 153 swapfs_desfree = new_swapfs_desfree; 154 swapfs_minfree = new_swapfs_minfree; 155 swapfs_reserve = new_swapfs_reserve; 156 157 return (1); 158 } 159 160 /*ARGSUSED1*/ 161 int 162 swapinit(int fstype, char *name) 163 { /* reserve for mp */ 164 ssize_t sw_freelist_size = klustsize / PAGESIZE * 2; 165 int i, error; 166 167 static const fs_operation_def_t swap_vfsops[] = { 168 { VFSNAME_SYNC, { .vfs_sync = swap_sync }}, 169 { NULL, {NULL} } 170 }; 171 172 SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0); 173 mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL); 174 175 swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *), 176 KM_SLEEP); 177 178 swapfs_recalc_save_initial(); 179 if (!swapfs_recalc(physmem)) 180 cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)", 181 swapfs_minfree, physmem); 182 183 /* 184 * Arrange for a callback on memory size change. 185 */ 186 swap_init_mem_config(); 187 188 sw_ar = (struct async_reqs *) 189 kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP); 190 191 error = vfs_setfsops(fstype, swap_vfsops, NULL); 192 if (error != 0) { 193 cmn_err(CE_WARN, "swapinit: bad vfs ops template"); 194 return (error); 195 } 196 197 error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops); 198 if (error != 0) { 199 (void) vfs_freevfsops_by_type(fstype); 200 cmn_err(CE_WARN, "swapinit: bad vnode ops template"); 201 return (error); 202 } 203 sw_freelist = sw_ar; 204 for (i = 0; i < sw_freelist_size - 1; i++) 205 sw_ar[i].a_next = &sw_ar[i + 1]; 206 207 return (0); 208 } 209 210 /* 211 * Get a swapfs vnode corresponding to the specified identifier. 212 */ 213 struct vnode * 214 swapfs_getvp(ulong_t vidx) 215 { 216 struct vnode *vp; 217 218 vp = swap_vnodes[vidx]; 219 if (vp) { 220 return (vp); 221 } 222 223 mutex_enter(&swapfs_lock); 224 vp = swap_vnodes[vidx]; 225 if (vp == NULL) { 226 vp = vn_alloc(KM_SLEEP); 227 vn_setops(vp, swap_vnodeops); 228 vp->v_type = VREG; 229 vp->v_flag |= (VISSWAP|VISSWAPFS); 230 swap_vnodes[vidx] = vp; 231 swapfs_vpcount++; 232 } 233 mutex_exit(&swapfs_lock); 234 return (vp); 235 } 236 237 int swap_lo; 238 239 /*ARGSUSED*/ 240 static int 241 swap_sync(struct vfs *vfsp, short flag, struct cred *cr) 242 { 243 struct vnode *vp; 244 int i; 245 246 if (!(flag & SYNC_ALL)) 247 return (1); 248 249 /* 250 * assumes that we are the only one left to access this so that 251 * no need to use swapfs_lock (since it's staticly defined) 252 */ 253 for (i = 0; i < MAX_SWAP_VNODES; i++) { 254 vp = swap_vnodes[i]; 255 if (vp) { 256 VN_HOLD(vp); 257 (void) VOP_PUTPAGE(vp, (offset_t)0, 0, 258 (B_ASYNC | B_FREE), kcred, NULL); 259 VN_RELE(vp); 260 } 261 } 262 return (0); 263 } 264 265 extern int sw_pending_size; 266 267 /* 268 * Take an async request off the pending queue 269 */ 270 struct async_reqs * 271 sw_getreq() 272 { 273 struct async_reqs *arg; 274 275 mutex_enter(&swapfs_lock); 276 arg = sw_pendlist; 277 if (arg) { 278 sw_pendlist = arg->a_next; 279 arg->a_next = NULL; 280 sw_pending_size -= PAGESIZE; 281 } 282 ASSERT(sw_pending_size >= 0); 283 mutex_exit(&swapfs_lock); 284 return (arg); 285 } 286 287 /* 288 * Put an async request on the pending queue 289 */ 290 void 291 sw_putreq(struct async_reqs *arg) 292 { 293 /* Hold onto it */ 294 VN_HOLD(arg->a_vp); 295 296 mutex_enter(&swapfs_lock); 297 arg->a_next = sw_pendlist; 298 sw_pendlist = arg; 299 sw_pending_size += PAGESIZE; 300 mutex_exit(&swapfs_lock); 301 } 302 303 /* 304 * Put an async request back on the pending queue 305 */ 306 void 307 sw_putbackreq(struct async_reqs *arg) 308 { 309 mutex_enter(&swapfs_lock); 310 arg->a_next = sw_pendlist; 311 sw_pendlist = arg; 312 sw_pending_size += PAGESIZE; 313 mutex_exit(&swapfs_lock); 314 } 315 316 /* 317 * Take an async request structure off the free list 318 */ 319 struct async_reqs * 320 sw_getfree() 321 { 322 struct async_reqs *arg; 323 324 mutex_enter(&swapfs_lock); 325 arg = sw_freelist; 326 if (arg) { 327 sw_freelist = arg->a_next; 328 arg->a_next = NULL; 329 } 330 mutex_exit(&swapfs_lock); 331 return (arg); 332 } 333 334 /* 335 * Put an async request structure on the free list 336 */ 337 void 338 sw_putfree(struct async_reqs *arg) 339 { 340 /* Release our hold - should have locked the page by now */ 341 VN_RELE(arg->a_vp); 342 343 mutex_enter(&swapfs_lock); 344 arg->a_next = sw_freelist; 345 sw_freelist = arg; 346 mutex_exit(&swapfs_lock); 347 } 348 349 static pgcnt_t swapfs_pending_delete; 350 351 /*ARGSUSED*/ 352 static void 353 swap_mem_config_post_add( 354 void *arg, 355 pgcnt_t delta_swaps) 356 { 357 (void) swapfs_recalc(physmem - swapfs_pending_delete); 358 } 359 360 /*ARGSUSED*/ 361 static int 362 swap_mem_config_pre_del( 363 void *arg, 364 pgcnt_t delta_swaps) 365 { 366 pgcnt_t nv; 367 368 nv = atomic_add_long_nv(&swapfs_pending_delete, (spgcnt_t)delta_swaps); 369 if (!swapfs_recalc(physmem - nv)) { 370 /* 371 * Tidy-up is done by the call to post_del which 372 * is always made. 373 */ 374 cmn_err(CE_NOTE, "Memory operation refused to ensure system " 375 "doesn't deadlock due to excessive consumption by swapfs."); 376 return (EBUSY); 377 } 378 return (0); 379 } 380 381 /*ARGSUSED*/ 382 static void 383 swap_mem_config_post_del( 384 void *arg, 385 pgcnt_t delta_swaps, 386 int cancelled) 387 { 388 pgcnt_t nv; 389 390 nv = atomic_add_long_nv(&swapfs_pending_delete, -(spgcnt_t)delta_swaps); 391 (void) swapfs_recalc(physmem - nv); 392 } 393 394 static kphysm_setup_vector_t swap_mem_config_vec = { 395 KPHYSM_SETUP_VECTOR_VERSION, 396 swap_mem_config_post_add, 397 swap_mem_config_pre_del, 398 swap_mem_config_post_del, 399 }; 400 401 static void 402 swap_init_mem_config(void) 403 { 404 int ret; 405 406 ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL); 407 ASSERT(ret == 0); 408 }