Print this page
6198 Let's EOL cachefs
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/cpr/cpr_misc.c
+++ new/usr/src/uts/common/cpr/cpr_misc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 25 */
25 26
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/callb.h>
#include <sys/fs/ufs_inode.h>
#include <vm/anon.h>
#include <sys/fs/swapnode.h>	/* for swapfs_minfree */
#include <sys/kmem.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/machclock.h>
40 41
41 42 /*
42 43 * CPR miscellaneous support routines
43 44 */
44 45 #define cpr_open(path, mode, vpp) (vn_open(path, UIO_SYSSPACE, \
45 46 mode, 0600, vpp, CRCREAT, 0))
46 47 #define cpr_rdwr(rw, vp, basep, cnt) (vn_rdwr(rw, vp, (caddr_t)(basep), \
47 48 cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
48 49 (ssize_t *)NULL))
49 50
50 51 extern void clkset(time_t);
51 52 extern cpu_t *i_cpr_bootcpu(void);
52 53 extern caddr_t i_cpr_map_setup(void);
53 54 extern void i_cpr_free_memory_resources(void);
54 55
55 56 extern kmutex_t cpr_slock;
56 57 extern size_t cpr_buf_size;
57 58 extern char *cpr_buf;
58 59 extern size_t cpr_pagedata_size;
59 60 extern char *cpr_pagedata;
60 61 extern int cpr_bufs_allocated;
61 62 extern int cpr_bitmaps_allocated;
62 63
63 64 #if defined(__sparc)
64 65 static struct cprconfig cprconfig;
65 66 static int cprconfig_loaded = 0;
66 67 static int cpr_statefile_ok(vnode_t *, int);
67 68 static int cpr_p_online(cpu_t *, int);
68 69 static void cpr_save_mp_state(void);
69 70 #endif
70 71
71 72 int cpr_is_ufs(struct vfs *);
72 73 int cpr_is_zfs(struct vfs *);
73 74
74 75 char cpr_default_path[] = CPR_DEFAULT;
75 76
76 77 #define COMPRESS_PERCENT 40 /* approx compression ratio in percent */
77 78 #define SIZE_RATE 115 /* increase size by 15% */
78 79 #define INTEGRAL 100 /* for integer math */
79 80
80 81
81 82 /*
82 83 * cmn_err() followed by a 1/4 second delay; this gives the
83 84 * logging service a chance to flush messages and helps avoid
84 85 * intermixing output from prom_printf().
85 86 */
86 87 /*PRINTFLIKE2*/
87 88 void
88 89 cpr_err(int ce, const char *fmt, ...)
89 90 {
90 91 va_list adx;
91 92
92 93 va_start(adx, fmt);
93 94 vcmn_err(ce, fmt, adx);
94 95 va_end(adx);
95 96 drv_usecwait(MICROSEC >> 2);
96 97 }
97 98
98 99
/*
 * Begin a suspend operation of type 'fcn'.
 *
 * Serializes suspend/resume by taking cpr_slock; the lock stays held
 * for the whole operation and is released in cpr_done().  Returns
 * EBUSY if another operation is in flight, EAGAIN (sparc only) if the
 * kernelmap reservation fails, otherwise 0.
 */
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	/* reset per-operation state */
	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	/*
	 * Suspend-to-RAM needs no statefile setup; return success with
	 * cpr_slock intentionally still held (cpr_done() drops it).
	 */
	if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) {
		return (0);
	}
#if defined(__sparc)
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	if (cpr_debug & CPR_DEBUG3)
		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
		    "kas\n", (void *)CPR->c_mapping_area);
#endif

	return (0);
}
140 141
141 142 /*
142 143 * This routine releases any resources used during the checkpoint.
143 144 */
144 145 void
145 146 cpr_done(void)
146 147 {
147 148 cpr_stat_cleanup();
148 149 i_cpr_bitmap_cleanup();
149 150
150 151 /*
151 152 * Free pages used by cpr buffers.
152 153 */
153 154 if (cpr_buf) {
154 155 kmem_free(cpr_buf, cpr_buf_size);
155 156 cpr_buf = NULL;
156 157 }
157 158 if (cpr_pagedata) {
158 159 kmem_free(cpr_pagedata, cpr_pagedata_size);
159 160 cpr_pagedata = NULL;
160 161 }
161 162
162 163 i_cpr_free_memory_resources();
163 164 mutex_exit(&cpr_slock);
164 165 cpr_err(CE_CONT, "System has been resumed.\n");
165 166 }
166 167
167 168
168 169 #if defined(__sparc)
/*
 * reads config data into cprconfig
 *
 * Loads the pmconfig-written config file (CPR_CONFIG) into the static
 * cprconfig structure.  The result is cached: once loaded with a good
 * magic number, later calls return 0 immediately until
 * cpr_forget_cprconfig() clears the cache.  Returns 0 on success or an
 * errno (open/read failure, or EINVAL for a bad magic number).
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	/* cached copy is valid until cpr_forget_cprconfig() */
	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	/* bad magic means the file was not written by pmconfig */
	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}
208 209
209 210
210 211 /*
211 212 * concat fs and path fields of the cprconfig structure;
212 213 * returns pointer to the base of static data
213 214 */
214 215 static char *
215 216 cpr_cprconfig_to_path(void)
216 217 {
217 218 static char full_path[MAXNAMELEN];
218 219 struct cprconfig *cf = &cprconfig;
219 220 char *ptr;
220 221
221 222 /*
222 223 * build /fs/path without extra '/'
223 224 */
224 225 (void) strcpy(full_path, cf->cf_fs);
225 226 if (strcmp(cf->cf_fs, "/"))
226 227 (void) strcat(full_path, "/");
227 228 ptr = cf->cf_path;
228 229 if (*ptr == '/')
229 230 ptr++;
230 231 (void) strcat(full_path, ptr);
231 232 return (full_path);
232 233 }
233 234
234 235
235 236 /*
236 237 * Verify that the information in the configuration file regarding the
237 238 * location for the statefile is still valid, depending on cf_type.
238 239 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
239 240 * mounted on the same device as when pmconfig was last run,
240 241 * and the translation of that device to a node in the prom's
241 242 * device tree must be the same as when pmconfig was last run.
242 243 * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block
243 244 * special file, it must have no file system mounted on it,
244 245 * and the translation of that device to a node in the prom's
245 246 * device tree must be the same as when pmconfig was last run.
246 247 */
247 248 static int
248 249 cpr_verify_statefile_path(void)
249 250 {
250 251 struct cprconfig *cf = &cprconfig;
251 252 static const char long_name[] = "Statefile pathname is too long.\n";
252 253 static const char lookup_fmt[] = "Lookup failed for "
253 254 "cpr statefile device %s.\n";
254 255 static const char path_chg_fmt[] = "Device path for statefile "
255 256 "has changed from %s to %s.\t%s\n";
256 257 static const char rerun[] = "Please rerun pmconfig(1m).";
257 258 struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
258 259 ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
259 260 ufsvfs_t *ufsvfsp_save = ufsvfsp;
260 261 int error;
261 262 struct vnode *vp;
262 263 char *slash, *tail, *longest;
263 264 char *errstr;
264 265 int found = 0;
265 266 union {
266 267 char un_devpath[OBP_MAXPATHLEN];
267 268 char un_sfpath[MAXNAMELEN];
268 269 } un;
269 270 #define devpath un.un_devpath
270 271 #define sfpath un.un_sfpath
271 272
272 273 ASSERT(cprconfig_loaded);
273 274 /*
274 275 * We need not worry about locking or the timing of releasing
275 276 * the vnode, since we are single-threaded now.
276 277 */
277 278
278 279 switch (cf->cf_type) {
279 280 case CFT_SPEC:
280 281 error = i_devname_to_promname(cf->cf_devfs, devpath,
281 282 OBP_MAXPATHLEN);
282 283 if (error || strcmp(devpath, cf->cf_dev_prom)) {
283 284 cpr_err(CE_CONT, path_chg_fmt,
284 285 cf->cf_dev_prom, devpath, rerun);
285 286 return (error);
286 287 }
287 288 /*FALLTHROUGH*/
288 289 case CFT_ZVOL:
289 290 if (strlen(cf->cf_path) > sizeof (sfpath)) {
290 291 cpr_err(CE_CONT, long_name);
291 292 return (ENAMETOOLONG);
292 293 }
293 294 if ((error = lookupname(cf->cf_devfs,
294 295 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
295 296 cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
296 297 return (error);
297 298 }
298 299 if (vp->v_type != VBLK)
299 300 errstr = "statefile must be a block device";
300 301 else if (vfs_devismounted(vp->v_rdev))
301 302 errstr = "statefile device must not "
302 303 "have a file system mounted on it";
303 304 else if (IS_SWAPVP(vp))
304 305 errstr = "statefile device must not "
305 306 "be configured as swap file";
306 307 else
307 308 errstr = NULL;
308 309
309 310 VN_RELE(vp);
310 311 if (errstr) {
311 312 cpr_err(CE_CONT, "%s.\n", errstr);
312 313 return (ENOTSUP);
313 314 }
314 315
315 316 return (error);
316 317 case CFT_UFS:
317 318 break; /* don't indent all the original code */
318 319 default:
319 320 cpr_err(CE_PANIC, "invalid cf_type");
320 321 }
321 322
322 323 /*
323 324 * The original code for UFS statefile
324 325 */
325 326 if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
326 327 cpr_err(CE_CONT, long_name);
327 328 return (ENAMETOOLONG);
328 329 }
329 330
330 331 bzero(sfpath, sizeof (sfpath));
331 332 (void) strcpy(sfpath, cpr_cprconfig_to_path());
332 333
333 334 if (*sfpath != '/') {
334 335 cpr_err(CE_CONT, "Statefile pathname %s "
335 336 "must begin with a /\n", sfpath);
336 337 return (EINVAL);
337 338 }
338 339
339 340 /*
340 341 * Find the longest prefix of the statefile pathname which
341 342 * is the mountpoint of a filesystem. This string must
342 343 * match the cf_fs field we read from the config file. Other-
343 344 * wise the user has changed things without running pmconfig.
344 345 */
345 346 tail = longest = sfpath + 1; /* pt beyond the leading "/" */
346 347 while ((slash = strchr(tail, '/')) != NULL) {
347 348 *slash = '\0'; /* temporarily terminate the string */
348 349 if ((error = lookupname(sfpath,
349 350 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
350 351 *slash = '/';
351 352 cpr_err(CE_CONT, "A directory in the "
352 353 "statefile path %s was not found.\n", sfpath);
353 354 VN_RELE(vp);
354 355
355 356 return (error);
356 357 }
357 358
358 359 vfs_list_read_lock();
359 360 vfsp = rootvfs;
360 361 do {
361 362 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
362 363 if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
363 364 found = 1;
364 365 break;
365 366 }
366 367 vfsp = vfsp->vfs_next;
367 368 } while (vfsp != rootvfs);
368 369 vfs_list_unlock();
369 370
370 371 /*
371 372 * If we have found a filesystem mounted on the current
372 373 * path prefix, remember the end of the string in
373 374 * "longest". If it happens to be the the exact fs
374 375 * saved in the configuration file, save the current
375 376 * ufsvfsp so we can make additional checks further down.
376 377 */
377 378 if (found) {
378 379 longest = slash;
379 380 if (strcmp(cf->cf_fs, sfpath) == 0) {
380 381 ufsvfsp_save = ufsvfsp;
381 382 vfsp_save = vfsp;
382 383 }
383 384 found = 0;
384 385 }
385 386
386 387 VN_RELE(vp);
387 388 *slash = '/';
388 389 tail = slash + 1;
389 390 }
390 391 *longest = '\0';
391 392 if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
392 393 cpr_err(CE_CONT, "Filesystem containing "
393 394 "the statefile when pmconfig was run (%s) has "
394 395 "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
395 396 return (EINVAL);
396 397 }
397 398
398 399 if ((error = lookupname(cf->cf_devfs,
399 400 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
400 401 cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
401 402 return (error);
402 403 }
403 404
404 405 if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
405 406 cpr_err(CE_CONT, "Filesystem containing "
406 407 "statefile no longer mounted on device %s. "
407 408 "See power.conf(4).", cf->cf_devfs);
408 409 VN_RELE(vp);
409 410 return (ENXIO);
410 411 }
411 412 VN_RELE(vp);
412 413
413 414 error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
414 415 if (error || strcmp(devpath, cf->cf_dev_prom)) {
415 416 cpr_err(CE_CONT, path_chg_fmt,
416 417 cf->cf_dev_prom, devpath, rerun);
417 418 return (error);
418 419 }
419 420
420 421 return (0);
421 422 }
422 423
423 424 /*
424 425 * Make sure that the statefile can be used as a block special statefile
425 426 * (meaning that is exists and has nothing mounted on it)
426 427 * Returns errno if not a valid statefile.
427 428 */
428 429 int
429 430 cpr_check_spec_statefile(void)
430 431 {
431 432 int err;
432 433
433 434 if (err = cpr_get_config())
434 435 return (err);
435 436 ASSERT(cprconfig.cf_type == CFT_SPEC ||
436 437 cprconfig.cf_type == CFT_ZVOL);
437 438
438 439 if (cprconfig.cf_devfs == NULL)
439 440 return (ENXIO);
440 441
441 442 return (cpr_verify_statefile_path());
442 443
443 444 }
444 445
445 446 int
446 447 cpr_alloc_statefile(int alloc_retry)
447 448 {
448 449 register int rc = 0;
449 450 char *str;
450 451
451 452 /*
452 453 * Statefile size validation. If checkpoint the first time, disk blocks
453 454 * allocation will be done; otherwise, just do file size check.
454 455 * if statefile allocation is being retried, C_VP will be inited
455 456 */
456 457 if (alloc_retry) {
457 458 str = "\n-->Retrying statefile allocation...";
458 459 if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
459 460 prom_printf(str);
460 461 if (C_VP->v_type != VBLK)
461 462 (void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
462 463 } else {
463 464 /*
464 465 * Open an exiting file for writing, the state file needs to be
465 466 * pre-allocated since we can't and don't want to do allocation
466 467 * during checkpoint (too much of the OS is disabled).
467 468 * - do a preliminary size checking here, if it is too small,
468 469 * allocate more space internally and retry.
469 470 * - check the vp to make sure it's the right type.
470 471 */
471 472 char *path = cpr_build_statefile_path();
472 473
473 474 if (path == NULL)
474 475 return (ENXIO);
475 476 else if (rc = cpr_verify_statefile_path())
476 477 return (rc);
477 478
478 479 if (rc = vn_open(path, UIO_SYSSPACE,
479 480 FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
480 481 cpr_err(CE_WARN, "cannot open statefile %s", path);
481 482 return (rc);
482 483 }
483 484 }
484 485
485 486 /*
486 487 * Only ufs and block special statefiles supported
487 488 */
488 489 if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
489 490 cpr_err(CE_CONT,
490 491 "Statefile must be regular file or block special file.");
491 492 return (EACCES);
492 493 }
493 494
494 495 if (rc = cpr_statefile_ok(C_VP, alloc_retry))
495 496 return (rc);
496 497
497 498 if (C_VP->v_type != VBLK) {
498 499 /*
499 500 * sync out the fs change due to the statefile reservation.
500 501 */
501 502 (void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());
502 503
503 504 /*
504 505 * Validate disk blocks allocation for the state file.
505 506 * Ask the file system prepare itself for the dump operation.
506 507 */
507 508 if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
508 509 cpr_err(CE_CONT, "Error allocating "
509 510 "blocks for cpr statefile.");
510 511 return (rc);
511 512 }
512 513 }
513 514 return (0);
514 515 }
515 516
516 517
517 518 /*
518 519 * Lookup device size and return available space in bytes.
519 520 * NOTE: Since prop_op(9E) can't tell the difference between a character
520 521 * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
521 522 */
522 523 size_t
523 524 cpr_get_devsize(dev_t dev)
524 525 {
525 526 size_t bytes = 0;
526 527
527 528 bytes = cdev_Size(dev);
528 529 if (bytes == 0)
529 530 bytes = cdev_size(dev);
530 531
531 532 if (bytes > CPR_SPEC_OFFSET)
532 533 bytes -= CPR_SPEC_OFFSET;
533 534 else
534 535 bytes = 0;
535 536
536 537 return (bytes);
537 538 }
538 539
539 540
/*
 * increase statefile size
 *
 * Extend vp (a UFS regular file) to at least newsize bytes by writing
 * one fs block at a time starting at the current EOF, forcing UFS to
 * reserve the disk blocks now rather than during the dump.  Returns 0
 * if the file is already large enough or on success, otherwise the
 * vn_rdwr() errno (ENOSPC additionally logs a console message).
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	/* sample i_size under i_contents to get a stable EOF */
	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	/* non-zero byte; presumably avoids all-zero writes -- verify */
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}
581 582
582 583
/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet. Since the state file will not be removed,
 * allocation should only be done once.
 *
 * Returns 0 when the statefile (device or file) can hold the
 * estimated dump, ENOMEM when it cannot or the retry budget is
 * exhausted, or an errno from cpr_grow_statefile().
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			/* grow by SIZE_RATE/INTEGRAL, i.e. 15% */
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		/* let the framebuffer callback report its dvram usage */
		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			/* kernel pages at ~COMPRESS_PERCENT, user at 2:1 */
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		/* give up after C_MAX_ALLOC_RETRY grow attempts */
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 * 	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}
729 730
730 731
731 732 void
732 733 cpr_statef_close(void)
733 734 {
734 735 if (C_VP) {
735 736 if (!cpr_reusable_mode)
736 737 (void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
737 738 (void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED(), NULL);
738 739 VN_RELE(C_VP);
739 740 C_VP = 0;
740 741 }
741 742 }
742 743
743 744
744 745 /*
745 746 * open cpr default file and display error
746 747 */
747 748 int
748 749 cpr_open_deffile(int mode, vnode_t **vpp)
749 750 {
750 751 int error;
751 752
752 753 if (error = cpr_open(cpr_default_path, mode, vpp))
753 754 cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
754 755 cpr_default_path, error);
755 756 return (error);
756 757 }
757 758
758 759
759 760 /*
760 761 * write cdef_t to disk. This contains the original values of prom
761 762 * properties that we modify. We fill in the magic number of the file
762 763 * here as a signal to the booter code that the state file is valid.
763 764 * Be sure the file gets synced, since we may be shutting down the OS.
764 765 */
765 766 int
766 767 cpr_write_deffile(cdef_t *cdef)
767 768 {
768 769 struct vnode *vp;
769 770 char *str;
770 771 int rc;
771 772
772 773 if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
773 774 return (rc);
774 775
775 776 if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
776 777 str = "write";
777 778 else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
778 779 str = "fsync";
779 780 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
780 781 VN_RELE(vp);
781 782
782 783 if (rc) {
783 784 cpr_err(CE_WARN, "%s error %d, file \"%s\"",
784 785 str, rc, cpr_default_path);
785 786 }
786 787 return (rc);
787 788 }
788 789
789 790 /*
790 791 * Clear the magic number in the defaults file. This tells the booter
791 792 * program that the state file is not current and thus prevents
792 793 * any attempt to restore from an obsolete state file.
793 794 */
794 795 void
795 796 cpr_clear_definfo(void)
796 797 {
797 798 struct vnode *vp;
798 799 cmini_t mini;
799 800
800 801 if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
801 802 cpr_open_deffile(FCREAT|FWRITE, &vp))
802 803 return;
803 804 mini.magic = mini.reusable = 0;
804 805 (void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
805 806 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
806 807 VN_RELE(vp);
807 808 }
808 809
809 810 /*
810 811 * If the cpr default file is invalid, then we must not be in reusable mode
811 812 * if it is valid, it tells us our mode
812 813 */
813 814 int
814 815 cpr_get_reusable_mode(void)
815 816 {
816 817 struct vnode *vp;
817 818 cmini_t mini;
818 819 int rc;
819 820
820 821 if (cpr_open(cpr_default_path, FREAD, &vp))
821 822 return (0);
822 823
823 824 rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
824 825 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
825 826 VN_RELE(vp);
826 827 if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
827 828 return (mini.reusable);
828 829
829 830 return (0);
830 831 }
831 832 #endif
832 833
833 834 /*
834 835 * clock/time related routines
835 836 */
836 837 static time_t cpr_time_stamp;
837 838
838 839
839 840 void
840 841 cpr_tod_get(cpr_time_t *ctp)
841 842 {
842 843 timestruc_t ts;
843 844
844 845 mutex_enter(&tod_lock);
845 846 ts = TODOP_GET(tod_ops);
846 847 mutex_exit(&tod_lock);
847 848 ctp->tv_sec = (time32_t)ts.tv_sec;
848 849 ctp->tv_nsec = (int32_t)ts.tv_nsec;
849 850 }
850 851
/* Set tod fault-status flags while holding tod_lock, as tod_status_set() requires. */
void
cpr_tod_status_set(int tod_flag)
{
	mutex_enter(&tod_lock);
	tod_status_set(tod_flag);
	mutex_exit(&tod_lock);
}
858 859
/*
 * Record the current wall-clock seconds; cpr_restore_time() uses the
 * stamp to re-seed the clock after resume.
 */
void
cpr_save_time(void)
{
	cpr_time_stamp = gethrestime_sec();
}
864 865
/*
 * correct time based on saved time stamp or hardware clock
 */
void
cpr_restore_time(void)
{
	/* stamp was captured by cpr_save_time() before suspend */
	clkset(cpr_time_stamp);
}
873 874
874 875 #if defined(__sparc)
875 876 /*
876 877 * CPU ONLINE/OFFLINE CODE
877 878 */
/*
 * Take every cpu except the boot cpu offline before suspend.  The
 * prior online/offline state of each cpu is recorded first via
 * cpr_save_mp_state() so cpr_mp_online() can restore it on resume.
 * Returns 0 on success or the errno from a failed state change.
 */
int
cpr_mp_offline(void)
{
	cpu_t *cp, *bootcpu;
	int rc = 0;
	int brought_up_boot = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	mutex_enter(&cpu_lock);

	/* remember which cpus were active so resume can restore them */
	cpr_save_mp_state();

	/* the boot cpu must be active; bring it up if it is not */
	bootcpu = i_cpr_bootcpu();
	if (!CPU_ACTIVE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
		brought_up_boot = 1;
	}

	/* walk the circular cpu list, offlining everything else */
	cp = cpu_list;
	do {
		if (cp == bootcpu)
			continue;
		if (cp->cpu_flags & CPU_OFFLINE)
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	} while ((cp = cp->cpu_next) != cpu_list);
	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
		prom_printf("changed cpu %p to state %d\n",
		    (void *)bootcpu, CPU_CPR_ONLINE);
	mutex_exit(&cpu_lock);

	return (rc);
}
922 923
/*
 * Restore cpus to the online/offline state recorded by
 * cpr_save_mp_state() at suspend time.  Also clears CPU_FROZEN on
 * every cpu other than the one we are running on.
 * NOTE(review): the current cpu's CPU_FROZEN flag is not cleared in
 * this loop -- presumably handled elsewhere; verify.
 */
int
cpr_mp_online(void)
{
	cpu_t *cp, *bootcpu = CPU;
	int rc = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	/*
	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
	 * to indicate a cpu was online at the time of cpr_suspend();
	 * now restart those cpus that were marked as CPU_CPR_ONLINE
	 * and actually are offline.
	 */
	mutex_enter(&cpu_lock);
	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
		/*
		 * Clear the CPU_FROZEN flag in all cases.
		 */
		cp->cpu_flags &= ~CPU_FROZEN;

		if (CPU_CPR_IS_OFFLINE(cp))
			continue;
		if (CPU_ACTIVE(cp))
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}

	/*
	 * turn off the boot cpu if it was offlined
	 */
	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
970 971
971 972 static void
972 973 cpr_save_mp_state(void)
973 974 {
974 975 cpu_t *cp;
975 976
976 977 ASSERT(MUTEX_HELD(&cpu_lock));
977 978
978 979 cp = cpu_list;
979 980 do {
980 981 cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
981 982 if (CPU_ACTIVE(cp))
982 983 CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
983 984 } while ((cp = cp->cpu_next) != cpu_list);
984 985 }
985 986
986 987 /*
987 988 * change cpu to online/offline
988 989 */
989 990 static int
990 991 cpr_p_online(cpu_t *cp, int state)
991 992 {
992 993 int rc;
993 994
994 995 ASSERT(MUTEX_HELD(&cpu_lock));
995 996
996 997 switch (state) {
997 998 case CPU_CPR_ONLINE:
998 999 rc = cpu_online(cp);
999 1000 break;
1000 1001 case CPU_CPR_OFFLINE:
1001 1002 rc = cpu_offline(cp, CPU_FORCED);
1002 1003 break;
1003 1004 }
1004 1005 if (rc) {
1005 1006 cpr_err(CE_WARN, "Failed to change processor %d to "
1006 1007 "state %d, (errno %d)", cp->cpu_id, state, rc);
1007 1008 }
1008 1009 return (rc);
1009 1010 }
1010 1011
1011 1012 /*
1012 1013 * Construct the pathname of the state file and return a pointer to
1013 1014 * caller. Read the config file to get the mount point of the
1014 1015 * filesystem and the pathname within fs.
1015 1016 */
1016 1017 char *
1017 1018 cpr_build_statefile_path(void)
1018 1019 {
1019 1020 struct cprconfig *cf = &cprconfig;
1020 1021
1021 1022 if (cpr_get_config())
1022 1023 return (NULL);
1023 1024
1024 1025 switch (cf->cf_type) {
1025 1026 case CFT_UFS:
1026 1027 if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
1027 1028 cpr_err(CE_CONT, "Statefile path is too long.\n");
1028 1029 return (NULL);
1029 1030 }
1030 1031 return (cpr_cprconfig_to_path());
1031 1032 case CFT_ZVOL:
1032 1033 /*FALLTHROUGH*/
1033 1034 case CFT_SPEC:
1034 1035 return (cf->cf_devfs);
1035 1036 default:
1036 1037 cpr_err(CE_PANIC, "invalid statefile type");
1037 1038 /*NOTREACHED*/
1038 1039 return (NULL);
1039 1040 }
1040 1041 }
1041 1042
1042 1043 int
1043 1044 cpr_statefile_is_spec(void)
1044 1045 {
1045 1046 if (cpr_get_config())
1046 1047 return (0);
1047 1048 return (cprconfig.cf_type == CFT_SPEC);
1048 1049 }
1049 1050
/*
 * Return the prom device path recorded for a block-special or zvol
 * statefile; only valid after cpr_get_config() has loaded a config
 * with a good magic number.
 */
char *
cpr_get_statefile_prom_path(void)
{
	struct cprconfig *cf = &cprconfig;

	ASSERT(cprconfig_loaded);
	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
	ASSERT(cf->cf_type == CFT_SPEC || cf->cf_type == CFT_ZVOL);
	return (cf->cf_dev_prom);
}
1060 1061
1061 1062
1062 1063 /*
1063 1064 * XXX The following routines need to be in the vfs source code.
1064 1065 */
1065 1066
1066 1067 int
1067 1068 cpr_is_ufs(struct vfs *vfsp)
1068 1069 {
1069 1070 char *fsname;
1070 1071
1071 1072 fsname = vfssw[vfsp->vfs_fstype].vsw_name;
1072 1073 return (strcmp(fsname, "ufs") == 0);
1073 1074 }
1074 1075
1075 1076 int
1076 1077 cpr_is_zfs(struct vfs *vfsp)
1077 1078 {
↓ open down ↓ |
1044 lines elided |
↑ open up ↑ |
1078 1079 char *fsname;
1079 1080
1080 1081 fsname = vfssw[vfsp->vfs_fstype].vsw_name;
1081 1082 return (strcmp(fsname, "zfs") == 0);
1082 1083 }
1083 1084
1084 1085 /*
1085 1086 * This is a list of file systems that are allowed to be writeable when a
1086 1087 * reusable statefile checkpoint is taken. They must not have any state that
1087 1088 * cannot be restored to consistency by simply rebooting using the checkpoint.
1088 - * (In contrast to ufs, cachefs and pcfs which have disk state that could get
1089 + * (In contrast to ufs and pcfs which have disk state that could get
1089 1090 * out of sync with the in-kernel data).
1090 1091 */
1091 1092 int
1092 1093 cpr_reusable_mount_check(void)
1093 1094 {
1094 1095 struct vfs *vfsp;
1095 1096 char *fsname;
1096 1097 char **cpp;
1097 1098 static char *cpr_writeok_fss[] = {
1098 1099 "autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
1099 1100 "proc", "tmpfs", "ctfs", "objfs", "dev", NULL
1100 1101 };
1101 1102
1102 1103 vfs_list_read_lock();
1103 1104 vfsp = rootvfs;
1104 1105 do {
1105 1106 if (vfsp->vfs_flag & VFS_RDONLY) {
1106 1107 vfsp = vfsp->vfs_next;
1107 1108 continue;
1108 1109 }
1109 1110 fsname = vfssw[vfsp->vfs_fstype].vsw_name;
1110 1111 for (cpp = cpr_writeok_fss; *cpp; cpp++) {
1111 1112 if (strcmp(fsname, *cpp) == 0)
1112 1113 break;
1113 1114 }
1114 1115 /*
1115 1116 * if the inner loop reached the NULL terminator,
1116 1117 * the current fs-type does not match any OK-type
1117 1118 */
1118 1119 if (*cpp == NULL) {
1119 1120 cpr_err(CE_CONT, "a filesystem of type %s is "
1120 1121 "mounted read/write.\nReusable statefile requires "
1121 1122 "no writeable filesystem of this type be mounted\n",
1122 1123 fsname);
1123 1124 vfs_list_unlock();
1124 1125 return (EINVAL);
1125 1126 }
1126 1127 vfsp = vfsp->vfs_next;
1127 1128 } while (vfsp != rootvfs);
1128 1129 vfs_list_unlock();
1129 1130 return (0);
1130 1131 }
1131 1132
1132 1133 /*
1133 1134 * return statefile offset in DEV_BSIZE units
1134 1135 */
1135 1136 int
1136 1137 cpr_statefile_offset(void)
1137 1138 {
1138 1139 return (cprconfig.cf_type != CFT_UFS ? btod(CPR_SPEC_OFFSET) : 0);
1139 1140 }
1140 1141
/*
 * Force a fresh read of the cprinfo per uadmin 3 call
 */
void
cpr_forget_cprconfig(void)
{
	/* invalidate the cache; cpr_get_config() will re-read the file */
	cprconfig_loaded = 0;
}
1149 1150 #endif
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX