1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <deflt.h>
#include <time.h>
#include <syslog.h>
#include <stropts.h>
#include <pthread.h>
#include <limits.h>
#include <atomic.h>
#include <libnvpair.h>
#include <libintl.h>
#include <sys/mem.h>
#include <sys/statvfs.h>
#include <sys/dumphdr.h>
#include <sys/dumpadm.h>
#include <sys/compress.h>
#include <sys/panic.h>
#include <sys/sysmacros.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <bzip2/bzlib.h>
#include <sys/fm/util.h>
#include <fm/libfmevent.h>
#include <sys/int_fmtio.h>


/* fread/fwrite buffer size (1 MB) */
#define	FBUFSIZE		(1ULL << 20)

/* minimum size for output buffering (128 KB) */
#define	MINCOREBLKSIZE		(1ULL << 17)

/* create this file if metrics collection is enabled in the kernel */
#define	METRICSFILE "METRICS.csv"

/*
 * File-scope state shared by the routines below.
 */
static char	progname[9] = "savecore";
static char	*savedir;		/* savecore directory */
static char	*dumpfile;		/* source of raw crash dump */
static long	bounds = -1;		/* numeric suffix */
static long	pagesize;		/* dump pagesize */
static int	dumpfd = -1;		/* dumpfile descriptor */
static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
static boolean_t dump_incomplete;	/* dumphdr indicates incomplete */
static boolean_t fm_panic;		/* dump is the result of fm_panic */
static offset_t	endoff;			/* offset of end-of-dump header */
static int	verbose;		/* chatty mode */
static int	disregard_valid_flag;	/* disregard valid flag */
static int	livedump;		/* dump the current running system */
static int	interactive;		/* user invoked; no syslog */
static int	csave;			/* save dump compressed */
static int	filemode;		/* processing file, not dump device */
static int	percent_done;		/* last percentage reported */
static hrtime_t	startts;		/* timestamp at start */
/*
 * "saved" is bumped for every page decompressed by lzjbblock/bz2block,
 * including pages that turn out to be all zero; "zpages" counts only
 * the latter.
 */
static volatile uint64_t saved;		/* count of pages processed */
static volatile uint64_t zpages;	/* count of zero pages not written */
static dumpdatahdr_t datahdr;		/* compression info */
static long	coreblksize;		/* preferred write size (st_blksize) */
static int	cflag;			/* run as savecore -c */
static int	mflag;			/* run as savecore -m */

/*
 * Payload information for the events we raise.  These are used
 * in raise_event to determine what payload to include.  Each value is
 * a distinct bit so that they can be OR-ed into sce_payload.
 */
#define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
#define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
#define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
#define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
#define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
#define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
#define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
#define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
#define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
#define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
#define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
#define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag?
*/ 109 110 enum sc_event_type { 111 SC_EVENT_DUMP_PENDING, 112 SC_EVENT_SAVECORE_FAILURE, 113 SC_EVENT_DUMP_AVAILABLE 114 }; 115 116 /* 117 * Common payload 118 */ 119 #define _SC_PAYLOAD_CMN \ 120 SC_PAYLOAD_IMAGEUUID | \ 121 SC_PAYLOAD_CRASHTIME | \ 122 SC_PAYLOAD_PANICSTR | \ 123 SC_PAYLOAD_PANICSTACK | \ 124 SC_PAYLOAD_DUMPCOMPLETE | \ 125 SC_PAYLOAD_FM_PANIC | \ 126 SC_PAYLOAD_SAVEDIR 127 128 static const struct { 129 const char *sce_subclass; 130 uint32_t sce_payload; 131 } sc_event[] = { 132 /* 133 * SC_EVENT_DUMP_PENDING 134 */ 135 { 136 "dump_pending_on_device", 137 _SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN | 138 SC_PAYLOAD_JUSTCHECKING 139 }, 140 141 /* 142 * SC_EVENT_SAVECORE_FAILURE 143 */ 144 { 145 "savecore_failure", 146 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON 147 }, 148 149 /* 150 * SC_EVENT_DUMP_AVAILABLE 151 */ 152 { 153 "dump_available", 154 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED 155 }, 156 }; 157 158 static void raise_event(enum sc_event_type, char *); 159 160 static void 161 usage(void) 162 { 163 (void) fprintf(stderr, 164 "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname); 165 exit(1); 166 } 167 168 #define SC_SL_NONE 0x0001 /* no syslog */ 169 #define SC_SL_ERR 0x0002 /* syslog if !interactive, LOG_ERR */ 170 #define SC_SL_WARN 0x0004 /* syslog if !interactive, LOG_WARNING */ 171 #define SC_IF_VERBOSE 0x0008 /* message only if -v */ 172 #define SC_IF_ISATTY 0x0010 /* message only if interactive */ 173 #define SC_EXIT_OK 0x0020 /* exit(0) */ 174 #define SC_EXIT_ERR 0x0040 /* exit(1) */ 175 #define SC_EXIT_PEND 0x0080 /* exit(2) */ 176 #define SC_EXIT_FM 0x0100 /* exit(3) */ 177 178 #define _SC_ALLEXIT (SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM) 179 180 static void 181 logprint(uint32_t flags, char *message, ...) 
182 { 183 va_list args; 184 char buf[1024]; 185 int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0); 186 int do_ifverb = (flags & SC_IF_VERBOSE) && verbose; 187 int do_ifisatty = (flags & SC_IF_ISATTY) && interactive; 188 int code; 189 static int logprint_raised = 0; 190 191 if (do_always || do_ifverb || do_ifisatty) { 192 va_start(args, message); 193 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 194 (void) vsnprintf(buf, sizeof (buf), message, args); 195 (void) fprintf(stderr, "%s: %s\n", progname, buf); 196 if (!interactive) { 197 switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) { 198 case SC_SL_ERR: 199 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 200 syslog(LOG_ERR, buf); 201 break; 202 203 case SC_SL_WARN: 204 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 205 syslog(LOG_WARNING, buf); 206 break; 207 208 default: 209 break; 210 } 211 } 212 va_end(args); 213 } 214 215 switch (flags & _SC_ALLEXIT) { 216 case 0: 217 return; 218 219 case SC_EXIT_OK: 220 code = 0; 221 break; 222 223 case SC_EXIT_PEND: 224 /* 225 * Raise an ireport saying why we are exiting. Do not 226 * raise if run as savecore -m. If something in the 227 * raise_event codepath calls logprint avoid recursion. 228 */ 229 if (!mflag && logprint_raised++ == 0) 230 raise_event(SC_EVENT_SAVECORE_FAILURE, buf); 231 code = 2; 232 break; 233 234 case SC_EXIT_FM: 235 code = 3; 236 break; 237 238 case SC_EXIT_ERR: 239 default: 240 if (!mflag && logprint_raised++ == 0) 241 raise_event(SC_EVENT_SAVECORE_FAILURE, buf); 242 code = 1; 243 break; 244 } 245 246 exit(code); 247 } 248 249 /* 250 * System call / libc wrappers that exit on error. 
251 */ 252 static int 253 Open(const char *name, int oflags, mode_t mode) 254 { 255 int fd; 256 257 if ((fd = open64(name, oflags, mode)) == -1) 258 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s", 259 name, strerror(errno)); 260 return (fd); 261 } 262 263 static void 264 Fread(void *buf, size_t size, FILE *f) 265 { 266 if (fread(buf, size, 1, f) != 1) 267 logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: ferror %d feof %d", 268 ferror(f), feof(f)); 269 } 270 271 static void 272 Fwrite(void *buf, size_t size, FILE *f) 273 { 274 if (fwrite(buf, size, 1, f) != 1) 275 logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s", 276 strerror(errno)); 277 } 278 279 static void 280 Fseek(offset_t off, FILE *f) 281 { 282 if (fseeko64(f, off, SEEK_SET) != 0) 283 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s", 284 strerror(errno)); 285 } 286 287 typedef struct stat64 Stat_t; 288 289 static void 290 Fstat(int fd, Stat_t *sb, const char *fname) 291 { 292 if (fstat64(fd, sb) != 0) 293 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname, 294 strerror(errno)); 295 } 296 297 static void 298 Stat(const char *fname, Stat_t *sb) 299 { 300 if (stat64(fname, sb) != 0) 301 logprint(SC_SL_ERR | SC_EXIT_ERR, "stat(\"%s\"): %s", fname, 302 strerror(errno)); 303 } 304 305 static void 306 Pread(int fd, void *buf, size_t size, offset_t off) 307 { 308 ssize_t sz = pread64(fd, buf, size, off); 309 310 if (sz < 0) 311 logprint(SC_SL_ERR | SC_EXIT_ERR, 312 "pread: %s", strerror(errno)); 313 else if (sz != size) 314 logprint(SC_SL_ERR | SC_EXIT_ERR, 315 "pread: size %ld != %ld", sz, size); 316 } 317 318 static void 319 Pwrite(int fd, void *buf, size_t size, off64_t off) 320 { 321 if (pwrite64(fd, buf, size, off) != size) 322 logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s", 323 strerror(errno)); 324 } 325 326 static void * 327 Zalloc(size_t size) 328 { 329 void *buf; 330 331 if ((buf = calloc(size, 1)) == NULL) 332 logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s", 333 strerror(errno)); 334 return (buf); 
335 } 336 337 static long 338 read_number_from_file(const char *filename, long default_value) 339 { 340 long file_value = -1; 341 FILE *fp; 342 343 if ((fp = fopen(filename, "r")) != NULL) { 344 (void) fscanf(fp, "%ld", &file_value); 345 (void) fclose(fp); 346 } 347 return (file_value < 0 ? default_value : file_value); 348 } 349 350 static void 351 read_dumphdr(void) 352 { 353 if (filemode) 354 dumpfd = Open(dumpfile, O_RDONLY, 0644); 355 else 356 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 357 endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET; 358 Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 359 Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr)); 360 361 pagesize = dumphdr.dump_pagesize; 362 363 if (dumphdr.dump_magic != DUMP_MAGIC) 364 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x", 365 dumphdr.dump_magic); 366 367 if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag) 368 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK, 369 "dump already processed"); 370 371 if (dumphdr.dump_version != DUMP_VERSION) 372 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND, 373 "dump version (%d) != %s version (%d)", 374 dumphdr.dump_version, progname, DUMP_VERSION); 375 376 if (dumphdr.dump_wordsize != DUMP_WORDSIZE) 377 logprint(SC_SL_NONE | SC_EXIT_PEND, 378 "dump is from %u-bit kernel - cannot save on %u-bit kernel", 379 dumphdr.dump_wordsize, DUMP_WORDSIZE); 380 381 if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) { 382 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION) 383 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND, 384 "dump data version (%d) != %s data version (%d)", 385 datahdr.dump_datahdr_version, progname, 386 DUMP_DATAHDR_VERSION); 387 } else { 388 (void) memset(&datahdr, 0, sizeof (datahdr)); 389 datahdr.dump_maxcsize = pagesize; 390 } 391 392 /* 393 * Read the initial header, clear the valid bits, and compare headers. 
394 * The main header may have been overwritten by swapping if we're 395 * using a swap partition as the dump device, in which case we bail. 396 */ 397 Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start); 398 399 corehdr.dump_flags &= ~DF_VALID; 400 dumphdr.dump_flags &= ~DF_VALID; 401 402 if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) { 403 /* 404 * Clear valid bit so we don't complain on every invocation. 405 */ 406 if (!filemode) 407 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 408 logprint(SC_SL_ERR | SC_EXIT_ERR, 409 "initial dump header corrupt"); 410 } 411 } 412 413 static void 414 check_space(int csave) 415 { 416 struct statvfs fsb; 417 int64_t spacefree, dumpsize, minfree, datasize; 418 419 if (statvfs(".", &fsb) < 0) 420 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s", 421 strerror(errno)); 422 423 dumpsize = dumphdr.dump_data - dumphdr.dump_start; 424 datasize = dumphdr.dump_npages * pagesize; 425 if (!csave) 426 dumpsize += datasize; 427 else 428 dumpsize += datahdr.dump_data_csize; 429 430 spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize; 431 minfree = 1024LL * read_number_from_file("minfree", 1024); 432 if (spacefree < minfree + dumpsize) { 433 logprint(SC_SL_ERR | SC_EXIT_ERR, 434 "not enough space in %s (%lld MB avail, %lld MB needed)", 435 savedir, spacefree >> 20, (minfree + dumpsize) >> 20); 436 } 437 } 438 439 static void 440 build_dump_map(int corefd, const pfn_t *pfn_table) 441 { 442 long i; 443 static long misses = 0; 444 size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t); 445 mem_vtop_t vtop; 446 dump_map_t *dmp = Zalloc(dump_mapsize); 447 char *inbuf = Zalloc(FBUFSIZE); 448 FILE *in = fdopen(dup(dumpfd), "rb"); 449 450 (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE); 451 Fseek(dumphdr.dump_map, in); 452 453 corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize); 454 455 for (i = 0; i < corehdr.dump_nvtop; i++) { 456 long first = 0; 457 long last = corehdr.dump_npages - 1; 458 
long middle; 459 pfn_t pfn; 460 uintptr_t h; 461 462 Fread(&vtop, sizeof (mem_vtop_t), in); 463 while (last >= first) { 464 middle = (first + last) / 2; 465 pfn = pfn_table[middle]; 466 if (pfn == vtop.m_pfn) 467 break; 468 if (pfn < vtop.m_pfn) 469 first = middle + 1; 470 else 471 last = middle - 1; 472 } 473 if (pfn != vtop.m_pfn) { 474 if (++misses <= 10) 475 (void) fprintf(stderr, 476 "pfn %ld not found for as=%p, va=%p\n", 477 vtop.m_pfn, (void *)vtop.m_as, vtop.m_va); 478 continue; 479 } 480 481 dmp[i].dm_as = vtop.m_as; 482 dmp[i].dm_va = (uintptr_t)vtop.m_va; 483 dmp[i].dm_data = corehdr.dump_data + 484 ((uint64_t)middle << corehdr.dump_pageshift); 485 486 h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va); 487 dmp[i].dm_next = dmp[h].dm_first; 488 dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t); 489 } 490 491 Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map); 492 free(dmp); 493 (void) fclose(in); 494 free(inbuf); 495 } 496 497 /* 498 * Copy whole sections of the dump device to the file. 499 */ 500 static void 501 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf, 502 size_t sz) 503 { 504 size_t nr; 505 offset_t off = *offp; 506 507 while (nb > 0) { 508 nr = sz < nb ? sz : (size_t)nb; 509 Pread(dumpfd, buf, nr, dumpoff); 510 Pwrite(fd, buf, nr, off); 511 off += nr; 512 dumpoff += nr; 513 nb -= nr; 514 } 515 *offp = off; 516 } 517 518 /* 519 * Copy pages when the dump data header is missing. 520 * This supports older kernels with latest savecore. 
521 */ 522 static void 523 CopyPages(offset_t *offp, int fd, char *buf, size_t sz) 524 { 525 uint32_t csize; 526 FILE *in = fdopen(dup(dumpfd), "rb"); 527 FILE *out = fdopen(dup(fd), "wb"); 528 char *cbuf = Zalloc(pagesize); 529 char *outbuf = Zalloc(FBUFSIZE); 530 pgcnt_t np = dumphdr.dump_npages; 531 532 (void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE); 533 (void) setvbuf(in, buf, _IOFBF, sz); 534 Fseek(dumphdr.dump_data, in); 535 536 Fseek(*offp, out); 537 while (np > 0) { 538 Fread(&csize, sizeof (uint32_t), in); 539 Fwrite(&csize, sizeof (uint32_t), out); 540 *offp += sizeof (uint32_t); 541 if (csize > pagesize || csize == 0) { 542 logprint(SC_SL_ERR, 543 "CopyPages: page %lu csize %d (0x%x) pagesize %d", 544 dumphdr.dump_npages - np, csize, csize, 545 pagesize); 546 break; 547 } 548 Fread(cbuf, csize, in); 549 Fwrite(cbuf, csize, out); 550 *offp += csize; 551 np--; 552 } 553 (void) fclose(in); 554 (void) fclose(out); 555 free(outbuf); 556 free(buf); 557 } 558 559 /* 560 * Concatenate dump contents into a new file. 561 * Update corehdr with new offsets. 562 */ 563 static void 564 copy_crashfile(const char *corefile) 565 { 566 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 567 size_t bufsz = FBUFSIZE; 568 char *inbuf = Zalloc(bufsz); 569 offset_t coreoff; 570 size_t nb; 571 572 logprint(SC_SL_ERR | SC_IF_VERBOSE, 573 "Copying %s to %s/%s\n", dumpfile, savedir, corefile); 574 575 /* 576 * This dump file is still compressed 577 */ 578 corehdr.dump_flags |= DF_COMPRESSED | DF_VALID; 579 580 /* 581 * Leave room for corehdr, it is updated and written last 582 */ 583 corehdr.dump_start = 0; 584 coreoff = sizeof (corehdr); 585 586 /* 587 * Read in the compressed symbol table, copy it to corefile. 588 */ 589 coreoff = roundup(coreoff, pagesize); 590 corehdr.dump_ksyms = coreoff; 591 Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd, 592 inbuf, bufsz); 593 594 /* 595 * Save the pfn table. 
596 */ 597 coreoff = roundup(coreoff, pagesize); 598 corehdr.dump_pfn = coreoff; 599 Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff, 600 corefd, inbuf, bufsz); 601 602 /* 603 * Save the dump map. 604 */ 605 coreoff = roundup(coreoff, pagesize); 606 corehdr.dump_map = coreoff; 607 Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t), 608 &coreoff, corefd, inbuf, bufsz); 609 610 /* 611 * Save the data pages. 612 */ 613 coreoff = roundup(coreoff, pagesize); 614 corehdr.dump_data = coreoff; 615 if (datahdr.dump_data_csize != 0) 616 Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff, 617 corefd, inbuf, bufsz); 618 else 619 CopyPages(&coreoff, corefd, inbuf, bufsz); 620 621 /* 622 * Now write the modified dump header to front and end of the copy. 623 * Make it look like a valid dump device. 624 * 625 * From dumphdr.h: Two headers are written out: one at the 626 * beginning of the dump, and the other at the very end of the 627 * dump device. The terminal header is at a known location 628 * (end of device) so we can always find it. 629 * 630 * Pad with zeros to each DUMP_OFFSET boundary. 631 */ 632 (void) memset(inbuf, 0, DUMP_OFFSET); 633 634 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 635 if (nb > 0) { 636 Pwrite(corefd, inbuf, nb, coreoff); 637 coreoff += nb; 638 } 639 640 Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff); 641 coreoff += sizeof (corehdr); 642 643 Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff); 644 coreoff += sizeof (datahdr); 645 646 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 647 if (nb > 0) { 648 Pwrite(corefd, inbuf, nb, coreoff); 649 } 650 651 free(inbuf); 652 Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start); 653 654 /* 655 * Write out the modified dump header to the dump device. 656 * The dump device has been processed, so DF_VALID is clear. 
657 */ 658 if (!filemode) 659 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 660 661 (void) close(corefd); 662 } 663 664 /* 665 * compressed streams 666 */ 667 typedef struct blockhdr blockhdr_t; 668 typedef struct block block_t; 669 670 struct blockhdr { 671 block_t *head; 672 block_t *tail; 673 }; 674 675 struct block { 676 block_t *next; 677 char *block; 678 int size; 679 }; 680 681 typedef enum streamstate { 682 STREAMSTART, 683 STREAMPAGES 684 } streamstate_t; 685 686 typedef struct stream { 687 streamstate_t state; 688 int init; 689 int tag; 690 int bound; 691 int nout; 692 char *blkbuf; 693 blockhdr_t blocks; 694 pgcnt_t pagenum; 695 pgcnt_t curpage; 696 pgcnt_t npages; 697 pgcnt_t done; 698 bz_stream strm; 699 dumpcsize_t sc; 700 dumpstreamhdr_t sh; 701 } stream_t; 702 703 static stream_t *streams; 704 static stream_t *endstreams; 705 706 const int cs = sizeof (dumpcsize_t); 707 708 typedef struct tinfo { 709 pthread_t tid; 710 int corefd; 711 } tinfo_t; 712 713 static int threads_stop; 714 static int threads_active; 715 static tinfo_t *tinfo; 716 static tinfo_t *endtinfo; 717 718 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; 719 static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER; 720 static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER; 721 static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER; 722 723 static blockhdr_t freeblocks; 724 725 static void 726 enqt(blockhdr_t *h, block_t *b) 727 { 728 b->next = NULL; 729 if (h->tail == NULL) 730 h->head = b; 731 else 732 h->tail->next = b; 733 h->tail = b; 734 } 735 736 static block_t * 737 deqh(blockhdr_t *h) 738 { 739 block_t *b = h->head; 740 741 if (b != NULL) { 742 h->head = b->next; 743 if (h->head == NULL) 744 h->tail = NULL; 745 } 746 return (b); 747 } 748 749 static void *runstreams(void *arg); 750 751 static void 752 initstreams(int corefd, int nstreams, int maxcsize) 753 { 754 int nthreads; 755 int nblocks; 756 int i; 757 block_t *b; 758 tinfo_t *t; 759 760 nthreads = 
sysconf(_SC_NPROCESSORS_ONLN); 761 if (nstreams < nthreads) 762 nthreads = nstreams; 763 if (nthreads < 1) 764 nthreads = 1; 765 nblocks = nthreads * 2; 766 767 tinfo = Zalloc(nthreads * sizeof (tinfo_t)); 768 endtinfo = &tinfo[nthreads]; 769 770 /* init streams */ 771 streams = Zalloc(nstreams * sizeof (stream_t)); 772 endstreams = &streams[nstreams]; 773 774 /* init stream block buffers */ 775 for (i = 0; i < nblocks; i++) { 776 b = Zalloc(sizeof (block_t)); 777 b->block = Zalloc(maxcsize); 778 enqt(&freeblocks, b); 779 } 780 781 /* init worker threads */ 782 (void) pthread_mutex_lock(&lock); 783 threads_active = 1; 784 threads_stop = 0; 785 for (t = tinfo; t != endtinfo; t++) { 786 t->corefd = dup(corefd); 787 if (t->corefd < 0) { 788 nthreads = t - tinfo; 789 endtinfo = t; 790 break; 791 } 792 if (pthread_create(&t->tid, NULL, runstreams, t) != 0) 793 logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s", 794 strerror(errno)); 795 } 796 (void) pthread_mutex_unlock(&lock); 797 } 798 799 static void 800 sbarrier() 801 { 802 stream_t *s; 803 804 (void) pthread_mutex_lock(&lock); 805 for (s = streams; s != endstreams; s++) { 806 while (s->bound || s->blocks.head != NULL) 807 (void) pthread_cond_wait(&cvbarrier, &lock); 808 } 809 (void) pthread_mutex_unlock(&lock); 810 } 811 812 static void 813 stopstreams() 814 { 815 tinfo_t *t; 816 817 if (threads_active) { 818 sbarrier(); 819 (void) pthread_mutex_lock(&lock); 820 threads_stop = 1; 821 (void) pthread_cond_signal(&cvwork); 822 (void) pthread_mutex_unlock(&lock); 823 for (t = tinfo; t != endtinfo; t++) 824 (void) pthread_join(t->tid, NULL); 825 free(tinfo); 826 tinfo = NULL; 827 threads_active = 0; 828 } 829 } 830 831 static block_t * 832 getfreeblock() 833 { 834 block_t *b; 835 836 (void) pthread_mutex_lock(&lock); 837 while ((b = deqh(&freeblocks)) == NULL) 838 (void) pthread_cond_wait(&cvfree, &lock); 839 (void) pthread_mutex_unlock(&lock); 840 return (b); 841 } 842 843 /* data page offset from page number */ 
844 #define BTOP(b) ((b) >> dumphdr.dump_pageshift) 845 #define PTOB(p) ((p) << dumphdr.dump_pageshift) 846 #define DATAOFF(p) (corehdr.dump_data + PTOB(p)) 847 848 /* check for coreblksize boundary */ 849 static int 850 isblkbnd(pgcnt_t pgnum) 851 { 852 return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0); 853 } 854 855 static int 856 iszpage(char *buf) 857 { 858 size_t sz; 859 uint64_t *pl; 860 861 /*LINTED:E_BAD_PTR_CAST_ALIGN*/ 862 pl = (uint64_t *)(buf); 863 for (sz = 0; sz < pagesize; sz += sizeof (*pl)) 864 if (*pl++ != 0) 865 return (0); 866 return (1); 867 } 868 869 volatile uint_t *hist; 870 871 /* write pages to the core file */ 872 static void 873 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np) 874 { 875 atomic_inc_uint(&hist[np]); 876 if (np > 0) 877 Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum)); 878 } 879 880 /* 881 * Process one lzjb block. 882 * No object (stream header or page) will be split over a block boundary. 883 */ 884 static void 885 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz) 886 { 887 int in = 0; 888 int csize; 889 int doflush; 890 char *out; 891 size_t dsize; 892 dumpcsize_t sc; 893 dumpstreamhdr_t sh; 894 895 if (!s->init) { 896 s->init = 1; 897 if (s->blkbuf == NULL) 898 s->blkbuf = Zalloc(coreblksize); 899 s->state = STREAMSTART; 900 } 901 while (in < blocksz) { 902 switch (s->state) { 903 case STREAMSTART: 904 (void) memcpy(&sh, block + in, sizeof (sh)); 905 in += sizeof (sh); 906 if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0) 907 logprint(SC_SL_ERR | SC_EXIT_ERR, 908 "LZJB STREAMSTART: bad stream header"); 909 if (sh.stream_npages > datahdr.dump_maxrange) 910 logprint(SC_SL_ERR | SC_EXIT_ERR, 911 "LZJB STREAMSTART: bad range: %d > %d", 912 sh.stream_npages, datahdr.dump_maxrange); 913 s->pagenum = sh.stream_pagenum; 914 s->npages = sh.stream_npages; 915 s->curpage = s->pagenum; 916 s->nout = 0; 917 s->done = 0; 918 s->state = STREAMPAGES; 919 break; 920 case STREAMPAGES: 921 (void) memcpy(&sc, block 
+ in, cs); 922 in += cs; 923 csize = DUMP_GET_CSIZE(sc); 924 if (csize > pagesize) 925 logprint(SC_SL_ERR | SC_EXIT_ERR, 926 "LZJB STREAMPAGES: bad csize=%d", csize); 927 928 out = s->blkbuf + PTOB(s->nout); 929 dsize = decompress(block + in, out, csize, pagesize); 930 931 if (dsize != pagesize) 932 logprint(SC_SL_ERR | SC_EXIT_ERR, 933 "LZJB STREAMPAGES: dsize %d != pagesize %d", 934 dsize, pagesize); 935 936 in += csize; 937 atomic_inc_64(&saved); 938 939 doflush = 0; 940 if (s->nout == 0 && iszpage(out)) { 941 doflush = 1; 942 atomic_inc_64(&zpages); 943 } else if (++s->nout >= BTOP(coreblksize) || 944 isblkbnd(s->curpage + s->nout)) { 945 doflush = 1; 946 } 947 if (++s->done >= s->npages) { 948 s->state = STREAMSTART; 949 doflush = 1; 950 } 951 if (doflush) { 952 putpage(corefd, s->blkbuf, s->curpage, s->nout); 953 s->nout = 0; 954 s->curpage = s->pagenum + s->done; 955 } 956 break; 957 } 958 } 959 } 960 961 /* bzlib library reports errors with this callback */ 962 void 963 bz_internal_error(int errcode) 964 { 965 logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n", 966 BZ2_bzErrorString(errcode)); 967 } 968 969 /* 970 * Return one object in the stream. 971 * 972 * An object (stream header or page) will likely span an input block 973 * of compression data. Return non-zero when an entire object has been 974 * retrieved from the stream. 975 */ 976 static int 977 bz2decompress(stream_t *s, void *buf, size_t size) 978 { 979 int rc; 980 981 if (s->strm.avail_out == 0) { 982 s->strm.next_out = buf; 983 s->strm.avail_out = size; 984 } 985 while (s->strm.avail_in > 0) { 986 rc = BZ2_bzDecompress(&s->strm); 987 if (rc == BZ_STREAM_END) { 988 rc = BZ2_bzDecompressReset(&s->strm); 989 if (rc != BZ_OK) 990 logprint(SC_SL_ERR | SC_EXIT_ERR, 991 "BZ2_bzDecompressReset: %s", 992 BZ2_bzErrorString(rc)); 993 continue; 994 } 995 996 if (s->strm.avail_out == 0) 997 break; 998 } 999 return (s->strm.avail_out == 0); 1000 } 1001 1002 /* 1003 * Process one bzip2 block. 
1004 * The interface is documented here: 1005 * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html 1006 */ 1007 static void 1008 bz2block(int corefd, stream_t *s, char *block, size_t blocksz) 1009 { 1010 int rc = 0; 1011 int doflush; 1012 char *out; 1013 1014 if (!s->init) { 1015 s->init = 1; 1016 rc = BZ2_bzDecompressInit(&s->strm, 0, 0); 1017 if (rc != BZ_OK) 1018 logprint(SC_SL_ERR | SC_EXIT_ERR, 1019 "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc)); 1020 if (s->blkbuf == NULL) 1021 s->blkbuf = Zalloc(coreblksize); 1022 s->strm.avail_out = 0; 1023 s->state = STREAMSTART; 1024 } 1025 s->strm.next_in = block; 1026 s->strm.avail_in = blocksz; 1027 1028 while (s->strm.avail_in > 0) { 1029 switch (s->state) { 1030 case STREAMSTART: 1031 if (!bz2decompress(s, &s->sh, sizeof (s->sh))) 1032 return; 1033 if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0) 1034 logprint(SC_SL_ERR | SC_EXIT_ERR, 1035 "BZ2 STREAMSTART: bad stream header"); 1036 if (s->sh.stream_npages > datahdr.dump_maxrange) 1037 logprint(SC_SL_ERR | SC_EXIT_ERR, 1038 "BZ2 STREAMSTART: bad range: %d > %d", 1039 s->sh.stream_npages, datahdr.dump_maxrange); 1040 s->pagenum = s->sh.stream_pagenum; 1041 s->npages = s->sh.stream_npages; 1042 s->curpage = s->pagenum; 1043 s->nout = 0; 1044 s->done = 0; 1045 s->state = STREAMPAGES; 1046 break; 1047 case STREAMPAGES: 1048 out = s->blkbuf + PTOB(s->nout); 1049 if (!bz2decompress(s, out, pagesize)) 1050 return; 1051 1052 atomic_inc_64(&saved); 1053 1054 doflush = 0; 1055 if (s->nout == 0 && iszpage(out)) { 1056 doflush = 1; 1057 atomic_inc_64(&zpages); 1058 } else if (++s->nout >= BTOP(coreblksize) || 1059 isblkbnd(s->curpage + s->nout)) { 1060 doflush = 1; 1061 } 1062 if (++s->done >= s->npages) { 1063 s->state = STREAMSTART; 1064 doflush = 1; 1065 } 1066 if (doflush) { 1067 putpage(corefd, s->blkbuf, s->curpage, s->nout); 1068 s->nout = 0; 1069 s->curpage = s->pagenum + s->done; 1070 } 1071 break; 1072 } 1073 } 1074 } 1075 1076 /* report progress */ 1077 
static void 1078 report_progress() 1079 { 1080 int sec, percent; 1081 1082 if (!interactive) 1083 return; 1084 1085 percent = saved * 100LL / corehdr.dump_npages; 1086 if (percent > percent_done) { 1087 sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1088 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60, 1089 percent); 1090 (void) fflush(stdout); 1091 percent_done = percent; 1092 } 1093 } 1094 1095 /* thread body */ 1096 static void * 1097 runstreams(void *arg) 1098 { 1099 tinfo_t *t = arg; 1100 stream_t *s; 1101 block_t *b; 1102 int bound; 1103 1104 (void) pthread_mutex_lock(&lock); 1105 while (!threads_stop) { 1106 bound = 0; 1107 for (s = streams; s != endstreams; s++) { 1108 if (s->bound || s->blocks.head == NULL) 1109 continue; 1110 s->bound = 1; 1111 bound = 1; 1112 (void) pthread_cond_signal(&cvwork); 1113 while (s->blocks.head != NULL) { 1114 b = deqh(&s->blocks); 1115 (void) pthread_mutex_unlock(&lock); 1116 1117 if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2) 1118 lzjbblock(t->corefd, s, b->block, 1119 b->size); 1120 else 1121 bz2block(t->corefd, s, b->block, 1122 b->size); 1123 1124 (void) pthread_mutex_lock(&lock); 1125 enqt(&freeblocks, b); 1126 (void) pthread_cond_signal(&cvfree); 1127 1128 report_progress(); 1129 } 1130 s->bound = 0; 1131 (void) pthread_cond_signal(&cvbarrier); 1132 } 1133 if (!bound && !threads_stop) 1134 (void) pthread_cond_wait(&cvwork, &lock); 1135 } 1136 (void) close(t->corefd); 1137 (void) pthread_cond_signal(&cvwork); 1138 (void) pthread_mutex_unlock(&lock); 1139 return (arg); 1140 } 1141 1142 /* 1143 * Process compressed pages. 1144 * 1145 * The old format, now called single-threaded lzjb, is a 32-bit size 1146 * word followed by 'size' bytes of lzjb compression data for one 1147 * page. The new format extends this by storing a 12-bit "tag" in the 1148 * upper bits of the size word. When the size word is pagesize or 1149 * less, it is assumed to be one lzjb page. 
When the size word is 1150 * greater than pagesize, it is assumed to be a "stream block", 1151 * belonging to up to 4095 streams. In practice, the number of streams 1152 * is set to one less than the number of CPUs running at crash 1153 * time. One CPU processes the crash dump, the remaining CPUs 1154 * separately process groups of data pages. 1155 * 1156 * savecore creates a thread per stream, but never more threads than 1157 * the number of CPUs running savecore. This is because savecore can 1158 * be processing a crash file from a remote machine, which may have 1159 * more CPUs. 1160 * 1161 * When the kernel uses parallel lzjb or parallel bzip2, we expect a 1162 * series of 128KB blocks of compression data. In this case, each 1163 * block has a "tag", in the range 1-4095. Each block is handed off to 1164 * to the threads running "runstreams". The dump format is either lzjb 1165 * or bzip2, never a mixture. These threads, in turn, process the 1166 * compression data for groups of pages. Groups of pages are delimited 1167 * by a "stream header", which indicates a starting pfn and number of 1168 * pages. When a stream block has been read, the condition variable 1169 * "cvwork" is signalled, which causes one of the avaiable threads to 1170 * wake up and process the stream. 1171 * 1172 * In the parallel case there will be streams blocks encoding all data 1173 * pages. The stream of blocks is terminated by a zero size 1174 * word. There can be a few lzjb pages tacked on the end, depending on 1175 * the architecture. The sbarrier function ensures that all stream 1176 * blocks have been processed so that the page number for the few 1177 * single pages at the end can be known. 
1178 */ 1179 static void 1180 decompress_pages(int corefd) 1181 { 1182 char *cpage = NULL; 1183 char *dpage = NULL; 1184 char *out; 1185 pgcnt_t curpage; 1186 block_t *b; 1187 FILE *dumpf; 1188 FILE *tracef = NULL; 1189 stream_t *s; 1190 size_t dsize; 1191 size_t insz = FBUFSIZE; 1192 char *inbuf = Zalloc(insz); 1193 uint32_t csize; 1194 dumpcsize_t dcsize; 1195 int nstreams = datahdr.dump_nstreams; 1196 int maxcsize = datahdr.dump_maxcsize; 1197 int nout, tag, doflush; 1198 1199 dumpf = fdopen(dup(dumpfd), "rb"); 1200 if (dumpf == NULL) 1201 logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s", 1202 strerror(errno)); 1203 1204 (void) setvbuf(dumpf, inbuf, _IOFBF, insz); 1205 Fseek(dumphdr.dump_data, dumpf); 1206 1207 /*LINTED: E_CONSTANT_CONDITION*/ 1208 while (1) { 1209 1210 /* 1211 * The csize word delimits stream blocks. 1212 * See dumphdr.h for a description. 1213 */ 1214 Fread(&dcsize, sizeof (dcsize), dumpf); 1215 1216 tag = DUMP_GET_TAG(dcsize); 1217 csize = DUMP_GET_CSIZE(dcsize); 1218 1219 if (tag != 0) { /* a stream block */ 1220 1221 if (nstreams == 0) 1222 logprint(SC_SL_ERR | SC_EXIT_ERR, 1223 "starting data header is missing"); 1224 1225 if (tag > nstreams) 1226 logprint(SC_SL_ERR | SC_EXIT_ERR, 1227 "stream tag %d not in range 1..%d", 1228 tag, nstreams); 1229 1230 if (csize > maxcsize) 1231 logprint(SC_SL_ERR | SC_EXIT_ERR, 1232 "block size 0x%x > max csize 0x%x", 1233 csize, maxcsize); 1234 1235 if (streams == NULL) 1236 initstreams(corefd, nstreams, maxcsize); 1237 s = &streams[tag - 1]; 1238 s->tag = tag; 1239 1240 b = getfreeblock(); 1241 b->size = csize; 1242 Fread(b->block, csize, dumpf); 1243 1244 (void) pthread_mutex_lock(&lock); 1245 enqt(&s->blocks, b); 1246 if (!s->bound) 1247 (void) pthread_cond_signal(&cvwork); 1248 (void) pthread_mutex_unlock(&lock); 1249 1250 } else if (csize > 0) { /* one lzjb page */ 1251 1252 if (csize > pagesize) 1253 logprint(SC_SL_ERR | SC_EXIT_ERR, 1254 "csize 0x%x > pagesize 0x%x", 1255 csize, pagesize); 1256 1257 
if (cpage == NULL) 1258 cpage = Zalloc(pagesize); 1259 if (dpage == NULL) { 1260 dpage = Zalloc(coreblksize); 1261 nout = 0; 1262 } 1263 1264 Fread(cpage, csize, dumpf); 1265 1266 out = dpage + PTOB(nout); 1267 dsize = decompress(cpage, out, csize, pagesize); 1268 1269 if (dsize != pagesize) 1270 logprint(SC_SL_ERR | SC_EXIT_ERR, 1271 "dsize 0x%x != pagesize 0x%x", 1272 dsize, pagesize); 1273 1274 /* 1275 * wait for streams to flush so that 'saved' is correct 1276 */ 1277 if (threads_active) 1278 sbarrier(); 1279 1280 doflush = 0; 1281 if (nout == 0) 1282 curpage = saved; 1283 1284 atomic_inc_64(&saved); 1285 1286 if (nout == 0 && iszpage(dpage)) { 1287 doflush = 1; 1288 atomic_inc_64(&zpages); 1289 } else if (++nout >= BTOP(coreblksize) || 1290 isblkbnd(curpage + nout) || 1291 saved >= dumphdr.dump_npages) { 1292 doflush = 1; 1293 } 1294 1295 if (doflush) { 1296 putpage(corefd, dpage, curpage, nout); 1297 nout = 0; 1298 } 1299 1300 report_progress(); 1301 1302 /* 1303 * Non-streams lzjb does not use blocks. Stop 1304 * here if all the pages have been decompressed. 
1305 */ 1306 if (saved >= dumphdr.dump_npages) 1307 break; 1308 1309 } else { 1310 break; /* end of data */ 1311 } 1312 } 1313 1314 stopstreams(); 1315 if (tracef != NULL) 1316 (void) fclose(tracef); 1317 (void) fclose(dumpf); 1318 if (inbuf) 1319 free(inbuf); 1320 if (cpage) 1321 free(cpage); 1322 if (dpage) 1323 free(dpage); 1324 if (streams) 1325 free(streams); 1326 } 1327 1328 static void 1329 build_corefile(const char *namelist, const char *corefile) 1330 { 1331 size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t); 1332 size_t ksyms_size = dumphdr.dump_ksyms_size; 1333 size_t ksyms_csize = dumphdr.dump_ksyms_csize; 1334 pfn_t *pfn_table; 1335 char *ksyms_base = Zalloc(ksyms_size); 1336 char *ksyms_cbase = Zalloc(ksyms_csize); 1337 size_t ksyms_dsize; 1338 Stat_t st; 1339 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1340 int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1341 1342 (void) printf("Constructing namelist %s/%s\n", savedir, namelist); 1343 1344 /* 1345 * Determine the optimum write size for the core file 1346 */ 1347 Fstat(corefd, &st, corefile); 1348 1349 if (verbose > 1) 1350 (void) printf("%s: %ld block size\n", corefile, 1351 (long)st.st_blksize); 1352 coreblksize = st.st_blksize; 1353 if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize)) 1354 coreblksize = MINCOREBLKSIZE; 1355 1356 hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1); 1357 1358 /* 1359 * This dump file is now uncompressed 1360 */ 1361 corehdr.dump_flags &= ~DF_COMPRESSED; 1362 1363 /* 1364 * Read in the compressed symbol table, copy it to corefile, 1365 * decompress it, and write the result to namelist. 
1366 */ 1367 corehdr.dump_ksyms = pagesize; 1368 Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms); 1369 Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms); 1370 1371 ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize, 1372 ksyms_size); 1373 if (ksyms_dsize != ksyms_size) 1374 logprint(SC_SL_WARN, 1375 "bad data in symbol table, %lu of %lu bytes saved", 1376 ksyms_dsize, ksyms_size); 1377 1378 Pwrite(namefd, ksyms_base, ksyms_size, 0); 1379 (void) close(namefd); 1380 free(ksyms_cbase); 1381 free(ksyms_base); 1382 1383 (void) printf("Constructing corefile %s/%s\n", savedir, corefile); 1384 1385 /* 1386 * Read in and write out the pfn table. 1387 */ 1388 pfn_table = Zalloc(pfn_table_size); 1389 corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize); 1390 Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn); 1391 Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn); 1392 1393 /* 1394 * Convert the raw translation data into a hashed dump map. 1395 */ 1396 corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize); 1397 build_dump_map(corefd, pfn_table); 1398 free(pfn_table); 1399 1400 /* 1401 * Decompress the pages 1402 */ 1403 decompress_pages(corefd); 1404 (void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved, 1405 dumphdr.dump_npages); 1406 1407 if (verbose) 1408 (void) printf("%ld (%ld%%) zero pages were not written\n", 1409 (pgcnt_t)zpages, (pgcnt_t)zpages * 100 / 1410 dumphdr.dump_npages); 1411 1412 if (saved != dumphdr.dump_npages) 1413 logprint(SC_SL_WARN, "bad data after page %ld", saved); 1414 1415 /* 1416 * Write out the modified dump headers. 1417 */ 1418 Pwrite(corefd, &corehdr, sizeof (corehdr), 0); 1419 if (!filemode) 1420 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 1421 1422 (void) close(corefd); 1423 } 1424 1425 /* 1426 * When the system panics, the kernel saves all undelivered messages (messages 1427 * that never made it out to syslogd(1M)) in the dump. 
At a mimimum, the 1428 * panic message itself will always fall into this category. Upon reboot, 1429 * the syslog startup script runs savecore -m to recover these messages. 1430 * 1431 * To do this, we read the unsent messages from the dump and send them to 1432 * /dev/conslog on priority band 1. This has the effect of prepending them 1433 * to any already-accumulated messages in the console backlog, thus preserving 1434 * temporal ordering across the reboot. 1435 * 1436 * Note: since savecore -m is used *only* for this purpose, it does *not* 1437 * attempt to save the crash dump. The dump will be saved later, after 1438 * syslogd(1M) starts, by the savecore startup script. 1439 */ 1440 static int 1441 message_save(void) 1442 { 1443 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE); 1444 offset_t ldoff; 1445 log_dump_t ld; 1446 log_ctl_t lc; 1447 struct strbuf ctl, dat; 1448 int logfd; 1449 1450 logfd = Open("/dev/conslog", O_WRONLY, 0644); 1451 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1452 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1453 1454 ctl.buf = (void *)&lc; 1455 ctl.len = sizeof (log_ctl_t); 1456 1457 dat.buf = Zalloc(DUMP_LOGSIZE); 1458 1459 for (;;) { 1460 ldoff = dumpoff; 1461 1462 Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff); 1463 dumpoff += sizeof (log_dump_t); 1464 dat.len = ld.ld_msgsize; 1465 1466 if (ld.ld_magic == 0) 1467 break; 1468 1469 if (ld.ld_magic != LOG_MAGIC) 1470 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1471 "bad magic %x", ld.ld_magic); 1472 1473 if (dat.len >= DUMP_LOGSIZE) 1474 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1475 "bad size %d", ld.ld_msgsize); 1476 1477 Pread(dumpfd, ctl.buf, ctl.len, dumpoff); 1478 dumpoff += ctl.len; 1479 1480 if (ld.ld_csum != checksum32(ctl.buf, ctl.len)) 1481 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1482 "bad log_ctl checksum"); 1483 1484 lc.flags |= SL_LOGONLY; 1485 1486 Pread(dumpfd, dat.buf, dat.len, dumpoff); 1487 dumpoff += dat.len; 1488 1489 if 
(ld.ld_msum != checksum32(dat.buf, dat.len)) 1490 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1491 "bad message checksum"); 1492 1493 if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1) 1494 logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s", 1495 strerror(errno)); 1496 1497 ld.ld_magic = 0; /* clear magic so we never save twice */ 1498 Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff); 1499 } 1500 return (0); 1501 } 1502 1503 static long 1504 getbounds(const char *f) 1505 { 1506 long b = -1; 1507 const char *p = strrchr(f, '/'); 1508 1509 (void) sscanf(p ? p + 1 : f, "vmdump.%ld", &b); 1510 return (b); 1511 } 1512 1513 static void 1514 stack_retrieve(char *stack) 1515 { 1516 summary_dump_t sd; 1517 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE + 1518 DUMP_ERPTSIZE); 1519 dumpoff -= DUMP_SUMMARYSIZE; 1520 1521 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1522 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1523 1524 Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff); 1525 dumpoff += sizeof (summary_dump_t); 1526 1527 if (sd.sd_magic == 0) { 1528 *stack = '\0'; 1529 return; 1530 } 1531 1532 if (sd.sd_magic != SUMMARY_MAGIC) { 1533 *stack = '\0'; 1534 logprint(SC_SL_NONE | SC_IF_VERBOSE, 1535 "bad summary magic %x", sd.sd_magic); 1536 return; 1537 } 1538 Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff); 1539 if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE)) 1540 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum"); 1541 } 1542 1543 static void 1544 raise_event(enum sc_event_type evidx, char *warn_string) 1545 { 1546 uint32_t pl = sc_event[evidx].sce_payload; 1547 char panic_stack[STACK_BUF_SIZE]; 1548 nvlist_t *attr = NULL; 1549 char uuidbuf[36 + 1]; 1550 int err = 0; 1551 1552 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0) 1553 goto publish; /* try to send payload-free event */ 1554 1555 if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL) 1556 err |= nvlist_add_string(attr, "dumpdir", savedir); 1557 1558 if (pl & SC_PAYLOAD_INSTANCE && bounds 
!= -1) 1559 err |= nvlist_add_int64(attr, "instance", bounds); 1560 1561 if (pl & SC_PAYLOAD_ISCOMPRESSED) { 1562 err |= nvlist_add_boolean_value(attr, "compressed", 1563 csave ? B_TRUE : B_FALSE); 1564 } 1565 1566 if (pl & SC_PAYLOAD_DUMPADM_EN) { 1567 char *disabled = defread("DUMPADM_ENABLE=no"); 1568 1569 err |= nvlist_add_boolean_value(attr, "savecore-enabled", 1570 disabled ? B_FALSE : B_TRUE); 1571 } 1572 1573 if (pl & SC_PAYLOAD_IMAGEUUID) { 1574 (void) strncpy(uuidbuf, corehdr.dump_uuid, 36); 1575 uuidbuf[36] = '\0'; 1576 err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf); 1577 } 1578 1579 if (pl & SC_PAYLOAD_CRASHTIME) { 1580 err |= nvlist_add_int64(attr, "crashtime", 1581 (int64_t)corehdr.dump_crashtime); 1582 } 1583 1584 if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') { 1585 err |= nvlist_add_string(attr, "panicstr", 1586 corehdr.dump_panicstring); 1587 } 1588 1589 if (pl & SC_PAYLOAD_PANICSTACK) { 1590 stack_retrieve(panic_stack); 1591 1592 if (panic_stack[0] != '\0') { 1593 /* 1594 * The summary page may not be present if the dump 1595 * was previously recorded compressed. 1596 */ 1597 (void) nvlist_add_string(attr, "panicstack", 1598 panic_stack); 1599 } 1600 } 1601 1602 /* add warning string if this is an ireport for dump failure */ 1603 if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL) 1604 (void) nvlist_add_string(attr, "failure-reason", warn_string); 1605 1606 if (pl & SC_PAYLOAD_DUMPCOMPLETE) 1607 err |= nvlist_add_boolean_value(attr, "dump-incomplete", 1608 dump_incomplete ? B_TRUE : B_FALSE); 1609 1610 if (pl & SC_PAYLOAD_FM_PANIC) { 1611 err |= nvlist_add_boolean_value(attr, "fm-panic", 1612 fm_panic ? B_TRUE : B_FALSE); 1613 } 1614 1615 if (pl & SC_PAYLOAD_JUSTCHECKING) { 1616 err |= nvlist_add_boolean_value(attr, "will-attempt-savecore", 1617 cflag ? 
B_FALSE : B_TRUE); 1618 } 1619 1620 if (err) 1621 logprint(SC_SL_WARN, "Errors while constructing '%s' " 1622 "event payload; will try to publish anyway."); 1623 publish: 1624 if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS, 1625 "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI, 1626 attr) != FMEV_SUCCESS) { 1627 logprint(SC_SL_ERR, "failed to publish '%s' event: %s", 1628 sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno)); 1629 nvlist_free(attr); 1630 } 1631 1632 } 1633 1634 1635 int 1636 main(int argc, char *argv[]) 1637 { 1638 int i, c, bfd; 1639 Stat_t st; 1640 struct rlimit rl; 1641 long filebounds = -1; 1642 char namelist[30], corefile[30], boundstr[30]; 1643 1644 startts = gethrtime(); 1645 1646 (void) getrlimit(RLIMIT_NOFILE, &rl); 1647 rl.rlim_cur = rl.rlim_max; 1648 (void) setrlimit(RLIMIT_NOFILE, &rl); 1649 1650 openlog(progname, LOG_ODELAY, LOG_AUTH); 1651 1652 (void) defopen("/etc/dumpadm.conf"); 1653 savedir = defread("DUMPADM_SAVDIR="); 1654 if (savedir != NULL) 1655 savedir = strdup(savedir); 1656 1657 while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) { 1658 switch (c) { 1659 case 'L': 1660 livedump++; 1661 break; 1662 case 'v': 1663 verbose++; 1664 break; 1665 case 'c': 1666 cflag++; 1667 break; 1668 case 'd': 1669 disregard_valid_flag++; 1670 break; 1671 case 'm': 1672 mflag++; 1673 break; 1674 case 'f': 1675 dumpfile = optarg; 1676 filebounds = getbounds(dumpfile); 1677 break; 1678 case '?': 1679 usage(); 1680 } 1681 } 1682 1683 if (geteuid() != 0 && filebounds < 0) { 1684 (void) fprintf(stderr, "%s: %s %s\n", progname, 1685 gettext("you must be root to use"), progname); 1686 exit(1); 1687 } 1688 1689 interactive = isatty(STDOUT_FILENO); 1690 1691 if (cflag && livedump) 1692 usage(); 1693 1694 if (dumpfile == NULL || livedump) 1695 dumpfd = Open("/dev/dump", O_RDONLY, 0444); 1696 1697 if (dumpfile == NULL) { 1698 dumpfile = Zalloc(MAXPATHLEN); 1699 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) 1700 logprint(SC_SL_NONE | SC_IF_ISATTY | 
SC_EXIT_ERR, 1701 "no dump device configured"); 1702 } 1703 1704 if (mflag) 1705 return (message_save()); 1706 1707 if (optind == argc - 1) 1708 savedir = argv[optind]; 1709 1710 if (savedir == NULL || optind < argc - 1) 1711 usage(); 1712 1713 if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1) 1714 logprint(SC_SL_NONE | SC_EXIT_ERR, 1715 "dedicated dump device required"); 1716 1717 (void) close(dumpfd); 1718 dumpfd = -1; 1719 1720 Stat(dumpfile, &st); 1721 1722 filemode = S_ISREG(st.st_mode); 1723 1724 if (!filemode && defread("DUMPADM_CSAVE=off") == NULL) 1725 csave = 1; 1726 1727 read_dumphdr(); 1728 1729 /* 1730 * We want this message to go to the log file, but not the console. 1731 * There's no good way to do that with the existing syslog facility. 1732 * We could extend it to handle this, but there doesn't seem to be 1733 * a general need for it, so we isolate the complexity here instead. 1734 */ 1735 if (dumphdr.dump_panicstring[0] != '\0') { 1736 int logfd = Open("/dev/conslog", O_WRONLY, 0644); 1737 log_ctl_t lc; 1738 struct strbuf ctl, dat; 1739 char msg[DUMP_PANICSIZE + 100]; 1740 char fmt[] = "reboot after panic: %s"; 1741 uint32_t msgid; 1742 1743 STRLOG_MAKE_MSGID(fmt, msgid); 1744 1745 /* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */ 1746 (void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ", 1747 progname, msgid); 1748 /* LINTED: E_SEC_PRINTF_VAR_FMT */ 1749 (void) sprintf(msg + strlen(msg), fmt, 1750 dumphdr.dump_panicstring); 1751 1752 lc.pri = LOG_AUTH | LOG_ERR; 1753 lc.flags = SL_CONSOLE | SL_LOGONLY; 1754 lc.level = 0; 1755 1756 ctl.buf = (void *)&lc; 1757 ctl.len = sizeof (log_ctl_t); 1758 1759 dat.buf = (void *)msg; 1760 dat.len = strlen(msg) + 1; 1761 1762 (void) putmsg(logfd, &ctl, &dat, 0); 1763 (void) close(logfd); 1764 } 1765 1766 if ((dumphdr.dump_flags & DF_COMPLETE) == 0) { 1767 logprint(SC_SL_WARN, "incomplete dump on dump device"); 1768 dump_incomplete = B_TRUE; 1769 } 1770 1771 if (dumphdr.dump_fm_panic) 1772 fm_panic = B_TRUE; 
1773 1774 /* 1775 * We have a valid dump on a dump device and know as much about 1776 * it as we're going to at this stage. Raise an event for 1777 * logging and so that FMA can open a case for this panic. 1778 * Avoid this step for FMA-initiated panics - FMA will replay 1779 * ereports off the dump device independently of savecore and 1780 * will make a diagnosis, so we don't want to open two cases 1781 * for the same event. Also avoid raising an event for a 1782 * livedump, or when we inflating a compressed dump. 1783 */ 1784 if (!fm_panic && !livedump && !filemode) 1785 raise_event(SC_EVENT_DUMP_PENDING, NULL); 1786 1787 logprint(SC_SL_WARN, "System dump time: %s", 1788 ctime(&dumphdr.dump_crashtime)); 1789 1790 /* 1791 * Option -c is designed for use from svc-dumpadm where we know 1792 * that dumpadm -n is in effect but run savecore -c just to 1793 * get the above dump_pending_on_device event raised. If it is run 1794 * interactively then just print further panic details. 1795 */ 1796 if (cflag) { 1797 char *disabled = defread("DUMPADM_ENABLE=no"); 1798 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR; 1799 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND; 1800 1801 logprint(lvl | ec, 1802 "Panic crashdump pending on dump device%s " 1803 "run savecore(1M) manually to extract. " 1804 "Image UUID %s%s.", 1805 disabled ? " but dumpadm -n in effect;" : ";", 1806 corehdr.dump_uuid, 1807 fm_panic ? 
"(fault-management initiated)" : ""); 1808 /*NOTREACHED*/ 1809 } 1810 1811 if (chdir(savedir) == -1) 1812 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s", 1813 savedir, strerror(errno)); 1814 1815 check_space(csave); 1816 1817 if (filebounds < 0) 1818 bounds = read_number_from_file("bounds", 0); 1819 else 1820 bounds = filebounds; 1821 1822 if (csave) { 1823 size_t metrics_size = datahdr.dump_metrics; 1824 1825 (void) sprintf(corefile, "vmdump.%ld", bounds); 1826 1827 datahdr.dump_metrics = 0; 1828 1829 logprint(SC_SL_ERR, 1830 "Saving compressed system crash dump in %s/%s", 1831 savedir, corefile); 1832 1833 copy_crashfile(corefile); 1834 1835 /* 1836 * Raise a fault management event that indicates the system 1837 * has panicked. We know a reasonable amount about the 1838 * condition at this time, but the dump is still compressed. 1839 */ 1840 if (!livedump && !fm_panic) 1841 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1842 1843 if (metrics_size > 0) { 1844 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1845 FILE *mfile = fopen(METRICSFILE, "a"); 1846 char *metrics = Zalloc(metrics_size + 1); 1847 1848 Pread(dumpfd, metrics, metrics_size, endoff + 1849 sizeof (dumphdr) + sizeof (datahdr)); 1850 1851 if (sec < 1) 1852 sec = 1; 1853 1854 if (mfile == NULL) { 1855 logprint(SC_SL_WARN, 1856 "Can't create %s:\n%s", 1857 METRICSFILE, metrics); 1858 } else { 1859 (void) fprintf(mfile, "[[[[,,,"); 1860 for (i = 0; i < argc; i++) 1861 (void) fprintf(mfile, "%s ", argv[i]); 1862 (void) fprintf(mfile, "\n"); 1863 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1864 dumphdr.dump_utsname.sysname, 1865 dumphdr.dump_utsname.nodename, 1866 dumphdr.dump_utsname.release, 1867 dumphdr.dump_utsname.version, 1868 dumphdr.dump_utsname.machine); 1869 (void) fprintf(mfile, ",,,%s dump time %s\n", 1870 dumphdr.dump_flags & DF_LIVE ? 
"Live" : 1871 "Crash", ctime(&dumphdr.dump_crashtime)); 1872 (void) fprintf(mfile, ",,,%s/%s\n", savedir, 1873 corefile); 1874 (void) fprintf(mfile, "Metrics:\n%s\n", 1875 metrics); 1876 (void) fprintf(mfile, "Copy pages,%ld\n", 1877 dumphdr. dump_npages); 1878 (void) fprintf(mfile, "Copy time,%d\n", sec); 1879 (void) fprintf(mfile, "Copy pages/sec,%ld\n", 1880 dumphdr.dump_npages / sec); 1881 (void) fprintf(mfile, "]]]]\n"); 1882 (void) fclose(mfile); 1883 } 1884 free(metrics); 1885 } 1886 1887 logprint(SC_SL_ERR, 1888 "Decompress the crash dump with " 1889 "\n'savecore -vf %s/%s'", 1890 savedir, corefile); 1891 1892 } else { 1893 (void) sprintf(namelist, "unix.%ld", bounds); 1894 (void) sprintf(corefile, "vmcore.%ld", bounds); 1895 1896 if (interactive && filebounds >= 0 && access(corefile, F_OK) 1897 == 0) 1898 logprint(SC_SL_NONE | SC_EXIT_ERR, 1899 "%s already exists: remove with " 1900 "'rm -f %s/{unix,vmcore}.%ld'", 1901 corefile, savedir, bounds); 1902 1903 logprint(SC_SL_ERR, 1904 "saving system crash dump in %s/{unix,vmcore}.%ld", 1905 savedir, bounds); 1906 1907 build_corefile(namelist, corefile); 1908 1909 if (!livedump && !filemode && !fm_panic) 1910 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1911 1912 if (access(METRICSFILE, F_OK) == 0) { 1913 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1914 FILE *mfile = fopen(METRICSFILE, "a"); 1915 1916 if (sec < 1) 1917 sec = 1; 1918 1919 (void) fprintf(mfile, "[[[[,,,"); 1920 for (i = 0; i < argc; i++) 1921 (void) fprintf(mfile, "%s ", argv[i]); 1922 (void) fprintf(mfile, "\n"); 1923 (void) fprintf(mfile, ",,,%s/%s\n", savedir, corefile); 1924 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1925 dumphdr.dump_utsname.sysname, 1926 dumphdr.dump_utsname.nodename, 1927 dumphdr.dump_utsname.release, 1928 dumphdr.dump_utsname.version, 1929 dumphdr.dump_utsname.machine); 1930 (void) fprintf(mfile, "Uncompress pages,%"PRIu64"\n", 1931 saved); 1932 (void) fprintf(mfile, "Uncompress time,%d\n", sec); 1933 (void) 
fprintf(mfile, "Uncompress pages/sec,%" 1934 PRIu64"\n", saved / sec); 1935 (void) fprintf(mfile, "]]]]\n"); 1936 (void) fclose(mfile); 1937 } 1938 } 1939 1940 if (filebounds < 0) { 1941 (void) sprintf(boundstr, "%ld\n", bounds + 1); 1942 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644); 1943 Pwrite(bfd, boundstr, strlen(boundstr), 0); 1944 (void) close(bfd); 1945 } 1946 1947 if (verbose) { 1948 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1949 1950 (void) printf("%d:%02d dump %s is done\n", 1951 sec / 60, sec % 60, 1952 csave ? "copy" : "decompress"); 1953 } 1954 1955 if (verbose > 1 && hist != NULL) { 1956 int i, nw; 1957 1958 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i) 1959 nw += hist[i] * i; 1960 (void) printf("pages count %%\n"); 1961 for (i = 0; i <= BTOP(coreblksize); ++i) { 1962 if (hist[i] == 0) 1963 continue; 1964 (void) printf("%3d %5u %6.2f\n", 1965 i, hist[i], 100.0 * hist[i] * i / nw); 1966 } 1967 } 1968 1969 (void) close(dumpfd); 1970 dumpfd = -1; 1971 1972 return (0); 1973 }