1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2016 Joyent, Inc. 24 */ 25 /* 26 * Copyright 2018 Nexenta Systems, Inc. All rights reserved. 27 */ 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <stdarg.h> 32 #include <unistd.h> 33 #include <fcntl.h> 34 #include <errno.h> 35 #include <string.h> 36 #include <deflt.h> 37 #include <time.h> 38 #include <syslog.h> 39 #include <stropts.h> 40 #include <pthread.h> 41 #include <limits.h> 42 #include <atomic.h> 43 #include <libnvpair.h> 44 #include <libintl.h> 45 #include <sys/mem.h> 46 #include <sys/statvfs.h> 47 #include <sys/dumphdr.h> 48 #include <sys/dumpadm.h> 49 #include <sys/compress.h> 50 #include <sys/panic.h> 51 #include <sys/sysmacros.h> 52 #include <sys/stat.h> 53 #include <sys/resource.h> 54 #include <sys/fm/util.h> 55 #include <fm/libfmevent.h> 56 #include <sys/int_fmtio.h> 57 58 59 /* fread/fwrite buffer size */ 60 #define FBUFSIZE (1ULL << 20) 61 62 /* minimum size for output buffering */ 63 #define MINCOREBLKSIZE (1ULL << 17) 64 65 /* create this file if metrics collection is enabled in the kernel */ 66 #define METRICSFILE "METRICS.csv" 67 68 static char progname[9] = "savecore"; 69 static char *savedir; /* savecore directory */ 70 static char *dumpfile; /* source of raw crash dump */ 71 static long bounds = -1; /* numeric suffix */ 72 static long pagesize; /* dump pagesize */ 73 static int dumpfd = -1; /* dumpfile descriptor */ 74 static boolean_t have_dumpfile = B_TRUE; /* dumpfile existence */ 75 static dumphdr_t corehdr, dumphdr; /* initial and terminal dumphdrs */ 76 static boolean_t dump_incomplete; /* dumphdr indicates incomplete */ 77 static boolean_t fm_panic; /* dump is the result of fm_panic */ 78 static offset_t endoff; /* offset of end-of-dump header */ 79 static int verbose; /* chatty mode */ 80 static int disregard_valid_flag; /* disregard valid flag */ 81 static int livedump; /* dump the current running system */ 82 static int interactive; /* user invoked; no syslog */ 83 static int csave; /* save dump compressed */ 84 static int filemode; /* processing file, not dump device */ 85 static int percent_done; /* progress indicator */ 86 static int sec_done; /* progress last report time */ 87 static hrtime_t startts; /* timestamp at start */ 88 static volatile uint64_t saved; /* count of pages written */ 89 static volatile uint64_t zpages; /* count of zero pages not written */ 90 static dumpdatahdr_t datahdr; /* compression info */ 91 static long coreblksize; /* preferred write size (st_blksize) */ 92 static int cflag; /* run as savecore -c */ 93 static int mflag; /* run as savecore -m */ 94 95 /* 96 * Payload information for the events we raise. These are used 97 * in raise_event to determine what payload to include. 98 */ 99 #define SC_PAYLOAD_SAVEDIR 0x0001 /* Include savedir in event */ 100 #define SC_PAYLOAD_INSTANCE 0x0002 /* Include bounds instance number */ 101 #define SC_PAYLOAD_IMAGEUUID 0x0004 /* Include dump OS instance uuid */ 102 #define SC_PAYLOAD_CRASHTIME 0x0008 /* Include epoch crashtime */ 103 #define SC_PAYLOAD_PANICSTR 0x0010 /* Include panic string */ 104 #define SC_PAYLOAD_PANICSTACK 0x0020 /* Include panic string */ 105 #define SC_PAYLOAD_FAILREASON 0x0040 /* Include failure reason */ 106 #define SC_PAYLOAD_DUMPCOMPLETE 0x0080 /* Include completeness indicator */ 107 #define SC_PAYLOAD_ISCOMPRESSED 0x0100 /* Dump is in vmdump.N form */ 108 #define SC_PAYLOAD_DUMPADM_EN 0x0200 /* Is dumpadm enabled or not? */ 109 #define SC_PAYLOAD_FM_PANIC 0x0400 /* Panic initiated by FMA */ 110 #define SC_PAYLOAD_JUSTCHECKING 0x0800 /* Run with -c flag? */ 111 112 enum sc_event_type { 113 SC_EVENT_DUMP_PENDING, 114 SC_EVENT_SAVECORE_FAILURE, 115 SC_EVENT_DUMP_AVAILABLE 116 }; 117 118 /* 119 * Common payload 120 */ 121 #define _SC_PAYLOAD_CMN \ 122 SC_PAYLOAD_IMAGEUUID | \ 123 SC_PAYLOAD_CRASHTIME | \ 124 SC_PAYLOAD_PANICSTR | \ 125 SC_PAYLOAD_PANICSTACK | \ 126 SC_PAYLOAD_DUMPCOMPLETE | \ 127 SC_PAYLOAD_FM_PANIC | \ 128 SC_PAYLOAD_SAVEDIR 129 130 static const struct { 131 const char *sce_subclass; 132 uint32_t sce_payload; 133 } sc_event[] = { 134 /* 135 * SC_EVENT_DUMP_PENDING 136 */ 137 { 138 "dump_pending_on_device", 139 _SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN | 140 SC_PAYLOAD_JUSTCHECKING 141 }, 142 143 /* 144 * SC_EVENT_SAVECORE_FAILURE 145 */ 146 { 147 "savecore_failure", 148 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON 149 }, 150 151 /* 152 * SC_EVENT_DUMP_AVAILABLE 153 */ 154 { 155 "dump_available", 156 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED 157 }, 158 }; 159 160 static void raise_event(enum sc_event_type, char *); 161 162 static void 163 usage(void) 164 { 165 (void) fprintf(stderr, 166 "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname); 167 exit(1); 168 } 169 170 #define SC_SL_NONE 0x0001 /* no syslog */ 171 #define SC_SL_ERR 0x0002 /* syslog if !interactive, LOG_ERR */ 172 #define SC_SL_WARN 0x0004 /* syslog if !interactive, LOG_WARNING */ 173 #define SC_IF_VERBOSE 0x0008 /* message only if -v */ 174 #define SC_IF_ISATTY 0x0010 /* message only if interactive */ 175 #define SC_EXIT_OK 0x0020 /* exit(0) */ 176 #define SC_EXIT_ERR 0x0040 /* exit(1) */ 177 #define SC_EXIT_PEND 0x0080 /* exit(2) */ 178 #define SC_EXIT_FM 0x0100 /* exit(3) */ 179 180 #define _SC_ALLEXIT (SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM) 181 182 static void 183 logprint(uint32_t flags, char *message, ...) 184 { 185 va_list args; 186 char buf[1024]; 187 int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0); 188 int do_ifverb = (flags & SC_IF_VERBOSE) && verbose; 189 int do_ifisatty = (flags & SC_IF_ISATTY) && interactive; 190 int code; 191 static int logprint_raised = 0; 192 193 if (do_always || do_ifverb || do_ifisatty) { 194 va_start(args, message); 195 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 196 (void) vsnprintf(buf, sizeof (buf), message, args); 197 (void) fprintf(stderr, "%s: %s\n", progname, buf); 198 if (!interactive) { 199 switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) { 200 case SC_SL_ERR: 201 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 202 syslog(LOG_ERR, buf); 203 break; 204 205 case SC_SL_WARN: 206 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 207 syslog(LOG_WARNING, buf); 208 break; 209 210 default: 211 break; 212 } 213 } 214 va_end(args); 215 } 216 217 switch (flags & _SC_ALLEXIT) { 218 case 0: 219 return; 220 221 case SC_EXIT_OK: 222 code = 0; 223 break; 224 225 case SC_EXIT_PEND: 226 /* 227 * Raise an ireport saying why we are exiting. Do not 228 * raise if run as savecore -m. If something in the 229 * raise_event codepath calls logprint avoid recursion. 230 */ 231 if (!mflag && logprint_raised++ == 0) 232 raise_event(SC_EVENT_SAVECORE_FAILURE, buf); 233 code = 2; 234 break; 235 236 case SC_EXIT_FM: 237 code = 3; 238 break; 239 240 case SC_EXIT_ERR: 241 default: 242 if (!mflag && logprint_raised++ == 0 && have_dumpfile) 243 raise_event(SC_EVENT_SAVECORE_FAILURE, buf); 244 code = 1; 245 break; 246 } 247 248 exit(code); 249 } 250 251 /* 252 * System call / libc wrappers that exit on error. 253 */ 254 static int 255 Open(const char *name, int oflags, mode_t mode) 256 { 257 int fd; 258 259 if ((fd = open64(name, oflags, mode)) == -1) 260 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s", 261 name, strerror(errno)); 262 return (fd); 263 } 264 265 static void 266 Fread(void *buf, size_t size, FILE *f) 267 { 268 if (fread(buf, size, 1, f) != 1) 269 logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: %s", 270 strerror(errno)); 271 } 272 273 static void 274 Fwrite(void *buf, size_t size, FILE *f) 275 { 276 if (fwrite(buf, size, 1, f) != 1) 277 logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s", 278 strerror(errno)); 279 } 280 281 static void 282 Fseek(offset_t off, FILE *f) 283 { 284 if (fseeko64(f, off, SEEK_SET) != 0) 285 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s", 286 strerror(errno)); 287 } 288 289 typedef struct stat64 Stat_t; 290 291 static void 292 Fstat(int fd, Stat_t *sb, const char *fname) 293 { 294 if (fstat64(fd, sb) != 0) 295 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname, 296 strerror(errno)); 297 } 298 299 static void 300 Stat(const char *fname, Stat_t *sb) 301 { 302 if (stat64(fname, sb) != 0) { 303 have_dumpfile = B_FALSE; 304 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status " 305 "of file %s", fname); 306 } 307 } 308 309 static void 310 Pread(int fd, void *buf, size_t size, offset_t off) 311 { 312 ssize_t sz = pread64(fd, buf, size, off); 313 314 if (sz < 0) 315 logprint(SC_SL_ERR | SC_EXIT_ERR, 316 "pread: %s", strerror(errno)); 317 else if (sz != size) 318 logprint(SC_SL_ERR | SC_EXIT_ERR, 319 "pread: size %ld != %ld", sz, size); 320 } 321 322 static void 323 Pwrite(int fd, void *buf, size_t size, off64_t off) 324 { 325 if (pwrite64(fd, buf, size, off) != size) 326 logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s", 327 strerror(errno)); 328 } 329 330 static void * 331 Zalloc(size_t size) 332 { 333 void *buf; 334 335 if ((buf = calloc(size, 1)) == NULL) 336 logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s", 337 strerror(errno)); 338 return (buf); 339 } 340 341 static long 342 read_number_from_file(const char *filename, long default_value) 343 { 344 long file_value = -1; 345 FILE *fp; 346 347 if ((fp = fopen(filename, "r")) != NULL) { 348 (void) fscanf(fp, "%ld", &file_value); 349 (void) fclose(fp); 350 } 351 return (file_value < 0 ? default_value : file_value); 352 } 353 354 static void 355 read_dumphdr(void) 356 { 357 if (filemode) 358 dumpfd = Open(dumpfile, O_RDONLY, 0644); 359 else 360 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 361 endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET; 362 Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 363 Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr)); 364 365 pagesize = dumphdr.dump_pagesize; 366 367 if (dumphdr.dump_magic != DUMP_MAGIC) 368 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x", 369 dumphdr.dump_magic); 370 371 if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag) 372 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK, 373 "dump already processed"); 374 375 if (dumphdr.dump_version != DUMP_VERSION) 376 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND, 377 "dump version (%d) != %s version (%d)", 378 dumphdr.dump_version, progname, DUMP_VERSION); 379 380 if (datahdr.dump_clevel > DUMP_CLEVEL_LZJB) 381 logprint(SC_SL_NONE | SC_EXIT_PEND, 382 "unsupported compression format (%d)", datahdr.dump_clevel); 383 384 if (dumphdr.dump_wordsize != DUMP_WORDSIZE) 385 logprint(SC_SL_NONE | SC_EXIT_PEND, 386 "dump is from %u-bit kernel - cannot save on %u-bit kernel", 387 dumphdr.dump_wordsize, DUMP_WORDSIZE); 388 389 if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) { 390 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION) 391 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND, 392 "dump data version (%d) != %s data version (%d)", 393 datahdr.dump_datahdr_version, progname, 394 DUMP_DATAHDR_VERSION); 395 } else { 396 (void) memset(&datahdr, 0, sizeof (datahdr)); 397 datahdr.dump_maxcsize = pagesize; 398 } 399 400 /* 401 * Read the initial header, clear the valid bits, and compare headers. 402 * The main header may have been overwritten by swapping if we're 403 * using a swap partition as the dump device, in which case we bail. 404 */ 405 Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start); 406 407 corehdr.dump_flags &= ~DF_VALID; 408 dumphdr.dump_flags &= ~DF_VALID; 409 410 if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) { 411 /* 412 * Clear valid bit so we don't complain on every invocation. 413 */ 414 if (!filemode) 415 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 416 logprint(SC_SL_ERR | SC_EXIT_ERR, 417 "initial dump header corrupt"); 418 } 419 } 420 421 static void 422 check_space(int csave) 423 { 424 struct statvfs fsb; 425 int64_t spacefree, dumpsize, minfree, datasize; 426 427 if (statvfs(".", &fsb) < 0) 428 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s", 429 strerror(errno)); 430 431 dumpsize = dumphdr.dump_data - dumphdr.dump_start; 432 datasize = dumphdr.dump_npages * pagesize; 433 if (!csave) 434 dumpsize += datasize; 435 else 436 dumpsize += datahdr.dump_data_csize; 437 438 spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize; 439 minfree = 1024LL * read_number_from_file("minfree", 1024); 440 if (spacefree < minfree + dumpsize) { 441 logprint(SC_SL_ERR | SC_EXIT_ERR, 442 "not enough space in %s (%lld MB avail, %lld MB needed)", 443 savedir, spacefree >> 20, (minfree + dumpsize) >> 20); 444 } 445 } 446 447 static void 448 build_dump_map(int corefd, const pfn_t *pfn_table) 449 { 450 long i; 451 static long misses = 0; 452 size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t); 453 mem_vtop_t vtop; 454 dump_map_t *dmp = Zalloc(dump_mapsize); 455 char *inbuf = Zalloc(FBUFSIZE); 456 FILE *in = fdopen(dup(dumpfd), "rb"); 457 458 (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE); 459 Fseek(dumphdr.dump_map, in); 460 461 corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize); 462 463 for (i = 0; i < corehdr.dump_nvtop; i++) { 464 long first = 0; 465 long last = corehdr.dump_npages - 1; 466 long middle = 0; 467 pfn_t pfn = 0; 468 uintptr_t h; 469 470 Fread(&vtop, sizeof (mem_vtop_t), in); 471 while (last >= first) { 472 middle = (first + last) / 2; 473 pfn = pfn_table[middle]; 474 if (pfn == vtop.m_pfn) 475 break; 476 if (pfn < vtop.m_pfn) 477 first = middle + 1; 478 else 479 last = middle - 1; 480 } 481 if (pfn != vtop.m_pfn) { 482 if (++misses <= 10) 483 (void) fprintf(stderr, 484 "pfn %ld not found for as=%p, va=%p\n", 485 vtop.m_pfn, (void *)vtop.m_as, vtop.m_va); 486 continue; 487 } 488 489 dmp[i].dm_as = vtop.m_as; 490 dmp[i].dm_va = (uintptr_t)vtop.m_va; 491 dmp[i].dm_data = corehdr.dump_data + 492 ((uint64_t)middle << corehdr.dump_pageshift); 493 494 h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va); 495 dmp[i].dm_next = dmp[h].dm_first; 496 dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t); 497 } 498 499 Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map); 500 free(dmp); 501 (void) fclose(in); 502 free(inbuf); 503 } 504 505 /* 506 * Copy whole sections of the dump device to the file. 507 */ 508 static void 509 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf, 510 size_t sz) 511 { 512 size_t nr; 513 offset_t off = *offp; 514 515 while (nb > 0) { 516 nr = sz < nb ? sz : (size_t)nb; 517 Pread(dumpfd, buf, nr, dumpoff); 518 Pwrite(fd, buf, nr, off); 519 off += nr; 520 dumpoff += nr; 521 nb -= nr; 522 } 523 *offp = off; 524 } 525 526 /* 527 * Copy pages when the dump data header is missing. 528 * This supports older kernels with latest savecore. 529 */ 530 static void 531 CopyPages(offset_t *offp, int fd, char *buf, size_t sz) 532 { 533 uint32_t csize; 534 FILE *in = fdopen(dup(dumpfd), "rb"); 535 FILE *out = fdopen(dup(fd), "wb"); 536 char *cbuf = Zalloc(pagesize); 537 char *outbuf = Zalloc(FBUFSIZE); 538 pgcnt_t np = dumphdr.dump_npages; 539 540 (void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE); 541 (void) setvbuf(in, buf, _IOFBF, sz); 542 Fseek(dumphdr.dump_data, in); 543 544 Fseek(*offp, out); 545 while (np > 0) { 546 Fread(&csize, sizeof (uint32_t), in); 547 Fwrite(&csize, sizeof (uint32_t), out); 548 *offp += sizeof (uint32_t); 549 if (csize > pagesize || csize == 0) { 550 logprint(SC_SL_ERR, 551 "CopyPages: page %lu csize %d (0x%x) pagesize %d", 552 dumphdr.dump_npages - np, csize, csize, 553 pagesize); 554 break; 555 } 556 Fread(cbuf, csize, in); 557 Fwrite(cbuf, csize, out); 558 *offp += csize; 559 np--; 560 } 561 (void) fclose(in); 562 (void) fclose(out); 563 free(outbuf); 564 free(buf); 565 } 566 567 /* 568 * Concatenate dump contents into a new file. 569 * Update corehdr with new offsets. 570 */ 571 static void 572 copy_crashfile(const char *corefile) 573 { 574 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 575 size_t bufsz = FBUFSIZE; 576 char *inbuf = Zalloc(bufsz); 577 offset_t coreoff; 578 size_t nb; 579 580 logprint(SC_SL_ERR | SC_IF_VERBOSE, 581 "Copying %s to %s/%s\n", dumpfile, savedir, corefile); 582 583 /* 584 * This dump file is still compressed 585 */ 586 corehdr.dump_flags |= DF_COMPRESSED | DF_VALID; 587 588 /* 589 * Leave room for corehdr, it is updated and written last 590 */ 591 corehdr.dump_start = 0; 592 coreoff = sizeof (corehdr); 593 594 /* 595 * Read in the compressed symbol table, copy it to corefile. 596 */ 597 coreoff = roundup(coreoff, pagesize); 598 corehdr.dump_ksyms = coreoff; 599 Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd, 600 inbuf, bufsz); 601 602 /* 603 * Save the pfn table. 604 */ 605 coreoff = roundup(coreoff, pagesize); 606 corehdr.dump_pfn = coreoff; 607 Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff, 608 corefd, inbuf, bufsz); 609 610 /* 611 * Save the dump map. 612 */ 613 coreoff = roundup(coreoff, pagesize); 614 corehdr.dump_map = coreoff; 615 Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t), 616 &coreoff, corefd, inbuf, bufsz); 617 618 /* 619 * Save the data pages. 620 */ 621 coreoff = roundup(coreoff, pagesize); 622 corehdr.dump_data = coreoff; 623 if (datahdr.dump_data_csize != 0) 624 Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff, 625 corefd, inbuf, bufsz); 626 else 627 CopyPages(&coreoff, corefd, inbuf, bufsz); 628 629 /* 630 * Now write the modified dump header to front and end of the copy. 631 * Make it look like a valid dump device. 632 * 633 * From dumphdr.h: Two headers are written out: one at the 634 * beginning of the dump, and the other at the very end of the 635 * dump device. The terminal header is at a known location 636 * (end of device) so we can always find it. 637 * 638 * Pad with zeros to each DUMP_OFFSET boundary. 639 */ 640 (void) memset(inbuf, 0, DUMP_OFFSET); 641 642 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 643 if (nb > 0) { 644 Pwrite(corefd, inbuf, nb, coreoff); 645 coreoff += nb; 646 } 647 648 Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff); 649 coreoff += sizeof (corehdr); 650 651 Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff); 652 coreoff += sizeof (datahdr); 653 654 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 655 if (nb > 0) { 656 Pwrite(corefd, inbuf, nb, coreoff); 657 } 658 659 free(inbuf); 660 Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start); 661 662 /* 663 * Write out the modified dump header to the dump device. 664 * The dump device has been processed, so DF_VALID is clear. 665 */ 666 if (!filemode) 667 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 668 669 (void) close(corefd); 670 } 671 672 /* 673 * compressed streams 674 */ 675 typedef struct blockhdr blockhdr_t; 676 typedef struct block block_t; 677 678 struct blockhdr { 679 block_t *head; 680 block_t *tail; 681 }; 682 683 struct block { 684 block_t *next; 685 char *block; 686 int size; 687 }; 688 689 typedef enum streamstate { 690 STREAMSTART, 691 STREAMPAGES 692 } streamstate_t; 693 694 typedef struct stream { 695 streamstate_t state; 696 int init; 697 int tag; 698 int bound; 699 int nout; 700 char *blkbuf; 701 blockhdr_t blocks; 702 pgcnt_t pagenum; 703 pgcnt_t curpage; 704 pgcnt_t npages; 705 pgcnt_t done; 706 dumpcsize_t sc; 707 dumpstreamhdr_t sh; 708 } stream_t; 709 710 static stream_t *streams; 711 static stream_t *endstreams; 712 713 const int cs = sizeof (dumpcsize_t); 714 715 typedef struct tinfo { 716 pthread_t tid; 717 int corefd; 718 } tinfo_t; 719 720 static int threads_stop; 721 static int threads_active; 722 static tinfo_t *tinfo; 723 static tinfo_t *endtinfo; 724 725 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; 726 static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER; 727 static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER; 728 static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER; 729 730 static blockhdr_t freeblocks; 731 732 static void 733 enqt(blockhdr_t *h, block_t *b) 734 { 735 b->next = NULL; 736 if (h->tail == NULL) 737 h->head = b; 738 else 739 h->tail->next = b; 740 h->tail = b; 741 } 742 743 static block_t * 744 deqh(blockhdr_t *h) 745 { 746 block_t *b = h->head; 747 748 if (b != NULL) { 749 h->head = b->next; 750 if (h->head == NULL) 751 h->tail = NULL; 752 } 753 return (b); 754 } 755 756 static void *runstreams(void *arg); 757 758 static void 759 initstreams(int corefd, int nstreams, int maxcsize) 760 { 761 int nthreads; 762 int nblocks; 763 int i; 764 block_t *b; 765 tinfo_t *t; 766 767 nthreads = sysconf(_SC_NPROCESSORS_ONLN); 768 if (nstreams < nthreads) 769 nthreads = nstreams; 770 if (nthreads < 1) 771 nthreads = 1; 772 nblocks = nthreads * 2; 773 774 tinfo = Zalloc(nthreads * sizeof (tinfo_t)); 775 endtinfo = &tinfo[nthreads]; 776 777 /* init streams */ 778 streams = Zalloc(nstreams * sizeof (stream_t)); 779 endstreams = &streams[nstreams]; 780 781 /* init stream block buffers */ 782 for (i = 0; i < nblocks; i++) { 783 b = Zalloc(sizeof (block_t)); 784 b->block = Zalloc(maxcsize); 785 enqt(&freeblocks, b); 786 } 787 788 /* init worker threads */ 789 (void) pthread_mutex_lock(&lock); 790 threads_active = 1; 791 threads_stop = 0; 792 for (t = tinfo; t != endtinfo; t++) { 793 t->corefd = dup(corefd); 794 if (t->corefd < 0) { 795 nthreads = t - tinfo; 796 endtinfo = t; 797 break; 798 } 799 if (pthread_create(&t->tid, NULL, runstreams, t) != 0) 800 logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s", 801 strerror(errno)); 802 } 803 (void) pthread_mutex_unlock(&lock); 804 } 805 806 static void 807 sbarrier() 808 { 809 stream_t *s; 810 811 (void) pthread_mutex_lock(&lock); 812 for (s = streams; s != endstreams; s++) { 813 while (s->bound || s->blocks.head != NULL) 814 (void) pthread_cond_wait(&cvbarrier, &lock); 815 } 816 (void) pthread_mutex_unlock(&lock); 817 } 818 819 static void 820 stopstreams() 821 { 822 tinfo_t *t; 823 824 if (threads_active) { 825 sbarrier(); 826 (void) pthread_mutex_lock(&lock); 827 threads_stop = 1; 828 (void) pthread_cond_signal(&cvwork); 829 (void) pthread_mutex_unlock(&lock); 830 for (t = tinfo; t != endtinfo; t++) 831 (void) pthread_join(t->tid, NULL); 832 free(tinfo); 833 tinfo = NULL; 834 threads_active = 0; 835 } 836 } 837 838 static block_t * 839 getfreeblock() 840 { 841 block_t *b; 842 843 (void) pthread_mutex_lock(&lock); 844 while ((b = deqh(&freeblocks)) == NULL) 845 (void) pthread_cond_wait(&cvfree, &lock); 846 (void) pthread_mutex_unlock(&lock); 847 return (b); 848 } 849 850 /* data page offset from page number */ 851 #define BTOP(b) ((b) >> dumphdr.dump_pageshift) 852 #define PTOB(p) ((p) << dumphdr.dump_pageshift) 853 #define DATAOFF(p) (corehdr.dump_data + PTOB(p)) 854 855 /* check for coreblksize boundary */ 856 static int 857 isblkbnd(pgcnt_t pgnum) 858 { 859 return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0); 860 } 861 862 static int 863 iszpage(char *buf) 864 { 865 size_t sz; 866 uint64_t *pl; 867 868 /*LINTED:E_BAD_PTR_CAST_ALIGN*/ 869 pl = (uint64_t *)(buf); 870 for (sz = 0; sz < pagesize; sz += sizeof (*pl)) 871 if (*pl++ != 0) 872 return (0); 873 return (1); 874 } 875 876 volatile uint_t *hist; 877 878 /* write pages to the core file */ 879 static void 880 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np) 881 { 882 atomic_inc_uint(&hist[np]); 883 if (np > 0) 884 Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum)); 885 } 886 887 /* 888 * Process one lzjb block. 889 * No object (stream header or page) will be split over a block boundary. 890 */ 891 static void 892 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz) 893 { 894 int in = 0; 895 int csize; 896 int doflush; 897 char *out; 898 size_t dsize; 899 dumpcsize_t sc; 900 dumpstreamhdr_t sh; 901 902 if (!s->init) { 903 s->init = 1; 904 if (s->blkbuf == NULL) 905 s->blkbuf = Zalloc(coreblksize); 906 s->state = STREAMSTART; 907 } 908 while (in < blocksz) { 909 switch (s->state) { 910 case STREAMSTART: 911 (void) memcpy(&sh, block + in, sizeof (sh)); 912 in += sizeof (sh); 913 if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0) 914 logprint(SC_SL_ERR | SC_EXIT_ERR, 915 "LZJB STREAMSTART: bad stream header"); 916 if (sh.stream_npages > datahdr.dump_maxrange) 917 logprint(SC_SL_ERR | SC_EXIT_ERR, 918 "LZJB STREAMSTART: bad range: %d > %d", 919 sh.stream_npages, datahdr.dump_maxrange); 920 s->pagenum = sh.stream_pagenum; 921 s->npages = sh.stream_npages; 922 s->curpage = s->pagenum; 923 s->nout = 0; 924 s->done = 0; 925 s->state = STREAMPAGES; 926 break; 927 case STREAMPAGES: 928 (void) memcpy(&sc, block + in, cs); 929 in += cs; 930 csize = DUMP_GET_CSIZE(sc); 931 if (csize > pagesize) 932 logprint(SC_SL_ERR | SC_EXIT_ERR, 933 "LZJB STREAMPAGES: bad csize=%d", csize); 934 935 out = s->blkbuf + PTOB(s->nout); 936 dsize = decompress(block + in, out, csize, pagesize); 937 938 if (dsize != pagesize) 939 logprint(SC_SL_ERR | SC_EXIT_ERR, 940 "LZJB STREAMPAGES: dsize %d != pagesize %d", 941 dsize, pagesize); 942 943 in += csize; 944 atomic_inc_64(&saved); 945 946 doflush = 0; 947 if (s->nout == 0 && iszpage(out)) { 948 doflush = 1; 949 atomic_inc_64(&zpages); 950 } else if (++s->nout >= BTOP(coreblksize) || 951 isblkbnd(s->curpage + s->nout)) { 952 doflush = 1; 953 } 954 if (++s->done >= s->npages) { 955 s->state = STREAMSTART; 956 doflush = 1; 957 } 958 if (doflush) { 959 putpage(corefd, s->blkbuf, s->curpage, s->nout); 960 s->nout = 0; 961 s->curpage = s->pagenum + s->done; 962 } 963 break; 964 } 965 } 966 } 967 968 /* report progress */ 969 static void 970 report_progress() 971 { 972 int sec, percent; 973 974 if (!interactive) 975 return; 976 977 percent = saved * 100LL / corehdr.dump_npages; 978 sec = (gethrtime() - startts) / NANOSEC; 979 if (percent > percent_done || sec > sec_done) { 980 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60, 981 percent); 982 (void) fflush(stdout); 983 sec_done = sec; 984 percent_done = percent; 985 } 986 } 987 988 /* thread body */ 989 static void * 990 runstreams(void *arg) 991 { 992 tinfo_t *t = arg; 993 stream_t *s; 994 block_t *b; 995 int bound; 996 997 (void) pthread_mutex_lock(&lock); 998 while (!threads_stop) { 999 bound = 0; 1000 for (s = streams; s != endstreams; s++) { 1001 if (s->bound || s->blocks.head == NULL) 1002 continue; 1003 s->bound = 1; 1004 bound = 1; 1005 (void) pthread_cond_signal(&cvwork); 1006 while (s->blocks.head != NULL) { 1007 b = deqh(&s->blocks); 1008 (void) pthread_mutex_unlock(&lock); 1009 1010 lzjbblock(t->corefd, s, b->block, 1011 b->size); 1012 1013 (void) pthread_mutex_lock(&lock); 1014 enqt(&freeblocks, b); 1015 (void) pthread_cond_signal(&cvfree); 1016 1017 report_progress(); 1018 } 1019 s->bound = 0; 1020 (void) pthread_cond_signal(&cvbarrier); 1021 } 1022 if (!bound && !threads_stop) 1023 (void) pthread_cond_wait(&cvwork, &lock); 1024 } 1025 (void) close(t->corefd); 1026 (void) pthread_cond_signal(&cvwork); 1027 (void) pthread_mutex_unlock(&lock); 1028 return (arg); 1029 } 1030 1031 /* 1032 * Process compressed pages. 1033 * 1034 * The old format, now called single-threaded lzjb, is a 32-bit size 1035 * word followed by 'size' bytes of lzjb compression data for one 1036 * page. The new format extends this by storing a 12-bit "tag" in the 1037 * upper bits of the size word. When the size word is pagesize or 1038 * less, it is assumed to be one lzjb page. When the size word is 1039 * greater than pagesize, it is assumed to be a "stream block", 1040 * belonging to up to 4095 streams. In practice, the number of streams 1041 * is set to one less than the number of CPUs running at crash 1042 * time. One CPU processes the crash dump, the remaining CPUs 1043 * separately process groups of data pages. 1044 * 1045 * savecore creates a thread per stream, but never more threads than 1046 * the number of CPUs running savecore. This is because savecore can 1047 * be processing a crash file from a remote machine, which may have 1048 * more CPUs. 1049 * 1050 * When the kernel uses parallel compression we expect a series of 128KB 1051 * blocks of compression data. In this case, each block has a "tag" in 1052 * the range 1-4095. Each block is handed off to the threads running 1053 * "runstreams". These threads, in turn, process the compression data 1054 * for groups of pages. Groups of pages are delimited by a "stream header", 1055 * which indicates a starting pfn and number of pages. When a stream block 1056 * has been read, the condition variable "cvwork" is signalled, which causes 1057 * one of the available threads to wake up and process the stream. 1058 * 1059 * In the parallel case there will be streams blocks encoding all data 1060 * pages. The stream of blocks is terminated by a zero size 1061 * word. There can be a few lzjb pages tacked on the end, depending on 1062 * the architecture. The sbarrier function ensures that all stream 1063 * blocks have been processed so that the page number for the few 1064 * single pages at the end can be known. 1065 */ 1066 static void 1067 decompress_pages(int corefd) 1068 { 1069 char *cpage = NULL; 1070 char *dpage = NULL; 1071 char *out; 1072 pgcnt_t curpage = 0; 1073 block_t *b; 1074 FILE *dumpf; 1075 FILE *tracef = NULL; 1076 stream_t *s; 1077 size_t dsize; 1078 size_t insz = FBUFSIZE; 1079 char *inbuf = Zalloc(insz); 1080 uint32_t csize; 1081 dumpcsize_t dcsize; 1082 int nstreams = datahdr.dump_nstreams; 1083 int maxcsize = datahdr.dump_maxcsize; 1084 int nout = 0, tag, doflush; 1085 1086 dumpf = fdopen(dup(dumpfd), "rb"); 1087 if (dumpf == NULL) 1088 logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s", 1089 strerror(errno)); 1090 1091 (void) setvbuf(dumpf, inbuf, _IOFBF, insz); 1092 Fseek(dumphdr.dump_data, dumpf); 1093 1094 /*LINTED: E_CONSTANT_CONDITION*/ 1095 while (1) { 1096 1097 /* 1098 * The csize word delimits stream blocks. 1099 * See dumphdr.h for a description. 1100 */ 1101 Fread(&dcsize, sizeof (dcsize), dumpf); 1102 1103 tag = DUMP_GET_TAG(dcsize); 1104 csize = DUMP_GET_CSIZE(dcsize); 1105 1106 if (tag != 0) { /* a stream block */ 1107 1108 if (nstreams == 0) 1109 logprint(SC_SL_ERR | SC_EXIT_ERR, 1110 "starting data header is missing"); 1111 1112 if (tag > nstreams) 1113 logprint(SC_SL_ERR | SC_EXIT_ERR, 1114 "stream tag %d not in range 1..%d", 1115 tag, nstreams); 1116 1117 if (csize > maxcsize) 1118 logprint(SC_SL_ERR | SC_EXIT_ERR, 1119 "block size 0x%x > max csize 0x%x", 1120 csize, maxcsize); 1121 1122 if (streams == NULL) 1123 initstreams(corefd, nstreams, maxcsize); 1124 s = &streams[tag - 1]; 1125 s->tag = tag; 1126 1127 b = getfreeblock(); 1128 b->size = csize; 1129 Fread(b->block, csize, dumpf); 1130 1131 (void) pthread_mutex_lock(&lock); 1132 enqt(&s->blocks, b); 1133 if (!s->bound) 1134 (void) pthread_cond_signal(&cvwork); 1135 (void) pthread_mutex_unlock(&lock); 1136 1137 } else if (csize > 0) { /* one lzjb page */ 1138 1139 if (csize > pagesize) 1140 logprint(SC_SL_ERR | SC_EXIT_ERR, 1141 "csize 0x%x > pagesize 0x%x", 1142 csize, pagesize); 1143 1144 if (cpage == NULL) 1145 cpage = Zalloc(pagesize); 1146 if (dpage == NULL) { 1147 dpage = Zalloc(coreblksize); 1148 nout = 0; 1149 } 1150 1151 Fread(cpage, csize, dumpf); 1152 1153 out = dpage + PTOB(nout); 1154 dsize = decompress(cpage, out, csize, pagesize); 1155 1156 if (dsize != pagesize) 1157 logprint(SC_SL_ERR | SC_EXIT_ERR, 1158 "dsize 0x%x != pagesize 0x%x", 1159 dsize, pagesize); 1160 1161 /* 1162 * wait for streams to flush so that 'saved' is correct 1163 */ 1164 if (threads_active) 1165 sbarrier(); 1166 1167 doflush = 0; 1168 if (nout == 0) 1169 curpage = saved; 1170 1171 atomic_inc_64(&saved); 1172 1173 if (nout == 0 && iszpage(dpage)) { 1174 doflush = 1; 1175 atomic_inc_64(&zpages); 1176 } else if (++nout >= BTOP(coreblksize) || 1177 isblkbnd(curpage + nout) || 1178 saved >= dumphdr.dump_npages) { 1179 doflush = 1; 1180 } 1181 1182 if (doflush) { 1183 putpage(corefd, dpage, curpage, nout); 1184 nout = 0; 1185 } 1186 1187 report_progress(); 1188 1189 /* 1190 * Non-streams lzjb does not use blocks. Stop 1191 * here if all the pages have been decompressed. 1192 */ 1193 if (saved >= dumphdr.dump_npages) 1194 break; 1195 1196 } else { 1197 break; /* end of data */ 1198 } 1199 } 1200 1201 stopstreams(); 1202 if (tracef != NULL) 1203 (void) fclose(tracef); 1204 (void) fclose(dumpf); 1205 if (inbuf) 1206 free(inbuf); 1207 if (cpage) 1208 free(cpage); 1209 if (dpage) 1210 free(dpage); 1211 if (streams) 1212 free(streams); 1213 } 1214 1215 static void 1216 build_corefile(const char *namelist, const char *corefile) 1217 { 1218 size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t); 1219 size_t ksyms_size = dumphdr.dump_ksyms_size; 1220 size_t ksyms_csize = dumphdr.dump_ksyms_csize; 1221 pfn_t *pfn_table; 1222 char *ksyms_base = Zalloc(ksyms_size); 1223 char *ksyms_cbase = Zalloc(ksyms_csize); 1224 size_t ksyms_dsize; 1225 Stat_t st; 1226 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1227 int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1228 1229 (void) printf("Constructing namelist %s/%s\n", savedir, namelist); 1230 1231 /* 1232 * Determine the optimum write size for the core file 1233 */ 1234 Fstat(corefd, &st, corefile); 1235 1236 if (verbose > 1) 1237 (void) printf("%s: %ld block size\n", corefile, 1238 (long)st.st_blksize); 1239 coreblksize = st.st_blksize; 1240 if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize)) 1241 coreblksize = MINCOREBLKSIZE; 1242 1243 hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1); 1244 1245 /* 1246 * This dump file is now uncompressed 1247 */ 1248 corehdr.dump_flags &= ~DF_COMPRESSED; 1249 1250 /* 1251 * Read in the compressed symbol table, copy it to corefile, 1252 * decompress it, and write the result to namelist. 1253 */ 1254 corehdr.dump_ksyms = pagesize; 1255 Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms); 1256 Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms); 1257 1258 ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize, 1259 ksyms_size); 1260 if (ksyms_dsize != ksyms_size) 1261 logprint(SC_SL_WARN, 1262 "bad data in symbol table, %lu of %lu bytes saved", 1263 ksyms_dsize, ksyms_size); 1264 1265 Pwrite(namefd, ksyms_base, ksyms_size, 0); 1266 (void) close(namefd); 1267 free(ksyms_cbase); 1268 free(ksyms_base); 1269 1270 (void) printf("Constructing corefile %s/%s\n", savedir, corefile); 1271 1272 /* 1273 * Read in and write out the pfn table. 1274 */ 1275 pfn_table = Zalloc(pfn_table_size); 1276 corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize); 1277 Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn); 1278 Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn); 1279 1280 /* 1281 * Convert the raw translation data into a hashed dump map. 1282 */ 1283 corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize); 1284 build_dump_map(corefd, pfn_table); 1285 free(pfn_table); 1286 1287 /* 1288 * Decompress the pages 1289 */ 1290 decompress_pages(corefd); 1291 (void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved, 1292 dumphdr.dump_npages); 1293 1294 if (verbose) 1295 (void) printf("%ld (%ld%%) zero pages were not written\n", 1296 (pgcnt_t)zpages, (pgcnt_t)zpages * 100 / 1297 dumphdr.dump_npages); 1298 1299 if (saved != dumphdr.dump_npages) 1300 logprint(SC_SL_WARN, "bad data after page %ld", saved); 1301 1302 /* 1303 * Write out the modified dump headers. 1304 */ 1305 Pwrite(corefd, &corehdr, sizeof (corehdr), 0); 1306 if (!filemode) 1307 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 1308 1309 (void) close(corefd); 1310 } 1311 1312 /* 1313 * When the system panics, the kernel saves all undelivered messages (messages 1314 * that never made it out to syslogd(1M)) in the dump. At a mimimum, the 1315 * panic message itself will always fall into this category. Upon reboot, 1316 * the syslog startup script runs savecore -m to recover these messages. 1317 * 1318 * To do this, we read the unsent messages from the dump and send them to 1319 * /dev/conslog on priority band 1. This has the effect of prepending them 1320 * to any already-accumulated messages in the console backlog, thus preserving 1321 * temporal ordering across the reboot. 1322 * 1323 * Note: since savecore -m is used *only* for this purpose, it does *not* 1324 * attempt to save the crash dump. The dump will be saved later, after 1325 * syslogd(1M) starts, by the savecore startup script. 1326 */ 1327 static int 1328 message_save(void) 1329 { 1330 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE); 1331 offset_t ldoff; 1332 log_dump_t ld; 1333 log_ctl_t lc; 1334 struct strbuf ctl, dat; 1335 int logfd; 1336 1337 logfd = Open("/dev/conslog", O_WRONLY, 0644); 1338 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1339 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1340 1341 ctl.buf = (void *)&lc; 1342 ctl.len = sizeof (log_ctl_t); 1343 1344 dat.buf = Zalloc(DUMP_LOGSIZE); 1345 1346 for (;;) { 1347 ldoff = dumpoff; 1348 1349 Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff); 1350 dumpoff += sizeof (log_dump_t); 1351 dat.len = ld.ld_msgsize; 1352 1353 if (ld.ld_magic == 0) 1354 break; 1355 1356 if (ld.ld_magic != LOG_MAGIC) 1357 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1358 "bad magic %x", ld.ld_magic); 1359 1360 if (dat.len >= DUMP_LOGSIZE) 1361 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1362 "bad size %d", ld.ld_msgsize); 1363 1364 Pread(dumpfd, ctl.buf, ctl.len, dumpoff); 1365 dumpoff += ctl.len; 1366 1367 if (ld.ld_csum != checksum32(ctl.buf, ctl.len)) 1368 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1369 "bad log_ctl checksum"); 1370 1371 lc.flags |= SL_LOGONLY; 1372 1373 Pread(dumpfd, dat.buf, dat.len, dumpoff); 1374 dumpoff += dat.len; 1375 1376 if (ld.ld_msum != checksum32(dat.buf, dat.len)) 1377 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1378 "bad message checksum"); 1379 1380 if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1) 1381 logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s", 1382 strerror(errno)); 1383 1384 ld.ld_magic = 0; /* clear magic so we never save twice */ 1385 Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff); 1386 } 1387 return (0); 1388 } 1389 1390 static long 1391 getbounds(const char *f) 1392 { 1393 long b = -1; 1394 const char *p = strrchr(f, '/'); 1395 1396 if (p == NULL || strncmp(p, "vmdump", 6) != 0) 1397 p = strstr(f, "vmdump"); 1398 1399 if (p != NULL && *p == '/') 1400 p++; 1401 1402 (void) sscanf(p ? p : f, "vmdump.%ld", &b); 1403 1404 return (b); 1405 } 1406 1407 static void 1408 stack_retrieve(char *stack) 1409 { 1410 summary_dump_t sd; 1411 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE + 1412 DUMP_ERPTSIZE); 1413 dumpoff -= DUMP_SUMMARYSIZE; 1414 1415 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1416 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1417 1418 Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff); 1419 dumpoff += sizeof (summary_dump_t); 1420 1421 if (sd.sd_magic == 0) { 1422 *stack = '\0'; 1423 return; 1424 } 1425 1426 if (sd.sd_magic != SUMMARY_MAGIC) { 1427 *stack = '\0'; 1428 logprint(SC_SL_NONE | SC_IF_VERBOSE, 1429 "bad summary magic %x", sd.sd_magic); 1430 return; 1431 } 1432 Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff); 1433 if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE)) 1434 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum"); 1435 } 1436 1437 static void 1438 raise_event(enum sc_event_type evidx, char *warn_string) 1439 { 1440 uint32_t pl = sc_event[evidx].sce_payload; 1441 char panic_stack[STACK_BUF_SIZE]; 1442 nvlist_t *attr = NULL; 1443 char uuidbuf[36 + 1]; 1444 int err = 0; 1445 1446 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0) 1447 goto publish; /* try to send payload-free event */ 1448 1449 if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL) 1450 err |= nvlist_add_string(attr, "dumpdir", savedir); 1451 1452 if (pl & SC_PAYLOAD_INSTANCE && bounds != -1) 1453 err |= nvlist_add_int64(attr, "instance", bounds); 1454 1455 if (pl & SC_PAYLOAD_ISCOMPRESSED) { 1456 err |= nvlist_add_boolean_value(attr, "compressed", 1457 csave ? B_TRUE : B_FALSE); 1458 } 1459 1460 if (pl & SC_PAYLOAD_DUMPADM_EN) { 1461 char *disabled = defread("DUMPADM_ENABLE=no"); 1462 1463 err |= nvlist_add_boolean_value(attr, "savecore-enabled", 1464 disabled ? B_FALSE : B_TRUE); 1465 } 1466 1467 if (pl & SC_PAYLOAD_IMAGEUUID) { 1468 (void) strncpy(uuidbuf, corehdr.dump_uuid, 36); 1469 uuidbuf[36] = '\0'; 1470 err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf); 1471 } 1472 1473 if (pl & SC_PAYLOAD_CRASHTIME) { 1474 err |= nvlist_add_int64(attr, "crashtime", 1475 (int64_t)corehdr.dump_crashtime); 1476 } 1477 1478 if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') { 1479 err |= nvlist_add_string(attr, "panicstr", 1480 corehdr.dump_panicstring); 1481 } 1482 1483 if (pl & SC_PAYLOAD_PANICSTACK) { 1484 stack_retrieve(panic_stack); 1485 1486 if (panic_stack[0] != '\0') { 1487 /* 1488 * The summary page may not be present if the dump 1489 * was previously recorded compressed. 1490 */ 1491 (void) nvlist_add_string(attr, "panicstack", 1492 panic_stack); 1493 } 1494 } 1495 1496 /* add warning string if this is an ireport for dump failure */ 1497 if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL) 1498 (void) nvlist_add_string(attr, "failure-reason", warn_string); 1499 1500 if (pl & SC_PAYLOAD_DUMPCOMPLETE) 1501 err |= nvlist_add_boolean_value(attr, "dump-incomplete", 1502 dump_incomplete ? B_TRUE : B_FALSE); 1503 1504 if (pl & SC_PAYLOAD_FM_PANIC) { 1505 err |= nvlist_add_boolean_value(attr, "fm-panic", 1506 fm_panic ? B_TRUE : B_FALSE); 1507 } 1508 1509 if (pl & SC_PAYLOAD_JUSTCHECKING) { 1510 err |= nvlist_add_boolean_value(attr, "will-attempt-savecore", 1511 cflag ? B_FALSE : B_TRUE); 1512 } 1513 1514 if (err) 1515 logprint(SC_SL_WARN, "Errors while constructing '%s' " 1516 "event payload; will try to publish anyway."); 1517 publish: 1518 if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS, 1519 "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI, 1520 attr) != FMEV_SUCCESS) { 1521 logprint(SC_SL_ERR, "failed to publish '%s' event: %s", 1522 sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno)); 1523 nvlist_free(attr); 1524 } 1525 1526 } 1527 1528 1529 int 1530 main(int argc, char *argv[]) 1531 { 1532 int i, c, bfd; 1533 Stat_t st; 1534 struct rlimit rl; 1535 long filebounds = -1; 1536 char namelist[30], corefile[30], boundstr[30]; 1537 dumpfile = NULL; 1538 1539 startts = gethrtime(); 1540 1541 (void) getrlimit(RLIMIT_NOFILE, &rl); 1542 rl.rlim_cur = rl.rlim_max; 1543 (void) setrlimit(RLIMIT_NOFILE, &rl); 1544 1545 openlog(progname, LOG_ODELAY, LOG_AUTH); 1546 1547 (void) defopen("/etc/dumpadm.conf"); 1548 savedir = defread("DUMPADM_SAVDIR="); 1549 if (savedir != NULL) 1550 savedir = strdup(savedir); 1551 1552 while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) { 1553 switch (c) { 1554 case 'L': 1555 livedump++; 1556 break; 1557 case 'v': 1558 verbose++; 1559 break; 1560 case 'c': 1561 cflag++; 1562 break; 1563 case 'd': 1564 disregard_valid_flag++; 1565 break; 1566 case 'm': 1567 mflag++; 1568 break; 1569 case 'f': 1570 dumpfile = optarg; 1571 filebounds = getbounds(dumpfile); 1572 break; 1573 case '?': 1574 usage(); 1575 } 1576 } 1577 1578 /* 1579 * If doing something other than extracting an existing dump (i.e. 1580 * dumpfile has been provided as an option), the user must be root. 1581 */ 1582 if (geteuid() != 0 && dumpfile == NULL) { 1583 (void) fprintf(stderr, "%s: %s %s\n", progname, 1584 gettext("you must be root to use"), progname); 1585 exit(1); 1586 } 1587 1588 interactive = isatty(STDOUT_FILENO); 1589 1590 if (cflag && livedump) 1591 usage(); 1592 1593 if (dumpfile == NULL || livedump) 1594 dumpfd = Open("/dev/dump", O_RDONLY, 0444); 1595 1596 if (dumpfile == NULL) { 1597 dumpfile = Zalloc(MAXPATHLEN); 1598 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) { 1599 have_dumpfile = B_FALSE; 1600 logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR, 1601 "no dump device configured"); 1602 } 1603 } 1604 1605 if (mflag) 1606 return (message_save()); 1607 1608 if (optind == argc - 1) 1609 savedir = argv[optind]; 1610 1611 if (savedir == NULL || optind < argc - 1) 1612 usage(); 1613 1614 if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1) 1615 logprint(SC_SL_NONE | SC_EXIT_ERR, 1616 "dedicated dump device required"); 1617 1618 (void) close(dumpfd); 1619 dumpfd = -1; 1620 1621 Stat(dumpfile, &st); 1622 1623 filemode = S_ISREG(st.st_mode); 1624 1625 if (!filemode && defread("DUMPADM_CSAVE=off") == NULL) 1626 csave = 1; 1627 1628 read_dumphdr(); 1629 1630 /* 1631 * We want this message to go to the log file, but not the console. 1632 * There's no good way to do that with the existing syslog facility. 1633 * We could extend it to handle this, but there doesn't seem to be 1634 * a general need for it, so we isolate the complexity here instead. 1635 */ 1636 if (dumphdr.dump_panicstring[0] != '\0') { 1637 int logfd = Open("/dev/conslog", O_WRONLY, 0644); 1638 log_ctl_t lc; 1639 struct strbuf ctl, dat; 1640 char msg[DUMP_PANICSIZE + 100]; 1641 char fmt[] = "reboot after panic: %s"; 1642 uint32_t msgid; 1643 1644 STRLOG_MAKE_MSGID(fmt, msgid); 1645 1646 /* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */ 1647 (void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ", 1648 progname, msgid); 1649 /* LINTED: E_SEC_PRINTF_VAR_FMT */ 1650 (void) sprintf(msg + strlen(msg), fmt, 1651 dumphdr.dump_panicstring); 1652 1653 lc.pri = LOG_AUTH | LOG_ERR; 1654 lc.flags = SL_CONSOLE | SL_LOGONLY; 1655 lc.level = 0; 1656 1657 ctl.buf = (void *)&lc; 1658 ctl.len = sizeof (log_ctl_t); 1659 1660 dat.buf = (void *)msg; 1661 dat.len = strlen(msg) + 1; 1662 1663 (void) putmsg(logfd, &ctl, &dat, 0); 1664 (void) close(logfd); 1665 } 1666 1667 if ((dumphdr.dump_flags & DF_COMPLETE) == 0) { 1668 logprint(SC_SL_WARN, "incomplete dump on dump device"); 1669 dump_incomplete = B_TRUE; 1670 } 1671 1672 if (dumphdr.dump_fm_panic) 1673 fm_panic = B_TRUE; 1674 1675 /* 1676 * We have a valid dump on a dump device and know as much about 1677 * it as we're going to at this stage. Raise an event for 1678 * logging and so that FMA can open a case for this panic. 1679 * Avoid this step for FMA-initiated panics - FMA will replay 1680 * ereports off the dump device independently of savecore and 1681 * will make a diagnosis, so we don't want to open two cases 1682 * for the same event. Also avoid raising an event for a 1683 * livedump, or when we inflating a compressed dump. 1684 */ 1685 if (!fm_panic && !livedump && !filemode) 1686 raise_event(SC_EVENT_DUMP_PENDING, NULL); 1687 1688 logprint(SC_SL_WARN, "System dump time: %s", 1689 ctime(&dumphdr.dump_crashtime)); 1690 1691 /* 1692 * Option -c is designed for use from svc-dumpadm where we know 1693 * that dumpadm -n is in effect but run savecore -c just to 1694 * get the above dump_pending_on_device event raised. If it is run 1695 * interactively then just print further panic details. 1696 */ 1697 if (cflag) { 1698 char *disabled = defread("DUMPADM_ENABLE=no"); 1699 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR; 1700 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND; 1701 1702 logprint(lvl | ec, 1703 "Panic crashdump pending on dump device%s " 1704 "run savecore(1M) manually to extract. " 1705 "Image UUID %s%s.", 1706 disabled ? " but dumpadm -n in effect;" : ";", 1707 corehdr.dump_uuid, 1708 fm_panic ? "(fault-management initiated)" : ""); 1709 /*NOTREACHED*/ 1710 } 1711 1712 if (chdir(savedir) == -1) 1713 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s", 1714 savedir, strerror(errno)); 1715 1716 check_space(csave); 1717 1718 if (filebounds < 0) 1719 bounds = read_number_from_file("bounds", 0); 1720 else 1721 bounds = filebounds; 1722 1723 if (csave) { 1724 size_t metrics_size = datahdr.dump_metrics; 1725 1726 (void) sprintf(corefile, "vmdump.%ld", bounds); 1727 1728 datahdr.dump_metrics = 0; 1729 1730 logprint(SC_SL_ERR, 1731 "Saving compressed system crash dump in %s/%s", 1732 savedir, corefile); 1733 1734 copy_crashfile(corefile); 1735 1736 /* 1737 * Raise a fault management event that indicates the system 1738 * has panicked. We know a reasonable amount about the 1739 * condition at this time, but the dump is still compressed. 1740 */ 1741 if (!livedump && !fm_panic) 1742 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1743 1744 if (metrics_size > 0) { 1745 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1746 FILE *mfile = fopen(METRICSFILE, "a"); 1747 char *metrics = Zalloc(metrics_size + 1); 1748 1749 Pread(dumpfd, metrics, metrics_size, endoff + 1750 sizeof (dumphdr) + sizeof (datahdr)); 1751 1752 if (sec < 1) 1753 sec = 1; 1754 1755 if (mfile == NULL) { 1756 logprint(SC_SL_WARN, 1757 "Can't create %s:\n%s", 1758 METRICSFILE, metrics); 1759 } else { 1760 (void) fprintf(mfile, "[[[[,,,"); 1761 for (i = 0; i < argc; i++) 1762 (void) fprintf(mfile, "%s ", argv[i]); 1763 (void) fprintf(mfile, "\n"); 1764 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1765 dumphdr.dump_utsname.sysname, 1766 dumphdr.dump_utsname.nodename, 1767 dumphdr.dump_utsname.release, 1768 dumphdr.dump_utsname.version, 1769 dumphdr.dump_utsname.machine); 1770 (void) fprintf(mfile, ",,,%s dump time %s\n", 1771 dumphdr.dump_flags & DF_LIVE ? "Live" : 1772 "Crash", ctime(&dumphdr.dump_crashtime)); 1773 (void) fprintf(mfile, ",,,%s/%s\n", savedir, 1774 corefile); 1775 (void) fprintf(mfile, "Metrics:\n%s\n", 1776 metrics); 1777 (void) fprintf(mfile, "Copy pages,%ld\n", 1778 dumphdr. dump_npages); 1779 (void) fprintf(mfile, "Copy time,%d\n", sec); 1780 (void) fprintf(mfile, "Copy pages/sec,%ld\n", 1781 dumphdr.dump_npages / sec); 1782 (void) fprintf(mfile, "]]]]\n"); 1783 (void) fclose(mfile); 1784 } 1785 free(metrics); 1786 } 1787 1788 logprint(SC_SL_ERR, 1789 "Decompress the crash dump with " 1790 "\n'savecore -vf %s/%s'", 1791 savedir, corefile); 1792 1793 } else { 1794 (void) sprintf(namelist, "unix.%ld", bounds); 1795 (void) sprintf(corefile, "vmcore.%ld", bounds); 1796 1797 if (interactive && filebounds >= 0 && access(corefile, F_OK) 1798 == 0) 1799 logprint(SC_SL_NONE | SC_EXIT_ERR, 1800 "%s already exists: remove with " 1801 "'rm -f %s/{unix,vmcore}.%ld'", 1802 corefile, savedir, bounds); 1803 1804 logprint(SC_SL_ERR, 1805 "saving system crash dump in %s/{unix,vmcore}.%ld", 1806 savedir, bounds); 1807 1808 build_corefile(namelist, corefile); 1809 1810 if (!livedump && !filemode && !fm_panic) 1811 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1812 1813 if (access(METRICSFILE, F_OK) == 0) { 1814 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1815 FILE *mfile = fopen(METRICSFILE, "a"); 1816 1817 if (sec < 1) 1818 sec = 1; 1819 1820 if (mfile == NULL) { 1821 logprint(SC_SL_WARN, 1822 "Can't create %s: %s", 1823 METRICSFILE, strerror(errno)); 1824 } else { 1825 (void) fprintf(mfile, "[[[[,,,"); 1826 for (i = 0; i < argc; i++) 1827 (void) fprintf(mfile, "%s ", argv[i]); 1828 (void) fprintf(mfile, "\n"); 1829 (void) fprintf(mfile, ",,,%s/%s\n", savedir, 1830 corefile); 1831 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1832 dumphdr.dump_utsname.sysname, 1833 dumphdr.dump_utsname.nodename, 1834 dumphdr.dump_utsname.release, 1835 dumphdr.dump_utsname.version, 1836 dumphdr.dump_utsname.machine); 1837 (void) fprintf(mfile, 1838 "Uncompress pages,%"PRIu64"\n", saved); 1839 (void) fprintf(mfile, "Uncompress time,%d\n", 1840 sec); 1841 (void) fprintf(mfile, "Uncompress pages/sec,%" 1842 PRIu64"\n", saved / sec); 1843 (void) fprintf(mfile, "]]]]\n"); 1844 (void) fclose(mfile); 1845 } 1846 } 1847 } 1848 1849 if (filebounds < 0) { 1850 (void) sprintf(boundstr, "%ld\n", bounds + 1); 1851 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644); 1852 Pwrite(bfd, boundstr, strlen(boundstr), 0); 1853 (void) close(bfd); 1854 } 1855 1856 if (verbose) { 1857 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1858 1859 (void) printf("%d:%02d dump %s is done\n", 1860 sec / 60, sec % 60, 1861 csave ? "copy" : "decompress"); 1862 } 1863 1864 if (verbose > 1 && hist != NULL) { 1865 int i, nw; 1866 1867 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i) 1868 nw += hist[i] * i; 1869 (void) printf("pages count %%\n"); 1870 for (i = 0; i <= BTOP(coreblksize); ++i) { 1871 if (hist[i] == 0) 1872 continue; 1873 (void) printf("%3d %5u %6.2f\n", 1874 i, hist[i], 100.0 * hist[i] * i / nw); 1875 } 1876 } 1877 1878 (void) close(dumpfd); 1879 dumpfd = -1; 1880 1881 return (0); 1882 }