1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 */
25 /*
26 * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <deflt.h>
37 #include <time.h>
38 #include <syslog.h>
39 #include <stropts.h>
40 #include <pthread.h>
41 #include <limits.h>
42 #include <atomic.h>
43 #include <libnvpair.h>
44 #include <libintl.h>
45 #include <sys/mem.h>
46 #include <sys/statvfs.h>
47 #include <sys/dumphdr.h>
48 #include <sys/dumpadm.h>
49 #include <sys/compress.h>
50 #include <sys/panic.h>
51 #include <sys/sysmacros.h>
52 #include <sys/stat.h>
53 #include <sys/resource.h>
54 #include <sys/fm/util.h>
55 #include <fm/libfmevent.h>
56 #include <sys/int_fmtio.h>
57
58
/*
 * fread/fwrite buffer size: the size of the buffers handed to setvbuf()
 * for the stdio streams used to read the dump and write the copy.
 */
#define	FBUFSIZE		(1ULL << 20)

/* minimum size for output buffering */
#define	MINCOREBLKSIZE		(1ULL << 17)

/* create this file if metrics collection is enabled in the kernel */
#define	METRICSFILE "METRICS.csv"

static char	progname[9] = "savecore";
static char	*savedir;		/* savecore directory */
static char	*dumpfile;		/* source of raw crash dump */
static long	bounds = -1;		/* numeric suffix */
static long	pagesize;		/* dump pagesize (from dumphdr) */
static int	dumpfd = -1;		/* dumpfile descriptor */
static boolean_t have_dumpfile = B_TRUE;	/* dumpfile existence */
static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
static boolean_t dump_incomplete;	/* dumphdr indicates incomplete */
static boolean_t fm_panic;		/* dump is the result of fm_panic */
static offset_t	endoff;			/* offset of end-of-dump header */
static int	verbose;		/* chatty mode */
static int	disregard_valid_flag;	/* disregard valid flag */
static int	livedump;		/* dump the current running system */
static int	interactive;		/* user invoked; no syslog */
static int	csave;			/* save dump compressed */
static int	filemode;		/* processing file, not dump device */
static int	percent_done;		/* progress indicator */
static int	sec_done;		/* progress last report time */
static hrtime_t	startts;		/* timestamp at start */
/* the two page counters below are updated with atomic_inc_64() */
static volatile uint64_t saved;		/* count of pages written */
static volatile uint64_t zpages;	/* count of zero pages not written */
static dumpdatahdr_t datahdr;		/* compression info */
static long	coreblksize;		/* preferred write size (st_blksize) */
static int	cflag;			/* run as savecore -c */
static int	mflag;			/* run as savecore -m */
94
/*
 * Payload information for the events we raise.  These are used
 * in raise_event to determine what payload to include.  Each flag
 * is a distinct bit so that they can be OR-ed together per event.
 */
#define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
#define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
#define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
#define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
#define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
#define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
#define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
#define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
#define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
#define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
#define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
#define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag? */

/*
 * Event types.  The enumerator values are used as indices into the
 * sc_event[] table below, so the two must be kept in the same order.
 */
enum sc_event_type {
	SC_EVENT_DUMP_PENDING,
	SC_EVENT_SAVECORE_FAILURE,
	SC_EVENT_DUMP_AVAILABLE
};

/*
 * Common payload: the members included in every event in sc_event[].
 * Note that the expansion is not parenthesized; it is only combined
 * with further flags using '|'.
 */
#define	_SC_PAYLOAD_CMN \
	SC_PAYLOAD_IMAGEUUID | \
	SC_PAYLOAD_CRASHTIME | \
	SC_PAYLOAD_PANICSTR | \
	SC_PAYLOAD_PANICSTACK | \
	SC_PAYLOAD_DUMPCOMPLETE | \
	SC_PAYLOAD_FM_PANIC | \
	SC_PAYLOAD_SAVEDIR

/*
 * Per-event description: the event subclass name and the set of
 * SC_PAYLOAD_* flags selecting which payload members accompany it.
 */
static const struct {
	const char *sce_subclass;
	uint32_t sce_payload;
} sc_event[] = {
	/*
	 * SC_EVENT_DUMP_PENDING
	 */
	{
		"dump_pending_on_device",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN |
		    SC_PAYLOAD_JUSTCHECKING
	},

	/*
	 * SC_EVENT_SAVECORE_FAILURE
	 */
	{
		"savecore_failure",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON
	},

	/*
	 * SC_EVENT_DUMP_AVAILABLE
	 */
	{
		"dump_available",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED
	},
};

/* Defined later in the file; logprint() needs it to report failures. */
static void raise_event(enum sc_event_type, char *);
161
162 static void
163 usage(void)
164 {
165 (void) fprintf(stderr,
166 "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
167 exit(1);
168 }
169
170 #define SC_SL_NONE 0x0001 /* no syslog */
171 #define SC_SL_ERR 0x0002 /* syslog if !interactive, LOG_ERR */
172 #define SC_SL_WARN 0x0004 /* syslog if !interactive, LOG_WARNING */
173 #define SC_IF_VERBOSE 0x0008 /* message only if -v */
174 #define SC_IF_ISATTY 0x0010 /* message only if interactive */
175 #define SC_EXIT_OK 0x0020 /* exit(0) */
176 #define SC_EXIT_ERR 0x0040 /* exit(1) */
177 #define SC_EXIT_PEND 0x0080 /* exit(2) */
178 #define SC_EXIT_FM 0x0100 /* exit(3) */
179
180 #define _SC_ALLEXIT (SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM)
181
182 static void
183 logprint(uint32_t flags, char *message, ...)
184 {
185 va_list args;
186 char buf[1024];
187 int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0);
188 int do_ifverb = (flags & SC_IF_VERBOSE) && verbose;
189 int do_ifisatty = (flags & SC_IF_ISATTY) && interactive;
190 int code;
191 static int logprint_raised = 0;
192
193 if (do_always || do_ifverb || do_ifisatty) {
194 va_start(args, message);
195 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
196 (void) vsnprintf(buf, sizeof (buf), message, args);
197 (void) fprintf(stderr, "%s: %s\n", progname, buf);
198 if (!interactive) {
199 switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) {
200 case SC_SL_ERR:
201 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
202 syslog(LOG_ERR, buf);
203 break;
204
205 case SC_SL_WARN:
206 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
207 syslog(LOG_WARNING, buf);
208 break;
209
210 default:
211 break;
212 }
213 }
214 va_end(args);
215 }
216
217 switch (flags & _SC_ALLEXIT) {
218 case 0:
219 return;
220
221 case SC_EXIT_OK:
222 code = 0;
223 break;
224
225 case SC_EXIT_PEND:
226 /*
227 * Raise an ireport saying why we are exiting. Do not
228 * raise if run as savecore -m. If something in the
229 * raise_event codepath calls logprint avoid recursion.
230 */
231 if (!mflag && logprint_raised++ == 0)
232 raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
233 code = 2;
234 break;
235
236 case SC_EXIT_FM:
237 code = 3;
238 break;
239
240 case SC_EXIT_ERR:
241 default:
242 if (!mflag && logprint_raised++ == 0 && have_dumpfile)
243 raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
244 code = 1;
245 break;
246 }
247
248 exit(code);
249 }
250
251 /*
252 * System call / libc wrappers that exit on error.
253 */
254 static int
255 Open(const char *name, int oflags, mode_t mode)
256 {
257 int fd;
258
259 if ((fd = open64(name, oflags, mode)) == -1)
260 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
261 name, strerror(errno));
262 return (fd);
263 }
264
265 static void
266 Fread(void *buf, size_t size, FILE *f)
267 {
268 if (fread(buf, size, 1, f) != 1)
269 logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: %s",
270 strerror(errno));
271 }
272
273 static void
274 Fwrite(void *buf, size_t size, FILE *f)
275 {
276 if (fwrite(buf, size, 1, f) != 1)
277 logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s",
278 strerror(errno));
279 }
280
281 static void
282 Fseek(offset_t off, FILE *f)
283 {
284 if (fseeko64(f, off, SEEK_SET) != 0)
285 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
286 strerror(errno));
287 }
288
289 typedef struct stat64 Stat_t;
290
291 static void
292 Fstat(int fd, Stat_t *sb, const char *fname)
293 {
294 if (fstat64(fd, sb) != 0)
295 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
296 strerror(errno));
297 }
298
299 static void
300 Stat(const char *fname, Stat_t *sb)
301 {
302 if (stat64(fname, sb) != 0) {
303 have_dumpfile = B_FALSE;
304 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
305 "of file %s", fname);
306 }
307 }
308
309 static void
310 Pread(int fd, void *buf, size_t size, offset_t off)
311 {
312 ssize_t sz = pread64(fd, buf, size, off);
313
314 if (sz < 0)
315 logprint(SC_SL_ERR | SC_EXIT_ERR,
316 "pread: %s", strerror(errno));
317 else if (sz != size)
318 logprint(SC_SL_ERR | SC_EXIT_ERR,
319 "pread: size %ld != %ld", sz, size);
320 }
321
322 static void
323 Pwrite(int fd, void *buf, size_t size, off64_t off)
324 {
325 if (pwrite64(fd, buf, size, off) != size)
326 logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s",
327 strerror(errno));
328 }
329
330 static void *
331 Zalloc(size_t size)
332 {
333 void *buf;
334
335 if ((buf = calloc(size, 1)) == NULL)
336 logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s",
337 strerror(errno));
338 return (buf);
339 }
340
/*
 * Read a decimal number from the start of 'filename'.
 *
 * Returns the number read, or 'default_value' if the file cannot be
 * opened, does not begin with a number, or holds a negative number.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	long parsed = -1;	/* stays negative unless fscanf() stores */
	FILE *stream = fopen(filename, "r");

	if (stream != NULL) {
		(void) fscanf(stream, "%ld", &parsed);
		(void) fclose(stream);
	}

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
353
/*
 * Open the dump source, load the terminal dump header and the data
 * header that follows it, and validate them.
 *
 * On return dumpfd, endoff, dumphdr, datahdr, corehdr and pagesize are
 * set, and DF_VALID has been cleared in the in-memory copies of both
 * headers.  Every validation failure exits through logprint(); note that
 * checks flagged SC_IF_VERBOSE still exit when not verbose, they merely
 * do so without printing the message.
 */
static void
read_dumphdr(void)
{
	/* A dump file is only read; a dump device is also written back. */
	if (filemode)
		dumpfd = Open(dumpfile, O_RDONLY, 0644);
	else
		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
	/*
	 * The terminal header sits at the DUMP_OFFSET-aligned offset at or
	 * below (end - DUMP_OFFSET); the data header directly follows it.
	 */
	endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
	Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
	Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));

	pagesize = dumphdr.dump_pagesize;

	if (dumphdr.dump_magic != DUMP_MAGIC)
		logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
		    dumphdr.dump_magic);

	/* Not an error: nothing left to save, so exit with status 0. */
	if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
		    "dump already processed");

	if (dumphdr.dump_version != DUMP_VERSION)
		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
		    "dump version (%d) != %s version (%d)",
		    dumphdr.dump_version, progname, DUMP_VERSION);

	if (datahdr.dump_clevel > DUMP_CLEVEL_LZJB)
		logprint(SC_SL_NONE | SC_EXIT_PEND,
		    "unsupported compression format (%d)", datahdr.dump_clevel);

	if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
		logprint(SC_SL_NONE | SC_EXIT_PEND,
		    "dump is from %u-bit kernel - cannot save on %u-bit kernel",
		    dumphdr.dump_wordsize, DUMP_WORDSIZE);

	if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
		if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
			logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
			    "dump data version (%d) != %s data version (%d)",
			    datahdr.dump_datahdr_version, progname,
			    DUMP_DATAHDR_VERSION);
	} else {
		/*
		 * No data header present: zero it and assume no compressed
		 * page is larger than one page.
		 */
		(void) memset(&datahdr, 0, sizeof (datahdr));
		datahdr.dump_maxcsize = pagesize;
	}

	/*
	 * Read the initial header, clear the valid bits, and compare headers.
	 * The main header may have been overwritten by swapping if we're
	 * using a swap partition as the dump device, in which case we bail.
	 */
	Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);

	corehdr.dump_flags &= ~DF_VALID;
	dumphdr.dump_flags &= ~DF_VALID;

	if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
		/*
		 * Clear valid bit so we don't complain on every invocation.
		 */
		if (!filemode)
			Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
		logprint(SC_SL_ERR | SC_EXIT_ERR,
		    "initial dump header corrupt");
	}
}
420
421 static void
422 check_space(int csave)
423 {
424 struct statvfs fsb;
425 int64_t spacefree, dumpsize, minfree, datasize;
426
427 if (statvfs(".", &fsb) < 0)
428 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
429 strerror(errno));
430
431 dumpsize = dumphdr.dump_data - dumphdr.dump_start;
432 datasize = dumphdr.dump_npages * pagesize;
433 if (!csave)
434 dumpsize += datasize;
435 else
436 dumpsize += datahdr.dump_data_csize;
437
438 spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
439 minfree = 1024LL * read_number_from_file("minfree", 1024);
440 if (spacefree < minfree + dumpsize) {
441 logprint(SC_SL_ERR | SC_EXIT_ERR,
442 "not enough space in %s (%lld MB avail, %lld MB needed)",
443 savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
444 }
445 }
446
447 static void
448 build_dump_map(int corefd, const pfn_t *pfn_table)
449 {
450 long i;
451 static long misses = 0;
452 size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
453 mem_vtop_t vtop;
454 dump_map_t *dmp = Zalloc(dump_mapsize);
455 char *inbuf = Zalloc(FBUFSIZE);
456 FILE *in = fdopen(dup(dumpfd), "rb");
457
458 (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
459 Fseek(dumphdr.dump_map, in);
460
461 corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
462
463 for (i = 0; i < corehdr.dump_nvtop; i++) {
464 long first = 0;
465 long last = corehdr.dump_npages - 1;
466 long middle = 0;
467 pfn_t pfn = 0;
468 uintptr_t h;
469
470 Fread(&vtop, sizeof (mem_vtop_t), in);
471 while (last >= first) {
472 middle = (first + last) / 2;
473 pfn = pfn_table[middle];
474 if (pfn == vtop.m_pfn)
475 break;
476 if (pfn < vtop.m_pfn)
477 first = middle + 1;
478 else
479 last = middle - 1;
480 }
481 if (pfn != vtop.m_pfn) {
482 if (++misses <= 10)
483 (void) fprintf(stderr,
484 "pfn %ld not found for as=%p, va=%p\n",
485 vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
486 continue;
487 }
488
489 dmp[i].dm_as = vtop.m_as;
490 dmp[i].dm_va = (uintptr_t)vtop.m_va;
491 dmp[i].dm_data = corehdr.dump_data +
492 ((uint64_t)middle << corehdr.dump_pageshift);
493
494 h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
495 dmp[i].dm_next = dmp[h].dm_first;
496 dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
497 }
498
499 Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
500 free(dmp);
501 (void) fclose(in);
502 free(inbuf);
503 }
504
505 /*
506 * Copy whole sections of the dump device to the file.
507 */
508 static void
509 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
510 size_t sz)
511 {
512 size_t nr;
513 offset_t off = *offp;
514
515 while (nb > 0) {
516 nr = sz < nb ? sz : (size_t)nb;
517 Pread(dumpfd, buf, nr, dumpoff);
518 Pwrite(fd, buf, nr, off);
519 off += nr;
520 dumpoff += nr;
521 nb -= nr;
522 }
523 *offp = off;
524 }
525
526 /*
527 * Copy pages when the dump data header is missing.
528 * This supports older kernels with latest savecore.
529 */
530 static void
531 CopyPages(offset_t *offp, int fd, char *buf, size_t sz)
532 {
533 uint32_t csize;
534 FILE *in = fdopen(dup(dumpfd), "rb");
535 FILE *out = fdopen(dup(fd), "wb");
536 char *cbuf = Zalloc(pagesize);
537 char *outbuf = Zalloc(FBUFSIZE);
538 pgcnt_t np = dumphdr.dump_npages;
539
540 (void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
541 (void) setvbuf(in, buf, _IOFBF, sz);
542 Fseek(dumphdr.dump_data, in);
543
544 Fseek(*offp, out);
545 while (np > 0) {
546 Fread(&csize, sizeof (uint32_t), in);
547 Fwrite(&csize, sizeof (uint32_t), out);
548 *offp += sizeof (uint32_t);
549 if (csize > pagesize || csize == 0) {
550 logprint(SC_SL_ERR,
551 "CopyPages: page %lu csize %d (0x%x) pagesize %d",
552 dumphdr.dump_npages - np, csize, csize,
553 pagesize);
554 break;
555 }
556 Fread(cbuf, csize, in);
557 Fwrite(cbuf, csize, out);
558 *offp += csize;
559 np--;
560 }
561 (void) fclose(in);
562 (void) fclose(out);
563 free(outbuf);
564 free(buf);
565 }
566
/*
 * Concatenate dump contents into a new file.
 * Update corehdr with new offsets.
 *
 *   corefile	name of the file to create (truncated if it exists).
 *
 * The sections are copied still compressed, each starting on a page
 * boundary, in the order: symbol table, pfn table, dump map, data pages.
 * The updated header is then written both at the end (followed by the
 * data header) and at the start of the file.
 */
static void
copy_crashfile(const char *corefile)
{
	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	size_t bufsz = FBUFSIZE;
	char *inbuf = Zalloc(bufsz);
	offset_t coreoff;
	size_t nb;

	logprint(SC_SL_ERR | SC_IF_VERBOSE,
	    "Copying %s to %s/%s\n", dumpfile, savedir, corefile);

	/*
	 * This dump file is still compressed
	 */
	corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;

	/*
	 * Leave room for corehdr, it is updated and written last
	 */
	corehdr.dump_start = 0;
	coreoff = sizeof (corehdr);

	/*
	 * Read in the compressed symbol table, copy it to corefile.
	 */
	coreoff = roundup(coreoff, pagesize);
	corehdr.dump_ksyms = coreoff;
	Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
	    inbuf, bufsz);

	/*
	 * Save the pfn table.
	 */
	coreoff = roundup(coreoff, pagesize);
	corehdr.dump_pfn = coreoff;
	Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
	    corefd, inbuf, bufsz);

	/*
	 * Save the dump map.
	 */
	coreoff = roundup(coreoff, pagesize);
	corehdr.dump_map = coreoff;
	Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
	    &coreoff, corefd, inbuf, bufsz);

	/*
	 * Save the data pages.  When the data header gives the compressed
	 * size the data is copied as one section; otherwise it is walked
	 * record by record.
	 */
	coreoff = roundup(coreoff, pagesize);
	corehdr.dump_data = coreoff;
	if (datahdr.dump_data_csize != 0)
		Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
		    corefd, inbuf, bufsz);
	else
		CopyPages(&coreoff, corefd, inbuf, bufsz);

	/*
	 * Now write the modified dump header to front and end of the copy.
	 * Make it look like a valid dump device.
	 *
	 * From dumphdr.h: Two headers are written out: one at the
	 * beginning of the dump, and the other at the very end of the
	 * dump device. The terminal header is at a known location
	 * (end of device) so we can always find it.
	 *
	 * Pad with zeros to each DUMP_OFFSET boundary.
	 */
	(void) memset(inbuf, 0, DUMP_OFFSET);

	/*
	 * nb is in the range 1..DUMP_OFFSET, so an already aligned offset
	 * still receives a full DUMP_OFFSET of padding.
	 */
	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
	if (nb > 0) {
		Pwrite(corefd, inbuf, nb, coreoff);
		coreoff += nb;
	}

	Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
	coreoff += sizeof (corehdr);

	Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
	coreoff += sizeof (datahdr);

	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
	if (nb > 0) {
		Pwrite(corefd, inbuf, nb, coreoff);
	}

	free(inbuf);
	/* the leading copy of the header, at offset dump_start (0) */
	Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);

	/*
	 * Write out the modified dump header to the dump device.
	 * The dump device has been processed, so DF_VALID is clear.
	 */
	if (!filemode)
		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);

	(void) close(corefd);
}
671
/*
 * compressed streams
 */
typedef struct blockhdr blockhdr_t;
typedef struct block block_t;

/* A singly linked FIFO of blocks; both pointers are NULL when empty. */
struct blockhdr {
	block_t *head;
	block_t *tail;
};

/* One buffer of compressed stream data read from the dump. */
struct block {
	block_t *next;		/* next block on the same queue */
	char *block;		/* the data buffer */
	int size;		/* number of valid bytes in 'block' */
};

/* What lzjbblock() expects next in a stream's data. */
typedef enum streamstate {
	STREAMSTART,		/* a stream header */
	STREAMPAGES		/* a compressed page */
} streamstate_t;

/* Per-stream decompression state. */
typedef struct stream {
	streamstate_t state;	/* parser state, see above */
	int init;		/* set once lzjbblock() has set up blkbuf */
	int tag;		/* tag identifying this stream in the dump */
	int bound;		/* set while a worker is draining 'blocks' */
	int nout;		/* pages accumulated in blkbuf */
	char *blkbuf;		/* output staging buffer (coreblksize bytes) */
	blockhdr_t blocks;	/* blocks waiting to be decompressed */
	pgcnt_t pagenum;	/* first page of the current page group */
	pgcnt_t curpage;	/* page number of the first page in blkbuf */
	pgcnt_t npages;		/* number of pages in the current group */
	pgcnt_t done;		/* pages of the group processed so far */
	dumpcsize_t sc;		/* not referenced in the code visible here */
	dumpstreamhdr_t sh;	/* not referenced in the code visible here */
} stream_t;

static stream_t *streams;	/* array of streams, indexed by tag - 1 */
static stream_t *endstreams;	/* one past the last stream */

const int cs = sizeof (dumpcsize_t);

/* Per-worker-thread state. */
typedef struct tinfo {
	pthread_t tid;
	int corefd;		/* the thread's own dup of the core file fd */
} tinfo_t;

static int threads_stop;	/* set to make the workers exit */
static int threads_active;	/* set once initstreams() has run */
static tinfo_t *tinfo;		/* array of worker threads */
static tinfo_t *endtinfo;	/* one past the last worker */

/* 'lock' protects the block queues and the stream 'bound' flags. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;  /* block freed */
static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;  /* work queued */
static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER; /* stream idle */

static blockhdr_t freeblocks;	/* blocks available to the reader */
731
732 static void
733 enqt(blockhdr_t *h, block_t *b)
734 {
735 b->next = NULL;
736 if (h->tail == NULL)
737 h->head = b;
738 else
739 h->tail->next = b;
740 h->tail = b;
741 }
742
743 static block_t *
744 deqh(blockhdr_t *h)
745 {
746 block_t *b = h->head;
747
748 if (b != NULL) {
749 h->head = b->next;
750 if (h->head == NULL)
751 h->tail = NULL;
752 }
753 return (b);
754 }
755
756 static void *runstreams(void *arg);
757
758 static void
759 initstreams(int corefd, int nstreams, int maxcsize)
760 {
761 int nthreads;
762 int nblocks;
763 int i;
764 block_t *b;
765 tinfo_t *t;
766
767 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
768 if (nstreams < nthreads)
769 nthreads = nstreams;
770 if (nthreads < 1)
771 nthreads = 1;
772 nblocks = nthreads * 2;
773
774 tinfo = Zalloc(nthreads * sizeof (tinfo_t));
775 endtinfo = &tinfo[nthreads];
776
777 /* init streams */
778 streams = Zalloc(nstreams * sizeof (stream_t));
779 endstreams = &streams[nstreams];
780
781 /* init stream block buffers */
782 for (i = 0; i < nblocks; i++) {
783 b = Zalloc(sizeof (block_t));
784 b->block = Zalloc(maxcsize);
785 enqt(&freeblocks, b);
786 }
787
788 /* init worker threads */
789 (void) pthread_mutex_lock(&lock);
790 threads_active = 1;
791 threads_stop = 0;
792 for (t = tinfo; t != endtinfo; t++) {
793 t->corefd = dup(corefd);
794 if (t->corefd < 0) {
795 nthreads = t - tinfo;
796 endtinfo = t;
797 break;
798 }
799 if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
800 logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s",
801 strerror(errno));
802 }
803 (void) pthread_mutex_unlock(&lock);
804 }
805
806 static void
807 sbarrier()
808 {
809 stream_t *s;
810
811 (void) pthread_mutex_lock(&lock);
812 for (s = streams; s != endstreams; s++) {
813 while (s->bound || s->blocks.head != NULL)
814 (void) pthread_cond_wait(&cvbarrier, &lock);
815 }
816 (void) pthread_mutex_unlock(&lock);
817 }
818
819 static void
820 stopstreams()
821 {
822 tinfo_t *t;
823
824 if (threads_active) {
825 sbarrier();
826 (void) pthread_mutex_lock(&lock);
827 threads_stop = 1;
828 (void) pthread_cond_signal(&cvwork);
829 (void) pthread_mutex_unlock(&lock);
830 for (t = tinfo; t != endtinfo; t++)
831 (void) pthread_join(t->tid, NULL);
832 free(tinfo);
833 tinfo = NULL;
834 threads_active = 0;
835 }
836 }
837
838 static block_t *
839 getfreeblock()
840 {
841 block_t *b;
842
843 (void) pthread_mutex_lock(&lock);
844 while ((b = deqh(&freeblocks)) == NULL)
845 (void) pthread_cond_wait(&cvfree, &lock);
846 (void) pthread_mutex_unlock(&lock);
847 return (b);
848 }
849
850 /* data page offset from page number */
851 #define BTOP(b) ((b) >> dumphdr.dump_pageshift)
852 #define PTOB(p) ((p) << dumphdr.dump_pageshift)
853 #define DATAOFF(p) (corehdr.dump_data + PTOB(p))
854
855 /* check for coreblksize boundary */
856 static int
857 isblkbnd(pgcnt_t pgnum)
858 {
859 return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
860 }
861
862 static int
863 iszpage(char *buf)
864 {
865 size_t sz;
866 uint64_t *pl;
867
868 /*LINTED:E_BAD_PTR_CAST_ALIGN*/
869 pl = (uint64_t *)(buf);
870 for (sz = 0; sz < pagesize; sz += sizeof (*pl))
871 if (*pl++ != 0)
872 return (0);
873 return (1);
874 }
875
876 volatile uint_t *hist;
877
878 /* write pages to the core file */
879 static void
880 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
881 {
882 atomic_inc_uint(&hist[np]);
883 if (np > 0)
884 Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
885 }
886
887 /*
888 * Process one lzjb block.
889 * No object (stream header or page) will be split over a block boundary.
890 */
891 static void
892 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
893 {
894 int in = 0;
895 int csize;
896 int doflush;
897 char *out;
898 size_t dsize;
899 dumpcsize_t sc;
900 dumpstreamhdr_t sh;
901
902 if (!s->init) {
903 s->init = 1;
904 if (s->blkbuf == NULL)
905 s->blkbuf = Zalloc(coreblksize);
906 s->state = STREAMSTART;
907 }
908 while (in < blocksz) {
909 switch (s->state) {
910 case STREAMSTART:
911 (void) memcpy(&sh, block + in, sizeof (sh));
912 in += sizeof (sh);
913 if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
914 logprint(SC_SL_ERR | SC_EXIT_ERR,
915 "LZJB STREAMSTART: bad stream header");
916 if (sh.stream_npages > datahdr.dump_maxrange)
917 logprint(SC_SL_ERR | SC_EXIT_ERR,
918 "LZJB STREAMSTART: bad range: %d > %d",
919 sh.stream_npages, datahdr.dump_maxrange);
920 s->pagenum = sh.stream_pagenum;
921 s->npages = sh.stream_npages;
922 s->curpage = s->pagenum;
923 s->nout = 0;
924 s->done = 0;
925 s->state = STREAMPAGES;
926 break;
927 case STREAMPAGES:
928 (void) memcpy(&sc, block + in, cs);
929 in += cs;
930 csize = DUMP_GET_CSIZE(sc);
931 if (csize > pagesize)
932 logprint(SC_SL_ERR | SC_EXIT_ERR,
933 "LZJB STREAMPAGES: bad csize=%d", csize);
934
935 out = s->blkbuf + PTOB(s->nout);
936 dsize = decompress(block + in, out, csize, pagesize);
937
938 if (dsize != pagesize)
939 logprint(SC_SL_ERR | SC_EXIT_ERR,
940 "LZJB STREAMPAGES: dsize %d != pagesize %d",
941 dsize, pagesize);
942
943 in += csize;
944 atomic_inc_64(&saved);
945
946 doflush = 0;
947 if (s->nout == 0 && iszpage(out)) {
948 doflush = 1;
949 atomic_inc_64(&zpages);
950 } else if (++s->nout >= BTOP(coreblksize) ||
951 isblkbnd(s->curpage + s->nout)) {
952 doflush = 1;
953 }
954 if (++s->done >= s->npages) {
955 s->state = STREAMSTART;
956 doflush = 1;
957 }
958 if (doflush) {
959 putpage(corefd, s->blkbuf, s->curpage, s->nout);
960 s->nout = 0;
961 s->curpage = s->pagenum + s->done;
962 }
963 break;
964 }
965 }
966 }
967
968 /* report progress */
969 static void
970 report_progress()
971 {
972 int sec, percent;
973
974 if (!interactive)
975 return;
976
977 percent = saved * 100LL / corehdr.dump_npages;
978 sec = (gethrtime() - startts) / NANOSEC;
979 if (percent > percent_done || sec > sec_done) {
980 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
981 percent);
982 (void) fflush(stdout);
983 sec_done = sec;
984 percent_done = percent;
985 }
986 }
987
/*
 * thread body
 *
 * Worker thread: repeatedly scans the streams for one that has queued
 * blocks and is not already claimed by another worker, claims it, and
 * decompresses its blocks until the queue is empty.
 *
 *   arg	the thread's tinfo_t; also the return value.
 *
 * 'lock' is held throughout, except while a block is being decompressed.
 */
static void *
runstreams(void *arg)
{
	tinfo_t *t = arg;
	stream_t *s;
	block_t *b;
	int bound;

	(void) pthread_mutex_lock(&lock);
	while (!threads_stop) {
		bound = 0;
		for (s = streams; s != endstreams; s++) {
			if (s->bound || s->blocks.head == NULL)
				continue;
			/* claim the stream so no other worker touches it */
			s->bound = 1;
			bound = 1;
			/* pass the wakeup on: there may be more work */
			(void) pthread_cond_signal(&cvwork);
			while (s->blocks.head != NULL) {
				b = deqh(&s->blocks);
				/* decompress without holding the lock */
				(void) pthread_mutex_unlock(&lock);

				lzjbblock(t->corefd, s, b->block,
				    b->size);

				(void) pthread_mutex_lock(&lock);
				/* hand the buffer back to the reader */
				enqt(&freeblocks, b);
				(void) pthread_cond_signal(&cvfree);

				report_progress();
			}
			s->bound = 0;
			/* the stream is idle now; let sbarrier() recheck */
			(void) pthread_cond_signal(&cvbarrier);
		}
		/* only sleep if the whole scan found nothing to do */
		if (!bound && !threads_stop)
			(void) pthread_cond_wait(&cvwork, &lock);
	}
	(void) close(t->corefd);
	/* wake another worker so that it sees threads_stop as well */
	(void) pthread_cond_signal(&cvwork);
	(void) pthread_mutex_unlock(&lock);
	return (arg);
}
1030
/*
 * Process compressed pages.
 *
 * The old format, now called single-threaded lzjb, is a 32-bit size
 * word followed by 'size' bytes of lzjb compression data for one
 * page. The new format extends this by storing a 12-bit "tag" in the
 * upper bits of the size word. When the size word is pagesize or
 * less, it is assumed to be one lzjb page. When the size word is
 * greater than pagesize, it is assumed to be a "stream block",
 * belonging to up to 4095 streams. In practice, the number of streams
 * is set to one less than the number of CPUs running at crash
 * time. One CPU processes the crash dump, the remaining CPUs
 * separately process groups of data pages.
 *
 * savecore creates a thread per stream, but never more threads than
 * the number of CPUs running savecore. This is because savecore can
 * be processing a crash file from a remote machine, which may have
 * more CPUs.
 *
 * When the kernel uses parallel compression we expect a series of 128KB
 * blocks of compression data. In this case, each block has a "tag" in
 * the range 1-4095. Each block is handed off to the threads running
 * "runstreams". These threads, in turn, process the compression data
 * for groups of pages. Groups of pages are delimited by a "stream header",
 * which indicates a starting pfn and number of pages. When a stream block
 * has been read, the condition variable "cvwork" is signalled, which causes
 * one of the available threads to wake up and process the stream.
 *
 * In the parallel case there will be stream blocks encoding all data
 * pages. The stream of blocks is terminated by a zero size
 * word. There can be a few lzjb pages tacked on the end, depending on
 * the architecture. The sbarrier function ensures that all stream
 * blocks have been processed so that the page number for the few
 * single pages at the end can be known.
 *
 *   corefd	descriptor of the core file the pages are written to.
 */
static void
decompress_pages(int corefd)
{
	char *cpage = NULL;
	char *dpage = NULL;
	char *out;
	pgcnt_t curpage = 0;
	block_t *b;
	FILE *dumpf;
	FILE *tracef = NULL;	/* never opened in this function */
	stream_t *s;
	size_t dsize;
	size_t insz = FBUFSIZE;
	char *inbuf = Zalloc(insz);
	uint32_t csize;
	dumpcsize_t dcsize;
	int nstreams = datahdr.dump_nstreams;
	int maxcsize = datahdr.dump_maxcsize;
	int nout = 0, tag, doflush;

	/* a failed dup() yields -1, which makes fdopen() fail as well */
	dumpf = fdopen(dup(dumpfd), "rb");
	if (dumpf == NULL)
		logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s",
		    strerror(errno));

	(void) setvbuf(dumpf, inbuf, _IOFBF, insz);
	Fseek(dumphdr.dump_data, dumpf);

	/*LINTED: E_CONSTANT_CONDITION*/
	while (1) {

		/*
		 * The csize word delimits stream blocks.
		 * See dumphdr.h for a description.
		 */
		Fread(&dcsize, sizeof (dcsize), dumpf);

		tag = DUMP_GET_TAG(dcsize);
		csize = DUMP_GET_CSIZE(dcsize);

		if (tag != 0) {		/* a stream block */

			if (nstreams == 0)
				logprint(SC_SL_ERR | SC_EXIT_ERR,
				    "starting data header is missing");

			if (tag > nstreams)
				logprint(SC_SL_ERR | SC_EXIT_ERR,
				    "stream tag %d not in range 1..%d",
				    tag, nstreams);

			if (csize > maxcsize)
				logprint(SC_SL_ERR | SC_EXIT_ERR,
				    "block size 0x%x > max csize 0x%x",
				    csize, maxcsize);

			/* workers are started on the first stream block */
			if (streams == NULL)
				initstreams(corefd, nstreams, maxcsize);
			s = &streams[tag - 1];
			s->tag = tag;

			/* may sleep until a worker returns a block */
			b = getfreeblock();
			b->size = csize;
			Fread(b->block, csize, dumpf);

			(void) pthread_mutex_lock(&lock);
			enqt(&s->blocks, b);
			/* a bound stream's worker will find the block itself */
			if (!s->bound)
				(void) pthread_cond_signal(&cvwork);
			(void) pthread_mutex_unlock(&lock);

		} else if (csize > 0) {		/* one lzjb page */

			if (csize > pagesize)
				logprint(SC_SL_ERR | SC_EXIT_ERR,
				    "csize 0x%x > pagesize 0x%x",
				    csize, pagesize);

			/* the single-page buffers are allocated on first use */
			if (cpage == NULL)
				cpage = Zalloc(pagesize);
			if (dpage == NULL) {
				dpage = Zalloc(coreblksize);
				nout = 0;
			}

			Fread(cpage, csize, dumpf);

			out = dpage + PTOB(nout);
			dsize = decompress(cpage, out, csize, pagesize);

			if (dsize != pagesize)
				logprint(SC_SL_ERR | SC_EXIT_ERR,
				    "dsize 0x%x != pagesize 0x%x",
				    dsize, pagesize);

			/*
			 * wait for streams to flush so that 'saved' is correct
			 */
			if (threads_active)
				sbarrier();

			doflush = 0;
			/* the first page of a batch fixes where it is written */
			if (nout == 0)
				curpage = saved;

			atomic_inc_64(&saved);

			if (nout == 0 && iszpage(dpage)) {
				/* a lone zero page is counted, not written */
				doflush = 1;
				atomic_inc_64(&zpages);
			} else if (++nout >= BTOP(coreblksize) ||
			    isblkbnd(curpage + nout) ||
			    saved >= dumphdr.dump_npages) {
				doflush = 1;
			}

			if (doflush) {
				putpage(corefd, dpage, curpage, nout);
				nout = 0;
			}

			report_progress();

			/*
			 * Non-streams lzjb does not use blocks.  Stop
			 * here if all the pages have been decompressed.
			 */
			if (saved >= dumphdr.dump_npages)
				break;

		} else {
			break;			/* end of data */
		}
	}

	stopstreams();
	if (tracef != NULL)
		(void) fclose(tracef);
	(void) fclose(dumpf);
	if (inbuf)
		free(inbuf);
	if (cpage)
		free(cpage);
	if (dpage)
		free(dpage);
	if (streams)
		free(streams);
}
1214
1215 static void
1216 build_corefile(const char *namelist, const char *corefile)
1217 {
1218 size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1219 size_t ksyms_size = dumphdr.dump_ksyms_size;
1220 size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1221 pfn_t *pfn_table;
1222 char *ksyms_base = Zalloc(ksyms_size);
1223 char *ksyms_cbase = Zalloc(ksyms_csize);
1224 size_t ksyms_dsize;
1225 Stat_t st;
1226 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1227 int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1228
1229 (void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1230
1231 /*
1232 * Determine the optimum write size for the core file
1233 */
1234 Fstat(corefd, &st, corefile);
1235
1236 if (verbose > 1)
1237 (void) printf("%s: %ld block size\n", corefile,
1238 (long)st.st_blksize);
1239 coreblksize = st.st_blksize;
1240 if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1241 coreblksize = MINCOREBLKSIZE;
1242
1243 hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1244
1245 /*
1246 * This dump file is now uncompressed
1247 */
1248 corehdr.dump_flags &= ~DF_COMPRESSED;
1249
1250 /*
1251 * Read in the compressed symbol table, copy it to corefile,
1252 * decompress it, and write the result to namelist.
1253 */
1254 corehdr.dump_ksyms = pagesize;
1255 Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1256 Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1257
1258 ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1259 ksyms_size);
1260 if (ksyms_dsize != ksyms_size)
1261 logprint(SC_SL_WARN,
1262 "bad data in symbol table, %lu of %lu bytes saved",
1263 ksyms_dsize, ksyms_size);
1264
1265 Pwrite(namefd, ksyms_base, ksyms_size, 0);
1266 (void) close(namefd);
1267 free(ksyms_cbase);
1268 free(ksyms_base);
1269
1270 (void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1271
1272 /*
1273 * Read in and write out the pfn table.
1274 */
1275 pfn_table = Zalloc(pfn_table_size);
1276 corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1277 Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1278 Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1279
1280 /*
1281 * Convert the raw translation data into a hashed dump map.
1282 */
1283 corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1284 build_dump_map(corefd, pfn_table);
1285 free(pfn_table);
1286
1287 /*
1288 * Decompress the pages
1289 */
1290 decompress_pages(corefd);
1291 (void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved,
1292 dumphdr.dump_npages);
1293
1294 if (verbose)
1295 (void) printf("%ld (%ld%%) zero pages were not written\n",
1296 (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1297 dumphdr.dump_npages);
1298
1299 if (saved != dumphdr.dump_npages)
1300 logprint(SC_SL_WARN, "bad data after page %ld", saved);
1301
1302 /*
1303 * Write out the modified dump headers.
1304 */
1305 Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1306 if (!filemode)
1307 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1308
1309 (void) close(corefd);
1310 }
1311
1312 /*
1313 * When the system panics, the kernel saves all undelivered messages (messages
1314 * that never made it out to syslogd(1M)) in the dump. At a mimimum, the
1315 * panic message itself will always fall into this category. Upon reboot,
1316 * the syslog startup script runs savecore -m to recover these messages.
1317 *
1318 * To do this, we read the unsent messages from the dump and send them to
1319 * /dev/conslog on priority band 1. This has the effect of prepending them
1320 * to any already-accumulated messages in the console backlog, thus preserving
1321 * temporal ordering across the reboot.
1322 *
1323 * Note: since savecore -m is used *only* for this purpose, it does *not*
1324 * attempt to save the crash dump. The dump will be saved later, after
1325 * syslogd(1M) starts, by the savecore startup script.
1326 */
1327 static int
1328 message_save(void)
1329 {
1330 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
1331 offset_t ldoff;
1332 log_dump_t ld;
1333 log_ctl_t lc;
1334 struct strbuf ctl, dat;
1335 int logfd;
1336
1337 logfd = Open("/dev/conslog", O_WRONLY, 0644);
1338 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1339 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1340
1341 ctl.buf = (void *)&lc;
1342 ctl.len = sizeof (log_ctl_t);
1343
1344 dat.buf = Zalloc(DUMP_LOGSIZE);
1345
1346 for (;;) {
1347 ldoff = dumpoff;
1348
1349 Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
1350 dumpoff += sizeof (log_dump_t);
1351 dat.len = ld.ld_msgsize;
1352
1353 if (ld.ld_magic == 0)
1354 break;
1355
1356 if (ld.ld_magic != LOG_MAGIC)
1357 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1358 "bad magic %x", ld.ld_magic);
1359
1360 if (dat.len >= DUMP_LOGSIZE)
1361 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1362 "bad size %d", ld.ld_msgsize);
1363
1364 Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
1365 dumpoff += ctl.len;
1366
1367 if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
1368 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1369 "bad log_ctl checksum");
1370
1371 lc.flags |= SL_LOGONLY;
1372
1373 Pread(dumpfd, dat.buf, dat.len, dumpoff);
1374 dumpoff += dat.len;
1375
1376 if (ld.ld_msum != checksum32(dat.buf, dat.len))
1377 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1378 "bad message checksum");
1379
1380 if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
1381 logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s",
1382 strerror(errno));
1383
1384 ld.ld_magic = 0; /* clear magic so we never save twice */
1385 Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
1386 }
1387 return (0);
1388 }
1389
/*
 * Extract the instance number N from a dump file path whose name has the
 * form "vmdump.N".
 *
 * The final path component is examined first, so that a directory whose
 * name also contains "vmdump" does not hide the file's own suffix.  If the
 * final component does not begin with "vmdump", the first occurrence of
 * "vmdump" anywhere in the path is used instead.
 *
 * Returns N, or -1 if no "vmdump.N" pattern can be parsed.
 */
static long
getbounds(const char *f)
{
	long b = -1;
	const char *p = strrchr(f, '/');

	/* Step past the last '/' so that p names the final component. */
	p = (p != NULL) ? p + 1 : f;

	if (strncmp(p, "vmdump", 6) != 0)
		p = strstr(f, "vmdump");

	/* b keeps its -1 default when the pattern does not match. */
	if (p != NULL)
		(void) sscanf(p, "vmdump.%ld", &b);

	return (b);
}
1406
1407 static void
1408 stack_retrieve(char *stack)
1409 {
1410 summary_dump_t sd;
1411 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE +
1412 DUMP_ERPTSIZE);
1413 dumpoff -= DUMP_SUMMARYSIZE;
1414
1415 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1416 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1417
1418 Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff);
1419 dumpoff += sizeof (summary_dump_t);
1420
1421 if (sd.sd_magic == 0) {
1422 *stack = '\0';
1423 return;
1424 }
1425
1426 if (sd.sd_magic != SUMMARY_MAGIC) {
1427 *stack = '\0';
1428 logprint(SC_SL_NONE | SC_IF_VERBOSE,
1429 "bad summary magic %x", sd.sd_magic);
1430 return;
1431 }
1432 Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1433 if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1434 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1435 }
1436
1437 static void
1438 raise_event(enum sc_event_type evidx, char *warn_string)
1439 {
1440 uint32_t pl = sc_event[evidx].sce_payload;
1441 char panic_stack[STACK_BUF_SIZE];
1442 nvlist_t *attr = NULL;
1443 char uuidbuf[36 + 1];
1444 int err = 0;
1445
1446 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1447 goto publish; /* try to send payload-free event */
1448
1449 if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1450 err |= nvlist_add_string(attr, "dumpdir", savedir);
1451
1452 if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1453 err |= nvlist_add_int64(attr, "instance", bounds);
1454
1455 if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1456 err |= nvlist_add_boolean_value(attr, "compressed",
1457 csave ? B_TRUE : B_FALSE);
1458 }
1459
1460 if (pl & SC_PAYLOAD_DUMPADM_EN) {
1461 char *disabled = defread("DUMPADM_ENABLE=no");
1462
1463 err |= nvlist_add_boolean_value(attr, "savecore-enabled",
1464 disabled ? B_FALSE : B_TRUE);
1465 }
1466
1467 if (pl & SC_PAYLOAD_IMAGEUUID) {
1468 (void) strncpy(uuidbuf, corehdr.dump_uuid, 36);
1469 uuidbuf[36] = '\0';
1470 err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf);
1471 }
1472
1473 if (pl & SC_PAYLOAD_CRASHTIME) {
1474 err |= nvlist_add_int64(attr, "crashtime",
1475 (int64_t)corehdr.dump_crashtime);
1476 }
1477
1478 if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') {
1479 err |= nvlist_add_string(attr, "panicstr",
1480 corehdr.dump_panicstring);
1481 }
1482
1483 if (pl & SC_PAYLOAD_PANICSTACK) {
1484 stack_retrieve(panic_stack);
1485
1486 if (panic_stack[0] != '\0') {
1487 /*
1488 * The summary page may not be present if the dump
1489 * was previously recorded compressed.
1490 */
1491 (void) nvlist_add_string(attr, "panicstack",
1492 panic_stack);
1493 }
1494 }
1495
1496 /* add warning string if this is an ireport for dump failure */
1497 if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL)
1498 (void) nvlist_add_string(attr, "failure-reason", warn_string);
1499
1500 if (pl & SC_PAYLOAD_DUMPCOMPLETE)
1501 err |= nvlist_add_boolean_value(attr, "dump-incomplete",
1502 dump_incomplete ? B_TRUE : B_FALSE);
1503
1504 if (pl & SC_PAYLOAD_FM_PANIC) {
1505 err |= nvlist_add_boolean_value(attr, "fm-panic",
1506 fm_panic ? B_TRUE : B_FALSE);
1507 }
1508
1509 if (pl & SC_PAYLOAD_JUSTCHECKING) {
1510 err |= nvlist_add_boolean_value(attr, "will-attempt-savecore",
1511 cflag ? B_FALSE : B_TRUE);
1512 }
1513
1514 if (err)
1515 logprint(SC_SL_WARN, "Errors while constructing '%s' "
1516 "event payload; will try to publish anyway.");
1517 publish:
1518 if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1519 "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1520 attr) != FMEV_SUCCESS) {
1521 logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1522 sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1523 nvlist_free(attr);
1524 }
1525
1526 }
1527
1528
1529 int
1530 main(int argc, char *argv[])
1531 {
1532 int i, c, bfd;
1533 Stat_t st;
1534 struct rlimit rl;
1535 long filebounds = -1;
1536 char namelist[30], corefile[30], boundstr[30];
1537 dumpfile = NULL;
1538
1539 startts = gethrtime();
1540
1541 (void) getrlimit(RLIMIT_NOFILE, &rl);
1542 rl.rlim_cur = rl.rlim_max;
1543 (void) setrlimit(RLIMIT_NOFILE, &rl);
1544
1545 openlog(progname, LOG_ODELAY, LOG_AUTH);
1546
1547 (void) defopen("/etc/dumpadm.conf");
1548 savedir = defread("DUMPADM_SAVDIR=");
1549 if (savedir != NULL)
1550 savedir = strdup(savedir);
1551
1552 while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
1553 switch (c) {
1554 case 'L':
1555 livedump++;
1556 break;
1557 case 'v':
1558 verbose++;
1559 break;
1560 case 'c':
1561 cflag++;
1562 break;
1563 case 'd':
1564 disregard_valid_flag++;
1565 break;
1566 case 'm':
1567 mflag++;
1568 break;
1569 case 'f':
1570 dumpfile = optarg;
1571 filebounds = getbounds(dumpfile);
1572 break;
1573 case '?':
1574 usage();
1575 }
1576 }
1577
1578 /*
1579 * If doing something other than extracting an existing dump (i.e.
1580 * dumpfile has been provided as an option), the user must be root.
1581 */
1582 if (geteuid() != 0 && dumpfile == NULL) {
1583 (void) fprintf(stderr, "%s: %s %s\n", progname,
1584 gettext("you must be root to use"), progname);
1585 exit(1);
1586 }
1587
1588 interactive = isatty(STDOUT_FILENO);
1589
1590 if (cflag && livedump)
1591 usage();
1592
1593 if (dumpfile == NULL || livedump)
1594 dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1595
1596 if (dumpfile == NULL) {
1597 dumpfile = Zalloc(MAXPATHLEN);
1598 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
1599 have_dumpfile = B_FALSE;
1600 logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
1601 "no dump device configured");
1602 }
1603 }
1604
1605 if (mflag)
1606 return (message_save());
1607
1608 if (optind == argc - 1)
1609 savedir = argv[optind];
1610
1611 if (savedir == NULL || optind < argc - 1)
1612 usage();
1613
1614 if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
1615 logprint(SC_SL_NONE | SC_EXIT_ERR,
1616 "dedicated dump device required");
1617
1618 (void) close(dumpfd);
1619 dumpfd = -1;
1620
1621 Stat(dumpfile, &st);
1622
1623 filemode = S_ISREG(st.st_mode);
1624
1625 if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1626 csave = 1;
1627
1628 read_dumphdr();
1629
1630 /*
1631 * We want this message to go to the log file, but not the console.
1632 * There's no good way to do that with the existing syslog facility.
1633 * We could extend it to handle this, but there doesn't seem to be
1634 * a general need for it, so we isolate the complexity here instead.
1635 */
1636 if (dumphdr.dump_panicstring[0] != '\0') {
1637 int logfd = Open("/dev/conslog", O_WRONLY, 0644);
1638 log_ctl_t lc;
1639 struct strbuf ctl, dat;
1640 char msg[DUMP_PANICSIZE + 100];
1641 char fmt[] = "reboot after panic: %s";
1642 uint32_t msgid;
1643
1644 STRLOG_MAKE_MSGID(fmt, msgid);
1645
1646 /* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
1647 (void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
1648 progname, msgid);
1649 /* LINTED: E_SEC_PRINTF_VAR_FMT */
1650 (void) sprintf(msg + strlen(msg), fmt,
1651 dumphdr.dump_panicstring);
1652
1653 lc.pri = LOG_AUTH | LOG_ERR;
1654 lc.flags = SL_CONSOLE | SL_LOGONLY;
1655 lc.level = 0;
1656
1657 ctl.buf = (void *)&lc;
1658 ctl.len = sizeof (log_ctl_t);
1659
1660 dat.buf = (void *)msg;
1661 dat.len = strlen(msg) + 1;
1662
1663 (void) putmsg(logfd, &ctl, &dat, 0);
1664 (void) close(logfd);
1665 }
1666
1667 if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
1668 logprint(SC_SL_WARN, "incomplete dump on dump device");
1669 dump_incomplete = B_TRUE;
1670 }
1671
1672 if (dumphdr.dump_fm_panic)
1673 fm_panic = B_TRUE;
1674
1675 /*
1676 * We have a valid dump on a dump device and know as much about
1677 * it as we're going to at this stage. Raise an event for
1678 * logging and so that FMA can open a case for this panic.
1679 * Avoid this step for FMA-initiated panics - FMA will replay
1680 * ereports off the dump device independently of savecore and
1681 * will make a diagnosis, so we don't want to open two cases
1682 * for the same event. Also avoid raising an event for a
1683 * livedump, or when we inflating a compressed dump.
1684 */
1685 if (!fm_panic && !livedump && !filemode)
1686 raise_event(SC_EVENT_DUMP_PENDING, NULL);
1687
1688 logprint(SC_SL_WARN, "System dump time: %s",
1689 ctime(&dumphdr.dump_crashtime));
1690
1691 /*
1692 * Option -c is designed for use from svc-dumpadm where we know
1693 * that dumpadm -n is in effect but run savecore -c just to
1694 * get the above dump_pending_on_device event raised. If it is run
1695 * interactively then just print further panic details.
1696 */
1697 if (cflag) {
1698 char *disabled = defread("DUMPADM_ENABLE=no");
1699 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
1700 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;
1701
1702 logprint(lvl | ec,
1703 "Panic crashdump pending on dump device%s "
1704 "run savecore(1M) manually to extract. "
1705 "Image UUID %s%s.",
1706 disabled ? " but dumpadm -n in effect;" : ";",
1707 corehdr.dump_uuid,
1708 fm_panic ? "(fault-management initiated)" : "");
1709 /*NOTREACHED*/
1710 }
1711
1712 if (chdir(savedir) == -1)
1713 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
1714 savedir, strerror(errno));
1715
1716 check_space(csave);
1717
1718 if (filebounds < 0)
1719 bounds = read_number_from_file("bounds", 0);
1720 else
1721 bounds = filebounds;
1722
1723 if (csave) {
1724 size_t metrics_size = datahdr.dump_metrics;
1725
1726 (void) sprintf(corefile, "vmdump.%ld", bounds);
1727
1728 datahdr.dump_metrics = 0;
1729
1730 logprint(SC_SL_ERR,
1731 "Saving compressed system crash dump in %s/%s",
1732 savedir, corefile);
1733
1734 copy_crashfile(corefile);
1735
1736 /*
1737 * Raise a fault management event that indicates the system
1738 * has panicked. We know a reasonable amount about the
1739 * condition at this time, but the dump is still compressed.
1740 */
1741 if (!livedump && !fm_panic)
1742 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1743
1744 if (metrics_size > 0) {
1745 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1746 FILE *mfile = fopen(METRICSFILE, "a");
1747 char *metrics = Zalloc(metrics_size + 1);
1748
1749 Pread(dumpfd, metrics, metrics_size, endoff +
1750 sizeof (dumphdr) + sizeof (datahdr));
1751
1752 if (sec < 1)
1753 sec = 1;
1754
1755 if (mfile == NULL) {
1756 logprint(SC_SL_WARN,
1757 "Can't create %s:\n%s",
1758 METRICSFILE, metrics);
1759 } else {
1760 (void) fprintf(mfile, "[[[[,,,");
1761 for (i = 0; i < argc; i++)
1762 (void) fprintf(mfile, "%s ", argv[i]);
1763 (void) fprintf(mfile, "\n");
1764 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1765 dumphdr.dump_utsname.sysname,
1766 dumphdr.dump_utsname.nodename,
1767 dumphdr.dump_utsname.release,
1768 dumphdr.dump_utsname.version,
1769 dumphdr.dump_utsname.machine);
1770 (void) fprintf(mfile, ",,,%s dump time %s\n",
1771 dumphdr.dump_flags & DF_LIVE ? "Live" :
1772 "Crash", ctime(&dumphdr.dump_crashtime));
1773 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1774 corefile);
1775 (void) fprintf(mfile, "Metrics:\n%s\n",
1776 metrics);
1777 (void) fprintf(mfile, "Copy pages,%ld\n",
1778 dumphdr. dump_npages);
1779 (void) fprintf(mfile, "Copy time,%d\n", sec);
1780 (void) fprintf(mfile, "Copy pages/sec,%ld\n",
1781 dumphdr.dump_npages / sec);
1782 (void) fprintf(mfile, "]]]]\n");
1783 (void) fclose(mfile);
1784 }
1785 free(metrics);
1786 }
1787
1788 logprint(SC_SL_ERR,
1789 "Decompress the crash dump with "
1790 "\n'savecore -vf %s/%s'",
1791 savedir, corefile);
1792
1793 } else {
1794 (void) sprintf(namelist, "unix.%ld", bounds);
1795 (void) sprintf(corefile, "vmcore.%ld", bounds);
1796
1797 if (interactive && filebounds >= 0 && access(corefile, F_OK)
1798 == 0)
1799 logprint(SC_SL_NONE | SC_EXIT_ERR,
1800 "%s already exists: remove with "
1801 "'rm -f %s/{unix,vmcore}.%ld'",
1802 corefile, savedir, bounds);
1803
1804 logprint(SC_SL_ERR,
1805 "saving system crash dump in %s/{unix,vmcore}.%ld",
1806 savedir, bounds);
1807
1808 build_corefile(namelist, corefile);
1809
1810 if (!livedump && !filemode && !fm_panic)
1811 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1812
1813 if (access(METRICSFILE, F_OK) == 0) {
1814 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1815 FILE *mfile = fopen(METRICSFILE, "a");
1816
1817 if (sec < 1)
1818 sec = 1;
1819
1820 if (mfile == NULL) {
1821 logprint(SC_SL_WARN,
1822 "Can't create %s: %s",
1823 METRICSFILE, strerror(errno));
1824 } else {
1825 (void) fprintf(mfile, "[[[[,,,");
1826 for (i = 0; i < argc; i++)
1827 (void) fprintf(mfile, "%s ", argv[i]);
1828 (void) fprintf(mfile, "\n");
1829 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1830 corefile);
1831 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1832 dumphdr.dump_utsname.sysname,
1833 dumphdr.dump_utsname.nodename,
1834 dumphdr.dump_utsname.release,
1835 dumphdr.dump_utsname.version,
1836 dumphdr.dump_utsname.machine);
1837 (void) fprintf(mfile,
1838 "Uncompress pages,%"PRIu64"\n", saved);
1839 (void) fprintf(mfile, "Uncompress time,%d\n",
1840 sec);
1841 (void) fprintf(mfile, "Uncompress pages/sec,%"
1842 PRIu64"\n", saved / sec);
1843 (void) fprintf(mfile, "]]]]\n");
1844 (void) fclose(mfile);
1845 }
1846 }
1847 }
1848
1849 if (filebounds < 0) {
1850 (void) sprintf(boundstr, "%ld\n", bounds + 1);
1851 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1852 Pwrite(bfd, boundstr, strlen(boundstr), 0);
1853 (void) close(bfd);
1854 }
1855
1856 if (verbose) {
1857 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1858
1859 (void) printf("%d:%02d dump %s is done\n",
1860 sec / 60, sec % 60,
1861 csave ? "copy" : "decompress");
1862 }
1863
1864 if (verbose > 1 && hist != NULL) {
1865 int i, nw;
1866
1867 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1868 nw += hist[i] * i;
1869 (void) printf("pages count %%\n");
1870 for (i = 0; i <= BTOP(coreblksize); ++i) {
1871 if (hist[i] == 0)
1872 continue;
1873 (void) printf("%3d %5u %6.2f\n",
1874 i, hist[i], 100.0 * hist[i] * i / nw);
1875 }
1876 }
1877
1878 (void) close(dumpfd);
1879 dumpfd = -1;
1880
1881 return (0);
1882 }