57 #include <fs/fs_subr.h>
58 #include <sys/fs/snode.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/dkio.h>
62 #include <sys/vtoc.h>
63 #include <sys/errorq.h>
64 #include <sys/fm/util.h>
65 #include <sys/fs/zfs.h>
66
67 #include <vm/hat.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/seg.h>
72 #include <vm/seg_kmem.h>
73 #include <sys/clock_impl.h>
74 #include <sys/hold_page.h>
75 #include <sys/cpu.h>
76
77 #include <bzip2/bzlib.h>
78
79 #define ONE_GIG (1024 * 1024 * 1024UL)
80
81 /*
82 * Crash dump time is dominated by disk write time. To reduce this,
83 * the stronger compression method bzip2 is applied to reduce the dump
84 * size and hence reduce I/O time. However, bzip2 is much more
85 * computationally expensive than the existing lzjb algorithm, so to
86 * avoid increasing compression time, CPUs that are otherwise idle
87 * during panic are employed to parallelize the compression task.
88 * Many helper CPUs are needed to prevent bzip2 from being a
89 * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
90 * parallelized instead. Lastly, I/O and compression are performed by
91 * different CPUs, and are hence overlapped in time, unlike the older
92 * serial code.
93 *
94 * Another important consideration is the speed of the dump
95 * device. Faster disks need less CPUs in order to benefit from
96 * parallel lzjb versus parallel bzip2. Therefore, the CPU count
 * threshold for switching from parallel lzjb to parallel bzip2 is
98 * elevated for faster disks. The dump device speed is adduced from
99 * the setting for dumpbuf.iosize, see dump_update_clevel.
100 */
101
102 /*
103 * exported vars
104 */
kmutex_t dump_lock;		/* lock for dump configuration */
dumphdr_t *dumphdr;		/* dump header */
int dump_conflags = DUMP_KERNEL; /* dump configuration flags */
vnode_t *dumpvp;		/* dump device vnode pointer */
u_offset_t dumpvp_size;		/* size of dump device, in bytes */
char *dumppath;			/* pathname of dump device */
int dump_timeout = 120;		/* timeout for dumping pages */
int dump_timeleft;		/* portion of dump_timeout remaining; */
				/* reset to dump_timeout by the dump loops */
int dump_ioerr;			/* dump i/o error */
int dump_check_used;		/* enable check for used pages; */
				/* set by dumpsys_get_maxmem() once it */
				/* has taken pages for its own buffers */
char *dump_stack_scratch;	/* scratch area for saving stack summary */
116
117 /*
118 * Tunables for dump compression and parallelism. These can be set via
119 * /etc/system.
120 *
121 * dump_ncpu_low number of helpers for parallel lzjb
122 * This is also the minimum configuration.
123 *
124 * dump_bzip2_level bzip2 compression level: 1-9
125 * Higher numbers give greater compression, but take more memory
126 * and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
127 *
128 * dump_plat_mincpu the cross-over limit for using bzip2 (per platform):
129 * if dump_plat_mincpu == 0, then always do single threaded dump
130 * if ncpu >= dump_plat_mincpu then try to use bzip2
131 *
132 * dump_metrics_on if set, metrics are collected in the kernel, passed
133 * to savecore via the dump file, and recorded by savecore in
134 * METRICS.txt.
135 */
uint_t dump_ncpu_low = 4;	/* minimum config for parallel lzjb */
uint_t dump_bzip2_level = 1;	/* bzip2 level (1-9) */

/* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
#define	MINCPU_NOT_SET	((uint_t)-1)
uint_t dump_plat_mincpu = MINCPU_NOT_SET;

/* tunables for pre-reserved heap */
uint_t dump_kmem_permap = 1024;
uint_t dump_kmem_pages = 0;

/* Define multiple buffers per helper to avoid stalling */
#define	NCBUF_PER_HELPER	2	/* output (compression) bufs per helper */
#define	NCMAP_PER_HELPER	4	/* input (map) bufs per helper */

/* minimum number of helpers configured */
#define	MINHELPERS	(dump_ncpu_low)
#define	MINCBUFS	(MINHELPERS * NCBUF_PER_HELPER)
154
155 /*
156 * Define constant parameters.
157 *
158 * CBUF_SIZE size of an output buffer
159 *
160 * CBUF_MAPSIZE size of virtual range for mapping pages
161 *
162 * CBUF_MAPNP size of virtual range in pages
163 *
164 */
#define	DUMP_1KB	((size_t)1 << 10)
#define	DUMP_1MB	((size_t)1 << 20)
#define	CBUF_SIZE	((size_t)1 << 17)	/* 128K per output buffer */
#define	CBUF_MAPSHIFT	(22)
#define	CBUF_MAPSIZE	((size_t)1 << CBUF_MAPSHIFT)	/* 4M map range */
#define	CBUF_MAPNP	((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
171
172 /*
367 /*
368 * helper_t helpers: contains the context for a stream. CPUs run in
369 * parallel at dump time; each CPU creates a single stream of
370 * compression data. Stream data is divided into CBUF_SIZE blocks.
371 * The blocks are written in order within a stream. But, blocks from
372 * multiple streams can be interleaved. Each stream is identified by a
373 * unique tag.
374 */
typedef struct helper {
	int helper;			/* bound helper id */
	int tag;			/* compression stream tag */
	perpage_t perpage;		/* per page metrics */
	perpage_t perpagets;		/* per page metrics (timestamps) */
	taskqid_t taskqid;		/* live dump task ptr */
	int in, out;			/* buffer offsets: in is the input */
					/* copy offset, out the next index */
					/* to fill in the output buffer */
	cbuf_t *cpin, *cpout, *cperr;	/* cbuf objects in process */
	dumpsync_t *ds;			/* pointer to sync vars */
	size_t used;			/* counts input consumed */
	char *page;			/* buffer for page copy */
	char *lzbuf;			/* lzjb output */
	bz_stream bzstream;		/* bzip2 state */
} helper_t;

/* Sentinel values for helper_t.helper when not bound to a CPU id */
#define	MAINHELPER	(-1)		/* helper is also the main task */
#define	FREEHELPER	(-2)		/* unbound helper */
#define	DONEHELPER	(-3)		/* helper finished */
393
394 /*
395 * configuration vars for dumpsys
396 */
397 typedef struct dumpcfg {
398 int threshold; /* ncpu threshold for bzip2 */
399 int nhelper; /* number of helpers */
400 int nhelper_used; /* actual number of helpers used */
401 int ncmap; /* number VA pages for compression */
402 int ncbuf; /* number of bufs for compression */
403 int ncbuf_used; /* number of bufs in use */
404 uint_t clevel; /* dump compression level */
405 helper_t *helper; /* array of helpers */
406 cbuf_t *cmap; /* array of input (map) buffers */
407 cbuf_t *cbuf; /* array of output buffers */
408 ulong_t *helpermap; /* set of dumpsys helper CPU ids */
409 ulong_t *bitmap; /* bitmap for marking pages to dump */
410 ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */
411 pgcnt_t bitmapsize; /* size of bitmap */
412 pgcnt_t rbitmapsize; /* size of bitmap for ranges */
413 pgcnt_t found4m; /* number ranges allocated by dump */
414 pgcnt_t foundsm; /* number small pages allocated by dump */
415 pid_t *pids; /* list of process IDs at dump time */
416 size_t maxsize; /* memory size needed at dump time */
417 size_t maxvmsize; /* size of reserved VM */
418 char *maxvm; /* reserved VM for spare pages */
485 char *old_buf = dumpbuf.start;
486 size_t old_size = dumpbuf.size;
487 char *new_buf;
488 size_t new_size;
489
490 ASSERT(MUTEX_HELD(&dump_lock));
491
492 new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
493 if (new_size <= old_size)
494 return; /* no need to reallocate buffer */
495
496 new_buf = kmem_alloc(new_size, KM_SLEEP);
497 dumpbuf.size = new_size;
498 dumpbuf.start = new_buf;
499 dumpbuf.end = new_buf + new_size;
500 kmem_free(old_buf, old_size);
501 }
502
503 /*
504 * dump_update_clevel is called when dumpadm configures the dump device.
505 * Calculate number of helpers and buffers.
506 * Allocate the minimum configuration for now.
507 *
508 * When the dump file is configured we reserve a minimum amount of
509 * memory for use at crash time. But we reserve VA for all the memory
510 * we really want in order to do the fastest dump possible. The VA is
511 * backed by pages not being dumped, according to the bitmap. If
512 * there is insufficient spare memory, however, we fall back to the
513 * minimum.
514 *
515 * Live dump (savecore -L) always uses the minimum config.
516 *
517 * clevel 0 is single threaded lzjb
518 * clevel 1 is parallel lzjb
519 * clevel 2 is parallel bzip2
520 *
521 * The ncpu threshold is selected with dump_plat_mincpu.
522 * On OPL, set_platform_defaults() overrides the sun4u setting.
523 * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
524 *
525 * Architecture Threshold Algorithm
526 * sun4u < 51 parallel lzjb
527 * sun4u >= 51 parallel bzip2(*)
528 * sun4u OPL < 8 parallel lzjb
529 * sun4u OPL >= 8 parallel bzip2(*)
530 * sun4v < 128 parallel lzjb
531 * sun4v >= 128 parallel bzip2(*)
532 * x86 < 11 parallel lzjb
533 * x86 >= 11 parallel bzip2(*)
534 * 32-bit N/A single-threaded lzjb
535 *
536 * (*) bzip2 is only chosen if there is sufficient available
537 * memory for buffers at dump time. See dumpsys_get_maxmem().
538 *
539 * Faster dump devices have larger I/O buffers. The threshold value is
540 * increased according to the size of the dump I/O buffer, because
541 * parallel lzjb performs better with faster disks. For buffers >= 1MB
542 * the threshold is 3X; for buffers >= 256K threshold is 2X.
543 *
544 * For parallel dumps, the number of helpers is ncpu-1. The CPU
545 * running panic runs the main task. For single-threaded dumps, the
546 * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
547 *
548 * Need multiple buffers per helper so that they do not block waiting
549 * for the main task.
550 * parallel single-threaded
551 * Number of output buffers: nhelper*2 1
552 * Number of mapping buffers: nhelper*4 1
553 *
554 */
555 static void
556 dump_update_clevel()
557 {
558 int tag;
559 size_t bz2size;
560 helper_t *hp, *hpend;
561 cbuf_t *cp, *cpend;
562 dumpcfg_t *old = &dumpcfg;
563 dumpcfg_t newcfg = *old;
564 dumpcfg_t *new = &newcfg;
565
566 ASSERT(MUTEX_HELD(&dump_lock));
567
568 /*
569 * Free the previously allocated bufs and VM.
570 */
571 if (old->helper != NULL) {
572
573 /* helpers */
574 hpend = &old->helper[old->nhelper];
575 for (hp = old->helper; hp != hpend; hp++) {
576 if (hp->lzbuf != NULL)
577 kmem_free(hp->lzbuf, PAGESIZE);
578 if (hp->page != NULL)
579 kmem_free(hp->page, PAGESIZE);
592 if (cp->buf != NULL)
593 kmem_free(cp->buf, cp->size);
594 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
595
596 /* reserved VM for dumpsys_get_maxmem */
597 if (old->maxvmsize > 0)
598 vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
599 }
600
601 /*
602 * Allocate memory and VM.
603 * One CPU runs dumpsys, the rest are helpers.
604 */
605 new->nhelper = ncpus - 1;
606 if (new->nhelper < 1)
607 new->nhelper = 1;
608
609 if (new->nhelper > DUMP_MAX_NHELPER)
610 new->nhelper = DUMP_MAX_NHELPER;
611
612 /* use platform default, unless /etc/system overrides */
613 if (dump_plat_mincpu == MINCPU_NOT_SET)
614 dump_plat_mincpu = dump_plat_mincpu_default;
615
616 /* increase threshold for faster disks */
617 new->threshold = dump_plat_mincpu;
618 if (dumpbuf.iosize >= DUMP_1MB)
619 new->threshold *= 3;
620 else if (dumpbuf.iosize >= (256 * DUMP_1KB))
621 new->threshold *= 2;
622
623 /* figure compression level based upon the computed threshold. */
624 if (dump_plat_mincpu == 0 || new->nhelper < 2) {
625 new->clevel = 0;
626 new->nhelper = 1;
627 } else if ((new->nhelper + 1) >= new->threshold) {
628 new->clevel = DUMP_CLEVEL_BZIP2;
629 } else {
630 new->clevel = DUMP_CLEVEL_LZJB;
631 }
632
633 if (new->clevel == 0) {
634 new->ncbuf = 1;
635 new->ncmap = 1;
636 } else {
637 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
638 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
639 }
640
641 /*
642 * Allocate new data structures and buffers for MINHELPERS,
643 * and also figure the max desired size.
644 */
645 bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
646 new->maxsize = 0;
647 new->maxvmsize = 0;
648 new->maxvm = NULL;
649 tag = 1;
650 new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
651 hpend = &new->helper[new->nhelper];
652 for (hp = new->helper; hp != hpend; hp++) {
653 hp->tag = tag++;
654 if (hp < &new->helper[MINHELPERS]) {
655 hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
656 hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
657 } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
658 new->maxsize += 2 * PAGESIZE;
659 } else {
660 new->maxsize += PAGESIZE;
661 }
662 if (new->clevel >= DUMP_CLEVEL_BZIP2)
663 new->maxsize += bz2size;
664 }
665
666 new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
667 cpend = &new->cbuf[new->ncbuf];
668 for (cp = new->cbuf; cp != cpend; cp++) {
669 cp->state = CBUF_FREEBUF;
670 cp->size = CBUF_SIZE;
671 if (cp < &new->cbuf[MINCBUFS])
672 cp->buf = kmem_alloc(cp->size, KM_SLEEP);
673 else
674 new->maxsize += cp->size;
675 }
676
677 new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
678 cpend = &new->cmap[new->ncmap];
679 for (cp = new->cmap; cp != cpend; cp++) {
680 cp->state = CBUF_FREEMAP;
681 cp->size = CBUF_MAPSIZE;
682 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
683 0, 0, NULL, NULL, VM_SLEEP);
808 ASSERT(rbitnum < dumpcfg.rbitmapsize);
809
810 BT_SET(dumpcfg.rbitmap, rbitnum);
811 }
812
813 int
814 dump_test_used(pfn_t pfn)
815 {
816 pgcnt_t bitnum, rbitnum;
817
818 bitnum = dump_pfn_to_bitnum(pfn);
819 ASSERT(bitnum != (pgcnt_t)-1);
820
821 rbitnum = CBUF_MAPP2R(bitnum);
822 ASSERT(rbitnum < dumpcfg.rbitmapsize);
823
824 return (BT_TEST(dumpcfg.rbitmap, rbitnum));
825 }
826
827 /*
828 * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
829 * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
830 */
831 static void *
832 dumpbzalloc(void *opaque, int items, int size)
833 {
834 size_t *sz;
835 char *ret;
836
837 ASSERT(opaque != NULL);
838 sz = opaque;
839 ret = dumpcfg.maxvm + *sz;
840 *sz += items * size;
841 *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);
842 ASSERT(*sz <= dumpcfg.maxvmsize);
843 return (ret);
844 }
845
/*ARGSUSED*/
static void
dumpbzfree(void *opaque, void *addr)
{
	/*
	 * Deliberately empty: dumpbzalloc() hands out space by bumping
	 * an offset into the pre-reserved maxvm region, so individual
	 * allocations cannot be returned; the region is released as a
	 * whole (see the vmem_xfree of maxvm at reconfiguration time).
	 */
}
851
852 /*
853 * Perform additional checks on the page to see if we can really use
854 * it. The kernel (kas) pages are always set in the bitmap. However,
855 * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
856 * bitmap. So we check for them.
857 */
858 static inline int
859 dump_pfn_check(pfn_t pfn)
860 {
861 page_t *pp = page_numtopp_nolock(pfn);
862 if (pp == NULL || pp->p_pagenum != pfn ||
863 #if defined(__sparc)
864 pp->p_vnode == &promvp ||
865 #else
866 PP_ISBOOTPAGES(pp) ||
867 #endif
868 pp->p_toxic != 0)
869 return (0);
870 return (1);
871 }
872
873 /*
874 * Check a range to see if all contained pages are available and
875 * return non-zero if the range can be used.
876 */
877 static inline int
878 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
879 {
880 for (; start < end; start++, pfn++) {
881 if (BT_TEST(dumpcfg.bitmap, start))
882 return (0);
883 if (!dump_pfn_check(pfn))
884 return (0);
885 }
886 return (1);
887 }
888
889 /*
890 * dumpsys_get_maxmem() is called during panic. Find unused ranges
891 * and use them for buffers. If we find enough memory switch to
892 * parallel bzip2, otherwise use parallel lzjb.
893 *
894 * It searches the dump bitmap in 2 passes. The first time it looks
895 * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
896 */
897 static void
898 dumpsys_get_maxmem()
899 {
900 dumpcfg_t *cfg = &dumpcfg;
901 cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
902 helper_t *endhp = &cfg->helper[cfg->nhelper];
903 pgcnt_t bitnum, end;
904 size_t sz, endsz, bz2size;
905 pfn_t pfn, off;
906 cbuf_t *cp;
907 helper_t *hp, *ohp;
908 dumpmlw_t mlw;
909 int k;
910
911 /*
912 * Setting dump_plat_mincpu to 0 at any time forces a serial
913 * dump.
914 */
915 if (dump_plat_mincpu == 0) {
916 cfg->clevel = 0;
917 return;
918 }
919
920 /*
921 * There may be no point in looking for spare memory. If
922 * dumping all memory, then none is spare. If doing a serial
923 * dump, then already have buffers.
924 */
925 if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
926 (dump_conflags & DUMP_ALL) != 0) {
927 if (cfg->clevel > DUMP_CLEVEL_LZJB)
928 cfg->clevel = DUMP_CLEVEL_LZJB;
929 return;
930 }
931
932 sz = 0;
933 cfg->found4m = 0;
934 cfg->foundsm = 0;
935
936 /* bitmap of ranges used to estimate which pfns are being used */
937 bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
938
939 /* find ranges that are not being dumped to use for buffers */
940 dump_init_memlist_walker(&mlw);
941 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
942 dump_timeleft = dump_timeout;
943 end = bitnum + CBUF_MAPNP;
944 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
945 ASSERT(pfn != PFN_INVALID);
946
947 /* skip partial range at end of mem segment */
948 if (mlw.mpleft < CBUF_MAPNP) {
992 continue;
993 }
994
995 for (; bitnum < end; bitnum++, pfn++) {
996 dump_timeleft = dump_timeout;
997 if (BT_TEST(dumpcfg.bitmap, bitnum))
998 continue;
999 if (!dump_pfn_check(pfn))
1000 continue;
1001 ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
1002 hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
1003 PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
1004 sz += PAGESIZE;
1005 cfg->foundsm++;
1006 dump_set_used(pfn);
1007 if (sz >= cfg->maxsize)
1008 goto foundmax;
1009 }
1010 }
1011
1012 /* Fall back to lzjb if we did not get enough memory for bzip2. */
1013 endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
1014 if (sz < endsz) {
1015 cfg->clevel = DUMP_CLEVEL_LZJB;
1016 }
1017
1018 /* Allocate memory for as many helpers as we can. */
1019 foundmax:
1020
1021 /* Byte offsets into memory found and mapped above */
1022 endsz = sz;
1023 sz = 0;
1024
1025 /* Set the size for bzip2 state. Only bzip2 needs it. */
1026 bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
1027
	/* Skip the preallocated output buffers. */
1029 cp = &cfg->cbuf[MINCBUFS];
1030
1031 /* Use this to move memory up from the preallocated helpers. */
1032 ohp = cfg->helper;
1033
1034 /* Loop over all helpers and allocate memory. */
1035 for (hp = cfg->helper; hp < endhp; hp++) {
1036
1037 /* Skip preallocated helpers by checking hp->page. */
1038 if (hp->page == NULL) {
1039 if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
1040 /* lzjb needs 2 1-page buffers */
1041 if ((sz + (2 * PAGESIZE)) > endsz)
1042 break;
1043 hp->page = cfg->maxvm + sz;
1044 sz += PAGESIZE;
1045 hp->lzbuf = cfg->maxvm + sz;
1046 sz += PAGESIZE;
1047
1048 } else if (ohp->lzbuf != NULL) {
				/* re-use the preallocated lzjb page for bzip2 */
1050 hp->page = ohp->lzbuf;
1051 ohp->lzbuf = NULL;
1052 ++ohp;
1053
1054 } else {
1055 /* bzip2 needs a 1-page buffer */
1056 if ((sz + PAGESIZE) > endsz)
1057 break;
1058 hp->page = cfg->maxvm + sz;
1059 sz += PAGESIZE;
1060 }
1061 }
1062
1063 /*
1064 * Add output buffers per helper. The number of
1065 * buffers per helper is determined by the ratio of
1066 * ncbuf to nhelper.
1067 */
1068 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
1069 k < NCBUF_PER_HELPER; k++) {
1070 cp->state = CBUF_FREEBUF;
1071 cp->size = CBUF_SIZE;
1072 cp->buf = cfg->maxvm + sz;
1073 sz += CBUF_SIZE;
1074 ++cp;
1075 }
1076
1077 /*
1078 * bzip2 needs compression state. Use the dumpbzalloc
1079 * and dumpbzfree callbacks to allocate the memory.
1080 * bzip2 does allocation only at init time.
1081 */
1082 if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
1083 if ((sz + bz2size) > endsz) {
1084 hp->page = NULL;
1085 break;
1086 } else {
1087 hp->bzstream.opaque = &sz;
1088 hp->bzstream.bzalloc = dumpbzalloc;
1089 hp->bzstream.bzfree = dumpbzfree;
1090 (void) BZ2_bzCompressInit(&hp->bzstream,
1091 dump_bzip2_level, 0, 0);
1092 hp->bzstream.opaque = NULL;
1093 }
1094 }
1095 }
1096
1097 /* Finish allocating output buffers */
1098 for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
1099 cp->state = CBUF_FREEBUF;
1100 cp->size = CBUF_SIZE;
1101 cp->buf = cfg->maxvm + sz;
1102 sz += CBUF_SIZE;
1103 }
1104
1105 /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
1106 if (cfg->found4m || cfg->foundsm)
1107 dump_check_used = 1;
1108
1109 ASSERT(sz <= endsz);
1110 }
1111
1112 static void
1113 dumphdr_init(void)
1114 {
1115 pgcnt_t npages = 0;
1930
1931 } else {
1932
1933 /*
1934 * Done with the input. Flush the VM and
1935 * return the buffer to the main task.
1936 */
1937 if (panicstr && hp->helper != MAINHELPER)
1938 hat_flush_range(kas.a_hat,
1939 hp->cpin->buf, hp->cpin->size);
1940 dumpsys_errmsg(hp, NULL);
1941 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1942 hp->cpin = NULL;
1943 }
1944 }
1945
1946 return (hp->cpin != NULL);
1947 }
1948
1949 /*
1950 * Compress size bytes starting at buf with bzip2
1951 * mode:
1952 * BZ_RUN add one more compressed page
1953 * BZ_FINISH no more input, flush the state
1954 */
static void
dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
{
	/*
	 * NOTE(review): ds looks unused here, but the queue names
	 * (freebufq, mainq) are presumably macros that expand via ds —
	 * confirm against the queue macro definitions before removing.
	 */
	dumpsync_t *ds = hp->ds;
	const int CSIZE = sizeof (dumpcsize_t);
	bz_stream *ps = &hp->bzstream;
	int rc = 0;
	uint32_t csize;
	dumpcsize_t cs;

	/* Set input pointers to new input page */
	if (size > 0) {
		ps->avail_in = size;
		ps->next_in = buf;
	}

	/* CONSTCOND */
	while (1) {

		/* Quit when all input has been consumed */
		if (ps->avail_in == 0 && mode == BZ_RUN)
			break;

		/*
		 * Get a new output buffer. The first CSIZE bytes are
		 * reserved for the <tag, size> block header written below.
		 */
		if (hp->cpout == NULL) {
			HRSTART(hp->perpage, outwait);
			hp->cpout = CQ_GET(freebufq);
			HRSTOP(hp->perpage, outwait);
			ps->avail_out = hp->cpout->size - CSIZE;
			ps->next_out = hp->cpout->buf + CSIZE;
		}

		/* Compress input, or finalize */
		HRSTART(hp->perpage, compress);
		rc = BZ2_bzCompress(ps, mode);
		HRSTOP(hp->perpage, compress);

		/* Check for error */
		if (mode == BZ_RUN && rc != BZ_RUN_OK) {
			dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
			    hp->helper, BZ2_bzErrorString(rc),
			    hp->cpin->pagenum);
			break;
		}

		/*
		 * Write the buffer if it is full, or we are flushing.
		 * Each written block is framed as <tag|csize> followed by
		 * csize bytes of compressed data, so savecore can demux
		 * interleaved streams.
		 */
		if (ps->avail_out == 0 || mode == BZ_FINISH) {
			csize = hp->cpout->size - CSIZE - ps->avail_out;
			cs = DUMP_SET_TAG(csize, hp->tag);
			if (csize > 0) {
				(void) memcpy(hp->cpout->buf, &cs, CSIZE);
				dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
				hp->cpout = NULL;
			}
		}

		/* Check for final complete */
		if (mode == BZ_FINISH) {
			if (rc == BZ_STREAM_END)
				break;
			if (rc != BZ_FINISH_OK) {
				dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
				    hp->helper, BZ2_bzErrorString(rc));
				break;
			}
		}
	}

	/* Cleanup state and buffers */
	if (mode == BZ_FINISH) {

		/* Reset state so that it is re-usable. */
		(void) BZ2_bzCompressReset(&hp->bzstream);

		/* Give any unused output buffer to the main task */
		if (hp->cpout != NULL) {
			hp->cpout->used = 0;
			CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
			hp->cpout = NULL;
		}
	}
}
2037
2038 static void
2039 dumpsys_bz2compress(helper_t *hp)
2040 {
2041 dumpsync_t *ds = hp->ds;
2042 dumpstreamhdr_t sh;
2043
2044 (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2045 sh.stream_pagenum = (pgcnt_t)-1;
2046 sh.stream_npages = 0;
2047 hp->cpin = NULL;
2048 hp->cpout = NULL;
2049 hp->cperr = NULL;
2050 hp->in = 0;
2051 hp->out = 0;
2052 hp->bzstream.avail_in = 0;
2053
2054 /* Bump reference to mainq while we are running */
2055 CQ_OPEN(mainq);
2056
2057 /* Get one page at a time */
2058 while (dumpsys_sread(hp)) {
2059 if (sh.stream_pagenum != hp->cpin->pagenum) {
2060 sh.stream_pagenum = hp->cpin->pagenum;
2061 sh.stream_npages = btop(hp->cpin->used);
2062 dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
2063 }
2064 dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);
2065 }
2066
2067 /* Done with input, flush any partial buffer */
2068 if (sh.stream_pagenum != (pgcnt_t)-1) {
2069 dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
2070 dumpsys_errmsg(hp, NULL);
2071 }
2072
2073 ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2074
2075 /* Decrement main queue count, we are done */
2076 CQ_CLOSE(mainq);
2077 }
2078
2079 /*
2080 * Compress with lzjb
2081 * write stream block if full or size==0
2082 * if csize==0 write stream header, else write <csize, data>
2083 * size==0 is a call to flush a buffer
2084 * hp->cpout is the buffer we are flushing or filling
2085 * hp->out is the next index to fill data
2086 * osize is either csize+data, or the size of a stream header
2087 */
2088 static void
2089 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
2090 {
2091 dumpsync_t *ds = hp->ds;
2092 const int CSIZE = sizeof (dumpcsize_t);
2093 dumpcsize_t cs;
2094 size_t osize = csize > 0 ? CSIZE + size : size;
2095
2096 /* If flush, and there is no buffer, just return */
2097 if (size == 0 && hp->cpout == NULL)
2098 return;
2099
2201 * panic CPU.
2202 *
2203 * At dump configuration time, helper_lock is set and helpers_wanted
2204 * is 0. dumpsys() decides whether to set helpers_wanted before
2205 * clearing helper_lock.
2206 *
2207 * At panic time, idle CPUs spin-wait on helper_lock, then alternately
2208 * take the lock and become a helper, or return.
2209 */
/*
 * Called by idle CPUs at panic time: claim a free helper slot under
 * helper_lock, drop the lock, and run the compressor until done. If no
 * slot is free, clear helpers_wanted so later CPUs return immediately.
 */
void
dumpsys_helper()
{
	dumpsys_spinlock(&dumpcfg.helper_lock);
	if (dumpcfg.helpers_wanted) {
		helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];

		for (hp = dumpcfg.helper; hp != hpend; hp++) {
			if (hp->helper == FREEHELPER) {
				/* Bind this CPU to the slot while locked. */
				hp->helper = CPU->cpu_id;
				BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);

				/*
				 * Drop the lock before the long-running
				 * compression so other CPUs can claim
				 * the remaining slots.
				 */
				dumpsys_spinunlock(&dumpcfg.helper_lock);

				if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
					dumpsys_lzjbcompress(hp);
				else
					dumpsys_bz2compress(hp);

				hp->helper = DONEHELPER;
				return;
			}
		}

		/* No more helpers are needed. */
		dumpcfg.helpers_wanted = 0;

	}
	dumpsys_spinunlock(&dumpcfg.helper_lock);
}
2240
2241 /*
2242 * No-wait helper callable in spin loops.
2243 *
2244 * Do not wait for helper_lock. Just check helpers_wanted. The caller
2245 * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
2246 * case.
2247 */
2248 void
2249 dumpsys_helper_nw()
2250 {
2251 if (dumpcfg.helpers_wanted)
2252 dumpsys_helper();
2253 }
2254
2255 /*
2256 * Dump helper for live dumps.
2257 * These run as a system task.
2258 */
2259 static void
2260 dumpsys_live_helper(void *arg)
2261 {
2262 helper_t *hp = arg;
2263
2264 BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2265 if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2266 dumpsys_lzjbcompress(hp);
2267 else
2268 dumpsys_bz2compress(hp);
2269 }
2270
2271 /*
2272 * Compress one page with lzjb (single threaded case)
2273 */
2274 static void
2275 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
2276 {
2277 dumpsync_t *ds = hp->ds;
2278 uint32_t csize;
2279
2280 hp->helper = MAINHELPER;
2281 hp->in = 0;
2282 hp->used = 0;
2283 hp->cpin = cp;
2284 while (hp->used < cp->used) {
2285 HRSTART(hp->perpage, copy);
2286 hp->in = dumpsys_copy_page(hp, hp->in);
2287 hp->used += PAGESIZE;
2288 HRSTOP(hp->perpage, copy);
2295 dumpvp_write(&csize, sizeof (csize));
2296 dumpvp_write(hp->lzbuf, csize);
2297 HRSTOP(hp->perpage, write);
2298 }
2299 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2300 hp->cpin = NULL;
2301 }
2302
2303 /*
2304 * Main task to dump pages. This is called on the dump CPU.
2305 */
2306 static void
2307 dumpsys_main_task(void *arg)
2308 {
2309 dumpsync_t *ds = arg;
2310 pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2311 dumpmlw_t mlw;
2312 cbuf_t *cp;
2313 pgcnt_t baseoff, pfnoff;
2314 pfn_t base, pfn;
2315 boolean_t dumpserial;
2316 int i;
2317
2318 /*
2319 * Fall back to serial mode if there are no helpers.
2320 * dump_plat_mincpu can be set to 0 at any time.
2321 * dumpcfg.helpermap must contain at least one member.
2322 *
2323 * It is possible that the helpers haven't registered
2324 * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
2325 * at least one helper to register.
2326 */
2327 dumpserial = B_TRUE;
2328 if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
2329 hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2330 hrtime_t hrtstart = gethrtime();
2331
2332 for (;;) {
2333 for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2334 if (dumpcfg.helpermap[i] != 0) {
2335 dumpserial = B_FALSE;
2336 break;
2337 }
2338 }
2339
2340 if ((!dumpserial) ||
2341 ((gethrtime() - hrtstart) >= hrtmax)) {
2342 break;
2343 }
2344
2345 SMT_PAUSE();
2346 }
2347
2348 if (dumpserial) {
2349 dumpcfg.clevel = 0;
2350 if (dumpcfg.helper[0].lzbuf == NULL) {
2351 dumpcfg.helper[0].lzbuf =
2352 dumpcfg.helper[1].page;
2353 }
2354 }
2355 }
2356
2357 dump_init_memlist_walker(&mlw);
2358
2359 for (;;) {
2360 int sec = (gethrtime() - ds->start) / NANOSEC;
2361
2362 /*
2363 * Render a simple progress display on the system console to
2364 * make clear to the operator that the system has not hung.
2365 * Emit an update when dump progress has advanced by one
2366 * percent, or when no update has been drawn in the last
2367 * second.
2368 */
2369 if (ds->percent > ds->percent_done || sec > ds->sec_done) {
2477 if (BT_TEST(dumpcfg.bitmap, bitnum))
2478 pagenum++;
2479
2480 dump_timeleft = dump_timeout;
2481 cp->used = ptob(pagenum - cp->pagenum);
2482
2483 HRSTART(ds->perpage, map);
2484 hat_devload(kas.a_hat, cp->buf, cp->size, base,
2485 PROT_READ, HAT_LOAD_NOCONSIST);
2486 HRSTOP(ds->perpage, map);
2487
2488 ds->pages_mapped += btop(cp->size);
2489 ds->pages_used += pagenum - cp->pagenum;
2490
2491 CQ_OPEN(mainq);
2492
2493 /*
2494 * If there are no helpers the main task does
2495 * non-streams lzjb compress.
2496 */
2497 if (dumpserial) {
2498 dumpsys_lzjb_page(dumpcfg.helper, cp);
2499 } else {
2500 /* pass mapped pages to a helper */
2501 CQ_PUT(helperq, cp, CBUF_INREADY);
2502 }
2503
2504 /* the last page was done */
2505 if (bitnum >= dumpcfg.bitmapsize)
2506 CQ_CLOSE(helperq);
2507
2508 break;
2509
2510 case CBUF_USEDMAP:
2511
2512 ds->npages += btop(cp->used);
2513
2514 HRSTART(ds->perpage, unmap);
2515 hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2516 HRSTOP(ds->perpage, unmap);
2517
2588 for (i = 0; i < ncpus; i++) {
2589 if ((i & 15) == 0)
2590 P(",,%03d,", i);
2591 if (i == myid)
2592 P(" M");
2593 else if (BT_TEST(cfg->helpermap, i))
2594 P("%4d", cpu_seq[i]->cpu_id);
2595 else
2596 P(" *");
2597 if ((i & 15) == 15)
2598 P("\n");
2599 }
2600
2601 P("ncbuf_used,%d\n", cfg->ncbuf_used);
2602 P("ncmap,%d\n", cfg->ncmap);
2603
2604 P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2605 P("Found small pages,%ld\n", cfg->foundsm);
2606
2607 P("Compression level,%d\n", cfg->clevel);
2608 P("Compression type,%s %s", cfg->clevel == 0 ? "serial" : "parallel",
2609 cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
2610 if (cfg->clevel >= DUMP_CLEVEL_BZIP2)
2611 P(" (level %d)\n", dump_bzip2_level);
2612 else
2613 P("\n");
2614 P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2615 100);
2616 P("nhelper_used,%d\n", cfg->nhelper_used);
2617
2618 P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2619 P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2620 P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2621 P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2622 P("dumpbuf.size,%ld\n", dumpbuf.size);
2623
2624 P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2625 P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2626 P("Dump time,%d\n", sec);
2627
2628 if (ds->pages_mapped > 0)
2629 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2630 / ds->pages_mapped));
2631
2632 P("\nPer-page metrics:\n");
2633 if (ds->npages > 0) {
2854 dumphdr->dump_pfn = dumpvp_flush();
2855 dump_init_memlist_walker(&mlw);
2856 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2857 dump_timeleft = dump_timeout;
2858 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2859 continue;
2860 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2861 ASSERT(pfn != PFN_INVALID);
2862 dumpvp_write(&pfn, sizeof (pfn_t));
2863 }
2864 dump_plat_pfn();
2865
2866 /*
2867 * Write out all the pages.
2868 * Map pages, copy them handling UEs, compress, and write them out.
2869 * Cooperate with any helpers running on CPUs in panic_idle().
2870 */
2871 dumphdr->dump_data = dumpvp_flush();
2872
2873 bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2874 ds->live = dumpcfg.clevel > 0 &&
2875 (dumphdr->dump_flags & DF_LIVE) != 0;
2876
2877 save_dump_clevel = dumpcfg.clevel;
2878 if (panicstr)
2879 dumpsys_get_maxmem();
2880 else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2881 dumpcfg.clevel = DUMP_CLEVEL_LZJB;
2882
2883 dumpcfg.nhelper_used = 0;
2884 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2885 if (hp->page == NULL) {
2886 hp->helper = DONEHELPER;
2887 continue;
2888 }
2889 ++dumpcfg.nhelper_used;
2890 hp->helper = FREEHELPER;
2891 hp->taskqid = NULL;
2892 hp->ds = ds;
2893 bzero(&hp->perpage, sizeof (hp->perpage));
2894 if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2895 (void) BZ2_bzCompressReset(&hp->bzstream);
2896 }
2897
2898 CQ_OPEN(freebufq);
2899 CQ_OPEN(helperq);
2900
2901 dumpcfg.ncbuf_used = 0;
2902 for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2903 if (cp->buf != NULL) {
2904 CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2905 ++dumpcfg.ncbuf_used;
2906 }
2907 }
2908
2909 for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2910 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2911
2912 ds->start = gethrtime();
2913 ds->iowaitts = ds->start;
2914
2915 /* start helpers */
2916 if (ds->live) {
2917 int n = dumpcfg.nhelper_used;
2918 int pri = MINCLSYSPRI - 25;
2919
2920 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2921 TASKQ_PREPOPULATE);
2922 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2923 if (hp->page == NULL)
2924 continue;
2925 hp->helper = hp - dumpcfg.helper;
2926 hp->taskqid = taskq_dispatch(livetaskq,
2927 dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2928 }
2929
2930 } else {
2931 if (panicstr)
2932 kmem_dump_begin();
2933 dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
2934 dumpsys_spinunlock(&dumpcfg.helper_lock);
2935 }
2936
2937 /* run main task */
2938 dumpsys_main_task(ds);
2939
2940 ds->elapsed = gethrtime() - ds->start;
2941 if (ds->elapsed < 1)
2942 ds->elapsed = 1;
2943
2944 if (livetaskq != NULL)
2945 taskq_destroy(livetaskq);
2946
2947 if (ds->neednl) {
2948 uprintf("\n");
2949 ds->neednl = 0;
2950 }
2951
2952 /* record actual pages dumped */
2953 dumphdr->dump_npages = ds->npages;
|
57 #include <fs/fs_subr.h>
58 #include <sys/fs/snode.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/dkio.h>
62 #include <sys/vtoc.h>
63 #include <sys/errorq.h>
64 #include <sys/fm/util.h>
65 #include <sys/fs/zfs.h>
66
67 #include <vm/hat.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/seg.h>
72 #include <vm/seg_kmem.h>
73 #include <sys/clock_impl.h>
74 #include <sys/hold_page.h>
75 #include <sys/cpu.h>
76
77 #define ONE_GIG (1024 * 1024 * 1024UL)
78
79 /*
80 * Parallel Dump:
81 * CPUs that are otherwise idle during panic are employed to parallelize
82 * the compression task. I/O and compression are performed by different
83 * CPUs, and are hence overlapped in time, unlike the older serial code.
84 */
85
86 /*
87 * exported vars
88 */
89 kmutex_t dump_lock; /* lock for dump configuration */
90 dumphdr_t *dumphdr; /* dump header */
91 int dump_conflags = DUMP_KERNEL; /* dump configuration flags */
92 vnode_t *dumpvp; /* dump device vnode pointer */
93 u_offset_t dumpvp_size; /* size of dump device, in bytes */
94 char *dumppath; /* pathname of dump device */
95 int dump_timeout = 120; /* timeout for dumping pages */
96 int dump_timeleft; /* portion of dump_timeout remaining */
97 int dump_ioerr; /* dump i/o error */
98 int dump_check_used; /* enable check for used pages */
99 char *dump_stack_scratch; /* scratch area for saving stack summary */
100
101 /*
102 * Tunables for dump compression and parallelism.
103 * These can be set via /etc/system.
104 *
105 * dump_ncpu_low:
106 * This is the minimum configuration for parallel lzjb.
107 * A special value of 0 means that parallel dump will not be used.
108 *
109 * dump_metrics_on:
110 * If set, metrics are collected in the kernel, passed to savecore
111 * via the dump file, and recorded by savecore in METRICS.txt.
112 */
113 uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */
114
115 /* tunables for pre-reserved heap */
116 uint_t dump_kmem_permap = 1024;
117 uint_t dump_kmem_pages = 0;
118
119 /* Define multiple buffers per helper to avoid stalling */
120 #define NCBUF_PER_HELPER 2
121 #define NCMAP_PER_HELPER 4
122
123 /* minimum number of helpers configured */
124 #define MINHELPERS (MAX(dump_ncpu_low, 1))
125 #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER)
126
127 /*
128 * Define constant parameters.
129 *
130 * CBUF_SIZE size of an output buffer
131 *
132 * CBUF_MAPSIZE size of virtual range for mapping pages
133 *
134 * CBUF_MAPNP size of virtual range in pages
135 *
136 */
137 #define DUMP_1KB ((size_t)1 << 10)
138 #define DUMP_1MB ((size_t)1 << 20)
139 #define CBUF_SIZE ((size_t)1 << 17)
140 #define CBUF_MAPSHIFT (22)
141 #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT)
142 #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
143
144 /*
339 /*
340 * helper_t helpers: contains the context for a stream. CPUs run in
341 * parallel at dump time; each CPU creates a single stream of
342 * compression data. Stream data is divided into CBUF_SIZE blocks.
343 * The blocks are written in order within a stream. But, blocks from
344 * multiple streams can be interleaved. Each stream is identified by a
345 * unique tag.
346 */
typedef struct helper {
	int helper;			/* bound helper (CPU) id, or one of */
					/* MAINHELPER/FREEHELPER/DONEHELPER */
	int tag;			/* unique compression stream tag */
	perpage_t perpage;		/* per page metrics */
	perpage_t perpagets;		/* per page metrics (timestamps) */
	taskqid_t taskqid;		/* live dump task id */
	int in, out;			/* buffer offsets */
	cbuf_t *cpin, *cpout, *cperr;	/* cbuf objects in process */
	dumpsync_t *ds;			/* pointer to sync vars */
	size_t used;			/* counts input consumed */
	char *page;			/* buffer for page copy */
	char *lzbuf;			/* lzjb output */
} helper_t;
360
361 #define MAINHELPER (-1) /* helper is also the main task */
362 #define FREEHELPER (-2) /* unbound helper */
363 #define DONEHELPER (-3) /* helper finished */
364
365 /*
366 * configuration vars for dumpsys
367 */
368 typedef struct dumpcfg {
369 int nhelper; /* number of helpers */
370 int nhelper_used; /* actual number of helpers used */
371 int ncmap; /* number VA pages for compression */
372 int ncbuf; /* number of bufs for compression */
373 int ncbuf_used; /* number of bufs in use */
374 uint_t clevel; /* dump compression level */
375 helper_t *helper; /* array of helpers */
376 cbuf_t *cmap; /* array of input (map) buffers */
377 cbuf_t *cbuf; /* array of output buffers */
378 ulong_t *helpermap; /* set of dumpsys helper CPU ids */
379 ulong_t *bitmap; /* bitmap for marking pages to dump */
380 ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */
381 pgcnt_t bitmapsize; /* size of bitmap */
382 pgcnt_t rbitmapsize; /* size of bitmap for ranges */
383 pgcnt_t found4m; /* number ranges allocated by dump */
384 pgcnt_t foundsm; /* number small pages allocated by dump */
385 pid_t *pids; /* list of process IDs at dump time */
386 size_t maxsize; /* memory size needed at dump time */
387 size_t maxvmsize; /* size of reserved VM */
388 char *maxvm; /* reserved VM for spare pages */
455 char *old_buf = dumpbuf.start;
456 size_t old_size = dumpbuf.size;
457 char *new_buf;
458 size_t new_size;
459
460 ASSERT(MUTEX_HELD(&dump_lock));
461
462 new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
463 if (new_size <= old_size)
464 return; /* no need to reallocate buffer */
465
466 new_buf = kmem_alloc(new_size, KM_SLEEP);
467 dumpbuf.size = new_size;
468 dumpbuf.start = new_buf;
469 dumpbuf.end = new_buf + new_size;
470 kmem_free(old_buf, old_size);
471 }
472
473 /*
474 * dump_update_clevel is called when dumpadm configures the dump device.
475 * Determine the compression level / type
476 * - DUMP_CLEVEL_SERIAL is single threaded lzjb
477 * - DUMP_CLEVEL_LZJB is parallel lzjb
478 * Calculate number of helpers and buffers.
479 * Allocate the minimum configuration for now.
480 *
481 * When the dump file is configured we reserve a minimum amount of
482 * memory for use at crash time. But we reserve VA for all the memory
483 * we really want in order to do the fastest dump possible. The VA is
484 * backed by pages not being dumped, according to the bitmap. If
485 * there is insufficient spare memory, however, we fall back to the
486 * minimum.
487 *
488 * Live dump (savecore -L) always uses the minimum config.
489 *
490 * For parallel dumps, the number of helpers is ncpu-1. The CPU
491 * running panic runs the main task. For single-threaded dumps, the
492 * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
493 *
494 * Need multiple buffers per helper so that they do not block waiting
495 * for the main task.
496 * parallel single-threaded
497 * Number of output buffers: nhelper*2 1
498 * Number of mapping buffers: nhelper*4 1
499 *
500 */
501 static void
502 dump_update_clevel()
503 {
504 int tag;
505 helper_t *hp, *hpend;
506 cbuf_t *cp, *cpend;
507 dumpcfg_t *old = &dumpcfg;
508 dumpcfg_t newcfg = *old;
509 dumpcfg_t *new = &newcfg;
510
511 ASSERT(MUTEX_HELD(&dump_lock));
512
513 /*
514 * Free the previously allocated bufs and VM.
515 */
516 if (old->helper != NULL) {
517
518 /* helpers */
519 hpend = &old->helper[old->nhelper];
520 for (hp = old->helper; hp != hpend; hp++) {
521 if (hp->lzbuf != NULL)
522 kmem_free(hp->lzbuf, PAGESIZE);
523 if (hp->page != NULL)
524 kmem_free(hp->page, PAGESIZE);
537 if (cp->buf != NULL)
538 kmem_free(cp->buf, cp->size);
539 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
540
541 /* reserved VM for dumpsys_get_maxmem */
542 if (old->maxvmsize > 0)
543 vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
544 }
545
546 /*
547 * Allocate memory and VM.
548 * One CPU runs dumpsys, the rest are helpers.
549 */
550 new->nhelper = ncpus - 1;
551 if (new->nhelper < 1)
552 new->nhelper = 1;
553
554 if (new->nhelper > DUMP_MAX_NHELPER)
555 new->nhelper = DUMP_MAX_NHELPER;
556
557 /* If dump_ncpu_low is 0 or greater than ncpus, do serial dump */
558 if (dump_ncpu_low == 0 || dump_ncpu_low > ncpus || new->nhelper < 2) {
559 new->clevel = DUMP_CLEVEL_SERIAL;
560 new->nhelper = 1;
561 new->ncbuf = 1;
562 new->ncmap = 1;
563 } else {
564 new->clevel = DUMP_CLEVEL_LZJB;
565 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
566 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
567 }
568
569 /*
570 * Allocate new data structures and buffers for MINHELPERS,
571 * and also figure the max desired size.
572 */
573 new->maxsize = 0;
574 new->maxvmsize = 0;
575 new->maxvm = NULL;
576 tag = 1;
577 new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
578 hpend = &new->helper[new->nhelper];
579 for (hp = new->helper; hp != hpend; hp++) {
580 hp->tag = tag++;
581 if (hp < &new->helper[MINHELPERS]) {
582 hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
583 hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
584 } else {
585 new->maxsize += 2 * PAGESIZE;
586 }
587 }
588
589 new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
590 cpend = &new->cbuf[new->ncbuf];
591 for (cp = new->cbuf; cp != cpend; cp++) {
592 cp->state = CBUF_FREEBUF;
593 cp->size = CBUF_SIZE;
594 if (cp < &new->cbuf[MINCBUFS])
595 cp->buf = kmem_alloc(cp->size, KM_SLEEP);
596 else
597 new->maxsize += cp->size;
598 }
599
600 new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
601 cpend = &new->cmap[new->ncmap];
602 for (cp = new->cmap; cp != cpend; cp++) {
603 cp->state = CBUF_FREEMAP;
604 cp->size = CBUF_MAPSIZE;
605 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
606 0, 0, NULL, NULL, VM_SLEEP);
731 ASSERT(rbitnum < dumpcfg.rbitmapsize);
732
733 BT_SET(dumpcfg.rbitmap, rbitnum);
734 }
735
736 int
737 dump_test_used(pfn_t pfn)
738 {
739 pgcnt_t bitnum, rbitnum;
740
741 bitnum = dump_pfn_to_bitnum(pfn);
742 ASSERT(bitnum != (pgcnt_t)-1);
743
744 rbitnum = CBUF_MAPP2R(bitnum);
745 ASSERT(rbitnum < dumpcfg.rbitmapsize);
746
747 return (BT_TEST(dumpcfg.rbitmap, rbitnum));
748 }
749
750 /*
751 * Perform additional checks on the page to see if we can really use
752 * it. The kernel (kas) pages are always set in the bitmap. However,
753 * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
754 * bitmap. So we check for them.
755 */
756 static inline int
757 dump_pfn_check(pfn_t pfn)
758 {
759 page_t *pp = page_numtopp_nolock(pfn);
760 if (pp == NULL || pp->p_pagenum != pfn ||
761 #if defined(__sparc)
762 pp->p_vnode == &promvp ||
763 #else
764 PP_ISBOOTPAGES(pp) ||
765 #endif
766 pp->p_toxic != 0)
767 return (0);
768 return (1);
769 }
770
771 /*
772 * Check a range to see if all contained pages are available and
773 * return non-zero if the range can be used.
774 */
775 static inline int
776 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
777 {
778 for (; start < end; start++, pfn++) {
779 if (BT_TEST(dumpcfg.bitmap, start))
780 return (0);
781 if (!dump_pfn_check(pfn))
782 return (0);
783 }
784 return (1);
785 }
786
787 /*
788 * dumpsys_get_maxmem() is called during panic. Find unused ranges
789 * and use them for buffers.
790 * It searches the dump bitmap in 2 passes. The first time it looks
791 * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
792 */
793 static void
794 dumpsys_get_maxmem()
795 {
796 dumpcfg_t *cfg = &dumpcfg;
797 cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
798 helper_t *endhp = &cfg->helper[cfg->nhelper];
799 pgcnt_t bitnum, end;
800 size_t sz, endsz;
801 pfn_t pfn, off;
802 cbuf_t *cp;
803 helper_t *hp;
804 dumpmlw_t mlw;
805 int k;
806
807 /*
808 * Setting dump_ncpu_low to 0 forces a single threaded dump.
809 */
810 if (dump_ncpu_low == 0) {
811 cfg->clevel = DUMP_CLEVEL_SERIAL;
812 return;
813 }
814
815 /*
816 * There may be no point in looking for spare memory. If
817 * dumping all memory, then none is spare. If doing a serial
818 * dump, then already have buffers.
819 */
820 if (cfg->maxsize == 0 || cfg->clevel == DUMP_CLEVEL_SERIAL ||
821 (dump_conflags & DUMP_ALL) != 0) {
822 return;
823 }
824
825 sz = 0;
826 cfg->found4m = 0;
827 cfg->foundsm = 0;
828
829 /* bitmap of ranges used to estimate which pfns are being used */
830 bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
831
832 /* find ranges that are not being dumped to use for buffers */
833 dump_init_memlist_walker(&mlw);
834 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
835 dump_timeleft = dump_timeout;
836 end = bitnum + CBUF_MAPNP;
837 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
838 ASSERT(pfn != PFN_INVALID);
839
840 /* skip partial range at end of mem segment */
841 if (mlw.mpleft < CBUF_MAPNP) {
885 continue;
886 }
887
888 for (; bitnum < end; bitnum++, pfn++) {
889 dump_timeleft = dump_timeout;
890 if (BT_TEST(dumpcfg.bitmap, bitnum))
891 continue;
892 if (!dump_pfn_check(pfn))
893 continue;
894 ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
895 hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
896 PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
897 sz += PAGESIZE;
898 cfg->foundsm++;
899 dump_set_used(pfn);
900 if (sz >= cfg->maxsize)
901 goto foundmax;
902 }
903 }
904
905 /* Allocate memory for as many helpers as we can. */
906 foundmax:
907
908 /* Byte offsets into memory found and mapped above */
909 endsz = sz;
910 sz = 0;
911
912 /* Skip the preallocate output buffers. */
913 cp = &cfg->cbuf[MINCBUFS];
914
915 /* Loop over all helpers and allocate memory. */
916 for (hp = cfg->helper; hp < endhp; hp++) {
917
918 /* Skip preallocated helpers by checking hp->page. */
919 if (hp->page == NULL) {
920 /* lzjb needs 2 1-page buffers */
921 if ((sz + (2 * PAGESIZE)) > endsz)
922 break;
923 hp->page = cfg->maxvm + sz;
924 sz += PAGESIZE;
925 hp->lzbuf = cfg->maxvm + sz;
926 sz += PAGESIZE;
927 }
928
929 /*
930 * Add output buffers per helper. The number of
931 * buffers per helper is determined by the ratio of
932 * ncbuf to nhelper.
933 */
934 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
935 k < NCBUF_PER_HELPER; k++) {
936 cp->state = CBUF_FREEBUF;
937 cp->size = CBUF_SIZE;
938 cp->buf = cfg->maxvm + sz;
939 sz += CBUF_SIZE;
940 ++cp;
941 }
942 }
943
944 /* Finish allocating output buffers */
945 for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
946 cp->state = CBUF_FREEBUF;
947 cp->size = CBUF_SIZE;
948 cp->buf = cfg->maxvm + sz;
949 sz += CBUF_SIZE;
950 }
951
952 /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
953 if (cfg->found4m || cfg->foundsm)
954 dump_check_used = 1;
955
956 ASSERT(sz <= endsz);
957 }
958
959 static void
960 dumphdr_init(void)
961 {
962 pgcnt_t npages = 0;
1777
1778 } else {
1779
1780 /*
1781 * Done with the input. Flush the VM and
1782 * return the buffer to the main task.
1783 */
1784 if (panicstr && hp->helper != MAINHELPER)
1785 hat_flush_range(kas.a_hat,
1786 hp->cpin->buf, hp->cpin->size);
1787 dumpsys_errmsg(hp, NULL);
1788 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1789 hp->cpin = NULL;
1790 }
1791 }
1792
1793 return (hp->cpin != NULL);
1794 }
1795
1796 /*
1797 * Compress with lzjb
1798 * write stream block if full or size==0
1799 * if csize==0 write stream header, else write <csize, data>
1800 * size==0 is a call to flush a buffer
1801 * hp->cpout is the buffer we are flushing or filling
1802 * hp->out is the next index to fill data
1803 * osize is either csize+data, or the size of a stream header
1804 */
1805 static void
1806 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
1807 {
1808 dumpsync_t *ds = hp->ds;
1809 const int CSIZE = sizeof (dumpcsize_t);
1810 dumpcsize_t cs;
1811 size_t osize = csize > 0 ? CSIZE + size : size;
1812
1813 /* If flush, and there is no buffer, just return */
1814 if (size == 0 && hp->cpout == NULL)
1815 return;
1816
1918 * panic CPU.
1919 *
1920 * At dump configuration time, helper_lock is set and helpers_wanted
1921 * is 0. dumpsys() decides whether to set helpers_wanted before
1922 * clearing helper_lock.
1923 *
1924 * At panic time, idle CPUs spin-wait on helper_lock, then alternately
1925 * take the lock and become a helper, or return.
1926 */
1927 void
1928 dumpsys_helper()
1929 {
1930 dumpsys_spinlock(&dumpcfg.helper_lock);
1931 if (dumpcfg.helpers_wanted) {
1932 helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
1933
1934 for (hp = dumpcfg.helper; hp != hpend; hp++) {
1935 if (hp->helper == FREEHELPER) {
1936 hp->helper = CPU->cpu_id;
1937 BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
1938 dumpsys_spinunlock(&dumpcfg.helper_lock);
1939 dumpsys_lzjbcompress(hp);
1940 hp->helper = DONEHELPER;
1941 return;
1942 }
1943 }
1944
1945 /* No more helpers are needed. */
1946 dumpcfg.helpers_wanted = 0;
1947
1948 }
1949 dumpsys_spinunlock(&dumpcfg.helper_lock);
1950 }
1951
1952 /*
1953 * No-wait helper callable in spin loops.
1954 *
1955 * Do not wait for helper_lock. Just check helpers_wanted. The caller
1956 * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
1957 * case.
1958 */
1959 void
1960 dumpsys_helper_nw()
1961 {
1962 if (dumpcfg.helpers_wanted)
1963 dumpsys_helper();
1964 }
1965
1966 /*
1967 * Dump helper for live dumps.
1968 * These run as a system task.
1969 */
1970 static void
1971 dumpsys_live_helper(void *arg)
1972 {
1973 helper_t *hp = arg;
1974
1975 BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
1976 dumpsys_lzjbcompress(hp);
1977 }
1978
1979 /*
1980 * Compress one page with lzjb (single threaded case)
1981 */
1982 static void
1983 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
1984 {
1985 dumpsync_t *ds = hp->ds;
1986 uint32_t csize;
1987
1988 hp->helper = MAINHELPER;
1989 hp->in = 0;
1990 hp->used = 0;
1991 hp->cpin = cp;
1992 while (hp->used < cp->used) {
1993 HRSTART(hp->perpage, copy);
1994 hp->in = dumpsys_copy_page(hp, hp->in);
1995 hp->used += PAGESIZE;
1996 HRSTOP(hp->perpage, copy);
2003 dumpvp_write(&csize, sizeof (csize));
2004 dumpvp_write(hp->lzbuf, csize);
2005 HRSTOP(hp->perpage, write);
2006 }
2007 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2008 hp->cpin = NULL;
2009 }
2010
2011 /*
2012 * Main task to dump pages. This is called on the dump CPU.
2013 */
2014 static void
2015 dumpsys_main_task(void *arg)
2016 {
2017 dumpsync_t *ds = arg;
2018 pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2019 dumpmlw_t mlw;
2020 cbuf_t *cp;
2021 pgcnt_t baseoff, pfnoff;
2022 pfn_t base, pfn;
2023 int i;
2024
2025 /*
2026 * Fall back to serial mode if there are no helpers.
2027 * dump_ncpu_low can be set to 0 at any time.
2028 * dumpcfg.helpermap must contain at least one member.
2029 *
2030 * It is possible that the helpers haven't registered
2031 * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
2032 * at least one helper to register.
2033 */
2034 if (dump_ncpu_low != 0 && dumpcfg.clevel != DUMP_CLEVEL_SERIAL) {
2035 boolean_t dumpserial = B_TRUE;
2036 hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2037 hrtime_t hrtstart = gethrtime();
2038
2039 for (;;) {
2040 for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2041 if (dumpcfg.helpermap[i] != 0) {
2042 dumpserial = B_FALSE;
2043 break;
2044 }
2045 }
2046
2047 if ((!dumpserial) ||
2048 ((gethrtime() - hrtstart) >= hrtmax)) {
2049 break;
2050 }
2051
2052 SMT_PAUSE();
2053 }
2054
2055 if (dumpserial) {
2056 dumpcfg.clevel = DUMP_CLEVEL_SERIAL;
2057 if (dumpcfg.helper[0].lzbuf == NULL) {
2058 dumpcfg.helper[0].lzbuf =
2059 dumpcfg.helper[1].page;
2060 }
2061 }
2062 }
2063
2064 dump_init_memlist_walker(&mlw);
2065
2066 for (;;) {
2067 int sec = (gethrtime() - ds->start) / NANOSEC;
2068
2069 /*
2070 * Render a simple progress display on the system console to
2071 * make clear to the operator that the system has not hung.
2072 * Emit an update when dump progress has advanced by one
2073 * percent, or when no update has been drawn in the last
2074 * second.
2075 */
2076 if (ds->percent > ds->percent_done || sec > ds->sec_done) {
2184 if (BT_TEST(dumpcfg.bitmap, bitnum))
2185 pagenum++;
2186
2187 dump_timeleft = dump_timeout;
2188 cp->used = ptob(pagenum - cp->pagenum);
2189
2190 HRSTART(ds->perpage, map);
2191 hat_devload(kas.a_hat, cp->buf, cp->size, base,
2192 PROT_READ, HAT_LOAD_NOCONSIST);
2193 HRSTOP(ds->perpage, map);
2194
2195 ds->pages_mapped += btop(cp->size);
2196 ds->pages_used += pagenum - cp->pagenum;
2197
2198 CQ_OPEN(mainq);
2199
2200 /*
2201 * If there are no helpers the main task does
2202 * non-streams lzjb compress.
2203 */
2204 if (dumpcfg.clevel == DUMP_CLEVEL_SERIAL) {
2205 dumpsys_lzjb_page(dumpcfg.helper, cp);
2206 } else {
2207 /* pass mapped pages to a helper */
2208 CQ_PUT(helperq, cp, CBUF_INREADY);
2209 }
2210
2211 /* the last page was done */
2212 if (bitnum >= dumpcfg.bitmapsize)
2213 CQ_CLOSE(helperq);
2214
2215 break;
2216
2217 case CBUF_USEDMAP:
2218
2219 ds->npages += btop(cp->used);
2220
2221 HRSTART(ds->perpage, unmap);
2222 hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2223 HRSTOP(ds->perpage, unmap);
2224
2295 for (i = 0; i < ncpus; i++) {
2296 if ((i & 15) == 0)
2297 P(",,%03d,", i);
2298 if (i == myid)
2299 P(" M");
2300 else if (BT_TEST(cfg->helpermap, i))
2301 P("%4d", cpu_seq[i]->cpu_id);
2302 else
2303 P(" *");
2304 if ((i & 15) == 15)
2305 P("\n");
2306 }
2307
2308 P("ncbuf_used,%d\n", cfg->ncbuf_used);
2309 P("ncmap,%d\n", cfg->ncmap);
2310
2311 P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2312 P("Found small pages,%ld\n", cfg->foundsm);
2313
2314 P("Compression level,%d\n", cfg->clevel);
2315 P("Compression type,%s lzjb\n",
2316 cfg->clevel == DUMP_CLEVEL_SERIAL ? "serial" : "parallel");
2317 P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2318 100);
2319 P("nhelper_used,%d\n", cfg->nhelper_used);
2320
2321 P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2322 P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2323 P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2324 P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2325 P("dumpbuf.size,%ld\n", dumpbuf.size);
2326
2327 P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2328 P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2329 P("Dump time,%d\n", sec);
2330
2331 if (ds->pages_mapped > 0)
2332 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2333 / ds->pages_mapped));
2334
2335 P("\nPer-page metrics:\n");
2336 if (ds->npages > 0) {
2557 dumphdr->dump_pfn = dumpvp_flush();
2558 dump_init_memlist_walker(&mlw);
2559 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2560 dump_timeleft = dump_timeout;
2561 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2562 continue;
2563 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2564 ASSERT(pfn != PFN_INVALID);
2565 dumpvp_write(&pfn, sizeof (pfn_t));
2566 }
2567 dump_plat_pfn();
2568
2569 /*
2570 * Write out all the pages.
2571 * Map pages, copy them handling UEs, compress, and write them out.
2572 * Cooperate with any helpers running on CPUs in panic_idle().
2573 */
2574 dumphdr->dump_data = dumpvp_flush();
2575
2576 bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2577 ds->live = dumpcfg.clevel > DUMP_CLEVEL_SERIAL &&
2578 (dumphdr->dump_flags & DF_LIVE) != 0;
2579
2580 save_dump_clevel = dumpcfg.clevel;
2581 if (panicstr)
2582 dumpsys_get_maxmem();
2583
2584 dumpcfg.nhelper_used = 0;
2585 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2586 if (hp->page == NULL) {
2587 hp->helper = DONEHELPER;
2588 continue;
2589 }
2590 ++dumpcfg.nhelper_used;
2591 hp->helper = FREEHELPER;
2592 hp->taskqid = NULL;
2593 hp->ds = ds;
2594 bzero(&hp->perpage, sizeof (hp->perpage));
2595 }
2596
2597 CQ_OPEN(freebufq);
2598 CQ_OPEN(helperq);
2599
2600 dumpcfg.ncbuf_used = 0;
2601 for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2602 if (cp->buf != NULL) {
2603 CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2604 ++dumpcfg.ncbuf_used;
2605 }
2606 }
2607
2608 for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2609 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2610
2611 ds->start = gethrtime();
2612 ds->iowaitts = ds->start;
2613
2614 /* start helpers */
2615 if (ds->live) {
2616 int n = dumpcfg.nhelper_used;
2617 int pri = MINCLSYSPRI - 25;
2618
2619 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2620 TASKQ_PREPOPULATE);
2621 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2622 if (hp->page == NULL)
2623 continue;
2624 hp->helper = hp - dumpcfg.helper;
2625 hp->taskqid = taskq_dispatch(livetaskq,
2626 dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2627 }
2628
2629 } else {
2630 if (panicstr)
2631 kmem_dump_begin();
2632 dumpcfg.helpers_wanted = dumpcfg.clevel > DUMP_CLEVEL_SERIAL;
2633 dumpsys_spinunlock(&dumpcfg.helper_lock);
2634 }
2635
2636 /* run main task */
2637 dumpsys_main_task(ds);
2638
2639 ds->elapsed = gethrtime() - ds->start;
2640 if (ds->elapsed < 1)
2641 ds->elapsed = 1;
2642
2643 if (livetaskq != NULL)
2644 taskq_destroy(livetaskq);
2645
2646 if (ds->neednl) {
2647 uprintf("\n");
2648 ds->neednl = 0;
2649 }
2650
2651 /* record actual pages dumped */
2652 dumphdr->dump_npages = ds->npages;
|