Print this page
9709 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
9707 Enable parallel crash dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>


  57 #include <fs/fs_subr.h>
  58 #include <sys/fs/snode.h>
  59 #include <sys/ontrap.h>
  60 #include <sys/panic.h>
  61 #include <sys/dkio.h>
  62 #include <sys/vtoc.h>
  63 #include <sys/errorq.h>
  64 #include <sys/fm/util.h>
  65 #include <sys/fs/zfs.h>
  66 
  67 #include <vm/hat.h>
  68 #include <vm/as.h>
  69 #include <vm/page.h>
  70 #include <vm/pvn.h>
  71 #include <vm/seg.h>
  72 #include <vm/seg_kmem.h>
  73 #include <sys/clock_impl.h>
  74 #include <sys/hold_page.h>
  75 #include <sys/cpu.h>
  76 
  77 #include <bzip2/bzlib.h>
  78 
  79 #define ONE_GIG (1024 * 1024 * 1024UL)
  80 
  81 /*
  82  * Crash dump time is dominated by disk write time.  To reduce this,
  83  * the stronger compression method bzip2 is applied to reduce the dump
  84  * size and hence reduce I/O time.  However, bzip2 is much more
  85  * computationally expensive than the existing lzjb algorithm, so to
  86  * avoid increasing compression time, CPUs that are otherwise idle
  87  * during panic are employed to parallelize the compression task.
  88  * Many helper CPUs are needed to prevent bzip2 from being a
  89  * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
  90  * parallelized instead. Lastly, I/O and compression are performed by
  91  * different CPUs, and are hence overlapped in time, unlike the older
  92  * serial code.
  93  *
  94  * Another important consideration is the speed of the dump
  95  * device. Faster disks need less CPUs in order to benefit from
  96  * parallel lzjb versus parallel bzip2. Therefore, the CPU count
  97  * threshold for switching from parallel lzjb to parallel bzip2 is
  98  * elevated for faster disks. The dump device speed is adduced from
  99  * the setting for dumpbuf.iosize, see dump_update_clevel.
 100  */
 101 
 102 /*
 103  * exported vars
 104  */
 105 kmutex_t        dump_lock;              /* lock for dump configuration */
 106 dumphdr_t       *dumphdr;               /* dump header */
 107 int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
 108 vnode_t         *dumpvp;                /* dump device vnode pointer */
 109 u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
 110 char            *dumppath;              /* pathname of dump device */
 111 int             dump_timeout = 120;     /* timeout for dumping pages */
 112 int             dump_timeleft;          /* portion of dump_timeout remaining */
 113 int             dump_ioerr;             /* dump i/o error */
 114 int             dump_check_used;        /* enable check for used pages */
 115 char        *dump_stack_scratch; /* scratch area for saving stack summary */
 116 
 117 /*
 118  * Tunables for dump compression and parallelism. These can be set via
 119  * /etc/system.
 120  *
 121  * dump_ncpu_low        number of helpers for parallel lzjb
 122  *      This is also the minimum configuration.

 123  *
 124  * dump_bzip2_level     bzip2 compression level: 1-9
 125  *      Higher numbers give greater compression, but take more memory
 126  *      and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
 127  *
 128  * dump_plat_mincpu     the cross-over limit for using bzip2 (per platform):
 129  *      if dump_plat_mincpu == 0, then always do single threaded dump
 130  *      if ncpu >= dump_plat_mincpu then try to use bzip2
 131  *
 132  * dump_metrics_on      if set, metrics are collected in the kernel, passed
 133  *      to savecore via the dump file, and recorded by savecore in
 134  *      METRICS.txt.
 135  */
 136 uint_t dump_ncpu_low = 4;       /* minimum config for parallel lzjb */
 137 uint_t dump_bzip2_level = 1;    /* bzip2 level (1-9) */
 138 
 139 /* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
 140 #define MINCPU_NOT_SET  ((uint_t)-1)
 141 uint_t dump_plat_mincpu = MINCPU_NOT_SET;
 142 
 143 /* tunables for pre-reserved heap */
 144 uint_t dump_kmem_permap = 1024;
 145 uint_t dump_kmem_pages = 0;
 146 
 147 /* Define multiple buffers per helper to avoid stalling */
 148 #define NCBUF_PER_HELPER        2
 149 #define NCMAP_PER_HELPER        4
 150 
 151 /* minimum number of helpers configured */
 152 #define MINHELPERS      (dump_ncpu_low)
 153 #define MINCBUFS        (MINHELPERS * NCBUF_PER_HELPER)
 154 
 155 /*
 156  * Define constant parameters.
 157  *
 158  * CBUF_SIZE            size of an output buffer
 159  *
 160  * CBUF_MAPSIZE         size of virtual range for mapping pages
 161  *
 162  * CBUF_MAPNP           size of virtual range in pages
 163  *
 164  */
 165 #define DUMP_1KB        ((size_t)1 << 10)
 166 #define DUMP_1MB        ((size_t)1 << 20)
 167 #define CBUF_SIZE       ((size_t)1 << 17)
 168 #define CBUF_MAPSHIFT   (22)
 169 #define CBUF_MAPSIZE    ((size_t)1 << CBUF_MAPSHIFT)
 170 #define CBUF_MAPNP      ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
 171 
 172 /*


 367 /*
 368  * helper_t helpers: contains the context for a stream. CPUs run in
 369  * parallel at dump time; each CPU creates a single stream of
 370  * compression data.  Stream data is divided into CBUF_SIZE blocks.
 371  * The blocks are written in order within a stream. But, blocks from
 372  * multiple streams can be interleaved. Each stream is identified by a
 373  * unique tag.
 374  */
 375 typedef struct helper {
 376         int helper;                     /* bound CPU id, or MAINHELPER/FREEHELPER/DONEHELPER */
 377         int tag;                        /* unique compression stream tag */
 378         perpage_t perpage;              /* per page metrics */
 379         perpage_t perpagets;            /* per page metrics (timestamps) */
 380         taskqid_t taskqid;              /* live dump task ptr */
 381         int in, out;                    /* buffer offsets */
 382         cbuf_t *cpin, *cpout, *cperr;   /* cbuf objects in process */
 383         dumpsync_t *ds;                 /* pointer to sync vars */
 384         size_t used;                    /* counts input consumed */
 385         char *page;                     /* buffer for page copy; NULL means helper not yet configured */
 386         char *lzbuf;                    /* lzjb output */
 387         bz_stream bzstream;             /* bzip2 state */
 388 } helper_t;
 389 
 390 #define MAINHELPER      (-1)            /* helper is also the main task */
 391 #define FREEHELPER      (-2)            /* unbound helper */
 392 #define DONEHELPER      (-3)            /* helper finished */
 393 
 394 /*
 395  * configuration vars for dumpsys
 396  */
 397 typedef struct dumpcfg {
 398         int     threshold;      /* ncpu threshold for bzip2 */
 399         int     nhelper;        /* number of helpers */
 400         int     nhelper_used;   /* actual number of helpers used */
 401         int     ncmap;          /* number VA pages for compression */
 402         int     ncbuf;          /* number of bufs for compression */
 403         int     ncbuf_used;     /* number of bufs in use */
 404         uint_t  clevel;         /* dump compression level */
 405         helper_t *helper;       /* array of helpers */
 406         cbuf_t  *cmap;          /* array of input (map) buffers */
 407         cbuf_t  *cbuf;          /* array of output  buffers */
 408         ulong_t *helpermap;     /* set of dumpsys helper CPU ids */
 409         ulong_t *bitmap;        /* bitmap for marking pages to dump */
 410         ulong_t *rbitmap;       /* bitmap for used CBUF_MAPSIZE ranges */
 411         pgcnt_t bitmapsize;     /* size of bitmap */
 412         pgcnt_t rbitmapsize;    /* size of bitmap for ranges */
 413         pgcnt_t found4m;        /* number ranges allocated by dump */
 414         pgcnt_t foundsm;        /* number small pages allocated by dump */
 415         pid_t   *pids;          /* list of process IDs at dump time */
 416         size_t  maxsize;        /* memory size needed at dump time */
 417         size_t  maxvmsize;      /* size of reserved VM */
 418         char    *maxvm;         /* reserved VM for spare pages */


 485         char *old_buf = dumpbuf.start;
 486         size_t old_size = dumpbuf.size;
 487         char *new_buf;
 488         size_t new_size;
 489 
 490         ASSERT(MUTEX_HELD(&dump_lock));
 491 
 492         new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
 493         if (new_size <= old_size)
 494                 return; /* no need to reallocate buffer */
 495 
 496         new_buf = kmem_alloc(new_size, KM_SLEEP);
 497         dumpbuf.size = new_size;
 498         dumpbuf.start = new_buf;
 499         dumpbuf.end = new_buf + new_size;
 500         kmem_free(old_buf, old_size);
 501 }
 502 
 503 /*
 504  * dump_update_clevel is called when dumpadm configures the dump device.



 505  *      Calculate number of helpers and buffers.
 506  *      Allocate the minimum configuration for now.
 507  *
 508  * When the dump file is configured we reserve a minimum amount of
 509  * memory for use at crash time. But we reserve VA for all the memory
 510  * we really want in order to do the fastest dump possible. The VA is
 511  * backed by pages not being dumped, according to the bitmap. If
 512  * there is insufficient spare memory, however, we fall back to the
 513  * minimum.
 514  *
 515  * Live dump (savecore -L) always uses the minimum config.
 516  *
 517  * clevel 0 is single threaded lzjb
 518  * clevel 1 is parallel lzjb
 519  * clevel 2 is parallel bzip2
 520  *
 521  * The ncpu threshold is selected with dump_plat_mincpu.
 522  * On OPL, set_platform_defaults() overrides the sun4u setting.
 523  * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
 524  *
 525  * Architecture         Threshold       Algorithm
 526  * sun4u                <  51                parallel lzjb
 527  * sun4u                >= 51                parallel bzip2(*)
 528  * sun4u OPL            <  8         parallel lzjb
 529  * sun4u OPL            >= 8         parallel bzip2(*)
 530  * sun4v                <  128               parallel lzjb
 531  * sun4v                >= 128               parallel bzip2(*)
 532  * x86                  < 11         parallel lzjb
 533  * x86                  >= 11                parallel bzip2(*)
 534  * 32-bit               N/A             single-threaded lzjb
 535  *
 536  * (*) bzip2 is only chosen if there is sufficient available
 537  * memory for buffers at dump time. See dumpsys_get_maxmem().
 538  *
 539  * Faster dump devices have larger I/O buffers. The threshold value is
 540  * increased according to the size of the dump I/O buffer, because
 541  * parallel lzjb performs better with faster disks. For buffers >= 1MB
 542  * the threshold is 3X; for buffers >= 256K threshold is 2X.
 543  *
 544  * For parallel dumps, the number of helpers is ncpu-1. The CPU
 545  * running panic runs the main task. For single-threaded dumps, the
 546  * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
 547  *
 548  * Need multiple buffers per helper so that they do not block waiting
 549  * for the main task.
 550  *                              parallel        single-threaded
 551  * Number of output buffers:    nhelper*2               1
 552  * Number of mapping buffers:   nhelper*4               1
 553  *
 554  */
 555 static void
 556 dump_update_clevel()
 557 {
 558         int tag;
 559         size_t bz2size;
 560         helper_t *hp, *hpend;
 561         cbuf_t *cp, *cpend;
 562         dumpcfg_t *old = &dumpcfg;
 563         dumpcfg_t newcfg = *old;
 564         dumpcfg_t *new = &newcfg;
 565 
 566         ASSERT(MUTEX_HELD(&dump_lock));
 567 
 568         /*
 569          * Free the previously allocated bufs and VM.
 570          */
 571         if (old->helper != NULL) {
 572 
 573                 /* helpers */
 574                 hpend = &old->helper[old->nhelper];
 575                 for (hp = old->helper; hp != hpend; hp++) {
 576                         if (hp->lzbuf != NULL)
 577                                 kmem_free(hp->lzbuf, PAGESIZE);
 578                         if (hp->page != NULL)
 579                                 kmem_free(hp->page, PAGESIZE);


 592                         if (cp->buf != NULL)
 593                                 kmem_free(cp->buf, cp->size);
 594                 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
 595 
 596                 /* reserved VM for dumpsys_get_maxmem */
 597                 if (old->maxvmsize > 0)
 598                         vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
 599         }
 600 
 601         /*
 602          * Allocate memory and VM.
 603          * One CPU runs dumpsys, the rest are helpers.
 604          */
 605         new->nhelper = ncpus - 1;
 606         if (new->nhelper < 1)
 607                 new->nhelper = 1;
 608 
 609         if (new->nhelper > DUMP_MAX_NHELPER)
 610                 new->nhelper = DUMP_MAX_NHELPER;
 611 
 612         /* use platform default, unless /etc/system overrides */
 613         if (dump_plat_mincpu == MINCPU_NOT_SET)
 614                 dump_plat_mincpu = dump_plat_mincpu_default;
 615 
 616         /* increase threshold for faster disks */
 617         new->threshold = dump_plat_mincpu;
 618         if (dumpbuf.iosize >= DUMP_1MB)
 619                 new->threshold *= 3;
 620         else if (dumpbuf.iosize >= (256 * DUMP_1KB))
 621                 new->threshold *= 2;
 622 
 623         /* figure compression level based upon the computed threshold. */
 624         if (dump_plat_mincpu == 0 || new->nhelper < 2) {
 625                 new->clevel = 0;
 626                 new->nhelper = 1;
 627         } else if ((new->nhelper + 1) >= new->threshold) {
 628                 new->clevel = DUMP_CLEVEL_BZIP2;
 629         } else {
 630                 new->clevel = DUMP_CLEVEL_LZJB;
 631         }
 632 
 633         if (new->clevel == 0) {
 634                 new->ncbuf = 1;
 635                 new->ncmap = 1;
 636         } else {

 637                 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
 638                 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
 639         }
 640 
 641         /*
 642          * Allocate new data structures and buffers for MINHELPERS,
 643          * and also figure the max desired size.
 644          */
 645         bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
 646         new->maxsize = 0;
 647         new->maxvmsize = 0;
 648         new->maxvm = NULL;
 649         tag = 1;
 650         new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
 651         hpend = &new->helper[new->nhelper];
 652         for (hp = new->helper; hp != hpend; hp++) {
 653                 hp->tag = tag++;
 654                 if (hp < &new->helper[MINHELPERS]) {
 655                         hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 656                         hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 657                 } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
 658                         new->maxsize += 2 * PAGESIZE;
 659                 } else {
 660                         new->maxsize += PAGESIZE;
 661                 }
 662                 if (new->clevel >= DUMP_CLEVEL_BZIP2)
 663                         new->maxsize += bz2size;
 664         }
 665 
 666         new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
 667         cpend = &new->cbuf[new->ncbuf];
 668         for (cp = new->cbuf; cp != cpend; cp++) {
 669                 cp->state = CBUF_FREEBUF;
 670                 cp->size = CBUF_SIZE;
 671                 if (cp < &new->cbuf[MINCBUFS])
 672                         cp->buf = kmem_alloc(cp->size, KM_SLEEP);
 673                 else
 674                         new->maxsize += cp->size;
 675         }
 676 
 677         new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
 678         cpend = &new->cmap[new->ncmap];
 679         for (cp = new->cmap; cp != cpend; cp++) {
 680                 cp->state = CBUF_FREEMAP;
 681                 cp->size = CBUF_MAPSIZE;
 682                 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
 683                     0, 0, NULL, NULL, VM_SLEEP);


 808         ASSERT(rbitnum < dumpcfg.rbitmapsize);
 809 
 810         BT_SET(dumpcfg.rbitmap, rbitnum);
 811 }
 812 
 /*
  * Return non-zero if the CBUF_MAPSIZE range containing pfn was claimed
  * by the dump for its own buffer pages (marked via dump_set_used).
  * The test is per-range, not per-page: any page within a used range
  * tests as used.
  */
 813 int
 814 dump_test_used(pfn_t pfn)
 815 {
 816         pgcnt_t bitnum, rbitnum;
 817 
 818         bitnum = dump_pfn_to_bitnum(pfn);
 819         ASSERT(bitnum != (pgcnt_t)-1);
 820 
 821         rbitnum = CBUF_MAPP2R(bitnum);  /* page bit -> range bit */
 822         ASSERT(rbitnum < dumpcfg.rbitmapsize);
 823 
 824         return (BT_TEST(dumpcfg.rbitmap, rbitnum));
 825 }
 826 
 827 /*
 828  * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
 829  * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
 830  */
 831 static void *
 832 dumpbzalloc(void *opaque, int items, int size)
 833 {
 834         size_t *sz;
 835         char *ret;
 836 
 837         ASSERT(opaque != NULL);
 838         sz = opaque;                    /* running byte offset into maxvm */
 839         ret = dumpcfg.maxvm + *sz;      /* bump-pointer alloc from reserved VM */
 840         *sz += items * size;
 841         *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);        /* keep next alloc aligned */
 842         ASSERT(*sz <= dumpcfg.maxvmsize);
 843         return (ret);
 844 }
 845 
 /*
  * bzip2 free callback: intentionally a no-op.  dumpbzalloc hands out
  * space from the single pre-reserved maxvm arena, which is reclaimed
  * as a whole, never piecemeal.
  */
 846 /*ARGSUSED*/
 847 static void
 848 dumpbzfree(void *opaque, void *addr)
 849 {
 850 }
 851 
 852 /*
 853  * Perform additional checks on the page to see if we can really use
 854  * it. The kernel (kas) pages are always set in the bitmap. However,
 855  * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
 856  * bitmap. So we check for them.
 857  */
 858 static inline int
 859 dump_pfn_check(pfn_t pfn)
 860 {
 861         page_t *pp = page_numtopp_nolock(pfn);
 862         if (pp == NULL || pp->p_pagenum != pfn ||
 863 #if defined(__sparc)
 864             pp->p_vnode == &promvp ||   /* PROM (boot) pages on sparc */
 865 #else
 866             PP_ISBOOTPAGES(pp) ||       /* boot pages elsewhere */
 867 #endif
 868             pp->p_toxic != 0)           /* don't use pages flagged toxic */
 869                 return (0);
 870         return (1);     /* page is usable for dump buffers */
 871 }
 872 
 873 /*
 874  * Check a range to see if all contained pages are available and
 875  * return non-zero if the range can be used.
 876  */
 877 static inline int
 878 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
 879 {
 880         for (; start < end; start++, pfn++) {
 881                 if (BT_TEST(dumpcfg.bitmap, start))     /* page will be dumped */
 882                         return (0);
 883                 if (!dump_pfn_check(pfn))               /* page itself unusable */
 884                         return (0);
 885         }
 886         return (1);     /* entire range is available */
 887 }
 888 
 889 /*
 890  * dumpsys_get_maxmem() is called during panic. Find unused ranges
 891  * and use them for buffers. If we find enough memory switch to
 892  * parallel bzip2, otherwise use parallel lzjb.
 893  *
 894  * It searches the dump bitmap in 2 passes. The first time it looks
 895  * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 896  */
 897 static void
 898 dumpsys_get_maxmem()
 899 {
 900         dumpcfg_t *cfg = &dumpcfg;
 901         cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
 902         helper_t *endhp = &cfg->helper[cfg->nhelper];
 903         pgcnt_t bitnum, end;
 904         size_t sz, endsz, bz2size;
 905         pfn_t pfn, off;
 906         cbuf_t *cp;
 907         helper_t *hp, *ohp;
 908         dumpmlw_t mlw;
 909         int k;
 910 
 911         /*
 912          * Setting dump_plat_mincpu to 0 at any time forces a serial
 913          * dump.
 914          */
 915         if (dump_plat_mincpu == 0) {
 916                 cfg->clevel = 0;
 917                 return;
 918         }
 919 
 920         /*
 921          * There may be no point in looking for spare memory. If
 922          * dumping all memory, then none is spare. If doing a serial
 923          * dump, then already have buffers.
 924          */
 925         if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
 926             (dump_conflags & DUMP_ALL) != 0) {
 927                 if (cfg->clevel > DUMP_CLEVEL_LZJB)
 928                         cfg->clevel = DUMP_CLEVEL_LZJB;
 929                 return;
 930         }
 931 
 932         sz = 0;
 933         cfg->found4m = 0;
 934         cfg->foundsm = 0;
 935 
 936         /* bitmap of ranges used to estimate which pfns are being used */
 937         bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
 938 
 939         /* find ranges that are not being dumped to use for buffers */
 940         dump_init_memlist_walker(&mlw);
 941         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
 942                 dump_timeleft = dump_timeout;
 943                 end = bitnum + CBUF_MAPNP;
 944                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
 945                 ASSERT(pfn != PFN_INVALID);
 946 
 947                 /* skip partial range at end of mem segment */
 948                 if (mlw.mpleft < CBUF_MAPNP) {


 992                         continue;
 993                 }
 994 
 995                 for (; bitnum < end; bitnum++, pfn++) {
 996                         dump_timeleft = dump_timeout;
 997                         if (BT_TEST(dumpcfg.bitmap, bitnum))
 998                                 continue;
 999                         if (!dump_pfn_check(pfn))
1000                                 continue;
1001                         ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
1002                         hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
1003                             PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
1004                         sz += PAGESIZE;
1005                         cfg->foundsm++;
1006                         dump_set_used(pfn);
1007                         if (sz >= cfg->maxsize)
1008                                 goto foundmax;
1009                 }
1010         }
1011 
1012         /* Fall back to lzjb if we did not get enough memory for bzip2. */
1013         endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
1014         if (sz < endsz) {
1015                 cfg->clevel = DUMP_CLEVEL_LZJB;
1016         }
1017 
1018         /* Allocate memory for as many helpers as we can. */
1019 foundmax:
1020 
1021         /* Byte offsets into memory found and mapped above */
1022         endsz = sz;
1023         sz = 0;
1024 
1025         /* Set the size for bzip2 state. Only bzip2 needs it. */
1026         bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
1027 
1028         /* Skip the preallocated output buffers. */
1029         cp = &cfg->cbuf[MINCBUFS];
1030 
1031         /* Use this to move memory up from the preallocated helpers. */
1032         ohp = cfg->helper;
1033 
1034         /* Loop over all helpers and allocate memory. */
1035         for (hp = cfg->helper; hp < endhp; hp++) {
1036 
1037                 /* Skip preallocated helpers by checking hp->page. */
1038                 if (hp->page == NULL) {
1039                         if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
1040                                 /* lzjb needs 2 1-page buffers */
1041                                 if ((sz + (2 * PAGESIZE)) > endsz)
1042                                         break;
1043                                 hp->page = cfg->maxvm + sz;
1044                                 sz += PAGESIZE;
1045                                 hp->lzbuf = cfg->maxvm + sz;
1046                                 sz += PAGESIZE;
1047 
1048                         } else if (ohp->lzbuf != NULL) {
1049                                 /* re-use the preallocated lzjb page for bzip2 */
1050                                 hp->page = ohp->lzbuf;
1051                                 ohp->lzbuf = NULL;
1052                                 ++ohp;
1053 
1054                         } else {
1055                                 /* bzip2 needs a 1-page buffer */
1056                                 if ((sz + PAGESIZE) > endsz)
1057                                         break;
1058                                 hp->page = cfg->maxvm + sz;
1059                                 sz += PAGESIZE;
1060                         }
1061                 }
1062 
1063                 /*
1064                  * Add output buffers per helper. The number of
1065                  * buffers per helper is determined by the ratio of
1066                  * ncbuf to nhelper.
1067                  */
1068                 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
1069                     k < NCBUF_PER_HELPER; k++) {
1070                         cp->state = CBUF_FREEBUF;
1071                         cp->size = CBUF_SIZE;
1072                         cp->buf = cfg->maxvm + sz;
1073                         sz += CBUF_SIZE;
1074                         ++cp;
1075                 }
1076 
1077                 /*
1078                  * bzip2 needs compression state. Use the dumpbzalloc
1079                  * and dumpbzfree callbacks to allocate the memory.
1080                  * bzip2 does allocation only at init time.
1081                  */
1082                 if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
1083                         if ((sz + bz2size) > endsz) {
1084                                 hp->page = NULL;        /* out of memory: mark helper unconfigured */
1085                                 break;
1086                         } else {
1087                                 hp->bzstream.opaque = &sz;
1088                                 hp->bzstream.bzalloc = dumpbzalloc;
1089                                 hp->bzstream.bzfree = dumpbzfree;
1090                                 (void) BZ2_bzCompressInit(&hp->bzstream,
1091                                     dump_bzip2_level, 0, 0);
1092                                 hp->bzstream.opaque = NULL;
1093                         }
1094                 }
1095         }
1096 
1097         /* Finish allocating output buffers */
1098         for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
1099                 cp->state = CBUF_FREEBUF;
1100                 cp->size = CBUF_SIZE;
1101                 cp->buf = cfg->maxvm + sz;
1102                 sz += CBUF_SIZE;
1103         }
1104 
1105         /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
1106         if (cfg->found4m || cfg->foundsm)
1107                 dump_check_used = 1;
1108 
1109         ASSERT(sz <= endsz);
1110 }
1111 
1112 static void
1113 dumphdr_init(void)
1114 {
1115         pgcnt_t npages = 0;


1930 
1931                 } else {
1932 
1933                         /*
1934                          * Done with the input. Flush the VM and
1935                          * return the buffer to the main task.
1936                          */
1937                         if (panicstr && hp->helper != MAINHELPER)
1938                                 hat_flush_range(kas.a_hat,
1939                                     hp->cpin->buf, hp->cpin->size);
1940                         dumpsys_errmsg(hp, NULL);
1941                         CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1942                         hp->cpin = NULL;
1943                 }
1944         }
1945 
1946         return (hp->cpin != NULL);
1947 }
1948 
1949 /*
1950  * Compress size bytes starting at buf with bzip2
1951  * mode:
1952  *      BZ_RUN          add one more compressed page
1953  *      BZ_FINISH       no more input, flush the state
1954  */
1955 static void
1956 dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
1957 {
1958         dumpsync_t *ds = hp->ds;        /* NOTE(review): seemingly unused, but the CQ and HR macros may expand to use ds -- confirm before removing */
1959         const int CSIZE = sizeof (dumpcsize_t);
1960         bz_stream *ps = &hp->bzstream;
1961         int rc = 0;
1962         uint32_t csize;
1963         dumpcsize_t cs;
1964 
1965         /* Set input pointers to new input page */
1966         if (size > 0) {
1967                 ps->avail_in = size;
1968                 ps->next_in = buf;
1969         }
1970 
1971         /* CONSTCOND */
1972         while (1) {
1973 
1974                 /* Quit when all input has been consumed */
1975                 if (ps->avail_in == 0 && mode == BZ_RUN)
1976                         break;
1977 
1978                 /* Get a new output buffer */
1979                 if (hp->cpout == NULL) {
1980                         HRSTART(hp->perpage, outwait);
1981                         hp->cpout = CQ_GET(freebufq);
1982                         HRSTOP(hp->perpage, outwait);
1983                         ps->avail_out = hp->cpout->size - CSIZE;
1984                         ps->next_out = hp->cpout->buf + CSIZE;
1985                 }
1986 
1987                 /* Compress input, or finalize */
1988                 HRSTART(hp->perpage, compress);
1989                 rc = BZ2_bzCompress(ps, mode);
1990                 HRSTOP(hp->perpage, compress);
1991 
1992                 /* Check for error */
1993                 if (mode == BZ_RUN && rc != BZ_RUN_OK) {
1994                         dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
1995                             hp->helper, BZ2_bzErrorString(rc),
1996                             hp->cpin->pagenum);
1997                         break;
1998                 }
1999 
2000                 /* Write the buffer if it is full, or we are flushing */
2001                 if (ps->avail_out == 0 || mode == BZ_FINISH) {
2002                         csize = hp->cpout->size - CSIZE - ps->avail_out;
2003                         cs = DUMP_SET_TAG(csize, hp->tag);      /* tag identifies this helper's stream */
2004                         if (csize > 0) {
2005                                 (void) memcpy(hp->cpout->buf, &cs, CSIZE);      /* prepend tag+size header */
2006                                 dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
2007                                 hp->cpout = NULL;
2008                         }
2009                 }
2010 
2011                 /* Check for final complete */
2012                 if (mode == BZ_FINISH) {
2013                         if (rc == BZ_STREAM_END)
2014                                 break;
2015                         if (rc != BZ_FINISH_OK) {
2016                                 dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
2017                                     hp->helper, BZ2_bzErrorString(rc));
2018                                 break;
2019                         }
2020                 }
2021         }
2022 
2023         /* Cleanup state and buffers */
2024         if (mode == BZ_FINISH) {
2025 
2026                 /* Reset state so that it is re-usable. */
2027                 (void) BZ2_bzCompressReset(&hp->bzstream);
2028 
2029                 /* Give any unused output buffer to the main task */
2030                 if (hp->cpout != NULL) {
2031                         hp->cpout->used = 0;
2032                         CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
2033                         hp->cpout = NULL;
2034                 }
2035         }
2036 }
2037 
/*
 * Helper main loop for bzip2: read input pages one at a time via
 * dumpsys_sread(), emit a stream header whenever a new contiguous
 * page range begins, and compress each page into this helper's
 * tagged output stream.
 */
2038 static void
2039 dumpsys_bz2compress(helper_t *hp)
2040 {
2041         dumpsync_t *ds = hp->ds;
2042         dumpstreamhdr_t sh;
2043 
2044         (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2045         sh.stream_pagenum = (pgcnt_t)-1;        /* sentinel: no range started yet */
2046         sh.stream_npages = 0;
2047         hp->cpin = NULL;
2048         hp->cpout = NULL;
2049         hp->cperr = NULL;
2050         hp->in = 0;
2051         hp->out = 0;
2052         hp->bzstream.avail_in = 0;
2053 
2054         /* Bump reference to mainq while we are running */
2055         CQ_OPEN(mainq);
2056 
2057         /* Get one page at a time */
2058         while (dumpsys_sread(hp)) {
2059                 if (sh.stream_pagenum != hp->cpin->pagenum) {
2060                         sh.stream_pagenum = hp->cpin->pagenum;
2061                         sh.stream_npages = btop(hp->cpin->used);
2062                         dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
2063                 }
2064                 dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);       /* mode 0 is BZ_RUN */
2065         }
2066 
2067         /* Done with input, flush any partial buffer */
2068         if (sh.stream_pagenum != (pgcnt_t)-1) {
2069                 dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
2070                 dumpsys_errmsg(hp, NULL);
2071         }
2072 
2073         ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2074 
2075         /* Decrement main queue count, we are done */
2076         CQ_CLOSE(mainq);
2077 }
2078 
2079 /*
2080  * Compress with lzjb
2081  * write stream block if full or size==0
2082  * if csize==0 write stream header, else write <csize, data>
2083  * size==0 is a call to flush a buffer
2084  * hp->cpout is the buffer we are flushing or filling
2085  * hp->out is the next index to fill data
2086  * osize is either csize+data, or the size of a stream header
2087  */
2088 static void
2089 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
2090 {
2091         dumpsync_t *ds = hp->ds;
2092         const int CSIZE = sizeof (dumpcsize_t);
2093         dumpcsize_t cs;
2094         size_t osize = csize > 0 ? CSIZE + size : size;
2095 
2096         /* If flush, and there is no buffer, just return */
2097         if (size == 0 && hp->cpout == NULL)
2098                 return;
2099 


2201  * panic CPU.
2202  *
2203  * At dump configuration time, helper_lock is set and helpers_wanted
2204  * is 0. dumpsys() decides whether to set helpers_wanted before
2205  * clearing helper_lock.
2206  *
2207  * At panic time, idle CPUs spin-wait on helper_lock, then alternately
2208  * take the lock and become a helper, or return.
2209  */
2210 void
2211 dumpsys_helper()
2212 {
2213         dumpsys_spinlock(&dumpcfg.helper_lock);
2214         if (dumpcfg.helpers_wanted) {
2215                 helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2216 
2217                 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2218                         if (hp->helper == FREEHELPER) {
2219                                 hp->helper = CPU->cpu_id;
2220                                 BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2221 
2222                                 dumpsys_spinunlock(&dumpcfg.helper_lock);
2223 
2224                                 if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2225                                         dumpsys_lzjbcompress(hp);
2226                                 else
2227                                         dumpsys_bz2compress(hp);
2228 
2229                                 hp->helper = DONEHELPER;
2230                                 return;
2231                         }
2232                 }
2233 
2234                 /* No more helpers are needed. */
2235                 dumpcfg.helpers_wanted = 0;
2236 
2237         }
2238         dumpsys_spinunlock(&dumpcfg.helper_lock);
2239 }
2240 
2241 /*
2242  * No-wait helper callable in spin loops.
2243  *
2244  * Do not wait for helper_lock. Just check helpers_wanted. The caller
2245  * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
2246  * case.
2247  */
2248 void
2249 dumpsys_helper_nw()
2250 {
2251         if (dumpcfg.helpers_wanted)
2252                 dumpsys_helper();
2253 }
2254 
2255 /*
2256  * Dump helper for live dumps.
2257  * These run as a system task.
2258  */
2259 static void
2260 dumpsys_live_helper(void *arg)
2261 {
2262         helper_t *hp = arg;
2263 
2264         BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2265         if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2266                 dumpsys_lzjbcompress(hp);
2267         else
2268                 dumpsys_bz2compress(hp);
2269 }
2270 
2271 /*
2272  * Compress one page with lzjb (single threaded case)
2273  */
2274 static void
2275 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
2276 {
2277         dumpsync_t *ds = hp->ds;
2278         uint32_t csize;
2279 
2280         hp->helper = MAINHELPER;
2281         hp->in = 0;
2282         hp->used = 0;
2283         hp->cpin = cp;
2284         while (hp->used < cp->used) {
2285                 HRSTART(hp->perpage, copy);
2286                 hp->in = dumpsys_copy_page(hp, hp->in);
2287                 hp->used += PAGESIZE;
2288                 HRSTOP(hp->perpage, copy);


2295                 dumpvp_write(&csize, sizeof (csize));
2296                 dumpvp_write(hp->lzbuf, csize);
2297                 HRSTOP(hp->perpage, write);
2298         }
2299         CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2300         hp->cpin = NULL;
2301 }
2302 
2303 /*
2304  * Main task to dump pages. This is called on the dump CPU.
2305  */
2306 static void
2307 dumpsys_main_task(void *arg)
2308 {
2309         dumpsync_t *ds = arg;
2310         pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2311         dumpmlw_t mlw;
2312         cbuf_t *cp;
2313         pgcnt_t baseoff, pfnoff;
2314         pfn_t base, pfn;
2315         boolean_t dumpserial;
2316         int i;
2317 
2318         /*
2319          * Fall back to serial mode if there are no helpers.
2320          * dump_plat_mincpu can be set to 0 at any time.
2321          * dumpcfg.helpermap must contain at least one member.
2322          *
2323          * It is possible that the helpers haven't registered
2324          * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
2325          * at least one helper to register.
2326          */
2327         dumpserial = B_TRUE;
2328         if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
2329                 hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2330                 hrtime_t hrtstart = gethrtime();
2331 
2332                 for (;;) {
2333                         for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2334                                 if (dumpcfg.helpermap[i] != 0) {
2335                                         dumpserial = B_FALSE;
2336                                         break;
2337                                 }
2338                         }
2339 
2340                         if ((!dumpserial) ||
2341                             ((gethrtime() - hrtstart) >= hrtmax)) {
2342                                 break;
2343                         }
2344 
2345                         SMT_PAUSE();
2346                 }
2347 
2348                 if (dumpserial) {
2349                         dumpcfg.clevel = 0;
2350                         if (dumpcfg.helper[0].lzbuf == NULL) {
2351                                 dumpcfg.helper[0].lzbuf =
2352                                     dumpcfg.helper[1].page;
2353                         }
2354                 }
2355         }
2356 
2357         dump_init_memlist_walker(&mlw);
2358 
2359         for (;;) {
2360                 int sec = (gethrtime() - ds->start) / NANOSEC;
2361 
2362                 /*
2363                  * Render a simple progress display on the system console to
2364                  * make clear to the operator that the system has not hung.
2365                  * Emit an update when dump progress has advanced by one
2366                  * percent, or when no update has been drawn in the last
2367                  * second.
2368                  */
2369                 if (ds->percent > ds->percent_done || sec > ds->sec_done) {


2477                                 if (BT_TEST(dumpcfg.bitmap, bitnum))
2478                                         pagenum++;
2479 
2480                         dump_timeleft = dump_timeout;
2481                         cp->used = ptob(pagenum - cp->pagenum);
2482 
2483                         HRSTART(ds->perpage, map);
2484                         hat_devload(kas.a_hat, cp->buf, cp->size, base,
2485                             PROT_READ, HAT_LOAD_NOCONSIST);
2486                         HRSTOP(ds->perpage, map);
2487 
2488                         ds->pages_mapped += btop(cp->size);
2489                         ds->pages_used += pagenum - cp->pagenum;
2490 
2491                         CQ_OPEN(mainq);
2492 
2493                         /*
2494                          * If there are no helpers the main task does
2495                          * non-streams lzjb compress.
2496                          */
2497                         if (dumpserial) {
2498                                 dumpsys_lzjb_page(dumpcfg.helper, cp);
2499                         } else {
2500                                 /* pass mapped pages to a helper */
2501                                 CQ_PUT(helperq, cp, CBUF_INREADY);
2502                         }
2503 
2504                         /* the last page was done */
2505                         if (bitnum >= dumpcfg.bitmapsize)
2506                                 CQ_CLOSE(helperq);
2507 
2508                         break;
2509 
2510                 case CBUF_USEDMAP:
2511 
2512                         ds->npages += btop(cp->used);
2513 
2514                         HRSTART(ds->perpage, unmap);
2515                         hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2516                         HRSTOP(ds->perpage, unmap);
2517 


2588         for (i = 0; i < ncpus; i++) {
2589                 if ((i & 15) == 0)
2590                         P(",,%03d,", i);
2591                 if (i == myid)
2592                         P("   M");
2593                 else if (BT_TEST(cfg->helpermap, i))
2594                         P("%4d", cpu_seq[i]->cpu_id);
2595                 else
2596                         P("   *");
2597                 if ((i & 15) == 15)
2598                         P("\n");
2599         }
2600 
2601         P("ncbuf_used,%d\n", cfg->ncbuf_used);
2602         P("ncmap,%d\n", cfg->ncmap);
2603 
2604         P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2605         P("Found small pages,%ld\n", cfg->foundsm);
2606 
2607         P("Compression level,%d\n", cfg->clevel);
2608         P("Compression type,%s %s", cfg->clevel == 0 ? "serial" : "parallel",
2609             cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
2610         if (cfg->clevel >= DUMP_CLEVEL_BZIP2)
2611                 P(" (level %d)\n", dump_bzip2_level);
2612         else
2613                 P("\n");
2614         P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2615             100);
2616         P("nhelper_used,%d\n", cfg->nhelper_used);
2617 
2618         P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2619         P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2620         P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2621         P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2622         P("dumpbuf.size,%ld\n", dumpbuf.size);
2623 
2624         P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2625         P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2626         P("Dump time,%d\n", sec);
2627 
2628         if (ds->pages_mapped > 0)
2629                 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2630                     / ds->pages_mapped));
2631 
2632         P("\nPer-page metrics:\n");
2633         if (ds->npages > 0) {


2854         dumphdr->dump_pfn = dumpvp_flush();
2855         dump_init_memlist_walker(&mlw);
2856         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2857                 dump_timeleft = dump_timeout;
2858                 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2859                         continue;
2860                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2861                 ASSERT(pfn != PFN_INVALID);
2862                 dumpvp_write(&pfn, sizeof (pfn_t));
2863         }
2864         dump_plat_pfn();
2865 
2866         /*
2867          * Write out all the pages.
2868          * Map pages, copy them handling UEs, compress, and write them out.
2869          * Cooperate with any helpers running on CPUs in panic_idle().
2870          */
2871         dumphdr->dump_data = dumpvp_flush();
2872 
2873         bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2874         ds->live = dumpcfg.clevel > 0 &&
2875             (dumphdr->dump_flags & DF_LIVE) != 0;
2876 
2877         save_dump_clevel = dumpcfg.clevel;
2878         if (panicstr)
2879                 dumpsys_get_maxmem();
2880         else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2881                 dumpcfg.clevel = DUMP_CLEVEL_LZJB;
2882 
2883         dumpcfg.nhelper_used = 0;
2884         for (hp = dumpcfg.helper; hp != hpend; hp++) {
2885                 if (hp->page == NULL) {
2886                         hp->helper = DONEHELPER;
2887                         continue;
2888                 }
2889                 ++dumpcfg.nhelper_used;
2890                 hp->helper = FREEHELPER;
2891                 hp->taskqid = NULL;
2892                 hp->ds = ds;
2893                 bzero(&hp->perpage, sizeof (hp->perpage));
2894                 if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2895                         (void) BZ2_bzCompressReset(&hp->bzstream);
2896         }
2897 
2898         CQ_OPEN(freebufq);
2899         CQ_OPEN(helperq);
2900 
2901         dumpcfg.ncbuf_used = 0;
2902         for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2903                 if (cp->buf != NULL) {
2904                         CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2905                         ++dumpcfg.ncbuf_used;
2906                 }
2907         }
2908 
2909         for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2910                 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2911 
2912         ds->start = gethrtime();
2913         ds->iowaitts = ds->start;
2914 
2915         /* start helpers */
2916         if (ds->live) {
2917                 int n = dumpcfg.nhelper_used;
2918                 int pri = MINCLSYSPRI - 25;
2919 
2920                 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2921                     TASKQ_PREPOPULATE);
2922                 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2923                         if (hp->page == NULL)
2924                                 continue;
2925                         hp->helper = hp - dumpcfg.helper;
2926                         hp->taskqid = taskq_dispatch(livetaskq,
2927                             dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2928                 }
2929 
2930         } else {
2931                 if (panicstr)
2932                         kmem_dump_begin();
2933                 dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
2934                 dumpsys_spinunlock(&dumpcfg.helper_lock);
2935         }
2936 
2937         /* run main task */
2938         dumpsys_main_task(ds);
2939 
2940         ds->elapsed = gethrtime() - ds->start;
2941         if (ds->elapsed < 1)
2942                 ds->elapsed = 1;
2943 
2944         if (livetaskq != NULL)
2945                 taskq_destroy(livetaskq);
2946 
2947         if (ds->neednl) {
2948                 uprintf("\n");
2949                 ds->neednl = 0;
2950         }
2951 
2952         /* record actual pages dumped */
2953         dumphdr->dump_npages = ds->npages;




  57 #include <fs/fs_subr.h>
  58 #include <sys/fs/snode.h>
  59 #include <sys/ontrap.h>
  60 #include <sys/panic.h>
  61 #include <sys/dkio.h>
  62 #include <sys/vtoc.h>
  63 #include <sys/errorq.h>
  64 #include <sys/fm/util.h>
  65 #include <sys/fs/zfs.h>
  66 
  67 #include <vm/hat.h>
  68 #include <vm/as.h>
  69 #include <vm/page.h>
  70 #include <vm/pvn.h>
  71 #include <vm/seg.h>
  72 #include <vm/seg_kmem.h>
  73 #include <sys/clock_impl.h>
  74 #include <sys/hold_page.h>
  75 #include <sys/cpu.h>
  76 


  77 #define ONE_GIG (1024 * 1024 * 1024UL)
  78 
  79 /*
  80  * Parallel Dump:
  81  * CPUs that are otherwise idle during panic are employed to parallelize
  82  * the compression task. I/O and compression are performed by different
  83  * CPUs, and are hence overlapped in time, unlike the older serial code.














  84  */
  85 
  86 /*
  87  * exported vars
  88  */
  89 kmutex_t        dump_lock;              /* lock for dump configuration */
  90 dumphdr_t       *dumphdr;               /* dump header */
  91 int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
  92 vnode_t         *dumpvp;                /* dump device vnode pointer */
  93 u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
  94 char            *dumppath;              /* pathname of dump device */
  95 int             dump_timeout = 120;     /* timeout for dumping pages */
  96 int             dump_timeleft;          /* portion of dump_timeout remaining */
  97 int             dump_ioerr;             /* dump i/o error */
  98 int             dump_check_used;        /* enable check for used pages */
  99 char            *dump_stack_scratch; /* scratch area for saving stack summary */
 100 
 101 /*
 102  * Tunables for dump compression and parallelism.
 103  * These can be set via /etc/system.
 104  *
 105  * dump_ncpu_low:
 106  * This is the minimum configuration for parallel lzjb.
 107  * A special value of 0 means that parallel dump will not be used.
 108  *
 109  * dump_metrics_on:
 110  * If set, metrics are collected in the kernel, passed to savecore
 111  * via the dump file, and recorded by savecore in METRICS.txt.








 112  */
 113 uint_t dump_ncpu_low = 4;       /* minimum config for parallel lzjb */

 114 




 115 /* tunables for pre-reserved heap */
 116 uint_t dump_kmem_permap = 1024;
 117 uint_t dump_kmem_pages = 0;
 118 
 119 /* Define multiple buffers per helper to avoid stalling */
 120 #define NCBUF_PER_HELPER        2
 121 #define NCMAP_PER_HELPER        4
 122 
 123 /* minimum number of helpers configured */
 124 #define MINHELPERS      (MAX(dump_ncpu_low, 1))
 125 #define MINCBUFS        (MINHELPERS * NCBUF_PER_HELPER)
 126 
 127 /*
 128  * Define constant parameters.
 129  *
 130  * CBUF_SIZE            size of an output buffer
 131  *
 132  * CBUF_MAPSIZE         size of virtual range for mapping pages
 133  *
 134  * CBUF_MAPNP           size of virtual range in pages
 135  *
 136  */
 137 #define DUMP_1KB        ((size_t)1 << 10)
 138 #define DUMP_1MB        ((size_t)1 << 20)
 139 #define CBUF_SIZE       ((size_t)1 << 17)
 140 #define CBUF_MAPSHIFT   (22)
 141 #define CBUF_MAPSIZE    ((size_t)1 << CBUF_MAPSHIFT)
 142 #define CBUF_MAPNP      ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
 143 
 144 /*


 339 /*
 340  * helper_t helpers: contains the context for a stream. CPUs run in
 341  * parallel at dump time; each CPU creates a single stream of
 342  * compression data.  Stream data is divided into CBUF_SIZE blocks.
 343  * The blocks are written in order within a stream. But, blocks from
 344  * multiple streams can be interleaved. Each stream is identified by a
 345  * unique tag.
 346  */
 347 typedef struct helper {
 348         int helper;                     /* bound helper id */
 349         int tag;                        /* compression stream tag */
 350         perpage_t perpage;              /* per page metrics */
 351         perpage_t perpagets;            /* per page metrics (timestamps) */
 352         taskqid_t taskqid;              /* live dump task ptr */
 353         int in, out;                    /* buffer offsets */
 354         cbuf_t *cpin, *cpout, *cperr;   /* cbuf objects in process */
 355         dumpsync_t *ds;                 /* pointer to sync vars */
 356         size_t used;                    /* counts input consumed */
 357         char *page;                     /* buffer for page copy */
 358         char *lzbuf;                    /* lzjb output */

 359 } helper_t;
 360 
 361 #define MAINHELPER      (-1)            /* helper is also the main task */
 362 #define FREEHELPER      (-2)            /* unbound helper */
 363 #define DONEHELPER      (-3)            /* helper finished */
 364 
 365 /*
 366  * configuration vars for dumpsys
 367  */
 368 typedef struct dumpcfg {

 369         int     nhelper;        /* number of helpers */
 370         int     nhelper_used;   /* actual number of helpers used */
 371         int     ncmap;          /* number VA pages for compression */
 372         int     ncbuf;          /* number of bufs for compression */
 373         int     ncbuf_used;     /* number of bufs in use */
 374         uint_t  clevel;         /* dump compression level */
 375         helper_t *helper;       /* array of helpers */
 376         cbuf_t  *cmap;          /* array of input (map) buffers */
 377         cbuf_t  *cbuf;          /* array of output  buffers */
 378         ulong_t *helpermap;     /* set of dumpsys helper CPU ids */
 379         ulong_t *bitmap;        /* bitmap for marking pages to dump */
 380         ulong_t *rbitmap;       /* bitmap for used CBUF_MAPSIZE ranges */
 381         pgcnt_t bitmapsize;     /* size of bitmap */
 382         pgcnt_t rbitmapsize;    /* size of bitmap for ranges */
 383         pgcnt_t found4m;        /* number ranges allocated by dump */
 384         pgcnt_t foundsm;        /* number small pages allocated by dump */
 385         pid_t   *pids;          /* list of process IDs at dump time */
 386         size_t  maxsize;        /* memory size needed at dump time */
 387         size_t  maxvmsize;      /* size of reserved VM */
 388         char    *maxvm;         /* reserved VM for spare pages */


 455         char *old_buf = dumpbuf.start;
 456         size_t old_size = dumpbuf.size;
 457         char *new_buf;
 458         size_t new_size;
 459 
 460         ASSERT(MUTEX_HELD(&dump_lock));
 461 
 462         new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
 463         if (new_size <= old_size)
 464                 return; /* no need to reallocate buffer */
 465 
 466         new_buf = kmem_alloc(new_size, KM_SLEEP);
 467         dumpbuf.size = new_size;
 468         dumpbuf.start = new_buf;
 469         dumpbuf.end = new_buf + new_size;
 470         kmem_free(old_buf, old_size);
 471 }
 472 
 473 /*
 474  * dump_update_clevel is called when dumpadm configures the dump device.
 475  *      Determine the compression level / type
 476  *      - DUMP_CLEVEL_SERIAL is single threaded lzjb
 477  *      - DUMP_CLEVEL_LZJB   is parallel lzjb
 478  *      Calculate number of helpers and buffers.
 479  *      Allocate the minimum configuration for now.
 480  *
 481  * When the dump file is configured we reserve a minimum amount of
 482  * memory for use at crash time. But we reserve VA for all the memory
 483  * we really want in order to do the fastest dump possible. The VA is
 484  * backed by pages not being dumped, according to the bitmap. If
 485  * there is insufficient spare memory, however, we fall back to the
 486  * minimum.
 487  *
 488  * Live dump (savecore -L) always uses the minimum config.
 489  *



























 490  * For parallel dumps, the number of helpers is ncpu-1. The CPU
 491  * running panic runs the main task. For single-threaded dumps, the
 492  * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
 493  *
 494  * Need multiple buffers per helper so that they do not block waiting
 495  * for the main task.
 496  *                              parallel        single-threaded
 497  * Number of output buffers:    nhelper*2               1
 498  * Number of mapping buffers:   nhelper*4               1
 499  *
 500  */
 501 static void
 502 dump_update_clevel()
 503 {
 504         int tag;

 505         helper_t *hp, *hpend;
 506         cbuf_t *cp, *cpend;
 507         dumpcfg_t *old = &dumpcfg;
 508         dumpcfg_t newcfg = *old;
 509         dumpcfg_t *new = &newcfg;
 510 
 511         ASSERT(MUTEX_HELD(&dump_lock));
 512 
 513         /*
 514          * Free the previously allocated bufs and VM.
 515          */
 516         if (old->helper != NULL) {
 517 
 518                 /* helpers */
 519                 hpend = &old->helper[old->nhelper];
 520                 for (hp = old->helper; hp != hpend; hp++) {
 521                         if (hp->lzbuf != NULL)
 522                                 kmem_free(hp->lzbuf, PAGESIZE);
 523                         if (hp->page != NULL)
 524                                 kmem_free(hp->page, PAGESIZE);


 537                         if (cp->buf != NULL)
 538                                 kmem_free(cp->buf, cp->size);
 539                 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
 540 
 541                 /* reserved VM for dumpsys_get_maxmem */
 542                 if (old->maxvmsize > 0)
 543                         vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
 544         }
 545 
 546         /*
 547          * Allocate memory and VM.
 548          * One CPU runs dumpsys, the rest are helpers.
 549          */
 550         new->nhelper = ncpus - 1;
 551         if (new->nhelper < 1)
 552                 new->nhelper = 1;
 553 
 554         if (new->nhelper > DUMP_MAX_NHELPER)
 555                 new->nhelper = DUMP_MAX_NHELPER;
 556 
 557         /* If dump_ncpu_low is 0 or greater than ncpus, do serial dump */
 558         if (dump_ncpu_low == 0 || dump_ncpu_low > ncpus || new->nhelper < 2) {
 559                 new->clevel = DUMP_CLEVEL_SERIAL;











 560                 new->nhelper = 1;







 561                 new->ncbuf = 1;
 562                 new->ncmap = 1;
 563         } else {
 564                 new->clevel = DUMP_CLEVEL_LZJB;
 565                 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
 566                 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
 567         }
 568 
 569         /*
 570          * Allocate new data structures and buffers for MINHELPERS,
 571          * and also figure the max desired size.
 572          */

 573         new->maxsize = 0;
 574         new->maxvmsize = 0;
 575         new->maxvm = NULL;
 576         tag = 1;
 577         new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
 578         hpend = &new->helper[new->nhelper];
 579         for (hp = new->helper; hp != hpend; hp++) {
 580                 hp->tag = tag++;
 581                 if (hp < &new->helper[MINHELPERS]) {
 582                         hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 583                         hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);


 584                 } else  {
 585                         new->maxsize += 2 * PAGESIZE;
 586                 }


 587         }
 588 
 589         new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
 590         cpend = &new->cbuf[new->ncbuf];
 591         for (cp = new->cbuf; cp != cpend; cp++) {
 592                 cp->state = CBUF_FREEBUF;
 593                 cp->size = CBUF_SIZE;
 594                 if (cp < &new->cbuf[MINCBUFS])
 595                         cp->buf = kmem_alloc(cp->size, KM_SLEEP);
 596                 else
 597                         new->maxsize += cp->size;
 598         }
 599 
 600         new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
 601         cpend = &new->cmap[new->ncmap];
 602         for (cp = new->cmap; cp != cpend; cp++) {
 603                 cp->state = CBUF_FREEMAP;
 604                 cp->size = CBUF_MAPSIZE;
 605                 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
 606                     0, 0, NULL, NULL, VM_SLEEP);


 731         ASSERT(rbitnum < dumpcfg.rbitmapsize);
 732 
 733         BT_SET(dumpcfg.rbitmap, rbitnum);
 734 }
 735 
 736 int
 737 dump_test_used(pfn_t pfn)
 738 {
 739         pgcnt_t bitnum, rbitnum;
 740 
 741         bitnum = dump_pfn_to_bitnum(pfn);
 742         ASSERT(bitnum != (pgcnt_t)-1);
 743 
 744         rbitnum = CBUF_MAPP2R(bitnum);
 745         ASSERT(rbitnum < dumpcfg.rbitmapsize);
 746 
 747         return (BT_TEST(dumpcfg.rbitmap, rbitnum));
 748 }
 749 
 750 /*

























 751  * Perform additional checks on the page to see if we can really use
 752  * it. The kernel (kas) pages are always set in the bitmap. However,
 753  * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
 754  * bitmap. So we check for them.
 755  */
 756 static inline int
 757 dump_pfn_check(pfn_t pfn)
 758 {
 759         page_t *pp = page_numtopp_nolock(pfn);
 760         if (pp == NULL || pp->p_pagenum != pfn ||
 761 #if defined(__sparc)
 762             pp->p_vnode == &promvp ||
 763 #else
 764             PP_ISBOOTPAGES(pp) ||
 765 #endif
 766             pp->p_toxic != 0)
 767                 return (0);
 768         return (1);
 769 }
 770 
 771 /*
 772  * Check a range to see if all contained pages are available and
 773  * return non-zero if the range can be used.
 774  */
 775 static inline int
 776 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
 777 {
 778         for (; start < end; start++, pfn++) {
 779                 if (BT_TEST(dumpcfg.bitmap, start))
 780                         return (0);
 781                 if (!dump_pfn_check(pfn))
 782                         return (0);
 783         }
 784         return (1);
 785 }
 786 
 787 /*
 788  * dumpsys_get_maxmem() is called during panic. Find unused ranges
 789  * and use them for buffers.


 790  * It searches the dump bitmap in 2 passes. The first time it looks
 791  * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 792  */
 793 static void
 794 dumpsys_get_maxmem()
 795 {
 796         dumpcfg_t *cfg = &dumpcfg;
 797         cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
 798         helper_t *endhp = &cfg->helper[cfg->nhelper];
 799         pgcnt_t bitnum, end;
 800         size_t sz, endsz;
 801         pfn_t pfn, off;
 802         cbuf_t *cp;
 803         helper_t *hp;
 804         dumpmlw_t mlw;
 805         int k;
 806 
 807         /*
 808          * Setting dump_ncpu_low to 0 forces a single threaded dump.

 809          */
 810         if (dump_ncpu_low == 0) {
 811                 cfg->clevel = DUMP_CLEVEL_SERIAL;
 812                 return;
 813         }
 814 
 815         /*
 816          * There may be no point in looking for spare memory. If
 817          * dumping all memory, then none is spare. If doing a serial
 818          * dump, then already have buffers.
 819          */
 820         if (cfg->maxsize == 0 || cfg->clevel == DUMP_CLEVEL_SERIAL ||
 821             (dump_conflags & DUMP_ALL) != 0) {


 822                 return;
 823         }
 824 
 825         sz = 0;
 826         cfg->found4m = 0;
 827         cfg->foundsm = 0;
 828 
 829         /* bitmap of ranges used to estimate which pfns are being used */
 830         bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
 831 
 832         /* find ranges that are not being dumped to use for buffers */
 833         dump_init_memlist_walker(&mlw);
 834         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
 835                 dump_timeleft = dump_timeout;
 836                 end = bitnum + CBUF_MAPNP;
 837                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
 838                 ASSERT(pfn != PFN_INVALID);
 839 
 840                 /* skip partial range at end of mem segment */
 841                 if (mlw.mpleft < CBUF_MAPNP) {


 885                         continue;
 886                 }
 887 
 888                 for (; bitnum < end; bitnum++, pfn++) {
 889                         dump_timeleft = dump_timeout;
 890                         if (BT_TEST(dumpcfg.bitmap, bitnum))
 891                                 continue;
 892                         if (!dump_pfn_check(pfn))
 893                                 continue;
 894                         ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
 895                         hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
 896                             PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
 897                         sz += PAGESIZE;
 898                         cfg->foundsm++;
 899                         dump_set_used(pfn);
 900                         if (sz >= cfg->maxsize)
 901                                 goto foundmax;
 902                 }
 903         }
 904 






 905         /* Allocate memory for as many helpers as we can. */
 906 foundmax:
 907 
 908         /* Byte offsets into memory found and mapped above */
 909         endsz = sz;
 910         sz = 0;
 911 



 912         /* Skip the preallocate output buffers. */
 913         cp = &cfg->cbuf[MINCBUFS];
 914 



 915         /* Loop over all helpers and allocate memory. */
 916         for (hp = cfg->helper; hp < endhp; hp++) {
 917 
 918                 /* Skip preallocated helpers by checking hp->page. */
 919                 if (hp->page == NULL) {

 920                         /* lzjb needs 2 1-page buffers */
 921                         if ((sz + (2 * PAGESIZE)) > endsz)
 922                                 break;
 923                         hp->page = cfg->maxvm + sz;
 924                         sz += PAGESIZE;
 925                         hp->lzbuf = cfg->maxvm + sz;
 926                         sz += PAGESIZE;













 927                 }

 928 
 929                 /*
 930                  * Add output buffers per helper. The number of
 931                  * buffers per helper is determined by the ratio of
 932                  * ncbuf to nhelper.
 933                  */
 934                 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
 935                     k < NCBUF_PER_HELPER; k++) {
 936                         cp->state = CBUF_FREEBUF;
 937                         cp->size = CBUF_SIZE;
 938                         cp->buf = cfg->maxvm + sz;
 939                         sz += CBUF_SIZE;
 940                         ++cp;
 941                 }

















 942         }


 943 
 944         /* Finish allocating output buffers */
 945         for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
 946                 cp->state = CBUF_FREEBUF;
 947                 cp->size = CBUF_SIZE;
 948                 cp->buf = cfg->maxvm + sz;
 949                 sz += CBUF_SIZE;
 950         }
 951 
 952         /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
 953         if (cfg->found4m || cfg->foundsm)
 954                 dump_check_used = 1;
 955 
 956         ASSERT(sz <= endsz);
 957 }
 958 
 959 static void
 960 dumphdr_init(void)
 961 {
 962         pgcnt_t npages = 0;


1777 
1778                 } else {
1779 
1780                         /*
1781                          * Done with the input. Flush the VM and
1782                          * return the buffer to the main task.
1783                          */
1784                         if (panicstr && hp->helper != MAINHELPER)
1785                                 hat_flush_range(kas.a_hat,
1786                                     hp->cpin->buf, hp->cpin->size);
1787                         dumpsys_errmsg(hp, NULL);
1788                         CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1789                         hp->cpin = NULL;
1790                 }
1791         }
1792 
1793         return (hp->cpin != NULL);
1794 }
1795 
1796 /*


































































































































1797  * Compress with lzjb
1798  * write stream block if full or size==0
1799  * if csize==0 write stream header, else write <csize, data>
1800  * size==0 is a call to flush a buffer
1801  * hp->cpout is the buffer we are flushing or filling
1802  * hp->out is the next index to fill data
1803  * osize is either csize+data, or the size of a stream header
1804  */
1805 static void
1806 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
1807 {
1808         dumpsync_t *ds = hp->ds;
1809         const int CSIZE = sizeof (dumpcsize_t);
1810         dumpcsize_t cs;
1811         size_t osize = csize > 0 ? CSIZE + size : size;
1812 
1813         /* If flush, and there is no buffer, just return */
1814         if (size == 0 && hp->cpout == NULL)
1815                 return;
1816 


1918  * panic CPU.
1919  *
1920  * At dump configuration time, helper_lock is set and helpers_wanted
1921  * is 0. dumpsys() decides whether to set helpers_wanted before
1922  * clearing helper_lock.
1923  *
1924  * At panic time, idle CPUs spin-wait on helper_lock, then alternately
1925  * take the lock and become a helper, or return.
1926  */
1927 void
1928 dumpsys_helper()
1929 {
1930         dumpsys_spinlock(&dumpcfg.helper_lock);
1931         if (dumpcfg.helpers_wanted) {
1932                 helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
1933 
1934                 for (hp = dumpcfg.helper; hp != hpend; hp++) {
1935                         if (hp->helper == FREEHELPER) {
1936                                 hp->helper = CPU->cpu_id;
1937                                 BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);

1938                                 dumpsys_spinunlock(&dumpcfg.helper_lock);


1939                                 dumpsys_lzjbcompress(hp);



1940                                 hp->helper = DONEHELPER;
1941                                 return;
1942                         }
1943                 }
1944 
1945                 /* No more helpers are needed. */
1946                 dumpcfg.helpers_wanted = 0;
1947 
1948         }
1949         dumpsys_spinunlock(&dumpcfg.helper_lock);
1950 }
1951 
1952 /*
1953  * No-wait helper callable in spin loops.
1954  *
1955  * Do not wait for helper_lock. Just check helpers_wanted. The caller
1956  * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
1957  * case.
1958  */
1959 void
1960 dumpsys_helper_nw()
1961 {
1962         if (dumpcfg.helpers_wanted)
1963                 dumpsys_helper();
1964 }
1965 
1966 /*
1967  * Dump helper for live dumps.
1968  * These run as a system task.
1969  */
1970 static void
1971 dumpsys_live_helper(void *arg)
1972 {
1973         helper_t *hp = arg;
1974 
1975         BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);

1976         dumpsys_lzjbcompress(hp);


1977 }
1978 
1979 /*
1980  * Compress one page with lzjb (single threaded case)
1981  */
1982 static void
1983 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
1984 {
1985         dumpsync_t *ds = hp->ds;
1986         uint32_t csize;
1987 
1988         hp->helper = MAINHELPER;
1989         hp->in = 0;
1990         hp->used = 0;
1991         hp->cpin = cp;
1992         while (hp->used < cp->used) {
1993                 HRSTART(hp->perpage, copy);
1994                 hp->in = dumpsys_copy_page(hp, hp->in);
1995                 hp->used += PAGESIZE;
1996                 HRSTOP(hp->perpage, copy);


2003                 dumpvp_write(&csize, sizeof (csize));
2004                 dumpvp_write(hp->lzbuf, csize);
2005                 HRSTOP(hp->perpage, write);
2006         }
2007         CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2008         hp->cpin = NULL;
2009 }
2010 
2011 /*
2012  * Main task to dump pages. This is called on the dump CPU.
2013  */
2014 static void
2015 dumpsys_main_task(void *arg)
2016 {
2017         dumpsync_t *ds = arg;
2018         pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2019         dumpmlw_t mlw;
2020         cbuf_t *cp;
2021         pgcnt_t baseoff, pfnoff;
2022         pfn_t base, pfn;

2023         int i;
2024 
2025         /*
2026          * Fall back to serial mode if there are no helpers.
2027          * dump_ncpu_low can be set to 0 at any time.
2028          * dumpcfg.helpermap must contain at least one member.
2029          *
2030          * It is possible that the helpers haven't registered
2031          * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
2032          * at least one helper to register.
2033          */
2034         if (dump_ncpu_low != 0 && dumpcfg.clevel != DUMP_CLEVEL_SERIAL) {
2035                 boolean_t dumpserial = B_TRUE;
2036                 hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2037                 hrtime_t hrtstart = gethrtime();
2038 
2039                 for (;;) {
2040                         for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2041                                 if (dumpcfg.helpermap[i] != 0) {
2042                                         dumpserial = B_FALSE;
2043                                         break;
2044                                 }
2045                         }
2046 
2047                         if ((!dumpserial) ||
2048                             ((gethrtime() - hrtstart) >= hrtmax)) {
2049                                 break;
2050                         }
2051 
2052                         SMT_PAUSE();
2053                 }
2054 
2055                 if (dumpserial) {
2056                         dumpcfg.clevel = DUMP_CLEVEL_SERIAL;
2057                         if (dumpcfg.helper[0].lzbuf == NULL) {
2058                                 dumpcfg.helper[0].lzbuf =
2059                                     dumpcfg.helper[1].page;
2060                         }
2061                 }
2062         }
2063 
2064         dump_init_memlist_walker(&mlw);
2065 
2066         for (;;) {
2067                 int sec = (gethrtime() - ds->start) / NANOSEC;
2068 
2069                 /*
2070                  * Render a simple progress display on the system console to
2071                  * make clear to the operator that the system has not hung.
2072                  * Emit an update when dump progress has advanced by one
2073                  * percent, or when no update has been drawn in the last
2074                  * second.
2075                  */
2076                 if (ds->percent > ds->percent_done || sec > ds->sec_done) {


2184                                 if (BT_TEST(dumpcfg.bitmap, bitnum))
2185                                         pagenum++;
2186 
2187                         dump_timeleft = dump_timeout;
2188                         cp->used = ptob(pagenum - cp->pagenum);
2189 
2190                         HRSTART(ds->perpage, map);
2191                         hat_devload(kas.a_hat, cp->buf, cp->size, base,
2192                             PROT_READ, HAT_LOAD_NOCONSIST);
2193                         HRSTOP(ds->perpage, map);
2194 
2195                         ds->pages_mapped += btop(cp->size);
2196                         ds->pages_used += pagenum - cp->pagenum;
2197 
2198                         CQ_OPEN(mainq);
2199 
2200                         /*
2201                          * If there are no helpers the main task does
2202                          * non-streams lzjb compress.
2203                          */
2204                         if (dumpcfg.clevel == DUMP_CLEVEL_SERIAL) {
2205                                 dumpsys_lzjb_page(dumpcfg.helper, cp);
2206                         } else {
2207                                 /* pass mapped pages to a helper */
2208                                 CQ_PUT(helperq, cp, CBUF_INREADY);
2209                         }
2210 
2211                         /* the last page was done */
2212                         if (bitnum >= dumpcfg.bitmapsize)
2213                                 CQ_CLOSE(helperq);
2214 
2215                         break;
2216 
2217                 case CBUF_USEDMAP:
2218 
2219                         ds->npages += btop(cp->used);
2220 
2221                         HRSTART(ds->perpage, unmap);
2222                         hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2223                         HRSTOP(ds->perpage, unmap);
2224 


2295         for (i = 0; i < ncpus; i++) {
2296                 if ((i & 15) == 0)
2297                         P(",,%03d,", i);
2298                 if (i == myid)
2299                         P("   M");
2300                 else if (BT_TEST(cfg->helpermap, i))
2301                         P("%4d", cpu_seq[i]->cpu_id);
2302                 else
2303                         P("   *");
2304                 if ((i & 15) == 15)
2305                         P("\n");
2306         }
2307 
2308         P("ncbuf_used,%d\n", cfg->ncbuf_used);
2309         P("ncmap,%d\n", cfg->ncmap);
2310 
2311         P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2312         P("Found small pages,%ld\n", cfg->foundsm);
2313 
2314         P("Compression level,%d\n", cfg->clevel);
2315         P("Compression type,%s lzjb\n",
2316             cfg->clevel == DUMP_CLEVEL_SERIAL ? "serial" : "parallel");




2317         P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2318             100);
2319         P("nhelper_used,%d\n", cfg->nhelper_used);
2320 
2321         P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2322         P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2323         P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2324         P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2325         P("dumpbuf.size,%ld\n", dumpbuf.size);
2326 
2327         P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2328         P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2329         P("Dump time,%d\n", sec);
2330 
2331         if (ds->pages_mapped > 0)
2332                 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2333                     / ds->pages_mapped));
2334 
2335         P("\nPer-page metrics:\n");
2336         if (ds->npages > 0) {


2557         dumphdr->dump_pfn = dumpvp_flush();
2558         dump_init_memlist_walker(&mlw);
2559         for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2560                 dump_timeleft = dump_timeout;
2561                 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2562                         continue;
2563                 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2564                 ASSERT(pfn != PFN_INVALID);
2565                 dumpvp_write(&pfn, sizeof (pfn_t));
2566         }
2567         dump_plat_pfn();
2568 
2569         /*
2570          * Write out all the pages.
2571          * Map pages, copy them handling UEs, compress, and write them out.
2572          * Cooperate with any helpers running on CPUs in panic_idle().
2573          */
2574         dumphdr->dump_data = dumpvp_flush();
2575 
2576         bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2577         ds->live = dumpcfg.clevel > DUMP_CLEVEL_SERIAL &&
2578             (dumphdr->dump_flags & DF_LIVE) != 0;
2579 
2580         save_dump_clevel = dumpcfg.clevel;
2581         if (panicstr)
2582                 dumpsys_get_maxmem();


2583 
2584         dumpcfg.nhelper_used = 0;
2585         for (hp = dumpcfg.helper; hp != hpend; hp++) {
2586                 if (hp->page == NULL) {
2587                         hp->helper = DONEHELPER;
2588                         continue;
2589                 }
2590                 ++dumpcfg.nhelper_used;
2591                 hp->helper = FREEHELPER;
2592                 hp->taskqid = NULL;
2593                 hp->ds = ds;
2594                 bzero(&hp->perpage, sizeof (hp->perpage));


2595         }
2596 
2597         CQ_OPEN(freebufq);
2598         CQ_OPEN(helperq);
2599 
2600         dumpcfg.ncbuf_used = 0;
2601         for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2602                 if (cp->buf != NULL) {
2603                         CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2604                         ++dumpcfg.ncbuf_used;
2605                 }
2606         }
2607 
2608         for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2609                 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2610 
2611         ds->start = gethrtime();
2612         ds->iowaitts = ds->start;
2613 
2614         /* start helpers */
2615         if (ds->live) {
2616                 int n = dumpcfg.nhelper_used;
2617                 int pri = MINCLSYSPRI - 25;
2618 
2619                 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2620                     TASKQ_PREPOPULATE);
2621                 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2622                         if (hp->page == NULL)
2623                                 continue;
2624                         hp->helper = hp - dumpcfg.helper;
2625                         hp->taskqid = taskq_dispatch(livetaskq,
2626                             dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2627                 }
2628 
2629         } else {
2630                 if (panicstr)
2631                         kmem_dump_begin();
2632                 dumpcfg.helpers_wanted = dumpcfg.clevel > DUMP_CLEVEL_SERIAL;
2633                 dumpsys_spinunlock(&dumpcfg.helper_lock);
2634         }
2635 
2636         /* run main task */
2637         dumpsys_main_task(ds);
2638 
2639         ds->elapsed = gethrtime() - ds->start;
2640         if (ds->elapsed < 1)
2641                 ds->elapsed = 1;
2642 
2643         if (livetaskq != NULL)
2644                 taskq_destroy(livetaskq);
2645 
2646         if (ds->neednl) {
2647                 uprintf("\n");
2648                 ds->neednl = 0;
2649         }
2650 
2651         /* record actual pages dumped */
2652         dumphdr->dump_npages = ds->npages;