Print this page
9709 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
9707 Enable parallel crash dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/dumpsubr.c
          +++ new/usr/src/uts/common/os/dumpsubr.c
↓ open down ↓ 66 lines elided ↑ open up ↑
  67   67  #include <vm/hat.h>
  68   68  #include <vm/as.h>
  69   69  #include <vm/page.h>
  70   70  #include <vm/pvn.h>
  71   71  #include <vm/seg.h>
  72   72  #include <vm/seg_kmem.h>
  73   73  #include <sys/clock_impl.h>
  74   74  #include <sys/hold_page.h>
  75   75  #include <sys/cpu.h>
  76   76  
  77      -#include <bzip2/bzlib.h>
  78      -
  79   77  #define ONE_GIG (1024 * 1024 * 1024UL)
  80   78  
  81   79  /*
  82      - * Crash dump time is dominated by disk write time.  To reduce this,
  83      - * the stronger compression method bzip2 is applied to reduce the dump
  84      - * size and hence reduce I/O time.  However, bzip2 is much more
  85      - * computationally expensive than the existing lzjb algorithm, so to
  86      - * avoid increasing compression time, CPUs that are otherwise idle
  87      - * during panic are employed to parallelize the compression task.
  88      - * Many helper CPUs are needed to prevent bzip2 from being a
  89      - * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
  90      - * parallelized instead. Lastly, I/O and compression are performed by
  91      - * different CPUs, and are hence overlapped in time, unlike the older
  92      - * serial code.
  93      - *
  94      - * Another important consideration is the speed of the dump
  95      - * device. Faster disks need less CPUs in order to benefit from
  96      - * parallel lzjb versus parallel bzip2. Therefore, the CPU count
  97      - * threshold for switching from parallel lzjb to parallel bzip2 is
  98      - * elevated for faster disks. The dump device speed is adduced from
  99      - * the setting for dumpbuf.iosize, see dump_update_clevel.
       80 + * Parallel Dump:
       81 + * CPUs that are otherwise idle during panic are employed to parallelize
       82 + * the compression task. I/O and compression are performed by different
       83 + * CPUs, and are hence overlapped in time, unlike the older serial code.
 100   84   */
 101   85  
 102   86  /*
 103   87   * exported vars
 104   88   */
 105   89  kmutex_t        dump_lock;              /* lock for dump configuration */
 106   90  dumphdr_t       *dumphdr;               /* dump header */
 107   91  int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
 108   92  vnode_t         *dumpvp;                /* dump device vnode pointer */
 109   93  u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
 110   94  char            *dumppath;              /* pathname of dump device */
 111   95  int             dump_timeout = 120;     /* timeout for dumping pages */
 112   96  int             dump_timeleft;          /* portion of dump_timeout remaining */
 113   97  int             dump_ioerr;             /* dump i/o error */
 114   98  int             dump_check_used;        /* enable check for used pages */
 115      -char        *dump_stack_scratch; /* scratch area for saving stack summary */
       99 +char            *dump_stack_scratch; /* scratch area for saving stack summary */
 116  100  
 117  101  /*
 118      - * Tunables for dump compression and parallelism. These can be set via
 119      - * /etc/system.
      102 + * Tunables for dump compression and parallelism.
      103 + * These can be set via /etc/system.
 120  104   *
 121      - * dump_ncpu_low        number of helpers for parallel lzjb
 122      - *      This is also the minimum configuration.
      105 + * dump_ncpu_low:
      106 + * This is the minimum configuration for parallel lzjb.
      107 + * A special value of 0 means that parallel dump will not be used.
 123  108   *
 124      - * dump_bzip2_level     bzip2 compression level: 1-9
 125      - *      Higher numbers give greater compression, but take more memory
 126      - *      and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
 127      - *
 128      - * dump_plat_mincpu     the cross-over limit for using bzip2 (per platform):
 129      - *      if dump_plat_mincpu == 0, then always do single threaded dump
 130      - *      if ncpu >= dump_plat_mincpu then try to use bzip2
 131      - *
 132      - * dump_metrics_on      if set, metrics are collected in the kernel, passed
 133      - *      to savecore via the dump file, and recorded by savecore in
 134      - *      METRICS.txt.
      109 + * dump_metrics_on:
      110 + * If set, metrics are collected in the kernel, passed to savecore
      111 + * via the dump file, and recorded by savecore in METRICS.txt.
 135  112   */
 136  113  uint_t dump_ncpu_low = 4;       /* minimum config for parallel lzjb */
 137      -uint_t dump_bzip2_level = 1;    /* bzip2 level (1-9) */
 138  114  
 139      -/* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
 140      -#define MINCPU_NOT_SET  ((uint_t)-1)
 141      -uint_t dump_plat_mincpu = MINCPU_NOT_SET;
 142      -
 143  115  /* tunables for pre-reserved heap */
 144  116  uint_t dump_kmem_permap = 1024;
 145  117  uint_t dump_kmem_pages = 0;
 146  118  
 147  119  /* Define multiple buffers per helper to avoid stalling */
 148  120  #define NCBUF_PER_HELPER        2
 149  121  #define NCMAP_PER_HELPER        4
 150  122  
 151  123  /* minimum number of helpers configured */
 152      -#define MINHELPERS      (dump_ncpu_low)
      124 +#define MINHELPERS      (MAX(dump_ncpu_low, 1))
 153  125  #define MINCBUFS        (MINHELPERS * NCBUF_PER_HELPER)
 154  126  
 155  127  /*
 156  128   * Define constant parameters.
 157  129   *
 158  130   * CBUF_SIZE            size of an output buffer
 159  131   *
 160  132   * CBUF_MAPSIZE         size of virtual range for mapping pages
 161  133   *
 162  134   * CBUF_MAPNP           size of virtual range in pages
↓ open down ↓ 214 lines elided ↑ open up ↑
 377  349          int tag;                        /* compression stream tag */
 378  350          perpage_t perpage;              /* per page metrics */
 379  351          perpage_t perpagets;            /* per page metrics (timestamps) */
 380  352          taskqid_t taskqid;              /* live dump task ptr */
 381  353          int in, out;                    /* buffer offsets */
 382  354          cbuf_t *cpin, *cpout, *cperr;   /* cbuf objects in process */
 383  355          dumpsync_t *ds;                 /* pointer to sync vars */
 384  356          size_t used;                    /* counts input consumed */
 385  357          char *page;                     /* buffer for page copy */
 386  358          char *lzbuf;                    /* lzjb output */
 387      -        bz_stream bzstream;             /* bzip2 state */
 388  359  } helper_t;
 389  360  
 390  361  #define MAINHELPER      (-1)            /* helper is also the main task */
 391  362  #define FREEHELPER      (-2)            /* unbound helper */
 392  363  #define DONEHELPER      (-3)            /* helper finished */
 393  364  
 394  365  /*
 395  366   * configuration vars for dumpsys
 396  367   */
 397  368  typedef struct dumpcfg {
 398      -        int     threshold;      /* ncpu threshold for bzip2 */
 399  369          int     nhelper;        /* number of helpers */
 400  370          int     nhelper_used;   /* actual number of helpers used */
 401  371          int     ncmap;          /* number VA pages for compression */
 402  372          int     ncbuf;          /* number of bufs for compression */
 403  373          int     ncbuf_used;     /* number of bufs in use */
 404  374          uint_t  clevel;         /* dump compression level */
 405  375          helper_t *helper;       /* array of helpers */
 406  376          cbuf_t  *cmap;          /* array of input (map) buffers */
 407  377          cbuf_t  *cbuf;          /* array of output  buffers */
 408  378          ulong_t *helpermap;     /* set of dumpsys helper CPU ids */
↓ open down ↓ 86 lines elided ↑ open up ↑
 495  465  
 496  466          new_buf = kmem_alloc(new_size, KM_SLEEP);
 497  467          dumpbuf.size = new_size;
 498  468          dumpbuf.start = new_buf;
 499  469          dumpbuf.end = new_buf + new_size;
 500  470          kmem_free(old_buf, old_size);
 501  471  }
 502  472  
 503  473  /*
 504  474   * dump_update_clevel is called when dumpadm configures the dump device.
      475 + *      Determine the compression level / type
      476 + *      - DUMP_CLEVEL_SERIAL is single threaded lzjb
      477 + *      - DUMP_CLEVEL_LZJB   is parallel lzjb
 505  478   *      Calculate number of helpers and buffers.
 506  479   *      Allocate the minimum configuration for now.
 507  480   *
 508  481   * When the dump file is configured we reserve a minimum amount of
 509  482   * memory for use at crash time. But we reserve VA for all the memory
 510  483   * we really want in order to do the fastest dump possible. The VA is
 511  484   * backed by pages not being dumped, according to the bitmap. If
 512  485   * there is insufficient spare memory, however, we fall back to the
 513  486   * minimum.
 514  487   *
 515  488   * Live dump (savecore -L) always uses the minimum config.
 516  489   *
 517      - * clevel 0 is single threaded lzjb
 518      - * clevel 1 is parallel lzjb
 519      - * clevel 2 is parallel bzip2
 520      - *
 521      - * The ncpu threshold is selected with dump_plat_mincpu.
 522      - * On OPL, set_platform_defaults() overrides the sun4u setting.
 523      - * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
 524      - *
 525      - * Architecture         Threshold       Algorithm
 526      - * sun4u                <  51           parallel lzjb
 527      - * sun4u                >= 51           parallel bzip2(*)
 528      - * sun4u OPL            <  8            parallel lzjb
 529      - * sun4u OPL            >= 8            parallel bzip2(*)
 530      - * sun4v                <  128          parallel lzjb
 531      - * sun4v                >= 128          parallel bzip2(*)
 532      - * x86                  < 11            parallel lzjb
 533      - * x86                  >= 11           parallel bzip2(*)
 534      - * 32-bit               N/A             single-threaded lzjb
 535      - *
 536      - * (*) bzip2 is only chosen if there is sufficient available
 537      - * memory for buffers at dump time. See dumpsys_get_maxmem().
 538      - *
 539      - * Faster dump devices have larger I/O buffers. The threshold value is
 540      - * increased according to the size of the dump I/O buffer, because
 541      - * parallel lzjb performs better with faster disks. For buffers >= 1MB
 542      - * the threshold is 3X; for buffers >= 256K threshold is 2X.
 543      - *
 544  490   * For parallel dumps, the number of helpers is ncpu-1. The CPU
 545  491   * running panic runs the main task. For single-threaded dumps, the
 546  492   * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
 547  493   *
 548  494   * Need multiple buffers per helper so that they do not block waiting
 549  495   * for the main task.
 550  496   *                              parallel        single-threaded
 551  497   * Number of output buffers:    nhelper*2               1
 552  498   * Number of mapping buffers:   nhelper*4               1
 553  499   *
 554  500   */
 555  501  static void
 556  502  dump_update_clevel()
 557  503  {
 558  504          int tag;
 559      -        size_t bz2size;
 560  505          helper_t *hp, *hpend;
 561  506          cbuf_t *cp, *cpend;
 562  507          dumpcfg_t *old = &dumpcfg;
 563  508          dumpcfg_t newcfg = *old;
 564  509          dumpcfg_t *new = &newcfg;
 565  510  
 566  511          ASSERT(MUTEX_HELD(&dump_lock));
 567  512  
 568  513          /*
 569  514           * Free the previously allocated bufs and VM.
↓ open down ↓ 32 lines elided ↑ open up ↑
 602  547           * Allocate memory and VM.
 603  548           * One CPU runs dumpsys, the rest are helpers.
 604  549           */
 605  550          new->nhelper = ncpus - 1;
 606  551          if (new->nhelper < 1)
 607  552                  new->nhelper = 1;
 608  553  
 609  554          if (new->nhelper > DUMP_MAX_NHELPER)
 610  555                  new->nhelper = DUMP_MAX_NHELPER;
 611  556  
 612      -        /* use platform default, unless /etc/system overrides */
 613      -        if (dump_plat_mincpu == MINCPU_NOT_SET)
 614      -                dump_plat_mincpu = dump_plat_mincpu_default;
 615      -
 616      -        /* increase threshold for faster disks */
 617      -        new->threshold = dump_plat_mincpu;
 618      -        if (dumpbuf.iosize >= DUMP_1MB)
 619      -                new->threshold *= 3;
 620      -        else if (dumpbuf.iosize >= (256 * DUMP_1KB))
 621      -                new->threshold *= 2;
 622      -
 623      -        /* figure compression level based upon the computed threshold. */
 624      -        if (dump_plat_mincpu == 0 || new->nhelper < 2) {
 625      -                new->clevel = 0;
      557 +        /* If dump_ncpu_low is 0 or greater than ncpus, do serial dump */
      558 +        if (dump_ncpu_low == 0 || dump_ncpu_low > ncpus || new->nhelper < 2) {
      559 +                new->clevel = DUMP_CLEVEL_SERIAL;
 626  560                  new->nhelper = 1;
 627      -        } else if ((new->nhelper + 1) >= new->threshold) {
 628      -                new->clevel = DUMP_CLEVEL_BZIP2;
 629      -        } else {
 630      -                new->clevel = DUMP_CLEVEL_LZJB;
 631      -        }
 632      -
 633      -        if (new->clevel == 0) {
 634  561                  new->ncbuf = 1;
 635  562                  new->ncmap = 1;
 636  563          } else {
      564 +                new->clevel = DUMP_CLEVEL_LZJB;
 637  565                  new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
 638  566                  new->ncmap = NCMAP_PER_HELPER * new->nhelper;
 639  567          }
 640  568  
 641  569          /*
 642  570           * Allocate new data structures and buffers for MINHELPERS,
 643  571           * and also figure the max desired size.
 644  572           */
 645      -        bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
 646  573          new->maxsize = 0;
 647  574          new->maxvmsize = 0;
 648  575          new->maxvm = NULL;
 649  576          tag = 1;
 650  577          new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
 651  578          hpend = &new->helper[new->nhelper];
 652  579          for (hp = new->helper; hp != hpend; hp++) {
 653  580                  hp->tag = tag++;
 654  581                  if (hp < &new->helper[MINHELPERS]) {
 655  582                          hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 656  583                          hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 657      -                } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
      584 +                } else  {
 658  585                          new->maxsize += 2 * PAGESIZE;
 659      -                } else {
 660      -                        new->maxsize += PAGESIZE;
 661  586                  }
 662      -                if (new->clevel >= DUMP_CLEVEL_BZIP2)
 663      -                        new->maxsize += bz2size;
 664  587          }
 665  588  
 666  589          new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
 667  590          cpend = &new->cbuf[new->ncbuf];
 668  591          for (cp = new->cbuf; cp != cpend; cp++) {
 669  592                  cp->state = CBUF_FREEBUF;
 670  593                  cp->size = CBUF_SIZE;
 671  594                  if (cp < &new->cbuf[MINCBUFS])
 672  595                          cp->buf = kmem_alloc(cp->size, KM_SLEEP);
 673  596                  else
↓ open down ↓ 144 lines elided ↑ open up ↑
 818  741          bitnum = dump_pfn_to_bitnum(pfn);
 819  742          ASSERT(bitnum != (pgcnt_t)-1);
 820  743  
 821  744          rbitnum = CBUF_MAPP2R(bitnum);
 822  745          ASSERT(rbitnum < dumpcfg.rbitmapsize);
 823  746  
 824  747          return (BT_TEST(dumpcfg.rbitmap, rbitnum));
 825  748  }
 826  749  
 827  750  /*
 828      - * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
 829      - * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
 830      - */
 831      -static void *
 832      -dumpbzalloc(void *opaque, int items, int size)
 833      -{
 834      -        size_t *sz;
 835      -        char *ret;
 836      -
 837      -        ASSERT(opaque != NULL);
 838      -        sz = opaque;
 839      -        ret = dumpcfg.maxvm + *sz;
 840      -        *sz += items * size;
 841      -        *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);
 842      -        ASSERT(*sz <= dumpcfg.maxvmsize);
 843      -        return (ret);
 844      -}
 845      -
 846      -/*ARGSUSED*/
 847      -static void
 848      -dumpbzfree(void *opaque, void *addr)
 849      -{
 850      -}
 851      -
 852      -/*
 853  751   * Perform additional checks on the page to see if we can really use
 854  752   * it. The kernel (kas) pages are always set in the bitmap. However,
 855  753   * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
 856  754   * bitmap. So we check for them.
 857  755   */
 858  756  static inline int
 859  757  dump_pfn_check(pfn_t pfn)
 860  758  {
 861  759          page_t *pp = page_numtopp_nolock(pfn);
 862  760          if (pp == NULL || pp->p_pagenum != pfn ||
↓ open down ↓ 18 lines elided ↑ open up ↑
 881  779                  if (BT_TEST(dumpcfg.bitmap, start))
 882  780                          return (0);
 883  781                  if (!dump_pfn_check(pfn))
 884  782                          return (0);
 885  783          }
 886  784          return (1);
 887  785  }
 888  786  
 889  787  /*
 890  788   * dumpsys_get_maxmem() is called during panic. Find unused ranges
 891      - * and use them for buffers. If we find enough memory switch to
 892      - * parallel bzip2, otherwise use parallel lzjb.
 893      - *
      789 + * and use them for buffers.
 894  790   * It searches the dump bitmap in 2 passes. The first time it looks
 895  791   * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 896  792   */
 897  793  static void
 898  794  dumpsys_get_maxmem()
 899  795  {
 900  796          dumpcfg_t *cfg = &dumpcfg;
 901  797          cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
 902  798          helper_t *endhp = &cfg->helper[cfg->nhelper];
 903  799          pgcnt_t bitnum, end;
 904      -        size_t sz, endsz, bz2size;
      800 +        size_t sz, endsz;
 905  801          pfn_t pfn, off;
 906  802          cbuf_t *cp;
 907      -        helper_t *hp, *ohp;
      803 +        helper_t *hp;
 908  804          dumpmlw_t mlw;
 909  805          int k;
 910  806  
 911  807          /*
 912      -         * Setting dump_plat_mincpu to 0 at any time forces a serial
 913      -         * dump.
      808 +         * Setting dump_ncpu_low to 0 forces a single threaded dump.
 914  809           */
 915      -        if (dump_plat_mincpu == 0) {
 916      -                cfg->clevel = 0;
      810 +        if (dump_ncpu_low == 0) {
      811 +                cfg->clevel = DUMP_CLEVEL_SERIAL;
 917  812                  return;
 918  813          }
 919  814  
 920  815          /*
 921  816           * There may be no point in looking for spare memory. If
 922  817           * dumping all memory, then none is spare. If doing a serial
 923  818           * dump, then already have buffers.
 924  819           */
 925      -        if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
      820 +        if (cfg->maxsize == 0 || cfg->clevel == DUMP_CLEVEL_SERIAL ||
 926  821              (dump_conflags & DUMP_ALL) != 0) {
 927      -                if (cfg->clevel > DUMP_CLEVEL_LZJB)
 928      -                        cfg->clevel = DUMP_CLEVEL_LZJB;
 929  822                  return;
 930  823          }
 931  824  
 932  825          sz = 0;
 933  826          cfg->found4m = 0;
 934  827          cfg->foundsm = 0;
 935  828  
 936  829          /* bitmap of ranges used to estimate which pfns are being used */
 937  830          bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
 938  831  
↓ open down ↓ 63 lines elided ↑ open up ↑
1002  895                          hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
1003  896                              PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
1004  897                          sz += PAGESIZE;
1005  898                          cfg->foundsm++;
1006  899                          dump_set_used(pfn);
1007  900                          if (sz >= cfg->maxsize)
1008  901                                  goto foundmax;
1009  902                  }
1010  903          }
1011  904  
1012      -        /* Fall back to lzjb if we did not get enough memory for bzip2. */
1013      -        endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
1014      -        if (sz < endsz) {
1015      -                cfg->clevel = DUMP_CLEVEL_LZJB;
1016      -        }
1017      -
1018  905          /* Allocate memory for as many helpers as we can. */
1019  906  foundmax:
1020  907  
1021  908          /* Byte offsets into memory found and mapped above */
1022  909          endsz = sz;
1023  910          sz = 0;
1024  911  
1025      -        /* Set the size for bzip2 state. Only bzip2 needs it. */
1026      -        bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
1027      -
 1028  912          /* Skip the preallocated output buffers. */
1029  913          cp = &cfg->cbuf[MINCBUFS];
1030  914  
1031      -        /* Use this to move memory up from the preallocated helpers. */
1032      -        ohp = cfg->helper;
1033      -
1034  915          /* Loop over all helpers and allocate memory. */
1035  916          for (hp = cfg->helper; hp < endhp; hp++) {
1036  917  
1037  918                  /* Skip preallocated helpers by checking hp->page. */
1038  919                  if (hp->page == NULL) {
1039      -                        if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
1040      -                                /* lzjb needs 2 1-page buffers */
1041      -                                if ((sz + (2 * PAGESIZE)) > endsz)
1042      -                                        break;
1043      -                                hp->page = cfg->maxvm + sz;
1044      -                                sz += PAGESIZE;
1045      -                                hp->lzbuf = cfg->maxvm + sz;
1046      -                                sz += PAGESIZE;
1047      -
1048      -                        } else if (ohp->lzbuf != NULL) {
 1049      -                                /* re-use the preallocated lzjb page for bzip2 */
1050      -                                hp->page = ohp->lzbuf;
1051      -                                ohp->lzbuf = NULL;
1052      -                                ++ohp;
1053      -
1054      -                        } else {
1055      -                                /* bzip2 needs a 1-page buffer */
1056      -                                if ((sz + PAGESIZE) > endsz)
1057      -                                        break;
1058      -                                hp->page = cfg->maxvm + sz;
1059      -                                sz += PAGESIZE;
1060      -                        }
      920 +                        /* lzjb needs 2 1-page buffers */
      921 +                        if ((sz + (2 * PAGESIZE)) > endsz)
      922 +                                break;
      923 +                        hp->page = cfg->maxvm + sz;
      924 +                        sz += PAGESIZE;
      925 +                        hp->lzbuf = cfg->maxvm + sz;
      926 +                        sz += PAGESIZE;
1061  927                  }
1062  928  
1063  929                  /*
1064  930                   * Add output buffers per helper. The number of
1065  931                   * buffers per helper is determined by the ratio of
1066  932                   * ncbuf to nhelper.
1067  933                   */
1068  934                  for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
1069  935                      k < NCBUF_PER_HELPER; k++) {
1070  936                          cp->state = CBUF_FREEBUF;
1071  937                          cp->size = CBUF_SIZE;
1072  938                          cp->buf = cfg->maxvm + sz;
1073  939                          sz += CBUF_SIZE;
1074  940                          ++cp;
1075  941                  }
1076      -
1077      -                /*
1078      -                 * bzip2 needs compression state. Use the dumpbzalloc
1079      -                 * and dumpbzfree callbacks to allocate the memory.
1080      -                 * bzip2 does allocation only at init time.
1081      -                 */
1082      -                if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
1083      -                        if ((sz + bz2size) > endsz) {
1084      -                                hp->page = NULL;
1085      -                                break;
1086      -                        } else {
1087      -                                hp->bzstream.opaque = &sz;
1088      -                                hp->bzstream.bzalloc = dumpbzalloc;
1089      -                                hp->bzstream.bzfree = dumpbzfree;
1090      -                                (void) BZ2_bzCompressInit(&hp->bzstream,
1091      -                                    dump_bzip2_level, 0, 0);
1092      -                                hp->bzstream.opaque = NULL;
1093      -                        }
1094      -                }
1095  942          }
1096  943  
1097  944          /* Finish allocating output buffers */
1098  945          for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
1099  946                  cp->state = CBUF_FREEBUF;
1100  947                  cp->size = CBUF_SIZE;
1101  948                  cp->buf = cfg->maxvm + sz;
1102  949                  sz += CBUF_SIZE;
1103  950          }
1104  951  
↓ open down ↓ 835 lines elided ↑ open up ↑
1940 1787                          dumpsys_errmsg(hp, NULL);
1941 1788                          CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1942 1789                          hp->cpin = NULL;
1943 1790                  }
1944 1791          }
1945 1792  
1946 1793          return (hp->cpin != NULL);
1947 1794  }
1948 1795  
1949 1796  /*
1950      - * Compress size bytes starting at buf with bzip2
1951      - * mode:
1952      - *      BZ_RUN          add one more compressed page
1953      - *      BZ_FINISH       no more input, flush the state
1954      - */
1955      -static void
1956      -dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
1957      -{
1958      -        dumpsync_t *ds = hp->ds;
1959      -        const int CSIZE = sizeof (dumpcsize_t);
1960      -        bz_stream *ps = &hp->bzstream;
1961      -        int rc = 0;
1962      -        uint32_t csize;
1963      -        dumpcsize_t cs;
1964      -
1965      -        /* Set input pointers to new input page */
1966      -        if (size > 0) {
1967      -                ps->avail_in = size;
1968      -                ps->next_in = buf;
1969      -        }
1970      -
1971      -        /* CONSTCOND */
1972      -        while (1) {
1973      -
1974      -                /* Quit when all input has been consumed */
1975      -                if (ps->avail_in == 0 && mode == BZ_RUN)
1976      -                        break;
1977      -
1978      -                /* Get a new output buffer */
1979      -                if (hp->cpout == NULL) {
1980      -                        HRSTART(hp->perpage, outwait);
1981      -                        hp->cpout = CQ_GET(freebufq);
1982      -                        HRSTOP(hp->perpage, outwait);
1983      -                        ps->avail_out = hp->cpout->size - CSIZE;
1984      -                        ps->next_out = hp->cpout->buf + CSIZE;
1985      -                }
1986      -
1987      -                /* Compress input, or finalize */
1988      -                HRSTART(hp->perpage, compress);
1989      -                rc = BZ2_bzCompress(ps, mode);
1990      -                HRSTOP(hp->perpage, compress);
1991      -
1992      -                /* Check for error */
1993      -                if (mode == BZ_RUN && rc != BZ_RUN_OK) {
1994      -                        dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
1995      -                            hp->helper, BZ2_bzErrorString(rc),
1996      -                            hp->cpin->pagenum);
1997      -                        break;
1998      -                }
1999      -
2000      -                /* Write the buffer if it is full, or we are flushing */
2001      -                if (ps->avail_out == 0 || mode == BZ_FINISH) {
2002      -                        csize = hp->cpout->size - CSIZE - ps->avail_out;
2003      -                        cs = DUMP_SET_TAG(csize, hp->tag);
2004      -                        if (csize > 0) {
2005      -                                (void) memcpy(hp->cpout->buf, &cs, CSIZE);
2006      -                                dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
2007      -                                hp->cpout = NULL;
2008      -                        }
2009      -                }
2010      -
2011      -                /* Check for final complete */
2012      -                if (mode == BZ_FINISH) {
2013      -                        if (rc == BZ_STREAM_END)
2014      -                                break;
2015      -                        if (rc != BZ_FINISH_OK) {
2016      -                                dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
2017      -                                    hp->helper, BZ2_bzErrorString(rc));
2018      -                                break;
2019      -                        }
2020      -                }
2021      -        }
2022      -
2023      -        /* Cleanup state and buffers */
2024      -        if (mode == BZ_FINISH) {
2025      -
2026      -                /* Reset state so that it is re-usable. */
2027      -                (void) BZ2_bzCompressReset(&hp->bzstream);
2028      -
 2029      -                /* Give any unused output buffer to the main task */
2030      -                if (hp->cpout != NULL) {
2031      -                        hp->cpout->used = 0;
2032      -                        CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
2033      -                        hp->cpout = NULL;
2034      -                }
2035      -        }
2036      -}
2037      -
2038      -static void
2039      -dumpsys_bz2compress(helper_t *hp)
2040      -{
2041      -        dumpsync_t *ds = hp->ds;
2042      -        dumpstreamhdr_t sh;
2043      -
2044      -        (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2045      -        sh.stream_pagenum = (pgcnt_t)-1;
2046      -        sh.stream_npages = 0;
2047      -        hp->cpin = NULL;
2048      -        hp->cpout = NULL;
2049      -        hp->cperr = NULL;
2050      -        hp->in = 0;
2051      -        hp->out = 0;
2052      -        hp->bzstream.avail_in = 0;
2053      -
2054      -        /* Bump reference to mainq while we are running */
2055      -        CQ_OPEN(mainq);
2056      -
2057      -        /* Get one page at a time */
2058      -        while (dumpsys_sread(hp)) {
2059      -                if (sh.stream_pagenum != hp->cpin->pagenum) {
2060      -                        sh.stream_pagenum = hp->cpin->pagenum;
2061      -                        sh.stream_npages = btop(hp->cpin->used);
2062      -                        dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
2063      -                }
2064      -                dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);
2065      -        }
2066      -
2067      -        /* Done with input, flush any partial buffer */
2068      -        if (sh.stream_pagenum != (pgcnt_t)-1) {
2069      -                dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
2070      -                dumpsys_errmsg(hp, NULL);
2071      -        }
2072      -
2073      -        ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2074      -
2075      -        /* Decrement main queue count, we are done */
2076      -        CQ_CLOSE(mainq);
2077      -}
2078      -
2079      -/*
2080 1797   * Compress with lzjb
2081 1798   * write stream block if full or size==0
2082 1799   * if csize==0 write stream header, else write <csize, data>
2083 1800   * size==0 is a call to flush a buffer
2084 1801   * hp->cpout is the buffer we are flushing or filling
2085 1802   * hp->out is the next index to fill data
2086 1803   * osize is either csize+data, or the size of a stream header
2087 1804   */
2088 1805  static void
2089 1806  dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
↓ open down ↓ 121 lines elided ↑ open up ↑
2211 1928  dumpsys_helper()
2212 1929  {
2213 1930          dumpsys_spinlock(&dumpcfg.helper_lock);
2214 1931          if (dumpcfg.helpers_wanted) {
2215 1932                  helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2216 1933  
2217 1934                  for (hp = dumpcfg.helper; hp != hpend; hp++) {
2218 1935                          if (hp->helper == FREEHELPER) {
2219 1936                                  hp->helper = CPU->cpu_id;
2220 1937                                  BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2221      -
2222 1938                                  dumpsys_spinunlock(&dumpcfg.helper_lock);
2223      -
2224      -                                if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2225      -                                        dumpsys_lzjbcompress(hp);
2226      -                                else
2227      -                                        dumpsys_bz2compress(hp);
2228      -
     1939 +                                dumpsys_lzjbcompress(hp);
2229 1940                                  hp->helper = DONEHELPER;
2230 1941                                  return;
2231 1942                          }
2232 1943                  }
2233 1944  
2234 1945                  /* No more helpers are needed. */
2235 1946                  dumpcfg.helpers_wanted = 0;
2236 1947  
2237 1948          }
2238 1949          dumpsys_spinunlock(&dumpcfg.helper_lock);
↓ open down ↓ 16 lines elided ↑ open up ↑
2255 1966  /*
2256 1967   * Dump helper for live dumps.
2257 1968   * These run as a system task.
2258 1969   */
2259 1970  static void
2260 1971  dumpsys_live_helper(void *arg)
2261 1972  {
2262 1973          helper_t *hp = arg;
2263 1974  
2264 1975          BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2265      -        if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2266      -                dumpsys_lzjbcompress(hp);
2267      -        else
2268      -                dumpsys_bz2compress(hp);
     1976 +        dumpsys_lzjbcompress(hp);
2269 1977  }
2270 1978  
2271 1979  /*
2272 1980   * Compress one page with lzjb (single threaded case)
2273 1981   */
2274 1982  static void
2275 1983  dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
2276 1984  {
2277 1985          dumpsync_t *ds = hp->ds;
2278 1986          uint32_t csize;
↓ open down ↓ 26 lines elided ↑ open up ↑
2305 2013   */
2306 2014  static void
2307 2015  dumpsys_main_task(void *arg)
2308 2016  {
2309 2017          dumpsync_t *ds = arg;
2310 2018          pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2311 2019          dumpmlw_t mlw;
2312 2020          cbuf_t *cp;
2313 2021          pgcnt_t baseoff, pfnoff;
2314 2022          pfn_t base, pfn;
2315      -        boolean_t dumpserial;
2316 2023          int i;
2317 2024  
2318 2025          /*
2319 2026           * Fall back to serial mode if there are no helpers.
2320      -         * dump_plat_mincpu can be set to 0 at any time.
     2027 +         * dump_ncpu_low can be set to 0 at any time.
2321 2028           * dumpcfg.helpermap must contain at least one member.
2322 2029           *
2323 2030           * It is possible that the helpers haven't registered
2324 2031           * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
2325 2032           * at least one helper to register.
2326 2033           */
2327      -        dumpserial = B_TRUE;
2328      -        if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
     2034 +        if (dump_ncpu_low != 0 && dumpcfg.clevel != DUMP_CLEVEL_SERIAL) {
     2035 +                boolean_t dumpserial = B_TRUE;
2329 2036                  hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2330 2037                  hrtime_t hrtstart = gethrtime();
2331 2038  
2332 2039                  for (;;) {
2333 2040                          for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2334 2041                                  if (dumpcfg.helpermap[i] != 0) {
2335 2042                                          dumpserial = B_FALSE;
2336 2043                                          break;
2337 2044                                  }
2338 2045                          }
2339 2046  
2340 2047                          if ((!dumpserial) ||
2341 2048                              ((gethrtime() - hrtstart) >= hrtmax)) {
2342 2049                                  break;
2343 2050                          }
2344 2051  
2345 2052                          SMT_PAUSE();
2346 2053                  }
2347 2054  
2348 2055                  if (dumpserial) {
2349      -                        dumpcfg.clevel = 0;
     2056 +                        dumpcfg.clevel = DUMP_CLEVEL_SERIAL;
2350 2057                          if (dumpcfg.helper[0].lzbuf == NULL) {
2351 2058                                  dumpcfg.helper[0].lzbuf =
2352 2059                                      dumpcfg.helper[1].page;
2353 2060                          }
2354 2061                  }
2355 2062          }
2356 2063  
2357 2064          dump_init_memlist_walker(&mlw);
2358 2065  
2359 2066          for (;;) {
↓ open down ↓ 127 lines elided ↑ open up ↑
2487 2194  
2488 2195                          ds->pages_mapped += btop(cp->size);
2489 2196                          ds->pages_used += pagenum - cp->pagenum;
2490 2197  
2491 2198                          CQ_OPEN(mainq);
2492 2199  
2493 2200                          /*
2494 2201                           * If there are no helpers the main task does
2495 2202                           * non-streams lzjb compress.
2496 2203                           */
2497      -                        if (dumpserial) {
     2204 +                        if (dumpcfg.clevel == DUMP_CLEVEL_SERIAL) {
2498 2205                                  dumpsys_lzjb_page(dumpcfg.helper, cp);
2499 2206                          } else {
2500 2207                                  /* pass mapped pages to a helper */
2501 2208                                  CQ_PUT(helperq, cp, CBUF_INREADY);
2502 2209                          }
2503 2210  
2504 2211                          /* the last page was done */
2505 2212                          if (bitnum >= dumpcfg.bitmapsize)
2506 2213                                  CQ_CLOSE(helperq);
2507 2214  
↓ open down ↓ 90 lines elided ↑ open up ↑
2598 2305                          P("\n");
2599 2306          }
2600 2307  
2601 2308          P("ncbuf_used,%d\n", cfg->ncbuf_used);
2602 2309          P("ncmap,%d\n", cfg->ncmap);
2603 2310  
2604 2311          P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2605 2312          P("Found small pages,%ld\n", cfg->foundsm);
2606 2313  
2607 2314          P("Compression level,%d\n", cfg->clevel);
2608      -        P("Compression type,%s %s", cfg->clevel == 0 ? "serial" : "parallel",
2609      -            cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
2610      -        if (cfg->clevel >= DUMP_CLEVEL_BZIP2)
2611      -                P(" (level %d)\n", dump_bzip2_level);
2612      -        else
2613      -                P("\n");
     2315 +        P("Compression type,%s lzjb\n",
     2316 +            cfg->clevel == DUMP_CLEVEL_SERIAL ? "serial" : "parallel");
2614 2317          P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2615 2318              100);
2616 2319          P("nhelper_used,%d\n", cfg->nhelper_used);
2617 2320  
2618 2321          P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2619 2322          P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2620 2323          P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2621 2324          P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2622 2325          P("dumpbuf.size,%ld\n", dumpbuf.size);
2623 2326  
↓ open down ↓ 240 lines elided ↑ open up ↑
2864 2567          dump_plat_pfn();
2865 2568  
2866 2569          /*
2867 2570           * Write out all the pages.
2868 2571           * Map pages, copy them handling UEs, compress, and write them out.
2869 2572           * Cooperate with any helpers running on CPUs in panic_idle().
2870 2573           */
2871 2574          dumphdr->dump_data = dumpvp_flush();
2872 2575  
2873 2576          bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2874      -        ds->live = dumpcfg.clevel > 0 &&
     2577 +        ds->live = dumpcfg.clevel > DUMP_CLEVEL_SERIAL &&
2875 2578              (dumphdr->dump_flags & DF_LIVE) != 0;
2876 2579  
2877 2580          save_dump_clevel = dumpcfg.clevel;
2878 2581          if (panicstr)
2879 2582                  dumpsys_get_maxmem();
2880      -        else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2881      -                dumpcfg.clevel = DUMP_CLEVEL_LZJB;
2882 2583  
2883 2584          dumpcfg.nhelper_used = 0;
2884 2585          for (hp = dumpcfg.helper; hp != hpend; hp++) {
2885 2586                  if (hp->page == NULL) {
2886 2587                          hp->helper = DONEHELPER;
2887 2588                          continue;
2888 2589                  }
2889 2590                  ++dumpcfg.nhelper_used;
2890 2591                  hp->helper = FREEHELPER;
2891 2592                  hp->taskqid = NULL;
2892 2593                  hp->ds = ds;
2893 2594                  bzero(&hp->perpage, sizeof (hp->perpage));
2894      -                if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2895      -                        (void) BZ2_bzCompressReset(&hp->bzstream);
2896 2595          }
2897 2596  
2898 2597          CQ_OPEN(freebufq);
2899 2598          CQ_OPEN(helperq);
2900 2599  
2901 2600          dumpcfg.ncbuf_used = 0;
2902 2601          for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2903 2602                  if (cp->buf != NULL) {
2904 2603                          CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2905 2604                          ++dumpcfg.ncbuf_used;
↓ open down ↓ 17 lines elided ↑ open up ↑
2923 2622                          if (hp->page == NULL)
2924 2623                                  continue;
2925 2624                          hp->helper = hp - dumpcfg.helper;
2926 2625                          hp->taskqid = taskq_dispatch(livetaskq,
2927 2626                              dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2928 2627                  }
2929 2628  
2930 2629          } else {
2931 2630                  if (panicstr)
2932 2631                          kmem_dump_begin();
2933      -                dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
     2632 +                dumpcfg.helpers_wanted = dumpcfg.clevel > DUMP_CLEVEL_SERIAL;
2934 2633                  dumpsys_spinunlock(&dumpcfg.helper_lock);
2935 2634          }
2936 2635  
2937 2636          /* run main task */
2938 2637          dumpsys_main_task(ds);
2939 2638  
2940 2639          ds->elapsed = gethrtime() - ds->start;
2941 2640          if (ds->elapsed < 1)
2942 2641                  ds->elapsed = 1;
2943 2642  
↓ open down ↓ 193 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX