Print this page
9709 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 /*
  26  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <stdarg.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <errno.h>
  35 #include <string.h>
  36 #include <deflt.h>
  37 #include <time.h>
  38 #include <syslog.h>
  39 #include <stropts.h>
  40 #include <pthread.h>
  41 #include <limits.h>
  42 #include <atomic.h>
  43 #include <libnvpair.h>
  44 #include <libintl.h>
  45 #include <sys/mem.h>
  46 #include <sys/statvfs.h>
  47 #include <sys/dumphdr.h>
  48 #include <sys/dumpadm.h>
  49 #include <sys/compress.h>
  50 #include <sys/panic.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/stat.h>
  53 #include <sys/resource.h>
  54 #include <bzip2/bzlib.h>
  55 #include <sys/fm/util.h>
  56 #include <fm/libfmevent.h>
  57 #include <sys/int_fmtio.h>
  58 
  59 
  60 /* fread/fwrite buffer size */
  61 #define FBUFSIZE                (1ULL << 20)
  62 
  63 /* minimum size for output buffering */
  64 #define MINCOREBLKSIZE          (1ULL << 17)
  65 
  66 /* create this file if metrics collection is enabled in the kernel */
  67 #define METRICSFILE "METRICS.csv"
  68 
  69 static char     progname[9] = "savecore";
  70 static char     *savedir;               /* savecore directory */
  71 static char     *dumpfile;              /* source of raw crash dump */
  72 static long     bounds = -1;            /* numeric suffix */
  73 static long     pagesize;               /* dump pagesize */
  74 static int      dumpfd = -1;            /* dumpfile descriptor */


 361                 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
 362         endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
 363         Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 364         Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
 365 
 366         pagesize = dumphdr.dump_pagesize;
 367 
 368         if (dumphdr.dump_magic != DUMP_MAGIC)
 369                 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
 370                     dumphdr.dump_magic);
 371 
 372         if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
 373                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
 374                     "dump already processed");
 375 
 376         if (dumphdr.dump_version != DUMP_VERSION)
 377                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 378                     "dump version (%d) != %s version (%d)",
 379                     dumphdr.dump_version, progname, DUMP_VERSION);
 380 




 381         if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
 382                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 383                     "dump is from %u-bit kernel - cannot save on %u-bit kernel",
 384                     dumphdr.dump_wordsize, DUMP_WORDSIZE);
 385 
 386         if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
 387                 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
 388                         logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 389                             "dump data version (%d) != %s data version (%d)",
 390                             datahdr.dump_datahdr_version, progname,
 391                             DUMP_DATAHDR_VERSION);
 392         } else {
 393                 (void) memset(&datahdr, 0, sizeof (datahdr));
 394                 datahdr.dump_maxcsize = pagesize;
 395         }
 396 
 397         /*
 398          * Read the initial header, clear the valid bits, and compare headers.
 399          * The main header may have been overwritten by swapping if we're
 400          * using a swap partition as the dump device, in which case we bail.


 683         int size;
 684 };
 685 
 686 typedef enum streamstate {	/* what a stream expects to decode next */
 687         STREAMSTART,	/* a stream header that opens a group of pages */
 688         STREAMPAGES	/* page data belonging to the current group */
 689 } streamstate_t;
 690 
 691 typedef struct stream {	/* decompression state kept per stream */
 692         streamstate_t state;	/* header or page data expected next */
 693         int init;	/* nonzero once the stream has been initialized */
 694         int tag;	/* presumably the block tag served - TODO confirm */
 695         int bound;	/* nonzero while a runstreams() thread works on it */
 696         int nout;	/* pages staged in blkbuf and not yet written */
 697         char *blkbuf;	/* output staging buffer of coreblksize bytes */
 698         blockhdr_t blocks;	/* queue of input blocks waiting to be decoded */
 699         pgcnt_t pagenum;	/* first page of the current group */
 700         pgcnt_t curpage;	/* page number passed to the next putpage() */
 701         pgcnt_t npages;	/* number of pages in the current group */
 702         pgcnt_t done;	/* pages of the current group decoded so far */
 703         bz_stream strm;	/* bzip2 decompressor state */
 704         dumpcsize_t sc;
 705         dumpstreamhdr_t sh;	/* stream header being or last decoded */
 706 } stream_t;
 707 
 708 static stream_t *streams;
 709 static stream_t *endstreams;
 710 
 711 const int cs = sizeof (dumpcsize_t);
 712 
 713 typedef struct tinfo {	/* argument handed to each runstreams() thread */
 714         pthread_t tid;
 715         int corefd;	/* passed to the block decoders, closed on exit */
 716 } tinfo_t;
 717 
 718 static int threads_stop;
 719 static int threads_active;
 720 static tinfo_t *tinfo;
 721 static tinfo_t *endtinfo;
 722 
 723 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;


 946                                 doflush = 1;
 947                                 atomic_inc_64(&zpages);
 948                         } else if (++s->nout >= BTOP(coreblksize) ||
 949                             isblkbnd(s->curpage + s->nout)) {
 950                                 doflush = 1;
 951                         }
 952                         if (++s->done >= s->npages) {
 953                                 s->state = STREAMSTART;
 954                                 doflush = 1;
 955                         }
 956                         if (doflush) {
 957                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
 958                                 s->nout = 0;
 959                                 s->curpage = s->pagenum + s->done;
 960                         }
 961                         break;
 962                 }
 963         }
 964 }
 965 
 966 /* bzlib library reports errors with this callback */
 967 void
 968 bz_internal_error(int errcode)
 969 {
 970         logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
 971             BZ2_bzErrorString(errcode));
 972 }
 973 
 974 /*
 975  * Return one object in the stream.
 976  *
 977  * An object (stream header or page) will likely span an input block
 978  * of compression data. Return non-zero when an entire object has been
 979  * retrieved from the stream.
 980  */
 981 static int
 982 bz2decompress(stream_t *s, void *buf, size_t size)
 983 {
 984         int rc;
 985 
 986         if (s->strm.avail_out == 0) {
 987                 s->strm.next_out = buf;
 988                 s->strm.avail_out = size;
 989         }
 990         while (s->strm.avail_in > 0) {
 991                 rc = BZ2_bzDecompress(&s->strm);
 992                 if (rc == BZ_STREAM_END) {
 993                         rc = BZ2_bzDecompressReset(&s->strm);
 994                         if (rc != BZ_OK)
 995                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 996                                     "BZ2_bzDecompressReset: %s",
 997                                     BZ2_bzErrorString(rc));
 998                         continue;
 999                 }
1000 
1001                 if (s->strm.avail_out == 0)
1002                         break;
1003         }
1004         return (s->strm.avail_out == 0);
1005 }
1006 
1007 /*
1008  * Process one bzip2 block.
1009  * The interface is documented here:
1010  * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
1011  */
1012 static void
1013 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1014 {
     /*
      * Decode one input block of bzip2 data for stream s and write the
      * resulting pages through putpage() on corefd. Objects may straddle
      * input blocks, so the function returns as soon as the input runs
      * out in the middle of an object and resumes on the next call.
      */
1015         int rc = 0;
1016         int doflush;
1017         char *out;
1018 
             /* First block for this stream: set up decompressor and buffer. */
1019         if (!s->init) {
1020                 s->init = 1;
1021                 rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1022                 if (rc != BZ_OK)
1023                         logprint(SC_SL_ERR | SC_EXIT_ERR,
1024                             "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1025                 if (s->blkbuf == NULL)
1026                         s->blkbuf = Zalloc(coreblksize);
                     /* avail_out == 0 makes bz2decompress() start a new object */
1027                 s->strm.avail_out = 0;
1028                 s->state = STREAMSTART;
1029         }
             /* Hand the whole block to the decompressor as its input. */
1030         s->strm.next_in = block;
1031         s->strm.avail_in = blocksz;
1032 
1033         while (s->strm.avail_in > 0) {
1034                 switch (s->state) {
1035                 case STREAMSTART:
                             /* header incomplete: wait for the next block */
1036                         if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1037                                 return;
1038                         if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1039                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
1040                                     "BZ2 STREAMSTART: bad stream header");
1041                         if (s->sh.stream_npages > datahdr.dump_maxrange)
1042                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
1043                                     "BZ2 STREAMSTART: bad range: %d > %d",
1044                                     s->sh.stream_npages, datahdr.dump_maxrange);
                             /* begin a new group of pages described by the header */
1045                         s->pagenum = s->sh.stream_pagenum;
1046                         s->npages = s->sh.stream_npages;
1047                         s->curpage = s->pagenum;
1048                         s->nout = 0;
1049                         s->done = 0;
1050                         s->state = STREAMPAGES;
1051                         break;
1052                 case STREAMPAGES:
                             /* decode into the next free page slot of blkbuf */
1053                         out = s->blkbuf + PTOB(s->nout);
1054                         if (!bz2decompress(s, out, pagesize))
1055                                 return;
1056 
1057                         atomic_inc_64(&saved);
1058 
1059                         doflush = 0;
                             /*
                              * A zero page found while nothing is staged is
                              * flushed with nout still 0; otherwise the page is
                              * kept and a flush happens once the buffer is full
                              * or isblkbnd() reports a boundary.
                              */
1060                         if (s->nout == 0 && iszpage(out)) {
1061                                 doflush = 1;
1062                                 atomic_inc_64(&zpages);
1063                         } else if (++s->nout >= BTOP(coreblksize) ||
1064                             isblkbnd(s->curpage + s->nout)) {
1065                                 doflush = 1;
1066                         }
                             /* last page of the group: flush, expect a header next */
1067                         if (++s->done >= s->npages) {
1068                                 s->state = STREAMSTART;
1069                                 doflush = 1;
1070                         }
1071                         if (doflush) {
1072                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
1073                                 s->nout = 0;
                                     /* next flush starts after all pages done so far */
1074                                 s->curpage = s->pagenum + s->done;
1075                         }
1076                         break;
1077                 }
1078         }
1079 }
1080 
1081 /* report progress */
1082 static void
1083 report_progress()
1084 {
1085         int sec, percent;
1086 
1087         if (!interactive)
1088                 return;
1089 
1090         percent = saved * 100LL / corehdr.dump_npages;
1091         sec = (gethrtime() - startts) / NANOSEC;
1092         if (percent > percent_done || sec > sec_done) {
1093                 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1094                     percent);
1095                 (void) fflush(stdout);
1096                 sec_done = sec;
1097                 percent_done = percent;
1098         }
1099 }
1100 


1103 runstreams(void *arg)
1104 {
     /*
      * Worker thread body. arg is this thread's tinfo_t and is returned
      * unchanged. The thread repeatedly scans all streams for one that has
      * queued blocks and is not being worked on, decodes that stream's
      * blocks, and sleeps on cvwork when a full scan found nothing to do.
      * "lock" is held except while a block is being decoded.
      */
1105         tinfo_t *t = arg;
1106         stream_t *s;
1107         block_t *b;
1108         int bound;
1109 
1110         (void) pthread_mutex_lock(&lock);
1111         while (!threads_stop) {
1112                 bound = 0;
1113                 for (s = streams; s != endstreams; s++) {
                             /* skip streams already taken or with no work */
1114                         if (s->bound || s->blocks.head == NULL)
1115                                 continue;
1116                         s->bound = 1;
1117                         bound = 1;
                             /* presumably wakes another worker to scan - confirm */
1118                         (void) pthread_cond_signal(&cvwork);
1119                         while (s->blocks.head != NULL) {
1120                                 b = deqh(&s->blocks);
                                     /* decode without holding the lock */
1121                                 (void) pthread_mutex_unlock(&lock);
1122 
                                     /* compression level selects the decoder */
1123                                 if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
1124                                         lzjbblock(t->corefd, s, b->block,
1125                                             b->size);
1126                                 else
1127                                         bz2block(t->corefd, s, b->block,
1128                                             b->size);
1129 
1130                                 (void) pthread_mutex_lock(&lock);
                                     /* recycle the block and tell whoever waits */
1131                                 enqt(&freeblocks, b);
1132                                 (void) pthread_cond_signal(&cvfree);
1133 
1134                                 report_progress();
1135                         }
                             /* stream drained: release it and signal cvbarrier */
1136                         s->bound = 0;
1137                         (void) pthread_cond_signal(&cvbarrier);
1138                 }
                     /* nothing was claimed in this pass: sleep until signalled */
1139                 if (!bound && !threads_stop)
1140                         (void) pthread_cond_wait(&cvwork, &lock);
1141         }
1142         (void) close(t->corefd);
             /* presumably passes the stop wake-up to the next worker - confirm */
1143         (void) pthread_cond_signal(&cvwork);
1144         (void) pthread_mutex_unlock(&lock);
1145         return (arg);
1146 }
1147 
1148 /*
1149  * Process compressed pages.
1150  *
1151  * The old format, now called single-threaded lzjb, is a 32-bit size
1152  * word followed by 'size' bytes of lzjb compression data for one
1153  * page. The new format extends this by storing a 12-bit "tag" in the
1154  * upper bits of the size word. When the size word is pagesize or
1155  * less, it is assumed to be one lzjb page. When the size word is
1156  * greater than pagesize, it is assumed to be a "stream block",
1157  * belonging to up to 4095 streams. In practice, the number of streams
1158  * is set to one less than the number of CPUs running at crash
1159  * time. One CPU processes the crash dump, the remaining CPUs
1160  * separately process groups of data pages.
1161  *
1162  * savecore creates a thread per stream, but never more threads than
1163  * the number of CPUs running savecore. This is because savecore can
1164  * be processing a crash file from a remote machine, which may have
1165  * more CPUs.
1166  *
1167  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1168  * series of 128KB blocks of compression data. In this case, each
1169  * block has a "tag", in the range 1-4095. Each block is handed off to
1170  * the threads running "runstreams". The dump format is either lzjb
1171  * or bzip2, never a mixture. These threads, in turn, process the
1172  * compression data for groups of pages. Groups of pages are delimited
1173  * by a "stream header", which indicates a starting pfn and number of
1174  * pages. When a stream block has been read, the condition variable
1175  * "cvwork" is signalled, which causes one of the available threads to
1176  * wake up and process the stream.
1177  *
1178  * In the parallel case there will be stream blocks encoding all data
1179  * pages. The stream of blocks is terminated by a zero size
1180  * word. There can be a few lzjb pages tacked on the end, depending on
1181  * the architecture. The sbarrier function ensures that all stream
1182  * blocks have been processed so that the page number for the few
1183  * single pages at the end can be known.
1184  */
1185 static void
1186 decompress_pages(int corefd)
1187 {
1188         char *cpage = NULL;
1189         char *dpage = NULL;
1190         char *out;
1191         pgcnt_t curpage = 0;
1192         block_t *b;
1193         FILE *dumpf;
1194         FILE *tracef = NULL;
1195         stream_t *s;
1196         size_t dsize;




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 /*
  26  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <stdarg.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <errno.h>
  35 #include <string.h>
  36 #include <deflt.h>
  37 #include <time.h>
  38 #include <syslog.h>
  39 #include <stropts.h>
  40 #include <pthread.h>
  41 #include <limits.h>
  42 #include <atomic.h>
  43 #include <libnvpair.h>
  44 #include <libintl.h>
  45 #include <sys/mem.h>
  46 #include <sys/statvfs.h>
  47 #include <sys/dumphdr.h>
  48 #include <sys/dumpadm.h>
  49 #include <sys/compress.h>
  50 #include <sys/panic.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/stat.h>
  53 #include <sys/resource.h>

  54 #include <sys/fm/util.h>
  55 #include <fm/libfmevent.h>
  56 #include <sys/int_fmtio.h>
  57 
  58 
  59 /* fread/fwrite buffer size */
  60 #define FBUFSIZE                (1ULL << 20)
  61 
  62 /* minimum size for output buffering */
  63 #define MINCOREBLKSIZE          (1ULL << 17)
  64 
  65 /* create this file if metrics collection is enabled in the kernel */
  66 #define METRICSFILE "METRICS.csv"
  67 
  68 static char     progname[9] = "savecore";
  69 static char     *savedir;               /* savecore directory */
  70 static char     *dumpfile;              /* source of raw crash dump */
  71 static long     bounds = -1;            /* numeric suffix */
  72 static long     pagesize;               /* dump pagesize */
  73 static int      dumpfd = -1;            /* dumpfile descriptor */


 360                 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
 361         endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
 362         Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 363         Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
 364 
 365         pagesize = dumphdr.dump_pagesize;
 366 
 367         if (dumphdr.dump_magic != DUMP_MAGIC)
 368                 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
 369                     dumphdr.dump_magic);
 370 
 371         if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
 372                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
 373                     "dump already processed");
 374 
 375         if (dumphdr.dump_version != DUMP_VERSION)
 376                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 377                     "dump version (%d) != %s version (%d)",
 378                     dumphdr.dump_version, progname, DUMP_VERSION);
 379 
 380         if (datahdr.dump_clevel > DUMP_CLEVEL_LZJB)
 381                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 382                     "unsupported compression format (%d)", datahdr.dump_clevel);
 383 
 384         if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
 385                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 386                     "dump is from %u-bit kernel - cannot save on %u-bit kernel",
 387                     dumphdr.dump_wordsize, DUMP_WORDSIZE);
 388 
 389         if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
 390                 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
 391                         logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 392                             "dump data version (%d) != %s data version (%d)",
 393                             datahdr.dump_datahdr_version, progname,
 394                             DUMP_DATAHDR_VERSION);
 395         } else {
 396                 (void) memset(&datahdr, 0, sizeof (datahdr));
 397                 datahdr.dump_maxcsize = pagesize;
 398         }
 399 
 400         /*
 401          * Read the initial header, clear the valid bits, and compare headers.
 402          * The main header may have been overwritten by swapping if we're
 403          * using a swap partition as the dump device, in which case we bail.


 686         int size;
 687 };
 688 
 689 typedef enum streamstate {	/* what a stream expects to decode next */
 690         STREAMSTART,	/* a stream header that opens a group of pages */
 691         STREAMPAGES	/* page data belonging to the current group */
 692 } streamstate_t;
 693 
 694 typedef struct stream {	/* decompression state kept per stream */
 695         streamstate_t state;	/* header or page data expected next */
 696         int init;	/* presumably set once initialized - TODO confirm */
 697         int tag;	/* presumably the block tag served - TODO confirm */
 698         int bound;	/* nonzero while a runstreams() thread works on it */
 699         int nout;	/* pages staged in blkbuf and not yet written */
 700         char *blkbuf;	/* output staging buffer handed to putpage() */
 701         blockhdr_t blocks;	/* queue of input blocks waiting to be decoded */
 702         pgcnt_t pagenum;	/* first page of the current group */
 703         pgcnt_t curpage;	/* page number passed to the next putpage() */
 704         pgcnt_t npages;	/* number of pages in the current group */
 705         pgcnt_t done;	/* pages of the current group decoded so far */

 706         dumpcsize_t sc;
 707         dumpstreamhdr_t sh;
 708 } stream_t;
 709 
 710 static stream_t *streams;
 711 static stream_t *endstreams;
 712 
 713 const int cs = sizeof (dumpcsize_t);
 714 
 715 typedef struct tinfo {	/* argument handed to each runstreams() thread */
 716         pthread_t tid;
 717         int corefd;	/* passed to lzjbblock(), closed on thread exit */
 718 } tinfo_t;
 719 
 720 static int threads_stop;
 721 static int threads_active;
 722 static tinfo_t *tinfo;
 723 static tinfo_t *endtinfo;
 724 
 725 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;


 948                                 doflush = 1;
 949                                 atomic_inc_64(&zpages);
 950                         } else if (++s->nout >= BTOP(coreblksize) ||
 951                             isblkbnd(s->curpage + s->nout)) {
 952                                 doflush = 1;
 953                         }
 954                         if (++s->done >= s->npages) {
 955                                 s->state = STREAMSTART;
 956                                 doflush = 1;
 957                         }
 958                         if (doflush) {
 959                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
 960                                 s->nout = 0;
 961                                 s->curpage = s->pagenum + s->done;
 962                         }
 963                         break;
 964                 }
 965         }
 966 }
 967 



















































































































 968 /* report progress */
 969 static void
 970 report_progress()
 971 {
 972         int sec, percent;
 973 
 974         if (!interactive)
 975                 return;
 976 
 977         percent = saved * 100LL / corehdr.dump_npages;
 978         sec = (gethrtime() - startts) / NANOSEC;
 979         if (percent > percent_done || sec > sec_done) {
 980                 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
 981                     percent);
 982                 (void) fflush(stdout);
 983                 sec_done = sec;
 984                 percent_done = percent;
 985         }
 986 }
 987 


 990 runstreams(void *arg)
 991 {
     /*
      * Worker thread body. arg is this thread's tinfo_t and is returned
      * unchanged. The thread repeatedly scans all streams for one that has
      * queued blocks and is not being worked on, decodes that stream's
      * blocks with lzjbblock(), and sleeps on cvwork when a full scan found
      * nothing to do. "lock" is held except while a block is being decoded.
      */
 992         tinfo_t *t = arg;
 993         stream_t *s;
 994         block_t *b;
 995         int bound;
 996 
 997         (void) pthread_mutex_lock(&lock);
 998         while (!threads_stop) {
 999                 bound = 0;
1000                 for (s = streams; s != endstreams; s++) {
                             /* skip streams already taken or with no work */
1001                         if (s->bound || s->blocks.head == NULL)
1002                                 continue;
1003                         s->bound = 1;
1004                         bound = 1;
                             /* presumably wakes another worker to scan - confirm */
1005                         (void) pthread_cond_signal(&cvwork);
1006                         while (s->blocks.head != NULL) {
1007                                 b = deqh(&s->blocks);
                                     /* decode without holding the lock */
1008                                 (void) pthread_mutex_unlock(&lock);
1009 

1010                                 lzjbblock(t->corefd, s, b->block,
1011                                     b->size);



1012 
1013                                 (void) pthread_mutex_lock(&lock);
                                     /* recycle the block and tell whoever waits */
1014                                 enqt(&freeblocks, b);
1015                                 (void) pthread_cond_signal(&cvfree);
1016 
1017                                 report_progress();
1018                         }
                             /* stream drained: release it and signal cvbarrier */
1019                         s->bound = 0;
1020                         (void) pthread_cond_signal(&cvbarrier);
1021                 }
                     /* nothing was claimed in this pass: sleep until signalled */
1022                 if (!bound && !threads_stop)
1023                         (void) pthread_cond_wait(&cvwork, &lock);
1024         }
1025         (void) close(t->corefd);
             /* presumably passes the stop wake-up to the next worker - confirm */
1026         (void) pthread_cond_signal(&cvwork);
1027         (void) pthread_mutex_unlock(&lock);
1028         return (arg);
1029 }
1030 
1031 /*
1032  * Process compressed pages.
1033  *
1034  * The old format, now called single-threaded lzjb, is a 32-bit size
1035  * word followed by 'size' bytes of lzjb compression data for one
1036  * page. The new format extends this by storing a 12-bit "tag" in the
1037  * upper bits of the size word. When the size word is pagesize or
1038  * less, it is assumed to be one lzjb page. When the size word is
1039  * greater than pagesize, it is assumed to be a "stream block",
1040  * belonging to up to 4095 streams. In practice, the number of streams
1041  * is set to one less than the number of CPUs running at crash
1042  * time. One CPU processes the crash dump, the remaining CPUs
1043  * separately process groups of data pages.
1044  *
1045  * savecore creates a thread per stream, but never more threads than
1046  * the number of CPUs running savecore. This is because savecore can
1047  * be processing a crash file from a remote machine, which may have
1048  * more CPUs.
1049  *
1050  * When the kernel uses parallel compression we expect a series of 128KB
1051  * blocks of compression data. In this case, each block has a "tag" in
1052  * the range 1-4095. Each block is handed off to the threads running
1053  * "runstreams". These threads, in turn, process the compression data
1054  * for groups of pages. Groups of pages are delimited by a "stream header",
1055  * which indicates a starting pfn and number of pages. When a stream block
1056  * has been read, the condition variable "cvwork" is signalled, which causes
1057  * one of the available threads to wake up and process the stream.


1058  *
1059  * In the parallel case there will be stream blocks encoding all data
1060  * pages. The stream of blocks is terminated by a zero size
1061  * word. There can be a few lzjb pages tacked on the end, depending on
1062  * the architecture. The sbarrier function ensures that all stream
1063  * blocks have been processed so that the page number for the few
1064  * single pages at the end can be known.
1065  */
1066 static void
1067 decompress_pages(int corefd)
1068 {
1069         char *cpage = NULL;
1070         char *dpage = NULL;
1071         char *out;
1072         pgcnt_t curpage = 0;
1073         block_t *b;
1074         FILE *dumpf;
1075         FILE *tracef = NULL;
1076         stream_t *s;
1077         size_t dsize;