Print this page
9694 Parallel dump hangs
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: John Levon <levon@movementarian.org>

@@ -20,10 +20,11 @@
  */
 
 /*
  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2018 Joyent, Inc.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>

@@ -69,10 +70,11 @@
 #include <vm/pvn.h>
 #include <vm/seg.h>
 #include <vm/seg_kmem.h>
 #include <sys/clock_impl.h>
 #include <sys/hold_page.h>
+#include <sys/cpu.h>
 
 #include <bzip2/bzlib.h>
 
 #define ONE_GIG (1024 * 1024 * 1024UL)
 

@@ -438,10 +440,19 @@
 } dumpbuf_t;
 
 dumpbuf_t dumpbuf;              /* I/O buffer */
 
 /*
+ * For parallel dump, defines maximum time main task thread will wait
+ * for at least one helper to register in dumpcfg.helpermap, before
+ * assuming there are no helpers and falling back to serial mode.
+ * The value is chosen arbitrarily and provides a *really* long wait
+ * for any available helper to register.
+ */
+#define DUMP_HELPER_MAX_WAIT    1000    /* millisec */
+
+/*
  * The dump I/O buffer must be at least one page, at most xfer_size
  * bytes, and should scale with physmem in between.  The transfer size
  * passed in will either represent a global default (maxphys) or the
  * best size for the device.  The size of the dumpbuf I/O buffer is
  * limited by dumpbuf_limit (8MB by default) because the dump

@@ -2299,33 +2310,51 @@
         pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
         dumpmlw_t mlw;
         cbuf_t *cp;
         pgcnt_t baseoff, pfnoff;
         pfn_t base, pfn;
-        int i, dumpserial;
+        boolean_t dumpserial;
+        int i;
 
         /*
          * Fall back to serial mode if there are no helpers.
          * dump_plat_mincpu can be set to 0 at any time.
          * dumpcfg.helpermap must contain at least one member.
+         *
+         * It is possible that the helpers haven't registered
+         * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT for
+         * at least one helper to register.
          */
-        dumpserial = 1;
-
+        dumpserial = B_TRUE;
         if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
+                hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
+                hrtime_t hrtstart = gethrtime();
+
+                for (;;) {
                 for (i = 0; i < BT_BITOUL(NCPU); ++i) {
                         if (dumpcfg.helpermap[i] != 0) {
-                                dumpserial = 0;
+                                        dumpserial = B_FALSE;
                                 break;
                         }
                 }
+
+                        if ((!dumpserial) ||
+                            ((gethrtime() - hrtstart) >= hrtmax)) {
+                                break;
         }
 
+                        SMT_PAUSE();
+                }
+
         if (dumpserial) {
                 dumpcfg.clevel = 0;
-                if (dumpcfg.helper[0].lzbuf == NULL)
-                        dumpcfg.helper[0].lzbuf = dumpcfg.helper[1].page;
+                        if (dumpcfg.helper[0].lzbuf == NULL) {
+                                dumpcfg.helper[0].lzbuf =
+                                    dumpcfg.helper[1].page;
         }
+                }
+        }
 
         dump_init_memlist_walker(&mlw);
 
         for (;;) {
                 int sec = (gethrtime() - ds->start) / NANOSEC;

@@ -2465,15 +2494,14 @@
                          * If there are no helpers the main task does
                          * non-streams lzjb compress.
                          */
                         if (dumpserial) {
                                 dumpsys_lzjb_page(dumpcfg.helper, cp);
-                                break;
-                        }
-
+                        } else {
                         /* pass mapped pages to a helper */
                         CQ_PUT(helperq, cp, CBUF_INREADY);
+                        }
 
                         /* the last page was done */
                         if (bitnum >= dumpcfg.bitmapsize)
                                 CQ_CLOSE(helperq);
 

@@ -2575,12 +2603,16 @@
 
         P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
         P("Found small pages,%ld\n", cfg->foundsm);
 
         P("Compression level,%d\n", cfg->clevel);
-        P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel",
+        P("Compression type,%s %s", cfg->clevel == 0 ? "serial" : "parallel",
             cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
+        if (cfg->clevel >= DUMP_CLEVEL_BZIP2)
+                P(" (level %d)\n", dump_bzip2_level);
+        else
+                P("\n");
         P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
             100);
         P("nhelper_used,%d\n", cfg->nhelper_used);
 
         P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);