Print this page
3741 zfs needs better comments
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>

@@ -429,27 +429,54 @@
 static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
         vdev_raidz_map_free_vsd,
         vdev_raidz_cksum_report
 };
 
+/*
+ * Divides the IO evenly across all child vdevs; usually, dcols is
+ * the number of children in the target vdev.
+ */
 static raidz_map_t *
 vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
     uint64_t nparity)
 {
         raidz_map_t *rm;
+        /* The starting RAIDZ (parent) vdev sector of the block. */
         uint64_t b = zio->io_offset >> unit_shift;
+        /* The zio's size in units of the vdev's minimum sector size. */
         uint64_t s = zio->io_size >> unit_shift;
+        /* The first column for this stripe. */
         uint64_t f = b % dcols;
+        /* The starting byte offset on each child vdev. */
         uint64_t o = (b / dcols) << unit_shift;
         uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
 
+        /*
+         * "Quotient": The number of data sectors for this stripe on all but
+         * the "big column" child vdevs that also contain "remainder" data.
+         */
         q = s / (dcols - nparity);
+
+        /*
+         * "Remainder": The number of partial stripe data sectors in this I/O.
+         * This will add a sector to some, but not all, child vdevs.
+         */
         r = s - q * (dcols - nparity);
+
+        /* The number of "big columns" - those which contain remainder data. */
         bc = (r == 0 ? 0 : r + nparity);
+
+        /*
+         * The total number of data and parity sectors associated with
+         * this I/O.
+         */
         tot = s + nparity * (q + (r == 0 ? 0 : 1));
 
+        /* acols: The columns that will be accessed. */
+        /* scols: The columns that will be accessed or skipped. */
         if (q == 0) {
+                /* Our I/O request doesn't span all child vdevs. */
                 acols = bc;
                 scols = MIN(dcols, roundup(bc, nparity + 1));
         } else {
                 acols = dcols;
                 scols = dcols;

@@ -1519,10 +1546,27 @@
         rc->rc_error = zio->io_error;
         rc->rc_tried = 1;
         rc->rc_skipped = 0;
 }
 
+/*
+ * Start an IO operation on a RAIDZ VDev
+ *
+ * Outline:
+ * - For write operations:
+ *   1. Generate the parity data
+ *   2. Create child zio write operations to each column's vdev, for both
+ *      data and parity.
+ *   3. If the column skips any sectors for padding, create optional dummy
+ *      write zio children for those areas to improve aggregation continuity.
+ * - For read operations:
+ *   1. Create child zio read operations to each data column's vdev to read
+ *      the range of data required for zio.
+ *   2. If this is a scrub or resilver operation, or if any of the data
+ *      vdevs have had errors, then create zio read operations to the parity
+ *      columns' VDevs as well.
+ */
 static int
 vdev_raidz_io_start(zio_t *zio)
 {
         vdev_t *vd = zio->io_vd;
         vdev_t *tvd = vd->vdev_top;

@@ -1859,10 +1903,31 @@
         }
 
         return (ret);
 }
 
+/*
+ * Complete an IO operation on a RAIDZ VDev
+ *
+ * Outline:
+ * - For write operations:
+ *   1. Check for errors on the child IOs.
+ *   2. Return, setting an error code if too few child VDevs were written
+ *      to reconstruct the data later.  Note that partial writes are
+ *      considered successful if they can be reconstructed at all.
+ * - For read operations:
+ *   1. Check for errors on the child IOs.
+ *   2. If data errors occurred:
+ *      a. Try to reassemble the data from the parity available.
+ *      b. If we haven't yet read the parity drives, read them now.
+ *      c. If all parity drives have been read but the data still doesn't
+ *         reassemble with a correct checksum, then try combinatorial
+ *         reconstruction.
+ *      d. If that doesn't work, return an error.
+ *   3. If there were unexpected errors or this is a resilver operation,
+ *      rewrite the vdevs that had errors.
+ */
 static void
 vdev_raidz_io_done(zio_t *zio)
 {
         vdev_t *vd = zio->io_vd;
         vdev_t *cvd;