1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2013 by Delphix. All rights reserved.
  27  */
  28 
  29 #include <sys/zfs_context.h>
  30 #include <sys/range_tree.h>
  31 #include <sys/space_reftree.h>
  32 
  33 /*
  34  * Space reference trees.
  35  *
  36  * A range tree is a collection of integers.  Every integer is either
  37  * in the tree, or it's not.  A space reference tree generalizes
  38  * the idea: it allows its members to have arbitrary reference counts,
  39  * as opposed to the implicit reference count of 0 or 1 in a range tree.
  40  * This representation comes in handy when computing the union or
  41  * intersection of multiple space maps.  For example, the union of
  42  * N range trees is the subset of the reference tree with refcnt >= 1.
  43  * The intersection of N range trees is the subset with refcnt >= N.
  44  *
  45  * [It's very much like a Fourier transform.  Unions and intersections
  46  * are hard to perform in the 'range tree domain', so we convert the trees
  47  * into the 'reference count domain', where it's trivial, then invert.]
  48  *
  49  * vdev_dtl_reassess() uses computations of this form to determine
  50  * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev
  51  * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev
  52  * has an outage wherever refcnt >= vdev_children.
  53  */
  54 static int
  55 space_reftree_compare(const void *x1, const void *x2)
  56 {
  57         const space_ref_t *sr1 = x1;
  58         const space_ref_t *sr2 = x2;
  59 
  60         if (sr1->sr_offset < sr2->sr_offset)
  61                 return (-1);
  62         if (sr1->sr_offset > sr2->sr_offset)
  63                 return (1);
  64 
  65         if (sr1 < sr2)
  66                 return (-1);
  67         if (sr1 > sr2)
  68                 return (1);
  69 
  70         return (0);
  71 }
  72 
  73 void
  74 space_reftree_create(avl_tree_t *t)
  75 {
  76         avl_create(t, space_reftree_compare,
  77             sizeof (space_ref_t), offsetof(space_ref_t, sr_node));
  78 }
  79 
  80 void
  81 space_reftree_destroy(avl_tree_t *t)
  82 {
  83         space_ref_t *sr;
  84         void *cookie = NULL;
  85 
  86         while ((sr = avl_destroy_nodes(t, &cookie)) != NULL)
  87                 kmem_free(sr, sizeof (*sr));
  88 
  89         avl_destroy(t);
  90 }
  91 
  92 static void
  93 space_reftree_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt)
  94 {
  95         space_ref_t *sr;
  96 
  97         sr = kmem_alloc(sizeof (*sr), KM_SLEEP);
  98         sr->sr_offset = offset;
  99         sr->sr_refcnt = refcnt;
 100 
 101         avl_add(t, sr);
 102 }
 103 
 104 void
 105 space_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end,
 106         int64_t refcnt)
 107 {
 108         space_reftree_add_node(t, start, refcnt);
 109         space_reftree_add_node(t, end, -refcnt);
 110 }
 111 
 112 /*
 113  * Convert (or add) a range tree into a reference tree.
 114  */
 115 void
 116 space_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt)
 117 {
 118         range_seg_t *rs;
 119 
 120         ASSERT(MUTEX_HELD(rt->rt_lock));
 121 
 122         for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
 123                 space_reftree_add_seg(t, rs->rs_start, rs->rs_end, refcnt);
 124 }
 125 
 126 /*
 127  * Convert a reference tree into a range tree.  The range tree will contain
 128  * all members of the reference tree for which refcnt >= minref.
 129  */
 130 void
 131 space_reftree_generate_map(avl_tree_t *t, range_tree_t *rt, int64_t minref)
 132 {
 133         uint64_t start = -1ULL;
 134         int64_t refcnt = 0;
 135         space_ref_t *sr;
 136 
 137         ASSERT(MUTEX_HELD(rt->rt_lock));
 138 
 139         range_tree_vacate(rt, NULL, NULL);
 140 
 141         for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) {
 142                 refcnt += sr->sr_refcnt;
 143                 if (refcnt >= minref) {
 144                         if (start == -1ULL) {
 145                                 start = sr->sr_offset;
 146                         }
 147                 } else {
 148                         if (start != -1ULL) {
 149                                 uint64_t end = sr->sr_offset;
 150                                 ASSERT(start <= end);
 151                                 if (end > start)
 152                                         range_tree_add(rt, start, end - start);
 153                                 start = -1ULL;
 154                         }
 155                 }
 156         }
 157         ASSERT(refcnt == 0);
 158         ASSERT(start == -1ULL);
 159 }