Print this page
4101 metaslab_debug should allow for fine-grained control
4102 space_maps should store more information about themselves
4103 space map object blocksize should be increased
4104 ::spa_space no longer works
4105 removing a mirrored log device results in a leaked object
4106 asynchronously load metaslab
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Sebastien Roy <seb@delphix.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2012 by Delphix. All rights reserved.
  28  */
  29 
  30 #ifndef _SYS_SPACE_MAP_H
  31 #define _SYS_SPACE_MAP_H
  32 
  33 #include <sys/avl.h>

  34 #include <sys/dmu.h>
  35 
  36 #ifdef  __cplusplus
  37 extern "C" {
  38 #endif
  39 
  40 typedef struct space_map_ops space_map_ops_t;
     /*
      * Forward typedef: space_map_t holds a space_map_ops_t pointer (sm_ops),
      * while struct space_map_ops itself is only defined after space_map_t.
      */








  41 




























  42 typedef struct space_map {
             /*
              * In-core space map. Per the field comments below: sm_root holds
              * the segments ordered by offset; sm_loaded, sm_loading and
              * sm_load_cv describe load state; sm_ops, sm_pp_root and sm_ppd
              * belong to the block picker.
              *
              * sm_lock is only a pointer, so the mutex is owned elsewhere.
              * NOTE(review): presumably the lp argument of space_map_create()
              * -- confirm in space_map.c.
              */
  43         avl_tree_t      sm_root;        /* offset-ordered segment AVL tree */
  44         uint64_t        sm_space;       /* sum of all segments in the map */
  45         uint64_t        sm_start;       /* start of map */
  46         uint64_t        sm_size;        /* size of map */
  47         uint8_t         sm_shift;       /* unit shift */
  48         uint8_t         sm_loaded;      /* map loaded? */
  49         uint8_t         sm_loading;     /* map loading? */
  50         uint8_t         sm_condensing;  /* map condensing? */
  51         kcondvar_t      sm_load_cv;     /* map load completion */
  52         space_map_ops_t *sm_ops;        /* space map block picker ops vector */
  53         avl_tree_t      *sm_pp_root;    /* size-ordered, picker-private tree */
  54         void            *sm_ppd;        /* picker-private data */
  55         kmutex_t        *sm_lock;       /* pointer to lock that protects map */
  56 } space_map_t;
  57 
  58 typedef struct space_seg {
             /*
              * One segment covering [ss_start, ss_end); ss_end is exclusive.
              * The segment embeds two AVL nodes, so it can be linked into two
              * trees at the same time.
              * NOTE(review): presumably sm_root (via ss_node) and sm_pp_root
              * (via ss_pp_node) of space_map_t -- confirm in space_map.c.
              */
  59         avl_node_t      ss_node;        /* AVL node */
  60         avl_node_t      ss_pp_node;     /* AVL picker-private node */
  61         uint64_t        ss_start;       /* starting offset of this segment */
  62         uint64_t        ss_end;         /* ending offset (non-inclusive) */
  63 } space_seg_t;
  64 
  65 typedef struct space_ref {
             /*
              * An (offset, reference count) record that is linked into an AVL
              * tree through sr_node. sr_refcnt is signed.
              * NOTE(review): presumably the element type of the avl_tree_t
              * passed to the space_map_ref_*() functions -- confirm in
              * space_map.c.
              */
  66         avl_node_t      sr_node;        /* AVL node */
  67         uint64_t        sr_offset;      /* offset (start or end) */
  68         int64_t         sr_refcnt;      /* associated reference count */
  69 } space_ref_t;
  70 
  71 typedef struct space_map_obj {
             /*
              * Describes the on-disk space map object: its object number, the
              * size of that object, and the space allocated from the map.
              * A pointer to this structure is taken by space_map_load(),
              * space_map_sync() and space_map_truncate().
              */
  72         uint64_t        smo_object;     /* on-disk space map object */
  73         uint64_t        smo_objsize;    /* size of the object */
  74         uint64_t        smo_alloc;      /* space allocated from the map */
  75 } space_map_obj_t;
  76 
  77 struct space_map_ops {
             /*
              * Block picker ops vector (see sm_ops in space_map_t). Every
              * callback receives the space_map_t it operates on.
              * NOTE(review): smop_alloc, smop_claim and smop_free have the same
              * signatures as space_map_alloc(), space_map_claim() and
              * space_map_free(), which presumably dispatch to them -- confirm
              * in space_map.c.
              */
  78         void    (*smop_load)(space_map_t *sm);
  79         void    (*smop_unload)(space_map_t *sm);
  80         uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
  81         void    (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
  82         void    (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
  83         uint64_t (*smop_max)(space_map_t *sm);
  84         boolean_t (*smop_fragmented)(space_map_t *sm);
  85 };
  86 
  87 /*
  88  * debug entry
  89  *
  90  *    1      3         10                     50
  91  *  ,---+--------+------------+---------------------------------.
  92  *  | 1 | action |  syncpass  |        txg (lower bits)         |
  93  *  `---+--------+------------+---------------------------------'
  94  *   63  62    60 59        50 49                               0
  95  *
  96  *
  97  * non-debug entry
  98  *
  99  *    1               47                   1           15
 100  *  ,-----------------------------------------------------------.
 101  *  | 0 |   offset (sm_shift units)    | type |       run       |
 102  *  `-----------------------------------------------------------'
 103  *   63  62                          16   15   14               0
 104  */
 105 
 106 /* All this stuff takes and returns bytes */
     /*
      * Field accessors for the 64-bit entry words in the diagram above.
      * NOTE(review): the last two arguments of BF64_DECODE/BF64_ENCODE look
      * like (lowest bit, width in bits) -- confirm against their definition.
      *
      * The run field is stored biased by one: SM_RUN_ENCODE() stores (x) - 1
      * and SM_RUN_DECODE() adds 1 back.
      */
 107 #define SM_RUN_DECODE(x)        (BF64_DECODE(x, 0, 15) + 1)
 108 #define SM_RUN_ENCODE(x)        BF64_ENCODE((x) - 1, 0, 15)
 109 #define SM_TYPE_DECODE(x)       BF64_DECODE(x, 15, 1)
 110 #define SM_TYPE_ENCODE(x)       BF64_ENCODE(x, 15, 1)
 111 #define SM_OFFSET_DECODE(x)     BF64_DECODE(x, 16, 47)
 112 #define SM_OFFSET_ENCODE(x)     BF64_ENCODE(x, 16, 47)
 113 #define SM_DEBUG_DECODE(x)      BF64_DECODE(x, 63, 1)
 114 #define SM_DEBUG_ENCODE(x)      BF64_ENCODE(x, 63, 1)
 115 
     /* Accessors for the action, syncpass and txg fields of a debug entry. */
 116 #define SM_DEBUG_ACTION_DECODE(x)       BF64_DECODE(x, 60, 3)
 117 #define SM_DEBUG_ACTION_ENCODE(x)       BF64_ENCODE(x, 60, 3)
 118 
 119 #define SM_DEBUG_SYNCPASS_DECODE(x)     BF64_DECODE(x, 50, 10)
 120 #define SM_DEBUG_SYNCPASS_ENCODE(x)     BF64_ENCODE(x, 50, 10)
 121 
 122 #define SM_DEBUG_TXG_DECODE(x)          BF64_DECODE(x, 0, 50)
 123 #define SM_DEBUG_TXG_ENCODE(x)          BF64_ENCODE(x, 0, 50)
 124 
     /* Largest decodable run: the run field of an all-ones word, decoded. */
 125 #define SM_RUN_MAX                      SM_RUN_DECODE(~0ULL)
 126 
     /*
      * NOTE(review): presumably the two values of the 1-bit type field and of
      * the maptype arguments below -- confirm in space_map.c.
      */
 127 #define SM_ALLOC        0x0
 128 #define SM_FREE         0x1


 129 
 130 /*
 131  * The data for a given space map can be kept on blocks of any size.
 132  * Larger blocks entail fewer i/o operations, but they also cause the
 133  * DMU to keep more data in-core, and also to waste more i/o bandwidth
 134  * when only a few blocks have changed since the last transaction group.
 135  * This could use a lot more research, but for now, set the freelist
 136  * block size to 4k (2^12).
      * SPACE_MAP_BLOCKSHIFT is the log2 of that size: 1 << 12 == 4096 bytes.
 137  */
 138 #define SPACE_MAP_BLOCKSHIFT    12
 139 
 140 typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
     /*
      * space_map_func_t is the callback type accepted by space_map_vacate()
      * and space_map_walk(). It has the same signature as space_map_add() and
      * space_map_remove().
      */
 141 
     /* Module setup/teardown, then creation and manipulation of in-core maps. */
 142 extern void space_map_init(void);
 143 extern void space_map_fini(void);
 144 extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
 145     uint8_t shift, kmutex_t *lp);
 146 extern void space_map_destroy(space_map_t *sm);
 147 extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
 148 extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
 149 extern boolean_t space_map_contains(space_map_t *sm,
 150     uint64_t start, uint64_t size);
 151 extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start,
 152     uint64_t size, avl_index_t *wherep);
 153 extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
 154 extern void space_map_vacate(space_map_t *sm,
 155     space_map_func_t *func, space_map_t *mdest);
 156 extern void space_map_walk(space_map_t *sm,
 157     space_map_func_t *func, space_map_t *mdest);
 158 
     /*
      * Loading and unloading. space_map_load() takes the ops vector, a map
      * type, the on-disk object description and the objset.
      * NOTE(review): the int return is presumably 0 or an errno value, and
      * maptype presumably SM_ALLOC or SM_FREE -- confirm in space_map.c.
      */
 159 extern void space_map_load_wait(space_map_t *sm);
 160 extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
 161     uint8_t maptype, space_map_obj_t *smo, objset_t *os);
 162 extern void space_map_unload(space_map_t *sm);
 163 
     /*
      * NOTE(review): these four mirror smop_alloc, smop_claim, smop_free and
      * smop_max in struct space_map_ops -- presumably thin dispatchers; confirm.
      */
 164 extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
 165 extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
 166 extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
 167 extern uint64_t space_map_maxsize(space_map_t *sm);
 168 
     /* Both take a dmu_tx_t, i.e. they operate within a DMU transaction. */
 169 extern void space_map_sync(space_map_t *sm, uint8_t maptype,
 170     space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
 171 extern void space_map_truncate(space_map_obj_t *smo,
 172     objset_t *os, dmu_tx_t *tx);

 173 
     /*
      * Reference-tree helpers. These operate on a caller-supplied avl_tree_t
      * rather than on a space_map_t's own tree; reference counts are signed.
      */
 174 extern void space_map_ref_create(avl_tree_t *t);
 175 extern void space_map_ref_destroy(avl_tree_t *t);
 176 extern void space_map_ref_add_seg(avl_tree_t *t,
 177     uint64_t start, uint64_t end, int64_t refcnt);
 178 extern void space_map_ref_add_map(avl_tree_t *t,
 179     space_map_t *sm, int64_t refcnt);
 180 extern void space_map_ref_generate_map(avl_tree_t *t,
 181     space_map_t *sm, int64_t minref);
 182 


 183 #ifdef  __cplusplus
 184 }
 185 #endif
 186 
 187 #endif  /* _SYS_SPACE_MAP_H */


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2013 by Delphix. All rights reserved.
  28  */
  29 
  30 #ifndef _SYS_SPACE_MAP_H
  31 #define _SYS_SPACE_MAP_H
  32 
  33 #include <sys/avl.h>
  34 #include <sys/range_tree.h>
  35 #include <sys/dmu.h>
  36 
  37 #ifdef  __cplusplus
  38 extern "C" {
  39 #endif
  40 
  41 /*
  42  * The size of the space map object has increased to include a histogram.
  43  * The SPACE_MAP_SIZE_V0 designates the original size and is used to
  44  * maintain backward compatibility.
  45  */
  46 #define SPACE_MAP_SIZE_V0       (3 * sizeof (uint64_t))
     /*
      * Number of buckets in smp_histogram: the array's total size divided by
      * the size of one element. The argument must be an expression with an
      * sm_phys member pointing at a space_map_phys_t (e.g. a space_map_t *).
      * Only sizeof is applied to it, so sm_phys is not dereferenced at run time.
      */
  47 #define SPACE_MAP_HISTOGRAM_SIZE(sm)                    \
  48         (sizeof ((sm)->sm_phys->smp_histogram) /  \
  49         sizeof ((sm)->sm_phys->smp_histogram[0]))
  50 
  51 /*
  52  * The space_map_phys is the on-disk representation of the space map.
  53  * Consumers of space maps should never reference any of the members of this
  54  * structure directly. These members may only be updated in syncing context.
  55  *
  56  * Note the smp_object is no longer used but remains in the structure
  57  * for backward compatibility.
  58  */
  59 typedef struct space_map_phys {
  60         uint64_t        smp_object;     /* on-disk space map object */
  61         uint64_t        smp_objsize;    /* size of the object */
  62         uint64_t        smp_alloc;      /* space allocated from the map */
             /*
              * The three fields above occupy 3 * sizeof (uint64_t) bytes, the
              * same value as SPACE_MAP_SIZE_V0.
              * NOTE(review): presumably they are the entire original on-disk
              * layout -- confirm in space_map.c.
              */
  63         uint64_t        smp_pad[5];     /* reserved */
  64 
  65         /*
  66          * The smp_histogram maintains a histogram of free regions. Each
  67          * bucket, smp_histogram[i], contains the number of free regions
  68          * whose size is:
  69          * 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1)
  70          */
  71         uint64_t        smp_histogram[32]; /* histogram of free space */
  72 } space_map_phys_t;
  73 
  74 /*
  75  * The space map object defines a region of space, its size, how much is
  76  * allocated, and the on-disk object that stores this information.
  77  * Consumers of space maps may only access the members of this structure.
  78  */
  79 typedef struct space_map {


             /*
              * sm_start, sm_size and sm_shift describe the region covered;
              * they correspond to the start, size and shift arguments of
              * space_map_open().
              */
  80         uint64_t        sm_start;       /* start of map */
  81         uint64_t        sm_size;        /* size of map */
  82         uint8_t         sm_shift;       /* unit shift */
             /*
              * NOTE(review): the two "synced" fields are presumably in-core
              * copies of values in sm_phys, refreshed by space_map_update() --
              * confirm in space_map.c.
              */
  83         uint64_t        sm_length;      /* synced length */
  84         uint64_t        sm_alloc;       /* synced space allocated */
  85         objset_t        *sm_os;         /* objset for this map */
  86         uint64_t        sm_object;      /* object id for this map */
  87         uint32_t        sm_blksz;       /* block size for space map */
  88         dmu_buf_t       *sm_dbuf;       /* space_map_phys_t dbuf */
  89         space_map_phys_t *sm_phys;      /* on-disk space map */
  90         kmutex_t        *sm_lock;       /* pointer to lock that protects map */
  91 } space_map_t;
  92 





























  93 /*
  94  * debug entry
  95  *
  96  *    1      3         10                     50
  97  *  ,---+--------+------------+---------------------------------.
  98  *  | 1 | action |  syncpass  |        txg (lower bits)         |
  99  *  `---+--------+------------+---------------------------------'
 100  *   63  62    60 59        50 49                               0
 101  *
 102  *
 103  * non-debug entry
 104  *
 105  *    1               47                   1           15
 106  *  ,-----------------------------------------------------------.
 107  *  | 0 |   offset (sm_shift units)    | type |       run       |
 108  *  `-----------------------------------------------------------'
 109  *   63  62                          16   15   14               0
 110  */
 111 
 112 /* All this stuff takes and returns bytes */
     /*
      * Field accessors for the 64-bit entry words in the diagram above.
      * NOTE(review): the last two arguments of BF64_DECODE/BF64_ENCODE look
      * like (lowest bit, width in bits) -- confirm against their definition.
      *
      * The run field is stored biased by one: SM_RUN_ENCODE() stores (x) - 1
      * and SM_RUN_DECODE() adds 1 back.
      */
 113 #define SM_RUN_DECODE(x)        (BF64_DECODE(x, 0, 15) + 1)
 114 #define SM_RUN_ENCODE(x)        BF64_ENCODE((x) - 1, 0, 15)
 115 #define SM_TYPE_DECODE(x)       BF64_DECODE(x, 15, 1)
 116 #define SM_TYPE_ENCODE(x)       BF64_ENCODE(x, 15, 1)
 117 #define SM_OFFSET_DECODE(x)     BF64_DECODE(x, 16, 47)
 118 #define SM_OFFSET_ENCODE(x)     BF64_ENCODE(x, 16, 47)
 119 #define SM_DEBUG_DECODE(x)      BF64_DECODE(x, 63, 1)
 120 #define SM_DEBUG_ENCODE(x)      BF64_ENCODE(x, 63, 1)
 121 
     /* Accessors for the action, syncpass and txg fields of a debug entry. */
 122 #define SM_DEBUG_ACTION_DECODE(x)       BF64_DECODE(x, 60, 3)
 123 #define SM_DEBUG_ACTION_ENCODE(x)       BF64_ENCODE(x, 60, 3)
 124 
 125 #define SM_DEBUG_SYNCPASS_DECODE(x)     BF64_DECODE(x, 50, 10)
 126 #define SM_DEBUG_SYNCPASS_ENCODE(x)     BF64_ENCODE(x, 50, 10)
 127 
 128 #define SM_DEBUG_TXG_DECODE(x)          BF64_DECODE(x, 0, 50)
 129 #define SM_DEBUG_TXG_ENCODE(x)          BF64_ENCODE(x, 0, 50)
 130 
     /* Largest decodable run: the run field of an all-ones word, decoded. */
 131 #define SM_RUN_MAX                      SM_RUN_DECODE(~0ULL)
 132 
 133 typedef enum {
             /*
              * Map type taken by space_map_load() and space_map_write().
              * With no explicit initializers, SM_ALLOC == 0 and SM_FREE == 1,
              * so either value fits in a 1-bit field.
              * NOTE(review): presumably the value stored through
              * SM_TYPE_ENCODE() -- confirm in space_map.c.
              */
 134         SM_ALLOC,
 135         SM_FREE
 136 } maptype_t;
 137 
 138 /*
 139  * The data for a given space map can be kept on blocks of any size.
 140  * Larger blocks entail fewer i/o operations, but they also cause the
 141  * DMU to keep more data in-core, and also to waste more i/o bandwidth
 142  * when only a few blocks have changed since the last transaction group.
 143  * Rather than having a fixed block size for all space maps the block size
 144  * can adjust as needed (see space_map_max_blksz). Set the initial block
 145  * size for the space map to 4k.
      *
      * The value is in bytes, not a shift: (1ULL << 12) == 4096, and the
      * expression has type unsigned long long.
 146  */
 147 #define SPACE_MAP_INITIAL_BLOCKSIZE     (1ULL << 12)
 148 
 149 int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
     /*
      * space_map_load() takes the space map, a range tree and a map type.
      * NOTE(review): the int return is presumably 0 or an errno value, and rt
      * presumably receives the loaded segments -- confirm in space_map.c.
      */
 150 
     /* Histogram maintenance; space_map_histogram_add() takes a dmu_tx_t. */
 151 void space_map_histogram_clear(space_map_t *sm);
 152 void space_map_histogram_add(space_map_t *sm, range_tree_t *rt,
 153     dmu_tx_t *tx);













 154 
     /*
      * NOTE(review): presumably refreshes the "synced" sm_length and sm_alloc
      * fields of space_map_t -- confirm in space_map.c.
      */
 155 void space_map_update(space_map_t *sm);



 156 
     /*
      * Accessor-style functions returning uint64_t.
      * NOTE(review): presumably the object id, the allocated space and the
      * length of the map, respectively -- confirm in space_map.c.
      */
 157 uint64_t space_map_object(space_map_t *sm);
 158 uint64_t space_map_allocated(space_map_t *sm);
 159 uint64_t space_map_length(space_map_t *sm);

 160 
     /*
      * All four take a dmu_tx_t. space_map_alloc() takes an objset rather
      * than a space_map_t and returns a uint64_t.
      * NOTE(review): presumably the id of a newly allocated object -- confirm.
      */
 161 void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
 162     dmu_tx_t *tx);
 163 void space_map_truncate(space_map_t *sm, dmu_tx_t *tx);
 164 uint64_t space_map_alloc(objset_t *os, dmu_tx_t *tx);
 165 void space_map_free(space_map_t *sm, dmu_tx_t *tx);
 166 
     /*
      * space_map_open() hands back the space_map_t through smp and returns an
      * int status; space_map_close() takes that space_map_t.
      * NOTE(review): status is presumably 0 or an errno value -- confirm.
      */
 167 int space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
 168     uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp);
 169 void space_map_close(space_map_t *sm);





 170 
     /* Returns a signed 64-bit value, so the delta may be negative. */
 171 int64_t space_map_alloc_delta(space_map_t *sm);
 172 
 173 #ifdef  __cplusplus
 174 }
 175 #endif
 176 
 177 #endif  /* _SYS_SPACE_MAP_H */