/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/metaslab.h>

#define	DST_AVG_BLKSHIFT 14

/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
{
	return (0);
}

dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
	dsl_sync_task_group_t *dstg;

	dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
	    offsetof(dsl_sync_task_t, dst_node));
	dstg->dstg_pool = dp;

	return (dstg);
}

void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_t *dst;

	if (checkfunc == NULL)
		checkfunc = dsl_null_checkfunc;
	dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
	dst->dst_checkfunc = checkfunc;
	dst->dst_syncfunc = syncfunc;
	dst->dst_arg1 = arg1;
	dst->dst_arg2 = arg2;
	list_insert_tail(&dstg->dstg_tasks, dst);

	dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}

int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
{
	dmu_tx_t *tx;
	uint64_t txg;
	dsl_sync_task_t *dst;

top:
	tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));

	txg = dmu_tx_get_txg(tx);

	/* Do a preliminary error check. */
	dstg->dstg_err = 0;
#ifdef ZFS_DEBUG
	/*
	 * Only check half the time, otherwise, the sync-context
	 * check will almost never fail.
	 */
	if (spa_get_random(2) == 0)
		goto skip;
#endif
	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}
	rw_exit(&dstg->dstg_pool->dp_config_rwlock);

	if (dstg->dstg_err) {
		dmu_tx_commit(tx);
		return (dstg->dstg_err);
	}
skip:

	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));

	dmu_tx_commit(tx);

	txg_wait_synced(dstg->dstg_pool, txg);

	if (dstg->dstg_err == EAGAIN) {
		txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
		goto top;
	}

	return (dstg->dstg_err);
}

void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	uint64_t txg;

	dstg->dstg_nowaiter = B_TRUE;
	txg = dmu_tx_get_txg(tx);
	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));
}

void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
{
	dsl_sync_task_t *dst;

	while (dst = list_head(&dstg->dstg_tasks)) {
		list_remove(&dstg->dstg_tasks, dst);
		kmem_free(dst, sizeof (dsl_sync_task_t));
	}
	kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}

void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	dsl_sync_task_t *dst;
	dsl_pool_t *dp = dstg->dstg_pool;
	uint64_t quota, used;

	ASSERT0(dstg->dstg_err);

	/*
	 * Check for sufficient space.  We just check against what's
	 * on-disk; we don't want any in-flight accounting to get in our
	 * way, because open context may have already used up various
	 * in-core limits (arc_tempreserve, dsl_pool_tempreserve).
	 */
	quota = dsl_pool_adjustedsize(dp, B_FALSE) -
	    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
	used = dp->dp_root_dir->dd_phys->dd_used_bytes;
	/* MOS space is triple-dittoed, so we multiply by 3. */
	if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
		dstg->dstg_err = ENOSPC;
		return;
	}

	/*
	 * Check for errors by calling checkfuncs.
	 */
	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}

	if (dstg->dstg_err == 0) {
		/*
		 * Execute sync tasks.
		 */
		for (dst = list_head(&dstg->dstg_tasks); dst;
		    dst = list_next(&dstg->dstg_tasks, dst)) {
			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
		}
	}
	rw_exit(&dp->dp_config_rwlock);

	if (dstg->dstg_nowaiter)
		dsl_sync_task_group_destroy(dstg);
}

int
dsl_sync_task_do(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_group_t *dstg;
	int err;

	ASSERT(spa_writeable(dp->dp_spa));

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	return (err);
}

void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
	dsl_sync_task_group_t *dstg;

	if (!spa_writeable(dp->dp_spa))
		return;

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	dsl_sync_task_group_nowait(dstg, tx);
}
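
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * pair a check function with a sync function and run them through
 * dsl_sync_task_do().  The example_check, example_sync, and
 * example_do_task names below are hypothetical and exist only to show
 * the calling convention defined above; real callers in the DSL layer
 * follow the same pattern.  Kept inside a comment so it does not add
 * code to the file.
 *
 *	static int
 *	example_check(void *arg1, void *arg2, dmu_tx_t *tx)
 *	{
 *		// Validate arguments; called once as a preliminary check
 *		// in open context and again in syncing context.  Return
 *		// 0 on success or an errno to abort the task.
 *		return (0);
 *	}
 *
 *	static void
 *	example_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 *	{
 *		// Apply the change in syncing context, charging all
 *		// on-disk modifications to the passed-in tx.
 *	}
 *
 *	static int
 *	example_do_task(dsl_pool_t *dp, void *arg1, void *arg2)
 *	{
 *		// blocks_modified = 1 is a rough estimate of the blocks
 *		// the sync func will dirty, used for the space check in
 *		// dsl_sync_task_group_sync().  This call blocks until
 *		// the txg carrying the task has synced.
 *		return (dsl_sync_task_do(dp, example_check, example_sync,
 *		    arg1, arg2, 1));
 *	}
 */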