1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <sys/dmu.h>
  27 #include <sys/dmu_tx.h>
  28 #include <sys/dsl_pool.h>
  29 #include <sys/dsl_dir.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/metaslab.h>
  32 
/*
 * Shift applied to a task's blocks_modified count when it is added to the
 * group's dstg_space estimate, i.e. each modified block is accounted as
 * 1 << 14 bytes.
 */
#define DST_AVG_BLKSHIFT 14
  34 
  35 /* ARGSUSED */
  36 static int
  37 dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
  38 {
  39         return (0);
  40 }
  41 
  42 dsl_sync_task_group_t *
  43 dsl_sync_task_group_create(dsl_pool_t *dp)
  44 {
  45         dsl_sync_task_group_t *dstg;
  46 
  47         dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
  48         list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
  49             offsetof(dsl_sync_task_t, dst_node));
  50         dstg->dstg_pool = dp;
  51 
  52         return (dstg);
  53 }
  54 
  55 void
  56 dsl_sync_task_create(dsl_sync_task_group_t *dstg,
  57     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
  58     void *arg1, void *arg2, int blocks_modified)
  59 {
  60         dsl_sync_task_t *dst;
  61 
  62         if (checkfunc == NULL)
  63                 checkfunc = dsl_null_checkfunc;
  64         dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
  65         dst->dst_checkfunc = checkfunc;
  66         dst->dst_syncfunc = syncfunc;
  67         dst->dst_arg1 = arg1;
  68         dst->dst_arg2 = arg2;
  69         list_insert_tail(&dstg->dstg_tasks, dst);
  70 
  71         dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
  72 }
  73 
  74 int
  75 dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
  76 {
  77         dmu_tx_t *tx;
  78         uint64_t txg;
  79         dsl_sync_task_t *dst;
  80 
  81 top:
  82         tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
  83         VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
  84 
  85         txg = dmu_tx_get_txg(tx);
  86 
  87         /* Do a preliminary error check. */
  88         dstg->dstg_err = 0;
  89 #ifdef ZFS_DEBUG
  90         /*
  91          * Only check half the time, otherwise, the sync-context
  92          * check will almost never fail.
  93          */
  94         if (spa_get_random(2) == 0)
  95                 goto skip;
  96 #endif
  97         rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
  98         for (dst = list_head(&dstg->dstg_tasks); dst;
  99             dst = list_next(&dstg->dstg_tasks, dst)) {
 100                 dst->dst_err =
 101                     dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
 102                 if (dst->dst_err)
 103                         dstg->dstg_err = dst->dst_err;
 104         }
 105         rw_exit(&dstg->dstg_pool->dp_config_rwlock);
 106 
 107         if (dstg->dstg_err) {
 108                 dmu_tx_commit(tx);
 109                 return (dstg->dstg_err);
 110         }
 111 skip:
 112 
 113         /*
 114          * We don't generally have many sync tasks, so pay the price of
 115          * add_tail to get the tasks executed in the right order.
 116          */
 117         VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
 118             dstg, txg));
 119 
 120         dmu_tx_commit(tx);
 121 
 122         txg_wait_synced(dstg->dstg_pool, txg);
 123 
 124         if (dstg->dstg_err == EAGAIN) {
 125                 txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
 126                 goto top;
 127         }
 128 
 129         return (dstg->dstg_err);
 130 }
 131 
 132 void
 133 dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
 134 {
 135         uint64_t txg;
 136 
 137         dstg->dstg_nowaiter = B_TRUE;
 138         txg = dmu_tx_get_txg(tx);
 139         /*
 140          * We don't generally have many sync tasks, so pay the price of
 141          * add_tail to get the tasks executed in the right order.
 142          */
 143         VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
 144             dstg, txg));
 145 }
 146 
 147 void
 148 dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
 149 {
 150         dsl_sync_task_t *dst;
 151 
 152         while (dst = list_head(&dstg->dstg_tasks)) {
 153                 list_remove(&dstg->dstg_tasks, dst);
 154                 kmem_free(dst, sizeof (dsl_sync_task_t));
 155         }
 156         kmem_free(dstg, sizeof (dsl_sync_task_group_t));
 157 }
 158 
 159 void
 160 dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
 161 {
 162         dsl_sync_task_t *dst;
 163         dsl_pool_t *dp = dstg->dstg_pool;
 164         uint64_t quota, used;
 165 
 166         ASSERT0(dstg->dstg_err);
 167 
 168         /*
 169          * Check for sufficient space.  We just check against what's
 170          * on-disk; we don't want any in-flight accounting to get in our
 171          * way, because open context may have already used up various
 172          * in-core limits (arc_tempreserve, dsl_pool_tempreserve).
 173          */
 174         quota = dsl_pool_adjustedsize(dp, B_FALSE) -
 175             metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
 176         used = dp->dp_root_dir->dd_phys->dd_used_bytes;
 177         /* MOS space is triple-dittoed, so we multiply by 3. */
 178         if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
 179                 dstg->dstg_err = ENOSPC;
 180                 return;
 181         }
 182 
 183         /*
 184          * Check for errors by calling checkfuncs.
 185          */
 186         rw_enter(&dp->dp_config_rwlock, RW_WRITER);
 187         for (dst = list_head(&dstg->dstg_tasks); dst;
 188             dst = list_next(&dstg->dstg_tasks, dst)) {
 189                 dst->dst_err =
 190                     dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
 191                 if (dst->dst_err)
 192                         dstg->dstg_err = dst->dst_err;
 193         }
 194 
 195         if (dstg->dstg_err == 0) {
 196                 /*
 197                  * Execute sync tasks.
 198                  */
 199                 for (dst = list_head(&dstg->dstg_tasks); dst;
 200                     dst = list_next(&dstg->dstg_tasks, dst)) {
 201                         dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
 202                 }
 203         }
 204         rw_exit(&dp->dp_config_rwlock);
 205 
 206         if (dstg->dstg_nowaiter)
 207                 dsl_sync_task_group_destroy(dstg);
 208 }
 209 
 210 int
 211 dsl_sync_task_do(dsl_pool_t *dp,
 212     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
 213     void *arg1, void *arg2, int blocks_modified)
 214 {
 215         dsl_sync_task_group_t *dstg;
 216         int err;
 217 
 218         ASSERT(spa_writeable(dp->dp_spa));
 219 
 220         dstg = dsl_sync_task_group_create(dp);
 221         dsl_sync_task_create(dstg, checkfunc, syncfunc,
 222             arg1, arg2, blocks_modified);
 223         err = dsl_sync_task_group_wait(dstg);
 224         dsl_sync_task_group_destroy(dstg);
 225         return (err);
 226 }
 227 
 228 void
 229 dsl_sync_task_do_nowait(dsl_pool_t *dp,
 230     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
 231     void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
 232 {
 233         dsl_sync_task_group_t *dstg;
 234 
 235         if (!spa_writeable(dp->dp_spa))
 236                 return;
 237 
 238         dstg = dsl_sync_task_group_create(dp);
 239         dsl_sync_task_create(dstg, checkfunc, syncfunc,
 240             arg1, arg2, blocks_modified);
 241         dsl_sync_task_group_nowait(dstg, tx);
 242 }