1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <sys/dmu.h>
  27 #include <sys/dmu_objset.h>
  28 #include <sys/dmu_tx.h>
  29 #include <sys/dsl_dataset.h>
  30 #include <sys/dsl_dir.h>
  31 #include <sys/dsl_prop.h>
  32 #include <sys/dsl_synctask.h>
  33 #include <sys/dsl_deleg.h>
  34 #include <sys/spa.h>
  35 #include <sys/metaslab.h>
  36 #include <sys/zap.h>
  37 #include <sys/zio.h>
  38 #include <sys/arc.h>
  39 #include <sys/sunddi.h>
  40 #include "zfs_namecheck.h"
  41 
  42 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
  43 
  44 /* ARGSUSED */
  45 static void
  46 dsl_dir_evict(dmu_buf_t *db, void *arg)
  47 {
  48         dsl_dir_t *dd = arg;
  49         dsl_pool_t *dp = dd->dd_pool;
  50         int t;
  51 
  52         for (t = 0; t < TXG_SIZE; t++) {
  53                 ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
  54                 ASSERT(dd->dd_tempreserved[t] == 0);
  55                 ASSERT(dd->dd_space_towrite[t] == 0);
  56         }
  57 
  58         if (dd->dd_parent)
  59                 dsl_dir_rele(dd->dd_parent, dd);
  60 
  61         spa_close(dd->dd_pool->dp_spa, dd);
  62 
  63         /*
  64          * The props callback list should have been cleaned up by
  65          * objset_evict().
  66          */
  67         list_destroy(&dd->dd_prop_cbs);
  68         mutex_destroy(&dd->dd_lock);
  69         kmem_free(dd, sizeof (dsl_dir_t));
  70 }
  71 
  72 int
  73 dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
  74     const char *tail, void *tag, dsl_dir_t **ddp)
  75 {
  76         dmu_buf_t *dbuf;
  77         dsl_dir_t *dd;
  78         int err;
  79 
  80         ASSERT(dsl_pool_config_held(dp));
  81 
  82         err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
  83         if (err != 0)
  84                 return (err);
  85         dd = dmu_buf_get_user(dbuf);
  86 #ifdef ZFS_DEBUG
  87         {
  88                 dmu_object_info_t doi;
  89                 dmu_object_info_from_db(dbuf, &doi);
  90                 ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
  91                 ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
  92         }
  93 #endif
  94         if (dd == NULL) {
  95                 dsl_dir_t *winner;
  96 
  97                 dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
  98                 dd->dd_object = ddobj;
  99                 dd->dd_dbuf = dbuf;
 100                 dd->dd_pool = dp;
 101                 dd->dd_phys = dbuf->db_data;
 102                 mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
 103 
 104                 list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
 105                     offsetof(dsl_prop_cb_record_t, cbr_node));
 106 
 107                 dsl_dir_snap_cmtime_update(dd);
 108 
 109                 if (dd->dd_phys->dd_parent_obj) {
 110                         err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
 111                             NULL, dd, &dd->dd_parent);
 112                         if (err != 0)
 113                                 goto errout;
 114                         if (tail) {
 115 #ifdef ZFS_DEBUG
 116                                 uint64_t foundobj;
 117 
 118                                 err = zap_lookup(dp->dp_meta_objset,
 119                                     dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 120                                     tail, sizeof (foundobj), 1, &foundobj);
 121                                 ASSERT(err || foundobj == ddobj);
 122 #endif
 123                                 (void) strcpy(dd->dd_myname, tail);
 124                         } else {
 125                                 err = zap_value_search(dp->dp_meta_objset,
 126                                     dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 127                                     ddobj, 0, dd->dd_myname);
 128                         }
 129                         if (err != 0)
 130                                 goto errout;
 131                 } else {
 132                         (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
 133                 }
 134 
 135                 if (dsl_dir_is_clone(dd)) {
 136                         dmu_buf_t *origin_bonus;
 137                         dsl_dataset_phys_t *origin_phys;
 138 
 139                         /*
 140                          * We can't open the origin dataset, because
 141                          * that would require opening this dsl_dir.
 142                          * Just look at its phys directly instead.
 143                          */
 144                         err = dmu_bonus_hold(dp->dp_meta_objset,
 145                             dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
 146                         if (err != 0)
 147                                 goto errout;
 148                         origin_phys = origin_bonus->db_data;
 149                         dd->dd_origin_txg =
 150                             origin_phys->ds_creation_txg;
 151                         dmu_buf_rele(origin_bonus, FTAG);
 152                 }
 153 
 154                 winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
 155                     dsl_dir_evict);
 156                 if (winner) {
 157                         if (dd->dd_parent)
 158                                 dsl_dir_rele(dd->dd_parent, dd);
 159                         mutex_destroy(&dd->dd_lock);
 160                         kmem_free(dd, sizeof (dsl_dir_t));
 161                         dd = winner;
 162                 } else {
 163                         spa_open_ref(dp->dp_spa, dd);
 164                 }
 165         }
 166 
 167         /*
 168          * The dsl_dir_t has both open-to-close and instantiate-to-evict
 169          * holds on the spa.  We need the open-to-close holds because
 170          * otherwise the spa_refcnt wouldn't change when we open a
 171          * dir which the spa also has open, so we could incorrectly
 172          * think it was OK to unload/export/destroy the pool.  We need
 173          * the instantiate-to-evict hold because the dsl_dir_t has a
 174          * pointer to the dd_pool, which has a pointer to the spa_t.
 175          */
 176         spa_open_ref(dp->dp_spa, tag);
 177         ASSERT3P(dd->dd_pool, ==, dp);
 178         ASSERT3U(dd->dd_object, ==, ddobj);
 179         ASSERT3P(dd->dd_dbuf, ==, dbuf);
 180         *ddp = dd;
 181         return (0);
 182 
 183 errout:
 184         if (dd->dd_parent)
 185                 dsl_dir_rele(dd->dd_parent, dd);
 186         mutex_destroy(&dd->dd_lock);
 187         kmem_free(dd, sizeof (dsl_dir_t));
 188         dmu_buf_rele(dbuf, tag);
 189         return (err);
 190 }
 191 
 192 void
 193 dsl_dir_rele(dsl_dir_t *dd, void *tag)
 194 {
 195         dprintf_dd(dd, "%s\n", "");
 196         spa_close(dd->dd_pool->dp_spa, tag);
 197         dmu_buf_rele(dd->dd_dbuf, tag);
 198 }
 199 
 200 /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
 201 void
 202 dsl_dir_name(dsl_dir_t *dd, char *buf)
 203 {
 204         if (dd->dd_parent) {
 205                 dsl_dir_name(dd->dd_parent, buf);
 206                 (void) strcat(buf, "/");
 207         } else {
 208                 buf[0] = '\0';
 209         }
 210         if (!MUTEX_HELD(&dd->dd_lock)) {
 211                 /*
 212                  * recursive mutex so that we can use
 213                  * dprintf_dd() with dd_lock held
 214                  */
 215                 mutex_enter(&dd->dd_lock);
 216                 (void) strcat(buf, dd->dd_myname);
 217                 mutex_exit(&dd->dd_lock);
 218         } else {
 219                 (void) strcat(buf, dd->dd_myname);
 220         }
 221 }
 222 
 223 /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
 224 int
 225 dsl_dir_namelen(dsl_dir_t *dd)
 226 {
 227         int result = 0;
 228 
 229         if (dd->dd_parent) {
 230                 /* parent's name + 1 for the "/" */
 231                 result = dsl_dir_namelen(dd->dd_parent) + 1;
 232         }
 233 
 234         if (!MUTEX_HELD(&dd->dd_lock)) {
 235                 /* see dsl_dir_name */
 236                 mutex_enter(&dd->dd_lock);
 237                 result += strlen(dd->dd_myname);
 238                 mutex_exit(&dd->dd_lock);
 239         } else {
 240                 result += strlen(dd->dd_myname);
 241         }
 242 
 243         return (result);
 244 }
 245 
 246 static int
 247 getcomponent(const char *path, char *component, const char **nextp)
 248 {
 249         char *p;
 250 
 251         if ((path == NULL) || (path[0] == '\0'))
 252                 return (SET_ERROR(ENOENT));
 253         /* This would be a good place to reserve some namespace... */
 254         p = strpbrk(path, "/@");
 255         if (p && (p[1] == '/' || p[1] == '@')) {
 256                 /* two separators in a row */
 257                 return (SET_ERROR(EINVAL));
 258         }
 259         if (p == NULL || p == path) {
 260                 /*
 261                  * if the first thing is an @ or /, it had better be an
 262                  * @ and it had better not have any more ats or slashes,
 263                  * and it had better have something after the @.
 264                  */
 265                 if (p != NULL &&
 266                     (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
 267                         return (SET_ERROR(EINVAL));
 268                 if (strlen(path) >= MAXNAMELEN)
 269                         return (SET_ERROR(ENAMETOOLONG));
 270                 (void) strcpy(component, path);
 271                 p = NULL;
 272         } else if (p[0] == '/') {
 273                 if (p - path >= MAXNAMELEN)
 274                         return (SET_ERROR(ENAMETOOLONG));
 275                 (void) strncpy(component, path, p - path);
 276                 component[p - path] = '\0';
 277                 p++;
 278         } else if (p[0] == '@') {
 279                 /*
 280                  * if the next separator is an @, there better not be
 281                  * any more slashes.
 282                  */
 283                 if (strchr(path, '/'))
 284                         return (SET_ERROR(EINVAL));
 285                 if (p - path >= MAXNAMELEN)
 286                         return (SET_ERROR(ENAMETOOLONG));
 287                 (void) strncpy(component, path, p - path);
 288                 component[p - path] = '\0';
 289         } else {
 290                 panic("invalid p=%p", (void *)p);
 291         }
 292         *nextp = p;
 293         return (0);
 294 }
 295 
 296 /*
 297  * Return the dsl_dir_t, and possibly the last component which couldn't
 298  * be found in *tail.  The name must be in the specified dsl_pool_t.  This
 299  * thread must hold the dp_config_rwlock for the pool.  Returns NULL if the
 300  * path is bogus, or if tail==NULL and we couldn't parse the whole name.
 301  * (*tail)[0] == '@' means that the last component is a snapshot.
 302  */
 303 int
 304 dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
 305     dsl_dir_t **ddp, const char **tailp)
 306 {
 307         char buf[MAXNAMELEN];
 308         const char *spaname, *next, *nextnext = NULL;
 309         int err;
 310         dsl_dir_t *dd;
 311         uint64_t ddobj;
 312 
 313         err = getcomponent(name, buf, &next);
 314         if (err != 0)
 315                 return (err);
 316 
 317         /* Make sure the name is in the specified pool. */
 318         spaname = spa_name(dp->dp_spa);
 319         if (strcmp(buf, spaname) != 0)
 320                 return (SET_ERROR(EINVAL));
 321 
 322         ASSERT(dsl_pool_config_held(dp));
 323 
 324         err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
 325         if (err != 0) {
 326                 return (err);
 327         }
 328 
 329         while (next != NULL) {
 330                 dsl_dir_t *child_ds;
 331                 err = getcomponent(next, buf, &nextnext);
 332                 if (err != 0)
 333                         break;
 334                 ASSERT(next[0] != '\0');
 335                 if (next[0] == '@')
 336                         break;
 337                 dprintf("looking up %s in obj%lld\n",
 338                     buf, dd->dd_phys->dd_child_dir_zapobj);
 339 
 340                 err = zap_lookup(dp->dp_meta_objset,
 341                     dd->dd_phys->dd_child_dir_zapobj,
 342                     buf, sizeof (ddobj), 1, &ddobj);
 343                 if (err != 0) {
 344                         if (err == ENOENT)
 345                                 err = 0;
 346                         break;
 347                 }
 348 
 349                 err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
 350                 if (err != 0)
 351                         break;
 352                 dsl_dir_rele(dd, tag);
 353                 dd = child_ds;
 354                 next = nextnext;
 355         }
 356 
 357         if (err != 0) {
 358                 dsl_dir_rele(dd, tag);
 359                 return (err);
 360         }
 361 
 362         /*
 363          * It's an error if there's more than one component left, or
 364          * tailp==NULL and there's any component left.
 365          */
 366         if (next != NULL &&
 367             (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
 368                 /* bad path name */
 369                 dsl_dir_rele(dd, tag);
 370                 dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
 371                 err = SET_ERROR(ENOENT);
 372         }
 373         if (tailp != NULL)
 374                 *tailp = next;
 375         *ddp = dd;
 376         return (err);
 377 }
 378 
 379 uint64_t
 380 dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
 381     dmu_tx_t *tx)
 382 {
 383         objset_t *mos = dp->dp_meta_objset;
 384         uint64_t ddobj;
 385         dsl_dir_phys_t *ddphys;
 386         dmu_buf_t *dbuf;
 387 
 388         ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
 389             DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
 390         if (pds) {
 391                 VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
 392                     name, sizeof (uint64_t), 1, &ddobj, tx));
 393         } else {
 394                 /* it's the root dir */
 395                 VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
 396                     DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
 397         }
 398         VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
 399         dmu_buf_will_dirty(dbuf, tx);
 400         ddphys = dbuf->db_data;
 401 
 402         ddphys->dd_creation_time = gethrestime_sec();
 403         if (pds)
 404                 ddphys->dd_parent_obj = pds->dd_object;
 405         ddphys->dd_props_zapobj = zap_create(mos,
 406             DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
 407         ddphys->dd_child_dir_zapobj = zap_create(mos,
 408             DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
 409         if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
 410                 ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
 411         dmu_buf_rele(dbuf, FTAG);
 412 
 413         return (ddobj);
 414 }
 415 
 416 boolean_t
 417 dsl_dir_is_clone(dsl_dir_t *dd)
 418 {
 419         return (dd->dd_phys->dd_origin_obj &&
 420             (dd->dd_pool->dp_origin_snap == NULL ||
 421             dd->dd_phys->dd_origin_obj !=
 422             dd->dd_pool->dp_origin_snap->ds_object));
 423 }
 424 
 425 void
 426 dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 427 {
 428         mutex_enter(&dd->dd_lock);
 429         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
 430             dd->dd_phys->dd_used_bytes);
 431         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
 432         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
 433             dd->dd_phys->dd_reserved);
 434         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
 435             dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
 436             (dd->dd_phys->dd_uncompressed_bytes * 100 /
 437             dd->dd_phys->dd_compressed_bytes));
 438         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
 439             dd->dd_phys->dd_uncompressed_bytes);
 440         if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
 441                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
 442                     dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
 443                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
 444                     dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
 445                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
 446                     dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
 447                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
 448                     dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
 449                     dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
 450         }
 451         mutex_exit(&dd->dd_lock);
 452 
 453         if (dsl_dir_is_clone(dd)) {
 454                 dsl_dataset_t *ds;
 455                 char buf[MAXNAMELEN];
 456 
 457                 VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
 458                     dd->dd_phys->dd_origin_obj, FTAG, &ds));
 459                 dsl_dataset_name(ds, buf);
 460                 dsl_dataset_rele(ds, FTAG);
 461                 dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 462         }
 463 }
 464 
 465 void
 466 dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
 467 {
 468         dsl_pool_t *dp = dd->dd_pool;
 469 
 470         ASSERT(dd->dd_phys);
 471 
 472         if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
 473                 /* up the hold count until we can be written out */
 474                 dmu_buf_add_ref(dd->dd_dbuf, dd);
 475         }
 476 }
 477 
 478 static int64_t
 479 parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
 480 {
 481         uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
 482         uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
 483         return (new_accounted - old_accounted);
 484 }
 485 
 486 void
 487 dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
 488 {
 489         ASSERT(dmu_tx_is_syncing(tx));
 490 
 491         mutex_enter(&dd->dd_lock);
 492         ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
 493         dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
 494             dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
 495         dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
 496         mutex_exit(&dd->dd_lock);
 497 
 498         /* release the hold from dsl_dir_dirty */
 499         dmu_buf_rele(dd->dd_dbuf, dd);
 500 }
 501 
 502 static uint64_t
 503 dsl_dir_space_towrite(dsl_dir_t *dd)
 504 {
 505         uint64_t space = 0;
 506         int i;
 507 
 508         ASSERT(MUTEX_HELD(&dd->dd_lock));
 509 
 510         for (i = 0; i < TXG_SIZE; i++) {
 511                 space += dd->dd_space_towrite[i&TXG_MASK];
 512                 ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
 513         }
 514         return (space);
 515 }
 516 
 517 /*
 518  * How much space would dd have available if ancestor had delta applied
 519  * to it?  If ondiskonly is set, we're only interested in what's
 520  * on-disk, not estimated pending changes.
 521  */
 522 uint64_t
 523 dsl_dir_space_available(dsl_dir_t *dd,
 524     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
 525 {
 526         uint64_t parentspace, myspace, quota, used;
 527 
 528         /*
 529          * If there are no restrictions otherwise, assume we have
 530          * unlimited space available.
 531          */
 532         quota = UINT64_MAX;
 533         parentspace = UINT64_MAX;
 534 
 535         if (dd->dd_parent != NULL) {
 536                 parentspace = dsl_dir_space_available(dd->dd_parent,
 537                     ancestor, delta, ondiskonly);
 538         }
 539 
 540         mutex_enter(&dd->dd_lock);
 541         if (dd->dd_phys->dd_quota != 0)
 542                 quota = dd->dd_phys->dd_quota;
 543         used = dd->dd_phys->dd_used_bytes;
 544         if (!ondiskonly)
 545                 used += dsl_dir_space_towrite(dd);
 546 
 547         if (dd->dd_parent == NULL) {
 548                 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
 549                 quota = MIN(quota, poolsize);
 550         }
 551 
 552         if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
 553                 /*
 554                  * We have some space reserved, in addition to what our
 555                  * parent gave us.
 556                  */
 557                 parentspace += dd->dd_phys->dd_reserved - used;
 558         }
 559 
 560         if (dd == ancestor) {
 561                 ASSERT(delta <= 0);
 562                 ASSERT(used >= -delta);
 563                 used += delta;
 564                 if (parentspace != UINT64_MAX)
 565                         parentspace -= delta;
 566         }
 567 
 568         if (used > quota) {
 569                 /* over quota */
 570                 myspace = 0;
 571         } else {
 572                 /*
 573                  * the lesser of the space provided by our parent and
 574                  * the space left in our quota
 575                  */
 576                 myspace = MIN(parentspace, quota - used);
 577         }
 578 
 579         mutex_exit(&dd->dd_lock);
 580 
 581         return (myspace);
 582 }
 583 
 584 struct tempreserve {
 585         list_node_t tr_node;
 586         dsl_pool_t *tr_dp;
 587         dsl_dir_t *tr_ds;
 588         uint64_t tr_size;
 589 };
 590 
 591 static int
 592 dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
 593     boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
 594     dmu_tx_t *tx, boolean_t first)
 595 {
 596         uint64_t txg = tx->tx_txg;
 597         uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
 598         uint64_t deferred = 0;
 599         struct tempreserve *tr;
 600         int retval = EDQUOT;
 601         int txgidx = txg & TXG_MASK;
 602         int i;
 603         uint64_t ref_rsrv = 0;
 604 
 605         ASSERT3U(txg, !=, 0);
 606         ASSERT3S(asize, >, 0);
 607 
 608         mutex_enter(&dd->dd_lock);
 609 
 610         /*
 611          * Check against the dsl_dir's quota.  We don't add in the delta
 612          * when checking for over-quota because they get one free hit.
 613          */
 614         est_inflight = dsl_dir_space_towrite(dd);
 615         for (i = 0; i < TXG_SIZE; i++)
 616                 est_inflight += dd->dd_tempreserved[i];
 617         used_on_disk = dd->dd_phys->dd_used_bytes;
 618 
 619         /*
 620          * On the first iteration, fetch the dataset's used-on-disk and
 621          * refreservation values. Also, if checkrefquota is set, test if
 622          * allocating this space would exceed the dataset's refquota.
 623          */
 624         if (first && tx->tx_objset) {
 625                 int error;
 626                 dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
 627 
 628                 error = dsl_dataset_check_quota(ds, checkrefquota,
 629                     asize, est_inflight, &used_on_disk, &ref_rsrv);
 630                 if (error) {
 631                         mutex_exit(&dd->dd_lock);
 632                         return (error);
 633                 }
 634         }
 635 
 636         /*
 637          * If this transaction will result in a net free of space,
 638          * we want to let it through.
 639          */
 640         if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
 641                 quota = UINT64_MAX;
 642         else
 643                 quota = dd->dd_phys->dd_quota;
 644 
 645         /*
 646          * Adjust the quota against the actual pool size at the root
 647          * minus any outstanding deferred frees.
 648          * To ensure that it's possible to remove files from a full
 649          * pool without inducing transient overcommits, we throttle
 650          * netfree transactions against a quota that is slightly larger,
 651          * but still within the pool's allocation slop.  In cases where
 652          * we're very close to full, this will allow a steady trickle of
 653          * removes to get through.
 654          */
 655         if (dd->dd_parent == NULL) {
 656                 spa_t *spa = dd->dd_pool->dp_spa;
 657                 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
 658                 deferred = metaslab_class_get_deferred(spa_normal_class(spa));
 659                 if (poolsize - deferred < quota) {
 660                         quota = poolsize - deferred;
 661                         retval = ENOSPC;
 662                 }
 663         }
 664 
 665         /*
 666          * If they are requesting more space, and our current estimate
 667          * is over quota, they get to try again unless the actual
 668          * on-disk is over quota and there are no pending changes (which
 669          * may free up space for us).
 670          */
 671         if (used_on_disk + est_inflight >= quota) {
 672                 if (est_inflight > 0 || used_on_disk < quota ||
 673                     (retval == ENOSPC && used_on_disk < quota + deferred))
 674                         retval = ERESTART;
 675                 dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
 676                     "quota=%lluK tr=%lluK err=%d\n",
 677                     used_on_disk>>10, est_inflight>>10,
 678                     quota>>10, asize>>10, retval);
 679                 mutex_exit(&dd->dd_lock);
 680                 return (SET_ERROR(retval));
 681         }
 682 
 683         /* We need to up our estimated delta before dropping dd_lock */
 684         dd->dd_tempreserved[txgidx] += asize;
 685 
 686         parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
 687             asize - ref_rsrv);
 688         mutex_exit(&dd->dd_lock);
 689 
 690         tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 691         tr->tr_ds = dd;
 692         tr->tr_size = asize;
 693         list_insert_tail(tr_list, tr);
 694 
 695         /* see if it's OK with our parent */
 696         if (dd->dd_parent && parent_rsrv) {
 697                 boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
 698 
 699                 return (dsl_dir_tempreserve_impl(dd->dd_parent,
 700                     parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
 701         } else {
 702                 return (0);
 703         }
 704 }
 705 
 706 /*
 707  * Reserve space in this dsl_dir, to be used in this tx's txg.
 708  * After the space has been dirtied (and dsl_dir_willuse_space()
 709  * has been called), the reservation should be canceled, using
 710  * dsl_dir_tempreserve_clear().
 711  */
 712 int
 713 dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
 714     uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
 715 {
 716         int err;
 717         list_t *tr_list;
 718 
 719         if (asize == 0) {
 720                 *tr_cookiep = NULL;
 721                 return (0);
 722         }
 723 
 724         tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
 725         list_create(tr_list, sizeof (struct tempreserve),
 726             offsetof(struct tempreserve, tr_node));
 727         ASSERT3S(asize, >, 0);
 728         ASSERT3S(fsize, >=, 0);
 729 
 730         err = arc_tempreserve_space(lsize, tx->tx_txg);
 731         if (err == 0) {
 732                 struct tempreserve *tr;
 733 
 734                 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 735                 tr->tr_size = lsize;
 736                 list_insert_tail(tr_list, tr);
 737 
 738                 err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
 739         } else {
 740                 if (err == EAGAIN) {
 741                         txg_delay(dd->dd_pool, tx->tx_txg,
 742                             MSEC2NSEC(10), MSEC2NSEC(10));
 743                         err = SET_ERROR(ERESTART);
 744                 }
 745                 dsl_pool_memory_pressure(dd->dd_pool);
 746         }
 747 
 748         if (err == 0) {
 749                 struct tempreserve *tr;
 750 
 751                 tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 752                 tr->tr_dp = dd->dd_pool;
 753                 tr->tr_size = asize;
 754                 list_insert_tail(tr_list, tr);
 755 
 756                 err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
 757                     FALSE, asize > usize, tr_list, tx, TRUE);
 758         }
 759 
 760         if (err != 0)
 761                 dsl_dir_tempreserve_clear(tr_list, tx);
 762         else
 763                 *tr_cookiep = tr_list;
 764 
 765         return (err);
 766 }
 767 
 768 /*
 769  * Clear a temporary reservation that we previously made with
 770  * dsl_dir_tempreserve_space().
 771  */
 772 void
 773 dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
 774 {
 775         int txgidx = tx->tx_txg & TXG_MASK;
 776         list_t *tr_list = tr_cookie;
 777         struct tempreserve *tr;
 778 
 779         ASSERT3U(tx->tx_txg, !=, 0);
 780 
 781         if (tr_cookie == NULL)
 782                 return;
 783 
 784         while (tr = list_head(tr_list)) {
 785                 if (tr->tr_dp) {
 786                         dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
 787                 } else if (tr->tr_ds) {
 788                         mutex_enter(&tr->tr_ds->dd_lock);
 789                         ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
 790                             tr->tr_size);
 791                         tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
 792                         mutex_exit(&tr->tr_ds->dd_lock);
 793                 } else {
 794                         arc_tempreserve_clear(tr->tr_size);
 795                 }
 796                 list_remove(tr_list, tr);
 797                 kmem_free(tr, sizeof (struct tempreserve));
 798         }
 799 
 800         kmem_free(tr_list, sizeof (list_t));
 801 }
 802 
 803 static void
 804 dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
 805 {
 806         int64_t parent_space;
 807         uint64_t est_used;
 808 
 809         mutex_enter(&dd->dd_lock);
 810         if (space > 0)
 811                 dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
 812 
 813         est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
 814         parent_space = parent_delta(dd, est_used, space);
 815         mutex_exit(&dd->dd_lock);
 816 
 817         /* Make sure that we clean up dd_space_to* */
 818         dsl_dir_dirty(dd, tx);
 819 
 820         /* XXX this is potentially expensive and unnecessary... */
 821         if (parent_space && dd->dd_parent)
 822                 dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
 823 }
 824 
 825 /*
 826  * Call in open context when we think we're going to write/free space,
 827  * eg. when dirtying data.  Be conservative (ie. OK to write less than
 828  * this or free more than this, but don't write more or free less).
 829  */
 830 void
 831 dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
 832 {
 833         dsl_pool_willuse_space(dd->dd_pool, space, tx);
 834         dsl_dir_willuse_space_impl(dd, space, tx);
 835 }
 836 
 837 /* call from syncing context when we actually write/free space for this dd */
 838 void
 839 dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
 840     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
 841 {
 842         int64_t accounted_delta;
 843         boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
 844 
 845         ASSERT(dmu_tx_is_syncing(tx));
 846         ASSERT(type < DD_USED_NUM);
 847 
 848         if (needlock)
 849                 mutex_enter(&dd->dd_lock);
 850         accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
 851         ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
 852         ASSERT(compressed >= 0 ||
 853             dd->dd_phys->dd_compressed_bytes >= -compressed);
 854         ASSERT(uncompressed >= 0 ||
 855             dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
 856         dmu_buf_will_dirty(dd->dd_dbuf, tx);
 857         dd->dd_phys->dd_used_bytes += used;
 858         dd->dd_phys->dd_uncompressed_bytes += uncompressed;
 859         dd->dd_phys->dd_compressed_bytes += compressed;
 860 
 861         if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
 862                 ASSERT(used > 0 ||
 863                     dd->dd_phys->dd_used_breakdown[type] >= -used);
 864                 dd->dd_phys->dd_used_breakdown[type] += used;
 865 #ifdef DEBUG
 866                 dd_used_t t;
 867                 uint64_t u = 0;
 868                 for (t = 0; t < DD_USED_NUM; t++)
 869                         u += dd->dd_phys->dd_used_breakdown[t];
 870                 ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
 871 #endif
 872         }
 873         if (needlock)
 874                 mutex_exit(&dd->dd_lock);
 875 
 876         if (dd->dd_parent != NULL) {
 877                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
 878                     accounted_delta, compressed, uncompressed, tx);
 879                 dsl_dir_transfer_space(dd->dd_parent,
 880                     used - accounted_delta,
 881                     DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
 882         }
 883 }
 884 
 885 void
 886 dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
 887     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
 888 {
 889         boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
 890 
 891         ASSERT(dmu_tx_is_syncing(tx));
 892         ASSERT(oldtype < DD_USED_NUM);
 893         ASSERT(newtype < DD_USED_NUM);
 894 
 895         if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
 896                 return;
 897 
 898         if (needlock)
 899                 mutex_enter(&dd->dd_lock);
 900         ASSERT(delta > 0 ?
 901             dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
 902             dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
 903         ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
 904         dmu_buf_will_dirty(dd->dd_dbuf, tx);
 905         dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
 906         dd->dd_phys->dd_used_breakdown[newtype] += delta;
 907         if (needlock)
 908                 mutex_exit(&dd->dd_lock);
 909 }
 910 
/*
 * Argument bundle passed through dsl_sync_task() to the quota and
 * reservation check/sync callbacks.
 */
typedef struct dsl_dir_set_qr_arg {
        /* name handed to dsl_dataset_hold() to find the target */
        const char *ddsqra_name;
        /* property source forwarded to the dsl_prop_* calls */
        zprop_source_t ddsqra_source;
        /* requested quota or reservation value */
        uint64_t ddsqra_value;
} dsl_dir_set_qr_arg_t;
 916 
 917 static int
 918 dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
 919 {
 920         dsl_dir_set_qr_arg_t *ddsqra = arg;
 921         dsl_pool_t *dp = dmu_tx_pool(tx);
 922         dsl_dataset_t *ds;
 923         int error;
 924         uint64_t towrite, newval;
 925 
 926         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
 927         if (error != 0)
 928                 return (error);
 929 
 930         error = dsl_prop_predict(ds->ds_dir, "quota",
 931             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
 932         if (error != 0) {
 933                 dsl_dataset_rele(ds, FTAG);
 934                 return (error);
 935         }
 936 
 937         if (newval == 0) {
 938                 dsl_dataset_rele(ds, FTAG);
 939                 return (0);
 940         }
 941 
 942         mutex_enter(&ds->ds_dir->dd_lock);
 943         /*
 944          * If we are doing the preliminary check in open context, and
 945          * there are pending changes, then don't fail it, since the
 946          * pending changes could under-estimate the amount of space to be
 947          * freed up.
 948          */
 949         towrite = dsl_dir_space_towrite(ds->ds_dir);
 950         if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
 951             (newval < ds->ds_dir->dd_phys->dd_reserved ||
 952             newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
 953                 error = SET_ERROR(ENOSPC);
 954         }
 955         mutex_exit(&ds->ds_dir->dd_lock);
 956         dsl_dataset_rele(ds, FTAG);
 957         return (error);
 958 }
 959 
 960 static void
 961 dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
 962 {
 963         dsl_dir_set_qr_arg_t *ddsqra = arg;
 964         dsl_pool_t *dp = dmu_tx_pool(tx);
 965         dsl_dataset_t *ds;
 966         uint64_t newval;
 967 
 968         VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
 969 
 970         dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
 971             ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
 972             &ddsqra->ddsqra_value, tx);
 973 
 974         VERIFY0(dsl_prop_get_int_ds(ds,
 975             zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
 976 
 977         dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
 978         mutex_enter(&ds->ds_dir->dd_lock);
 979         ds->ds_dir->dd_phys->dd_quota = newval;
 980         mutex_exit(&ds->ds_dir->dd_lock);
 981         dsl_dataset_rele(ds, FTAG);
 982 }
 983 
 984 int
 985 dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
 986 {
 987         dsl_dir_set_qr_arg_t ddsqra;
 988 
 989         ddsqra.ddsqra_name = ddname;
 990         ddsqra.ddsqra_source = source;
 991         ddsqra.ddsqra_value = quota;
 992 
 993         return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
 994             dsl_dir_set_quota_sync, &ddsqra, 0));
 995 }
 996 
 997 int
 998 dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
 999 {
1000         dsl_dir_set_qr_arg_t *ddsqra = arg;
1001         dsl_pool_t *dp = dmu_tx_pool(tx);
1002         dsl_dataset_t *ds;
1003         dsl_dir_t *dd;
1004         uint64_t newval, used, avail;
1005         int error;
1006 
1007         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1008         if (error != 0)
1009                 return (error);
1010         dd = ds->ds_dir;
1011 
1012         /*
1013          * If we are doing the preliminary check in open context, the
1014          * space estimates may be inaccurate.
1015          */
1016         if (!dmu_tx_is_syncing(tx)) {
1017                 dsl_dataset_rele(ds, FTAG);
1018                 return (0);
1019         }
1020 
1021         error = dsl_prop_predict(ds->ds_dir,
1022             zfs_prop_to_name(ZFS_PROP_RESERVATION),
1023             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1024         if (error != 0) {
1025                 dsl_dataset_rele(ds, FTAG);
1026                 return (error);
1027         }
1028 
1029         mutex_enter(&dd->dd_lock);
1030         used = dd->dd_phys->dd_used_bytes;
1031         mutex_exit(&dd->dd_lock);
1032 
1033         if (dd->dd_parent) {
1034                 avail = dsl_dir_space_available(dd->dd_parent,
1035                     NULL, 0, FALSE);
1036         } else {
1037                 avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1038         }
1039 
1040         if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
1041                 uint64_t delta = MAX(used, newval) -
1042                     MAX(used, dd->dd_phys->dd_reserved);
1043 
1044                 if (delta > avail ||
1045                     (dd->dd_phys->dd_quota > 0 &&
1046                     newval > dd->dd_phys->dd_quota))
1047                         error = SET_ERROR(ENOSPC);
1048         }
1049 
1050         dsl_dataset_rele(ds, FTAG);
1051         return (error);
1052 }
1053 
1054 void
1055 dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
1056 {
1057         uint64_t used;
1058         int64_t delta;
1059 
1060         dmu_buf_will_dirty(dd->dd_dbuf, tx);
1061 
1062         mutex_enter(&dd->dd_lock);
1063         used = dd->dd_phys->dd_used_bytes;
1064         delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
1065         dd->dd_phys->dd_reserved = value;
1066 
1067         if (dd->dd_parent != NULL) {
1068                 /* Roll up this additional usage into our ancestors */
1069                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1070                     delta, 0, 0, tx);
1071         }
1072         mutex_exit(&dd->dd_lock);
1073 }
1074 
1075 
1076 static void
1077 dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
1078 {
1079         dsl_dir_set_qr_arg_t *ddsqra = arg;
1080         dsl_pool_t *dp = dmu_tx_pool(tx);
1081         dsl_dataset_t *ds;
1082         uint64_t newval;
1083 
1084         VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
1085 
1086         dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
1087             ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1088             &ddsqra->ddsqra_value, tx);
1089 
1090         VERIFY0(dsl_prop_get_int_ds(ds,
1091             zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
1092 
1093         dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
1094         dsl_dataset_rele(ds, FTAG);
1095 }
1096 
1097 int
1098 dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
1099     uint64_t reservation)
1100 {
1101         dsl_dir_set_qr_arg_t ddsqra;
1102 
1103         ddsqra.ddsqra_name = ddname;
1104         ddsqra.ddsqra_source = source;
1105         ddsqra.ddsqra_value = reservation;
1106 
1107         return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
1108             dsl_dir_set_reservation_sync, &ddsqra, 0));
1109 }
1110 
1111 static dsl_dir_t *
1112 closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1113 {
1114         for (; ds1; ds1 = ds1->dd_parent) {
1115                 dsl_dir_t *dd;
1116                 for (dd = ds2; dd; dd = dd->dd_parent) {
1117                         if (ds1 == dd)
1118                                 return (dd);
1119                 }
1120         }
1121         return (NULL);
1122 }
1123 
1124 /*
1125  * If delta is applied to dd, how much of that delta would be applied to
1126  * ancestor?  Syncing context only.
1127  */
1128 static int64_t
1129 would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1130 {
1131         if (dd == ancestor)
1132                 return (delta);
1133 
1134         mutex_enter(&dd->dd_lock);
1135         delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
1136         mutex_exit(&dd->dd_lock);
1137         return (would_change(dd->dd_parent, delta, ancestor));
1138 }
1139 
/*
 * Argument bundle passed through dsl_sync_task() to the rename
 * check/sync callbacks.
 */
typedef struct dsl_dir_rename_arg {
        /* current name; resolved with dsl_dir_hold() to the dir being moved */
        const char *ddra_oldname;
        /* requested name; dsl_dir_hold() splits it into new parent + tail */
        const char *ddra_newname;
} dsl_dir_rename_arg_t;
1144 
1145 /* ARGSUSED */
1146 static int
1147 dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
1148 {
1149         int *deltap = arg;
1150         char namebuf[MAXNAMELEN];
1151 
1152         dsl_dataset_name(ds, namebuf);
1153 
1154         if (strlen(namebuf) + *deltap >= MAXNAMELEN)
1155                 return (SET_ERROR(ENAMETOOLONG));
1156         return (0);
1157 }
1158 
1159 static int
1160 dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
1161 {
1162         dsl_dir_rename_arg_t *ddra = arg;
1163         dsl_pool_t *dp = dmu_tx_pool(tx);
1164         dsl_dir_t *dd, *newparent;
1165         const char *mynewname;
1166         int error;
1167         int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
1168 
1169         /* target dir should exist */
1170         error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
1171         if (error != 0)
1172                 return (error);
1173 
1174         /* new parent should exist */
1175         error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
1176             &newparent, &mynewname);
1177         if (error != 0) {
1178                 dsl_dir_rele(dd, FTAG);
1179                 return (error);
1180         }
1181 
1182         /* can't rename to different pool */
1183         if (dd->dd_pool != newparent->dd_pool) {
1184                 dsl_dir_rele(newparent, FTAG);
1185                 dsl_dir_rele(dd, FTAG);
1186                 return (SET_ERROR(ENXIO));
1187         }
1188 
1189         /* new name should not already exist */
1190         if (mynewname == NULL) {
1191                 dsl_dir_rele(newparent, FTAG);
1192                 dsl_dir_rele(dd, FTAG);
1193                 return (SET_ERROR(EEXIST));
1194         }
1195 
1196         /* if the name length is growing, validate child name lengths */
1197         if (delta > 0) {
1198                 error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
1199                     &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1200                 if (error != 0) {
1201                         dsl_dir_rele(newparent, FTAG);
1202                         dsl_dir_rele(dd, FTAG);
1203                         return (error);
1204                 }
1205         }
1206 
1207         if (newparent != dd->dd_parent) {
1208                 /* is there enough space? */
1209                 uint64_t myspace =
1210                     MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1211 
1212                 /* no rename into our descendant */
1213                 if (closest_common_ancestor(dd, newparent) == dd) {
1214                         dsl_dir_rele(newparent, FTAG);
1215                         dsl_dir_rele(dd, FTAG);
1216                         return (SET_ERROR(EINVAL));
1217                 }
1218 
1219                 error = dsl_dir_transfer_possible(dd->dd_parent,
1220                     newparent, myspace);
1221                 if (error != 0) {
1222                         dsl_dir_rele(newparent, FTAG);
1223                         dsl_dir_rele(dd, FTAG);
1224                         return (error);
1225                 }
1226         }
1227 
1228         dsl_dir_rele(newparent, FTAG);
1229         dsl_dir_rele(dd, FTAG);
1230         return (0);
1231 }
1232 
1233 static void
1234 dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
1235 {
1236         dsl_dir_rename_arg_t *ddra = arg;
1237         dsl_pool_t *dp = dmu_tx_pool(tx);
1238         dsl_dir_t *dd, *newparent;
1239         const char *mynewname;
1240         int error;
1241         objset_t *mos = dp->dp_meta_objset;
1242 
1243         VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
1244         VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
1245             &mynewname));
1246 
1247         /* Log this before we change the name. */
1248         spa_history_log_internal_dd(dd, "rename", tx,
1249             "-> %s", ddra->ddra_newname);
1250 
1251         if (newparent != dd->dd_parent) {
1252                 dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1253                     -dd->dd_phys->dd_used_bytes,
1254                     -dd->dd_phys->dd_compressed_bytes,
1255                     -dd->dd_phys->dd_uncompressed_bytes, tx);
1256                 dsl_dir_diduse_space(newparent, DD_USED_CHILD,
1257                     dd->dd_phys->dd_used_bytes,
1258                     dd->dd_phys->dd_compressed_bytes,
1259                     dd->dd_phys->dd_uncompressed_bytes, tx);
1260 
1261                 if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
1262                         uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
1263                             dd->dd_phys->dd_used_bytes;
1264 
1265                         dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1266                             -unused_rsrv, 0, 0, tx);
1267                         dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
1268                             unused_rsrv, 0, 0, tx);
1269                 }
1270         }
1271 
1272         dmu_buf_will_dirty(dd->dd_dbuf, tx);
1273 
1274         /* remove from old parent zapobj */
1275         error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1276             dd->dd_myname, tx);
1277         ASSERT0(error);
1278 
1279         (void) strcpy(dd->dd_myname, mynewname);
1280         dsl_dir_rele(dd->dd_parent, dd);
1281         dd->dd_phys->dd_parent_obj = newparent->dd_object;
1282         VERIFY0(dsl_dir_hold_obj(dp,
1283             newparent->dd_object, NULL, dd, &dd->dd_parent));
1284 
1285         /* add to new parent zapobj */
1286         VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
1287             dd->dd_myname, 8, 1, &dd->dd_object, tx));
1288 
1289         dsl_prop_notify_all(dd);
1290 
1291         dsl_dir_rele(newparent, FTAG);
1292         dsl_dir_rele(dd, FTAG);
1293 }
1294 
1295 int
1296 dsl_dir_rename(const char *oldname, const char *newname)
1297 {
1298         dsl_dir_rename_arg_t ddra;
1299 
1300         ddra.ddra_oldname = oldname;
1301         ddra.ddra_newname = newname;
1302 
1303         return (dsl_sync_task(oldname,
1304             dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
1305 }
1306 
1307 int
1308 dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
1309 {
1310         dsl_dir_t *ancestor;
1311         int64_t adelta;
1312         uint64_t avail;
1313 
1314         ancestor = closest_common_ancestor(sdd, tdd);
1315         adelta = would_change(sdd, -space, ancestor);
1316         avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
1317         if (avail < space)
1318                 return (SET_ERROR(ENOSPC));
1319 
1320         return (0);
1321 }
1322 
1323 timestruc_t
1324 dsl_dir_snap_cmtime(dsl_dir_t *dd)
1325 {
1326         timestruc_t t;
1327 
1328         mutex_enter(&dd->dd_lock);
1329         t = dd->dd_snap_cmtime;
1330         mutex_exit(&dd->dd_lock);
1331 
1332         return (t);
1333 }
1334 
1335 void
1336 dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
1337 {
1338         timestruc_t t;
1339 
1340         gethrestime(&t);
1341         mutex_enter(&dd->dd_lock);
1342         dd->dd_snap_cmtime = t;
1343         mutex_exit(&dd->dd_lock);
1344 }