illumos-gate Wdiff usr/src/uts/common/fs/zfs/dsl_dir.c

Print this page

5610 zfs clone from different source and target pools produces coredump

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/zfs/dsl_dir.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dir.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying

↓ open down ↓

16 lines elided

↑ open up ↑

  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2013 Martin Matuska. All rights reserved.
  25   25   * Copyright (c) 2014 Joyent, Inc. All rights reserved.
  26   26   * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
       27 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  27   28   */
  28   29  
  29   30  #include <sys/dmu.h>
  30   31  #include <sys/dmu_objset.h>
  31   32  #include <sys/dmu_tx.h>
  32   33  #include <sys/dsl_dataset.h>
  33   34  #include <sys/dsl_dir.h>
  34   35  #include <sys/dsl_prop.h>
  35   36  #include <sys/dsl_synctask.h>
  36   37  #include <sys/dsl_deleg.h>

  37   38  #include <sys/dmu_impl.h>
  38   39  #include <sys/spa.h>
  39   40  #include <sys/metaslab.h>
  40   41  #include <sys/zap.h>
  41   42  #include <sys/zio.h>
  42   43  #include <sys/arc.h>
  43   44  #include <sys/sunddi.h>
  44   45  #include <sys/zfeature.h>
  45   46  #include <sys/policy.h>
  46   47  #include <sys/zfs_znode.h>
  47   48  #include "zfs_namecheck.h"
  48   49  #include "zfs_prop.h"
  49   50  
  50   51  /*
  51   52   * Filesystem and Snapshot Limits
  52   53   * ------------------------------
  53   54   *
  54   55   * These limits are used to restrict the number of filesystems and/or snapshots
  55   56   * that can be created at a given level in the tree or below. A typical
  56   57   * use-case is with a delegated dataset where the administrator wants to ensure
  57   58   * that a user within the zone is not creating too many additional filesystems
  58   59   * or snapshots, even though they're not exceeding their space quota.
  59   60   *
  60   61   * The filesystem and snapshot counts are stored as extensible properties. This
  61   62   * capability is controlled by a feature flag and must be enabled to be used.
  62   63   * Once enabled, the feature is not active until the first limit is set. At
  63   64   * that point, future operations to create/destroy filesystems or snapshots
  64   65   * will validate and update the counts.
  65   66   *
  66   67   * Because the count properties will not exist before the feature is active,
  67   68   * the counts are updated when a limit is first set on an uninitialized
  68   69   * dsl_dir node in the tree (The filesystem/snapshot count on a node includes
  69   70   * all of the nested filesystems/snapshots. Thus, a new leaf node has a
  70   71   * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and
  71   72   * snapshot count properties on a node indicate uninitialized counts on that
  72   73   * node.) When first setting a limit on an uninitialized node, the code starts
  73   74   * at the filesystem with the new limit and descends into all sub-filesystems
  74   75   * to add the count properties.
  75   76   *
  76   77   * In practice this is lightweight since a limit is typically set when the
  77   78   * filesystem is created and thus has no children. Once valid, changing the
  78   79   * limit value won't require a re-traversal since the counts are already valid.
  79   80   * When recursively fixing the counts, if a node with a limit is encountered
  80   81   * during the descent, the counts are known to be valid and there is no need to
  81   82   * descend into that filesystem's children. The counts on filesystems above the
  82   83   * one with the new limit will still be uninitialized, unless a limit is
  83   84   * eventually set on one of those filesystems. The counts are always recursively
  84   85   * updated when a limit is set on a dataset, unless there is already a limit.
  85   86   * When a new limit value is set on a filesystem with an existing limit, it is
  86   87   * possible for the new limit to be less than the current count at that level
  87   88   * since a user who can change the limit is also allowed to exceed the limit.
  88   89   *
  89   90   * Once the feature is active, then whenever a filesystem or snapshot is
  90   91   * created, the code recurses up the tree, validating the new count against the
  91   92   * limit at each initialized level. In practice, most levels will not have a
  92   93   * limit set. If there is a limit at any initialized level up the tree, the
  93   94   * check must pass or the creation will fail. Likewise, when a filesystem or
  94   95   * snapshot is destroyed, the counts are recursively adjusted all the way up
  95   96   * the initialized nodes in the tree. Renaming a filesystem into a different
  96   97   * point in the tree will first validate, then update the counts on each branch
  97   98   * up to the common ancestor. A receive will also validate the counts and then
  98   99   * update them.
  99  100   *
 100  101   * An exception to the above behavior is that the limit is not enforced if the
 101  102   * user has permission to modify the limit. This is primarily so that
 102  103   * recursive snapshots in the global zone always work. We want to prevent a
 103  104   * denial-of-service in which a lower level delegated dataset could max out its
 104  105   * limit and thus block recursive snapshots from being taken in the global zone.
 105  106   * Because of this, it is possible for the snapshot count to be over the limit
 106  107   * and snapshots taken in the global zone could cause a lower level dataset to
 107  108   * hit or exceed its limit. The administrator taking the global zone recursive
 108  109   * snapshot should be aware of this side-effect and behave accordingly.
 109  110   * For consistency, the filesystem limit is also not enforced if the user can
 110  111   * modify the limit.
 111  112   *
 112  113   * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check()
 113  114   * and updated by dsl_fs_ss_count_adjust(). A new limit value is set up in
 114  115   * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by
 115  116   * dsl_dir_init_fs_ss_count().
 116  117   *
 117  118   * There is a special case when we receive a filesystem that already exists. In
 118  119   * this case a temporary clone name of %X is created (see dmu_recv_begin). We
 119  120   * never update the filesystem counts for temporary clones.
 120  121   *
 121  122   * Likewise, we do not update the snapshot counts for temporary snapshots,
 122  123   * such as those created by zfs diff.
 123  124   */
 124  125  
 125  126  extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
 126  127  
 127  128  static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
 128  129  
 129  130  static void
 130  131  dsl_dir_evict(void *dbu)
 131  132  {
 132  133          dsl_dir_t *dd = dbu;
 133  134          dsl_pool_t *dp = dd->dd_pool;
 134  135          int t;
 135  136  
 136  137          dd->dd_dbuf = NULL;
 137  138  
 138  139          for (t = 0; t < TXG_SIZE; t++) {
 139  140                  ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
 140  141                  ASSERT(dd->dd_tempreserved[t] == 0);
 141  142                  ASSERT(dd->dd_space_towrite[t] == 0);
 142  143          }
 143  144  
 144  145          if (dd->dd_parent)
 145  146                  dsl_dir_async_rele(dd->dd_parent, dd);
 146  147  
 147  148          spa_async_close(dd->dd_pool->dp_spa, dd);
 148  149  
 149  150          /*
 150  151           * The props callback list should have been cleaned up by
 151  152           * objset_evict().
 152  153           */
 153  154          list_destroy(&dd->dd_prop_cbs);
 154  155          mutex_destroy(&dd->dd_lock);
 155  156          kmem_free(dd, sizeof (dsl_dir_t));
 156  157  }
 157  158  
 158  159  int
 159  160  dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
 160  161      const char *tail, void *tag, dsl_dir_t **ddp)
 161  162  {
 162  163          dmu_buf_t *dbuf;
 163  164          dsl_dir_t *dd;
 164  165          int err;
 165  166  
 166  167          ASSERT(dsl_pool_config_held(dp));
 167  168  
 168  169          err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
 169  170          if (err != 0)
 170  171                  return (err);
 171  172          dd = dmu_buf_get_user(dbuf);
 172  173  #ifdef ZFS_DEBUG
 173  174          {
 174  175                  dmu_object_info_t doi;
 175  176                  dmu_object_info_from_db(dbuf, &doi);
 176  177                  ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
 177  178                  ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
 178  179          }
 179  180  #endif
 180  181          if (dd == NULL) {
 181  182                  dsl_dir_t *winner;
 182  183  
 183  184                  dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
 184  185                  dd->dd_object = ddobj;
 185  186                  dd->dd_dbuf = dbuf;
 186  187                  dd->dd_pool = dp;
 187  188                  mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
 188  189  
 189  190                  list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
 190  191                      offsetof(dsl_prop_cb_record_t, cbr_node));
 191  192  
 192  193                  dsl_dir_snap_cmtime_update(dd);
 193  194  
 194  195                  if (dsl_dir_phys(dd)->dd_parent_obj) {
 195  196                          err = dsl_dir_hold_obj(dp,
 196  197                              dsl_dir_phys(dd)->dd_parent_obj, NULL, dd,
 197  198                              &dd->dd_parent);
 198  199                          if (err != 0)
 199  200                                  goto errout;
 200  201                          if (tail) {
 201  202  #ifdef ZFS_DEBUG
 202  203                                  uint64_t foundobj;
 203  204  
 204  205                                  err = zap_lookup(dp->dp_meta_objset,
 205  206                                      dsl_dir_phys(dd->dd_parent)->
 206  207                                      dd_child_dir_zapobj, tail,
 207  208                                      sizeof (foundobj), 1, &foundobj);
 208  209                                  ASSERT(err || foundobj == ddobj);
 209  210  #endif
 210  211                                  (void) strcpy(dd->dd_myname, tail);
 211  212                          } else {
 212  213                                  err = zap_value_search(dp->dp_meta_objset,
 213  214                                      dsl_dir_phys(dd->dd_parent)->
 214  215                                      dd_child_dir_zapobj,
 215  216                                      ddobj, 0, dd->dd_myname);
 216  217                          }
 217  218                          if (err != 0)
 218  219                                  goto errout;
 219  220                  } else {
 220  221                          (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
 221  222                  }
 222  223  
 223  224                  if (dsl_dir_is_clone(dd)) {
 224  225                          dmu_buf_t *origin_bonus;
 225  226                          dsl_dataset_phys_t *origin_phys;
 226  227  
 227  228                          /*
 228  229                           * We can't open the origin dataset, because
 229  230                           * that would require opening this dsl_dir.
 230  231                           * Just look at its phys directly instead.
 231  232                           */
 232  233                          err = dmu_bonus_hold(dp->dp_meta_objset,
 233  234                              dsl_dir_phys(dd)->dd_origin_obj, FTAG,
 234  235                              &origin_bonus);
 235  236                          if (err != 0)
 236  237                                  goto errout;
 237  238                          origin_phys = origin_bonus->db_data;
 238  239                          dd->dd_origin_txg =
 239  240                              origin_phys->ds_creation_txg;
 240  241                          dmu_buf_rele(origin_bonus, FTAG);
 241  242                  }
 242  243  
 243  244                  dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
 244  245                  winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
 245  246                  if (winner != NULL) {
 246  247                          if (dd->dd_parent)
 247  248                                  dsl_dir_rele(dd->dd_parent, dd);
 248  249                          mutex_destroy(&dd->dd_lock);
 249  250                          kmem_free(dd, sizeof (dsl_dir_t));
 250  251                          dd = winner;
 251  252                  } else {
 252  253                          spa_open_ref(dp->dp_spa, dd);
 253  254                  }
 254  255          }
 255  256  
 256  257          /*
 257  258           * The dsl_dir_t has both open-to-close and instantiate-to-evict
 258  259           * holds on the spa.  We need the open-to-close holds because
 259  260           * otherwise the spa_refcnt wouldn't change when we open a
 260  261           * dir which the spa also has open, so we could incorrectly
 261  262           * think it was OK to unload/export/destroy the pool.  We need
 262  263           * the instantiate-to-evict hold because the dsl_dir_t has a
 263  264           * pointer to the dd_pool, which has a pointer to the spa_t.
 264  265           */
 265  266          spa_open_ref(dp->dp_spa, tag);
 266  267          ASSERT3P(dd->dd_pool, ==, dp);
 267  268          ASSERT3U(dd->dd_object, ==, ddobj);
 268  269          ASSERT3P(dd->dd_dbuf, ==, dbuf);
 269  270          *ddp = dd;
 270  271          return (0);
 271  272  
 272  273  errout:
 273  274          if (dd->dd_parent)
 274  275                  dsl_dir_rele(dd->dd_parent, dd);
 275  276          mutex_destroy(&dd->dd_lock);
 276  277          kmem_free(dd, sizeof (dsl_dir_t));
 277  278          dmu_buf_rele(dbuf, tag);
 278  279          return (err);
 279  280  }
 280  281  
 281  282  void
 282  283  dsl_dir_rele(dsl_dir_t *dd, void *tag)
 283  284  {
 284  285          dprintf_dd(dd, "%s\n", "");
 285  286          spa_close(dd->dd_pool->dp_spa, tag);
 286  287          dmu_buf_rele(dd->dd_dbuf, tag);
 287  288  }
 288  289  
 289  290  /*
 290  291   * Remove a reference to the given dsl dir that is being asynchronously
 291  292   * released.  Async releases occur from a taskq performing eviction of
 292  293   * dsl datasets and dirs.  This process is identical to a normal release
 293  294   * with the exception of using the async API for releasing the reference on
 294  295   * the spa.
 295  296   */
 296  297  void
 297  298  dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
 298  299  {
 299  300          dprintf_dd(dd, "%s\n", "");
 300  301          spa_async_close(dd->dd_pool->dp_spa, tag);
 301  302          dmu_buf_rele(dd->dd_dbuf, tag);
 302  303  }
 303  304  
 304  305  /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
 305  306  void
 306  307  dsl_dir_name(dsl_dir_t *dd, char *buf)
 307  308  {
 308  309          if (dd->dd_parent) {
 309  310                  dsl_dir_name(dd->dd_parent, buf);
 310  311                  (void) strcat(buf, "/");
 311  312          } else {
 312  313                  buf[0] = '\0';
 313  314          }
 314  315          if (!MUTEX_HELD(&dd->dd_lock)) {
 315  316                  /*
 316  317                   * recursive mutex so that we can use
 317  318                   * dprintf_dd() with dd_lock held
 318  319                   */
 319  320                  mutex_enter(&dd->dd_lock);
 320  321                  (void) strcat(buf, dd->dd_myname);
 321  322                  mutex_exit(&dd->dd_lock);
 322  323          } else {
 323  324                  (void) strcat(buf, dd->dd_myname);
 324  325          }
 325  326  }
 326  327  
 327  328  /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
 328  329  int
 329  330  dsl_dir_namelen(dsl_dir_t *dd)
 330  331  {
 331  332          int result = 0;
 332  333  
 333  334          if (dd->dd_parent) {
 334  335                  /* parent's name + 1 for the "/" */
 335  336                  result = dsl_dir_namelen(dd->dd_parent) + 1;
 336  337          }
 337  338  
 338  339          if (!MUTEX_HELD(&dd->dd_lock)) {
 339  340                  /* see dsl_dir_name */
 340  341                  mutex_enter(&dd->dd_lock);
 341  342                  result += strlen(dd->dd_myname);
 342  343                  mutex_exit(&dd->dd_lock);
 343  344          } else {
 344  345                  result += strlen(dd->dd_myname);
 345  346          }
 346  347  
 347  348          return (result);
 348  349  }
 349  350  
 350  351  static int
 351  352  getcomponent(const char *path, char *component, const char **nextp)
 352  353  {
 353  354          char *p;
 354  355  
 355  356          if ((path == NULL) || (path[0] == '\0'))
 356  357                  return (SET_ERROR(ENOENT));
 357  358          /* This would be a good place to reserve some namespace... */
 358  359          p = strpbrk(path, "/@");
 359  360          if (p && (p[1] == '/' || p[1] == '@')) {
 360  361                  /* two separators in a row */
 361  362                  return (SET_ERROR(EINVAL));
 362  363          }
 363  364          if (p == NULL || p == path) {
 364  365                  /*
 365  366                   * if the first thing is an @ or /, it had better be an
 366  367                   * @ and it had better not have any more ats or slashes,
 367  368                   * and it had better have something after the @.
 368  369                   */
 369  370                  if (p != NULL &&
 370  371                      (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
 371  372                          return (SET_ERROR(EINVAL));
 372  373                  if (strlen(path) >= MAXNAMELEN)
 373  374                          return (SET_ERROR(ENAMETOOLONG));
 374  375                  (void) strcpy(component, path);
 375  376                  p = NULL;
 376  377          } else if (p[0] == '/') {
 377  378                  if (p - path >= MAXNAMELEN)
 378  379                          return (SET_ERROR(ENAMETOOLONG));
 379  380                  (void) strncpy(component, path, p - path);
 380  381                  component[p - path] = '\0';
 381  382                  p++;
 382  383          } else if (p[0] == '@') {
 383  384                  /*
 384  385                   * if the next separator is an @, there better not be
 385  386                   * any more slashes.
 386  387                   */
 387  388                  if (strchr(path, '/'))
 388  389                          return (SET_ERROR(EINVAL));
 389  390                  if (p - path >= MAXNAMELEN)
 390  391                          return (SET_ERROR(ENAMETOOLONG));
 391  392                  (void) strncpy(component, path, p - path);
 392  393                  component[p - path] = '\0';
 393  394          } else {
 394  395                  panic("invalid p=%p", (void *)p);
 395  396          }
 396  397          *nextp = p;
 397  398          return (0);
 398  399  }
 399  400  
 400  401  /*
 401  402   * Return the dsl_dir_t, and possibly the last component which couldn't
 402  403   * be found in *tailp.  The name must be in the specified dsl_pool_t.  This
 403  404   * thread must hold the dp_config_rwlock for the pool.  Returns an error if
 404  405   * the path is bogus, or if tailp==NULL and we couldn't parse the whole name.
 405  406   * (*tailp)[0] == '@' means that the last component is a snapshot.
 406  407   */
 407  408  int
 408  409  dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
 409  410      dsl_dir_t **ddp, const char **tailp)
 410  411  {
 411  412          char buf[MAXNAMELEN];
 412  413          const char *spaname, *next, *nextnext = NULL;
 413  414          int err;

↓ open down ↓

377 lines elided

↑ open up ↑

 414  415          dsl_dir_t *dd;
 415  416          uint64_t ddobj;
 416  417  
 417  418          err = getcomponent(name, buf, &next);
 418  419          if (err != 0)
 419  420                  return (err);
 420  421  
 421  422          /* Make sure the name is in the specified pool. */
 422  423          spaname = spa_name(dp->dp_spa);
 423  424          if (strcmp(buf, spaname) != 0)
 424      -                return (SET_ERROR(EINVAL));
      425 +                return (SET_ERROR(EXDEV));
 425  426  
 426  427          ASSERT(dsl_pool_config_held(dp));
 427  428  
 428  429          err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
 429  430          if (err != 0) {
 430  431                  return (err);
 431  432          }
 432  433  
 433  434          while (next != NULL) {
 434  435                  dsl_dir_t *child_dd;

 435  436                  err = getcomponent(next, buf, &nextnext);
 436  437                  if (err != 0)
 437  438                          break;
 438  439                  ASSERT(next[0] != '\0');
 439  440                  if (next[0] == '@')
 440  441                          break;
 441  442                  dprintf("looking up %s in obj%lld\n",
 442  443                      buf, dsl_dir_phys(dd)->dd_child_dir_zapobj);
 443  444  
 444  445                  err = zap_lookup(dp->dp_meta_objset,
 445  446                      dsl_dir_phys(dd)->dd_child_dir_zapobj,
 446  447                      buf, sizeof (ddobj), 1, &ddobj);
 447  448                  if (err != 0) {
 448  449                          if (err == ENOENT)
 449  450                                  err = 0;
 450  451                          break;
 451  452                  }
 452  453  
 453  454                  err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
 454  455                  if (err != 0)
 455  456                          break;
 456  457                  dsl_dir_rele(dd, tag);
 457  458                  dd = child_dd;
 458  459                  next = nextnext;
 459  460          }
 460  461  
 461  462          if (err != 0) {
 462  463                  dsl_dir_rele(dd, tag);
 463  464                  return (err);
 464  465          }
 465  466  
 466  467          /*
 467  468           * It's an error if there's more than one component left, or
 468  469           * tailp==NULL and there's any component left.
 469  470           */
 470  471          if (next != NULL &&
 471  472              (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
 472  473                  /* bad path name */
 473  474                  dsl_dir_rele(dd, tag);
 474  475                  dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
 475  476                  err = SET_ERROR(ENOENT);
 476  477          }
 477  478          if (tailp != NULL)
 478  479                  *tailp = next;
 479  480          *ddp = dd;
 480  481          return (err);
 481  482  }
 482  483  
 483  484  /*
 484  485   * If the counts are already initialized for this filesystem and its
 485  486   * descendants then do nothing, otherwise initialize the counts.
 486  487   *
 487  488   * The counts on this filesystem, and those below, may be uninitialized due to
 488  489   * either the use of a pre-existing pool which did not support the
 489  490   * filesystem/snapshot limit feature, or one in which the feature had not yet
 490  491   * been enabled.
 491  492   *
 492  493   * Recursively descend the filesystem tree and update the filesystem/snapshot
 493  494   * counts on each filesystem below, then update the cumulative count on the
 494  495   * current filesystem. If the filesystem already has a count set on it,
 495  496   * then we know that its counts, and the counts on the filesystems below it,
 496  497   * are already correct, so we don't have to update this filesystem.
 497  498   */
 498  499  static void
 499  500  dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx)
 500  501  {
 501  502          uint64_t my_fs_cnt = 0;
 502  503          uint64_t my_ss_cnt = 0;
 503  504          dsl_pool_t *dp = dd->dd_pool;
 504  505          objset_t *os = dp->dp_meta_objset;
 505  506          zap_cursor_t *zc;
 506  507          zap_attribute_t *za;
 507  508          dsl_dataset_t *ds;
 508  509  
 509  510          ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT));
 510  511          ASSERT(dsl_pool_config_held(dp));
 511  512          ASSERT(dmu_tx_is_syncing(tx));
 512  513  
 513  514          dsl_dir_zapify(dd, tx);
 514  515  
 515  516          /*
 516  517           * If the filesystem count has already been initialized then we
 517  518           * don't need to recurse down any further.
 518  519           */
 519  520          if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0)
 520  521                  return;
 521  522  
 522  523          zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
 523  524          za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 524  525  
 525  526          /* Iterate my child dirs */
 526  527          for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj);
 527  528              zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) {
 528  529                  dsl_dir_t *chld_dd;
 529  530                  uint64_t count;
 530  531  
 531  532                  VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG,
 532  533                      &chld_dd));
 533  534  
 534  535                  /*
 535  536                   * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and
 536  537                   * temporary datasets.
 537  538                   */
 538  539                  if (chld_dd->dd_myname[0] == '$' ||
 539  540                      chld_dd->dd_myname[0] == '%') {
 540  541                          dsl_dir_rele(chld_dd, FTAG);
 541  542                          continue;
 542  543                  }
 543  544  
 544  545                  my_fs_cnt++;    /* count this child */
 545  546  
 546  547                  dsl_dir_init_fs_ss_count(chld_dd, tx);
 547  548  
 548  549                  VERIFY0(zap_lookup(os, chld_dd->dd_object,
 549  550                      DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count));
 550  551                  my_fs_cnt += count;
 551  552                  VERIFY0(zap_lookup(os, chld_dd->dd_object,
 552  553                      DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count));
 553  554                  my_ss_cnt += count;
 554  555  
 555  556                  dsl_dir_rele(chld_dd, FTAG);
 556  557          }
 557  558          zap_cursor_fini(zc);
 558  559          /* Count my snapshots (we counted children's snapshots above) */
 559  560          VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
 560  561              dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds));
 561  562  
 562  563          for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj);
 563  564              zap_cursor_retrieve(zc, za) == 0;
 564  565              zap_cursor_advance(zc)) {
 565  566                  /* Don't count temporary snapshots */
 566  567                  if (za->za_name[0] != '%')
 567  568                          my_ss_cnt++;
 568  569          }
 569  570          zap_cursor_fini(zc);
 570  571  
 571  572          dsl_dataset_rele(ds, FTAG);
 572  573  
 573  574          kmem_free(zc, sizeof (zap_cursor_t));
 574  575          kmem_free(za, sizeof (zap_attribute_t));
 575  576  
 576  577          /* we're in a sync task, update counts */
 577  578          dmu_buf_will_dirty(dd->dd_dbuf, tx);
 578  579          VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
 579  580              sizeof (my_fs_cnt), 1, &my_fs_cnt, tx));
 580  581          VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
 581  582              sizeof (my_ss_cnt), 1, &my_ss_cnt, tx));
 582  583  }
 583  584  
 584  585  static int
 585  586  dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx)
 586  587  {
 587  588          char *ddname = (char *)arg;
 588  589          dsl_pool_t *dp = dmu_tx_pool(tx);
 589  590          dsl_dataset_t *ds;
 590  591          dsl_dir_t *dd;
 591  592          int error;
 592  593  
 593  594          error = dsl_dataset_hold(dp, ddname, FTAG, &ds);
 594  595          if (error != 0)
 595  596                  return (error);
 596  597  
 597  598          if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
 598  599                  dsl_dataset_rele(ds, FTAG);
 599  600                  return (SET_ERROR(ENOTSUP));
 600  601          }
 601  602  
 602  603          dd = ds->ds_dir;
 603  604          if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) &&
 604  605              dsl_dir_is_zapified(dd) &&
 605  606              zap_contains(dp->dp_meta_objset, dd->dd_object,
 606  607              DD_FIELD_FILESYSTEM_COUNT) == 0) {
 607  608                  dsl_dataset_rele(ds, FTAG);
 608  609                  return (SET_ERROR(EALREADY));
 609  610          }
 610  611  
 611  612          dsl_dataset_rele(ds, FTAG);
 612  613          return (0);
 613  614  }
 614  615  
 615  616  static void
 616  617  dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx)
 617  618  {
 618  619          char *ddname = (char *)arg;
 619  620          dsl_pool_t *dp = dmu_tx_pool(tx);
 620  621          dsl_dataset_t *ds;
 621  622          spa_t *spa;
 622  623  
 623  624          VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds));
 624  625  
 625  626          spa = dsl_dataset_get_spa(ds);
 626  627  
 627  628          if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) {
 628  629                  /*
 629  630                   * Since the feature was not active and we're now setting a
 630  631                   * limit, increment the feature-active counter so that the
 631  632                   * feature becomes active for the first time.
 632  633                   *
 633  634                   * We are already in a sync task so we can update the MOS.
 634  635                   */
 635  636                  spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx);
 636  637          }
 637  638  
 638  639          /*
 639  640           * Since we are now setting a non-UINT64_MAX limit on the filesystem,
 640  641           * we need to ensure the counts are correct. Descend down the tree from
 641  642           * this point and update all of the counts to be accurate.
 642  643           */
 643  644          dsl_dir_init_fs_ss_count(ds->ds_dir, tx);
 644  645  
 645  646          dsl_dataset_rele(ds, FTAG);
 646  647  }
 647  648  
 648  649  /*
 649  650   * Make sure the feature is enabled and activate it if necessary.
 650  651   * Since we're setting a limit, ensure the on-disk counts are valid.
 651  652   * This is only called by the ioctl path when setting a limit value.
 652  653   *
 653  654   * We do not need to validate the new limit, since users who can change the
 654  655   * limit are also allowed to exceed the limit.
 655  656   */
 656  657  int
 657  658  dsl_dir_activate_fs_ss_limit(const char *ddname)
 658  659  {
 659  660          int error;
 660  661  
 661  662          error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check,
 662  663              dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0,
 663  664              ZFS_SPACE_CHECK_RESERVED);
 664  665  
 665  666          if (error == EALREADY)
 666  667                  error = 0;
 667  668  
 668  669          return (error);
 669  670  }
 670  671  
 671  672  /*
 672  673   * Used to determine if the filesystem_limit or snapshot_limit should be
 673  674   * enforced. We allow the limit to be exceeded if the user has permission to
 674  675   * write the property value. We pass in the creds that we got in the open
 675  676   * context since we will always be the GZ root in syncing context. We also have
 676  677   * to handle the case where we are allowed to change the limit on the current
 677  678   * dataset, but there may be another limit in the tree above.
 678  679   *
 679  680   * We can never modify these two properties within a non-global zone. In
 680  681   * addition, the other checks are modeled on zfs_secpolicy_write_perms. We
 681  682   * can't use that function since we are already holding the dp_config_rwlock.
 682  683   * In addition, we already have the dd and dealing with snapshots is simplified
 683  684   * in this code.
 684  685   */
 685  686  
 686  687  typedef enum {
 687  688          ENFORCE_ALWAYS,
 688  689          ENFORCE_NEVER,
 689  690          ENFORCE_ABOVE
 690  691  } enforce_res_t;
 691  692  
 692  693  static enforce_res_t
 693  694  dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr)
 694  695  {
 695  696          enforce_res_t enforce = ENFORCE_ALWAYS;
 696  697          uint64_t obj;
 697  698          dsl_dataset_t *ds;
 698  699          uint64_t zoned;
 699  700  
 700  701          ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
 701  702              prop == ZFS_PROP_SNAPSHOT_LIMIT);
 702  703  
 703  704  #ifdef _KERNEL
 704  705          if (crgetzoneid(cr) != GLOBAL_ZONEID)
 705  706                  return (ENFORCE_ALWAYS);
 706  707  
 707  708          if (secpolicy_zfs(cr) == 0)
 708  709                  return (ENFORCE_NEVER);
 709  710  #endif
 710  711  
 711  712          if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0)
 712  713                  return (ENFORCE_ALWAYS);
 713  714  
 714  715          ASSERT(dsl_pool_config_held(dd->dd_pool));
 715  716  
 716  717          if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0)
 717  718                  return (ENFORCE_ALWAYS);
 718  719  
 719  720          if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) {
 720  721                  /* Only root can access zoned fs's from the GZ */
 721  722                  enforce = ENFORCE_ALWAYS;
 722  723          } else {
 723  724                  if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0)
 724  725                          enforce = ENFORCE_ABOVE;
 725  726          }
 726  727  
 727  728          dsl_dataset_rele(ds, FTAG);
 728  729          return (enforce);
 729  730  }
 730  731  
 731  732  /*
 732  733   * Check if adding additional child filesystem(s) would exceed any filesystem
 733  734   * limits or adding additional snapshot(s) would exceed any snapshot limits.
 734  735   * The prop argument indicates which limit to check.
 735  736   *
 736  737   * Note that all filesystem limits up to the root (or the highest
 737  738   * initialized) filesystem or the given ancestor must be satisfied.
 738  739   */
 739  740  int
 740  741  dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
 741  742      dsl_dir_t *ancestor, cred_t *cr)
 742  743  {
 743  744          objset_t *os = dd->dd_pool->dp_meta_objset;
 744  745          uint64_t limit, count;
 745  746          char *count_prop;
 746  747          enforce_res_t enforce;
 747  748          int err = 0;
 748  749  
 749  750          ASSERT(dsl_pool_config_held(dd->dd_pool));
 750  751          ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
 751  752              prop == ZFS_PROP_SNAPSHOT_LIMIT);
 752  753  
 753  754          /*
 754  755           * If we're allowed to change the limit, don't enforce the limit
 755  756           * e.g. this can happen if a snapshot is taken by an administrative
 756  757           * user in the global zone (i.e. a recursive snapshot by root).
 757  758           * However, we must handle the case of delegated permissions where we
 758  759           * are allowed to change the limit on the current dataset, but there
 759  760           * is another limit in the tree above.
 760  761           */
 761  762          enforce = dsl_enforce_ds_ss_limits(dd, prop, cr);
 762  763          if (enforce == ENFORCE_NEVER)
 763  764                  return (0);
 764  765  
 765  766          /*
 766  767           * e.g. if renaming a dataset with no snapshots, count adjustment
 767  768           * is 0.
 768  769           */
 769  770          if (delta == 0)
 770  771                  return (0);
 771  772  
 772  773          if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
 773  774                  /*
 774  775                   * We don't enforce the limit for temporary snapshots. This is
 775  776                   * indicated by a NULL cred_t argument.
 776  777                   */
 777  778                  if (cr == NULL)
 778  779                          return (0);
 779  780  
 780  781                  count_prop = DD_FIELD_SNAPSHOT_COUNT;
 781  782          } else {
 782  783                  count_prop = DD_FIELD_FILESYSTEM_COUNT;
 783  784          }
 784  785  
 785  786          /*
 786  787           * If an ancestor has been provided, stop checking the limit once we
 787  788           * hit that dir. We need this during rename so that we don't overcount
 788  789           * the check once we recurse up to the common ancestor.
 789  790           */
 790  791          if (ancestor == dd)
 791  792                  return (0);
 792  793  
 793  794          /*
 794  795           * If we hit an uninitialized node while recursing up the tree, we can
 795  796           * stop since we know there is no limit here (or above). The counts are
 796  797           * not valid on this node and we know we won't touch this node's counts.
 797  798           */
 798  799          if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object,
 799  800              count_prop, sizeof (count), 1, &count) == ENOENT)
 800  801                  return (0);
 801  802  
 802  803          err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL,
 803  804              B_FALSE);
 804  805          if (err != 0)
 805  806                  return (err);
 806  807  
 807  808          /* Is there a limit which we've hit? */
 808  809          if (enforce == ENFORCE_ALWAYS && (count + delta) > limit)
 809  810                  return (SET_ERROR(EDQUOT));
 810  811  
 811  812          if (dd->dd_parent != NULL)
 812  813                  err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop,
 813  814                      ancestor, cr);
 814  815  
 815  816          return (err);
 816  817  }
 817  818  
 818  819  /*
 819  820   * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all
 820  821   * parents. When a new filesystem/snapshot is created, increment the count on
 821  822   * all parents, and when a filesystem/snapshot is destroyed, decrement the
 822  823   * count.
 823  824   */
 824  825  void
 825  826  dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop,
 826  827      dmu_tx_t *tx)
 827  828  {
 828  829          int err;
 829  830          objset_t *os = dd->dd_pool->dp_meta_objset;
 830  831          uint64_t count;
 831  832  
 832  833          ASSERT(dsl_pool_config_held(dd->dd_pool));
 833  834          ASSERT(dmu_tx_is_syncing(tx));
 834  835          ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 ||
 835  836              strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0);
 836  837  
 837  838          /*
 838  839           * When we receive an incremental stream into a filesystem that already
 839  840           * exists, a temporary clone is created.  We don't count this temporary
 840  841           * clone, whose name begins with a '%'. We also ignore hidden ($FREE,
 841  842           * $MOS & $ORIGIN) objsets.
 842  843           */
 843  844          if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') &&
 844  845              strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0)
 845  846                  return;
 846  847  
 847  848          /*
 848  849           * e.g. if renaming a dataset with no snapshots, count adjustment is 0
 849  850           */
 850  851          if (delta == 0)
 851  852                  return;
 852  853  
 853  854          /*
 854  855           * If we hit an uninitialized node while recursing up the tree, we can
 855  856           * stop since we know the counts are not valid on this node and we
 856  857           * know we shouldn't touch this node's counts. An uninitialized count
 857  858           * on the node indicates that either the feature has not yet been
 858  859           * activated or there are no limits on this part of the tree.
 859  860           */
 860  861          if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object,
 861  862              prop, sizeof (count), 1, &count)) == ENOENT)
 862  863                  return;
 863  864          VERIFY0(err);
 864  865  
 865  866          count += delta;
 866  867          /* Use a signed verify to make sure we're not neg. */
 867  868          VERIFY3S(count, >=, 0);
 868  869  
 869  870          VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count,
 870  871              tx));
 871  872  
 872  873          /* Roll up this additional count into our ancestors */
 873  874          if (dd->dd_parent != NULL)
 874  875                  dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx);
 875  876  }
 876  877  
 877  878  uint64_t
 878  879  dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
 879  880      dmu_tx_t *tx)
 880  881  {
 881  882          objset_t *mos = dp->dp_meta_objset;
 882  883          uint64_t ddobj;
 883  884          dsl_dir_phys_t *ddphys;
 884  885          dmu_buf_t *dbuf;
 885  886  
 886  887          ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
 887  888              DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
 888  889          if (pds) {
 889  890                  VERIFY(0 == zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj,
 890  891                      name, sizeof (uint64_t), 1, &ddobj, tx));
 891  892          } else {
 892  893                  /* it's the root dir */
 893  894                  VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
 894  895                      DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
 895  896          }
 896  897          VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
 897  898          dmu_buf_will_dirty(dbuf, tx);
 898  899          ddphys = dbuf->db_data;
 899  900  
 900  901          ddphys->dd_creation_time = gethrestime_sec();
 901  902          if (pds) {
 902  903                  ddphys->dd_parent_obj = pds->dd_object;
 903  904  
 904  905                  /* update the filesystem counts */
 905  906                  dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx);
 906  907          }
 907  908          ddphys->dd_props_zapobj = zap_create(mos,
 908  909              DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
 909  910          ddphys->dd_child_dir_zapobj = zap_create(mos,
 910  911              DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
 911  912          if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
 912  913                  ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
 913  914          dmu_buf_rele(dbuf, FTAG);
 914  915  
 915  916          return (ddobj);
 916  917  }
 917  918  
 918  919  boolean_t
 919  920  dsl_dir_is_clone(dsl_dir_t *dd)
 920  921  {
 921  922          return (dsl_dir_phys(dd)->dd_origin_obj &&
 922  923              (dd->dd_pool->dp_origin_snap == NULL ||
 923  924              dsl_dir_phys(dd)->dd_origin_obj !=
 924  925              dd->dd_pool->dp_origin_snap->ds_object));
 925  926  }
 926  927  
 927  928  void
 928  929  dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 929  930  {
 930  931          mutex_enter(&dd->dd_lock);
 931  932          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
 932  933              dsl_dir_phys(dd)->dd_used_bytes);
 933  934          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
 934  935              dsl_dir_phys(dd)->dd_quota);
 935  936          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
 936  937              dsl_dir_phys(dd)->dd_reserved);
 937  938          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
 938  939              dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 :
 939  940              (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 /
 940  941              dsl_dir_phys(dd)->dd_compressed_bytes));
 941  942          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
 942  943              dsl_dir_phys(dd)->dd_uncompressed_bytes);
 943  944          if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
 944  945                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
 945  946                      dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]);
 946  947                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
 947  948                      dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]);
 948  949                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
 949  950                      dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]);
 950  951                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
 951  952                      dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] +
 952  953                      dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]);
 953  954          }
 954  955          mutex_exit(&dd->dd_lock);
 955  956  
 956  957          if (dsl_dir_is_zapified(dd)) {
 957  958                  uint64_t count;
 958  959                  objset_t *os = dd->dd_pool->dp_meta_objset;
 959  960  
 960  961                  if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
 961  962                      sizeof (count), 1, &count) == 0) {
 962  963                          dsl_prop_nvlist_add_uint64(nv,
 963  964                              ZFS_PROP_FILESYSTEM_COUNT, count);
 964  965                  }
 965  966                  if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
 966  967                      sizeof (count), 1, &count) == 0) {
 967  968                          dsl_prop_nvlist_add_uint64(nv,
 968  969                              ZFS_PROP_SNAPSHOT_COUNT, count);
 969  970                  }
 970  971          }
 971  972  
 972  973          if (dsl_dir_is_clone(dd)) {
 973  974                  dsl_dataset_t *ds;
 974  975                  char buf[MAXNAMELEN];
 975  976  
 976  977                  VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
 977  978                      dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));
 978  979                  dsl_dataset_name(ds, buf);
 979  980                  dsl_dataset_rele(ds, FTAG);
 980  981                  dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 981  982          }
 982  983  }
 983  984  
 984  985  void
 985  986  dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
 986  987  {
 987  988          dsl_pool_t *dp = dd->dd_pool;
 988  989  
 989  990          ASSERT(dsl_dir_phys(dd));
 990  991  
 991  992          if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
 992  993                  /* up the hold count until we can be written out */
 993  994                  dmu_buf_add_ref(dd->dd_dbuf, dd);
 994  995          }
 995  996  }
 996  997  
 997  998  static int64_t
 998  999  parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
 999 1000  {
1000 1001          uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved);
1001 1002          uint64_t new_accounted =
1002 1003              MAX(used + delta, dsl_dir_phys(dd)->dd_reserved);
1003 1004          return (new_accounted - old_accounted);
1004 1005  }
1005 1006  
1006 1007  void
1007 1008  dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
1008 1009  {
1009 1010          ASSERT(dmu_tx_is_syncing(tx));
1010 1011  
1011 1012          mutex_enter(&dd->dd_lock);
1012 1013          ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
1013 1014          dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
1014 1015              dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
1015 1016          dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
1016 1017          mutex_exit(&dd->dd_lock);
1017 1018  
1018 1019          /* release the hold from dsl_dir_dirty */
1019 1020          dmu_buf_rele(dd->dd_dbuf, dd);
1020 1021  }
1021 1022  
1022 1023  static uint64_t
1023 1024  dsl_dir_space_towrite(dsl_dir_t *dd)
1024 1025  {
1025 1026          uint64_t space = 0;
1026 1027          int i;
1027 1028  
1028 1029          ASSERT(MUTEX_HELD(&dd->dd_lock));
1029 1030  
1030 1031          for (i = 0; i < TXG_SIZE; i++) {
1031 1032                  space += dd->dd_space_towrite[i&TXG_MASK];
1032 1033                  ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
1033 1034          }
1034 1035          return (space);
1035 1036  }
1036 1037  
1037 1038  /*
1038 1039   * How much space would dd have available if ancestor had delta applied
1039 1040   * to it?  If ondiskonly is set, we're only interested in what's
1040 1041   * on-disk, not estimated pending changes.
1041 1042   */
1042 1043  uint64_t
1043 1044  dsl_dir_space_available(dsl_dir_t *dd,
1044 1045      dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
1045 1046  {
1046 1047          uint64_t parentspace, myspace, quota, used;
1047 1048  
1048 1049          /*
1049 1050           * If there are no restrictions otherwise, assume we have
1050 1051           * unlimited space available.
1051 1052           */
1052 1053          quota = UINT64_MAX;
1053 1054          parentspace = UINT64_MAX;
1054 1055  
1055 1056          if (dd->dd_parent != NULL) {
1056 1057                  parentspace = dsl_dir_space_available(dd->dd_parent,
1057 1058                      ancestor, delta, ondiskonly);
1058 1059          }
1059 1060  
1060 1061          mutex_enter(&dd->dd_lock);
1061 1062          if (dsl_dir_phys(dd)->dd_quota != 0)
1062 1063                  quota = dsl_dir_phys(dd)->dd_quota;
1063 1064          used = dsl_dir_phys(dd)->dd_used_bytes;
1064 1065          if (!ondiskonly)
1065 1066                  used += dsl_dir_space_towrite(dd);
1066 1067  
1067 1068          if (dd->dd_parent == NULL) {
1068 1069                  uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
1069 1070                  quota = MIN(quota, poolsize);
1070 1071          }
1071 1072  
1072 1073          if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) {
1073 1074                  /*
1074 1075                   * We have some space reserved, in addition to what our
1075 1076                   * parent gave us.
1076 1077                   */
1077 1078                  parentspace += dsl_dir_phys(dd)->dd_reserved - used;
1078 1079          }
1079 1080  
1080 1081          if (dd == ancestor) {
1081 1082                  ASSERT(delta <= 0);
1082 1083                  ASSERT(used >= -delta);
1083 1084                  used += delta;
1084 1085                  if (parentspace != UINT64_MAX)
1085 1086                          parentspace -= delta;
1086 1087          }
1087 1088  
1088 1089          if (used > quota) {
1089 1090                  /* over quota */
1090 1091                  myspace = 0;
1091 1092          } else {
1092 1093                  /*
1093 1094                   * the lesser of the space provided by our parent and
1094 1095                   * the space left in our quota
1095 1096                   */
1096 1097                  myspace = MIN(parentspace, quota - used);
1097 1098          }
1098 1099  
1099 1100          mutex_exit(&dd->dd_lock);
1100 1101  
1101 1102          return (myspace);
1102 1103  }
1103 1104  
1104 1105  struct tempreserve {
1105 1106          list_node_t tr_node;
1106 1107          dsl_dir_t *tr_ds;
1107 1108          uint64_t tr_size;
1108 1109  };
1109 1110  
1110 1111  static int
1111 1112  dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
1112 1113      boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
1113 1114      dmu_tx_t *tx, boolean_t first)
1114 1115  {
1115 1116          uint64_t txg = tx->tx_txg;
1116 1117          uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
1117 1118          uint64_t deferred = 0;
1118 1119          struct tempreserve *tr;
1119 1120          int retval = EDQUOT;
1120 1121          int txgidx = txg & TXG_MASK;
1121 1122          int i;
1122 1123          uint64_t ref_rsrv = 0;
1123 1124  
1124 1125          ASSERT3U(txg, !=, 0);
1125 1126          ASSERT3S(asize, >, 0);
1126 1127  
1127 1128          mutex_enter(&dd->dd_lock);
1128 1129  
1129 1130          /*
1130 1131           * Check against the dsl_dir's quota.  We don't add in the delta
1131 1132           * when checking for over-quota because they get one free hit.
1132 1133           */
1133 1134          est_inflight = dsl_dir_space_towrite(dd);
1134 1135          for (i = 0; i < TXG_SIZE; i++)
1135 1136                  est_inflight += dd->dd_tempreserved[i];
1136 1137          used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
1137 1138  
1138 1139          /*
1139 1140           * On the first iteration, fetch the dataset's used-on-disk and
1140 1141           * refreservation values. Also, if checkrefquota is set, test if
1141 1142           * allocating this space would exceed the dataset's refquota.
1142 1143           */
1143 1144          if (first && tx->tx_objset) {
1144 1145                  int error;
1145 1146                  dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
1146 1147  
1147 1148                  error = dsl_dataset_check_quota(ds, checkrefquota,
1148 1149                      asize, est_inflight, &used_on_disk, &ref_rsrv);
1149 1150                  if (error) {
1150 1151                          mutex_exit(&dd->dd_lock);
1151 1152                          return (error);
1152 1153                  }
1153 1154          }
1154 1155  
1155 1156          /*
1156 1157           * If this transaction will result in a net free of space,
1157 1158           * we want to let it through.
1158 1159           */
1159 1160          if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0)
1160 1161                  quota = UINT64_MAX;
1161 1162          else
1162 1163                  quota = dsl_dir_phys(dd)->dd_quota;
1163 1164  
1164 1165          /*
1165 1166           * Adjust the quota against the actual pool size at the root
1166 1167           * minus any outstanding deferred frees.
1167 1168           * To ensure that it's possible to remove files from a full
1168 1169           * pool without inducing transient overcommits, we throttle
1169 1170           * netfree transactions against a quota that is slightly larger,
1170 1171           * but still within the pool's allocation slop.  In cases where
1171 1172           * we're very close to full, this will allow a steady trickle of
1172 1173           * removes to get through.
1173 1174           */
1174 1175          if (dd->dd_parent == NULL) {
1175 1176                  spa_t *spa = dd->dd_pool->dp_spa;
1176 1177                  uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
1177 1178                  deferred = metaslab_class_get_deferred(spa_normal_class(spa));
1178 1179                  if (poolsize - deferred < quota) {
1179 1180                          quota = poolsize - deferred;
1180 1181                          retval = ENOSPC;
1181 1182                  }
1182 1183          }
1183 1184  
1184 1185          /*
1185 1186           * If they are requesting more space, and our current estimate
1186 1187           * is over quota, they get to try again unless the actual
1187 1188           * on-disk is over quota and there are no pending changes (which
1188 1189           * may free up space for us).
1189 1190           */
1190 1191          if (used_on_disk + est_inflight >= quota) {
1191 1192                  if (est_inflight > 0 || used_on_disk < quota ||
1192 1193                      (retval == ENOSPC && used_on_disk < quota + deferred))
1193 1194                          retval = ERESTART;
1194 1195                  dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
1195 1196                      "quota=%lluK tr=%lluK err=%d\n",
1196 1197                      used_on_disk>>10, est_inflight>>10,
1197 1198                      quota>>10, asize>>10, retval);
1198 1199                  mutex_exit(&dd->dd_lock);
1199 1200                  return (SET_ERROR(retval));
1200 1201          }
1201 1202  
1202 1203          /* We need to up our estimated delta before dropping dd_lock */
1203 1204          dd->dd_tempreserved[txgidx] += asize;
1204 1205  
1205 1206          parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
1206 1207              asize - ref_rsrv);
1207 1208          mutex_exit(&dd->dd_lock);
1208 1209  
1209 1210          tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
1210 1211          tr->tr_ds = dd;
1211 1212          tr->tr_size = asize;
1212 1213          list_insert_tail(tr_list, tr);
1213 1214  
1214 1215          /* see if it's OK with our parent */
1215 1216          if (dd->dd_parent && parent_rsrv) {
1216 1217                  boolean_t ismos = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
1217 1218  
1218 1219                  return (dsl_dir_tempreserve_impl(dd->dd_parent,
1219 1220                      parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
1220 1221          } else {
1221 1222                  return (0);
1222 1223          }
1223 1224  }
1224 1225  
1225 1226  /*
1226 1227   * Reserve space in this dsl_dir, to be used in this tx's txg.
1227 1228   * After the space has been dirtied (and dsl_dir_willuse_space()
1228 1229   * has been called), the reservation should be canceled, using
1229 1230   * dsl_dir_tempreserve_clear().
1230 1231   */
1231 1232  int
1232 1233  dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
1233 1234      uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
1234 1235  {
1235 1236          int err;
1236 1237          list_t *tr_list;
1237 1238  
1238 1239          if (asize == 0) {
1239 1240                  *tr_cookiep = NULL;
1240 1241                  return (0);
1241 1242          }
1242 1243  
1243 1244          tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
1244 1245          list_create(tr_list, sizeof (struct tempreserve),
1245 1246              offsetof(struct tempreserve, tr_node));
1246 1247          ASSERT3S(asize, >, 0);
1247 1248          ASSERT3S(fsize, >=, 0);
1248 1249  
1249 1250          err = arc_tempreserve_space(lsize, tx->tx_txg);
1250 1251          if (err == 0) {
1251 1252                  struct tempreserve *tr;
1252 1253  
1253 1254                  tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
1254 1255                  tr->tr_size = lsize;
1255 1256                  list_insert_tail(tr_list, tr);
1256 1257          } else {
1257 1258                  if (err == EAGAIN) {
1258 1259                          /*
1259 1260                           * If arc_memory_throttle() detected that pageout
1260 1261                           * is running and we are low on memory, we delay new
1261 1262                           * non-pageout transactions to give pageout an
1262 1263                           * advantage.
1263 1264                           *
1264 1265                           * It is unfortunate to be delaying while the caller's
1265 1266                           * locks are held.
1266 1267                           */
1267 1268                          txg_delay(dd->dd_pool, tx->tx_txg,
1268 1269                              MSEC2NSEC(10), MSEC2NSEC(10));
1269 1270                          err = SET_ERROR(ERESTART);
1270 1271                  }
1271 1272          }
1272 1273  
1273 1274          if (err == 0) {
1274 1275                  err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
1275 1276                      FALSE, asize > usize, tr_list, tx, TRUE);
1276 1277          }
1277 1278  
1278 1279          if (err != 0)
1279 1280                  dsl_dir_tempreserve_clear(tr_list, tx);
1280 1281          else
1281 1282                  *tr_cookiep = tr_list;
1282 1283  
1283 1284          return (err);
1284 1285  }
1285 1286  
1286 1287  /*
1287 1288   * Clear a temporary reservation that we previously made with
1288 1289   * dsl_dir_tempreserve_space().
1289 1290   */
1290 1291  void
1291 1292  dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
1292 1293  {
1293 1294          int txgidx = tx->tx_txg & TXG_MASK;
1294 1295          list_t *tr_list = tr_cookie;
1295 1296          struct tempreserve *tr;
1296 1297  
1297 1298          ASSERT3U(tx->tx_txg, !=, 0);
1298 1299  
1299 1300          if (tr_cookie == NULL)
1300 1301                  return;
1301 1302  
1302 1303          while ((tr = list_head(tr_list)) != NULL) {
1303 1304                  if (tr->tr_ds) {
1304 1305                          mutex_enter(&tr->tr_ds->dd_lock);
1305 1306                          ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
1306 1307                              tr->tr_size);
1307 1308                          tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
1308 1309                          mutex_exit(&tr->tr_ds->dd_lock);
1309 1310                  } else {
1310 1311                          arc_tempreserve_clear(tr->tr_size);
1311 1312                  }
1312 1313                  list_remove(tr_list, tr);
1313 1314                  kmem_free(tr, sizeof (struct tempreserve));
1314 1315          }
1315 1316  
1316 1317          kmem_free(tr_list, sizeof (list_t));
1317 1318  }
1318 1319  
1319 1320  /*
1320 1321   * This should be called from open context when we think we're going to write
1321 1322   * or free space, for example when dirtying data. Be conservative; it's okay
1322 1323   * to write less space or free more, but we don't want to write more or free
1323 1324   * less than the amount specified.
1324 1325   */
1325 1326  void
1326 1327  dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
1327 1328  {
1328 1329          int64_t parent_space;
1329 1330          uint64_t est_used;
1330 1331  
1331 1332          mutex_enter(&dd->dd_lock);
1332 1333          if (space > 0)
1333 1334                  dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
1334 1335  
1335 1336          est_used = dsl_dir_space_towrite(dd) + dsl_dir_phys(dd)->dd_used_bytes;
1336 1337          parent_space = parent_delta(dd, est_used, space);
1337 1338          mutex_exit(&dd->dd_lock);
1338 1339  
1339 1340          /* Make sure that we clean up dd_space_to* */
1340 1341          dsl_dir_dirty(dd, tx);
1341 1342  
1342 1343          /* XXX this is potentially expensive and unnecessary... */
1343 1344          if (parent_space && dd->dd_parent)
1344 1345                  dsl_dir_willuse_space(dd->dd_parent, parent_space, tx);
1345 1346  }
1346 1347  
1347 1348  /* call from syncing context when we actually write/free space for this dd */
1348 1349  void
1349 1350  dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
1350 1351      int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
1351 1352  {
1352 1353          int64_t accounted_delta;
1353 1354  
1354 1355          /*
1355 1356           * dsl_dataset_set_refreservation_sync_impl() calls this with
1356 1357           * dd_lock held, so that it can atomically update
1357 1358           * ds->ds_reserved and the dsl_dir accounting, so that
1358 1359           * dsl_dataset_check_quota() can see dataset and dir accounting
1359 1360           * consistently.
1360 1361           */
1361 1362          boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
1362 1363  
1363 1364          ASSERT(dmu_tx_is_syncing(tx));
1364 1365          ASSERT(type < DD_USED_NUM);
1365 1366  
1366 1367          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1367 1368  
1368 1369          if (needlock)
1369 1370                  mutex_enter(&dd->dd_lock);
1370 1371          accounted_delta =
1371 1372              parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used);
1372 1373          ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used);
1373 1374          ASSERT(compressed >= 0 ||
1374 1375              dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed);
1375 1376          ASSERT(uncompressed >= 0 ||
1376 1377              dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed);
1377 1378          dsl_dir_phys(dd)->dd_used_bytes += used;
1378 1379          dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed;
1379 1380          dsl_dir_phys(dd)->dd_compressed_bytes += compressed;
1380 1381  
1381 1382          if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
1382 1383                  ASSERT(used > 0 ||
1383 1384                      dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used);
1384 1385                  dsl_dir_phys(dd)->dd_used_breakdown[type] += used;
1385 1386  #ifdef DEBUG
1386 1387                  dd_used_t t;
1387 1388                  uint64_t u = 0;
1388 1389                  for (t = 0; t < DD_USED_NUM; t++)
1389 1390                          u += dsl_dir_phys(dd)->dd_used_breakdown[t];
1390 1391                  ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes);
1391 1392  #endif
1392 1393          }
1393 1394          if (needlock)
1394 1395                  mutex_exit(&dd->dd_lock);
1395 1396  
1396 1397          if (dd->dd_parent != NULL) {
1397 1398                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1398 1399                      accounted_delta, compressed, uncompressed, tx);
1399 1400                  dsl_dir_transfer_space(dd->dd_parent,
1400 1401                      used - accounted_delta,
1401 1402                      DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
1402 1403          }
1403 1404  }
1404 1405  
1405 1406  void
1406 1407  dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
1407 1408      dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
1408 1409  {
1409 1410          ASSERT(dmu_tx_is_syncing(tx));
1410 1411          ASSERT(oldtype < DD_USED_NUM);
1411 1412          ASSERT(newtype < DD_USED_NUM);
1412 1413  
1413 1414          if (delta == 0 ||
1414 1415              !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN))
1415 1416                  return;
1416 1417  
1417 1418          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1418 1419          mutex_enter(&dd->dd_lock);
1419 1420          ASSERT(delta > 0 ?
1420 1421              dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta :
1421 1422              dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta);
1422 1423          ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta));
1423 1424          dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta;
1424 1425          dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta;
1425 1426          mutex_exit(&dd->dd_lock);
1426 1427  }
1427 1428  
1428 1429  typedef struct dsl_dir_set_qr_arg {
1429 1430          const char *ddsqra_name;
1430 1431          zprop_source_t ddsqra_source;
1431 1432          uint64_t ddsqra_value;
1432 1433  } dsl_dir_set_qr_arg_t;
1433 1434  
1434 1435  static int
1435 1436  dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
1436 1437  {
1437 1438          dsl_dir_set_qr_arg_t *ddsqra = arg;
1438 1439          dsl_pool_t *dp = dmu_tx_pool(tx);
1439 1440          dsl_dataset_t *ds;
1440 1441          int error;
1441 1442          uint64_t towrite, newval;
1442 1443  
1443 1444          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1444 1445          if (error != 0)
1445 1446                  return (error);
1446 1447  
1447 1448          error = dsl_prop_predict(ds->ds_dir, "quota",
1448 1449              ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1449 1450          if (error != 0) {
1450 1451                  dsl_dataset_rele(ds, FTAG);
1451 1452                  return (error);
1452 1453          }
1453 1454  
1454 1455          if (newval == 0) {
1455 1456                  dsl_dataset_rele(ds, FTAG);
1456 1457                  return (0);
1457 1458          }
1458 1459  
1459 1460          mutex_enter(&ds->ds_dir->dd_lock);
1460 1461          /*
1461 1462           * If we are doing the preliminary check in open context, and
1462 1463           * there are pending changes, then don't fail it, since the
1463 1464           * pending changes could under-estimate the amount of space to be
1464 1465           * freed up.
1465 1466           */
1466 1467          towrite = dsl_dir_space_towrite(ds->ds_dir);
1467 1468          if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
1468 1469              (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved ||
1469 1470              newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) {
1470 1471                  error = SET_ERROR(ENOSPC);
1471 1472          }
1472 1473          mutex_exit(&ds->ds_dir->dd_lock);
1473 1474          dsl_dataset_rele(ds, FTAG);
1474 1475          return (error);
1475 1476  }
1476 1477  
1477 1478  static void
1478 1479  dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
1479 1480  {
1480 1481          dsl_dir_set_qr_arg_t *ddsqra = arg;
1481 1482          dsl_pool_t *dp = dmu_tx_pool(tx);
1482 1483          dsl_dataset_t *ds;
1483 1484          uint64_t newval;
1484 1485  
1485 1486          VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
1486 1487  
1487 1488          if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
1488 1489                  dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
1489 1490                      ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1490 1491                      &ddsqra->ddsqra_value, tx);
1491 1492  
1492 1493                  VERIFY0(dsl_prop_get_int_ds(ds,
1493 1494                      zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
1494 1495          } else {
1495 1496                  newval = ddsqra->ddsqra_value;
1496 1497                  spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
1497 1498                      zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval);
1498 1499          }
1499 1500  
1500 1501          dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
1501 1502          mutex_enter(&ds->ds_dir->dd_lock);
1502 1503          dsl_dir_phys(ds->ds_dir)->dd_quota = newval;
1503 1504          mutex_exit(&ds->ds_dir->dd_lock);
1504 1505          dsl_dataset_rele(ds, FTAG);
1505 1506  }
1506 1507  
1507 1508  int
1508 1509  dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
1509 1510  {
1510 1511          dsl_dir_set_qr_arg_t ddsqra;
1511 1512  
1512 1513          ddsqra.ddsqra_name = ddname;
1513 1514          ddsqra.ddsqra_source = source;
1514 1515          ddsqra.ddsqra_value = quota;
1515 1516  
1516 1517          return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
1517 1518              dsl_dir_set_quota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
1518 1519  }
1519 1520  
1520 1521  int
1521 1522  dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
1522 1523  {
1523 1524          dsl_dir_set_qr_arg_t *ddsqra = arg;
1524 1525          dsl_pool_t *dp = dmu_tx_pool(tx);
1525 1526          dsl_dataset_t *ds;
1526 1527          dsl_dir_t *dd;
1527 1528          uint64_t newval, used, avail;
1528 1529          int error;
1529 1530  
1530 1531          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1531 1532          if (error != 0)
1532 1533                  return (error);
1533 1534          dd = ds->ds_dir;
1534 1535  
1535 1536          /*
1536 1537           * If we are doing the preliminary check in open context, the
1537 1538           * space estimates may be inaccurate.
1538 1539           */
1539 1540          if (!dmu_tx_is_syncing(tx)) {
1540 1541                  dsl_dataset_rele(ds, FTAG);
1541 1542                  return (0);
1542 1543          }
1543 1544  
1544 1545          error = dsl_prop_predict(ds->ds_dir,
1545 1546              zfs_prop_to_name(ZFS_PROP_RESERVATION),
1546 1547              ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1547 1548          if (error != 0) {
1548 1549                  dsl_dataset_rele(ds, FTAG);
1549 1550                  return (error);
1550 1551          }
1551 1552  
1552 1553          mutex_enter(&dd->dd_lock);
1553 1554          used = dsl_dir_phys(dd)->dd_used_bytes;
1554 1555          mutex_exit(&dd->dd_lock);
1555 1556  
1556 1557          if (dd->dd_parent) {
1557 1558                  avail = dsl_dir_space_available(dd->dd_parent,
1558 1559                      NULL, 0, FALSE);
1559 1560          } else {
1560 1561                  avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1561 1562          }
1562 1563  
1563 1564          if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) {
1564 1565                  uint64_t delta = MAX(used, newval) -
1565 1566                      MAX(used, dsl_dir_phys(dd)->dd_reserved);
1566 1567  
1567 1568                  if (delta > avail ||
1568 1569                      (dsl_dir_phys(dd)->dd_quota > 0 &&
1569 1570                      newval > dsl_dir_phys(dd)->dd_quota))
1570 1571                          error = SET_ERROR(ENOSPC);
1571 1572          }
1572 1573  
1573 1574          dsl_dataset_rele(ds, FTAG);
1574 1575          return (error);
1575 1576  }
1576 1577  
1577 1578  void
1578 1579  dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
1579 1580  {
1580 1581          uint64_t used;
1581 1582          int64_t delta;
1582 1583  
1583 1584          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1584 1585  
1585 1586          mutex_enter(&dd->dd_lock);
1586 1587          used = dsl_dir_phys(dd)->dd_used_bytes;
1587 1588          delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved);
1588 1589          dsl_dir_phys(dd)->dd_reserved = value;
1589 1590  
1590 1591          if (dd->dd_parent != NULL) {
1591 1592                  /* Roll up this additional usage into our ancestors */
1592 1593                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1593 1594                      delta, 0, 0, tx);
1594 1595          }
1595 1596          mutex_exit(&dd->dd_lock);
1596 1597  }
1597 1598  
1598 1599  
1599 1600  static void
1600 1601  dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
1601 1602  {
1602 1603          dsl_dir_set_qr_arg_t *ddsqra = arg;
1603 1604          dsl_pool_t *dp = dmu_tx_pool(tx);
1604 1605          dsl_dataset_t *ds;
1605 1606          uint64_t newval;
1606 1607  
1607 1608          VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
1608 1609  
1609 1610          if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
1610 1611                  dsl_prop_set_sync_impl(ds,
1611 1612                      zfs_prop_to_name(ZFS_PROP_RESERVATION),
1612 1613                      ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1613 1614                      &ddsqra->ddsqra_value, tx);
1614 1615  
1615 1616                  VERIFY0(dsl_prop_get_int_ds(ds,
1616 1617                      zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
1617 1618          } else {
1618 1619                  newval = ddsqra->ddsqra_value;
1619 1620                  spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
1620 1621                      zfs_prop_to_name(ZFS_PROP_RESERVATION),
1621 1622                      (longlong_t)newval);
1622 1623          }
1623 1624  
1624 1625          dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
1625 1626          dsl_dataset_rele(ds, FTAG);
1626 1627  }
1627 1628  
1628 1629  int
1629 1630  dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
1630 1631      uint64_t reservation)
1631 1632  {
1632 1633          dsl_dir_set_qr_arg_t ddsqra;
1633 1634  
1634 1635          ddsqra.ddsqra_name = ddname;
1635 1636          ddsqra.ddsqra_source = source;
1636 1637          ddsqra.ddsqra_value = reservation;
1637 1638  
1638 1639          return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
1639 1640              dsl_dir_set_reservation_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
1640 1641  }
1641 1642  
1642 1643  static dsl_dir_t *
1643 1644  closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1644 1645  {
1645 1646          for (; ds1; ds1 = ds1->dd_parent) {
1646 1647                  dsl_dir_t *dd;
1647 1648                  for (dd = ds2; dd; dd = dd->dd_parent) {
1648 1649                          if (ds1 == dd)
1649 1650                                  return (dd);
1650 1651                  }
1651 1652          }
1652 1653          return (NULL);
1653 1654  }
1654 1655  
1655 1656  /*
1656 1657   * If delta is applied to dd, how much of that delta would be applied to
1657 1658   * ancestor?  Syncing context only.
1658 1659   */
1659 1660  static int64_t
1660 1661  would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1661 1662  {
1662 1663          if (dd == ancestor)
1663 1664                  return (delta);
1664 1665  
1665 1666          mutex_enter(&dd->dd_lock);
1666 1667          delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta);
1667 1668          mutex_exit(&dd->dd_lock);
1668 1669          return (would_change(dd->dd_parent, delta, ancestor));
1669 1670  }
1670 1671  
1671 1672  typedef struct dsl_dir_rename_arg {
1672 1673          const char *ddra_oldname;
1673 1674          const char *ddra_newname;
1674 1675          cred_t *ddra_cred;
1675 1676  } dsl_dir_rename_arg_t;
1676 1677  
1677 1678  /* ARGSUSED */
1678 1679  static int
1679 1680  dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
1680 1681  {
1681 1682          int *deltap = arg;
1682 1683          char namebuf[MAXNAMELEN];
1683 1684  
1684 1685          dsl_dataset_name(ds, namebuf);
1685 1686  
1686 1687          if (strlen(namebuf) + *deltap >= MAXNAMELEN)
1687 1688                  return (SET_ERROR(ENAMETOOLONG));
1688 1689          return (0);
1689 1690  }
1690 1691  
1691 1692  static int
1692 1693  dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
1693 1694  {
1694 1695          dsl_dir_rename_arg_t *ddra = arg;
1695 1696          dsl_pool_t *dp = dmu_tx_pool(tx);
1696 1697          dsl_dir_t *dd, *newparent;
1697 1698          const char *mynewname;
1698 1699          int error;
1699 1700          int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
1700 1701  
1701 1702          /* target dir should exist */
1702 1703          error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
1703 1704          if (error != 0)
1704 1705                  return (error);
1705 1706  
1706 1707          /* new parent should exist */
1707 1708          error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
1708 1709              &newparent, &mynewname);
1709 1710          if (error != 0) {
1710 1711                  dsl_dir_rele(dd, FTAG);
1711 1712                  return (error);
1712 1713          }
1713 1714  
1714 1715          /* can't rename to different pool */
1715 1716          if (dd->dd_pool != newparent->dd_pool) {
1716 1717                  dsl_dir_rele(newparent, FTAG);
1717 1718                  dsl_dir_rele(dd, FTAG);
1718 1719                  return (SET_ERROR(ENXIO));
1719 1720          }
1720 1721  
1721 1722          /* new name should not already exist */
1722 1723          if (mynewname == NULL) {
1723 1724                  dsl_dir_rele(newparent, FTAG);
1724 1725                  dsl_dir_rele(dd, FTAG);
1725 1726                  return (SET_ERROR(EEXIST));
1726 1727          }
1727 1728  
1728 1729          /* if the name length is growing, validate child name lengths */
1729 1730          if (delta > 0) {
1730 1731                  error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
1731 1732                      &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1732 1733                  if (error != 0) {
1733 1734                          dsl_dir_rele(newparent, FTAG);
1734 1735                          dsl_dir_rele(dd, FTAG);
1735 1736                          return (error);
1736 1737                  }
1737 1738          }
1738 1739  
1739 1740          if (dmu_tx_is_syncing(tx)) {
1740 1741                  if (spa_feature_is_active(dp->dp_spa,
1741 1742                      SPA_FEATURE_FS_SS_LIMIT)) {
1742 1743                          /*
1743 1744                           * Although this is the check function and we don't
1744 1745                           * normally make on-disk changes in check functions,
1745 1746                           * we need to do that here.
1746 1747                           *
1747 1748                           * Ensure this portion of the tree's counts have been
1748 1749                           * initialized in case the new parent has limits set.
1749 1750                           */
1750 1751                          dsl_dir_init_fs_ss_count(dd, tx);
1751 1752                  }
1752 1753          }
1753 1754  
1754 1755          if (newparent != dd->dd_parent) {
1755 1756                  /* is there enough space? */
1756 1757                  uint64_t myspace =
1757 1758                      MAX(dsl_dir_phys(dd)->dd_used_bytes,
1758 1759                      dsl_dir_phys(dd)->dd_reserved);
1759 1760                  objset_t *os = dd->dd_pool->dp_meta_objset;
1760 1761                  uint64_t fs_cnt = 0;
1761 1762                  uint64_t ss_cnt = 0;
1762 1763  
1763 1764                  if (dsl_dir_is_zapified(dd)) {
1764 1765                          int err;
1765 1766  
1766 1767                          err = zap_lookup(os, dd->dd_object,
1767 1768                              DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
1768 1769                              &fs_cnt);
1769 1770                          if (err != ENOENT && err != 0) {
1770 1771                                  dsl_dir_rele(newparent, FTAG);
1771 1772                                  dsl_dir_rele(dd, FTAG);
1772 1773                                  return (err);
1773 1774                          }
1774 1775  
1775 1776                          /*
1776 1777                           * have to add 1 for the filesystem itself that we're
1777 1778                           * moving
1778 1779                           */
1779 1780                          fs_cnt++;
1780 1781  
1781 1782                          err = zap_lookup(os, dd->dd_object,
1782 1783                              DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
1783 1784                              &ss_cnt);
1784 1785                          if (err != ENOENT && err != 0) {
1785 1786                                  dsl_dir_rele(newparent, FTAG);
1786 1787                                  dsl_dir_rele(dd, FTAG);
1787 1788                                  return (err);
1788 1789                          }
1789 1790                  }
1790 1791  
1791 1792                  /* no rename into our descendant */
1792 1793                  if (closest_common_ancestor(dd, newparent) == dd) {
1793 1794                          dsl_dir_rele(newparent, FTAG);
1794 1795                          dsl_dir_rele(dd, FTAG);
1795 1796                          return (SET_ERROR(EINVAL));
1796 1797                  }
1797 1798  
1798 1799                  error = dsl_dir_transfer_possible(dd->dd_parent,
1799 1800                      newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred);
1800 1801                  if (error != 0) {
1801 1802                          dsl_dir_rele(newparent, FTAG);
1802 1803                          dsl_dir_rele(dd, FTAG);
1803 1804                          return (error);
1804 1805                  }
1805 1806          }
1806 1807  
1807 1808          dsl_dir_rele(newparent, FTAG);
1808 1809          dsl_dir_rele(dd, FTAG);
1809 1810          return (0);
1810 1811  }
1811 1812  
1812 1813  static void
1813 1814  dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
1814 1815  {
1815 1816          dsl_dir_rename_arg_t *ddra = arg;
1816 1817          dsl_pool_t *dp = dmu_tx_pool(tx);
1817 1818          dsl_dir_t *dd, *newparent;
1818 1819          const char *mynewname;
1819 1820          int error;
1820 1821          objset_t *mos = dp->dp_meta_objset;
1821 1822  
1822 1823          VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
1823 1824          VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
1824 1825              &mynewname));
1825 1826  
1826 1827          /* Log this before we change the name. */
1827 1828          spa_history_log_internal_dd(dd, "rename", tx,
1828 1829              "-> %s", ddra->ddra_newname);
1829 1830  
1830 1831          if (newparent != dd->dd_parent) {
1831 1832                  objset_t *os = dd->dd_pool->dp_meta_objset;
1832 1833                  uint64_t fs_cnt = 0;
1833 1834                  uint64_t ss_cnt = 0;
1834 1835  
1835 1836                  /*
1836 1837                   * We already made sure the dd counts were initialized in the
1837 1838                   * check function.
1838 1839                   */
1839 1840                  if (spa_feature_is_active(dp->dp_spa,
1840 1841                      SPA_FEATURE_FS_SS_LIMIT)) {
1841 1842                          VERIFY0(zap_lookup(os, dd->dd_object,
1842 1843                              DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
1843 1844                              &fs_cnt));
1844 1845                          /* add 1 for the filesystem itself that we're moving */
1845 1846                          fs_cnt++;
1846 1847  
1847 1848                          VERIFY0(zap_lookup(os, dd->dd_object,
1848 1849                              DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
1849 1850                              &ss_cnt));
1850 1851                  }
1851 1852  
1852 1853                  dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt,
1853 1854                      DD_FIELD_FILESYSTEM_COUNT, tx);
1854 1855                  dsl_fs_ss_count_adjust(newparent, fs_cnt,
1855 1856                      DD_FIELD_FILESYSTEM_COUNT, tx);
1856 1857  
1857 1858                  dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt,
1858 1859                      DD_FIELD_SNAPSHOT_COUNT, tx);
1859 1860                  dsl_fs_ss_count_adjust(newparent, ss_cnt,
1860 1861                      DD_FIELD_SNAPSHOT_COUNT, tx);
1861 1862  
1862 1863                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1863 1864                      -dsl_dir_phys(dd)->dd_used_bytes,
1864 1865                      -dsl_dir_phys(dd)->dd_compressed_bytes,
1865 1866                      -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);
1866 1867                  dsl_dir_diduse_space(newparent, DD_USED_CHILD,
1867 1868                      dsl_dir_phys(dd)->dd_used_bytes,
1868 1869                      dsl_dir_phys(dd)->dd_compressed_bytes,
1869 1870                      dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);
1870 1871  
1871 1872                  if (dsl_dir_phys(dd)->dd_reserved >
1872 1873                      dsl_dir_phys(dd)->dd_used_bytes) {
1873 1874                          uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved -
1874 1875                              dsl_dir_phys(dd)->dd_used_bytes;
1875 1876  
1876 1877                          dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1877 1878                              -unused_rsrv, 0, 0, tx);
1878 1879                          dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
1879 1880                              unused_rsrv, 0, 0, tx);
1880 1881                  }
1881 1882          }
1882 1883  
1883 1884          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1884 1885  
1885 1886          /* remove from old parent zapobj */
1886 1887          error = zap_remove(mos,
1887 1888              dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
1888 1889              dd->dd_myname, tx);
1889 1890          ASSERT0(error);
1890 1891  
1891 1892          (void) strcpy(dd->dd_myname, mynewname);
1892 1893          dsl_dir_rele(dd->dd_parent, dd);
1893 1894          dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object;
1894 1895          VERIFY0(dsl_dir_hold_obj(dp,
1895 1896              newparent->dd_object, NULL, dd, &dd->dd_parent));
1896 1897  
1897 1898          /* add to new parent zapobj */
1898 1899          VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
1899 1900              dd->dd_myname, 8, 1, &dd->dd_object, tx));
1900 1901  
1901 1902          dsl_prop_notify_all(dd);
1902 1903  
1903 1904          dsl_dir_rele(newparent, FTAG);
1904 1905          dsl_dir_rele(dd, FTAG);
1905 1906  }
1906 1907  
1907 1908  int
1908 1909  dsl_dir_rename(const char *oldname, const char *newname)
1909 1910  {
1910 1911          dsl_dir_rename_arg_t ddra;
1911 1912  
1912 1913          ddra.ddra_oldname = oldname;
1913 1914          ddra.ddra_newname = newname;
1914 1915          ddra.ddra_cred = CRED();
1915 1916  
1916 1917          return (dsl_sync_task(oldname,
1917 1918              dsl_dir_rename_check, dsl_dir_rename_sync, &ddra,
1918 1919              3, ZFS_SPACE_CHECK_RESERVED));
1919 1920  }
1920 1921  
1921 1922  int
1922 1923  dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
1923 1924      uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr)
1924 1925  {
1925 1926          dsl_dir_t *ancestor;
1926 1927          int64_t adelta;
1927 1928          uint64_t avail;
1928 1929          int err;
1929 1930  
1930 1931          ancestor = closest_common_ancestor(sdd, tdd);
1931 1932          adelta = would_change(sdd, -space, ancestor);
1932 1933          avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
1933 1934          if (avail < space)
1934 1935                  return (SET_ERROR(ENOSPC));
1935 1936  
1936 1937          err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT,
1937 1938              ancestor, cr);
1938 1939          if (err != 0)
1939 1940                  return (err);
1940 1941          err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT,
1941 1942              ancestor, cr);
1942 1943          if (err != 0)
1943 1944                  return (err);
1944 1945  
1945 1946          return (0);
1946 1947  }
1947 1948  
1948 1949  timestruc_t
1949 1950  dsl_dir_snap_cmtime(dsl_dir_t *dd)
1950 1951  {
1951 1952          timestruc_t t;
1952 1953  
1953 1954          mutex_enter(&dd->dd_lock);
1954 1955          t = dd->dd_snap_cmtime;
1955 1956          mutex_exit(&dd->dd_lock);
1956 1957  
1957 1958          return (t);
1958 1959  }
1959 1960  
1960 1961  void
1961 1962  dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
1962 1963  {
1963 1964          timestruc_t t;
1964 1965  
1965 1966          gethrestime(&t);
1966 1967          mutex_enter(&dd->dd_lock);
1967 1968          dd->dd_snap_cmtime = t;
1968 1969          mutex_exit(&dd->dd_lock);
1969 1970  }
1970 1971  
1971 1972  void
1972 1973  dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx)
1973 1974  {
1974 1975          objset_t *mos = dd->dd_pool->dp_meta_objset;
1975 1976          dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx);
1976 1977  }
1977 1978  
1978 1979  boolean_t
1979 1980  dsl_dir_is_zapified(dsl_dir_t *dd)
1980 1981  {
1981 1982          dmu_object_info_t doi;
1982 1983  
1983 1984          dmu_object_info_from_db(dd->dd_dbuf, &doi);
1984 1985          return (doi.doi_type == DMU_OTN_ZAP_METADATA);
1985 1986  }

↓ open down ↓

1551 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX