Print this page
    
3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/dsl_dir.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dir.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   */
  25   25  
  26   26  #include <sys/dmu.h>
  27   27  #include <sys/dmu_objset.h>
  28   28  #include <sys/dmu_tx.h>
  29   29  #include <sys/dsl_dataset.h>
  30   30  #include <sys/dsl_dir.h>
  31   31  #include <sys/dsl_prop.h>
  32   32  #include <sys/dsl_synctask.h>
  33   33  #include <sys/dsl_deleg.h>
  34   34  #include <sys/spa.h>
  35   35  #include <sys/metaslab.h>
  36   36  #include <sys/zap.h>
  37   37  #include <sys/zio.h>
  38   38  #include <sys/arc.h>
  39   39  #include <sys/sunddi.h>
  40   40  #include "zfs_namecheck.h"
  41   41  
  42   42  static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
  43   43  static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
  44   44      uint64_t value, dmu_tx_t *tx);
  45   45  
  46   46  /* ARGSUSED */
  47   47  static void
  48   48  dsl_dir_evict(dmu_buf_t *db, void *arg)
  49   49  {
  50   50          dsl_dir_t *dd = arg;
  51   51          dsl_pool_t *dp = dd->dd_pool;
  52   52          int t;
  53   53  
  54   54          for (t = 0; t < TXG_SIZE; t++) {
  55   55                  ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
  56   56                  ASSERT(dd->dd_tempreserved[t] == 0);
  57   57                  ASSERT(dd->dd_space_towrite[t] == 0);
  58   58          }
  59   59  
  60   60          if (dd->dd_parent)
  61   61                  dsl_dir_close(dd->dd_parent, dd);
  62   62  
  63   63          spa_close(dd->dd_pool->dp_spa, dd);
  64   64  
  65   65          /*
  66   66           * The props callback list should have been cleaned up by
  67   67           * objset_evict().
  68   68           */
  69   69          list_destroy(&dd->dd_prop_cbs);
  70   70          mutex_destroy(&dd->dd_lock);
  71   71          kmem_free(dd, sizeof (dsl_dir_t));
  72   72  }
  73   73  
  74   74  int
  75   75  dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
  76   76      const char *tail, void *tag, dsl_dir_t **ddp)
  77   77  {
  78   78          dmu_buf_t *dbuf;
  79   79          dsl_dir_t *dd;
  80   80          int err;
  81   81  
  82   82          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
  83   83              dsl_pool_sync_context(dp));
  84   84  
  85   85          err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
  86   86          if (err)
  87   87                  return (err);
  88   88          dd = dmu_buf_get_user(dbuf);
  89   89  #ifdef ZFS_DEBUG
  90   90          {
  91   91                  dmu_object_info_t doi;
  92   92                  dmu_object_info_from_db(dbuf, &doi);
  93   93                  ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
  94   94                  ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
  95   95          }
  96   96  #endif
  97   97          if (dd == NULL) {
  98   98                  dsl_dir_t *winner;
  99   99  
 100  100                  dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
 101  101                  dd->dd_object = ddobj;
 102  102                  dd->dd_dbuf = dbuf;
 103  103                  dd->dd_pool = dp;
 104  104                  dd->dd_phys = dbuf->db_data;
 105  105                  mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
 106  106  
 107  107                  list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
 108  108                      offsetof(dsl_prop_cb_record_t, cbr_node));
 109  109  
 110  110                  dsl_dir_snap_cmtime_update(dd);
 111  111  
 112  112                  if (dd->dd_phys->dd_parent_obj) {
 113  113                          err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
 114  114                              NULL, dd, &dd->dd_parent);
 115  115                          if (err)
 116  116                                  goto errout;
 117  117                          if (tail) {
 118  118  #ifdef ZFS_DEBUG
 119  119                                  uint64_t foundobj;
 120  120  
 121  121                                  err = zap_lookup(dp->dp_meta_objset,
 122  122                                      dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 123  123                                      tail, sizeof (foundobj), 1, &foundobj);
 124  124                                  ASSERT(err || foundobj == ddobj);
 125  125  #endif
 126  126                                  (void) strcpy(dd->dd_myname, tail);
 127  127                          } else {
 128  128                                  err = zap_value_search(dp->dp_meta_objset,
 129  129                                      dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 130  130                                      ddobj, 0, dd->dd_myname);
 131  131                          }
 132  132                          if (err)
 133  133                                  goto errout;
 134  134                  } else {
 135  135                          (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
 136  136                  }
 137  137  
 138  138                  if (dsl_dir_is_clone(dd)) {
 139  139                          dmu_buf_t *origin_bonus;
 140  140                          dsl_dataset_phys_t *origin_phys;
 141  141  
 142  142                          /*
 143  143                           * We can't open the origin dataset, because
 144  144                           * that would require opening this dsl_dir.
 145  145                           * Just look at its phys directly instead.
 146  146                           */
 147  147                          err = dmu_bonus_hold(dp->dp_meta_objset,
 148  148                              dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
 149  149                          if (err)
 150  150                                  goto errout;
 151  151                          origin_phys = origin_bonus->db_data;
 152  152                          dd->dd_origin_txg =
 153  153                              origin_phys->ds_creation_txg;
 154  154                          dmu_buf_rele(origin_bonus, FTAG);
 155  155                  }
 156  156  
 157  157                  winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
 158  158                      dsl_dir_evict);
 159  159                  if (winner) {
 160  160                          if (dd->dd_parent)
 161  161                                  dsl_dir_close(dd->dd_parent, dd);
 162  162                          mutex_destroy(&dd->dd_lock);
 163  163                          kmem_free(dd, sizeof (dsl_dir_t));
 164  164                          dd = winner;
 165  165                  } else {
 166  166                          spa_open_ref(dp->dp_spa, dd);
 167  167                  }
 168  168          }
 169  169  
 170  170          /*
 171  171           * The dsl_dir_t has both open-to-close and instantiate-to-evict
 172  172           * holds on the spa.  We need the open-to-close holds because
 173  173           * otherwise the spa_refcnt wouldn't change when we open a
 174  174           * dir which the spa also has open, so we could incorrectly
 175  175           * think it was OK to unload/export/destroy the pool.  We need
 176  176           * the instantiate-to-evict hold because the dsl_dir_t has a
 177  177           * pointer to the dd_pool, which has a pointer to the spa_t.
 178  178           */
 179  179          spa_open_ref(dp->dp_spa, tag);
 180  180          ASSERT3P(dd->dd_pool, ==, dp);
 181  181          ASSERT3U(dd->dd_object, ==, ddobj);
 182  182          ASSERT3P(dd->dd_dbuf, ==, dbuf);
 183  183          *ddp = dd;
 184  184          return (0);
 185  185  
 186  186  errout:
 187  187          if (dd->dd_parent)
 188  188                  dsl_dir_close(dd->dd_parent, dd);
 189  189          mutex_destroy(&dd->dd_lock);
 190  190          kmem_free(dd, sizeof (dsl_dir_t));
 191  191          dmu_buf_rele(dbuf, tag);
 192  192          return (err);
 193  193  
 194  194  }
 195  195  
 196  196  void
 197  197  dsl_dir_close(dsl_dir_t *dd, void *tag)
 198  198  {
 199  199          dprintf_dd(dd, "%s\n", "");
 200  200          spa_close(dd->dd_pool->dp_spa, tag);
 201  201          dmu_buf_rele(dd->dd_dbuf, tag);
 202  202  }
 203  203  
 204  204  /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
 205  205  void
 206  206  dsl_dir_name(dsl_dir_t *dd, char *buf)
 207  207  {
 208  208          if (dd->dd_parent) {
 209  209                  dsl_dir_name(dd->dd_parent, buf);
 210  210                  (void) strcat(buf, "/");
 211  211          } else {
 212  212                  buf[0] = '\0';
 213  213          }
 214  214          if (!MUTEX_HELD(&dd->dd_lock)) {
 215  215                  /*
 216  216                   * recursive mutex so that we can use
 217  217                   * dprintf_dd() with dd_lock held
 218  218                   */
 219  219                  mutex_enter(&dd->dd_lock);
 220  220                  (void) strcat(buf, dd->dd_myname);
 221  221                  mutex_exit(&dd->dd_lock);
 222  222          } else {
 223  223                  (void) strcat(buf, dd->dd_myname);
 224  224          }
 225  225  }
 226  226  
 227  227  /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
 228  228  int
 229  229  dsl_dir_namelen(dsl_dir_t *dd)
 230  230  {
 231  231          int result = 0;
 232  232  
 233  233          if (dd->dd_parent) {
 234  234                  /* parent's name + 1 for the "/" */
 235  235                  result = dsl_dir_namelen(dd->dd_parent) + 1;
 236  236          }
 237  237  
 238  238          if (!MUTEX_HELD(&dd->dd_lock)) {
 239  239                  /* see dsl_dir_name */
 240  240                  mutex_enter(&dd->dd_lock);
 241  241                  result += strlen(dd->dd_myname);
 242  242                  mutex_exit(&dd->dd_lock);
 243  243          } else {
 244  244                  result += strlen(dd->dd_myname);
 245  245          }
 246  246  
 247  247          return (result);
 248  248  }
 249  249  
 250  250  static int
 251  251  getcomponent(const char *path, char *component, const char **nextp)
 252  252  {
 253  253          char *p;
 254  254          if ((path == NULL) || (path[0] == '\0'))
 255  255                  return (ENOENT);
 256  256          /* This would be a good place to reserve some namespace... */
 257  257          p = strpbrk(path, "/@");
 258  258          if (p && (p[1] == '/' || p[1] == '@')) {
 259  259                  /* two separators in a row */
 260  260                  return (EINVAL);
 261  261          }
 262  262          if (p == NULL || p == path) {
 263  263                  /*
 264  264                   * if the first thing is an @ or /, it had better be an
 265  265                   * @ and it had better not have any more ats or slashes,
 266  266                   * and it had better have something after the @.
 267  267                   */
 268  268                  if (p != NULL &&
 269  269                      (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
 270  270                          return (EINVAL);
 271  271                  if (strlen(path) >= MAXNAMELEN)
 272  272                          return (ENAMETOOLONG);
 273  273                  (void) strcpy(component, path);
 274  274                  p = NULL;
 275  275          } else if (p[0] == '/') {
 276  276                  if (p-path >= MAXNAMELEN)
 277  277                          return (ENAMETOOLONG);
 278  278                  (void) strncpy(component, path, p - path);
 279  279                  component[p-path] = '\0';
 280  280                  p++;
 281  281          } else if (p[0] == '@') {
 282  282                  /*
 283  283                   * if the next separator is an @, there better not be
 284  284                   * any more slashes.
 285  285                   */
 286  286                  if (strchr(path, '/'))
 287  287                          return (EINVAL);
 288  288                  if (p-path >= MAXNAMELEN)
 289  289                          return (ENAMETOOLONG);
 290  290                  (void) strncpy(component, path, p - path);
 291  291                  component[p-path] = '\0';
 292  292          } else {
 293  293                  ASSERT(!"invalid p");
 294  294          }
 295  295          *nextp = p;
 296  296          return (0);
 297  297  }
 298  298  
 299  299  /*
 300  300   * Same as dsl_dir_open(), but if spa is non-NULL, ignore the first
 301  301   * component of name and use that spa instead.
 302  302   */
 303  303  int
 304  304  dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
 305  305      dsl_dir_t **ddp, const char **tailp)
 306  306  {
 307  307          char buf[MAXNAMELEN];
 308  308          const char *next, *nextnext = NULL;
 309  309          int err;
 310  310          dsl_dir_t *dd;
 311  311          dsl_pool_t *dp;
 312  312          uint64_t ddobj;
 313  313          int openedspa = FALSE;
 314  314  
 315  315          dprintf("%s\n", name);
 316  316  
 317  317          err = getcomponent(name, buf, &next);
 318  318          if (err)
 319  319                  return (err);
 320  320          if (spa == NULL) {
 321  321                  err = spa_open(buf, &spa, FTAG);
 322  322                  if (err) {
 323  323                          dprintf("spa_open(%s) failed\n", buf);
 324  324                          return (err);
 325  325                  }
 326  326                  openedspa = TRUE;
 327  327  
 328  328                  /* XXX this assertion belongs in spa_open */
 329  329                  ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
 330  330          }
 331  331  
 332  332          dp = spa_get_dsl(spa);
 333  333  
 334  334          rw_enter(&dp->dp_config_rwlock, RW_READER);
 335  335          err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
 336  336          if (err) {
 337  337                  rw_exit(&dp->dp_config_rwlock);
 338  338                  if (openedspa)
 339  339                          spa_close(spa, FTAG);
 340  340                  return (err);
 341  341          }
 342  342  
 343  343          while (next != NULL) {
 344  344                  dsl_dir_t *child_ds;
 345  345                  err = getcomponent(next, buf, &nextnext);
 346  346                  if (err)
 347  347                          break;
 348  348                  ASSERT(next[0] != '\0');
 349  349                  if (next[0] == '@')
 350  350                          break;
 351  351                  dprintf("looking up %s in obj%lld\n",
 352  352                      buf, dd->dd_phys->dd_child_dir_zapobj);
 353  353  
 354  354                  err = zap_lookup(dp->dp_meta_objset,
 355  355                      dd->dd_phys->dd_child_dir_zapobj,
 356  356                      buf, sizeof (ddobj), 1, &ddobj);
 357  357                  if (err) {
 358  358                          if (err == ENOENT)
 359  359                                  err = 0;
 360  360                          break;
 361  361                  }
 362  362  
 363  363                  err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
 364  364                  if (err)
 365  365                          break;
 366  366                  dsl_dir_close(dd, tag);
 367  367                  dd = child_ds;
 368  368                  next = nextnext;
 369  369          }
 370  370          rw_exit(&dp->dp_config_rwlock);
 371  371  
 372  372          if (err) {
 373  373                  dsl_dir_close(dd, tag);
 374  374                  if (openedspa)
 375  375                          spa_close(spa, FTAG);
 376  376                  return (err);
 377  377          }
 378  378  
 379  379          /*
 380  380           * It's an error if there's more than one component left, or
 381  381           * tailp==NULL and there's any component left.
 382  382           */
 383  383          if (next != NULL &&
 384  384              (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
 385  385                  /* bad path name */
 386  386                  dsl_dir_close(dd, tag);
 387  387                  dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
 388  388                  err = ENOENT;
 389  389          }
 390  390          if (tailp)
 391  391                  *tailp = next;
 392  392          if (openedspa)
 393  393                  spa_close(spa, FTAG);
 394  394          *ddp = dd;
 395  395          return (err);
 396  396  }
 397  397  
 398  398  /*
 399  399   * Return the dsl_dir_t in *ddp, and possibly the last component which
 400  400   * couldn't be found in *tailp.  Return an error if the path is bogus, or
 401  401   * if tailp==NULL and we couldn't parse the whole name.  (*tailp)[0] == '@'
 402  402   * means that the last component is a snapshot.
 403  403   */
 404  404  int
 405  405  dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
 406  406  {
 407  407          return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
 408  408  }
 409  409  
 410  410  uint64_t
 411  411  dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
 412  412      dmu_tx_t *tx)
 413  413  {
 414  414          objset_t *mos = dp->dp_meta_objset;
 415  415          uint64_t ddobj;
 416  416          dsl_dir_phys_t *ddphys;
 417  417          dmu_buf_t *dbuf;
 418  418  
 419  419          ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
 420  420              DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
 421  421          if (pds) {
 422  422                  VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
 423  423                      name, sizeof (uint64_t), 1, &ddobj, tx));
 424  424          } else {
 425  425                  /* it's the root dir */
 426  426                  VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
 427  427                      DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
 428  428          }
 429  429          VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
 430  430          dmu_buf_will_dirty(dbuf, tx);
 431  431          ddphys = dbuf->db_data;
 432  432  
 433  433          ddphys->dd_creation_time = gethrestime_sec();
 434  434          if (pds)
 435  435                  ddphys->dd_parent_obj = pds->dd_object;
 436  436          ddphys->dd_props_zapobj = zap_create(mos,
 437  437              DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
 438  438          ddphys->dd_child_dir_zapobj = zap_create(mos,
 439  439              DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
 440  440          if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
 441  441                  ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
 442  442          dmu_buf_rele(dbuf, FTAG);
 443  443  
 444  444          return (ddobj);
 445  445  }
 446  446  
 447  447  /* ARGSUSED */
 448  448  int
 449  449  dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
 450  450  {
 451  451          dsl_dir_t *dd = arg1;
 452  452          dsl_pool_t *dp = dd->dd_pool;
 453  453          objset_t *mos = dp->dp_meta_objset;
 454  454          int err;
 455  455          uint64_t count;
 456  456  
 457  457          /*
 458  458           * There should be exactly two holds, both from
 459  459           * dsl_dataset_destroy: one on the dd directory, and one on its
 460  460           * head ds.  Otherwise, someone is trying to lookup something
 461  461           * inside this dir while we want to destroy it.  The
 462  462           * config_rwlock ensures that nobody else opens it after we
 463  463           * check.
 464  464           */
 465  465          if (dmu_buf_refcount(dd->dd_dbuf) > 2)
 466  466                  return (EBUSY);
 467  467  
 468  468          err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
 469  469          if (err)
 470  470                  return (err);
 471  471          if (count != 0)
 472  472                  return (EEXIST);
 473  473  
 474  474          return (0);
 475  475  }
 476  476  
 477  477  void
 478  478  dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
 479  479  {
 480  480          dsl_dir_t *dd = arg1;
 481  481          objset_t *mos = dd->dd_pool->dp_meta_objset;
 482  482          uint64_t obj;
 483  483          dd_used_t t;
  
    | 
      ↓ open down ↓ | 
    483 lines elided | 
    
      ↑ open up ↑ | 
  
 484  484  
 485  485          ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
 486  486          ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
 487  487  
 488  488          /*
 489  489           * Remove our reservation. The impl() routine avoids setting the
 490  490           * actual property, which would require the (already destroyed) ds.
 491  491           */
 492  492          dsl_dir_set_reservation_sync_impl(dd, 0, tx);
 493  493  
 494      -        ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
 495      -        ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
      494 +        ASSERT0(dd->dd_phys->dd_used_bytes);
      495 +        ASSERT0(dd->dd_phys->dd_reserved);
 496  496          for (t = 0; t < DD_USED_NUM; t++)
 497      -                ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
      497 +                ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
 498  498  
 499  499          VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
 500  500          VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
 501  501          VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
 502  502          VERIFY(0 == zap_remove(mos,
 503  503              dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
 504  504  
 505  505          obj = dd->dd_object;
 506  506          dsl_dir_close(dd, tag);
 507  507          VERIFY(0 == dmu_object_free(mos, obj, tx));
 508  508  }
 509  509  
 510  510  boolean_t
 511  511  dsl_dir_is_clone(dsl_dir_t *dd)
 512  512  {
 513  513          return (dd->dd_phys->dd_origin_obj &&
 514  514              (dd->dd_pool->dp_origin_snap == NULL ||
 515  515              dd->dd_phys->dd_origin_obj !=
 516  516              dd->dd_pool->dp_origin_snap->ds_object));
 517  517  }
 518  518  
 519  519  void
 520  520  dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 521  521  {
 522  522          mutex_enter(&dd->dd_lock);
 523  523          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
 524  524              dd->dd_phys->dd_used_bytes);
 525  525          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
 526  526          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
 527  527              dd->dd_phys->dd_reserved);
 528  528          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
 529  529              dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
 530  530              (dd->dd_phys->dd_uncompressed_bytes * 100 /
 531  531              dd->dd_phys->dd_compressed_bytes));
 532  532          if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
 533  533                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
 534  534                      dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
 535  535                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
 536  536                      dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
 537  537                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
 538  538                      dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
 539  539                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
 540  540                      dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
 541  541                      dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
 542  542          }
 543  543          mutex_exit(&dd->dd_lock);
 544  544  
 545  545          rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 546  546          if (dsl_dir_is_clone(dd)) {
 547  547                  dsl_dataset_t *ds;
 548  548                  char buf[MAXNAMELEN];
 549  549  
 550  550                  VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
 551  551                      dd->dd_phys->dd_origin_obj, FTAG, &ds));
 552  552                  dsl_dataset_name(ds, buf);
 553  553                  dsl_dataset_rele(ds, FTAG);
 554  554                  dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 555  555          }
 556  556          rw_exit(&dd->dd_pool->dp_config_rwlock);
 557  557  }
 558  558  
 559  559  void
 560  560  dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
 561  561  {
 562  562          dsl_pool_t *dp = dd->dd_pool;
 563  563  
 564  564          ASSERT(dd->dd_phys);
 565  565  
 566  566          if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
 567  567                  /* up the hold count until we can be written out */
 568  568                  dmu_buf_add_ref(dd->dd_dbuf, dd);
 569  569          }
 570  570  }
 571  571  
 572  572  static int64_t
 573  573  parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
 574  574  {
 575  575          uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
 576  576          uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
 577  577          return (new_accounted - old_accounted);
  
    | 
      ↓ open down ↓ | 
    70 lines elided | 
    
      ↑ open up ↑ | 
  
 578  578  }
 579  579  
 580  580  void
 581  581  dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
 582  582  {
 583  583          ASSERT(dmu_tx_is_syncing(tx));
 584  584  
 585  585          dmu_buf_will_dirty(dd->dd_dbuf, tx);
 586  586  
 587  587          mutex_enter(&dd->dd_lock);
 588      -        ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
      588 +        ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
 589  589          dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
 590  590              dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
 591  591          dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
 592  592          mutex_exit(&dd->dd_lock);
 593  593  
 594  594          /* release the hold from dsl_dir_dirty */
 595  595          dmu_buf_rele(dd->dd_dbuf, dd);
 596  596  }
 597  597  
 598  598  static uint64_t
 599  599  dsl_dir_space_towrite(dsl_dir_t *dd)
 600  600  {
 601  601          uint64_t space = 0;
 602  602          int i;
 603  603  
 604  604          ASSERT(MUTEX_HELD(&dd->dd_lock));
 605  605  
 606  606          for (i = 0; i < TXG_SIZE; i++) {
 607  607                  space += dd->dd_space_towrite[i&TXG_MASK];
 608  608                  ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
 609  609          }
 610  610          return (space);
 611  611  }
 612  612  
 613  613  /*
 614  614   * How much space would dd have available if ancestor had delta applied
 615  615   * to it?  If ondiskonly is set, we're only interested in what's
 616  616   * on-disk, not estimated pending changes.
 617  617   */
 618  618  uint64_t
 619  619  dsl_dir_space_available(dsl_dir_t *dd,
 620  620      dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
 621  621  {
 622  622          uint64_t parentspace, myspace, quota, used;
 623  623  
 624  624          /*
 625  625           * If there are no restrictions otherwise, assume we have
 626  626           * unlimited space available.
 627  627           */
 628  628          quota = UINT64_MAX;
 629  629          parentspace = UINT64_MAX;
 630  630  
 631  631          if (dd->dd_parent != NULL) {
 632  632                  parentspace = dsl_dir_space_available(dd->dd_parent,
 633  633                      ancestor, delta, ondiskonly);
 634  634          }
 635  635  
 636  636          mutex_enter(&dd->dd_lock);
 637  637          if (dd->dd_phys->dd_quota != 0)
 638  638                  quota = dd->dd_phys->dd_quota;
 639  639          used = dd->dd_phys->dd_used_bytes;
 640  640          if (!ondiskonly)
 641  641                  used += dsl_dir_space_towrite(dd);
 642  642  
 643  643          if (dd->dd_parent == NULL) {
 644  644                  uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
 645  645                  quota = MIN(quota, poolsize);
 646  646          }
 647  647  
 648  648          if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
 649  649                  /*
 650  650                   * We have some space reserved, in addition to what our
 651  651                   * parent gave us.
 652  652                   */
 653  653                  parentspace += dd->dd_phys->dd_reserved - used;
 654  654          }
 655  655  
 656  656          if (dd == ancestor) {
 657  657                  ASSERT(delta <= 0);
 658  658                  ASSERT(used >= -delta);
 659  659                  used += delta;
 660  660                  if (parentspace != UINT64_MAX)
 661  661                          parentspace -= delta;
 662  662          }
 663  663  
 664  664          if (used > quota) {
 665  665                  /* over quota */
 666  666                  myspace = 0;
 667  667          } else {
 668  668                  /*
 669  669                   * the lesser of the space provided by our parent and
 670  670                   * the space left in our quota
 671  671                   */
 672  672                  myspace = MIN(parentspace, quota - used);
 673  673          }
 674  674  
 675  675          mutex_exit(&dd->dd_lock);
 676  676  
 677  677          return (myspace);
 678  678  }
 679  679  
 680  680  struct tempreserve {
 681  681          list_node_t tr_node;
 682  682          dsl_pool_t *tr_dp;
 683  683          dsl_dir_t *tr_ds;
 684  684          uint64_t tr_size;
 685  685  };
 686  686  
           /*
            * Try to reserve asize bytes in dd for tx's txg, then recurse into
            * dd's parent for whatever share of the request parent_delta() says
            * the parent must also absorb.
            *
            *   dd             directory charged at this level
            *   asize          bytes requested (asserted to be > 0)
            *   netfree        treat the quota as unlimited; also handed to
            *                  dsl_pool_adjustedsize() when dd is the root
            *   ignorequota    treat dd's quota as unlimited
            *   checkrefquota  forwarded to dsl_dataset_check_quota()
            *   tr_list        receives one struct tempreserve per level reserved
            *   first          true only for the initial (non-recursive) call
            *
            * Returns 0 on success.  On failure returns the error from
            * dsl_dataset_check_quota(), or EDQUOT / ENOSPC / ERESTART from the
            * quota test below.  Entries already appended to tr_list are left
            * in place; dsl_dir_tempreserve_space() undoes them with
            * dsl_dir_tempreserve_clear() when it sees an error.
            */
 687  687  static int
 688  688  dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
 689  689      boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
 690  690      dmu_tx_t *tx, boolean_t first)
 691  691  {
 692  692          uint64_t txg = tx->tx_txg;
 693  693          uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
 694  694          uint64_t deferred = 0;
 695  695          struct tempreserve *tr;
 696  696          int retval = EDQUOT;
 697  697          int txgidx = txg & TXG_MASK;
 698  698          int i;
 699  699          uint64_t ref_rsrv = 0;
 700  700  
 701  701          ASSERT3U(txg, !=, 0);
 702  702          ASSERT3S(asize, >, 0);
 703  703  
 704  704          mutex_enter(&dd->dd_lock);
 705  705  
 706  706          /*
 707  707           * Check against the dsl_dir's quota.  We don't add in the delta
 708  708           * when checking for over-quota because they get one free hit.
 709  709           */
 710  710          est_inflight = dsl_dir_space_towrite(dd);
 711  711          for (i = 0; i < TXG_SIZE; i++)
 712  712                  est_inflight += dd->dd_tempreserved[i];
 713  713          used_on_disk = dd->dd_phys->dd_used_bytes;
 714  714  
 715  715          /*
 716  716           * On the first iteration, fetch the dataset's used-on-disk and
 717  717           * refreservation values. Also, if checkrefquota is set, test if
 718  718           * allocating this space would exceed the dataset's refquota.
 719  719           */
 720  720          if (first && tx->tx_objset) {
 721  721                  int error;
 722  722                  dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
 723  723  
 724  724                  error = dsl_dataset_check_quota(ds, checkrefquota,
 725  725                      asize, est_inflight, &used_on_disk, &ref_rsrv);
 726  726                  if (error) {
 727  727                          mutex_exit(&dd->dd_lock);
 728  728                          return (error);
 729  729                  }
 730  730          }
 731  731  
 732  732          /*
 733  733           * If this transaction will result in a net free of space,
 734  734           * we want to let it through.
 735  735           */
 736  736          if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
 737  737                  quota = UINT64_MAX;
 738  738          else
 739  739                  quota = dd->dd_phys->dd_quota;
 740  740  
 741  741          /*
 742  742           * Adjust the quota against the actual pool size at the root
 743  743           * minus any outstanding deferred frees.
 744  744           * To ensure that it's possible to remove files from a full
 745  745           * pool without inducing transient overcommits, we throttle
 746  746           * netfree transactions against a quota that is slightly larger,
 747  747           * but still within the pool's allocation slop.  In cases where
 748  748           * we're very close to full, this will allow a steady trickle of
 749  749           * removes to get through.
 750  750           */
 751  751          if (dd->dd_parent == NULL) {
 752  752                  spa_t *spa = dd->dd_pool->dp_spa;
 753  753                  uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
 754  754                  deferred = metaslab_class_get_deferred(spa_normal_class(spa));
                           /*
                            * When the pool size is the tighter limit, a failure
                            * below is reported as ENOSPC instead of EDQUOT.
                            */
 755  755                  if (poolsize - deferred < quota) {
 756  756                          quota = poolsize - deferred;
 757  757                          retval = ENOSPC;
 758  758                  }
 759  759          }
 760  760  
 761  761          /*
 762  762           * If they are requesting more space, and our current estimate
 763  763           * is over quota, they get to try again unless the actual
 764  764           * on-disk is over quota and there are no pending changes (which
 765  765           * may free up space for us).
 766  766           */
 767  767          if (used_on_disk + est_inflight >= quota) {
 768  768                  if (est_inflight > 0 || used_on_disk < quota ||
 769  769                      (retval == ENOSPC && used_on_disk < quota + deferred))
 770  770                          retval = ERESTART;
 771  771                  dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
 772  772                      "quota=%lluK tr=%lluK err=%d\n",
 773  773                      used_on_disk>>10, est_inflight>>10,
 774  774                      quota>>10, asize>>10, retval);
 775  775                  mutex_exit(&dd->dd_lock);
 776  776                  return (retval);
 777  777          }
 778  778  
 779  779          /* We need to up our estimated delta before dropping dd_lock */
 780  780          dd->dd_tempreserved[txgidx] += asize;
 781  781  
 782  782          parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
 783  783              asize - ref_rsrv);
 784  784          mutex_exit(&dd->dd_lock);
 785  785  
                   /*
                    * Record this level's reservation on the cookie list so that
                    * dsl_dir_tempreserve_clear() can subtract it again.
                    */
 786  786          tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 787  787          tr->tr_ds = dd;
 788  788          tr->tr_size = asize;
 789  789          list_insert_tail(tr_list, tr);
 790  790  
 791  791          /* see if it's OK with our parent */
 792  792          if (dd->dd_parent && parent_rsrv) {
                           /*
                            * NOTE(review): a zero dd_head_dataset_obj is taken to
                            * identify the MOS directory (per the variable name);
                            * in that case the parent's quota is ignored -- confirm.
                            */
 793  793                  boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
 794  794  
 795  795                  return (dsl_dir_tempreserve_impl(dd->dd_parent,
 796  796                      parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
 797  797          } else {
 798  798                  return (0);
 799  799          }
 800  800  }
 801  801  
 802  802  /*
 803  803   * Reserve space in this dsl_dir, to be used in this tx's txg.
 804  804   * After the space has been dirtied (and dsl_dir_willuse_space()
 805  805   * has been called), the reservation should be canceled, using
 806  806   * dsl_dir_tempreserve_clear().
            *
            * Three reservations are attempted in order, each recorded on the
            * cookie list: ARC (lsize), pool (asize), then the dsl_dir chain
            * (asize).  The dsl_dir step is told the tx is a net free when
            * fsize >= asize, and to check the refquota when asize > usize.
            *
            * On success *tr_cookiep is set to the cookie list (or to NULL when
            * asize is 0, in which case nothing is reserved).  On failure any
            * partial reservations are cleared, *tr_cookiep is not written, and
            * the error is returned.  EAGAIN from the ARC is turned into ERESTART
            * after a txg_delay().
 807  807   */
 808  808  int
 809  809  dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
 810  810      uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
 811  811  {
 812  812          int err;
 813  813          list_t *tr_list;
 814  814  
 815  815          if (asize == 0) {
 816  816                  *tr_cookiep = NULL;
 817  817                  return (0);
 818  818          }
 819  819  
 820  820          tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
 821  821          list_create(tr_list, sizeof (struct tempreserve),
 822  822              offsetof(struct tempreserve, tr_node));
 823  823          ASSERT3S(asize, >, 0);
 824  824          ASSERT3S(fsize, >=, 0);
 825  825  
 826  826          err = arc_tempreserve_space(lsize, tx->tx_txg);
 827  827          if (err == 0) {
 828  828                  struct tempreserve *tr;
 829  829  
                           /* Neither tr_dp nor tr_ds set: marks an ARC entry. */
 830  830                  tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 831  831                  tr->tr_size = lsize;
 832  832                  list_insert_tail(tr_list, tr);
 833  833  
 834  834                  err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
 835  835          } else {
 836  836                  if (err == EAGAIN) {
 837  837                          txg_delay(dd->dd_pool, tx->tx_txg, 1);
 838  838                          err = ERESTART;
 839  839                  }
 840  840                  dsl_pool_memory_pressure(dd->dd_pool);
 841  841          }
 842  842  
 843  843          if (err == 0) {
 844  844                  struct tempreserve *tr;
 845  845  
                           /* tr_dp set: marks the pool-level entry. */
 846  846                  tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
 847  847                  tr->tr_dp = dd->dd_pool;
 848  848                  tr->tr_size = asize;
 849  849                  list_insert_tail(tr_list, tr);
 850  850  
 851  851                  err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
 852  852                      FALSE, asize > usize, tr_list, tx, TRUE);
 853  853          }
 854  854  
                   /* On error, unwind whatever was recorded and free the list. */
 855  855          if (err)
 856  856                  dsl_dir_tempreserve_clear(tr_list, tx);
 857  857          else
 858  858                  *tr_cookiep = tr_list;
 859  859  
 860  860          return (err);
 861  861  }
 862  862  
 863  863  /*
 864  864   * Clear a temporary reservation that we previously made with
 865  865   * dsl_dir_tempreserve_space().
            *
            * tr_cookie is the list produced by dsl_dir_tempreserve_space(); a
            * NULL cookie is a no-op.  Each entry is undone according to its
            * kind (pool, dsl_dir, or ARC), removed and freed, and finally the
            * list itself is freed, so the cookie must not be used afterwards.
 866  866   */
 867  867  void
 868  868  dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
 869  869  {
 870  870          int txgidx = tx->tx_txg & TXG_MASK;
 871  871          list_t *tr_list = tr_cookie;
 872  872          struct tempreserve *tr;
 873  873  
 874  874          ASSERT3U(tx->tx_txg, !=, 0);
 875  875  
 876  876          if (tr_cookie == NULL)
 877  877                  return;
 878  878  
                   /* Drain from the head until list_head() returns NULL. */
 879  879          while (tr = list_head(tr_list)) {
 880  880                  if (tr->tr_dp) {
 881  881                          dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
 882  882                  } else if (tr->tr_ds) {
 883  883                          mutex_enter(&tr->tr_ds->dd_lock);
 884  884                          ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
 885  885                              tr->tr_size);
 886  886                          tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
 887  887                          mutex_exit(&tr->tr_ds->dd_lock);
 888  888                  } else {
 889  889                          arc_tempreserve_clear(tr->tr_size);
 890  890                  }
 891  891                  list_remove(tr_list, tr);
 892  892                  kmem_free(tr, sizeof (struct tempreserve));
 893  893          }
 894  894  
 895  895          kmem_free(tr_list, sizeof (list_t));
 896  896  }
 897  897  
           /*
            * Note in dd that `space' bytes are expected to be written (positive)
            * or freed (negative) in tx's txg, mark dd dirty, and recurse into
            * the parent with the portion computed by parent_delta().  Only a
            * positive `space' is added to dd_space_towrite; a negative value
            * still feeds the parent_delta() computation.
            */
 898  898  static void
 899  899  dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
 900  900  {
 901  901          int64_t parent_space;
 902  902          uint64_t est_used;
 903  903  
 904  904          mutex_enter(&dd->dd_lock);
 905  905          if (space > 0)
 906  906                  dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
 907  907  
 908  908          est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
 909  909          parent_space = parent_delta(dd, est_used, space);
 910  910          mutex_exit(&dd->dd_lock);
 911  911  
 912  912          /* Make sure that we clean up dd_space_to* */
 913  913          dsl_dir_dirty(dd, tx);
 914  914  
 915  915          /* XXX this is potentially expensive and unnecessary... */
 916  916          if (parent_space && dd->dd_parent)
 917  917                  dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
 918  918  }
 919  919  
 920  920  /*
 921  921   * Call in open context when we think we're going to write/free space,
 922  922   * eg. when dirtying data.  Be conservative (ie. OK to write less than
 923  923   * this or free more than this, but don't write more or free less).
            *
            * The estimate is reported to the pool first and then to dd and,
            * through dsl_dir_willuse_space_impl(), to its ancestors.
 924  924   */
 925  925  void
 926  926  dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
 927  927  {
 928  928          dsl_pool_willuse_space(dd->dd_pool, space, tx);
 929  929          dsl_dir_willuse_space_impl(dd, space, tx);
 930  930  }
 931  931  
 932  932  /* call from syncing context when we actually write/free space for this dd */
           /*
            * Applies the signed deltas `used', `compressed' and `uncompressed'
            * to dd's on-disk counters and, when DD_FLAG_USED_BREAKDOWN is set,
            * adds `used' to dd_used_breakdown[type].  dd_lock is taken here
            * unless the caller already holds it.
            *
            * The change is then rolled up into the parent as DD_USED_CHILD using
            * the amount parent_delta() says the parent should see; the remainder
            * (used - accounted_delta) is shifted in the parent from
            * DD_USED_CHILD_RSRV to DD_USED_CHILD via dsl_dir_transfer_space().
            */
 933  933  void
 934  934  dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
 935  935      int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
 936  936  {
 937  937          int64_t accounted_delta;
 938  938          boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
 939  939  
 940  940          ASSERT(dmu_tx_is_syncing(tx));
 941  941          ASSERT(type < DD_USED_NUM);
 942  942  
 943  943          dsl_dir_dirty(dd, tx);
 944  944  
 945  945          if (needlock)
 946  946                  mutex_enter(&dd->dd_lock);
                   /* Computed from the pre-update dd_used_bytes, before it changes. */
 947  947          accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
                   /* A negative delta must not take any counter below zero. */
 948  948          ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
 949  949          ASSERT(compressed >= 0 ||
 950  950              dd->dd_phys->dd_compressed_bytes >= -compressed);
 951  951          ASSERT(uncompressed >= 0 ||
 952  952              dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
 953  953          dd->dd_phys->dd_used_bytes += used;
 954  954          dd->dd_phys->dd_uncompressed_bytes += uncompressed;
 955  955          dd->dd_phys->dd_compressed_bytes += compressed;
 956  956  
 957  957          if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
 958  958                  ASSERT(used > 0 ||
 959  959                      dd->dd_phys->dd_used_breakdown[type] >= -used);
 960  960                  dd->dd_phys->dd_used_breakdown[type] += used;
 961  961  #ifdef DEBUG
                           /* DEBUG only: the breakdown must sum to dd_used_bytes. */
 962  962                  dd_used_t t;
 963  963                  uint64_t u = 0;
 964  964                  for (t = 0; t < DD_USED_NUM; t++)
 965  965                          u += dd->dd_phys->dd_used_breakdown[t];
 966  966                  ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
 967  967  #endif
 968  968          }
 969  969          if (needlock)
 970  970                  mutex_exit(&dd->dd_lock);
 971  971  
 972  972          if (dd->dd_parent != NULL) {
 973  973                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
 974  974                      accounted_delta, compressed, uncompressed, tx);
 975  975                  dsl_dir_transfer_space(dd->dd_parent,
 976  976                      used - accounted_delta,
 977  977                      DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
 978  978          }
 979  979  }
 980  980  
           /*
            * Move `delta' bytes from dd_used_breakdown[oldtype] to
            * dd_used_breakdown[newtype]; dd_used_bytes itself is not modified.
            * A negative delta moves space the other way.  Syncing context only
            * (asserted).  Does nothing when delta is 0 or when dd does not have
            * DD_FLAG_USED_BREAKDOWN set.  dd_lock is taken here unless the
            * caller already holds it.
            */
 981  981  void
 982  982  dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
 983  983      dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
 984  984  {
 985  985          boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
 986  986  
 987  987          ASSERT(dmu_tx_is_syncing(tx));
 988  988          ASSERT(oldtype < DD_USED_NUM);
 989  989          ASSERT(newtype < DD_USED_NUM);
 990  990  
 991  991          if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
 992  992                  return;
 993  993  
 994  994          dsl_dir_dirty(dd, tx);
 995  995          if (needlock)
 996  996                  mutex_enter(&dd->dd_lock);
                   /* Whichever bucket is being drained must hold at least |delta|. */
 997  997          ASSERT(delta > 0 ?
 998  998              dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
 999  999              dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
1000 1000          ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
1001 1001          dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
1002 1002          dd->dd_phys->dd_used_breakdown[newtype] += delta;
1003 1003          if (needlock)
1004 1004                  mutex_exit(&dd->dd_lock);
1005 1005  }
1006 1006  
           /*
            * Check function paired with dsl_dir_set_quota_sync() in the sync task
            * issued by dsl_dir_set_quota().  arg1 is the dsl_dataset_t, arg2 the
            * dsl_prop_setarg_t describing the new quota.
            *
            * Returns the error from dsl_prop_predict_sync() if that fails, 0 if
            * the effective quota is 0, ENOSPC if the new quota would be below
            * the reservation or below used + pending writes, and 0 otherwise.
            */
1007 1007  static int
1008 1008  dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
1009 1009  {
1010 1010          dsl_dataset_t *ds = arg1;
1011 1011          dsl_dir_t *dd = ds->ds_dir;
1012 1012          dsl_prop_setarg_t *psa = arg2;
1013 1013          int err;
1014 1014          uint64_t towrite;
1015 1015  
1016 1016          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
1017 1017                  return (err);
1018 1018  
1019 1019          if (psa->psa_effective_value == 0)
1020 1020                  return (0);
1021 1021  
1022 1022          mutex_enter(&dd->dd_lock);
1023 1023          /*
1024 1024           * If we are doing the preliminary check in open context, and
1025 1025           * there are pending changes, then don't fail it, since the
1026 1026           * pending changes could under-estimate the amount of space to be
1027 1027           * freed up.
1028 1028           */
1029 1029          towrite = dsl_dir_space_towrite(dd);
1030 1030          if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
1031 1031              (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
1032 1032              psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
1033 1033                  err = ENOSPC;
1034 1034          }
1035 1035          mutex_exit(&dd->dd_lock);
1036 1036          return (err);
1037 1037  }
1038 1038  
1039 1039  extern dsl_syncfunc_t dsl_prop_set_sync;
1040 1040  
           /*
            * Sync function for dsl_dir_set_quota().  arg1 is the dsl_dataset_t,
            * arg2 the dsl_prop_setarg_t.  Sets the property through
            * dsl_prop_set_sync(), stores the effective value in dd_phys->dd_quota
            * under dd_lock, and writes a "set quota" history record.
            */
1041 1041  static void
1042 1042  dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1043 1043  {
1044 1044          dsl_dataset_t *ds = arg1;
1045 1045          dsl_dir_t *dd = ds->ds_dir;
1046 1046          dsl_prop_setarg_t *psa = arg2;
1047 1047          uint64_t effective_value = psa->psa_effective_value;
1048 1048  
1049 1049          dsl_prop_set_sync(ds, psa, tx);
1050 1050          DSL_PROP_CHECK_PREDICTION(dd, psa);
1051 1051  
                   /* dd_phys is about to be modified; dirty its buffer first. */
1052 1052          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1053 1053  
1054 1054          mutex_enter(&dd->dd_lock);
1055 1055          dd->dd_phys->dd_quota = effective_value;
1056 1056          mutex_exit(&dd->dd_lock);
1057 1057  
1058 1058          spa_history_log_internal_dd(dd, "set quota", tx,
1059 1059              "quota=%lld", (longlong_t)effective_value);
1060 1060  }
1061 1061  
           /*
            * Set the "quota" property of the dsl_dir named ddname to `quota',
            * with property source `source'.
            *
            * Holds the dataset and opens the dsl_dir, waits for a txg to open so
            * that earlier frees are taken into account, then runs the
            * dsl_dir_set_quota_check() / dsl_dir_set_quota_sync() sync task.
            * Both holds are released before returning.
            *
            * Returns 0 on success, or the error from dsl_dataset_hold(),
            * dsl_dir_open() or dsl_sync_task_do().
            */
1062 1062  int
1063 1063  dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
1064 1064  {
1065 1065          dsl_dir_t *dd;
1066 1066          dsl_dataset_t *ds;
1067 1067          dsl_prop_setarg_t psa;
1068 1068          int err;
1069 1069  
                   /* The setarg keeps a pointer to the caller-supplied value. */
1070 1070          dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
1071 1071  
1072 1072          err = dsl_dataset_hold(ddname, FTAG, &ds);
1073 1073          if (err)
1074 1074                  return (err);
1075 1075  
1076 1076          err = dsl_dir_open(ddname, FTAG, &dd, NULL);
1077 1077          if (err) {
1078 1078                  dsl_dataset_rele(ds, FTAG);
1079 1079                  return (err);
1080 1080          }
1081 1081  
1082 1082          ASSERT(ds->ds_dir == dd);
1083 1083  
1084 1084          /*
1085 1085           * If someone removes a file, then tries to set the quota, we want to
1086 1086           * make sure the file freeing takes effect.
1087 1087           */
1088 1088          txg_wait_open(dd->dd_pool, 0);
1089 1089  
1090 1090          err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
1091 1091              dsl_dir_set_quota_sync, ds, &psa, 0);
1092 1092  
1093 1093          dsl_dir_close(dd, FTAG);
1094 1094          dsl_dataset_rele(ds, FTAG);
1095 1095          return (err);
1096 1096  }
1097 1097  
           /*
            * Check function for the reservation sync task.  arg1 is the
            * dsl_dataset_t, arg2 the dsl_prop_setarg_t.
            *
            * Returns the error from dsl_prop_predict_sync() if that fails.  In
            * open context it otherwise returns 0 without further checks.  In
            * syncing context it returns ENOSPC when the reservation would grow
            * the space charged (max(used, reserved)) by more than is available,
            * or when the new reservation exceeds a nonzero quota; 0 otherwise.
            */
1098 1098  int
1099 1099  dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
1100 1100  {
1101 1101          dsl_dataset_t *ds = arg1;
1102 1102          dsl_dir_t *dd = ds->ds_dir;
1103 1103          dsl_prop_setarg_t *psa = arg2;
1104 1104          uint64_t effective_value;
1105 1105          uint64_t used, avail;
1106 1106          int err;
1107 1107  
1108 1108          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
1109 1109                  return (err);
1110 1110  
1111 1111          effective_value = psa->psa_effective_value;
1112 1112  
1113 1113          /*
1114 1114           * If we are doing the preliminary check in open context, the
1115 1115           * space estimates may be inaccurate.
1116 1116           */
1117 1117          if (!dmu_tx_is_syncing(tx))
1118 1118                  return (0);
1119 1119  
1120 1120          mutex_enter(&dd->dd_lock);
1121 1121          used = dd->dd_phys->dd_used_bytes;
1122 1122          mutex_exit(&dd->dd_lock);
1123 1123  
                   /*
                    * Available space comes from the parent when there is one;
                    * for the root it is the adjusted pool size less what is used.
                    */
1124 1124          if (dd->dd_parent) {
1125 1125                  avail = dsl_dir_space_available(dd->dd_parent,
1126 1126                      NULL, 0, FALSE);
1127 1127          } else {
1128 1128                  avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1129 1129          }
1130 1130  
                   /* Only an increase in max(used, reservation) needs new space. */
1131 1131          if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
1132 1132                  uint64_t delta = MAX(used, effective_value) -
1133 1133                      MAX(used, dd->dd_phys->dd_reserved);
1134 1134  
1135 1135                  if (delta > avail)
1136 1136                          return (ENOSPC);
1137 1137                  if (dd->dd_phys->dd_quota > 0 &&
1138 1138                      effective_value > dd->dd_phys->dd_quota)
1139 1139                          return (ENOSPC);
1140 1140          }
1141 1141  
1142 1142          return (0);
1143 1143  }
1144 1144  
           /*
            * Store `value' as dd's reservation and charge the parent, as
            * DD_USED_CHILD_RSRV, with the resulting change in
            * max(used, reservation).  The delta is computed from the old
            * reservation before it is overwritten and may be negative.
            * dd's own lock is held across the call into the parent.
            */
1145 1145  static void
1146 1146  dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
1147 1147  {
1148 1148          uint64_t used;
1149 1149          int64_t delta;
1150 1150  
1151 1151          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1152 1152  
1153 1153          mutex_enter(&dd->dd_lock);
1154 1154          used = dd->dd_phys->dd_used_bytes;
1155 1155          delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
1156 1156          dd->dd_phys->dd_reserved = value;
1157 1157  
1158 1158          if (dd->dd_parent != NULL) {
1159 1159                  /* Roll up this additional usage into our ancestors */
1160 1160                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1161 1161                      delta, 0, 0, tx);
1162 1162          }
1163 1163          mutex_exit(&dd->dd_lock);
1164 1164  }
1165 1165  
1166 1166  
           /*
            * Sync function for dsl_dir_set_reservation().  arg1 is the
            * dsl_dataset_t, arg2 the dsl_prop_setarg_t.  Sets the property via
            * dsl_prop_set_sync(), applies the effective value with
            * dsl_dir_set_reservation_sync_impl(), and writes a
            * "set reservation" history record.
            */
1167 1167  static void
1168 1168  dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1169 1169  {
1170 1170          dsl_dataset_t *ds = arg1;
1171 1171          dsl_dir_t *dd = ds->ds_dir;
1172 1172          dsl_prop_setarg_t *psa = arg2;
1173 1173          uint64_t value = psa->psa_effective_value;
1174 1174  
1175 1175          dsl_prop_set_sync(ds, psa, tx);
1176 1176          DSL_PROP_CHECK_PREDICTION(dd, psa);
1177 1177  
1178 1178          dsl_dir_set_reservation_sync_impl(dd, value, tx);
1179 1179  
1180 1180          spa_history_log_internal_dd(dd, "set reservation", tx,
1181 1181              "reservation=%lld", (longlong_t)value);
1182 1182  }
1183 1183  
           /*
            * Set the "reservation" property of the dsl_dir named ddname to
            * `reservation', with property source `source'.
            *
            * Holds the dataset and opens the dsl_dir, runs the
            * dsl_dir_set_reservation_check() / dsl_dir_set_reservation_sync()
            * sync task, and releases both holds before returning.
            *
            * Returns 0 on success, or the error from dsl_dataset_hold(),
            * dsl_dir_open() or dsl_sync_task_do().
            */
1184 1184  int
1185 1185  dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
1186 1186      uint64_t reservation)
1187 1187  {
1188 1188          dsl_dir_t *dd;
1189 1189          dsl_dataset_t *ds;
1190 1190          dsl_prop_setarg_t psa;
1191 1191          int err;
1192 1192  
1193 1193          dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
1194 1194  
1195 1195          err = dsl_dataset_hold(ddname, FTAG, &ds);
1196 1196          if (err)
1197 1197                  return (err);
1198 1198  
1199 1199          err = dsl_dir_open(ddname, FTAG, &dd, NULL);
1200 1200          if (err) {
1201 1201                  dsl_dataset_rele(ds, FTAG);
1202 1202                  return (err);
1203 1203          }
1204 1204  
1205 1205          ASSERT(ds->ds_dir == dd);
1206 1206  
1207 1207          err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
1208 1208              dsl_dir_set_reservation_sync, ds, &psa, 0);
1209 1209  
1210 1210          dsl_dir_close(dd, FTAG);
1211 1211          dsl_dataset_rele(ds, FTAG);
1212 1212          return (err);
1213 1213  }
1214 1214  
           /*
            * Return the first dsl_dir on ds1's parent chain (starting with ds1
            * itself) that also appears on ds2's parent chain (starting with ds2
            * itself), or NULL if the two chains share no directory.  For every
            * step up from ds1 the whole of ds2's chain is rescanned.
            */
1215 1215  static dsl_dir_t *
1216 1216  closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1217 1217  {
1218 1218          for (; ds1; ds1 = ds1->dd_parent) {
1219 1219                  dsl_dir_t *dd;
1220 1220                  for (dd = ds2; dd; dd = dd->dd_parent) {
1221 1221                          if (ds1 == dd)
1222 1222                                  return (dd);
1223 1223                  }
1224 1224          }
1225 1225          return (NULL);
1226 1226  }
1227 1227  
1228 1228  /*
1229 1229   * If delta is applied to dd, how much of that delta would be applied to
1230 1230   * ancestor?  Syncing context only.
            *
            * Walks up from dd one level at a time, reducing delta through
            * parent_delta() at each level (under that level's dd_lock), and
            * returns the value that reaches `ancestor'.
            * NOTE(review): assumes `ancestor' is on dd's parent chain; there is
            * no NULL check on dd_parent before recursing -- confirm callers.
1231 1231   */
1232 1232  static int64_t
1233 1233  would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1234 1234  {
1235 1235          if (dd == ancestor)
1236 1236                  return (delta);
1237 1237  
1238 1238          mutex_enter(&dd->dd_lock);
1239 1239          delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
1240 1240          mutex_exit(&dd->dd_lock);
1241 1241          return (would_change(dd->dd_parent, delta, ancestor));
1242 1242  }
1243 1243  
           /*
            * Argument bundle passed as arg2 to dsl_dir_rename_check() and
            * dsl_dir_rename_sync(): the directory that will become the new
            * parent, and the new last-component name.  Both are filled in by
            * dsl_dir_open() in dsl_dir_rename().
            */
1244 1244  struct renamearg {
1245 1245          dsl_dir_t *newparent;
1246 1246          const char *mynewname;
1247 1247  };
1248 1248  
           /*
            * Check function for the rename sync task.  arg1 is the dsl_dir_t
            * being renamed, arg2 a struct renamearg.
            *
            * Returns EBUSY (syncing context only) if dd's dbuf has more than one
            * reference, EEXIST if the new name is already present in the new
            * parent's child ZAP, any other zap_lookup() error except ENOENT,
            * EINVAL if the new parent is dd itself or one of its descendants,
            * the error from dsl_dir_transfer_possible() if the move does not
            * fit, and 0 otherwise.
            */
1249 1249  static int
1250 1250  dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1251 1251  {
1252 1252          dsl_dir_t *dd = arg1;
1253 1253          struct renamearg *ra = arg2;
1254 1254          dsl_pool_t *dp = dd->dd_pool;
1255 1255          objset_t *mos = dp->dp_meta_objset;
1256 1256          int err;
1257 1257          uint64_t val;
1258 1258  
1259 1259          /*
1260 1260           * There should only be one reference, from dmu_objset_rename().
1261 1261           * Fleeting holds are also possible (eg, from "zfs list" getting
1262 1262           * stats), but any that are present in open context will likely
1263 1263           * be gone by syncing context, so only fail from syncing
1264 1264           * context.
1265 1265           */
1266 1266          if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
1267 1267                  return (EBUSY);
1268 1268  
1269 1269          /* check for existing name */
1270 1270          err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1271 1271              ra->mynewname, 8, 1, &val);
1272 1272          if (err == 0)
1273 1273                  return (EEXIST);
1274 1274          if (err != ENOENT)
1275 1275                  return (err);
1276 1276  
                   /* The space checks only matter when the parent actually changes. */
1277 1277          if (ra->newparent != dd->dd_parent) {
1278 1278                  /* is there enough space? */
1279 1279                  uint64_t myspace =
1280 1280                      MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1281 1281  
1282 1282                  /* no rename into our descendant */
1283 1283                  if (closest_common_ancestor(dd, ra->newparent) == dd)
1284 1284                          return (EINVAL);
1285 1285  
1286 1286                  if (err = dsl_dir_transfer_possible(dd->dd_parent,
1287 1287                      ra->newparent, myspace))
1288 1288                          return (err);
1289 1289          }
1290 1290  
1291 1291          return (0);
1292 1292  }
1293 1293  
           /*
            * Sync function for the rename sync task.  arg1 is the dsl_dir_t
            * being renamed, arg2 a struct renamearg.
            *
            * Logs the rename, and when the parent changes moves dd's used,
            * compressed and uncompressed totals (plus any unused reservation)
            * from the old parent to the new one.  It then removes dd's entry
            * from the old parent's child ZAP, updates dd_myname and
            * dd_parent_obj, swaps the hold on the old parent for one on the new
            * parent, and adds the entry to the new parent's child ZAP.
            */
1294 1294  static void
1295 1295  dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1296 1296  {
1297 1297          dsl_dir_t *dd = arg1;
1298 1298          struct renamearg *ra = arg2;
1299 1299          dsl_pool_t *dp = dd->dd_pool;
1300 1300          objset_t *mos = dp->dp_meta_objset;
1301 1301          int err;
1302 1302          char namebuf[MAXNAMELEN];
1303 1303  
1304 1304          ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
1305 1305  
1306 1306          /* Log this before we change the name. */
1307 1307          dsl_dir_name(ra->newparent, namebuf);
1308 1308          spa_history_log_internal_dd(dd, "rename", tx,
1309 1309              "-> %s/%s", namebuf, ra->mynewname);
1310 1310  
1311 1311          if (ra->newparent != dd->dd_parent) {
                           /* Take dd's totals out of the old parent, add to the new. */
1312 1312                  dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1313 1313                      -dd->dd_phys->dd_used_bytes,
1314 1314                      -dd->dd_phys->dd_compressed_bytes,
1315 1315                      -dd->dd_phys->dd_uncompressed_bytes, tx);
1316 1316                  dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
1317 1317                      dd->dd_phys->dd_used_bytes,
1318 1318                      dd->dd_phys->dd_compressed_bytes,
1319 1319                      dd->dd_phys->dd_uncompressed_bytes, tx);
1320 1320  
                           /* The unused part of the reservation moves as well. */
1321 1321                  if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
1322 1322                          uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
1323 1323                              dd->dd_phys->dd_used_bytes;
1324 1324  
1325 1325                          dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1326 1326                              -unused_rsrv, 0, 0, tx);
  
    | 
      ↓ open down ↓ | 
    728 lines elided | 
    
      ↑ open up ↑ | 
  
1327 1327                          dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
1328 1328                              unused_rsrv, 0, 0, tx);
1329 1329                  }
1330 1330          }
1331 1331  
1332 1332          dmu_buf_will_dirty(dd->dd_dbuf, tx);
1333 1333  
1334 1334          /* remove from old parent zapobj */
1335 1335          err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1336 1336              dd->dd_myname, tx);
1337      -        ASSERT3U(err, ==, 0);
     1337 +        ASSERT0(err);
1338 1338  
                   /*
                    * Rename in core, drop the hold on the old parent (tagged
                    * with dd) and take one on the new parent with the same tag.
                    */
1339 1339          (void) strcpy(dd->dd_myname, ra->mynewname);
1340 1340          dsl_dir_close(dd->dd_parent, dd);
1341 1341          dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
1342 1342          VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
1343 1343              ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
1344 1344  
1345 1345          /* add to new parent zapobj */
1346 1346          err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1347 1347              dd->dd_myname, 8, 1, &dd->dd_object, tx);
1348      -        ASSERT3U(err, ==, 0);
     1348 +        ASSERT0(err);
1349 1349  
1350 1350  }
1351 1351  
           /*
            * Rename dd to `newname'.  dsl_dir_open() on newname yields the
            * would-be parent directory and the remaining name component; the
            * rename itself is carried out by the dsl_dir_rename_check() /
            * dsl_dir_rename_sync() sync task.
            *
            * Returns the dsl_dir_open() error if the new parent cannot be
            * opened, ENXIO if it lives in a different pool, EEXIST if
            * dsl_dir_open() left no remaining component (mynewname == NULL),
            * and otherwise the result of dsl_sync_task_do().  The hold on the
            * new parent taken here is always released before returning.
            */
1352 1352  int
1353 1353  dsl_dir_rename(dsl_dir_t *dd, const char *newname)
1354 1354  {
1355 1355          struct renamearg ra;
1356 1356          int err;
1357 1357  
1358 1358          /* new parent should exist */
1359 1359          err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
1360 1360          if (err)
1361 1361                  return (err);
1362 1362  
1363 1363          /* can't rename to different pool */
1364 1364          if (dd->dd_pool != ra.newparent->dd_pool) {
1365 1365                  err = ENXIO;
1366 1366                  goto out;
1367 1367          }
1368 1368  
1369 1369          /* new name should not already exist */
1370 1370          if (ra.mynewname == NULL) {
1371 1371                  err = EEXIST;
1372 1372                  goto out;
1373 1373          }
1374 1374  
                   /*
                    * NOTE(review): the meaning of the trailing 3 is not visible
                    * here; the other sync tasks in this file pass 0 -- confirm
                    * against dsl_sync_task_do().
                    */
1375 1375          err = dsl_sync_task_do(dd->dd_pool,
1376 1376              dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
1377 1377  
1378 1378  out:
1379 1379          dsl_dir_close(ra.newparent, FTAG);
1380 1380          return (err);
1381 1381  }
1382 1382  
           /*
            * Decide whether `space' bytes can be moved from under sdd to under
            * tdd.  The amount that leaving sdd would free at the closest common
            * ancestor is computed with would_change() and passed to
            * dsl_dir_space_available() for tdd.
            *
            * Returns 0 if tdd has at least `space' available, ENOSPC otherwise.
            */
1383 1383  int
1384 1384  dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
1385 1385  {
1386 1386          dsl_dir_t *ancestor;
1387 1387          int64_t adelta;
1388 1388          uint64_t avail;
1389 1389  
1390 1390          ancestor = closest_common_ancestor(sdd, tdd);
1391 1391          adelta = would_change(sdd, -space, ancestor);
1392 1392          avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
1393 1393          if (avail < space)
1394 1394                  return (ENOSPC);
1395 1395  
1396 1396          return (0);
1397 1397  }
1398 1398  
           /*
            * Return a copy of dd->dd_snap_cmtime, read under dd_lock so the
            * caller gets a consistent snapshot of the structure.
            */
1399 1399  timestruc_t
1400 1400  dsl_dir_snap_cmtime(dsl_dir_t *dd)
1401 1401  {
1402 1402          timestruc_t t;
1403 1403  
1404 1404          mutex_enter(&dd->dd_lock);
1405 1405          t = dd->dd_snap_cmtime;
1406 1406          mutex_exit(&dd->dd_lock);
1407 1407  
1408 1408          return (t);
1409 1409  }
1410 1410  
           /*
            * Set dd->dd_snap_cmtime to the current time.  The time is fetched
            * with gethrestime() before dd_lock is taken, so the lock is held
            * only for the assignment.
            */
1411 1411  void
1412 1412  dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
1413 1413  {
1414 1414          timestruc_t t;
1415 1415  
1416 1416          gethrestime(&t);
1417 1417          mutex_enter(&dd->dd_lock);
1418 1418          dd->dd_snap_cmtime = t;
1419 1419          mutex_exit(&dd->dd_lock);
1420 1420  }
  
    | 
      ↓ open down ↓ | 
    62 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX