Print this page
    
OS-1566 filesystem limits for ZFS datasets
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/dmu_objset.c
          +++ new/usr/src/uts/common/fs/zfs/dmu_objset.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | ↓ open down ↓ | 13 lines elided | ↑ open up ↑ | 
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
       24 + * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  24   25   */
  25   26  
  26   27  /* Portions Copyright 2010 Robert Milkowski */
  27   28  
  28   29  #include <sys/cred.h>
  29   30  #include <sys/zfs_context.h>
  30   31  #include <sys/dmu_objset.h>
  31   32  #include <sys/dsl_dir.h>
  32   33  #include <sys/dsl_dataset.h>
  33   34  #include <sys/dsl_prop.h>
  34   35  #include <sys/dsl_pool.h>
  35   36  #include <sys/dsl_synctask.h>
  36   37  #include <sys/dsl_deleg.h>
  37   38  #include <sys/dnode.h>
  38   39  #include <sys/dbuf.h>
  39   40  #include <sys/zvol.h>
  40   41  #include <sys/dmu_tx.h>
  41   42  #include <sys/zap.h>
  42   43  #include <sys/zil.h>
  43   44  #include <sys/dmu_impl.h>
  44   45  #include <sys/zfs_ioctl.h>
  45   46  #include <sys/sa.h>
  46   47  #include <sys/zfs_onexit.h>
  47   48  
  48   49  /*
  49   50   * Needed to close a window in dnode_move() that allows the objset to be freed
  50   51   * before it can be safely accessed.
  51   52   */
  52   53  krwlock_t os_lock;
  53   54  
  54   55  void
  55   56  dmu_objset_init(void)
  56   57  {
  57   58          rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
  58   59  }
  59   60  
  60   61  void
  61   62  dmu_objset_fini(void)
  62   63  {
  63   64          rw_destroy(&os_lock);
  64   65  }
  65   66  
  66   67  spa_t *
  67   68  dmu_objset_spa(objset_t *os)
  68   69  {
  69   70          return (os->os_spa);
  70   71  }
  71   72  
  72   73  zilog_t *
  73   74  dmu_objset_zil(objset_t *os)
  74   75  {
  75   76          return (os->os_zil);
  76   77  }
  77   78  
  78   79  dsl_pool_t *
  79   80  dmu_objset_pool(objset_t *os)
  80   81  {
  81   82          dsl_dataset_t *ds;
  82   83  
  83   84          if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
  84   85                  return (ds->ds_dir->dd_pool);
  85   86          else
  86   87                  return (spa_get_dsl(os->os_spa));
  87   88  }
  88   89  
  89   90  dsl_dataset_t *
  90   91  dmu_objset_ds(objset_t *os)
  91   92  {
  92   93          return (os->os_dsl_dataset);
  93   94  }
  94   95  
  95   96  dmu_objset_type_t
  96   97  dmu_objset_type(objset_t *os)
  97   98  {
  98   99          return (os->os_phys->os_type);
  99  100  }
 100  101  
 101  102  void
 102  103  dmu_objset_name(objset_t *os, char *buf)
 103  104  {
 104  105          dsl_dataset_name(os->os_dsl_dataset, buf);
 105  106  }
 106  107  
 107  108  uint64_t
 108  109  dmu_objset_id(objset_t *os)
 109  110  {
 110  111          dsl_dataset_t *ds = os->os_dsl_dataset;
 111  112  
 112  113          return (ds ? ds->ds_object : 0);
 113  114  }
 114  115  
 115  116  uint64_t
 116  117  dmu_objset_syncprop(objset_t *os)
 117  118  {
 118  119          return (os->os_sync);
 119  120  }
 120  121  
 121  122  uint64_t
 122  123  dmu_objset_logbias(objset_t *os)
 123  124  {
 124  125          return (os->os_logbias);
 125  126  }
 126  127  
 127  128  static void
 128  129  checksum_changed_cb(void *arg, uint64_t newval)
 129  130  {
 130  131          objset_t *os = arg;
 131  132  
 132  133          /*
 133  134           * Inheritance should have been done by now.
 134  135           */
 135  136          ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 136  137  
 137  138          os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
 138  139  }
 139  140  
 140  141  static void
 141  142  compression_changed_cb(void *arg, uint64_t newval)
 142  143  {
 143  144          objset_t *os = arg;
 144  145  
 145  146          /*
 146  147           * Inheritance and range checking should have been done by now.
 147  148           */
 148  149          ASSERT(newval != ZIO_COMPRESS_INHERIT);
 149  150  
 150  151          os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
 151  152  }
 152  153  
 153  154  static void
 154  155  copies_changed_cb(void *arg, uint64_t newval)
 155  156  {
 156  157          objset_t *os = arg;
 157  158  
 158  159          /*
 159  160           * Inheritance and range checking should have been done by now.
 160  161           */
 161  162          ASSERT(newval > 0);
 162  163          ASSERT(newval <= spa_max_replication(os->os_spa));
 163  164  
 164  165          os->os_copies = newval;
 165  166  }
 166  167  
 167  168  static void
 168  169  dedup_changed_cb(void *arg, uint64_t newval)
 169  170  {
 170  171          objset_t *os = arg;
 171  172          spa_t *spa = os->os_spa;
 172  173          enum zio_checksum checksum;
 173  174  
 174  175          /*
 175  176           * Inheritance should have been done by now.
 176  177           */
 177  178          ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 178  179  
 179  180          checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
 180  181  
 181  182          os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
 182  183          os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
 183  184  }
 184  185  
 185  186  static void
 186  187  primary_cache_changed_cb(void *arg, uint64_t newval)
 187  188  {
 188  189          objset_t *os = arg;
 189  190  
 190  191          /*
 191  192           * Inheritance and range checking should have been done by now.
 192  193           */
 193  194          ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 194  195              newval == ZFS_CACHE_METADATA);
 195  196  
 196  197          os->os_primary_cache = newval;
 197  198  }
 198  199  
 199  200  static void
 200  201  secondary_cache_changed_cb(void *arg, uint64_t newval)
 201  202  {
 202  203          objset_t *os = arg;
 203  204  
 204  205          /*
 205  206           * Inheritance and range checking should have been done by now.
 206  207           */
 207  208          ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 208  209              newval == ZFS_CACHE_METADATA);
 209  210  
 210  211          os->os_secondary_cache = newval;
 211  212  }
 212  213  
 213  214  static void
 214  215  sync_changed_cb(void *arg, uint64_t newval)
 215  216  {
 216  217          objset_t *os = arg;
 217  218  
 218  219          /*
 219  220           * Inheritance and range checking should have been done by now.
 220  221           */
 221  222          ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
 222  223              newval == ZFS_SYNC_DISABLED);
 223  224  
 224  225          os->os_sync = newval;
 225  226          if (os->os_zil)
 226  227                  zil_set_sync(os->os_zil, newval);
 227  228  }
 228  229  
 229  230  static void
 230  231  logbias_changed_cb(void *arg, uint64_t newval)
 231  232  {
 232  233          objset_t *os = arg;
 233  234  
 234  235          ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
 235  236              newval == ZFS_LOGBIAS_THROUGHPUT);
 236  237          os->os_logbias = newval;
 237  238          if (os->os_zil)
 238  239                  zil_set_logbias(os->os_zil, newval);
 239  240  }
 240  241  
 241  242  void
 242  243  dmu_objset_byteswap(void *buf, size_t size)
 243  244  {
 244  245          objset_phys_t *osp = buf;
 245  246  
 246  247          ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
 247  248          dnode_byteswap(&osp->os_meta_dnode);
 248  249          byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
 249  250          osp->os_type = BSWAP_64(osp->os_type);
 250  251          osp->os_flags = BSWAP_64(osp->os_flags);
 251  252          if (size == sizeof (objset_phys_t)) {
 252  253                  dnode_byteswap(&osp->os_userused_dnode);
 253  254                  dnode_byteswap(&osp->os_groupused_dnode);
 254  255          }
 255  256  }
 256  257  
 257  258  int
 258  259  dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 259  260      objset_t **osp)
 260  261  {
 261  262          objset_t *os;
 262  263          int i, err;
 263  264  
 264  265          ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
 265  266  
 266  267          os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
 267  268          os->os_dsl_dataset = ds;
 268  269          os->os_spa = spa;
 269  270          os->os_rootbp = bp;
 270  271          if (!BP_IS_HOLE(os->os_rootbp)) {
 271  272                  uint32_t aflags = ARC_WAIT;
 272  273                  zbookmark_t zb;
 273  274                  SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 274  275                      ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 275  276  
 276  277                  if (DMU_OS_IS_L2CACHEABLE(os))
 277  278                          aflags |= ARC_L2CACHE;
 278  279  
 279  280                  dprintf_bp(os->os_rootbp, "reading %s", "");
 280  281                  /*
 281  282                   * XXX when bprewrite scrub can change the bp,
 282  283                   * and this is called from dmu_objset_open_ds_os, the bp
 283  284                   * could change, and we'll need a lock.
 284  285                   */
 285  286                  err = dsl_read_nolock(NULL, spa, os->os_rootbp,
 286  287                      arc_getbuf_func, &os->os_phys_buf,
 287  288                      ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 288  289                  if (err) {
 289  290                          kmem_free(os, sizeof (objset_t));
 290  291                          /* convert checksum errors into IO errors */
 291  292                          if (err == ECKSUM)
 292  293                                  err = EIO;
 293  294                          return (err);
 294  295                  }
 295  296  
 296  297                  /* Increase the blocksize if we are permitted. */
 297  298                  if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
 298  299                      arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
 299  300                          arc_buf_t *buf = arc_buf_alloc(spa,
 300  301                              sizeof (objset_phys_t), &os->os_phys_buf,
 301  302                              ARC_BUFC_METADATA);
 302  303                          bzero(buf->b_data, sizeof (objset_phys_t));
 303  304                          bcopy(os->os_phys_buf->b_data, buf->b_data,
 304  305                              arc_buf_size(os->os_phys_buf));
 305  306                          (void) arc_buf_remove_ref(os->os_phys_buf,
 306  307                              &os->os_phys_buf);
 307  308                          os->os_phys_buf = buf;
 308  309                  }
 309  310  
 310  311                  os->os_phys = os->os_phys_buf->b_data;
 311  312                  os->os_flags = os->os_phys->os_flags;
 312  313          } else {
 313  314                  int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
 314  315                      sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
 315  316                  os->os_phys_buf = arc_buf_alloc(spa, size,
 316  317                      &os->os_phys_buf, ARC_BUFC_METADATA);
 317  318                  os->os_phys = os->os_phys_buf->b_data;
 318  319                  bzero(os->os_phys, size);
 319  320          }
 320  321  
 321  322          /*
 322  323           * Note: the changed_cb will be called once before the register
 323  324           * func returns, thus changing the checksum/compression from the
 324  325           * default (fletcher2/off).  Snapshots don't need to know about
 325  326           * checksum/compression/copies.
 326  327           */
 327  328          if (ds) {
 328  329                  err = dsl_prop_register(ds, "primarycache",
 329  330                      primary_cache_changed_cb, os);
 330  331                  if (err == 0)
 331  332                          err = dsl_prop_register(ds, "secondarycache",
 332  333                              secondary_cache_changed_cb, os);
 333  334                  if (!dsl_dataset_is_snapshot(ds)) {
 334  335                          if (err == 0)
 335  336                                  err = dsl_prop_register(ds, "checksum",
 336  337                                      checksum_changed_cb, os);
 337  338                          if (err == 0)
 338  339                                  err = dsl_prop_register(ds, "compression",
 339  340                                      compression_changed_cb, os);
 340  341                          if (err == 0)
 341  342                                  err = dsl_prop_register(ds, "copies",
 342  343                                      copies_changed_cb, os);
 343  344                          if (err == 0)
 344  345                                  err = dsl_prop_register(ds, "dedup",
 345  346                                      dedup_changed_cb, os);
 346  347                          if (err == 0)
 347  348                                  err = dsl_prop_register(ds, "logbias",
 348  349                                      logbias_changed_cb, os);
 349  350                          if (err == 0)
 350  351                                  err = dsl_prop_register(ds, "sync",
 351  352                                      sync_changed_cb, os);
 352  353                  }
 353  354                  if (err) {
 354  355                          VERIFY(arc_buf_remove_ref(os->os_phys_buf,
 355  356                              &os->os_phys_buf) == 1);
 356  357                          kmem_free(os, sizeof (objset_t));
 357  358                          return (err);
 358  359                  }
 359  360          } else if (ds == NULL) {
 360  361                  /* It's the meta-objset. */
 361  362                  os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 362  363                  os->os_compress = ZIO_COMPRESS_LZJB;
 363  364                  os->os_copies = spa_max_replication(spa);
 364  365                  os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
 365  366                  os->os_dedup_verify = 0;
 366  367                  os->os_logbias = 0;
 367  368                  os->os_sync = 0;
 368  369                  os->os_primary_cache = ZFS_CACHE_ALL;
 369  370                  os->os_secondary_cache = ZFS_CACHE_ALL;
 370  371          }
 371  372  
 372  373          if (ds == NULL || !dsl_dataset_is_snapshot(ds))
 373  374                  os->os_zil_header = os->os_phys->os_zil_header;
 374  375          os->os_zil = zil_alloc(os, &os->os_zil_header);
 375  376  
 376  377          for (i = 0; i < TXG_SIZE; i++) {
 377  378                  list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 378  379                      offsetof(dnode_t, dn_dirty_link[i]));
 379  380                  list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 380  381                      offsetof(dnode_t, dn_dirty_link[i]));
 381  382          }
 382  383          list_create(&os->os_dnodes, sizeof (dnode_t),
 383  384              offsetof(dnode_t, dn_link));
 384  385          list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 385  386              offsetof(dmu_buf_impl_t, db_link));
 386  387  
 387  388          mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
 388  389          mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
 389  390          mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 390  391  
 391  392          DMU_META_DNODE(os) = dnode_special_open(os,
 392  393              &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
 393  394              &os->os_meta_dnode);
 394  395          if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
 395  396                  DMU_USERUSED_DNODE(os) = dnode_special_open(os,
 396  397                      &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
 397  398                      &os->os_userused_dnode);
 398  399                  DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
 399  400                      &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
 400  401                      &os->os_groupused_dnode);
 401  402          }
 402  403  
 403  404          /*
 404  405           * We should be the only thread trying to do this because we
 405  406           * hold ds_opening_lock.
 406  407           */
 407  408          if (ds) {
 408  409                  mutex_enter(&ds->ds_lock);
 409  410                  ASSERT(ds->ds_objset == NULL);
 410  411                  ds->ds_objset = os;
 411  412                  mutex_exit(&ds->ds_lock);
 412  413          }
 413  414  
 414  415          *osp = os;
 415  416          return (0);
 416  417  }
 417  418  
 418  419  int
 419  420  dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 420  421  {
 421  422          int err = 0;
 422  423  
 423  424          mutex_enter(&ds->ds_opening_lock);
 424  425          *osp = ds->ds_objset;
 425  426          if (*osp == NULL) {
 426  427                  err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
 427  428                      ds, dsl_dataset_get_blkptr(ds), osp);
 428  429          }
 429  430          mutex_exit(&ds->ds_opening_lock);
 430  431          return (err);
 431  432  }
 432  433  
 433  434  /* called from zpl */
 434  435  int
 435  436  dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 436  437  {
 437  438          dsl_dataset_t *ds;
 438  439          int err;
 439  440  
 440  441          err = dsl_dataset_hold(name, tag, &ds);
 441  442          if (err)
 442  443                  return (err);
 443  444  
 444  445          err = dmu_objset_from_ds(ds, osp);
 445  446          if (err)
 446  447                  dsl_dataset_rele(ds, tag);
 447  448  
 448  449          return (err);
 449  450  }
 450  451  
 451  452  /* called from zpl */
 452  453  int
 453  454  dmu_objset_own(const char *name, dmu_objset_type_t type,
 454  455      boolean_t readonly, void *tag, objset_t **osp)
 455  456  {
 456  457          dsl_dataset_t *ds;
 457  458          int err;
 458  459  
 459  460          err = dsl_dataset_own(name, B_FALSE, tag, &ds);
 460  461          if (err)
 461  462                  return (err);
 462  463  
 463  464          err = dmu_objset_from_ds(ds, osp);
 464  465          if (err) {
 465  466                  dsl_dataset_disown(ds, tag);
 466  467          } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
 467  468                  dmu_objset_disown(*osp, tag);
 468  469                  return (EINVAL);
 469  470          } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
 470  471                  dmu_objset_disown(*osp, tag);
 471  472                  return (EROFS);
 472  473          }
 473  474          return (err);
 474  475  }
 475  476  
 476  477  void
 477  478  dmu_objset_rele(objset_t *os, void *tag)
 478  479  {
 479  480          dsl_dataset_rele(os->os_dsl_dataset, tag);
 480  481  }
 481  482  
 482  483  void
 483  484  dmu_objset_disown(objset_t *os, void *tag)
 484  485  {
 485  486          dsl_dataset_disown(os->os_dsl_dataset, tag);
 486  487  }
 487  488  
 488  489  int
 489  490  dmu_objset_evict_dbufs(objset_t *os)
 490  491  {
 491  492          dnode_t *dn;
 492  493  
 493  494          mutex_enter(&os->os_lock);
 494  495  
 495  496          /* process the mdn last, since the other dnodes have holds on it */
 496  497          list_remove(&os->os_dnodes, DMU_META_DNODE(os));
 497  498          list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
 498  499  
 499  500          /*
 500  501           * Find the first dnode with holds.  We have to do this dance
 501  502           * because dnode_add_ref() only works if you already have a
 502  503           * hold.  If there are no holds then it has no dbufs so OK to
 503  504           * skip.
 504  505           */
 505  506          for (dn = list_head(&os->os_dnodes);
 506  507              dn && !dnode_add_ref(dn, FTAG);
 507  508              dn = list_next(&os->os_dnodes, dn))
 508  509                  continue;
 509  510  
 510  511          while (dn) {
 511  512                  dnode_t *next_dn = dn;
 512  513  
 513  514                  do {
 514  515                          next_dn = list_next(&os->os_dnodes, next_dn);
 515  516                  } while (next_dn && !dnode_add_ref(next_dn, FTAG));
 516  517  
 517  518                  mutex_exit(&os->os_lock);
 518  519                  dnode_evict_dbufs(dn);
 519  520                  dnode_rele(dn, FTAG);
 520  521                  mutex_enter(&os->os_lock);
 521  522                  dn = next_dn;
 522  523          }
 523  524          dn = list_head(&os->os_dnodes);
 524  525          mutex_exit(&os->os_lock);
 525  526          return (dn != DMU_META_DNODE(os));
 526  527  }
 527  528  
 528  529  void
 529  530  dmu_objset_evict(objset_t *os)
 530  531  {
 531  532          dsl_dataset_t *ds = os->os_dsl_dataset;
 532  533  
 533  534          for (int t = 0; t < TXG_SIZE; t++)
 534  535                  ASSERT(!dmu_objset_is_dirty(os, t));
 535  536  
 536  537          if (ds) {
 537  538                  if (!dsl_dataset_is_snapshot(ds)) {
 538  539                          VERIFY(0 == dsl_prop_unregister(ds, "checksum",
 539  540                              checksum_changed_cb, os));
 540  541                          VERIFY(0 == dsl_prop_unregister(ds, "compression",
 541  542                              compression_changed_cb, os));
 542  543                          VERIFY(0 == dsl_prop_unregister(ds, "copies",
 543  544                              copies_changed_cb, os));
 544  545                          VERIFY(0 == dsl_prop_unregister(ds, "dedup",
 545  546                              dedup_changed_cb, os));
 546  547                          VERIFY(0 == dsl_prop_unregister(ds, "logbias",
 547  548                              logbias_changed_cb, os));
 548  549                          VERIFY(0 == dsl_prop_unregister(ds, "sync",
 549  550                              sync_changed_cb, os));
 550  551                  }
 551  552                  VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
 552  553                      primary_cache_changed_cb, os));
 553  554                  VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
 554  555                      secondary_cache_changed_cb, os));
 555  556          }
 556  557  
 557  558          if (os->os_sa)
 558  559                  sa_tear_down(os);
 559  560  
 560  561          /*
 561  562           * We should need only a single pass over the dnode list, since
 562  563           * nothing can be added to the list at this point.
 563  564           */
 564  565          (void) dmu_objset_evict_dbufs(os);
 565  566  
 566  567          dnode_special_close(&os->os_meta_dnode);
 567  568          if (DMU_USERUSED_DNODE(os)) {
 568  569                  dnode_special_close(&os->os_userused_dnode);
 569  570                  dnode_special_close(&os->os_groupused_dnode);
 570  571          }
 571  572          zil_free(os->os_zil);
 572  573  
 573  574          ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
 574  575  
 575  576          VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
 576  577  
 577  578          /*
 578  579           * This is a barrier to prevent the objset from going away in
 579  580           * dnode_move() until we can safely ensure that the objset is still in
 580  581           * use. We consider the objset valid before the barrier and invalid
 581  582           * after the barrier.
 582  583           */
 583  584          rw_enter(&os_lock, RW_READER);
 584  585          rw_exit(&os_lock);
 585  586  
 586  587          mutex_destroy(&os->os_lock);
 587  588          mutex_destroy(&os->os_obj_lock);
 588  589          mutex_destroy(&os->os_user_ptr_lock);
 589  590          kmem_free(os, sizeof (objset_t));
 590  591  }
 591  592  
 592  593  timestruc_t
 593  594  dmu_objset_snap_cmtime(objset_t *os)
 594  595  {
 595  596          return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
 596  597  }
 597  598  
 598  599  /* called from dsl for meta-objset */
 599  600  objset_t *
 600  601  dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 601  602      dmu_objset_type_t type, dmu_tx_t *tx)
 602  603  {
 603  604          objset_t *os;
 604  605          dnode_t *mdn;
 605  606  
 606  607          ASSERT(dmu_tx_is_syncing(tx));
 607  608          if (ds != NULL)
 608  609                  VERIFY(0 == dmu_objset_from_ds(ds, &os));
 609  610          else
 610  611                  VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os));
 611  612  
 612  613          mdn = DMU_META_DNODE(os);
 613  614  
 614  615          dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
 615  616              DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
 616  617  
 617  618          /*
 618  619           * We don't want to have to increase the meta-dnode's nlevels
 619  620           * later, because then we could do it in quiescing context while
 620  621           * we are also accessing it in open context.
 621  622           *
 622  623           * This precaution is not necessary for the MOS (ds == NULL),
 623  624           * because the MOS is only updated in syncing context.
 624  625           * This is most fortunate: the MOS is the only objset that
 625  626           * needs to be synced multiple times as spa_sync() iterates
 626  627           * to convergence, so minimizing its dn_nlevels matters.
 627  628           */
 628  629          if (ds != NULL) {
 629  630                  int levels = 1;
 630  631  
 631  632                  /*
 632  633                   * Determine the number of levels necessary for the meta-dnode
 633  634                   * to contain DN_MAX_OBJECT dnodes.
 634  635                   */
 635  636                  while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
 636  637                      (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
 637  638                      DN_MAX_OBJECT * sizeof (dnode_phys_t))
 638  639                          levels++;
 639  640  
 640  641                  mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
 641  642                      mdn->dn_nlevels = levels;
 642  643          }
 643  644  
 644  645          ASSERT(type != DMU_OST_NONE);
 645  646          ASSERT(type != DMU_OST_ANY);
 646  647          ASSERT(type < DMU_OST_NUMTYPES);
 647  648          os->os_phys->os_type = type;
 648  649          if (dmu_objset_userused_enabled(os)) {
 649  650                  os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 650  651                  os->os_flags = os->os_phys->os_flags;
 651  652          }
 652  653  
 653  654          dsl_dataset_dirty(ds, tx);
 654  655  
 655  656          return (os);
 656  657  }
 657  658  
 658  659  struct oscarg {
 659  660          void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
 660  661          void *userarg;
 661  662          dsl_dataset_t *clone_origin;
 662  663          const char *lastname;
 663  664          dmu_objset_type_t type;
 664  665          uint64_t flags;
 665  666          cred_t *cr;
 666  667  };
 667  668  
 668  669  /*ARGSUSED*/
 669  670  static int
 670  671  dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
 671  672  {
 672  673          dsl_dir_t *dd = arg1;
 673  674          struct oscarg *oa = arg2;
 674  675          objset_t *mos = dd->dd_pool->dp_meta_objset;
 675  676          int err;
 676  677          uint64_t ddobj;
 677  678  
 678  679          err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
 679  680              oa->lastname, sizeof (uint64_t), 1, &ddobj);
 680  681          if (err != ENOENT)
 681  682                  return (err ? err : EEXIST);
 682  683  
  
    | ↓ open down ↓ | 649 lines elided | ↑ open up ↑ | 
 683  684          if (oa->clone_origin != NULL) {
 684  685                  /* You can't clone across pools. */
 685  686                  if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
 686  687                          return (EXDEV);
 687  688  
 688  689                  /* You can only clone snapshots, not the head datasets. */
 689  690                  if (!dsl_dataset_is_snapshot(oa->clone_origin))
 690  691                          return (EINVAL);
 691  692          }
 692  693  
 693      -        return (0);
      694 +        return (dsl_dir_fscount_check(dd, 1, NULL, oa->cr));
 694  695  }
 695  696  
 696  697  static void
 697  698  dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 698  699  {
 699  700          dsl_dir_t *dd = arg1;
 700  701          spa_t *spa = dd->dd_pool->dp_spa;
 701  702          struct oscarg *oa = arg2;
 702  703          uint64_t obj;
 703  704          dsl_dataset_t *ds;
 704  705          blkptr_t *bp;
 705  706  
 706  707          ASSERT(dmu_tx_is_syncing(tx));
 707  708  
      709 +        dsl_dir_fscount_adjust(dd, tx, 1, B_TRUE);
      710 +
 708  711          obj = dsl_dataset_create_sync(dd, oa->lastname,
 709  712              oa->clone_origin, oa->flags, oa->cr, tx);
 710  713  
 711  714          VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds));
 712  715          bp = dsl_dataset_get_blkptr(ds);
 713  716          if (BP_IS_HOLE(bp)) {
 714  717                  objset_t *os =
 715  718                      dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
 716  719  
 717  720                  if (oa->userfunc)
 718  721                          oa->userfunc(os, oa->userarg, oa->cr, tx);
 719  722          }
 720  723  
 721  724          if (oa->clone_origin == NULL) {
 722  725                  spa_history_log_internal_ds(ds, "create", tx, "");
 723  726          } else {
 724  727                  char namebuf[MAXNAMELEN];
 725  728                  dsl_dataset_name(oa->clone_origin, namebuf);
 726  729                  spa_history_log_internal_ds(ds, "clone", tx,
 727  730                      "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object);
 728  731          }
 729  732          dsl_dataset_rele(ds, FTAG);
 730  733  }
 731  734  
           /*
            * Create a new objset called 'name' of the given type.  'func' and
            * 'arg' are stored in the oscarg as userfunc/userarg for the sync
            * callback.
            *
            * The name must not contain '@' (asserted).  If dsl_dir_open()
            * leaves no unresolved tail component, the call fails with EEXIST.
            * The caller's credentials (CRED()) are stored in the oscarg so that
            * the check and sync callbacks can use them.
            *
            * Returns 0 on success or the error from dsl_dir_open() or the sync
            * task.
            */
 732  735  int
 733  736  dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
 734  737      void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
 735  738  {
 736  739          dsl_dir_t *pdd;
 737  740          const char *tail;
 738  741          int err = 0;
 739  742          struct oscarg oa = { 0 };
 740  743  
 741  744          ASSERT(strchr(name, '@') == NULL);
 742  745          err = dsl_dir_open(name, FTAG, &pdd, &tail);
 743  746          if (err)
 744  747                  return (err);
 745  748          if (tail == NULL) {
 746  749                  dsl_dir_close(pdd, FTAG);
 747  750                  return (EEXIST);
 748  751          }
 749  752  
 750  753          oa.userfunc = func;
 751  754          oa.userarg = arg;
 752  755          oa.lastname = tail;
 753  756          oa.type = type;
 754  757          oa.flags = flags;
 755  758          oa.cr = CRED();
 756  759  
 757  760          err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 758  761              dmu_objset_create_sync, pdd, &oa, 5);
 759  762          dsl_dir_close(pdd, FTAG);
 760  763          return (err);
 761  764  }
 762  765  
           /*
            * Create 'name' as a clone of 'clone_origin'.  Uses the same check
            * and sync callbacks as dmu_objset_create(); the non-NULL
            * clone_origin in the oscarg is what makes the sync callback log a
            * "clone" rather than a "create".
            *
            * The name must not contain '@' (asserted).  Fails with EEXIST when
            * dsl_dir_open() leaves no unresolved tail component.  Returns 0 on
            * success or the error from dsl_dir_open() or the sync task.
            */
 763  766  int
 764  767  dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
 765  768  {
 766  769          dsl_dir_t *pdd;
 767  770          const char *tail;
 768  771          int err = 0;
 769  772          struct oscarg oa = { 0 };
 770  773  
 771  774          ASSERT(strchr(name, '@') == NULL);
 772  775          err = dsl_dir_open(name, FTAG, &pdd, &tail);
 773  776          if (err)
 774  777                  return (err);
 775  778          if (tail == NULL) {
 776  779                  dsl_dir_close(pdd, FTAG);
 777  780                  return (EEXIST);
 778  781          }
 779  782  
 780  783          oa.lastname = tail;
 781  784          oa.clone_origin = clone_origin;
 782  785          oa.flags = flags;
 783  786          oa.cr = CRED();
 784  787  
 785  788          err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 786  789              dmu_objset_create_sync, pdd, &oa, 5);
 787  790          dsl_dir_close(pdd, FTAG);
 788  791          return (err);
 789  792  }
 790  793  
           /*
            * Destroy the dataset called 'name'.  The dataset is first owned
            * with dsl_dataset_own(); if that succeeds, it is handed to
            * dsl_dataset_destroy() along with 'defer'.  Returns 0 or the error
            * from either call.
            */
 791  794  int
 792  795  dmu_objset_destroy(const char *name, boolean_t defer)
 793  796  {
 794  797          dsl_dataset_t *ds;
 795  798          int error;
 796  799  
 797  800          error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
 798  801          if (error == 0) {
 799  802                  error = dsl_dataset_destroy(ds, FTAG, defer);
  
    | ↓ open down ↓ | 82 lines elided | ↑ open up ↑ | 
 800  803                  /* dsl_dataset_destroy() closes the ds. */
 801  804          }
 802  805  
 803  806          return (error);
 804  807  }
 805  808  
           /*
            * Arguments shared by every snapshot task queued on one sync task
            * group.  saa_cr is the credential that snapshot_check() hands to
            * dsl_dataset_snapshot_check(); dmu_objset_snapshot() sets it from
            * CRED().
            */
 806  809  typedef struct snapallarg {
 807  810          dsl_sync_task_group_t *saa_dstg;
 808  811          boolean_t saa_needsuspend;
 809  812          nvlist_t *saa_props;
      813 +        cred_t *saa_cr;
 810  814  
 811  815          /* the following are used only if 'temporary' is set: */
 812  816          boolean_t saa_temporary;
 813  817          const char *saa_htag;
 814  818          struct dsl_ds_holdarg *saa_ha;
 815  819          dsl_dataset_t *saa_newds;
 816  820  } snapallarg_t;
 817  821  
           /*
            * Per-snapshot arguments.  soa_tot_cnt is the count that
            * snapshot_check() passes to dsl_dataset_snapshot_check(); it is
            * supplied by the caller of snapshot_one_impl().
            */
 818  822  typedef struct snaponearg {
 819  823          const char *soa_longname; /* long snap name */
 820  824          const char *soa_snapname; /* short snap name */
      825 +        uint64_t soa_tot_cnt;
 821  826          snapallarg_t *soa_saa;
 822  827  } snaponearg_t;
 823  828  
           /*
            * Check half of the snapshot sync task.  arg1 is the objset being
            * snapshotted and arg2 is its snaponearg_t.
            *
            * Runs dsl_dataset_snapshot_check() with the short snapshot name,
            * the precomputed count (soa_tot_cnt) and the saved credential
            * (saa_cr).  For temporary snapshots it additionally checks the pool
            * version and the hold tag length, and allocates the dsl_ds_holdarg
            * that snapshot_sync() later consumes and frees.
            *
            * Returns 0 if the checks pass, otherwise an errno.
            */
 824  829  static int
 825  830  snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
 826  831  {
 827  832          objset_t *os = arg1;
 828  833          snaponearg_t *soa = arg2;
 829  834          snapallarg_t *saa = soa->soa_saa;
 830  835          int error;
 831  836  
 832  837          /* The props have already been checked by zfs_check_userprops(). */
 833  838  
 834  839          error = dsl_dataset_snapshot_check(os->os_dsl_dataset,
 835      -            soa->soa_snapname, tx);
      840 +            soa->soa_snapname, soa->soa_tot_cnt, tx, saa->saa_cr);
 836  841          if (error)
 837  842                  return (error);
 838  843  
 839  844          if (saa->saa_temporary) {
 840  845                  /*
 841  846                   * Ideally we would just call
 842  847                   * dsl_dataset_user_hold_check() and
 843  848                   * dsl_dataset_destroy_check() here.  However the
 844  849                   * dataset we want to hold and destroy is the snapshot
 845  850                   * that we just confirmed we can create, but it won't
 846  851                   * exist until after these checks are run.  Do any
 847  852                   * checks we can here and if more checks are added to
 848  853                   * those routines in the future, similar checks may be
 849  854                   * necessary here.
 850  855                   */
 851  856                  if (spa_version(os->os_spa) < SPA_VERSION_USERREFS)
 852  857                          return (ENOTSUP);
 853  858                  /*
 854  859                   * Not checking number of tags because the tag will be
 855  860                   * unique, as it will be the only tag.
 856  861                   */
 857  862                  if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
 858  863                          return (E2BIG);
 859  864  
 860  865                  saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg),
 861  866                      KM_SLEEP);
 862  867                  saa->saa_ha->temphold = B_TRUE;
 863  868                  saa->saa_ha->htag = saa->saa_htag;
 864  869          }
 865  870          return (error);
 866  871  }
 867  872  
           /*
            * Sync half of the snapshot sync task.  Takes the snapshot, then
            * applies saa_props (if any) to ds->ds_prev with ZPROP_SRC_LOCAL.
            * For temporary snapshots it also places the user hold on
            * ds->ds_prev, frees the holdarg allocated by snapshot_check(),
            * remembers ds->ds_prev in saa_newds and issues a deferred destroy
            * (da.defer = B_TRUE) on it.
            */
 868  873  static void
 869  874  snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 870  875  {
 871  876          objset_t *os = arg1;
 872  877          dsl_dataset_t *ds = os->os_dsl_dataset;
 873  878          snaponearg_t *soa = arg2;
 874  879          snapallarg_t *saa = soa->soa_saa;
 875  880  
 876  881          dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx);
 877  882  
 878  883          if (saa->saa_props != NULL) {
 879  884                  dsl_props_arg_t pa;
 880  885                  pa.pa_props = saa->saa_props;
 881  886                  pa.pa_source = ZPROP_SRC_LOCAL;
 882  887                  dsl_props_set_sync(ds->ds_prev, &pa, tx);
 883  888          }
 884  889  
 885  890          if (saa->saa_temporary) {
 886  891                  struct dsl_ds_destroyarg da;
 887  892  
 888  893                  dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx);
 889  894                  kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg));
  
    | ↓ open down ↓ | 44 lines elided | ↑ open up ↑ | 
 890  895                  saa->saa_ha = NULL;
 891  896                  saa->saa_newds = ds->ds_prev;
 892  897  
 893  898                  da.ds = ds->ds_prev;
 894  899                  da.defer = B_TRUE;
 895  900                  dsl_dataset_destroy_sync(&da, FTAG, tx);
 896  901          }
 897  902  }
 898  903  
           /*
            * Queue one snapshot on the sync task group held in 'arg' (a
            * snapallarg_t).  'snapname' is the full "fs@snap" name and 'cnt' is
            * stored in soa_tot_cnt for snapshot_check().
            *
            * Holds the objset (tagged with the snapallarg_t), refuses with
            * EBUSY if the dataset is flagged inconsistent, and suspends the ZIL
            * when saa_needsuspend is set.  On success the objset hold and the
            * allocated snaponearg_t stay attached to the queued task; they are
            * not released here (dmu_objset_snapshot() releases and frees them
            * after the group has run).
            *
            * The snaponearg_t keeps pointers into 'snapname', so that string
            * must remain valid while the task is queued.
            *
            * Returns 0 on success or an errno; nothing is queued on failure.
            */
 899  904  static int
 900      -snapshot_one_impl(const char *snapname, void *arg)
      905 +snapshot_one_impl(const char *snapname, void *arg, uint64_t cnt)
 901  906  {
 902  907          char fsname[MAXPATHLEN];
 903  908          snapallarg_t *saa = arg;
 904  909          snaponearg_t *soa;
 905  910          objset_t *os;
 906  911          int err;
 907  912  
                   /* Strip "@snap" to get the name of the filesystem to hold. */
 908  913          (void) strlcpy(fsname, snapname, sizeof (fsname));
 909  914          strchr(fsname, '@')[0] = '\0';
 910  915  
 911  916          err = dmu_objset_hold(fsname, saa, &os);
 912  917          if (err != 0)
 913  918                  return (err);
 914  919  
 915  920          /*
 916  921           * If the objset is in an inconsistent state (eg, in the process
 917  922           * of being destroyed), don't snapshot it.
 918  923           */
 919  924          if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
 920  925                  dmu_objset_rele(os, saa);
 921  926                  return (EBUSY);
 922  927          }
 923  928  
 924  929          if (saa->saa_needsuspend) {
 925  930                  err = zil_suspend(dmu_objset_zil(os));
  
    | ↓ open down ↓ | 15 lines elided | ↑ open up ↑ | 
 926  931                  if (err) {
 927  932                          dmu_objset_rele(os, saa);
 928  933                          return (err);
 929  934                  }
 930  935          }
 931  936  
 932  937          soa = kmem_zalloc(sizeof (*soa), KM_SLEEP);
 933  938          soa->soa_saa = saa;
 934  939          soa->soa_longname = snapname;
 935  940          soa->soa_snapname = strchr(snapname, '@') + 1;
      941 +        soa->soa_tot_cnt = cnt;
 936  942  
 937  943          dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync,
 938  944              os, soa, 3);
 939  945  
 940  946          return (0);
 941  947  }
 942  948  
 943  949  /*
            * Take every snapshot named in 'snaps' (the pair names are full
            * "fs@snap" names) through a single sync task group.  'props', if
            * non-NULL, is applied to each snapshot by snapshot_sync().  If
            * 'errors' is non-NULL, each per-snapshot failure is recorded in it
            * as an int32 keyed by the snapshot name.
            *
            * Returns 0 on success (including an empty 'snaps'), otherwise an
            * errno from one of the steps.
            *
 944  950   * The snapshots must all be in the same pool.
 945  951   */
 946  952  int
 947  953  dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
 948  954  {
 949  955          dsl_sync_task_t *dst;
 950  956          snapallarg_t saa = { 0 };
 951  957          spa_t *spa;
 952  958          int rv = 0;
 953  959          int err;
 954  960          nvpair_t *pair;
      961 +        nvlist_t *cnt_track = NULL;
      962 +        char *pdelim;
      963 +        uint64_t val;
      964 +        char nm[MAXPATHLEN];
 955  965  
 956  966          pair = nvlist_next_nvpair(snaps, NULL);
 957  967          if (pair == NULL)
 958  968                  return (0);
 959  969  
                   /* The pool is opened using the first snapshot's name. */
 960  970          err = spa_open(nvpair_name(pair), &spa, FTAG);
 961  971          if (err)
 962  972                  return (err);
 963  973          saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 964  974          saa.saa_props = props;
 965  975          saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
      976 +        saa.saa_cr = CRED();
 966  977  
      978 +        /*
      979 +         * Pre-compute how many total new snapshots will be created for each
      980 +         * level in the tree and below. This is needed for validating the
      981 +         * snapshot limit when taking a recursive snapshot.
      982 +         *
      983 +         * The problem is that the counts are not actually adjusted when
      984 +         * we are checking, only when we finally sync. For a single snapshot,
      985 +         * this is easy, the count will increase by 1 at each node up the tree,
      986 +         * but it's more complicated for recursive snapshots. Since we are
      987 +         * validating each snapshot independently we need to be sure that we
      988 +         * are validating the complete count for the entire set of snapshots.
      989 +         * We do this by rolling up the counts for each component of the name
      990 +         * into an nvlist then we'll use that count in the validation of each
      991 +         * individual snapshot.
      992 +         *
      993 +         * We validated the snapshot names in zfs_ioc_snapshot so we know they
      994 +         * have a '@'.
      995 +         */
      996 +        cnt_track = fnvlist_alloc();
      997 +
 967  998          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 968  999              pair = nvlist_next_nvpair(snaps, pair)) {
 969      -                err = snapshot_one_impl(nvpair_name(pair), &saa);
     1000 +                (void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
     1001 +                pdelim = strchr(nm, '@');
     1002 +                *pdelim = '\0';
     1003 +
                           /*
                            * Bump the count for this filesystem and then for each
                            * ancestor, trimming one trailing "/component" from nm
                            * per iteration until no '/' is left.
                            */
     1004 +                do {
     1005 +                        if (nvlist_lookup_uint64(cnt_track, nm, &val) == 0) {
     1006 +                                /* update existing entry */
     1007 +                                fnvlist_add_uint64(cnt_track, nm, val + 1);
     1008 +                        } else {
     1009 +                                /* add to list */
     1010 +                                fnvlist_add_uint64(cnt_track, nm, 1);
     1011 +                        }
     1012 +
     1013 +                        pdelim = strrchr(nm, '/');
     1014 +                        if (pdelim != NULL)
     1015 +                                *pdelim = '\0';
     1016 +                } while (pdelim != NULL);
     1017 +        }
     1018 +
     1019 +        /*
     1020 +         * We've calculated the counts, now validate.
     1021 +         */
     1022 +        for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
     1023 +            pair = nvlist_next_nvpair(snaps, pair)) {
     1024 +                (void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
     1025 +                pdelim = strchr(nm, '@');
     1026 +                *pdelim = '\0';
     1027 +
     1028 +                val = fnvlist_lookup_uint64(cnt_track, nm);
     1029 +                err = snapshot_one_impl(nvpair_name(pair), &saa, val);
 970 1030                  if (err != 0) {
 971 1031                          if (errors != NULL) {
 972 1032                                  fnvlist_add_int32(errors,
 973 1033                                      nvpair_name(pair), err);
 974 1034                          }
 975 1035                          rv = err;
 976 1036                  }
 977 1037          }
 978 1038  
     1039 +        nvlist_free(cnt_track);
     1040 +
 979 1041          /*
 980 1042           * If any call to snapshot_one_impl() failed, don't execute the
 981 1043           * sync task.  The error handling code below will clean up the
 982 1044           * snaponearg_t from any successful calls to
 983 1045           * snapshot_one_impl().
 984 1046           */
 985 1047          if (rv == 0)
 986 1048                  err = dsl_sync_task_group_wait(saa.saa_dstg);
 987 1049          if (err != 0)
 988 1050                  rv = err;
 989 1051  
                   /*
                    * Walk every queued task: report its error (if any), then undo
                    * what snapshot_one_impl() set up for it (ZIL suspend, objset
                    * hold, snaponearg_t allocation).
                    */
 990 1052          for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
 991 1053              dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
 992 1054                  objset_t *os = dst->dst_arg1;
 993 1055                  snaponearg_t *soa = dst->dst_arg2;
 994 1056                  if (dst->dst_err != 0) {
 995 1057                          if (errors != NULL) {
 996 1058                                  fnvlist_add_int32(errors,
 997 1059                                      soa->soa_longname, dst->dst_err);
 998 1060                          }
 999 1061                          rv = dst->dst_err;
1000 1062                  }
1001 1063  
1002 1064                  if (saa.saa_needsuspend)
1003 1065                          zil_resume(dmu_objset_zil(os));
1004 1066                  dmu_objset_rele(os, &saa);
1005 1067                  kmem_free(soa, sizeof (*soa));
1006 1068          }
1007 1069  
1008 1070          dsl_sync_task_group_destroy(saa.saa_dstg);
1009 1071          spa_close(spa, FTAG);
1010 1072          return (rv);
1011 1073  }
1012 1074  
           /*
            * Convenience wrapper: snapshot the single dataset 'fsname' as
            * 'snapname' by building a one-entry nvlist keyed "fsname@snapname"
            * and calling dmu_objset_snapshot() with no properties and no error
            * list.  Returns that call's result.
            */
1013 1075  int
1014 1076  dmu_objset_snapshot_one(const char *fsname, const char *snapname)
1015 1077  {
1016 1078          int err;
1017 1079          char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1018 1080          nvlist_t *snaps = fnvlist_alloc();
1019 1081  
1020 1082          fnvlist_add_boolean(snaps, longsnap);
1021 1083          err = dmu_objset_snapshot(snaps, NULL, NULL);
1022 1084          fnvlist_free(snaps);
1023 1085          strfree(longsnap);
1024 1086          return (err);
1025 1087  }
1026 1088  
           /*
            * Take a temporary snapshot named 'snapname' (full "fs@snap" form).
            * With saa_temporary set, snapshot_sync() places a user hold named
            * 'tag' on the new snapshot and issues a deferred destroy on it; the
            * hold is then registered for cleanup against the minor obtained
            * from 'cleanup_fd'.  A count of 1 is passed to snapshot_one_impl().
            *
            * Returns EINVAL for a negative cleanup_fd, otherwise 0 or the error
            * from one of the steps.
            *
            * NOTE(review): saa_cr is never set here, so snapshot_check() passes
            * a NULL credential to dsl_dataset_snapshot_check() -- confirm that
            * is handled.
            * NOTE(review): the two early returns after the task group has been
            * created close the spa but do not destroy the group, and the
            * snaponearg_t allocated by snapshot_one_impl() is not freed in the
            * cleanup loop (dmu_objset_snapshot() does free it) -- these look
            * like leaks; verify.
            * NOTE(review): the cleanup loop registers saa_newds even when the
            * sync task failed, in which case saa_newds may still be NULL --
            * verify dsl_register_onexit_hold_cleanup() tolerates that.
            */
1027 1089  int
1028 1090  dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd)
1029 1091  {
1030 1092          dsl_sync_task_t *dst;
1031 1093          snapallarg_t saa = { 0 };
1032 1094          spa_t *spa;
1033 1095          minor_t minor;
1034 1096          int err;
1035 1097  
1036 1098          err = spa_open(snapname, &spa, FTAG);
1037 1099          if (err)
1038 1100                  return (err);
1039 1101          saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1040 1102          saa.saa_htag = tag;
1041 1103          saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
1042 1104          saa.saa_temporary = B_TRUE;
  
    | ↓ open down ↓ | 54 lines elided | ↑ open up ↑ | 
1043 1105  
1044 1106          if (cleanup_fd < 0) {
1045 1107                  spa_close(spa, FTAG);
1046 1108                  return (EINVAL);
1047 1109          }
1048 1110          if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
1049 1111                  spa_close(spa, FTAG);
1050 1112                  return (err);
1051 1113          }
1052 1114  
1053      -        err = snapshot_one_impl(snapname, &saa);
     1115 +        err = snapshot_one_impl(snapname, &saa, 1);
1054 1116  
1055 1117          if (err == 0)
1056 1118                  err = dsl_sync_task_group_wait(saa.saa_dstg);
1057 1119  
1058 1120          for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
1059 1121              dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
1060 1122                  objset_t *os = dst->dst_arg1;
1061 1123                  dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor);
1062 1124                  if (saa.saa_needsuspend)
1063 1125                          zil_resume(dmu_objset_zil(os));
1064 1126                  dmu_objset_rele(os, &saa);
1065 1127          }
1066 1128  
1067 1129          zfs_onexit_fd_rele(cleanup_fd);
1068 1130          dsl_sync_task_group_destroy(saa.saa_dstg);
1069 1131          spa_close(spa, FTAG);
1070 1132          return (err);
1071 1133  }
1072 1134  
1073 1135  
           /*
            * Drain 'list', syncing each dnode on it.  Every dnode has its
            * dn_zio set from its dbuf's pending dirty record and is removed
            * from 'list'; if 'newlist' is non-NULL the dnode is also given a
            * reference tagged with 'newlist' and appended to it before being
            * synced.
            */
1074 1136  static void
1075 1137  dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
1076 1138  {
1077 1139          dnode_t *dn;
1078 1140  
1079 1141          while (dn = list_head(list)) {
1080 1142                  ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1081 1143                  ASSERT(dn->dn_dbuf->db_data_pending);
1082 1144                  /*
1083 1145                   * Initialize dn_zio outside dnode_sync() because the
1084 1146                   * meta-dnode needs to set it outside dnode_sync().
1085 1147                   */
1086 1148                  dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
1087 1149                  ASSERT(dn->dn_zio);
1088 1150  
1089 1151                  ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
1090 1152                  list_remove(list, dn);
1091 1153  
1092 1154                  if (newlist) {
1093 1155                          (void) dnode_add_ref(dn, newlist);
1094 1156                          list_insert_tail(newlist, dn);
1095 1157                  }
1096 1158  
1097 1159                  dnode_sync(dn, tx);
1098 1160          }
1099 1161  }
1100 1162  
           /*
            * "Ready" callback handed to arc_write() for the objset root block
            * in dmu_objset_sync().  'arg' is the objset.  Recomputes the root
            * block pointer's fill count as the sum of the fill counts of the
            * meta-dnode's block pointers.
            */
1101 1163  /* ARGSUSED */
1102 1164  static void
1103 1165  dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
1104 1166  {
1105 1167          blkptr_t *bp = zio->io_bp;
1106 1168          objset_t *os = arg;
1107 1169          dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
1108 1170  
1109 1171          ASSERT(bp == os->os_rootbp);
1110 1172          ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
1111 1173          ASSERT(BP_GET_LEVEL(bp) == 0);
1112 1174  
1113 1175          /*
1114 1176           * Update rootbp fill count: it should be the number of objects
1115 1177           * allocated in the object set (not counting the "special"
1116 1178           * objects that are stored in the objset_phys_t -- the meta
1117 1179           * dnode and user/group accounting objects).
1118 1180           */
1119 1181          bp->blk_fill = 0;
1120 1182          for (int i = 0; i < dnp->dn_nblkptr; i++)
1121 1183                  bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
1122 1184  }
1123 1185  
           /*
            * "Done" callback handed to arc_write() for the objset root block
            * in dmu_objset_sync().  'arg' is the objset.  If the I/O was a
            * rewrite, the block pointer must be unchanged (asserted);
            * otherwise the original block is killed and the new one is
            * recorded as born in the dataset, using the tx saved in
            * os_synctx.
            */
1124 1186  /* ARGSUSED */
1125 1187  static void
1126 1188  dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
1127 1189  {
1128 1190          blkptr_t *bp = zio->io_bp;
1129 1191          blkptr_t *bp_orig = &zio->io_bp_orig;
1130 1192          objset_t *os = arg;
1131 1193  
1132 1194          if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
1133 1195                  ASSERT(BP_EQUAL(bp, bp_orig));
1134 1196          } else {
1135 1197                  dsl_dataset_t *ds = os->os_dsl_dataset;
1136 1198                  dmu_tx_t *tx = os->os_synctx;
1137 1199  
1138 1200                  (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
1139 1201                  dsl_dataset_block_born(ds, bp, tx);
1140 1202          }
1141 1203  }
1142 1204  
           /*
            * Sync the objset in syncing context 'tx'.  In order: create the
            * root block write (passing 'pio' to arc_write()), sync the meta
            * dnode and, when allocated, the user/group accounting dnodes, sync
            * this txg's free and dirty dnodes, issue the zios of the
            * meta-dnode's dirty records, sync the ZIL, copy the ZIL header
            * into the objset phys and finally issue the root zio without
            * waiting for it.
            *
            * When user accounting is enabled, synced dnodes are collected on
            * os_synced_dnodes for dmu_objset_do_userquota_updates().
            */
1143 1205  /* called from dsl */
1144 1206  void
1145 1207  dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
1146 1208  {
1147 1209          int txgoff;
1148 1210          zbookmark_t zb;
1149 1211          zio_prop_t zp;
1150 1212          zio_t *zio;
1151 1213          list_t *list;
1152 1214          list_t *newlist = NULL;
1153 1215          dbuf_dirty_record_t *dr;
1154 1216  
1155 1217          dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
1156 1218  
1157 1219          ASSERT(dmu_tx_is_syncing(tx));
1158 1220          /* XXX the write_done callback should really give us the tx... */
1159 1221          os->os_synctx = tx;
1160 1222  
1161 1223          if (os->os_dsl_dataset == NULL) {
1162 1224                  /*
1163 1225                   * This is the MOS.  If we have upgraded,
1164 1226                   * spa_max_replication() could change, so reset
1165 1227                   * os_copies here.
1166 1228                   */
1167 1229                  os->os_copies = spa_max_replication(os->os_spa);
1168 1230          }
1169 1231  
1170 1232          /*
1171 1233           * Create the root block IO
1172 1234           */
1173 1235          SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1174 1236              os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1175 1237              ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1176 1238          VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
1177 1239              os->os_rootbp, os->os_spa, &zb));
1178 1240  
1179 1241          dmu_write_policy(os, NULL, 0, 0, &zp);
1180 1242  
1181 1243          zio = arc_write(pio, os->os_spa, tx->tx_txg,
1182 1244              os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
1183 1245              dmu_objset_write_ready, dmu_objset_write_done, os,
1184 1246              ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
1185 1247  
1186 1248          /*
1187 1249           * Sync special dnodes - the parent IO for the sync is the root block
1188 1250           */
1189 1251          DMU_META_DNODE(os)->dn_zio = zio;
1190 1252          dnode_sync(DMU_META_DNODE(os), tx);
1191 1253  
1192 1254          os->os_phys->os_flags = os->os_flags;
1193 1255  
1194 1256          if (DMU_USERUSED_DNODE(os) &&
1195 1257              DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1196 1258                  DMU_USERUSED_DNODE(os)->dn_zio = zio;
1197 1259                  dnode_sync(DMU_USERUSED_DNODE(os), tx);
1198 1260                  DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1199 1261                  dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1200 1262          }
1201 1263  
                   /* Index of this txg's slot in the per-txg dnode lists. */
1202 1264          txgoff = tx->tx_txg & TXG_MASK;
1203 1265  
1204 1266          if (dmu_objset_userused_enabled(os)) {
1205 1267                  newlist = &os->os_synced_dnodes;
1206 1268                  /*
1207 1269                   * We must create the list here because it uses the
1208 1270                   * dn_dirty_link[] of this txg.
1209 1271                   */
1210 1272                  list_create(newlist, sizeof (dnode_t),
1211 1273                      offsetof(dnode_t, dn_dirty_link[txgoff]));
1212 1274          }
1213 1275  
1214 1276          dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
1215 1277          dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
1216 1278  
1217 1279          list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
1218 1280          while (dr = list_head(list)) {
1219 1281                  ASSERT(dr->dr_dbuf->db_level == 0);
1220 1282                  list_remove(list, dr);
1221 1283                  if (dr->dr_zio)
1222 1284                          zio_nowait(dr->dr_zio);
1223 1285          }
1224 1286          /*
1225 1287           * Free intent log blocks up to this tx.
1226 1288           */
1227 1289          zil_sync(os->os_zil, tx);
1228 1290          os->os_phys->os_zil_header = os->os_zil_header;
1229 1291          zio_nowait(zio);
1230 1292  }
1231 1293  
           /*
            * Returns true if the objset has any dnode on its dirty or free
            * list for the slot selected by 'txg' (txg & TXG_MASK).
            */
1232 1294  boolean_t
1233 1295  dmu_objset_is_dirty(objset_t *os, uint64_t txg)
1234 1296  {
1235 1297          return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
1236 1298              !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
1237 1299  }
1238 1300  
           /*
            * Space-accounting callbacks indexed by objset type; a NULL entry
            * means none is registered (see dmu_objset_userused_enabled()).
            */
1239 1301  static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
1240 1302  
           /*
            * Register 'cb' as the callback for objsets of type 'ost',
            * replacing any previous entry.  'ost' is used as an array index
            * without a range check.
            */
1241 1303  void
1242 1304  dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
1243 1305  {
1244 1306          used_cbs[ost] = cb;
1245 1307  }
1246 1308  
           /*
            * Returns true when all three hold: the pool version is at least
            * SPA_VERSION_USERSPACE, a callback is registered for this objset's
            * type, and the objset has a userused dnode.
            */
1247 1309  boolean_t
1248 1310  dmu_objset_userused_enabled(objset_t *os)
1249 1311  {
1250 1312          return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1251 1313              used_cbs[os->os_phys->os_type] != NULL &&
1252 1314              DMU_USERUSED_DNODE(os) != NULL);
1253 1315  }
1254 1316  
           /*
            * If DNODE_FLAG_USERUSED_ACCOUNTED is set in 'flags', adjust the
            * 'user' entry of the userused object and the 'group' entry of the
            * groupused object by DNODE_SIZE + used, negated when 'subtract' is
            * set.  Does nothing when the flag is clear.  A failing
            * zap_increment_int() trips VERIFY.
            */
1255 1317  static void
1256 1318  do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
1257 1319      uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
1258 1320  {
1259 1321          if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
1260 1322                  int64_t delta = DNODE_SIZE + used;
1261 1323                  if (subtract)
1262 1324                          delta = -delta;
1263 1325                  VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
1264 1326                      user, delta, tx));
1265 1327                  VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
1266 1328                      group, delta, tx));
1267 1329          }
1268 1330  }
1269 1331  
           /*
            * Drain os_synced_dnodes and apply user/group space accounting for
            * each dnode: the old usage is subtracted from the old uid/gid (if
            * DN_ID_OLD_EXIST) and the current usage is added to the new uid/gid
            * (if DN_ID_NEW_EXIST).  The userused/groupused ZAP objects are
            * created on first use.
            *
            * Afterwards, under dn_mtx, the "new" ids are rolled into the "old"
            * slots and DN_ID_NEW_EXIST is cleared; the dnode is then removed
            * from the list and the reference tagged with the list is dropped.
            */
1270 1332  void
1271 1333  dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
1272 1334  {
1273 1335          dnode_t *dn;
1274 1336          list_t *list = &os->os_synced_dnodes;
1275 1337  
1276 1338          ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
1277 1339  
1278 1340          while (dn = list_head(list)) {
1279 1341                  int flags;
1280 1342                  ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
1281 1343                  ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
1282 1344                      dn->dn_phys->dn_flags &
1283 1345                      DNODE_FLAG_USERUSED_ACCOUNTED);
1284 1346  
1285 1347                  /* Allocate the user/groupused objects if necessary. */
1286 1348                  if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
1287 1349                          VERIFY(0 == zap_create_claim(os,
1288 1350                              DMU_USERUSED_OBJECT,
1289 1351                              DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1290 1352                          VERIFY(0 == zap_create_claim(os,
1291 1353                              DMU_GROUPUSED_OBJECT,
1292 1354                              DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1293 1355                  }
1294 1356  
1295 1357                  /*
1296 1358                   * We intentionally modify the zap object even if the
1297 1359                   * net delta is zero.  Otherwise
1298 1360                   * the block of the zap obj could be shared between
1299 1361                   * datasets but need to be different between them after
1300 1362                   * a bprewrite.
1301 1363                   */
1302 1364  
1303 1365                  flags = dn->dn_id_flags;
1304 1366                  ASSERT(flags);
1305 1367                  if (flags & DN_ID_OLD_EXIST)  {
1306 1368                          do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
1307 1369                              dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
1308 1370                  }
1309 1371                  if (flags & DN_ID_NEW_EXIST) {
1310 1372                          do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
1311 1373                              dn->dn_phys->dn_flags,  dn->dn_newuid,
1312 1374                              dn->dn_newgid, B_FALSE, tx);
1313 1375                  }
1314 1376  
1315 1377                  mutex_enter(&dn->dn_mtx);
1316 1378                  dn->dn_oldused = 0;
1317 1379                  dn->dn_oldflags = 0;
1318 1380                  if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
1319 1381                          dn->dn_olduid = dn->dn_newuid;
1320 1382                          dn->dn_oldgid = dn->dn_newgid;
1321 1383                          dn->dn_id_flags |= DN_ID_OLD_EXIST;
1322 1384                          if (dn->dn_bonuslen == 0)
1323 1385                                  dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1324 1386                          else
1325 1387                                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1326 1388                  }
1327 1389                  dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
1328 1390                  mutex_exit(&dn->dn_mtx);
1329 1391  
1330 1392                  list_remove(list, dn);
1331 1393                  dnode_rele(dn, list);
1332 1394          }
1333 1395  }
1334 1396  
1335 1397  /*
1336 1398   * Returns a pointer to data to find uid/gid from
1337 1399   *
1338 1400   * If a dirty record for transaction group that is syncing can't
1339 1401   * be found then NULL is returned.  In the NULL case it is assumed
1340 1402   * the uid/gid aren't changing.
            *
            * If the dbuf has no dirty records at all, its current db_data is
            * returned.  When a matching dirty record is found, the result is
            * dr_data->b_data for a spill block of a dnode with no bonus buffer,
            * and dr_data itself otherwise.
1341 1403   */
1342 1404  static void *
1343 1405  dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
1344 1406  {
1345 1407          dbuf_dirty_record_t *dr, **drp;
1346 1408          void *data;
1347 1409  
1348 1410          if (db->db_dirtycnt == 0)
1349 1411                  return (db->db.db_data);  /* Nothing is changing */
1350 1412  
                   /* Look for the dirty record belonging to the syncing txg. */
1351 1413          for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1352 1414                  if (dr->dr_txg == tx->tx_txg)
1353 1415                          break;
1354 1416  
1355 1417          if (dr == NULL) {
1356 1418                  data = NULL;
1357 1419          } else {
1358 1420                  dnode_t *dn;
1359 1421  
1360 1422                  DB_DNODE_ENTER(dr->dr_dbuf);
1361 1423                  dn = DB_DNODE(dr->dr_dbuf);
1362 1424  
1363 1425                  if (dn->dn_bonuslen == 0 &&
1364 1426                      dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
1365 1427                          data = dr->dt.dl.dr_data->b_data;
1366 1428                  else
1367 1429                          data = dr->dt.dl.dr_data;
1368 1430  
1369 1431                  DB_DNODE_EXIT(dr->dr_dbuf);
1370 1432          }
1371 1433  
1372 1434          return (data);
1373 1435  }
1374 1436  
1375 1437  void
1376 1438  dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
1377 1439  {
1378 1440          objset_t *os = dn->dn_objset;
1379 1441          void *data = NULL;
1380 1442          dmu_buf_impl_t *db = NULL;
1381 1443          uint64_t *user, *group;
1382 1444          int flags = dn->dn_id_flags;
1383 1445          int error;
1384 1446          boolean_t have_spill = B_FALSE;
1385 1447  
1386 1448          if (!dmu_objset_userused_enabled(dn->dn_objset))
1387 1449                  return;
1388 1450  
1389 1451          if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
1390 1452              DN_ID_CHKED_SPILL)))
1391 1453                  return;
1392 1454  
1393 1455          if (before && dn->dn_bonuslen != 0)
1394 1456                  data = DN_BONUS(dn->dn_phys);
1395 1457          else if (!before && dn->dn_bonuslen != 0) {
1396 1458                  if (dn->dn_bonus) {
1397 1459                          db = dn->dn_bonus;
1398 1460                          mutex_enter(&db->db_mtx);
1399 1461                          data = dmu_objset_userquota_find_data(db, tx);
1400 1462                  } else {
1401 1463                          data = DN_BONUS(dn->dn_phys);
1402 1464                  }
1403 1465          } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
1404 1466                          int rf = 0;
1405 1467  
1406 1468                          if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
1407 1469                                  rf |= DB_RF_HAVESTRUCT;
1408 1470                          error = dmu_spill_hold_by_dnode(dn,
1409 1471                              rf | DB_RF_MUST_SUCCEED,
1410 1472                              FTAG, (dmu_buf_t **)&db);
1411 1473                          ASSERT(error == 0);
1412 1474                          mutex_enter(&db->db_mtx);
1413 1475                          data = (before) ? db->db.db_data :
1414 1476                              dmu_objset_userquota_find_data(db, tx);
1415 1477                          have_spill = B_TRUE;
1416 1478          } else {
1417 1479                  mutex_enter(&dn->dn_mtx);
1418 1480                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1419 1481                  mutex_exit(&dn->dn_mtx);
1420 1482                  return;
1421 1483          }
1422 1484  
1423 1485          if (before) {
1424 1486                  ASSERT(data);
1425 1487                  user = &dn->dn_olduid;
1426 1488                  group = &dn->dn_oldgid;
1427 1489          } else if (data) {
1428 1490                  user = &dn->dn_newuid;
1429 1491                  group = &dn->dn_newgid;
1430 1492          }
1431 1493  
1432 1494          /*
1433 1495           * Must always call the callback in case the object
1434 1496           * type has changed and that type isn't an object type to track
1435 1497           */
1436 1498          error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
1437 1499              user, group);
1438 1500  
1439 1501          /*
1440 1502           * Preserve existing uid/gid when the callback can't determine
1441 1503           * what the new uid/gid are and the callback returned EEXIST.
1442 1504           * The EEXIST error tells us to just use the existing uid/gid.
1443 1505           * If we don't know what the old values are then just assign
1444 1506           * them to 0, since that is a new file  being created.
1445 1507           */
1446 1508          if (!before && data == NULL && error == EEXIST) {
1447 1509                  if (flags & DN_ID_OLD_EXIST) {
1448 1510                          dn->dn_newuid = dn->dn_olduid;
1449 1511                          dn->dn_newgid = dn->dn_oldgid;
1450 1512                  } else {
1451 1513                          dn->dn_newuid = 0;
1452 1514                          dn->dn_newgid = 0;
1453 1515                  }
1454 1516                  error = 0;
1455 1517          }
1456 1518  
1457 1519          if (db)
1458 1520                  mutex_exit(&db->db_mtx);
1459 1521  
1460 1522          mutex_enter(&dn->dn_mtx);
1461 1523          if (error == 0 && before)
1462 1524                  dn->dn_id_flags |= DN_ID_OLD_EXIST;
1463 1525          if (error == 0 && !before)
1464 1526                  dn->dn_id_flags |= DN_ID_NEW_EXIST;
1465 1527  
1466 1528          if (have_spill) {
1467 1529                  dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1468 1530          } else {
1469 1531                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1470 1532          }
1471 1533          mutex_exit(&dn->dn_mtx);
1472 1534          if (have_spill)
1473 1535                  dmu_buf_rele((dmu_buf_t *)db, FTAG);
1474 1536  }
1475 1537  
1476 1538  boolean_t
1477 1539  dmu_objset_userspace_present(objset_t *os)
1478 1540  {
1479 1541          return (os->os_phys->os_flags &
1480 1542              OBJSET_FLAG_USERACCOUNTING_COMPLETE);
1481 1543  }
1482 1544  
1483 1545  int
1484 1546  dmu_objset_userspace_upgrade(objset_t *os)
1485 1547  {
1486 1548          uint64_t obj;
1487 1549          int err = 0;
1488 1550  
1489 1551          if (dmu_objset_userspace_present(os))
1490 1552                  return (0);
1491 1553          if (!dmu_objset_userused_enabled(os))
1492 1554                  return (ENOTSUP);
1493 1555          if (dmu_objset_is_snapshot(os))
1494 1556                  return (EINVAL);
1495 1557  
1496 1558          /*
1497 1559           * We simply need to mark every object dirty, so that it will be
1498 1560           * synced out and now accounted.  If this is called
1499 1561           * concurrently, or if we already did some work before crashing,
1500 1562           * that's fine, since we track each object's accounted state
1501 1563           * independently.
1502 1564           */
1503 1565  
1504 1566          for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
1505 1567                  dmu_tx_t *tx;
1506 1568                  dmu_buf_t *db;
1507 1569                  int objerr;
1508 1570  
1509 1571                  if (issig(JUSTLOOKING) && issig(FORREAL))
1510 1572                          return (EINTR);
1511 1573  
1512 1574                  objerr = dmu_bonus_hold(os, obj, FTAG, &db);
1513 1575                  if (objerr)
1514 1576                          continue;
1515 1577                  tx = dmu_tx_create(os);
1516 1578                  dmu_tx_hold_bonus(tx, obj);
1517 1579                  objerr = dmu_tx_assign(tx, TXG_WAIT);
1518 1580                  if (objerr) {
1519 1581                          dmu_tx_abort(tx);
1520 1582                          continue;
1521 1583                  }
1522 1584                  dmu_buf_will_dirty(db, tx);
1523 1585                  dmu_buf_rele(db, FTAG);
1524 1586                  dmu_tx_commit(tx);
1525 1587          }
1526 1588  
1527 1589          os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
1528 1590          txg_wait_synced(dmu_objset_pool(os), 0);
1529 1591          return (0);
1530 1592  }
1531 1593  
1532 1594  void
1533 1595  dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
1534 1596      uint64_t *usedobjsp, uint64_t *availobjsp)
1535 1597  {
1536 1598          dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
1537 1599              usedobjsp, availobjsp);
1538 1600  }
1539 1601  
1540 1602  uint64_t
1541 1603  dmu_objset_fsid_guid(objset_t *os)
1542 1604  {
1543 1605          return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
1544 1606  }
1545 1607  
1546 1608  void
1547 1609  dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
1548 1610  {
1549 1611          stat->dds_type = os->os_phys->os_type;
1550 1612          if (os->os_dsl_dataset)
1551 1613                  dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
1552 1614  }
1553 1615  
1554 1616  void
1555 1617  dmu_objset_stats(objset_t *os, nvlist_t *nv)
1556 1618  {
1557 1619          ASSERT(os->os_dsl_dataset ||
1558 1620              os->os_phys->os_type == DMU_OST_META);
1559 1621  
1560 1622          if (os->os_dsl_dataset != NULL)
1561 1623                  dsl_dataset_stats(os->os_dsl_dataset, nv);
1562 1624  
1563 1625          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
1564 1626              os->os_phys->os_type);
1565 1627          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
1566 1628              dmu_objset_userspace_present(os));
1567 1629  }
1568 1630  
1569 1631  int
1570 1632  dmu_objset_is_snapshot(objset_t *os)
1571 1633  {
1572 1634          if (os->os_dsl_dataset != NULL)
1573 1635                  return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
1574 1636          else
1575 1637                  return (B_FALSE);
1576 1638  }
1577 1639  
1578 1640  int
1579 1641  dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1580 1642      boolean_t *conflict)
1581 1643  {
1582 1644          dsl_dataset_t *ds = os->os_dsl_dataset;
1583 1645          uint64_t ignored;
1584 1646  
1585 1647          if (ds->ds_phys->ds_snapnames_zapobj == 0)
1586 1648                  return (ENOENT);
1587 1649  
1588 1650          return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1589 1651              ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
1590 1652              real, maxlen, conflict));
1591 1653  }
1592 1654  
1593 1655  int
1594 1656  dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1595 1657      uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1596 1658  {
1597 1659          dsl_dataset_t *ds = os->os_dsl_dataset;
1598 1660          zap_cursor_t cursor;
1599 1661          zap_attribute_t attr;
1600 1662  
1601 1663          if (ds->ds_phys->ds_snapnames_zapobj == 0)
1602 1664                  return (ENOENT);
1603 1665  
1604 1666          zap_cursor_init_serialized(&cursor,
1605 1667              ds->ds_dir->dd_pool->dp_meta_objset,
1606 1668              ds->ds_phys->ds_snapnames_zapobj, *offp);
1607 1669  
1608 1670          if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1609 1671                  zap_cursor_fini(&cursor);
1610 1672                  return (ENOENT);
1611 1673          }
1612 1674  
1613 1675          if (strlen(attr.za_name) + 1 > namelen) {
1614 1676                  zap_cursor_fini(&cursor);
1615 1677                  return (ENAMETOOLONG);
1616 1678          }
1617 1679  
1618 1680          (void) strcpy(name, attr.za_name);
1619 1681          if (idp)
1620 1682                  *idp = attr.za_first_integer;
1621 1683          if (case_conflict)
1622 1684                  *case_conflict = attr.za_normalization_conflict;
1623 1685          zap_cursor_advance(&cursor);
1624 1686          *offp = zap_cursor_serialize(&cursor);
1625 1687          zap_cursor_fini(&cursor);
1626 1688  
1627 1689          return (0);
1628 1690  }
1629 1691  
1630 1692  int
1631 1693  dmu_dir_list_next(objset_t *os, int namelen, char *name,
1632 1694      uint64_t *idp, uint64_t *offp)
1633 1695  {
1634 1696          dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
1635 1697          zap_cursor_t cursor;
1636 1698          zap_attribute_t attr;
1637 1699  
1638 1700          /* there is no next dir on a snapshot! */
1639 1701          if (os->os_dsl_dataset->ds_object !=
1640 1702              dd->dd_phys->dd_head_dataset_obj)
1641 1703                  return (ENOENT);
1642 1704  
1643 1705          zap_cursor_init_serialized(&cursor,
1644 1706              dd->dd_pool->dp_meta_objset,
1645 1707              dd->dd_phys->dd_child_dir_zapobj, *offp);
1646 1708  
1647 1709          if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1648 1710                  zap_cursor_fini(&cursor);
1649 1711                  return (ENOENT);
1650 1712          }
1651 1713  
1652 1714          if (strlen(attr.za_name) + 1 > namelen) {
1653 1715                  zap_cursor_fini(&cursor);
1654 1716                  return (ENAMETOOLONG);
1655 1717          }
1656 1718  
1657 1719          (void) strcpy(name, attr.za_name);
1658 1720          if (idp)
1659 1721                  *idp = attr.za_first_integer;
1660 1722          zap_cursor_advance(&cursor);
1661 1723          *offp = zap_cursor_serialize(&cursor);
1662 1724          zap_cursor_fini(&cursor);
1663 1725  
1664 1726          return (0);
1665 1727  }
1666 1728  
/*
 * Carries a name-only callback and its argument so that it can be invoked
 * through a callback that also receives a spa and a dataset object number.
 */
struct findarg {
        /* callback taking a dataset name and the caller's argument */
        int (*func)(const char *dsname, void *cbarg);
        /* opaque argument handed back to func */
        void *arg;
};
1671 1733  
1672 1734  /* ARGSUSED */
1673 1735  static int
1674 1736  findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
1675 1737  {
1676 1738          struct findarg *fa = arg;
1677 1739          return (fa->func(dsname, fa->arg));
1678 1740  }
1679 1741  
1680 1742  /*
1681 1743   * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1682 1744   * Perhaps change all callers to use dmu_objset_find_spa()?
1683 1745   */
1684 1746  int
1685 1747  dmu_objset_find(char *name, int func(const char *, void *), void *arg,
1686 1748      int flags)
1687 1749  {
1688 1750          struct findarg fa;
1689 1751          fa.func = func;
1690 1752          fa.arg = arg;
1691 1753          return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
1692 1754  }
1693 1755  
1694 1756  /*
1695 1757   * Find all objsets under name, call func on each
1696 1758   */
1697 1759  int
1698 1760  dmu_objset_find_spa(spa_t *spa, const char *name,
1699 1761      int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
1700 1762  {
1701 1763          dsl_dir_t *dd;
1702 1764          dsl_pool_t *dp;
1703 1765          dsl_dataset_t *ds;
1704 1766          zap_cursor_t zc;
1705 1767          zap_attribute_t *attr;
1706 1768          char *child;
1707 1769          uint64_t thisobj;
1708 1770          int err;
1709 1771  
1710 1772          if (name == NULL)
1711 1773                  name = spa_name(spa);
1712 1774          err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
1713 1775          if (err)
1714 1776                  return (err);
1715 1777  
1716 1778          /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1717 1779          if (dd->dd_myname[0] == '$') {
1718 1780                  dsl_dir_close(dd, FTAG);
1719 1781                  return (0);
1720 1782          }
1721 1783  
1722 1784          thisobj = dd->dd_phys->dd_head_dataset_obj;
1723 1785          attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1724 1786          dp = dd->dd_pool;
1725 1787  
1726 1788          /*
1727 1789           * Iterate over all children.
1728 1790           */
1729 1791          if (flags & DS_FIND_CHILDREN) {
1730 1792                  for (zap_cursor_init(&zc, dp->dp_meta_objset,
1731 1793                      dd->dd_phys->dd_child_dir_zapobj);
1732 1794                      zap_cursor_retrieve(&zc, attr) == 0;
1733 1795                      (void) zap_cursor_advance(&zc)) {
1734 1796                          ASSERT(attr->za_integer_length == sizeof (uint64_t));
1735 1797                          ASSERT(attr->za_num_integers == 1);
1736 1798  
1737 1799                          child = kmem_asprintf("%s/%s", name, attr->za_name);
1738 1800                          err = dmu_objset_find_spa(spa, child, func, arg, flags);
1739 1801                          strfree(child);
1740 1802                          if (err)
1741 1803                                  break;
1742 1804                  }
1743 1805                  zap_cursor_fini(&zc);
1744 1806  
1745 1807                  if (err) {
1746 1808                          dsl_dir_close(dd, FTAG);
1747 1809                          kmem_free(attr, sizeof (zap_attribute_t));
1748 1810                          return (err);
1749 1811                  }
1750 1812          }
1751 1813  
1752 1814          /*
1753 1815           * Iterate over all snapshots.
1754 1816           */
1755 1817          if (flags & DS_FIND_SNAPSHOTS) {
1756 1818                  if (!dsl_pool_sync_context(dp))
1757 1819                          rw_enter(&dp->dp_config_rwlock, RW_READER);
1758 1820                  err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1759 1821                  if (!dsl_pool_sync_context(dp))
1760 1822                          rw_exit(&dp->dp_config_rwlock);
1761 1823  
1762 1824                  if (err == 0) {
1763 1825                          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
1764 1826                          dsl_dataset_rele(ds, FTAG);
1765 1827  
1766 1828                          for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1767 1829                              zap_cursor_retrieve(&zc, attr) == 0;
1768 1830                              (void) zap_cursor_advance(&zc)) {
1769 1831                                  ASSERT(attr->za_integer_length ==
1770 1832                                      sizeof (uint64_t));
1771 1833                                  ASSERT(attr->za_num_integers == 1);
1772 1834  
1773 1835                                  child = kmem_asprintf("%s@%s",
1774 1836                                      name, attr->za_name);
1775 1837                                  err = func(spa, attr->za_first_integer,
1776 1838                                      child, arg);
1777 1839                                  strfree(child);
1778 1840                                  if (err)
1779 1841                                          break;
1780 1842                          }
1781 1843                          zap_cursor_fini(&zc);
1782 1844                  }
1783 1845          }
1784 1846  
1785 1847          dsl_dir_close(dd, FTAG);
1786 1848          kmem_free(attr, sizeof (zap_attribute_t));
1787 1849  
1788 1850          if (err)
1789 1851                  return (err);
1790 1852  
1791 1853          /*
1792 1854           * Apply to self if appropriate.
1793 1855           */
1794 1856          err = func(spa, thisobj, name, arg);
1795 1857          return (err);
1796 1858  }
1797 1859  
1798 1860  /* ARGSUSED */
1799 1861  int
1800 1862  dmu_objset_prefetch(const char *name, void *arg)
1801 1863  {
1802 1864          dsl_dataset_t *ds;
1803 1865  
1804 1866          if (dsl_dataset_hold(name, FTAG, &ds))
1805 1867                  return (0);
1806 1868  
1807 1869          if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
1808 1870                  mutex_enter(&ds->ds_opening_lock);
1809 1871                  if (ds->ds_objset == NULL) {
1810 1872                          uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
1811 1873                          zbookmark_t zb;
1812 1874  
1813 1875                          SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
1814 1876                              ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1815 1877  
1816 1878                          (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds),
1817 1879                              &ds->ds_phys->ds_bp, NULL, NULL,
1818 1880                              ZIO_PRIORITY_ASYNC_READ,
1819 1881                              ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
1820 1882                              &aflags, &zb);
1821 1883                  }
1822 1884                  mutex_exit(&ds->ds_opening_lock);
1823 1885          }
1824 1886  
1825 1887          dsl_dataset_rele(ds, FTAG);
1826 1888          return (0);
1827 1889  }
1828 1890  
1829 1891  void
1830 1892  dmu_objset_set_user(objset_t *os, void *user_ptr)
1831 1893  {
1832 1894          ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1833 1895          os->os_user_ptr = user_ptr;
1834 1896  }
1835 1897  
1836 1898  void *
1837 1899  dmu_objset_get_user(objset_t *os)
1838 1900  {
1839 1901          ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1840 1902          return (os->os_user_ptr);
1841 1903  }
  
    | ↓ open down ↓ | 778 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX