Print this page
    
3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/dmu_objset.c
          +++ new/usr/src/uts/common/fs/zfs/dmu_objset.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   */
  25   25  
  26   26  /* Portions Copyright 2010 Robert Milkowski */
  27   27  
  28   28  #include <sys/cred.h>
  29   29  #include <sys/zfs_context.h>
  30   30  #include <sys/dmu_objset.h>
  31   31  #include <sys/dsl_dir.h>
  32   32  #include <sys/dsl_dataset.h>
  33   33  #include <sys/dsl_prop.h>
  34   34  #include <sys/dsl_pool.h>
  35   35  #include <sys/dsl_synctask.h>
  36   36  #include <sys/dsl_deleg.h>
  37   37  #include <sys/dnode.h>
  38   38  #include <sys/dbuf.h>
  39   39  #include <sys/zvol.h>
  40   40  #include <sys/dmu_tx.h>
  41   41  #include <sys/zap.h>
  42   42  #include <sys/zil.h>
  43   43  #include <sys/dmu_impl.h>
  44   44  #include <sys/zfs_ioctl.h>
  45   45  #include <sys/sa.h>
  46   46  #include <sys/zfs_onexit.h>
  47   47  
  48   48  /*
  49   49   * Needed to close a window in dnode_move() that allows the objset to be freed
  50   50   * before it can be safely accessed.
            *
            * Set up by dmu_objset_init() and torn down by dmu_objset_fini().
            * dmu_objset_evict() acquires and immediately drops it as a barrier
            * just before it frees an objset.  Not to be confused with the
            * per-objset mutex os->os_lock.
  51   51   */
  52   52  krwlock_t os_lock;
  53   53  
           /*
            * Initialize the file-global os_lock reader/writer lock.
            */
  54   54  void
  55   55  dmu_objset_init(void)
  56   56  {
  57   57          rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
  58   58  }
  59   59  
           /*
            * Destroy the file-global os_lock; counterpart of dmu_objset_init().
            */
  60   60  void
  61   61  dmu_objset_fini(void)
  62   62  {
  63   63          rw_destroy(&os_lock);
  64   64  }
  65   65  
           /*
            * Return the spa_t recorded in the objset (os->os_spa).
            */
  66   66  spa_t *
  67   67  dmu_objset_spa(objset_t *os)
  68   68  {
  69   69          return (os->os_spa);
  70   70  }
  71   71  
           /*
            * Return the zilog_t stored in the objset (os->os_zil), which
            * dmu_objset_open_impl() sets from zil_alloc().
            */
  72   72  zilog_t *
  73   73  dmu_objset_zil(objset_t *os)
  74   74  {
  75   75          return (os->os_zil);
  76   76  }
  77   77  
           /*
            * Return the dsl_pool_t for an objset.
            *
            * When the objset has a dataset whose ds_dir is non-NULL, the pool is
            * taken from that directory (ds_dir->dd_pool).  Otherwise it is looked
            * up from the objset's spa with spa_get_dsl().
            */
  78   78  dsl_pool_t *
  79   79  dmu_objset_pool(objset_t *os)
  80   80  {
  81   81          dsl_dataset_t *ds;
  82   82  
  83   83          if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
  84   84                  return (ds->ds_dir->dd_pool);
  85   85          else
  86   86                  return (spa_get_dsl(os->os_spa));
  87   87  }
  88   88  
           /*
            * Return the dataset recorded in the objset (os->os_dsl_dataset).
            *
            * NOTE(review): dmu_objset_open_impl() treats a NULL dataset as the
            * meta-objset, so callers presumably have to tolerate a NULL return.
            */
  89   89  dsl_dataset_t *
  90   90  dmu_objset_ds(objset_t *os)
  91   91  {
  92   92          return (os->os_dsl_dataset);
  93   93  }
  94   94  
           /*
            * Return the objset type read from the objset_phys_t
            * (os->os_phys->os_type).
            */
  95   95  dmu_objset_type_t
  96   96  dmu_objset_type(objset_t *os)
  97   97  {
  98   98          return (os->os_phys->os_type);
  99   99  }
 100  100  
           /*
            * Write the name of the objset's dataset into buf by calling
            * dsl_dataset_name().
            *
            * NOTE(review): the required size of buf is not visible here;
            * dmu_objset_create_sync() uses a MAXNAMELEN buffer for the same
            * call -- confirm against dsl_dataset_name().
            */
 101  101  void
 102  102  dmu_objset_name(objset_t *os, char *buf)
 103  103  {
 104  104          dsl_dataset_name(os->os_dsl_dataset, buf);
 105  105  }
 106  106  
           /*
            * Return the object number of the objset's dataset (ds_object), or 0
            * when the objset has no dataset.
            */
 107  107  uint64_t
 108  108  dmu_objset_id(objset_t *os)
 109  109  {
 110  110          dsl_dataset_t *ds = os->os_dsl_dataset;
 111  111  
 112  112          return (ds ? ds->ds_object : 0);
 113  113  }
 114  114  
           /*
            * Return the cached "sync" property value (os->os_sync), which
            * sync_changed_cb() keeps up to date.
            */
 115  115  uint64_t
 116  116  dmu_objset_syncprop(objset_t *os)
 117  117  {
 118  118          return (os->os_sync);
 119  119  }
 120  120  
           /*
            * Return the cached "logbias" property value (os->os_logbias), which
            * logbias_changed_cb() keeps up to date.
            */
 121  121  uint64_t
 122  122  dmu_objset_logbias(objset_t *os)
 123  123  {
 124  124          return (os->os_logbias);
 125  125  }
 126  126  
           /*
            * Property callback registered for "checksum" by
            * dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration; newval is the new
            * property value.  Stores the result of
            * zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE) in os_checksum.
            */
 127  127  static void
 128  128  checksum_changed_cb(void *arg, uint64_t newval)
 129  129  {
 130  130          objset_t *os = arg;
 131  131  
 132  132          /*
 133  133           * Inheritance should have been done by now.
 134  134           */
 135  135          ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 136  136  
 137  137          os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
 138  138  }
 139  139  
           /*
            * Property callback registered for "compression" by
            * dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration; newval is the new
            * property value.  Stores the result of
            * zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE) in os_compress.
            */
 140  140  static void
 141  141  compression_changed_cb(void *arg, uint64_t newval)
 142  142  {
 143  143          objset_t *os = arg;
 144  144  
 145  145          /*
 146  146           * Inheritance and range checking should have been done by now.
 147  147           */
 148  148          ASSERT(newval != ZIO_COMPRESS_INHERIT);
 149  149  
 150  150          os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
 151  151  }
 152  152  
           /*
            * Property callback registered for "copies" by dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration.  newval is asserted to
            * be in the range 1 .. spa_max_replication() and is stored unchanged
            * in os_copies.
            */
 153  153  static void
 154  154  copies_changed_cb(void *arg, uint64_t newval)
 155  155  {
 156  156          objset_t *os = arg;
 157  157  
 158  158          /*
 159  159           * Inheritance and range checking should have been done by now.
 160  160           */
 161  161          ASSERT(newval > 0);
 162  162          ASSERT(newval <= spa_max_replication(os->os_spa));
 163  163  
 164  164          os->os_copies = newval;
 165  165  }
 166  166  
           /*
            * Property callback registered for "dedup" by dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration; newval is the new
            * property value.  The value returned by zio_checksum_dedup_select()
            * is split into two fields: the bits under ZIO_CHECKSUM_MASK go to
            * os_dedup_checksum, and the ZIO_CHECKSUM_VERIFY bit is reduced to
            * 0/1 in os_dedup_verify.
            */
 167  167  static void
 168  168  dedup_changed_cb(void *arg, uint64_t newval)
 169  169  {
 170  170          objset_t *os = arg;
 171  171          spa_t *spa = os->os_spa;
 172  172          enum zio_checksum checksum;
 173  173  
 174  174          /*
 175  175           * Inheritance should have been done by now.
 176  176           */
 177  177          ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 178  178  
 179  179          checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
 180  180  
 181  181          os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
 182  182          os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
 183  183  }
 184  184  
           /*
            * Property callback registered for "primarycache" by
            * dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration.  newval is asserted to
            * be one of ZFS_CACHE_ALL, ZFS_CACHE_NONE or ZFS_CACHE_METADATA and
            * is stored unchanged in os_primary_cache.
            */
 185  185  static void
 186  186  primary_cache_changed_cb(void *arg, uint64_t newval)
 187  187  {
 188  188          objset_t *os = arg;
 189  189  
 190  190          /*
 191  191           * Inheritance and range checking should have been done by now.
 192  192           */
 193  193          ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 194  194              newval == ZFS_CACHE_METADATA);
 195  195  
 196  196          os->os_primary_cache = newval;
 197  197  }
 198  198  
           /*
            * Property callback registered for "secondarycache" by
            * dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration.  newval is asserted to
            * be one of ZFS_CACHE_ALL, ZFS_CACHE_NONE or ZFS_CACHE_METADATA and
            * is stored unchanged in os_secondary_cache.
            */
 199  199  static void
 200  200  secondary_cache_changed_cb(void *arg, uint64_t newval)
 201  201  {
 202  202          objset_t *os = arg;
 203  203  
 204  204          /*
 205  205           * Inheritance and range checking should have been done by now.
 206  206           */
 207  207          ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 208  208              newval == ZFS_CACHE_METADATA);
 209  209  
 210  210          os->os_secondary_cache = newval;
 211  211  }
 212  212  
           /*
            * Property callback registered for "sync" by dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration.  newval is asserted to
            * be one of ZFS_SYNC_STANDARD, ZFS_SYNC_ALWAYS or ZFS_SYNC_DISABLED.
            * The value is cached in os_sync and, when the objset already has a
            * ZIL (os_zil non-NULL), forwarded to it with zil_set_sync().
            */
 213  213  static void
 214  214  sync_changed_cb(void *arg, uint64_t newval)
 215  215  {
 216  216          objset_t *os = arg;
 217  217  
 218  218          /*
 219  219           * Inheritance and range checking should have been done by now.
 220  220           */
 221  221          ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
 222  222              newval == ZFS_SYNC_DISABLED);
 223  223  
 224  224          os->os_sync = newval;
 225  225          if (os->os_zil)
 226  226                  zil_set_sync(os->os_zil, newval);
 227  227  }
 228  228  
           /*
            * Property callback registered for "logbias" by dmu_objset_open_impl().
            *
            * arg is the objset_t passed at registration.  newval is asserted to
            * be ZFS_LOGBIAS_LATENCY or ZFS_LOGBIAS_THROUGHPUT.  The value is
            * cached in os_logbias and, when the objset already has a ZIL
            * (os_zil non-NULL), forwarded to it with zil_set_logbias().
            */
 229  229  static void
 230  230  logbias_changed_cb(void *arg, uint64_t newval)
 231  231  {
 232  232          objset_t *os = arg;
 233  233  
 234  234          ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
 235  235              newval == ZFS_LOGBIAS_THROUGHPUT);
 236  236          os->os_logbias = newval;
 237  237          if (os->os_zil)
 238  238                  zil_set_logbias(os->os_zil, newval);
 239  239  }
 240  240  
           /*
            * Byte-swap an objset_phys_t in place.
            *
            * buf points at the objset_phys_t; size must be either
            * OBJSET_OLD_PHYS_SIZE or sizeof (objset_phys_t) (asserted).  The meta
            * dnode, ZIL header, os_type and os_flags are always swapped; the
            * userused/groupused dnodes are swapped only for the full-size layout.
            */
 241  241  void
 242  242  dmu_objset_byteswap(void *buf, size_t size)
 243  243  {
 244  244          objset_phys_t *osp = buf;
 245  245  
 246  246          ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
 247  247          dnode_byteswap(&osp->os_meta_dnode);
 248  248          byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
 249  249          osp->os_type = BSWAP_64(osp->os_type);
 250  250          osp->os_flags = BSWAP_64(osp->os_flags);
 251  251          if (size == sizeof (objset_phys_t)) {
 252  252                  dnode_byteswap(&osp->os_userused_dnode);
 253  253                  dnode_byteswap(&osp->os_groupused_dnode);
 254  254          }
 255  255  }
 256  256  
           /*
            * Build the in-core objset_t for the objset rooted at bp.
            *
            *   spa  pool the objset belongs to
            *   ds   owning dataset, or NULL for the meta-objset; when non-NULL
            *        the caller must hold ds->ds_opening_lock (asserted)
            *   bp   root block pointer; a hole means there is nothing to read
            *        and a zeroed objset_phys_t buffer is allocated instead
            *   osp  out: the new objset on success
            *
            * On success returns 0, stores the objset in *osp and, for a non-NULL
            * ds, also publishes it in ds->ds_objset under ds_lock.  On failure
            * returns the error from dsl_read_nolock() (with ECKSUM converted to
            * EIO) or from dsl_prop_register(); the partially built objset is
            * freed and *osp is not written.
            */
 257  257  int
 258  258  dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 259  259      objset_t **osp)
 260  260  {
 261  261          objset_t *os;
 262  262          int i, err;
 263  263  
 264  264          ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
 265  265  
 266  266          os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
 267  267          os->os_dsl_dataset = ds;
 268  268          os->os_spa = spa;
 269  269          os->os_rootbp = bp;
 270  270          if (!BP_IS_HOLE(os->os_rootbp)) {
 271  271                  uint32_t aflags = ARC_WAIT;
 272  272                  zbookmark_t zb;
 273  273                  SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 274  274                      ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 275  275  
 276  276                  if (DMU_OS_IS_L2CACHEABLE(os))
 277  277                          aflags |= ARC_L2CACHE;
 278  278  
 279  279                  dprintf_bp(os->os_rootbp, "reading %s", "");
 280  280                  /*
 281  281                   * XXX when bprewrite scrub can change the bp,
 282  282                   * and this is called from dmu_objset_open_ds_os, the bp
 283  283                   * could change, and we'll need a lock.
 284  284                   */
 285  285                  err = dsl_read_nolock(NULL, spa, os->os_rootbp,
 286  286                      arc_getbuf_func, &os->os_phys_buf,
 287  287                      ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 288  288                  if (err) {
 289  289                          kmem_free(os, sizeof (objset_t));
 290  290                          /* convert checksum errors into IO errors */
 291  291                          if (err == ECKSUM)
 292  292                                  err = EIO;
 293  293                          return (err);
 294  294                  }
 295  295  
 296  296                  /* Increase the blocksize if we are permitted. */
                           /*
                            * The smaller buffer that was read is copied into a
                            * zero-filled buffer of sizeof (objset_phys_t), and
                            * the reference on the old buffer is dropped.
                            */
 297  297                  if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
 298  298                      arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
 299  299                          arc_buf_t *buf = arc_buf_alloc(spa,
 300  300                              sizeof (objset_phys_t), &os->os_phys_buf,
 301  301                              ARC_BUFC_METADATA);
 302  302                          bzero(buf->b_data, sizeof (objset_phys_t));
 303  303                          bcopy(os->os_phys_buf->b_data, buf->b_data,
 304  304                              arc_buf_size(os->os_phys_buf));
 305  305                          (void) arc_buf_remove_ref(os->os_phys_buf,
 306  306                              &os->os_phys_buf);
 307  307                          os->os_phys_buf = buf;
 308  308                  }
 309  309  
 310  310                  os->os_phys = os->os_phys_buf->b_data;
 311  311                  os->os_flags = os->os_phys->os_flags;
 312  312          } else {
 313  313                  int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
 314  314                      sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
 315  315                  os->os_phys_buf = arc_buf_alloc(spa, size,
 316  316                      &os->os_phys_buf, ARC_BUFC_METADATA);
 317  317                  os->os_phys = os->os_phys_buf->b_data;
 318  318                  bzero(os->os_phys, size);
 319  319          }
 320  320  
 321  321          /*
 322  322           * Note: the changed_cb will be called once before the register
 323  323           * func returns, thus changing the checksum/compression from the
 324  324           * default (fletcher2/off).  Snapshots don't need to know about
 325  325           * checksum/compression/copies.
 326  326           */
 327  327          if (ds) {
 328  328                  err = dsl_prop_register(ds, "primarycache",
 329  329                      primary_cache_changed_cb, os);
 330  330                  if (err == 0)
 331  331                          err = dsl_prop_register(ds, "secondarycache",
 332  332                              secondary_cache_changed_cb, os);
 333  333                  if (!dsl_dataset_is_snapshot(ds)) {
 334  334                          if (err == 0)
 335  335                                  err = dsl_prop_register(ds, "checksum",
 336  336                                      checksum_changed_cb, os);
 337  337                          if (err == 0)
 338  338                                  err = dsl_prop_register(ds, "compression",
 339  339                                      compression_changed_cb, os);
 340  340                          if (err == 0)
 341  341                                  err = dsl_prop_register(ds, "copies",
 342  342                                      copies_changed_cb, os);
 343  343                          if (err == 0)
 344  344                                  err = dsl_prop_register(ds, "dedup",
 345  345                                      dedup_changed_cb, os);
 346  346                          if (err == 0)
 347  347                                  err = dsl_prop_register(ds, "logbias",
 348  348                                      logbias_changed_cb, os);
 349  349                          if (err == 0)
 350  350                                  err = dsl_prop_register(ds, "sync",
 351  351                                      sync_changed_cb, os);
 352  352                  }
                           /*
                            * NOTE(review): callbacks registered before the failing
                            * dsl_prop_register() are not unregistered on this path
                            * although os is freed -- confirm whether the dsl_prop
                            * layer or the caller cleans them up.
                            */
 353  353                  if (err) {
 354  354                          VERIFY(arc_buf_remove_ref(os->os_phys_buf,
 355  355                              &os->os_phys_buf) == 1);
 356  356                          kmem_free(os, sizeof (objset_t));
 357  357                          return (err);
 358  358                  }
 359  359          } else if (ds == NULL) {
                           /* ds is always NULL here; the explicit test is redundant. */
 360  360                  /* It's the meta-objset. */
 361  361                  os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 362  362                  os->os_compress = ZIO_COMPRESS_LZJB;
 363  363                  os->os_copies = spa_max_replication(spa);
 364  364                  os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
 365  365                  os->os_dedup_verify = 0;
 366  366                  os->os_logbias = 0;
 367  367                  os->os_sync = 0;
 368  368                  os->os_primary_cache = ZFS_CACHE_ALL;
 369  369                  os->os_secondary_cache = ZFS_CACHE_ALL;
 370  370          }
 371  371  
                   /*
                    * For snapshots os_zil_header is left as zeroed by kmem_zalloc();
                    * otherwise it is a copy of the header in os_phys.
                    */
 372  372          if (ds == NULL || !dsl_dataset_is_snapshot(ds))
 373  373                  os->os_zil_header = os->os_phys->os_zil_header;
 374  374          os->os_zil = zil_alloc(os, &os->os_zil_header);
 375  375  
 376  376          for (i = 0; i < TXG_SIZE; i++) {
 377  377                  list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 378  378                      offsetof(dnode_t, dn_dirty_link[i]));
 379  379                  list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 380  380                      offsetof(dnode_t, dn_dirty_link[i]));
 381  381          }
 382  382          list_create(&os->os_dnodes, sizeof (dnode_t),
 383  383              offsetof(dnode_t, dn_link));
 384  384          list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 385  385              offsetof(dmu_buf_impl_t, db_link));
 386  386  
 387  387          mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
 388  388          mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
 389  389          mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 390  390  
 391  391          DMU_META_DNODE(os) = dnode_special_open(os,
 392  392              &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
 393  393              &os->os_meta_dnode);
                   /*
                    * The userused/groupused dnodes exist only in the full-size
                    * objset_phys_t, so open them only when the buffer is that big.
                    */
 394  394          if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
 395  395                  DMU_USERUSED_DNODE(os) = dnode_special_open(os,
 396  396                      &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
 397  397                      &os->os_userused_dnode);
 398  398                  DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
 399  399                      &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
 400  400                      &os->os_groupused_dnode);
 401  401          }
 402  402  
 403  403          /*
 404  404           * We should be the only thread trying to do this because we
 405  405           * have ds_opening_lock
 406  406           */
 407  407          if (ds) {
 408  408                  mutex_enter(&ds->ds_lock);
 409  409                  ASSERT(ds->ds_objset == NULL);
 410  410                  ds->ds_objset = os;
 411  411                  mutex_exit(&ds->ds_lock);
 412  412          }
 413  413  
 414  414          *osp = os;
 415  415          return (0);
 416  416  }
 417  417  
           /*
            * Return the objset for a dataset in *osp, opening it on first use.
            *
            * Under ds->ds_opening_lock, the cached ds->ds_objset is used when it
            * is already set; otherwise dmu_objset_open_impl() is called with the
            * dataset's spa and block pointer.  Returns 0, or the error from
            * dmu_objset_open_impl().
            */
 418  418  int
 419  419  dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 420  420  {
 421  421          int err = 0;
 422  422  
 423  423          mutex_enter(&ds->ds_opening_lock);
 424  424          *osp = ds->ds_objset;
 425  425          if (*osp == NULL) {
 426  426                  err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
 427  427                      ds, dsl_dataset_get_blkptr(ds), osp);
 428  428          }
 429  429          mutex_exit(&ds->ds_opening_lock);
 430  430          return (err);
 431  431  }
 432  432  
 433  433  /*
            * Called from the ZPL.
            *
            * Hold the dataset called name on behalf of tag and return its objset
            * in *osp.  If the objset cannot be obtained, the dataset hold is
            * released again before returning.  Returns 0 or the error from
            * dsl_dataset_hold() / dmu_objset_from_ds().  dmu_objset_rele()
            * releases the matching dataset hold.
            */
 434  434  int
 435  435  dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 436  436  {
 437  437          dsl_dataset_t *ds;
 438  438          int err;
 439  439  
 440  440          err = dsl_dataset_hold(name, tag, &ds);
 441  441          if (err)
 442  442                  return (err);
 443  443  
 444  444          err = dmu_objset_from_ds(ds, osp);
 445  445          if (err)
 446  446                  dsl_dataset_rele(ds, tag);
 447  447  
 448  448          return (err);
 449  449  }
 450  450  
 451  451  /*
            * Called from the ZPL.
            *
            * Take ownership of the dataset called name on behalf of tag and
            * return its objset in *osp.
            *
            *   type      expected objset type, or DMU_OST_ANY to accept any;
            *             a mismatch disowns the dataset and returns EINVAL
            *   readonly  when false and the dataset is a snapshot, the dataset
            *             is disowned and EROFS is returned
            *
            * Returns 0 on success, otherwise the error from dsl_dataset_own() or
            * dmu_objset_from_ds(), or EINVAL / EROFS as above.  On the EINVAL and
            * EROFS paths *osp has already been written and is left pointing at
            * the objset that was just disowned; callers must not use it.
            */
 452  452  int
 453  453  dmu_objset_own(const char *name, dmu_objset_type_t type,
 454  454      boolean_t readonly, void *tag, objset_t **osp)
 455  455  {
 456  456          dsl_dataset_t *ds;
 457  457          int err;
 458  458  
 459  459          err = dsl_dataset_own(name, B_FALSE, tag, &ds);
 460  460          if (err)
 461  461                  return (err);
 462  462  
 463  463          err = dmu_objset_from_ds(ds, osp);
 464  464          if (err) {
 465  465                  dsl_dataset_disown(ds, tag);
 466  466          } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
 467  467                  dmu_objset_disown(*osp, tag);
 468  468                  return (EINVAL);
 469  469          } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
 470  470                  dmu_objset_disown(*osp, tag);
 471  471                  return (EROFS);
 472  472          }
 473  473          return (err);
 474  474  }
 475  475  
           /*
            * Release the hold that tag has on the objset's dataset by calling
            * dsl_dataset_rele(); counterpart of dmu_objset_hold().
            */
 476  476  void
 477  477  dmu_objset_rele(objset_t *os, void *tag)
 478  478  {
 479  479          dsl_dataset_rele(os->os_dsl_dataset, tag);
 480  480  }
 481  481  
           /*
            * Give up tag's ownership of the objset's dataset by calling
            * dsl_dataset_disown(); counterpart of dmu_objset_own().
            */
 482  482  void
 483  483  dmu_objset_disown(objset_t *os, void *tag)
 484  484  {
 485  485          dsl_dataset_disown(os->os_dsl_dataset, tag);
 486  486  }
 487  487  
           /*
            * Walk os->os_dnodes and call dnode_evict_dbufs() on every dnode for
            * which dnode_add_ref() succeeds.
            *
            * The per-objset mutex os->os_lock protects the list walk.  It is
            * dropped around dnode_evict_dbufs()/dnode_rele(), so a reference on
            * the next candidate dnode is taken before the lock is released.
            *
            * Returns non-zero when, after the pass, the head of os_dnodes is not
            * the meta-dnode (which was moved to the tail at the start), and 0
            * when the head is the meta-dnode.
            */
 488  488  int
 489  489  dmu_objset_evict_dbufs(objset_t *os)
 490  490  {
 491  491          dnode_t *dn;
 492  492  
 493  493          mutex_enter(&os->os_lock);
 494  494  
 495  495          /* process the mdn last, since the other dnodes have holds on it */
 496  496          list_remove(&os->os_dnodes, DMU_META_DNODE(os));
 497  497          list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
 498  498  
 499  499          /*
 500  500           * Find the first dnode with holds.  We have to do this dance
 501  501           * because dnode_add_ref() only works if you already have a
 502  502           * hold.  If there are no holds then it has no dbufs so OK to
 503  503           * skip.
 504  504           */
 505  505          for (dn = list_head(&os->os_dnodes);
 506  506              dn && !dnode_add_ref(dn, FTAG);
 507  507              dn = list_next(&os->os_dnodes, dn))
 508  508                  continue;
 509  509  
 510  510          while (dn) {
 511  511                  dnode_t *next_dn = dn;
 512  512  
                           /* Reference the next holdable dnode while still locked. */
 513  513                  do {
 514  514                          next_dn = list_next(&os->os_dnodes, next_dn);
 515  515                  } while (next_dn && !dnode_add_ref(next_dn, FTAG));
 516  516  
 517  517                  mutex_exit(&os->os_lock);
 518  518                  dnode_evict_dbufs(dn);
 519  519                  dnode_rele(dn, FTAG);
 520  520                  mutex_enter(&os->os_lock);
 521  521                  dn = next_dn;
 522  522          }
 523  523          dn = list_head(&os->os_dnodes);
 524  524          mutex_exit(&os->os_lock);
 525  525          return (dn != DMU_META_DNODE(os));
 526  526  }
 527  527  
           /*
            * Tear down and free an in-core objset_t.
            *
            * The objset must not be dirty in any txg (asserted).  In order, this
            * unregisters the property callbacks that dmu_objset_open_impl()
            * registered, tears down the SA state if present, evicts dbufs,
            * closes the special dnodes, frees the ZIL, drops the reference on
            * os_phys_buf, passes through the global os_lock barrier, destroys
            * the objset's mutexes and finally frees the structure.
            */
 528  528  void
 529  529  dmu_objset_evict(objset_t *os)
 530  530  {
 531  531          dsl_dataset_t *ds = os->os_dsl_dataset;
 532  532  
 533  533          for (int t = 0; t < TXG_SIZE; t++)
 534  534                  ASSERT(!dmu_objset_is_dirty(os, t));
 535  535  
                   /*
                    * Mirror of the registration in dmu_objset_open_impl(): the six
                    * per-filesystem properties are only registered for
                    * non-snapshots, the two cache properties for every dataset.
                    */
 536  536          if (ds) {
 537  537                  if (!dsl_dataset_is_snapshot(ds)) {
 538  538                          VERIFY(0 == dsl_prop_unregister(ds, "checksum",
 539  539                              checksum_changed_cb, os));
 540  540                          VERIFY(0 == dsl_prop_unregister(ds, "compression",
 541  541                              compression_changed_cb, os));
 542  542                          VERIFY(0 == dsl_prop_unregister(ds, "copies",
 543  543                              copies_changed_cb, os));
 544  544                          VERIFY(0 == dsl_prop_unregister(ds, "dedup",
 545  545                              dedup_changed_cb, os));
 546  546                          VERIFY(0 == dsl_prop_unregister(ds, "logbias",
 547  547                              logbias_changed_cb, os));
 548  548                          VERIFY(0 == dsl_prop_unregister(ds, "sync",
 549  549                              sync_changed_cb, os));
 550  550                  }
 551  551                  VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
 552  552                      primary_cache_changed_cb, os));
 553  553                  VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
 554  554                      secondary_cache_changed_cb, os));
 555  555          }
 556  556  
 557  557          if (os->os_sa)
 558  558                  sa_tear_down(os);
 559  559  
 560  560          /*
 561  561           * We should need only a single pass over the dnode list, since
 562  562           * nothing can be added to the list at this point.
 563  563           */
 564  564          (void) dmu_objset_evict_dbufs(os);
 565  565  
 566  566          dnode_special_close(&os->os_meta_dnode);
 567  567          if (DMU_USERUSED_DNODE(os)) {
 568  568                  dnode_special_close(&os->os_userused_dnode);
 569  569                  dnode_special_close(&os->os_groupused_dnode);
 570  570          }
 571  571          zil_free(os->os_zil);
 572  572  
 573  573          ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
 574  574  
 575  575          VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
 576  576  
 577  577          /*
 578  578           * This is a barrier to prevent the objset from going away in
 579  579           * dnode_move() until we can safely ensure that the objset is still in
 580  580           * use. We consider the objset valid before the barrier and invalid
 581  581           * after the barrier.
 582  582           */
                   /* Note: this is the file-global rwlock, not os->os_lock. */
 583  583          rw_enter(&os_lock, RW_READER);
 584  584          rw_exit(&os_lock);
 585  585  
 586  586          mutex_destroy(&os->os_lock);
 587  587          mutex_destroy(&os->os_obj_lock);
 588  588          mutex_destroy(&os->os_user_ptr_lock);
 589  589          kmem_free(os, sizeof (objset_t));
 590  590  }
 591  591  
           /*
            * Return dsl_dir_snap_cmtime() for the directory of the objset's
            * dataset.
            *
            * os->os_dsl_dataset is dereferenced without a NULL check, so this
            * must not be called on an objset that has no dataset.
            */
 592  592  timestruc_t
 593  593  dmu_objset_snap_cmtime(objset_t *os)
 594  594  {
 595  595          return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
 596  596  }
 597  597  
 598  598  /*
            * Called from dsl for the meta-objset.
            *
            * Initialize a new objset of the given type; tx must be a syncing
            * transaction (asserted).
            *
            *   ds    owning dataset, or NULL for the meta-objset (MOS)
            *   bp    root block pointer, used only when ds is NULL
            *   type  objset type to record; must be a concrete type, i.e. not
            *         DMU_OST_NONE or DMU_OST_ANY, and below DMU_OST_NUMTYPES
            *         (asserted)
            *
            * The objset is obtained with dmu_objset_from_ds() for a dataset, or
            * opened directly with dmu_objset_open_impl() for the MOS; a failure
            * of either is fatal (VERIFY).  The meta-dnode is then allocated, and
            * for a dataset its number of levels is pre-sized as explained below.
            * Returns the objset.
            */
 599  599  objset_t *
 600  600  dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 601  601      dmu_objset_type_t type, dmu_tx_t *tx)
 602  602  {
 603  603          objset_t *os;
 604  604          dnode_t *mdn;
 605  605  
 606  606          ASSERT(dmu_tx_is_syncing(tx));
 607  607          if (ds != NULL)
 608  608                  VERIFY(0 == dmu_objset_from_ds(ds, &os));
 609  609          else
 610  610                  VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os));
 611  611  
 612  612          mdn = DMU_META_DNODE(os);
 613  613  
 614  614          dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
 615  615              DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
 616  616  
 617  617          /*
 618  618           * We don't want to have to increase the meta-dnode's nlevels
 619  619           * later, because then we could do it in quescing context while
 620  620           * we are also accessing it in open context.
 621  621           *
 622  622           * This precaution is not necessary for the MOS (ds == NULL),
 623  623           * because the MOS is only updated in syncing context.
 624  624           * This is most fortunate: the MOS is the only objset that
 625  625           * needs to be synced multiple times as spa_sync() iterates
 626  626           * to convergence, so minimizing its dn_nlevels matters.
 627  627           */
 628  628          if (ds != NULL) {
 629  629                  int levels = 1;
 630  630  
 631  631                  /*
 632  632                   * Determine the number of levels necessary for the meta-dnode
 633  633                   * to contain DN_MAX_OBJECT dnodes.
 634  634                   */
 635  635                  while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
 636  636                      (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
 637  637                      DN_MAX_OBJECT * sizeof (dnode_phys_t))
 638  638                          levels++;
 639  639  
 640  640                  mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
 641  641                      mdn->dn_nlevels = levels;
 642  642          }
 643  643  
 644  644          ASSERT(type != DMU_OST_NONE);
 645  645          ASSERT(type != DMU_OST_ANY);
 646  646          ASSERT(type < DMU_OST_NUMTYPES);
 647  647          os->os_phys->os_type = type;
 648  648          if (dmu_objset_userused_enabled(os)) {
 649  649                  os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 650  650                  os->os_flags = os->os_phys->os_flags;
 651  651          }
 652  652  
                   /*
                    * NOTE(review): ds is NULL here for the MOS; presumably
                    * dsl_dataset_dirty() accepts a NULL dataset -- confirm.
                    */
 653  653          dsl_dataset_dirty(ds, tx);
 654  654  
 655  655          return (os);
 656  656  }
 657  657  
           /*
            * Argument bundle handed as arg2 to dmu_objset_create_check() and
            * dmu_objset_create_sync() through dsl_sync_task_do().  Filled in by
            * dmu_objset_create() and dmu_objset_clone().
            */
 658  658  struct oscarg {
                   /* optional callback run by the sync func on a newly created objset */
 659  659          void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
                   /* opaque argument passed through to userfunc */
 660  660          void *userarg;
                   /* snapshot to clone from, or NULL for a plain create */
 661  661          dsl_dataset_t *clone_origin;
                   /* remaining name component ("tail") returned by dsl_dir_open() */
 662  662          const char *lastname;
                   /* objset type for a plain create; left zero by dmu_objset_clone() */
 663  663          dmu_objset_type_t type;
                   /* flags passed to dsl_dataset_create_sync() */
 664  664          uint64_t flags;
                   /* caller credentials, captured with CRED() */
 665  665          cred_t *cr;
 666  666  };
 667  667  
           /*
            * Sync-task check function for dmu_objset_create() and
            * dmu_objset_clone().
            *
            *   arg1  parent dsl_dir_t
            *   arg2  struct oscarg describing the request
            *   tx    unused
            *
            * Returns EEXIST when the parent already has a child directory called
            * oa->lastname, or the zap_lookup() error when the lookup fails with
            * anything other than ENOENT.  For clones it additionally returns
            * EXDEV when the origin lives in a different pool and EINVAL when the
            * origin is not a snapshot.  Returns 0 when the create may proceed.
            */
 668  668  /*ARGSUSED*/
 669  669  static int
 670  670  dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
 671  671  {
 672  672          dsl_dir_t *dd = arg1;
 673  673          struct oscarg *oa = arg2;
 674  674          objset_t *mos = dd->dd_pool->dp_meta_objset;
 675  675          int err;
 676  676          uint64_t ddobj;
 677  677  
 678  678          err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
 679  679              oa->lastname, sizeof (uint64_t), 1, &ddobj);
                   /* err == 0 means the name was found, which is reported as EEXIST. */
 680  680          if (err != ENOENT)
 681  681                  return (err ? err : EEXIST);
 682  682  
 683  683          if (oa->clone_origin != NULL) {
 684  684                  /* You can't clone across pools. */
 685  685                  if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
 686  686                          return (EXDEV);
 687  687  
 688  688                  /* You can only clone snapshots, not the head datasets. */
 689  689                  if (!dsl_dataset_is_snapshot(oa->clone_origin))
 690  690                          return (EINVAL);
 691  691          }
 692  692  
 693  693          return (0);
 694  694  }
 695  695  
           /*
            * Sync-task sync function for dmu_objset_create() and
            * dmu_objset_clone(); runs in syncing context (asserted).
            *
            *   arg1  parent dsl_dir_t
            *   arg2  struct oscarg describing the request
            *
            * Creates the dataset with dsl_dataset_create_sync() and takes a
            * temporary hold on it.  When the new dataset's block pointer is a
            * hole, the objset is initialized with dmu_objset_create_impl() and
            * oa->userfunc, if set, is invoked on it.  A "create" or "clone"
            * history record is then logged and the hold released.
            */
 696  696  static void
 697  697  dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 698  698  {
 699  699          dsl_dir_t *dd = arg1;
 700  700          spa_t *spa = dd->dd_pool->dp_spa;
 
    | ↓ open down ↓ | 700 lines elided | ↑ open up ↑ | 
 701  701          struct oscarg *oa = arg2;
 702  702          uint64_t obj;
 703  703          dsl_dataset_t *ds;
 704  704          blkptr_t *bp;
 705  705  
 706  706          ASSERT(dmu_tx_is_syncing(tx));
 707  707  
 708  708          obj = dsl_dataset_create_sync(dd, oa->lastname,
 709  709              oa->clone_origin, oa->flags, oa->cr, tx);
 710  710  
                   /*
                    * The '-' / '+' pair below is the change under review: the
                    * VERIFY3U(0, ==, x) form is replaced by VERIFY0(x).
                    */
 711      -        VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds));
      711 +        VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds));
 712  712          bp = dsl_dataset_get_blkptr(ds);
 713  713          if (BP_IS_HOLE(bp)) {
 714  714                  objset_t *os =
 715  715                      dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
 716  716  
 717  717                  if (oa->userfunc)
 718  718                          oa->userfunc(os, oa->userarg, oa->cr, tx);
 719  719          }
 720  720  
 721  721          if (oa->clone_origin == NULL) {
 722  722                  spa_history_log_internal_ds(ds, "create", tx, "");
 723  723          } else {
 724  724                  char namebuf[MAXNAMELEN];
 725  725                  dsl_dataset_name(oa->clone_origin, namebuf);
 726  726                  spa_history_log_internal_ds(ds, "clone", tx,
 727  727                      "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object);
 728  728          }
 729  729          dsl_dataset_rele(ds, FTAG);
 730  730  }
 731  731  
           /*
            * Create a new objset called name.
            *
            *   name   dataset name; must not contain '@' (asserted)
            *   type   objset type, passed on in the oscarg
            *   flags  passed on in the oscarg
            *   func   optional callback; dmu_objset_create_sync() calls it with
            *          arg on the new objset when one is created
            *
            * The parent directory is opened with dsl_dir_open().  A NULL tail
            * means there is no name component left to create, which is reported
            * as EEXIST.  Otherwise the check/sync pair is run through
            * dsl_sync_task_do().  Returns 0 or an errno value.
            */
 732  732  int
 733  733  dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
 734  734      void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
 735  735  {
 736  736          dsl_dir_t *pdd;
 737  737          const char *tail;
 738  738          int err = 0;
 739  739          struct oscarg oa = { 0 };
 740  740  
 741  741          ASSERT(strchr(name, '@') == NULL);
 742  742          err = dsl_dir_open(name, FTAG, &pdd, &tail);
 743  743          if (err)
 744  744                  return (err);
 745  745          if (tail == NULL) {
 746  746                  dsl_dir_close(pdd, FTAG);
 747  747                  return (EEXIST);
 748  748          }
 749  749  
 750  750          oa.userfunc = func;
 751  751          oa.userarg = arg;
 752  752          oa.lastname = tail;
 753  753          oa.type = type;
 754  754          oa.flags = flags;
 755  755          oa.cr = CRED();
 756  756  
 757  757          err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 758  758              dmu_objset_create_sync, pdd, &oa, 5);
 759  759          dsl_dir_close(pdd, FTAG);
 760  760          return (err);
 761  761  }
 762  762  
 763  763  int
 764  764  dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
 765  765  {
 766  766          dsl_dir_t *pdd;
 767  767          const char *tail;
 768  768          int err = 0;
 769  769          struct oscarg oa = { 0 };
 770  770  
 771  771          ASSERT(strchr(name, '@') == NULL);
 772  772          err = dsl_dir_open(name, FTAG, &pdd, &tail);
 773  773          if (err)
 774  774                  return (err);
 775  775          if (tail == NULL) {
 776  776                  dsl_dir_close(pdd, FTAG);
 777  777                  return (EEXIST);
 778  778          }
 779  779  
 780  780          oa.lastname = tail;
 781  781          oa.clone_origin = clone_origin;
 782  782          oa.flags = flags;
 783  783          oa.cr = CRED();
 784  784  
 785  785          err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 786  786              dmu_objset_create_sync, pdd, &oa, 5);
 787  787          dsl_dir_close(pdd, FTAG);
 788  788          return (err);
 789  789  }
 790  790  
 791  791  int
 792  792  dmu_objset_destroy(const char *name, boolean_t defer)
 793  793  {
 794  794          dsl_dataset_t *ds;
 795  795          int error;
 796  796  
 797  797          error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
 798  798          if (error == 0) {
 799  799                  error = dsl_dataset_destroy(ds, FTAG, defer);
 800  800                  /* dsl_dataset_destroy() closes the ds. */
 801  801          }
 802  802  
 803  803          return (error);
 804  804  }
 805  805  
 806  806  typedef struct snapallarg {
 807  807          dsl_sync_task_group_t *saa_dstg;
 808  808          boolean_t saa_needsuspend;
 809  809          nvlist_t *saa_props;
 810  810  
 811  811          /* the following are used only if 'temporary' is set: */
 812  812          boolean_t saa_temporary;
 813  813          const char *saa_htag;
 814  814          struct dsl_ds_holdarg *saa_ha;
 815  815          dsl_dataset_t *saa_newds;
 816  816  } snapallarg_t;
 817  817  
 818  818  typedef struct snaponearg {
 819  819          const char *soa_longname; /* long snap name */
 820  820          const char *soa_snapname; /* short snap name */
 821  821          snapallarg_t *soa_saa;
 822  822  } snaponearg_t;
 823  823  
 824  824  static int
 825  825  snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
 826  826  {
 827  827          objset_t *os = arg1;
 828  828          snaponearg_t *soa = arg2;
 829  829          snapallarg_t *saa = soa->soa_saa;
 830  830          int error;
 831  831  
 832  832          /* The props have already been checked by zfs_check_userprops(). */
 833  833  
 834  834          error = dsl_dataset_snapshot_check(os->os_dsl_dataset,
 835  835              soa->soa_snapname, tx);
 836  836          if (error)
 837  837                  return (error);
 838  838  
 839  839          if (saa->saa_temporary) {
 840  840                  /*
 841  841                   * Ideally we would just call
 842  842                   * dsl_dataset_user_hold_check() and
 843  843                   * dsl_dataset_destroy_check() here.  However the
 844  844                   * dataset we want to hold and destroy is the snapshot
 845  845                   * that we just confirmed we can create, but it won't
 846  846                   * exist until after these checks are run.  Do any
 847  847                   * checks we can here and if more checks are added to
 848  848                   * those routines in the future, similar checks may be
 849  849                   * necessary here.
 850  850                   */
 851  851                  if (spa_version(os->os_spa) < SPA_VERSION_USERREFS)
 852  852                          return (ENOTSUP);
 853  853                  /*
 854  854                   * Not checking number of tags because the tag will be
 855  855                   * unique, as it will be the only tag.
 856  856                   */
 857  857                  if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
 858  858                          return (E2BIG);
 859  859  
 860  860                  saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg),
 861  861                      KM_SLEEP);
 862  862                  saa->saa_ha->temphold = B_TRUE;
 863  863                  saa->saa_ha->htag = saa->saa_htag;
 864  864          }
 865  865          return (error);
 866  866  }
 867  867  
 868  868  static void
 869  869  snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 870  870  {
 871  871          objset_t *os = arg1;
 872  872          dsl_dataset_t *ds = os->os_dsl_dataset;
 873  873          snaponearg_t *soa = arg2;
 874  874          snapallarg_t *saa = soa->soa_saa;
 875  875  
 876  876          dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx);
 877  877  
 878  878          if (saa->saa_props != NULL) {
 879  879                  dsl_props_arg_t pa;
 880  880                  pa.pa_props = saa->saa_props;
 881  881                  pa.pa_source = ZPROP_SRC_LOCAL;
 882  882                  dsl_props_set_sync(ds->ds_prev, &pa, tx);
 883  883          }
 884  884  
 885  885          if (saa->saa_temporary) {
 886  886                  struct dsl_ds_destroyarg da;
 887  887  
 888  888                  dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx);
 889  889                  kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg));
 890  890                  saa->saa_ha = NULL;
 891  891                  saa->saa_newds = ds->ds_prev;
 892  892  
 893  893                  da.ds = ds->ds_prev;
 894  894                  da.defer = B_TRUE;
 895  895                  dsl_dataset_destroy_sync(&da, FTAG, tx);
 896  896          }
 897  897  }
 898  898  
 899  899  static int
 900  900  snapshot_one_impl(const char *snapname, void *arg)
 901  901  {
 902  902          char fsname[MAXPATHLEN];
 903  903          snapallarg_t *saa = arg;
 904  904          snaponearg_t *soa;
 905  905          objset_t *os;
 906  906          int err;
 907  907  
 908  908          (void) strlcpy(fsname, snapname, sizeof (fsname));
 909  909          strchr(fsname, '@')[0] = '\0';
 910  910  
 911  911          err = dmu_objset_hold(fsname, saa, &os);
 912  912          if (err != 0)
 913  913                  return (err);
 914  914  
 915  915          /*
 916  916           * If the objset is in an inconsistent state (eg, in the process
 917  917           * of being destroyed), don't snapshot it.
 918  918           */
 919  919          if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
 920  920                  dmu_objset_rele(os, saa);
 921  921                  return (EBUSY);
 922  922          }
 923  923  
 924  924          if (saa->saa_needsuspend) {
 925  925                  err = zil_suspend(dmu_objset_zil(os));
 926  926                  if (err) {
 927  927                          dmu_objset_rele(os, saa);
 928  928                          return (err);
 929  929                  }
 930  930          }
 931  931  
 932  932          soa = kmem_zalloc(sizeof (*soa), KM_SLEEP);
 933  933          soa->soa_saa = saa;
 934  934          soa->soa_longname = snapname;
 935  935          soa->soa_snapname = strchr(snapname, '@') + 1;
 936  936  
 937  937          dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync,
 938  938              os, soa, 3);
 939  939  
 940  940          return (0);
 941  941  }
 942  942  
 943  943  /*
 944  944   * The snapshots must all be in the same pool.
 945  945   */
 946  946  int
 947  947  dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
 948  948  {
 949  949          dsl_sync_task_t *dst;
 950  950          snapallarg_t saa = { 0 };
 951  951          spa_t *spa;
 952  952          int rv = 0;
 953  953          int err;
 954  954          nvpair_t *pair;
 955  955  
 956  956          pair = nvlist_next_nvpair(snaps, NULL);
 957  957          if (pair == NULL)
 958  958                  return (0);
 959  959  
 960  960          err = spa_open(nvpair_name(pair), &spa, FTAG);
 961  961          if (err)
 962  962                  return (err);
 963  963          saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 964  964          saa.saa_props = props;
 965  965          saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
 966  966  
 967  967          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 968  968              pair = nvlist_next_nvpair(snaps, pair)) {
 969  969                  err = snapshot_one_impl(nvpair_name(pair), &saa);
 970  970                  if (err != 0) {
 971  971                          if (errors != NULL) {
 972  972                                  fnvlist_add_int32(errors,
 973  973                                      nvpair_name(pair), err);
 974  974                          }
 975  975                          rv = err;
 976  976                  }
 977  977          }
 978  978  
 979  979          /*
 980  980           * If any call to snapshot_one_impl() failed, don't execute the
 981  981           * sync task.  The error handling code below will clean up the
 982  982           * snaponearg_t from any successful calls to
 983  983           * snapshot_one_impl().
 984  984           */
 985  985          if (rv == 0)
 986  986                  err = dsl_sync_task_group_wait(saa.saa_dstg);
 987  987          if (err != 0)
 988  988                  rv = err;
 989  989  
 990  990          for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
 991  991              dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
 992  992                  objset_t *os = dst->dst_arg1;
 993  993                  snaponearg_t *soa = dst->dst_arg2;
 994  994                  if (dst->dst_err != 0) {
 995  995                          if (errors != NULL) {
 996  996                                  fnvlist_add_int32(errors,
 997  997                                      soa->soa_longname, dst->dst_err);
 998  998                          }
 999  999                          rv = dst->dst_err;
1000 1000                  }
1001 1001  
1002 1002                  if (saa.saa_needsuspend)
1003 1003                          zil_resume(dmu_objset_zil(os));
1004 1004                  dmu_objset_rele(os, &saa);
1005 1005                  kmem_free(soa, sizeof (*soa));
1006 1006          }
1007 1007  
1008 1008          dsl_sync_task_group_destroy(saa.saa_dstg);
1009 1009          spa_close(spa, FTAG);
1010 1010          return (rv);
1011 1011  }
1012 1012  
1013 1013  int
1014 1014  dmu_objset_snapshot_one(const char *fsname, const char *snapname)
1015 1015  {
1016 1016          int err;
1017 1017          char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1018 1018          nvlist_t *snaps = fnvlist_alloc();
1019 1019  
1020 1020          fnvlist_add_boolean(snaps, longsnap);
1021 1021          err = dmu_objset_snapshot(snaps, NULL, NULL);
1022 1022          fnvlist_free(snaps);
1023 1023          strfree(longsnap);
1024 1024          return (err);
1025 1025  }
1026 1026  
1027 1027  int
1028 1028  dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd)
1029 1029  {
1030 1030          dsl_sync_task_t *dst;
1031 1031          snapallarg_t saa = { 0 };
1032 1032          spa_t *spa;
1033 1033          minor_t minor;
1034 1034          int err;
1035 1035  
1036 1036          err = spa_open(snapname, &spa, FTAG);
1037 1037          if (err)
1038 1038                  return (err);
1039 1039          saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1040 1040          saa.saa_htag = tag;
1041 1041          saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
1042 1042          saa.saa_temporary = B_TRUE;
1043 1043  
1044 1044          if (cleanup_fd < 0) {
1045 1045                  spa_close(spa, FTAG);
1046 1046                  return (EINVAL);
1047 1047          }
1048 1048          if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
1049 1049                  spa_close(spa, FTAG);
1050 1050                  return (err);
1051 1051          }
1052 1052  
1053 1053          err = snapshot_one_impl(snapname, &saa);
1054 1054  
1055 1055          if (err == 0)
1056 1056                  err = dsl_sync_task_group_wait(saa.saa_dstg);
1057 1057  
1058 1058          for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
1059 1059              dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
1060 1060                  objset_t *os = dst->dst_arg1;
1061 1061                  dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor);
1062 1062                  if (saa.saa_needsuspend)
1063 1063                          zil_resume(dmu_objset_zil(os));
1064 1064                  dmu_objset_rele(os, &saa);
1065 1065          }
1066 1066  
1067 1067          zfs_onexit_fd_rele(cleanup_fd);
1068 1068          dsl_sync_task_group_destroy(saa.saa_dstg);
1069 1069          spa_close(spa, FTAG);
1070 1070          return (err);
1071 1071  }
1072 1072  
1073 1073  
1074 1074  static void
1075 1075  dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
1076 1076  {
1077 1077          dnode_t *dn;
1078 1078  
1079 1079          while (dn = list_head(list)) {
1080 1080                  ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1081 1081                  ASSERT(dn->dn_dbuf->db_data_pending);
1082 1082                  /*
1083 1083                   * Initialize dn_zio outside dnode_sync() because the
1084 1084                   * meta-dnode needs to set it ouside dnode_sync().
1085 1085                   */
1086 1086                  dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
1087 1087                  ASSERT(dn->dn_zio);
1088 1088  
1089 1089                  ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
1090 1090                  list_remove(list, dn);
1091 1091  
1092 1092                  if (newlist) {
1093 1093                          (void) dnode_add_ref(dn, newlist);
1094 1094                          list_insert_tail(newlist, dn);
1095 1095                  }
1096 1096  
1097 1097                  dnode_sync(dn, tx);
1098 1098          }
1099 1099  }
1100 1100  
1101 1101  /* ARGSUSED */
1102 1102  static void
1103 1103  dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
1104 1104  {
1105 1105          blkptr_t *bp = zio->io_bp;
1106 1106          objset_t *os = arg;
1107 1107          dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
1108 1108  
1109 1109          ASSERT(bp == os->os_rootbp);
1110 1110          ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
1111 1111          ASSERT(BP_GET_LEVEL(bp) == 0);
1112 1112  
1113 1113          /*
1114 1114           * Update rootbp fill count: it should be the number of objects
1115 1115           * allocated in the object set (not counting the "special"
1116 1116           * objects that are stored in the objset_phys_t -- the meta
1117 1117           * dnode and user/group accounting objects).
1118 1118           */
1119 1119          bp->blk_fill = 0;
1120 1120          for (int i = 0; i < dnp->dn_nblkptr; i++)
1121 1121                  bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
1122 1122  }
1123 1123  
1124 1124  /* ARGSUSED */
1125 1125  static void
1126 1126  dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
1127 1127  {
1128 1128          blkptr_t *bp = zio->io_bp;
1129 1129          blkptr_t *bp_orig = &zio->io_bp_orig;
1130 1130          objset_t *os = arg;
1131 1131  
1132 1132          if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
1133 1133                  ASSERT(BP_EQUAL(bp, bp_orig));
1134 1134          } else {
1135 1135                  dsl_dataset_t *ds = os->os_dsl_dataset;
1136 1136                  dmu_tx_t *tx = os->os_synctx;
1137 1137  
1138 1138                  (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
1139 1139                  dsl_dataset_block_born(ds, bp, tx);
1140 1140          }
1141 1141  }
1142 1142  
1143 1143  /* called from dsl */
1144 1144  void
1145 1145  dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
1146 1146  {
1147 1147          int txgoff;
1148 1148          zbookmark_t zb;
1149 1149          zio_prop_t zp;
1150 1150          zio_t *zio;
1151 1151          list_t *list;
1152 1152          list_t *newlist = NULL;
1153 1153          dbuf_dirty_record_t *dr;
1154 1154  
1155 1155          dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
1156 1156  
1157 1157          ASSERT(dmu_tx_is_syncing(tx));
1158 1158          /* XXX the write_done callback should really give us the tx... */
1159 1159          os->os_synctx = tx;
1160 1160  
1161 1161          if (os->os_dsl_dataset == NULL) {
1162 1162                  /*
1163 1163                   * This is the MOS.  If we have upgraded,
1164 1164                   * spa_max_replication() could change, so reset
1165 1165                   * os_copies here.
  
    | ↓ open down ↓ | 444 lines elided | ↑ open up ↑ | 
1166 1166                   */
1167 1167                  os->os_copies = spa_max_replication(os->os_spa);
1168 1168          }
1169 1169  
1170 1170          /*
1171 1171           * Create the root block IO
1172 1172           */
1173 1173          SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1174 1174              os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1175 1175              ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1176      -        VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
     1176 +        VERIFY0(arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
1177 1177              os->os_rootbp, os->os_spa, &zb));
1178 1178  
1179 1179          dmu_write_policy(os, NULL, 0, 0, &zp);
1180 1180  
1181 1181          zio = arc_write(pio, os->os_spa, tx->tx_txg,
1182 1182              os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
1183 1183              dmu_objset_write_ready, dmu_objset_write_done, os,
1184 1184              ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
1185 1185  
1186 1186          /*
1187 1187           * Sync special dnodes - the parent IO for the sync is the root block
1188 1188           */
1189 1189          DMU_META_DNODE(os)->dn_zio = zio;
1190 1190          dnode_sync(DMU_META_DNODE(os), tx);
1191 1191  
1192 1192          os->os_phys->os_flags = os->os_flags;
1193 1193  
1194 1194          if (DMU_USERUSED_DNODE(os) &&
1195 1195              DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1196 1196                  DMU_USERUSED_DNODE(os)->dn_zio = zio;
1197 1197                  dnode_sync(DMU_USERUSED_DNODE(os), tx);
1198 1198                  DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1199 1199                  dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1200 1200          }
1201 1201  
1202 1202          txgoff = tx->tx_txg & TXG_MASK;
1203 1203  
1204 1204          if (dmu_objset_userused_enabled(os)) {
1205 1205                  newlist = &os->os_synced_dnodes;
1206 1206                  /*
1207 1207                   * We must create the list here because it uses the
1208 1208                   * dn_dirty_link[] of this txg.
1209 1209                   */
1210 1210                  list_create(newlist, sizeof (dnode_t),
1211 1211                      offsetof(dnode_t, dn_dirty_link[txgoff]));
1212 1212          }
1213 1213  
1214 1214          dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
1215 1215          dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
1216 1216  
1217 1217          list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
1218 1218          while (dr = list_head(list)) {
1219 1219                  ASSERT(dr->dr_dbuf->db_level == 0);
1220 1220                  list_remove(list, dr);
1221 1221                  if (dr->dr_zio)
1222 1222                          zio_nowait(dr->dr_zio);
1223 1223          }
1224 1224          /*
1225 1225           * Free intent log blocks up to this tx.
1226 1226           */
1227 1227          zil_sync(os->os_zil, tx);
1228 1228          os->os_phys->os_zil_header = os->os_zil_header;
1229 1229          zio_nowait(zio);
1230 1230  }
1231 1231  
1232 1232  boolean_t
1233 1233  dmu_objset_is_dirty(objset_t *os, uint64_t txg)
1234 1234  {
1235 1235          return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
1236 1236              !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
1237 1237  }
1238 1238  
1239 1239  boolean_t
1240 1240  dmu_objset_is_dirty_anywhere(objset_t *os)
1241 1241  {
1242 1242          for (int t = 0; t < TXG_SIZE; t++)
1243 1243                  if (dmu_objset_is_dirty(os, t))
1244 1244                          return (B_TRUE);
1245 1245          return (B_FALSE);
1246 1246  }
1247 1247  
1248 1248  static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
1249 1249  
1250 1250  void
1251 1251  dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
1252 1252  {
1253 1253          used_cbs[ost] = cb;
1254 1254  }
1255 1255  
1256 1256  boolean_t
1257 1257  dmu_objset_userused_enabled(objset_t *os)
1258 1258  {
1259 1259          return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1260 1260              used_cbs[os->os_phys->os_type] != NULL &&
1261 1261              DMU_USERUSED_DNODE(os) != NULL);
  
    | ↓ open down ↓ | 75 lines elided | ↑ open up ↑ | 
1262 1262  }
1263 1263  
1264 1264  static void
1265 1265  do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
1266 1266      uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
1267 1267  {
1268 1268          if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
1269 1269                  int64_t delta = DNODE_SIZE + used;
1270 1270                  if (subtract)
1271 1271                          delta = -delta;
1272      -                VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
     1272 +                VERIFY0(zap_increment_int(os, DMU_USERUSED_OBJECT,
1273 1273                      user, delta, tx));
1274      -                VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
     1274 +                VERIFY0(zap_increment_int(os, DMU_GROUPUSED_OBJECT,
1275 1275                      group, delta, tx));
1276 1276          }
1277 1277  }
1278 1278  
1279 1279  void
1280 1280  dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
1281 1281  {
1282 1282          dnode_t *dn;
1283 1283          list_t *list = &os->os_synced_dnodes;
1284 1284  
1285 1285          ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
1286 1286  
1287 1287          while (dn = list_head(list)) {
1288 1288                  int flags;
1289 1289                  ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
1290 1290                  ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
1291 1291                      dn->dn_phys->dn_flags &
1292 1292                      DNODE_FLAG_USERUSED_ACCOUNTED);
1293 1293  
1294 1294                  /* Allocate the user/groupused objects if necessary. */
1295 1295                  if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
1296 1296                          VERIFY(0 == zap_create_claim(os,
1297 1297                              DMU_USERUSED_OBJECT,
1298 1298                              DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1299 1299                          VERIFY(0 == zap_create_claim(os,
1300 1300                              DMU_GROUPUSED_OBJECT,
1301 1301                              DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1302 1302                  }
1303 1303  
1304 1304                  /*
1305 1305                   * We intentionally modify the zap object even if the
1306 1306                   * net delta is zero.  Otherwise
1307 1307                   * the block of the zap obj could be shared between
1308 1308                   * datasets but need to be different between them after
1309 1309                   * a bprewrite.
1310 1310                   */
1311 1311  
1312 1312                  flags = dn->dn_id_flags;
1313 1313                  ASSERT(flags);
1314 1314                  if (flags & DN_ID_OLD_EXIST)  {
1315 1315                          do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
1316 1316                              dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
1317 1317                  }
1318 1318                  if (flags & DN_ID_NEW_EXIST) {
1319 1319                          do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
1320 1320                              dn->dn_phys->dn_flags,  dn->dn_newuid,
1321 1321                              dn->dn_newgid, B_FALSE, tx);
1322 1322                  }
1323 1323  
1324 1324                  mutex_enter(&dn->dn_mtx);
1325 1325                  dn->dn_oldused = 0;
1326 1326                  dn->dn_oldflags = 0;
1327 1327                  if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
1328 1328                          dn->dn_olduid = dn->dn_newuid;
1329 1329                          dn->dn_oldgid = dn->dn_newgid;
1330 1330                          dn->dn_id_flags |= DN_ID_OLD_EXIST;
1331 1331                          if (dn->dn_bonuslen == 0)
1332 1332                                  dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1333 1333                          else
1334 1334                                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1335 1335                  }
1336 1336                  dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
1337 1337                  mutex_exit(&dn->dn_mtx);
1338 1338  
1339 1339                  list_remove(list, dn);
1340 1340                  dnode_rele(dn, list);
1341 1341          }
1342 1342  }
1343 1343  
1344 1344  /*
1345 1345   * Returns a pointer to data to find uid/gid from
1346 1346   *
1347 1347   * If a dirty record for transaction group that is syncing can't
1348 1348   * be found then NULL is returned.  In the NULL case it is assumed
1349 1349   * the uid/gid aren't changing.
1350 1350   */
1351 1351  static void *
1352 1352  dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
1353 1353  {
1354 1354          dbuf_dirty_record_t *dr, **drp;
1355 1355          void *data;
1356 1356  
1357 1357          if (db->db_dirtycnt == 0)
1358 1358                  return (db->db.db_data);  /* Nothing is changing */
1359 1359  
1360 1360          for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1361 1361                  if (dr->dr_txg == tx->tx_txg)
1362 1362                          break;
1363 1363  
1364 1364          if (dr == NULL) {
1365 1365                  data = NULL;
1366 1366          } else {
1367 1367                  dnode_t *dn;
1368 1368  
1369 1369                  DB_DNODE_ENTER(dr->dr_dbuf);
1370 1370                  dn = DB_DNODE(dr->dr_dbuf);
1371 1371  
1372 1372                  if (dn->dn_bonuslen == 0 &&
1373 1373                      dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
1374 1374                          data = dr->dt.dl.dr_data->b_data;
1375 1375                  else
1376 1376                          data = dr->dt.dl.dr_data;
1377 1377  
1378 1378                  DB_DNODE_EXIT(dr->dr_dbuf);
1379 1379          }
1380 1380  
1381 1381          return (data);
1382 1382  }
1383 1383  
1384 1384  void
1385 1385  dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
1386 1386  {
1387 1387          objset_t *os = dn->dn_objset;
1388 1388          void *data = NULL;
1389 1389          dmu_buf_impl_t *db = NULL;
1390 1390          uint64_t *user, *group;
1391 1391          int flags = dn->dn_id_flags;
1392 1392          int error;
1393 1393          boolean_t have_spill = B_FALSE;
1394 1394  
1395 1395          if (!dmu_objset_userused_enabled(dn->dn_objset))
1396 1396                  return;
1397 1397  
1398 1398          if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
1399 1399              DN_ID_CHKED_SPILL)))
1400 1400                  return;
1401 1401  
1402 1402          if (before && dn->dn_bonuslen != 0)
1403 1403                  data = DN_BONUS(dn->dn_phys);
1404 1404          else if (!before && dn->dn_bonuslen != 0) {
1405 1405                  if (dn->dn_bonus) {
1406 1406                          db = dn->dn_bonus;
1407 1407                          mutex_enter(&db->db_mtx);
1408 1408                          data = dmu_objset_userquota_find_data(db, tx);
1409 1409                  } else {
1410 1410                          data = DN_BONUS(dn->dn_phys);
1411 1411                  }
1412 1412          } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
1413 1413                          int rf = 0;
1414 1414  
1415 1415                          if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
1416 1416                                  rf |= DB_RF_HAVESTRUCT;
1417 1417                          error = dmu_spill_hold_by_dnode(dn,
1418 1418                              rf | DB_RF_MUST_SUCCEED,
1419 1419                              FTAG, (dmu_buf_t **)&db);
1420 1420                          ASSERT(error == 0);
1421 1421                          mutex_enter(&db->db_mtx);
1422 1422                          data = (before) ? db->db.db_data :
1423 1423                              dmu_objset_userquota_find_data(db, tx);
1424 1424                          have_spill = B_TRUE;
1425 1425          } else {
1426 1426                  mutex_enter(&dn->dn_mtx);
1427 1427                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1428 1428                  mutex_exit(&dn->dn_mtx);
1429 1429                  return;
1430 1430          }
1431 1431  
1432 1432          if (before) {
1433 1433                  ASSERT(data);
1434 1434                  user = &dn->dn_olduid;
1435 1435                  group = &dn->dn_oldgid;
1436 1436          } else if (data) {
1437 1437                  user = &dn->dn_newuid;
1438 1438                  group = &dn->dn_newgid;
1439 1439          }
1440 1440  
1441 1441          /*
1442 1442           * Must always call the callback in case the object
1443 1443           * type has changed and that type isn't an object type to track
1444 1444           */
1445 1445          error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
1446 1446              user, group);
1447 1447  
1448 1448          /*
1449 1449           * Preserve existing uid/gid when the callback can't determine
1450 1450           * what the new uid/gid are and the callback returned EEXIST.
1451 1451           * The EEXIST error tells us to just use the existing uid/gid.
1452 1452           * If we don't know what the old values are then just assign
1453 1453           * them to 0, since that is a new file  being created.
1454 1454           */
1455 1455          if (!before && data == NULL && error == EEXIST) {
1456 1456                  if (flags & DN_ID_OLD_EXIST) {
1457 1457                          dn->dn_newuid = dn->dn_olduid;
1458 1458                          dn->dn_newgid = dn->dn_oldgid;
1459 1459                  } else {
1460 1460                          dn->dn_newuid = 0;
1461 1461                          dn->dn_newgid = 0;
1462 1462                  }
1463 1463                  error = 0;
1464 1464          }
1465 1465  
1466 1466          if (db)
1467 1467                  mutex_exit(&db->db_mtx);
1468 1468  
1469 1469          mutex_enter(&dn->dn_mtx);
1470 1470          if (error == 0 && before)
1471 1471                  dn->dn_id_flags |= DN_ID_OLD_EXIST;
1472 1472          if (error == 0 && !before)
1473 1473                  dn->dn_id_flags |= DN_ID_NEW_EXIST;
1474 1474  
1475 1475          if (have_spill) {
1476 1476                  dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1477 1477          } else {
1478 1478                  dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1479 1479          }
1480 1480          mutex_exit(&dn->dn_mtx);
1481 1481          if (have_spill)
1482 1482                  dmu_buf_rele((dmu_buf_t *)db, FTAG);
1483 1483  }
1484 1484  
1485 1485  boolean_t
1486 1486  dmu_objset_userspace_present(objset_t *os)
1487 1487  {
1488 1488          return (os->os_phys->os_flags &
1489 1489              OBJSET_FLAG_USERACCOUNTING_COMPLETE);
1490 1490  }
1491 1491  
1492 1492  int
1493 1493  dmu_objset_userspace_upgrade(objset_t *os)
1494 1494  {
1495 1495          uint64_t obj;
1496 1496          int err = 0;
1497 1497  
1498 1498          if (dmu_objset_userspace_present(os))
1499 1499                  return (0);
1500 1500          if (!dmu_objset_userused_enabled(os))
1501 1501                  return (ENOTSUP);
1502 1502          if (dmu_objset_is_snapshot(os))
1503 1503                  return (EINVAL);
1504 1504  
1505 1505          /*
1506 1506           * We simply need to mark every object dirty, so that it will be
1507 1507           * synced out and now accounted.  If this is called
1508 1508           * concurrently, or if we already did some work before crashing,
1509 1509           * that's fine, since we track each object's accounted state
1510 1510           * independently.
1511 1511           */
1512 1512  
1513 1513          for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
1514 1514                  dmu_tx_t *tx;
1515 1515                  dmu_buf_t *db;
1516 1516                  int objerr;
1517 1517  
1518 1518                  if (issig(JUSTLOOKING) && issig(FORREAL))
1519 1519                          return (EINTR);
1520 1520  
1521 1521                  objerr = dmu_bonus_hold(os, obj, FTAG, &db);
1522 1522                  if (objerr)
1523 1523                          continue;
1524 1524                  tx = dmu_tx_create(os);
1525 1525                  dmu_tx_hold_bonus(tx, obj);
1526 1526                  objerr = dmu_tx_assign(tx, TXG_WAIT);
1527 1527                  if (objerr) {
1528 1528                          dmu_tx_abort(tx);
1529 1529                          continue;
1530 1530                  }
1531 1531                  dmu_buf_will_dirty(db, tx);
1532 1532                  dmu_buf_rele(db, FTAG);
1533 1533                  dmu_tx_commit(tx);
1534 1534          }
1535 1535  
1536 1536          os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
1537 1537          txg_wait_synced(dmu_objset_pool(os), 0);
1538 1538          return (0);
1539 1539  }
1540 1540  
1541 1541  void
1542 1542  dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
1543 1543      uint64_t *usedobjsp, uint64_t *availobjsp)
1544 1544  {
1545 1545          dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
1546 1546              usedobjsp, availobjsp);
1547 1547  }
1548 1548  
1549 1549  uint64_t
1550 1550  dmu_objset_fsid_guid(objset_t *os)
1551 1551  {
1552 1552          return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
1553 1553  }
1554 1554  
1555 1555  void
1556 1556  dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
1557 1557  {
1558 1558          stat->dds_type = os->os_phys->os_type;
1559 1559          if (os->os_dsl_dataset)
1560 1560                  dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
1561 1561  }
1562 1562  
1563 1563  void
1564 1564  dmu_objset_stats(objset_t *os, nvlist_t *nv)
1565 1565  {
1566 1566          ASSERT(os->os_dsl_dataset ||
1567 1567              os->os_phys->os_type == DMU_OST_META);
1568 1568  
1569 1569          if (os->os_dsl_dataset != NULL)
1570 1570                  dsl_dataset_stats(os->os_dsl_dataset, nv);
1571 1571  
1572 1572          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
1573 1573              os->os_phys->os_type);
1574 1574          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
1575 1575              dmu_objset_userspace_present(os));
1576 1576  }
1577 1577  
1578 1578  int
1579 1579  dmu_objset_is_snapshot(objset_t *os)
1580 1580  {
1581 1581          if (os->os_dsl_dataset != NULL)
1582 1582                  return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
1583 1583          else
1584 1584                  return (B_FALSE);
1585 1585  }
1586 1586  
1587 1587  int
1588 1588  dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1589 1589      boolean_t *conflict)
1590 1590  {
1591 1591          dsl_dataset_t *ds = os->os_dsl_dataset;
1592 1592          uint64_t ignored;
1593 1593  
1594 1594          if (ds->ds_phys->ds_snapnames_zapobj == 0)
1595 1595                  return (ENOENT);
1596 1596  
1597 1597          return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1598 1598              ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
1599 1599              real, maxlen, conflict));
1600 1600  }
1601 1601  
1602 1602  int
1603 1603  dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1604 1604      uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1605 1605  {
1606 1606          dsl_dataset_t *ds = os->os_dsl_dataset;
1607 1607          zap_cursor_t cursor;
1608 1608          zap_attribute_t attr;
1609 1609  
1610 1610          if (ds->ds_phys->ds_snapnames_zapobj == 0)
1611 1611                  return (ENOENT);
1612 1612  
1613 1613          zap_cursor_init_serialized(&cursor,
1614 1614              ds->ds_dir->dd_pool->dp_meta_objset,
1615 1615              ds->ds_phys->ds_snapnames_zapobj, *offp);
1616 1616  
1617 1617          if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1618 1618                  zap_cursor_fini(&cursor);
1619 1619                  return (ENOENT);
1620 1620          }
1621 1621  
1622 1622          if (strlen(attr.za_name) + 1 > namelen) {
1623 1623                  zap_cursor_fini(&cursor);
1624 1624                  return (ENAMETOOLONG);
1625 1625          }
1626 1626  
1627 1627          (void) strcpy(name, attr.za_name);
1628 1628          if (idp)
1629 1629                  *idp = attr.za_first_integer;
1630 1630          if (case_conflict)
1631 1631                  *case_conflict = attr.za_normalization_conflict;
1632 1632          zap_cursor_advance(&cursor);
1633 1633          *offp = zap_cursor_serialize(&cursor);
1634 1634          zap_cursor_fini(&cursor);
1635 1635  
1636 1636          return (0);
1637 1637  }
1638 1638  
1639 1639  int
1640 1640  dmu_dir_list_next(objset_t *os, int namelen, char *name,
1641 1641      uint64_t *idp, uint64_t *offp)
1642 1642  {
1643 1643          dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
1644 1644          zap_cursor_t cursor;
1645 1645          zap_attribute_t attr;
1646 1646  
1647 1647          /* there is no next dir on a snapshot! */
1648 1648          if (os->os_dsl_dataset->ds_object !=
1649 1649              dd->dd_phys->dd_head_dataset_obj)
1650 1650                  return (ENOENT);
1651 1651  
1652 1652          zap_cursor_init_serialized(&cursor,
1653 1653              dd->dd_pool->dp_meta_objset,
1654 1654              dd->dd_phys->dd_child_dir_zapobj, *offp);
1655 1655  
1656 1656          if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1657 1657                  zap_cursor_fini(&cursor);
1658 1658                  return (ENOENT);
1659 1659          }
1660 1660  
1661 1661          if (strlen(attr.za_name) + 1 > namelen) {
1662 1662                  zap_cursor_fini(&cursor);
1663 1663                  return (ENAMETOOLONG);
1664 1664          }
1665 1665  
1666 1666          (void) strcpy(name, attr.za_name);
1667 1667          if (idp)
1668 1668                  *idp = attr.za_first_integer;
1669 1669          zap_cursor_advance(&cursor);
1670 1670          *offp = zap_cursor_serialize(&cursor);
1671 1671          zap_cursor_fini(&cursor);
1672 1672  
1673 1673          return (0);
1674 1674  }
1675 1675  
/*
 * Bundles a name-only callback with its argument so that it can be passed
 * through an interface that takes a single opaque 'void *'.
 */
struct findarg {
        /* callback invoked with a dataset name and 'arg' */
        int (*func)(const char *dsname, void *arg);
        /* opaque argument handed back to 'func' */
        void *arg;
};
1680 1680  
1681 1681  /* ARGSUSED */
1682 1682  static int
1683 1683  findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
1684 1684  {
1685 1685          struct findarg *fa = arg;
1686 1686          return (fa->func(dsname, fa->arg));
1687 1687  }
1688 1688  
1689 1689  /*
1690 1690   * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1691 1691   * Perhaps change all callers to use dmu_objset_find_spa()?
1692 1692   */
1693 1693  int
1694 1694  dmu_objset_find(char *name, int func(const char *, void *), void *arg,
1695 1695      int flags)
1696 1696  {
1697 1697          struct findarg fa;
1698 1698          fa.func = func;
1699 1699          fa.arg = arg;
1700 1700          return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
1701 1701  }
1702 1702  
1703 1703  /*
1704 1704   * Find all objsets under name, call func on each
1705 1705   */
1706 1706  int
1707 1707  dmu_objset_find_spa(spa_t *spa, const char *name,
1708 1708      int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
1709 1709  {
1710 1710          dsl_dir_t *dd;
1711 1711          dsl_pool_t *dp;
1712 1712          dsl_dataset_t *ds;
1713 1713          zap_cursor_t zc;
1714 1714          zap_attribute_t *attr;
1715 1715          char *child;
1716 1716          uint64_t thisobj;
1717 1717          int err;
1718 1718  
1719 1719          if (name == NULL)
1720 1720                  name = spa_name(spa);
1721 1721          err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
1722 1722          if (err)
1723 1723                  return (err);
1724 1724  
1725 1725          /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1726 1726          if (dd->dd_myname[0] == '$') {
1727 1727                  dsl_dir_close(dd, FTAG);
1728 1728                  return (0);
1729 1729          }
1730 1730  
1731 1731          thisobj = dd->dd_phys->dd_head_dataset_obj;
1732 1732          attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1733 1733          dp = dd->dd_pool;
1734 1734  
1735 1735          /*
1736 1736           * Iterate over all children.
1737 1737           */
1738 1738          if (flags & DS_FIND_CHILDREN) {
1739 1739                  for (zap_cursor_init(&zc, dp->dp_meta_objset,
1740 1740                      dd->dd_phys->dd_child_dir_zapobj);
1741 1741                      zap_cursor_retrieve(&zc, attr) == 0;
1742 1742                      (void) zap_cursor_advance(&zc)) {
1743 1743                          ASSERT(attr->za_integer_length == sizeof (uint64_t));
1744 1744                          ASSERT(attr->za_num_integers == 1);
1745 1745  
1746 1746                          child = kmem_asprintf("%s/%s", name, attr->za_name);
1747 1747                          err = dmu_objset_find_spa(spa, child, func, arg, flags);
1748 1748                          strfree(child);
1749 1749                          if (err)
1750 1750                                  break;
1751 1751                  }
1752 1752                  zap_cursor_fini(&zc);
1753 1753  
1754 1754                  if (err) {
1755 1755                          dsl_dir_close(dd, FTAG);
1756 1756                          kmem_free(attr, sizeof (zap_attribute_t));
1757 1757                          return (err);
1758 1758                  }
1759 1759          }
1760 1760  
1761 1761          /*
1762 1762           * Iterate over all snapshots.
1763 1763           */
1764 1764          if (flags & DS_FIND_SNAPSHOTS) {
1765 1765                  if (!dsl_pool_sync_context(dp))
1766 1766                          rw_enter(&dp->dp_config_rwlock, RW_READER);
1767 1767                  err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1768 1768                  if (!dsl_pool_sync_context(dp))
1769 1769                          rw_exit(&dp->dp_config_rwlock);
1770 1770  
1771 1771                  if (err == 0) {
1772 1772                          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
1773 1773                          dsl_dataset_rele(ds, FTAG);
1774 1774  
1775 1775                          for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1776 1776                              zap_cursor_retrieve(&zc, attr) == 0;
1777 1777                              (void) zap_cursor_advance(&zc)) {
1778 1778                                  ASSERT(attr->za_integer_length ==
1779 1779                                      sizeof (uint64_t));
1780 1780                                  ASSERT(attr->za_num_integers == 1);
1781 1781  
1782 1782                                  child = kmem_asprintf("%s@%s",
1783 1783                                      name, attr->za_name);
1784 1784                                  err = func(spa, attr->za_first_integer,
1785 1785                                      child, arg);
1786 1786                                  strfree(child);
1787 1787                                  if (err)
1788 1788                                          break;
1789 1789                          }
1790 1790                          zap_cursor_fini(&zc);
1791 1791                  }
1792 1792          }
1793 1793  
1794 1794          dsl_dir_close(dd, FTAG);
1795 1795          kmem_free(attr, sizeof (zap_attribute_t));
1796 1796  
1797 1797          if (err)
1798 1798                  return (err);
1799 1799  
1800 1800          /*
1801 1801           * Apply to self if appropriate.
1802 1802           */
1803 1803          err = func(spa, thisobj, name, arg);
1804 1804          return (err);
1805 1805  }
1806 1806  
1807 1807  /* ARGSUSED */
1808 1808  int
1809 1809  dmu_objset_prefetch(const char *name, void *arg)
1810 1810  {
1811 1811          dsl_dataset_t *ds;
1812 1812  
1813 1813          if (dsl_dataset_hold(name, FTAG, &ds))
1814 1814                  return (0);
1815 1815  
1816 1816          if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
1817 1817                  mutex_enter(&ds->ds_opening_lock);
1818 1818                  if (ds->ds_objset == NULL) {
1819 1819                          uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
1820 1820                          zbookmark_t zb;
1821 1821  
1822 1822                          SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
1823 1823                              ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1824 1824  
1825 1825                          (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds),
1826 1826                              &ds->ds_phys->ds_bp, NULL, NULL,
1827 1827                              ZIO_PRIORITY_ASYNC_READ,
1828 1828                              ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
1829 1829                              &aflags, &zb);
1830 1830                  }
1831 1831                  mutex_exit(&ds->ds_opening_lock);
1832 1832          }
1833 1833  
1834 1834          dsl_dataset_rele(ds, FTAG);
1835 1835          return (0);
1836 1836  }
1837 1837  
/*
 * Store 'user_ptr' as the objset's user pointer.
 * The caller must hold os->os_user_ptr_lock (asserted).
 */
void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
        ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
        os->os_user_ptr = user_ptr;
}
1844 1844  
1845 1845  void *
1846 1846  dmu_objset_get_user(objset_t *os)
1847 1847  {
1848 1848          ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1849 1849          return (os->os_user_ptr);
1850 1850  }
  
    | ↓ open down ↓ | 566 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX