Print this page
    
3742 zfs comments need cleaner, more consistent style
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>
Reviewed by:    George Wilson <george.wilson@delphix.com>
Reviewed by:    Eric Schrock <eric.schrock@delphix.com>
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/spa_misc.c
          +++ new/usr/src/uts/common/fs/zfs/spa_misc.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
  24   24   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/zfs_context.h>
  28   28  #include <sys/spa_impl.h>
  29   29  #include <sys/spa_boot.h>
  30   30  #include <sys/zio.h>
  31   31  #include <sys/zio_checksum.h>
  32   32  #include <sys/zio_compress.h>
  33   33  #include <sys/dmu.h>
  34   34  #include <sys/dmu_tx.h>
  35   35  #include <sys/zap.h>
  36   36  #include <sys/zil.h>
  37   37  #include <sys/vdev_impl.h>
  38   38  #include <sys/metaslab.h>
  39   39  #include <sys/uberblock_impl.h>
  40   40  #include <sys/txg.h>
  41   41  #include <sys/avl.h>
  42   42  #include <sys/unique.h>
  43   43  #include <sys/dsl_pool.h>
  44   44  #include <sys/dsl_dir.h>
  45   45  #include <sys/dsl_prop.h>
  46   46  #include <sys/dsl_scan.h>
  47   47  #include <sys/fs/zfs.h>
  48   48  #include <sys/metaslab_impl.h>
  49   49  #include <sys/arc.h>
  50   50  #include <sys/ddt.h>
  51   51  #include "zfs_prop.h"
  52   52  #include "zfeature_common.h"
  53   53  
  54   54  /*
  55   55   * SPA locking
  56   56   *
  57   57   * There are three basic locks for managing spa_t structures:
  58   58   *
  59   59   * spa_namespace_lock (global mutex)
  60   60   *
  61   61   *      This lock must be acquired to do any of the following:
  62   62   *
  63   63   *              - Lookup a spa_t by name
  64   64   *              - Add or remove a spa_t from the namespace
  65   65   *              - Increase spa_refcount from non-zero
  66   66   *              - Check if spa_refcount is zero
  67   67   *              - Rename a spa_t
  68   68   *              - add/remove/attach/detach devices
  69   69   *              - Held for the duration of create/destroy/import/export
  70   70   *
  71   71   *      It does not need to handle recursion.  A create or destroy may
  72   72   *      reference objects (files or zvols) in other pools, but by
  73   73   *      definition they must have an existing reference, and will never need
  74   74   *      to lookup a spa_t by name.
  75   75   *
  76   76   * spa_refcount (per-spa refcount_t protected by mutex)
  77   77   *
  78   78   *      This reference count keeps track of any active users of the spa_t.  The
  79   79   *      spa_t cannot be destroyed or freed while this is non-zero.  Internally,
  80   80   *      the refcount is never really 'zero' - opening a pool implicitly keeps
  81   81   *      some references in the DMU.  Internally we check against spa_minref, but
  82   82   *      present the image of a zero/non-zero value to consumers.
  83   83   *
  84   84   * spa_config_lock[] (per-spa array of rwlocks)
  85   85   *
  86   86   *      This protects the spa_t from config changes, and must be held in
  87   87   *      the following circumstances:
  88   88   *
  89   89   *              - RW_READER to perform I/O to the spa
  90   90   *              - RW_WRITER to change the vdev config
  91   91   *
  92   92   * The locking order is fairly straightforward:
  93   93   *
  94   94   *              spa_namespace_lock      ->      spa_refcount
  95   95   *
  96   96   *      The namespace lock must be acquired to increase the refcount from 0
  97   97   *      or to check if it is zero.
  98   98   *
  99   99   *              spa_refcount            ->      spa_config_lock[]
 100  100   *
 101  101   *      There must be at least one valid reference on the spa_t to acquire
 102  102   *      the config lock.
 103  103   *
 104  104   *              spa_namespace_lock      ->      spa_config_lock[]
 105  105   *
 106  106   *      The namespace lock must always be taken before the config lock.
 107  107   *
 108  108   *
 109  109   * The spa_namespace_lock can be acquired directly and is globally visible.
 110  110   *
 111  111   * The namespace is manipulated using the following functions, all of which
 112  112   * require the spa_namespace_lock to be held.
 113  113   *
 114  114   *      spa_lookup()            Lookup a spa_t by name.
 115  115   *
 116  116   *      spa_add()               Create a new spa_t in the namespace.
 117  117   *
 118  118   *      spa_remove()            Remove a spa_t from the namespace.  This also
 119  119   *                              frees up any memory associated with the spa_t.
 120  120   *
 121  121   *      spa_next()              Returns the next spa_t in the system, or the
 122  122   *                              first if NULL is passed.
 123  123   *
 124  124   *      spa_evict_all()         Shutdown and remove all spa_t structures in
 125  125   *                              the system.
 126  126   *
 127  127   *      spa_guid_exists()       Determine whether a pool/device guid exists.
 128  128   *
 129  129   * The spa_refcount is manipulated using the following functions:
 130  130   *
 131  131   *      spa_open_ref()          Adds a reference to the given spa_t.  Must be
 132  132   *                              called with spa_namespace_lock held if the
 133  133   *                              refcount is currently zero.
 134  134   *
 135  135   *      spa_close()             Remove a reference from the spa_t.  This will
 136  136   *                              not free the spa_t or remove it from the
 137  137   *                              namespace.  No locking is required.
 138  138   *
 139  139   *      spa_refcount_zero()     Returns true if the refcount is currently
 140  140   *                              zero.  Must be called with spa_namespace_lock
 141  141   *                              held.
 142  142   *
 143  143   * The spa_config_lock[] is an array of rwlocks, ordered as follows:
 144  144   * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV.
 145  145   * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}().
 146  146   *
 147  147   * To read the configuration, it suffices to hold one of these locks as reader.
 148  148   * To modify the configuration, you must hold all locks as writer.  To modify
 149  149   * vdev state without altering the vdev tree's topology (e.g. online/offline),
 150  150   * you must hold SCL_STATE and SCL_ZIO as writer.
 151  151   *
 152  152   * We use these distinct config locks to avoid recursive lock entry.
 153  153   * For example, spa_sync() (which holds SCL_CONFIG as reader) induces
 154  154   * block allocations (SCL_ALLOC), which may require reading space maps
 155  155   * from disk (dmu_read() -> zio_read() -> SCL_ZIO).
 156  156   *
 157  157   * The spa config locks cannot be normal rwlocks because we need the
 158  158   * ability to hand off ownership.  For example, SCL_ZIO is acquired
 159  159   * by the issuing thread and later released by an interrupt thread.
 160  160   * They do, however, obey the usual write-wanted semantics to prevent
 161  161   * writer (i.e. system administrator) starvation.
 162  162   *
 163  163   * The lock acquisition rules are as follows:
 164  164   *
 165  165   * SCL_CONFIG
 166  166   *      Protects changes to the vdev tree topology, such as vdev
 167  167   *      add/remove/attach/detach.  Protects the dirty config list
 168  168   *      (spa_config_dirty_list) and the set of spares and l2arc devices.
 169  169   *
 170  170   * SCL_STATE
 171  171   *      Protects changes to pool state and vdev state, such as vdev
 172  172   *      online/offline/fault/degrade/clear.  Protects the dirty state list
 173  173   *      (spa_state_dirty_list) and global pool state (spa_state).
 174  174   *
 175  175   * SCL_ALLOC
 176  176   *      Protects changes to metaslab groups and classes.
 177  177   *      Held as reader by metaslab_alloc() and metaslab_claim().
 178  178   *
 179  179   * SCL_ZIO
 180  180   *      Held by bp-level zios (those which have no io_vd upon entry)
 181  181   *      to prevent changes to the vdev tree.  The bp-level zio implicitly
 182  182   *      protects all of its vdev child zios, which do not hold SCL_ZIO.
 183  183   *
 184  184   * SCL_FREE
 185  185   *      Protects changes to metaslab groups and classes.
 186  186   *      Held as reader by metaslab_free().  SCL_FREE is distinct from
 187  187   *      SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free
 188  188   *      blocks in zio_done() while another i/o that holds either
 189  189   *      SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete.
 190  190   *
 191  191   * SCL_VDEV
 192  192   *      Held as reader to prevent changes to the vdev tree during trivial
 193  193   *      inquiries such as bp_get_dsize().  SCL_VDEV is distinct from the
 194  194   *      other locks, and lower than all of them, to ensure that it's safe
 195  195   *      to acquire regardless of caller context.
 196  196   *
 197  197   * In addition, the following rules apply:
 198  198   *
 199  199   * (a)  spa_props_lock protects pool properties, spa_config and spa_config_list.
 200  200   *      The lock ordering is SCL_CONFIG > spa_props_lock.
 201  201   *
 202  202   * (b)  I/O operations on leaf vdevs.  For any zio operation that takes
 203  203   *      an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(),
 204  204   *      or zio_write_phys() -- the caller must ensure that the config cannot
 205  205   *      change in the interim, and that the vdev cannot be reopened.
 206  206   *      SCL_STATE as reader suffices for both.
 207  207   *
 208  208   * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
 209  209   *
 210  210   *      spa_vdev_enter()        Acquire the namespace lock and the config lock
 211  211   *                              for writing.
 212  212   *
 213  213   *      spa_vdev_exit()         Release the config lock, wait for all I/O
 214  214   *                              to complete, sync the updated configs to the
 215  215   *                              cache, and release the namespace lock.
 216  216   *
 217  217   * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit().
 218  218   * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
 219  219   * locking is, always, based on spa_namespace_lock and spa_config_lock[].
 220  220   *
 221  221   * spa_rename() is also implemented within this file since it requires
 222  222   * manipulation of the namespace.
 223  223   */
 224  224  
 225  225  static avl_tree_t spa_namespace_avl;     /* spa_t's added by spa_add() */
 226  226  kmutex_t spa_namespace_lock;             /* global; see "SPA locking" */
 227  227  static kcondvar_t spa_namespace_cv;      /* broadcast in spa_remove() */
           /*
            * Incremented by spa_add() when an altroot is supplied and decremented
            * by spa_remove() when spa_root is set.
            */
 228  228  static int spa_active_count;
 229  229  int spa_max_replication_override = SPA_DVAS_PER_BP;
 230  230  
           /*
            * Locks and AVL trees for the spare and l2cache tracking code later in
            * this file.  NOTE(review): their users are outside the portion reviewed
            * here; presumably each lock protects the tree of the same name --
            * confirm.
            */
 231  231  static kmutex_t spa_spare_lock;
 232  232  static avl_tree_t spa_spare_avl;
 233  233  static kmutex_t spa_l2cache_lock;
 234  234  static avl_tree_t spa_l2cache_avl;
 235  235  
 236  236  kmem_cache_t *spa_buffer_pool;
 237  237  int spa_mode_global;
 238  238  
 239  239  #ifdef ZFS_DEBUG
 240  240  /* Everything except dprintf and spa is on by default in debug builds */
 241  241  int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
 242  242  #else
           /* Without ZFS_DEBUG every flag starts out clear. */
 243  243  int zfs_flags = 0;
 244  244  #endif
 245  245  
 246  246  /*
 247  247   * zfs_recover can be set to nonzero to attempt to recover from
 248  248   * otherwise-fatal errors, typically caused by on-disk corruption.  When
 249  249   * set, calls to zfs_panic_recover() will turn into warning messages.
 250  250   */
 251  251  int zfs_recover = 0;
 252  252  
           /*
            * Defined outside this file.  spa_add() multiplies it into both the
            * deadman timeout (spa_deadman_synctime) and the interval of the
            * deadman cyclic.
            */
 253  253  extern int zfs_txg_synctime_ms;
 254  254  
 255  255  /*
 256  256   * Expiration time in units of zfs_txg_synctime_ms. This value has two
 257  257   * meanings. First it is used to determine when the spa_deadman logic
 258  258   * should fire. By default the spa_deadman will fire if spa_sync has
 259  259   * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
 260  260   * Secondly, the value determines if an I/O is considered "hung".
 261  261   * Any I/O that has not completed in zfs_deadman_synctime is considered
 262  262   * "hung" resulting in a system panic.
 263  263   */
 264  264  uint64_t zfs_deadman_synctime = 1000ULL;
  265  265  
 266  266  /*
  267  267   * Override the zfs deadman behavior via /etc/system. By default the
  268  268   * deadman is enabled except on VMware and sparc deployments.
             *
             * The initial value of -1 is nonzero, so the plain truth test in
             * spa_deadman() passes unless the value is changed.  NOTE(review): the
             * code that resolves -1 into a platform-specific default is not in the
             * portion reviewed here.
  269  269   */
 270  270  int zfs_deadman_enabled = -1;
 271  271  
 272  272  
 273  273  /*
 274  274   * ==========================================================================
 275  275   * SPA config locking
 276  276   * ==========================================================================
 277  277   */
  278  278  static void
  279  279  spa_config_lock_init(spa_t *spa)
  280  280  {
                    /*
                     * Put each of the SCL_LOCKS entries of spa->spa_config_lock[]
                     * into its initial state: mutex and cv initialized, hold count
                     * created with refcount_create_untracked(), no writer recorded
                     * and no writers waiting.
                     */
  281  281          for (int i = 0; i < SCL_LOCKS; i++) {
  282  282                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
  283  283                  mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
  284  284                  cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
  285  285                  refcount_create_untracked(&scl->scl_count);
  286  286                  scl->scl_writer = NULL;
  287  287                  scl->scl_write_wanted = 0;
  288  288          }
  289  289  }
 290  290  
  291  291  static void
  292  292  spa_config_lock_destroy(spa_t *spa)
  293  293  {
                    /*
                     * Tear down everything spa_config_lock_init() set up for each of
                     * the SCL_LOCKS entries.  The ASSERTs check that no writer is
                     * still recorded and that no writer is still waiting.
                     */
  294  294          for (int i = 0; i < SCL_LOCKS; i++) {
  295  295                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
  296  296                  mutex_destroy(&scl->scl_lock);
  297  297                  cv_destroy(&scl->scl_cv);
  298  298                  refcount_destroy(&scl->scl_count);
  299  299                  ASSERT(scl->scl_writer == NULL);
  300  300                  ASSERT(scl->scl_write_wanted == 0);
  301  301          }
  302  302  }
 303  303  
 304  304  int
 305  305  spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
 306  306  {
 307  307          for (int i = 0; i < SCL_LOCKS; i++) {
 308  308                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
 309  309                  if (!(locks & (1 << i)))
 310  310                          continue;
 311  311                  mutex_enter(&scl->scl_lock);
 312  312                  if (rw == RW_READER) {
 313  313                          if (scl->scl_writer || scl->scl_write_wanted) {
 314  314                                  mutex_exit(&scl->scl_lock);
 315  315                                  spa_config_exit(spa, locks ^ (1 << i), tag);
 316  316                                  return (0);
 317  317                          }
 318  318                  } else {
 319  319                          ASSERT(scl->scl_writer != curthread);
 320  320                          if (!refcount_is_zero(&scl->scl_count)) {
 321  321                                  mutex_exit(&scl->scl_lock);
 322  322                                  spa_config_exit(spa, locks ^ (1 << i), tag);
 323  323                                  return (0);
 324  324                          }
 325  325                          scl->scl_writer = curthread;
 326  326                  }
 327  327                  (void) refcount_add(&scl->scl_count, tag);
 328  328                  mutex_exit(&scl->scl_lock);
 329  329          }
 330  330          return (1);
 331  331  }
 332  332  
  333  333  void
  334  334  spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
  335  335  {
                    /*
                     * Blocking acquire of every lock named in the 'locks' bitmask, in
                     * ascending index order, as reader or writer according to 'rw'.
                     * Each hold is recorded against 'tag'.
                     *
                     * wlocks_held collects the locks on which the current thread is
                     * already recorded as writer, whether or not they were requested.
                     */
  336  336          int wlocks_held = 0;
  337  337  
                    /* One bit per lock must fit in wlocks_held. */
  338  338          ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
  339  339  
  340  340          for (int i = 0; i < SCL_LOCKS; i++) {
  341  341                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
                            /* scl_writer is sampled here before scl_lock is taken. */
  342  342                  if (scl->scl_writer == curthread)
  343  343                          wlocks_held |= (1 << i);
  344  344                  if (!(locks & (1 << i)))
  345  345                          continue;
  346  346                  mutex_enter(&scl->scl_lock);
  347  347                  if (rw == RW_READER) {
                                    /* Readers wait out active and waiting writers. */
  348  348                          while (scl->scl_writer || scl->scl_write_wanted) {
  349  349                                  cv_wait(&scl->scl_cv, &scl->scl_lock);
  350  350                          }
  351  351                  } else {
  352  352                          ASSERT(scl->scl_writer != curthread);
                                    /*
                                     * Wait for every hold to drain.  scl_write_wanted
                                     * is raised only around the sleep; that is the
                                     * value the reader loop above tests.
                                     */
  353  353                          while (!refcount_is_zero(&scl->scl_count)) {
  354  354                                  scl->scl_write_wanted++;
  355  355                                  cv_wait(&scl->scl_cv, &scl->scl_lock);
  356  356                                  scl->scl_write_wanted--;
  357  357                          }
  358  358                          scl->scl_writer = curthread;
  359  359                  }
  360  360                  (void) refcount_add(&scl->scl_count, tag);
  361  361                  mutex_exit(&scl->scl_lock);
  362  362          }
                    /* Numeric comparison of the two bitmasks. */
  363  363          ASSERT(wlocks_held <= locks);
  364  364  }
 365  365  
  366  366  void
  367  367  spa_config_exit(spa_t *spa, int locks, void *tag)
  368  368  {
                    /*
                     * Drop the hold recorded under 'tag' on every lock named in the
                     * 'locks' bitmask.  The locks are visited in descending index
                     * order, the reverse of the order used to acquire them.
                     */
  369  369          for (int i = SCL_LOCKS - 1; i >= 0; i--) {
  370  370                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
  371  371                  if (!(locks & (1 << i)))
  372  372                          continue;
  373  373                  mutex_enter(&scl->scl_lock);
  374  374                  ASSERT(!refcount_is_zero(&scl->scl_count));
                            /*
                             * When the count reaches zero the lock is free: forget any
                             * writer and wake every thread sleeping on the cv.
                             */
  375  375                  if (refcount_remove(&scl->scl_count, tag) == 0) {
  376  376                          ASSERT(scl->scl_writer == NULL ||
  377  377                              scl->scl_writer == curthread);
  378  378                          scl->scl_writer = NULL; /* OK in either case */
  379  379                          cv_broadcast(&scl->scl_cv);
  380  380                  }
  381  381                  mutex_exit(&scl->scl_lock);
  382  382          }
  383  383  }
 384  384  
  385  385  int
  386  386  spa_config_held(spa_t *spa, int locks, krw_t rw)
  387  387  {
                    /*
                     * Return the subset of the 'locks' bitmask that is currently held.
                     * For RW_READER a lock counts as held when its hold count is
                     * non-zero (the count is not checked per thread); for RW_WRITER
                     * it counts only when the current thread is the recorded writer.
                     * No scl_lock is taken while sampling.
                     */
  388  388          int locks_held = 0;
  389  389  
  390  390          for (int i = 0; i < SCL_LOCKS; i++) {
  391  391                  spa_config_lock_t *scl = &spa->spa_config_lock[i];
  392  392                  if (!(locks & (1 << i)))
  393  393                          continue;
  394  394                  if ((rw == RW_READER && !refcount_is_zero(&scl->scl_count)) ||
  395  395                      (rw == RW_WRITER && scl->scl_writer == curthread))
  396  396                          locks_held |= 1 << i;
  397  397          }
  398  398  
  399  399          return (locks_held);
  400  400  }
 401  401  
 402  402  /*
 403  403   * ==========================================================================
 404  404   * SPA namespace functions
 405  405   * ==========================================================================
 406  406   */
 407  407  
 408  408  /*
 409  409   * Lookup the named spa_t in the AVL tree.  The spa_namespace_lock must be held.
 410  410   * Returns NULL if no matching spa_t is found.
 411  411   */
  412  412  spa_t *
  413  413  spa_lookup(const char *name)
  414  414  {
  415  415          static spa_t search;    /* spa_t is large; don't allocate on stack */
  416  416          spa_t *spa;
  417  417          avl_index_t where;
  418  418          char *cp;
  419  419  
                    /*
                     * 'search' has static storage, so it is shared by every caller;
                     * the lock asserted here is presumably what makes that safe.
                     */
  420  420          ASSERT(MUTEX_HELD(&spa_namespace_lock));
  421  421  
                    /* Only the spa_name field of the search key is filled in. */
  422  422          (void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
  423  423  
  424  424          /*
  425  425           * If it's a full dataset name, figure out the pool name and
  426  426           * just use that.
  427  427           */
                    /* Cut the copy at the first '/' or '@', leaving the pool name. */
  428  428          cp = strpbrk(search.spa_name, "/@");
  429  429          if (cp != NULL)
  430  430                  *cp = '\0';
  431  431  
                    /* 'where' is passed to avl_find() but not used afterwards. */
  432  432          spa = avl_find(&spa_namespace_avl, &search, &where);
  433  433  
  434  434          return (spa);
  435  435  }
 436  436  
  437  437  /*
  438  438   * Fires when spa_sync has not completed within the deadman timeout
             * (zfs_deadman_synctime * zfs_txg_synctime_ms milliseconds; see spa_add()).
  439  439   * If the zfs_deadman_enabled flag is set then it inspects all vdev queues
  440  440   * looking for potentially hung I/Os.
             *
             * 'arg' is the spa_t that spa_add() registered as the cyclic handler
             * argument.
  441  441   */
  442  442  void
  443  443  spa_deadman(void *arg)
  444  444  {
  445  445          spa_t *spa = arg;
  446  446  
                    /*
                     * Log the whole seconds elapsed since spa_sync_starttime and count
                     * this invocation in spa_deadman_calls.
                     */
  447  447          zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
  448  448              (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
  449  449              ++spa->spa_deadman_calls);
                    /* Any nonzero value, including the initial -1, enables the check. */
  450  450          if (zfs_deadman_enabled)
  451  451                  vdev_deadman(spa->spa_root_vdev);
  452  452  }
 453  453  
  454  454  /*
  455  455   * Create an uninitialized spa_t with the given name.  Requires
  456  456   * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
  457  457   * exist by calling spa_lookup() first.
             *
             * 'name' is copied into spa_name and is also used as the kstat name.
             * 'config', if not NULL, is duplicated into spa_config; its
             * ZPOOL_CONFIG_FEATURES_FOR_READ list, if present, is duplicated into
             * spa_label_features.
             * 'altroot', if not NULL, is duplicated into spa_root.
             *
             * Returns the new spa_t, already inserted into spa_namespace_avl and in
             * state POOL_STATE_UNINITIALIZED.
  458  458   */
  459  459  spa_t *
  460  460  spa_add(const char *name, nvlist_t *config, const char *altroot)
  461  461  {
  462  462          spa_t *spa;
  463  463          spa_config_dirent_t *dp;
  464  464          cyc_handler_t hdlr;
  465  465          cyc_time_t when;
  466  466  
  467  467          ASSERT(MUTEX_HELD(&spa_namespace_lock));
  468  468  
                    /* Zero-filled, so every field not set below starts out 0/NULL. */
  469  469          spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
  470  470  
  471  471          mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
  472  472          mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
  473  473          mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
  474  474          mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
  475  475          mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
  476  476          mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
  477  477          mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
  478  478          mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
  479  479          mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
  480  480          mutex_init(&spa->spa_iokstat_lock, NULL, MUTEX_DEFAULT, NULL);
  481  481  
  482  482          cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
  483  483          cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
  484  484          cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
  485  485          cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
  486  486  
  487  487          for (int t = 0; t < TXG_SIZE; t++)
  488  488                  bplist_create(&spa->spa_free_bplist[t]);
  489  489  
  490  490          (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
  491  491          spa->spa_state = POOL_STATE_UNINITIALIZED;
  492  492          spa->spa_freeze_txg = UINT64_MAX;
  493  493          spa->spa_final_txg = UINT64_MAX;
  494  494          spa->spa_load_max_txg = UINT64_MAX;
  495  495          spa->spa_proc = &p0;
  496  496          spa->spa_proc_state = SPA_PROC_NONE;
  497  497  
                    /* The deadman cyclic calls spa_deadman(spa) at CY_LOW_LEVEL. */
  498  498          hdlr.cyh_func = spa_deadman;
  499  499          hdlr.cyh_arg = spa;
  500  500          hdlr.cyh_level = CY_LOW_LEVEL;
  501  501  
                    /* Deadman timeout, converted from milliseconds to nanoseconds. */
  502  502          spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime *
  503  503              zfs_txg_synctime_ms);
  504  504  
  505  505          /*
  506  506           * This determines how often we need to check for hung I/Os after
  507  507           * the cyclic has already fired. Since checking for hung I/Os is
  508  508           * an expensive operation we don't want to check too frequently.
  509  509           * Instead wait for 5 synctimes before checking again.
  510  510           */
  511  511          when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);
                    /*
                     * NOTE(review): the first expiry is set to CY_INFINITY, so the
                     * cyclic is presumably armed elsewhere when a sync starts --
                     * confirm against the code that uses spa_deadman_cycid.
                     */
  512  512          when.cyt_when = CY_INFINITY;
  513  513          mutex_enter(&cpu_lock);
  514  514          spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
  515  515          mutex_exit(&cpu_lock);
  516  516  
  517  517          refcount_create(&spa->spa_refcount);
  518  518          spa_config_lock_init(spa);
  519  519  
  520  520          avl_add(&spa_namespace_avl, spa);
  521  521  
  522  522          /*
  523  523           * Set the alternate root, if there is one.
  524  524           */
  525  525          if (altroot) {
  526  526                  spa->spa_root = spa_strdup(altroot);
  527  527                  spa_active_count++;
  528  528          }
  529  529  
  530  530          /*
  531  531           * Every pool starts with the default cachefile
  532  532           */
  533  533          list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t),
  534  534              offsetof(spa_config_dirent_t, scd_link));
  535  535  
                    /*
                     * With an altroot the initial dirent carries no path (scd_path is
                     * NULL); otherwise it gets a private copy of spa_config_path.
                     */
  536  536          dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP);
  537  537          dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
  538  538          list_insert_head(&spa->spa_config_list, dp);
  539  539  
  540  540          VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
  541  541              KM_SLEEP) == 0);
  542  542  
  543  543          if (config != NULL) {
  544  544                  nvlist_t *features;
  545  545  
  546  546                  if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
  547  547                      &features) == 0) {
  548  548                          VERIFY(nvlist_dup(features, &spa->spa_label_features,
  549  549                              0) == 0);
  550  550                  }
  551  551  
  552  552                  VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
  553  553          }
  554  554  
                    /* No feature list came from 'config': start with an empty one. */
  555  555          if (spa->spa_label_features == NULL) {
  556  556                  VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
  557  557                      KM_SLEEP) == 0);
  558  558          }
  559  559  
                    /* A NULL result is tolerated: the kstat is just not installed. */
  560  560          spa->spa_iokstat = kstat_create("zfs", 0, name,
  561  561              "disk", KSTAT_TYPE_IO, 1, 0);
  562  562          if (spa->spa_iokstat) {
  563  563                  spa->spa_iokstat->ks_lock = &spa->spa_iokstat_lock;
  564  564                  kstat_install(spa->spa_iokstat);
  565  565          }
  566  566  
  567  567          spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
  568  568  
  569  569          return (spa);
  570  570  }
 571  571  
  572  572  /*
  573  573   * Removes a spa_t from the namespace, freeing up any memory used.  Requires
  574  574   * spa_namespace_lock.  This is called only after the spa_t has been closed and
  575  575   * deactivated.
             *
             * The spa_t must be in state POOL_STATE_UNINITIALIZED (asserted below).
             * On return 'spa' itself has been freed and must not be used again.
  576  576   */
  577  577  void
  578  578  spa_remove(spa_t *spa)
  579  579  {
  580  580          spa_config_dirent_t *dp;
  581  581  
  582  582          ASSERT(MUTEX_HELD(&spa_namespace_lock));
  583  583          ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
  584  584  
  585  585          nvlist_free(spa->spa_config_splitting);
  586  586  
                    /* Leave the namespace and wake everyone waiting on its cv. */
  587  587          avl_remove(&spa_namespace_avl, spa);
  588  588          cv_broadcast(&spa_namespace_cv);
  589  589  
  590  590          if (spa->spa_root) {
  591  591                  spa_strfree(spa->spa_root);
  592  592                  spa_active_count--;
  593  593          }
  594  594  
                    /* Free every config dirent; scd_path may be NULL (see spa_add()). */
  595  595          while ((dp = list_head(&spa->spa_config_list)) != NULL) {
  596  596                  list_remove(&spa->spa_config_list, dp);
  597  597                  if (dp->scd_path != NULL)
  598  598                          spa_strfree(dp->scd_path);
  599  599                  kmem_free(dp, sizeof (spa_config_dirent_t));
  600  600          }
  601  601  
  602  602          list_destroy(&spa->spa_config_list);
  603  603  
  604  604          nvlist_free(spa->spa_label_features);
  605  605          nvlist_free(spa->spa_load_info);
                    /* NOTE(review): presumably releases spa_config -- confirm. */
  606  606          spa_config_set(spa, NULL);
  607  607  
                    /* The deadman cyclic is removed under cpu_lock, as it was added. */
  608  608          mutex_enter(&cpu_lock);
  609  609          if (spa->spa_deadman_cycid != CYCLIC_NONE)
  610  610                  cyclic_remove(spa->spa_deadman_cycid);
  611  611          mutex_exit(&cpu_lock);
  612  612          spa->spa_deadman_cycid = CYCLIC_NONE;
  613  613  
  614  614          refcount_destroy(&spa->spa_refcount);
  615  615  
  616  616          spa_config_lock_destroy(spa);
  617  617  
                    /*
                     * NOTE(review): spa_add() can leave spa_iokstat NULL, so this
                     * relies on kstat_delete() accepting NULL -- confirm.
                     */
  618  618          kstat_delete(spa->spa_iokstat);
  619  619          spa->spa_iokstat = NULL;
  620  620  
  621  621          for (int t = 0; t < TXG_SIZE; t++)
  622  622                  bplist_destroy(&spa->spa_free_bplist[t]);
  623  623  
  624  624          cv_destroy(&spa->spa_async_cv);
  625  625          cv_destroy(&spa->spa_proc_cv);
  626  626          cv_destroy(&spa->spa_scrub_io_cv);
  627  627          cv_destroy(&spa->spa_suspend_cv);
  628  628  
  629  629          mutex_destroy(&spa->spa_async_lock);
  630  630          mutex_destroy(&spa->spa_errlist_lock);
  631  631          mutex_destroy(&spa->spa_errlog_lock);
  632  632          mutex_destroy(&spa->spa_history_lock);
  633  633          mutex_destroy(&spa->spa_proc_lock);
  634  634          mutex_destroy(&spa->spa_props_lock);
  635  635          mutex_destroy(&spa->spa_scrub_lock);
  636  636          mutex_destroy(&spa->spa_suspend_lock);
  637  637          mutex_destroy(&spa->spa_vdev_top_lock);
  638  638          mutex_destroy(&spa->spa_iokstat_lock);
  639  639  
  640  640          kmem_free(spa, sizeof (spa_t));
  641  641  }
 642  642  
  643  643  /*
  644  644   * Given a pool, return the next pool in the namespace, or NULL if there is
  645  645   * none.  If 'prev' is NULL, return the first pool.  Requires
             * spa_namespace_lock.
  646  646   */
  647  647  spa_t *
  648  648  spa_next(spa_t *prev)
  649  649  {
  650  650          ASSERT(MUTEX_HELD(&spa_namespace_lock));
  651  651  
                    /* Pools are visited in the order of spa_namespace_avl. */
  652  652          if (prev)
  653  653                  return (AVL_NEXT(&spa_namespace_avl, prev));
  654  654          else
  655  655                  return (avl_first(&spa_namespace_avl));
  656  656  }
 657  657  
 658  658  /*
 659  659   * ==========================================================================
 660  660   * SPA refcount functions
 661  661   * ==========================================================================
 662  662   */
 663  663  
  664  664  /*
  665  665   * Add a reference to the given spa_t.  Must have at least one reference, or
  666  666   * have the namespace lock held.  The new reference is recorded against 'tag'.
  667  667   */
  668  668  void
  669  669  spa_open_ref(spa_t *spa, void *tag)
  670  670  {
                    /*
                     * Either condition is sufficient: the count is already at or
                     * above spa_minref, or the caller holds spa_namespace_lock.
                     */
  671  671          ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
  672  672              MUTEX_HELD(&spa_namespace_lock));
  673  673          (void) refcount_add(&spa->spa_refcount, tag);
  674  674  }
 675  675  
 676  676  /*
 677  677   * Remove a reference to the given spa_t.  Must have at least one reference, or
 678  678   * have the namespace lock held.
 679  679   */
 680  680  void
 681  681  spa_close(spa_t *spa, void *tag)
 682  682  {
 683  683          ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
 684  684              MUTEX_HELD(&spa_namespace_lock));
 685  685          (void) refcount_remove(&spa->spa_refcount, tag);
 686  686  }
 687  687  
 688  688  /*
 689  689   * Check to see if the spa refcount is zero.  Must be called with
 690  690   * spa_namespace_lock held.  We really compare against spa_minref, which is the
 691  691   * number of references acquired when opening a pool
 692  692   */
 693  693  boolean_t
 694  694  spa_refcount_zero(spa_t *spa)
 695  695  {
 696  696          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 697  697  
 698  698          return (refcount_count(&spa->spa_refcount) == spa->spa_minref);
 699  699  }
 700  700  
 701  701  /*
 702  702   * ==========================================================================
 703  703   * SPA spare and l2cache tracking
 704  704   * ==========================================================================
 705  705   */
 706  706  
 707  707  /*
 708  708   * Hot spares and cache devices are tracked using the same code below,
 709  709   * for 'auxiliary' devices.
 710  710   */
 711  711  
/*
 * Tracking record for one auxiliary device.  Records live in an AVL tree
 * that is ordered by aux_guid (see spa_aux_compare()).
 */
typedef struct spa_aux {
        /* guid of the tracked vdev; the tree key */
        uint64_t        aux_guid;
        /* guid of the pool the device was activated in, or 0 if none */
        uint64_t        aux_pool;
        /* presumably the AVL linkage for this record - confirm at avl_create() */
        avl_node_t      aux_avl;
        /* number of spa_aux_add() calls not yet undone by spa_aux_remove() */
        int             aux_count;
} spa_aux_t;
 718  718  
 719  719  static int
 720  720  spa_aux_compare(const void *a, const void *b)
 721  721  {
 722  722          const spa_aux_t *sa = a;
 723  723          const spa_aux_t *sb = b;
 724  724  
 725  725          if (sa->aux_guid < sb->aux_guid)
 726  726                  return (-1);
 727  727          else if (sa->aux_guid > sb->aux_guid)
 728  728                  return (1);
 729  729          else
 730  730                  return (0);
 731  731  }
 732  732  
 733  733  void
 734  734  spa_aux_add(vdev_t *vd, avl_tree_t *avl)
 735  735  {
 736  736          avl_index_t where;
 737  737          spa_aux_t search;
 738  738          spa_aux_t *aux;
 739  739  
 740  740          search.aux_guid = vd->vdev_guid;
 741  741          if ((aux = avl_find(avl, &search, &where)) != NULL) {
 742  742                  aux->aux_count++;
 743  743          } else {
 744  744                  aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP);
 745  745                  aux->aux_guid = vd->vdev_guid;
 746  746                  aux->aux_count = 1;
 747  747                  avl_insert(avl, aux, where);
 748  748          }
 749  749  }
 750  750  
 751  751  void
 752  752  spa_aux_remove(vdev_t *vd, avl_tree_t *avl)
 753  753  {
 754  754          spa_aux_t search;
 755  755          spa_aux_t *aux;
 756  756          avl_index_t where;
 757  757  
 758  758          search.aux_guid = vd->vdev_guid;
 759  759          aux = avl_find(avl, &search, &where);
 760  760  
 761  761          ASSERT(aux != NULL);
 762  762  
 763  763          if (--aux->aux_count == 0) {
 764  764                  avl_remove(avl, aux);
 765  765                  kmem_free(aux, sizeof (spa_aux_t));
 766  766          } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) {
 767  767                  aux->aux_pool = 0ULL;
 768  768          }
 769  769  }
 770  770  
 771  771  boolean_t
 772  772  spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl)
 773  773  {
 774  774          spa_aux_t search, *found;
 775  775  
 776  776          search.aux_guid = guid;
 777  777          found = avl_find(avl, &search, NULL);
 778  778  
 779  779          if (pool) {
 780  780                  if (found)
 781  781                          *pool = found->aux_pool;
 782  782                  else
 783  783                          *pool = 0ULL;
 784  784          }
 785  785  
 786  786          if (refcnt) {
 787  787                  if (found)
 788  788                          *refcnt = found->aux_count;
 789  789                  else
 790  790                          *refcnt = 0;
 791  791          }
 792  792  
 793  793          return (found != NULL);
 794  794  }
 795  795  
 796  796  void
 797  797  spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
 798  798  {
 799  799          spa_aux_t search, *found;
 800  800          avl_index_t where;
 801  801  
 802  802          search.aux_guid = vd->vdev_guid;
 803  803          found = avl_find(avl, &search, &where);
 804  804          ASSERT(found != NULL);
 805  805          ASSERT(found->aux_pool == 0ULL);
 806  806  
 807  807          found->aux_pool = spa_guid(vd->vdev_spa);
 808  808  }
 809  809  
 810  810  /*
 811  811   * Spares are tracked globally due to the following constraints:
 812  812   *
 813  813   *      - A spare may be part of multiple pools.
 814  814   *      - A spare may be added to a pool even if it's actively in use within
 815  815   *        another pool.
 816  816   *      - A spare in use in any pool can only be the source of a replacement if
 817  817   *        the target is a spare in the same pool.
 818  818   *
 819  819   * We keep track of all spares on the system through the use of a reference
 820  820   * counted AVL tree.  When a vdev is added as a spare, or used as a replacement
 821  821   * spare, then we bump the reference count in the AVL tree.  In addition, we set
 822  822   * the 'vdev_isspare' member to indicate that the device is a spare (active or
 823  823   * inactive).  When a spare is made active (used to replace a device in the
 824  824   * pool), we also keep track of which pool it's been made a part of.
 825  825   *
 826  826   * The 'spa_spare_lock' protects the AVL tree.  These functions are normally
 827  827   * called under the spa_namespace lock as part of vdev reconfiguration.  The
 828  828   * separate spare lock exists for the status query path, which does not need to
 829  829   * be completely consistent with respect to other vdev configuration changes.
 830  830   */
 831  831  
 832  832  static int
 833  833  spa_spare_compare(const void *a, const void *b)
 834  834  {
 835  835          return (spa_aux_compare(a, b));
 836  836  }
 837  837  
/*
 * Register 'vd' in the global spare tree and flag it as a spare.  The vdev
 * must not already be flagged.  Both steps happen under spa_spare_lock.
 */
void
spa_spare_add(vdev_t *vd)
{
        mutex_enter(&spa_spare_lock);
        ASSERT(!vd->vdev_isspare);
        spa_aux_add(vd, &spa_spare_avl);
        vd->vdev_isspare = B_TRUE;
        mutex_exit(&spa_spare_lock);
}
 847  847  
/*
 * Drop 'vd' from the global spare tree and clear its spare flag.  The vdev
 * must currently be flagged.  Both steps happen under spa_spare_lock.
 */
void
spa_spare_remove(vdev_t *vd)
{
        mutex_enter(&spa_spare_lock);
        ASSERT(vd->vdev_isspare);
        spa_aux_remove(vd, &spa_spare_avl);
        vd->vdev_isspare = B_FALSE;
        mutex_exit(&spa_spare_lock);
}
 857  857  
 858  858  boolean_t
 859  859  spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt)
 860  860  {
 861  861          boolean_t found;
 862  862  
 863  863          mutex_enter(&spa_spare_lock);
 864  864          found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl);
 865  865          mutex_exit(&spa_spare_lock);
 866  866  
 867  867          return (found);
 868  868  }
 869  869  
/*
 * Record, under spa_spare_lock, that the spare 'vd' is now active in its
 * pool.  The vdev must already be flagged as a spare.
 */
void
spa_spare_activate(vdev_t *vd)
{
        mutex_enter(&spa_spare_lock);
        ASSERT(vd->vdev_isspare);
        spa_aux_activate(vd, &spa_spare_avl);
        mutex_exit(&spa_spare_lock);
}
 878  878  
 879  879  /*
 880  880   * Level 2 ARC devices are tracked globally for the same reasons as spares.
 881  881   * Cache devices currently only support one pool per cache device, and so
 882  882   * for these devices the aux reference count is currently unused beyond 1.
 883  883   */
 884  884  
 885  885  static int
 886  886  spa_l2cache_compare(const void *a, const void *b)
 887  887  {
 888  888          return (spa_aux_compare(a, b));
 889  889  }
 890  890  
/*
 * Register 'vd' in the global l2cache tree and flag it as a cache device.
 * The vdev must not already be flagged.  Both steps happen under
 * spa_l2cache_lock.
 */
void
spa_l2cache_add(vdev_t *vd)
{
        mutex_enter(&spa_l2cache_lock);
        ASSERT(!vd->vdev_isl2cache);
        spa_aux_add(vd, &spa_l2cache_avl);
        vd->vdev_isl2cache = B_TRUE;
        mutex_exit(&spa_l2cache_lock);
}
 900  900  
/*
 * Drop 'vd' from the global l2cache tree and clear its cache-device flag.
 * The vdev must currently be flagged.  Both steps happen under
 * spa_l2cache_lock.
 */
void
spa_l2cache_remove(vdev_t *vd)
{
        mutex_enter(&spa_l2cache_lock);
        ASSERT(vd->vdev_isl2cache);
        spa_aux_remove(vd, &spa_l2cache_avl);
        vd->vdev_isl2cache = B_FALSE;
        mutex_exit(&spa_l2cache_lock);
}
 910  910  
 911  911  boolean_t
 912  912  spa_l2cache_exists(uint64_t guid, uint64_t *pool)
 913  913  {
 914  914          boolean_t found;
 915  915  
 916  916          mutex_enter(&spa_l2cache_lock);
 917  917          found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl);
 918  918          mutex_exit(&spa_l2cache_lock);
 919  919  
 920  920          return (found);
 921  921  }
 922  922  
/*
 * Record, under spa_l2cache_lock, that the cache device 'vd' is now active
 * in its pool.  The vdev must already be flagged as a cache device.
 */
void
spa_l2cache_activate(vdev_t *vd)
{
        mutex_enter(&spa_l2cache_lock);
        ASSERT(vd->vdev_isl2cache);
        spa_aux_activate(vd, &spa_l2cache_avl);
        mutex_exit(&spa_l2cache_lock);
}
 931  931  
 932  932  /*
 933  933   * ==========================================================================
 934  934   * SPA vdev locking
 935  935   * ==========================================================================
 936  936   */
 937  937  
 938  938  /*
 939  939   * Lock the given spa_t for the purpose of adding or removing a vdev.
 940  940   * Grabs the global spa_namespace_lock plus the spa config lock for writing.
 941  941   * It returns the next transaction group for the spa_t.
 942  942   */
 943  943  uint64_t
 944  944  spa_vdev_enter(spa_t *spa)
 945  945  {
 946  946          mutex_enter(&spa->spa_vdev_top_lock);
 947  947          mutex_enter(&spa_namespace_lock);
 948  948          return (spa_vdev_config_enter(spa));
 949  949  }
 950  950  
 951  951  /*
 952  952   * Internal implementation for spa_vdev_enter().  Used when a vdev
 953  953   * operation requires multiple syncs (i.e. removing a device) while
 954  954   * keeping the spa_namespace_lock held.
 955  955   */
 956  956  uint64_t
 957  957  spa_vdev_config_enter(spa_t *spa)
 958  958  {
 959  959          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 960  960  
 961  961          spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
 962  962  
 963  963          return (spa_last_synced_txg(spa) + 1);
 964  964  }
 965  965  
 966  966  /*
 967  967   * Used in combination with spa_vdev_config_enter() to allow the syncing
 968  968   * of multiple transactions without releasing the spa_namespace_lock.
 969  969   */
 970  970  void
 971  971  spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
 972  972  {
 973  973          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 974  974  
 975  975          int config_changed = B_FALSE;
 976  976  
 977  977          ASSERT(txg > spa_last_synced_txg(spa));
 978  978  
 979  979          spa->spa_pending_vdev = NULL;
 980  980  
 981  981          /*
 982  982           * Reassess the DTLs.
 983  983           */
 984  984          vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
 985  985  
 986  986          if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
 987  987                  config_changed = B_TRUE;
 988  988                  spa->spa_config_generation++;
 989  989          }
 990  990  
 991  991          /*
 992  992           * Verify the metaslab classes.
 993  993           */
 994  994          ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
 995  995          ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
 996  996  
 997  997          spa_config_exit(spa, SCL_ALL, spa);
 998  998  
 999  999          /*
1000 1000           * Panic the system if the specified tag requires it.  This
1001 1001           * is useful for ensuring that configurations are updated
1002 1002           * transactionally.
1003 1003           */
1004 1004          if (zio_injection_enabled)
1005 1005                  zio_handle_panic_injection(spa, tag, 0);
1006 1006  
1007 1007          /*
1008 1008           * Note: this txg_wait_synced() is important because it ensures
1009 1009           * that there won't be more than one config change per txg.
1010 1010           * This allows us to use the txg as the generation number.
1011 1011           */
1012 1012          if (error == 0)
1013 1013                  txg_wait_synced(spa->spa_dsl_pool, txg);
1014 1014  
1015 1015          if (vd != NULL) {
1016 1016                  ASSERT(!vd->vdev_detached || vd->vdev_dtl_smo.smo_object == 0);
1017 1017                  spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
1018 1018                  vdev_free(vd);
1019 1019                  spa_config_exit(spa, SCL_ALL, spa);
1020 1020          }
1021 1021  
1022 1022          /*
1023 1023           * If the config changed, update the config cache.
1024 1024           */
1025 1025          if (config_changed)
1026 1026                  spa_config_sync(spa, B_FALSE, B_TRUE);
1027 1027  }
1028 1028  
/*
 * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
 * locking of spa_vdev_enter(), we also want to make sure the transactions
 * have synced to disk, and then update the global configuration cache with
 * the new information.  That work is delegated to spa_vdev_config_exit().
 * 'error' is returned unchanged as a convenience to the caller.
 */
int
spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
{
        spa_vdev_config_exit(spa, vd, txg, error, FTAG);
        /* Release in the reverse of the order spa_vdev_enter() acquired. */
        mutex_exit(&spa_namespace_lock);
        mutex_exit(&spa->spa_vdev_top_lock);

        return (error);
}
1044 1044  
/*
 * Lock the given spa_t for the purpose of changing vdev state.  Takes the
 * SCL_STATE_ALL config locks plus any extra locks named in 'oplocks' as
 * writer, and remembers the full set in spa_vdev_locks so that
 * spa_vdev_state_exit() can release exactly the same locks.
 */
void
spa_vdev_state_enter(spa_t *spa, int oplocks)
{
        int locks = SCL_STATE_ALL | oplocks;

        /*
         * Root pools may need to read from the underlying devfs filesystem
         * when opening up a vdev.  Unfortunately if we're holding the
         * SCL_ZIO lock it will result in a deadlock when we try to issue
         * the read from the root filesystem.  Instead we "prefetch"
         * the associated vnodes that we need prior to opening the
         * underlying devices and cache them so that we can prevent
         * any I/O when we are doing the actual open.
         */
        if (spa_is_root(spa)) {
                /*
                 * Split the lock set at SCL_ZIO: 'low' keeps the SCL_ZIO bit
                 * and every bit above it, 'high' keeps the bits below it
                 * (assumes SCL_ZIO is a single-bit value - confirm).  The
                 * vdev hold is taken between the two acquisitions, i.e.
                 * before SCL_ZIO is held.
                 */
                int low = locks & ~(SCL_ZIO - 1);
                int high = locks & ~low;

                spa_config_enter(spa, high, spa, RW_WRITER);
                vdev_hold(spa->spa_root_vdev);
                spa_config_enter(spa, low, spa, RW_WRITER);
        } else {
                spa_config_enter(spa, locks, spa, RW_WRITER);
        }
        spa->spa_vdev_locks = locks;
}
1074 1074  
/*
 * Counterpart of spa_vdev_state_enter().  Reassesses DTLs, marks the state
 * of 'vd' dirty when one is supplied, drops the config locks recorded in
 * spa_vdev_locks, and, when 'vd' is non-NULL, waits for the change to sync
 * and updates the config cache.  'error' is returned unchanged.
 */
int
spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
{
        boolean_t config_changed = B_FALSE;

        /* Reassess the top-level vdev of 'vd', or the whole tree if none. */
        if (vd != NULL || error == 0)
                vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
                    0, 0, B_FALSE);

        if (vd != NULL) {
                vdev_state_dirty(vd->vdev_top);
                config_changed = B_TRUE;
                spa->spa_config_generation++;
        }

        /* Undo the vdev_hold() done by spa_vdev_state_enter() on root pools. */
        if (spa_is_root(spa))
                vdev_rele(spa->spa_root_vdev);

        ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL);
        spa_config_exit(spa, spa->spa_vdev_locks, spa);

        /*
         * If anything changed, wait for it to sync.  This ensures that,
         * from the system administrator's perspective, zpool(1M) commands
         * are synchronous.  This is important for things like zpool offline:
         * when the command completes, you expect no further I/O from ZFS.
         */
        if (vd != NULL)
                txg_wait_synced(spa->spa_dsl_pool, 0);

        /*
         * If the config changed, update the config cache.  The namespace
         * lock is taken here just for the duration of the sync.
         */
        if (config_changed) {
                mutex_enter(&spa_namespace_lock);
                spa_config_sync(spa, B_FALSE, B_TRUE);
                mutex_exit(&spa_namespace_lock);
        }

        return (error);
}
1116 1116  
1117 1117  /*
1118 1118   * ==========================================================================
1119 1119   * Miscellaneous functions
1120 1120   * ==========================================================================
1121 1121   */
1122 1122  
/*
 * Add 'feature' as a boolean entry to the pool's spa_label_features nvlist
 * and mark the root vdev's config dirty.  The result of the nvlist insertion
 * is deliberately ignored.
 */
void
spa_activate_mos_feature(spa_t *spa, const char *feature)
{
        (void) nvlist_add_boolean(spa->spa_label_features, feature);
        vdev_config_dirty(spa->spa_root_vdev);
}
1129 1129  
/*
 * Remove every entry named 'feature' from the pool's spa_label_features
 * nvlist and mark the root vdev's config dirty.  The result of the nvlist
 * removal is deliberately ignored.
 */
void
spa_deactivate_mos_feature(spa_t *spa, const char *feature)
{
        (void) nvlist_remove_all(spa->spa_label_features, feature);
        vdev_config_dirty(spa->spa_root_vdev);
}
1136 1136  
/*
 * Rename a spa_t.
 *
 * Opens the pool called 'name', re-keys it in the namespace AVL tree under
 * 'newname', and waits for the change to sync.  Returns 0 on success or the
 * error from spa_open() if the pool could not be opened.  'newname' is copied
 * with strlcpy(), so it is truncated to fit spa_name.
 */
int
spa_rename(const char *name, const char *newname)
{
        spa_t *spa;
        int err;

        /*
         * Lookup the spa_t and grab the config lock for writing.  We need to
         * actually open the pool so that we can sync out the necessary labels.
         * It's OK to call spa_open() with the namespace lock held because we
         * allow recursive calls for other reasons.
         */
        mutex_enter(&spa_namespace_lock);
        if ((err = spa_open(name, &spa, FTAG)) != 0) {
                mutex_exit(&spa_namespace_lock);
                return (err);
        }

        spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

        /*
         * Take the pool out of the tree before changing its name and put it
         * back afterwards.  NOTE(review): presumably because the tree is
         * keyed on spa_name - confirm against the namespace comparator.
         */
        avl_remove(&spa_namespace_avl, spa);
        (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name));
        avl_add(&spa_namespace_avl, spa);

        /*
         * Sync all labels to disk with the new names by marking the root vdev
         * dirty and waiting for it to sync.  It will pick up the new pool name
         * during the sync.
         */
        vdev_config_dirty(spa->spa_root_vdev);

        spa_config_exit(spa, SCL_ALL, FTAG);

        txg_wait_synced(spa->spa_dsl_pool, 0);

        /*
         * Sync the updated config cache.
         */
        spa_config_sync(spa, B_FALSE, B_TRUE);

        spa_close(spa, FTAG);

        mutex_exit(&spa_namespace_lock);

        return (0);
}
1186 1186  
1187 1187  /*
1188 1188   * Return the spa_t associated with given pool_guid, if it exists.  If
1189 1189   * device_guid is non-zero, determine whether the pool exists *and* contains
1190 1190   * a device with the specified device_guid.
1191 1191   */
1192 1192  spa_t *
1193 1193  spa_by_guid(uint64_t pool_guid, uint64_t device_guid)
1194 1194  {
1195 1195          spa_t *spa;
1196 1196          avl_tree_t *t = &spa_namespace_avl;
1197 1197  
1198 1198          ASSERT(MUTEX_HELD(&spa_namespace_lock));
1199 1199  
1200 1200          for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
1201 1201                  if (spa->spa_state == POOL_STATE_UNINITIALIZED)
1202 1202                          continue;
1203 1203                  if (spa->spa_root_vdev == NULL)
1204 1204                          continue;
1205 1205                  if (spa_guid(spa) == pool_guid) {
1206 1206                          if (device_guid == 0)
1207 1207                                  break;
1208 1208  
1209 1209                          if (vdev_lookup_by_guid(spa->spa_root_vdev,
1210 1210                              device_guid) != NULL)
1211 1211                                  break;
1212 1212  
1213 1213                          /*
1214 1214                           * Check any devices we may be in the process of adding.
1215 1215                           */
1216 1216                          if (spa->spa_pending_vdev) {
1217 1217                                  if (vdev_lookup_by_guid(spa->spa_pending_vdev,
1218 1218                                      device_guid) != NULL)
1219 1219                                          break;
1220 1220                          }
1221 1221                  }
1222 1222          }
1223 1223  
1224 1224          return (spa);
1225 1225  }
1226 1226  
1227 1227  /*
1228 1228   * Determine whether a pool with the given pool_guid exists.
1229 1229   */
1230 1230  boolean_t
1231 1231  spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
1232 1232  {
1233 1233          return (spa_by_guid(pool_guid, device_guid) != NULL);
1234 1234  }
1235 1235  
1236 1236  char *
1237 1237  spa_strdup(const char *s)
1238 1238  {
1239 1239          size_t len;
1240 1240          char *new;
1241 1241  
1242 1242          len = strlen(s);
1243 1243          new = kmem_alloc(len + 1, KM_SLEEP);
1244 1244          bcopy(s, new, len);
1245 1245          new[len] = '\0';
1246 1246  
1247 1247          return (new);
1248 1248  }
1249 1249  
/*
 * Free a string, releasing strlen(s) + 1 bytes.  The string must therefore
 * still have the length it was allocated with.
 */
void
spa_strfree(char *s)
{
        size_t size = strlen(s) + 1;

        kmem_free(s, size);
}
1255 1255  
/*
 * Return a pseudo-random value in [0, range).  'range' must be non-zero.
 * The result of random_get_pseudo_bytes() is not checked.
 */
uint64_t
spa_get_random(uint64_t range)
{
        uint64_t bits;

        ASSERT(range != 0);

        (void) random_get_pseudo_bytes((void *)&bits, sizeof (uint64_t));

        return (bits % range);
}
1267 1267  
1268 1268  uint64_t
1269 1269  spa_generate_guid(spa_t *spa)
1270 1270  {
1271 1271          uint64_t guid = spa_get_random(-1ULL);
1272 1272  
1273 1273          if (spa != NULL) {
1274 1274                  while (guid == 0 || spa_guid_exists(spa_guid(spa), guid))
1275 1275                          guid = spa_get_random(-1ULL);
1276 1276          } else {
1277 1277                  while (guid == 0 || spa_guid_exists(guid, 0))
1278 1278                          guid = spa_get_random(-1ULL);
1279 1279          }
1280 1280  
1281 1281          return (guid);
1282 1282  }
1283 1283  
/*
 * Format a human-readable description of the block pointer 'bp' into 'buf'
 * using the SPRINTF_BLKPTR() macro.  No buffer length is passed in, so the
 * caller must supply a buffer large enough for the result (TODO confirm the
 * required size against SPRINTF_BLKPTR's users).
 */
void
sprintf_blkptr(char *buf, const blkptr_t *bp)
{
        char type[256];
        char *checksum = NULL;
        char *compress = NULL;

        /*
         * For a NULL bp 'type' is left uninitialized and the other two
         * strings stay NULL; presumably SPRINTF_BLKPTR() does not read them
         * in that case - confirm against the macro definition.
         */
        if (bp != NULL) {
                if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
                        /*
                         * Types with the DMU_OT_NEWTYPE bit set are described
                         * by their byteswap name and whether they are
                         * metadata or data.
                         */
                        dmu_object_byteswap_t bswap =
                            DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
                        (void) snprintf(type, sizeof (type), "bswap %s %s",
                            DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
                            "metadata" : "data",
                            dmu_ot_byteswap[bswap].ob_name);
                } else {
                        /* Other types take their name from the dmu_ot table. */
                        (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
                            sizeof (type));
                }
                checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
                compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
        }

        SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
}
1309 1309  
/*
 * Set the pool's freeze txg if it has not been set yet (spa_freeze_txg is
 * still UINT64_MAX).  The freeze txg is chosen TXG_SIZE transaction groups
 * past the last synced one, and the function waits for it to sync.  If a
 * freeze txg was already set, nothing is changed and there is no wait.
 */
void
spa_freeze(spa_t *spa)
{
        /* Stays 0 unless this call is the one that sets the freeze txg. */
        uint64_t freeze_txg = 0;

        spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
        if (spa->spa_freeze_txg == UINT64_MAX) {
                freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE;
                spa->spa_freeze_txg = freeze_txg;
        }
        spa_config_exit(spa, SCL_ALL, FTAG);
        /* Wait only after the config locks have been dropped. */
        if (freeze_txg != 0)
                txg_wait_synced(spa_get_dsl(spa), freeze_txg);
}
1324 1324  
1325 1325  void
1326 1326  zfs_panic_recover(const char *fmt, ...)
  
    | ↓ open down ↓ | 1326 lines elided | ↑ open up ↑ | 
1327 1327  {
1328 1328          va_list adx;
1329 1329  
1330 1330          va_start(adx, fmt);
1331 1331          vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
1332 1332          va_end(adx);
1333 1333  }
1334 1334  
/*
 * Minimal strtoull-like parser for lowercase hexadecimal.  Digits '0'-'9'
 * and 'a'-'f' are consumed from the start of 'str'; parsing stops at the
 * first other character (uppercase hex digits included) or at the end of the
 * string.  No overflow detection is performed.  If 'nptr' is non-NULL it is
 * set to the first unconsumed character.
 */
uint64_t
strtonum(const char *str, char **nptr)
{
        const char *cursor = str;
        uint64_t result = 0;

        for (;; cursor++) {
                char ch = *cursor;
                int nibble;

                if (ch >= '0' && ch <= '9')
                        nibble = ch - '0';
                else if (ch >= 'a' && ch <= 'f')
                        nibble = ch - 'a' + 10;
                else
                        break;  /* also taken for the terminating NUL */

                result = result * 16 + nibble;
        }

        if (nptr != NULL)
                *nptr = (char *)cursor;

        return (result);
}
1365 1365  
1366 1366  /*
1367 1367   * ==========================================================================
1368 1368   * Accessor functions
1369 1369   * ==========================================================================
1370 1370   */
1371 1371  
1372 1372  boolean_t
1373 1373  spa_shutting_down(spa_t *spa)
1374 1374  {
1375 1375          return (spa->spa_async_suspended);
1376 1376  }
1377 1377  
1378 1378  dsl_pool_t *
1379 1379  spa_get_dsl(spa_t *spa)
1380 1380  {
1381 1381          return (spa->spa_dsl_pool);
1382 1382  }
1383 1383  
1384 1384  boolean_t
1385 1385  spa_is_initializing(spa_t *spa)
1386 1386  {
1387 1387          return (spa->spa_is_initializing);
1388 1388  }
1389 1389  
1390 1390  blkptr_t *
1391 1391  spa_get_rootblkptr(spa_t *spa)
1392 1392  {
1393 1393          return (&spa->spa_ubsync.ub_rootbp);
1394 1394  }
1395 1395  
1396 1396  void
1397 1397  spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
1398 1398  {
1399 1399          spa->spa_uberblock.ub_rootbp = *bp;
1400 1400  }
1401 1401  
1402 1402  void
1403 1403  spa_altroot(spa_t *spa, char *buf, size_t buflen)
1404 1404  {
1405 1405          if (spa->spa_root == NULL)
1406 1406                  buf[0] = '\0';
1407 1407          else
1408 1408                  (void) strncpy(buf, spa->spa_root, buflen);
1409 1409  }
1410 1410  
1411 1411  int
1412 1412  spa_sync_pass(spa_t *spa)
1413 1413  {
1414 1414          return (spa->spa_sync_pass);
1415 1415  }
1416 1416  
1417 1417  char *
1418 1418  spa_name(spa_t *spa)
1419 1419  {
1420 1420          return (spa->spa_name);
1421 1421  }
1422 1422  
1423 1423  uint64_t
1424 1424  spa_guid(spa_t *spa)
1425 1425  {
1426 1426          dsl_pool_t *dp = spa_get_dsl(spa);
1427 1427          uint64_t guid;
1428 1428  
1429 1429          /*
1430 1430           * If we fail to parse the config during spa_load(), we can go through
1431 1431           * the error path (which posts an ereport) and end up here with no root
1432 1432           * vdev.  We stash the original pool guid in 'spa_config_guid' to handle
1433 1433           * this case.
1434 1434           */
1435 1435          if (spa->spa_root_vdev == NULL)
1436 1436                  return (spa->spa_config_guid);
1437 1437  
1438 1438          guid = spa->spa_last_synced_guid != 0 ?
1439 1439              spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid;
1440 1440  
1441 1441          /*
1442 1442           * Return the most recently synced out guid unless we're
1443 1443           * in syncing context.
1444 1444           */
1445 1445          if (dp && dsl_pool_sync_context(dp))
1446 1446                  return (spa->spa_root_vdev->vdev_guid);
1447 1447          else
1448 1448                  return (guid);
1449 1449  }
1450 1450  
1451 1451  uint64_t
1452 1452  spa_load_guid(spa_t *spa)
1453 1453  {
1454 1454          /*
1455 1455           * This is a GUID that exists solely as a reference for the
1456 1456           * purposes of the arc.  It is generated at load time, and
1457 1457           * is never written to persistent storage.
1458 1458           */
1459 1459          return (spa->spa_load_guid);
1460 1460  }
1461 1461  
1462 1462  uint64_t
1463 1463  spa_last_synced_txg(spa_t *spa)
1464 1464  {
1465 1465          return (spa->spa_ubsync.ub_txg);
1466 1466  }
1467 1467  
1468 1468  uint64_t
1469 1469  spa_first_txg(spa_t *spa)
1470 1470  {
1471 1471          return (spa->spa_first_txg);
1472 1472  }
1473 1473  
1474 1474  uint64_t
1475 1475  spa_syncing_txg(spa_t *spa)
1476 1476  {
1477 1477          return (spa->spa_syncing_txg);
1478 1478  }
1479 1479  
1480 1480  pool_state_t
1481 1481  spa_state(spa_t *spa)
1482 1482  {
1483 1483          return (spa->spa_state);
1484 1484  }
1485 1485  
1486 1486  spa_load_state_t
1487 1487  spa_load_state(spa_t *spa)
1488 1488  {
1489 1489          return (spa->spa_load_state);
1490 1490  }
1491 1491  
1492 1492  uint64_t
1493 1493  spa_freeze_txg(spa_t *spa)
1494 1494  {
1495 1495          return (spa->spa_freeze_txg);
1496 1496  }
1497 1497  
1498 1498  /* ARGSUSED */
1499 1499  uint64_t
1500 1500  spa_get_asize(spa_t *spa, uint64_t lsize)
1501 1501  {
1502 1502          /*
1503 1503           * The worst case is single-sector max-parity RAID-Z blocks, in which
1504 1504           * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
1505 1505           * times the size; so just assume that.  Add to this the fact that
1506 1506           * we can have up to 3 DVAs per bp, and one more factor of 2 because
1507 1507           * the block may be dittoed with up to 3 DVAs by ddt_sync().
1508 1508           */
1509 1509          return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
1510 1510  }
1511 1511  
1512 1512  uint64_t
1513 1513  spa_get_dspace(spa_t *spa)
1514 1514  {
1515 1515          return (spa->spa_dspace);
1516 1516  }
1517 1517  
1518 1518  void
1519 1519  spa_update_dspace(spa_t *spa)
1520 1520  {
1521 1521          spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
1522 1522              ddt_get_dedup_dspace(spa);
1523 1523  }
1524 1524  
1525 1525  /*
1526 1526   * Return the failure mode that has been set to this pool. The default
1527 1527   * behavior will be to block all I/Os when a complete failure occurs.
1528 1528   */
1529 1529  uint8_t
1530 1530  spa_get_failmode(spa_t *spa)
1531 1531  {
1532 1532          return (spa->spa_failmode);
1533 1533  }
1534 1534  
1535 1535  boolean_t
1536 1536  spa_suspended(spa_t *spa)
1537 1537  {
1538 1538          return (spa->spa_suspended);
1539 1539  }
1540 1540  
1541 1541  uint64_t
1542 1542  spa_version(spa_t *spa)
1543 1543  {
1544 1544          return (spa->spa_ubsync.ub_version);
1545 1545  }
1546 1546  
1547 1547  boolean_t
1548 1548  spa_deflate(spa_t *spa)
1549 1549  {
1550 1550          return (spa->spa_deflate);
1551 1551  }
1552 1552  
1553 1553  metaslab_class_t *
1554 1554  spa_normal_class(spa_t *spa)
1555 1555  {
1556 1556          return (spa->spa_normal_class);
1557 1557  }
1558 1558  
1559 1559  metaslab_class_t *
1560 1560  spa_log_class(spa_t *spa)
1561 1561  {
1562 1562          return (spa->spa_log_class);
1563 1563  }
1564 1564  
1565 1565  int
1566 1566  spa_max_replication(spa_t *spa)
1567 1567  {
1568 1568          /*
1569 1569           * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
1570 1570           * handle BPs with more than one DVA allocated.  Set our max
1571 1571           * replication level accordingly.
1572 1572           */
1573 1573          if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
1574 1574                  return (1);
1575 1575          return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
1576 1576  }
1577 1577  
1578 1578  int
1579 1579  spa_prev_software_version(spa_t *spa)
1580 1580  {
1581 1581          return (spa->spa_prev_software_version);
1582 1582  }
1583 1583  
1584 1584  uint64_t
1585 1585  spa_deadman_synctime(spa_t *spa)
1586 1586  {
1587 1587          return (spa->spa_deadman_synctime);
1588 1588  }
1589 1589  
1590 1590  uint64_t
1591 1591  dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
1592 1592  {
1593 1593          uint64_t asize = DVA_GET_ASIZE(dva);
1594 1594          uint64_t dsize = asize;
1595 1595  
1596 1596          ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
1597 1597  
1598 1598          if (asize != 0 && spa->spa_deflate) {
1599 1599                  vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
1600 1600                  dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
1601 1601          }
1602 1602  
1603 1603          return (dsize);
1604 1604  }
1605 1605  
1606 1606  uint64_t
1607 1607  bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp)
1608 1608  {
1609 1609          uint64_t dsize = 0;
1610 1610  
1611 1611          for (int d = 0; d < SPA_DVAS_PER_BP; d++)
1612 1612                  dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
1613 1613  
1614 1614          return (dsize);
1615 1615  }
1616 1616  
1617 1617  uint64_t
1618 1618  bp_get_dsize(spa_t *spa, const blkptr_t *bp)
1619 1619  {
1620 1620          uint64_t dsize = 0;
1621 1621  
1622 1622          spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
1623 1623  
1624 1624          for (int d = 0; d < SPA_DVAS_PER_BP; d++)
1625 1625                  dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
1626 1626  
1627 1627          spa_config_exit(spa, SCL_VDEV, FTAG);
1628 1628  
1629 1629          return (dsize);
1630 1630  }
1631 1631  
1632 1632  /*
1633 1633   * ==========================================================================
1634 1634   * Initialization and Termination
1635 1635   * ==========================================================================
1636 1636   */
1637 1637  
1638 1638  static int
1639 1639  spa_name_compare(const void *a1, const void *a2)
1640 1640  {
1641 1641          const spa_t *s1 = a1;
1642 1642          const spa_t *s2 = a2;
1643 1643          int s;
1644 1644  
1645 1645          s = strcmp(s1->spa_name, s2->spa_name);
1646 1646          if (s > 0)
1647 1647                  return (1);
1648 1648          if (s < 0)
1649 1649                  return (-1);
1650 1650          return (0);
1651 1651  }
1652 1652  
1653 1653  int
1654 1654  spa_busy(void)
1655 1655  {
1656 1656          return (spa_active_count);
1657 1657  }
1658 1658  
/*
 * Boot-time initialization hook: loads the pool configuration via
 * spa_config_load().  Takes no arguments; the parameter list is spelled
 * (void) so the definition is a proper prototype, consistent with
 * spa_busy() and spa_fini() in this file.
 */
void
spa_boot_init(void)
{
        spa_config_load();
}
1664 1664  
1665 1665  void
1666 1666  spa_init(int mode)
1667 1667  {
1668 1668          mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
1669 1669          mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
1670 1670          mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL);
1671 1671          cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
1672 1672  
1673 1673          avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
1674 1674              offsetof(spa_t, spa_avl));
1675 1675  
1676 1676          avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t),
1677 1677              offsetof(spa_aux_t, aux_avl));
1678 1678  
1679 1679          avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
1680 1680              offsetof(spa_aux_t, aux_avl));
1681 1681  
1682 1682          spa_mode_global = mode;
1683 1683  
1684 1684  #ifdef _KERNEL
1685 1685          spa_arch_init();
1686 1686  #else
1687 1687          if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
1688 1688                  arc_procfd = open("/proc/self/ctl", O_WRONLY);
1689 1689                  if (arc_procfd == -1) {
1690 1690                          perror("could not enable watchpoints: "
1691 1691                              "opening /proc/self/ctl failed: ");
1692 1692                  } else {
1693 1693                          arc_watch = B_TRUE;
1694 1694                  }
1695 1695          }
1696 1696  #endif
1697 1697  
1698 1698          refcount_init();
1699 1699          unique_init();
1700 1700          space_map_init();
1701 1701          zio_init();
1702 1702          dmu_init();
1703 1703          zil_init();
1704 1704          vdev_cache_stat_init();
1705 1705          zfs_prop_init();
1706 1706          zpool_prop_init();
1707 1707          zpool_feature_init();
1708 1708          spa_config_load();
1709 1709          l2arc_start();
1710 1710  }
1711 1711  
/*
 * Tear down the global SPA state set up by spa_init().  The L2ARC is
 * stopped and all pools are evicted first; the subsystems are then shut
 * down in the reverse of their spa_init() order, and finally the global
 * AVL trees, condition variable and locks are destroyed.
 */
void
spa_fini(void)
{
        l2arc_stop();

        /* Evict every pool before the subsystems below are torn down. */
        spa_evict_all();

        /* Reverse order of the corresponding *_init() calls in spa_init(). */
        vdev_cache_stat_fini();
        zil_fini();
        dmu_fini();
        zio_fini();
        space_map_fini();
        unique_fini();
        refcount_fini();

        avl_destroy(&spa_namespace_avl);
        avl_destroy(&spa_spare_avl);
        avl_destroy(&spa_l2cache_avl);

        cv_destroy(&spa_namespace_cv);
        mutex_destroy(&spa_namespace_lock);
        mutex_destroy(&spa_spare_lock);
        mutex_destroy(&spa_l2cache_lock);
}
1736 1736  
1737 1737  /*
1738 1738   * Return whether this pool has slogs. No locking needed.
1739 1739   * It's not a problem if the wrong answer is returned as it's only for
1740 1740   * performance and not correctness
1741 1741   */
1742 1742  boolean_t
1743 1743  spa_has_slogs(spa_t *spa)
1744 1744  {
1745 1745          return (spa->spa_log_class->mc_rotor != NULL);
1746 1746  }
1747 1747  
1748 1748  spa_log_state_t
1749 1749  spa_get_log_state(spa_t *spa)
1750 1750  {
1751 1751          return (spa->spa_log_state);
1752 1752  }
1753 1753  
/*
 * Record 'state' as the pool's log state (spa_log_state).  No locking is
 * performed here.
 */
void
spa_set_log_state(spa_t *spa, spa_log_state_t state)
{
        spa->spa_log_state = state;
}
1759 1759  
1760 1760  boolean_t
1761 1761  spa_is_root(spa_t *spa)
1762 1762  {
1763 1763          return (spa->spa_is_root);
1764 1764  }
1765 1765  
1766 1766  boolean_t
1767 1767  spa_writeable(spa_t *spa)
1768 1768  {
1769 1769          return (!!(spa->spa_mode & FWRITE));
1770 1770  }
1771 1771  
1772 1772  int
1773 1773  spa_mode(spa_t *spa)
1774 1774  {
1775 1775          return (spa->spa_mode);
1776 1776  }
1777 1777  
1778 1778  uint64_t
1779 1779  spa_bootfs(spa_t *spa)
1780 1780  {
1781 1781          return (spa->spa_bootfs);
1782 1782  }
1783 1783  
1784 1784  uint64_t
1785 1785  spa_delegation(spa_t *spa)
1786 1786  {
1787 1787          return (spa->spa_delegation);
1788 1788  }
1789 1789  
1790 1790  objset_t *
1791 1791  spa_meta_objset(spa_t *spa)
1792 1792  {
1793 1793          return (spa->spa_meta_objset);
1794 1794  }
1795 1795  
1796 1796  enum zio_checksum
1797 1797  spa_dedup_checksum(spa_t *spa)
1798 1798  {
1799 1799          return (spa->spa_dedup_checksum);
1800 1800  }
1801 1801  
1802 1802  /*
1803 1803   * Reset pool scan stat per scan pass (or reboot).
1804 1804   */
1805 1805  void
1806 1806  spa_scan_stat_init(spa_t *spa)
1807 1807  {
1808 1808          /* data not stored on disk */
1809 1809          spa->spa_scan_pass_start = gethrestime_sec();
1810 1810          spa->spa_scan_pass_exam = 0;
1811 1811          vdev_scan_stat_init(spa->spa_root_vdev);
1812 1812  }
1813 1813  
1814 1814  /*
1815 1815   * Get scan stats for zpool status reports
1816 1816   */
1817 1817  int
1818 1818  spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
1819 1819  {
1820 1820          dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL;
1821 1821  
1822 1822          if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE)
1823 1823                  return (SET_ERROR(ENOENT));
1824 1824          bzero(ps, sizeof (pool_scan_stat_t));
1825 1825  
1826 1826          /* data stored on disk */
1827 1827          ps->pss_func = scn->scn_phys.scn_func;
1828 1828          ps->pss_start_time = scn->scn_phys.scn_start_time;
1829 1829          ps->pss_end_time = scn->scn_phys.scn_end_time;
1830 1830          ps->pss_to_examine = scn->scn_phys.scn_to_examine;
1831 1831          ps->pss_examined = scn->scn_phys.scn_examined;
1832 1832          ps->pss_to_process = scn->scn_phys.scn_to_process;
1833 1833          ps->pss_processed = scn->scn_phys.scn_processed;
1834 1834          ps->pss_errors = scn->scn_phys.scn_errors;
1835 1835          ps->pss_state = scn->scn_phys.scn_state;
1836 1836  
1837 1837          /* data not stored on disk */
1838 1838          ps->pss_pass_start = spa->spa_scan_pass_start;
1839 1839          ps->pss_pass_exam = spa->spa_scan_pass_exam;
1840 1840  
1841 1841          return (0);
1842 1842  }
1843 1843  
1844 1844  boolean_t
1845 1845  spa_debug_enabled(spa_t *spa)
1846 1846  {
1847 1847          return (spa->spa_debug);
1848 1848  }
  
    | ↓ open down ↓ | 501 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX