static kmutex_t spa_l2cache_lock;
static avl_tree_t spa_l2cache_avl;

kmem_cache_t *spa_buffer_pool;
int spa_mode_global;

#ifdef ZFS_DEBUG
/* Everything except dprintf and spa is on by default in debug builds */
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
#else
int zfs_flags = 0;
#endif

/*
 * zfs_recover can be set to nonzero to attempt to recover from
 * otherwise-fatal errors, typically caused by on-disk corruption. When
 * set, calls to zfs_panic_recover() will turn into warning messages.
 */
int zfs_recover = 0;
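
/*
 * For example, zfs_recover can be enabled on a live system with mdb(1M),
 * or persistently via an /etc/system entry:
 *
 *	echo "zfs_recover/W 1" | mdb -kw
 *	set zfs:zfs_recover = 1
 */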

/*
 * Expiration time in milliseconds. This value has two meanings. First, it is
 * used to determine when the spa_deadman() logic should fire. By default the
 * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
 * Secondly, the value determines if an I/O is considered "hung". Any I/O that
 * has not completed in zfs_deadman_synctime_ms is considered "hung", resulting
 * in a system panic.
 */
uint64_t zfs_deadman_synctime_ms = 1000000ULL;

/*
 * Check time in milliseconds. This defines the frequency at which we check
 * for hung I/O.
 */
uint64_t zfs_deadman_checktime_ms = 5000ULL;

/*
 * Override the zfs deadman behavior via /etc/system. By default the
 * deadman is enabled except on VMware and sparc deployments.
 */
int zfs_deadman_enabled = -1;

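/*
 * For instance, the deadman can be disabled outright with an /etc/system
 * entry like the following (illustrative), and its timeouts tuned through
 * zfs_deadman_synctime_ms and zfs_deadman_checktime_ms in the same way:
 *
 *	set zfs:zfs_deadman_enabled = 0
 */
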
/*
 * The worst case is single-sector max-parity RAID-Z blocks, in which
 * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
 * times the size; so just assume that. Add to this the fact that
 * we can have up to 3 DVAs per bp, and one more factor of 2 because
 * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together,
 * the worst case is:
 *     (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
 */
int spa_asize_inflation = 24;
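/*
 * With VDEV_RAIDZ_MAXPARITY == 3 and SPA_DVAS_PER_BP == 3, that factor works
 * out to (3 + 1) * 3 * 2 == 24; spa_get_asize() below simply multiplies a
 * logical size by it, so a 128K block is estimated at up to 3M in the worst
 * case.
 */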

/*
 * ==========================================================================
 * SPA config locking
 * ==========================================================================
 */
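/*
 * spa_config_lock[] holds SCL_LOCKS independent locks, one per SCL_* class,
 * each with its own mutex, condition variable, reference count and writer
 * slot, so holders of different lock classes do not contend with one another.
 */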
static void
spa_config_lock_init(spa_t *spa)
{
        for (int i = 0; i < SCL_LOCKS; i++) {
                spa_config_lock_t *scl = &spa->spa_config_lock[i];
                mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
                cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
                refcount_create_untracked(&scl->scl_count);
                scl->scl_writer = NULL;
                scl->scl_write_wanted = 0;
        }
}

static void

/* ... */

        cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);

        for (int t = 0; t < TXG_SIZE; t++)
                bplist_create(&spa->spa_free_bplist[t]);

        (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
        spa->spa_state = POOL_STATE_UNINITIALIZED;
        spa->spa_freeze_txg = UINT64_MAX;
        spa->spa_final_txg = UINT64_MAX;
        spa->spa_load_max_txg = UINT64_MAX;
        spa->spa_proc = &p0;
        spa->spa_proc_state = SPA_PROC_NONE;

        hdlr.cyh_func = spa_deadman;
        hdlr.cyh_arg = spa;
        hdlr.cyh_level = CY_LOW_LEVEL;

        spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);

        /*
         * This determines how often we need to check for hung I/Os after
         * the cyclic has already fired. Since checking for hung I/Os is
         * an expensive operation, we don't want to check too frequently.
         * Instead, wait five seconds between checks.
         */
        when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms);
        when.cyt_when = CY_INFINITY;
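        /*
         * The cyclic is created disarmed (cyt_when == CY_INFINITY); it is
         * presumably reprogrammed once syncing starts so that spa_deadman()
         * only fires when a txg sync overruns zfs_deadman_synctime_ms.
         */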
        mutex_enter(&cpu_lock);
        spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
        mutex_exit(&cpu_lock);

        refcount_create(&spa->spa_refcount);
        spa_config_lock_init(spa);

        avl_add(&spa_namespace_avl, spa);

        /*
         * Set the alternate root, if there is one.
         */
        if (altroot) {
                spa->spa_root = spa_strdup(altroot);
                spa_active_count++;
        }

        /*
         * Every pool starts with the default cachefile

/* ... */

{
        return (spa->spa_state);
}

spa_load_state_t
spa_load_state(spa_t *spa)
{
        return (spa->spa_load_state);
}

uint64_t
spa_freeze_txg(spa_t *spa)
{
        return (spa->spa_freeze_txg);
}

/* ARGSUSED */
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
{
        return (lsize * spa_asize_inflation);
}

uint64_t
spa_get_dspace(spa_t *spa)
{
        return (spa->spa_dspace);
}

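/*
 * Recompute the pool's cached dspace value: the deflated space of the normal
 * metaslab class plus the dedup adjustment reported by
 * ddt_get_dedup_dspace().
 */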
void
spa_update_dspace(spa_t *spa)
{
        spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
            ddt_get_dedup_dspace(spa);
}

/*
 * Return the failure mode that has been set for this pool. The default
 * behavior will be to block all I/Os when a complete failure occurs.
 */
uint8_t