Print this page
zpool import is braindead
*** 20,29 ****
--- 20,30 ----
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 RackTop Systems.
*/
/*
* Pool import support functions.
*
*** 432,447 ****
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
pool_entry_t *pe;
vdev_entry_t *ve;
config_entry_t *ce;
! nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
nvlist_t **spares, **l2cache;
uint_t i, nspares, nl2cache;
boolean_t config_seen;
uint64_t best_txg;
! char *name, *hostname;
uint64_t guid;
uint_t children = 0;
nvlist_t **child = NULL;
uint_t holes;
uint64_t *hole_array, max_id;
--- 433,448 ----
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
pool_entry_t *pe;
vdev_entry_t *ve;
config_entry_t *ce;
! nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
nvlist_t **spares, **l2cache;
uint_t i, nspares, nl2cache;
boolean_t config_seen;
uint64_t best_txg;
! char *name, *hostname = NULL;
uint64_t guid;
uint_t children = 0;
nvlist_t **child = NULL;
uint_t holes;
uint64_t *hole_array, max_id;
*** 903,1048 ****
free(label);
*config = NULL;
return (0);
}
! typedef struct rdsk_node {
! char *rn_name;
! int rn_dfd;
! libzfs_handle_t *rn_hdl;
! nvlist_t *rn_config;
! avl_tree_t *rn_avl;
! avl_node_t rn_node;
! boolean_t rn_nozpool;
! } rdsk_node_t;
! static int
! slice_cache_compare(const void *arg1, const void *arg2)
{
! const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
! const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
! char *nm1slice, *nm2slice;
! int rv;
! /*
! * slices zero and two are the most likely to provide results,
! * so put those first
! */
! nm1slice = strstr(nm1, "s0");
! nm2slice = strstr(nm2, "s0");
! if (nm1slice && !nm2slice) {
! return (-1);
! }
! if (!nm1slice && nm2slice) {
! return (1);
! }
! nm1slice = strstr(nm1, "s2");
! nm2slice = strstr(nm2, "s2");
! if (nm1slice && !nm2slice) {
! return (-1);
! }
! if (!nm1slice && nm2slice) {
! return (1);
! }
! rv = strcmp(nm1, nm2);
! if (rv == 0)
! return (0);
! return (rv > 0 ? 1 : -1);
}
static void
! check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
! diskaddr_t size, uint_t blksz)
{
- rdsk_node_t tmpnode;
- rdsk_node_t *node;
- char sname[MAXNAMELEN];
-
- tmpnode.rn_name = &sname[0];
- (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
- diskname, partno);
/*
* protect against division by zero for disk labels that
* contain a bogus sector size
*/
if (blksz == 0)
blksz = DEV_BSIZE;
/* too small to contain a zpool? */
! if ((size < (SPA_MINDEVSIZE / blksz)) &&
! (node = avl_find(r, &tmpnode, NULL)))
! node->rn_nozpool = B_TRUE;
}
static void
! nozpool_all_slices(avl_tree_t *r, const char *sname)
! {
! char diskname[MAXNAMELEN];
! char *ptr;
! int i;
!
! (void) strncpy(diskname, sname, MAXNAMELEN);
! if (((ptr = strrchr(diskname, 's')) == NULL) &&
! ((ptr = strrchr(diskname, 'p')) == NULL))
! return;
! ptr[0] = 's';
! ptr[1] = '\0';
! for (i = 0; i < NDKMAP; i++)
! check_one_slice(r, diskname, i, 0, 1);
! ptr[0] = 'p';
! for (i = 0; i <= FD_NUMPART; i++)
! check_one_slice(r, diskname, i, 0, 1);
! }
!
! static void
! check_slices(avl_tree_t *r, int fd, const char *sname)
{
struct extvtoc vtoc;
struct dk_gpt *gpt;
! char diskname[MAXNAMELEN];
! char *ptr;
! int i;
!
! (void) strncpy(diskname, sname, MAXNAMELEN);
! if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
! return;
! ptr[1] = '\0';
if (read_extvtoc(fd, &vtoc) >= 0) {
! for (i = 0; i < NDKMAP; i++)
! check_one_slice(r, diskname, i,
! vtoc.v_part[i].p_size, vtoc.v_sectorsz);
} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
! /*
! * on x86 we'll still have leftover links that point
! * to slices s[9-15], so use NDKMAP instead
! */
! for (i = 0; i < NDKMAP; i++)
! check_one_slice(r, diskname, i,
! gpt->efi_parts[i].p_size, gpt->efi_lbasize);
/* nodes p[1-4] are never used with EFI labels */
! ptr[0] = 'p';
! for (i = 1; i <= FD_NUMPART; i++)
! check_one_slice(r, diskname, i, 0, 1);
efi_free(gpt);
}
}
static void
zpool_open_func(void *arg)
{
! rdsk_node_t *rn = arg;
struct stat64 statbuf;
nvlist_t *config;
int fd;
! if (rn->rn_nozpool)
return;
! if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
! /* symlink to a device that's no longer there */
! if (errno == ENOENT)
! nozpool_all_slices(rn->rn_avl, rn->rn_name);
return;
}
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
--- 904,1032 ----
free(label);
*config = NULL;
return (0);
}
! typedef struct slice_node {
! char *sn_name;
! nvlist_t *sn_config;
! boolean_t sn_nozpool;
! int sn_partno;
! struct disk_node *sn_disk;
! struct slice_node *sn_next;
! } slice_node_t;
!
! typedef struct disk_node {
! char *dn_name;
! int dn_dfd;
! libzfs_handle_t *dn_hdl;
! nvlist_t *dn_config;
! struct slice_node *dn_slices;
! struct disk_node *dn_next;
! } disk_node_t;
!
! #ifdef sparc
! #define WHOLE_DISK "s2"
! #else
! #define WHOLE_DISK "p0"
! #endif
! /*
! * This function splits the slice from the device name. Currently it supports
! * VTOC slices (s[0-16]) and DOS/FDISK partitions (p[0-4]). If this function
! * is updated to support other slice types then the check_slices function will
! * also need to be updated.
! */
! static boolean_t
! get_disk_slice(libzfs_handle_t *hdl, char *disk, char **slice, int *partno)
{
! char *p;
! if ((p = strrchr(disk, 's')) == NULL &&
! (p = strrchr(disk, 'p')) == NULL)
! return (B_FALSE);
! if (!isdigit(p[1]))
! return (B_FALSE);
!
! *slice = zfs_strdup(hdl, p);
! *partno = atoi(p + 1);
!
! p = '\0';
! return (B_TRUE);
}
static void
! check_one_slice(slice_node_t *slice, diskaddr_t size, uint_t blksz)
{
/*
* protect against division by zero for disk labels that
* contain a bogus sector size
*/
if (blksz == 0)
blksz = DEV_BSIZE;
/* too small to contain a zpool? */
! if (size < (SPA_MINDEVSIZE / blksz))
! slice->sn_nozpool = B_TRUE;
}
static void
! check_slices(slice_node_t *slices, int fd)
{
struct extvtoc vtoc;
struct dk_gpt *gpt;
! slice_node_t *slice;
! diskaddr_t size;
if (read_extvtoc(fd, &vtoc) >= 0) {
! for (slice = slices; slice; slice = slice->sn_next) {
! if (slice->sn_name[0] == 'p')
! continue;
! size = vtoc.v_part[slice->sn_partno].p_size;
! check_one_slice(slice, size, vtoc.v_sectorsz);
! }
} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
! for (slice = slices; slice; slice = slice->sn_next) {
/* nodes p[1-4] are never used with EFI labels */
! if (slice->sn_name[0] == 'p') {
! if (slice->sn_partno > 0)
! slice->sn_nozpool = B_TRUE;
! continue;
! }
! size = gpt->efi_parts[slice->sn_partno].p_size;
! check_one_slice(slice, size, gpt->efi_lbasize);
! }
efi_free(gpt);
}
}
static void
zpool_open_func(void *arg)
{
! disk_node_t *disk = arg;
struct stat64 statbuf;
+ slice_node_t *slice;
nvlist_t *config;
+ char *devname;
int fd;
! /*
! * If the disk has no slices we open it directly, otherwise we try
! * to open the whole disk slice.
! */
! if (disk->dn_slices == NULL)
! devname = strdup(disk->dn_name);
! else
! (void) asprintf(&devname, "%s" WHOLE_DISK, disk->dn_name);
!
! if (devname == NULL) {
! (void) no_memory(disk->dn_hdl);
return;
! }
!
! if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
! free(devname);
return;
}
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
*** 1050,1085 ****
if (fstat64(fd, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) &&
!S_ISCHR(statbuf.st_mode) &&
!S_ISBLK(statbuf.st_mode))) {
(void) close(fd);
return;
}
/* this file is too small to hold a zpool */
! if (S_ISREG(statbuf.st_mode) &&
! statbuf.st_size < SPA_MINDEVSIZE) {
(void) close(fd);
return;
! } else if (!S_ISREG(statbuf.st_mode)) {
/*
* Try to read the disk label first so we don't have to
* open a bunch of minor nodes that can't have a zpool.
*/
! check_slices(rn->rn_avl, fd, rn->rn_name);
}
! if ((zpool_read_label(fd, &config)) != 0) {
(void) close(fd);
! (void) no_memory(rn->rn_hdl);
return;
}
(void) close(fd);
! rn->rn_config = config;
! if (config != NULL) {
! assert(rn->rn_nozpool == B_FALSE);
}
}
/*
* Given a file descriptor, clear (zero) the label information. This function
--- 1034,1112 ----
if (fstat64(fd, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) &&
!S_ISCHR(statbuf.st_mode) &&
!S_ISBLK(statbuf.st_mode))) {
(void) close(fd);
+ free(devname);
return;
}
/* this file is too small to hold a zpool */
! if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
(void) close(fd);
+ free(devname);
return;
! } else if (!S_ISREG(statbuf.st_mode) && disk->dn_slices != NULL) {
/*
* Try to read the disk label first so we don't have to
* open a bunch of minor nodes that can't have a zpool.
*/
! check_slices(disk->dn_slices, fd);
}
! /*
! * If we're working with the device directly (it has no slices)
! * then we can just read the config and we're done.
! */
! if (disk->dn_slices == NULL) {
! if (zpool_read_label(fd, &config) != 0) {
! (void) no_memory(disk->dn_hdl);
(void) close(fd);
! free(devname);
return;
}
+ disk->dn_config = config;
+ (void) close(fd);
+ free(devname);
+ return;
+ }
+
(void) close(fd);
+ free(devname);
+
+ /*
+ * Go through and read the label off each slice. The check_slices
+ * function has already performed some basic checks and set the
+ * sn_nozpool flag on any slices which just can't contain a zpool.
+ */
+ for (slice = disk->dn_slices; slice; slice = slice->sn_next) {
+ if (slice->sn_nozpool == B_TRUE)
+ continue;
+
+ (void) asprintf(&devname, "%s%s", disk->dn_name,
+ slice->sn_name);
+
+ if (devname == NULL) {
+ (void) no_memory(disk->dn_hdl);
+ free(devname);
+ return;
+ }
+ if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
+ free(devname);
+ continue;
+ }
+
+ if ((zpool_read_label(fd, &config)) != 0) {
+ (void) no_memory(disk->dn_hdl);
+ (void) close(fd);
+ free(devname);
+ return;
+ }
! slice->sn_config = config;
! (void) close(fd);
! free(devname);
}
}
/*
* Given a file descriptor, clear (zero) the label information. This function
*** 1132,1143 ****
pool_list_t pools = { 0 };
pool_entry_t *pe, *penext;
vdev_entry_t *ve, *venext;
config_entry_t *ce, *cenext;
name_entry_t *ne, *nenext;
- avl_tree_t slice_cache;
- rdsk_node_t *slice;
void *cookie;
if (dirs == 0) {
dirs = 1;
dir = &default_dir;
--- 1159,1168 ----
*** 1150,1159 ****
--- 1175,1186 ----
*/
for (i = 0; i < dirs; i++) {
tpool_t *t;
char *rdsk;
int dfd;
+ disk_node_t *disks = NULL, *curdisk = NULL;
+ slice_node_t *curslice = NULL;
/* use realpath to normalize the path */
if (realpath(dir[i], path) == 0) {
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
*** 1181,1234 ****
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
rdsk);
goto error;
}
- avl_create(&slice_cache, slice_cache_compare,
- sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
while ((dp = readdir64(dirp)) != NULL) {
! const char *name = dp->d_name;
! if (name[0] == '.' &&
! (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
! slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
! slice->rn_name = zfs_strdup(hdl, name);
! slice->rn_avl = &slice_cache;
! slice->rn_dfd = dfd;
! slice->rn_hdl = hdl;
! slice->rn_nozpool = B_FALSE;
! avl_add(&slice_cache, slice);
}
/*
* create a thread pool to do all of this in parallel;
! * rn_nozpool is not protected, so this is racy in that
! * multiple tasks could decide that the same slice can
! * not hold a zpool, which is benign. Also choose
! * double the number of processors; we hold a lot of
! * locks in the kernel, so going beyond this doesn't
! * buy us much.
*/
t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
0, NULL);
! for (slice = avl_first(&slice_cache); slice;
! (slice = avl_walk(&slice_cache, slice,
! AVL_AFTER)))
! (void) tpool_dispatch(t, zpool_open_func, slice);
tpool_wait(t);
tpool_destroy(t);
! cookie = NULL;
! while ((slice = avl_destroy_nodes(&slice_cache,
! &cookie)) != NULL) {
! if (slice->rn_config != NULL) {
! nvlist_t *config = slice->rn_config;
boolean_t matched = B_TRUE;
if (iarg->poolname != NULL) {
char *pname;
matched = nvlist_lookup_string(config,
ZPOOL_CONFIG_POOL_NAME,
--- 1208,1314 ----
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
rdsk);
goto error;
}
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
while ((dp = readdir64(dirp)) != NULL) {
! boolean_t isslice;
! char *name, *sname;
! int partno;
!
! if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
! (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
continue;
! name = zfs_strdup(hdl, dp->d_name);
!
! /*
! * We create a new disk node every time we encounter
! * a disk with no slices or the disk name changes.
! */
! isslice = get_disk_slice(hdl, name, &sname, &partno);
! if (isslice == B_FALSE || curdisk == NULL ||
! strcmp(curdisk->dn_name, name) != 0) {
! disk_node_t *newdisk;
!
! newdisk = zfs_alloc(hdl, sizeof (disk_node_t));
! newdisk->dn_name = name;
! newdisk->dn_dfd = dfd;
! newdisk->dn_hdl = hdl;
!
! if (curdisk != NULL)
! curdisk->dn_next = newdisk;
! else
! disks = newdisk;
!
! curdisk = newdisk;
! curslice = NULL;
! }
!
! assert(curdisk != NULL);
!
! /*
! * Add a new slice node to the current disk node.
! * We do this for all slices including zero slices.
! */
! if (isslice == B_TRUE) {
! slice_node_t *newslice;
!
! newslice = zfs_alloc(hdl,
! sizeof (slice_node_t));
! newslice->sn_name = sname;
! newslice->sn_partno = partno;
! newslice->sn_disk = curdisk;
!
! if (curslice != NULL)
! curslice->sn_next = newslice;
! else
! curdisk->dn_slices = newslice;
!
! curslice = newslice;
! }
}
/*
* create a thread pool to do all of this in parallel;
! * choose double the number of processors; we hold a lot
! * of locks in the kernel, so going beyond this doesn't
! * buy us much. Each disk (and any slices it might have)
! * is handled inside a single thread.
*/
t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
0, NULL);
! for (curdisk = disks; curdisk; curdisk = curdisk->dn_next)
! (void) tpool_dispatch(t, zpool_open_func, curdisk);
tpool_wait(t);
tpool_destroy(t);
! curdisk = disks;
! while (curdisk != NULL) {
! nvlist_t *config;
! disk_node_t *prevdisk;
!
! /*
! * If the device has slices we examine the config on
! * each of those. If not we use the config directly
! * from the device instead.
! */
! curslice = curdisk->dn_slices;
!
! if (curslice != NULL)
! config = curslice->sn_config;
! else
! config = curdisk->dn_config;
!
! do {
boolean_t matched = B_TRUE;
+ if (config == NULL)
+ goto next;
+
if (iarg->poolname != NULL) {
char *pname;
matched = nvlist_lookup_string(config,
ZPOOL_CONFIG_POOL_NAME,
*** 1240,1263 ****
matched = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_POOL_GUID,
&this_guid) == 0 &&
iarg->guid == this_guid;
}
if (!matched) {
nvlist_free(config);
! config = NULL;
! continue;
}
/* use the non-raw path for the config */
! (void) strlcpy(end, slice->rn_name, pathleft);
if (add_config(hdl, &pools, path, config) != 0)
goto error;
}
! free(slice->rn_name);
! free(slice);
}
- avl_destroy(&slice_cache);
(void) closedir(dirp);
dirp = NULL;
}
--- 1320,1375 ----
matched = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_POOL_GUID,
&this_guid) == 0 &&
iarg->guid == this_guid;
}
+
if (!matched) {
nvlist_free(config);
! goto next;
}
+
/* use the non-raw path for the config */
! if (curslice != NULL)
! (void) snprintf(end, pathleft, "%s%s",
! curdisk->dn_name,
! curslice->sn_name);
! else
! (void) strlcpy(end, curdisk->dn_name,
! pathleft);
if (add_config(hdl, &pools, path, config) != 0)
goto error;
+
+ next:
+ /*
+ * If we're looking at slices free this one
+ * and go move onto the next.
+ */
+ if (curslice != NULL) {
+ slice_node_t *prevslice;
+
+ prevslice = curslice;
+ curslice = curslice->sn_next;
+
+ free(prevslice->sn_name);
+ free(prevslice);
+
+ if (curslice != NULL) {
+ config = curslice->sn_config;
+ }
}
! } while (curslice != NULL);
!
! /*
! * Free this disk and move onto the next one.
! */
! prevdisk = curdisk;
! curdisk = curdisk->dn_next;
!
! free(prevdisk->dn_name);
! free(prevdisk);
}
(void) closedir(dirp);
dirp = NULL;
}