Print this page
zpool import is braindead

*** 20,29 **** --- 20,30 ---- */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 RackTop Systems. */ /* * Pool import support functions. *
*** 432,447 **** get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) { pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; ! nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot; nvlist_t **spares, **l2cache; uint_t i, nspares, nl2cache; boolean_t config_seen; uint64_t best_txg; ! char *name, *hostname; uint64_t guid; uint_t children = 0; nvlist_t **child = NULL; uint_t holes; uint64_t *hole_array, max_id; --- 433,448 ---- get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) { pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; ! nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; nvlist_t **spares, **l2cache; uint_t i, nspares, nl2cache; boolean_t config_seen; uint64_t best_txg; ! char *name, *hostname = NULL; uint64_t guid; uint_t children = 0; nvlist_t **child = NULL; uint_t holes; uint64_t *hole_array, max_id;
*** 903,1048 **** free(label); *config = NULL; return (0); } ! typedef struct rdsk_node { ! char *rn_name; ! int rn_dfd; ! libzfs_handle_t *rn_hdl; ! nvlist_t *rn_config; ! avl_tree_t *rn_avl; ! avl_node_t rn_node; ! boolean_t rn_nozpool; ! } rdsk_node_t; ! static int ! slice_cache_compare(const void *arg1, const void *arg2) { ! const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; ! const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; ! char *nm1slice, *nm2slice; ! int rv; ! /* ! * slices zero and two are the most likely to provide results, ! * so put those first ! */ ! nm1slice = strstr(nm1, "s0"); ! nm2slice = strstr(nm2, "s0"); ! if (nm1slice && !nm2slice) { ! return (-1); ! } ! if (!nm1slice && nm2slice) { ! return (1); ! } ! nm1slice = strstr(nm1, "s2"); ! nm2slice = strstr(nm2, "s2"); ! if (nm1slice && !nm2slice) { ! return (-1); ! } ! if (!nm1slice && nm2slice) { ! return (1); ! } ! rv = strcmp(nm1, nm2); ! if (rv == 0) ! return (0); ! return (rv > 0 ? 1 : -1); } static void ! check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, ! diskaddr_t size, uint_t blksz) { - rdsk_node_t tmpnode; - rdsk_node_t *node; - char sname[MAXNAMELEN]; - - tmpnode.rn_name = &sname[0]; - (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", - diskname, partno); /* * protect against division by zero for disk labels that * contain a bogus sector size */ if (blksz == 0) blksz = DEV_BSIZE; /* too small to contain a zpool? */ ! if ((size < (SPA_MINDEVSIZE / blksz)) && ! (node = avl_find(r, &tmpnode, NULL))) ! node->rn_nozpool = B_TRUE; } static void ! nozpool_all_slices(avl_tree_t *r, const char *sname) ! { ! char diskname[MAXNAMELEN]; ! char *ptr; ! int i; ! ! (void) strncpy(diskname, sname, MAXNAMELEN); ! if (((ptr = strrchr(diskname, 's')) == NULL) && ! ((ptr = strrchr(diskname, 'p')) == NULL)) ! return; ! ptr[0] = 's'; ! ptr[1] = '\0'; ! for (i = 0; i < NDKMAP; i++) ! check_one_slice(r, diskname, i, 0, 1); ! ptr[0] = 'p'; ! for (i = 0; i <= FD_NUMPART; i++) ! check_one_slice(r, diskname, i, 0, 1); ! } ! ! static void ! check_slices(avl_tree_t *r, int fd, const char *sname) { struct extvtoc vtoc; struct dk_gpt *gpt; ! char diskname[MAXNAMELEN]; ! char *ptr; ! int i; ! ! (void) strncpy(diskname, sname, MAXNAMELEN); ! if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) ! return; ! ptr[1] = '\0'; if (read_extvtoc(fd, &vtoc) >= 0) { ! for (i = 0; i < NDKMAP; i++) ! check_one_slice(r, diskname, i, ! vtoc.v_part[i].p_size, vtoc.v_sectorsz); } else if (efi_alloc_and_read(fd, &gpt) >= 0) { ! /* ! * on x86 we'll still have leftover links that point ! * to slices s[9-15], so use NDKMAP instead ! */ ! for (i = 0; i < NDKMAP; i++) ! check_one_slice(r, diskname, i, ! gpt->efi_parts[i].p_size, gpt->efi_lbasize); /* nodes p[1-4] are never used with EFI labels */ ! ptr[0] = 'p'; ! for (i = 1; i <= FD_NUMPART; i++) ! check_one_slice(r, diskname, i, 0, 1); efi_free(gpt); } } static void zpool_open_func(void *arg) { ! rdsk_node_t *rn = arg; struct stat64 statbuf; nvlist_t *config; int fd; ! if (rn->rn_nozpool) return; ! if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { ! /* symlink to a device that's no longer there */ ! if (errno == ENOENT) ! nozpool_all_slices(rn->rn_avl, rn->rn_name); return; } /* * Ignore failed stats. We only want regular * files, character devs and block devs. --- 904,1032 ---- free(label); *config = NULL; return (0); } ! typedef struct slice_node { ! char *sn_name; ! nvlist_t *sn_config; ! boolean_t sn_nozpool; ! int sn_partno; ! struct disk_node *sn_disk; ! struct slice_node *sn_next; ! } slice_node_t; ! ! typedef struct disk_node { ! char *dn_name; ! int dn_dfd; ! libzfs_handle_t *dn_hdl; ! nvlist_t *dn_config; ! struct slice_node *dn_slices; ! struct disk_node *dn_next; ! } disk_node_t; ! ! #ifdef sparc ! #define WHOLE_DISK "s2" ! #else ! #define WHOLE_DISK "p0" ! #endif ! /* ! * This function splits the slice from the device name. Currently it supports ! * VTOC slices (s[0-16]) and DOS/FDISK partitions (p[0-4]). If this function ! * is updated to support other slice types then the check_slices function will ! * also need to be updated. ! */ ! static boolean_t ! get_disk_slice(libzfs_handle_t *hdl, char *disk, char **slice, int *partno) { ! char *p; ! if ((p = strrchr(disk, 's')) == NULL && ! (p = strrchr(disk, 'p')) == NULL) ! return (B_FALSE); ! if (!isdigit(p[1])) ! return (B_FALSE); ! ! *slice = zfs_strdup(hdl, p); ! *partno = atoi(p + 1); ! ! p = '\0'; ! return (B_TRUE); } static void ! check_one_slice(slice_node_t *slice, diskaddr_t size, uint_t blksz) { /* * protect against division by zero for disk labels that * contain a bogus sector size */ if (blksz == 0) blksz = DEV_BSIZE; /* too small to contain a zpool? */ ! if (size < (SPA_MINDEVSIZE / blksz)) ! slice->sn_nozpool = B_TRUE; } static void ! check_slices(slice_node_t *slices, int fd) { struct extvtoc vtoc; struct dk_gpt *gpt; ! slice_node_t *slice; ! diskaddr_t size; if (read_extvtoc(fd, &vtoc) >= 0) { ! for (slice = slices; slice; slice = slice->sn_next) { ! if (slice->sn_name[0] == 'p') ! continue; ! size = vtoc.v_part[slice->sn_partno].p_size; ! check_one_slice(slice, size, vtoc.v_sectorsz); ! } } else if (efi_alloc_and_read(fd, &gpt) >= 0) { ! for (slice = slices; slice; slice = slice->sn_next) { /* nodes p[1-4] are never used with EFI labels */ ! if (slice->sn_name[0] == 'p') { ! if (slice->sn_partno > 0) ! slice->sn_nozpool = B_TRUE; ! continue; ! } ! size = gpt->efi_parts[slice->sn_partno].p_size; ! check_one_slice(slice, size, gpt->efi_lbasize); ! } efi_free(gpt); } } static void zpool_open_func(void *arg) { ! disk_node_t *disk = arg; struct stat64 statbuf; + slice_node_t *slice; nvlist_t *config; + char *devname; int fd; ! /* ! * If the disk has no slices we open it directly, otherwise we try ! * to open the whole disk slice. ! */ ! if (disk->dn_slices == NULL) ! devname = strdup(disk->dn_name); ! else ! (void) asprintf(&devname, "%s" WHOLE_DISK, disk->dn_name); ! ! if (devname == NULL) { ! (void) no_memory(disk->dn_hdl); return; ! } ! ! if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) { ! free(devname); return; } /* * Ignore failed stats. We only want regular * files, character devs and block devs.
*** 1050,1085 **** if (fstat64(fd, &statbuf) != 0 || (!S_ISREG(statbuf.st_mode) && !S_ISCHR(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) { (void) close(fd); return; } /* this file is too small to hold a zpool */ ! if (S_ISREG(statbuf.st_mode) && ! statbuf.st_size < SPA_MINDEVSIZE) { (void) close(fd); return; ! } else if (!S_ISREG(statbuf.st_mode)) { /* * Try to read the disk label first so we don't have to * open a bunch of minor nodes that can't have a zpool. */ ! check_slices(rn->rn_avl, fd, rn->rn_name); } ! if ((zpool_read_label(fd, &config)) != 0) { (void) close(fd); ! (void) no_memory(rn->rn_hdl); return; } (void) close(fd); ! rn->rn_config = config; ! if (config != NULL) { ! assert(rn->rn_nozpool == B_FALSE); } } /* * Given a file descriptor, clear (zero) the label information. This function --- 1034,1112 ---- if (fstat64(fd, &statbuf) != 0 || (!S_ISREG(statbuf.st_mode) && !S_ISCHR(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) { (void) close(fd); + free(devname); return; } /* this file is too small to hold a zpool */ ! if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { (void) close(fd); + free(devname); return; ! } else if (!S_ISREG(statbuf.st_mode) && disk->dn_slices != NULL) { /* * Try to read the disk label first so we don't have to * open a bunch of minor nodes that can't have a zpool. */ ! check_slices(disk->dn_slices, fd); } ! /* ! * If we're working with the device directly (it has no slices) ! * then we can just read the config and we're done. ! */ ! if (disk->dn_slices == NULL) { ! if (zpool_read_label(fd, &config) != 0) { ! (void) no_memory(disk->dn_hdl); (void) close(fd); ! free(devname); return; } + disk->dn_config = config; + (void) close(fd); + free(devname); + return; + } + (void) close(fd); + free(devname); + + /* + * Go through and read the label off each slice. The check_slices + * function has already performed some basic checks and set the + * sn_nozpool flag on any slices which just can't contain a zpool. + */ + for (slice = disk->dn_slices; slice; slice = slice->sn_next) { + if (slice->sn_nozpool == B_TRUE) + continue; + + (void) asprintf(&devname, "%s%s", disk->dn_name, + slice->sn_name); + + if (devname == NULL) { + (void) no_memory(disk->dn_hdl); + free(devname); + return; + } + if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) { + free(devname); + continue; + } + + if ((zpool_read_label(fd, &config)) != 0) { + (void) no_memory(disk->dn_hdl); + (void) close(fd); + free(devname); + return; + } ! slice->sn_config = config; ! (void) close(fd); ! free(devname); } } /* * Given a file descriptor, clear (zero) the label information. This function
*** 1132,1143 **** pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; - avl_tree_t slice_cache; - rdsk_node_t *slice; void *cookie; if (dirs == 0) { dirs = 1; dir = &default_dir; --- 1159,1168 ----
*** 1150,1159 **** --- 1175,1186 ---- */ for (i = 0; i < dirs; i++) { tpool_t *t; char *rdsk; int dfd; + disk_node_t *disks = NULL, *curdisk = NULL; + slice_node_t *curslice = NULL; /* use realpath to normalize the path */ if (realpath(dir[i], path) == 0) { (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
*** 1181,1234 **** dgettext(TEXT_DOMAIN, "cannot open '%s'"), rdsk); goto error; } - avl_create(&slice_cache, slice_cache_compare, - sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); /* * This is not MT-safe, but we have no MT consumers of libzfs */ while ((dp = readdir64(dirp)) != NULL) { ! const char *name = dp->d_name; ! if (name[0] == '.' && ! (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; ! slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); ! slice->rn_name = zfs_strdup(hdl, name); ! slice->rn_avl = &slice_cache; ! slice->rn_dfd = dfd; ! slice->rn_hdl = hdl; ! slice->rn_nozpool = B_FALSE; ! avl_add(&slice_cache, slice); } /* * create a thread pool to do all of this in parallel; ! * rn_nozpool is not protected, so this is racy in that ! * multiple tasks could decide that the same slice can ! * not hold a zpool, which is benign. Also choose ! * double the number of processors; we hold a lot of ! * locks in the kernel, so going beyond this doesn't ! * buy us much. */ t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); ! for (slice = avl_first(&slice_cache); slice; ! (slice = avl_walk(&slice_cache, slice, ! AVL_AFTER))) ! (void) tpool_dispatch(t, zpool_open_func, slice); tpool_wait(t); tpool_destroy(t); ! cookie = NULL; ! while ((slice = avl_destroy_nodes(&slice_cache, ! &cookie)) != NULL) { ! if (slice->rn_config != NULL) { ! nvlist_t *config = slice->rn_config; boolean_t matched = B_TRUE; if (iarg->poolname != NULL) { char *pname; matched = nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, --- 1208,1314 ---- dgettext(TEXT_DOMAIN, "cannot open '%s'"), rdsk); goto error; } /* * This is not MT-safe, but we have no MT consumers of libzfs */ while ((dp = readdir64(dirp)) != NULL) { ! boolean_t isslice; ! char *name, *sname; ! int partno; ! ! if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || ! (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) continue; ! name = zfs_strdup(hdl, dp->d_name); ! ! /* ! * We create a new disk node every time we encounter ! * a disk with no slices or the disk name changes. ! */ ! isslice = get_disk_slice(hdl, name, &sname, &partno); ! if (isslice == B_FALSE || curdisk == NULL || ! strcmp(curdisk->dn_name, name) != 0) { ! disk_node_t *newdisk; ! ! newdisk = zfs_alloc(hdl, sizeof (disk_node_t)); ! newdisk->dn_name = name; ! newdisk->dn_dfd = dfd; ! newdisk->dn_hdl = hdl; ! ! if (curdisk != NULL) ! curdisk->dn_next = newdisk; ! else ! disks = newdisk; ! ! curdisk = newdisk; ! curslice = NULL; ! } ! ! assert(curdisk != NULL); ! ! /* ! * Add a new slice node to the current disk node. ! * We do this for all slices including zero slices. ! */ ! if (isslice == B_TRUE) { ! slice_node_t *newslice; ! ! newslice = zfs_alloc(hdl, ! sizeof (slice_node_t)); ! newslice->sn_name = sname; ! newslice->sn_partno = partno; ! newslice->sn_disk = curdisk; ! ! if (curslice != NULL) ! curslice->sn_next = newslice; ! else ! curdisk->dn_slices = newslice; ! ! curslice = newslice; ! } } /* * create a thread pool to do all of this in parallel; ! * choose double the number of processors; we hold a lot ! * of locks in the kernel, so going beyond this doesn't ! * buy us much. Each disk (and any slices it might have) ! * is handled inside a single thread. */ t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); ! for (curdisk = disks; curdisk; curdisk = curdisk->dn_next) ! (void) tpool_dispatch(t, zpool_open_func, curdisk); tpool_wait(t); tpool_destroy(t); ! curdisk = disks; ! while (curdisk != NULL) { ! nvlist_t *config; ! disk_node_t *prevdisk; ! ! /* ! * If the device has slices we examine the config on ! * each of those. If not we use the config directly ! * from the device instead. ! */ ! curslice = curdisk->dn_slices; ! ! if (curslice != NULL) ! config = curslice->sn_config; ! else ! config = curdisk->dn_config; ! ! do { boolean_t matched = B_TRUE; + if (config == NULL) + goto next; + if (iarg->poolname != NULL) { char *pname; matched = nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
*** 1240,1263 **** matched = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 && iarg->guid == this_guid; } if (!matched) { nvlist_free(config); ! config = NULL; ! continue; } /* use the non-raw path for the config */ ! (void) strlcpy(end, slice->rn_name, pathleft); if (add_config(hdl, &pools, path, config) != 0) goto error; } ! free(slice->rn_name); ! free(slice); } - avl_destroy(&slice_cache); (void) closedir(dirp); dirp = NULL; } --- 1320,1375 ---- matched = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 && iarg->guid == this_guid; } + if (!matched) { nvlist_free(config); ! goto next; } + /* use the non-raw path for the config */ ! if (curslice != NULL) ! (void) snprintf(end, pathleft, "%s%s", ! curdisk->dn_name, ! curslice->sn_name); ! else ! (void) strlcpy(end, curdisk->dn_name, ! pathleft); if (add_config(hdl, &pools, path, config) != 0) goto error; + + next: + /* + * If we're looking at slices free this one + * and go move onto the next. + */ + if (curslice != NULL) { + slice_node_t *prevslice; + + prevslice = curslice; + curslice = curslice->sn_next; + + free(prevslice->sn_name); + free(prevslice); + + if (curslice != NULL) { + config = curslice->sn_config; + } } ! } while (curslice != NULL); ! ! /* ! * Free this disk and move onto the next one. ! */ ! prevdisk = curdisk; ! curdisk = curdisk->dn_next; ! ! free(prevdisk->dn_name); ! free(prevdisk); } (void) closedir(dirp); dirp = NULL; }