5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * Pool import support functions.
29 *
30 * To import a pool, we rely on reading the configuration information from the
31 * ZFS label of each device. If we successfully read the label, then we
32 * organize the configuration information in the following hierarchy:
33 *
34 * pool guid -> toplevel vdev guid -> label txg
35 *
36 * Duplicate entries matching this same tuple will be discarded. Once we have
37 * examined every device, we pick the best label txg config for each toplevel
38 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
39 * update any paths that have changed. Finally, we attempt to import the pool
40 * using our derived config, and record the results.
41 */
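/*
 * As a minimal sketch of that tuple (illustrative only; error handling
 * omitted, and ZPOOL_CONFIG_TOP_GUID is assumed to name the toplevel
 * vdev guid as it does elsewhere in libzfs), the three keys can be read
 * from the label nvlist returned by zpool_read_label():
 *
 *	nvlist_t *label;
 *	uint64_t pool_guid, top_guid, txg;
 *
 *	if (zpool_read_label(fd, &label) == 0 && label != NULL) {
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_POOL_GUID, &pool_guid);
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_TOP_GUID, &top_guid);
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_POOL_TXG, &txg);
 *		nvlist_free(label);
 *	}
 */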
42
43 #include <ctype.h>
44 #include <devid.h>
417 /* Top-level is a hole */
418 if (hole_array[c] == id)
419 return (B_TRUE);
420 }
421 return (B_FALSE);
422 }
423
424 /*
425 * Convert our list of pools into the definitive set of configurations. We
426 * start by picking the best config for each toplevel vdev. Once that's done,
427 * we assemble the toplevel vdevs into a full config for the pool. We make a
428 * pass to fix up any incorrect paths, and then add it to the main list to
429 * return to the user.
430 */
431 static nvlist_t *
432 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
433 {
434 pool_entry_t *pe;
435 vdev_entry_t *ve;
436 config_entry_t *ce;
437 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
438 nvlist_t **spares, **l2cache;
439 uint_t i, nspares, nl2cache;
440 boolean_t config_seen;
441 uint64_t best_txg;
442 char *name, *hostname;
443 uint64_t guid;
444 uint_t children = 0;
445 nvlist_t **child = NULL;
446 uint_t holes;
447 uint64_t *hole_array, max_id;
448 uint_t c;
449 boolean_t isactive;
450 uint64_t hostid;
451 nvlist_t *nvl;
452 boolean_t found_one = B_FALSE;
453 boolean_t valid_top_config = B_FALSE;
454
455 if (nvlist_alloc(&ret, 0, 0) != 0)
456 goto nomem;
457
458 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
459 uint64_t id, max_txg = 0;
460
461 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
462 goto nomem;
888 nvlist_free(*config);
889 continue;
890 }
891
892 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
893 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
894 &txg) != 0 || txg == 0)) {
895 nvlist_free(*config);
896 continue;
897 }
898
899 free(label);
900 return (0);
901 }
902
903 free(label);
904 *config = NULL;
905 return (0);
906 }
907
908 typedef struct rdsk_node {
909 char *rn_name;
910 int rn_dfd;
911 libzfs_handle_t *rn_hdl;
912 nvlist_t *rn_config;
913 avl_tree_t *rn_avl;
914 avl_node_t rn_node;
915 boolean_t rn_nozpool;
916 } rdsk_node_t;
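/*
 * One rdsk_node_t is created for every name found in the device
 * directory; rn_config receives the label nvlist read by
 * zpool_open_func(), and rn_nozpool is a hint set by check_one_slice()
 * once a slice is known to be too small to hold a pool.
 */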
917
918 static int
919 slice_cache_compare(const void *arg1, const void *arg2)
920 {
921 const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
922 const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
923 char *nm1slice, *nm2slice;
924 int rv;
925
926 /*
927 * slices zero and two are the most likely to provide results,
928 * so put those first
929 */
930 nm1slice = strstr(nm1, "s0");
931 nm2slice = strstr(nm2, "s0");
932 if (nm1slice && !nm2slice) {
933 return (-1);
934 }
935 if (!nm1slice && nm2slice) {
936 return (1);
937 }
938 nm1slice = strstr(nm1, "s2");
939 nm2slice = strstr(nm2, "s2");
940 if (nm1slice && !nm2slice) {
941 return (-1);
942 }
943 if (!nm1slice && nm2slice) {
944 return (1);
945 }
946
947 rv = strcmp(nm1, nm2);
948 if (rv == 0)
949 return (0);
950 return (rv > 0 ? 1 : -1);
951 }
952
953 static void
954 check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
955 diskaddr_t size, uint_t blksz)
956 {
957 rdsk_node_t tmpnode;
958 rdsk_node_t *node;
959 char sname[MAXNAMELEN];
960
961 tmpnode.rn_name = &sname[0];
962 (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
963 diskname, partno);
964 /*
965 * protect against division by zero for disk labels that
966 * contain a bogus sector size
967 */
968 if (blksz == 0)
969 blksz = DEV_BSIZE;
970 /* too small to contain a zpool? */
971 if ((size < (SPA_MINDEVSIZE / blksz)) &&
972 (node = avl_find(r, &tmpnode, NULL)))
973 node->rn_nozpool = B_TRUE;
974 }
975
976 static void
977 nozpool_all_slices(avl_tree_t *r, const char *sname)
978 {
979 char diskname[MAXNAMELEN];
980 char *ptr;
981 int i;
982
983 (void) strncpy(diskname, sname, MAXNAMELEN);
984 if (((ptr = strrchr(diskname, 's')) == NULL) &&
985 ((ptr = strrchr(diskname, 'p')) == NULL))
986 return;
987 ptr[0] = 's';
988 ptr[1] = '\0';
989 for (i = 0; i < NDKMAP; i++)
990 check_one_slice(r, diskname, i, 0, 1);
991 ptr[0] = 'p';
992 for (i = 0; i <= FD_NUMPART; i++)
993 check_one_slice(r, diskname, i, 0, 1);
994 }
995
996 static void
997 check_slices(avl_tree_t *r, int fd, const char *sname)
998 {
999 struct extvtoc vtoc;
1000 struct dk_gpt *gpt;
1001 char diskname[MAXNAMELEN];
1002 char *ptr;
1003 int i;
1004
1005 (void) strncpy(diskname, sname, MAXNAMELEN);
1006 if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
1007 return;
1008 ptr[1] = '\0';
1009
1010 if (read_extvtoc(fd, &vtoc) >= 0) {
1011 for (i = 0; i < NDKMAP; i++)
1012 check_one_slice(r, diskname, i,
1013 vtoc.v_part[i].p_size, vtoc.v_sectorsz);
1014 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
1015 /*
1016 * on x86 we'll still have leftover links that point
1017 * to slices s[9-15], so use NDKMAP instead
1018 */
1019 for (i = 0; i < NDKMAP; i++)
1020 check_one_slice(r, diskname, i,
1021 gpt->efi_parts[i].p_size, gpt->efi_lbasize);
1022 /* nodes p[1-4] are never used with EFI labels */
1023 ptr[0] = 'p';
1024 for (i = 1; i <= FD_NUMPART; i++)
1025 check_one_slice(r, diskname, i, 0, 1);
1026 efi_free(gpt);
1027 }
1028 }
1029
1030 static void
1031 zpool_open_func(void *arg)
1032 {
1033 rdsk_node_t *rn = arg;
1034 struct stat64 statbuf;
1035 nvlist_t *config;
1036 int fd;
1037
1038 if (rn->rn_nozpool)
1039 return;
1040 if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1041 /* symlink to a device that's no longer there */
1042 if (errno == ENOENT)
1043 nozpool_all_slices(rn->rn_avl, rn->rn_name);
1044 return;
1045 }
1046 /*
1047 * Ignore failed stats. We only want regular
1048 * files, character devs and block devs.
1049 */
1050 if (fstat64(fd, &statbuf) != 0 ||
1051 (!S_ISREG(statbuf.st_mode) &&
1052 !S_ISCHR(statbuf.st_mode) &&
1053 !S_ISBLK(statbuf.st_mode))) {
1054 (void) close(fd);
1055 return;
1056 }
1057 /* this file is too small to hold a zpool */
1058 if (S_ISREG(statbuf.st_mode) &&
1059 statbuf.st_size < SPA_MINDEVSIZE) {
1060 (void) close(fd);
1061 return;
1062 } else if (!S_ISREG(statbuf.st_mode)) {
1063 /*
1064 * Try to read the disk label first so we don't have to
1065 * open a bunch of minor nodes that can't have a zpool.
1066 */
1067 check_slices(rn->rn_avl, fd, rn->rn_name);
1068 }
1069
1070 if ((zpool_read_label(fd, &config)) != 0) {
1071 (void) close(fd);
1072 (void) no_memory(rn->rn_hdl);
1073 return;
1074 }
1075 (void) close(fd);
1076
1077
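	/*
	 * A NULL config here simply means no valid label was found on
	 * this device; zpool_read_label() reports that as success, and
	 * such nodes are skipped when the slice cache is walked later.
	 */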
1078 rn->rn_config = config;
1079 if (config != NULL) {
1080 assert(rn->rn_nozpool == B_FALSE);
1081 }
1082 }
1083
1084 /*
1085 * Given a file descriptor, clear (zero) the label information. This function
1086 * is currently only used in the appliance stack as part of the ZFS sysevent
1087 * module.
1088 */
1089 int
1090 zpool_clear_label(int fd)
1091 {
1092 struct stat64 statbuf;
1093 int l;
1094 vdev_label_t *label;
1095 uint64_t size;
1096
1097 if (fstat64(fd, &statbuf) == -1)
1098 return (0);
1099 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1100
1117 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1118 * poolname or guid (but not both) are provided by the caller when trying
1119 * to import a specific pool.
1120 */
1121 static nvlist_t *
1122 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1123 {
1124 int i, dirs = iarg->paths;
1125 DIR *dirp = NULL;
1126 struct dirent64 *dp;
1127 char path[MAXPATHLEN];
1128 char *end, **dir = iarg->path;
1129 size_t pathleft;
1130 nvlist_t *ret = NULL;
1131 static char *default_dir = "/dev/dsk";
1132 pool_list_t pools = { 0 };
1133 pool_entry_t *pe, *penext;
1134 vdev_entry_t *ve, *venext;
1135 config_entry_t *ce, *cenext;
1136 name_entry_t *ne, *nenext;
1137 avl_tree_t slice_cache;
1138 rdsk_node_t *slice;
1139 void *cookie;
1140
1141 if (dirs == 0) {
1142 dirs = 1;
1143 dir = &default_dir;
1144 }
1145
1146 /*
1147 * Go through and read the label configuration information from every
1148 * possible device, organizing the information according to pool GUID
1149 * and toplevel GUID.
1150 */
1151 for (i = 0; i < dirs; i++) {
1152 tpool_t *t;
1153 char *rdsk;
1154 int dfd;
1155
1156 /* use realpath to normalize the path */
1157 if (realpath(dir[i], path) == 0) {
1158 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1159 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1160 goto error;
1161 }
1162 end = &path[strlen(path)];
1163 *end++ = '/';
1164 *end = 0;
1165 pathleft = &path[sizeof (path)] - end;
1166
1167 /*
1168 * Using raw devices instead of block devices when we're
1169 * reading the labels skips a bunch of slow operations during
1170 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1171 */
1172 if (strcmp(path, "/dev/dsk/") == 0)
1173 rdsk = "/dev/rdsk/";
1174 else
1175 rdsk = path;
1176
1177 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1178 (dirp = fdopendir(dfd)) == NULL) {
1179 zfs_error_aux(hdl, strerror(errno));
1180 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1181 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1182 rdsk);
1183 goto error;
1184 }
1185
1186 avl_create(&slice_cache, slice_cache_compare,
1187 sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1188 /*
1189 * This is not MT-safe, but we have no MT consumers of libzfs
1190 */
1191 while ((dp = readdir64(dirp)) != NULL) {
1192 const char *name = dp->d_name;
1193 if (name[0] == '.' &&
1194 (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1195 continue;
1196
1197 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1198 slice->rn_name = zfs_strdup(hdl, name);
1199 slice->rn_avl = &slice_cache;
1200 slice->rn_dfd = dfd;
1201 slice->rn_hdl = hdl;
1202 slice->rn_nozpool = B_FALSE;
1203 avl_add(&slice_cache, slice);
1204 }
1205 /*
1206 * create a thread pool to do all of this in parallel;
1207 * rn_nozpool is not protected, so this is racy in that
1208 * multiple tasks could decide that the same slice can
1209 * not hold a zpool, which is benign. Also choose
1210 * double the number of processors; we hold a lot of
1211 * locks in the kernel, so going beyond this doesn't
1212 * buy us much.
1213 */
1214 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1215 0, NULL);
1216 for (slice = avl_first(&slice_cache); slice;
1217 (slice = avl_walk(&slice_cache, slice,
1218 AVL_AFTER)))
1219 (void) tpool_dispatch(t, zpool_open_func, slice);
1220 tpool_wait(t);
1221 tpool_destroy(t);
1222
1223 cookie = NULL;
1224 while ((slice = avl_destroy_nodes(&slice_cache,
1225 &cookie)) != NULL) {
1226 if (slice->rn_config != NULL) {
1227 nvlist_t *config = slice->rn_config;
1228 boolean_t matched = B_TRUE;
1229
1230 if (iarg->poolname != NULL) {
1231 char *pname;
1232
1233 matched = nvlist_lookup_string(config,
1234 ZPOOL_CONFIG_POOL_NAME,
1235 &pname) == 0 &&
1236 strcmp(iarg->poolname, pname) == 0;
1237 } else if (iarg->guid != 0) {
1238 uint64_t this_guid;
1239
1240 matched = nvlist_lookup_uint64(config,
1241 ZPOOL_CONFIG_POOL_GUID,
1242 &this_guid) == 0 &&
1243 iarg->guid == this_guid;
1244 }
1245 if (!matched) {
1246 nvlist_free(config);
1247 config = NULL;
1248 continue;
1249 }
1250 /* use the non-raw path for the config */
1251 (void) strlcpy(end, slice->rn_name, pathleft);
1252 if (add_config(hdl, &pools, path, config) != 0)
1253 goto error;
1254 }
1255 free(slice->rn_name);
1256 free(slice);
1257 }
1258 avl_destroy(&slice_cache);
1259
1260 (void) closedir(dirp);
1261 dirp = NULL;
1262 }
1263
1264 ret = get_configs(hdl, &pools, iarg->can_be_active);
1265
1266 error:
1267 for (pe = pools.pools; pe != NULL; pe = penext) {
1268 penext = pe->pe_next;
1269 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1270 venext = ve->ve_next;
1271 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1272 cenext = ce->ce_next;
1273 if (ce->ce_config)
1274 nvlist_free(ce->ce_config);
1275 free(ce);
1276 }
1277 free(ve);
1278 }
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 RackTop Systems.
26 */
27
28 /*
29 * Pool import support functions.
30 *
31 * To import a pool, we rely on reading the configuration information from the
32 * ZFS label of each device. If we successfully read the label, then we
33 * organize the configuration information in the following hierarchy:
34 *
35 * pool guid -> toplevel vdev guid -> label txg
36 *
37 * Duplicate entries matching this same tuple will be discarded. Once we have
38 * examined every device, we pick the best label txg config for each toplevel
39 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
40 * update any paths that have changed. Finally, we attempt to import the pool
41 * using our derived config, and record the results.
42 */
43
44 #include <ctype.h>
45 #include <devid.h>
418 /* Top-level is a hole */
419 if (hole_array[c] == id)
420 return (B_TRUE);
421 }
422 return (B_FALSE);
423 }
424
425 /*
426 * Convert our list of pools into the definitive set of configurations. We
427 * start by picking the best config for each toplevel vdev. Once that's done,
428 * we assemble the toplevel vdevs into a full config for the pool. We make a
429 * pass to fix up any incorrect paths, and then add it to the main list to
430 * return to the user.
431 */
432 static nvlist_t *
433 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
434 {
435 pool_entry_t *pe;
436 vdev_entry_t *ve;
437 config_entry_t *ce;
438 nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
439 nvlist_t **spares, **l2cache;
440 uint_t i, nspares, nl2cache;
441 boolean_t config_seen;
442 uint64_t best_txg;
443 char *name, *hostname = NULL;
444 uint64_t guid;
445 uint_t children = 0;
446 nvlist_t **child = NULL;
447 uint_t holes;
448 uint64_t *hole_array, max_id;
449 uint_t c;
450 boolean_t isactive;
451 uint64_t hostid;
452 nvlist_t *nvl;
453 boolean_t found_one = B_FALSE;
454 boolean_t valid_top_config = B_FALSE;
455
456 if (nvlist_alloc(&ret, 0, 0) != 0)
457 goto nomem;
458
459 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
460 uint64_t id, max_txg = 0;
461
462 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
463 goto nomem;
889 nvlist_free(*config);
890 continue;
891 }
892
893 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
894 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
895 &txg) != 0 || txg == 0)) {
896 nvlist_free(*config);
897 continue;
898 }
899
900 free(label);
901 return (0);
902 }
903
904 free(label);
905 *config = NULL;
906 return (0);
907 }
908
909 typedef struct slice_node {
910 char *sn_name;
911 nvlist_t *sn_config;
912 boolean_t sn_nozpool;
913 int sn_partno;
914 struct disk_node *sn_disk;
915 struct slice_node *sn_next;
916 } slice_node_t;
917
918 typedef struct disk_node {
919 char *dn_name;
920 int dn_dfd;
921 libzfs_handle_t *dn_hdl;
922 nvlist_t *dn_config;
923 struct slice_node *dn_slices;
924 struct disk_node *dn_next;
925 } disk_node_t;
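/*
 * zpool_find_import_impl() below builds one disk_node_t per disk found
 * in the device directory, each carrying a singly linked list of the
 * slice_node_t entries (VTOC slices and FDISK partitions) seen for that
 * disk.  zpool_open_func() then fills in dn_config for sliceless devices
 * or the per-slice sn_config label nvlists otherwise.
 */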
926
927 #ifdef sparc
928 #define WHOLE_DISK "s2"
929 #else
930 #define WHOLE_DISK "p0"
931 #endif
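/*
 * WHOLE_DISK names the node that spans the entire disk: conventionally
 * the backup slice s2 under a SPARC VTOC label, and the p0 node on x86.
 * Opening it lets zpool_open_func() run its size and partition-table
 * checks once per disk before reading labels from individual slices.
 */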
932
933 /*
934 * This function splits the slice from the device name. Currently it supports
935 * VTOC slices (s[0-15]) and DOS/FDISK partitions (p[0-4]). If this function
936 * is updated to support other slice types then the check_slices function will
937 * also need to be updated.
938 */
939 static boolean_t
940 get_disk_slice(libzfs_handle_t *hdl, char *disk, char **slice, int *partno)
941 {
942 char *p;
943
944 if ((p = strrchr(disk, 's')) == NULL &&
945 (p = strrchr(disk, 'p')) == NULL)
946 return (B_FALSE);
947
948 if (!isdigit(p[1]))
949 return (B_FALSE);
950
951 *slice = zfs_strdup(hdl, p);
952 *partno = atoi(p + 1);
953
954 p[0] = '\0';
955 return (B_TRUE);
956 }
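/*
 * For example (illustrative only): given disk = "c0t0d0s3", this returns
 * B_TRUE, truncates disk to "c0t0d0", sets *slice to a strdup'd "s3",
 * and sets *partno to 3; a name with no trailing s<digit> or p<digit>
 * suffix returns B_FALSE with the output arguments untouched.
 */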
957
958 static void
959 check_one_slice(slice_node_t *slice, diskaddr_t size, uint_t blksz)
960 {
961 /*
962 * protect against division by zero for disk labels that
963 * contain a bogus sector size
964 */
965 if (blksz == 0)
966 blksz = DEV_BSIZE;
967 /* too small to contain a zpool? */
968 if (size < (SPA_MINDEVSIZE / blksz))
969 slice->sn_nozpool = B_TRUE;
970 }
971
972 static void
973 check_slices(slice_node_t *slices, int fd)
974 {
975 struct extvtoc vtoc;
976 struct dk_gpt *gpt;
977 slice_node_t *slice;
978 diskaddr_t size;
979
980 if (read_extvtoc(fd, &vtoc) >= 0) {
981 for (slice = slices; slice; slice = slice->sn_next) {
982 if (slice->sn_name[0] == 'p')
983 continue;
984 size = vtoc.v_part[slice->sn_partno].p_size;
985 check_one_slice(slice, size, vtoc.v_sectorsz);
986 }
987 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
988 for (slice = slices; slice; slice = slice->sn_next) {
989 /* nodes p[1-4] are never used with EFI labels */
990 if (slice->sn_name[0] == 'p') {
991 if (slice->sn_partno > 0)
992 slice->sn_nozpool = B_TRUE;
993 continue;
994 }
995 size = gpt->efi_parts[slice->sn_partno].p_size;
996 check_one_slice(slice, size, gpt->efi_lbasize);
997 }
998 efi_free(gpt);
999 }
1000 }
1001
1002 static void
1003 zpool_open_func(void *arg)
1004 {
1005 disk_node_t *disk = arg;
1006 struct stat64 statbuf;
1007 slice_node_t *slice;
1008 nvlist_t *config;
1009 char *devname;
1010 int fd;
1011
1012 /*
1013 * If the disk has no slices, we open it directly; otherwise we try
1014 * to open the whole-disk slice.
1015 */
1016 if (disk->dn_slices == NULL)
1017 devname = strdup(disk->dn_name);
1018 else
1019 (void) asprintf(&devname, "%s" WHOLE_DISK, disk->dn_name);
1020
1021 if (devname == NULL) {
1022 (void) no_memory(disk->dn_hdl);
1023 return;
1024 }
1025
1026 if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
1027 free(devname);
1028 return;
1029 }
1030 /*
1031 * Ignore failed stats. We only want regular
1032 * files, character devs and block devs.
1033 */
1034 if (fstat64(fd, &statbuf) != 0 ||
1035 (!S_ISREG(statbuf.st_mode) &&
1036 !S_ISCHR(statbuf.st_mode) &&
1037 !S_ISBLK(statbuf.st_mode))) {
1038 (void) close(fd);
1039 free(devname);
1040 return;
1041 }
1042 /* this file is too small to hold a zpool */
1043 if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
1044 (void) close(fd);
1045 free(devname);
1046 return;
1047 } else if (!S_ISREG(statbuf.st_mode) && disk->dn_slices != NULL) {
1048 /*
1049 * Try to read the disk label first so we don't have to
1050 * open a bunch of minor nodes that can't have a zpool.
1051 */
1052 check_slices(disk->dn_slices, fd);
1053 }
1054
1055 /*
1056 * If we're working with the device directly (it has no slices)
1057 * then we can just read the config and we're done.
1058 */
1059 if (disk->dn_slices == NULL) {
1060 if (zpool_read_label(fd, &config) != 0) {
1061 (void) no_memory(disk->dn_hdl);
1062 (void) close(fd);
1063 free(devname);
1064 return;
1065 }
1066 disk->dn_config = config;
1067 (void) close(fd);
1068 free(devname);
1069 return;
1070 }
1071
1072 (void) close(fd);
1073 free(devname);
1074
1075 /*
1076 * Go through and read the label off each slice. The check_slices
1077 * function has already performed some basic checks and set the
1078 * sn_nozpool flag on any slices which just can't contain a zpool.
1079 */
1080 for (slice = disk->dn_slices; slice; slice = slice->sn_next) {
1081 if (slice->sn_nozpool == B_TRUE)
1082 continue;
1083
1084 (void) asprintf(&devname, "%s%s", disk->dn_name,
1085 slice->sn_name);
1086
1087 if (devname == NULL) {
1088 (void) no_memory(disk->dn_hdl);
1089 free(devname);
1090 return;
1091 }
1092
1093 if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
1094 free(devname);
1095 continue;
1096 }
1097
1098 if ((zpool_read_label(fd, &config)) != 0) {
1099 (void) no_memory(disk->dn_hdl);
1100 (void) close(fd);
1101 free(devname);
1102 return;
1103 }
1104
1105 slice->sn_config = config;
1106 (void) close(fd);
1107 free(devname);
1108 }
1109 }
1110
1111 /*
1112 * Given a file descriptor, clear (zero) the label information. This function
1113 * is currently only used in the appliance stack as part of the ZFS sysevent
1114 * module.
1115 */
1116 int
1117 zpool_clear_label(int fd)
1118 {
1119 struct stat64 statbuf;
1120 int l;
1121 vdev_label_t *label;
1122 uint64_t size;
1123
1124 if (fstat64(fd, &statbuf) == -1)
1125 return (0);
1126 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1127
1144 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1145 * poolname or guid (but not both) are provided by the caller when trying
1146 * to import a specific pool.
1147 */
1148 static nvlist_t *
1149 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1150 {
1151 int i, dirs = iarg->paths;
1152 DIR *dirp = NULL;
1153 struct dirent64 *dp;
1154 char path[MAXPATHLEN];
1155 char *end, **dir = iarg->path;
1156 size_t pathleft;
1157 nvlist_t *ret = NULL;
1158 static char *default_dir = "/dev/dsk";
1159 pool_list_t pools = { 0 };
1160 pool_entry_t *pe, *penext;
1161 vdev_entry_t *ve, *venext;
1162 config_entry_t *ce, *cenext;
1163 name_entry_t *ne, *nenext;
1164 void *cookie;
1165
1166 if (dirs == 0) {
1167 dirs = 1;
1168 dir = &default_dir;
1169 }
1170
1171 /*
1172 * Go through and read the label configuration information from every
1173 * possible device, organizing the information according to pool GUID
1174 * and toplevel GUID.
1175 */
1176 for (i = 0; i < dirs; i++) {
1177 tpool_t *t;
1178 char *rdsk;
1179 int dfd;
1180 disk_node_t *disks = NULL, *curdisk = NULL;
1181 slice_node_t *curslice = NULL;
1182
1183 /* use realpath to normalize the path */
1184 if (realpath(dir[i], path) == 0) {
1185 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1186 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1187 goto error;
1188 }
1189 end = &path[strlen(path)];
1190 *end++ = '/';
1191 *end = 0;
1192 pathleft = &path[sizeof (path)] - end;
1193
1194 /*
1195 * Using raw devices instead of block devices when we're
1196 * reading the labels skips a bunch of slow operations during
1197 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1198 */
1199 if (strcmp(path, "/dev/dsk/") == 0)
1200 rdsk = "/dev/rdsk/";
1201 else
1202 rdsk = path;
1203
1204 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1205 (dirp = fdopendir(dfd)) == NULL) {
1206 zfs_error_aux(hdl, strerror(errno));
1207 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1208 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1209 rdsk);
1210 goto error;
1211 }
1212
1213 /*
1214 * This is not MT-safe, but we have no MT consumers of libzfs
1215 */
1216 while ((dp = readdir64(dirp)) != NULL) {
1217 boolean_t isslice;
1218 char *name, *sname;
1219 int partno;
1220
1221 if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
1222 (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
1223 continue;
1224
1225 name = zfs_strdup(hdl, dp->d_name);
1226
1227 /*
1228 * We create a new disk node every time we encounter
1229 * a disk with no slices, or whenever the disk name changes.
1230 */
1231 isslice = get_disk_slice(hdl, name, &sname, &partno);
1232 if (isslice == B_FALSE || curdisk == NULL ||
1233 strcmp(curdisk->dn_name, name) != 0) {
1234 disk_node_t *newdisk;
1235
1236 newdisk = zfs_alloc(hdl, sizeof (disk_node_t));
1237 newdisk->dn_name = name;
1238 newdisk->dn_dfd = dfd;
1239 newdisk->dn_hdl = hdl;
1240
1241 if (curdisk != NULL)
1242 curdisk->dn_next = newdisk;
1243 else
1244 disks = newdisk;
1245
1246 curdisk = newdisk;
1247 curslice = NULL;
1248 }
1249
1250 assert(curdisk != NULL);
1251
1252 /*
1253 * Add a new slice node to the current disk node.
1254 * We do this for all slices including zero slices.
1255 */
1256 if (isslice == B_TRUE) {
1257 slice_node_t *newslice;
1258
1259 newslice = zfs_alloc(hdl,
1260 sizeof (slice_node_t));
1261 newslice->sn_name = sname;
1262 newslice->sn_partno = partno;
1263 newslice->sn_disk = curdisk;
1264
1265 if (curslice != NULL)
1266 curslice->sn_next = newslice;
1267 else
1268 curdisk->dn_slices = newslice;
1269
1270 curslice = newslice;
1271 }
1272 }
1273 /*
1274 * create a thread pool to do all of this in parallel;
1275 * choose double the number of processors; we hold a lot
1276 * of locks in the kernel, so going beyond this doesn't
1277 * buy us much. Each disk (and any slices it might have)
1278 * is handled inside a single thread.
1279 */
1280 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1281 0, NULL);
1282 for (curdisk = disks; curdisk; curdisk = curdisk->dn_next)
1283 (void) tpool_dispatch(t, zpool_open_func, curdisk);
1284 tpool_wait(t);
1285 tpool_destroy(t);
1286
1287 curdisk = disks;
1288 while (curdisk != NULL) {
1289 nvlist_t *config;
1290 disk_node_t *prevdisk;
1291
1292 /*
1293 * If the device has slices, we examine the config on
1294 * each of those; if not, we use the config from the
1295 * device directly.
1296 */
1297 curslice = curdisk->dn_slices;
1298
1299 if (curslice != NULL)
1300 config = curslice->sn_config;
1301 else
1302 config = curdisk->dn_config;
1303
1304 do {
1305 boolean_t matched = B_TRUE;
1306
1307 if (config == NULL)
1308 goto next;
1309
1310 if (iarg->poolname != NULL) {
1311 char *pname;
1312
1313 matched = nvlist_lookup_string(config,
1314 ZPOOL_CONFIG_POOL_NAME,
1315 &pname) == 0 &&
1316 strcmp(iarg->poolname, pname) == 0;
1317 } else if (iarg->guid != 0) {
1318 uint64_t this_guid;
1319
1320 matched = nvlist_lookup_uint64(config,
1321 ZPOOL_CONFIG_POOL_GUID,
1322 &this_guid) == 0 &&
1323 iarg->guid == this_guid;
1324 }
1325
1326 if (!matched) {
1327 nvlist_free(config);
1328 goto next;
1329 }
1330
1331 /* use the non-raw path for the config */
1332 if (curslice != NULL)
1333 (void) snprintf(end, pathleft, "%s%s",
1334 curdisk->dn_name,
1335 curslice->sn_name);
1336 else
1337 (void) strlcpy(end, curdisk->dn_name,
1338 pathleft);
1339 if (add_config(hdl, &pools, path, config) != 0)
1340 goto error;
1341
1342 next:
1343 /*
1344 * If we're looking at slices, free this one
1345 * and move on to the next.
1346 */
1347 if (curslice != NULL) {
1348 slice_node_t *prevslice;
1349
1350 prevslice = curslice;
1351 curslice = curslice->sn_next;
1352
1353 free(prevslice->sn_name);
1354 free(prevslice);
1355
1356 if (curslice != NULL) {
1357 config = curslice->sn_config;
1358 }
1359 }
1360 } while (curslice != NULL);
1361
1362 /*
1363 * Free this disk and move on to the next one.
1364 */
1365 prevdisk = curdisk;
1366 curdisk = curdisk->dn_next;
1367
1368 free(prevdisk->dn_name);
1369 free(prevdisk);
1370 }
1371
1372 (void) closedir(dirp);
1373 dirp = NULL;
1374 }
1375
1376 ret = get_configs(hdl, &pools, iarg->can_be_active);
1377
1378 error:
1379 for (pe = pools.pools; pe != NULL; pe = penext) {
1380 penext = pe->pe_next;
1381 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1382 venext = ve->ve_next;
1383 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1384 cenext = ce->ce_next;
1385 if (ce->ce_config)
1386 nvlist_free(ce->ce_config);
1387 free(ce);
1388 }
1389 free(ve);
1390 }