5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * Pool import support functions.
29 *
30 * To import a pool, we rely on reading the configuration information from the
31 * ZFS label of each device. If we successfully read the label, then we
32 * organize the configuration information in the following hierarchy:
33 *
34 * pool guid -> toplevel vdev guid -> label txg
35 *
36 * Duplicate entries matching this same tuple will be discarded. Once we have
37 * examined every device, we pick the best label txg config for each toplevel
38 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
39 * update any paths that have changed. Finally, we attempt to import the pool
40 * using our derived config, and record the results.
41 */
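/*
 * As a minimal sketch of that tuple (illustrative only; error handling
 * omitted, and ZPOOL_CONFIG_TOP_GUID is assumed to name the toplevel
 * vdev guid as it does elsewhere in libzfs), the three keys can be read
 * from the label nvlist returned by zpool_read_label():
 *
 *	nvlist_t *label;
 *	uint64_t pool_guid, top_guid, txg;
 *
 *	if (zpool_read_label(fd, &label) == 0 && label != NULL) {
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_POOL_GUID, &pool_guid);
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_TOP_GUID, &top_guid);
 *		(void) nvlist_lookup_uint64(label,
 *		    ZPOOL_CONFIG_POOL_TXG, &txg);
 *		nvlist_free(label);
 *	}
 */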
42
43 #include <ctype.h>
44 #include <devid.h>
417 /* Top-level is a hole */
418 if (hole_array[c] == id)
419 return (B_TRUE);
420 }
421 return (B_FALSE);
422 }
423
424 /*
425 * Convert our list of pools into the definitive set of configurations. We
426 * start by picking the best config for each toplevel vdev. Once that's done,
427 * we assemble the toplevel vdevs into a full config for the pool. We make a
428 * pass to fix up any incorrect paths, and then add it to the main list to
429 * return to the user.
430 */
431 static nvlist_t *
432 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
433 {
434 pool_entry_t *pe;
435 vdev_entry_t *ve;
436 config_entry_t *ce;
437 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
438 nvlist_t **spares, **l2cache;
439 uint_t i, nspares, nl2cache;
440 boolean_t config_seen;
441 uint64_t best_txg;
442 char *name, *hostname;
443 uint64_t guid;
444 uint_t children = 0;
445 nvlist_t **child = NULL;
446 uint_t holes;
447 uint64_t *hole_array, max_id;
448 uint_t c;
449 boolean_t isactive;
450 uint64_t hostid;
451 nvlist_t *nvl;
452 boolean_t found_one = B_FALSE;
453 boolean_t valid_top_config = B_FALSE;
454
455 if (nvlist_alloc(&ret, 0, 0) != 0)
456 goto nomem;
457
458 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
459 uint64_t id, max_txg = 0;
460
461 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
462 goto nomem;
888 nvlist_free(*config);
889 continue;
890 }
891
892 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
893 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
894 &txg) != 0 || txg == 0)) {
895 nvlist_free(*config);
896 continue;
897 }
898
899 free(label);
900 return (0);
901 }
902
903 free(label);
904 *config = NULL;
905 return (0);
906 }
907
908 typedef struct rdsk_node {
909 char *rn_name;
910 int rn_dfd;
911 libzfs_handle_t *rn_hdl;
912 nvlist_t *rn_config;
913 avl_tree_t *rn_avl;
914 avl_node_t rn_node;
915 boolean_t rn_nozpool;
916 } rdsk_node_t;
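/*
 * One rdsk_node_t is created for every name found in the device
 * directory; rn_config receives the label nvlist read by
 * zpool_open_func(), and rn_nozpool is a hint set by check_one_slice()
 * once a slice is known to be too small to hold a pool.
 */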
917
918 static int
919 slice_cache_compare(const void *arg1, const void *arg2)
920 {
921 const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
922 const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
923 char *nm1slice, *nm2slice;
924 int rv;
925
926 /*
927 * slices zero and two are the most likely to provide results,
928 * so put those first
929 */
930 nm1slice = strstr(nm1, "s0");
931 nm2slice = strstr(nm2, "s0");
932 if (nm1slice && !nm2slice) {
933 return (-1);
934 }
935 if (!nm1slice && nm2slice) {
936 return (1);
937 }
938 nm1slice = strstr(nm1, "s2");
939 nm2slice = strstr(nm2, "s2");
940 if (nm1slice && !nm2slice) {
941 return (-1);
942 }
943 if (!nm1slice && nm2slice) {
944 return (1);
945 }
946
947 rv = strcmp(nm1, nm2);
948 if (rv == 0)
949 return (0);
950 return (rv > 0 ? 1 : -1);
951 }
952
953 static void
954 check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
955 diskaddr_t size, uint_t blksz)
956 {
957 rdsk_node_t tmpnode;
958 rdsk_node_t *node;
959 char sname[MAXNAMELEN];
960
961 tmpnode.rn_name = &sname[0];
962 (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
963 diskname, partno);
964 /*
965 * protect against division by zero for disk labels that
966 * contain a bogus sector size
967 */
968 if (blksz == 0)
969 blksz = DEV_BSIZE;
970 /* too small to contain a zpool? */
971 if ((size < (SPA_MINDEVSIZE / blksz)) &&
972 (node = avl_find(r, &tmpnode, NULL)))
973 node->rn_nozpool = B_TRUE;
974 }
975
976 static void
977 nozpool_all_slices(avl_tree_t *r, const char *sname)
978 {
979 char diskname[MAXNAMELEN];
980 char *ptr;
981 int i;
982
983 (void) strncpy(diskname, sname, MAXNAMELEN);
984 if (((ptr = strrchr(diskname, 's')) == NULL) &&
985 ((ptr = strrchr(diskname, 'p')) == NULL))
986 return;
987 ptr[0] = 's';
988 ptr[1] = '\0';
989 for (i = 0; i < NDKMAP; i++)
990 check_one_slice(r, diskname, i, 0, 1);
991 ptr[0] = 'p';
992 for (i = 0; i <= FD_NUMPART; i++)
993 check_one_slice(r, diskname, i, 0, 1);
994 }
995
996 static void
997 check_slices(avl_tree_t *r, int fd, const char *sname)
998 {
999 struct extvtoc vtoc;
1000 struct dk_gpt *gpt;
1001 char diskname[MAXNAMELEN];
1002 char *ptr;
1003 int i;
1004
1005 (void) strncpy(diskname, sname, MAXNAMELEN);
1006 if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
1007 return;
1008 ptr[1] = '\0';
1009
1010 if (read_extvtoc(fd, &vtoc) >= 0) {
1011 for (i = 0; i < NDKMAP; i++)
1012 check_one_slice(r, diskname, i,
1013 vtoc.v_part[i].p_size, vtoc.v_sectorsz);
1014 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
1015 /*
1016 * on x86 we'll still have leftover links that point
1017 * to slices s[9-15], so use NDKMAP instead
1018 */
1019 for (i = 0; i < NDKMAP; i++)
1020 check_one_slice(r, diskname, i,
1021 gpt->efi_parts[i].p_size, gpt->efi_lbasize);
1022 /* nodes p[1-4] are never used with EFI labels */
1023 ptr[0] = 'p';
1024 for (i = 1; i <= FD_NUMPART; i++)
1025 check_one_slice(r, diskname, i, 0, 1);
1026 efi_free(gpt);
1027 }
1028 }
1029
1030 static void
1031 zpool_open_func(void *arg)
1032 {
1033 rdsk_node_t *rn = arg;
1034 struct stat64 statbuf;
1035 nvlist_t *config;
1036 int fd;
1037
1038 if (rn->rn_nozpool)
1039 return;
1040 if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1041 /* symlink to a device that's no longer there */
1042 if (errno == ENOENT)
1043 nozpool_all_slices(rn->rn_avl, rn->rn_name);
1044 return;
1045 }
1046 /*
1047 * Ignore failed stats. We only want regular
1048 * files, character devs and block devs.
1049 */
1050 if (fstat64(fd, &statbuf) != 0 ||
1051 (!S_ISREG(statbuf.st_mode) &&
1052 !S_ISCHR(statbuf.st_mode) &&
1053 !S_ISBLK(statbuf.st_mode))) {
1054 (void) close(fd);
1055 return;
1056 }
1057 /* this file is too small to hold a zpool */
1058 if (S_ISREG(statbuf.st_mode) &&
1059 statbuf.st_size < SPA_MINDEVSIZE) {
1060 (void) close(fd);
1061 return;
1062 } else if (!S_ISREG(statbuf.st_mode)) {
1063 /*
1064 * Try to read the disk label first so we don't have to
1065 * open a bunch of minor nodes that can't have a zpool.
1066 */
1067 check_slices(rn->rn_avl, fd, rn->rn_name);
1068 }
1069
1070 if ((zpool_read_label(fd, &config)) != 0) {
1071 (void) close(fd);
1072 (void) no_memory(rn->rn_hdl);
1073 return;
1074 }
1075 (void) close(fd);
1076
1077
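	/*
	 * A NULL config here simply means no valid label was found on
	 * this device; zpool_read_label() reports that as success, and
	 * such nodes are skipped when the slice cache is walked later.
	 */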
1078 rn->rn_config = config;
1079 if (config != NULL) {
1080 assert(rn->rn_nozpool == B_FALSE);
1081 }
1082 }
1083
1084 /*
1085 * Given a file descriptor, clear (zero) the label information. This function
1086 * is currently only used in the appliance stack as part of the ZFS sysevent
1087 * module.
1088 */
1089 int
1090 zpool_clear_label(int fd)
1091 {
1092 struct stat64 statbuf;
1093 int l;
1094 vdev_label_t *label;
1095 uint64_t size;
1096
1097 if (fstat64(fd, &statbuf) == -1)
1098 return (0);
1099 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1100
1117 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1118 * poolname or guid (but not both) are provided by the caller when trying
1119 * to import a specific pool.
1120 */
1121 static nvlist_t *
1122 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1123 {
1124 int i, dirs = iarg->paths;
1125 DIR *dirp = NULL;
1126 struct dirent64 *dp;
1127 char path[MAXPATHLEN];
1128 char *end, **dir = iarg->path;
1129 size_t pathleft;
1130 nvlist_t *ret = NULL;
1131 static char *default_dir = "/dev/dsk";
1132 pool_list_t pools = { 0 };
1133 pool_entry_t *pe, *penext;
1134 vdev_entry_t *ve, *venext;
1135 config_entry_t *ce, *cenext;
1136 name_entry_t *ne, *nenext;
1137 avl_tree_t slice_cache;
1138 rdsk_node_t *slice;
1139 void *cookie;
1140
1141 if (dirs == 0) {
1142 dirs = 1;
1143 dir = &default_dir;
1144 }
1145
1146 /*
1147 * Go through and read the label configuration information from every
1148 * possible device, organizing the information according to pool GUID
1149 * and toplevel GUID.
1150 */
1151 for (i = 0; i < dirs; i++) {
1152 tpool_t *t;
1153 char *rdsk;
1154 int dfd;
1155
1156 /* use realpath to normalize the path */
1157 if (realpath(dir[i], path) == 0) {
1158 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1159 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1160 goto error;
1161 }
1162 end = &path[strlen(path)];
1163 *end++ = '/';
1164 *end = 0;
1165 pathleft = &path[sizeof (path)] - end;
1166
1167 /*
1168 * Using raw devices instead of block devices when we're
1169 * reading the labels skips a bunch of slow operations during
1170 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1171 */
1172 if (strcmp(path, "/dev/dsk/") == 0)
1173 rdsk = "/dev/rdsk/";
1174 else
1175 rdsk = path;
1176
1177 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1178 (dirp = fdopendir(dfd)) == NULL) {
1179 zfs_error_aux(hdl, strerror(errno));
1180 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1181 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1182 rdsk);
1183 goto error;
1184 }
1185
1186 avl_create(&slice_cache, slice_cache_compare,
1187 sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1188 /*
1189 * This is not MT-safe, but we have no MT consumers of libzfs
1190 */
1191 while ((dp = readdir64(dirp)) != NULL) {
1192 const char *name = dp->d_name;
1193 if (name[0] == '.' &&
1194 (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1195 continue;
1196
1197 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1198 slice->rn_name = zfs_strdup(hdl, name);
1199 slice->rn_avl = &slice_cache;
1200 slice->rn_dfd = dfd;
1201 slice->rn_hdl = hdl;
1202 slice->rn_nozpool = B_FALSE;
1203 avl_add(&slice_cache, slice);
1204 }
1205 /*
1206 * create a thread pool to do all of this in parallel;
1207 * rn_nozpool is not protected, so this is racy in that
1208 * multiple tasks could decide that the same slice can
1209 * not hold a zpool, which is benign. Also choose
1210 * double the number of processors; we hold a lot of
1211 * locks in the kernel, so going beyond this doesn't
1212 * buy us much.
1213 */
1214 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1215 0, NULL);
1216 for (slice = avl_first(&slice_cache); slice;
1217 (slice = avl_walk(&slice_cache, slice,
1218 AVL_AFTER)))
1219 (void) tpool_dispatch(t, zpool_open_func, slice);
1220 tpool_wait(t);
1221 tpool_destroy(t);
1222
1223 cookie = NULL;
1224 while ((slice = avl_destroy_nodes(&slice_cache,
1225 &cookie)) != NULL) {
1226 if (slice->rn_config != NULL) {
1227 nvlist_t *config = slice->rn_config;
1228 boolean_t matched = B_TRUE;
1229
1230 if (iarg->poolname != NULL) {
1231 char *pname;
1232
1233 matched = nvlist_lookup_string(config,
1234 ZPOOL_CONFIG_POOL_NAME,
1235 &pname) == 0 &&
1236 strcmp(iarg->poolname, pname) == 0;
1237 } else if (iarg->guid != 0) {
1238 uint64_t this_guid;
1239
1240 matched = nvlist_lookup_uint64(config,
1241 ZPOOL_CONFIG_POOL_GUID,
1242 &this_guid) == 0 &&
1243 iarg->guid == this_guid;
1244 }
1245 if (!matched) {
1246 nvlist_free(config);
1247 config = NULL;
1248 continue;
1249 }
1250 /* use the non-raw path for the config */
1251 (void) strlcpy(end, slice->rn_name, pathleft);
1252 if (add_config(hdl, &pools, path, config) != 0)
1253 goto error;
1254 }
1255 free(slice->rn_name);
1256 free(slice);
1257 }
1258 avl_destroy(&slice_cache);
1259
1260 (void) closedir(dirp);
1261 dirp = NULL;
1262 }
1263
1264 ret = get_configs(hdl, &pools, iarg->can_be_active);
1265
1266 error:
1267 for (pe = pools.pools; pe != NULL; pe = penext) {
1268 penext = pe->pe_next;
1269 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1270 venext = ve->ve_next;
1271 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1272 cenext = ce->ce_next;
1273 if (ce->ce_config)
1274 nvlist_free(ce->ce_config);
1275 free(ce);
1276 }
1277 free(ve);
1278 }
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 RackTop Systems.
26 */
27
28 /*
29 * Pool import support functions.
30 *
31 * To import a pool, we rely on reading the configuration information from the
32 * ZFS label of each device. If we successfully read the label, then we
33 * organize the configuration information in the following hierarchy:
34 *
35 * pool guid -> toplevel vdev guid -> label txg
36 *
37 * Duplicate entries matching this same tuple will be discarded. Once we have
38 * examined every device, we pick the best label txg config for each toplevel
39 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
40 * update any paths that have changed. Finally, we attempt to import the pool
41 * using our derived config, and record the results.
42 */
43
44 #include <ctype.h>
45 #include <devid.h>
418 /* Top-level is a hole */
419 if (hole_array[c] == id)
420 return (B_TRUE);
421 }
422 return (B_FALSE);
423 }
424
425 /*
426 * Convert our list of pools into the definitive set of configurations. We
427 * start by picking the best config for each toplevel vdev. Once that's done,
428 * we assemble the toplevel vdevs into a full config for the pool. We make a
429 * pass to fix up any incorrect paths, and then add it to the main list to
430 * return to the user.
431 */
432 static nvlist_t *
433 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
434 {
435 pool_entry_t *pe;
436 vdev_entry_t *ve;
437 config_entry_t *ce;
438 nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
439 nvlist_t **spares, **l2cache;
440 uint_t i, nspares, nl2cache;
441 boolean_t config_seen;
442 uint64_t best_txg;
443 char *name, *hostname = NULL;
444 uint64_t guid;
445 uint_t children = 0;
446 nvlist_t **child = NULL;
447 uint_t holes;
448 uint64_t *hole_array, max_id;
449 uint_t c;
450 boolean_t isactive;
451 uint64_t hostid;
452 nvlist_t *nvl;
453 boolean_t found_one = B_FALSE;
454 boolean_t valid_top_config = B_FALSE;
455
456 if (nvlist_alloc(&ret, 0, 0) != 0)
457 goto nomem;
458
459 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
460 uint64_t id, max_txg = 0;
461
462 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
463 goto nomem;
889 nvlist_free(*config);
890 continue;
891 }
892
893 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
894 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
895 &txg) != 0 || txg == 0)) {
896 nvlist_free(*config);
897 continue;
898 }
899
900 free(label);
901 return (0);
902 }
903
904 free(label);
905 *config = NULL;
906 return (0);
907 }
908
909 typedef struct slice_node {
910 char *sn_name;
911 nvlist_t *sn_config;
912 boolean_t sn_nozpool;
913 int sn_partno;
914 struct disk_node *sn_disk;
915 struct slice_node *sn_next;
916 } slice_node_t;
917
918 typedef struct disk_node {
919 char *dn_name;
920 int dn_dfd;
921 libzfs_handle_t *dn_hdl;
922 nvlist_t *dn_config;
923 struct slice_node *dn_slices;
924 struct disk_node *dn_next;
925 } disk_node_t;
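/*
 * zpool_find_import_impl() below builds one disk_node_t per disk found
 * in the device directory, each carrying a singly linked list of the
 * slice_node_t entries (VTOC slices and FDISK partitions) seen for that
 * disk.  zpool_open_func() then fills in dn_config for sliceless devices
 * or the per-slice sn_config label nvlists otherwise.
 */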
926
927 #ifdef sparc
928 #define WHOLE_DISK "s2"
929 #else
930 #define WHOLE_DISK "p0"
931 #endif
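/*
 * WHOLE_DISK names the node that spans the entire disk: conventionally
 * the backup slice s2 under a SPARC VTOC label, and the p0 node on x86.
 * Opening it lets zpool_open_func() run its size and partition-table
 * checks once per disk before reading labels from individual slices.
 */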
932
933 /*
934 * This function splits the slice from the device name. Currently it supports
935 * VTOC slices (s[0-15]) and DOS/FDISK partitions (p[0-4]). If this function
936 * is updated to support other slice types then the check_slices function will
937 * also need to be updated.
938 */
939 static boolean_t
940 get_disk_slice(libzfs_handle_t *hdl, char *disk, char **slice, int *partno)
941 {
942 char *p;
943
944 if ((p = strrchr(disk, 's')) == NULL &&
945 (p = strrchr(disk, 'p')) == NULL)
946 return (B_FALSE);
947
948 if (!isdigit(p[1]))
949 return (B_FALSE);
950
951 *slice = zfs_strdup(hdl, p);
952 *partno = atoi(p + 1);
953
954 p[0] = '\0';
955 return (B_TRUE);
956 }
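/*
 * For example (illustrative only): given disk = "c0t0d0s3", this returns
 * B_TRUE, truncates disk to "c0t0d0", sets *slice to a strdup'd "s3",
 * and sets *partno to 3; a name with no trailing s<digit> or p<digit>
 * suffix returns B_FALSE with the output arguments untouched.
 */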
957
958 static void
959 check_one_slice(slice_node_t *slice, diskaddr_t size, uint_t blksz)
960 {
961 /*
962 * protect against division by zero for disk labels that
963 * contain a bogus sector size
964 */
965 if (blksz == 0)
966 blksz = DEV_BSIZE;
967 /* too small to contain a zpool? */
968 if (size < (SPA_MINDEVSIZE / blksz))
969 slice->sn_nozpool = B_TRUE;
970 }
971
972 static void
973 check_slices(slice_node_t *slices, int fd)
974 {
975 struct extvtoc vtoc;
976 struct dk_gpt *gpt;
977 slice_node_t *slice;
978 diskaddr_t size;
979
980 if (read_extvtoc(fd, &vtoc) >= 0) {
981 for (slice = slices; slice; slice = slice->sn_next) {
982 if (slice->sn_name[0] == 'p')
983 continue;
984 size = vtoc.v_part[slice->sn_partno].p_size;
985 check_one_slice(slice, size, vtoc.v_sectorsz);
986 }
987 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
988 for (slice = slices; slice; slice = slice->sn_next) {
989 /* nodes p[1-4] are never used with EFI labels */
990 if (slice->sn_name[0] == 'p') {
991 if (slice->sn_partno > 0)
992 slice->sn_nozpool = B_TRUE;
993 continue;
994 }
995 size = gpt->efi_parts[slice->sn_partno].p_size;
996 check_one_slice(slice, size, gpt->efi_lbasize);
997 }
998 efi_free(gpt);
999 }
1000 }
1001
1002 static void
1003 zpool_open_func(void *arg)
1004 {
1005 disk_node_t *disk = arg;
1006 struct stat64 statbuf;
1007 slice_node_t *slice;
1008 nvlist_t *config;
1009 char *devname;
1010 int fd;
1011
1012 /*
1013 * If the disk has no slices, we open it directly; otherwise we try
1014 * to open the whole-disk slice.
1015 */
1016 if (disk->dn_slices == NULL)
1017 devname = strdup(disk->dn_name);
1018 else
1019 (void) asprintf(&devname, "%s" WHOLE_DISK, disk->dn_name);
1020
1021 if (devname == NULL) {
1022 (void) no_memory(disk->dn_hdl);
1023 return;
1024 }
1025
1026 if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
1027 free(devname);
1028 return;
1029 }
1030 /*
1031 * Ignore failed stats. We only want regular
1032 * files, character devs and block devs.
1033 */
1034 if (fstat64(fd, &statbuf) != 0 ||
1035 (!S_ISREG(statbuf.st_mode) &&
1036 !S_ISCHR(statbuf.st_mode) &&
1037 !S_ISBLK(statbuf.st_mode))) {
1038 (void) close(fd);
1039 free(devname);
1040 return;
1041 }
1042 /* this file is too small to hold a zpool */
1043 if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
1044 (void) close(fd);
1045 free(devname);
1046 return;
1047 } else if (!S_ISREG(statbuf.st_mode) && disk->dn_slices != NULL) {
1048 /*
1049 * Try to read the disk label first so we don't have to
1050 * open a bunch of minor nodes that can't have a zpool.
1051 */
1052 check_slices(disk->dn_slices, fd);
1053 }
1054
1055 /*
1056 * If we're working with the device directly (it has no slices)
1057 * then we can just read the config and we're done.
1058 */
1059 if (disk->dn_slices == NULL) {
1060 if (zpool_read_label(fd, &config) != 0) {
1061 (void) no_memory(disk->dn_hdl);
1062 (void) close(fd);
1063 free(devname);
1064 return;
1065 }
1066 disk->dn_config = config;
1067 (void) close(fd);
1068 free(devname);
1069 return;
1070 }
1071
1072 (void) close(fd);
1073 free(devname);
1074
1075 /*
1076 * Go through and read the label off each slice. The check_slices
1077 * function has already performed some basic checks and set the
1078 * sn_nozpool flag on any slices which just can't contain a zpool.
1079 */
1080 for (slice = disk->dn_slices; slice; slice = slice->sn_next) {
1081 if (slice->sn_nozpool == B_TRUE)
1082 continue;
1083
1084 (void) asprintf(&devname, "%s%s", disk->dn_name,
1085 slice->sn_name);
1086
1087 if (devname == NULL) {
1088 (void) no_memory(disk->dn_hdl);
1089 free(devname);
1090 return;
1091 }
1092
1093 if ((fd = openat64(disk->dn_dfd, devname, O_RDONLY)) < 0) {
1094 free(devname);
1095 continue;
1096 }
1097
1098 if ((zpool_read_label(fd, &config)) != 0) {
1099 (void) no_memory(disk->dn_hdl);
1100 (void) close(fd);
1101 free(devname);
1102 return;
1103 }
1104
1105 slice->sn_config = config;
1106 (void) close(fd);
1107 free(devname);
1108 }
1109 }
1110
1111 /*
1112 * Given a file descriptor, clear (zero) the label information. This function
1113 * is currently only used in the appliance stack as part of the ZFS sysevent
1114 * module.
1115 */
1116 int
1117 zpool_clear_label(int fd)
1118 {
1119 struct stat64 statbuf;
1120 int l;
1121 vdev_label_t *label;
1122 uint64_t size;
1123
1124 if (fstat64(fd, &statbuf) == -1)
1125 return (0);
1126 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1127
1144 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1145 * poolname or guid (but not both) are provided by the caller when trying
1146 * to import a specific pool.
1147 */
1148 static nvlist_t *
1149 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1150 {
1151 int i, dirs = iarg->paths;
1152 DIR *dirp = NULL;
1153 struct dirent64 *dp;
1154 char path[MAXPATHLEN];
1155 char *end, **dir = iarg->path;
1156 size_t pathleft;
1157 nvlist_t *ret = NULL;
1158 static char *default_dir = "/dev/dsk";
1159 pool_list_t pools = { 0 };
1160 pool_entry_t *pe, *penext;
1161 vdev_entry_t *ve, *venext;
1162 config_entry_t *ce, *cenext;
1163 name_entry_t *ne, *nenext;
1164 void *cookie;
1165
1166 if (dirs == 0) {
1167 dirs = 1;
1168 dir = &default_dir;
1169 }
1170
1171 /*
1172 * Go through and read the label configuration information from every
1173 * possible device, organizing the information according to pool GUID
1174 * and toplevel GUID.
1175 */
1176 for (i = 0; i < dirs; i++) {
1177 tpool_t *t;
1178 char *rdsk;
1179 int dfd;
1180 disk_node_t *disks = NULL, *curdisk = NULL;
1181 slice_node_t *curslice = NULL;
1182
1183 /* use realpath to normalize the path */
1184 if (realpath(dir[i], path) == 0) {
1185 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1186 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1187 goto error;
1188 }
1189 end = &path[strlen(path)];
1190 *end++ = '/';
1191 *end = 0;
1192 pathleft = &path[sizeof (path)] - end;
1193
1194 /*
1195 * Using raw devices instead of block devices when we're
1196 * reading the labels skips a bunch of slow operations during
1197 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1198 */
1199 if (strcmp(path, "/dev/dsk/") == 0)
1200 rdsk = "/dev/rdsk/";
1201 else
1202 rdsk = path;
1203
1204 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1205 (dirp = fdopendir(dfd)) == NULL) {
1206 zfs_error_aux(hdl, strerror(errno));
1207 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1208 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1209 rdsk);
1210 goto error;
1211 }
1212
1213 /*
1214 * This is not MT-safe, but we have no MT consumers of libzfs
1215 */
1216 while ((dp = readdir64(dirp)) != NULL) {
1217 boolean_t isslice;
1218 char *name, *sname;
1219 int partno;
1220
1221 if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
1222 (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
1223 continue;
1224
1225 name = zfs_strdup(hdl, dp->d_name);
1226
1227 /*
1228 * We create a new disk node every time we encounter
1229 * a disk with no slices, or whenever the disk name changes.
1230 */
1231 isslice = get_disk_slice(hdl, name, &sname, &partno);
1232 if (isslice == B_FALSE || curdisk == NULL ||
1233 strcmp(curdisk->dn_name, name) != 0) {
1234 disk_node_t *newdisk;
1235
1236 newdisk = zfs_alloc(hdl, sizeof (disk_node_t));
1237 newdisk->dn_name = name;
1238 newdisk->dn_dfd = dfd;
1239 newdisk->dn_hdl = hdl;
1240
1241 if (curdisk != NULL)
1242 curdisk->dn_next = newdisk;
1243 else
1244 disks = newdisk;
1245
1246 curdisk = newdisk;
1247 curslice = NULL;
1248 }
1249
1250 assert(curdisk != NULL);
1251
1252 /*
1253 * Add a new slice node to the current disk node.
1254 * We do this for all slices including zero slices.
1255 */
1256 if (isslice == B_TRUE) {
1257 slice_node_t *newslice;
1258
1259 newslice = zfs_alloc(hdl,
1260 sizeof (slice_node_t));
1261 newslice->sn_name = sname;
1262 newslice->sn_partno = partno;
1263 newslice->sn_disk = curdisk;
1264
1265 if (curslice != NULL)
1266 curslice->sn_next = newslice;
1267 else
1268 curdisk->dn_slices = newslice;
1269
1270 curslice = newslice;
1271 }
1272 }
1273 /*
1274 * create a thread pool to do all of this in parallel;
1275 * choose double the number of processors; we hold a lot
1276 * of locks in the kernel, so going beyond this doesn't
1277 * buy us much. Each disk (and any slices it might have)
1278 * is handled inside a single thread.
1279 */
1280 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1281 0, NULL);
1282 for (curdisk = disks; curdisk; curdisk = curdisk->dn_next)
1283 (void) tpool_dispatch(t, zpool_open_func, curdisk);
1284 tpool_wait(t);
1285 tpool_destroy(t);
1286
1287 curdisk = disks;
1288 while (curdisk != NULL) {
1289 nvlist_t *config;
1290 disk_node_t *prevdisk;
1291
1292 /*
1293 * If the device has slices, we examine the config on
1294 * each of those; if not, we use the config from the
1295 * device directly.
1296 */
1297 curslice = curdisk->dn_slices;
1298
1299 if (curslice != NULL)
1300 config = curslice->sn_config;
1301 else
1302 config = curdisk->dn_config;
1303
1304 do {
1305 boolean_t matched = B_TRUE;
1306
1307 if (config == NULL)
1308 goto next;
1309
1310 if (iarg->poolname != NULL) {
1311 char *pname;
1312
1313 matched = nvlist_lookup_string(config,
1314 ZPOOL_CONFIG_POOL_NAME,
1315 &pname) == 0 &&
1316 strcmp(iarg->poolname, pname) == 0;
1317 } else if (iarg->guid != 0) {
1318 uint64_t this_guid;
1319
1320 matched = nvlist_lookup_uint64(config,
1321 ZPOOL_CONFIG_POOL_GUID,
1322 &this_guid) == 0 &&
1323 iarg->guid == this_guid;
1324 }
1325
1326 if (!matched) {
1327 nvlist_free(config);
1328 goto next;
1329 }
1330
1331 /* use the non-raw path for the config */
1332 if (curslice != NULL)
1333 (void) snprintf(end, pathleft, "%s%s",
1334 curdisk->dn_name,
1335 curslice->sn_name);
1336 else
1337 (void) strlcpy(end, curdisk->dn_name,
1338 pathleft);
1339 if (add_config(hdl, &pools, path, config) != 0)
1340 goto error;
1341
1342 next:
1343 /*
1344 * If we're looking at slices, free this one
1345 * and move on to the next.
1346 */
1347 if (curslice != NULL) {
1348 slice_node_t *prevslice;
1349
1350 prevslice = curslice;
1351 curslice = curslice->sn_next;
1352
1353 free(prevslice->sn_name);
1354 free(prevslice);
1355
1356 if (curslice != NULL) {
1357 config = curslice->sn_config;
1358 }
1359 }
1360 } while (curslice != NULL);
1361
1362 /*
1363 * Free this disk and move on to the next one.
1364 */
1365 prevdisk = curdisk;
1366 curdisk = curdisk->dn_next;
1367
1368 free(prevdisk->dn_name);
1369 free(prevdisk);
1370 }
1371
1372 (void) closedir(dirp);
1373 dirp = NULL;
1374 }
1375
1376 ret = get_configs(hdl, &pools, iarg->can_be_active);
1377
1378 error:
1379 for (pe = pools.pools; pe != NULL; pe = penext) {
1380 penext = pe->pe_next;
1381 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1382 venext = ve->ve_next;
1383 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1384 cenext = ce->ce_next;
1385 if (ce->ce_config)
1386 nvlist_free(ce->ce_config);
1387 free(ce);
1388 }
1389 free(ve);
1390 }