Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/dev/sdev_zvolops.c
+++ new/usr/src/uts/common/fs/dev/sdev_zvolops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2013, 2016 Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* vnode ops for the /dev/zvol directory */
28 28
29 29 #include <sys/types.h>
30 30 #include <sys/param.h>
31 31 #include <sys/sysmacros.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/sunndi.h>
34 34 #include <sys/sunldi.h>
35 35 #include <fs/fs_subr.h>
36 36 #include <sys/fs/dv_node.h>
37 37 #include <sys/fs/sdev_impl.h>
38 38 #include <sys/zfs_ioctl.h>
39 39 #include <sys/policy.h>
40 40 #include <sys/stat.h>
41 41 #include <sys/vfs_opreg.h>
42 42
/* vnode operations vector for the /dev/zvol directory */
struct vnodeops *devzvol_vnodeops;
/* major number of the device behind /dev/zfs; set by devzvol_open_zfs() */
static major_t devzvol_major;
/* taskq entry used to dispatch devzvol_update_zclist_cb() */
static taskq_ent_t devzvol_zclist_task;

static kmutex_t devzvol_mtx;
/* Below are protected by devzvol_mtx */
/* B_TRUE once devzvol_open_zfs() has succeeded */
static boolean_t devzvol_isopen;
/* B_TRUE while a devzvol_update_zclist_cb() dispatch is outstanding */
static boolean_t devzvol_zclist_task_running = B_FALSE;
/* cookie passed to / returned by ZFS_IOC_POOL_CONFIGS */
static uint64_t devzvol_gen = 0;
/* address of the packed nvlist returned by ZFS_IOC_POOL_CONFIGS */
static uint64_t devzvol_zclist;
/* allocated size of the devzvol_zclist buffer (not the nvlist size) */
static size_t devzvol_zclist_size;
/* LDI identity and handle for the open /dev/zfs */
static ldi_ident_t devzvol_li;
static ldi_handle_t devzvol_lh;

/*
 * we need to use ddi_mod* since fs/dev gets loaded early on in
 * startup(), and linking fs/dev to fs/zfs would drag in a lot of
 * other stuff (like drv/random) before the rest of the system is
 * ready to go
 */
/* handle for the "fs/zfs" module, from ddi_modopen() */
ddi_modhandle_t zfs_mod;
/* resolved "zvol_create_minor" symbol, or NULL */
int (*szcm)(char *);
/* resolved "zvol_name2minor" symbol, or NULL */
int (*szn2m)(char *, minor_t *);
66 66
67 67 int
68 68 sdev_zvol_create_minor(char *dsname)
69 69 {
70 70 if (szcm == NULL)
71 71 return (-1);
72 72 return ((*szcm)(dsname));
73 73 }
74 74
75 75 int
76 76 sdev_zvol_name2minor(char *dsname, minor_t *minor)
77 77 {
78 78 if (szn2m == NULL)
79 79 return (-1);
80 80 return ((*szn2m)(dsname, minor));
81 81 }
82 82
83 83 int
84 84 devzvol_open_zfs()
85 85 {
86 86 int rc;
87 87 dev_t dv;
88 88
89 89 devzvol_li = ldi_ident_from_anon();
90 90 if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
91 91 &devzvol_lh, devzvol_li))
92 92 return (-1);
93 93 if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
94 94 KRTLD_MODE_FIRST, &rc)) == NULL)) {
95 95 return (rc);
96 96 }
97 97 ASSERT(szcm == NULL && szn2m == NULL);
98 98 if ((szcm = (int (*)(char *))
99 99 ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
100 100 cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
101 101 return (rc);
102 102 }
103 103 if ((szn2m = (int(*)(char *, minor_t *))
104 104 ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
105 105 cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
106 106 return (rc);
107 107 }
108 108 if (ldi_get_dev(devzvol_lh, &dv))
109 109 return (-1);
110 110 devzvol_major = getmajor(dv);
111 111 return (0);
112 112 }
113 113
114 114 void
115 115 devzvol_close_zfs()
116 116 {
117 117 szcm = NULL;
118 118 szn2m = NULL;
119 119 (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
120 120 ldi_ident_release(devzvol_li);
121 121 if (zfs_mod != NULL) {
122 122 (void) ddi_modclose(zfs_mod);
123 123 zfs_mod = NULL;
124 124 }
125 125 }
126 126
127 127 int
128 128 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
129 129 {
130 130 uint64_t cookie;
131 131 int size = 8000;
132 132 int unused;
133 133 int rc;
134 134
135 135 if (cmd != ZFS_IOC_POOL_CONFIGS)
136 136 mutex_enter(&devzvol_mtx);
137 137 if (!devzvol_isopen) {
138 138 if ((rc = devzvol_open_zfs()) == 0) {
139 139 devzvol_isopen = B_TRUE;
140 140 } else {
141 141 if (cmd != ZFS_IOC_POOL_CONFIGS)
142 142 mutex_exit(&devzvol_mtx);
143 143 return (ENXIO);
144 144 }
145 145 }
146 146 cookie = zc->zc_cookie;
147 147 again:
148 148 zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
149 149 KM_SLEEP);
150 150 zc->zc_nvlist_dst_size = size;
151 151 rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
152 152 &unused);
153 153 if (rc == ENOMEM) {
154 154 int newsize;
155 155 newsize = zc->zc_nvlist_dst_size;
156 156 ASSERT(newsize > size);
157 157 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
158 158 size = newsize;
159 159 zc->zc_cookie = cookie;
160 160 goto again;
161 161 }
162 162 if (alloc_size == NULL)
163 163 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
164 164 else
165 165 *alloc_size = size;
166 166 if (cmd != ZFS_IOC_POOL_CONFIGS)
167 167 mutex_exit(&devzvol_mtx);
168 168 return (rc);
169 169 }
170 170
171 171 /* figures out if the objset exists and returns its type */
172 172 int
173 173 devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
174 174 {
175 175 boolean_t ispool;
176 176 zfs_cmd_t *zc;
177 177 int rc;
178 178 nvlist_t *nvl;
179 179 size_t nvsz;
180 180
181 181 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
182 182 (void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
183 183
184 184 nvl = fnvlist_alloc();
185 185 fnvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE);
186 186 zc->zc_nvlist_src = (uintptr_t)fnvlist_pack(nvl, &nvsz);
187 187 zc->zc_nvlist_src_size = nvsz;
188 188 fnvlist_free(nvl);
189 189
190 190 ispool = (strchr(dsname, '/') == NULL) ? B_TRUE : B_FALSE;
191 191 rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
192 192 ZFS_IOC_OBJSET_STATS, zc, NULL);
193 193 if (type && rc == 0)
194 194 *type = (ispool) ? DMU_OST_ZFS :
195 195 zc->zc_objset_stats.dds_type;
196 196 fnvlist_pack_free((char *)(uintptr_t)zc->zc_nvlist_src, nvsz);
197 197 kmem_free(zc, sizeof (zfs_cmd_t));
198 198 return (rc);
199 199 }
200 200
201 201 /*
202 202 * Returns what the zfs dataset name should be, given the /dev/zvol
203 203 * path and an optional name (can be NULL).
204 204 *
205 205 * Note that if the name param is NULL, then path must be an
206 206 * actual dataset's directory and not one of the top-level
207 207 * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a
208 208 * specific dataset.
209 209 */
210 210 char *
211 211 devzvol_make_dsname(const char *path, const char *name)
212 212 {
213 213 char *dsname;
214 214 const char *ptr;
215 215 int dslen;
216 216
217 217 if (strcmp(path, ZVOL_DIR) == 0)
218 218 return (NULL);
219 219 if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
220 220 return (NULL);
221 221 ptr = path + strlen(ZVOL_DIR);
222 222 if (strncmp(ptr, "/dsk", 4) == 0)
223 223 ptr += strlen("/dsk");
224 224 else if (strncmp(ptr, "/rdsk", 5) == 0)
225 225 ptr += strlen("/rdsk");
226 226 else
227 227 return (NULL);
228 228
229 229 if (*ptr == '/')
230 230 ptr++;
231 231 else if (name == NULL)
232 232 return (NULL);
233 233
234 234 dslen = strlen(ptr);
235 235 if (dslen)
236 236 dslen++; /* plus null */
237 237 if (name)
238 238 dslen += strlen(name) + 1; /* plus slash */
239 239 dsname = kmem_zalloc(dslen, KM_SLEEP);
240 240 if (*ptr) {
241 241 (void) strlcpy(dsname, ptr, dslen);
242 242 if (name)
243 243 (void) strlcat(dsname, "/", dslen);
244 244 }
245 245 if (name)
246 246 (void) strlcat(dsname, name, dslen);
247 247 return (dsname);
248 248 }
249 249
250 250 /*
251 251 * check if the zvol's sdev_node is still valid, which means make
252 252 * sure the zvol is still valid. zvol minors aren't proactively
253 253 * destroyed when the zvol is destroyed, so we use a validator to clean
254 254 * these up (in other words, when such nodes are encountered during
255 255 * subsequent lookup() and readdir() operations) so that only valid
256 256 * nodes are returned. The ordering between devname_lookup_func and
257 257 * devzvol_validate is a little inefficient in the case of invalid
258 258 * or stale nodes because devname_lookup_func calls
259 259 * devzvol_create_{dir, link}, then the validator says it's invalid,
260 260 * and then the node gets cleaned up.
261 261 */
262 262 int
263 263 devzvol_validate(struct sdev_node *dv)
264 264 {
265 265 vnode_t *vn = SDEVTOV(dv);
266 266 dmu_objset_type_t do_type;
267 267 char *dsname;
268 268 char *nm = dv->sdev_name;
269 269 int rc;
270 270
271 271 sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
272 272 /*
273 273 * validate only READY nodes; if someone is sitting on the
274 274 * directory of a dataset that just got destroyed we could
275 275 * get a zombie node which we just skip.
276 276 */
277 277 if (dv->sdev_state != SDEV_READY) {
278 278 sdcmn_err13(("skipping '%s'", nm));
279 279 return (SDEV_VTOR_SKIP);
280 280 }
281 281
282 282 if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
283 283 (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
284 284 return (SDEV_VTOR_VALID);
285 285 dsname = devzvol_make_dsname(dv->sdev_path, NULL);
286 286 if (dsname == NULL)
287 287 return (SDEV_VTOR_INVALID);
288 288
289 289 /*
290 290 * Leave any nodes alone that have been explicitly created by
291 291 * sdev profiles.
292 292 */
293 293 if (!(dv->sdev_flags & SDEV_GLOBAL) && dv->sdev_origin != NULL) {
294 294 kmem_free(dsname, strlen(dsname) + 1);
295 295 return (SDEV_VTOR_VALID);
296 296 }
297 297
298 298 rc = devzvol_objset_check(dsname, &do_type);
299 299 sdcmn_err13((" '%s' rc %d", dsname, rc));
300 300 if (rc != 0) {
301 301 sdev_node_t *parent = dv->sdev_dotdot;
302 302 /*
303 303 * Explicitly passed-through zvols in our sdev profile can't
304 304 * be created as prof_* shadow nodes, because in the GZ they
305 305 * are symlinks, but in the NGZ they are actual device files.
306 306 *
307 307 * The objset_check will fail on these as they are outside
308 308 * any delegated dataset (zfs will not allow ioctl access to
309 309 * them from this zone). We still want them to work, though.
310 310 */
311 311 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
312 312 parent->sdev_origin != NULL &&
313 313 !(dv->sdev_flags & SDEV_GLOBAL) &&
314 314 (vn->v_type == VBLK || vn->v_type == VCHR) &&
315 315 prof_name_matched(nm, parent)) {
316 316 do_type = DMU_OST_ZVOL;
317 317 } else {
318 318 kmem_free(dsname, strlen(dsname) + 1);
319 319 return (SDEV_VTOR_INVALID);
320 320 }
321 321 }
322 322
323 323 sdcmn_err13((" v_type %d do_type %d",
324 324 vn->v_type, do_type));
325 325 if ((vn->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
326 326 ((vn->v_type == VBLK || vn->v_type == VCHR) &&
327 327 do_type != DMU_OST_ZVOL) ||
328 328 (vn->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
329 329 kmem_free(dsname, strlen(dsname) + 1);
330 330 return (SDEV_VTOR_STALE);
331 331 }
332 332 if (vn->v_type == VLNK) {
333 333 char *ptr, *link;
334 334 long val = 0;
335 335 minor_t lminor, ominor;
336 336
337 337 rc = sdev_getlink(vn, &link);
338 338 ASSERT(rc == 0);
339 339
340 340 ptr = strrchr(link, ':') + 1;
341 341 rc = ddi_strtol(ptr, NULL, 10, &val);
342 342 kmem_free(link, strlen(link) + 1);
343 343 ASSERT(rc == 0 && val != 0);
344 344 lminor = (minor_t)val;
345 345 if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
346 346 ominor != lminor) {
347 347 kmem_free(dsname, strlen(dsname) + 1);
348 348 return (SDEV_VTOR_STALE);
349 349 }
350 350 }
351 351 kmem_free(dsname, strlen(dsname) + 1);
352 352 return (SDEV_VTOR_VALID);
353 353 }
354 354
355 355 /*
356 356 * Taskq callback to update the devzvol_zclist.
357 357 *
358 358 * We need to defer this to the taskq to avoid it running with a user
359 359 * context that might be associated with some non-global zone, and thus
360 360 * not being able to list all of the pools on the entire system.
361 361 */
362 362 /*ARGSUSED*/
363 363 static void
364 364 devzvol_update_zclist_cb(void *arg)
365 365 {
366 366 zfs_cmd_t *zc;
367 367 int rc;
368 368 size_t size;
369 369
370 370 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
371 371 mutex_enter(&devzvol_mtx);
372 372 zc->zc_cookie = devzvol_gen;
373 373
374 374 rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
375 375 switch (rc) {
376 376 case 0:
377 377 /* new generation */
378 378 ASSERT(devzvol_gen != zc->zc_cookie);
379 379 devzvol_gen = zc->zc_cookie;
380 380 if (devzvol_zclist)
381 381 kmem_free((void *)(uintptr_t)devzvol_zclist,
382 382 devzvol_zclist_size);
383 383 devzvol_zclist = zc->zc_nvlist_dst;
384 384 /* Keep the alloc'd size, not the nvlist size. */
385 385 devzvol_zclist_size = size;
386 386 break;
387 387 default:
388 388 /*
389 389 * Either there was no change in pool configuration
390 390 * since we last asked (rc == EEXIST) or we got a
391 391 * catastrophic error.
392 392 *
393 393 * Give up memory and exit.
394 394 */
395 395 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
396 396 size);
397 397 break;
398 398 }
399 399
400 400 VERIFY(devzvol_zclist_task_running == B_TRUE);
401 401 devzvol_zclist_task_running = B_FALSE;
402 402 mutex_exit(&devzvol_mtx);
403 403
404 404 kmem_free(zc, sizeof (zfs_cmd_t));
405 405 }
406 406
407 407 static void
408 408 devzvol_update_zclist(void)
409 409 {
410 410 mutex_enter(&devzvol_mtx);
411 411 if (devzvol_zclist_task_running == B_TRUE) {
412 412 mutex_exit(&devzvol_mtx);
413 413 goto wait;
414 414 }
415 415
416 416 devzvol_zclist_task_running = B_TRUE;
417 417
418 418 taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0,
419 419 &devzvol_zclist_task);
420 420
421 421 mutex_exit(&devzvol_mtx);
422 422
423 423 wait:
424 424 taskq_wait(sdev_taskq);
425 425 }
426 426
427 427 /*
428 428 * Creates sub-directories for each zpool as needed in response to a
429 429 * readdir on one of the /dev/zvol/{dsk,rdsk} directories.
430 430 */
431 431 void
432 432 devzvol_create_pool_dirs(struct vnode *dvp)
433 433 {
434 434 nvlist_t *nv = NULL;
435 435 nvpair_t *elem = NULL;
436 436 int pools = 0;
437 437 int rc;
438 438
439 439 sdcmn_err13(("devzvol_create_pool_dirs"));
440 440
441 441 devzvol_update_zclist();
442 442
443 443 mutex_enter(&devzvol_mtx);
444 444
445 445 rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
446 446 devzvol_zclist_size, &nv, 0);
447 447 if (rc) {
448 448 ASSERT(rc == 0);
449 449 kmem_free((void *)(uintptr_t)devzvol_zclist,
450 450 devzvol_zclist_size);
451 451 devzvol_gen = 0;
452 452 devzvol_zclist = NULL;
453 453 devzvol_zclist_size = 0;
454 454 goto out;
455 455 }
456 456 mutex_exit(&devzvol_mtx);
457 457 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
458 458 struct vnode *vp;
459 459 ASSERT(dvp->v_count > 0);
460 460 rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
461 461 NULL, kcred, NULL, 0, NULL);
462 462 /* should either work, or not be visible from a zone */
463 463 ASSERT(rc == 0 || rc == ENOENT);
464 464 if (rc == 0)
465 465 VN_RELE(vp);
466 466 pools++;
467 467 }
468 468 nvlist_free(nv);
469 469 mutex_enter(&devzvol_mtx);
470 470 if (devzvol_isopen && pools == 0) {
471 471 /* clean up so zfs can be unloaded */
472 472 devzvol_close_zfs();
473 473 devzvol_isopen = B_FALSE;
474 474 }
475 475 out:
476 476 mutex_exit(&devzvol_mtx);
477 477 }
478 478
479 479 /*ARGSUSED3*/
480 480 static int
481 481 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
482 482 cred_t *cred, void *whatever, char *whichever)
483 483 {
484 484 timestruc_t now;
485 485 struct vattr *vap = (struct vattr *)arg;
486 486
487 487 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
488 488 ddv->sdev_path, nm));
489 489 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
490 490 strlen(ZVOL_DIR)) == 0);
491 491 *vap = *sdev_getdefault_attr(VDIR);
492 492 gethrestime(&now);
493 493 vap->va_atime = now;
494 494 vap->va_mtime = now;
495 495 vap->va_ctime = now;
496 496 return (0);
497 497 }
498 498
499 499 /*ARGSUSED3*/
500 500 static int
501 501 devzvol_create_link(struct sdev_node *ddv, char *nm,
502 502 void **arg, cred_t *cred, void *whatever, char *whichever)
503 503 {
504 504 minor_t minor;
505 505 char *pathname = (char *)*arg;
506 506 int rc;
507 507 char *dsname;
508 508 char *x;
509 509 char str[MAXNAMELEN];
510 510 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
511 511 ddv->sdev_path, nm));
512 512 dsname = devzvol_make_dsname(ddv->sdev_path, nm);
513 513 rc = sdev_zvol_create_minor(dsname);
514 514 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
515 515 sdev_zvol_name2minor(dsname, &minor)) {
516 516 sdcmn_err13(("devzvol_create_link %d", rc));
517 517 kmem_free(dsname, strlen(dsname) + 1);
518 518 return (-1);
519 519 }
520 520 kmem_free(dsname, strlen(dsname) + 1);
521 521
522 522 /*
523 523 * This is a valid zvol; create a symlink that points to the
524 524 * minor which was created under /devices/pseudo/zfs@0
525 525 */
526 526 *pathname = '\0';
527 527 for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
528 528 (void) strcat(pathname, "../");
529 529 (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
530 530 (void) strncat(pathname, str, MAXPATHLEN);
531 531 if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
532 532 strlen(ZVOL_FULL_RDEV_DIR)) == 0)
533 533 (void) strcat(pathname, ",raw");
534 534 return (0);
535 535 }
536 536
537 537 /* Clean zvol sdev_nodes that are no longer valid. */
538 538 static void
539 539 devzvol_prunedir(struct sdev_node *ddv)
540 540 {
541 541 struct sdev_node *dv;
542 542
543 543 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
544 544
545 545 sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
546 546 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
547 547 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
548 548 rw_exit(&ddv->sdev_contents);
549 549 rw_enter(&ddv->sdev_contents, RW_WRITER);
550 550 }
551 551
552 552 dv = SDEV_FIRST_ENTRY(ddv);
553 553 while (dv) {
554 554 sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
555 555
556 556 switch (devzvol_validate(dv)) {
557 557 case SDEV_VTOR_VALID:
558 558 case SDEV_VTOR_SKIP:
559 559 dv = SDEV_NEXT_ENTRY(ddv, dv);
560 560 continue;
561 561 case SDEV_VTOR_INVALID:
562 562 sdcmn_err7(("prunedir: destroy invalid "
563 563 "node: %s\n", dv->sdev_name));
564 564 break;
565 565 }
566 566
567 567 if ((SDEVTOV(dv)->v_type == VDIR) &&
568 568 (sdev_cleandir(dv, NULL, 0) != 0)) {
569 569 dv = SDEV_NEXT_ENTRY(ddv, dv);
570 570 continue;
571 571 }
572 572 SDEV_HOLD(dv);
573 573 /* remove the cache node */
574 574 sdev_cache_update(ddv, &dv, dv->sdev_name,
575 575 SDEV_CACHE_DELETE);
576 576 SDEV_RELE(dv);
577 577 dv = SDEV_FIRST_ENTRY(ddv);
578 578 }
579 579 rw_downgrade(&ddv->sdev_contents);
580 580 }
581 581
582 582 /*
583 583 * This function is used to create a dir or dev inside a zone's /dev when the
584 584 * zone has a zvol that is dynamically created within the zone (i.e. inside
585 585 * of a delegated dataset. Since there is no /devices tree within a zone,
586 586 * we create the chr/blk devices directly inside the zone's /dev instead of
587 587 * making symlinks.
588 588 */
589 589 static int
590 590 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
591 591 {
592 592 struct vattr vattr;
593 593 timestruc_t now;
594 594 enum vtype expected_type = VDIR;
595 595 dmu_objset_type_t do_type;
596 596 struct sdev_node *dv = NULL;
597 597 int res;
598 598 char *dsname;
599 599
600 600 bzero(&vattr, sizeof (vattr));
601 601 gethrestime(&now);
602 602 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
603 603 vattr.va_uid = SDEV_UID_DEFAULT;
604 604 vattr.va_gid = SDEV_GID_DEFAULT;
605 605 vattr.va_type = VNON;
606 606 vattr.va_atime = now;
607 607 vattr.va_mtime = now;
608 608 vattr.va_ctime = now;
609 609
610 610 if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
611 611 return (ENOENT);
612 612
613 613 if (devzvol_objset_check(dsname, &do_type) != 0) {
614 614 /*
615 615 * objset_check will succeed on any valid objset in the global
616 616 * zone, and any valid delegated dataset. It will fail, however,
617 617 * in non-global zones on explicitly whitelisted zvol devices
618 618 * that are outside any delegated dataset.
619 619 *
620 620 * The directories leading up to the zvol device itself will be
621 621 * created by prof for us in advance (and will always validate
622 622 * because of the matching check in devzvol_validate). The zvol
623 623 * device itself can't be created by prof though because in the
624 624 * GZ it's a symlink, and in the NGZ it is not. So, we create
625 625 * such zvol device files here.
626 626 */
627 627 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
628 628 parent->sdev_origin != NULL &&
629 629 prof_name_matched(nm, parent)) {
630 630 do_type = DMU_OST_ZVOL;
631 631 } else {
632 632 kmem_free(dsname, strlen(dsname) + 1);
633 633 return (ENOENT);
634 634 }
635 635 }
636 636
637 637 if (do_type == DMU_OST_ZVOL)
638 638 expected_type = VBLK;
639 639
640 640 if (expected_type == VDIR) {
641 641 vattr.va_type = VDIR;
642 642 vattr.va_mode = SDEV_DIRMODE_DEFAULT;
643 643 } else {
644 644 minor_t minor;
645 645 dev_t devnum;
646 646 int rc;
647 647
648 648 rc = sdev_zvol_create_minor(dsname);
649 649 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
650 650 sdev_zvol_name2minor(dsname, &minor)) {
651 651 kmem_free(dsname, strlen(dsname) + 1);
652 652 return (ENOENT);
653 653 }
654 654
655 655 devnum = makedevice(devzvol_major, minor);
656 656 vattr.va_rdev = devnum;
657 657
658 658 if (strstr(parent->sdev_path, "/rdsk/") != NULL)
659 659 vattr.va_type = VCHR;
660 660 else
661 661 vattr.va_type = VBLK;
662 662 vattr.va_mode = SDEV_DEVMODE_DEFAULT;
663 663 }
664 664 kmem_free(dsname, strlen(dsname) + 1);
665 665
666 666 rw_enter(&parent->sdev_contents, RW_WRITER);
667 667
668 668 res = sdev_mknode(parent, nm, &dv, &vattr,
669 669 NULL, NULL, kcred, SDEV_READY);
670 670 rw_exit(&parent->sdev_contents);
671 671 if (res != 0)
672 672 return (ENOENT);
673 673
674 674 SDEV_RELE(dv);
675 675 return (0);
676 676 }
677 677
678 678 /*ARGSUSED*/
679 679 static int
680 680 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
681 681 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
682 682 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
683 683 {
684 684 enum vtype expected_type = VDIR;
685 685 struct sdev_node *parent = VTOSDEV(dvp);
686 686 char *dsname;
687 687 dmu_objset_type_t do_type;
688 688 int error;
689 689
690 690 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
691 691 *vpp = NULL;
692 692 /* execute access is required to search the directory */
693 693 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
694 694 return (error);
695 695
696 696 rw_enter(&parent->sdev_contents, RW_READER);
697 697 if (!SDEV_IS_GLOBAL(parent)) {
698 698 int res;
699 699
700 700 rw_exit(&parent->sdev_contents);
701 701
702 702 /*
703 703 * If we're in the global zone and reach down into a non-global
704 704 * zone's /dev/zvol then this action could trigger the creation
705 705 * of all of the zvol devices for every zone into the non-global
706 706 * zone's /dev tree. This could be a big security hole. To
707 707 * prevent this, disallow the global zone from looking inside
708 708 * a non-global zones /dev/zvol. This behavior is similar to
709 709 * delegated datasets, which cannot be used by the global zone.
710 710 */
711 711 if (getzoneid() == GLOBAL_ZONEID)
712 712 return (EPERM);
713 713
714 714 res = prof_lookup(dvp, nm, vpp, cred);
715 715
716 716 /*
717 717 * We won't find a zvol that was dynamically created inside
718 718 * a NGZ, within a delegated dataset, in the zone's dev profile
719 719 * but prof_lookup will also find it via sdev_cache_lookup.
720 720 */
721 721 if (res == ENOENT) {
722 722 /*
723 723 * We have to create the sdev node for the dymamically
724 724 * created zvol.
725 725 */
726 726 if (devzvol_mk_ngz_node(parent, nm) != 0)
727 727 return (ENOENT);
728 728 res = prof_lookup(dvp, nm, vpp, cred);
729 729 }
730 730
731 731 return (res);
732 732 }
733 733
734 734 /*
735 735 * Don't let the global-zone style lookup succeed here when we're not
736 736 * running in the global zone. This can happen because prof calls into
737 737 * us (in prof_filldir) trying to create an explicitly passed-through
738 738 * zvol device outside any delegated dataset.
739 739 *
740 740 * We have to stop this here or else we will create prof shadows of
741 741 * the global zone symlink, which will make no sense at all in the
742 742 * non-global zone (it has no /devices for the symlink to point at).
743 743 *
744 744 * These zvols will be created later (at access time) by mk_ngz_node
745 745 * instead. The dirs leading up to them will be created by prof
746 746 * internally.
747 747 *
748 748 * We have to return EPERM here, because ENOENT is given special
749 749 * meaning by prof in this context.
750 750 */
751 751 if (getzoneid() != GLOBAL_ZONEID) {
752 752 rw_exit(&parent->sdev_contents);
753 753 return (EPERM);
754 754 }
755 755
756 756 dsname = devzvol_make_dsname(parent->sdev_path, nm);
757 757 rw_exit(&parent->sdev_contents);
758 758 sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
759 759 if (dsname) {
760 760 error = devzvol_objset_check(dsname, &do_type);
761 761 if (error != 0) {
762 762 error = ENOENT;
763 763 goto out;
764 764 }
765 765 if (do_type == DMU_OST_ZVOL)
766 766 expected_type = VLNK;
767 767 }
768 768 /*
769 769 * the callbacks expect:
770 770 *
771 771 * parent->sdev_path nm
772 772 * /dev/zvol {r}dsk
773 773 * /dev/zvol/{r}dsk <pool name>
774 774 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
775 775 *
776 776 * sdev_name is always last path component of sdev_path
777 777 */
778 778 if (expected_type == VDIR) {
779 779 error = devname_lookup_func(parent, nm, vpp, cred,
780 780 devzvol_create_dir, SDEV_VATTR);
781 781 } else {
782 782 error = devname_lookup_func(parent, nm, vpp, cred,
783 783 devzvol_create_link, SDEV_VLINK);
784 784 }
785 785 sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
786 786 ASSERT(error || ((*vpp)->v_type == expected_type));
787 787 out:
788 788 if (dsname)
789 789 kmem_free(dsname, strlen(dsname) + 1);
790 790 sdcmn_err13(("devzvol_lookup %d", error));
791 791 return (error);
792 792 }
793 793
794 794 /*
795 795 * We allow create to find existing nodes
796 796 * - if the node doesn't exist - EROFS
797 797 * - creating an existing dir read-only succeeds, otherwise EISDIR
798 798 * - exclusive creates fail - EEXIST
799 799 */
800 800 /*ARGSUSED2*/
801 801 static int
802 802 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
803 803 int mode, struct vnode **vpp, struct cred *cred, int flag,
804 804 caller_context_t *ct, vsecattr_t *vsecp)
805 805 {
806 806 int error;
807 807 struct vnode *vp;
808 808
809 809 *vpp = NULL;
810 810
811 811 error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
812 812 NULL);
813 813 if (error == 0) {
814 814 if (excl == EXCL)
815 815 error = EEXIST;
816 816 else if (vp->v_type == VDIR && (mode & VWRITE))
817 817 error = EISDIR;
818 818 else
819 819 error = VOP_ACCESS(vp, mode, 0, cred, ct);
820 820
821 821 if (error) {
822 822 VN_RELE(vp);
823 823 } else
824 824 *vpp = vp;
825 825 } else if (error == ENOENT) {
826 826 error = EROFS;
827 827 }
828 828
829 829 return (error);
830 830 }
831 831
832 832 void sdev_iter_snapshots(struct vnode *dvp, char *name);
833 833
834 834 void
835 835 sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
836 836 {
837 837 zfs_cmd_t *zc;
838 838 int rc;
839 839
840 840 sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
841 841 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
842 842 (void) strcpy(zc->zc_name, name);
843 843
844 844 while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
845 845 struct vnode *vpp;
846 846 char *ptr;
847 847
848 848 sdcmn_err13((" name %s", zc->zc_name));
849 849 if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
850 850 goto skip;
851 851 ptr = strrchr(zc->zc_name, '/') + 1;
852 852 rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
853 853 kcred, NULL, NULL, NULL);
854 854 if (rc == 0) {
855 855 VN_RELE(vpp);
856 856 } else if (rc == ENOENT) {
857 857 goto skip;
858 858 } else {
859 859 /*
860 860 * EBUSY == problem with zvols's dmu holds?
861 861 * EPERM when in a NGZ and traversing up and out.
862 862 */
863 863 goto skip;
864 864 }
865 865 if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
866 866 zc->zc_objset_stats.dds_type != DMU_OST_ZFS)
867 867 sdev_iter_snapshots(dvp, zc->zc_name);
868 868 skip:
869 869 (void) strcpy(zc->zc_name, name);
870 870 }
871 871 kmem_free(zc, sizeof (zfs_cmd_t));
872 872 }
873 873
/*
 * Walk the snapshots of dataset `name' by running sdev_iter_datasets()
 * with ZFS_IOC_SNAPSHOT_LIST_NEXT.
 */
void
sdev_iter_snapshots(struct vnode *dvp, char *name)
{
	sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
}
879 879
880 880 /*ARGSUSED4*/
881 881 static int
882 882 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
883 883 int *eofp, caller_context_t *ct_unused, int flags_unused)
884 884 {
885 885 struct sdev_node *sdvp = VTOSDEV(dvp);
886 886 char *ptr;
887 887
888 888 sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
889 889 sdvp->sdev_name));
890 890
891 891 if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
892 892 struct vnode *vp;
893 893
894 894 rw_exit(&sdvp->sdev_contents);
895 895 (void) devname_lookup_func(sdvp, "dsk", &vp, cred,
896 896 devzvol_create_dir, SDEV_VATTR);
897 897 VN_RELE(vp);
898 898 (void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
899 899 devzvol_create_dir, SDEV_VATTR);
900 900 VN_RELE(vp);
901 901 rw_enter(&sdvp->sdev_contents, RW_READER);
902 902 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
903 903 }
904 904 if (uiop->uio_offset == 0)
905 905 devzvol_prunedir(sdvp);
906 906 ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
907 907 if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
908 908 rw_exit(&sdvp->sdev_contents);
909 909 devzvol_create_pool_dirs(dvp);
910 910 rw_enter(&sdvp->sdev_contents, RW_READER);
911 911 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
912 912 }
913 913
914 914 ptr = strchr(ptr + 1, '/');
↓ open down ↓ |
914 lines elided |
↑ open up ↑ |
915 915 if (ptr == NULL)
916 916 return (ENOENT);
917 917 ptr++;
918 918 rw_exit(&sdvp->sdev_contents);
919 919 sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
920 920 rw_enter(&sdvp->sdev_contents, RW_READER);
921 921 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
922 922 }
923 923
924 924 const fs_operation_def_t devzvol_vnodeops_tbl[] = {
925 - VOPNAME_READDIR, { .vop_readdir = devzvol_readdir },
926 - VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup },
927 - VOPNAME_CREATE, { .vop_create = devzvol_create },
928 - VOPNAME_RENAME, { .error = fs_nosys },
929 - VOPNAME_MKDIR, { .error = fs_nosys },
930 - VOPNAME_RMDIR, { .error = fs_nosys },
931 - VOPNAME_REMOVE, { .error = fs_nosys },
932 - VOPNAME_SYMLINK, { .error = fs_nosys },
933 - NULL, NULL
925 + { VOPNAME_READDIR, { .vop_readdir = devzvol_readdir } },
926 + { VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup } },
927 + { VOPNAME_CREATE, { .vop_create = devzvol_create } },
928 + { VOPNAME_RENAME, { .error = fs_nosys } },
929 + { VOPNAME_MKDIR, { .error = fs_nosys } },
930 + { VOPNAME_RMDIR, { .error = fs_nosys } },
931 + { VOPNAME_REMOVE, { .error = fs_nosys } },
932 + { VOPNAME_SYMLINK, { .error = fs_nosys } },
933 + { NULL, { NULL } }
934 934 };
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX