Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/mntfs/mntvnops.c
+++ new/usr/src/uts/common/fs/mntfs/mntvnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/file.h>
26 26 #include <sys/stat.h>
27 27 #include <sys/atomic.h>
28 28 #include <sys/mntio.h>
29 29 #include <sys/mnttab.h>
30 30 #include <sys/mount.h>
31 31 #include <sys/sunddi.h>
32 32 #include <sys/sysmacros.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/vfs.h>
35 35 #include <sys/vfs_opreg.h>
36 36 #include <sys/fs/mntdata.h>
37 37 #include <fs/fs_subr.h>
38 38 #include <sys/vmsystm.h>
39 39 #include <vm/seg_vn.h>
40 40 #include <sys/time.h>
41 41 #include <sys/ksynch.h>
42 42 #include <sys/sdt.h>
43 43
/* NOTE(review): presumably the inode number used for the mntfs root; its use is outside this excerpt -- confirm. */
44 44 #define MNTROOTINO 2
45 45
/* Forward declaration: mntopen() calls this to obtain the mntnode_t it installs for each new open. */
46 46 static mntnode_t *mntgetnode(vnode_t *);
47 47
/* NOTE(review): looks like the vnode operations vector for mntfs; it is not initialised in this excerpt -- confirm. */
48 48 vnodeops_t *mntvnodeops;
49 49 extern void vfs_mnttab_readop(void);
50 50
51 51 /*
52 52 * Design of kernel mnttab accounting.
53 53 *
54 54 * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of
55 55 * the mounted resources: the read-only file /etc/mnttab, and a collection of
56 56 * ioctl() commands. Most of these interfaces are public and are described in
57 57 * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT,
58 58 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C)
59 59 * family of functions, allowing them to support white space in mount names.
60 60 *
61 61 * A significant feature of mntfs is that it provides a file descriptor with a
62 62 * snapshot once it begins to consume mnttab data. Thus, as the process
63 63 * continues to consume data, its view of the in-kernel mnttab does not change
64 64 * even if resources are mounted or unmounted. The intent is to ensure that
65 65 * processes are guaranteed to read self-consistent data even as the system
66 66 * changes.
67 67 *
68 68 * The snapshot is implemented by a "database", unique to each zone, that
69 69 * comprises a linked list of mntelem_ts. The database is identified by
70 70 * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains
71 71 * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is
72 72 * marked with its time of "birth", i.e. creation. An element is "killed", and
73 73 * marked with its time of death, when it is found to be out of date, e.g. when
74 74 * the corresponding resource has been unmounted.
75 75 *
76 76 * When a process performs the first read() or ioctl() for a file descriptor for
77 77 * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure
78 78 * that an element exists for each currently mounted resource. Following this,
79 79 * the current time is written into a snapshot structure, a mntsnap_t, embedded
80 80 * in the descriptor's mntnode_t.
81 81 *
82 82 * mntfs is able to enumerate the /etc/mnttab entries corresponding to a
83 83 * particular file descriptor by searching the database for entries that were
84 84 * born before the appropriate snapshot and that either are still alive or died
85 85 * after the snapshot was created. Consumers use the iterator function
86 86 * mntfs_get_next_elem() to identify the next suitable element in the database.
87 87 *
88 88 * Each snapshot has a hold on its corresponding database elements, effected by
89 89 * a per-element reference count. At last close(), a snapshot is destroyed in
90 90 * mntfs_freesnap() by releasing all of its holds; an element is destroyed if
91 91 * its reference count becomes zero. Therefore the database never exists unless
92 92 * there is at least one active consumer of /etc/mnttab.
93 93 *
94 94 * getmntent(3C) et al. "do not open, close or rewind the file." This implies
95 95 * that getmntent() and read() must be able to operate without interaction on
96 96 * the same file descriptor; this is accomplished by the use of separate
97 97 * mntsnap_ts for both read() and ioctl().
98 98 *
99 99 * mntfs observes the following lock-ordering:
100 100 *
101 101 * mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock
102 102 *
103 103 * NOTE: The following variable enables the generation of the "dev=xxx"
104 104 * in the option string for a mounted file system. Really this should
105 105 * be gotten rid of altogether, but for the sake of backwards compatibility
106 106 * we had to leave it in. It is defined as a 32-bit device number. This
107 107 * means that when 64-bit device numbers are in use, if either the major or
108 108 * minor part of the device number will not fit in a 16 bit quantity, the
109 109 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and
110 110 * 1999/131 for details. The cmpldev() function used to generate the 32-bit
111 111 * device number handles this check and assigns the proper value.
112 112 */
113 113 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */
114 114
/* Supplies the time stamps recorded as element birth/death times and snapshot times. */
115 115 extern void vfs_mono_time(timespec_t *);
/* Return values of mntfs_newest(): which of its two timespec arguments, if either, is newer. */
116 116 enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER };
117 117
118 118 /*
119 119 * Determine whether a field within a line from /etc/mnttab contains actual
120 120 * content or simply the marker string "-". This never applies to the time,
121 121 * therefore the delimiter must be a tab.
122 122 */
123 123 #define MNTFS_REAL_FIELD(x) (*(x) != '-' || *((x) + 1) != '\t')
124 124
125 125 static int
126 126 mntfs_devsize(struct vfs *vfsp)
127 127 {
128 128 dev32_t odev;
129 129
130 130 (void) cmpldev(&odev, vfsp->vfs_dev);
131 131 return (snprintf(NULL, 0, "dev=%x", odev));
132 132 }
133 133
134 134 static int
135 135 mntfs_devprint(struct vfs *vfsp, char *buf)
136 136 {
137 137 dev32_t odev;
138 138
139 139 (void) cmpldev(&odev, vfsp->vfs_dev);
140 140 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev));
141 141 }
142 142
143 143 /* Identify which, if either, of two supplied timespec structs is newer. */
144 144 static int
145 145 mntfs_newest(timespec_t *a, timespec_t *b)
146 146 {
147 147 if (a->tv_sec == b->tv_sec &&
148 148 a->tv_nsec == b->tv_nsec) {
149 149 return (MNTFS_NEITHER);
150 150 } else if (b->tv_sec > a->tv_sec ||
151 151 (b->tv_sec == a->tv_sec &&
152 152 b->tv_nsec > a->tv_nsec)) {
153 153 return (MNTFS_SECOND);
154 154 } else {
155 155 return (MNTFS_FIRST);
156 156 }
157 157 }
158 158
159 159 static int
160 160 mntfs_optsize(struct vfs *vfsp)
161 161 {
162 162 int i, size = 0;
163 163 mntopt_t *mop;
164 164
165 165 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
166 166 mop = &vfsp->vfs_mntopts.mo_list[i];
167 167 if (mop->mo_flags & MO_NODISPLAY)
168 168 continue;
169 169 if (mop->mo_flags & MO_SET) {
170 170 if (size)
171 171 size++; /* space for comma */
172 172 size += strlen(mop->mo_name);
173 173 /*
174 174 * count option value if there is one
175 175 */
176 176 if (mop->mo_arg != NULL) {
177 177 size += strlen(mop->mo_arg) + 1;
178 178 }
179 179 }
180 180 }
181 181 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
182 182 /*
183 183 * Add space for "zone=<zone_name>" if required.
184 184 */
185 185 if (size)
186 186 size++; /* space for comma */
187 187 size += sizeof ("zone=") - 1;
188 188 size += strlen(vfsp->vfs_zone->zone_name);
189 189 }
190 190 if (mntfs_enabledev) {
191 191 if (size != 0)
192 192 size++; /* space for comma */
193 193 size += mntfs_devsize(vfsp);
194 194 }
195 195 if (size == 0)
196 196 size = strlen("-");
197 197 return (size);
198 198 }
199 199
200 200 static int
201 201 mntfs_optprint(struct vfs *vfsp, char *buf)
202 202 {
203 203 int i, optinbuf = 0;
204 204 mntopt_t *mop;
205 205 char *origbuf = buf;
206 206
207 207 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
208 208 mop = &vfsp->vfs_mntopts.mo_list[i];
209 209 if (mop->mo_flags & MO_NODISPLAY)
210 210 continue;
211 211 if (mop->mo_flags & MO_SET) {
212 212 if (optinbuf)
213 213 *buf++ = ',';
214 214 else
215 215 optinbuf = 1;
216 216 buf += snprintf(buf, MAX_MNTOPT_STR,
217 217 "%s", mop->mo_name);
218 218 /*
219 219 * print option value if there is one
220 220 */
221 221 if (mop->mo_arg != NULL) {
222 222 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s",
223 223 mop->mo_arg);
224 224 }
225 225 }
226 226 }
227 227 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
228 228 if (optinbuf)
229 229 *buf++ = ',';
230 230 else
231 231 optinbuf = 1;
232 232 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s",
233 233 vfsp->vfs_zone->zone_name);
234 234 }
235 235 if (mntfs_enabledev) {
236 236 if (optinbuf++)
237 237 *buf++ = ',';
238 238 buf += mntfs_devprint(vfsp, buf);
239 239 }
240 240 if (!optinbuf) {
241 241 buf += snprintf(buf, MAX_MNTOPT_STR, "-");
242 242 }
243 243 return (buf - origbuf);
244 244 }
245 245
246 246 void
247 247 mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp)
248 248 {
249 249 struct extmnttab *tabp = &elemp->mnte_tab;
250 250 const char *resource, *mntpt;
251 251 char *cp = elemp->mnte_text;
252 252 mntpt = refstr_value(vfsp->vfs_mntpt);
253 253 resource = refstr_value(vfsp->vfs_resource);
254 254
255 255 tabp->mnt_special = 0;
256 256 if (resource != NULL && resource[0] != '\0') {
257 257 if (resource[0] != '/') {
258 258 cp += snprintf(cp, MAXPATHLEN, "%s\t", resource);
259 259 } else if (!ZONE_PATH_VISIBLE(resource, zonep)) {
260 260 /*
261 261 * Use the mount point as the resource.
262 262 */
263 263 cp += snprintf(cp, MAXPATHLEN, "%s\t",
264 264 ZONE_PATH_TRANSLATE(mntpt, zonep));
265 265 } else {
266 266 cp += snprintf(cp, MAXPATHLEN, "%s\t",
267 267 ZONE_PATH_TRANSLATE(resource, zonep));
268 268 }
269 269 } else {
270 270 cp += snprintf(cp, MAXPATHLEN, "-\t");
271 271 }
272 272
273 273 tabp->mnt_mountp = (char *)(cp - elemp->mnte_text);
274 274 if (mntpt != NULL && mntpt[0] != '\0') {
275 275 /*
276 276 * We know the mount point is visible from within the zone,
277 277 * otherwise it wouldn't be on the zone's vfs list.
278 278 */
279 279 cp += snprintf(cp, MAXPATHLEN, "%s\t",
280 280 ZONE_PATH_TRANSLATE(mntpt, zonep));
281 281 } else {
282 282 cp += snprintf(cp, MAXPATHLEN, "-\t");
283 283 }
284 284
285 285 tabp->mnt_fstype = (char *)(cp - elemp->mnte_text);
286 286 cp += snprintf(cp, MAXPATHLEN, "%s\t",
287 287 vfssw[vfsp->vfs_fstype].vsw_name);
288 288
289 289 tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text);
290 290 cp += mntfs_optprint(vfsp, cp);
291 291 *cp++ = '\t';
292 292
293 293 tabp->mnt_time = (char *)(cp - elemp->mnte_text);
294 294 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime);
295 295 *cp++ = '\n'; /* over-write snprintf's trailing null-byte */
296 296
297 297 tabp->mnt_major = getmajor(vfsp->vfs_dev);
298 298 tabp->mnt_minor = getminor(vfsp->vfs_dev);
299 299
300 300 elemp->mnte_text_size = cp - elemp->mnte_text;
301 301 elemp->mnte_vfs_ctime = vfsp->vfs_hrctime;
302 302 elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB;
303 303 }
304 304
305 305 /* Determine the length of the /etc/mnttab entry for this vfs_t. */
306 306 static size_t
307 307 mntfs_text_len(vfs_t *vfsp, zone_t *zone)
308 308 {
309 309 size_t size = 0;
310 310 const char *resource, *mntpt;
311 311 size_t mntsize;
312 312
313 313 mntpt = refstr_value(vfsp->vfs_mntpt);
314 314 if (mntpt != NULL && mntpt[0] != '\0') {
315 315 mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
316 316 } else {
317 317 mntsize = 2; /* "-\t" */
318 318 }
319 319 size += mntsize;
320 320
321 321 resource = refstr_value(vfsp->vfs_resource);
322 322 if (resource != NULL && resource[0] != '\0') {
323 323 if (resource[0] != '/') {
324 324 size += strlen(resource) + 1;
325 325 } else if (!ZONE_PATH_VISIBLE(resource, zone)) {
326 326 /*
327 327 * Same as the zone's view of the mount point.
328 328 */
329 329 size += mntsize;
330 330 } else {
331 331 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1;
332 332 }
333 333 } else {
334 334 size += 2; /* "-\t" */
335 335 }
336 336 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1;
337 337 size += mntfs_optsize(vfsp);
338 338 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime);
339 339 return (size);
340 340 }
341 341
342 342 /* Destroy the resources associated with a snapshot element. */
343 343 static void
344 344 mntfs_destroy_elem(mntelem_t *elemp)
345 345 {
346 346 kmem_free(elemp->mnte_text, elemp->mnte_text_size);
347 347 kmem_free(elemp, sizeof (mntelem_t));
348 348 }
349 349
350 350 /*
351 351 * Return 1 if the given snapshot is in the range of the given element; return
352 352 * 0 otherwise.
353 353 */
354 354 static int
355 355 mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp)
356 356 {
357 357 timespec_t *stimep = &snapp->mnts_time;
358 358 timespec_t *btimep = &elemp->mnte_birth;
359 359 timespec_t *dtimep = &elemp->mnte_death;
360 360
361 361 /*
362 362 * If a snapshot is in range of an element then the snapshot must have
363 363 * been created after the birth of the element, and either the element
364 364 * is still alive or it died after the snapshot was created.
365 365 */
366 366 if (mntfs_newest(btimep, stimep) == MNTFS_SECOND &&
367 367 (MNTFS_ELEM_IS_ALIVE(elemp) ||
368 368 mntfs_newest(stimep, dtimep) == MNTFS_SECOND))
369 369 return (1);
370 370 else
371 371 return (0);
372 372 }
373 373
374 374 /*
375 375 * Return the next valid database element, after the one provided, for a given
376 376 * snapshot; return NULL if none exists. The caller must hold the zone's
377 377 * database lock as a reader before calling this function.
378 378 */
379 379 static mntelem_t *
380 380 mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp)
381 381 {
382 382 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
383 383
384 384 do {
385 385 elemp = elemp->mnte_next;
386 386 } while (elemp &&
387 387 (!mntfs_elem_in_range(snapp, elemp) ||
388 388 (!show_hidden && elemp->mnte_hidden)));
389 389 return (elemp);
390 390 }
391 391
392 392 /*
393 393 * This function frees the resources associated with a mntsnap_t. It walks
394 394 * through the database, decrementing the reference count of any element that
395 395 * satisfies the snapshot. If the reference count of an element becomes zero
396 396 * then it is removed from the database.
397 397 */
398 398 static void
399 399 mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp)
400 400 {
401 401 zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
402 402 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
403 403 mntelem_t **elempp = &zonep->zone_mntfs_db;
404 404 mntelem_t *elemp;
405 405 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
406 406 size_t number_decremented = 0;
407 407
408 408 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
409 409
410 410 /* Ignore an uninitialised snapshot. */
411 411 if (snapp->mnts_nmnts == 0)
412 412 return;
413 413
414 414 /* Drop the holds on any matching database elements. */
415 415 rw_enter(dblockp, RW_WRITER);
416 416 while ((elemp = *elempp) != NULL) {
417 417 if (mntfs_elem_in_range(snapp, elemp) &&
418 418 (!elemp->mnte_hidden || show_hidden) &&
419 419 ++number_decremented && --elemp->mnte_refcnt == 0) {
420 420 if ((*elempp = elemp->mnte_next) != NULL)
421 421 (*elempp)->mnte_prev = elemp->mnte_prev;
422 422 mntfs_destroy_elem(elemp);
423 423 } else {
424 424 elempp = &elemp->mnte_next;
425 425 }
426 426 }
427 427 rw_exit(dblockp);
428 428 ASSERT(number_decremented == snapp->mnts_nmnts);
429 429
430 430 /* Clear the snapshot data. */
431 431 bzero(snapp, sizeof (mntsnap_t));
432 432 }
433 433
434 434 /* Insert the new database element newp after the existing element prevp. */
435 435 static void
436 436 mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp)
437 437 {
438 438 newp->mnte_prev = prevp;
439 439 newp->mnte_next = prevp->mnte_next;
440 440 prevp->mnte_next = newp;
441 441 if (newp->mnte_next != NULL)
442 442 newp->mnte_next->mnte_prev = newp;
443 443 }
444 444
445 445 /* Create and return a copy of a given database element. */
446 446 static mntelem_t *
447 447 mntfs_copy(mntelem_t *origp)
448 448 {
449 449 mntelem_t *copyp;
450 450
451 451 copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP);
452 452 copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime;
453 453 copyp->mnte_text_size = origp->mnte_text_size;
454 454 copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP);
455 455 bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size);
456 456 copyp->mnte_tab = origp->mnte_tab;
457 457 copyp->mnte_hidden = origp->mnte_hidden;
458 458
459 459 return (copyp);
460 460 }
461 461
462 462 /*
463 463 * Compare two database elements and determine whether or not the vfs_t payload
464 464 * data of each are the same. Return 1 if so and 0 otherwise.
465 465 */
466 466 static int
467 467 mntfs_is_same_element(mntelem_t *a, mntelem_t *b)
468 468 {
469 469 if (a->mnte_hidden == b->mnte_hidden &&
470 470 a->mnte_text_size == b->mnte_text_size &&
471 471 bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 &&
472 472 bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0)
473 473 return (1);
474 474 else
475 475 return (0);
476 476 }
477 477
478 478 /*
479 479 * mntfs_snapshot() updates the database, creating it if necessary, so that it
480 480 * accurately reflects the state of the in-kernel mnttab. It also increments
481 481 * the reference count on all database elements that correspond to currently-
482 482 * mounted resources. Finally, it initialises the appropriate snapshot
483 483 * structure.
484 484 *
485 485 * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs,
486 486 * when it is inserted into the in-kernel mnttab. This time stamp is copied into
487 487 * the corresponding database element when it is created, allowing the element
488 488 * and the vfs_t to be identified as a pair. It is possible that some file
489 489 * systems may make unadvertised changes to, for example, a resource's mount
490 490 * options. Therefore, in order to determine whether a database element is an
491 491 * up-to-date representation of a given vfs_t, it is compared with a temporary
492 492 * element generated for this purpose. Although less efficient, this is safer
493 493 * than implementing an mtime for a vfs_t.
494 494 *
495 495 * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These
496 496 * are considered invisible unless the user has already set the MNT_SHOWHIDDEN
497 497 * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl.
 *
 * Locking: the caller must hold mnp->mnt_contents as a writer (this is
 * asserted). The function takes the vfs list read lock and then the zone's
 * database lock as a writer, and drops both before returning.
 *
 * If snapp already describes a snapshot and the in-kernel mnttab's
 * modification time is unchanged, only the snapshot's iteration state is reset;
 * otherwise the old snapshot is freed and a new one is built.
498 498 */
499 499 static void
500 500 mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp)
501 501 {
502 502 mntdata_t *mnd = MTOD(mnp);
503 503 zone_t *zonep = mnd->mnt_zone_ref.zref_zone;
504 504 int is_global_zone = (zonep == global_zone);
505 505 int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN;
506 506 vfs_t *vfsp, *firstvfsp, *lastvfsp;
507 507 vfs_t dummyvfs;
508 508 vfs_t *dummyvfsp = NULL;
509 509 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
510 510 mntelem_t **headpp = &zonep->zone_mntfs_db;
511 511 mntelem_t *elemp;
512 512 mntelem_t *prevp = NULL;
513 513 int order;
514 514 mntelem_t *tempelemp;
515 515 mntelem_t *newp;
516 516 mntelem_t *firstp = NULL;
517 517 size_t nmnts = 0;
518 518 size_t total_text_size = 0;
519 519 size_t normal_text_size = 0;
520 520 int insert_before;
521 521 timespec_t last_mtime;
522 522 size_t entry_length, new_entry_length;
523 523
524 524
525 525 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
526 526 vfs_list_read_lock();
527 527 vfs_mnttab_modtime(&last_mtime);
528 528
529 529 /*
530 530 * If this snapshot already exists then we must have been asked to
531 531 * rewind the file, i.e. discard the snapshot and create a new one in
532 532 * its place. In this case we first see if the in-kernel mnttab has
533 533 * advertised a change; if not then we simply reinitialise the metadata.
534 534 */
535 535 if (snapp->mnts_nmnts) {
536 536 if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) ==
537 537 MNTFS_NEITHER) {
538 538 /*
539 539 * An unchanged mtime is no guarantee that the
540 540 * in-kernel mnttab is unchanged; for example, a
541 541 * concurrent remount may be between calls to
542 542 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd().
543 543 * It follows that the database may have changed, and
544 544 * in particular that some elements in this snapshot
545 545 * may have been killed by another call to
546 546 * mntfs_snapshot(). It is therefore not merely
547 547 * unnecessary to update the snapshot's time but in
548 548 * fact dangerous; it needs to be left alone.
549 549 */
550 550 snapp->mnts_next = snapp->mnts_first;
551 551 snapp->mnts_flags &= ~MNTS_REWIND;
552 552 snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
553 553 vfs_list_unlock();
554 554 return;
555 555 } else {
556 556 mntfs_freesnap(mnp, snapp);
557 557 }
558 558 }
559 559
560 560 /*
561 561 * Create a temporary database element. For each vfs_t, the temporary
562 562 * element will be populated with the corresponding text. If the vfs_t
563 563 * does not have a corresponding element within the database, or if
564 564 * there is such an element but it is stale, a copy of the temporary
565 565 * element is inserted into the database at the appropriate location.
566 566 */
567 567 tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP);
568 568 entry_length = MNT_LINE_MAX;
569 569 tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP);
570 570
571 571 /* Find the first and last vfs_t for the given zone. */
572 572 if (is_global_zone) {
573 573 firstvfsp = rootvfs;
574 574 lastvfsp = firstvfsp->vfs_prev;
575 575 } else {
576 576 firstvfsp = zonep->zone_vfslist;
577 577 /*
578 578 * If there isn't already a vfs_t for root then we create a
579 579 * dummy which will be used as the head of the list (which will
580 580 * therefore no longer be circular).
581 581 */
582 582 if (firstvfsp == NULL ||
583 583 strcmp(refstr_value(firstvfsp->vfs_mntpt),
584 584 zonep->zone_rootpath) != 0) {
585 585 /*
586 586 * The zone's vfs_ts will have mount points relative to
587 587 * the zone's root path. The vfs_t for the zone's
588 588 * root file system would therefore have a mount point
589 589 * equal to the zone's root path. Since the zone's root
590 590 * path isn't a mount point, we copy the vfs_t of the
591 591 * zone's root vnode, and provide it with a fake mount
592 592 * and resource. However, if the zone's root is a
593 593 * zfs dataset, use the dataset name as the resource.
594 594 *
595 595 * Note that by cloning another vfs_t we also acquire
596 596 * its high-resolution ctime. This might appear to
597 597 * violate the requirement that the ctimes in the list
598 598 * of vfs_ts are unique and monotonically increasing;
599 599 * this is not the case. The dummy vfs_t appears in only
600 600 * a non-global zone's vfs_t list, where the cloned
601 601 * vfs_t would not ordinarily be visible; the ctimes are
602 602 * therefore unique. The zone's root path must be
603 603 * available before the zone boots, and so its root
604 604 * vnode's vfs_t's ctime must be lower than those of any
605 605 * resources subsequently mounted by the zone. The
606 606 * ctimes are therefore monotonically increasing.
607 607 */
608 608 dummyvfs = *zonep->zone_rootvp->v_vfsp;
609 609 dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath);
610 610 if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs")
611 611 != 0)
612 612 dummyvfs.vfs_resource = dummyvfs.vfs_mntpt;
613 613 dummyvfsp = &dummyvfs;
614 614 if (firstvfsp == NULL) {
615 615 lastvfsp = dummyvfsp;
616 616 } else {
617 617 lastvfsp = firstvfsp->vfs_zone_prev;
618 618 dummyvfsp->vfs_zone_next = firstvfsp;
619 619 }
620 620 firstvfsp = dummyvfsp;
621 621 } else {
622 622 lastvfsp = firstvfsp->vfs_zone_prev;
623 623 }
624 624 }
625 625
626 626 /*
627 627 * Now walk through all the vfs_ts for this zone. For each one, find the
628 628 * corresponding database element, creating it first if necessary, and
629 629 * increment its reference count.
630 630 */
631 631 rw_enter(dblockp, RW_WRITER);
632 632 elemp = zonep->zone_mntfs_db;
633 633 /* CSTYLED */
634 634 for (vfsp = firstvfsp;;
635 635 vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) {
636 636 DTRACE_PROBE1(new__vfs, vfs_t *, vfsp);
637 637 /* Consider only visible entries. */
638 638 if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) {
639 639 /*
640 640 * Walk through the existing database looking for either
641 641 * an element that matches the current vfs_t, or for the
642 642 * correct place in which to insert a new element.
643 643 */
644 644 insert_before = 0;
645 645 for (; elemp; prevp = elemp, elemp = elemp->mnte_next) {
646 646 DTRACE_PROBE1(considering__elem, mntelem_t *,
647 647 elemp);
648 648
649 649 /* Compare the vfs_t with the element. */
650 650 order = mntfs_newest(&elemp->mnte_vfs_ctime,
651 651 &vfsp->vfs_hrctime);
652 652
653 653 /*
654 654 * If we encounter a database element newer than
655 655 * this vfs_t then we've stepped over a gap
656 656 * where the element for this vfs_t must be
657 657 * inserted.
658 658 */
659 659 if (order == MNTFS_FIRST) {
660 660 insert_before = 1;
661 661 break;
662 662 }
663 663
664 664 /* Dead elements no longer interest us. */
665 665 if (MNTFS_ELEM_IS_DEAD(elemp))
666 666 continue;
667 667
668 668 /*
669 669 * If the time stamps are the same then the
670 670 * element is a potential match for the vfs_t,
671 671 * although it may later prove to be stale.
672 672 */
673 673 if (order == MNTFS_NEITHER)
674 674 break;
675 675
676 676 /*
677 677 * This element must be older than the vfs_t.
678 678 * It must, therefore, correspond to a vfs_t
679 679 * that has been unmounted. Since the element is
680 680 * still alive, we kill it if it is visible.
681 681 */
682 682 if (!elemp->mnte_hidden || show_hidden)
683 683 vfs_mono_time(&elemp->mnte_death);
684 684 }
685 685 DTRACE_PROBE2(possible__match, vfs_t *, vfsp,
686 686 mntelem_t *, elemp);
687 687
688 688 /* Create a new database element if required. */
689 689 new_entry_length = mntfs_text_len(vfsp, zonep);
/* Grow the temporary element's text buffer if this entry would not fit. */
690 690 if (new_entry_length > entry_length) {
691 691 kmem_free(tempelemp->mnte_text, entry_length);
692 692 tempelemp->mnte_text =
693 693 kmem_alloc(new_entry_length, KM_SLEEP);
694 694 entry_length = new_entry_length;
695 695 }
696 696 mntfs_populate_text(vfsp, zonep, tempelemp);
697 697 ASSERT(tempelemp->mnte_text_size == new_entry_length);
698 698 if (elemp == NULL) {
699 699 /*
700 700 * We ran off the end of the database. Insert a
701 701 * new element at the end.
702 702 */
703 703 newp = mntfs_copy(tempelemp);
704 704 vfs_mono_time(&newp->mnte_birth);
705 705 if (prevp) {
706 706 mntfs_insert_after(newp, prevp);
707 707 } else {
708 708 newp->mnte_next = NULL;
709 709 newp->mnte_prev = NULL;
710 710 ASSERT(*headpp == NULL);
711 711 *headpp = newp;
712 712 }
713 713 elemp = newp;
714 714 } else if (insert_before) {
715 715 /*
716 716 * Insert a new element before the current one.
717 717 */
718 718 newp = mntfs_copy(tempelemp);
719 719 vfs_mono_time(&newp->mnte_birth);
720 720 if (prevp) {
721 721 mntfs_insert_after(newp, prevp);
722 722 } else {
723 723 newp->mnte_next = elemp;
724 724 newp->mnte_prev = NULL;
725 725 elemp->mnte_prev = newp;
726 726 ASSERT(*headpp == elemp);
727 727 *headpp = newp;
728 728 }
729 729 elemp = newp;
730 730 } else if (!mntfs_is_same_element(elemp, tempelemp)) {
731 731 /*
732 732 * The element corresponds to the vfs_t, but the
733 733 * vfs_t has changed; it must have been
734 734 * remounted. Kill the old element and insert a
735 735 * new one after it.
736 736 */
737 737 vfs_mono_time(&elemp->mnte_death);
738 738 newp = mntfs_copy(tempelemp);
739 739 vfs_mono_time(&newp->mnte_birth);
740 740 mntfs_insert_after(newp, elemp);
741 741 elemp = newp;
742 742 }
743 743
744 744 /* We've found the corresponding element. Hold it. */
745 745 DTRACE_PROBE1(incrementing, mntelem_t *, elemp);
746 746 elemp->mnte_refcnt++;
747 747
748 748 /*
749 749 * Update the parameters used to initialise the
750 750 * snapshot.
751 751 */
752 752 nmnts++;
753 753 total_text_size += elemp->mnte_text_size;
754 754 if (!elemp->mnte_hidden)
755 755 normal_text_size += elemp->mnte_text_size;
756 756 if (!firstp)
757 757 firstp = elemp;
758 758
759 759 prevp = elemp;
760 760 elemp = elemp->mnte_next;
761 761 }
762 762
/* The for statement has no condition of its own; stop once the last vfs_t has been handled. */
763 763 if (vfsp == lastvfsp)
764 764 break;
765 765 }
766 766
767 767 /*
768 768 * Any remaining visible database elements that are still alive must be
769 769 * killed now, because their corresponding vfs_ts must have been
770 770 * unmounted.
771 771 */
772 772 for (; elemp; elemp = elemp->mnte_next) {
773 773 if (MNTFS_ELEM_IS_ALIVE(elemp) &&
774 774 (!elemp->mnte_hidden || show_hidden))
775 775 vfs_mono_time(&elemp->mnte_death);
776 776 }
777 777
778 778 /* Initialise the snapshot. */
779 779 vfs_mono_time(&snapp->mnts_time);
780 780 snapp->mnts_last_mtime = last_mtime;
781 781 snapp->mnts_first = snapp->mnts_next = firstp;
782 782 snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0;
783 783 snapp->mnts_nmnts = nmnts;
784 784 snapp->mnts_text_size = total_text_size;
785 785 snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
786 786
787 787 /*
788 788 * Record /etc/mnttab's current size and mtime for possible future use
789 789 * by mntgetattr().
790 790 */
791 791 mnd->mnt_size = normal_text_size;
792 792 mnd->mnt_mtime = last_mtime;
793 793 if (show_hidden) {
794 794 mnd->mnt_hidden_size = total_text_size;
795 795 mnd->mnt_hidden_mtime = last_mtime;
796 796 }
797 797
798 798 /* Clean up. */
799 799 rw_exit(dblockp);
800 800 vfs_list_unlock();
/* Release the mount point string that was allocated for the dummy root vfs_t, if one was created. */
801 801 if (dummyvfsp != NULL)
802 802 refstr_rele(dummyvfsp->vfs_mntpt);
803 803 kmem_free(tempelemp->mnte_text, entry_length);
804 804 kmem_free(tempelemp, sizeof (mntelem_t));
805 805 }
806 806
807 807 /*
808 808 * Public function to convert vfs_mntopts into a string.
809 809 * A buffer of sufficient size is allocated, which is returned via bufp,
810 810 * and whose length is returned via lenp.
811 811 */
812 812 void
813 813 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp)
814 814 {
815 815 size_t len;
816 816 char *buf;
817 817
818 818 vfs_list_read_lock();
819 819
820 820 len = mntfs_optsize(vfsp) + 1;
821 821 buf = kmem_alloc(len, KM_NOSLEEP);
822 822 if (buf == NULL) {
823 823 *bufp = NULL;
824 824 vfs_list_unlock();
825 825 return;
826 826 }
827 827 buf[len - 1] = '\0';
828 828 (void) mntfs_optprint(vfsp, buf);
829 829 ASSERT(buf[len - 1] == '\0');
830 830
831 831 vfs_list_unlock();
832 832 *bufp = buf;
833 833 *lenp = len;
834 834 }
835 835
836 836 /* ARGSUSED */
837 837 static int
838 838 mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
839 839 {
840 840 vnode_t *vp = *vpp;
841 841 mntnode_t *nmnp;
842 842
843 843 /*
844 844 * Not allowed to open for writing, return error.
845 845 */
846 846 if (flag & FWRITE)
847 847 return (EPERM);
848 848 /*
849 849 * Create a new mnt/vnode for each open, this will give us a handle to
850 850 * hang the snapshot on.
851 851 */
852 852 nmnp = mntgetnode(vp);
853 853
854 854 *vpp = MTOV(nmnp);
855 855 atomic_inc_32(&MTOD(nmnp)->mnt_nopen);
856 856 VN_RELE(vp);
857 857 return (0);
858 858 }
859 859
860 860 /* ARGSUSED */
861 861 static int
862 862 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
863 863 caller_context_t *ct)
864 864 {
865 865 mntnode_t *mnp = VTOM(vp);
866 866
867 867 /* Clean up any locks or shares held by the current process */
868 868 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
869 869 cleanshares(vp, ttoproc(curthread)->p_pid);
870 870
871 871 if (count > 1)
872 872 return (0);
873 873 if (vp->v_count == 1) {
874 874 rw_enter(&mnp->mnt_contents, RW_WRITER);
875 875 mntfs_freesnap(mnp, &mnp->mnt_read);
876 876 mntfs_freesnap(mnp, &mnp->mnt_ioctl);
877 877 rw_exit(&mnp->mnt_contents);
878 878 atomic_dec_32(&MTOD(mnp)->mnt_nopen);
879 879 }
880 880 return (0);
881 881 }
882 882
/*
 * Read entry point: copy text from this mntnode's read snapshot to the
 * caller's uio.
 *
 * A new snapshot is taken if none exists yet or if the read starts at file
 * offset 0. The request is clamped to the end of the snapshot text. The
 * snapshot remembers the element and intra-element offset reached by the
 * previous successful read so that a sequential reader does not have to walk
 * the database from the start every time.
 *
 * Returns 0 on success (including a zero-length read), EFAULT if the
 * offset/length pair is not valid for the snapshot, or the error from
 * uiomove().
 */
/* ARGSUSED */
static int
mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct)
{
	mntnode_t *mnp = VTOM(vp);
	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
	mntsnap_t *snapp = &mnp->mnt_read;
	off_t off = uio->uio_offset;
	size_t len = uio->uio_resid;
	char *bufferp;
	size_t available, copylen;
	size_t written = 0;
	mntelem_t *elemp;
	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
	int error = 0;
	off_t ieoffset;

	/*
	 * mnt_contents is taken as writer because the snapshot may be
	 * replaced and its saved position is updated below.
	 */
	rw_enter(&mnp->mnt_contents, RW_WRITER);
	if (snapp->mnts_nmnts == 0 || (off == (off_t)0))
		mntfs_snapshot(mnp, snapp);

	/* Trim the request so that it ends at the end of the snapshot. */
	if ((size_t)(off + len) > snapp->mnts_text_size)
		len = snapp->mnts_text_size - off;

	/*
	 * NOTE(review): an offset beyond the end of the text presumably makes
	 * the subtraction above wrap to a very large len, which is what the
	 * second half of this test catches -- confirm against the type of
	 * mnts_text_size.
	 */
	if (off < 0 || len > snapp->mnts_text_size) {
		rw_exit(&mnp->mnt_contents);
		return (EFAULT);
	}

	/* Nothing to copy, e.g. a read positioned exactly at end of text. */
	if (len == 0) {
		rw_exit(&mnp->mnt_contents);
		return (0);
	}

	/*
	 * For the file offset provided, locate the corresponding database
	 * element and calculate the corresponding offset within its text. If
	 * the file offset is the same as that reached during the last read(2)
	 * then use the saved element and intra-element offset.
	 */
	rw_enter(dblockp, RW_READER);
	if (off == 0 || (off == snapp->mnts_foffset)) {
		elemp = snapp->mnts_next;
		ieoffset = snapp->mnts_ieoffset;
	} else {
		off_t total_off;
		/*
		 * Find the element corresponding to the requested file offset
		 * by walking through the database and summing the text sizes
		 * of the individual elements. If the requested file offset is
		 * greater than that reached on the last visit then we can start
		 * at the last seen element; otherwise, we have to start at the
		 * beginning.
		 */
		if (off > snapp->mnts_foffset) {
			elemp = snapp->mnts_next;
			/* File offset at which the saved element begins. */
			total_off = snapp->mnts_foffset - snapp->mnts_ieoffset;
		} else {
			elemp = snapp->mnts_first;
			total_off = 0;
		}
		/* Skip every element that ends before the requested offset. */
		while (off > total_off + elemp->mnte_text_size) {
			total_off += elemp->mnte_text_size;
			elemp = mntfs_get_next_elem(snapp, elemp);
			ASSERT(elemp != NULL);
		}
		/* Calculate the intra-element offset. */
		if (off > total_off)
			ieoffset = off - total_off;
		else
			ieoffset = 0;
	}

	/*
	 * Create a buffer and populate it with the text from successive
	 * database elements until it is full.
	 */
	bufferp = kmem_alloc(len, KM_SLEEP);
	while (written < len) {
		available = elemp->mnte_text_size - ieoffset;
		copylen = MIN(len - written, available);
		bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen);
		written += copylen;
		if (copylen == available) {
			/* This element is exhausted; move on to the next. */
			elemp = mntfs_get_next_elem(snapp, elemp);
			ASSERT(elemp != NULL || written == len);
			ieoffset = 0;
		} else {
			/* The buffer filled up part-way through the element. */
			ieoffset += copylen;
		}
	}
	rw_exit(dblockp);

	/*
	 * Write the populated buffer, update the snapshot's state if
	 * successful and then advertise our read.
	 */
	error = uiomove(bufferp, len, UIO_READ, uio);
	if (error == 0) {
		snapp->mnts_next = elemp;
		snapp->mnts_foffset = off + len;
		snapp->mnts_ieoffset = ieoffset;
	}
	vfs_mnttab_readop();
	rw_exit(&mnp->mnt_contents);

	/* Clean up. */
	kmem_free(bufferp, len);
	return (error);
}
993 993
994 994 static int
995 995 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
996 996 caller_context_t *ct)
997 997 {
998 998 int mask = vap->va_mask;
999 999 int error;
1000 1000 mntnode_t *mnp = VTOM(vp);
1001 1001 timespec_t mtime, old_mtime;
1002 1002 size_t size, old_size;
1003 1003 mntdata_t *mntdata = MTOD(VTOM(vp));
1004 1004 mntsnap_t *rsnapp, *isnapp;
1005 1005 extern timespec_t vfs_mnttab_ctime;
1006 1006
1007 1007
1008 1008 /* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */
1009 1009 if (mask & AT_MODE|AT_UID|AT_GID) {
1010 1010 if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct))
1011 1011 return (error);
1012 1012 }
1013 1013
1014 1014 /*
1015 1015 * There are some minor subtleties in the determination of
1016 1016 * /etc/mnttab's size and mtime. We wish to avoid any condition in
1017 1017 * which, in the vicinity of a change to the in-kernel mnttab, we
1018 1018 * return an old value for one but a new value for the other. We cannot
1019 1019 * simply hold vfslist for the entire calculation because we might need
1020 1020 * to call mntfs_snapshot(), which calls vfs_list_read_lock().
1021 1021 */
1022 1022 if (mask & AT_SIZE|AT_NBLOCKS) {
1023 1023 rw_enter(&mnp->mnt_contents, RW_WRITER);
1024 1024
1025 1025 vfs_list_read_lock();
1026 1026 vfs_mnttab_modtime(&mtime);
1027 1027 if (mnp->mnt_flags & MNT_SHOWHIDDEN) {
1028 1028 old_mtime = mntdata->mnt_hidden_mtime;
1029 1029 old_size = mntdata->mnt_hidden_size;
1030 1030 } else {
1031 1031 old_mtime = mntdata->mnt_mtime;
1032 1032 old_size = mntdata->mnt_size;
1033 1033 }
1034 1034 vfs_list_unlock();
1035 1035
1036 1036 rsnapp = &mnp->mnt_read;
1037 1037 isnapp = &mnp->mnt_ioctl;
1038 1038 if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) {
1039 1039 /*
1040 1040 * The mntnode already has at least one snapshot from
1041 1041 * which to take the size; the user will understand from
1042 1042 * mnttab(4) that the current size of the in-kernel
1043 1043 * mnttab is irrelevant.
1044 1044 */
1045 1045 size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size :
1046 1046 isnapp->mnts_text_size;
1047 1047 } else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) {
1048 1048 /*
1049 1049 * There is no existing valid snapshot but the in-kernel
1050 1050 * mnttab has not changed since the time that the last
1051 1051 * one was generated. Use the old file size; note that
1052 1052 * it is guaranteed to be consistent with mtime, which
1053 1053 * may be returned to the user later.
1054 1054 */
1055 1055 size = old_size;
1056 1056 } else {
1057 1057 /*
1058 1058 * There is no snapshot and the in-kernel mnttab has
1059 1059 * changed since the last one was created. We generate a
1060 1060 * new snapshot which we use for not only the size but
1061 1061 * also the mtime, thereby ensuring that the two are
1062 1062 * consistent.
1063 1063 */
1064 1064 mntfs_snapshot(mnp, rsnapp);
1065 1065 size = rsnapp->mnts_text_size;
1066 1066 mtime = rsnapp->mnts_last_mtime;
1067 1067 mntfs_freesnap(mnp, rsnapp);
1068 1068 }
1069 1069
1070 1070 rw_exit(&mnp->mnt_contents);
1071 1071 } else if (mask & AT_ATIME|AT_MTIME) {
1072 1072 vfs_list_read_lock();
1073 1073 vfs_mnttab_modtime(&mtime);
1074 1074 vfs_list_unlock();
1075 1075 }
1076 1076
1077 1077 /* Always look like a regular file. */
1078 1078 if (mask & AT_TYPE)
1079 1079 vap->va_type = VREG;
1080 1080 /* Mode should basically be read only. */
1081 1081 if (mask & AT_MODE)
1082 1082 vap->va_mode &= 07444;
1083 1083 if (mask & AT_FSID)
1084 1084 vap->va_fsid = vp->v_vfsp->vfs_dev;
1085 1085 /* Nodeid is always ROOTINO. */
1086 1086 if (mask & AT_NODEID)
1087 1087 vap->va_nodeid = (ino64_t)MNTROOTINO;
1088 1088 /*
1089 1089 * Set nlink to the number of open vnodes for mnttab info
1090 1090 * plus one for existing.
1091 1091 */
1092 1092 if (mask & AT_NLINK)
1093 1093 vap->va_nlink = mntdata->mnt_nopen + 1;
1094 1094 if (mask & AT_SIZE)
1095 1095 vap->va_size = size;
1096 1096 if (mask & AT_ATIME)
1097 1097 vap->va_atime = mtime;
1098 1098 if (mask & AT_MTIME)
1099 1099 vap->va_mtime = mtime;
1100 1100 if (mask & AT_CTIME)
1101 1101 vap->va_ctime = vfs_mnttab_ctime;
1102 1102 if (mask & AT_RDEV)
1103 1103 vap->va_rdev = 0;
1104 1104 if (mask & AT_BLKSIZE)
1105 1105 vap->va_blksize = DEV_BSIZE;
1106 1106 if (mask & AT_NBLOCKS)
1107 1107 vap->va_nblocks = btod(size);
1108 1108 if (mask & AT_SEQ)
1109 1109 vap->va_seq = 0;
1110 1110
1111 1111 return (0);
1112 1112 }
1113 1113
1114 1114 static int
1115 1115 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr,
1116 1116 caller_context_t *ct)
1117 1117 {
1118 1118 mntnode_t *mnp = VTOM(vp);
1119 1119
1120 1120 if (mode & (VWRITE|VEXEC))
1121 1121 return (EROFS);
1122 1122
1123 1123 /*
1124 1124 * Do access check on the underlying directory vnode.
1125 1125 */
1126 1126 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct));
1127 1127 }
1128 1128
1129 1129
1130 1130 /*
1131 1131 * New /mntfs vnode required; allocate it and fill in most of the fields.
1132 1132 */
1133 1133 static mntnode_t *
1134 1134 mntgetnode(vnode_t *dp)
1135 1135 {
1136 1136 mntnode_t *mnp;
1137 1137 vnode_t *vp;
1138 1138
1139 1139 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP);
1140 1140 mnp->mnt_vnode = vn_alloc(KM_SLEEP);
1141 1141 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp;
1142 1142 rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL);
1143 1143 vp = MTOV(mnp);
1144 1144 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
1145 1145 vn_setops(vp, mntvnodeops);
1146 1146 vp->v_vfsp = dp->v_vfsp;
1147 1147 vp->v_type = VREG;
1148 1148 vp->v_data = (caddr_t)mnp;
1149 1149
1150 1150 return (mnp);
1151 1151 }
1152 1152
1153 1153 /*
1154 1154 * Free the storage obtained from mntgetnode().
1155 1155 */
1156 1156 static void
1157 1157 mntfreenode(mntnode_t *mnp)
1158 1158 {
1159 1159 vnode_t *vp = MTOV(mnp);
1160 1160
1161 1161 rw_destroy(&mnp->mnt_contents);
1162 1162 vn_invalid(vp);
1163 1163 vn_free(vp);
1164 1164 kmem_free(mnp, sizeof (*mnp));
1165 1165 }
1166 1166
1167 1167
1168 1168 /* ARGSUSED */
1169 1169 static int
1170 1170 mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1171 1171 {
1172 1172 return (0);
1173 1173 }
1174 1174
1175 1175 /* ARGSUSED */
1176 1176 static void
1177 1177 mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1178 1178 {
1179 1179 mntnode_t *mnp = VTOM(vp);
1180 1180
1181 1181 mntfreenode(mnp);
1182 1182 }
1183 1183
1184 1184 /*
1185 1185 * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding
1186 1186 * has a special meaning for /etc/mnttab: it forces mntfs to refresh the
1187 1187 * snapshot at the next ioctl().
1188 1188 *
1189 1189 * mnttab(4) explains that "the snapshot...is taken any time a read(2) is
1190 1190 * performed at offset 0". We therefore ignore the read snapshot here.
1191 1191 */
1192 1192 /* ARGSUSED */
1193 1193 static int
1194 1194 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1195 1195 {
1196 1196 mntnode_t *mnp = VTOM(vp);
1197 1197
1198 1198 if (*noffp == 0) {
1199 1199 rw_enter(&mnp->mnt_contents, RW_WRITER);
1200 1200 mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND;
1201 1201 rw_exit(&mnp->mnt_contents);
1202 1202 }
1203 1203
1204 1204 return (0);
1205 1205 }
1206 1206
1207 1207 /*
1208 1208 * Return the answer requested to poll().
1209 1209 * POLLRDBAND will return when the mtime of the mnttab
1210 1210 * information is newer than the latest one read for this open.
1211 1211 */
1212 1212 /* ARGSUSED */
1213 1213 static int
1214 1214 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp,
1215 1215 caller_context_t *ct)
1216 1216 {
1217 1217 mntnode_t *mnp = VTOM(vp);
1218 1218 mntsnap_t *snapp;
1219 1219
1220 1220 rw_enter(&mnp->mnt_contents, RW_READER);
1221 1221 if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime,
1222 1222 &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST)
1223 1223 snapp = &mnp->mnt_ioctl;
1224 1224 else
1225 1225 snapp = &mnp->mnt_read;
1226 1226
1227 1227 *revp = 0;
1228 1228 *phpp = (pollhead_t *)NULL;
1229 1229 if (ev & POLLIN)
1230 1230 *revp |= POLLIN;
1231 1231
1232 1232 if (ev & POLLRDNORM)
1233 1233 *revp |= POLLRDNORM;
1234 1234
1235 1235 if (ev & POLLRDBAND) {
1236 1236 vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp);
1237 1237 if (*phpp == (pollhead_t *)NULL)
1238 1238 *revp |= POLLRDBAND;
1239 1239 }
1240 1240 rw_exit(&mnp->mnt_contents);
1241 1241
1242 1242 if (*revp || *phpp != NULL || any) {
1243 1243 return (0);
1244 1244 }
1245 1245 /*
1246 1246 * If someone is polling an unsupported poll events (e.g.
1247 1247 * POLLOUT, POLLPRI, etc.), just return POLLERR revents.
1248 1248 * That way we will ensure that we don't return a 0
1249 1249 * revents with a NULL pollhead pointer.
1250 1250 */
1251 1251 *revp = POLLERR;
1252 1252 return (0);
1253 1253 }
1254 1254
/*
 * mntfs_same_word() compares two words in the context of MNTIOC_GETMNTANY.
 * It returns 1 if the words are the same and 0 otherwise.
 *
 * worda is a memory address that lies somewhere in the buffer bufa, whose
 * size is sizea. It cannot be NULL, since NULL is how getmntany(3C) is told
 * that the user does not wish to match a particular field. The text at worda
 * is supplied by the user; if it is not null-terminated within bufa then it
 * cannot match.
 *
 * Buffer bufb, of size sizeb, contains a line from /etc/mnttab, in which the
 * fields are delimited by tab or new-line characters. offb is the offset of
 * the second word within this buffer.
 *
 * The words match when the first differing position holds the terminating
 * null byte of worda and a tab or new-line in bufb, and that position lies
 * inside both buffers.
 */
int
mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb,
    size_t sizeb)
{
	char *pa = worda;
	char *pb = bufb + offb;
	int left;

	ASSERT(worda != NULL);

	/* Never look beyond the end of either buffer. */
	left = MIN(((bufa + sizea) - worda), ((bufb + sizeb) - pb));

	/* Step over the common prefix. */
	for (; left != 0 && *pa == *pb; left--) {
		pa++;
		pb++;
	}

	return (left != 0 && *pa == '\0' && (*pb == '\t' || *pb == '\n'));
}
1292 1292
1293 1293 /*
1294 1294 * mntfs_special_info_string() returns which, if either, of VBLK or VCHR
1295 1295 * corresponds to a supplied path. If the path is a special device then the
1296 1296 * function optionally sets the major and minor numbers.
1297 1297 */
1298 1298 vtype_t
1299 1299 mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr)
1300 1300 {
1301 1301 vattr_t vattr;
1302 1302 vnode_t *vp;
1303 1303 vtype_t type;
1304 1304 int error;
1305 1305
1306 1306 if (path == NULL || *path != '/' ||
1307 1307 lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir))
1308 1308 return (0);
1309 1309
1310 1310 vattr.va_mask = AT_TYPE | AT_RDEV;
1311 1311 error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL);
1312 1312 VN_RELE(vp);
1313 1313
1314 1314 if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) {
1315 1315 if (major && minor) {
1316 1316 *major = getmajor(vattr.va_rdev);
1317 1317 *minor = getminor(vattr.va_rdev);
1318 1318 }
1319 1319 return (type);
1320 1320 } else {
1321 1321 return (0);
1322 1322 }
1323 1323 }
1324 1324
1325 1325 /*
1326 1326 * mntfs_special_info_element() extracts the name of the mounted resource
1327 1327 * for a given element and copies it into a null-terminated string, which it
1328 1328 * then passes to mntfs_special_info_string().
1329 1329 */
1330 1330 vtype_t
1331 1331 mntfs_special_info_element(mntelem_t *elemp, cred_t *cr)
1332 1332 {
1333 1333 char *newpath;
1334 1334 vtype_t type;
1335 1335
1336 1336 newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP);
1337 1337 bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp));
1338 1338 *(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0';
1339 1339 type = mntfs_special_info_string(newpath, NULL, NULL, cr);
1340 1340 kmem_free(newpath, elemp->mnte_text_size);
1341 1341
1342 1342 return (type);
1343 1343 }
1344 1344
/*
 * Translate an address that points to a byte within a user buffer into the
 * address of the byte at the same offset within a kernel buffer.
 *
 * uaddr is the user address, ubufp the start of the user buffer, kbufp the
 * start of the kernel buffer and bufsize the size of both. An address that
 * does not lie within [ubufp, ubufp + bufsize) is translated to NULL; a NULL
 * uaddr normally falls below ubufp and so also yields NULL.
 */
char *
mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize)
{
	char *uend = ubufp + bufsize;

	if (uaddr >= ubufp && uaddr < uend)
		return (kbufp + (uaddr - ubufp));

	return (NULL);
}
1359 1359
/*
 * These 32-bit versions are to support STRUCT_DECL(9F) etc. in
 * mntfs_copyout_elem() and mntioctl(). They are only compiled when
 * _SYSCALL32_IMPL is defined. Members that the code in this file accesses
 * with the pointer macros STRUCT_FGETP()/STRUCT_FSETP() are declared here as
 * uint32_t.
 */
#ifdef _SYSCALL32_IMPL
/* 32-bit counterpart of struct extmnttab. */
typedef struct extmnttab32 {
	uint32_t	mnt_special;
	uint32_t	mnt_mountp;
	uint32_t	mnt_fstype;
	uint32_t	mnt_mntopts;
	uint32_t	mnt_time;
	uint_t		mnt_major;
	uint_t		mnt_minor;
} extmnttab32_t;

/* 32-bit counterpart of struct mnttab; extmnttab32 without major/minor. */
typedef struct mnttab32 {
	uint32_t	mnt_special;
	uint32_t	mnt_mountp;
	uint32_t	mnt_fstype;
	uint32_t	mnt_mntopts;
	uint32_t	mnt_time;
} mnttab32_t;

/* 32-bit counterpart of struct mntentbuf. */
struct mntentbuf32 {
	uint32_t	mbuf_emp;
	uint_t		mbuf_bufsize;
	uint32_t	mbuf_buf;
};
#endif
1389 1389
1390 1390 /*
1391 1391 * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT,
1392 1392 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identifed the
1393 1393 * database element desired by the user, this function copies out the text and
1394 1394 * the pointers to the relevant userland addresses. It returns 0 on success
1395 1395 * and non-zero otherwise.
1396 1396 */
1397 1397 int
1398 1398 mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp,
1399 1399 char *ubufp, int cmd, int datamodel)
1400 1400 {
1401 1401 STRUCT_DECL(extmnttab, ktab);
1402 1402 char *dbbufp = elemp->mnte_text;
1403 1403 size_t dbbufsize = elemp->mnte_text_size;
1404 1404 struct extmnttab *dbtabp = &elemp->mnte_tab;
1405 1405 size_t ssize;
1406 1406 char *kbufp;
1407 1407 int error = 0;
1408 1408
1409 1409
1410 1410 /*
1411 1411 * We create a struct extmnttab within the kernel of the size
1412 1412 * determined by the user's data model. We then populate its
1413 1413 * fields by combining the start address of the text buffer
1414 1414 * supplied by the user, ubufp, with the offsets stored for
1415 1415 * this database element within dbtabp, a pointer to a struct
1416 1416 * extmnttab.
1417 1417 *
1418 1418 * Note that if the corresponding field is "-" this signifies
1419 1419 * no real content, and we set the address to NULL. This does
1420 1420 * not apply to mnt_time.
1421 1421 */
1422 1422 STRUCT_INIT(ktab, datamodel);
1423 1423 STRUCT_FSETP(ktab, mnt_special,
1424 1424 MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL);
1425 1425 STRUCT_FSETP(ktab, mnt_mountp,
1426 1426 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ?
1427 1427 ubufp + (off_t)dbtabp->mnt_mountp : NULL);
1428 1428 STRUCT_FSETP(ktab, mnt_fstype,
1429 1429 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ?
1430 1430 ubufp + (off_t)dbtabp->mnt_fstype : NULL);
1431 1431 STRUCT_FSETP(ktab, mnt_mntopts,
1432 1432 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ?
1433 1433 ubufp + (off_t)dbtabp->mnt_mntopts : NULL);
1434 1434 STRUCT_FSETP(ktab, mnt_time,
1435 1435 ubufp + (off_t)dbtabp->mnt_time);
1436 1436 if (cmd == MNTIOC_GETEXTMNTENT) {
1437 1437 STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major);
1438 1438 STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor);
1439 1439 ssize = SIZEOF_STRUCT(extmnttab, datamodel);
1440 1440 } else {
1441 1441 ssize = SIZEOF_STRUCT(mnttab, datamodel);
1442 1442 }
1443 1443 if (copyout(STRUCT_BUF(ktab), uemp, ssize))
1444 1444 return (EFAULT);
1445 1445
1446 1446 /*
1447 1447 * We create a text buffer in the kernel into which we copy the
1448 1448 * /etc/mnttab entry for this element. We change the tab and
1449 1449 * new-line delimiters to null bytes before copying out the
1450 1450 * buffer.
1451 1451 */
1452 1452 kbufp = kmem_alloc(dbbufsize, KM_SLEEP);
1453 1453 bcopy(elemp->mnte_text, kbufp, dbbufsize);
1454 1454 *(kbufp + (off_t)dbtabp->mnt_mountp - 1) =
1455 1455 *(kbufp + (off_t)dbtabp->mnt_fstype - 1) =
1456 1456 *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) =
1457 1457 *(kbufp + (off_t)dbtabp->mnt_time - 1) =
1458 1458 *(kbufp + dbbufsize - 1) = '\0';
1459 1459 if (copyout(kbufp, ubufp, dbbufsize))
1460 1460 error = EFAULT;
1461 1461
1462 1462 kmem_free(kbufp, dbbufsize);
1463 1463 return (error);
1464 1464 }
1465 1465
1466 1466 /* ARGSUSED */
1467 1467 static int
1468 1468 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
1469 1469 int *rvalp, caller_context_t *ct)
1470 1470 {
1471 1471 uint_t *up = (uint_t *)arg;
1472 1472 mntnode_t *mnp = VTOM(vp);
1473 1473 mntsnap_t *snapp = &mnp->mnt_ioctl;
1474 1474 int error = 0;
1475 1475 zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
1476 1476 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
1477 1477 model_t datamodel = flag & DATAMODEL_MASK;
1478 1478
1479 1479 switch (cmd) {
1480 1480
1481 1481 case MNTIOC_NMNTS: /* get no. of mounted resources */
1482 1482 {
1483 1483 rw_enter(&mnp->mnt_contents, RW_READER);
1484 1484 if (snapp->mnts_nmnts == 0 ||
1485 1485 (snapp->mnts_flags & MNTS_REWIND)) {
1486 1486 if (!rw_tryupgrade(&mnp->mnt_contents)) {
1487 1487 rw_exit(&mnp->mnt_contents);
1488 1488 rw_enter(&mnp->mnt_contents, RW_WRITER);
1489 1489 }
1490 1490 if (snapp->mnts_nmnts == 0 ||
1491 1491 (snapp->mnts_flags & MNTS_REWIND))
1492 1492 mntfs_snapshot(mnp, snapp);
1493 1493 }
1494 1494 rw_exit(&mnp->mnt_contents);
1495 1495
1496 1496 if (suword32(up, snapp->mnts_nmnts) != 0)
1497 1497 error = EFAULT;
1498 1498 break;
1499 1499 }
1500 1500
1501 1501 case MNTIOC_GETDEVLIST: /* get mounted device major/minor nos */
1502 1502 {
1503 1503 size_t len;
1504 1504 uint_t *devlist;
1505 1505 mntelem_t *elemp;
1506 1506 int i = 0;
1507 1507
1508 1508 rw_enter(&mnp->mnt_contents, RW_READER);
1509 1509 if (snapp->mnts_nmnts == 0 ||
1510 1510 (snapp->mnts_flags & MNTS_REWIND)) {
1511 1511 if (!rw_tryupgrade(&mnp->mnt_contents)) {
1512 1512 rw_exit(&mnp->mnt_contents);
1513 1513 rw_enter(&mnp->mnt_contents, RW_WRITER);
1514 1514 }
1515 1515 if (snapp->mnts_nmnts == 0 ||
1516 1516 (snapp->mnts_flags & MNTS_REWIND))
1517 1517 mntfs_snapshot(mnp, snapp);
1518 1518 rw_downgrade(&mnp->mnt_contents);
1519 1519 }
1520 1520
1521 1521 /* Create a local buffer to hold the device numbers. */
1522 1522 len = 2 * snapp->mnts_nmnts * sizeof (uint_t);
1523 1523 devlist = kmem_alloc(len, KM_SLEEP);
1524 1524
1525 1525 /*
1526 1526 * Walk the database elements for this snapshot and add their
1527 1527 * major and minor numbers.
1528 1528 */
1529 1529 rw_enter(dblockp, RW_READER);
1530 1530 for (elemp = snapp->mnts_first; elemp;
1531 1531 elemp = mntfs_get_next_elem(snapp, elemp)) {
1532 1532 devlist[2 * i] = elemp->mnte_tab.mnt_major;
1533 1533 devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor;
1534 1534 i++;
1535 1535 }
1536 1536 rw_exit(dblockp);
1537 1537 ASSERT(i == snapp->mnts_nmnts);
1538 1538 rw_exit(&mnp->mnt_contents);
1539 1539
1540 1540 error = xcopyout(devlist, up, len);
1541 1541 kmem_free(devlist, len);
1542 1542 break;
1543 1543 }
1544 1544
1545 1545 case MNTIOC_SETTAG: /* set tag on mounted file system */
1546 1546 case MNTIOC_CLRTAG: /* clear tag on mounted file system */
1547 1547 {
1548 1548 struct mnttagdesc *dp = (struct mnttagdesc *)arg;
1549 1549 STRUCT_DECL(mnttagdesc, tagdesc);
1550 1550 char *cptr;
1551 1551 uint32_t major, minor;
1552 1552 char tagbuf[MAX_MNTOPT_TAG];
1553 1553 char *pbuf;
1554 1554 size_t len;
1555 1555 uint_t start = 0;
1556 1556 mntdata_t *mntdata = MTOD(mnp);
1557 1557 zone_t *zone = mntdata->mnt_zone_ref.zref_zone;
1558 1558
1559 1559 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK);
1560 1560 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
1561 1561 error = EFAULT;
1562 1562 break;
1563 1563 }
1564 1564 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1565 1565 if (zone != global_zone) {
1566 1566 (void) strcpy(pbuf, zone->zone_rootpath);
1567 1567 /* truncate "/" and nul */
1568 1568 start = zone->zone_rootpathlen - 2;
1569 1569 ASSERT(pbuf[start] == '/');
1570 1570 }
1571 1571 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
1572 1572 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
1573 1573 if (error) {
1574 1574 kmem_free(pbuf, MAXPATHLEN);
1575 1575 break;
1576 1576 }
1577 1577 if (start != 0 && pbuf[start] != '/') {
1578 1578 kmem_free(pbuf, MAXPATHLEN);
1579 1579 error = EINVAL;
1580 1580 break;
1581 1581 }
1582 1582 cptr = STRUCT_FGETP(tagdesc, mtd_tag);
1583 1583 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
1584 1584 kmem_free(pbuf, MAXPATHLEN);
1585 1585 break;
1586 1586 }
1587 1587 major = STRUCT_FGET(tagdesc, mtd_major);
1588 1588 minor = STRUCT_FGET(tagdesc, mtd_minor);
1589 1589 if (cmd == MNTIOC_SETTAG)
1590 1590 error = vfs_settag(major, minor, pbuf, tagbuf, cr);
1591 1591 else
1592 1592 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
1593 1593 kmem_free(pbuf, MAXPATHLEN);
1594 1594 break;
1595 1595 }
1596 1596
1597 1597 case MNTIOC_SHOWHIDDEN:
1598 1598 {
1599 1599 rw_enter(&mnp->mnt_contents, RW_WRITER);
1600 1600 mnp->mnt_flags |= MNT_SHOWHIDDEN;
1601 1601 rw_exit(&mnp->mnt_contents);
1602 1602 break;
1603 1603 }
1604 1604
1605 1605 case MNTIOC_GETMNTANY:
1606 1606 {
1607 1607 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */
1608 1608 STRUCT_DECL(extmnttab, ktab); /* Out copy of user's emp */
1609 1609 struct extmnttab *uemp; /* uaddr of user's emp */
1610 1610 char *ubufp; /* uaddr of user's text buf */
1611 1611 size_t ubufsize; /* size of the above */
1612 1612 struct extmnttab preftab; /* our version of user's emp */
1613 1613 char *prefbuf; /* our copy of user's text */
1614 1614 mntelem_t *elemp; /* a database element */
1615 1615 struct extmnttab *dbtabp; /* element's extmnttab */
1616 1616 char *dbbufp; /* element's text buf */
1617 1617 size_t dbbufsize; /* size of the above */
1618 1618 vtype_t type; /* type, if any, of special */
1619 1619
1620 1620
1621 1621 /*
1622 1622 * embuf is a struct embuf within the kernel. We copy into it
1623 1623 * the struct embuf supplied by the user.
1624 1624 */
1625 1625 STRUCT_INIT(embuf, datamodel);
1626 1626 if (copyin((void *) arg, STRUCT_BUF(embuf),
1627 1627 STRUCT_SIZE(embuf))) {
1628 1628 error = EFAULT;
1629 1629 break;
1630 1630 }
1631 1631 uemp = STRUCT_FGETP(embuf, mbuf_emp);
1632 1632 ubufp = STRUCT_FGETP(embuf, mbuf_buf);
1633 1633 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
1634 1634
1635 1635 /*
1636 1636 * Check that the text buffer offered by the user is the
1637 1637 * agreed size.
1638 1638 */
1639 1639 if (ubufsize != MNT_LINE_MAX) {
1640 1640 error = EINVAL;
1641 1641 break;
1642 1642 }
1643 1643
1644 1644 /* Copy the user-supplied entry into a local buffer. */
1645 1645 prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP);
1646 1646 if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) {
1647 1647 kmem_free(prefbuf, MNT_LINE_MAX);
1648 1648 error = EFAULT;
1649 1649 break;
1650 1650 }
1651 1651
1652 1652 /* Ensure that any string within it is null-terminated. */
1653 1653 *(prefbuf + MNT_LINE_MAX - 1) = 0;
1654 1654
1655 1655 /* Copy in the user-supplied mpref */
1656 1656 STRUCT_INIT(ktab, datamodel);
1657 1657 if (copyin(uemp, STRUCT_BUF(ktab),
1658 1658 SIZEOF_STRUCT(mnttab, datamodel))) {
1659 1659 kmem_free(prefbuf, MNT_LINE_MAX);
1660 1660 error = EFAULT;
1661 1661 break;
1662 1662 }
1663 1663
1664 1664 /*
1665 1665 * Copy the members of the user's pref struct into a local
1666 1666 * struct. The pointers need to be offset and verified to
1667 1667 * ensure that they lie within the bounds of the buffer.
1668 1668 */
1669 1669 preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab,
1670 1670 mnt_special), ubufp, prefbuf, MNT_LINE_MAX);
1671 1671 preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab,
1672 1672 mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX);
1673 1673 preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab,
1674 1674 mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX);
1675 1675 preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab,
1676 1676 mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX);
1677 1677 preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab,
1678 1678 mnt_time), ubufp, prefbuf, MNT_LINE_MAX);
1679 1679
1680 1680 /*
1681 1681 * If the user specifies a mounted resource that is a special
1682 1682 * device then we capture its mode and major and minor numbers;
1683 1683 * cf. the block comment below.
1684 1684 */
1685 1685 type = mntfs_special_info_string(preftab.mnt_special,
1686 1686 &preftab.mnt_major, &preftab.mnt_minor, cr);
1687 1687
1688 1688 rw_enter(&mnp->mnt_contents, RW_WRITER);
1689 1689 if (snapp->mnts_nmnts == 0 ||
1690 1690 (snapp->mnts_flags & MNTS_REWIND))
1691 1691 mntfs_snapshot(mnp, snapp);
1692 1692
1693 1693 /*
1694 1694 * This is the core functionality that implements getmntany().
1695 1695 * We walk through the mntfs database until we find an element
1696 1696 * matching the user's preferences that are contained in
1697 1697 * preftab. Typically, this means checking that the text
1698 1698 * matches. However, the mounted resource is special: if the
1699 1699 * user is looking for a special device then we must find a
1700 1700 * database element with the same major and minor numbers and
1701 1701 * the same type, i.e. VBLK or VCHR. The type is not recorded
1702 1702 * in the element because it cannot be inferred from the vfs_t.
1703 1703 * We therefore check the type of suitable candidates via
1704 1704 * mntfs_special_info_element(); since this calls into the
1705 1705 * underlying file system we make sure to drop the database lock
1706 1706 * first.
1707 1707 */
1708 1708 elemp = snapp->mnts_next;
1709 1709 rw_enter(dblockp, RW_READER);
1710 1710 for (;;) {
1711 1711 for (; elemp; elemp = mntfs_get_next_elem(snapp,
1712 1712 elemp)) {
1713 1713 dbtabp = &elemp->mnte_tab;
1714 1714 dbbufp = elemp->mnte_text;
1715 1715 dbbufsize = elemp->mnte_text_size;
1716 1716
1717 1717 if (((type &&
1718 1718 dbtabp->mnt_major == preftab.mnt_major &&
1719 1719 dbtabp->mnt_minor == preftab.mnt_minor &&
1720 1720 MNTFS_REAL_FIELD(dbbufp)) ||
1721 1721 (!type && (!preftab.mnt_special ||
1722 1722 mntfs_same_word(preftab.mnt_special,
1723 1723 prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp,
1724 1724 dbbufsize)))) &&
1725 1725
1726 1726 (!preftab.mnt_mountp || mntfs_same_word(
1727 1727 preftab.mnt_mountp, prefbuf, MNT_LINE_MAX,
1728 1728 (off_t)dbtabp->mnt_mountp, dbbufp,
1729 1729 dbbufsize)) &&
1730 1730
1731 1731 (!preftab.mnt_fstype || mntfs_same_word(
1732 1732 preftab.mnt_fstype, prefbuf, MNT_LINE_MAX,
1733 1733 (off_t)dbtabp->mnt_fstype, dbbufp,
1734 1734 dbbufsize)) &&
1735 1735
1736 1736 (!preftab.mnt_mntopts || mntfs_same_word(
1737 1737 preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX,
1738 1738 (off_t)dbtabp->mnt_mntopts, dbbufp,
1739 1739 dbbufsize)) &&
1740 1740
1741 1741 (!preftab.mnt_time || mntfs_same_word(
1742 1742 preftab.mnt_time, prefbuf, MNT_LINE_MAX,
1743 1743 (off_t)dbtabp->mnt_time, dbbufp,
1744 1744 dbbufsize)))
1745 1745 break;
1746 1746 }
1747 1747 rw_exit(dblockp);
1748 1748
1749 1749 if (elemp == NULL || type == 0 ||
1750 1750 type == mntfs_special_info_element(elemp, cr))
1751 1751 break;
1752 1752
1753 1753 rw_enter(dblockp, RW_READER);
1754 1754 elemp = mntfs_get_next_elem(snapp, elemp);
1755 1755 }
1756 1756
1757 1757 kmem_free(prefbuf, MNT_LINE_MAX);
1758 1758
1759 1759 /* If we failed to find a match then return EOF. */
1760 1760 if (elemp == NULL) {
1761 1761 rw_exit(&mnp->mnt_contents);
1762 1762 *rvalp = MNTFS_EOF;
1763 1763 break;
1764 1764 }
1765 1765
1766 1766 /*
1767 1767 * Check that the text buffer offered by the user will be large
1768 1768 * enough to accommodate the text for this entry.
1769 1769 */
1770 1770 if (elemp->mnte_text_size > MNT_LINE_MAX) {
1771 1771 rw_exit(&mnp->mnt_contents);
1772 1772 *rvalp = MNTFS_TOOLONG;
1773 1773 break;
1774 1774 }
1775 1775
1776 1776 /*
1777 1777 * Populate the user's struct mnttab and text buffer using the
1778 1778 * element's contents.
1779 1779 */
1780 1780 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
1781 1781 error = EFAULT;
1782 1782 } else {
1783 1783 rw_enter(dblockp, RW_READER);
1784 1784 elemp = mntfs_get_next_elem(snapp, elemp);
1785 1785 rw_exit(dblockp);
1786 1786 snapp->mnts_next = elemp;
1787 1787 }
1788 1788 rw_exit(&mnp->mnt_contents);
1789 1789 break;
1790 1790 }
1791 1791
1792 1792 case MNTIOC_GETMNTENT:
1793 1793 case MNTIOC_GETEXTMNTENT:
1794 1794 {
1795 1795 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */
1796 1796 struct extmnttab *uemp; /* uaddr of user's emp */
1797 1797 char *ubufp; /* uaddr of user's text buf */
1798 1798 size_t ubufsize; /* size of the above */
1799 1799 mntelem_t *elemp; /* a database element */
1800 1800
1801 1801
1802 1802 rw_enter(&mnp->mnt_contents, RW_WRITER);
1803 1803 if (snapp->mnts_nmnts == 0 ||
1804 1804 (snapp->mnts_flags & MNTS_REWIND))
1805 1805 mntfs_snapshot(mnp, snapp);
1806 1806 if ((elemp = snapp->mnts_next) == NULL) {
1807 1807 rw_exit(&mnp->mnt_contents);
1808 1808 *rvalp = MNTFS_EOF;
1809 1809 break;
1810 1810 }
1811 1811
1812 1812 /*
1813 1813 * embuf is a struct embuf within the kernel. We copy into it
1814 1814 * the struct embuf supplied by the user.
1815 1815 */
1816 1816 STRUCT_INIT(embuf, datamodel);
1817 1817 if (copyin((void *) arg, STRUCT_BUF(embuf),
1818 1818 STRUCT_SIZE(embuf))) {
1819 1819 rw_exit(&mnp->mnt_contents);
1820 1820 error = EFAULT;
1821 1821 break;
1822 1822 }
1823 1823 uemp = STRUCT_FGETP(embuf, mbuf_emp);
1824 1824 ubufp = STRUCT_FGETP(embuf, mbuf_buf);
1825 1825 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
1826 1826
1827 1827 /*
1828 1828 * Check that the text buffer offered by the user will be large
1829 1829 * enough to accommodate the text for this entry.
1830 1830 */
1831 1831 if (elemp->mnte_text_size > ubufsize) {
1832 1832 rw_exit(&mnp->mnt_contents);
1833 1833 *rvalp = MNTFS_TOOLONG;
1834 1834 break;
1835 1835 }
1836 1836
1837 1837 /*
1838 1838 * Populate the user's struct mnttab and text buffer using the
1839 1839 * element's contents.
1840 1840 */
1841 1841 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
1842 1842 error = EFAULT;
1843 1843 } else {
1844 1844 rw_enter(dblockp, RW_READER);
1845 1845 elemp = mntfs_get_next_elem(snapp, elemp);
1846 1846 rw_exit(dblockp);
1847 1847 snapp->mnts_next = elemp;
1848 1848 }
1849 1849 rw_exit(&mnp->mnt_contents);
1850 1850 break;
1851 1851 }
1852 1852
1853 1853 default:
1854 1854 error = EINVAL;
1855 1855 break;
1856 1856 }
1857 1857
1858 1858 return (error);
1859 1859 }
1860 1860
1861 1861 /*
1862 1862 * mntfs provides a new vnode for each open(2). Two vnodes will represent the
1863 1863 * same instance of /etc/mnttab if they share the same (zone-specific) vfs.
1864 1864 */
1865 1865 /* ARGSUSED */
↓ open down ↓ |
1865 lines elided |
↑ open up ↑ |
1866 1866 int
1867 1867 mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
1868 1868 {
/*
 * Evaluates to 1 only when both vnode pointers are non-NULL and their
 * v_vfsp fields are equal, and to 0 otherwise; ct is not consulted.
 */
1869 1869 return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp);
1870 1870 }
1871 1871
1872 1872 /*
1873 1873 * /mntfs vnode operations vector
1874 1874 */
1875 1875 const fs_operation_def_t mnt_vnodeops_template[] = {
/*
 * Each entry pairs a VOPNAME_* operation name with the handler used for
 * it.  VOPNAME_DISPOSE and VOPNAME_SHRLOCK are both routed to fs_error
 * through the .error member; every other entry names a mnt* routine.
 *
 * This listing shows both sides of the change.  The "-" lines are the
 * old initializers, written flat with only the handler member braced.
 * The "+" lines replace them, wrapping every entry in its own braces
 * and additionally bracing the handler member of the final NULL entry.
 * The names, handlers and their order are the same on both sides.
 *
 * The last entry is NULL in both positions -- presumably the
 * end-of-table marker; confirm against the code that consumes this
 * template.
 */
1876 - VOPNAME_OPEN, { .vop_open = mntopen },
1877 - VOPNAME_CLOSE, { .vop_close = mntclose },
1878 - VOPNAME_READ, { .vop_read = mntread },
1879 - VOPNAME_IOCTL, { .vop_ioctl = mntioctl },
1880 - VOPNAME_GETATTR, { .vop_getattr = mntgetattr },
1881 - VOPNAME_ACCESS, { .vop_access = mntaccess },
1882 - VOPNAME_FSYNC, { .vop_fsync = mntfsync },
1883 - VOPNAME_INACTIVE, { .vop_inactive = mntinactive },
1884 - VOPNAME_SEEK, { .vop_seek = mntseek },
1885 - VOPNAME_POLL, { .vop_poll = mntpoll },
1886 - VOPNAME_CMP, { .vop_cmp = mntcmp },
1887 - VOPNAME_DISPOSE, { .error = fs_error },
1888 - VOPNAME_SHRLOCK, { .error = fs_error },
1889 - NULL, NULL
1876 + { VOPNAME_OPEN, { .vop_open = mntopen } },
1877 + { VOPNAME_CLOSE, { .vop_close = mntclose } },
1878 + { VOPNAME_READ, { .vop_read = mntread } },
1879 + { VOPNAME_IOCTL, { .vop_ioctl = mntioctl } },
1880 + { VOPNAME_GETATTR, { .vop_getattr = mntgetattr } },
1881 + { VOPNAME_ACCESS, { .vop_access = mntaccess } },
1882 + { VOPNAME_FSYNC, { .vop_fsync = mntfsync } },
1883 + { VOPNAME_INACTIVE, { .vop_inactive = mntinactive } },
1884 + { VOPNAME_SEEK, { .vop_seek = mntseek } },
1885 + { VOPNAME_POLL, { .vop_poll = mntpoll } },
1886 + { VOPNAME_CMP, { .vop_cmp = mntcmp } },
1887 + { VOPNAME_DISPOSE, { .error = fs_error } },
1888 + { VOPNAME_SHRLOCK, { .error = fs_error } },
1889 + { NULL, { NULL } }
1890 1890 };
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX