basic fsh prototype (no comments yet)
--- old/usr/src/uts/common/fs/vnode.c
+++ new/usr/src/uts/common/fs/vnode.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 31 * The Regents of the University of California
32 32 * All Rights Reserved
33 33 *
34 34 * University Acknowledgment- Portions of this document are derived from
35 35 * software developed by the University of California, Berkeley, and its
36 36 * contributors.
37 37 */
38 38
39 39 #include <sys/types.h>
40 40 #include <sys/param.h>
41 41 #include <sys/t_lock.h>
42 42 #include <sys/errno.h>
43 43 #include <sys/cred.h>
44 44 #include <sys/user.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/file.h>
47 47 #include <sys/pathname.h>
48 48 #include <sys/vfs.h>
49 49 #include <sys/vfs_opreg.h>
50 50 #include <sys/vnode.h>
51 51 #include <sys/rwstlock.h>
52 52 #include <sys/fem.h>
53 53 #include <sys/stat.h>
54 54 #include <sys/mode.h>
55 55 #include <sys/conf.h>
56 56 #include <sys/sysmacros.h>
57 57 #include <sys/cmn_err.h>
58 58 #include <sys/systm.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/debug.h>
61 61 #include <c2/audit.h>
62 62 #include <sys/acl.h>
63 63 #include <sys/nbmlock.h>
64 64 #include <sys/fcntl.h>
65 65 #include <fs/fs_subr.h>
66 66 #include <sys/taskq.h>
67 67 #include <fs/fs_reparse.h>
68 +#include <sys/fsh_impl.h>
68 69
69 70 /* Determine if this vnode is a file that is read-only */
70 71 #define ISROFILE(vp) \
71 72 ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
72 73 (vp)->v_type != VFIFO && vn_is_readonly(vp))
73 74
74 75 /* Tunable via /etc/system; used only by admin/install */
75 76 int nfs_global_client_only;
76 77
77 78 /*
78 79 * Array of vopstats_t for per-FS-type vopstats. This array has the same
79 80 * number of entries as and parallel to the vfssw table. (Arguably, it could
80 81 * be part of the vfssw table.) Once it's initialized, it's accessed using
81 82 * the same fstype index that is used to index into the vfssw table.
82 83 */
83 84 vopstats_t **vopstats_fstype;
84 85
85 86 /* vopstats initialization template used for fast initialization via bcopy() */
86 87 static vopstats_t *vs_templatep;
87 88
88 89 /* Kmem cache handle for vsk_anchor_t allocations */
89 90 kmem_cache_t *vsk_anchor_cache;
90 91
91 92 /* file events cleanup routine */
92 93 extern void free_fopdata(vnode_t *);
93 94
94 95 /*
95 96 * Root of AVL tree for the kstats associated with vopstats. Lock protects
96 97 * updates to vskstat_tree.
97 98 */
98 99 avl_tree_t vskstat_tree;
99 100 kmutex_t vskstat_tree_lock;
100 101
101 102 /* Global variable which enables/disables the vopstats collection */
102 103 int vopstats_enabled = 1;
103 104
104 105 /*
105 106 * forward declarations for internal vnode specific data (vsd)
106 107 */
107 108 static void *vsd_realloc(void *, size_t, size_t);
108 109
109 110 /*
110 111 * forward declarations for reparse point functions
111 112 */
112 113 static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
113 114
114 115 /*
115 116 * VSD -- VNODE SPECIFIC DATA
116 117 * The v_data pointer is typically used by a file system to store a
117 118 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
118 119 * However, there are times when additional project private data needs
119 120 * to be stored separately from the data (node) pointed to by v_data.
120 121 * This additional data could be stored by the file system itself or
121 122 * by a completely different kernel entity. VSD provides a way for
122 123 * callers to obtain a key and store a pointer to private data associated
123 124 * with a vnode.
124 125 *
125 126 * Callers are responsible for protecting the vsd by holding v_vsd_lock
126 127 * for calls to vsd_set() and vsd_get().
127 128 */
128 129
129 130 /*
130 131 * vsd_lock protects:
131 132 * vsd_nkeys - creation and deletion of vsd keys
132 133 * vsd_list - insertion and deletion of vsd_node in the vsd_list
133 134 * vsd_destructor - adding and removing destructors to the list
134 135 */
135 136 static kmutex_t vsd_lock;
136 137 static uint_t vsd_nkeys; /* size of destructor array */
137 138 /* list of vsd_node's */
138 139 static list_t *vsd_list = NULL;
139 140 /* per-key destructor funcs */
140 141 static void (**vsd_destructor)(void *);
141 142
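
[Editorial illustration, not part of this diff.] As a quick sketch of the VSD interface described above (my_vsd_key, my_dtor, and struct my_data are hypothetical names), a kernel component obtains a key once with vsd_create(), then attaches and retrieves per-vnode data while holding v_vsd_lock, as the contract above requires:

	static uint_t my_vsd_key;		/* hypothetical */

	static void
	my_dtor(void *data)			/* per-key destructor */
	{
		kmem_free(data, sizeof (struct my_data));
	}

	...
	vsd_create(&my_vsd_key, my_dtor);	/* once, e.g. at module load */

	mutex_enter(&vp->v_vsd_lock);
	(void) vsd_set(vp, my_vsd_key, data);	/* attach private data */
	mutex_exit(&vp->v_vsd_lock);

	mutex_enter(&vp->v_vsd_lock);
	data = vsd_get(vp, my_vsd_key);		/* fetch it back later */
	mutex_exit(&vp->v_vsd_lock);
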
142 143 /*
143 144 * The following is the common set of actions needed to update the
144 145 * vopstats structure from a vnode op. Both VOPSTATS_UPDATE() and
145 146 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
146 147 * recording of the bytes transferred. Since the code is similar
147 148 * but small, it is nearly a duplicate. Consequently any changes
148 149 * to one may need to be reflected in the other.
149 150 * Rundown of the variables:
150 151 * vp - Pointer to the vnode
151 152 * counter - Partial name structure member to update in vopstats for counts
152 153 * bytecounter - Partial name structure member to update in vopstats for bytes
153 154 * bytesval - Value to update in vopstats for bytes
154 155 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
155 156 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
156 157 */
157 158
158 159 #define VOPSTATS_UPDATE(vp, counter) { \
159 160 vfs_t *vfsp = (vp)->v_vfsp; \
160 161 if (vfsp && vfsp->vfs_implp && \
161 162 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
162 163 vopstats_t *vsp = &vfsp->vfs_vopstats; \
163 164 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
164 165 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
165 166 size_t, uint64_t *); \
166 167 __dtrace_probe___fsinfo_##counter(vp, 0, stataddr); \
167 168 (*stataddr)++; \
168 169 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
169 170 vsp->n##counter.value.ui64++; \
170 171 } \
171 172 } \
172 173 }
173 174
174 175 #define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) { \
175 176 vfs_t *vfsp = (vp)->v_vfsp; \
176 177 if (vfsp && vfsp->vfs_implp && \
177 178 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
178 179 vopstats_t *vsp = &vfsp->vfs_vopstats; \
179 180 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
180 181 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
181 182 size_t, uint64_t *); \
182 183 __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
183 184 (*stataddr)++; \
184 185 vsp->bytecounter.value.ui64 += bytesval; \
185 186 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
186 187 vsp->n##counter.value.ui64++; \
187 188 vsp->bytecounter.value.ui64 += bytesval; \
188 189 } \
189 190 } \
190 191 }
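
[Editorial illustration, not part of this diff.] For context, the FOP wrappers later in this file (outside this hunk) are the consumers of these macros; a read is accounted for roughly as follows, with the byte count taken from the change in uio_resid around the underlying VOP (a sketch, not the verbatim wrapper):

	ssize_t start_resid = uiop->uio_resid;

	err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
	VOPSTATS_UPDATE_IO(vp, read, read_bytes,
	    (start_resid - uiop->uio_resid));
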
191 192
192 193 /*
193 194 * If the filesystem does not support XIDs, map the credential.
194 195 * If the vfsp is NULL, perhaps we should also map?
195 196 */
196 197 #define VOPXID_MAP_CR(vp, cr) { \
197 198 vfs_t *vfsp = (vp)->v_vfsp; \
198 199 if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0) \
199 200 cr = crgetmapped(cr); \
200 201 }
201 202
202 203 /*
203 204 * Convert stat(2) formats to vnode types and vice versa. (Knows about
204 205 * numerical order of S_IFMT and vnode types.)
205 206 */
206 207 enum vtype iftovt_tab[] = {
207 208 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
208 209 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
209 210 };
210 211
211 212 ushort_t vttoif_tab[] = {
212 213 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
213 214 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
214 215 };
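
[Editorial illustration, not part of this diff.] These tables are normally indexed through the IFTOVT()/VTTOIF() macros from sys/mode.h, which encode the numerical-order assumption mentioned above:

	/* #define IFTOVT(M) (iftovt_tab[((M) & S_IFMT) >> 12]) */
	/* #define VTTOIF(T) (vttoif_tab[(int)(T)]) */

	enum vtype t = IFTOVT(S_IFDIR);		/* -> VDIR */
	mode_t m = VTTOIF(VREG);		/* -> S_IFREG */
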
215 216
216 217 /*
217 218 * The system vnode cache.
218 219 */
219 220
220 221 kmem_cache_t *vn_cache;
221 222
222 223
223 224 /*
224 225 * Vnode operations vector.
225 226 */
226 227
227 228 static const fs_operation_trans_def_t vn_ops_table[] = {
228 229 VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
229 230 fs_nosys, fs_nosys,
230 231
231 232 VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
232 233 fs_nosys, fs_nosys,
233 234
234 235 VOPNAME_READ, offsetof(struct vnodeops, vop_read),
235 236 fs_nosys, fs_nosys,
236 237
237 238 VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
238 239 fs_nosys, fs_nosys,
239 240
240 241 VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
241 242 fs_nosys, fs_nosys,
242 243
243 244 VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
244 245 fs_setfl, fs_nosys,
245 246
246 247 VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
247 248 fs_nosys, fs_nosys,
248 249
249 250 VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
250 251 fs_nosys, fs_nosys,
251 252
252 253 VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
253 254 fs_nosys, fs_nosys,
254 255
255 256 VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
256 257 fs_nosys, fs_nosys,
257 258
258 259 VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
259 260 fs_nosys, fs_nosys,
260 261
261 262 VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
262 263 fs_nosys, fs_nosys,
263 264
264 265 VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
265 266 fs_nosys, fs_nosys,
266 267
267 268 VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
268 269 fs_nosys, fs_nosys,
269 270
270 271 VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
271 272 fs_nosys, fs_nosys,
272 273
273 274 VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
274 275 fs_nosys, fs_nosys,
275 276
276 277 VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
277 278 fs_nosys, fs_nosys,
278 279
279 280 VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
280 281 fs_nosys, fs_nosys,
281 282
282 283 VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
283 284 fs_nosys, fs_nosys,
284 285
285 286 VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
286 287 fs_nosys, fs_nosys,
287 288
288 289 VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
289 290 fs_nosys, fs_nosys,
290 291
291 292 VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
292 293 fs_nosys, fs_nosys,
293 294
294 295 VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
295 296 fs_rwlock, fs_rwlock,
296 297
297 298 VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
298 299 (fs_generic_func_p) fs_rwunlock,
299 300 (fs_generic_func_p) fs_rwunlock, /* no errors allowed */
300 301
301 302 VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
302 303 fs_nosys, fs_nosys,
303 304
304 305 VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
305 306 fs_cmp, fs_cmp, /* no errors allowed */
306 307
307 308 VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
308 309 fs_frlock, fs_nosys,
309 310
310 311 VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
311 312 fs_nosys, fs_nosys,
312 313
313 314 VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
314 315 fs_nosys, fs_nosys,
315 316
316 317 VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
317 318 fs_nosys, fs_nosys,
318 319
319 320 VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
320 321 fs_nosys, fs_nosys,
321 322
322 323 VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
323 324 (fs_generic_func_p) fs_nosys_map,
324 325 (fs_generic_func_p) fs_nosys_map,
325 326
326 327 VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
327 328 (fs_generic_func_p) fs_nosys_addmap,
328 329 (fs_generic_func_p) fs_nosys_addmap,
329 330
330 331 VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
331 332 fs_nosys, fs_nosys,
332 333
333 334 VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
334 335 (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
335 336
336 337 VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
337 338 fs_nosys, fs_nosys,
338 339
339 340 VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
340 341 fs_pathconf, fs_nosys,
341 342
342 343 VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
343 344 fs_nosys, fs_nosys,
344 345
345 346 VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
346 347 fs_nosys, fs_nosys,
347 348
348 349 VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
349 350 (fs_generic_func_p) fs_dispose,
350 351 (fs_generic_func_p) fs_nodispose,
351 352
352 353 VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
353 354 fs_nosys, fs_nosys,
354 355
355 356 VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
356 357 fs_fab_acl, fs_nosys,
357 358
358 359 VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
359 360 fs_shrlock, fs_nosys,
360 361
361 362 VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
362 363 (fs_generic_func_p) fs_vnevent_nosupport,
363 364 (fs_generic_func_p) fs_vnevent_nosupport,
364 365
365 366 VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
366 367 fs_nosys, fs_nosys,
367 368
368 369 VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
369 370 fs_nosys, fs_nosys,
370 371
371 372 NULL, 0, NULL, NULL
372 373 };
373 374
374 375 /* Extensible attribute (xva) routines. */
375 376
376 377 /*
377 378 * Zero out the structure, set the size of the requested/returned bitmaps,
378 379 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
379 380 * to the returned attributes array.
380 381 */
381 382 void
382 383 xva_init(xvattr_t *xvap)
383 384 {
384 385 bzero(xvap, sizeof (xvattr_t));
385 386 xvap->xva_mapsize = XVA_MAPSIZE;
386 387 xvap->xva_magic = XVA_MAGIC;
387 388 xvap->xva_vattr.va_mask = AT_XVATTR;
388 389 xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
389 390 }
390 391
391 392 /*
392 393 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
393 394 * structure. Otherwise, returns NULL.
394 395 */
395 396 xoptattr_t *
396 397 xva_getxoptattr(xvattr_t *xvap)
397 398 {
398 399 xoptattr_t *xoap = NULL;
399 400 if (xvap->xva_vattr.va_mask & AT_XVATTR)
400 401 xoap = &xvap->xva_xoptattrs;
401 402 return (xoap);
402 403 }
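
[Editorial illustration, not part of this diff.] A typical caller pairs the two helpers above: initialize the xvattr_t, mark the extended attributes being requested, issue the VOP, then pull out the optional-attribute block. A minimal sketch (XVA_SET_REQ(), XVA_ISSET_RTN(), and XAT_READONLY come from sys/vnode.h):

	xvattr_t xva;
	xoptattr_t *xoap;

	xva_init(&xva);
	XVA_SET_REQ(&xva, XAT_READONLY);
	if (VOP_GETATTR(vp, &xva.xva_vattr, 0, CRED(), NULL) == 0 &&
	    (xoap = xva_getxoptattr(&xva)) != NULL &&
	    XVA_ISSET_RTN(&xva, XAT_READONLY)) {
		/* xoap->xoa_readonly is now valid */
	}
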
403 404
404 405 /*
405 406 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
406 407 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
407 408 * kstat name.
408 409 */
409 410 static int
410 411 vska_compar(const void *n1, const void *n2)
411 412 {
412 413 int ret;
413 414 ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
414 415 ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
415 416
416 417 if (p1 < p2) {
417 418 ret = -1;
418 419 } else if (p1 > p2) {
419 420 ret = 1;
420 421 } else {
421 422 ret = 0;
422 423 }
423 424
424 425 return (ret);
425 426 }
426 427
427 428 /*
428 429 * Used to create a single template which will be bcopy()ed to a newly
429 430 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
430 431 */
431 432 static vopstats_t *
432 433 create_vopstats_template()
433 434 {
434 435 vopstats_t *vsp;
435 436
436 437 vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
437 438 bzero(vsp, sizeof (*vsp)); /* Start fresh */
438 439
439 440 /* VOP_OPEN */
440 441 kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
441 442 /* VOP_CLOSE */
442 443 kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
443 444 /* VOP_READ I/O */
444 445 kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
445 446 kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
446 447 /* VOP_WRITE I/O */
447 448 kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
448 449 kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
449 450 /* VOP_IOCTL */
450 451 kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
451 452 /* VOP_SETFL */
452 453 kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
453 454 /* VOP_GETATTR */
454 455 kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
455 456 /* VOP_SETATTR */
456 457 kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
457 458 /* VOP_ACCESS */
458 459 kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
459 460 /* VOP_LOOKUP */
460 461 kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
461 462 /* VOP_CREATE */
462 463 kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
463 464 /* VOP_REMOVE */
464 465 kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
465 466 /* VOP_LINK */
466 467 kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
467 468 /* VOP_RENAME */
468 469 kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
469 470 /* VOP_MKDIR */
470 471 kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
471 472 /* VOP_RMDIR */
472 473 kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
473 474 /* VOP_READDIR I/O */
474 475 kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
475 476 kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
476 477 KSTAT_DATA_UINT64);
477 478 /* VOP_SYMLINK */
478 479 kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
479 480 /* VOP_READLINK */
480 481 kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
481 482 /* VOP_FSYNC */
482 483 kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
483 484 /* VOP_INACTIVE */
484 485 kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
485 486 /* VOP_FID */
486 487 kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
487 488 /* VOP_RWLOCK */
488 489 kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
489 490 /* VOP_RWUNLOCK */
490 491 kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
491 492 /* VOP_SEEK */
492 493 kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
493 494 /* VOP_CMP */
494 495 kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
495 496 /* VOP_FRLOCK */
496 497 kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
497 498 /* VOP_SPACE */
498 499 kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
499 500 /* VOP_REALVP */
500 501 kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
501 502 /* VOP_GETPAGE */
502 503 kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
503 504 /* VOP_PUTPAGE */
504 505 kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
505 506 /* VOP_MAP */
506 507 kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
507 508 /* VOP_ADDMAP */
508 509 kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
509 510 /* VOP_DELMAP */
510 511 kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
511 512 /* VOP_POLL */
512 513 kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
513 514 /* VOP_DUMP */
514 515 kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
515 516 /* VOP_PATHCONF */
516 517 kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
517 518 /* VOP_PAGEIO */
518 519 kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
519 520 /* VOP_DUMPCTL */
520 521 kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
521 522 /* VOP_DISPOSE */
522 523 kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
523 524 /* VOP_SETSECATTR */
524 525 kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
525 526 /* VOP_GETSECATTR */
526 527 kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
527 528 /* VOP_SHRLOCK */
528 529 kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
529 530 /* VOP_VNEVENT */
530 531 kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
531 532 /* VOP_REQZCBUF */
532 533 kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
533 534 /* VOP_RETZCBUF */
534 535 kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
535 536
536 537 return (vsp);
537 538 }
538 539
539 540 /*
540 541 * Creates a kstat structure associated with a vopstats structure.
541 542 */
542 543 kstat_t *
543 544 new_vskstat(char *ksname, vopstats_t *vsp)
544 545 {
545 546 kstat_t *ksp;
546 547
547 548 if (!vopstats_enabled) {
548 549 return (NULL);
549 550 }
550 551
551 552 ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
552 553 sizeof (vopstats_t)/sizeof (kstat_named_t),
553 554 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
554 555 if (ksp) {
555 556 ksp->ks_data = vsp;
556 557 kstat_install(ksp);
557 558 }
558 559
559 560 return (ksp);
560 561 }
561 562
562 563 /*
563 564 * Called from vfsinit() to initialize the support mechanisms for vopstats
564 565 */
565 566 void
566 567 vopstats_startup()
567 568 {
568 569 if (!vopstats_enabled)
569 570 return;
570 571
571 572 /*
572 573 * Creates the AVL tree which holds per-vfs vopstat anchors. This
573 574 * is necessary since we need to check if a kstat exists before we
574 575 * attempt to create it. Also, initialize its lock.
575 576 */
576 577 avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
577 578 offsetof(vsk_anchor_t, vsk_node));
578 579 mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
579 580
580 581 vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
581 582 sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
582 583 NULL, NULL, 0);
583 584
584 585 /*
585 586 * Set up the array of pointers for the vopstats-by-FS-type.
586 587 * The entries will be allocated/initialized as each file system
587 588 * goes through modload/mod_installfs.
588 589 */
589 590 vopstats_fstype = (vopstats_t **)kmem_zalloc(
590 591 (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
591 592
592 593 /* Set up the global vopstats initialization template */
593 594 vs_templatep = create_vopstats_template();
594 595 }
595 596
596 597 /*
597 598 * We need to have all of the counters zeroed.
598 599 * The initialization of the vopstats_t includes on the order of
599 600 * 50 calls to kstat_named_init(). Rather than do that on every call,
600 601 * we do it once in a template (vs_templatep) then bcopy it over.
601 602 */
602 603 void
603 604 initialize_vopstats(vopstats_t *vsp)
604 605 {
605 606 if (vsp == NULL)
606 607 return;
607 608
608 609 bcopy(vs_templatep, vsp, sizeof (vopstats_t));
609 610 }
610 611
611 612 /*
612 613 * If possible, determine which vopstats by fstype to use and
613 614 * return a pointer to the caller.
614 615 */
615 616 vopstats_t *
616 617 get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
617 618 {
618 619 int fstype = 0; /* Index into vfssw[] */
619 620 vopstats_t *vsp = NULL;
620 621
621 622 if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
622 623 !vopstats_enabled)
623 624 return (NULL);
624 625 /*
625 626 * Set up the fstype. We go to so much trouble because all versions
626 627 * of NFS use the same fstype in their vfs even though they have
627 628 * distinct entries in the vfssw[] table.
628 629 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
629 630 */
630 631 if (vswp) {
631 632 fstype = vswp - vfssw; /* Gets us the index */
632 633 } else {
633 634 fstype = vfsp->vfs_fstype;
634 635 }
635 636
636 637 /*
637 638 * Point to the per-fstype vopstats. The only valid values are
638 639 * non-zero positive values less than the number of vfssw[] table
639 640 * entries.
640 641 */
641 642 if (fstype > 0 && fstype < nfstype) {
642 643 vsp = vopstats_fstype[fstype];
643 644 }
644 645
645 646 return (vsp);
646 647 }
647 648
648 649 /*
649 650 * Generate a kstat name, create the kstat structure, and allocate a
650 651 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
651 652 * to the caller. This must only be called from a mount.
652 653 */
653 654 vsk_anchor_t *
654 655 get_vskstat_anchor(vfs_t *vfsp)
655 656 {
656 657 char kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
657 658 statvfs64_t statvfsbuf; /* Needed to find f_fsid */
658 659 vsk_anchor_t *vskp = NULL; /* vfs <--> kstat anchor */
659 660 kstat_t *ksp; /* Ptr to new kstat */
660 661 avl_index_t where; /* Location in the AVL tree */
661 662
662 663 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
663 664 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
664 665 return (NULL);
665 666
666 667 /* Need to get the fsid to build a kstat name */
667 668 if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
668 669 /* Create a name for our kstats based on fsid */
669 670 (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
670 671 VOPSTATS_STR, statvfsbuf.f_fsid);
671 672
672 673 /* Allocate and initialize the vsk_anchor_t */
673 674 vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
674 675 bzero(vskp, sizeof (*vskp));
675 676 vskp->vsk_fsid = statvfsbuf.f_fsid;
676 677
677 678 mutex_enter(&vskstat_tree_lock);
678 679 if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
679 680 avl_insert(&vskstat_tree, vskp, where);
680 681 mutex_exit(&vskstat_tree_lock);
681 682
682 683 /*
683 684 * Now that we've got the anchor in the AVL
684 685 * tree, we can create the kstat.
685 686 */
686 687 ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
687 688 if (ksp) {
688 689 vskp->vsk_ksp = ksp;
689 690 }
690 691 } else {
691 692 /* Oops, found one! Release memory and lock. */
692 693 mutex_exit(&vskstat_tree_lock);
693 694 kmem_cache_free(vsk_anchor_cache, vskp);
694 695 vskp = NULL;
695 696 }
696 697 }
697 698 return (vskp);
698 699 }
699 700
700 701 /*
701 702 * We're in the process of tearing down the vfs and need to clean up
702 703 * the data structures associated with the vopstats. Must only be called
703 704 * from dounmount().
704 705 */
705 706 void
706 707 teardown_vopstats(vfs_t *vfsp)
707 708 {
708 709 vsk_anchor_t *vskap;
709 710 avl_index_t where;
710 711
711 712 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
712 713 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
713 714 return;
714 715
715 716 /* This is a safe check since VFS_STATS must be set (see above) */
716 717 if ((vskap = vfsp->vfs_vskap) == NULL)
717 718 return;
718 719
719 720 /* Whack the pointer right away */
720 721 vfsp->vfs_vskap = NULL;
721 722
722 723 /* Lock the tree, remove the node, and delete the kstat */
723 724 mutex_enter(&vskstat_tree_lock);
724 725 if (avl_find(&vskstat_tree, vskap, &where)) {
725 726 avl_remove(&vskstat_tree, vskap);
726 727 }
727 728
728 729 if (vskap->vsk_ksp) {
729 730 kstat_delete(vskap->vsk_ksp);
730 731 }
731 732 mutex_exit(&vskstat_tree_lock);
732 733
733 734 kmem_cache_free(vsk_anchor_cache, vskap);
734 735 }
735 736
736 737 /*
737 738 * Read or write a vnode. Called from kernel code.
738 739 */
739 740 int
740 741 vn_rdwr(
741 742 enum uio_rw rw,
742 743 struct vnode *vp,
743 744 caddr_t base,
744 745 ssize_t len,
745 746 offset_t offset,
746 747 enum uio_seg seg,
747 748 int ioflag,
748 749 rlim64_t ulimit, /* meaningful only if rw is UIO_WRITE */
749 750 cred_t *cr,
750 751 ssize_t *residp)
751 752 {
752 753 struct uio uio;
753 754 struct iovec iov;
754 755 int error;
755 756 int in_crit = 0;
756 757
757 758 if (rw == UIO_WRITE && ISROFILE(vp))
758 759 return (EROFS);
759 760
760 761 if (len < 0)
761 762 return (EIO);
762 763
763 764 VOPXID_MAP_CR(vp, cr);
764 765
765 766 iov.iov_base = base;
766 767 iov.iov_len = len;
767 768 uio.uio_iov = &iov;
768 769 uio.uio_iovcnt = 1;
769 770 uio.uio_loffset = offset;
770 771 uio.uio_segflg = (short)seg;
771 772 uio.uio_resid = len;
772 773 uio.uio_llimit = ulimit;
773 774
774 775 /*
775 776 * We have to enter the critical region before calling VOP_RWLOCK
776 777 * to avoid a deadlock with ufs.
777 778 */
778 779 if (nbl_need_check(vp)) {
779 780 int svmand;
780 781
781 782 nbl_start_crit(vp, RW_READER);
782 783 in_crit = 1;
783 784 error = nbl_svmand(vp, cr, &svmand);
784 785 if (error != 0)
785 786 goto done;
786 787 if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
787 788 uio.uio_offset, uio.uio_resid, svmand, NULL)) {
788 789 error = EACCES;
789 790 goto done;
790 791 }
791 792 }
792 793
793 794 (void) VOP_RWLOCK(vp,
794 795 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
795 796 if (rw == UIO_WRITE) {
796 797 uio.uio_fmode = FWRITE;
797 798 uio.uio_extflg = UIO_COPY_DEFAULT;
798 799 error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
799 800 } else {
800 801 uio.uio_fmode = FREAD;
801 802 uio.uio_extflg = UIO_COPY_CACHED;
802 803 error = VOP_READ(vp, &uio, ioflag, cr, NULL);
803 804 }
804 805 VOP_RWUNLOCK(vp,
805 806 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
806 807 if (residp)
807 808 *residp = uio.uio_resid;
808 809 else if (uio.uio_resid)
809 810 error = EIO;
810 811
811 812 done:
812 813 if (in_crit)
813 814 nbl_end_crit(vp);
814 815 return (error);
815 816 }
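
[Editorial illustration, not part of this diff.] Together with vn_open() and VOP_CLOSE() (both further below), this is the usual in-kernel file I/O idiom. A hedged sketch of a one-shot read from kernel context (the path and buffer are illustrative; error handling abbreviated):

	vnode_t *vp;
	char buf[128];
	ssize_t resid;
	int err;

	err = vn_open("/etc/release", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0);
	if (err == 0) {
		err = vn_rdwr(UIO_READ, vp, buf, sizeof (buf), 0,
		    UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid);
		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(vp);
	}
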
816 817
817 818 /*
818 819 * Release a vnode. Call VOP_INACTIVE on last reference or
819 820 * decrement reference count.
820 821 *
821 822 * To avoid race conditions, the v_count is left at 1 for
822 823 * the call to VOP_INACTIVE. This prevents another thread
823 824 * from reclaiming and releasing the vnode *before* the
824 825 * VOP_INACTIVE routine has a chance to destroy the vnode.
825 826 * We can't have more than 1 thread calling VOP_INACTIVE
826 827 * on a vnode.
827 828 */
828 829 void
829 830 vn_rele(vnode_t *vp)
830 831 {
831 832 VERIFY(vp->v_count > 0);
832 833 mutex_enter(&vp->v_lock);
833 834 if (vp->v_count == 1) {
834 835 mutex_exit(&vp->v_lock);
835 836 VOP_INACTIVE(vp, CRED(), NULL);
836 837 return;
837 838 }
838 839 vp->v_count--;
839 840 mutex_exit(&vp->v_lock);
840 841 }
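
[Editorial illustration, not part of this diff.] Every vn_rele() balances either a VN_HOLD() or a held reference handed back by a lookup; the canonical pairing is simply:

	VN_HOLD(vp);		/* take a reference */
	/* ... use vp ... */
	vn_rele(vp);		/* drop it; last ref triggers VOP_INACTIVE */
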
841 842
842 843 /*
843 844 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
844 845 * as a single reference, so v_count is not decremented until the last DNLC hold
845 846 * is released. This makes it possible to distinguish vnodes that are referenced
846 847 * only by the DNLC.
847 848 */
848 849 void
849 850 vn_rele_dnlc(vnode_t *vp)
850 851 {
851 852 VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
852 853 mutex_enter(&vp->v_lock);
853 854 if (--vp->v_count_dnlc == 0) {
854 855 if (vp->v_count == 1) {
855 856 mutex_exit(&vp->v_lock);
856 857 VOP_INACTIVE(vp, CRED(), NULL);
857 858 return;
858 859 }
859 860 vp->v_count--;
860 861 }
861 862 mutex_exit(&vp->v_lock);
862 863 }
863 864
864 865 /*
865 866 * Like vn_rele() except that it clears v_stream under v_lock.
866 867 * This is used by sockfs when it dismantles the association between
867 868 * the sockfs node and the vnode in the underlying file system.
868 869 * v_lock has to be held to prevent a thread coming through the lookupname
869 870 * path from accessing a stream head that is going away.
870 871 */
871 872 void
872 873 vn_rele_stream(vnode_t *vp)
873 874 {
874 875 VERIFY(vp->v_count > 0);
875 876 mutex_enter(&vp->v_lock);
876 877 vp->v_stream = NULL;
877 878 if (vp->v_count == 1) {
878 879 mutex_exit(&vp->v_lock);
879 880 VOP_INACTIVE(vp, CRED(), NULL);
880 881 return;
881 882 }
882 883 vp->v_count--;
883 884 mutex_exit(&vp->v_lock);
884 885 }
885 886
886 887 static void
887 888 vn_rele_inactive(vnode_t *vp)
888 889 {
889 890 VOP_INACTIVE(vp, CRED(), NULL);
890 891 }
891 892
892 893 /*
893 894 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
894 895 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
895 896 * the file system as a result of releasing the vnode. Note, file systems
896 897 * already have to handle the race where the vnode is incremented before the
897 898 * inactive routine is called and does its locking.
898 899 *
899 900 * Warning: Excessive use of this routine can lead to performance problems.
900 901 * This is because taskqs throttle back allocation if too many are created.
901 902 */
902 903 void
903 904 vn_rele_async(vnode_t *vp, taskq_t *taskq)
904 905 {
905 906 VERIFY(vp->v_count > 0);
906 907 mutex_enter(&vp->v_lock);
907 908 if (vp->v_count == 1) {
908 909 mutex_exit(&vp->v_lock);
909 910 VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
910 911 vp, TQ_SLEEP) != NULL);
911 912 return;
912 913 }
913 914 vp->v_count--;
914 915 mutex_exit(&vp->v_lock);
915 916 }
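
[Editorial illustration, not part of this diff.] A caller that cannot risk re-entering the file system just supplies a suitable taskq; for example (system_taskq is used here purely for illustration, subject to the throttling caveat above):

	vn_rele_async(vp, system_taskq);
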
916 917
917 918 int
918 919 vn_open(
919 920 char *pnamep,
920 921 enum uio_seg seg,
921 922 int filemode,
922 923 int createmode,
923 924 struct vnode **vpp,
924 925 enum create crwhy,
925 926 mode_t umask)
926 927 {
927 928 return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
928 929 umask, NULL, -1));
929 930 }
930 931
931 932
932 933 /*
933 934 * Open/create a vnode.
934 935 * This may be callable by the kernel, the only known use
935 936 * of user context being that the current user credentials
936 937 * are used for permissions. crwhy is defined iff filemode & FCREAT.
937 938 */
938 939 int
939 940 vn_openat(
940 941 char *pnamep,
941 942 enum uio_seg seg,
942 943 int filemode,
943 944 int createmode,
944 945 struct vnode **vpp,
945 946 enum create crwhy,
946 947 mode_t umask,
947 948 struct vnode *startvp,
948 949 int fd)
949 950 {
950 951 struct vnode *vp;
951 952 int mode;
952 953 int accessflags;
953 954 int error;
954 955 int in_crit = 0;
955 956 int open_done = 0;
956 957 int shrlock_done = 0;
957 958 struct vattr vattr;
958 959 enum symfollow follow;
959 960 int estale_retry = 0;
960 961 struct shrlock shr;
961 962 struct shr_locowner shr_own;
962 963
963 964 mode = 0;
964 965 accessflags = 0;
965 966 if (filemode & FREAD)
966 967 mode |= VREAD;
967 968 if (filemode & (FWRITE|FTRUNC))
968 969 mode |= VWRITE;
969 970 if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
970 971 mode |= VEXEC;
971 972
972 973 /* symlink interpretation */
973 974 if (filemode & FNOFOLLOW)
974 975 follow = NO_FOLLOW;
975 976 else
976 977 follow = FOLLOW;
977 978
978 979 if (filemode & FAPPEND)
979 980 accessflags |= V_APPEND;
980 981
981 982 top:
982 983 if (filemode & FCREAT) {
983 984 enum vcexcl excl;
984 985
985 986 /*
986 987 * Wish to create a file.
987 988 */
988 989 vattr.va_type = VREG;
989 990 vattr.va_mode = createmode;
990 991 vattr.va_mask = AT_TYPE|AT_MODE;
991 992 if (filemode & FTRUNC) {
992 993 vattr.va_size = 0;
993 994 vattr.va_mask |= AT_SIZE;
994 995 }
995 996 if (filemode & FEXCL)
996 997 excl = EXCL;
997 998 else
998 999 excl = NONEXCL;
999 1000
1000 1001 if (error =
1001 1002 vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
1002 1003 (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
1003 1004 return (error);
1004 1005 } else {
1005 1006 /*
1006 1007 * Wish to open a file. Just look it up.
1007 1008 */
1008 1009 if (error = lookupnameat(pnamep, seg, follow,
1009 1010 NULLVPP, &vp, startvp)) {
1010 1011 if ((error == ESTALE) &&
1011 1012 fs_need_estale_retry(estale_retry++))
1012 1013 goto top;
1013 1014 return (error);
1014 1015 }
1015 1016
1016 1017 /*
1017 1018 * Get the attributes to check whether file is large.
1018 1019 * We do this only if the FOFFMAX flag is not set and
1019 1020 * only for regular files.
1020 1021 */
1021 1022
1022 1023 if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
1023 1024 vattr.va_mask = AT_SIZE;
1024 1025 if ((error = VOP_GETATTR(vp, &vattr, 0,
1025 1026 CRED(), NULL))) {
1026 1027 goto out;
1027 1028 }
1028 1029 if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
1029 1030 /*
1030 1031 * Large File API - regular open fails
1031 1032 * if FOFFMAX flag is set in file mode
1032 1033 */
1033 1034 error = EOVERFLOW;
1034 1035 goto out;
1035 1036 }
1036 1037 }
1037 1038 /*
1038 1039 * Can't write directories, active texts, or
1039 1040 * read-only filesystems. Can't truncate files
1040 1041 * on which mandatory locking is in effect.
1041 1042 */
1042 1043 if (filemode & (FWRITE|FTRUNC)) {
1043 1044 /*
1044 1045 * Allow writable directory if VDIROPEN flag is set.
1045 1046 */
1046 1047 if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
1047 1048 error = EISDIR;
1048 1049 goto out;
1049 1050 }
1050 1051 if (ISROFILE(vp)) {
1051 1052 error = EROFS;
1052 1053 goto out;
1053 1054 }
1054 1055 /*
1055 1056 * Can't truncate files on which
1056 1057 * sysv mandatory locking is in effect.
1057 1058 */
1058 1059 if (filemode & FTRUNC) {
1059 1060 vnode_t *rvp;
1060 1061
1061 1062 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1062 1063 rvp = vp;
1063 1064 if (rvp->v_filocks != NULL) {
1064 1065 vattr.va_mask = AT_MODE;
1065 1066 if ((error = VOP_GETATTR(vp,
1066 1067 &vattr, 0, CRED(), NULL)) == 0 &&
1067 1068 MANDLOCK(vp, vattr.va_mode))
1068 1069 error = EAGAIN;
1069 1070 }
1070 1071 }
1071 1072 if (error)
1072 1073 goto out;
1073 1074 }
1074 1075 /*
1075 1076 * Check permissions.
1076 1077 */
1077 1078 if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
1078 1079 goto out;
1079 1080 /*
1080 1081 * Require FSEARCH to return a directory.
1081 1082 * Require FEXEC to return a regular file.
1082 1083 */
1083 1084 if ((filemode & FSEARCH) && vp->v_type != VDIR) {
1084 1085 error = ENOTDIR;
1085 1086 goto out;
1086 1087 }
1087 1088 if ((filemode & FEXEC) && vp->v_type != VREG) {
1088 1089 error = ENOEXEC; /* XXX: error code? */
1089 1090 goto out;
1090 1091 }
1091 1092 }
1092 1093
1093 1094 /*
1094 1095 * Do remaining checks for FNOFOLLOW and FNOLINKS.
1095 1096 */
1096 1097 if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
1097 1098 error = ELOOP;
1098 1099 goto out;
1099 1100 }
1100 1101 if (filemode & FNOLINKS) {
1101 1102 vattr.va_mask = AT_NLINK;
1102 1103 if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
1103 1104 goto out;
1104 1105 }
1105 1106 if (vattr.va_nlink != 1) {
1106 1107 error = EMLINK;
1107 1108 goto out;
1108 1109 }
1109 1110 }
1110 1111
1111 1112 /*
1112 1113 * Opening a socket corresponding to the AF_UNIX pathname
1113 1114 * in the filesystem name space is not supported.
1114 1115 * However, VSOCK nodes in namefs are supported in order
1115 1116 * to make fattach work for sockets.
1116 1117 *
1117 1118 * XXX This uses VOP_REALVP to distinguish between
1118 1119 * an unopened namefs node (where VOP_REALVP returns a
1119 1120 * different VSOCK vnode) and a VSOCK created by vn_create
1120 1121 * in some file system (where VOP_REALVP would never return
1121 1122 * a different vnode).
1122 1123 */
1123 1124 if (vp->v_type == VSOCK) {
1124 1125 struct vnode *nvp;
1125 1126
1126 1127 error = VOP_REALVP(vp, &nvp, NULL);
1127 1128 if (error != 0 || nvp == NULL || nvp == vp ||
1128 1129 nvp->v_type != VSOCK) {
1129 1130 error = EOPNOTSUPP;
1130 1131 goto out;
1131 1132 }
1132 1133 }
1133 1134
1134 1135 if ((vp->v_type == VREG) && nbl_need_check(vp)) {
1135 1136 /* get share reservation */
1136 1137 shr.s_access = 0;
1137 1138 if (filemode & FWRITE)
1138 1139 shr.s_access |= F_WRACC;
1139 1140 if (filemode & FREAD)
1140 1141 shr.s_access |= F_RDACC;
1141 1142 shr.s_deny = 0;
1142 1143 shr.s_sysid = 0;
1143 1144 shr.s_pid = ttoproc(curthread)->p_pid;
1144 1145 shr_own.sl_pid = shr.s_pid;
1145 1146 shr_own.sl_id = fd;
1146 1147 shr.s_own_len = sizeof (shr_own);
1147 1148 shr.s_owner = (caddr_t)&shr_own;
1148 1149 error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
1149 1150 NULL);
1150 1151 if (error)
1151 1152 goto out;
1152 1153 shrlock_done = 1;
1153 1154
1154 1155 /* nbmand conflict check if truncating file */
1155 1156 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1156 1157 nbl_start_crit(vp, RW_READER);
1157 1158 in_crit = 1;
1158 1159
1159 1160 vattr.va_mask = AT_SIZE;
1160 1161 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
1161 1162 goto out;
1162 1163 if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
1163 1164 NULL)) {
1164 1165 error = EACCES;
1165 1166 goto out;
1166 1167 }
1167 1168 }
1168 1169 }
1169 1170
1170 1171 /*
1171 1172 * Do opening protocol.
1172 1173 */
1173 1174 error = VOP_OPEN(&vp, filemode, CRED(), NULL);
1174 1175 if (error)
1175 1176 goto out;
1176 1177 open_done = 1;
1177 1178
1178 1179 /*
1179 1180 * Truncate if required.
1180 1181 */
1181 1182 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1182 1183 vattr.va_size = 0;
1183 1184 vattr.va_mask = AT_SIZE;
1184 1185 if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
1185 1186 goto out;
1186 1187 }
1187 1188 out:
1188 1189 ASSERT(vp->v_count > 0);
1189 1190
1190 1191 if (in_crit) {
1191 1192 nbl_end_crit(vp);
1192 1193 in_crit = 0;
1193 1194 }
1194 1195 if (error) {
1195 1196 if (open_done) {
1196 1197 (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
1197 1198 NULL);
1198 1199 open_done = 0;
1199 1200 shrlock_done = 0;
1200 1201 }
1201 1202 if (shrlock_done) {
1202 1203 (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
1203 1204 NULL);
1204 1205 shrlock_done = 0;
1205 1206 }
1206 1207
1207 1208 /*
1208 1209 * The following clause was added to handle a problem
1209 1210 * with NFS consistency. It is possible that a lookup
1210 1211 * of the file to be opened succeeded, but the file
1211 1212 * itself doesn't actually exist on the server. This
1212 1213 * is chiefly due to the DNLC containing an entry for
1213 1214 * the file which has been removed on the server. In
1214 1215 * this case, we just start over. If there was some
1215 1216 * other cause for the ESTALE error, then the lookup
1216 1217 * of the file will fail and the error will be returned
1217 1218 * above instead of looping around from here.
1218 1219 */
1219 1220 VN_RELE(vp);
1220 1221 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1221 1222 goto top;
1222 1223 } else
1223 1224 *vpp = vp;
1224 1225 return (error);
1225 1226 }
1226 1227
1227 1228 /*
1228 1229 * The following two accessor functions are for the NFSv4 server. Since there
1229 1230 * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1230 1231 * vnode open counts correct when a client "upgrades" an open or does an
1231 1232 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1232 1233 * open mode (add or subtract read or write), but also change the share/deny
1233 1234 * modes. However, share reservations are not integrated with OPEN, yet, so
1234 1235 * we need to handle each separately. These functions are cleaner than having
1235 1236 * the NFS server manipulate the counts directly, however, nobody else should
1236 1237 * use these functions.
1237 1238 */
1238 1239 void
1239 1240 vn_open_upgrade(
1240 1241 vnode_t *vp,
1241 1242 int filemode)
1242 1243 {
1243 1244 ASSERT(vp->v_type == VREG);
1244 1245
1245 1246 if (filemode & FREAD)
1246 1247 atomic_add_32(&(vp->v_rdcnt), 1);
1247 1248 if (filemode & FWRITE)
1248 1249 atomic_add_32(&(vp->v_wrcnt), 1);
1249 1250
1250 1251 }
1251 1252
1252 1253 void
1253 1254 vn_open_downgrade(
1254 1255 vnode_t *vp,
1255 1256 int filemode)
1256 1257 {
1257 1258 ASSERT(vp->v_type == VREG);
1258 1259
1259 1260 if (filemode & FREAD) {
1260 1261 ASSERT(vp->v_rdcnt > 0);
1261 1262 atomic_add_32(&(vp->v_rdcnt), -1);
1262 1263 }
1263 1264 if (filemode & FWRITE) {
1264 1265 ASSERT(vp->v_wrcnt > 0);
1265 1266 atomic_add_32(&(vp->v_wrcnt), -1);
1266 1267 }
1267 1268
1268 1269 }
1269 1270
1270 1271 int
1271 1272 vn_create(
1272 1273 char *pnamep,
1273 1274 enum uio_seg seg,
1274 1275 struct vattr *vap,
1275 1276 enum vcexcl excl,
1276 1277 int mode,
1277 1278 struct vnode **vpp,
1278 1279 enum create why,
1279 1280 int flag,
1280 1281 mode_t umask)
1281 1282 {
1282 1283 return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
1283 1284 umask, NULL));
1284 1285 }
1285 1286
1286 1287 /*
1287 1288 * Create a vnode (makenode).
1288 1289 */
1289 1290 int
1290 1291 vn_createat(
1291 1292 char *pnamep,
1292 1293 enum uio_seg seg,
1293 1294 struct vattr *vap,
1294 1295 enum vcexcl excl,
1295 1296 int mode,
1296 1297 struct vnode **vpp,
1297 1298 enum create why,
1298 1299 int flag,
1299 1300 mode_t umask,
1300 1301 struct vnode *startvp)
1301 1302 {
1302 1303 struct vnode *dvp; /* ptr to parent dir vnode */
1303 1304 struct vnode *vp = NULL;
1304 1305 struct pathname pn;
1305 1306 int error;
1306 1307 int in_crit = 0;
1307 1308 struct vattr vattr;
1308 1309 enum symfollow follow;
1309 1310 int estale_retry = 0;
1310 1311 uint32_t auditing = AU_AUDITING();
1311 1312
1312 1313 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1313 1314
1314 1315 /* symlink interpretation */
1315 1316 if ((flag & FNOFOLLOW) || excl == EXCL)
1316 1317 follow = NO_FOLLOW;
1317 1318 else
1318 1319 follow = FOLLOW;
1319 1320 flag &= ~(FNOFOLLOW|FNOLINKS);
1320 1321
1321 1322 top:
1322 1323 /*
1323 1324 * Lookup directory.
1324 1325 * If new object is a file, call lower level to create it.
1325 1326 * Note that it is up to the lower level to enforce exclusive
1326 1327 * creation, if the file is already there.
1327 1328 * This allows the lower level to do whatever
1328 1329 * locking or protocol that is needed to prevent races.
1329 1330 * If the new object is directory call lower level to make
1330 1331 * the new directory, with "." and "..".
1331 1332 */
1332 1333 if (error = pn_get(pnamep, seg, &pn))
1333 1334 return (error);
1334 1335 if (auditing)
1335 1336 audit_vncreate_start();
1336 1337 dvp = NULL;
1337 1338 *vpp = NULL;
1338 1339 /*
1339 1340 * lookup will find the parent directory for the vnode.
1340 1341 * When it is done the pn holds the name of the entry
1341 1342 * in the directory.
1342 1343 * If this is a non-exclusive create we also find the node itself.
1343 1344 */
1344 1345 error = lookuppnat(&pn, NULL, follow, &dvp,
1345 1346 (excl == EXCL) ? NULLVPP : vpp, startvp);
1346 1347 if (error) {
1347 1348 pn_free(&pn);
1348 1349 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1349 1350 goto top;
1350 1351 if (why == CRMKDIR && error == EINVAL)
1351 1352 error = EEXIST; /* SVID */
1352 1353 return (error);
1353 1354 }
1354 1355
1355 1356 if (why != CRMKNOD)
1356 1357 vap->va_mode &= ~VSVTX;
1357 1358
1358 1359 /*
1359 1360 * If default ACLs are defined for the directory don't apply the
1360 1361 * umask if umask is passed.
1361 1362 */
1362 1363
1363 1364 if (umask) {
1364 1365
1365 1366 vsecattr_t vsec;
1366 1367
1367 1368 vsec.vsa_aclcnt = 0;
1368 1369 vsec.vsa_aclentp = NULL;
1369 1370 vsec.vsa_dfaclcnt = 0;
1370 1371 vsec.vsa_dfaclentp = NULL;
1371 1372 vsec.vsa_mask = VSA_DFACLCNT;
1372 1373 error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
1373 1374 /*
1374 1375 * If error is ENOSYS then treat it as no error
1375 1376 * Don't want to force all file systems to support
1376 1377 * aclent_t style of ACL's.
1377 1378 */
1378 1379 if (error == ENOSYS)
1379 1380 error = 0;
1380 1381 if (error) {
1381 1382 if (*vpp != NULL)
1382 1383 VN_RELE(*vpp);
1383 1384 goto out;
1384 1385 } else {
1385 1386 /*
1386 1387 * Apply the umask if no default ACLs.
1387 1388 */
1388 1389 if (vsec.vsa_dfaclcnt == 0)
1389 1390 vap->va_mode &= ~umask;
1390 1391
1391 1392 /*
1392 1393 * VOP_GETSECATTR() may have allocated memory for
1393 1394 * ACLs we didn't request, so double-check and
1394 1395 * free it if necessary.
1395 1396 */
1396 1397 if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
1397 1398 kmem_free((caddr_t)vsec.vsa_aclentp,
1398 1399 vsec.vsa_aclcnt * sizeof (aclent_t));
1399 1400 if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
1400 1401 kmem_free((caddr_t)vsec.vsa_dfaclentp,
1401 1402 vsec.vsa_dfaclcnt * sizeof (aclent_t));
1402 1403 }
1403 1404 }
1404 1405
1405 1406 /*
1406 1407 * In general we want to generate EROFS if the file system is
1407 1408 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
1408 1409 * documents the open system call, and it says that O_CREAT has no
1409 1410 * effect if the file already exists. Bug 1119649 states
1410 1411 * that open(path, O_CREAT, ...) fails when attempting to open an
1411 1412 * existing file on a read only file system. Thus, the first part
1412 1413 * of the following if statement has 3 checks:
1413 1414 * if the file exists &&
1414 1415 * it is being open with write access &&
1415 1416 * the file system is read only
1416 1417 * then generate EROFS
1417 1418 */
1418 1419 if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
1419 1420 (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
1420 1421 if (*vpp)
1421 1422 VN_RELE(*vpp);
1422 1423 error = EROFS;
1423 1424 } else if (excl == NONEXCL && *vpp != NULL) {
1424 1425 vnode_t *rvp;
1425 1426
1426 1427 /*
1427 1428 * File already exists. If a mandatory lock has been
1428 1429 * applied, return error.
1429 1430 */
1430 1431 vp = *vpp;
1431 1432 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1432 1433 rvp = vp;
1433 1434 if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
1434 1435 nbl_start_crit(vp, RW_READER);
1435 1436 in_crit = 1;
1436 1437 }
1437 1438 if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
1438 1439 vattr.va_mask = AT_MODE|AT_SIZE;
1439 1440 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
1440 1441 goto out;
1441 1442 }
1442 1443 if (MANDLOCK(vp, vattr.va_mode)) {
1443 1444 error = EAGAIN;
1444 1445 goto out;
1445 1446 }
1446 1447 /*
1447 1448 * File cannot be truncated if non-blocking mandatory
1448 1449 * locks are currently on the file.
1449 1450 */
1450 1451 if ((vap->va_mask & AT_SIZE) && in_crit) {
1451 1452 u_offset_t offset;
1452 1453 ssize_t length;
1453 1454
1454 1455 offset = vap->va_size > vattr.va_size ?
1455 1456 vattr.va_size : vap->va_size;
1456 1457 length = vap->va_size > vattr.va_size ?
1457 1458 vap->va_size - vattr.va_size :
1458 1459 vattr.va_size - vap->va_size;
1459 1460 if (nbl_conflict(vp, NBL_WRITE, offset,
1460 1461 length, 0, NULL)) {
1461 1462 error = EACCES;
1462 1463 goto out;
1463 1464 }
1464 1465 }
1465 1466 }
1466 1467
1467 1468 /*
1468 1469 * If the file is the root of a VFS, we've crossed a
1469 1470 * mount point and the "containing" directory that we
1470 1471 * acquired above (dvp) is irrelevant because it's in
1471 1472 * a different file system. We apply VOP_CREATE to the
1472 1473 * target itself instead of to the containing directory
1473 1474 * and supply a null path name to indicate (conventionally)
1474 1475 * the node itself as the "component" of interest.
1475 1476 *
1476 1477 * The intercession of the file system is necessary to
1477 1478 * ensure that the appropriate permission checks are
1478 1479 * done.
1479 1480 */
1480 1481 if (vp->v_flag & VROOT) {
1481 1482 ASSERT(why != CRMKDIR);
1482 1483 error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
1483 1484 CRED(), flag, NULL, NULL);
1484 1485 /*
1485 1486 * If the create succeeded, it will have created
1486 1487 * a new reference to the vnode. Give up the
1487 1488 * original reference. The assertion should not
1488 1489 * get triggered because NBMAND locks only apply to
1489 1490 * VREG files. And if in_crit is non-zero for some
1490 1491 * reason, detect that here, rather than when we
1491 1492 * deference a null vp.
1492 1493 */
1493 1494 ASSERT(in_crit == 0);
1494 1495 VN_RELE(vp);
1495 1496 vp = NULL;
1496 1497 goto out;
1497 1498 }
1498 1499
1499 1500 /*
1500 1501 * Large File API - non-large open (FOFFMAX flag not set)
1501 1502 * of regular file fails if the file size exceeds MAXOFF32_T.
1502 1503 */
1503 1504 if (why != CRMKDIR &&
1504 1505 !(flag & FOFFMAX) &&
1505 1506 (vp->v_type == VREG)) {
1506 1507 vattr.va_mask = AT_SIZE;
1507 1508 if ((error = VOP_GETATTR(vp, &vattr, 0,
1508 1509 CRED(), NULL))) {
1509 1510 goto out;
1510 1511 }
1511 1512 if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
1512 1513 error = EOVERFLOW;
1513 1514 goto out;
1514 1515 }
1515 1516 }
1516 1517 }
1517 1518
1518 1519 if (error == 0) {
1519 1520 /*
1520 1521 * Call mkdir() if specified, otherwise create().
1521 1522 */
1522 1523 int must_be_dir = pn_fixslash(&pn); /* trailing '/'? */
1523 1524
1524 1525 if (why == CRMKDIR)
1525 1526 /*
1526 1527 * N.B., if vn_createat() ever requests
1527 1528 * case-insensitive behavior then it will need
1528 1529 * to be passed to VOP_MKDIR(). VOP_CREATE()
1529 1530 * will already get it via "flag"
1530 1531 */
1531 1532 error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
1532 1533 NULL, 0, NULL);
1533 1534 else if (!must_be_dir)
1534 1535 error = VOP_CREATE(dvp, pn.pn_path, vap,
1535 1536 excl, mode, vpp, CRED(), flag, NULL, NULL);
1536 1537 else
1537 1538 error = ENOTDIR;
1538 1539 }
1539 1540
1540 1541 out:
1541 1542
1542 1543 if (auditing)
1543 1544 audit_vncreate_finish(*vpp, error);
1544 1545 if (in_crit) {
1545 1546 nbl_end_crit(vp);
1546 1547 in_crit = 0;
1547 1548 }
1548 1549 if (vp != NULL) {
1549 1550 VN_RELE(vp);
1550 1551 vp = NULL;
1551 1552 }
1552 1553 pn_free(&pn);
1553 1554 VN_RELE(dvp);
1554 1555 /*
1555 1556 * The following clause was added to handle a problem
1556 1557 * with NFS consistency. It is possible that a lookup
1557 1558 * of the file to be created succeeded, but the file
1558 1559 * itself doesn't actually exist on the server. This
1559 1560 * is chiefly due to the DNLC containing an entry for
1560 1561 * the file which has been removed on the server. In
1561 1562 * this case, we just start over. If there was some
1562 1563 * other cause for the ESTALE error, then the lookup
1563 1564 * of the file will fail and the error will be returned
1564 1565 * above instead of looping around from here.
1565 1566 */
1566 1567 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1567 1568 goto top;
1568 1569 return (error);
1569 1570 }
1570 1571
1571 1572 int
1572 1573 vn_link(char *from, char *to, enum uio_seg seg)
1573 1574 {
1574 1575 return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
1575 1576 }
1576 1577
1577 1578 int
1578 1579 vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1579 1580 vnode_t *tstartvp, char *to, enum uio_seg seg)
1580 1581 {
1581 1582 struct vnode *fvp; /* from vnode ptr */
1582 1583 struct vnode *tdvp; /* to directory vnode ptr */
1583 1584 struct pathname pn;
1584 1585 int error;
1585 1586 struct vattr vattr;
1586 1587 dev_t fsid;
1587 1588 int estale_retry = 0;
1588 1589 uint32_t auditing = AU_AUDITING();
1589 1590
1590 1591 top:
1591 1592 fvp = tdvp = NULL;
1592 1593 if (error = pn_get(to, seg, &pn))
1593 1594 return (error);
1594 1595 if (auditing && fstartvp != NULL)
1595 1596 audit_setfsat_path(1);
1596 1597 if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1597 1598 goto out;
1598 1599 if (auditing && tstartvp != NULL)
1599 1600 audit_setfsat_path(3);
1600 1601 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1601 1602 goto out;
1602 1603 /*
1603 1604 * Make sure both source vnode and target directory vnode are
1604 1605 * in the same vfs and that it is writeable.
1605 1606 */
1606 1607 vattr.va_mask = AT_FSID;
1607 1608 if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1608 1609 goto out;
1609 1610 fsid = vattr.va_fsid;
1610 1611 vattr.va_mask = AT_FSID;
1611 1612 if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1612 1613 goto out;
1613 1614 if (fsid != vattr.va_fsid) {
1614 1615 error = EXDEV;
1615 1616 goto out;
1616 1617 }
1617 1618 if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1618 1619 error = EROFS;
1619 1620 goto out;
1620 1621 }
1621 1622 /*
1622 1623 * Do the link.
1623 1624 */
1624 1625 (void) pn_fixslash(&pn);
1625 1626 error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1626 1627 out:
1627 1628 pn_free(&pn);
1628 1629 if (fvp)
1629 1630 VN_RELE(fvp);
1630 1631 if (tdvp)
1631 1632 VN_RELE(tdvp);
1632 1633 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1633 1634 goto top;
1634 1635 return (error);
1635 1636 }
1636 1637
1637 1638 int
1638 1639 vn_rename(char *from, char *to, enum uio_seg seg)
1639 1640 {
1640 1641 return (vn_renameat(NULL, from, NULL, to, seg));
1641 1642 }
1642 1643
1643 1644 int
1644 1645 vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
1645 1646 char *tname, enum uio_seg seg)
1646 1647 {
1647 1648 int error;
1648 1649 struct vattr vattr;
1649 1650 struct pathname fpn; /* from pathname */
1650 1651 struct pathname tpn; /* to pathname */
1651 1652 dev_t fsid;
1652 1653 int in_crit_src, in_crit_targ;
1653 1654 vnode_t *fromvp, *fvp;
1654 1655 vnode_t *tovp, *targvp;
1655 1656 int estale_retry = 0;
1656 1657 uint32_t auditing = AU_AUDITING();
1657 1658
1658 1659 top:
1659 1660 fvp = fromvp = tovp = targvp = NULL;
1660 1661 in_crit_src = in_crit_targ = 0;
1661 1662 /*
1662 1663 * Get to and from pathnames.
1663 1664 */
1664 1665 if (error = pn_get(fname, seg, &fpn))
1665 1666 return (error);
1666 1667 if (error = pn_get(tname, seg, &tpn)) {
1667 1668 pn_free(&fpn);
1668 1669 return (error);
1669 1670 }
1670 1671
1671 1672 /*
1672 1673 	 * First we need to resolve the correct directories.
1673 1674 	 * The passed-in directories may only be a starting point,
1674 1675 * but we need the real directories the file(s) live in.
1675 1676 * For example the fname may be something like usr/lib/sparc
1676 1677 * and we were passed in the / directory, but we need to
1677 1678 * use the lib directory for the rename.
1678 1679 */
1679 1680
1680 1681 if (auditing && fdvp != NULL)
1681 1682 audit_setfsat_path(1);
1682 1683 /*
1683 1684 * Lookup to and from directories.
1684 1685 */
1685 1686 if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1686 1687 goto out;
1687 1688 }
1688 1689
1689 1690 /*
1690 1691 * Make sure there is an entry.
1691 1692 */
1692 1693 if (fvp == NULL) {
1693 1694 error = ENOENT;
1694 1695 goto out;
1695 1696 }
1696 1697
1697 1698 if (auditing && tdvp != NULL)
1698 1699 audit_setfsat_path(3);
1699 1700 if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
1700 1701 goto out;
1701 1702 }
1702 1703
1703 1704 /*
1704 1705 * Make sure both the from vnode directory and the to directory
1705 1706 * are in the same vfs and the to directory is writable.
1706 1707 * We check fsid's, not vfs pointers, so loopback fs works.
1707 1708 */
1708 1709 if (fromvp != tovp) {
1709 1710 vattr.va_mask = AT_FSID;
1710 1711 if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
1711 1712 goto out;
1712 1713 fsid = vattr.va_fsid;
1713 1714 vattr.va_mask = AT_FSID;
1714 1715 if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
1715 1716 goto out;
1716 1717 if (fsid != vattr.va_fsid) {
1717 1718 error = EXDEV;
1718 1719 goto out;
1719 1720 }
1720 1721 }
1721 1722
1722 1723 if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1723 1724 error = EROFS;
1724 1725 goto out;
1725 1726 }
1726 1727
1727 1728 if (targvp && (fvp != targvp)) {
1728 1729 nbl_start_crit(targvp, RW_READER);
1729 1730 in_crit_targ = 1;
1730 1731 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1731 1732 error = EACCES;
1732 1733 goto out;
1733 1734 }
1734 1735 }
1735 1736
1736 1737 if (nbl_need_check(fvp)) {
1737 1738 nbl_start_crit(fvp, RW_READER);
1738 1739 in_crit_src = 1;
1739 1740 if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
1740 1741 error = EACCES;
1741 1742 goto out;
1742 1743 }
1743 1744 }
1744 1745
1745 1746 /*
1746 1747 * Do the rename.
1747 1748 */
1748 1749 (void) pn_fixslash(&tpn);
1749 1750 error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
1750 1751 NULL, 0);
1751 1752
1752 1753 out:
1753 1754 pn_free(&fpn);
1754 1755 pn_free(&tpn);
1755 1756 if (in_crit_src)
1756 1757 nbl_end_crit(fvp);
1757 1758 if (in_crit_targ)
1758 1759 nbl_end_crit(targvp);
1759 1760 if (fromvp)
1760 1761 VN_RELE(fromvp);
1761 1762 if (tovp)
1762 1763 VN_RELE(tovp);
1763 1764 if (targvp)
1764 1765 VN_RELE(targvp);
1765 1766 if (fvp)
1766 1767 VN_RELE(fvp);
1767 1768 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1768 1769 goto top;
1769 1770 return (error);
1770 1771 }
1771 1772
1772 1773 /*
1773 1774 * Remove a file or directory.
1774 1775 */
1775 1776 int
1776 1777 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1777 1778 {
1778 1779 return (vn_removeat(NULL, fnamep, seg, dirflag));
1779 1780 }
1780 1781
1781 1782 int
1782 1783 vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1783 1784 {
1784 1785 struct vnode *vp; /* entry vnode */
1785 1786 struct vnode *dvp; /* ptr to parent dir vnode */
1786 1787 struct vnode *coveredvp;
1787 1788 struct pathname pn; /* name of entry */
1788 1789 enum vtype vtype;
1789 1790 int error;
1790 1791 struct vfs *vfsp;
1791 1792 struct vfs *dvfsp; /* ptr to parent dir vfs */
1792 1793 int in_crit = 0;
1793 1794 int estale_retry = 0;
1794 1795
1795 1796 top:
1796 1797 if (error = pn_get(fnamep, seg, &pn))
1797 1798 return (error);
1798 1799 dvp = vp = NULL;
1799 1800 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1800 1801 pn_free(&pn);
1801 1802 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1802 1803 goto top;
1803 1804 return (error);
1804 1805 }
1805 1806
1806 1807 /*
1807 1808 * Make sure there is an entry.
1808 1809 */
1809 1810 if (vp == NULL) {
1810 1811 error = ENOENT;
1811 1812 goto out;
1812 1813 }
1813 1814
1814 1815 vfsp = vp->v_vfsp;
1815 1816 dvfsp = dvp->v_vfsp;
1816 1817
1817 1818 /*
1818 1819 * If the named file is the root of a mounted filesystem, fail,
1819 1820 * unless it's marked unlinkable. In that case, unmount the
1820 1821 * filesystem and proceed to unlink the covered vnode. (If the
1821 1822 * covered vnode is a directory, use rmdir instead of unlink,
1822 1823 * to avoid file system corruption.)
1823 1824 */
1824 1825 if (vp->v_flag & VROOT) {
1825 1826 if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
1826 1827 error = EBUSY;
1827 1828 goto out;
1828 1829 }
1829 1830
1830 1831 /*
1831 1832 * Namefs specific code starts here.
1832 1833 */
1833 1834
1834 1835 if (dirflag == RMDIRECTORY) {
1835 1836 /*
1836 1837 			 * User called rmdir(2) on a file that has
1837 1838 			 * been namefs mounted on top of it. Since
1838 1839 			 * namefs doesn't allow directories to
1839 1840 			 * be mounted on other files, we know
1840 1841 			 * vp is not of type VDIR, so fail the operation.
1841 1842 */
1842 1843 error = ENOTDIR;
1843 1844 goto out;
1844 1845 }
1845 1846
1846 1847 /*
1847 1848 * If VROOT is still set after grabbing vp->v_lock,
1848 1849 		 * no one has finished nm_unmount yet and coveredvp
1849 1850 		 * is still valid.
1850 1851 * If we manage to grab vn_vfswlock(coveredvp) before releasing
1851 1852 * vp->v_lock, any race window is eliminated.
1852 1853 */
1853 1854
1854 1855 mutex_enter(&vp->v_lock);
1855 1856 if ((vp->v_flag & VROOT) == 0) {
1856 1857 /* Someone beat us to the unmount */
1857 1858 mutex_exit(&vp->v_lock);
1858 1859 error = EBUSY;
1859 1860 goto out;
1860 1861 }
1861 1862 vfsp = vp->v_vfsp;
1862 1863 coveredvp = vfsp->vfs_vnodecovered;
1863 1864 ASSERT(coveredvp);
1864 1865 /*
1865 1866 * Note: Implementation of vn_vfswlock shows that ordering of
1866 1867 * v_lock / vn_vfswlock is not an issue here.
1867 1868 */
1868 1869 error = vn_vfswlock(coveredvp);
1869 1870 mutex_exit(&vp->v_lock);
1870 1871
1871 1872 if (error)
1872 1873 goto out;
1873 1874
1874 1875 VN_HOLD(coveredvp);
1875 1876 VN_RELE(vp);
1876 1877 error = dounmount(vfsp, 0, CRED());
1877 1878
1878 1879 /*
1879 1880 * Unmounted the namefs file system; now get
1880 1881 * the object it was mounted over.
1881 1882 */
1882 1883 vp = coveredvp;
1883 1884 /*
1884 1885 * If namefs was mounted over a directory, then
1885 1886 * we want to use rmdir() instead of unlink().
1886 1887 */
1887 1888 if (vp->v_type == VDIR)
1888 1889 dirflag = RMDIRECTORY;
1889 1890
1890 1891 if (error)
1891 1892 goto out;
1892 1893 }
1893 1894
1894 1895 /*
1895 1896 	 * Make sure the filesystem is writable.
1896 1897 * We check the parent directory's vfs in case this is an lofs vnode.
1897 1898 */
1898 1899 if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1899 1900 error = EROFS;
1900 1901 goto out;
1901 1902 }
1902 1903
1903 1904 vtype = vp->v_type;
1904 1905
1905 1906 /*
1906 1907 * If there is the possibility of an nbmand share reservation, make
1907 1908 * sure it's okay to remove the file. Keep a reference to the
1908 1909 * vnode, so that we can exit the nbl critical region after
1909 1910 * calling VOP_REMOVE.
1910 1911 * If there is no possibility of an nbmand share reservation,
1911 1912 * release the vnode reference now. Filesystems like NFS may
1912 1913 * behave differently if there is an extra reference, so get rid of
1913 1914 * this one. Fortunately, we can't have nbmand mounts on NFS
1914 1915 * filesystems.
1915 1916 */
1916 1917 if (nbl_need_check(vp)) {
1917 1918 nbl_start_crit(vp, RW_READER);
1918 1919 in_crit = 1;
1919 1920 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
1920 1921 error = EACCES;
1921 1922 goto out;
1922 1923 }
1923 1924 } else {
1924 1925 VN_RELE(vp);
1925 1926 vp = NULL;
1926 1927 }
1927 1928
1928 1929 if (dirflag == RMDIRECTORY) {
1929 1930 /*
1930 1931 * Caller is using rmdir(2), which can only be applied to
1931 1932 * directories.
1932 1933 */
1933 1934 if (vtype != VDIR) {
1934 1935 error = ENOTDIR;
1935 1936 } else {
1936 1937 vnode_t *cwd;
1937 1938 proc_t *pp = curproc;
1938 1939
1939 1940 mutex_enter(&pp->p_lock);
1940 1941 cwd = PTOU(pp)->u_cdir;
1941 1942 VN_HOLD(cwd);
1942 1943 mutex_exit(&pp->p_lock);
1943 1944 error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
1944 1945 NULL, 0);
1945 1946 VN_RELE(cwd);
1946 1947 }
1947 1948 } else {
1948 1949 /*
1949 1950 * Unlink(2) can be applied to anything.
1950 1951 */
1951 1952 error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
1952 1953 }
1953 1954
1954 1955 out:
1955 1956 pn_free(&pn);
1956 1957 if (in_crit) {
1957 1958 nbl_end_crit(vp);
1958 1959 in_crit = 0;
1959 1960 }
1960 1961 if (vp != NULL)
1961 1962 VN_RELE(vp);
1962 1963 if (dvp != NULL)
1963 1964 VN_RELE(dvp);
1964 1965 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1965 1966 goto top;
1966 1967 return (error);
1967 1968 }
1968 1969
1969 1970 /*
1970 1971 * Utility function to compare equality of vnodes.
1971 1972 * Compare the underlying real vnodes, if there are underlying vnodes.
1972 1973 * This is a more thorough comparison than the VN_CMP() macro provides.
1973 1974 */
1974 1975 int
1975 1976 vn_compare(vnode_t *vp1, vnode_t *vp2)
1976 1977 {
1977 1978 vnode_t *realvp;
1978 1979
1979 1980 if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
1980 1981 vp1 = realvp;
1981 1982 if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
1982 1983 vp2 = realvp;
1983 1984 return (VN_CMP(vp1, vp2));
1984 1985 }
1985 1986
1986 1987 /*
1987 1988 * The number of locks to hash into. This value must be a power
1988 1989 * of 2 minus 1 and should probably also be prime.
1989 1990 */
1990 1991 #define NUM_BUCKETS 1023
1991 1992
1992 1993 struct vn_vfslocks_bucket {
1993 1994 kmutex_t vb_lock;
1994 1995 vn_vfslocks_entry_t *vb_list;
1995 1996 char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
1996 1997 };
1997 1998
1998 1999 /*
1999 2000 * Total number of buckets will be NUM_BUCKETS + 1 .
2000 2001 */
2001 2002
2002 2003 #pragma align 64(vn_vfslocks_buckets)
2003 2004 static struct vn_vfslocks_bucket vn_vfslocks_buckets[NUM_BUCKETS + 1];
2004 2005
2005 2006 #define VN_VFSLOCKS_SHIFT 9
2006 2007
2007 2008 #define VN_VFSLOCKS_HASH(vfsvpptr) \
2008 2009 ((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2009 2010
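
As a worked illustration of the hash (values hypothetical): shifting the pointer right by VN_VFSLOCKS_SHIFT discards low-order bits, which alignment makes identical for most kmem-allocated objects, and masking with NUM_BUCKETS (a power of two minus one) keeps the index in range.

/*
 * Illustration only, for a hypothetical pointer value:
 *
 *	0xffffff0123456780 >> 9	  == 0x7fffff8091a2b3
 *	0x7fffff8091a2b3 & 1023	  == 691	(bucket 691)
 */
static struct vn_vfslocks_bucket *
example_bucket_for(void *vfsvpptr)
{
	return (&vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)]);
}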
2010 2011 /*
2011 2012  * vn_vfslocks_getlock() uses a hash scheme to look up the
2012 2013  * rwstlock for the vfs/vnode pointer passed to it.
2013 2014  *
2014 2015  * vn_vfslocks_rele() releases a reference in the
2015 2016  * hash table, which allows the entry allocated by
2016 2017  * vn_vfslocks_getlock() to be freed at a later
2017 2018  * stage, when the refcount drops to zero.
2018 2019 */
2019 2020
2020 2021 vn_vfslocks_entry_t *
2021 2022 vn_vfslocks_getlock(void *vfsvpptr)
2022 2023 {
2023 2024 struct vn_vfslocks_bucket *bp;
2024 2025 vn_vfslocks_entry_t *vep;
2025 2026 vn_vfslocks_entry_t *tvep;
2026 2027
2027 2028 ASSERT(vfsvpptr != NULL);
2028 2029 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2029 2030
2030 2031 mutex_enter(&bp->vb_lock);
2031 2032 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2032 2033 if (vep->ve_vpvfs == vfsvpptr) {
2033 2034 vep->ve_refcnt++;
2034 2035 mutex_exit(&bp->vb_lock);
2035 2036 return (vep);
2036 2037 }
2037 2038 }
2038 2039 mutex_exit(&bp->vb_lock);
2039 2040 vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2040 2041 rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2041 2042 vep->ve_vpvfs = (char *)vfsvpptr;
2042 2043 vep->ve_refcnt = 1;
2043 2044 mutex_enter(&bp->vb_lock);
2044 2045 for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2045 2046 if (tvep->ve_vpvfs == vfsvpptr) {
2046 2047 tvep->ve_refcnt++;
2047 2048 mutex_exit(&bp->vb_lock);
2048 2049
2049 2050 /*
2050 2051 				 * There is already an entry in the hash;
2051 2052 				 * destroy what we just allocated.
2052 2053 */
2053 2054 rwst_destroy(&vep->ve_lock);
2054 2055 kmem_free(vep, sizeof (*vep));
2055 2056 return (tvep);
2056 2057 }
2057 2058 }
2058 2059 vep->ve_next = bp->vb_list;
2059 2060 bp->vb_list = vep;
2060 2061 mutex_exit(&bp->vb_lock);
2061 2062 return (vep);
2062 2063 }
2063 2064
2064 2065 void
2065 2066 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2066 2067 {
2067 2068 struct vn_vfslocks_bucket *bp;
2068 2069 vn_vfslocks_entry_t *vep;
2069 2070 vn_vfslocks_entry_t *pvep;
2070 2071
2071 2072 ASSERT(vepent != NULL);
2072 2073 ASSERT(vepent->ve_vpvfs != NULL);
2073 2074
2074 2075 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2075 2076
2076 2077 mutex_enter(&bp->vb_lock);
2077 2078 vepent->ve_refcnt--;
2078 2079
2079 2080 if ((int32_t)vepent->ve_refcnt < 0)
2080 2081 cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2081 2082
2082 2083 if (vepent->ve_refcnt == 0) {
2083 2084 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2084 2085 if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2085 2086 if (bp->vb_list == vep)
2086 2087 bp->vb_list = vep->ve_next;
2087 2088 else {
2088 2089 /* LINTED */
2089 2090 pvep->ve_next = vep->ve_next;
2090 2091 }
2091 2092 mutex_exit(&bp->vb_lock);
2092 2093 rwst_destroy(&vep->ve_lock);
2093 2094 kmem_free(vep, sizeof (*vep));
2094 2095 return;
2095 2096 }
2096 2097 pvep = vep;
2097 2098 }
2098 2099 cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2099 2100 }
2100 2101 mutex_exit(&bp->vb_lock);
2101 2102 }
2102 2103
2103 2104 /*
2104 2105 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2105 2106 * lock protecting the v_vfsmountedhere field.
2106 2107 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2107 2108 * except that it blocks to acquire the lock VVFSLOCK.
2108 2109 *
2109 2110 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2110 2111 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2111 2112  * need the non-blocking version of the writers lock, i.e. vn_vfswlock().
2112 2113 */
2113 2114 int
2114 2115 vn_vfswlock_wait(vnode_t *vp)
2115 2116 {
2116 2117 int retval;
2117 2118 vn_vfslocks_entry_t *vpvfsentry;
2118 2119 ASSERT(vp != NULL);
2119 2120
2120 2121 vpvfsentry = vn_vfslocks_getlock(vp);
2121 2122 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2122 2123
2123 2124 if (retval == EINTR) {
2124 2125 vn_vfslocks_rele(vpvfsentry);
2125 2126 return (EINTR);
2126 2127 }
2127 2128 return (retval);
2128 2129 }
2129 2130
2130 2131 int
2131 2132 vn_vfsrlock_wait(vnode_t *vp)
2132 2133 {
2133 2134 int retval;
2134 2135 vn_vfslocks_entry_t *vpvfsentry;
2135 2136 ASSERT(vp != NULL);
2136 2137
2137 2138 vpvfsentry = vn_vfslocks_getlock(vp);
2138 2139 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2139 2140
2140 2141 if (retval == EINTR) {
2141 2142 vn_vfslocks_rele(vpvfsentry);
2142 2143 return (EINTR);
2143 2144 }
2144 2145
2145 2146 return (retval);
2146 2147 }
2147 2148
2148 2149
2149 2150 /*
2150 2151 * vn_vfswlock is used to implement a lock which is logically a writers lock
2151 2152 * protecting the v_vfsmountedhere field.
2152 2153 */
2153 2154 int
2154 2155 vn_vfswlock(vnode_t *vp)
2155 2156 {
2156 2157 vn_vfslocks_entry_t *vpvfsentry;
2157 2158
2158 2159 /*
2159 2160 * If vp is NULL then somebody is trying to lock the covered vnode
2160 2161 * of /. (vfs_vnodecovered is NULL for /). This situation will
2161 2162 * only happen when unmounting /. Since that operation will fail
2162 2163 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2163 2164 */
2164 2165 if (vp == NULL)
2165 2166 return (EBUSY);
2166 2167
2167 2168 vpvfsentry = vn_vfslocks_getlock(vp);
2168 2169
2169 2170 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2170 2171 return (0);
2171 2172
2172 2173 vn_vfslocks_rele(vpvfsentry);
2173 2174 return (EBUSY);
2174 2175 }
2175 2176
2176 2177 int
2177 2178 vn_vfsrlock(vnode_t *vp)
2178 2179 {
2179 2180 vn_vfslocks_entry_t *vpvfsentry;
2180 2181
2181 2182 /*
2182 2183 * If vp is NULL then somebody is trying to lock the covered vnode
2183 2184 * of /. (vfs_vnodecovered is NULL for /). This situation will
2184 2185 * only happen when unmounting /. Since that operation will fail
2185 2186 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2186 2187 */
2187 2188 if (vp == NULL)
2188 2189 return (EBUSY);
2189 2190
2190 2191 vpvfsentry = vn_vfslocks_getlock(vp);
2191 2192
2192 2193 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2193 2194 return (0);
2194 2195
2195 2196 vn_vfslocks_rele(vpvfsentry);
2196 2197 return (EBUSY);
2197 2198 }
2198 2199
2199 2200 void
2200 2201 vn_vfsunlock(vnode_t *vp)
2201 2202 {
2202 2203 vn_vfslocks_entry_t *vpvfsentry;
2203 2204
2204 2205 /*
2205 2206 * ve_refcnt needs to be decremented twice.
2206 2207 	 * 1. To release the reference after a call to vn_vfslocks_getlock()
2207 2208 	 * 2. To release the reference from the locking routines like
2208 2209 	 *    vn_vfsrlock/vn_vfswlock etc.
2209 2210 */
2210 2211 vpvfsentry = vn_vfslocks_getlock(vp);
2211 2212 vn_vfslocks_rele(vpvfsentry);
2212 2213
2213 2214 rwst_exit(&vpvfsentry->ve_lock);
2214 2215 vn_vfslocks_rele(vpvfsentry);
2215 2216 }
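
Putting the pieces together, a minimal sketch (illustrative only) of the pairing callers rely on: every successful vn_vfswlock()/vn_vfsrlock() must eventually be matched by vn_vfsunlock(), which both drops the rwstlock and releases the hash-table reference the locking routine took.

static int
example_protect_mount_field(vnode_t *vp)
{
	int error;

	if ((error = vn_vfswlock(vp)) != 0)
		return (error);	/* EBUSY: another thread holds it */
	/* ... examine or update vp->v_vfsmountedhere here ... */
	vn_vfsunlock(vp);
	return (0);
}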
2216 2217
2217 2218 int
2218 2219 vn_vfswlock_held(vnode_t *vp)
2219 2220 {
2220 2221 int held;
2221 2222 vn_vfslocks_entry_t *vpvfsentry;
2222 2223
2223 2224 ASSERT(vp != NULL);
2224 2225
2225 2226 vpvfsentry = vn_vfslocks_getlock(vp);
2226 2227 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2227 2228
2228 2229 vn_vfslocks_rele(vpvfsentry);
2229 2230 return (held);
2230 2231 }
2231 2232
2232 2233
2233 2234 int
2234 2235 vn_make_ops(
2235 2236 const char *name, /* Name of file system */
2236 2237 const fs_operation_def_t *templ, /* Operation specification */
2237 2238 vnodeops_t **actual) /* Return the vnodeops */
2238 2239 {
2239 2240 int unused_ops;
2240 2241 int error;
2241 2242
2242 2243 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2243 2244
2244 2245 (*actual)->vnop_name = name;
2245 2246
2246 2247 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2247 2248 if (error) {
2248 2249 kmem_free(*actual, sizeof (vnodeops_t));
2249 2250 }
2250 2251
2251 2252 #if DEBUG
2252 2253 if (unused_ops != 0)
2253 2254 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2254 2255 "but not used", name, unused_ops);
2255 2256 #endif
2256 2257
2257 2258 return (error);
2258 2259 }
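
For illustration, this is roughly how a filesystem uses vn_make_ops() at initialization time. "examplefs" and the template contents are hypothetical; fs_nosys() is the standard stub from fs_subr.c, and unsupplied operations get defaults from vn_ops_table.

static vnodeops_t *example_vnodeops;

static const fs_operation_def_t example_vnodeops_template[] = {
	VOPNAME_OPEN,	{ .error = fs_nosys },	/* stub entry */
	VOPNAME_CLOSE,	{ .error = fs_nosys },	/* stub entry */
	NULL,		NULL
};

static int
example_init_vnodeops(void)
{
	return (vn_make_ops("examplefs", example_vnodeops_template,
	    &example_vnodeops));
}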
2259 2260
2260 2261 /*
2261 2262 * Free the vnodeops created as a result of vn_make_ops()
2262 2263 */
2263 2264 void
2264 2265 vn_freevnodeops(vnodeops_t *vnops)
2265 2266 {
2266 2267 kmem_free(vnops, sizeof (vnodeops_t));
2267 2268 }
2268 2269
2269 2270 /*
2270 2271 * Vnode cache.
2271 2272 */
2272 2273
2273 2274 /* ARGSUSED */
2274 2275 static int
2275 2276 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2276 2277 {
2277 2278 struct vnode *vp;
2278 2279
2279 2280 vp = buf;
2280 2281
2281 2282 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2282 2283 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2283 2284 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2284 2285 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2285 2286 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2286 2287 vp->v_path = NULL;
2287 2288 vp->v_mpssdata = NULL;
2288 2289 vp->v_vsd = NULL;
2289 2290 vp->v_fopdata = NULL;
2290 2291
2291 2292 return (0);
2292 2293 }
2293 2294
2294 2295 /* ARGSUSED */
2295 2296 static void
2296 2297 vn_cache_destructor(void *buf, void *cdrarg)
2297 2298 {
2298 2299 struct vnode *vp;
2299 2300
2300 2301 vp = buf;
2301 2302
2302 2303 rw_destroy(&vp->v_nbllock);
2303 2304 cv_destroy(&vp->v_cv);
2304 2305 mutex_destroy(&vp->v_vsd_lock);
2305 2306 mutex_destroy(&vp->v_lock);
2306 2307 }
2307 2308
2308 2309 void
2309 2310 vn_create_cache(void)
2310 2311 {
2311 2312 /* LINTED */
2312 2313 ASSERT((1 << VNODE_ALIGN_LOG2) ==
2313 2314 P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2314 2315 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2315 2316 VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2316 2317 NULL, 0);
2317 2318 }
2318 2319
2319 2320 void
2320 2321 vn_destroy_cache(void)
2321 2322 {
2322 2323 kmem_cache_destroy(vn_cache);
2323 2324 }
2324 2325
2325 2326 /*
2326 2327 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2327 2328 * cached by the file system and vnodes remain associated.
2328 2329 */
2329 2330 void
2330 2331 vn_recycle(vnode_t *vp)
2331 2332 {
2332 2333 ASSERT(vp->v_pages == NULL);
2333 2334
2334 2335 /*
2335 2336 * XXX - This really belongs in vn_reinit(), but we have some issues
2336 2337 * with the counts. Best to have it here for clean initialization.
2337 2338 */
2338 2339 vp->v_rdcnt = 0;
2339 2340 vp->v_wrcnt = 0;
2340 2341 vp->v_mmap_read = 0;
2341 2342 vp->v_mmap_write = 0;
2342 2343
2343 2344 /*
2344 2345 * If FEM was in use, make sure everything gets cleaned up
2345 2346 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2346 2347 * constructor.
2347 2348 */
2348 2349 if (vp->v_femhead) {
2349 2350 /* XXX - There should be a free_femhead() that does all this */
2350 2351 ASSERT(vp->v_femhead->femh_list == NULL);
2351 2352 mutex_destroy(&vp->v_femhead->femh_lock);
2352 2353 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2353 2354 vp->v_femhead = NULL;
2354 2355 }
2355 2356 if (vp->v_path) {
2356 2357 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2357 2358 vp->v_path = NULL;
2358 2359 }
2359 2360
2360 2361 if (vp->v_fopdata != NULL) {
2361 2362 free_fopdata(vp);
2362 2363 }
2363 2364 vp->v_mpssdata = NULL;
2364 2365 vsd_free(vp);
2365 2366 }
2366 2367
2367 2368 /*
2368 2369 * Used to reset the vnode fields including those that are directly accessible
2369 2370 * as well as those which require an accessor function.
2370 2371 *
2371 2372 * Does not initialize:
2372 2373 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2373 2374 * v_data (since FS-nodes and vnodes point to each other and should
2374 2375 * be updated simultaneously)
2375 2376 * v_op (in case someone needs to make a VOP call on this object)
2376 2377 */
2377 2378 void
2378 2379 vn_reinit(vnode_t *vp)
2379 2380 {
2380 2381 vp->v_count = 1;
2381 2382 vp->v_count_dnlc = 0;
2382 2383 vp->v_vfsp = NULL;
2383 2384 vp->v_stream = NULL;
2384 2385 vp->v_vfsmountedhere = NULL;
2385 2386 vp->v_flag = 0;
2386 2387 vp->v_type = VNON;
2387 2388 vp->v_rdev = NODEV;
2388 2389
2389 2390 vp->v_filocks = NULL;
2390 2391 vp->v_shrlocks = NULL;
2391 2392 vp->v_pages = NULL;
2392 2393
2393 2394 vp->v_locality = NULL;
2394 2395 vp->v_xattrdir = NULL;
2395 2396
2396 2397 /* Handles v_femhead, v_path, and the r/w/map counts */
2397 2398 vn_recycle(vp);
2398 2399 }
2399 2400
2400 2401 vnode_t *
2401 2402 vn_alloc(int kmflag)
2402 2403 {
2403 2404 vnode_t *vp;
2404 2405
2405 2406 vp = kmem_cache_alloc(vn_cache, kmflag);
2406 2407
2407 2408 if (vp != NULL) {
2408 2409 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2409 2410 vp->v_fopdata = NULL;
2410 2411 vn_reinit(vp);
2411 2412 }
2412 2413
2413 2414 return (vp);
2414 2415 }
2415 2416
2416 2417 void
2417 2418 vn_free(vnode_t *vp)
2418 2419 {
2419 2420 ASSERT(vp->v_shrlocks == NULL);
2420 2421 ASSERT(vp->v_filocks == NULL);
2421 2422
2422 2423 /*
2423 2424 * Some file systems call vn_free() with v_count of zero,
2424 2425 * some with v_count of 1. In any case, the value should
2425 2426 * never be anything else.
2426 2427 */
2427 2428 ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2428 2429 ASSERT(vp->v_count_dnlc == 0);
2429 2430 if (vp->v_path != NULL) {
2430 2431 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2431 2432 vp->v_path = NULL;
2432 2433 }
2433 2434
2434 2435 /* If FEM was in use, make sure everything gets cleaned up */
2435 2436 if (vp->v_femhead) {
2436 2437 /* XXX - There should be a free_femhead() that does all this */
2437 2438 ASSERT(vp->v_femhead->femh_list == NULL);
2438 2439 mutex_destroy(&vp->v_femhead->femh_lock);
2439 2440 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2440 2441 vp->v_femhead = NULL;
2441 2442 }
2442 2443
2443 2444 if (vp->v_fopdata != NULL) {
2444 2445 free_fopdata(vp);
2445 2446 }
2446 2447 vp->v_mpssdata = NULL;
2447 2448 vsd_free(vp);
2448 2449 kmem_cache_free(vn_cache, vp);
2449 2450 }
2450 2451
2451 2452 /*
2452 2453  * Vnode status changes; we should define better states than 1 and 0.
2453 2454 */
2454 2455 void
2455 2456 vn_reclaim(vnode_t *vp)
2456 2457 {
2457 2458 vfs_t *vfsp = vp->v_vfsp;
2458 2459
2459 2460 if (vfsp == NULL ||
2460 2461 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2461 2462 return;
2462 2463 }
2463 2464 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2464 2465 }
2465 2466
2466 2467 void
2467 2468 vn_idle(vnode_t *vp)
2468 2469 {
2469 2470 vfs_t *vfsp = vp->v_vfsp;
2470 2471
2471 2472 if (vfsp == NULL ||
2472 2473 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2473 2474 return;
2474 2475 }
2475 2476 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2476 2477 }
2477 2478 void
2478 2479 vn_exists(vnode_t *vp)
2479 2480 {
2480 2481 vfs_t *vfsp = vp->v_vfsp;
2481 2482
2482 2483 if (vfsp == NULL ||
2483 2484 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2484 2485 return;
2485 2486 }
2486 2487 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2487 2488 }
2488 2489
2489 2490 void
2490 2491 vn_invalid(vnode_t *vp)
2491 2492 {
2492 2493 vfs_t *vfsp = vp->v_vfsp;
2493 2494
2494 2495 if (vfsp == NULL ||
2495 2496 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2496 2497 return;
2497 2498 }
2498 2499 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2499 2500 }
2500 2501
2501 2502 /* Vnode event notification */
2502 2503
2503 2504 int
2504 2505 vnevent_support(vnode_t *vp, caller_context_t *ct)
2505 2506 {
2506 2507 if (vp == NULL)
2507 2508 return (EINVAL);
2508 2509
2509 2510 return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2510 2511 }
2511 2512
2512 2513 void
2513 2514 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2514 2515 {
2515 2516 if (vp == NULL || vp->v_femhead == NULL) {
2516 2517 return;
2517 2518 }
2518 2519 (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2519 2520 }
2520 2521
2521 2522 void
2522 2523 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2523 2524 caller_context_t *ct)
2524 2525 {
2525 2526 if (vp == NULL || vp->v_femhead == NULL) {
2526 2527 return;
2527 2528 }
2528 2529 (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2529 2530 }
2530 2531
2531 2532 void
2532 2533 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2533 2534 {
2534 2535 if (vp == NULL || vp->v_femhead == NULL) {
2535 2536 return;
2536 2537 }
2537 2538 (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2538 2539 }
2539 2540
2540 2541 void
2541 2542 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2542 2543 {
2543 2544 if (vp == NULL || vp->v_femhead == NULL) {
2544 2545 return;
2545 2546 }
2546 2547 (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2547 2548 }
2548 2549
2549 2550 void
2550 2551 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2551 2552 {
2552 2553 if (vp == NULL || vp->v_femhead == NULL) {
2553 2554 return;
2554 2555 }
2555 2556 (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2556 2557 }
2557 2558
2558 2559 void
2559 2560 vnevent_create(vnode_t *vp, caller_context_t *ct)
2560 2561 {
2561 2562 if (vp == NULL || vp->v_femhead == NULL) {
2562 2563 return;
2563 2564 }
2564 2565 (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2565 2566 }
2566 2567
2567 2568 void
2568 2569 vnevent_link(vnode_t *vp, caller_context_t *ct)
2569 2570 {
2570 2571 if (vp == NULL || vp->v_femhead == NULL) {
2571 2572 return;
2572 2573 }
2573 2574 (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2574 2575 }
2575 2576
2576 2577 void
2577 2578 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2578 2579 {
2579 2580 if (vp == NULL || vp->v_femhead == NULL) {
2580 2581 return;
2581 2582 }
2582 2583 (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2583 2584 }
2584 2585
2585 2586 /*
2586 2587 * Vnode accessors.
2587 2588 */
2588 2589
2589 2590 int
2590 2591 vn_is_readonly(vnode_t *vp)
2591 2592 {
2592 2593 return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2593 2594 }
2594 2595
2595 2596 int
2596 2597 vn_has_flocks(vnode_t *vp)
2597 2598 {
2598 2599 return (vp->v_filocks != NULL);
2599 2600 }
2600 2601
2601 2602 int
2602 2603 vn_has_mandatory_locks(vnode_t *vp, int mode)
2603 2604 {
2604 2605 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2605 2606 }
2606 2607
2607 2608 int
2608 2609 vn_has_cached_data(vnode_t *vp)
2609 2610 {
2610 2611 return (vp->v_pages != NULL);
2611 2612 }
2612 2613
2613 2614 /*
2614 2615 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2615 2616 * zone_enter(2).
2616 2617 */
2617 2618 int
2618 2619 vn_can_change_zones(vnode_t *vp)
2619 2620 {
2620 2621 struct vfssw *vswp;
2621 2622 int allow = 1;
2622 2623 vnode_t *rvp;
2623 2624
2624 2625 if (nfs_global_client_only != 0)
2625 2626 return (1);
2626 2627
2627 2628 /*
2628 2629 * We always want to look at the underlying vnode if there is one.
2629 2630 */
2630 2631 if (VOP_REALVP(vp, &rvp, NULL) != 0)
2631 2632 rvp = vp;
2632 2633 /*
2633 2634 * Some pseudo filesystems (including doorfs) don't actually register
2634 2635 * their vfsops_t, so the following may return NULL; we happily let
2635 2636 * such vnodes switch zones.
2636 2637 */
2637 2638 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2638 2639 if (vswp != NULL) {
2639 2640 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2640 2641 allow = 0;
2641 2642 vfs_unrefvfssw(vswp);
2642 2643 }
2643 2644 return (allow);
2644 2645 }
2645 2646
2646 2647 /*
2647 2648 * Return nonzero if the vnode is a mount point, zero if not.
2648 2649 */
2649 2650 int
2650 2651 vn_ismntpt(vnode_t *vp)
2651 2652 {
2652 2653 return (vp->v_vfsmountedhere != NULL);
2653 2654 }
2654 2655
2655 2656 /* Retrieve the vfs (if any) mounted on this vnode */
2656 2657 vfs_t *
2657 2658 vn_mountedvfs(vnode_t *vp)
2658 2659 {
2659 2660 return (vp->v_vfsmountedhere);
2660 2661 }
2661 2662
2662 2663 /*
2663 2664 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2664 2665 */
2665 2666 int
2666 2667 vn_in_dnlc(vnode_t *vp)
2667 2668 {
2668 2669 return (vp->v_count_dnlc > 0);
2669 2670 }
2670 2671
2671 2672 /*
2672 2673 * vn_has_other_opens() checks whether a particular file is opened by more than
2673 2674 * just the caller and whether the open is for read and/or write.
2674 2675 * This routine is for calling after the caller has already called VOP_OPEN()
2675 2676 * and the caller wishes to know if they are the only one with it open for
2676 2677 * the mode(s) specified.
2677 2678 *
2678 2679 * Vnode counts are only kept on regular files (v_type=VREG).
2679 2680 */
2680 2681 int
2681 2682 vn_has_other_opens(
2682 2683 vnode_t *vp,
2683 2684 v_mode_t mode)
2684 2685 {
2685 2686
2686 2687 ASSERT(vp != NULL);
2687 2688
2688 2689 switch (mode) {
2689 2690 case V_WRITE:
2690 2691 if (vp->v_wrcnt > 1)
2691 2692 return (V_TRUE);
2692 2693 break;
2693 2694 case V_RDORWR:
2694 2695 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2695 2696 return (V_TRUE);
2696 2697 break;
2697 2698 case V_RDANDWR:
2698 2699 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2699 2700 return (V_TRUE);
2700 2701 break;
2701 2702 case V_READ:
2702 2703 if (vp->v_rdcnt > 1)
2703 2704 return (V_TRUE);
2704 2705 break;
2705 2706 }
2706 2707
2707 2708 return (V_FALSE);
2708 2709 }
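
An illustrative (hypothetical) caller: an NFS-style server that already holds its own open could use this to decide whether a read delegation is safe, since any additional writer makes it dangerous.

static int
example_safe_to_delegate_read(vnode_t *vp)
{
	/* The caller's own open is counted, so "> 1" means others. */
	return (vn_has_other_opens(vp, V_WRITE) == V_FALSE);
}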
2709 2710
2710 2711 /*
2711 2712 * vn_is_opened() checks whether a particular file is opened and
2712 2713 * whether the open is for read and/or write.
2713 2714 *
2714 2715 * Vnode counts are only kept on regular files (v_type=VREG).
2715 2716 */
2716 2717 int
2717 2718 vn_is_opened(
2718 2719 vnode_t *vp,
2719 2720 v_mode_t mode)
2720 2721 {
2721 2722
2722 2723 ASSERT(vp != NULL);
2723 2724
2724 2725 switch (mode) {
2725 2726 case V_WRITE:
2726 2727 if (vp->v_wrcnt)
2727 2728 return (V_TRUE);
2728 2729 break;
2729 2730 case V_RDANDWR:
2730 2731 if (vp->v_rdcnt && vp->v_wrcnt)
2731 2732 return (V_TRUE);
2732 2733 break;
2733 2734 case V_RDORWR:
2734 2735 if (vp->v_rdcnt || vp->v_wrcnt)
2735 2736 return (V_TRUE);
2736 2737 break;
2737 2738 case V_READ:
2738 2739 if (vp->v_rdcnt)
2739 2740 return (V_TRUE);
2740 2741 break;
2741 2742 }
2742 2743
2743 2744 return (V_FALSE);
2744 2745 }
2745 2746
2746 2747 /*
2747 2748 * vn_is_mapped() checks whether a particular file is mapped and whether
2748 2749 * the file is mapped read and/or write.
2749 2750 */
2750 2751 int
2751 2752 vn_is_mapped(
2752 2753 vnode_t *vp,
2753 2754 v_mode_t mode)
2754 2755 {
2755 2756
2756 2757 ASSERT(vp != NULL);
2757 2758
2758 2759 #if !defined(_LP64)
2759 2760 switch (mode) {
2760 2761 /*
2761 2762 * The atomic_add_64_nv functions force atomicity in the
2762 2763 * case of 32 bit architectures. Otherwise the 64 bit values
2763 2764 * require two fetches. The value of the fields may be
2764 2765 * (potentially) changed between the first fetch and the
2765 2766 * second
2766 2767 */
2767 2768 case V_WRITE:
2768 2769 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2769 2770 return (V_TRUE);
2770 2771 break;
2771 2772 case V_RDANDWR:
2772 2773 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2773 2774 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2774 2775 return (V_TRUE);
2775 2776 break;
2776 2777 case V_RDORWR:
2777 2778 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2778 2779 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2779 2780 return (V_TRUE);
2780 2781 break;
2781 2782 case V_READ:
2782 2783 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2783 2784 return (V_TRUE);
2784 2785 break;
2785 2786 }
2786 2787 #else
2787 2788 switch (mode) {
2788 2789 case V_WRITE:
2789 2790 if (vp->v_mmap_write)
2790 2791 return (V_TRUE);
2791 2792 break;
2792 2793 case V_RDANDWR:
2793 2794 if (vp->v_mmap_read && vp->v_mmap_write)
2794 2795 return (V_TRUE);
2795 2796 break;
2796 2797 case V_RDORWR:
2797 2798 if (vp->v_mmap_read || vp->v_mmap_write)
2798 2799 return (V_TRUE);
2799 2800 break;
2800 2801 case V_READ:
2801 2802 if (vp->v_mmap_read)
2802 2803 return (V_TRUE);
2803 2804 break;
2804 2805 }
2805 2806 #endif
2806 2807
2807 2808 return (V_FALSE);
2808 2809 }
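
The atomic_add_64_nv(ptr, 0) trick above deserves a note: adding zero and taking the new value is simply an atomic 64-bit read, which a plain load cannot guarantee when it compiles to two 32-bit fetches. A minimal sketch of the idiom in isolation:

static uint64_t
example_atomic_read64(volatile uint64_t *p)
{
#if !defined(_LP64)
	/* One atomic op yields a consistent snapshot on 32-bit. */
	return (atomic_add_64_nv(p, 0));
#else
	return (*p);		/* a 64-bit load is already atomic */
#endif
}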
2809 2810
2810 2811 /*
2811 2812 * Set the operations vector for a vnode.
2812 2813 *
2813 2814 * FEM ensures that the v_femhead pointer is filled in before the
2814 2815 * v_op pointer is changed. This means that if the v_femhead pointer
2815 2816  * is NULL, and the v_op field hasn't changed since we checked
2816 2817  * the v_femhead pointer, then our update is OK - we are not racing
2817 2818  * with FEM.
2818 2819 */
2819 2820 void
2820 2821 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2821 2822 {
2822 2823 vnodeops_t *op;
2823 2824
2824 2825 ASSERT(vp != NULL);
2825 2826 ASSERT(vnodeops != NULL);
2826 2827
2827 2828 op = vp->v_op;
2828 2829 membar_consumer();
2829 2830 /*
2830 2831 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2831 2832 * compare-and-swap on vp->v_op. If either fails, then FEM is
2832 2833 * in effect on the vnode and we need to have FEM deal with it.
2833 2834 */
2834 2835 if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2835 2836 fem_setvnops(vp, vnodeops);
2836 2837 }
2837 2838 }
2838 2839
2839 2840 /*
2840 2841 * Retrieve the operations vector for a vnode
2841 2842  * As with vn_setops() above, make sure we aren't racing with FEM.
2842 2843 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2843 2844 * make sense to the callers of this routine.
2844 2845 */
2845 2846 vnodeops_t *
2846 2847 vn_getops(vnode_t *vp)
2847 2848 {
2848 2849 vnodeops_t *op;
2849 2850
2850 2851 ASSERT(vp != NULL);
2851 2852
2852 2853 op = vp->v_op;
2853 2854 membar_consumer();
2854 2855 if (vp->v_femhead == NULL && op == vp->v_op) {
2855 2856 return (op);
2856 2857 } else {
2857 2858 return (fem_getvnops(vp));
2858 2859 }
2859 2860 }
2860 2861
2861 2862 /*
2862 2863 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2863 2864 * Returns zero (0) if not.
2864 2865 */
2865 2866 int
2866 2867 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2867 2868 {
2868 2869 return (vn_getops(vp) == vnodeops);
2869 2870 }
2870 2871
2871 2872 /*
2872 2873 * Returns non-zero (1) if the specified operation matches the
2873 2874  * corresponding operation for the vnode.
2874 2875 * Returns zero (0) if not.
2875 2876 */
2876 2877
2877 2878 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2878 2879
2879 2880 int
2880 2881 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2881 2882 {
2882 2883 const fs_operation_trans_def_t *otdp;
2883 2884 fs_generic_func_p *loc = NULL;
2884 2885 vnodeops_t *vop = vn_getops(vp);
2885 2886
2886 2887 ASSERT(vopname != NULL);
2887 2888
2888 2889 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2889 2890 if (MATCHNAME(otdp->name, vopname)) {
2890 2891 loc = (fs_generic_func_p *)
2891 2892 ((char *)(vop) + otdp->offset);
2892 2893 break;
2893 2894 }
2894 2895 }
2895 2896
2896 2897 return ((loc != NULL) && (*loc == funcp));
2897 2898 }
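
A sketch of how these predicates might be used by a hypothetical filesystem; example_ops and example_read are assumptions, while VOPNAME_READ is the real operation-table name.

static int
example_ops_are_ours(vnode_t *vp, vnodeops_t *example_ops,
    fs_generic_func_p example_read)
{
	/*
	 * vn_getops() (used by both predicates) resolves the
	 * underlying vector even when FEM has interposed.
	 */
	return (vn_matchops(vp, example_ops) &&
	    vn_matchopval(vp, VOPNAME_READ, example_read));
}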
2898 2899
2899 2900 /*
2900 2901 * fs_new_caller_id() needs to return a unique ID on a given local system.
2901 2902 * The IDs do not need to survive across reboots. These are primarily
2902 2903 * used so that (FEM) monitors can detect particular callers (such as
2903 2904 * the NFS server) to a given vnode/vfs operation.
2904 2905 */
2905 2906 u_longlong_t
2906 2907 fs_new_caller_id()
2907 2908 {
2908 2909 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2909 2910
2910 2911 return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2911 2912 }
2912 2913
2913 2914 /*
2914 2915 * Given a starting vnode and a path, updates the path in the target vnode in
2915 2916 * a safe manner. If the vnode already has path information embedded, then the
2916 2917 * cached path is left untouched.
2917 2918 */
2918 2919
2919 2920 size_t max_vnode_path = 4 * MAXPATHLEN;
2920 2921
2921 2922 void
2922 2923 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2923 2924 const char *path, size_t plen)
2924 2925 {
2925 2926 char *rpath;
2926 2927 vnode_t *base;
2927 2928 size_t rpathlen, rpathalloc;
2928 2929 int doslash = 1;
2929 2930
2930 2931 if (*path == '/') {
2931 2932 base = rootvp;
2932 2933 path++;
2933 2934 plen--;
2934 2935 } else {
2935 2936 base = startvp;
2936 2937 }
2937 2938
2938 2939 /*
2939 2940 * We cannot grab base->v_lock while we hold vp->v_lock because of
2940 2941 * the potential for deadlock.
2941 2942 */
2942 2943 mutex_enter(&base->v_lock);
2943 2944 if (base->v_path == NULL) {
2944 2945 mutex_exit(&base->v_lock);
2945 2946 return;
2946 2947 }
2947 2948
2948 2949 rpathlen = strlen(base->v_path);
2949 2950 rpathalloc = rpathlen + plen + 1;
2950 2951 /* Avoid adding a slash if there's already one there */
2951 2952 if (base->v_path[rpathlen-1] == '/')
2952 2953 doslash = 0;
2953 2954 else
2954 2955 rpathalloc++;
2955 2956
2956 2957 /*
2957 2958 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2958 2959 * so we must do this dance. If, by chance, something changes the path,
2959 2960 * just give up since there is no real harm.
2960 2961 */
2961 2962 mutex_exit(&base->v_lock);
2962 2963
2963 2964 /* Paths should stay within reason */
2964 2965 if (rpathalloc > max_vnode_path)
2965 2966 return;
2966 2967
2967 2968 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2968 2969
2969 2970 mutex_enter(&base->v_lock);
2970 2971 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2971 2972 mutex_exit(&base->v_lock);
2972 2973 kmem_free(rpath, rpathalloc);
2973 2974 return;
2974 2975 }
2975 2976 bcopy(base->v_path, rpath, rpathlen);
2976 2977 mutex_exit(&base->v_lock);
2977 2978
2978 2979 if (doslash)
2979 2980 rpath[rpathlen++] = '/';
2980 2981 bcopy(path, rpath + rpathlen, plen);
2981 2982 rpath[rpathlen + plen] = '\0';
2982 2983
2983 2984 mutex_enter(&vp->v_lock);
2984 2985 if (vp->v_path != NULL) {
2985 2986 mutex_exit(&vp->v_lock);
2986 2987 kmem_free(rpath, rpathalloc);
2987 2988 } else {
2988 2989 vp->v_path = rpath;
2989 2990 mutex_exit(&vp->v_lock);
2990 2991 }
2991 2992 }
2992 2993
2993 2994 /*
2994 2995 * Sets the path to the vnode to be the given string, regardless of current
2995 2996 * context. The string must be a complete path from rootdir. This is only used
2996 2997 * by fsop_root() for setting the path based on the mountpoint.
2997 2998 */
2998 2999 void
2999 3000 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3000 3001 {
3001 3002 char *buf = kmem_alloc(len + 1, KM_SLEEP);
3002 3003
3003 3004 mutex_enter(&vp->v_lock);
3004 3005 if (vp->v_path != NULL) {
3005 3006 mutex_exit(&vp->v_lock);
3006 3007 kmem_free(buf, len + 1);
3007 3008 return;
3008 3009 }
3009 3010
3010 3011 vp->v_path = buf;
3011 3012 bcopy(str, vp->v_path, len);
3012 3013 vp->v_path[len] = '\0';
3013 3014
3014 3015 mutex_exit(&vp->v_lock);
3015 3016 }
3016 3017
3017 3018 /*
3018 3019 * Called from within filesystem's vop_rename() to handle renames once the
3019 3020 * target vnode is available.
3020 3021 */
3021 3022 void
3022 3023 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3023 3024 {
3024 3025 char *tmp;
3025 3026
3026 3027 mutex_enter(&vp->v_lock);
3027 3028 tmp = vp->v_path;
3028 3029 vp->v_path = NULL;
3029 3030 mutex_exit(&vp->v_lock);
3030 3031 vn_setpath(rootdir, dvp, vp, nm, len);
3031 3032 if (tmp != NULL)
3032 3033 kmem_free(tmp, strlen(tmp) + 1);
3033 3034 }
3034 3035
3035 3036 /*
3036 3037 * Similar to vn_setpath_str(), this function sets the path of the destination
3037 3038 * vnode to the be the same as the source vnode.
3038 3039 */
3039 3040 void
3040 3041 vn_copypath(struct vnode *src, struct vnode *dst)
3041 3042 {
3042 3043 char *buf;
3043 3044 int alloc;
3044 3045
3045 3046 mutex_enter(&src->v_lock);
3046 3047 if (src->v_path == NULL) {
3047 3048 mutex_exit(&src->v_lock);
3048 3049 return;
3049 3050 }
3050 3051 alloc = strlen(src->v_path) + 1;
3051 3052
3052 3053 /* avoid kmem_alloc() with lock held */
3053 3054 mutex_exit(&src->v_lock);
3054 3055 buf = kmem_alloc(alloc, KM_SLEEP);
3055 3056 mutex_enter(&src->v_lock);
3056 3057 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3057 3058 mutex_exit(&src->v_lock);
3058 3059 kmem_free(buf, alloc);
3059 3060 return;
3060 3061 }
3061 3062 bcopy(src->v_path, buf, alloc);
3062 3063 mutex_exit(&src->v_lock);
3063 3064
3064 3065 mutex_enter(&dst->v_lock);
3065 3066 if (dst->v_path != NULL) {
3066 3067 mutex_exit(&dst->v_lock);
3067 3068 kmem_free(buf, alloc);
3068 3069 return;
3069 3070 }
3070 3071 dst->v_path = buf;
3071 3072 mutex_exit(&dst->v_lock);
3072 3073 }
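
vn_setpath() and vn_copypath() both follow the same sample/drop/allocate/revalidate dance, because kmem_alloc(KM_SLEEP) may block and so must not be called with v_lock held. Distilled into a minimal sketch (the caller frees the result with kmem_free(buf, strlen(buf) + 1)):

static char *
example_copy_locked_string(vnode_t *vp)
{
	char *copy;
	size_t len;

	mutex_enter(&vp->v_lock);
	if (vp->v_path == NULL) {
		mutex_exit(&vp->v_lock);
		return (NULL);
	}
	len = strlen(vp->v_path) + 1;
	mutex_exit(&vp->v_lock);	/* drop the lock before sleeping */

	copy = kmem_alloc(len, KM_SLEEP);

	mutex_enter(&vp->v_lock);
	/* Revalidate: the path may have changed while unlocked. */
	if (vp->v_path == NULL || strlen(vp->v_path) + 1 != len) {
		mutex_exit(&vp->v_lock);
		kmem_free(copy, len);
		return (NULL);
	}
	bcopy(vp->v_path, copy, len);
	mutex_exit(&vp->v_lock);
	return (copy);
}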
3073 3074
3074 3075 /*
3075 3076 * XXX Private interface for segvn routines that handle vnode
3076 3077 * large page segments.
3077 3078 *
3078 3079 * return 1 if vp's file system VOP_PAGEIO() implementation
3079 3080 * can be safely used instead of VOP_GETPAGE() for handling
3080 3081  * pagefaults against regular non-swap files. The VOP_PAGEIO()
3081 3082  * interface is considered safe here if its implementation
3082 3083  * is very close to the VOP_GETPAGE() implementation.
3083 3084  * e.g. it zeros out the part of the page beyond EOF, doesn't
3084 3085  * panic if there are file holes but instead returns an error,
3085 3086  * and doesn't assume the file won't be changed by user writes, etc.
3086 3087 *
3087 3088 * return 0 otherwise.
3088 3089 *
3089 3090 * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3090 3091 */
3091 3092 int
3092 3093 vn_vmpss_usepageio(vnode_t *vp)
3093 3094 {
3094 3095 vfs_t *vfsp = vp->v_vfsp;
3095 3096 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3096 3097 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3097 3098 char **fsok = pageio_ok_fss;
3098 3099
3099 3100 if (fsname == NULL) {
3100 3101 return (0);
3101 3102 }
3102 3103
3103 3104 for (; *fsok; fsok++) {
3104 3105 if (strcmp(*fsok, fsname) == 0) {
3105 3106 return (1);
3106 3107 }
3107 3108 }
3108 3109 return (0);
3109 3110 }
3110 3111
3111 3112 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3112 3113
3113 3114 int
3114 3115 fop_open(
3115 3116 vnode_t **vpp,
3116 3117 int mode,
3117 3118 cred_t *cr,
3118 3119 caller_context_t *ct)
3119 3120 {
3120 3121 int ret;
3121 3122 vnode_t *vp = *vpp;
3122 3123
3123 3124 VN_HOLD(vp);
3124 3125 /*
3125 3126 * Adding to the vnode counts before calling open
3126 3127 * avoids the need for a mutex. It circumvents a race
3127 3128 * condition where a query made on the vnode counts results in a
3128 3129 * false negative. The inquirer goes away believing the file is
3129 3130 * not open when there is an open on the file already under way.
3130 3131 *
3131 3132 * The counts are meant to prevent NFS from granting a delegation
3132 3133 * when it would be dangerous to do so.
3133 3134 *
3134 3135 * The vnode counts are only kept on regular files
3135 3136 */
3136 3137 if ((*vpp)->v_type == VREG) {
3137 3138 if (mode & FREAD)
3138 3139 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3139 3140 if (mode & FWRITE)
3140 3141 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3141 3142 }
3142 3143
3143 3144 VOPXID_MAP_CR(vp, cr);
3145 +
3146 + /*
3147 +	 * Control is passed to fsh. In the end, the underlying
3148 +	 * vop_open() is called.
3149 + */
3150 + ret = fsh_open(vpp, mode, cr, ct);
3144 3151
3145 - ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
3146 -
3147 3152 if (ret) {
3148 3153 /*
3149 3154 * Use the saved vp just in case the vnode ptr got trashed
3150 3155 * by the error.
3151 3156 */
3152 3157 VOPSTATS_UPDATE(vp, open);
3153 3158 if ((vp->v_type == VREG) && (mode & FREAD))
3154 3159 atomic_add_32(&(vp->v_rdcnt), -1);
3155 3160 if ((vp->v_type == VREG) && (mode & FWRITE))
3156 3161 atomic_add_32(&(vp->v_wrcnt), -1);
3157 3162 } else {
3158 3163 /*
3159 3164 * Some filesystems will return a different vnode,
3160 3165 * but the same path was still used to open it.
3161 3166 * So if we do change the vnode and need to
3162 3167 * copy over the path, do so here, rather than special
3163 3168 * casing each filesystem. Adjust the vnode counts to
3164 3169 * reflect the vnode switch.
3165 3170 */
3166 3171 VOPSTATS_UPDATE(*vpp, open);
3167 3172 if (*vpp != vp && *vpp != NULL) {
3168 3173 vn_copypath(vp, *vpp);
3169 3174 if (((*vpp)->v_type == VREG) && (mode & FREAD))
3170 3175 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3171 3176 if ((vp->v_type == VREG) && (mode & FREAD))
3172 3177 atomic_add_32(&(vp->v_rdcnt), -1);
3173 3178 if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3174 3179 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3175 3180 if ((vp->v_type == VREG) && (mode & FWRITE))
3176 3181 atomic_add_32(&(vp->v_wrcnt), -1);
3177 3182 }
3178 3183 }
3179 3184 VN_RELE(vp);
3180 3185 return (ret);
3181 3186 }
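
The fsh layer itself is not shown in this webrev (it lives behind sys/fsh_impl.h), but conceptually fsh_open() and its siblings wrap the direct v_op call that used to sit here. A speculative sketch of the shape, assuming a hook-dispatch design; the real interface may differ:

int
example_fsh_open(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct)
{
	int err;

	/* ... run any pre-open hooks installed on (*vpp)->v_vfsp ... */

	/* The call that fop_open() used to make directly: */
	err = (*(*vpp)->v_op->vop_open)(vpp, mode, cr, ct);

	/* ... run any post-open hooks before returning ... */
	return (err);
}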
3182 3187
3183 3188 int
3184 3189 fop_close(
3185 3190 vnode_t *vp,
3186 3191 int flag,
3187 3192 int count,
3188 3193 offset_t offset,
3189 3194 cred_t *cr,
3190 3195 caller_context_t *ct)
3191 3196 {
3192 3197 int err;
3193 3198
3194 3199 VOPXID_MAP_CR(vp, cr);
3195 -
3196 - err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3200 +
3201 + err = fsh_close(vp, flag, count, offset, cr, ct);
3197 3202 VOPSTATS_UPDATE(vp, close);
3198 3203 /*
3199 3204 * Check passed in count to handle possible dups. Vnode counts are only
3200 3205 * kept on regular files
3201 3206 */
3202 3207 if ((vp->v_type == VREG) && (count == 1)) {
3203 3208 if (flag & FREAD) {
3204 3209 ASSERT(vp->v_rdcnt > 0);
3205 3210 atomic_add_32(&(vp->v_rdcnt), -1);
3206 3211 }
3207 3212 if (flag & FWRITE) {
3208 3213 ASSERT(vp->v_wrcnt > 0);
3209 3214 atomic_add_32(&(vp->v_wrcnt), -1);
3210 3215 }
3211 3216 }
3212 3217 return (err);
3213 3218 }
3214 3219
3215 3220 int
3216 3221 fop_read(
3217 3222 vnode_t *vp,
3218 3223 uio_t *uiop,
3219 3224 int ioflag,
3220 3225 cred_t *cr,
3221 3226 caller_context_t *ct)
3222 3227 {
3223 3228 int err;
3224 3229 ssize_t resid_start = uiop->uio_resid;
3225 3230
3226 3231 VOPXID_MAP_CR(vp, cr);
3227 -
3228 - err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3232 +
3233 + err = fsh_read(vp, uiop, ioflag, cr, ct);
3229 3234 VOPSTATS_UPDATE_IO(vp, read,
3230 3235 read_bytes, (resid_start - uiop->uio_resid));
3231 3236 return (err);
3232 3237 }
3233 3238
3234 3239 int
3235 3240 fop_write(
3236 3241 vnode_t *vp,
3237 3242 uio_t *uiop,
3238 3243 int ioflag,
3239 3244 cred_t *cr,
3240 3245 caller_context_t *ct)
3241 3246 {
3242 3247 int err;
3243 3248 ssize_t resid_start = uiop->uio_resid;
3244 3249
3245 3250 VOPXID_MAP_CR(vp, cr);
3246 3251
3247 - err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3252 + err = fsh_write(vp, uiop, ioflag, cr, ct);
3248 3253 VOPSTATS_UPDATE_IO(vp, write,
3249 3254 write_bytes, (resid_start - uiop->uio_resid));
3250 3255 return (err);
3251 3256 }
3252 3257
3253 3258 int
3254 3259 fop_ioctl(
3255 3260 vnode_t *vp,
3256 3261 int cmd,
3257 3262 intptr_t arg,
3258 3263 int flag,
3259 3264 cred_t *cr,
3260 3265 int *rvalp,
3261 3266 caller_context_t *ct)
3262 3267 {
3263 3268 int err;
3264 3269
3265 3270 VOPXID_MAP_CR(vp, cr);
3266 3271
3267 3272 err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3268 3273 VOPSTATS_UPDATE(vp, ioctl);
3269 3274 return (err);
3270 3275 }
3271 3276
3272 3277 int
3273 3278 fop_setfl(
3274 3279 vnode_t *vp,
3275 3280 int oflags,
3276 3281 int nflags,
3277 3282 cred_t *cr,
3278 3283 caller_context_t *ct)
3279 3284 {
3280 3285 int err;
3281 3286
3282 3287 VOPXID_MAP_CR(vp, cr);
3283 3288
3284 3289 err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3285 3290 VOPSTATS_UPDATE(vp, setfl);
3286 3291 return (err);
3287 3292 }
3288 3293
3289 3294 int
3290 3295 fop_getattr(
3291 3296 vnode_t *vp,
3292 3297 vattr_t *vap,
3293 3298 int flags,
3294 3299 cred_t *cr,
3295 3300 caller_context_t *ct)
3296 3301 {
3297 3302 int err;
3298 3303
3299 3304 VOPXID_MAP_CR(vp, cr);
3300 3305
3301 3306 /*
3302 3307 * If this file system doesn't understand the xvattr extensions
3303 3308 * then turn off the xvattr bit.
3304 3309 */
3305 3310 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3306 3311 vap->va_mask &= ~AT_XVATTR;
3307 3312 }
3308 3313
3309 3314 /*
3310 3315 * We're only allowed to skip the ACL check iff we used a 32 bit
3311 3316 * ACE mask with VOP_ACCESS() to determine permissions.
3312 3317 */
3313 3318 if ((flags & ATTR_NOACLCHECK) &&
3314 3319 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3315 3320 return (EINVAL);
3316 3321 }
3317 3322 err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3318 3323 VOPSTATS_UPDATE(vp, getattr);
3319 3324 return (err);
3320 3325 }
3321 3326
3322 3327 int
3323 3328 fop_setattr(
3324 3329 vnode_t *vp,
3325 3330 vattr_t *vap,
3326 3331 int flags,
3327 3332 cred_t *cr,
3328 3333 caller_context_t *ct)
3329 3334 {
3330 3335 int err;
3331 3336
3332 3337 VOPXID_MAP_CR(vp, cr);
3333 3338
3334 3339 /*
3335 3340 * If this file system doesn't understand the xvattr extensions
3336 3341 * then turn off the xvattr bit.
3337 3342 */
3338 3343 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3339 3344 vap->va_mask &= ~AT_XVATTR;
3340 3345 }
3341 3346
3342 3347 /*
3343 3348 * We're only allowed to skip the ACL check iff we used a 32 bit
3344 3349 * ACE mask with VOP_ACCESS() to determine permissions.
3345 3350 */
3346 3351 if ((flags & ATTR_NOACLCHECK) &&
3347 3352 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3348 3353 return (EINVAL);
3349 3354 }
3350 3355 err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3351 3356 VOPSTATS_UPDATE(vp, setattr);
3352 3357 return (err);
3353 3358 }
3354 3359
3355 3360 int
3356 3361 fop_access(
3357 3362 vnode_t *vp,
3358 3363 int mode,
3359 3364 int flags,
3360 3365 cred_t *cr,
3361 3366 caller_context_t *ct)
3362 3367 {
3363 3368 int err;
3364 3369
3365 3370 if ((flags & V_ACE_MASK) &&
3366 3371 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3367 3372 return (EINVAL);
3368 3373 }
3369 3374
3370 3375 VOPXID_MAP_CR(vp, cr);
3371 3376
3372 3377 err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3373 3378 VOPSTATS_UPDATE(vp, access);
3374 3379 return (err);
3375 3380 }
3376 3381
3377 3382 int
3378 3383 fop_lookup(
3379 3384 vnode_t *dvp,
3380 3385 char *nm,
3381 3386 vnode_t **vpp,
3382 3387 pathname_t *pnp,
3383 3388 int flags,
3384 3389 vnode_t *rdir,
3385 3390 cred_t *cr,
3386 3391 caller_context_t *ct,
3387 3392 int *deflags, /* Returned per-dirent flags */
3388 3393 pathname_t *ppnp) /* Returned case-preserved name in directory */
3389 3394 {
3390 3395 int ret;
3391 3396
3392 3397 /*
3393 3398 * If this file system doesn't support case-insensitive access
3394 3399 * and said access is requested, fail quickly. It is required
3395 3400 * that if the vfs supports case-insensitive lookup, it also
3396 3401 * supports extended dirent flags.
3397 3402 */
3398 3403 if (flags & FIGNORECASE &&
3399 3404 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3400 3405 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3401 3406 return (EINVAL);
3402 3407
3403 3408 VOPXID_MAP_CR(dvp, cr);
3404 3409
3405 3410 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3406 3411 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3407 3412 } else {
3408 3413 ret = (*(dvp)->v_op->vop_lookup)
3409 3414 (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3410 3415 }
3411 3416 if (ret == 0 && *vpp) {
3412 3417 VOPSTATS_UPDATE(*vpp, lookup);
3413 3418 if ((*vpp)->v_path == NULL) {
3414 3419 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3415 3420 }
3416 3421 }
3417 3422
3418 3423 return (ret);
3419 3424 }
3420 3425
3421 3426 int
3422 3427 fop_create(
3423 3428 vnode_t *dvp,
3424 3429 char *name,
3425 3430 vattr_t *vap,
3426 3431 vcexcl_t excl,
3427 3432 int mode,
3428 3433 vnode_t **vpp,
3429 3434 cred_t *cr,
3430 3435 int flags,
3431 3436 caller_context_t *ct,
3432 3437 vsecattr_t *vsecp) /* ACL to set during create */
3433 3438 {
3434 3439 int ret;
3435 3440
3436 3441 if (vsecp != NULL &&
3437 3442 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3438 3443 return (EINVAL);
3439 3444 }
3440 3445 /*
3441 3446 * If this file system doesn't support case-insensitive access
3442 3447 * and said access is requested, fail quickly.
3443 3448 */
3444 3449 if (flags & FIGNORECASE &&
3445 3450 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3446 3451 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3447 3452 return (EINVAL);
3448 3453
3449 3454 VOPXID_MAP_CR(dvp, cr);
3450 3455
3451 3456 ret = (*(dvp)->v_op->vop_create)
3452 3457 (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3453 3458 if (ret == 0 && *vpp) {
3454 3459 VOPSTATS_UPDATE(*vpp, create);
3455 3460 if ((*vpp)->v_path == NULL) {
3456 3461 vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3457 3462 }
3458 3463 }
3459 3464
3460 3465 return (ret);
3461 3466 }
3462 3467
3463 3468 int
3464 3469 fop_remove(
3465 3470 vnode_t *dvp,
3466 3471 char *nm,
3467 3472 cred_t *cr,
3468 3473 caller_context_t *ct,
3469 3474 int flags)
3470 3475 {
3471 3476 int err;
3472 3477
3473 3478 /*
3474 3479 * If this file system doesn't support case-insensitive access
3475 3480 * and said access is requested, fail quickly.
3476 3481 */
3477 3482 if (flags & FIGNORECASE &&
3478 3483 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3479 3484 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3480 3485 return (EINVAL);
3481 3486
3482 3487 VOPXID_MAP_CR(dvp, cr);
3483 3488
3484 3489 err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3485 3490 VOPSTATS_UPDATE(dvp, remove);
3486 3491 return (err);
3487 3492 }
3488 3493
3489 3494 int
3490 3495 fop_link(
3491 3496 vnode_t *tdvp,
3492 3497 vnode_t *svp,
3493 3498 char *tnm,
3494 3499 cred_t *cr,
3495 3500 caller_context_t *ct,
3496 3501 int flags)
3497 3502 {
3498 3503 int err;
3499 3504
3500 3505 /*
3501 3506 * If the target file system doesn't support case-insensitive access
3502 3507 * and said access is requested, fail quickly.
3503 3508 */
3504 3509 if (flags & FIGNORECASE &&
3505 3510 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3506 3511 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3507 3512 return (EINVAL);
3508 3513
3509 3514 VOPXID_MAP_CR(tdvp, cr);
3510 3515
3511 3516 err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3512 3517 VOPSTATS_UPDATE(tdvp, link);
3513 3518 return (err);
3514 3519 }
3515 3520
3516 3521 int
3517 3522 fop_rename(
3518 3523 vnode_t *sdvp,
3519 3524 char *snm,
3520 3525 vnode_t *tdvp,
3521 3526 char *tnm,
3522 3527 cred_t *cr,
3523 3528 caller_context_t *ct,
3524 3529 int flags)
3525 3530 {
3526 3531 int err;
3527 3532
3528 3533 /*
3529 3534 * If the file system involved does not support
3530 3535 * case-insensitive access and said access is requested, fail
3531 3536 * quickly.
3532 3537 */
3533 3538 if (flags & FIGNORECASE &&
3534 3539 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3535 3540 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3536 3541 return (EINVAL);
3537 3542
3538 3543 VOPXID_MAP_CR(tdvp, cr);
3539 3544
3540 3545 err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3541 3546 VOPSTATS_UPDATE(sdvp, rename);
3542 3547 return (err);
3543 3548 }
3544 3549
3545 3550 int
3546 3551 fop_mkdir(
3547 3552 vnode_t *dvp,
3548 3553 char *dirname,
3549 3554 vattr_t *vap,
3550 3555 vnode_t **vpp,
3551 3556 cred_t *cr,
3552 3557 caller_context_t *ct,
3553 3558 int flags,
3554 3559 vsecattr_t *vsecp) /* ACL to set during create */
3555 3560 {
3556 3561 int ret;
3557 3562
3558 3563 if (vsecp != NULL &&
3559 3564 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3560 3565 return (EINVAL);
3561 3566 }
3562 3567 /*
3563 3568 * If this file system doesn't support case-insensitive access
3564 3569 * and said access is requested, fail quickly.
3565 3570 */
3566 3571 if (flags & FIGNORECASE &&
3567 3572 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3568 3573 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3569 3574 return (EINVAL);
3570 3575
3571 3576 VOPXID_MAP_CR(dvp, cr);
3572 3577
3573 3578 ret = (*(dvp)->v_op->vop_mkdir)
3574 3579 (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3575 3580 if (ret == 0 && *vpp) {
3576 3581 VOPSTATS_UPDATE(*vpp, mkdir);
3577 3582 if ((*vpp)->v_path == NULL) {
3578 3583 vn_setpath(rootdir, dvp, *vpp, dirname,
3579 3584 strlen(dirname));
3580 3585 }
3581 3586 }
3582 3587
3583 3588 return (ret);
3584 3589 }
3585 3590
3586 3591 int
3587 3592 fop_rmdir(
3588 3593 vnode_t *dvp,
3589 3594 char *nm,
3590 3595 vnode_t *cdir,
3591 3596 cred_t *cr,
3592 3597 caller_context_t *ct,
3593 3598 int flags)
3594 3599 {
3595 3600 int err;
3596 3601
3597 3602 /*
3598 3603 * If this file system doesn't support case-insensitive access
3599 3604 * and said access is requested, fail quickly.
3600 3605 */
3601 3606 if (flags & FIGNORECASE &&
3602 3607 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3603 3608 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3604 3609 return (EINVAL);
3605 3610
3606 3611 VOPXID_MAP_CR(dvp, cr);
3607 3612
3608 3613 err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3609 3614 VOPSTATS_UPDATE(dvp, rmdir);
3610 3615 return (err);
3611 3616 }
3612 3617
3613 3618 int
3614 3619 fop_readdir(
3615 3620 vnode_t *vp,
3616 3621 uio_t *uiop,
3617 3622 cred_t *cr,
3618 3623 int *eofp,
3619 3624 caller_context_t *ct,
3620 3625 int flags)
3621 3626 {
3622 3627 int err;
3623 3628 ssize_t resid_start = uiop->uio_resid;
3624 3629
3625 3630 /*
3626 3631 * If this file system doesn't support retrieving directory
3627 3632 * entry flags and said access is requested, fail quickly.
3628 3633 */
3629 3634 if (flags & V_RDDIR_ENTFLAGS &&
3630 3635 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3631 3636 return (EINVAL);
3632 3637
3633 3638 VOPXID_MAP_CR(vp, cr);
3634 3639
3635 3640 err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3636 3641 VOPSTATS_UPDATE_IO(vp, readdir,
3637 3642 readdir_bytes, (resid_start - uiop->uio_resid));
3638 3643 return (err);
3639 3644 }
3640 3645
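/*
 * Worked example of the byte accounting above: if uio_resid is 8192 on
 * entry and the file system fills 5120 bytes of directory entries, then
 * uio_resid is 3072 on return and VOPSTATS_UPDATE_IO() records
 * 8192 - 3072 = 5120 bytes in readdir_bytes.
 */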
3641 3646 int
3642 3647 fop_symlink(
3643 3648 vnode_t *dvp,
3644 3649 char *linkname,
3645 3650 vattr_t *vap,
3646 3651 char *target,
3647 3652 cred_t *cr,
3648 3653 caller_context_t *ct,
3649 3654 int flags)
3650 3655 {
3651 3656 int err;
3652 3657 xvattr_t xvattr;
3653 3658
3654 3659 /*
3655 3660 * If this file system doesn't support case-insensitive access
3656 3661 * and said access is requested, fail quickly.
3657 3662 */
3658 3663 if (flags & FIGNORECASE &&
3659 3664 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3660 3665 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3661 3666 return (EINVAL);
3662 3667
3663 3668 VOPXID_MAP_CR(dvp, cr);
3664 3669
3665 3670 /* check for reparse point */
3666 3671 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3667 3672 (strncmp(target, FS_REPARSE_TAG_STR,
3668 3673 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3669 3674 if (!fs_reparse_mark(target, vap, &xvattr))
3670 3675 vap = (vattr_t *)&xvattr;
3671 3676 }
3672 3677
3673 3678 err = (*(dvp)->v_op->vop_symlink)
3674 3679 (dvp, linkname, vap, target, cr, ct, flags);
3675 3680 VOPSTATS_UPDATE(dvp, symlink);
3676 3681 return (err);
3677 3682 }
3678 3683
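/*
 * Sketch of the reparse handling below: a reparse point is created as a
 * symlink whose target begins with FS_REPARSE_TAG_STR (fs/fs_reparse.h).
 * When the file system advertises VFSFT_REPARSE and the target carries
 * the tag, fs_reparse_mark() replaces the caller's vattr with an xvattr
 * that has XAT_REPARSE requested, so the link is created already marked
 * as a reparse point; if marking fails, the plain vattr is used as-is.
 */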
3679 3684 int
3680 3685 fop_readlink(
3681 3686 vnode_t *vp,
3682 3687 uio_t *uiop,
3683 3688 cred_t *cr,
3684 3689 caller_context_t *ct)
3685 3690 {
3686 3691 int err;
3687 3692
3688 3693 VOPXID_MAP_CR(vp, cr);
3689 3694
3690 3695 err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3691 3696 VOPSTATS_UPDATE(vp, readlink);
3692 3697 return (err);
3693 3698 }
3694 3699
3695 3700 int
3696 3701 fop_fsync(
3697 3702 vnode_t *vp,
3698 3703 int syncflag,
3699 3704 cred_t *cr,
3700 3705 caller_context_t *ct)
3701 3706 {
3702 3707 int err;
3703 3708
3704 3709 VOPXID_MAP_CR(vp, cr);
3705 3710
3706 3711 err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3707 3712 VOPSTATS_UPDATE(vp, fsync);
3708 3713 return (err);
3709 3714 }
3710 3715
3711 3716 void
3712 3717 fop_inactive(
3713 3718 vnode_t *vp,
3714 3719 cred_t *cr,
3715 3720 caller_context_t *ct)
3716 3721 {
3717 3722 /* Need to update stats before vop call since we may lose the vnode */
3718 3723 VOPSTATS_UPDATE(vp, inactive);
3719 3724
3720 3725 VOPXID_MAP_CR(vp, cr);
3721 3726
3722 3727 (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3723 3728 }
3724 3729
3725 3730 int
3726 3731 fop_fid(
3727 3732 vnode_t *vp,
3728 3733 fid_t *fidp,
3729 3734 caller_context_t *ct)
3730 3735 {
3731 3736 int err;
3732 3737
3733 3738 err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3734 3739 VOPSTATS_UPDATE(vp, fid);
3735 3740 return (err);
3736 3741 }
3737 3742
3738 3743 int
3739 3744 fop_rwlock(
3740 3745 vnode_t *vp,
3741 3746 int write_lock,
3742 3747 caller_context_t *ct)
3743 3748 {
3744 3749 int ret;
3745 3750
3746 3751 ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3747 3752 VOPSTATS_UPDATE(vp, rwlock);
3748 3753 return (ret);
3749 3754 }
3750 3755
3751 3756 void
3752 3757 fop_rwunlock(
3753 3758 vnode_t *vp,
3754 3759 int write_lock,
3755 3760 caller_context_t *ct)
3756 3761 {
3757 3762 (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3758 3763 VOPSTATS_UPDATE(vp, rwunlock);
3759 3764 }
3760 3765
3761 3766 int
3762 3767 fop_seek(
3763 3768 vnode_t *vp,
3764 3769 offset_t ooff,
3765 3770 offset_t *noffp,
3766 3771 caller_context_t *ct)
3767 3772 {
3768 3773 int err;
3769 3774
3770 3775 err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
3771 3776 VOPSTATS_UPDATE(vp, seek);
3772 3777 return (err);
3773 3778 }
3774 3779
3775 3780 int
3776 3781 fop_cmp(
3777 3782 vnode_t *vp1,
3778 3783 vnode_t *vp2,
3779 3784 caller_context_t *ct)
3780 3785 {
3781 3786 int err;
3782 3787
3783 3788 err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
3784 3789 VOPSTATS_UPDATE(vp1, cmp);
3785 3790 return (err);
3786 3791 }
3787 3792
3788 3793 int
3789 3794 fop_frlock(
3790 3795 vnode_t *vp,
3791 3796 int cmd,
3792 3797 flock64_t *bfp,
3793 3798 int flag,
3794 3799 offset_t offset,
3795 3800 struct flk_callback *flk_cbp,
3796 3801 cred_t *cr,
3797 3802 caller_context_t *ct)
3798 3803 {
3799 3804 int err;
3800 3805
3801 3806 VOPXID_MAP_CR(vp, cr);
3802 3807
3803 3808 err = (*(vp)->v_op->vop_frlock)
3804 3809 (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3805 3810 VOPSTATS_UPDATE(vp, frlock);
3806 3811 return (err);
3807 3812 }
3808 3813
3809 3814 int
3810 3815 fop_space(
3811 3816 vnode_t *vp,
3812 3817 int cmd,
3813 3818 flock64_t *bfp,
3814 3819 int flag,
3815 3820 offset_t offset,
3816 3821 cred_t *cr,
3817 3822 caller_context_t *ct)
3818 3823 {
3819 3824 int err;
3820 3825
3821 3826 VOPXID_MAP_CR(vp, cr);
3822 3827
3823 3828 err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
3824 3829 VOPSTATS_UPDATE(vp, space);
3825 3830 return (err);
3826 3831 }
3827 3832
3828 3833 int
3829 3834 fop_realvp(
3830 3835 vnode_t *vp,
3831 3836 vnode_t **vpp,
3832 3837 caller_context_t *ct)
3833 3838 {
3834 3839 int err;
3835 3840
3836 3841 err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
3837 3842 VOPSTATS_UPDATE(vp, realvp);
3838 3843 return (err);
3839 3844 }
3840 3845
3841 3846 int
3842 3847 fop_getpage(
3843 3848 vnode_t *vp,
3844 3849 offset_t off,
3845 3850 size_t len,
3846 3851 uint_t *protp,
3847 3852 page_t **plarr,
3848 3853 size_t plsz,
3849 3854 struct seg *seg,
3850 3855 caddr_t addr,
3851 3856 enum seg_rw rw,
3852 3857 cred_t *cr,
3853 3858 caller_context_t *ct)
3854 3859 {
3855 3860 int err;
3856 3861
3857 3862 VOPXID_MAP_CR(vp, cr);
3858 3863
3859 3864 err = (*(vp)->v_op->vop_getpage)
3860 3865 (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
3861 3866 VOPSTATS_UPDATE(vp, getpage);
3862 3867 return (err);
3863 3868 }
3864 3869
3865 3870 int
3866 3871 fop_putpage(
3867 3872 vnode_t *vp,
3868 3873 offset_t off,
3869 3874 size_t len,
3870 3875 int flags,
3871 3876 cred_t *cr,
3872 3877 caller_context_t *ct)
3873 3878 {
3874 3879 int err;
3875 3880
3876 3881 VOPXID_MAP_CR(vp, cr);
3877 3882
3878 3883 err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
3879 3884 VOPSTATS_UPDATE(vp, putpage);
3880 3885 return (err);
3881 3886 }
3882 3887
3883 3888 int
3884 3889 fop_map(
3885 3890 vnode_t *vp,
3886 3891 offset_t off,
3887 3892 struct as *as,
3888 3893 caddr_t *addrp,
3889 3894 size_t len,
3890 3895 uchar_t prot,
3891 3896 uchar_t maxprot,
3892 3897 uint_t flags,
3893 3898 cred_t *cr,
3894 3899 caller_context_t *ct)
3895 3900 {
3896 3901 int err;
3897 3902
3898 3903 VOPXID_MAP_CR(vp, cr);
3899 3904
3900 3905 err = (*(vp)->v_op->vop_map)
3901 3906 (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
3902 3907 VOPSTATS_UPDATE(vp, map);
3903 3908 return (err);
3904 3909 }
3905 3910
3906 3911 int
3907 3912 fop_addmap(
3908 3913 vnode_t *vp,
3909 3914 offset_t off,
3910 3915 struct as *as,
3911 3916 caddr_t addr,
3912 3917 size_t len,
3913 3918 uchar_t prot,
3914 3919 uchar_t maxprot,
3915 3920 uint_t flags,
3916 3921 cred_t *cr,
3917 3922 caller_context_t *ct)
3918 3923 {
3919 3924 int error;
3920 3925 u_longlong_t delta;
3921 3926
3922 3927 VOPXID_MAP_CR(vp, cr);
3923 3928
3924 3929 error = (*(vp)->v_op->vop_addmap)
3925 3930 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3926 3931
3927 3932 if ((!error) && (vp->v_type == VREG)) {
3928 3933 delta = (u_longlong_t)btopr(len);
3929 3934 /*
 3930 3935 		 * A MAP_PRIVATE mapping can't be written back even if the
 3931 3936 		 * file is open for write, so account for it as a read.
3932 3937 */
3933 3938 if (flags & MAP_PRIVATE) {
3934 3939 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3935 3940 (int64_t)delta);
3936 3941 } else {
3937 3942 /*
3938 3943 * atomic_add_64 forces the fetch of a 64 bit value to
3939 3944 * be atomic on 32 bit machines
3940 3945 */
3941 3946 if (maxprot & PROT_WRITE)
3942 3947 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3943 3948 (int64_t)delta);
3944 3949 if (maxprot & PROT_READ)
3945 3950 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3946 3951 (int64_t)delta);
3947 3952 if (maxprot & PROT_EXEC)
3948 3953 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3949 3954 (int64_t)delta);
3950 3955 }
3951 3956 }
3952 3957 VOPSTATS_UPDATE(vp, addmap);
3953 3958 return (error);
3954 3959 }
3955 3960
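/*
 * Worked example of the mapping accounting above, assuming 4K pages: an
 * addmap of len = 10000 bytes yields delta = btopr(10000) = 3 pages. A
 * MAP_SHARED mapping with maxprot = PROT_READ|PROT_WRITE then adds 3 to
 * both v_mmap_read and v_mmap_write, while a MAP_PRIVATE mapping adds 3
 * to v_mmap_read only, since its pages are never written back.
 */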
3956 3961 int
3957 3962 fop_delmap(
3958 3963 vnode_t *vp,
3959 3964 offset_t off,
3960 3965 struct as *as,
3961 3966 caddr_t addr,
3962 3967 size_t len,
3963 3968 uint_t prot,
3964 3969 uint_t maxprot,
3965 3970 uint_t flags,
3966 3971 cred_t *cr,
3967 3972 caller_context_t *ct)
3968 3973 {
3969 3974 int error;
3970 3975 u_longlong_t delta;
3971 3976
3972 3977 VOPXID_MAP_CR(vp, cr);
3973 3978
3974 3979 error = (*(vp)->v_op->vop_delmap)
3975 3980 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3976 3981
3977 3982 /*
 3978 3983 	 * NFS calls into delmap twice: the first call merely
 3979 3984 	 * establishes a callback mechanism and returns EAGAIN,
 3980 3985 	 * while the real work is done on the second invocation.
3981 3986 * We have to detect this here and only decrement the counts upon
3982 3987 * the second delmap request.
3983 3988 */
3984 3989 if ((error != EAGAIN) && (vp->v_type == VREG)) {
3985 3990
3986 3991 delta = (u_longlong_t)btopr(len);
3987 3992
3988 3993 if (flags & MAP_PRIVATE) {
3989 3994 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3990 3995 (int64_t)(-delta));
3991 3996 } else {
3992 3997 /*
3993 3998 * atomic_add_64 forces the fetch of a 64 bit value
3994 3999 * to be atomic on 32 bit machines
3995 4000 */
3996 4001 if (maxprot & PROT_WRITE)
3997 4002 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3998 4003 (int64_t)(-delta));
3999 4004 if (maxprot & PROT_READ)
4000 4005 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4001 4006 (int64_t)(-delta));
4002 4007 if (maxprot & PROT_EXEC)
4003 4008 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4004 4009 (int64_t)(-delta));
4005 4010 }
4006 4011 }
4007 4012 VOPSTATS_UPDATE(vp, delmap);
4008 4013 return (error);
4009 4014 }
4010 4015
4011 4016
4012 4017 int
4013 4018 fop_poll(
4014 4019 vnode_t *vp,
4015 4020 short events,
4016 4021 int anyyet,
4017 4022 short *reventsp,
4018 4023 struct pollhead **phpp,
4019 4024 caller_context_t *ct)
4020 4025 {
4021 4026 int err;
4022 4027
4023 4028 err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4024 4029 VOPSTATS_UPDATE(vp, poll);
4025 4030 return (err);
4026 4031 }
4027 4032
4028 4033 int
4029 4034 fop_dump(
4030 4035 vnode_t *vp,
4031 4036 caddr_t addr,
4032 4037 offset_t lbdn,
4033 4038 offset_t dblks,
4034 4039 caller_context_t *ct)
4035 4040 {
4036 4041 int err;
4037 4042
4038 4043 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4039 4044 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4040 4045 return (EIO);
4041 4046
4042 4047 err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
4043 4048 VOPSTATS_UPDATE(vp, dump);
4044 4049 return (err);
4045 4050 }
4046 4051
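/*
 * Worked example of the range check above, assuming a 32-bit daddr_t:
 *
 *	offset_t lbdn = 0x100000000LL;	... 2^32
 *	(daddr_t)lbdn truncates to 0, so lbdn != (daddr_t)lbdn
 *
 * and the request is rejected with EIO rather than handing a silently
 * truncated block number to bdev_dump().
 */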
4047 4052 int
4048 4053 fop_pathconf(
4049 4054 vnode_t *vp,
4050 4055 int cmd,
4051 4056 ulong_t *valp,
4052 4057 cred_t *cr,
4053 4058 caller_context_t *ct)
4054 4059 {
4055 4060 int err;
4056 4061
4057 4062 VOPXID_MAP_CR(vp, cr);
4058 4063
4059 4064 err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4060 4065 VOPSTATS_UPDATE(vp, pathconf);
4061 4066 return (err);
4062 4067 }
4063 4068
4064 4069 int
4065 4070 fop_pageio(
4066 4071 vnode_t *vp,
4067 4072 struct page *pp,
4068 4073 u_offset_t io_off,
4069 4074 size_t io_len,
4070 4075 int flags,
4071 4076 cred_t *cr,
4072 4077 caller_context_t *ct)
4073 4078 {
4074 4079 int err;
4075 4080
4076 4081 VOPXID_MAP_CR(vp, cr);
4077 4082
4078 4083 err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4079 4084 VOPSTATS_UPDATE(vp, pageio);
4080 4085 return (err);
4081 4086 }
4082 4087
4083 4088 int
4084 4089 fop_dumpctl(
4085 4090 vnode_t *vp,
4086 4091 int action,
4087 4092 offset_t *blkp,
4088 4093 caller_context_t *ct)
4089 4094 {
4090 4095 int err;
4091 4096 err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4092 4097 VOPSTATS_UPDATE(vp, dumpctl);
4093 4098 return (err);
4094 4099 }
4095 4100
4096 4101 void
4097 4102 fop_dispose(
4098 4103 vnode_t *vp,
4099 4104 page_t *pp,
4100 4105 int flag,
4101 4106 int dn,
4102 4107 cred_t *cr,
4103 4108 caller_context_t *ct)
4104 4109 {
4105 4110 /* Must do stats first since it's possible to lose the vnode */
4106 4111 VOPSTATS_UPDATE(vp, dispose);
4107 4112
4108 4113 VOPXID_MAP_CR(vp, cr);
4109 4114
4110 4115 (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4111 4116 }
4112 4117
4113 4118 int
4114 4119 fop_setsecattr(
4115 4120 vnode_t *vp,
4116 4121 vsecattr_t *vsap,
4117 4122 int flag,
4118 4123 cred_t *cr,
4119 4124 caller_context_t *ct)
4120 4125 {
4121 4126 int err;
4122 4127
4123 4128 VOPXID_MAP_CR(vp, cr);
4124 4129
4125 4130 /*
4126 4131 * We're only allowed to skip the ACL check iff we used a 32 bit
4127 4132 * ACE mask with VOP_ACCESS() to determine permissions.
4128 4133 */
4129 4134 if ((flag & ATTR_NOACLCHECK) &&
4130 4135 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4131 4136 return (EINVAL);
4132 4137 }
4133 4138 err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4134 4139 VOPSTATS_UPDATE(vp, setsecattr);
4135 4140 return (err);
4136 4141 }
4137 4142
4138 4143 int
4139 4144 fop_getsecattr(
4140 4145 vnode_t *vp,
4141 4146 vsecattr_t *vsap,
4142 4147 int flag,
4143 4148 cred_t *cr,
4144 4149 caller_context_t *ct)
4145 4150 {
4146 4151 int err;
4147 4152
4148 4153 /*
4149 4154 * We're only allowed to skip the ACL check iff we used a 32 bit
4150 4155 * ACE mask with VOP_ACCESS() to determine permissions.
4151 4156 */
4152 4157 if ((flag & ATTR_NOACLCHECK) &&
4153 4158 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4154 4159 return (EINVAL);
4155 4160 }
4156 4161
4157 4162 VOPXID_MAP_CR(vp, cr);
4158 4163
4159 4164 err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4160 4165 VOPSTATS_UPDATE(vp, getsecattr);
4161 4166 return (err);
4162 4167 }
4163 4168
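/*
 * Sketch of the ATTR_NOACLCHECK gate used here and in fop_setsecattr():
 * skipping the ACL check is only legal when the file system evaluated
 * access with a 32-bit ACE mask (VFSFT_ACEMASKONACCESS); otherwise both
 * entry points reject the flag with EINVAL before calling down.
 */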
4164 4169 int
4165 4170 fop_shrlock(
4166 4171 vnode_t *vp,
4167 4172 int cmd,
4168 4173 struct shrlock *shr,
4169 4174 int flag,
4170 4175 cred_t *cr,
4171 4176 caller_context_t *ct)
4172 4177 {
4173 4178 int err;
4174 4179
4175 4180 VOPXID_MAP_CR(vp, cr);
4176 4181
4177 4182 err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4178 4183 VOPSTATS_UPDATE(vp, shrlock);
4179 4184 return (err);
4180 4185 }
4181 4186
4182 4187 int
4183 4188 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4184 4189 caller_context_t *ct)
4185 4190 {
4186 4191 int err;
4187 4192
4188 4193 err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4189 4194 VOPSTATS_UPDATE(vp, vnevent);
4190 4195 return (err);
4191 4196 }
4192 4197
4193 4198 int
4194 4199 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4195 4200 caller_context_t *ct)
4196 4201 {
4197 4202 int err;
4198 4203
4199 4204 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4200 4205 return (ENOTSUP);
4201 4206 err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4202 4207 VOPSTATS_UPDATE(vp, reqzcbuf);
4203 4208 return (err);
4204 4209 }
4205 4210
4206 4211 int
4207 4212 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4208 4213 {
4209 4214 int err;
4210 4215
4211 4216 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4212 4217 return (ENOTSUP);
4213 4218 err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4214 4219 VOPSTATS_UPDATE(vp, retzcbuf);
4215 4220 return (err);
4216 4221 }
4217 4222
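/*
 * Sketch of the zero-copy gate above: both fop_reqzcbuf() and
 * fop_retzcbuf() fail with ENOTSUP unless the file system declared
 * VFSFT_ZEROCOPY_SUPPORTED (roughly, vfs_set_feature(vfsp,
 * VFSFT_ZEROCOPY_SUPPORTED) at mount time), letting callers fall back
 * to ordinary buffered I/O.
 */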
4218 4223 /*
4219 4224 * Default destructor
4220 4225 * Needed because NULL destructor means that the key is unused
4221 4226 */
4222 4227 /* ARGSUSED */
4223 4228 void
4224 4229 vsd_defaultdestructor(void *value)
4225 4230 {}
4226 4231
4227 4232 /*
4228 4233 * Create a key (index into per vnode array)
4229 4234 * Locks out vsd_create, vsd_destroy, and vsd_free
4230 4235 * May allocate memory with lock held
4231 4236 */
4232 4237 void
4233 4238 vsd_create(uint_t *keyp, void (*destructor)(void *))
4234 4239 {
4235 4240 int i;
4236 4241 uint_t nkeys;
4237 4242
4238 4243 /*
4239 4244 * if key is allocated, do nothing
4240 4245 */
4241 4246 mutex_enter(&vsd_lock);
4242 4247 if (*keyp) {
4243 4248 mutex_exit(&vsd_lock);
4244 4249 return;
4245 4250 }
4246 4251 /*
4247 4252 * find an unused key
4248 4253 */
4249 4254 if (destructor == NULL)
4250 4255 destructor = vsd_defaultdestructor;
4251 4256
4252 4257 for (i = 0; i < vsd_nkeys; ++i)
4253 4258 if (vsd_destructor[i] == NULL)
4254 4259 break;
4255 4260
4256 4261 /*
4257 4262 * if no unused keys, increase the size of the destructor array
4258 4263 */
4259 4264 if (i == vsd_nkeys) {
4260 4265 if ((nkeys = (vsd_nkeys << 1)) == 0)
4261 4266 nkeys = 1;
4262 4267 vsd_destructor =
4263 4268 (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4264 4269 (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4265 4270 (size_t)(nkeys * sizeof (void (*)(void *))));
4266 4271 vsd_nkeys = nkeys;
4267 4272 }
4268 4273
4269 4274 /*
4270 4275 * allocate the next available unused key
4271 4276 */
4272 4277 vsd_destructor[i] = destructor;
4273 4278 *keyp = i + 1;
4274 4279
4275 4280 /* create vsd_list, if it doesn't exist */
4276 4281 if (vsd_list == NULL) {
4277 4282 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4278 4283 list_create(vsd_list, sizeof (struct vsd_node),
4279 4284 offsetof(struct vsd_node, vs_nodes));
4280 4285 }
4281 4286
4282 4287 mutex_exit(&vsd_lock);
4283 4288 }
4284 4289
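/*
 * Usage sketch for the VSD interfaces (hypothetical key and payload
 * names): the API deliberately mirrors thread-specific data
 * (tsd_create/tsd_set/tsd_get), but values hang off a vnode and access
 * must hold v_vsd_lock. Roughly:
 *
 *	static uint_t my_key;			... hypothetical
 *
 *	vsd_create(&my_key, my_destructor);	... once, at module init
 *
 *	mutex_enter(&vp->v_vsd_lock);
 *	(void) vsd_set(vp, my_key, payload);
 *	payload = vsd_get(vp, my_key);
 *	mutex_exit(&vp->v_vsd_lock);
 */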
4285 4290 /*
4286 4291 * Destroy a key
4287 4292 *
4288 4293 * Assumes that the caller is preventing vsd_set and vsd_get
4289 4294 * Locks out vsd_create, vsd_destroy, and vsd_free
4290 4295 * May free memory with lock held
4291 4296 */
4292 4297 void
4293 4298 vsd_destroy(uint_t *keyp)
4294 4299 {
4295 4300 uint_t key;
4296 4301 struct vsd_node *vsd;
4297 4302
4298 4303 /*
4299 4304 * protect the key namespace and our destructor lists
4300 4305 */
4301 4306 mutex_enter(&vsd_lock);
4302 4307 key = *keyp;
4303 4308 *keyp = 0;
4304 4309
4305 4310 ASSERT(key <= vsd_nkeys);
4306 4311
4307 4312 /*
4308 4313 * if the key is valid
4309 4314 */
4310 4315 if (key != 0) {
4311 4316 uint_t k = key - 1;
4312 4317 /*
4313 4318 * for every vnode with VSD, call key's destructor
4314 4319 */
4315 4320 for (vsd = list_head(vsd_list); vsd != NULL;
4316 4321 vsd = list_next(vsd_list, vsd)) {
4317 4322 /*
4318 4323 * no VSD for key in this vnode
4319 4324 */
4320 4325 if (key > vsd->vs_nkeys)
4321 4326 continue;
4322 4327 /*
4323 4328 * call destructor for key
4324 4329 */
4325 4330 if (vsd->vs_value[k] && vsd_destructor[k])
4326 4331 (*vsd_destructor[k])(vsd->vs_value[k]);
4327 4332 /*
4328 4333 * reset value for key
4329 4334 */
4330 4335 vsd->vs_value[k] = NULL;
4331 4336 }
4332 4337 /*
4333 4338 * actually free the key (NULL destructor == unused)
4334 4339 */
4335 4340 vsd_destructor[k] = NULL;
4336 4341 }
4337 4342
4338 4343 mutex_exit(&vsd_lock);
4339 4344 }
4340 4345
4341 4346 /*
4342 4347 * Quickly return the per vnode value that was stored with the specified key
4343 4348 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4344 4349 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4345 4350 */
4346 4351 void *
4347 4352 vsd_get(vnode_t *vp, uint_t key)
4348 4353 {
4349 4354 struct vsd_node *vsd;
4350 4355
4351 4356 ASSERT(vp != NULL);
4352 4357 ASSERT(mutex_owned(&vp->v_vsd_lock));
4353 4358
4354 4359 vsd = vp->v_vsd;
4355 4360
4356 4361 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4357 4362 return (vsd->vs_value[key - 1]);
4358 4363 return (NULL);
4359 4364 }
4360 4365
4361 4366 /*
4362 4367 * Set a per vnode value indexed with the specified key
4363 4368 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4364 4369 */
4365 4370 int
4366 4371 vsd_set(vnode_t *vp, uint_t key, void *value)
4367 4372 {
4368 4373 struct vsd_node *vsd;
4369 4374
4370 4375 ASSERT(vp != NULL);
4371 4376 ASSERT(mutex_owned(&vp->v_vsd_lock));
4372 4377
4373 4378 if (key == 0)
4374 4379 return (EINVAL);
4375 4380
4376 4381 vsd = vp->v_vsd;
4377 4382 if (vsd == NULL)
4378 4383 vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4379 4384
4380 4385 /*
4381 4386 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4382 4387 * code won't happen and we will continue down and allocate space for
4383 4388 * the vs_value array.
4384 4389 * If the caller is replacing one value with another, then it is up
4385 4390 * to the caller to free/rele/destroy the previous value (if needed).
4386 4391 */
4387 4392 if (key <= vsd->vs_nkeys) {
4388 4393 vsd->vs_value[key - 1] = value;
4389 4394 return (0);
4390 4395 }
4391 4396
4392 4397 ASSERT(key <= vsd_nkeys);
4393 4398
4394 4399 if (vsd->vs_nkeys == 0) {
4395 4400 mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4396 4401 /*
4397 4402 * Link onto list of all VSD nodes.
4398 4403 */
4399 4404 list_insert_head(vsd_list, vsd);
4400 4405 mutex_exit(&vsd_lock);
4401 4406 }
4402 4407
4403 4408 /*
4404 4409 * Allocate vnode local storage and set the value for key
4405 4410 */
4406 4411 vsd->vs_value = vsd_realloc(vsd->vs_value,
4407 4412 vsd->vs_nkeys * sizeof (void *),
4408 4413 key * sizeof (void *));
4409 4414 vsd->vs_nkeys = key;
4410 4415 vsd->vs_value[key - 1] = value;
4411 4416
4412 4417 return (0);
4413 4418 }
4414 4419
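/*
 * Worked example of the growth path above: on a fresh vsd, vs_nkeys is
 * 0, so a vsd_set() with key 3 first links the node onto vsd_list, then
 * grows vs_value from 0 to 3 pointer slots via vsd_realloc() and stores
 * the value in vs_value[2]. A later vsd_set() with key 2 takes the fast
 * path, since 2 <= vs_nkeys.
 */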
4415 4420 /*
4416 4421 * Called from vn_free() to run the destructor function for each vsd
4417 4422 * Locks out vsd_create and vsd_destroy
4418 4423 * Assumes that the destructor *DOES NOT* use vsd
4419 4424 */
4420 4425 void
4421 4426 vsd_free(vnode_t *vp)
4422 4427 {
4423 4428 int i;
4424 4429 struct vsd_node *vsd = vp->v_vsd;
4425 4430
4426 4431 if (vsd == NULL)
4427 4432 return;
4428 4433
4429 4434 if (vsd->vs_nkeys == 0) {
4430 4435 kmem_free(vsd, sizeof (*vsd));
4431 4436 vp->v_vsd = NULL;
4432 4437 return;
4433 4438 }
4434 4439
4435 4440 /*
4436 4441 * lock out vsd_create and vsd_destroy, call
4437 4442 * the destructor, and mark the value as destroyed.
4438 4443 */
4439 4444 mutex_enter(&vsd_lock);
4440 4445
4441 4446 for (i = 0; i < vsd->vs_nkeys; i++) {
4442 4447 if (vsd->vs_value[i] && vsd_destructor[i])
4443 4448 (*vsd_destructor[i])(vsd->vs_value[i]);
4444 4449 vsd->vs_value[i] = NULL;
4445 4450 }
4446 4451
4447 4452 /*
4448 4453 * remove from linked list of VSD nodes
4449 4454 */
4450 4455 list_remove(vsd_list, vsd);
4451 4456
4452 4457 mutex_exit(&vsd_lock);
4453 4458
4454 4459 /*
4455 4460 * free up the VSD
4456 4461 */
4457 4462 kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4458 4463 kmem_free(vsd, sizeof (struct vsd_node));
4459 4464 vp->v_vsd = NULL;
4460 4465 }
4461 4466
4462 4467 /*
4463 4468  * Grow-only realloc: zero-allocate nsize bytes, copy the old osize
4464 4468  * bytes, and free the old buffer. Assumes nsize >= osize.
4464 4469 */
4465 4470 static void *
4466 4471 vsd_realloc(void *old, size_t osize, size_t nsize)
4467 4472 {
4468 4473 void *new;
4469 4474
4470 4475 new = kmem_zalloc(nsize, KM_SLEEP);
4471 4476 if (old) {
4472 4477 bcopy(old, new, osize);
4473 4478 kmem_free(old, osize);
4474 4479 }
4475 4480 return (new);
4476 4481 }
4477 4482
4478 4483 /*
4479 4484  * Set up the extensible system attribute for creating a reparse point.
4480 4485  * The symlink data 'target' is validated for proper reparse-string
4481 4486  * format, and a check is also made to ensure the symlink data does not
4482 4487 * point to an existing file.
4483 4488 *
4484 4489  * Returns 0 on success, -1 on failure.
4485 4490 */
4486 4491 static int
4487 4492 fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4488 4493 {
4489 4494 xoptattr_t *xoap;
4490 4495
4491 4496 if ((!target) || (!vap) || (!xvattr))
4492 4497 return (-1);
4493 4498
4494 4499 /* validate reparse string */
4495 4500 if (reparse_validate((const char *)target))
4496 4501 return (-1);
4497 4502
4498 4503 xva_init(xvattr);
4499 4504 xvattr->xva_vattr = *vap;
4500 4505 xvattr->xva_vattr.va_mask |= AT_XVATTR;
4501 4506 xoap = xva_getxoptattr(xvattr);
4502 4507 ASSERT(xoap);
4503 4508 XVA_SET_REQ(xvattr, XAT_REPARSE);
4504 4509 xoap->xoa_reparse = 1;
4505 4510
4506 4511 return (0);
4507 4512 }
4508 4513
4509 4514 /*
4510 4515 * Function to check whether a symlink is a reparse point.
4511 4516  * Returns B_TRUE if it is a reparse point, else B_FALSE.
4512 4517 */
4513 4518 boolean_t
4514 4519 vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4515 4520 {
4516 4521 xvattr_t xvattr;
4517 4522 xoptattr_t *xoap;
4518 4523
4519 4524 if ((vp->v_type != VLNK) ||
4520 4525 !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4521 4526 return (B_FALSE);
4522 4527
4523 4528 xva_init(&xvattr);
4524 4529 xoap = xva_getxoptattr(&xvattr);
4525 4530 ASSERT(xoap);
4526 4531 XVA_SET_REQ(&xvattr, XAT_REPARSE);
4527 4532
4528 4533 if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
4529 4534 return (B_FALSE);
4530 4535
4531 4536 if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4532 4537 (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4533 4538 return (B_FALSE);
4534 4539
4535 4540 return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
4536 4541 }
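/*
 * Usage sketch (illustrative only): a caller that must treat reparse
 * points specially can probe before following the link, roughly:
 *
 *	if (vp->v_type == VLNK && vn_is_reparse(vp, CRED(), NULL)) {
 *		... hand vp to the reparse service instead of
 *		... following it as an ordinary symlink
 *	}
 */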
1279 lines elided