Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
29 29 * triggered from a "stub" rnode via a special set of vnodeops.
30 30 */
31 31
32 32 #include <sys/param.h>
33 33 #include <sys/types.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/time.h>
37 37 #include <sys/vnode.h>
38 38 #include <sys/vfs.h>
39 39 #include <sys/vfs_opreg.h>
40 40 #include <sys/file.h>
41 41 #include <sys/filio.h>
42 42 #include <sys/uio.h>
43 43 #include <sys/buf.h>
44 44 #include <sys/mman.h>
45 45 #include <sys/pathname.h>
46 46 #include <sys/dirent.h>
47 47 #include <sys/debug.h>
48 48 #include <sys/vmsystm.h>
49 49 #include <sys/fcntl.h>
50 50 #include <sys/flock.h>
51 51 #include <sys/swap.h>
52 52 #include <sys/errno.h>
53 53 #include <sys/strsubr.h>
54 54 #include <sys/sysmacros.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/mount.h>
57 57 #include <sys/cmn_err.h>
58 58 #include <sys/pathconf.h>
59 59 #include <sys/utsname.h>
60 60 #include <sys/dnlc.h>
61 61 #include <sys/acl.h>
62 62 #include <sys/systeminfo.h>
63 63 #include <sys/policy.h>
64 64 #include <sys/sdt.h>
65 65 #include <sys/list.h>
66 66 #include <sys/stat.h>
67 67 #include <sys/mntent.h>
68 68 #include <sys/priv.h>
69 69
70 70 #include <rpc/types.h>
71 71 #include <rpc/auth.h>
72 72 #include <rpc/clnt.h>
73 73
74 74 #include <nfs/nfs.h>
75 75 #include <nfs/nfs_clnt.h>
76 76 #include <nfs/nfs_acl.h>
77 77 #include <nfs/lm.h>
78 78 #include <nfs/nfs4.h>
79 79 #include <nfs/nfs4_kprot.h>
80 80 #include <nfs/rnode4.h>
81 81 #include <nfs/nfs4_clnt.h>
82 82 #include <nfs/nfsid_map.h>
83 83 #include <nfs/nfs4_idmap_impl.h>
84 84
85 85 #include <vm/hat.h>
86 86 #include <vm/as.h>
87 87 #include <vm/page.h>
88 88 #include <vm/pvn.h>
89 89 #include <vm/seg.h>
90 90 #include <vm/seg_map.h>
91 91 #include <vm/seg_kpm.h>
92 92 #include <vm/seg_vn.h>
93 93
94 94 #include <fs/fs_subr.h>
95 95
96 96 #include <sys/ddi.h>
97 97 #include <sys/int_fmtio.h>
98 98
99 99 #include <sys/sunddi.h>
100 100
101 101 #include <sys/priv_names.h>
102 102
103 103 extern zone_key_t nfs4clnt_zone_key;
104 104 extern zone_key_t nfsidmap_zone_key;
105 105
106 106 /*
107 107 * The automatic unmounter thread stuff!
108 108 */
109 109 static int nfs4_trigger_thread_timer = 20; /* in seconds */
110 110
111 111 /*
112 112 * Just a default....
113 113 */
114 114 static uint_t nfs4_trigger_mount_to = 240;
115 115
116 116 typedef struct nfs4_trigger_globals {
117 117 kmutex_t ntg_forest_lock;
118 118 uint_t ntg_mount_to;
119 119 int ntg_thread_started;
120 120 nfs4_ephemeral_tree_t *ntg_forest;
121 121 } nfs4_trigger_globals_t;
122 122
123 123 kmutex_t nfs4_ephemeral_thread_lock;
124 124
125 125 zone_key_t nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;
126 126
127 127 static void nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);
128 128
129 129 /*
130 130 * Used for ephemeral mounts; contains data either duplicated from
131 131 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
132 132 *
133 133 * It's intended that this structure is used solely for ephemeral
134 134 * mount-type specific data, for passing this data to
135 135 * nfs4_trigger_nargs_create().
136 136 */
137 137 typedef struct ephemeral_servinfo {
138 138 char *esi_hostname;
139 139 char *esi_netname;
140 140 char *esi_path;
141 141 int esi_path_len;
142 142 int esi_mount_flags;
143 143 struct netbuf *esi_addr;
144 144 struct netbuf *esi_syncaddr;
145 145 struct knetconfig *esi_knconf;
146 146 } ephemeral_servinfo_t;
147 147
148 148 /*
149 149 * Collect together the mount-type specific and generic data args.
150 150 */
151 151 typedef struct domount_args {
152 152 ephemeral_servinfo_t *dma_esi;
153 153 char *dma_hostlist; /* comma-sep. for RO failover */
154 154 struct nfs_args *dma_nargs;
155 155 } domount_args_t;
156 156
157 157
158 158 /*
159 159 * The vnode ops functions for a trigger stub vnode
160 160 */
161 161 static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
162 162 static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
163 163 caller_context_t *);
164 164 static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
165 165 caller_context_t *);
166 166 static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
167 167 caller_context_t *);
168 168 static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
169 169 caller_context_t *);
170 170 static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
171 171 struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
172 172 int *, pathname_t *);
173 173 static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
174 174 enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
175 175 vsecattr_t *);
176 176 static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
177 177 int);
178 178 static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
179 179 caller_context_t *, int);
180 180 static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
181 181 cred_t *, caller_context_t *, int);
182 182 static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
183 183 vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
184 184 static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
185 185 caller_context_t *, int);
186 186 static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
187 187 cred_t *, caller_context_t *, int);
188 188 static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);
189 189
190 190 /*
191 191 * Regular NFSv4 vnodeops that we need to reference directly
192 192 */
193 193 extern int nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
194 194 caller_context_t *);
195 195 extern void nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
196 196 extern int nfs4_rwlock(vnode_t *, int, caller_context_t *);
197 197 extern void nfs4_rwunlock(vnode_t *, int, caller_context_t *);
198 198 extern int nfs4_lookup(vnode_t *, char *, vnode_t **,
199 199 struct pathname *, int, vnode_t *, cred_t *,
200 200 caller_context_t *, int *, pathname_t *);
201 201 extern int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
202 202 caller_context_t *);
203 203 extern int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
204 204 caller_context_t *);
205 205 extern int nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
206 206 extern int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
207 207
208 208 static int nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
209 209 static int nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
210 210 cred_t *, vnode_t **);
211 211 static int nfs4_trigger_domount_args_create(vnode_t *, cred_t *,
212 212 domount_args_t **dmap);
213 213 static void nfs4_trigger_domount_args_destroy(domount_args_t *dma,
214 214 vnode_t *vp);
215 215 static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
216 216 cred_t *);
217 217 static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
218 218 static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
219 219 servinfo4_t *);
220 220 static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
221 221 cred_t *);
222 222 static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
223 223 ephemeral_servinfo_t *);
224 224 static void nfs4_trigger_nargs_destroy(struct nfs_args *);
225 225 static char *nfs4_trigger_create_mntopts(vfs_t *);
226 226 static void nfs4_trigger_destroy_mntopts(char *);
227 227 static int nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
228 228 static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
229 229 static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
230 230 struct netbuf *, int);
231 231
232 232 extern int umount2_engine(vfs_t *, int, cred_t *, int);
233 233
234 234 vnodeops_t *nfs4_trigger_vnodeops;
235 235
236 236 /*
237 237 * These are the vnodeops that we must define for stub vnodes.
238 238 *
239 239 *
240 240 * Many of the VOPs defined for NFSv4 do not need to be defined here,
241 241 * for various reasons. This will result in the VFS default function being
242 242 * used:
243 243 *
244 244 * - These VOPs require a previous VOP_OPEN to have occurred. That will have
245 245 * lost the reference to the stub vnode, meaning these should not be called:
246 246 * close, read, write, ioctl, readdir, seek.
247 247 *
248 248 * - These VOPs are meaningless for vnodes without data pages. Since the
249 249 * stub vnode is of type VDIR, these should not be called:
250 250 * space, getpage, putpage, map, addmap, delmap, pageio, fsync.
251 251 *
252 252 * - These VOPs are otherwise not applicable, and should not be called:
253 253 * dump, setsecattr.
254 254 *
255 255 *
256 256 * These VOPs we do not want to define, but nor do we want the VFS default
257 257 * action. Instead, we specify the VFS error function, with fs_error(), but
258 258 * note that fs_error() is not actually called. Instead it results in the
259 259 * use of the error function defined for the particular VOP, in vn_ops_table[]:
260 260 *
261 261 * - frlock, dispose, shrlock.
262 262 *
263 263 *
264 264 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
265 265 * NOTE: if any of these ops involve an OTW call with the stub FH, then
266 266 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
↓ open down ↓ |
266 lines elided |
↑ open up ↑ |
267 267 * to protect the security data in the servinfo4_t for the "parent"
268 268 * filesystem that contains the stub.
269 269 *
270 270 * - These VOPs should not trigger a mount, so that "ls -l" does not:
271 271 * pathconf, getsecattr.
272 272 *
273 273 * - These VOPs would not make sense to trigger:
274 274 * inactive, rwlock, rwunlock, fid, realvp.
275 275 */
276 276 const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
277 - VOPNAME_OPEN, { .vop_open = nfs4_trigger_open },
278 - VOPNAME_GETATTR, { .vop_getattr = nfs4_trigger_getattr },
279 - VOPNAME_SETATTR, { .vop_setattr = nfs4_trigger_setattr },
280 - VOPNAME_ACCESS, { .vop_access = nfs4_trigger_access },
281 - VOPNAME_LOOKUP, { .vop_lookup = nfs4_trigger_lookup },
282 - VOPNAME_CREATE, { .vop_create = nfs4_trigger_create },
283 - VOPNAME_REMOVE, { .vop_remove = nfs4_trigger_remove },
284 - VOPNAME_LINK, { .vop_link = nfs4_trigger_link },
285 - VOPNAME_RENAME, { .vop_rename = nfs4_trigger_rename },
286 - VOPNAME_MKDIR, { .vop_mkdir = nfs4_trigger_mkdir },
287 - VOPNAME_RMDIR, { .vop_rmdir = nfs4_trigger_rmdir },
288 - VOPNAME_SYMLINK, { .vop_symlink = nfs4_trigger_symlink },
289 - VOPNAME_READLINK, { .vop_readlink = nfs4_trigger_readlink },
290 - VOPNAME_INACTIVE, { .vop_inactive = nfs4_inactive },
291 - VOPNAME_FID, { .vop_fid = nfs4_fid },
292 - VOPNAME_RWLOCK, { .vop_rwlock = nfs4_rwlock },
293 - VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs4_rwunlock },
294 - VOPNAME_REALVP, { .vop_realvp = nfs4_realvp },
295 - VOPNAME_GETSECATTR, { .vop_getsecattr = nfs4_getsecattr },
296 - VOPNAME_PATHCONF, { .vop_pathconf = nfs4_pathconf },
297 - VOPNAME_FRLOCK, { .error = fs_error },
298 - VOPNAME_DISPOSE, { .error = fs_error },
299 - VOPNAME_SHRLOCK, { .error = fs_error },
300 - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
301 - NULL, NULL
277 + { VOPNAME_OPEN, { .vop_open = nfs4_trigger_open } },
278 + { VOPNAME_GETATTR, { .vop_getattr = nfs4_trigger_getattr } },
279 + { VOPNAME_SETATTR, { .vop_setattr = nfs4_trigger_setattr } },
280 + { VOPNAME_ACCESS, { .vop_access = nfs4_trigger_access } },
281 + { VOPNAME_LOOKUP, { .vop_lookup = nfs4_trigger_lookup } },
282 + { VOPNAME_CREATE, { .vop_create = nfs4_trigger_create } },
283 + { VOPNAME_REMOVE, { .vop_remove = nfs4_trigger_remove } },
284 + { VOPNAME_LINK, { .vop_link = nfs4_trigger_link } },
285 + { VOPNAME_RENAME, { .vop_rename = nfs4_trigger_rename } },
286 + { VOPNAME_MKDIR, { .vop_mkdir = nfs4_trigger_mkdir } },
287 + { VOPNAME_RMDIR, { .vop_rmdir = nfs4_trigger_rmdir } },
288 + { VOPNAME_SYMLINK, { .vop_symlink = nfs4_trigger_symlink } },
289 + { VOPNAME_READLINK, { .vop_readlink = nfs4_trigger_readlink } },
290 + { VOPNAME_INACTIVE, { .vop_inactive = nfs4_inactive } },
291 + { VOPNAME_FID, { .vop_fid = nfs4_fid } },
292 + { VOPNAME_RWLOCK, { .vop_rwlock = nfs4_rwlock } },
293 + { VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs4_rwunlock } },
294 + { VOPNAME_REALVP, { .vop_realvp = nfs4_realvp } },
295 + { VOPNAME_GETSECATTR, { .vop_getsecattr = nfs4_getsecattr } },
296 + { VOPNAME_PATHCONF, { .vop_pathconf = nfs4_pathconf } },
297 + { VOPNAME_FRLOCK, { .error = fs_error } },
298 + { VOPNAME_DISPOSE, { .error = fs_error } },
299 + { VOPNAME_SHRLOCK, { .error = fs_error } },
300 + { VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support } },
 301 + 	{ NULL, { NULL } }
302 302 };
303 303
304 304 static void
305 305 nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
306 306 {
307 307 ASSERT(mutex_owned(&net->net_cnt_lock));
308 308 net->net_refcnt++;
309 309 ASSERT(net->net_refcnt != 0);
310 310 }
311 311
312 312 static void
313 313 nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
314 314 {
315 315 mutex_enter(&net->net_cnt_lock);
316 316 nfs4_ephemeral_tree_incr(net);
317 317 mutex_exit(&net->net_cnt_lock);
318 318 }
319 319
320 320 /*
321 321 * We need a safe way to decrement the refcnt whilst the
322 322 * lock is being held.
323 323 */
324 324 static void
325 325 nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
326 326 {
327 327 ASSERT(mutex_owned(&net->net_cnt_lock));
328 328 ASSERT(net->net_refcnt != 0);
329 329 net->net_refcnt--;
330 330 }
331 331
332 332 static void
333 333 nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
334 334 {
335 335 mutex_enter(&net->net_cnt_lock);
336 336 nfs4_ephemeral_tree_decr(net);
337 337 mutex_exit(&net->net_cnt_lock);
338 338 }
339 339
340 340 /*
341 341 * Trigger ops for stub vnodes; for mirror mounts, etc.
342 342 *
343 343 * The general idea is that a "triggering" op will first call
344 344 * nfs4_trigger_mount(), which will find out whether a mount has already
345 345 * been triggered.
346 346 *
347 347 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
348 348 * of the covering vfs.
349 349 *
350 350 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
351 351 * and again set newvp, as above.
352 352 *
353 353 * The triggering op may then re-issue the VOP by calling it on newvp.
354 354 *
355 355 * Note that some ops may perform custom action, and may or may not need
356 356 * to trigger a mount.
357 357 *
358 358 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
359 359 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
360 360 * and that would just recurse. Instead, we call the v4 op directly,
361 361 * by name. This is OK, since we know that the vnode is for NFSv4,
362 362 * otherwise it couldn't be a stub.
363 363 *
364 364 */
365 365
366 366 static int
367 367 nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
368 368 {
369 369 int error;
370 370 vnode_t *newvp;
371 371
372 372 error = nfs4_trigger_mount(*vpp, cr, &newvp);
373 373 if (error)
374 374 return (error);
375 375
376 376 /* Release the stub vnode, as we're losing the reference to it */
377 377 VN_RELE(*vpp);
378 378
379 379 /* Give the caller the root vnode of the newly-mounted fs */
380 380 *vpp = newvp;
381 381
382 382 /* return with VN_HELD(newvp) */
383 383 return (VOP_OPEN(vpp, flag, cr, ct));
384 384 }
385 385
386 386 void
387 387 nfs4_fake_attrs(vnode_t *vp, struct vattr *vap)
388 388 {
389 389 uint_t mask;
390 390 timespec_t now;
391 391
392 392 /*
393 393 * Set some attributes here for referrals.
394 394 */
395 395 mask = vap->va_mask;
396 396 bzero(vap, sizeof (struct vattr));
397 397 vap->va_mask = mask;
398 398 vap->va_uid = 0;
399 399 vap->va_gid = 0;
400 400 vap->va_nlink = 1;
401 401 vap->va_size = 1;
402 402 gethrestime(&now);
403 403 vap->va_atime = now;
404 404 vap->va_mtime = now;
405 405 vap->va_ctime = now;
406 406 vap->va_type = VDIR;
407 407 vap->va_mode = 0555;
408 408 vap->va_fsid = vp->v_vfsp->vfs_dev;
409 409 vap->va_rdev = 0;
410 410 vap->va_blksize = MAXBSIZE;
411 411 vap->va_nblocks = 1;
412 412 vap->va_seq = 0;
413 413 }
414 414
415 415 /*
416 416 * For the majority of cases, nfs4_trigger_getattr() will not trigger
417 417 * a mount. However, if ATTR_TRIGGER is set, we are being informed
418 418 * that we need to force the mount before we attempt to determine
419 419 * the attributes. The intent is an atomic operation for security
420 420 * testing.
421 421 *
422 422 * If we're not triggering a mount, we can still inquire about the
423 423 * actual attributes from the server in the mirror mount case,
424 424 * and will return manufactured attributes for a referral (see
425 425 * the 'create' branch of find_referral_stubvp()).
426 426 */
427 427 static int
428 428 nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
429 429 caller_context_t *ct)
430 430 {
431 431 int error;
432 432
433 433 if (flags & ATTR_TRIGGER) {
434 434 vnode_t *newvp;
435 435
436 436 error = nfs4_trigger_mount(vp, cr, &newvp);
437 437 if (error)
438 438 return (error);
439 439
440 440 error = VOP_GETATTR(newvp, vap, flags, cr, ct);
441 441 VN_RELE(newvp);
442 442
443 443 } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) {
444 444
445 445 error = nfs4_getattr(vp, vap, flags, cr, ct);
446 446
447 447 } else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) {
448 448
449 449 nfs4_fake_attrs(vp, vap);
450 450 error = 0;
451 451 }
452 452
453 453 return (error);
454 454 }
455 455
456 456 static int
457 457 nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
458 458 caller_context_t *ct)
459 459 {
460 460 int error;
461 461 vnode_t *newvp;
462 462
463 463 error = nfs4_trigger_mount(vp, cr, &newvp);
464 464 if (error)
465 465 return (error);
466 466
467 467 error = VOP_SETATTR(newvp, vap, flags, cr, ct);
468 468 VN_RELE(newvp);
469 469
470 470 return (error);
471 471 }
472 472
473 473 static int
474 474 nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
475 475 caller_context_t *ct)
476 476 {
477 477 int error;
478 478 vnode_t *newvp;
479 479
480 480 error = nfs4_trigger_mount(vp, cr, &newvp);
481 481 if (error)
482 482 return (error);
483 483
484 484 error = VOP_ACCESS(newvp, mode, flags, cr, ct);
485 485 VN_RELE(newvp);
486 486
487 487 return (error);
488 488 }
489 489
490 490 static int
491 491 nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
492 492 struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
493 493 caller_context_t *ct, int *deflags, pathname_t *rpnp)
494 494 {
495 495 int error;
496 496 vnode_t *newdvp;
497 497 rnode4_t *drp = VTOR4(dvp);
498 498
499 499 ASSERT(RP_ISSTUB(drp));
500 500
501 501 /*
502 502 * It's not legal to lookup ".." for an fs root, so we mustn't pass
503 503 * that up. Instead, pass onto the regular op, regardless of whether
504 504 * we've triggered a mount.
505 505 */
506 506 if (strcmp(nm, "..") == 0)
507 507 if (RP_ISSTUB_MIRRORMOUNT(drp)) {
508 508 return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
509 509 ct, deflags, rpnp));
510 510 } else if (RP_ISSTUB_REFERRAL(drp)) {
511 511 /* Return the parent vnode */
512 512 return (vtodv(dvp, vpp, cr, TRUE));
513 513 }
514 514
515 515 error = nfs4_trigger_mount(dvp, cr, &newdvp);
516 516 if (error)
517 517 return (error);
518 518
519 519 error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
520 520 deflags, rpnp);
521 521 VN_RELE(newdvp);
522 522
523 523 return (error);
524 524 }
525 525
526 526 static int
527 527 nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
528 528 enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
529 529 int flags, caller_context_t *ct, vsecattr_t *vsecp)
530 530 {
531 531 int error;
532 532 vnode_t *newdvp;
533 533
534 534 error = nfs4_trigger_mount(dvp, cr, &newdvp);
535 535 if (error)
536 536 return (error);
537 537
538 538 error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
539 539 flags, ct, vsecp);
540 540 VN_RELE(newdvp);
541 541
542 542 return (error);
543 543 }
544 544
545 545 static int
546 546 nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
547 547 int flags)
548 548 {
549 549 int error;
550 550 vnode_t *newdvp;
551 551
552 552 error = nfs4_trigger_mount(dvp, cr, &newdvp);
553 553 if (error)
554 554 return (error);
555 555
556 556 error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
557 557 VN_RELE(newdvp);
558 558
559 559 return (error);
560 560 }
561 561
562 562 static int
563 563 nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
564 564 caller_context_t *ct, int flags)
565 565 {
566 566 int error;
567 567 vnode_t *newtdvp;
568 568
569 569 error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
570 570 if (error)
571 571 return (error);
572 572
573 573 /*
574 574 * We don't check whether svp is a stub. Let the NFSv4 code
575 575 * detect that error, and return accordingly.
576 576 */
577 577 error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
578 578 VN_RELE(newtdvp);
579 579
580 580 return (error);
581 581 }
582 582
583 583 static int
584 584 nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
585 585 cred_t *cr, caller_context_t *ct, int flags)
586 586 {
587 587 int error;
588 588 vnode_t *newsdvp;
589 589 rnode4_t *tdrp = VTOR4(tdvp);
590 590
591 591 /*
592 592 * We know that sdvp is a stub, otherwise we would not be here.
593 593 *
 594 594 	 * If tdvp is also a stub, there are two possibilities: it
595 595 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
596 596 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
597 597 *
598 598 * In the former case, just trigger sdvp, and treat tdvp as
599 599 * though it were not a stub.
600 600 *
601 601 * In the latter case, it might be a different stub for the
602 602 * same server fs as sdvp, or for a different server fs.
603 603 * Regardless, from the client perspective this would still
604 604 * be a cross-filesystem rename, and should not be allowed,
605 605 * so return EXDEV, without triggering either mount.
606 606 */
607 607 if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
608 608 return (EXDEV);
609 609
610 610 error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
611 611 if (error)
612 612 return (error);
613 613
614 614 error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);
615 615
616 616 VN_RELE(newsdvp);
617 617
618 618 return (error);
619 619 }
620 620
621 621 /* ARGSUSED */
622 622 static int
623 623 nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
624 624 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
625 625 {
626 626 int error;
627 627 vnode_t *newdvp;
628 628
629 629 error = nfs4_trigger_mount(dvp, cr, &newdvp);
630 630 if (error)
631 631 return (error);
632 632
633 633 error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
634 634 VN_RELE(newdvp);
635 635
636 636 return (error);
637 637 }
638 638
639 639 static int
640 640 nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
641 641 caller_context_t *ct, int flags)
642 642 {
643 643 int error;
644 644 vnode_t *newdvp;
645 645
646 646 error = nfs4_trigger_mount(dvp, cr, &newdvp);
647 647 if (error)
648 648 return (error);
649 649
650 650 error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
651 651 VN_RELE(newdvp);
652 652
653 653 return (error);
654 654 }
655 655
656 656 static int
657 657 nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
658 658 cred_t *cr, caller_context_t *ct, int flags)
659 659 {
660 660 int error;
661 661 vnode_t *newdvp;
662 662
663 663 error = nfs4_trigger_mount(dvp, cr, &newdvp);
664 664 if (error)
665 665 return (error);
666 666
667 667 error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
668 668 VN_RELE(newdvp);
669 669
670 670 return (error);
671 671 }
672 672
673 673 static int
674 674 nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
675 675 caller_context_t *ct)
676 676 {
677 677 int error;
678 678 vnode_t *newvp;
679 679
680 680 error = nfs4_trigger_mount(vp, cr, &newvp);
681 681 if (error)
682 682 return (error);
683 683
684 684 error = VOP_READLINK(newvp, uiop, cr, ct);
685 685 VN_RELE(newvp);
686 686
687 687 return (error);
688 688 }
689 689
690 690 /* end of trigger vnode ops */
691 691
692 692 /*
693 693 * See if the mount has already been done by another caller.
694 694 */
695 695 static int
696 696 nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
697 697 bool_t *was_mounted, vfs_t **vfsp)
698 698 {
699 699 int error;
700 700 mntinfo4_t *mi = VTOMI4(vp);
701 701
702 702 *was_mounted = FALSE;
703 703
704 704 error = vn_vfsrlock_wait(vp);
705 705 if (error)
706 706 return (error);
707 707
708 708 *vfsp = vn_mountedvfs(vp);
709 709 if (*vfsp != NULL) {
710 710 /* the mount has already occurred */
711 711 error = VFS_ROOT(*vfsp, newvpp);
712 712 if (!error) {
713 713 /* need to update the reference time */
714 714 mutex_enter(&mi->mi_lock);
715 715 if (mi->mi_ephemeral)
716 716 mi->mi_ephemeral->ne_ref_time =
717 717 gethrestime_sec();
718 718 mutex_exit(&mi->mi_lock);
719 719
720 720 *was_mounted = TRUE;
721 721 }
722 722 }
723 723
724 724 vn_vfsunlock(vp);
725 725 return (0);
726 726 }
727 727
728 728 /*
729 729 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
730 730 *
731 731 * The mount may have already occurred, via another thread. If not,
732 732 * assemble the location information - which may require fetching - and
733 733 * perform the mount.
734 734 *
735 735 * Sets newvp to be the root of the fs that is now covering vp. Note
736 736 * that we return with VN_HELD(*newvp).
737 737 *
738 738 * The caller is responsible for passing the VOP onto the covering fs.
739 739 */
740 740 static int
741 741 nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
742 742 {
743 743 int error;
744 744 vfs_t *vfsp;
745 745 rnode4_t *rp = VTOR4(vp);
746 746 mntinfo4_t *mi = VTOMI4(vp);
747 747 domount_args_t *dma;
748 748
749 749 nfs4_ephemeral_tree_t *net;
750 750
751 751 bool_t must_unlock = FALSE;
752 752 bool_t is_building = FALSE;
753 753 bool_t was_mounted = FALSE;
754 754
755 755 cred_t *mcred = NULL;
756 756
757 757 nfs4_trigger_globals_t *ntg;
758 758
759 759 zone_t *zone = curproc->p_zone;
760 760
761 761 ASSERT(RP_ISSTUB(rp));
762 762
763 763 *newvpp = NULL;
764 764
765 765 /*
766 766 * Has the mount already occurred?
767 767 */
768 768 error = nfs4_trigger_mounted_already(vp, newvpp,
769 769 &was_mounted, &vfsp);
770 770 if (error || was_mounted)
771 771 goto done;
772 772
773 773 ntg = zone_getspecific(nfs4_ephemeral_key, zone);
774 774 ASSERT(ntg != NULL);
775 775
776 776 mutex_enter(&mi->mi_lock);
777 777
778 778 /*
779 779 * We need to lock down the ephemeral tree.
780 780 */
781 781 if (mi->mi_ephemeral_tree == NULL) {
782 782 net = kmem_zalloc(sizeof (*net), KM_SLEEP);
783 783 mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
784 784 mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
785 785 net->net_refcnt = 1;
786 786 net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
787 787 is_building = TRUE;
788 788
789 789 /*
790 790 * We need to add it to the zone specific list for
791 791 * automatic unmounting and harvesting of deadwood.
792 792 */
793 793 mutex_enter(&ntg->ntg_forest_lock);
794 794 if (ntg->ntg_forest != NULL)
795 795 net->net_next = ntg->ntg_forest;
796 796 ntg->ntg_forest = net;
797 797 mutex_exit(&ntg->ntg_forest_lock);
798 798
799 799 /*
800 800 * No lock order confusion with mi_lock because no
801 801 * other node could have grabbed net_tree_lock.
802 802 */
803 803 mutex_enter(&net->net_tree_lock);
804 804 mi->mi_ephemeral_tree = net;
805 805 net->net_mount = mi;
806 806 mutex_exit(&mi->mi_lock);
807 807
808 808 MI4_HOLD(mi);
809 809 VFS_HOLD(mi->mi_vfsp);
810 810 } else {
811 811 net = mi->mi_ephemeral_tree;
812 812 nfs4_ephemeral_tree_hold(net);
813 813
814 814 mutex_exit(&mi->mi_lock);
815 815
816 816 mutex_enter(&net->net_tree_lock);
817 817
818 818 /*
 819 819 	 * We can only proceed if the tree is neither locked
820 820 * nor being torn down.
821 821 */
822 822 mutex_enter(&net->net_cnt_lock);
823 823 if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
824 824 nfs4_ephemeral_tree_decr(net);
825 825 mutex_exit(&net->net_cnt_lock);
826 826 mutex_exit(&net->net_tree_lock);
827 827
828 828 return (EIO);
829 829 }
830 830 mutex_exit(&net->net_cnt_lock);
831 831 }
832 832
833 833 mutex_enter(&net->net_cnt_lock);
834 834 net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
835 835 mutex_exit(&net->net_cnt_lock);
836 836
837 837 must_unlock = TRUE;
838 838
839 839 error = nfs4_trigger_domount_args_create(vp, cr, &dma);
840 840 if (error)
841 841 goto done;
842 842
843 843 /*
844 844 * Note that since we define mirror mounts to work
845 845 * for any user, we simply extend the privileges of
846 846 * the user's credentials to allow the mount to
847 847 * proceed.
848 848 */
849 849 mcred = crdup(cr);
850 850 if (mcred == NULL) {
851 851 error = EINVAL;
852 852 nfs4_trigger_domount_args_destroy(dma, vp);
853 853 goto done;
854 854 }
855 855
856 856 crset_zone_privall(mcred);
857 857 if (is_system_labeled())
858 858 (void) setpflags(NET_MAC_AWARE, 1, mcred);
859 859
860 860 error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
861 861 nfs4_trigger_domount_args_destroy(dma, vp);
862 862
863 863 DTRACE_PROBE2(nfs4clnt__func__referral__mount,
864 864 vnode_t *, vp, int, error);
865 865
866 866 crfree(mcred);
867 867
868 868 done:
869 869
870 870 if (must_unlock) {
871 871 mutex_enter(&net->net_cnt_lock);
872 872 net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;
873 873
874 874 /*
875 875 * REFCNT: If we are the root of the tree, then we need
876 876 * to keep a reference because we malloced the tree and
877 877 * this is where we tied it to our mntinfo.
878 878 *
879 879 * If we are not the root of the tree, then our tie to
 880 880 	 * the mntinfo occurred elsewhere and we need to
881 881 * decrement the reference to the tree.
882 882 */
883 883 if (is_building)
884 884 net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
885 885 else
886 886 nfs4_ephemeral_tree_decr(net);
887 887 mutex_exit(&net->net_cnt_lock);
888 888
889 889 mutex_exit(&net->net_tree_lock);
890 890 }
891 891
892 892 if (!error && (newvpp == NULL || *newvpp == NULL))
893 893 error = ENOSYS;
894 894
895 895 return (error);
896 896 }
897 897
/*
 * Collect together both the generic & mount-type specific args.
 *
 * On success, *dmap points to a freshly allocated domount_args_t holding:
 *  - dma_esi: the ephemeral_servinfo_t for the server we will contact;
 *  - dma_hostlist: comma-separated hostnames of all responsive servers;
 *  - dma_nargs: a linked list of nfs_args, one per responsive server,
 *    linked via nfs_ext_u.nfs_extB.next, for NFS RO failover.
 * The caller releases it with nfs4_trigger_domount_args_destroy().
 *
 * Returns 0 on success, EINVAL if the ephemeral_servinfo_t cannot be
 * built for the current server, or EINTR if a server ping was
 * interrupted.  Note: this loops, sleeping one second per pass, until
 * at least one server responds.
 */
static int
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr, domount_args_t **dmap)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	/* pings are interruptible only if the mount allows interrupts */
	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (EINVAL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			if (status == RPC_INTR) {
				/* interrupted: undo all work done so far */
				kmem_free(hostlist, MAXPATHLEN);
				nfs4_trigger_esi_destroy(esi_first, vp);
				nargs = nargs_head;
				while (nargs != NULL) {
					next = nargs->nfs_ext_u.nfs_extB.next;
					nfs4_trigger_nargs_destroy(nargs);
					nargs = next;
				}
				return (EINTR);
			} else if (status != RPC_SUCCESS) {
				/* if the server did not respond, ignore it */
				continue;
			}

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;
	*dmap = dma;

	return (0);
}
1057 1057
1058 1058 static void
1059 1059 nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
1060 1060 {
1061 1061 if (dma != NULL) {
1062 1062 if (dma->dma_esi != NULL && vp != NULL)
1063 1063 nfs4_trigger_esi_destroy(dma->dma_esi, vp);
1064 1064
1065 1065 if (dma->dma_hostlist != NULL)
1066 1066 kmem_free(dma->dma_hostlist, MAXPATHLEN);
1067 1067
1068 1068 if (dma->dma_nargs != NULL) {
1069 1069 struct nfs_args *nargs = dma->dma_nargs;
1070 1070
1071 1071 do {
1072 1072 struct nfs_args *next =
1073 1073 nargs->nfs_ext_u.nfs_extB.next;
1074 1074
1075 1075 nfs4_trigger_nargs_destroy(nargs);
1076 1076 nargs = next;
1077 1077 } while (nargs != NULL);
1078 1078 }
1079 1079
1080 1080 kmem_free(dma, sizeof (domount_args_t));
1081 1081 }
1082 1082 }
1083 1083
1084 1084 /*
1085 1085 * The ephemeral_servinfo_t struct contains basic information we will need to
1086 1086 * perform the mount. Whilst the structure is generic across different
1087 1087 * types of ephemeral mount, the way we gather its contents differs.
1088 1088 */
1089 1089 static ephemeral_servinfo_t *
1090 1090 nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr)
1091 1091 {
1092 1092 ephemeral_servinfo_t *esi;
1093 1093 rnode4_t *rp = VTOR4(vp);
1094 1094
1095 1095 ASSERT(RP_ISSTUB(rp));
1096 1096
1097 1097 /* Call the ephemeral type-specific routine */
1098 1098 if (RP_ISSTUB_MIRRORMOUNT(rp))
1099 1099 esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
1100 1100 else if (RP_ISSTUB_REFERRAL(rp))
1101 1101 esi = nfs4_trigger_esi_create_referral(vp, cr);
1102 1102 else
1103 1103 esi = NULL;
1104 1104 return (esi);
1105 1105 }
1106 1106
1107 1107 static void
1108 1108 nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
1109 1109 {
1110 1110 rnode4_t *rp = VTOR4(vp);
1111 1111
1112 1112 ASSERT(RP_ISSTUB(rp));
1113 1113
1114 1114 /* Currently, no need for an ephemeral type-specific routine */
1115 1115
1116 1116 /*
1117 1117 * The contents of ephemeral_servinfo_t goes into nfs_args,
1118 1118 * and will be handled by nfs4_trigger_nargs_destroy().
1119 1119 * We need only free the structure itself.
1120 1120 */
1121 1121 if (esi != NULL)
1122 1122 kmem_free(esi, sizeof (ephemeral_servinfo_t));
1123 1123 }
1124 1124
1125 1125 /*
1126 1126 * Some of this may turn out to be common with other ephemeral types,
1127 1127 * in which case it should be moved to nfs4_trigger_esi_create(), or a
1128 1128 * common function called.
1129 1129 */
1130 1130
1131 1131 /*
1132 1132 * Mirror mounts case - should have all data available
1133 1133 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* kmem_zalloc() gives a NUL-filled buffer, so strcat() into it is OK */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	/* deep-copy the server's transport address */
	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	/* deep-copy the knetconfig, including its two KNC_STRSIZE strings */
	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;	/* NB: deliberately shadows outer bufp */
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(stubpath) + 1;
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* restore the pointer fn_path() returned before freeing it */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}
1248 1248
1249 1249 /*
1250 1250 * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to
1251 1251 * get network information required to do the mount call.
1252 1252 */
1253 1253 int
1254 1254 nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
1255 1255 {
1256 1256 door_arg_t door_args;
1257 1257 door_handle_t dh;
1258 1258 XDR xdr;
1259 1259 refd_door_args_t *xdr_argsp;
1260 1260 refd_door_res_t *orig_resp;
1261 1261 k_sigset_t smask;
1262 1262 int xdr_len = 0;
1263 1263 int res_len = 16; /* length of an ip adress */
1264 1264 int orig_reslen = res_len;
1265 1265 int error = 0;
1266 1266 struct nfsidmap_globals *nig;
1267 1267
1268 1268 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
1269 1269 return (ECONNREFUSED);
1270 1270
1271 1271 nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
1272 1272 ASSERT(nig != NULL);
1273 1273
1274 1274 mutex_enter(&nig->nfsidmap_daemon_lock);
1275 1275 dh = nig->nfsidmap_daemon_dh;
1276 1276 if (dh == NULL) {
1277 1277 mutex_exit(&nig->nfsidmap_daemon_lock);
1278 1278 cmn_err(CE_NOTE,
1279 1279 "nfs4_callmapid: nfsmapid daemon not " \
1280 1280 "running unable to resolve host name\n");
1281 1281 return (EINVAL);
1282 1282 }
1283 1283 door_ki_hold(dh);
1284 1284 mutex_exit(&nig->nfsidmap_daemon_lock);
1285 1285
1286 1286 xdr_len = xdr_sizeof(&(xdr_utf8string), server);
1287 1287
1288 1288 xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
1289 1289 xdr_argsp->xdr_len = xdr_len;
1290 1290 xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;
1291 1291
1292 1292 xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
1293 1293 xdr_len, XDR_ENCODE);
1294 1294
1295 1295 if (!xdr_utf8string(&xdr, server)) {
1296 1296 kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
1297 1297 door_ki_rele(dh);
1298 1298 return (1);
1299 1299 }
1300 1300
1301 1301 if (orig_reslen)
1302 1302 orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);
1303 1303
1304 1304 door_args.data_ptr = (char *)xdr_argsp;
1305 1305 door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
1306 1306 door_args.desc_ptr = NULL;
1307 1307 door_args.desc_num = 0;
1308 1308 door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
1309 1309 door_args.rsize = res_len;
1310 1310
1311 1311 sigintr(&smask, 1);
1312 1312 error = door_ki_upcall(dh, &door_args);
1313 1313 sigunintr(&smask);
1314 1314
1315 1315 door_ki_rele(dh);
1316 1316
1317 1317 kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
1318 1318 if (error) {
1319 1319 kmem_free(orig_resp, orig_reslen);
1320 1320 /*
1321 1321 * There is no door to connect to. The referral daemon
1322 1322 * must not be running yet.
1323 1323 */
1324 1324 cmn_err(CE_WARN,
1325 1325 "nfsmapid not running cannot resolve host name");
1326 1326 goto out;
1327 1327 }
1328 1328
1329 1329 /*
1330 1330 * If the results buffer passed back are not the same as
1331 1331 * what was sent free the old buffer and use the new one.
1332 1332 */
1333 1333 if (orig_resp && orig_reslen) {
1334 1334 refd_door_res_t *door_resp;
1335 1335
1336 1336 door_resp = (refd_door_res_t *)door_args.rbuf;
1337 1337 if ((void *)door_args.rbuf != orig_resp)
1338 1338 kmem_free(orig_resp, orig_reslen);
1339 1339 if (door_resp->res_status == 0) {
1340 1340 xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
1341 1341 door_resp->xdr_len, XDR_DECODE);
1342 1342 bzero(resp, sizeof (struct nfs_fsl_info));
1343 1343 if (!xdr_nfs_fsl_info(&xdr, resp)) {
1344 1344 DTRACE_PROBE2(
1345 1345 nfs4clnt__debug__referral__upcall__xdrfail,
1346 1346 struct nfs_fsl_info *, resp,
1347 1347 char *, "nfs4_callmapid");
1348 1348 error = EINVAL;
1349 1349 }
1350 1350 } else {
1351 1351 DTRACE_PROBE2(
1352 1352 nfs4clnt__debug__referral__upcall__badstatus,
1353 1353 int, door_resp->res_status,
1354 1354 char *, "nfs4_callmapid");
1355 1355 error = door_resp->res_status;
1356 1356 }
1357 1357 kmem_free(door_args.rbuf, door_args.rsize);
1358 1358 }
1359 1359 out:
1360 1360 DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
1361 1361 char *, server, int, error);
1362 1362 return (error);
1363 1363 }
1364 1364
/*
 * Fetches the fs_locations attribute. Typically called
 * from a Replication/Migration/Referrals/Mirror-mount context
 *
 * Fills in the attributes in garp. The caller is assumed
 * to have allocated memory for garp.
 *
 * lock: if set do not lock s_recovlock and mi_recovlock mutex,
 *	it's already done by caller. Otherwise lock these mutexes
 *	before doing the rfs4call().
 *
 * Returns
 *	1 for success (*callres then owns the COMPOUND results and the
 *	  caller must xdr_free() them)
 *	0 for failure (results already freed here)
 */
int
nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
    cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	int argoplist_size = 3 * sizeof (nfs_argop4);
	nfs4_server_t *sp = NULL;
	int doqueue = 1;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int retval = 1;
	struct nfs4_clnt *nfscl;

	if (lock == TRUE)
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	else
		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	sp = find_nfs4_server(mi);
	if (lock == TRUE)
		nfs_rw_exit(&mi->mi_recovlock);

	/*
	 * find_nfs4_server() returned with s_lock held (we release it here);
	 * we keep the sp reference until the end of this function.
	 */
	if (sp != NULL)
		mutex_exit(&sp->s_lock);

	if (lock == TRUE) {
		if (sp != NULL)
			(void) nfs_rw_enter_sig(&sp->s_recovlock,
			    RW_WRITER, 0);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
	} else {
		if (sp != NULL) {
			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
		}
	}

	/*
	 * Do we want to do the setup for recovery here?
	 *
	 * We know that the server responded to a null ping a very
	 * short time ago, and we know that we intend to do a
	 * single stateless operation - we want to fetch attributes,
	 * so we know we can't encounter errors about state. If
	 * something goes wrong with the GETATTR, like not being
	 * able to get a response from the server or getting any
	 * kind of FH error, we should fail the mount.
	 *
	 * We may want to re-visited this at a later time.
	 */
	argop = kmem_alloc(argoplist_size, KM_SLEEP);

	args.ctag = TAG_GETATTR_FSLOCATION;
	/* PUTFH LOOKUP GETATTR */
	args.array_len = 3;
	args.array = argop;

	/* 0. putfh file */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. lookup name, can't be dotdot */
	argop[1].argop = OP_CLOOKUP;
	argop[1].nfs_argop4_u.opclookup.cname = nm;

	/* 2. file attrs */
	argop[2].argop = OP_GETATTR;
	argop[2].nfs_argop4_u.opgetattr.attr_request =
	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
	    FATTR4_MOUNTED_ON_FILEID_MASK;
	argop[2].nfs_argop4_u.opgetattr.mi = mi;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	if (lock == TRUE) {
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs_rw_exit(&sp->s_recovlock);
	}

	/* bump the per-zone referral counter (kstat) */
	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	nfscl->nfscl_stat.referrals.value.ui64++;
	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);

	if (e.error != 0) {
		if (sp != NULL)
			nfs4_server_rele(sp);
		kmem_free(argop, argoplist_size);
		return (0);
	}

	/*
	 * Check for all possible error conditions.
	 * For valid replies without an ops array or for illegal
	 * replies, return a failure.
	 */
	if (res.status != NFS4_OK || res.array_len < 3 ||
	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
		retval = 0;
		goto exit;
	}

	/*
	 * There isn't much value in putting the attributes
	 * in the attr cache since fs_locations4 aren't
	 * encountered very frequently, so just make them
	 * available to the caller.
	 */
	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;

	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");

	/* No fs_locations? -- return a failure */
	if (garp->n4g_ext_res == NULL ||
	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
		retval = 0;
		goto exit;
	}

	if (!garp->n4g_fsid_valid)
		retval = 0;

exit:
	if (retval == 0) {
		/* the call was ok but failed validating the call results */
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	} else {
		/* success: hand the results (and the duty to free) back */
		ASSERT(callres != NULL);
		*callres = res;
	}

	if (sp != NULL)
		nfs4_server_rele(sp);
	kmem_free(argop, argoplist_size);
	return (retval);
}
1520 1520
1521 1521 /* tunable to disable referral mounts */
1522 1522 int nfs4_no_referrals = 0;
1523 1523
/*
 * Look up (or create) the stub vnode for a referral: fetch the
 * mounted_on_fileid for <dvp, nm>, synthesize a unique filehandle from
 * the directory's FH plus that fileid, and build/find the rnode for it.
 *
 * Returns a held vnode, or NULL if the vnode cannot be created or found
 * (or referrals are disabled via nfs4_no_referrals).
 */
vnode_t *
find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
{
	nfs_fh4 *stub_fh, *dfh;
	nfs4_sharedfh_t *sfhp;
	char *newfhval;
	vnode_t *vp = NULL;
	fattr4_mounted_on_fileid mnt_on_fileid;
	nfs4_ga_res_t garp;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;
	hrtime_t t;

	if (nfs4_no_referrals)
		return (NULL);

	/*
	 * Get the mounted_on_fileid, unique on that server::fsid
	 */
	mi = VTOMI4(dvp);
	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
	    &garp, &callres, FALSE) == 0)
		return (NULL);
	mnt_on_fileid = garp.n4g_mon_fid;
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	/*
	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
	 */
	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
	newfhval = stub_fh->nfs_fh4_val;

	/* copy directory's file handle */
	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
	newfhval = newfhval + dfh->nfs_fh4_len;

	/* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
	bcopy((char *)&mnt_on_fileid, newfhval,
	    sizeof (fattr4_mounted_on_fileid));
	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);

	/*
	 * NOTE(review): stub_fh is freed immediately after sfh4_put(),
	 * which implies sfh4_put() copies the handle — confirm.
	 */
	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid));
	kmem_free(stub_fh, sizeof (nfs_fh4));
	if (sfhp == NULL)
		return (NULL);

	t = gethrtime();
	garp.n4g_va.va_type = VDIR;
	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));

	if (vp != NULL)
		vp->v_type = VDIR;

	sfh4_rele(&sfhp);
	return (vp);
}
1590 1590
1591 1591 int
1592 1592 nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
1593 1593 {
1594 1594 vnode_t *nvp;
1595 1595 rnode4_t *rp;
1596 1596
1597 1597 if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
1598 1598 return (EINVAL);
1599 1599
1600 1600 rp = VTOR4(nvp);
1601 1601 mutex_enter(&rp->r_statelock);
1602 1602 r4_stub_referral(rp);
1603 1603 mutex_exit(&rp->r_statelock);
1604 1604 dnlc_enter(dvp, nm, nvp);
1605 1605
1606 1606 if (*vpp != NULL)
1607 1607 VN_RELE(*vpp); /* no longer need this vnode */
1608 1608
1609 1609 *vpp = nvp;
1610 1610
1611 1611 return (0);
1612 1612 }
1613 1613
1614 1614 /*
1615 1615 * Fetch the location information and resolve the new server.
1616 1616 * Caller needs to free up the XDR data which is returned.
1617 1617 * Input: mount info, shared filehandle, nodename
1618 1618 * Return: Index to the result or Error(-1)
1619 1619 * Output: FsLocations Info, Resolved Server Info.
1620 1620 */
1621 1621 int
1622 1622 nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh,
1623 1623 char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res,
1624 1624 struct nfs_fsl_info *fsloc)
1625 1625 {
1626 1626 fs_location4 *fsp;
1627 1627 struct nfs_fsl_info nfsfsloc;
1628 1628 int ret, i, error;
1629 1629 nfs4_ga_res_t garp;
1630 1630 COMPOUND4res_clnt callres;
1631 1631 struct knetconfig *knc;
1632 1632
1633 1633 ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE);
1634 1634 if (ret == 0)
1635 1635 return (-1);
1636 1636
1637 1637 /*
1638 1638 * As a lame attempt to figuring out if we're
1639 1639 * handling a migration event or a referral,
1640 1640 * look for rnodes with this fsid in the rnode
1641 1641 * cache.
1642 1642 *
1643 1643 * If we can find one or more such rnodes, it
1644 1644 * means we're handling a migration event and
1645 1645 * we want to bail out in that case.
1646 1646 */
1647 1647 if (r4find_by_fsid(mi, &garp.n4g_fsid)) {
1648 1648 DTRACE_PROBE3(nfs4clnt__debug__referral__migration,
1649 1649 mntinfo4_t *, mi, nfs4_ga_res_t *, &garp,
1650 1650 char *, "nfs4_process_referral");
1651 1651 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1652 1652 return (-1);
1653 1653 }
1654 1654
1655 1655 /*
1656 1656 * Find the first responsive server to mount. When we find
1657 1657 * one, fsp will point to it.
1658 1658 */
1659 1659 for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) {
1660 1660
1661 1661 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i];
1662 1662 if (fsp->server_len == 0 || fsp->server_val == NULL)
1663 1663 continue;
1664 1664
1665 1665 error = nfs4_callmapid(fsp->server_val, &nfsfsloc);
1666 1666 if (error != 0)
1667 1667 continue;
1668 1668
1669 1669 error = nfs4_ping_server_common(nfsfsloc.knconf,
1670 1670 nfsfsloc.addr, !(mi->mi_flags & MI4_INT));
1671 1671 if (error == RPC_SUCCESS)
1672 1672 break;
1673 1673
1674 1674 DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr,
1675 1675 sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf,
1676 1676 char *, "nfs4_process_referral");
1677 1677
1678 1678 (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1679 1679 }
1680 1680 knc = nfsfsloc.knconf;
1681 1681 if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) ||
1682 1682 (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) {
1683 1683 DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc,
1684 1684 nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral");
1685 1685 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1686 1686 return (-1);
1687 1687 }
1688 1688
1689 1689 /* Send the results back */
1690 1690 *fsloc = nfsfsloc;
1691 1691 *grp = garp;
1692 1692 *res = callres;
1693 1693 return (i);
1694 1694 }
1695 1695
/*
 * Referrals case - need to fetch referral data and then upcall to
 * user-level to get complete mount data.
 *
 * Returns a fully populated ephemeral_servinfo_t (caller frees via
 * nfs4_trigger_esi_destroy()/nfs4_free_args machinery), or NULL on
 * any failure.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
{
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;
	vnode_t *dvp;
	rnode4_t *drp;
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	nfs4_ga_res_t garp;
	char *p;
	char fn[MAXNAMELEN];
	int i, index = -1;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;

	/*
	 * If we're passed in a stub vnode that
	 * isn't a "referral" stub, bail out
	 * and return a failure
	 */
	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
		return (NULL);

	/* we need the parent directory and our name within it */
	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
		return (NULL);

	drp = VTOR4(dvp);
	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
		VN_RELE(dvp);
		return (NULL);
	}

	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
		nfs_rw_exit(&drp->r_rwlock);
		VN_RELE(dvp);
		return (NULL);
	}

	/* fetch fs_locations and resolve a responsive server */
	mi = VTOMI4(dvp);
	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
	    &garp, &callres, &nfsfsloc);
	nfs_rw_exit(&drp->r_rwlock);
	VN_RELE(dvp);
	if (index < 0)
		return (NULL);

	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_REFERRAL;

	/* NUL-terminated copy of the server's utf8string hostname */
	esi->esi_hostname =
	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
	    fsp->server_val->utf8string_len);
	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';

	/* deep-copy the resolved transport address */
	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	bufp->len = nfsfsloc.addr->len;
	bufp->maxlen = nfsfsloc.addr->maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
	esi->esi_addr = bufp;

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;

	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
	    struct nfs_fsl_info *, &nfsfsloc,
	    char *, "nfs4_trigger_esi_create_referral");

	/* deep-copy the knetconfig returned by the nfsmapid upcall */
	svkncp = nfsfsloc.knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly, KNC_STRSIZE);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto,
	    KNC_STRSIZE);
	sikncp->knc_rdev = svkncp->knc_rdev;

	DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf,
	    struct knetconfig *, sikncp,
	    char *, "nfs4_trigger_esi_create_referral");

	esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP);
	bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len);
	esi->esi_syncaddr = NULL;

	/* build "/comp1/comp2/..." from the referral's rootpath components */
	esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	esi->esi_path_len = MAXPATHLEN;
	*p++ = '/';
	for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
		component4 *comp;

		comp = &fsp->rootpath.pathname4_val[i];
		/* If no space, null the string and bail */
		if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN)
			goto err;
		bcopy(comp->utf8string_val, p, comp->utf8string_len);
		p += comp->utf8string_len;
		*p++ = '/';
	}
	/* NUL-terminate: overwrite the trailing '/' (empty path stays "/") */
	if (fsp->rootpath.pathname4_len != 0)
		*(p - 1) = '\0';
	else
		*p = '\0';
	/* shrink the MAXPATHLEN scratch buffer to the exact string size */
	p = esi->esi_path;
	esi->esi_path = strdup(p);
	esi->esi_path_len = strlen(p) + 1;
	kmem_free(p, MAXPATHLEN);

	/* Allocated in nfs4_process_referral() */
	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	return (esi);
err:
	/* undo every allocation made above, then fail */
	kmem_free(esi->esi_path, esi->esi_path_len);
	kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1);
	kmem_free(esi->esi_addr->buf, esi->esi_addr->len);
	kmem_free(esi->esi_addr, sizeof (struct netbuf));
	kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE);
	kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE);
	kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf));
	kmem_free(esi->esi_netname, nfsfsloc.netnm_len);
	kmem_free(esi, sizeof (ephemeral_servinfo_t));
	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
	return (NULL);
}
1834 1834
/*
 * Assemble the args, and call the generic VFS mount function to
 * finally perform the ephemeral mount.
 *
 * stubvp is the stub vnode that triggered the mount; dma carries the
 * ephemeral_servinfo_t and the nfs_args to mount with.  On success,
 * *vfsp is the newly mounted vfs and *newvpp is its root vnode.
 * Returns 0 or an errno (EINVAL if the mount-option string cannot
 * be built).
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr, vnode_t **newvpp)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;
	int i;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	/* skip a leading "." in the stub's fname path */
	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	/* mntpt is zeroed by kmem_zalloc(), so strcat onto it is safe */
	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	/* done with the fn_path() copy; switch to the server-side path */
	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	/* collapse a doubled leading slash */
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			retval = VFS_ROOT(*vfsp, newvpp);
			/*
			 * NOTE(review): this drops domount()'s hold on
			 * *vfsp; VFS_ROOT presumably takes the references
			 * the caller needs — confirm against vfs(9).
			 */
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	/* common exit: release everything allocated above */
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}
1969 1969
/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 *
 * Note that the returned nfs_args aliases data owned by the
 * ephemeral_servinfo_t (addr, syncaddr, netname, knconf, hostname and
 * fh all point into esi), so esi must outlive the returned args.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	/* sv_lock is held as reader until after the secdata copy below */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	/* attribute-cache timeouts, converted from internal units */
	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	/* NFSMNT_PUBLIC is only propagated for mirror mounts */
	if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT)
		nargs->flags |= NFSMNT_PUBLIC;

	/* Do some referral-specific option tweaking */
	if (nargs->flags & NFSMNT_REFERRAL) {
		nargs->flags &= ~NFSMNT_DORDMA;
		nargs->flags |= NFSMNT_TRYRDMA;
	}

	mutex_exit(&mi->mi_lock);

	/*
	 * Security data & negotiation policy.
	 *
	 * For mirror mounts, we need to preserve the parent mount's
	 * preference for security negotiation, translating SV4_TRYSECDEFAULT
	 * to NFSMNT_SECDEFAULT if present.
	 *
	 * For referrals, we always want security negotiation and will
	 * set NFSMNT_SECDEFAULT and we will not copy current secdata.
	 * The reason is that we can't negotiate down from a parent's
	 * Kerberos flavor to AUTH_SYS.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (nargs->flags & NFSMNT_REFERRAL) {
		/* enable negotiation for referral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;
		secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);
		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
		secdata->data = NULL;
	} else if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for mirror mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}
2119 2119
/*
 * Release an nfs_args built by nfs4_trigger_nargs_create():
 * free whatever pointers remain inside it, then the struct itself.
 */
static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}
2132 2132
/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 *
 * mi is the new ephemeral mount's mntinfo4 and mvp is the covered
 * (parent) vnode.  Returns 0 on success, or EBUSY if the tree was
 * torn down (or the parent's ephemeral node vanished) while we raced
 * to get here — the caller decides whether to retry.
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	/* parent's lock is taken before the child's, consistently */
	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether they
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	/* both holds are dropped in nfs4_ephemeral_umount_activate() */
	MI4_HOLD(mi);
	VFS_HOLD(mi->mi_vfsp);
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			VFS_RELE(mi->mi_vfsp);
			MI4_RELE(mi);
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			/* push eph onto the front of prior's child list */
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* push eph onto the front of the root's peer list */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}
2285 2285
2286 2286 /*
2287 2287 * Commit the changes to the ephemeral tree for removing this node.
2288 2288 */
2289 2289 static void
2290 2290 nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
2291 2291 {
2292 2292 nfs4_ephemeral_t *e = eph;
2293 2293 nfs4_ephemeral_t *peer;
2294 2294 nfs4_ephemeral_t *prior;
2295 2295
2296 2296 peer = eph->ne_peer;
2297 2297 prior = e->ne_prior;
2298 2298
2299 2299 /*
2300 2300 * If this branch root was not the
2301 2301 * tree root, then we need to fix back pointers.
2302 2302 */
2303 2303 if (prior) {
2304 2304 if (prior->ne_child == e) {
2305 2305 prior->ne_child = peer;
2306 2306 } else {
2307 2307 prior->ne_peer = peer;
2308 2308 }
2309 2309
2310 2310 if (peer)
2311 2311 peer->ne_prior = prior;
2312 2312 } else if (peer) {
2313 2313 peer->ne_mount->mi_ephemeral_tree->net_root = peer;
2314 2314 peer->ne_prior = NULL;
2315 2315 } else {
2316 2316 e->ne_mount->mi_ephemeral_tree->net_root = NULL;
2317 2317 }
2318 2318 }
2319 2319
/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 *
 * eph is the starting node; isTreeRoot says whether eph is the root
 * of the whole ephemeral tree (in which case eph itself is also
 * unmounted) or just a branch root (in which case the caller removes
 * it).  flag and cr are passed through to umount2_engine().  Returns
 * 0 on success or the first umount error encountered.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior can only be NULL if
		 * and only if it is the root of the
		 * ephemeral tree.
		 *
		 * (Any value assigned to prior while walking
		 * down above is superseded here before use.)
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;
		ASSERT(vfsp != NULL);

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		/* back up and continue pruning from the parent */
		e = prior;
	}

	/* NOTREACHED */
}
2450 2450
2451 2451 /*
2452 2452 * Common code to safely release net_cnt_lock and net_tree_lock
2453 2453 */
2454 2454 void
2455 2455 nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
2456 2456 nfs4_ephemeral_tree_t **pnet)
2457 2457 {
2458 2458 nfs4_ephemeral_tree_t *net = *pnet;
2459 2459
2460 2460 if (*pmust_unlock) {
2461 2461 mutex_enter(&net->net_cnt_lock);
2462 2462 net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
2463 2463 mutex_exit(&net->net_cnt_lock);
2464 2464
2465 2465 mutex_exit(&net->net_tree_lock);
2466 2466
2467 2467 *pmust_unlock = FALSE;
2468 2468 }
2469 2469 }
2470 2470
/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we can not nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
		/* drop the holds taken in nfs4_record_ephemeral_mount() */
		VFS_RELE(mi->mi_vfsp);
		MI4_RELE(mi);
	}
	mutex_exit(&mi->mi_lock);

	/* release the tree locks if we are the ones holding them */
	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}
2506 2506
/*
 * Unmount an ephemeral node.
 *
 * Note that if this code fails, then it must unlock.
 *
 * If it succeeds, then the caller must be prepared to do so.
 *
 * On a successful return with *pmust_unlock set, the caller owns
 * net_tree_lock and is expected to release it (typically via
 * nfs4_ephemeral_umount_activate()).  Returns 0, EBUSY if the tree
 * is busy or torn down underneath us, or the first umount error.
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	/* no ephemeral node means we are unmounting the enclosing root */
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	/* common failure exit: drop the tree locks if we hold them */
	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}
2785 2785
2786 2786 /*
2787 2787 * Do the umount and record any error in the parent.
2788 2788 */
2789 2789 static void
2790 2790 nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
2791 2791 nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
2792 2792 {
2793 2793 int error;
2794 2794
2795 2795 /*
2796 2796 * Only act on if the fs is still mounted.
2797 2797 */
2798 2798 if (vfsp == NULL)
2799 2799 return;
2800 2800
2801 2801 error = umount2_engine(vfsp, flag, kcred, FALSE);
2802 2802 if (error) {
2803 2803 if (prior) {
2804 2804 if (prior->ne_child == e)
2805 2805 prior->ne_state |=
2806 2806 NFS4_EPHEMERAL_CHILD_ERROR;
2807 2807 else
2808 2808 prior->ne_state |=
2809 2809 NFS4_EPHEMERAL_PEER_ERROR;
2810 2810 }
2811 2811 }
2812 2812 }
2813 2813
/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	/* Local list of dead trees to free after we drop the forest lock. */
	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;

				vfsp = mi->mi_vfsp;
				ASSERT(vfsp != NULL);

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Iterative depth-first traversal; each node's ne_state
		 * records how far we have gotten with it (visit child,
		 * then sibling, then process the node itself).
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				/*
				 * NOTE(review): this branch assigns the
				 * error state with '=' while the
				 * CHILD_ERROR branch above uses '|=' —
				 * confirm the overwrite here is intended.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node: unmount it if we are ignoring the
			 * timers or if it has aged past its mount_to.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				if (vfsp != NULL)
					VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up into the leaf
				 * nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			/* Back up toward the root. */
			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			/* Unlink the dead tree from the forest. */
			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;

			VFS_RELE(net->net_mount->mi_vfsp);
			MI4_RELE(net->net_mount);

			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/* Now free the trees we harvested above, outside all locks. */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}
3072 3072
3073 3073 /*
3074 3074 * This is the thread which decides when the harvesting
3075 3075 * can proceed and when to kill it off for this zone.
3076 3076 */
3077 3077 static void
3078 3078 nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
3079 3079 {
3080 3080 clock_t timeleft;
3081 3081 zone_t *zone = curproc->p_zone;
3082 3082
3083 3083 for (;;) {
3084 3084 timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
3085 3085 nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);
3086 3086
3087 3087 /*
3088 3088 * zone is exiting...
3089 3089 */
3090 3090 if (timeleft != -1) {
3091 3091 ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
3092 3092 zthread_exit();
3093 3093 /* NOTREACHED */
3094 3094 }
3095 3095
3096 3096 /*
3097 3097 * Only bother scanning if there is potential
3098 3098 * work to be done.
3099 3099 */
3100 3100 if (ntg->ntg_forest == NULL)
3101 3101 continue;
3102 3102
3103 3103 /*
3104 3104 * Now scan the list and get rid of everything which
3105 3105 * is old.
3106 3106 */
3107 3107 nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
3108 3108 }
3109 3109
3110 3110 /* NOTREACHED */
3111 3111 }
3112 3112
/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 * (cheap unlocked check before taking the mutex)
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	/* Re-check under the lock; another thread may have won the race. */
	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	/* Flag set under the lock so at most one thread is created. */
	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}
3147 3147
3148 3148 /*ARGSUSED*/
3149 3149 static void *
3150 3150 nfs4_ephemeral_zsd_create(zoneid_t zoneid)
3151 3151 {
3152 3152 nfs4_trigger_globals_t *ntg;
3153 3153
3154 3154 ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
3155 3155 ntg->ntg_thread_started = FALSE;
3156 3156
3157 3157 /*
3158 3158 * This is the default....
3159 3159 */
3160 3160 ntg->ntg_mount_to = nfs4_trigger_thread_timer;
3161 3161
3162 3162 mutex_init(&ntg->ntg_forest_lock, NULL,
3163 3163 MUTEX_DEFAULT, NULL);
3164 3164
3165 3165 return (ntg);
3166 3166 }
3167 3167
3168 3168 /*
3169 3169 * Try a nice gentle walk down the forest and convince
3170 3170 * all of the trees to gracefully give it up.
3171 3171 */
3172 3172 /*ARGSUSED*/
3173 3173 static void
3174 3174 nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
3175 3175 {
3176 3176 nfs4_trigger_globals_t *ntg = arg;
3177 3177
3178 3178 if (!ntg)
3179 3179 return;
3180 3180
3181 3181 nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
3182 3182 }
3183 3183
3184 3184 /*
3185 3185 * Race along the forest and rip all of the trees out by
3186 3186 * their rootballs!
3187 3187 */
3188 3188 /*ARGSUSED*/
3189 3189 static void
3190 3190 nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
3191 3191 {
3192 3192 nfs4_trigger_globals_t *ntg = arg;
3193 3193
3194 3194 if (!ntg)
3195 3195 return;
3196 3196
3197 3197 nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);
3198 3198
3199 3199 mutex_destroy(&ntg->ntg_forest_lock);
3200 3200 kmem_free(ntg, sizeof (*ntg));
3201 3201 }
3202 3202
/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_fini(void)
{
	/* Delete the key first so no new ZSD callbacks can run. */
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}
3213 3213
/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_init(void)
{
	/* The lock must exist before any zone's ZSD callbacks can fire. */
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}
3227 3227
3228 3228 /*
3229 3229 * nfssys() calls this function to set the per-zone
3230 3230 * value of mount_to to drive when an ephemeral mount is
3231 3231 * timed out. Each mount will grab a copy of this value
3232 3232 * when mounted.
3233 3233 */
3234 3234 void
3235 3235 nfs4_ephemeral_set_mount_to(uint_t mount_to)
3236 3236 {
3237 3237 nfs4_trigger_globals_t *ntg;
3238 3238 zone_t *zone = curproc->p_zone;
3239 3239
3240 3240 ntg = zone_getspecific(nfs4_ephemeral_key, zone);
3241 3241
3242 3242 ntg->ntg_mount_to = mount_to;
3243 3243 }
3244 3244
3245 3245 /*
3246 3246 * Walk the list of v4 mount options; if they are currently set in vfsp,
3247 3247 * append them to a new comma-separated mount option string, and return it.
3248 3248 *
3249 3249 * Caller should free by calling nfs4_trigger_destroy_mntopts().
3250 3250 */
3251 3251 static char *
3252 3252 nfs4_trigger_create_mntopts(vfs_t *vfsp)
3253 3253 {
3254 3254 uint_t i;
3255 3255 char *mntopts;
3256 3256 struct vfssw *vswp;
3257 3257 mntopts_t *optproto;
3258 3258
3259 3259 mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);
3260 3260
3261 3261 /* get the list of applicable mount options for v4; locks *vswp */
3262 3262 vswp = vfs_getvfssw(MNTTYPE_NFS4);
3263 3263 optproto = &vswp->vsw_optproto;
3264 3264
3265 3265 for (i = 0; i < optproto->mo_count; i++) {
3266 3266 struct mntopt *mop = &optproto->mo_list[i];
3267 3267
3268 3268 if (mop->mo_flags & MO_EMPTY)
3269 3269 continue;
3270 3270
3271 3271 if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
3272 3272 kmem_free(mntopts, MAX_MNTOPT_STR);
3273 3273 vfs_unrefvfssw(vswp);
3274 3274 return (NULL);
3275 3275 }
3276 3276 }
3277 3277
3278 3278 vfs_unrefvfssw(vswp);
3279 3279
3280 3280 /*
3281 3281 * MNTOPT_XATTR is not in the v4 mount opt proto list,
3282 3282 * and it may only be passed via MS_OPTIONSTR, so we
3283 3283 * must handle it here.
3284 3284 *
3285 3285 * Ideally, it would be in the list, but NFS does not specify its
3286 3286 * own opt proto list, it uses instead the default one. Since
3287 3287 * not all filesystems support extended attrs, it would not be
3288 3288 * appropriate to add it there.
3289 3289 */
3290 3290 if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
3291 3291 nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
3292 3292 kmem_free(mntopts, MAX_MNTOPT_STR);
3293 3293 return (NULL);
3294 3294 }
3295 3295
3296 3296 return (mntopts);
3297 3297 }
3298 3298
3299 3299 static void
3300 3300 nfs4_trigger_destroy_mntopts(char *mntopts)
3301 3301 {
3302 3302 if (mntopts)
3303 3303 kmem_free(mntopts, MAX_MNTOPT_STR);
3304 3304 }
3305 3305
3306 3306 /*
3307 3307 * Check a single mount option (optname). Add to mntopts if it is set in VFS.
3308 3308 */
3309 3309 static int
3310 3310 nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
3311 3311 {
3312 3312 if (mntopts == NULL || optname == NULL || vfsp == NULL)
3313 3313 return (EINVAL);
3314 3314
3315 3315 if (vfs_optionisset(vfsp, optname, NULL)) {
3316 3316 size_t mntoptslen = strlen(mntopts);
3317 3317 size_t optnamelen = strlen(optname);
3318 3318
3319 3319 /* +1 for ',', +1 for NUL */
3320 3320 if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
3321 3321 return (EOVERFLOW);
3322 3322
3323 3323 /* first or subsequent mount option? */
3324 3324 if (*mntopts != '\0')
3325 3325 (void) strcat(mntopts, ",");
3326 3326
3327 3327 (void) strcat(mntopts, optname);
3328 3328 }
3329 3329
3330 3330 return (0);
3331 3331 }
3332 3332
/*
 * Liveness probe: issue a NULL RPC (RFS_NULL) to the NFSv4 program
 * at the given transport/address and return the RPC status.
 *
 * nointr suppresses signal delivery for the duration of the call,
 * for servers mounted without interruptible semantics.
 */
static enum clnt_stat
nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr)
{
	int retries;
	uint_t max_msgsize;
	enum clnt_stat status;
	CLIENT *cl;
	struct timeval timeout;

	/* as per recov_newserver() */
	max_msgsize = 0;
	retries = 1;
	timeout.tv_sec = 2;
	timeout.tv_usec = 0;

	if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4,
	    max_msgsize, retries, CRED(), &cl) != 0)
		return (RPC_FAILED);

	/* Block signals around the call when non-interruptible. */
	if (nointr)
		cl->cl_nosignal = TRUE;
	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
	    timeout);
	if (nointr)
		cl->cl_nosignal = FALSE;

	/* Tear down the auth handle and client in the required order. */
	AUTH_DESTROY(cl->cl_auth);
	CLNT_DESTROY(cl);

	return (status);
}
3364 3364
/*
 * Convenience wrapper: ping the server described by svp using its
 * stored knetconfig and address.
 */
static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
{
	return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr));
}
↓ open down ↓ |
3058 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX