Print this page
4827 nfs4: slow file locking
4837 NFSv4 client lock retry delay upper limit should be shorter
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_rnode.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_rnode.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
33 33 */
34 34
35 35 #include <sys/param.h>
36 36 #include <sys/types.h>
37 37 #include <sys/systm.h>
38 38 #include <sys/cred.h>
39 39 #include <sys/proc.h>
40 40 #include <sys/user.h>
41 41 #include <sys/time.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/socket.h>
46 46 #include <sys/uio.h>
47 47 #include <sys/tiuser.h>
48 48 #include <sys/swap.h>
49 49 #include <sys/errno.h>
50 50 #include <sys/debug.h>
51 51 #include <sys/kmem.h>
52 52 #include <sys/kstat.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/vtrace.h>
55 55 #include <sys/session.h>
56 56 #include <sys/dnlc.h>
57 57 #include <sys/bitmap.h>
58 58 #include <sys/acl.h>
59 59 #include <sys/ddi.h>
60 60 #include <sys/pathname.h>
61 61 #include <sys/flock.h>
62 62 #include <sys/dirent.h>
63 63 #include <sys/flock.h>
64 64 #include <sys/callb.h>
65 65 #include <sys/sdt.h>
66 66
67 67 #include <vm/pvn.h>
68 68
69 69 #include <rpc/types.h>
70 70 #include <rpc/xdr.h>
71 71 #include <rpc/auth.h>
72 72 #include <rpc/rpcsec_gss.h>
73 73 #include <rpc/clnt.h>
74 74
75 75 #include <nfs/nfs.h>
76 76 #include <nfs/nfs_clnt.h>
77 77 #include <nfs/nfs_acl.h>
78 78
79 79 #include <nfs/nfs4.h>
80 80 #include <nfs/rnode4.h>
81 81 #include <nfs/nfs4_clnt.h>
82 82
83 83 /*
84 84 * The hash queues for the access to active and cached rnodes
85 85 * are organized as doubly linked lists. A reader/writer lock
86 86 * for each hash bucket is used to control access and to synchronize
87 87 * lookups, additions, and deletions from the hash queue.
88 88 *
89 89 * The rnode freelist is organized as a doubly linked list with
90 90 * a head pointer. Additions and deletions are synchronized via
91 91 * a single mutex.
92 92 *
93 93 * In order to add an rnode to the free list, it must be hashed into
94 94 * a hash queue and the exclusive lock to the hash queue be held.
95 95 * If an rnode is not hashed into a hash queue, then it is destroyed
96 96 * because it represents no valuable information that can be reused
97 97 * about the file. The exclusive lock to the hash queue must be
98 98 * held in order to prevent a lookup in the hash queue from finding
99 99 * the rnode and using it and assuming that the rnode is not on the
100 100 * freelist. The lookup in the hash queue will have the hash queue
101 101 * locked, either exclusive or shared.
102 102 *
103 103 * The vnode reference count for each rnode is not allowed to drop
104 104 * below 1. This prevents external entities, such as the VM
105 105 * subsystem, from acquiring references to vnodes already on the
106 106 * freelist and then trying to place them back on the freelist
107 107 * when their reference is released. This means that when an
108 108 * rnode is looked up in the hash queues, then either the rnode
109 109 * is removed from the freelist and that reference is transferred to
110 110 * the new reference or the vnode reference count must be incremented
111 111 * accordingly. The mutex for the freelist must be held in order to
112 112 * accurately test to see if the rnode is on the freelist or not.
113 113 * The hash queue lock might be held shared and it is possible that
114 114 * two different threads may race to remove the rnode from the
115 115 * freelist. This race can be resolved by holding the mutex for the
116 116 * freelist. Please note that the mutex for the freelist does not
117 117 * need to be held if the rnode is not on the freelist. It can not be
118 118 * placed on the freelist due to the requirement that the thread
119 119 * putting the rnode on the freelist must hold the exclusive lock
120 120 * to the hash queue and the thread doing the lookup in the hash
121 121 * queue is holding either a shared or exclusive lock to the hash
122 122 * queue.
123 123 *
124 124 * The lock ordering is:
125 125 *
126 126 * hash bucket lock -> vnode lock
127 127 * hash bucket lock -> freelist lock -> r_statelock
128 128 */
/* Table of rnode hash buckets; indexed by the value of rtable4hash(). */
r4hashq_t *rtable4;

/* Must be held to add to, remove from, or inspect the rnode freelist. */
static kmutex_t rp4freelist_lock;
/* Head of the circular, doubly linked rnode freelist; NULL when empty. */
static rnode4_t *rp4freelist = NULL;
/* Incremented by make_rnode4() each time it allocates a fresh rnode. */
static long rnode4_new = 0;
/* NOTE(review): presumably the number of buckets in rtable4 -- confirm. */
int rtable4size;
/* Mask rtable4hash() applies to reduce a hash value to a bucket index. */
static int rtable4mask;
/* kmem cache from which make_rnode4() allocates new rnodes. */
static struct kmem_cache *rnode4_cache;
/* NOTE(review): not referenced in the code visible here -- confirm use. */
static int rnode4_hashlen = 4;

static void r4inactive(rnode4_t *, cred_t *);
static vnode_t *make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
    struct vnodeops *,
    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
    cred_t *),
    int *, cred_t *);
static void rp4_rmfree(rnode4_t *);
int nfs4_free_data_reclaim(rnode4_t *);
static int nfs4_active_data_reclaim(rnode4_t *);
static int nfs4_free_reclaim(void);
static int nfs4_active_reclaim(void);
static int nfs4_rnode_reclaim(void);
static void nfs4_reclaim(void *);
static int isrootfh(nfs4_sharedfh_t *, rnode4_t *);
static void uninit_rnode4(rnode4_t *);
static void destroy_rnode4(rnode4_t *);
static void r4_stub_set(rnode4_t *, nfs4_stub_type_t);

#ifdef DEBUG
static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
static int nfs4_rnode_debug = 0;
/* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
static int nfs4_rnode_nofreelist = 0;
/* give messages on colliding shared filehandles */
static void r4_dup_check(rnode4_t *, vfs_t *);
#endif
165 165
166 166 /*
167 167 * If the vnode has pages, run the list and check for any that are
168 168 * still dangling. We call this routine before putting an rnode on
169 169 * the free list.
170 170 */
171 171 static int
172 172 nfs4_dross_pages(vnode_t *vp)
173 173 {
174 174 page_t *pp;
175 175 kmutex_t *vphm;
176 176
177 177 vphm = page_vnode_mutex(vp);
178 178 mutex_enter(vphm);
179 179 if ((pp = vp->v_pages) != NULL) {
180 180 do {
181 181 if (pp->p_hash != PVN_VPLIST_HASH_TAG &&
182 182 pp->p_fsdata != C_NOCOMMIT) {
183 183 mutex_exit(vphm);
184 184 return (1);
185 185 }
186 186 } while ((pp = pp->p_vpnext) != vp->v_pages);
187 187 }
188 188 mutex_exit(vphm);
189 189
190 190 return (0);
191 191 }
192 192
193 193 /*
194 194 * Flush any pages left on this rnode.
195 195 */
196 196 static void
197 197 r4flushpages(rnode4_t *rp, cred_t *cr)
198 198 {
199 199 vnode_t *vp;
200 200 int error;
201 201
202 202 /*
203 203 * Before freeing anything, wait until all asynchronous
204 204 * activity is done on this rnode. This will allow all
205 205 * asynchronous read ahead and write behind i/o's to
206 206 * finish.
207 207 */
208 208 mutex_enter(&rp->r_statelock);
209 209 while (rp->r_count > 0)
210 210 cv_wait(&rp->r_cv, &rp->r_statelock);
211 211 mutex_exit(&rp->r_statelock);
212 212
213 213 /*
214 214 * Flush and invalidate all pages associated with the vnode.
215 215 */
216 216 vp = RTOV4(rp);
217 217 if (nfs4_has_pages(vp)) {
218 218 ASSERT(vp->v_type != VCHR);
219 219 if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
220 220 error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
221 221 if (error && (error == ENOSPC || error == EDQUOT)) {
222 222 mutex_enter(&rp->r_statelock);
223 223 if (!rp->r_error)
224 224 rp->r_error = error;
225 225 mutex_exit(&rp->r_statelock);
226 226 }
227 227 }
228 228 nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
229 229 }
230 230 }
231 231
232 232 /*
233 233 * Free the resources associated with an rnode.
234 234 */
235 235 static void
236 236 r4inactive(rnode4_t *rp, cred_t *cr)
237 237 {
238 238 vnode_t *vp;
239 239 char *contents;
240 240 int size;
241 241 vsecattr_t *vsp;
242 242 vnode_t *xattr;
243 243
244 244 r4flushpages(rp, cr);
245 245
246 246 vp = RTOV4(rp);
247 247
248 248 /*
249 249 * Free any held caches which may be
250 250 * associated with this rnode.
251 251 */
252 252 mutex_enter(&rp->r_statelock);
253 253 contents = rp->r_symlink.contents;
254 254 size = rp->r_symlink.size;
255 255 rp->r_symlink.contents = NULL;
256 256 vsp = rp->r_secattr;
257 257 rp->r_secattr = NULL;
258 258 xattr = rp->r_xattr_dir;
259 259 rp->r_xattr_dir = NULL;
260 260 mutex_exit(&rp->r_statelock);
261 261
262 262 /*
263 263 * Free the access cache entries.
264 264 */
265 265 (void) nfs4_access_purge_rp(rp);
266 266
267 267 /*
268 268 * Free the readdir cache entries.
269 269 */
270 270 nfs4_purge_rddir_cache(vp);
271 271
272 272 /*
273 273 * Free the symbolic link cache.
274 274 */
275 275 if (contents != NULL) {
276 276
277 277 kmem_free((void *)contents, size);
278 278 }
279 279
280 280 /*
281 281 * Free any cached ACL.
282 282 */
283 283 if (vsp != NULL)
284 284 nfs4_acl_free_cache(vsp);
285 285
286 286 /*
287 287 * Release the cached xattr_dir
288 288 */
289 289 if (xattr != NULL)
290 290 VN_RELE(xattr);
291 291 }
292 292
293 293 /*
294 294 * We have seen a case that the fh passed in is for "." which
295 295 * should be a VROOT node, however, the fh is different from the
296 296 * root fh stored in the mntinfo4_t. The invalid fh might be
297 297 * from a misbehaved server and will panic the client system at
298 298 * a later time. To avoid the panic, we drop the bad fh, use
299 299 * the root fh from mntinfo4_t, and print an error message
300 300 * for attention.
301 301 */
302 302 nfs4_sharedfh_t *
303 303 badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
304 304 int *wasbad)
305 305 {
306 306 char *s;
307 307
308 308 *wasbad = 0;
309 309 s = fn_name(nm);
310 310 ASSERT(strcmp(s, "..") != 0);
311 311
312 312 if ((s[0] == '.' && s[1] == '\0') && fh &&
313 313 !SFH4_SAME(mi->mi_rootfh, fh)) {
314 314 #ifdef DEBUG
315 315 nfs4_fhandle_t fhandle;
316 316
317 317 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
318 318 "Server %s returns a different "
319 319 "root filehandle for the path %s:",
320 320 mi->mi_curr_serv->sv_hostname,
321 321 mi->mi_curr_serv->sv_path);
322 322
323 323 /* print the bad fh */
324 324 fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
325 325 bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
326 326 fhandle.fh_len);
327 327 nfs4_printfhandle(&fhandle);
328 328
329 329 /* print mi_rootfh */
330 330 fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
331 331 bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
332 332 fhandle.fh_len);
333 333 nfs4_printfhandle(&fhandle);
334 334 #endif
335 335 /* use mi_rootfh instead; fh will be rele by the caller */
336 336 fh = mi->mi_rootfh;
337 337 *wasbad = 1;
338 338 }
339 339
340 340 kmem_free(s, MAXNAMELEN);
341 341 return (fh);
342 342 }
343 343
344 344 void
345 345 r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
346 346 hrtime_t t, cred_t *cr, int index)
347 347 {
348 348 int is_stub;
349 349 vattr_t *attr;
350 350 /*
351 351 * Don't add to attrcache if time overflow, but
352 352 * no need to check because either attr is null or the time
353 353 * values in it were processed by nfs4_time_ntov(), which checks
354 354 * for time overflows.
355 355 */
356 356 attr = garp ? &garp->n4g_va : NULL;
357 357
358 358 if (attr) {
359 359 if (!newnode) {
360 360 rw_exit(&rtable4[index].r_lock);
361 361 #ifdef DEBUG
362 362 if (vp->v_type != attr->va_type &&
363 363 vp->v_type != VNON && attr->va_type != VNON) {
364 364 zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
365 365 "makenfs4node: type (%d) doesn't "
366 366 "match type of found node at %p (%d)",
367 367 attr->va_type, (void *)vp, vp->v_type);
368 368 }
369 369 #endif
370 370 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
371 371 } else {
372 372 rnode4_t *rp = VTOR4(vp);
373 373
374 374 vp->v_type = attr->va_type;
375 375 vp->v_rdev = attr->va_rdev;
376 376
377 377 /*
378 378 * Turn this object into a "stub" object if we
379 379 * crossed an underlying server fs boundary.
380 380 * To make this check, during mount we save the
381 381 * fsid of the server object being mounted.
382 382 * Here we compare this object's server fsid
383 383 * with the fsid we saved at mount. If they
384 384 * are different, we crossed server fs boundary.
385 385 *
386 386 * The stub type is set (or not) at rnode
387 387 * creation time and it never changes for life
388 388 * of the rnode.
389 389 *
390 390 * This stub will be for a mirror-mount, rather than
391 391 * a referral (the latter also sets R4SRVSTUB).
392 392 *
393 393 * The stub type is also set during RO failover,
394 394 * nfs4_remap_file().
395 395 *
396 396 * We don't bother with taking r_state_lock to
397 397 * set the stub type because this is a new rnode
398 398 * and we're holding the hash bucket r_lock RW_WRITER.
399 399 * No other thread could have obtained access
400 400 * to this rnode.
401 401 */
402 402 is_stub = 0;
403 403 if (garp->n4g_fsid_valid) {
404 404 fattr4_fsid ga_fsid = garp->n4g_fsid;
405 405 servinfo4_t *svp = rp->r_server;
406 406
407 407 rp->r_srv_fsid = ga_fsid;
408 408
409 409 (void) nfs_rw_enter_sig(&svp->sv_lock,
410 410 RW_READER, 0);
411 411 if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
412 412 is_stub = 1;
413 413 nfs_rw_exit(&svp->sv_lock);
414 414 }
415 415
416 416 if (is_stub)
417 417 r4_stub_mirrormount(rp);
418 418 else
419 419 r4_stub_none(rp);
420 420
421 421 /* Can not cache partial attr */
422 422 if (attr->va_mask == AT_ALL)
423 423 nfs4_attrcache_noinval(vp, garp, t);
424 424 else
425 425 PURGE_ATTRCACHE4(vp);
426 426
427 427 rw_exit(&rtable4[index].r_lock);
428 428 }
429 429 } else {
430 430 if (newnode) {
431 431 PURGE_ATTRCACHE4(vp);
432 432 }
433 433 rw_exit(&rtable4[index].r_lock);
434 434 }
435 435 }
436 436
437 437 /*
438 438 * Find or create an rnode based primarily on filehandle. To be
439 439 * used when dvp (vnode for parent directory) is not available;
440 440 * otherwise, makenfs4node() should be used.
441 441 *
442 442 * The nfs4_fname_t argument *npp is consumed and nulled out.
443 443 */
444 444
445 445 vnode_t *
446 446 makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
447 447 nfs4_fname_t **npp, nfs4_ga_res_t *garp,
448 448 mntinfo4_t *mi, cred_t *cr, hrtime_t t)
449 449 {
450 450 vfs_t *vfsp = mi->mi_vfsp;
451 451 int newnode = 0;
452 452 vnode_t *vp;
453 453 rnode4_t *rp;
454 454 svnode_t *svp;
455 455 nfs4_fname_t *name, *svpname;
456 456 int index;
457 457
458 458 ASSERT(npp && *npp);
459 459 name = *npp;
460 460 *npp = NULL;
461 461
462 462 index = rtable4hash(sfh);
463 463 rw_enter(&rtable4[index].r_lock, RW_READER);
464 464
465 465 vp = make_rnode4(sfh, &rtable4[index], vfsp,
466 466 nfs4_vnodeops, nfs4_putapage, &newnode, cr);
467 467
468 468 svp = VTOSV(vp);
469 469 rp = VTOR4(vp);
470 470 if (newnode) {
471 471 svp->sv_forw = svp->sv_back = svp;
472 472 svp->sv_name = name;
473 473 if (psfh != NULL)
474 474 sfh4_hold(psfh);
475 475 svp->sv_dfh = psfh;
476 476 } else {
477 477 /*
478 478 * It is possible that due to a server
479 479 * side rename fnames have changed.
480 480 * update the fname here.
481 481 */
482 482 mutex_enter(&rp->r_svlock);
483 483 svpname = svp->sv_name;
484 484 if (svp->sv_name != name) {
485 485 svp->sv_name = name;
486 486 mutex_exit(&rp->r_svlock);
487 487 fn_rele(&svpname);
488 488 } else {
489 489 mutex_exit(&rp->r_svlock);
490 490 fn_rele(&name);
491 491 }
492 492 }
493 493
494 494 ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
495 495 r4_do_attrcache(vp, garp, newnode, t, cr, index);
496 496 ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
497 497
498 498 return (vp);
499 499 }
500 500
501 501 /*
502 502 * Find or create a vnode for the given filehandle, filesystem, parent, and
503 503 * name. The reference to nm is consumed, so the caller must first do an
504 504 * fn_hold() if it wants to continue using nm after this call.
505 505 */
506 506 vnode_t *
507 507 makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
508 508 hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
509 509 {
510 510 vnode_t *vp;
511 511 int newnode;
512 512 int index;
513 513 mntinfo4_t *mi = VFTOMI4(vfsp);
514 514 int had_badfh = 0;
515 515 rnode4_t *rp;
516 516
517 517 ASSERT(dvp != NULL);
518 518
519 519 fh = badrootfh_check(fh, nm, mi, &had_badfh);
520 520
521 521 index = rtable4hash(fh);
522 522 rw_enter(&rtable4[index].r_lock, RW_READER);
523 523
524 524 /*
525 525 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
526 526 */
527 527 vp = make_rnode4(fh, &rtable4[index], vfsp, nfs4_vnodeops,
528 528 nfs4_putapage, &newnode, cr);
529 529
530 530 rp = VTOR4(vp);
531 531 sv_activate(&vp, dvp, &nm, newnode);
532 532 if (dvp->v_flag & V_XATTRDIR) {
533 533 mutex_enter(&rp->r_statelock);
534 534 rp->r_flags |= R4ISXATTR;
535 535 mutex_exit(&rp->r_statelock);
536 536 }
537 537
538 538 /* if getting a bad file handle, do not cache the attributes. */
539 539 if (had_badfh) {
540 540 rw_exit(&rtable4[index].r_lock);
541 541 return (vp);
542 542 }
543 543
544 544 ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
545 545 r4_do_attrcache(vp, garp, newnode, t, cr, index);
546 546 ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
547 547
548 548 return (vp);
549 549 }
550 550
551 551 /*
552 552 * Hash on address of filehandle object.
553 553 * XXX totally untuned.
554 554 */
555 555
556 556 int
557 557 rtable4hash(nfs4_sharedfh_t *fh)
558 558 {
559 559 return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
560 560 }
561 561
562 562 /*
563 563 * Find or create the vnode for the given filehandle and filesystem.
564 564 * *newnode is set to zero if the vnode already existed; non-zero if it had
565 565 * to be created.
566 566 *
567 567 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
568 568 */
569 569
570 570 static vnode_t *
571 571 make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
572 572 struct vnodeops *vops,
573 573 int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
574 574 int *newnode, cred_t *cr)
575 575 {
576 576 rnode4_t *rp;
577 577 rnode4_t *trp;
578 578 vnode_t *vp;
579 579 mntinfo4_t *mi;
580 580
581 581 ASSERT(RW_READ_HELD(&rhtp->r_lock));
582 582
583 583 mi = VFTOMI4(vfsp);
584 584
585 585 start:
586 586 if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
587 587 vp = RTOV4(rp);
588 588 *newnode = 0;
589 589 return (vp);
590 590 }
591 591 rw_exit(&rhtp->r_lock);
592 592
593 593 mutex_enter(&rp4freelist_lock);
594 594
595 595 if (rp4freelist != NULL && rnode4_new >= nrnode) {
596 596 rp = rp4freelist;
597 597 rp4_rmfree(rp);
598 598 mutex_exit(&rp4freelist_lock);
599 599
600 600 vp = RTOV4(rp);
601 601
602 602 if (rp->r_flags & R4HASHED) {
603 603 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
604 604 mutex_enter(&vp->v_lock);
605 605 if (vp->v_count > 1) {
606 606 vp->v_count--;
607 607 mutex_exit(&vp->v_lock);
608 608 rw_exit(&rp->r_hashq->r_lock);
609 609 rw_enter(&rhtp->r_lock, RW_READER);
610 610 goto start;
611 611 }
612 612 mutex_exit(&vp->v_lock);
613 613 rp4_rmhash_locked(rp);
614 614 rw_exit(&rp->r_hashq->r_lock);
615 615 }
616 616
617 617 r4inactive(rp, cr);
618 618
619 619 mutex_enter(&vp->v_lock);
620 620 if (vp->v_count > 1) {
621 621 vp->v_count--;
622 622 mutex_exit(&vp->v_lock);
623 623 rw_enter(&rhtp->r_lock, RW_READER);
624 624 goto start;
625 625 }
626 626 mutex_exit(&vp->v_lock);
627 627 vn_invalid(vp);
628 628
629 629 /*
630 630 * destroy old locks before bzero'ing and
631 631 * recreating the locks below.
632 632 */
633 633 uninit_rnode4(rp);
634 634
635 635 /*
636 636 * Make sure that if rnode is recycled then
637 637 * VFS count is decremented properly before
638 638 * reuse.
639 639 */
640 640 VFS_RELE(vp->v_vfsp);
641 641 vn_reinit(vp);
642 642 } else {
643 643 vnode_t *new_vp;
644 644
645 645 mutex_exit(&rp4freelist_lock);
646 646
647 647 rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
648 648 new_vp = vn_alloc(KM_SLEEP);
649 649
650 650 atomic_add_long((ulong_t *)&rnode4_new, 1);
651 651 #ifdef DEBUG
652 652 clstat4_debug.nrnode.value.ui64++;
653 653 #endif
654 654 vp = new_vp;
655 655 }
656 656
657 657 bzero(rp, sizeof (*rp));
658 658 rp->r_vnode = vp;
659 659 nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
660 660 nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
661 661 mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
662 662 mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
663 663 mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
664 664 mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
665 665 rp->created_v4 = 0;
666 666 list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
667 667 offsetof(nfs4_open_stream_t, os_node));
668 668 rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
669 669 rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
670 670 cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
671 671 cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
672 672 rp->r_flags = R4READDIRWATTR;
673 673 rp->r_fh = fh;
674 674 rp->r_hashq = rhtp;
675 675 sfh4_hold(rp->r_fh);
676 676 rp->r_server = mi->mi_curr_serv;
677 677 rp->r_deleg_type = OPEN_DELEGATE_NONE;
678 678 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
679 679 nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);
680 680
681 681 rddir4_cache_create(rp);
682 682 rp->r_putapage = putapage;
683 683 vn_setops(vp, vops);
684 684 vp->v_data = (caddr_t)rp;
685 685 vp->v_vfsp = vfsp;
686 686 VFS_HOLD(vfsp);
687 687 vp->v_type = VNON;
688 688 vp->v_flag |= VMODSORT;
689 689 if (isrootfh(fh, rp))
690 690 vp->v_flag = VROOT;
691 691 vn_exists(vp);
692 692
693 693 /*
694 694 * There is a race condition if someone else
695 695 * alloc's the rnode while no locks are held, so we
696 696 * check again and recover if found.
697 697 */
698 698 rw_enter(&rhtp->r_lock, RW_WRITER);
699 699 if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
700 700 vp = RTOV4(trp);
701 701 *newnode = 0;
702 702 rw_exit(&rhtp->r_lock);
703 703 rp4_addfree(rp, cr);
704 704 rw_enter(&rhtp->r_lock, RW_READER);
705 705 return (vp);
706 706 }
707 707 rp4_addhash(rp);
708 708 *newnode = 1;
709 709 return (vp);
710 710 }
711 711
712 712 static void
713 713 uninit_rnode4(rnode4_t *rp)
714 714 {
715 715 vnode_t *vp = RTOV4(rp);
716 716
717 717 ASSERT(rp != NULL);
718 718 ASSERT(vp != NULL);
719 719 ASSERT(vp->v_count == 1);
720 720 ASSERT(rp->r_count == 0);
721 721 ASSERT(rp->r_mapcnt == 0);
722 722 if (rp->r_flags & R4LODANGLERS) {
723 723 nfs4_flush_lock_owners(rp);
724 724 }
725 725 ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
726 726 ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
727 727 ASSERT(!(rp->r_flags & R4HASHED));
728 728 ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
729 729 nfs4_clear_open_streams(rp);
730 730 list_destroy(&rp->r_open_streams);
731 731
732 732 /*
733 733 * Destroy the rddir cache first since we need to grab the r_statelock.
734 734 */
735 735 mutex_enter(&rp->r_statelock);
736 736 rddir4_cache_destroy(rp);
737 737 mutex_exit(&rp->r_statelock);
738 738 sv_uninit(&rp->r_svnode);
739 739 sfh4_rele(&rp->r_fh);
740 740 nfs_rw_destroy(&rp->r_rwlock);
741 741 nfs_rw_destroy(&rp->r_lkserlock);
742 742 mutex_destroy(&rp->r_statelock);
743 743 mutex_destroy(&rp->r_statev4_lock);
744 744 mutex_destroy(&rp->r_os_lock);
745 745 cv_destroy(&rp->r_cv);
746 746 cv_destroy(&rp->r_commit.c_cv);
747 747 nfs_rw_destroy(&rp->r_deleg_recall_lock);
748 748 if (rp->r_flags & R4DELMAPLIST)
749 749 list_destroy(&rp->r_indelmap);
750 750 }
751 751
752 752 /*
753 753 * Put an rnode on the free list.
754 754 *
755 755 * Rnodes which were allocated above and beyond the normal limit
756 756 * are immediately freed.
757 757 */
758 758 void
759 759 rp4_addfree(rnode4_t *rp, cred_t *cr)
760 760 {
761 761 vnode_t *vp;
762 762 vnode_t *xattr;
763 763 struct vfs *vfsp;
764 764
765 765 vp = RTOV4(rp);
766 766 ASSERT(vp->v_count >= 1);
767 767 ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
768 768
769 769 /*
770 770 * If we have too many rnodes allocated and there are no
771 771 * references to this rnode, or if the rnode is no longer
772 772 * accessible by it does not reside in the hash queues,
773 773 * or if an i/o error occurred while writing to the file,
774 774 * then just free it instead of putting it on the rnode
775 775 * freelist.
776 776 */
777 777 vfsp = vp->v_vfsp;
778 778 if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
779 779 #ifdef DEBUG
780 780 (nfs4_rnode_nofreelist != 0) ||
781 781 #endif
782 782 rp->r_error || (rp->r_flags & R4RECOVERR) ||
783 783 (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
784 784 if (rp->r_flags & R4HASHED) {
785 785 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
786 786 mutex_enter(&vp->v_lock);
787 787 if (vp->v_count > 1) {
788 788 vp->v_count--;
789 789 mutex_exit(&vp->v_lock);
790 790 rw_exit(&rp->r_hashq->r_lock);
791 791 return;
792 792 }
793 793 mutex_exit(&vp->v_lock);
794 794 rp4_rmhash_locked(rp);
795 795 rw_exit(&rp->r_hashq->r_lock);
796 796 }
797 797
798 798 /*
799 799 * Make sure we don't have a delegation on this rnode
800 800 * before destroying it.
801 801 */
802 802 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
803 803 (void) nfs4delegreturn(rp,
804 804 NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
805 805 }
806 806
807 807 r4inactive(rp, cr);
808 808
809 809 /*
810 810 * Recheck the vnode reference count. We need to
811 811 * make sure that another reference has not been
812 812 * acquired while we were not holding v_lock. The
813 813 * rnode is not in the rnode hash queues; one
814 814 * way for a reference to have been acquired
815 815 * is for a VOP_PUTPAGE because the rnode was marked
816 816 * with R4DIRTY or for a modified page. This
817 817 * reference may have been acquired before our call
818 818 * to r4inactive. The i/o may have been completed,
819 819 * thus allowing r4inactive to complete, but the
820 820 * reference to the vnode may not have been released
821 821 * yet. In any case, the rnode can not be destroyed
822 822 * until the other references to this vnode have been
823 823 * released. The other references will take care of
824 824 * either destroying the rnode or placing it on the
825 825 * rnode freelist. If there are no other references,
826 826 * then the rnode may be safely destroyed.
827 827 */
828 828 mutex_enter(&vp->v_lock);
829 829 if (vp->v_count > 1) {
830 830 vp->v_count--;
831 831 mutex_exit(&vp->v_lock);
832 832 return;
833 833 }
834 834 mutex_exit(&vp->v_lock);
835 835
836 836 destroy_rnode4(rp);
837 837 return;
838 838 }
839 839
840 840 /*
841 841 * Lock the hash queue and then recheck the reference count
842 842 * to ensure that no other threads have acquired a reference
843 843 * to indicate that the rnode should not be placed on the
844 844 * freelist. If another reference has been acquired, then
845 845 * just release this one and let the other thread complete
846 846 * the processing of adding this rnode to the freelist.
847 847 */
848 848 again:
849 849 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
850 850
851 851 mutex_enter(&vp->v_lock);
852 852 if (vp->v_count > 1) {
853 853 vp->v_count--;
854 854 mutex_exit(&vp->v_lock);
855 855 rw_exit(&rp->r_hashq->r_lock);
856 856 return;
857 857 }
858 858 mutex_exit(&vp->v_lock);
859 859
860 860 /*
861 861 * Make sure we don't put an rnode with a delegation
862 862 * on the free list.
863 863 */
864 864 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
865 865 rw_exit(&rp->r_hashq->r_lock);
866 866 (void) nfs4delegreturn(rp,
867 867 NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
868 868 goto again;
869 869 }
870 870
871 871 /*
872 872 * Now that we have the hash queue lock, and we know there
873 873 * are not anymore references on the vnode, check to make
874 874 * sure there aren't any open streams still on the rnode.
875 875 * If so, drop the hash queue lock, remove the open streams,
876 876 * and recheck the v_count.
877 877 */
878 878 mutex_enter(&rp->r_os_lock);
879 879 if (list_head(&rp->r_open_streams) != NULL) {
880 880 mutex_exit(&rp->r_os_lock);
881 881 rw_exit(&rp->r_hashq->r_lock);
882 882 if (nfs_zone() != VTOMI4(vp)->mi_zone)
883 883 nfs4_clear_open_streams(rp);
884 884 else
885 885 (void) nfs4close_all(vp, cr);
886 886 goto again;
887 887 }
888 888 mutex_exit(&rp->r_os_lock);
889 889
890 890 /*
891 891 * Before we put it on the freelist, make sure there are no pages.
892 892 * If there are, flush and commit of all of the dirty and
893 893 * uncommitted pages, assuming the file system isn't read only.
894 894 */
895 895 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
896 896 rw_exit(&rp->r_hashq->r_lock);
897 897 r4flushpages(rp, cr);
898 898 goto again;
899 899 }
900 900
901 901 /*
902 902 * Before we put it on the freelist, make sure there is no
903 903 * active xattr directory cached, the freelist will not
904 904 * have its entries r4inactive'd if there is still an active
905 905 * rnode, thus nothing in the freelist can hold another
906 906 * rnode active.
907 907 */
908 908 xattr = rp->r_xattr_dir;
909 909 rp->r_xattr_dir = NULL;
910 910
911 911 /*
912 912 * If there is no cached data or metadata for this file, then
913 913 * put the rnode on the front of the freelist so that it will
914 914 * be reused before other rnodes which may have cached data or
915 915 * metadata associated with them.
916 916 */
917 917 mutex_enter(&rp4freelist_lock);
918 918 if (rp4freelist == NULL) {
919 919 rp->r_freef = rp;
920 920 rp->r_freeb = rp;
921 921 rp4freelist = rp;
922 922 } else {
923 923 rp->r_freef = rp4freelist;
924 924 rp->r_freeb = rp4freelist->r_freeb;
925 925 rp4freelist->r_freeb->r_freef = rp;
926 926 rp4freelist->r_freeb = rp;
927 927 if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
928 928 rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
929 929 rp4freelist = rp;
930 930 }
931 931 mutex_exit(&rp4freelist_lock);
932 932
933 933 rw_exit(&rp->r_hashq->r_lock);
934 934
935 935 if (xattr)
936 936 VN_RELE(xattr);
937 937 }
938 938
939 939 /*
940 940 * Remove an rnode from the free list.
941 941 *
942 942 * The caller must be holding rp4freelist_lock and the rnode
943 943 * must be on the freelist.
944 944 */
945 945 static void
946 946 rp4_rmfree(rnode4_t *rp)
947 947 {
948 948
949 949 ASSERT(MUTEX_HELD(&rp4freelist_lock));
950 950 ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
951 951
952 952 if (rp == rp4freelist) {
953 953 rp4freelist = rp->r_freef;
954 954 if (rp == rp4freelist)
955 955 rp4freelist = NULL;
956 956 }
957 957 rp->r_freeb->r_freef = rp->r_freef;
958 958 rp->r_freef->r_freeb = rp->r_freeb;
959 959
960 960 rp->r_freef = rp->r_freeb = NULL;
961 961 }
962 962
963 963 /*
964 964 * Put a rnode in the hash table.
965 965 *
966 966 * The caller must be holding the exclusive hash queue lock
967 967 */
968 968 void
969 969 rp4_addhash(rnode4_t *rp)
970 970 {
971 971 ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
972 972 ASSERT(!(rp->r_flags & R4HASHED));
973 973
974 974 #ifdef DEBUG
975 975 r4_dup_check(rp, RTOV4(rp)->v_vfsp);
976 976 #endif
977 977
978 978 rp->r_hashf = rp->r_hashq->r_hashf;
979 979 rp->r_hashq->r_hashf = rp;
980 980 rp->r_hashb = (rnode4_t *)rp->r_hashq;
981 981 rp->r_hashf->r_hashb = rp;
982 982
983 983 mutex_enter(&rp->r_statelock);
984 984 rp->r_flags |= R4HASHED;
985 985 mutex_exit(&rp->r_statelock);
986 986 }
987 987
988 988 /*
989 989 * Remove a rnode from the hash table.
990 990 *
991 991 * The caller must be holding the hash queue lock.
992 992 */
993 993 void
994 994 rp4_rmhash_locked(rnode4_t *rp)
995 995 {
996 996 ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
997 997 ASSERT(rp->r_flags & R4HASHED);
998 998
999 999 rp->r_hashb->r_hashf = rp->r_hashf;
1000 1000 rp->r_hashf->r_hashb = rp->r_hashb;
1001 1001
1002 1002 mutex_enter(&rp->r_statelock);
1003 1003 rp->r_flags &= ~R4HASHED;
1004 1004 mutex_exit(&rp->r_statelock);
1005 1005 }
1006 1006
1007 1007 /*
1008 1008 * Remove a rnode from the hash table.
1009 1009 *
1010 1010 * The caller must not be holding the hash queue lock.
1011 1011 */
1012 1012 void
1013 1013 rp4_rmhash(rnode4_t *rp)
1014 1014 {
1015 1015 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1016 1016 rp4_rmhash_locked(rp);
1017 1017 rw_exit(&rp->r_hashq->r_lock);
1018 1018 }
1019 1019
1020 1020 /*
1021 1021 * Lookup a rnode by fhandle. Ignores rnodes that had failed recovery.
1022 1022 * Returns NULL if no match. If an rnode is returned, the reference count
1023 1023 * on the master vnode is incremented.
1024 1024 *
1025 1025 * The caller must be holding the hash queue lock, either shared or exclusive.
1026 1026 */
1027 1027 rnode4_t *
1028 1028 r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
1029 1029 {
1030 1030 rnode4_t *rp;
1031 1031 vnode_t *vp;
1032 1032
1033 1033 ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
1034 1034
1035 1035 for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
1036 1036 vp = RTOV4(rp);
1037 1037 if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {
1038 1038
1039 1039 mutex_enter(&rp->r_statelock);
1040 1040 if (rp->r_flags & R4RECOVERR) {
1041 1041 mutex_exit(&rp->r_statelock);
1042 1042 continue;
1043 1043 }
1044 1044 mutex_exit(&rp->r_statelock);
1045 1045 #ifdef DEBUG
1046 1046 r4_dup_check(rp, vfsp);
1047 1047 #endif
1048 1048 if (rp->r_freef != NULL) {
1049 1049 mutex_enter(&rp4freelist_lock);
1050 1050 /*
1051 1051 * If the rnode is on the freelist,
1052 1052 * then remove it and use that reference
1053 1053 * as the new reference. Otherwise,
1054 1054 * need to increment the reference count.
1055 1055 */
1056 1056 if (rp->r_freef != NULL) {
1057 1057 rp4_rmfree(rp);
1058 1058 mutex_exit(&rp4freelist_lock);
1059 1059 } else {
1060 1060 mutex_exit(&rp4freelist_lock);
1061 1061 VN_HOLD(vp);
1062 1062 }
1063 1063 } else
1064 1064 VN_HOLD(vp);
1065 1065
1066 1066 /*
1067 1067 * if root vnode, set v_flag to indicate that
1068 1068 */
1069 1069 if (isrootfh(fh, rp)) {
1070 1070 if (!(vp->v_flag & VROOT)) {
1071 1071 mutex_enter(&vp->v_lock);
1072 1072 vp->v_flag |= VROOT;
1073 1073 mutex_exit(&vp->v_lock);
1074 1074 }
1075 1075 }
1076 1076 return (rp);
1077 1077 }
1078 1078 }
1079 1079 return (NULL);
1080 1080 }
1081 1081
1082 1082 /*
1083 1083 * Lookup an rnode by fhandle. Just a wrapper for r4find()
1084 1084 * that assumes the caller hasn't already got the lock
1085 1085 * on the hash bucket.
1086 1086 */
1087 1087 rnode4_t *
1088 1088 r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
1089 1089 {
1090 1090 rnode4_t *rp;
1091 1091 int index;
1092 1092
1093 1093 index = rtable4hash(fh);
1094 1094 rw_enter(&rtable4[index].r_lock, RW_READER);
1095 1095 rp = r4find(&rtable4[index], fh, vfsp);
1096 1096 rw_exit(&rtable4[index].r_lock);
1097 1097
1098 1098 return (rp);
1099 1099 }
1100 1100
1101 1101 /*
1102 1102 * Return >0 if there is a active vnode belonging to this vfs in the
1103 1103 * rtable4 cache.
1104 1104 *
1105 1105 * Several of these checks are done without holding the usual
1106 1106 * locks. This is safe because destroy_rtable(), rp_addfree(),
1107 1107 * etc. will redo the necessary checks before actually destroying
1108 1108 * any rnodes.
1109 1109 */
1110 1110 int
1111 1111 check_rtable4(struct vfs *vfsp)
1112 1112 {
1113 1113 rnode4_t *rp;
1114 1114 vnode_t *vp;
1115 1115 int busy = NFSV4_RTABLE4_OK;
1116 1116 int index;
1117 1117
1118 1118 for (index = 0; index < rtable4size; index++) {
1119 1119 rw_enter(&rtable4[index].r_lock, RW_READER);
1120 1120
1121 1121 for (rp = rtable4[index].r_hashf;
1122 1122 rp != (rnode4_t *)(&rtable4[index]);
1123 1123 rp = rp->r_hashf) {
1124 1124
1125 1125 vp = RTOV4(rp);
1126 1126 if (vp->v_vfsp == vfsp) {
1127 1127 if (rp->r_freef == NULL) {
1128 1128 busy = NFSV4_RTABLE4_NOT_FREE_LIST;
1129 1129 } else if (nfs4_has_pages(vp) &&
1130 1130 (rp->r_flags & R4DIRTY)) {
1131 1131 busy = NFSV4_RTABLE4_DIRTY_PAGES;
1132 1132 } else if (rp->r_count > 0) {
1133 1133 busy = NFSV4_RTABLE4_POS_R_COUNT;
1134 1134 }
1135 1135
1136 1136 if (busy != NFSV4_RTABLE4_OK) {
1137 1137 #ifdef DEBUG
1138 1138 char *path;
1139 1139
1140 1140 path = fn_path(rp->r_svnode.sv_name);
1141 1141 DTRACE_NFSV4_3(rnode__e__debug,
1142 1142 int, busy, char *, path,
1143 1143 rnode4_t *, rp);
1144 1144 kmem_free(path, strlen(path)+1);
1145 1145 #endif
1146 1146 rw_exit(&rtable4[index].r_lock);
1147 1147 return (busy);
1148 1148 }
1149 1149 }
1150 1150 }
1151 1151 rw_exit(&rtable4[index].r_lock);
1152 1152 }
1153 1153 return (busy);
1154 1154 }
1155 1155
1156 1156 /*
1157 1157 * Destroy inactive vnodes from the hash queues which
1158 1158 * belong to this vfs. All of the vnodes should be inactive.
1159 1159 * It is essential that we destroy all rnodes in case of
1160 1160 * forced unmount as well as in normal unmount case.
1161 1161 */
1162 1162
1163 1163 void
1164 1164 destroy_rtable4(struct vfs *vfsp, cred_t *cr)
1165 1165 {
1166 1166 int index;
1167 1167 vnode_t *vp;
1168 1168 rnode4_t *rp, *r_hashf, *rlist;
1169 1169
1170 1170 rlist = NULL;
1171 1171
1172 1172 for (index = 0; index < rtable4size; index++) {
1173 1173 rw_enter(&rtable4[index].r_lock, RW_WRITER);
1174 1174 for (rp = rtable4[index].r_hashf;
1175 1175 rp != (rnode4_t *)(&rtable4[index]);
1176 1176 rp = r_hashf) {
1177 1177 /* save the hash pointer before destroying */
1178 1178 r_hashf = rp->r_hashf;
1179 1179
1180 1180 vp = RTOV4(rp);
1181 1181 if (vp->v_vfsp == vfsp) {
1182 1182 mutex_enter(&rp4freelist_lock);
1183 1183 if (rp->r_freef != NULL) {
1184 1184 rp4_rmfree(rp);
1185 1185 mutex_exit(&rp4freelist_lock);
1186 1186 rp4_rmhash_locked(rp);
1187 1187 rp->r_hashf = rlist;
1188 1188 rlist = rp;
1189 1189 } else
1190 1190 mutex_exit(&rp4freelist_lock);
1191 1191 }
1192 1192 }
1193 1193 rw_exit(&rtable4[index].r_lock);
1194 1194 }
1195 1195
1196 1196 for (rp = rlist; rp != NULL; rp = r_hashf) {
1197 1197 r_hashf = rp->r_hashf;
1198 1198 /*
1199 1199 * This call to rp4_addfree will end up destroying the
1200 1200 * rnode, but in a safe way with the appropriate set
1201 1201 * of checks done.
1202 1202 */
1203 1203 rp4_addfree(rp, cr);
1204 1204 }
1205 1205 }
1206 1206
1207 1207 /*
1208 1208 * This routine destroys all the resources of an rnode
1209 1209 * and finally the rnode itself.
1210 1210 */
1211 1211 static void
1212 1212 destroy_rnode4(rnode4_t *rp)
1213 1213 {
1214 1214 vnode_t *vp;
1215 1215 vfs_t *vfsp;
1216 1216
1217 1217 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_NONE);
1218 1218
1219 1219 vp = RTOV4(rp);
1220 1220 vfsp = vp->v_vfsp;
1221 1221
1222 1222 uninit_rnode4(rp);
1223 1223 atomic_add_long((ulong_t *)&rnode4_new, -1);
1224 1224 #ifdef DEBUG
1225 1225 clstat4_debug.nrnode.value.ui64--;
1226 1226 #endif
1227 1227 kmem_cache_free(rnode4_cache, rp);
1228 1228 vn_invalid(vp);
1229 1229 vn_free(vp);
1230 1230 VFS_RELE(vfsp);
1231 1231 }
1232 1232
1233 1233 /*
1234 1234 * Invalidate the attributes on all rnodes forcing the next getattr
1235 1235 * to go over the wire. Used to flush stale uid and gid mappings.
1236 1236 * Maybe done on a per vfsp, or all rnodes (vfsp == NULL)
1237 1237 */
1238 1238 void
1239 1239 nfs4_rnode_invalidate(struct vfs *vfsp)
1240 1240 {
1241 1241 int index;
1242 1242 rnode4_t *rp;
1243 1243 vnode_t *vp;
1244 1244
1245 1245 /*
1246 1246 * Walk the hash queues looking for rnodes.
1247 1247 */
1248 1248 for (index = 0; index < rtable4size; index++) {
1249 1249 rw_enter(&rtable4[index].r_lock, RW_READER);
1250 1250 for (rp = rtable4[index].r_hashf;
1251 1251 rp != (rnode4_t *)(&rtable4[index]);
1252 1252 rp = rp->r_hashf) {
1253 1253 vp = RTOV4(rp);
1254 1254 if (vfsp != NULL && vp->v_vfsp != vfsp)
1255 1255 continue;
1256 1256
1257 1257 if (!mutex_tryenter(&rp->r_statelock))
1258 1258 continue;
1259 1259
1260 1260 /*
1261 1261 * Expire the attributes by resetting the change
1262 1262 * and attr timeout.
1263 1263 */
1264 1264 rp->r_change = 0;
1265 1265 PURGE_ATTRCACHE4_LOCKED(rp);
1266 1266 mutex_exit(&rp->r_statelock);
1267 1267 }
1268 1268 rw_exit(&rtable4[index].r_lock);
1269 1269 }
1270 1270 }
1271 1271
1272 1272 /*
1273 1273 * Flush all vnodes in this (or every) vfs.
1274 1274 * Used by nfs_sync and by nfs_unmount.
1275 1275 */
1276 1276 void
1277 1277 r4flush(struct vfs *vfsp, cred_t *cr)
1278 1278 {
1279 1279 int index;
1280 1280 rnode4_t *rp;
1281 1281 vnode_t *vp, **vplist;
1282 1282 long num, cnt;
1283 1283
1284 1284 /*
1285 1285 * Check to see whether there is anything to do.
1286 1286 */
1287 1287 num = rnode4_new;
1288 1288 if (num == 0)
1289 1289 return;
1290 1290
1291 1291 /*
1292 1292 * Allocate a slot for all currently active rnodes on the
1293 1293 * supposition that they all may need flushing.
1294 1294 */
1295 1295 vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1296 1296 cnt = 0;
1297 1297
1298 1298 /*
1299 1299 * Walk the hash queues looking for rnodes with page
1300 1300 * lists associated with them. Make a list of these
1301 1301 * files.
1302 1302 */
1303 1303 for (index = 0; index < rtable4size; index++) {
1304 1304 rw_enter(&rtable4[index].r_lock, RW_READER);
1305 1305 for (rp = rtable4[index].r_hashf;
1306 1306 rp != (rnode4_t *)(&rtable4[index]);
1307 1307 rp = rp->r_hashf) {
1308 1308 vp = RTOV4(rp);
1309 1309 /*
1310 1310 * Don't bother sync'ing a vp if it
1311 1311 * is part of virtual swap device or
1312 1312 * if VFS is read-only
1313 1313 */
1314 1314 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1315 1315 continue;
1316 1316 /*
1317 1317 * If flushing all mounted file systems or
1318 1318 * the vnode belongs to this vfs, has pages
1319 1319 * and is marked as either dirty or mmap'd,
1320 1320 * hold and add this vnode to the list of
1321 1321 * vnodes to flush.
1322 1322 */
1323 1323 if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
1324 1324 nfs4_has_pages(vp) &&
1325 1325 ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
1326 1326 VN_HOLD(vp);
1327 1327 vplist[cnt++] = vp;
1328 1328 if (cnt == num) {
1329 1329 rw_exit(&rtable4[index].r_lock);
1330 1330 goto toomany;
1331 1331 }
1332 1332 }
1333 1333 }
1334 1334 rw_exit(&rtable4[index].r_lock);
1335 1335 }
1336 1336 toomany:
1337 1337
1338 1338 /*
1339 1339 * Flush and release all of the files on the list.
1340 1340 */
1341 1341 while (cnt-- > 0) {
1342 1342 vp = vplist[cnt];
1343 1343 (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1344 1344 VN_RELE(vp);
1345 1345 }
1346 1346
1347 1347 /*
1348 1348 * Free the space allocated to hold the list.
1349 1349 */
1350 1350 kmem_free(vplist, num * sizeof (*vplist));
1351 1351 }
1352 1352
1353 1353 int
1354 1354 nfs4_free_data_reclaim(rnode4_t *rp)
1355 1355 {
1356 1356 char *contents;
1357 1357 vnode_t *xattr;
1358 1358 int size;
1359 1359 vsecattr_t *vsp;
1360 1360 int freed;
1361 1361 bool_t rdc = FALSE;
1362 1362
1363 1363 /*
1364 1364 * Free any held caches which may
1365 1365 * be associated with this rnode.
1366 1366 */
1367 1367 mutex_enter(&rp->r_statelock);
1368 1368 if (rp->r_dir != NULL)
1369 1369 rdc = TRUE;
1370 1370 contents = rp->r_symlink.contents;
1371 1371 size = rp->r_symlink.size;
1372 1372 rp->r_symlink.contents = NULL;
1373 1373 vsp = rp->r_secattr;
1374 1374 rp->r_secattr = NULL;
1375 1375 xattr = rp->r_xattr_dir;
1376 1376 rp->r_xattr_dir = NULL;
1377 1377 mutex_exit(&rp->r_statelock);
1378 1378
1379 1379 /*
1380 1380 * Free the access cache entries.
1381 1381 */
1382 1382 freed = nfs4_access_purge_rp(rp);
1383 1383
1384 1384 if (rdc == FALSE && contents == NULL && vsp == NULL && xattr == NULL)
1385 1385 return (freed);
1386 1386
1387 1387 /*
1388 1388 * Free the readdir cache entries, incompletely if we can't block.
1389 1389 */
1390 1390 nfs4_purge_rddir_cache(RTOV4(rp));
1391 1391
1392 1392 /*
1393 1393 * Free the symbolic link cache.
1394 1394 */
1395 1395 if (contents != NULL) {
1396 1396
1397 1397 kmem_free((void *)contents, size);
1398 1398 }
1399 1399
1400 1400 /*
1401 1401 * Free any cached ACL.
1402 1402 */
1403 1403 if (vsp != NULL)
1404 1404 nfs4_acl_free_cache(vsp);
1405 1405
1406 1406 /*
1407 1407 * Release the xattr directory vnode
1408 1408 */
1409 1409 if (xattr != NULL)
1410 1410 VN_RELE(xattr);
1411 1411
1412 1412 return (1);
1413 1413 }
1414 1414
1415 1415 static int
1416 1416 nfs4_active_data_reclaim(rnode4_t *rp)
1417 1417 {
1418 1418 char *contents;
1419 1419 vnode_t *xattr = NULL;
1420 1420 int size;
1421 1421 vsecattr_t *vsp;
1422 1422 int freed;
1423 1423 bool_t rdc = FALSE;
1424 1424
1425 1425 /*
1426 1426 * Free any held credentials and caches which
1427 1427 * may be associated with this rnode.
1428 1428 */
1429 1429 if (!mutex_tryenter(&rp->r_statelock))
1430 1430 return (0);
1431 1431 contents = rp->r_symlink.contents;
1432 1432 size = rp->r_symlink.size;
1433 1433 rp->r_symlink.contents = NULL;
1434 1434 vsp = rp->r_secattr;
1435 1435 rp->r_secattr = NULL;
1436 1436 if (rp->r_dir != NULL)
1437 1437 rdc = TRUE;
1438 1438 /*
1439 1439 * To avoid a deadlock, do not free r_xattr_dir cache if it is hashed
1440 1440 * on the same r_hashq queue. We are not mandated to free all caches.
1441 1441 * VN_RELE(rp->r_xattr_dir) will be done sometime later - e.g. when the
1442 1442 * rnode 'rp' is freed or put on the free list.
1443 1443 *
1444 1444 * We will retain NFS4_XATTR_DIR_NOTSUPP because:
1445 1445 * - it has no associated rnode4_t (its v_data is NULL),
1446 1446 * - it is preallocated statically and will never go away,
1447 1447 * so we cannot save anything by releasing it.
1448 1448 */
1449 1449 if (rp->r_xattr_dir && rp->r_xattr_dir != NFS4_XATTR_DIR_NOTSUPP &&
1450 1450 VTOR4(rp->r_xattr_dir)->r_hashq != rp->r_hashq) {
1451 1451 xattr = rp->r_xattr_dir;
1452 1452 rp->r_xattr_dir = NULL;
1453 1453 }
1454 1454 mutex_exit(&rp->r_statelock);
1455 1455
1456 1456 /*
1457 1457 * Free the access cache entries.
1458 1458 */
1459 1459 freed = nfs4_access_purge_rp(rp);
1460 1460
1461 1461 if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
1462 1462 return (freed);
1463 1463
1464 1464 /*
1465 1465 * Free the symbolic link cache.
1466 1466 */
1467 1467 if (contents != NULL) {
1468 1468
1469 1469 kmem_free((void *)contents, size);
1470 1470 }
1471 1471
1472 1472 /*
1473 1473 * Free any cached ACL.
1474 1474 */
1475 1475 if (vsp != NULL)
1476 1476 nfs4_acl_free_cache(vsp);
1477 1477
1478 1478 nfs4_purge_rddir_cache(RTOV4(rp));
1479 1479
1480 1480 /*
1481 1481 * Release the xattr directory vnode
1482 1482 */
1483 1483 if (xattr != NULL)
1484 1484 VN_RELE(xattr);
1485 1485
1486 1486 return (1);
1487 1487 }
1488 1488
1489 1489 static int
1490 1490 nfs4_free_reclaim(void)
1491 1491 {
1492 1492 int freed;
1493 1493 rnode4_t *rp;
1494 1494
1495 1495 #ifdef DEBUG
1496 1496 clstat4_debug.f_reclaim.value.ui64++;
1497 1497 #endif
1498 1498 freed = 0;
1499 1499 mutex_enter(&rp4freelist_lock);
1500 1500 rp = rp4freelist;
1501 1501 if (rp != NULL) {
1502 1502 do {
1503 1503 if (nfs4_free_data_reclaim(rp))
1504 1504 freed = 1;
1505 1505 } while ((rp = rp->r_freef) != rp4freelist);
1506 1506 }
1507 1507 mutex_exit(&rp4freelist_lock);
1508 1508 return (freed);
1509 1509 }
1510 1510
1511 1511 static int
1512 1512 nfs4_active_reclaim(void)
1513 1513 {
1514 1514 int freed;
1515 1515 int index;
1516 1516 rnode4_t *rp;
1517 1517
1518 1518 #ifdef DEBUG
1519 1519 clstat4_debug.a_reclaim.value.ui64++;
1520 1520 #endif
1521 1521 freed = 0;
1522 1522 for (index = 0; index < rtable4size; index++) {
1523 1523 rw_enter(&rtable4[index].r_lock, RW_READER);
1524 1524 for (rp = rtable4[index].r_hashf;
1525 1525 rp != (rnode4_t *)(&rtable4[index]);
1526 1526 rp = rp->r_hashf) {
1527 1527 if (nfs4_active_data_reclaim(rp))
1528 1528 freed = 1;
1529 1529 }
1530 1530 rw_exit(&rtable4[index].r_lock);
1531 1531 }
1532 1532 return (freed);
1533 1533 }
1534 1534
1535 1535 static int
1536 1536 nfs4_rnode_reclaim(void)
1537 1537 {
1538 1538 int freed;
1539 1539 rnode4_t *rp;
1540 1540 vnode_t *vp;
1541 1541
1542 1542 #ifdef DEBUG
1543 1543 clstat4_debug.r_reclaim.value.ui64++;
1544 1544 #endif
1545 1545 freed = 0;
1546 1546 mutex_enter(&rp4freelist_lock);
1547 1547 while ((rp = rp4freelist) != NULL) {
1548 1548 rp4_rmfree(rp);
1549 1549 mutex_exit(&rp4freelist_lock);
1550 1550 if (rp->r_flags & R4HASHED) {
1551 1551 vp = RTOV4(rp);
1552 1552 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1553 1553 mutex_enter(&vp->v_lock);
1554 1554 if (vp->v_count > 1) {
1555 1555 vp->v_count--;
1556 1556 mutex_exit(&vp->v_lock);
1557 1557 rw_exit(&rp->r_hashq->r_lock);
1558 1558 mutex_enter(&rp4freelist_lock);
1559 1559 continue;
1560 1560 }
1561 1561 mutex_exit(&vp->v_lock);
1562 1562 rp4_rmhash_locked(rp);
1563 1563 rw_exit(&rp->r_hashq->r_lock);
1564 1564 }
1565 1565 /*
1566 1566 * This call to rp_addfree will end up destroying the
1567 1567 * rnode, but in a safe way with the appropriate set
1568 1568 * of checks done.
1569 1569 */
1570 1570 rp4_addfree(rp, CRED());
1571 1571 mutex_enter(&rp4freelist_lock);
1572 1572 }
1573 1573 mutex_exit(&rp4freelist_lock);
1574 1574 return (freed);
1575 1575 }
1576 1576
/*
 * Reclaim callback registered with rnode4_cache.  Escalates through
 * three stages and stops at the first one that reports success:
 * caches of freelist rnodes, caches of hashed rnodes, and finally the
 * freelist rnodes themselves.  The argument is unused.
 */
/*ARGSUSED*/
static void
nfs4_reclaim(void *cdrarg)
{
#ifdef DEBUG
	clstat4_debug.reclaim.value.ui64++;
#endif

	if (nfs4_free_reclaim() || nfs4_active_reclaim())
		return;

	(void) nfs4_rnode_reclaim();
}
1592 1592
1593 1593 /*
1594 1594 * Returns the clientid4 to use for the given mntinfo4. Note that the
1595 1595 * clientid can change if the caller drops mi_recovlock.
1596 1596 */
1597 1597
1598 1598 clientid4
1599 1599 mi2clientid(mntinfo4_t *mi)
1600 1600 {
1601 1601 nfs4_server_t *sp;
1602 1602 clientid4 clientid = 0;
1603 1603
1604 1604 /* this locks down sp if it is found */
↓ open down ↓ |
1604 lines elided |
↑ open up ↑ |
1605 1605 sp = find_nfs4_server(mi);
1606 1606 if (sp != NULL) {
1607 1607 clientid = sp->clientid;
1608 1608 mutex_exit(&sp->s_lock);
1609 1609 nfs4_server_rele(sp);
1610 1610 }
1611 1611 return (clientid);
1612 1612 }
1613 1613
1614 1614 /*
1615 - * Return the current lease time for the server associated with the given
1616 - * file. Note that the lease time could change immediately after this
1617 - * call.
1618 - */
1619 -
1620 -time_t
1621 -r2lease_time(rnode4_t *rp)
1622 -{
1623 - nfs4_server_t *sp;
1624 - time_t lease_time;
1625 - mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1626 -
1627 - (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
1628 -
1629 - /* this locks down sp if it is found */
1630 - sp = find_nfs4_server(VTOMI4(RTOV4(rp)));
1631 -
1632 - if (VTOMI4(RTOV4(rp))->mi_vfsp->vfs_flag & VFS_UNMOUNTED) {
1633 - if (sp != NULL) {
1634 - mutex_exit(&sp->s_lock);
1635 - nfs4_server_rele(sp);
1636 - }
1637 - nfs_rw_exit(&mi->mi_recovlock);
1638 - return (1); /* 1 second */
1639 - }
1640 -
1641 - ASSERT(sp != NULL);
1642 -
1643 - lease_time = sp->s_lease_time;
1644 -
1645 - mutex_exit(&sp->s_lock);
1646 - nfs4_server_rele(sp);
1647 - nfs_rw_exit(&mi->mi_recovlock);
1648 -
1649 - return (lease_time);
1650 -}
1651 -
1652 -/*
1653 1615 * Return a list with information about all the known open instances for
1654 1616 * a filesystem. The caller must call r4releopenlist() when done with the
1655 1617 * list.
1656 1618 *
1657 1619 * We are safe at looking at os_valid and os_pending_close across dropping
1658 1620 * the 'os_sync_lock' to count up the number of open streams and then
1659 1621 * allocate memory for the osp list due to:
1660 1622 * -Looking at os_pending_close is safe since this routine is
1661 1623 * only called via recovery, and os_pending_close can only be set via
1662 1624 * a non-recovery operation (which are all blocked when recovery
1663 1625 * is active).
1664 1626 *
1665 1627 * -Examining os_valid is safe since non-recovery operations, which
1666 1628 * could potentially switch os_valid to 0, are blocked (via
1667 1629 * nfs4_start_fop) and recovery is single-threaded per mntinfo4_t
1668 1630 * (which means we are the only recovery thread potentially acting
1669 1631 * on this open stream).
1670 1632 */
1671 1633
1672 1634 nfs4_opinst_t *
1673 1635 r4mkopenlist(mntinfo4_t *mi)
1674 1636 {
1675 1637 nfs4_opinst_t *reopenlist, *rep;
1676 1638 rnode4_t *rp;
1677 1639 vnode_t *vp;
1678 1640 vfs_t *vfsp = mi->mi_vfsp;
1679 1641 int numosp;
1680 1642 nfs4_open_stream_t *osp;
1681 1643 int index;
1682 1644 open_delegation_type4 dtype;
1683 1645 int hold_vnode;
1684 1646
1685 1647 reopenlist = NULL;
1686 1648
1687 1649 for (index = 0; index < rtable4size; index++) {
1688 1650 rw_enter(&rtable4[index].r_lock, RW_READER);
1689 1651 for (rp = rtable4[index].r_hashf;
1690 1652 rp != (rnode4_t *)(&rtable4[index]);
1691 1653 rp = rp->r_hashf) {
1692 1654
1693 1655 vp = RTOV4(rp);
1694 1656 if (vp->v_vfsp != vfsp)
1695 1657 continue;
1696 1658 hold_vnode = 0;
1697 1659
1698 1660 mutex_enter(&rp->r_os_lock);
1699 1661
1700 1662 /* Count the number of valid open_streams of the file */
1701 1663 numosp = 0;
1702 1664 for (osp = list_head(&rp->r_open_streams); osp != NULL;
1703 1665 osp = list_next(&rp->r_open_streams, osp)) {
1704 1666 mutex_enter(&osp->os_sync_lock);
1705 1667 if (osp->os_valid && !osp->os_pending_close)
1706 1668 numosp++;
1707 1669 mutex_exit(&osp->os_sync_lock);
1708 1670 }
1709 1671
1710 1672 /* Fill in the valid open streams per vp */
1711 1673 if (numosp > 0) {
1712 1674 int j;
1713 1675
1714 1676 hold_vnode = 1;
1715 1677
1716 1678 /*
1717 1679 * Add a new open instance to the list
1718 1680 */
1719 1681 rep = kmem_zalloc(sizeof (*reopenlist),
1720 1682 KM_SLEEP);
1721 1683 rep->re_next = reopenlist;
1722 1684 reopenlist = rep;
1723 1685
1724 1686 rep->re_vp = vp;
1725 1687 rep->re_osp = kmem_zalloc(
1726 1688 numosp * sizeof (*(rep->re_osp)),
1727 1689 KM_SLEEP);
1728 1690 rep->re_numosp = numosp;
1729 1691
1730 1692 j = 0;
1731 1693 for (osp = list_head(&rp->r_open_streams);
1732 1694 osp != NULL;
1733 1695 osp = list_next(&rp->r_open_streams, osp)) {
1734 1696
1735 1697 mutex_enter(&osp->os_sync_lock);
1736 1698 if (osp->os_valid &&
1737 1699 !osp->os_pending_close) {
1738 1700 osp->os_ref_count++;
1739 1701 rep->re_osp[j] = osp;
1740 1702 j++;
1741 1703 }
1742 1704 mutex_exit(&osp->os_sync_lock);
1743 1705 }
1744 1706 /*
1745 1707 * Assuming valid osp(s) stays valid between
1746 1708 * the time obtaining j and numosp.
1747 1709 */
1748 1710 ASSERT(j == numosp);
1749 1711 }
1750 1712
1751 1713 mutex_exit(&rp->r_os_lock);
1752 1714 /* do this here to keep v_lock > r_os_lock */
1753 1715 if (hold_vnode)
1754 1716 VN_HOLD(vp);
1755 1717 mutex_enter(&rp->r_statev4_lock);
1756 1718 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
1757 1719 /*
1758 1720 * If this rnode holds a delegation,
1759 1721 * but if there are no valid open streams,
1760 1722 * then just discard the delegation
1761 1723 * without doing delegreturn.
1762 1724 */
1763 1725 if (numosp > 0)
1764 1726 rp->r_deleg_needs_recovery =
1765 1727 rp->r_deleg_type;
1766 1728 }
1767 1729 /* Save the delegation type for use outside the lock */
1768 1730 dtype = rp->r_deleg_type;
1769 1731 mutex_exit(&rp->r_statev4_lock);
1770 1732
1771 1733 /*
1772 1734 * If we have a delegation then get rid of it.
1773 1735 * We've set rp->r_deleg_needs_recovery so we have
1774 1736 * enough information to recover.
1775 1737 */
1776 1738 if (dtype != OPEN_DELEGATE_NONE) {
1777 1739 (void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
1778 1740 }
1779 1741 }
1780 1742 rw_exit(&rtable4[index].r_lock);
1781 1743 }
1782 1744 return (reopenlist);
1783 1745 }
1784 1746
1785 1747 /*
1786 1748 * Given a filesystem id, check to see if any rnodes
1787 1749 * within this fsid reside in the rnode cache, other
1788 1750 * than one we know about.
1789 1751 *
1790 1752 * Return 1 if an rnode is found, 0 otherwise
1791 1753 */
1792 1754 int
1793 1755 r4find_by_fsid(mntinfo4_t *mi, fattr4_fsid *moved_fsid)
1794 1756 {
1795 1757 rnode4_t *rp;
1796 1758 vnode_t *vp;
1797 1759 vfs_t *vfsp = mi->mi_vfsp;
1798 1760 fattr4_fsid *fsid;
1799 1761 int index, found = 0;
1800 1762
1801 1763 for (index = 0; index < rtable4size; index++) {
1802 1764 rw_enter(&rtable4[index].r_lock, RW_READER);
1803 1765 for (rp = rtable4[index].r_hashf;
1804 1766 rp != (rnode4_t *)(&rtable4[index]);
1805 1767 rp = rp->r_hashf) {
1806 1768
1807 1769 vp = RTOV4(rp);
1808 1770 if (vp->v_vfsp != vfsp)
1809 1771 continue;
1810 1772
1811 1773 /*
1812 1774 * XXX there might be a case where a
1813 1775 * replicated fs may have the same fsid
1814 1776 * across two different servers. This
1815 1777 * check isn't good enough in that case
1816 1778 */
1817 1779 fsid = &rp->r_srv_fsid;
1818 1780 if (FATTR4_FSID_EQ(moved_fsid, fsid)) {
1819 1781 found = 1;
1820 1782 break;
1821 1783 }
1822 1784 }
1823 1785 rw_exit(&rtable4[index].r_lock);
1824 1786
1825 1787 if (found)
1826 1788 break;
1827 1789 }
1828 1790 return (found);
1829 1791 }
1830 1792
1831 1793 /*
1832 1794 * Release the list of open instance references.
1833 1795 */
1834 1796
1835 1797 void
1836 1798 r4releopenlist(nfs4_opinst_t *reopenp)
1837 1799 {
1838 1800 nfs4_opinst_t *rep, *next;
1839 1801 int i;
1840 1802
1841 1803 for (rep = reopenp; rep; rep = next) {
1842 1804 next = rep->re_next;
1843 1805
1844 1806 for (i = 0; i < rep->re_numosp; i++)
1845 1807 open_stream_rele(rep->re_osp[i], VTOR4(rep->re_vp));
1846 1808
1847 1809 VN_RELE(rep->re_vp);
1848 1810 kmem_free(rep->re_osp,
1849 1811 rep->re_numosp * sizeof (*(rep->re_osp)));
1850 1812
1851 1813 kmem_free(rep, sizeof (*rep));
1852 1814 }
1853 1815 }
1854 1816
1855 1817 int
1856 1818 nfs4_rnode_init(void)
1857 1819 {
1858 1820 ulong_t nrnode4_max;
1859 1821 int i;
1860 1822
1861 1823 /*
1862 1824 * Compute the size of the rnode4 hash table
1863 1825 */
1864 1826 if (nrnode <= 0)
1865 1827 nrnode = ncsize;
1866 1828 nrnode4_max =
1867 1829 (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
1868 1830 if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
1869 1831 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1870 1832 "!setting nrnode to max value of %ld", nrnode4_max);
1871 1833 nrnode = nrnode4_max;
1872 1834 }
1873 1835 rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
1874 1836 rtable4mask = rtable4size - 1;
1875 1837
1876 1838 /*
1877 1839 * Allocate and initialize the hash buckets
1878 1840 */
1879 1841 rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
1880 1842 for (i = 0; i < rtable4size; i++) {
1881 1843 rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
1882 1844 rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
1883 1845 rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
1884 1846 }
1885 1847
1886 1848 rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
1887 1849 0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);
1888 1850
1889 1851 return (0);
1890 1852 }
1891 1853
1892 1854 int
1893 1855 nfs4_rnode_fini(void)
1894 1856 {
1895 1857 int i;
1896 1858
1897 1859 /*
1898 1860 * Deallocate the rnode hash queues
1899 1861 */
1900 1862 kmem_cache_destroy(rnode4_cache);
1901 1863
1902 1864 for (i = 0; i < rtable4size; i++)
1903 1865 rw_destroy(&rtable4[i].r_lock);
1904 1866
1905 1867 kmem_free(rtable4, rtable4size * sizeof (*rtable4));
1906 1868
1907 1869 return (0);
1908 1870 }
1909 1871
1910 1872 /*
1911 1873 * Return non-zero if the given filehandle refers to the root filehandle
1912 1874 * for the given rnode.
1913 1875 */
1914 1876
1915 1877 static int
1916 1878 isrootfh(nfs4_sharedfh_t *fh, rnode4_t *rp)
1917 1879 {
1918 1880 int isroot;
1919 1881
1920 1882 isroot = 0;
1921 1883 if (SFH4_SAME(VTOMI4(RTOV4(rp))->mi_rootfh, fh))
1922 1884 isroot = 1;
1923 1885
1924 1886 return (isroot);
1925 1887 }
1926 1888
1927 1889 /*
1928 1890 * The r4_stub_* routines assume that the rnode is newly activated, and
1929 1891 * that the caller either holds the hash bucket r_lock for this rnode as
1930 1892 * RW_WRITER, or holds r_statelock.
1931 1893 */
1932 1894 static void
1933 1895 r4_stub_set(rnode4_t *rp, nfs4_stub_type_t type)
1934 1896 {
1935 1897 vnode_t *vp = RTOV4(rp);
1936 1898 krwlock_t *hash_lock = &rp->r_hashq->r_lock;
1937 1899
1938 1900 ASSERT(RW_WRITE_HELD(hash_lock) || MUTEX_HELD(&rp->r_statelock));
1939 1901
1940 1902 rp->r_stub_type = type;
1941 1903
1942 1904 /*
1943 1905 * Safely switch this vnode to the trigger vnodeops.
1944 1906 *
1945 1907 * Currently, we don't ever switch a trigger vnode back to using
1946 1908 * "regular" v4 vnodeops. NFS4_STUB_NONE is only used to note that
1947 1909 * a new v4 object is not a trigger, and it will already have the
1948 1910 * correct v4 vnodeops by default. So, no "else" case required here.
1949 1911 */
1950 1912 if (type != NFS4_STUB_NONE)
1951 1913 vn_setops(vp, nfs4_trigger_vnodeops);
1952 1914 }
1953 1915
1954 1916 void
1955 1917 r4_stub_mirrormount(rnode4_t *rp)
1956 1918 {
1957 1919 r4_stub_set(rp, NFS4_STUB_MIRRORMOUNT);
1958 1920 }
1959 1921
1960 1922 void
1961 1923 r4_stub_referral(rnode4_t *rp)
1962 1924 {
1963 1925 DTRACE_PROBE1(nfs4clnt__func__referral__moved,
1964 1926 vnode_t *, RTOV4(rp));
1965 1927 r4_stub_set(rp, NFS4_STUB_REFERRAL);
1966 1928 }
1967 1929
1968 1930 void
1969 1931 r4_stub_none(rnode4_t *rp)
1970 1932 {
1971 1933 r4_stub_set(rp, NFS4_STUB_NONE);
1972 1934 }
1973 1935
1974 1936 #ifdef DEBUG
1975 1937
1976 1938 /*
1977 1939 * Look in the rnode table for other rnodes that have the same filehandle.
1978 1940 * Assume the lock is held for the hash chain of checkrp
1979 1941 */
1980 1942
1981 1943 static void
1982 1944 r4_dup_check(rnode4_t *checkrp, vfs_t *vfsp)
1983 1945 {
1984 1946 rnode4_t *rp;
1985 1947 vnode_t *tvp;
1986 1948 nfs4_fhandle_t fh, fh2;
1987 1949 int index;
1988 1950
1989 1951 if (!r4_check_for_dups)
1990 1952 return;
1991 1953
1992 1954 ASSERT(RW_LOCK_HELD(&checkrp->r_hashq->r_lock));
1993 1955
1994 1956 sfh4_copyval(checkrp->r_fh, &fh);
1995 1957
1996 1958 for (index = 0; index < rtable4size; index++) {
1997 1959
1998 1960 if (&rtable4[index] != checkrp->r_hashq)
1999 1961 rw_enter(&rtable4[index].r_lock, RW_READER);
2000 1962
2001 1963 for (rp = rtable4[index].r_hashf;
2002 1964 rp != (rnode4_t *)(&rtable4[index]);
2003 1965 rp = rp->r_hashf) {
2004 1966
2005 1967 if (rp == checkrp)
2006 1968 continue;
2007 1969
2008 1970 tvp = RTOV4(rp);
2009 1971 if (tvp->v_vfsp != vfsp)
2010 1972 continue;
2011 1973
2012 1974 sfh4_copyval(rp->r_fh, &fh2);
2013 1975 if (nfs4cmpfhandle(&fh, &fh2) == 0) {
2014 1976 cmn_err(CE_PANIC, "rnodes with same fs, fh "
2015 1977 "(%p, %p)", (void *)checkrp, (void *)rp);
2016 1978 }
2017 1979 }
2018 1980
2019 1981 if (&rtable4[index] != checkrp->r_hashq)
2020 1982 rw_exit(&rtable4[index].r_lock);
2021 1983 }
2022 1984 }
2023 1985
2024 1986 #endif /* DEBUG */
↓ open down ↓ |
362 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX