18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 /*
30 * Copyright 1983,1984,1985,1986,1987,1988,1989 AT&T.
31 * All Rights Reserved
32 */
33
34 /*
35 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/time.h>
43 #include <sys/vnode.h>
44 #include <sys/vfs.h>
45 #include <sys/vfs_opreg.h>
46 #include <sys/file.h>
47 #include <sys/filio.h>
48 #include <sys/uio.h>
49 #include <sys/buf.h>
50 #include <sys/mman.h>
51 #include <sys/pathname.h>
52 #include <sys/dirent.h>
53 #include <sys/debug.h>
54 #include <sys/vmsystm.h>
55 #include <sys/fcntl.h>
56 #include <sys/flock.h>
57 #include <sys/swap.h>
273 struct pathname *, int, vnode_t *, cred_t *,
274 caller_context_t *, int *, pathname_t *);
275 int nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
276 int nfs4_rwlock(vnode_t *, int, caller_context_t *);
277 void nfs4_rwunlock(vnode_t *, int, caller_context_t *);
278 int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
279 int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
280 caller_context_t *);
281 int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
282 caller_context_t *);
283 int nfs4_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
284 caller_context_t *);
285
286 /*
287 * Used for nfs4_commit_vp() to indicate if we should
288 * wait on pending writes.
289 */
290 #define NFS4_WRITE_NOWAIT 0
291 #define NFS4_WRITE_WAIT 1
292
293 #define NFS4_BASE_WAIT_TIME 1 /* 1 second */
294
295 /*
296 * Error flags used to pass information about certain special errors
297 * which need to be handled specially.
298 */
299 #define NFS_EOF -98
300 #define NFS_VERF_MISMATCH -97
301
/*
 * Flags used to identify which operation drove the
 * potential CLOSE OTW. (see nfs4_close_otw_if_necessary)
 */
306 #define NFS4_CLOSE_OP 0x1
307 #define NFS4_DELMAP_OP 0x2
308 #define NFS4_INACTIVE_OP 0x3
309
/*
 * Nonzero if vnode type t is VBLK, VCHR or VFIFO.  The argument is
 * parenthesized so that an expression (e.g. "x & mask") can be passed
 * safely; note that it may still be evaluated up to three times.
 */
#define	ISVDEV(t)	(((t) == VBLK) || ((t) == VCHR) || ((t) == VFIFO))
311
312 /* ALIGN64 aligns the given buffer and adjust buffer size to 64 bit */
313 #define ALIGN64(x, ptr, sz) \
314 x = ((uintptr_t)(ptr)) & (sizeof (uint64_t) - 1); \
345
346 static int nfs4_bio_do_stop = 0;
347
348 static int nfs4_lostpage = 0; /* number of times we lost original page */
349
350 int nfs4_mmap_debug = 0;
351
352 static int nfs4_pathconf_cache_hits = 0;
353 static int nfs4_pathconf_cache_misses = 0;
354
355 int nfs4close_all_cnt;
356 int nfs4close_one_debug = 0;
357 int nfs4close_notw_debug = 0;
358
359 int denied_to_flk_debug = 0;
360 void *lockt_denied_debug;
361
362 #endif
363
364 /*
365 * How long to wait before trying again if OPEN_CONFIRM gets ETIMEDOUT
366 * or NFS4ERR_RESOURCE.
367 */
368 static int confirm_retry_sec = 30;
369
370 static int nfs4_lookup_neg_cache = 1;
371
372 /*
373 * number of pages to read ahead
374 * optimized for 100 base-T.
375 */
376 static int nfs4_nra = 4;
377
378 static int nfs4_do_symlink_cache = 1;
379
380 static int nfs4_pathconf_disable_cache = 0;
381
382 /*
383 * These are the vnode ops routines which implement the vnode interface to
384 * the networked file system. These routines just take their parameters,
12956 NFS4_DEBUG(nfs4_client_lock_debug, (CE_NOTE,
12957 "nfs4frlock_get_sysid: no sysid, return ENOLCK"));
12958 return (ENOLCK);
12959 }
12960
12961 flk->l_sysid = lm_sysidt(*lspp);
12962
12963 return (0);
12964 }
12965
12966 /*
12967 * Do the remaining preliminary setup for nfs4frlock.
12968 */
12969 static void
12970 nfs4frlock_pre_setup(clock_t *tick_delayp, nfs4_recov_state_t *recov_statep,
12971 flock64_t *flk, short *whencep, vnode_t *vp, cred_t *search_cr,
12972 cred_t **cred_otw)
12973 {
12974 /*
12975 * set tick_delay to the base delay time.
12976 * (NFS4_BASE_WAIT_TIME is in secs)
12977 */
12978
12979 *tick_delayp = drv_usectohz(NFS4_BASE_WAIT_TIME * 1000 * 1000);
12980
12981 /*
12982 * If lock is relative to EOF, we need the newest length of the
12983 * file. Therefore invalidate the ATTR_CACHE.
12984 */
12985
12986 *whencep = flk->l_whence;
12987
12988 if (*whencep == 2) /* SEEK_END */
12989 PURGE_ATTRCACHE4(vp);
12990
12991 recov_statep->rs_flags = 0;
12992 recov_statep->rs_num_retry_despite_err = 0;
12993 *cred_otw = nfs4_get_otw_cred(search_cr, VTOMI4(vp), NULL);
12994 }
12995
12996 /*
12997 * Initialize and allocate the data structures necessary for
12998 * the nfs4frlock call.
12999 * Allocates argsp's op array, frees up the saved_rqstpp if there is one.
14730
14731 /*
14732 * So, here we're going to need to retrieve the lock-owner
14733 * again (in case recovery has done a switch-a-roo) and
14734 * remove it because we can.
14735 */
14736 lop = find_lock_owner(rp, curproc->p_pid, LOWN_ANY);
14737
14738 if (lop) {
14739 nfs4_rnode_remove_lock_owner(rp, lop);
14740 lock_owner_rele(lop);
14741 }
14742
14743 nfs4_end_fop(mi, vp, NULL, OH_LOCKU, &recov_state, 0);
14744 return (0);
14745 }
14746
14747 /*
14748 * Wait for 'tick_delay' clock ticks.
14749 * Implement exponential backoff until hit the lease_time of this nfs4_server.
14750 * NOTE: lock_lease_time is in seconds.
14751 *
14752 * XXX For future improvements, should implement a waiting queue scheme.
14753 */
14754 static int
14755 nfs4_block_and_wait(clock_t *tick_delay, rnode4_t *rp)
14756 {
14757 long milliseconds_delay;
14758 time_t lock_lease_time;
14759
14760 /* wait tick_delay clock ticks or siginteruptus */
14761 if (delay_sig(*tick_delay)) {
14762 return (EINTR);
14763 }
14764 NFS4_DEBUG(nfs4_client_lock_debug, (CE_NOTE, "nfs4_block_and_wait: "
14765 "reissue the lock request: blocked for %ld clock ticks: %ld "
14766 "milliseconds", *tick_delay, drv_hztousec(*tick_delay) / 1000));
14767
14768 /* get the lease time */
14769 lock_lease_time = r2lease_time(rp);
14770
14771 /* drv_hztousec converts ticks to microseconds */
14772 milliseconds_delay = drv_hztousec(*tick_delay) / 1000;
14773 if (milliseconds_delay < lock_lease_time * 1000) {
14774 *tick_delay = 2 * *tick_delay;
14775 if (drv_hztousec(*tick_delay) > lock_lease_time * 1000 * 1000)
14776 *tick_delay = drv_usectohz(lock_lease_time*1000*1000);
14777 }
14778 return (0);
14779 }
14780
14781
/*
 * Initialization hook for this file.  Nothing needs to be set up at
 * present, so the body is intentionally empty.
 */
void
nfs4_vnops_init(void)
{
	/* nothing to do */
}
14786
/*
 * Teardown hook for this file.  Nothing needs to be released at present,
 * so the body is intentionally empty.
 */
void
nfs4_vnops_fini(void)
{
	/* nothing to do */
}
14791
14792 /*
14793 * Return a reference to the directory (parent) vnode for a given vnode,
14794 * using the saved pathname information and the directory file handle. The
14795 * caller is responsible for disposing of the reference.
14796 * Returns zero or an errno value.
14797 *
14798 * Caller should set need_start_op to FALSE if it is the recovery
14799 * thread, or if a start_fop has already been done. Otherwise, TRUE.
14800 */
14801 int
|
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 /*
30 * Copyright 1983,1984,1985,1986,1987,1988,1989 AT&T.
31 * All Rights Reserved
32 */
33
34 /*
35 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
36 */
37
38 /*
39 * Copyright (c) 2014, STRATO AG. All rights reserved.
40 */
41
42 #include <sys/param.h>
43 #include <sys/types.h>
44 #include <sys/systm.h>
45 #include <sys/cred.h>
46 #include <sys/time.h>
47 #include <sys/vnode.h>
48 #include <sys/vfs.h>
49 #include <sys/vfs_opreg.h>
50 #include <sys/file.h>
51 #include <sys/filio.h>
52 #include <sys/uio.h>
53 #include <sys/buf.h>
54 #include <sys/mman.h>
55 #include <sys/pathname.h>
56 #include <sys/dirent.h>
57 #include <sys/debug.h>
58 #include <sys/vmsystm.h>
59 #include <sys/fcntl.h>
60 #include <sys/flock.h>
61 #include <sys/swap.h>
277 struct pathname *, int, vnode_t *, cred_t *,
278 caller_context_t *, int *, pathname_t *);
279 int nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
280 int nfs4_rwlock(vnode_t *, int, caller_context_t *);
281 void nfs4_rwunlock(vnode_t *, int, caller_context_t *);
282 int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
283 int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
284 caller_context_t *);
285 int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
286 caller_context_t *);
287 int nfs4_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
288 caller_context_t *);
289
290 /*
291 * Used for nfs4_commit_vp() to indicate if we should
292 * wait on pending writes.
293 */
294 #define NFS4_WRITE_NOWAIT 0
295 #define NFS4_WRITE_WAIT 1
296
297 /*
298 * Error flags used to pass information about certain special errors
299 * which need to be handled specially.
300 */
301 #define NFS_EOF -98
302 #define NFS_VERF_MISMATCH -97
303
/*
 * Flags used to identify which operation drove the
 * potential CLOSE OTW. (see nfs4_close_otw_if_necessary)
 */
308 #define NFS4_CLOSE_OP 0x1
309 #define NFS4_DELMAP_OP 0x2
310 #define NFS4_INACTIVE_OP 0x3
311
/*
 * Nonzero if vnode type t is VBLK, VCHR or VFIFO.  The argument is
 * parenthesized so that an expression (e.g. "x & mask") can be passed
 * safely; note that it may still be evaluated up to three times.
 */
#define	ISVDEV(t)	(((t) == VBLK) || ((t) == VCHR) || ((t) == VFIFO))
313
314 /* ALIGN64 aligns the given buffer and adjust buffer size to 64 bit */
315 #define ALIGN64(x, ptr, sz) \
316 x = ((uintptr_t)(ptr)) & (sizeof (uint64_t) - 1); \
347
348 static int nfs4_bio_do_stop = 0;
349
350 static int nfs4_lostpage = 0; /* number of times we lost original page */
351
352 int nfs4_mmap_debug = 0;
353
354 static int nfs4_pathconf_cache_hits = 0;
355 static int nfs4_pathconf_cache_misses = 0;
356
357 int nfs4close_all_cnt;
358 int nfs4close_one_debug = 0;
359 int nfs4close_notw_debug = 0;
360
361 int denied_to_flk_debug = 0;
362 void *lockt_denied_debug;
363
364 #endif
365
/*
 * Base delay before retrying a blocked lock request, in milliseconds.
 * Should be less than half of the lease time or, better, less than one
 * second.
 */
370 int nfs4_base_wait_time = 20;
371
372 /*
373 * How long to wait before trying again if OPEN_CONFIRM gets ETIMEDOUT
374 * or NFS4ERR_RESOURCE.
375 */
376 static int confirm_retry_sec = 30;
377
378 static int nfs4_lookup_neg_cache = 1;
379
380 /*
381 * number of pages to read ahead
382 * optimized for 100 base-T.
383 */
384 static int nfs4_nra = 4;
385
386 static int nfs4_do_symlink_cache = 1;
387
388 static int nfs4_pathconf_disable_cache = 0;
389
390 /*
391 * These are the vnode ops routines which implement the vnode interface to
392 * the networked file system. These routines just take their parameters,
12964 NFS4_DEBUG(nfs4_client_lock_debug, (CE_NOTE,
12965 "nfs4frlock_get_sysid: no sysid, return ENOLCK"));
12966 return (ENOLCK);
12967 }
12968
12969 flk->l_sysid = lm_sysidt(*lspp);
12970
12971 return (0);
12972 }
12973
12974 /*
12975 * Do the remaining preliminary setup for nfs4frlock.
12976 */
12977 static void
12978 nfs4frlock_pre_setup(clock_t *tick_delayp, nfs4_recov_state_t *recov_statep,
12979 flock64_t *flk, short *whencep, vnode_t *vp, cred_t *search_cr,
12980 cred_t **cred_otw)
12981 {
12982 /*
12983 * set tick_delay to the base delay time.
12984 * (nfs4_base_wait_time is in msecs)
12985 */
12986
12987 *tick_delayp = drv_usectohz(nfs4_base_wait_time * 1000);
12988
12989 /*
12990 * If lock is relative to EOF, we need the newest length of the
12991 * file. Therefore invalidate the ATTR_CACHE.
12992 */
12993
12994 *whencep = flk->l_whence;
12995
12996 if (*whencep == 2) /* SEEK_END */
12997 PURGE_ATTRCACHE4(vp);
12998
12999 recov_statep->rs_flags = 0;
13000 recov_statep->rs_num_retry_despite_err = 0;
13001 *cred_otw = nfs4_get_otw_cred(search_cr, VTOMI4(vp), NULL);
13002 }
13003
13004 /*
13005 * Initialize and allocate the data structures necessary for
13006 * the nfs4frlock call.
13007 * Allocates argsp's op array, frees up the saved_rqstpp if there is one.
14738
14739 /*
14740 * So, here we're going to need to retrieve the lock-owner
14741 * again (in case recovery has done a switch-a-roo) and
14742 * remove it because we can.
14743 */
14744 lop = find_lock_owner(rp, curproc->p_pid, LOWN_ANY);
14745
14746 if (lop) {
14747 nfs4_rnode_remove_lock_owner(rp, lop);
14748 lock_owner_rele(lop);
14749 }
14750
14751 nfs4_end_fop(mi, vp, NULL, OH_LOCKU, &recov_state, 0);
14752 return (0);
14753 }
14754
14755 /*
14756 * Wait for 'tick_delay' clock ticks.
14757 * Implement exponential backoff until hit the lease_time of this nfs4_server.
14758 *
14759 * The client should retry to acquire the lock faster than the lease period.
14760 * We use roughly half of the lease time to use a similar calculation as it is
14761 * used in nfs4_renew_lease_thread().
14762 *
14763 * XXX For future improvements, should implement a waiting queue scheme.
14764 */
14765 static int
14766 nfs4_block_and_wait(clock_t *tick_delay, rnode4_t *rp)
14767 {
14768 long max_msec_delay = 1 * 1000; /* 1 sec */
14769 nfs4_server_t *sp;
14770 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
14771
14772 /* wait tick_delay clock ticks or siginteruptus */
14773 if (delay_sig(*tick_delay)) {
14774 return (EINTR);
14775 }
14776
14777 NFS4_DEBUG(nfs4_client_lock_debug, (CE_NOTE, "nfs4_block_and_wait: "
14778 "reissue the lock request: blocked for %ld clock ticks: %ld "
14779 "milliseconds", *tick_delay, drv_hztousec(*tick_delay) / 1000));
14780
14781 /*
14782 * Get the current lease time and propagation time for the server
14783 * associated with the given file. Note that both times could
14784 * change immediately after this section.
14785 */
14786 nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
14787 sp = find_nfs4_server(mi);
14788 if (sp != NULL) {
14789 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) {
14790 max_msec_delay = sp->s_lease_time * 1000 / 2 -
14791 (3 * sp->propagation_delay.tv_sec *
14792 1000);
14793 }
14794 mutex_exit(&sp->s_lock);
14795 nfs4_server_rele(sp);
14796 }
14797 nfs_rw_exit(&mi->mi_recovlock);
14798
14799 max_msec_delay = MAX(max_msec_delay, nfs4_base_wait_time);
14800 *tick_delay = MIN(drv_usectohz(max_msec_delay * 1000), *tick_delay * 2);
14801 return (0);
14802 }
14803
/*
 * Initialization hook for this file.  Nothing needs to be set up at
 * present, so the body is intentionally empty.
 */
void
nfs4_vnops_init(void)
{
	/* nothing to do */
}
14808
/*
 * Teardown hook for this file.  Nothing needs to be released at present,
 * so the body is intentionally empty.
 */
void
nfs4_vnops_fini(void)
{
	/* nothing to do */
}
14813
14814 /*
14815 * Return a reference to the directory (parent) vnode for a given vnode,
14816 * using the saved pathname information and the directory file handle. The
14817 * caller is responsible for disposing of the reference.
14818 * Returns zero or an errno value.
14819 *
14820 * Caller should set need_start_op to FALSE if it is the recovery
14821 * thread, or if a start_fop has already been done. Otherwise, TRUE.
14822 */
14823 int
|