Print this page
4827 nfs4: slow file locking
4837 NFSv4 client lock retry delay upper limit should be shorter

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/nfs/nfs4_vnops.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_vnops.c
↓ open down ↓ 27 lines elided ↑ open up ↑
  28   28  
  29   29  /*
  30   30   *      Copyright 1983,1984,1985,1986,1987,1988,1989 AT&T.
  31   31   *      All Rights Reserved
  32   32   */
  33   33  
  34   34  /*
  35   35   * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  36   36   */
  37   37  
       38 +/*
       39 + * Copyright (c) 2014, STRATO AG. All rights reserved.
       40 + */
       41 +
  38   42  #include <sys/param.h>
  39   43  #include <sys/types.h>
  40   44  #include <sys/systm.h>
  41   45  #include <sys/cred.h>
  42   46  #include <sys/time.h>
  43   47  #include <sys/vnode.h>
  44   48  #include <sys/vfs.h>
  45   49  #include <sys/vfs_opreg.h>
  46   50  #include <sys/file.h>
  47   51  #include <sys/filio.h>
↓ open down ↓ 235 lines elided ↑ open up ↑
 283  287  int     nfs4_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
 284  288              caller_context_t *);
 285  289  
 286  290  /*
 287  291   * Used for nfs4_commit_vp() to indicate if we should
 288  292   * wait on pending writes.
 289  293   */
 290  294  #define NFS4_WRITE_NOWAIT       0
 291  295  #define NFS4_WRITE_WAIT         1
 292  296  
 293      -#define NFS4_BASE_WAIT_TIME 1   /* 1 second */
 294      -
 295  297  /*
 296  298   * Error flags used to pass information about certain special errors
 297  299   * which need to be handled specially.
 298  300   */
 299  301  #define NFS_EOF                 -98
 300  302  #define NFS_VERF_MISMATCH       -97
 301  303  
 302  304  /*
 303  305   * Flags used to differentiate between which operation drove the
 304  306   * potential CLOSE OTW. (see nfs4_close_otw_if_necessary)
↓ open down ↓ 50 lines elided ↑ open up ↑
 355  357  int nfs4close_all_cnt;
 356  358  int nfs4close_one_debug = 0;
 357  359  int nfs4close_notw_debug = 0;
 358  360  
 359  361  int denied_to_flk_debug = 0;
 360  362  void *lockt_denied_debug;
 361  363  
 362  364  #endif
 363  365  
 364  366  /*
      367 + * Base retry delay for blocked lock requests, in milliseconds. Should be
      368 + * less than half of the lease time or, better, less than one second.
      369 + */
      370 +int nfs4_base_wait_time = 20;
      371 +
      372 +/*
 365  373   * How long to wait before trying again if OPEN_CONFIRM gets ETIMEDOUT
 366  374   * or NFS4ERR_RESOURCE.
 367  375   */
 368  376  static int confirm_retry_sec = 30;
 369  377  
 370  378  static int nfs4_lookup_neg_cache = 1;
 371  379  
 372  380  /*
 373  381   * number of pages to read ahead
 374  382   * optimized for 100 base-T.
↓ open down ↓ 12591 lines elided ↑ open up ↑
12966 12974  /*
12967 12975   * Do the remaining preliminary setup for nfs4frlock.
12968 12976   */
12969 12977  static void
12970 12978  nfs4frlock_pre_setup(clock_t *tick_delayp, nfs4_recov_state_t *recov_statep,
12971 12979      flock64_t *flk, short *whencep, vnode_t *vp, cred_t *search_cr,
12972 12980      cred_t **cred_otw)
12973 12981  {
12974 12982          /*
12975 12983           * set tick_delay to the base delay time.
12976      -         * (NFS4_BASE_WAIT_TIME is in secs)
     12984 +         * (nfs4_base_wait_time is in msecs)
12977 12985           */
12978 12986  
12979      -        *tick_delayp = drv_usectohz(NFS4_BASE_WAIT_TIME * 1000 * 1000);
     12987 +        *tick_delayp = drv_usectohz(nfs4_base_wait_time * 1000);
12980 12988  
12981 12989          /*
12982 12990           * If lock is relative to EOF, we need the newest length of the
12983 12991           * file. Therefore invalidate the ATTR_CACHE.
12984 12992           */
12985 12993  
12986 12994          *whencep = flk->l_whence;
12987 12995  
12988 12996          if (*whencep == 2)              /* SEEK_END */
12989 12997                  PURGE_ATTRCACHE4(vp);
↓ open down ↓ 1750 lines elided ↑ open up ↑
14740 14748                  lock_owner_rele(lop);
14741 14749          }
14742 14750  
14743 14751          nfs4_end_fop(mi, vp, NULL, OH_LOCKU, &recov_state, 0);
14744 14752          return (0);
14745 14753  }
14746 14754  
14747 14755  /*
14748 14756   * Wait for 'tick_delay' clock ticks.
14749 14757   * Implement exponential backoff until hit the lease_time of this nfs4_server.
14750      - * NOTE: lock_lease_time is in seconds.
     14758 + *
     14759 + * The client should retry acquiring the lock more often than once per lease
     14760 + * period. The delay is capped at roughly half of the lease time, similar to
     14761 + * the calculation used in nfs4_renew_lease_thread().
14751 14762   *
14752 14763   * XXX For future improvements, should implement a waiting queue scheme.
14753 14764   */
14754 14765  static int
14755 14766  nfs4_block_and_wait(clock_t *tick_delay, rnode4_t *rp)
14756 14767  {
14757      -        long milliseconds_delay;
14758      -        time_t lock_lease_time;
     14768 +        long max_msec_delay = 1 * 1000;         /* 1 sec */
     14769 +        nfs4_server_t *sp;
     14770 +        mntinfo4_t *mi = VTOMI4(RTOV4(rp));
14759 14771  
14760 14772          /* wait tick_delay clock ticks or until interrupted by a signal */
14761 14773          if (delay_sig(*tick_delay)) {
14762 14774                  return (EINTR);
14763 14775          }
     14776 +
14764 14777          NFS4_DEBUG(nfs4_client_lock_debug, (CE_NOTE, "nfs4_block_and_wait: "
14765 14778              "reissue the lock request: blocked for %ld clock ticks: %ld "
14766 14779              "milliseconds", *tick_delay, drv_hztousec(*tick_delay) / 1000));
14767 14780  
14768      -        /* get the lease time */
14769      -        lock_lease_time = r2lease_time(rp);
14770      -
14771      -        /* drv_hztousec converts ticks to microseconds */
14772      -        milliseconds_delay = drv_hztousec(*tick_delay) / 1000;
14773      -        if (milliseconds_delay < lock_lease_time * 1000) {
14774      -                *tick_delay = 2 * *tick_delay;
14775      -                if (drv_hztousec(*tick_delay) > lock_lease_time * 1000 * 1000)
14776      -                        *tick_delay = drv_usectohz(lock_lease_time*1000*1000);
     14781 +        /*
     14782 +         * Get the current lease time and propagation time for the server
     14783 +         * associated with the given file. Note that both times could
     14784 +         * change immediately after this section.
     14785 +         */
     14786 +        nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
     14787 +        sp = find_nfs4_server(mi);
     14788 +        if (sp != NULL) {
     14789 +                if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) {
     14790 +                        max_msec_delay = sp->s_lease_time * 1000 / 2 -
     14791 +                                        (3 * sp->propagation_delay.tv_sec *
     14792 +                                        1000);
     14793 +                }
     14794 +                mutex_exit(&sp->s_lock);
     14795 +                nfs4_server_rele(sp);
14777 14796          }
     14797 +        nfs_rw_exit(&mi->mi_recovlock);
     14798 +
     14799 +        max_msec_delay = MAX(max_msec_delay, nfs4_base_wait_time);
     14800 +        *tick_delay = MIN(drv_usectohz(max_msec_delay * 1000), *tick_delay * 2);
14778 14801          return (0);
14779 14802  }
14780 14803  
14781      -
14782 14804  void
14783 14805  nfs4_vnops_init(void)
14784 14806  {
14785 14807  }
14786 14808  
14787 14809  void
14788 14810  nfs4_vnops_fini(void)
14789 14811  {
14790 14812  }
14791 14813  
↓ open down ↓ 1212 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX