1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  26 /*      All Rights Reserved   */
  27 
  28 /*
  29  * Portions of this source code were derived from Berkeley 4.3 BSD
  30  * under license from the Regents of the University of California.
  31  */
  32 
  33 #ifndef _NFS4_CLNT_H
  34 #define _NFS4_CLNT_H
  35 
  36 #include <sys/errno.h>
  37 #include <sys/types.h>
  38 #include <sys/kstat.h>
  39 #include <sys/time.h>
  40 #include <sys/flock.h>
  41 #include <vm/page.h>
  42 #include <nfs/nfs4_kprot.h>
  43 #include <nfs/nfs4.h>
  44 #include <nfs/rnode.h>
  45 #include <sys/avl.h>
  46 #include <sys/list.h>
  47 #include <rpc/auth.h>
  48 #include <sys/door.h>
  49 #include <sys/condvar_impl.h>
  50 #include <sys/zone.h>
  51 
  52 #ifdef  __cplusplus
  53 extern "C" {
  54 #endif
  55 
/* Is the given file size representable by the client (fits in MAXOFFSET_T)? */
#define NFS4_SIZE_OK(size)      ((size) <= MAXOFFSET_T)

/* Four states of nfs4_server's lease_valid */
#define NFS4_LEASE_INVALID              0
#define NFS4_LEASE_VALID                1
#define NFS4_LEASE_UNINITIALIZED        2
#define NFS4_LEASE_NOT_STARTED          3

/* flag to tell the renew thread it should exit */
#define NFS4_THREAD_EXIT        1

/* Default number of seconds to wait on GRACE and DELAY errors */
#define NFS4ERR_DELAY_TIME      10

/* Number of hash buckets for open owners for each nfs4_server */
#define NFS4_NUM_OO_BUCKETS     53

/* Number of freed open owners (per mntinfo4_t) to keep around */
#define NFS4_NUM_FREED_OPEN_OWNERS      8

/* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
#define NFS4_RETRY_SCLID_DELAY  10

/* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
#define NFS4_NUM_SCLID_RETRIES  3

/* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */
#define NFS4_NUM_RETRY_BAD_SEQID        3
  84 
  85 /*
  86  * Macro to wakeup sleeping async worker threads.
  87  */
  88 #define NFS4_WAKE_ASYNC_WORKER(work_cv) {                               \
  89         if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_QUEUE]))             \
  90                 cv_signal(&work_cv[NFS4_ASYNC_QUEUE]);                      \
  91         else if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]))  \
  92                 cv_signal(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);                \
  93 }
  94 
  95 #define NFS4_WAKEALL_ASYNC_WORKERS(work_cv) {                           \
  96                 cv_broadcast(&work_cv[NFS4_ASYNC_QUEUE]);           \
  97                 cv_broadcast(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);             \
  98 }
  99 
 100 /*
 101  * Is the attribute cache valid?  If client holds a delegation, then attrs
 102  * are by definition valid.  If not, then check to see if attrs have timed out.
 103  */
 104 #define ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \
 105         gethrtime() < VTOR4(vp)->r_time_attr_inval)
 106 
 107 /*
 108  * Flags to indicate whether to purge the DNLC for non-directory vnodes
 109  * in a call to nfs_purge_caches.
 110  */
 111 #define NFS4_NOPURGE_DNLC       0
 112 #define NFS4_PURGE_DNLC         1
 113 
 114 /*
 115  * Is cache valid?
 116  * Swap is always valid, if no attributes (attrtime == 0) or
 117  * if mtime matches cached mtime it is valid
 118  * NOTE: mtime is now a timestruc_t.
 119  * Caller should be holding the rnode r_statelock mutex.
 120  */
 121 #define CACHE4_VALID(rp, mtime, fsize)                          \
 122         ((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP ||             \
 123         (((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&  \
 124         (mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) && \
 125         ((fsize) == (rp)->r_attr.va_size)))
 126 
 127 /*
 128  * Macro to detect forced unmount or a zone shutdown.
 129  */
 130 #define FS_OR_ZONE_GONE4(vfsp) \
 131         (((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
 132         zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
 133 
 134 /*
 135  * Macro to help determine whether a request failed because the underlying
 136  * filesystem has been forcibly unmounted or because of zone shutdown.
 137  */
 138 #define NFS4_FRC_UNMT_ERR(err, vfsp) \
 139         ((err) == EIO && FS_OR_ZONE_GONE4((vfsp)))
 140 
 141 /*
 142  * Due to the way the address space callbacks are used to execute a delmap,
 143  * we must keep track of how many times the same thread has called
 144  * VOP_DELMAP()->nfs4_delmap().  This is done by having a list of
 145  * nfs4_delmapcall_t's associated with each rnode4_t.  This list is protected
 146  * by the rnode4_t's r_statelock.  The individual elements do not need to be
 147  * protected as they will only ever be created, modified and destroyed by
 148  * one thread (the call_id).
 149  * See nfs4_delmap() for further explanation.
 150  */
 151 typedef struct nfs4_delmapcall {
 152         kthread_t       *call_id;
 153         int             error;  /* error from delmap */
 154         list_node_t     call_node;
 155 } nfs4_delmapcall_t;
 156 
 157 /*
 158  * delmap address space callback args
 159  */
 160 typedef struct nfs4_delmap_args {
 161         vnode_t                 *vp;
 162         offset_t                off;
 163         caddr_t                 addr;
 164         size_t                  len;
 165         uint_t                  prot;
 166         uint_t                  maxprot;
 167         uint_t                  flags;
 168         cred_t                  *cr;
 169         nfs4_delmapcall_t       *caller; /* to retrieve errors from the cb */
 170 } nfs4_delmap_args_t;
 171 
 172 /*
 173  * client side statistics
 174  */
 175 /*
 176  * Per-zone counters
 177  */
 178 struct clstat4 {
 179         kstat_named_t   calls;                  /* client requests */
 180         kstat_named_t   badcalls;               /* rpc failures */
 181         kstat_named_t   referrals;              /* referrals */
 182         kstat_named_t   referlinks;             /* referrals as symlinks */
 183         kstat_named_t   clgets;                 /* client handle gets */
 184         kstat_named_t   cltoomany;              /* client handle cache misses */
 185 #ifdef DEBUG
 186         kstat_named_t   clalloc;                /* number of client handles */
 187         kstat_named_t   noresponse;             /* server not responding cnt */
 188         kstat_named_t   failover;               /* server failover count */
 189         kstat_named_t   remap;                  /* server remap count */
 190 #endif
 191 };
 192 
#ifdef DEBUG
/*
 * The following are statistics that describe the behavior of the system as a
 * whole and don't correspond to any particular zone.  Only present on
 * DEBUG kernels; the single global instance is clstat4_debug.
 */
struct clstat4_debug {
        kstat_named_t   nrnode;                 /* number of allocated rnodes */
        kstat_named_t   access;                 /* size of access cache */
        kstat_named_t   dirent;                 /* size of readdir cache */
        kstat_named_t   dirents;                /* size of readdir buf cache */
        kstat_named_t   reclaim;                /* number of reclaims */
        kstat_named_t   clreclaim;              /* number of cl reclaims */
        kstat_named_t   f_reclaim;              /* number of free reclaims */
        kstat_named_t   a_reclaim;              /* number of active reclaims */
        kstat_named_t   r_reclaim;              /* number of rnode reclaims */
        kstat_named_t   rpath;                  /* bytes used to store rpaths */
};
extern struct clstat4_debug clstat4_debug;

#endif
 213 
 214 /*
 215  * The NFS specific async_reqs structure. iotype4 is grouped to support two
 216  * types of async thread pools, please read comments section of mntinfo4_t
 217  * definition for more information. Care should be taken while adding new
 218  * members to this group.
 219  */
 220 
 221 enum iotype4 {
 222         NFS4_PUTAPAGE,
 223         NFS4_PAGEIO,
 224         NFS4_COMMIT,
 225         NFS4_READ_AHEAD,
 226         NFS4_READDIR,
 227         NFS4_INACTIVE,
 228         NFS4_ASYNC_TYPES
 229 };
 230 #define NFS4_ASYNC_PGOPS_TYPES  (NFS4_COMMIT + 1)
 231 
 232 /*
 233  * NFS async requests queue type.
 234  */
 235 enum ioqtype4 {
 236         NFS4_ASYNC_QUEUE,
 237         NFS4_ASYNC_PGOPS_QUEUE,
 238         NFS4_MAX_ASYNC_QUEUES
 239 };
 240 
 241 /*
 242  * Number of NFS async threads operating exclusively on page op requests.
 243  */
 244 #define NUM_ASYNC_PGOPS_THREADS 0x2
 245 
/* Arguments for an async read-ahead (NFS4_READ_AHEAD) request. */
struct nfs4_async_read_req {
        void (*readahead)();            /* pointer to readahead function */
        u_offset_t blkoff;              /* offset in file */
        struct seg *seg;                /* segment to do i/o to */
        caddr_t addr;                   /* address to do i/o to */
};
 252 
/* Arguments for an async putapage/pageio (NFS4_PUTAPAGE, NFS4_PAGEIO) request. */
struct nfs4_pageio_req {
        int (*pageio)();                /* pointer to pageio function */
        page_t *pp;                     /* page list */
        u_offset_t io_off;              /* offset in file */
        uint_t io_len;                  /* size of request */
        int flags;                      /* pageio flags */
};
 260 
/* Arguments for an async readdir (NFS4_READDIR) request. */
struct nfs4_readdir_req {
        int (*readdir)();               /* pointer to readdir function */
        struct rddir4_cache *rdc;       /* pointer to cache entry to fill */
};
 265 
/* Arguments for an async commit (NFS4_COMMIT) request. */
struct nfs4_commit_req {
        void (*commit)();               /* pointer to commit function */
        page_t *plist;                  /* page list */
        offset4 offset;                 /* starting offset */
        count4 count;                   /* size of range to be committed */
};
 272 
/*
 * A queued async request: common bookkeeping plus a per-iotype argument
 * union.  Use the a_nfs4_* accessor macros below rather than spelling
 * out the union member paths.
 */
struct nfs4_async_reqs {
        struct nfs4_async_reqs *a_next; /* pointer to next arg struct */
#ifdef DEBUG
        kthread_t *a_queuer;            /* thread id of queueing thread */
#endif
        struct vnode *a_vp;             /* vnode pointer */
        struct cred *a_cred;            /* cred pointer */
        enum iotype4 a_io;              /* i/o type */
        union {
                struct nfs4_async_read_req a_read_args;
                struct nfs4_pageio_req a_pageio_args;
                struct nfs4_readdir_req a_readdir_args;
                struct nfs4_commit_req a_commit_args;
        } a_args;
};

/* Accessors for the read-ahead arguments */
#define a_nfs4_readahead a_args.a_read_args.readahead
#define a_nfs4_blkoff a_args.a_read_args.blkoff
#define a_nfs4_seg a_args.a_read_args.seg
#define a_nfs4_addr a_args.a_read_args.addr

/* Accessors for the pageio arguments (putapage and pageio share a slot) */
#define a_nfs4_putapage a_args.a_pageio_args.pageio
#define a_nfs4_pageio a_args.a_pageio_args.pageio
#define a_nfs4_pp a_args.a_pageio_args.pp
#define a_nfs4_off a_args.a_pageio_args.io_off
#define a_nfs4_len a_args.a_pageio_args.io_len
#define a_nfs4_flags a_args.a_pageio_args.flags

/* Accessors for the readdir arguments */
#define a_nfs4_readdir a_args.a_readdir_args.readdir
#define a_nfs4_rdc a_args.a_readdir_args.rdc

/* Accessors for the commit arguments */
#define a_nfs4_commit a_args.a_commit_args.commit
#define a_nfs4_plist a_args.a_commit_args.plist
#define a_nfs4_offset a_args.a_commit_args.offset
#define a_nfs4_count a_args.a_commit_args.count
 308 
 309 /*
 310  * Security information
 311  */
 312 typedef struct sv_secinfo {
 313         uint_t          count;  /* how many sdata there are */
 314         uint_t          index;  /* which sdata[index] */
 315         struct sec_data *sdata;
 316 } sv_secinfo_t;
 317 
 318 /*
 319  * Hash bucket for the mi's open owner list (mi_oo_list).
 320  */
 321 typedef struct nfs4_oo_hash_bucket {
 322         list_t                  b_oo_hash_list;
 323         kmutex_t                b_lock;
 324 } nfs4_oo_hash_bucket_t;
 325 
 326 /*
 327  * Global array of ctags.
 328  */
 329 extern ctag_t nfs4_ctags[];
 330 
/*
 * COMPOUND request tag types.  The order of this enum must match the
 * entry order in NFS4_TAG_INITIALIZER, since the values index nfs4_ctags[].
 */
typedef enum nfs4_tag_type {
        TAG_NONE,
        TAG_ACCESS,
        TAG_CLOSE,
        TAG_CLOSE_LOST,
        TAG_CLOSE_UNDO,
        TAG_COMMIT,
        TAG_DELEGRETURN,
        TAG_FSINFO,
        TAG_GET_SYMLINK,
        TAG_GETATTR,
        TAG_GETATTR_FSLOCATION,
        TAG_INACTIVE,
        TAG_LINK,
        TAG_LOCK,
        TAG_LOCK_RECLAIM,
        TAG_LOCK_RESEND,
        TAG_LOCK_REINSTATE,
        TAG_LOCK_UNKNOWN,
        TAG_LOCKT,
        TAG_LOCKU,
        TAG_LOCKU_RESEND,
        TAG_LOCKU_REINSTATE,
        TAG_LOOKUP,
        TAG_LOOKUP_PARENT,
        TAG_LOOKUP_VALID,
        TAG_LOOKUP_VPARENT,
        TAG_MKDIR,
        TAG_MKNOD,
        TAG_MOUNT,
        TAG_OPEN,
        TAG_OPEN_CONFIRM,
        TAG_OPEN_CONFIRM_LOST,
        TAG_OPEN_DG,
        TAG_OPEN_DG_LOST,
        TAG_OPEN_LOST,
        TAG_OPENATTR,
        TAG_PATHCONF,
        TAG_PUTROOTFH,
        TAG_READ,
        TAG_READAHEAD,
        TAG_READDIR,
        TAG_READLINK,
        TAG_RELOCK,
        TAG_REMAP_LOOKUP,
        TAG_REMAP_LOOKUP_AD,
        TAG_REMAP_LOOKUP_NA,
        TAG_REMAP_MOUNT,
        TAG_RMDIR,
        TAG_REMOVE,
        TAG_RENAME,
        TAG_RENAME_VFH,
        TAG_RENEW,
        TAG_REOPEN,
        TAG_REOPEN_LOST,
        TAG_SECINFO,
        TAG_SETATTR,
        TAG_SETCLIENTID,
        TAG_SETCLIENTID_CF,
        TAG_SYMLINK,
        TAG_WRITE
} nfs4_tag_type_t;
 393 
 394 #define NFS4_TAG_INITIALIZER    {                               \
 395                 {TAG_NONE,              "",                     \
 396                         {0x20202020, 0x20202020, 0x20202020}},  \
 397                 {TAG_ACCESS,            "access",               \
 398                         {0x61636365, 0x73732020, 0x20202020}},  \
 399                 {TAG_CLOSE,             "close",                \
 400                         {0x636c6f73, 0x65202020, 0x20202020}},  \
 401                 {TAG_CLOSE_LOST,        "lost close",           \
 402                         {0x6c6f7374, 0x20636c6f, 0x73652020}},  \
 403                 {TAG_CLOSE_UNDO,        "undo close",           \
 404                         {0x756e646f, 0x20636c6f, 0x73652020}},  \
 405                 {TAG_COMMIT,            "commit",               \
 406                         {0x636f6d6d, 0x69742020, 0x20202020}},  \
 407                 {TAG_DELEGRETURN,       "delegreturn",          \
 408                         {0x64656c65, 0x67726574, 0x75726e20}},  \
 409                 {TAG_FSINFO,            "fsinfo",               \
 410                         {0x6673696e, 0x666f2020, 0x20202020}},  \
 411                 {TAG_GET_SYMLINK,       "get symlink text",     \
 412                         {0x67657420, 0x736c6e6b, 0x20747874}},  \
 413                 {TAG_GETATTR,           "getattr",              \
 414                         {0x67657461, 0x74747220, 0x20202020}},  \
 415                 {TAG_GETATTR_FSLOCATION, "getattr fslocation",  \
 416                         {0x67657461, 0x74747220, 0x66736c6f}},  \
 417                 {TAG_INACTIVE,          "inactive",             \
 418                         {0x696e6163, 0x74697665, 0x20202020}},  \
 419                 {TAG_LINK,              "link",                 \
 420                         {0x6c696e6b, 0x20202020, 0x20202020}},  \
 421                 {TAG_LOCK,              "lock",                 \
 422                         {0x6c6f636b, 0x20202020, 0x20202020}},  \
 423                 {TAG_LOCK_RECLAIM,      "reclaim lock",         \
 424                         {0x7265636c, 0x61696d20, 0x6c6f636b}},  \
 425                 {TAG_LOCK_RESEND,       "resend lock",          \
 426                         {0x72657365, 0x6e64206c, 0x6f636b20}},  \
 427                 {TAG_LOCK_REINSTATE,    "reinstate lock",       \
 428                         {0x7265696e, 0x7374206c, 0x6f636b20}},  \
 429                 {TAG_LOCK_UNKNOWN,      "unknown lock",         \
 430                         {0x756e6b6e, 0x6f776e20, 0x6c6f636b}},  \
 431                 {TAG_LOCKT,             "lock test",            \
 432                         {0x6c6f636b, 0x5f746573, 0x74202020}},  \
 433                 {TAG_LOCKU,             "unlock",               \
 434                         {0x756e6c6f, 0x636b2020, 0x20202020}},  \
 435                 {TAG_LOCKU_RESEND,      "resend locku",         \
 436                         {0x72657365, 0x6e64206c, 0x6f636b75}},  \
 437                 {TAG_LOCKU_REINSTATE,   "reinstate unlock",     \
 438                         {0x7265696e, 0x73742075, 0x6e6c636b}},  \
 439                 {TAG_LOOKUP,            "lookup",               \
 440                         {0x6c6f6f6b, 0x75702020, 0x20202020}},  \
 441                 {TAG_LOOKUP_PARENT,     "lookup parent",        \
 442                         {0x6c6f6f6b, 0x75702070, 0x6172656e}},  \
 443                 {TAG_LOOKUP_VALID,      "lookup valid",         \
 444                         {0x6c6f6f6b, 0x75702076, 0x616c6964}},  \
 445                 {TAG_LOOKUP_VPARENT,    "lookup valid parent",  \
 446                         {0x6c6f6f6b, 0x766c6420, 0x7061726e}},  \
 447                 {TAG_MKDIR,             "mkdir",                \
 448                         {0x6d6b6469, 0x72202020, 0x20202020}},  \
 449                 {TAG_MKNOD,             "mknod",                \
 450                         {0x6d6b6e6f, 0x64202020, 0x20202020}},  \
 451                 {TAG_MOUNT,             "mount",                \
 452                         {0x6d6f756e, 0x74202020, 0x20202020}},  \
 453                 {TAG_OPEN,              "open",                 \
 454                         {0x6f70656e, 0x20202020, 0x20202020}},  \
 455                 {TAG_OPEN_CONFIRM,      "open confirm",         \
 456                         {0x6f70656e, 0x5f636f6e, 0x6669726d}},  \
 457                 {TAG_OPEN_CONFIRM_LOST, "lost open confirm",    \
 458                         {0x6c6f7374, 0x206f7065, 0x6e5f636f}},  \
 459                 {TAG_OPEN_DG,           "open downgrade",       \
 460                         {0x6f70656e, 0x20646772, 0x61646520}},  \
 461                 {TAG_OPEN_DG_LOST,      "lost open downgrade",  \
 462                         {0x6c737420, 0x6f70656e, 0x20646772}},  \
 463                 {TAG_OPEN_LOST,         "lost open",            \
 464                         {0x6c6f7374, 0x206f7065, 0x6e202020}},  \
 465                 {TAG_OPENATTR,          "openattr",             \
 466                         {0x6f70656e, 0x61747472, 0x20202020}},  \
 467                 {TAG_PATHCONF,          "pathhconf",            \
 468                         {0x70617468, 0x636f6e66, 0x20202020}},  \
 469                 {TAG_PUTROOTFH,         "putrootfh",            \
 470                         {0x70757472, 0x6f6f7466, 0x68202020}},  \
 471                 {TAG_READ,              "read",                 \
 472                         {0x72656164, 0x20202020, 0x20202020}},  \
 473                 {TAG_READAHEAD,         "readahead",            \
 474                         {0x72656164, 0x61686561, 0x64202020}},  \
 475                 {TAG_READDIR,           "readdir",              \
 476                         {0x72656164, 0x64697220, 0x20202020}},  \
 477                 {TAG_READLINK,          "readlink",             \
 478                         {0x72656164, 0x6c696e6b, 0x20202020}},  \
 479                 {TAG_RELOCK,            "relock",               \
 480                         {0x72656c6f, 0x636b2020, 0x20202020}},  \
 481                 {TAG_REMAP_LOOKUP,      "remap lookup",         \
 482                         {0x72656d61, 0x70206c6f, 0x6f6b7570}},  \
 483                 {TAG_REMAP_LOOKUP_AD,   "remap lookup attr dir",        \
 484                         {0x72656d70, 0x206c6b75, 0x70206164}},  \
 485                 {TAG_REMAP_LOOKUP_NA,   "remap lookup named attrs",     \
 486                         {0x72656d70, 0x206c6b75, 0x70206e61}},  \
 487                 {TAG_REMAP_MOUNT,       "remap mount",          \
 488                         {0x72656d61, 0x70206d6f, 0x756e7420}},  \
 489                 {TAG_RMDIR,             "rmdir",                \
 490                         {0x726d6469, 0x72202020, 0x20202020}},  \
 491                 {TAG_REMOVE,            "remove",               \
 492                         {0x72656d6f, 0x76652020, 0x20202020}},  \
 493                 {TAG_RENAME,            "rename",               \
 494                         {0x72656e61, 0x6d652020, 0x20202020}},  \
 495                 {TAG_RENAME_VFH,        "rename volatile fh",   \
 496                         {0x72656e61, 0x6d652028, 0x76666829}},  \
 497                 {TAG_RENEW,             "renew",                \
 498                         {0x72656e65, 0x77202020, 0x20202020}},  \
 499                 {TAG_REOPEN,            "reopen",               \
 500                         {0x72656f70, 0x656e2020, 0x20202020}},  \
 501                 {TAG_REOPEN_LOST,       "lost reopen",          \
 502                         {0x6c6f7374, 0x2072656f, 0x70656e20}},  \
 503                 {TAG_SECINFO,           "secinfo",              \
 504                         {0x73656369, 0x6e666f20, 0x20202020}},  \
 505                 {TAG_SETATTR,           "setattr",              \
 506                         {0x73657461, 0x74747220, 0x20202020}},  \
 507                 {TAG_SETCLIENTID,       "setclientid",          \
 508                         {0x73657463, 0x6c69656e, 0x74696420}},  \
 509                 {TAG_SETCLIENTID_CF,    "setclientid_confirm",  \
 510                         {0x73636c6e, 0x7469645f, 0x636f6e66}},  \
 511                 {TAG_SYMLINK,           "symlink",              \
 512                         {0x73796d6c, 0x696e6b20, 0x20202020}},  \
 513                 {TAG_WRITE,             "write",                \
 514                         {0x77726974, 0x65202020, 0x20202020}}   \
 515         }
 516 
 517 /*
 518  * These flags are for differentiating the search criterian for
 519  * find_open_owner().  The comparison is done with the open_owners's
 520  * 'oo_just_created' flag.
 521  */
 522 #define NFS4_PERM_CREATED       0x0
 523 #define NFS4_JUST_CREATED       0x1
 524 
 525 /*
 526  * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
 527  * is stored upon a successful OPEN.  This is needed when the user's effective
 528  * and real uid's don't match.  The 'oo_cred_otw' overrides the credential
 529  * passed down by VFS for async read/write, commit, lock, and close operations.
 530  *
 * The oo_ref_count keeps track of the number of active references on this
 * data structure plus the number of nfs4_open_streams that point to it.
 *
 * 'oo_valid' tells whether this struct is about to be freed or not.
 535  *
 536  * 'oo_just_created' tells us whether this struct has just been created but
 537  * not been fully finalized (that is created upon an OPEN request and
 538  * finalized upon the OPEN success).
 539  *
 540  * The 'oo_seqid_inuse' is for the open seqid synchronization.  If a thread
 541  * is currently using the open owner and it's open_seqid, then it sets the
 542  * oo_seqid_inuse to true if it currently is not set.  If it is set then it
 543  * does a cv_wait on the oo_cv_seqid_sync condition variable.  When the thread
 544  * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process
 545  * waiting on the condition variable.
 546  *
 547  * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
 548  * and 'oo_last_good_op' is the operation that issued the last valid seqid.
 549  *
 550  * Lock ordering:
 551  *      mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
 552  *
 553  *      oo_seqid_inuse > mntinfo4_t::mi_lock
 554  *      oo_seqid_inuse > rnode4_t::r_statelock
 555  *      oo_seqid_inuse > rnode4_t::r_statev4_lock
 556  *      oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
 557  *
 558  * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
 559  *      oo_last_good_op
 560  *      oo_last_good_seqid
 561  *      oo_name
 562  *      oo_seqid
 563  *
 564  * The 'oo_lock' protects:
 565  *      oo_cred
 566  *      oo_cred_otw
 567  *      oo_foo_node
 568  *      oo_hash_node
 569  *      oo_just_created
 570  *      oo_ref_count
 571  *      oo_valid
 572  */
 573 
/* See the block comment above for field semantics and lock ordering. */
typedef struct nfs4_open_owner {
        cred_t                  *oo_cred;       /* hashed-by credential */
        int                     oo_ref_count;
        int                     oo_valid;
        int                     oo_just_created;
        seqid4                  oo_seqid;
        seqid4                  oo_last_good_seqid;
        nfs4_tag_type_t         oo_last_good_op;
        unsigned                oo_seqid_inuse:1;
        cred_t                  *oo_cred_otw;   /* cred from successful OPEN */
        kcondvar_t              oo_cv_seqid_sync;
        /*
         * Fix this to always be 8 bytes
         */
        uint64_t                oo_name;
        list_node_t             oo_hash_node;   /* bucket linkage */
        list_node_t             oo_foo_node;    /* freed-open-owner linkage */
        kmutex_t                oo_lock;
} nfs4_open_owner_t;
 593 
 594 /*
 595  * Static server information.
 596  * These fields are read-only once they are initialized; sv_lock
 597  * should be held as writer if they are changed during mount:
 598  *      sv_addr
 599  *      sv_dhsec
 600  *      sv_hostname
 601  *      sv_hostnamelen
 602  *      sv_knconf
 603  *      sv_next
 604  *      sv_origknconf
 605  *
 606  * These fields are protected by sv_lock:
 607  *      sv_currsec
 608  *      sv_fhandle
 609  *      sv_flags
 610  *      sv_fsid
 611  *      sv_path
 612  *      sv_pathlen
 613  *      sv_pfhandle
 614  *      sv_save_secinfo
 615  *      sv_savesec
 616  *      sv_secdata
 617  *      sv_secinfo
 618  *      sv_supp_attrs
 619  *
 620  * Lock ordering:
 621  * nfs_rtable4_lock > sv_lock
 622  * rnode4_t::r_statelock > sv_lock
 623  */
 624 typedef struct servinfo4 {
 625         struct knetconfig *sv_knconf;   /* bound TLI fd */
 626         struct knetconfig *sv_origknconf;       /* For RDMA save orig knconf */
 627         struct netbuf      sv_addr;     /* server's address */
 628         nfs4_fhandle_t     sv_fhandle;  /* this server's filehandle */
 629         nfs4_fhandle_t     sv_pfhandle; /* parent dir filehandle */
 630         int                sv_pathlen;  /* Length of server path */
 631         char              *sv_path;     /* Path name on server */
 632         uint32_t           sv_flags;    /* flags for this server */
 633         sec_data_t        *sv_secdata;  /* client initiated security data */
 634         sv_secinfo_t      *sv_secinfo;  /* server security information */
 635         sec_data_t        *sv_currsec;  /* security data currently used; */
 636                                         /* points to one of the sec_data */
 637                                         /* entries in sv_secinfo */
 638         sv_secinfo_t      *sv_save_secinfo; /* saved secinfo */
 639         sec_data_t        *sv_savesec;  /* saved security data */
 640         sec_data_t        *sv_dhsec;    /* AUTH_DH data from the user land */
 641         char              *sv_hostname; /* server's hostname */
 642         int                sv_hostnamelen;  /* server's hostname length */
 643         fattr4_fsid             sv_fsid;    /* fsid of shared obj       */
 644         fattr4_supported_attrs  sv_supp_attrs;
 645         struct servinfo4  *sv_next;     /* next in list */
 646         nfs_rwlock_t       sv_lock;
 647 } servinfo4_t;
 648 
/* sv_flags fields (bit flags stored in servinfo4_t::sv_flags) */
#define SV4_TRYSECINFO          0x001   /* try secinfo data from the server */
#define SV4_TRYSECDEFAULT       0x002   /* try a default flavor */
#define SV4_NOTINUSE            0x004   /* servinfo4_t had fatal errors */
#define SV4_ROOT_STALE          0x008   /* root vnode got ESTALE */
 654 
 655 /*
 656  * Lock call types.  See nfs4frlock().
 657  */
 658 typedef enum nfs4_lock_call_type {
 659         NFS4_LCK_CTYPE_NORM,
 660         NFS4_LCK_CTYPE_RECLAIM,
 661         NFS4_LCK_CTYPE_RESEND,
 662         NFS4_LCK_CTYPE_REINSTATE
 663 } nfs4_lock_call_type_t;
 664 
 665 /*
 666  * This structure holds the information for a lost open/close/open downgrade/
 667  * lock/locku request.  It is also used for requests that are queued up so
 668  * that the recovery thread can release server state after a forced
 669  * unmount.
 670  * "lr_op" is 0 if the struct is uninitialized.  Otherwise, it is set to
 671  * the proper OP_* nfs_opnum4 number.  The other fields contain information
 672  * to reconstruct the call.
 673  *
 * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
 * parent directory without relying on vtodv (since we may not have a vp
 * for the file we wish to create).
 677  *
 678  * lr_putfirst means that the request should go to the front of the resend
 679  * queue, rather than the end.
 680  */
typedef struct nfs4_lost_rqst {
        list_node_t                     lr_node;        /* linkage on resend list */
        nfs_opnum4                      lr_op;          /* OP_*; 0 if uninitialized */
        vnode_t                         *lr_vp;         /* file the call applied to */
        vnode_t                         *lr_dvp;        /* parent dir, for OPEN+CREATE */
        nfs4_open_owner_t               *lr_oop;        /* open owner for the call */
        struct nfs4_open_stream         *lr_osp;        /* open stream for the call */
        struct nfs4_lock_owner          *lr_lop;        /* lock owner for the call */
        cred_t                          *lr_cr;         /* credential to resend with */
        flock64_t                       *lr_flk;        /* lock description (LOCK/LOCKU) */
        bool_t                          lr_putfirst;    /* resend at front of queue */
        union {
                struct {
                        nfs4_lock_call_type_t lru_ctype;
                        nfs_lock_type4  lru_locktype;
                } lru_lockargs;         /* LOCK, LOCKU */
                struct {
                        uint32_t                lru_oaccess;
                        uint32_t                lru_odeny;
                        enum open_claim_type4   lru_oclaim;
                        stateid4                lru_ostateid; /* reopen only */
                        component4              lru_ofile;
                } lru_open_args;
                struct {
                        uint32_t        lru_dg_access;
                        uint32_t        lru_dg_deny;
                } lru_open_dg_args;
        } nfs4_lr_u;
} nfs4_lost_rqst_t;
 710 
/*
 * Shorthand accessors for the nfs4_lr_u union members of nfs4_lost_rqst_t.
 */
#define lr_oacc         nfs4_lr_u.lru_open_args.lru_oaccess
#define lr_odeny        nfs4_lr_u.lru_open_args.lru_odeny
#define lr_oclaim       nfs4_lr_u.lru_open_args.lru_oclaim
#define lr_ostateid     nfs4_lr_u.lru_open_args.lru_ostateid
#define lr_ofile        nfs4_lr_u.lru_open_args.lru_ofile
#define lr_dg_acc       nfs4_lr_u.lru_open_dg_args.lru_dg_access
#define lr_dg_deny      nfs4_lr_u.lru_open_dg_args.lru_dg_deny
#define lr_ctype        nfs4_lr_u.lru_lockargs.lru_ctype
#define lr_locktype     nfs4_lr_u.lru_lockargs.lru_locktype
 720 
 721 /*
 722  * Recovery actions.  Some actions can imply further recovery using a
 723  * different recovery action (e.g., recovering the clientid leads to
 724  * recovering open files and locks).
 725  */
 726 
typedef enum {
        NR_UNUSED,              /* no recovery action */
        NR_CLIENTID,            /* re-establish the clientid (implies more) */
        NR_OPENFILES,           /* reopen files / reclaim locks */
        NR_FHEXPIRED,
        NR_FAILOVER,            /* fail over to another server */
        NR_WRONGSEC,
        NR_EXPIRED,
        NR_BAD_STATEID,
        NR_BADHANDLE,
        NR_BAD_SEQID,
        NR_OLDSTATEID,
        NR_GRACE,               /* wait out the server's grace period */
        NR_DELAY,
        NR_LOST_LOCK,           /* resend a lost lock request */
        NR_LOST_STATE_RQST,     /* resend a lost state request */
        NR_STALE,
        NR_MOVED
} nfs4_recov_t;
 746 
 747 /*
 748  * Administrative and debug message framework.
 749  */
 750 
#define NFS4_MSG_MAX    100     /* default value for the nfs4_msg_max tunable */
extern int nfs4_msg_max;

/* max referral redirections to chase — see mi_vfs_referral_loop_cnt */
#define NFS4_REFERRAL_LOOP_MAX  20
 755 
/* Event types recorded through the debug/admin message facility (nfs4_revent_t) */
typedef enum {
        RE_BAD_SEQID,
        RE_BADHANDLE,
        RE_CLIENTID,
        RE_DEAD_FILE,
        RE_END,
        RE_FAIL_RELOCK,
        RE_FAIL_REMAP_LEN,
        RE_FAIL_REMAP_OP,
        RE_FAILOVER,
        RE_FILE_DIFF,
        RE_LOST_STATE,
        RE_OPENS_CHANGED,
        RE_SIGLOST,
        RE_SIGLOST_NO_DUMP,
        RE_START,
        RE_UNEXPECTED_ACTION,
        RE_UNEXPECTED_ERRNO,
        RE_UNEXPECTED_STATUS,
        RE_WRONGSEC,
        RE_LOST_STATE_BAD_OP,
        RE_REFERRAL
} nfs4_event_type_t;
 779 
/* Whether a recorded fact (nfs4_rfact_t) merits inspection */
typedef enum {
        RFS_NO_INSPECT,
        RFS_INSPECT
} nfs4_fact_status_t;
 784 
/* Fact types recorded through the debug/admin message facility (nfs4_rfact_t) */
typedef enum {
        RF_BADOWNER,
        RF_ERR,
        RF_RENEW_EXPIRED,
        RF_SRV_NOT_RESPOND,
        RF_SRV_OK,
        RF_SRVS_NOT_RESPOND,
        RF_SRVS_OK,
        RF_DELMAP_CB_ERR,
        RF_SENDQ_FULL
} nfs4_fact_type_t;
 796 
/* Whether a queued debug message should be dumped */
typedef enum {
        NFS4_MS_DUMP,
        NFS4_MS_NO_DUMP
} nfs4_msg_status_t;
 801 
typedef struct nfs4_rfact {
        nfs4_fact_type_t        rf_type;        /* which RF_* fact this is */
        nfs4_fact_status_t      rf_status;      /* RFS_INSPECT / RFS_NO_INSPECT */
        bool_t                  rf_reboot;      /* server reboot involved — TODO confirm */
        nfs4_recov_t            rf_action;      /* recovery action involved */
        nfs_opnum4              rf_op;          /* NFSv4 operation involved */
        nfsstat4                rf_stat4;       /* NFSv4 status involved */
        timespec_t              rf_time;        /* when the fact was recorded */
        int                     rf_error;       /* error value, if any */
        struct rnode4           *rf_rp1;        /* rnode involved, if any */
        char                    *rf_char1;      /* fact-specific string, if any */
} nfs4_rfact_t;
 814 
typedef struct nfs4_revent {
        nfs4_event_type_t       re_type;        /* which RE_* event this is */
        nfsstat4                re_stat4;       /* NFSv4 status involved */
        uint_t                  re_uint;        /* event-specific number */
        pid_t                   re_pid;         /* process involved */
        struct mntinfo4         *re_mi;         /* mount involved */
        struct rnode4           *re_rp1;        /* rnodes involved, if any */
        struct rnode4           *re_rp2;
        char                    *re_char1;      /* event-specific strings */
        char                    *re_char2;
        nfs4_tag_type_t         re_tag1;        /* compound request tags */
        nfs4_tag_type_t         re_tag2;
        seqid4                  re_seqid1;      /* seqids involved, if any */
        seqid4                  re_seqid2;
} nfs4_revent_t;
 830 
/* Discriminator for the rmsg_u union of nfs4_debug_msg_t */
typedef enum {
        RM_EVENT,
        RM_FACT
} nfs4_msg_type_t;
 835 
typedef struct nfs4_debug_msg {
        timespec_t              msg_time;       /* when the message was created */
        nfs4_msg_type_t         msg_type;       /* RM_EVENT or RM_FACT */
        char                    *msg_srv;       /* server string */
        char                    *msg_mntpt;     /* mount point string */
        union {
                nfs4_rfact_t    msg_fact;       /* valid when RM_FACT */
                nfs4_revent_t   msg_event;      /* valid when RM_EVENT */
        } rmsg_u;
        nfs4_msg_status_t       msg_status;     /* NFS4_MS_DUMP / NFS4_MS_NO_DUMP */
        list_node_t             msg_node;       /* linkage on mi_msg_list */
} nfs4_debug_msg_t;
 848 
 849 /*
 850  * NFS private data per mounted file system
 851  *      The mi_lock mutex protects the following fields:
 852  *              mi_flags
 853  *              mi_in_recovery
 854  *              mi_recovflags
 855  *              mi_recovthread
 856  *              mi_error
 857  *              mi_printed
 858  *              mi_down
 859  *              mi_stsize
 860  *              mi_curread
 861  *              mi_curwrite
 862  *              mi_timers
 863  *              mi_curr_serv
 864  *              mi_klmconfig
 865  *              mi_oo_list
 866  *              mi_foo_list
 867  *              mi_foo_num
 868  *              mi_foo_max
 869  *              mi_lost_state
 870  *              mi_bseqid_list
 871  *              mi_ephemeral
 872  *              mi_ephemeral_tree
 873  *
 874  *      Normally the netconfig information for the mount comes from
 875  *      mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
 876  *      different transport, mi_klmconfig contains the necessary netconfig
 877  *      information.
 878  *
 879  *      The mi_async_lock mutex protects the following fields:
 880  *              mi_async_reqs
 881  *              mi_async_req_count
 882  *              mi_async_tail
 883  *              mi_async_curr[NFS4_MAX_ASYNC_QUEUES]
 884  *              mi_async_clusters
 885  *              mi_async_init_clusters
 886  *              mi_threads[NFS4_MAX_ASYNC_QUEUES]
 887  *              mi_inactive_thread
 888  *              mi_manager_thread
 889  *
 890  *      The nfs4_server_t::s_lock protects the following fields:
 891  *              mi_clientid
 892  *              mi_clientid_next
 893  *              mi_clientid_prev
 894  *              mi_open_files
 895  *
 896  *      The mntinfo4_t::mi_recovlock protects the following fields:
 897  *              mi_srvsettime
 898  *              mi_srvset_cnt
 899  *              mi_srv
 900  *
 901  * Changing mi_srv from one nfs4_server_t to a different one requires
 902  * holding the mi_recovlock as RW_WRITER.
 903  * Exception: setting mi_srv the first time in mount/mountroot is done
 904  * holding the mi_recovlock as RW_READER.
 905  *
 906  *      Locking order:
 907  *        mi4_globals::mig_lock > mi_async_lock
 908  *        mi_async_lock > nfs4_server_t::s_lock > mi_lock
 909  *        mi_recovlock > mi_rename_lock > nfs_rtable4_lock
 910  *        nfs4_server_t::s_recovlock > mi_recovlock
 911  *        rnode4_t::r_rwlock > mi_rename_lock
 912  *        nfs_rtable4_lock > mi_lock
 913  *        nfs4_server_t::s_lock > mi_msg_list_lock
 914  *        mi_recovlock > nfs4_server_t::s_lock
 915  *        mi_recovlock > nfs4_server_lst_lock
 916  *
 917  * The 'mi_oo_list' represents the hash buckets that contain the
 * nfs4_open_owners for this particular mntinfo4.
 919  *
 920  * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
 921  * 'mi_foo_num' is the current number of freed open owners on the list,
 922  * 'mi_foo_max' is the maximum number of freed open owners that are allowable
 923  * on the list.
 924  *
 925  * mi_rootfh and mi_srvparentfh are read-only once created, but that just
 926  * refers to the pointer.  The contents must be updated to keep in sync
 927  * with mi_curr_serv.
 928  *
 929  * The mi_msg_list_lock protects against adding/deleting entries to the
 * mi_msg_list, and also the updating/retrieving of mi_lease_period.
 931  *
 932  * 'mi_zone' is initialized at structure creation time, and never
 933  * changes; it may be read without a lock.
 934  *
 935  * mi_zone_node is linkage into the mi4_globals.mig_list, and is
 936  * protected by mi4_globals.mig_list_lock.
 937  *
 938  * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
 939  * ephemeral structure for this ephemeral mount point. It can not be
 940  * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
 941  * tree.
 942  *
 943  * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
 944  * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
 945  * is the enclosing mntinfo4 for the ephemeral tree.
 946  */
struct zone;
struct nfs4_ephemeral;
struct nfs4_ephemeral_tree;
struct nfs4_server;
typedef struct mntinfo4 {
        kmutex_t        mi_lock;        /* protects mntinfo4 fields */
        struct servinfo4 *mi_servers;   /* server list */
        struct servinfo4 *mi_curr_serv; /* current server */
        struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
        struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
        kcondvar_t      mi_failover_cv; /* failover synchronization */
        struct vfs      *mi_vfsp;       /* back pointer to vfs */
        enum vtype      mi_type;        /* file type of the root vnode */
        uint_t          mi_flags;       /* see below */
        uint_t          mi_recovflags;  /* if recovery active; see below */
        kthread_t       *mi_recovthread; /* active recov thread or NULL */
        uint_t          mi_error;       /* only set/valid when MI4_RECOV_FAIL */
                                        /* is set in mi_flags */
        int             mi_tsize;       /* transfer size (bytes) */
                                        /* really read size */
        int             mi_stsize;      /* server's max transfer size (bytes) */
                                        /* really write size */
        int             mi_timeo;       /* initial timeout in 10th sec */
        int             mi_retrans;     /* times to retry request */
        hrtime_t        mi_acregmin;    /* min time to hold cached file attr */
        hrtime_t        mi_acregmax;    /* max time to hold cached file attr */
        hrtime_t        mi_acdirmin;    /* min time to hold cached dir attr */
        hrtime_t        mi_acdirmax;    /* max time to hold cached dir attr */
        len_t           mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
        int             mi_curread;     /* current read size */
        int             mi_curwrite;    /* current write size */
        uint_t          mi_count;       /* ref count */
        /*
         * Async I/O management
         * We have 2 pools of threads working on async I/O:
         *      (1) Threads which work on all async queues. Default number of
         *      threads in this queue is 8. Threads in this pool work on async
         *      queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
         *      active threads in this pool is tracked by
         *      mi_threads[NFS4_ASYNC_QUEUE].
         *      (ii)Threads which work only on page op async queues.
         *      Page ops queue comprises of NFS4_PUTAPAGE, NFS4_PAGEIO &
         *      NFS4_COMMIT. Default number of threads in this queue is 2
         *      (NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
         *      queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
         *      of active threads in this pool is tracked by
         *      mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
         *
         * In addition to above two pools, there is always one thread that
         * handles over-the-wire requests for VOP_INACTIVE.
         */
        struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
        struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
        struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
                                                /* current async queue */
        uint_t          mi_async_clusters[NFS4_ASYNC_TYPES];
        uint_t          mi_async_init_clusters;
        uint_t          mi_async_req_count; /* # outstanding work requests */
        kcondvar_t      mi_async_reqs_cv; /* signaled when there's work */
        ushort_t        mi_threads[NFS4_MAX_ASYNC_QUEUES];
                                        /* number of active async threads */
        ushort_t        mi_max_threads; /* max number of async threads */
        kthread_t       *mi_manager_thread; /* async manager thread id */
        kthread_t       *mi_inactive_thread; /* inactive thread id */
        kcondvar_t      mi_inact_req_cv; /* notify VOP_INACTIVE thread */
        kcondvar_t      mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
                                        /* tell workers to work */
        kcondvar_t      mi_async_cv;    /* all pool threads exited */
        kmutex_t        mi_async_lock;
        /*
         * Other stuff
         */
        struct pathcnf  *mi_pathconf;   /* static pathconf kludge */
        rpcprog_t       mi_prog;        /* RPC program number */
        rpcvers_t       mi_vers;        /* RPC program version number */
        char            **mi_rfsnames;  /* mapping to proc names */
        kstat_named_t   *mi_reqs;       /* count of requests */
        clock_t         mi_printftime;  /* last error printf time */
        nfs_rwlock_t    mi_recovlock;   /* separate ops from recovery (v4) */
        time_t          mi_grace_wait;  /* non-zero represents time to wait */
        /* when we switched nfs4_server_t - only for observability purposes */
        time_t          mi_srvsettime;
        nfs_rwlock_t    mi_rename_lock; /* atomic volfh rename  */
        struct nfs4_fname *mi_fname;    /* root fname */
        list_t          mi_lost_state;  /* resend list */
        list_t          mi_bseqid_list; /* bad seqid list */
        /*
         * Client Side Failover stats
         */
        uint_t          mi_noresponse;  /* server not responding count */
        uint_t          mi_failover;    /* failover to new server count */
        uint_t          mi_remap;       /* remap to new server count */
        /*
         * Kstat statistics
         */
        struct kstat    *mi_io_kstats;
        struct kstat    *mi_ro_kstats;
        kstat_t         *mi_recov_ksp;  /* ptr to the recovery kstat */

        /*
         * Volatile fh flags (nfsv4)
         */
        uint32_t        mi_fh_expire_type;
        /*
         * Lease Management
         */
        struct mntinfo4 *mi_clientid_next;
        struct mntinfo4 *mi_clientid_prev;
        clientid4       mi_clientid; /* redundant info found in nfs4_server */
        int             mi_open_files;  /* count of open files */
        int             mi_in_recovery; /* count of recovery instances */
        kcondvar_t      mi_cv_in_recov; /* cv for recovery threads */
        /*
         * Open owner stuff.
         */
        struct nfs4_oo_hash_bucket      mi_oo_list[NFS4_NUM_OO_BUCKETS];
        list_t                          mi_foo_list;
        int                             mi_foo_num;
        int                             mi_foo_max;
        /*
         * Shared filehandle pool.
         */
        nfs_rwlock_t                    mi_fh_lock;
        avl_tree_t                      mi_filehandles;

        /*
         * Debug message queue.
         */
        list_t                  mi_msg_list;
        int                     mi_msg_count;   /* # of messages on mi_msg_list */
        time_t                  mi_lease_period;
                                        /*
                                         * not guaranteed to be accurate.
                                         * only should be used by debug queue.
                                         */
        kmutex_t                mi_msg_list_lock;
        /*
         * Zones support.
         */
        struct zone     *mi_zone;       /* Zone in which FS is mounted */
        zone_ref_t      mi_zone_ref;    /* Reference to aforementioned zone */
        list_node_t     mi_zone_node;  /* linkage into per-zone mi list */

        /*
         * Links for unmounting ephemeral mounts.
         */
        struct nfs4_ephemeral           *mi_ephemeral;
        struct nfs4_ephemeral_tree      *mi_ephemeral_tree;

        uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */
        struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */
        /*
         * Referral related info.
         */
        int             mi_vfs_referral_loop_cnt;
                                        /* compared against */
                                        /* NFS4_REFERRAL_LOOP_MAX — presumably */
} mntinfo4_t;
1103 
1104 /*
1105  * The values for mi_flags.
1106  *
1107  *      MI4_HARD                 hard or soft mount
1108  *      MI4_PRINTED              responding message printed
1109  *      MI4_INT                  allow INTR on hard mount
1110  *      MI4_DOWN                 server is down
1111  *      MI4_NOAC                 don't cache attributes
1112  *      MI4_NOCTO                no close-to-open consistency
1113  *      MI4_LLOCK                local locking only (no lockmgr)
1114  *      MI4_GRPID                System V group id inheritance
1115  *      MI4_SHUTDOWN             System is rebooting or shutting down
1116  *      MI4_LINK                 server supports link
1117  *      MI4_SYMLINK              server supports symlink
1118  *      MI4_EPHEMERAL_RECURSED   an ephemeral mount being unmounted
1119  *                               due to a recursive call - no need
1120  *                               for additional recursion
1121  *      MI4_ACL                  server supports NFSv4 ACLs
1122  *      MI4_MIRRORMOUNT          is a mirrormount
1123  *      MI4_NOPRINT              don't print messages
1124  *      MI4_DIRECTIO             do direct I/O
 *      MI4_RECOV_ACTIV          filesystem has a recovery thread
1126  *      MI4_REMOVE_ON_LAST_CLOSE remove from server's list
1127  *      MI4_RECOV_FAIL           client recovery failed
1128  *      MI4_PUBLIC               public/url option used
1129  *      MI4_MOUNTING             mount in progress, don't failover
1130  *      MI4_POSIX_LOCK           if server is using POSIX locking
1131  *      MI4_LOCK_DEBUG           cmn_err'd posix lock err msg
1132  *      MI4_DEAD                 zone has released it
1133  *      MI4_INACTIVE_IDLE        inactive thread idle
1134  *      MI4_BADOWNER_DEBUG       badowner error msg per mount
1135  *      MI4_ASYNC_MGR_STOP       tell async manager to die
1136  *      MI4_TIMEDOUT             saw a timeout during zone shutdown
1137  *      MI4_EPHEMERAL            is an ephemeral mount
1138  */
#define MI4_HARD                 0x1
#define MI4_PRINTED              0x2
#define MI4_INT                  0x4
#define MI4_DOWN                 0x8
#define MI4_NOAC                 0x10
#define MI4_NOCTO                0x20
/* 0x40 is available */
#define MI4_LLOCK                0x80
#define MI4_GRPID                0x100
#define MI4_SHUTDOWN             0x200
#define MI4_LINK                 0x400
#define MI4_SYMLINK              0x800
#define MI4_EPHEMERAL_RECURSED   0x1000
#define MI4_ACL                  0x2000
/* MI4_MIRRORMOUNT is also defined in nfsstat.c */
#define MI4_MIRRORMOUNT          0x4000
#define MI4_REFERRAL             0x8000
/* 0x10000 is available */
#define MI4_NOPRINT              0x20000
#define MI4_DIRECTIO             0x40000
/* 0x80000 is available */
#define MI4_RECOV_ACTIV          0x100000
#define MI4_REMOVE_ON_LAST_CLOSE 0x200000
#define MI4_RECOV_FAIL           0x400000
#define MI4_PUBLIC               0x800000
#define MI4_MOUNTING             0x1000000
#define MI4_POSIX_LOCK           0x2000000
#define MI4_LOCK_DEBUG           0x4000000
#define MI4_DEAD                 0x8000000
#define MI4_INACTIVE_IDLE        0x10000000
#define MI4_BADOWNER_DEBUG       0x20000000
#define MI4_ASYNC_MGR_STOP       0x40000000
#define MI4_TIMEDOUT             0x80000000

/* an ephemeral mount is either a mirrormount or a referral */
#define MI4_EPHEMERAL           (MI4_MIRRORMOUNT | MI4_REFERRAL)
1173 
/* True if interrupts are allowed for (hard-mounted) operations on vp */
#define INTR4(vp)       (VTOMI4(vp)->mi_flags & MI4_INT)

/*
 * True when this mount has more than one server configured, i.e. failover
 * is possible.  The macro parameter is parenthesized so the macro expands
 * safely when given a non-trivial expression (CERT PRE01-C).
 */
#define FAILOVER_MOUNT4(mi)     ((mi)->mi_servers->sv_next)
1177 
1178 /*
1179  * Recovery flags.
1180  *
1181  * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
1182  * that's important), but some flag is needed to indicate that recovery is
1183  * going on for the filesystem.
1184  */
/* Values for mi_recovflags */
#define MI4R_NEED_CLIENTID      0x1
#define MI4R_REOPEN_FILES       0x2
#define MI4R_NEED_SECINFO       0x4
#define MI4R_NEED_NEW_SERVER    0x8
#define MI4R_REMAP_FILES        0x10
#define MI4R_SRV_REBOOT         0x20    /* server has rebooted */
#define MI4R_LOST_STATE         0x40
#define MI4R_BAD_SEQID          0x80
#define MI4R_MOVED              0x100
1194 
/*
 * Reference counting wrappers for mntinfo4_t.  Wrapped in do { } while (0)
 * so each expands to a single statement and stays safe when used as the
 * body of an unbraced if/else (CERT PRE10-C); the bare-brace form left a
 * dangling empty statement after the caller's semicolon.
 */
#define MI4_HOLD(mi)    do {    \
        mi_hold(mi);            \
} while (0)

#define MI4_RELE(mi)    do {    \
        mi_rele(mi);            \
} while (0)
1202 
1203 /*
1204  * vfs pointer to mount info
1205  */
1206 #define VFTOMI4(vfsp)   ((mntinfo4_t *)((vfsp)->vfs_data))
1207 
1208 /*
1209  * vnode pointer to mount info
1210  */
1211 #define VTOMI4(vp)      ((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
1212 
1213 /*
1214  * Lease Management
1215  *
1216  * lease_valid is initially set to NFS4_LEASE_NOT_STARTED.  This is when the
1217  * nfs4_server is first created.  lease_valid is then set to
 * NFS4_LEASE_UNINITIALIZED when the renew thread is started.  The extra state
 * of NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a
 * thread already exists when we do SETCLIENTID).  lease_valid is then set to
 * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state creating
1222  * operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID as long as
1223  * the lease is renewed.  It is set to NFS4_LEASE_INVALID when the lease
1224  * expires.  Client recovery is needed to set the lease back to
1225  * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
1226  *
1227  * The s_cred is the credential used to mount the first file system for this
1228  * server.  It used as the credential for the renew thread's calls to the
1229  * server.
1230  *
1231  * The renew thread waits on the condition variable cv_thread_exit.  If the cv
1232  * is signalled, then the thread knows it must check s_thread_exit to see if
1233  * it should exit.  The cv is signaled when the last file system is unmounted
1234  * from a particular server.  s_thread_exit is set to 0 upon thread startup,
1235  * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
1236  * telling the thread to exit.  s_thread_exit is needed to avoid spurious
1237  * wakeups.
1238  *
1239  * state_ref_count is incremented every time a new file is opened and
1240  * decremented every time a file is closed otw.  This keeps track of whether
1241  * the nfs4_server has state associated with it or not.
1242  *
1243  * s_refcnt is the reference count for storage management of the struct
1244  * itself.
1245  *
1246  * mntinfo4_list points to the doubly linked list of mntinfo4s that share
1247  * this nfs4_server (ie: <clientid, saddr> pair) in the current zone.  This is
1248  * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
1249  *
1250  * s_recovlock is used to synchronize recovery operations.  The thread
1251  * that is recovering the client must acquire it as a writer.  If the
1252  * thread is using the clientid (including recovery operations on other
1253  * state), acquire it as a reader.
1254  *
1255  * The 's_otw_call_count' keeps track of the number of outstanding over the
1256  * wire requests for this structure.  The struct will not go away as long
1257  * as this is non-zero (or s_refcnt is non-zero).
1258  *
 * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
 * variable to let the renew thread know when an outstanding otw request has
1261  * finished.
1262  *
1263  * 'zoneid' and 'zone_globals' are set at creation of this structure
1264  * and are read-only after that; no lock is required to read them.
1265  *
1266  * s_lock protects: everything except cv_thread_exit and s_recovlock.
1267  *
1268  * s_program is used as the index into the nfs4_callback_globals's
1269  * nfs4prog2server table.  When a callback request comes in, we can
1270  * use that request's program number (minus NFS4_CALLBACK) as an index
1271  * into the nfs4prog2server.  That entry will hold the nfs4_server_t ptr.
1272  * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
1273  * delegated rnode4_ts).
1274  *
1275  * Lock order:
1276  * nfs4_server::s_lock > mntinfo4::mi_lock
1277  * nfs_rtable4_lock > s_lock
1278  * nfs4_server_lst_lock > s_lock
1279  * s_recovlock > s_lock
1280  */
1281 struct nfs4_callback_globals;
1282 
typedef struct nfs4_server {
        struct nfs4_server      *forw;          /* list linkage */
        struct nfs4_server      *back;
        struct netbuf           saddr;          /* server address */
        uint_t                  s_flags; /* see below */
        uint_t                  s_refcnt;       /* storage ref count (see above) */
        clientid4               clientid;       /* what we get from server */
        nfs_client_id4          clidtosend;     /* what we send to server */
        mntinfo4_t              *mntinfo4_list; /* mounts sharing this server */
        int                     lease_valid;    /* NFS4_LEASE_* (see above) */
        time_t                  s_lease_time;   /* lease duration — TODO confirm units */
        time_t                  last_renewal_time; /* when lease was last renewed */
        timespec_t              propagation_delay;
        cred_t                  *s_cred;        /* cred of first mount; renew thread uses */
        kcondvar_t              cv_thread_exit; /* wakes renew thread (see above) */
        int                     s_thread_exit;  /* 0 or NFS4_THREAD_EXIT */
        int                     state_ref_count; /* otw opens minus closes */
        int                     s_otw_call_count; /* outstanding OTW requests */
        kcondvar_t              s_cv_otw_count; /* signaled as OTW calls finish */
        kcondvar_t              s_clientid_pend; /* clientid being established — */
                                                /* presumably pairs w/ N4S_CLIENTID_PEND */
        kmutex_t                s_lock;         /* protects most fields (see above) */
        list_t                  s_deleg_list;   /* delegated rnode4_ts */
        rpcprog_t               s_program;      /* callback prog; index (see above) */
        nfs_rwlock_t            s_recovlock;    /* recovery sync (see above) */
        kcondvar_t              wait_cb_null; /* used to wait for CB_NULL */
        zoneid_t                zoneid; /* zone using this nfs4_server_t */
        struct nfs4_callback_globals *zone_globals;     /* globals */
} nfs4_server_t;
1311 
/* nfs4_server flags (s_flags) */
#define N4S_CLIENTID_SET        1       /* server has our clientid */
#define N4S_CLIENTID_PEND       0x2     /* server doesn't have clientid */
#define N4S_CB_PINGED           0x4     /* server has sent us a CB_NULL */
#define N4S_CB_WAITER           0x8     /* is/has wait{ing/ed} for cb_null */
#define N4S_INSERTED            0x10    /* list has reference for server */
#define N4S_BADOWNER_DEBUG      0x20    /* bad owner err msg per client */

#define N4S_CB_PAUSE_TIME       10000   /* Amount of time to pause (10ms) */
1321 
/* Argument structure carrying a lease time value */
struct lease_time_arg {
        time_t  lease_time;
};
1325 
/* Policy choices for when the client returns a delegation */
enum nfs4_delegreturn_policy {
        IMMEDIATE,              /* return as soon as granted */
        FIRSTCLOSE,             /* return on first close */
        LASTCLOSE,              /* return on last close */
        INACTIVE                /* return at inactive time */
};
1332 
1333 /*
1334  * Operation hints for the recovery framework (mostly).
1335  *
1336  * EXCEPTIONS:
1337  * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
1338  *      These hints exist to allow user visit/readdir a R4SRVSTUB dir.
1339  *      (dir represents the root of a server fs that has not yet been
1340  *      mounted at client)
1341  */
typedef enum {
        OH_OTHER,
        OH_READ,
        OH_WRITE,
        OH_COMMIT,
        OH_VFH_RENAME,
        OH_MOUNT,
        OH_CLOSE,
        OH_LOCKU,
        OH_DELEGRETURN,
        /* the remaining hints are the R4SRVSTUB exceptions (see above) */
        OH_ACCESS,
        OH_GETACL,
        OH_GETATTR,
        OH_LOOKUP,
        OH_READDIR
} nfs4_op_hint_t;
1358 
1359 /*
1360  * This data structure is used to track ephemeral mounts for both
1361  * mirror mounts and referrals.
1362  *
1363  * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
1364  * pointing at it. So we don't need two backpointers to walk
1365  * back up the tree.
1366  *
1367  * An ephemeral tree is pointed to by an enclosing non-ephemeral
1368  * mntinfo4. The root is also pointed to by its ephemeral
1369  * mntinfo4. ne_child will get us back to it, while ne_prior
1370  * will get us back to the non-ephemeral mntinfo4. This is an
1371  * edge case we will need to be wary of when walking back up the
1372  * tree.
1373  *
1374  * The way we handle this edge case is to have ne_prior be NULL
1375  * for the root nfs4_ephemeral node.
1376  */
typedef struct nfs4_ephemeral {
        mntinfo4_t              *ne_mount;      /* who encloses us */
        struct nfs4_ephemeral   *ne_child;      /* first child node */
        struct nfs4_ephemeral   *ne_peer;       /* next sibling */
        struct nfs4_ephemeral   *ne_prior;      /* who points at us; NULL at root */
        time_t                  ne_ref_time;    /* time last referenced */
        uint_t                  ne_mount_to;    /* timeout at */
        int                     ne_state;       /* traversal state (NFS4_EPHEMERAL_*) */
} nfs4_ephemeral_t;
1386 
1387 /*
1388  * State for the node (set in ne_state):
1389  */
/* (distinct bit values) */
#define NFS4_EPHEMERAL_OK               0x0
#define NFS4_EPHEMERAL_VISIT_CHILD      0x1
#define NFS4_EPHEMERAL_VISIT_SIBLING    0x2
#define NFS4_EPHEMERAL_PROCESS_ME       0x4
#define NFS4_EPHEMERAL_CHILD_ERROR      0x8
#define NFS4_EPHEMERAL_PEER_ERROR       0x10
1396 
1397 /*
1398  * These are the locks used in processing ephemeral data:
1399  *
1400  * mi->mi_lock
1401  *
1402  * net->net_tree_lock
1403  *     This lock is used to gate all tree operations.
1404  *     If it is held, then no other process may
1405  *     traverse the tree. This allows us to not
1406  *     throw a hold on each vfs_t in the tree.
1407  *     Can be held for a "long" time.
1408  *
1409  * net->net_cnt_lock
1410  *     Used to protect refcnt and status.
1411  *     Must be held for a really short time.
1412  *
1413  * nfs4_ephemeral_thread_lock
1414  *     Is only held to create the harvester for the zone.
1415  *     There is no ordering imposed on it.
1416  *     Held for a really short time.
1417  *
1418  * Some further detail on the interactions:
1419  *
1420  * net_tree_lock controls access to net_root. Access needs to first be
1421  * attempted in a non-blocking check.
1422  *
1423  * net_cnt_lock controls access to net_refcnt and net_status. It must only be
1424  * held for very short periods of time, unless the refcnt is 0 and the status
1425  * is INVALID.
1426  *
1427  * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
1428  * to bump the net_refcnt. It then releases it and does the action specific
1429  * algorithm to get the net_tree_lock. Once it has that, then it is okay to
1430  * grab the net_cnt_lock and change the status. The status can only be
1431  * changed if the caller has the net_tree_lock held as well.
1432  *
1433  * Note that the initial grab of net_cnt_lock must occur whilst
1434  * mi_lock is being held. This prevents stale data in that if the
1435  * ephemeral tree is non-NULL, then the harvester can not remove
1436  * the tree from the mntinfo node until it grabs that lock. I.e.,
1437  * we get the pointer to the tree and hold the lock atomically
1438  * with respect to being in mi_lock.
1439  *
1440  * When a caller is done with net_tree_lock, it can decrement the net_refcnt
1441  * either before it releases net_tree_lock or after.
1442  *
1443  * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
1444  *
1445  * Note that the overall locking scheme for the nodes is to control access
1446  * via the tree. The current scheme could easily be extended such that
1447  * the enclosing root referenced a "forest" of trees. The underlying trees
1448  * would be autonomous with respect to locks.
1449  *
1450  * Note that net_next is controlled by external locks
1451  * particular to the data structure that the tree is being added to.
1452  */
typedef struct nfs4_ephemeral_tree {
        mntinfo4_t                      *net_mount;     /* fs owning this tree */
        nfs4_ephemeral_t                *net_root;      /* root; net_tree_lock */
        struct nfs4_ephemeral_tree      *net_next;      /* external locking */
        kmutex_t                        net_tree_lock;  /* gates tree ops */
        kmutex_t                        net_cnt_lock;   /* refcnt and status */
        uint_t                          net_status;     /* TREE_* flags below */
        uint_t                          net_refcnt;     /* active users */
} nfs4_ephemeral_tree_t;
1462 
1463 /*
1464  * State for the tree (set in net_status):
1465  */
1466 #define NFS4_EPHEMERAL_TREE_OK          0x0
1467 #define NFS4_EPHEMERAL_TREE_BUILDING    0x1
1468 #define NFS4_EPHEMERAL_TREE_DEROOTING   0x2
1469 #define NFS4_EPHEMERAL_TREE_INVALID     0x4
1470 #define NFS4_EPHEMERAL_TREE_MOUNTING    0x8
1471 #define NFS4_EPHEMERAL_TREE_UMOUNTING   0x10
1472 #define NFS4_EPHEMERAL_TREE_LOCKED      0x20
1473 
1474 #define NFS4_EPHEMERAL_TREE_PROCESSING  (NFS4_EPHEMERAL_TREE_DEROOTING | \
1475         NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
1476         NFS4_EPHEMERAL_TREE_LOCKED)
1477 
1478 /*
1479  * This macro evaluates to non-zero if the given op releases state at the
1480  * server.
1481  */
1482 #define OH_IS_STATE_RELE(op)    ((op) == OH_CLOSE || (op) == OH_LOCKU || \
1483                                 (op) == OH_DELEGRETURN)
1484 
1485 #ifdef _KERNEL
1486 
1487 extern void     nfs4_async_manager(struct vfs *);
1488 extern void     nfs4_async_manager_stop(struct vfs *);
1489 extern void     nfs4_async_stop(struct vfs *);
1490 extern int      nfs4_async_stop_sig(struct vfs *);
1491 extern int      nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
1492                                 struct seg *, cred_t *,
1493                                 void (*)(vnode_t *, u_offset_t,
1494                                 caddr_t, struct seg *, cred_t *));
1495 extern int      nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
1496                                 int, cred_t *, int (*)(vnode_t *, page_t *,
1497                                 u_offset_t, size_t, int, cred_t *));
1498 extern int      nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
1499                                 int, cred_t *, int (*)(vnode_t *, page_t *,
1500                                 u_offset_t, size_t, int, cred_t *));
1501 extern void     nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
1502                                 cred_t *, void (*)(vnode_t *, page_t *,
1503                                 offset3, count3, cred_t *));
1504 extern void     nfs4_async_inactive(vnode_t *, cred_t *);
1505 extern void     nfs4_inactive_thread(mntinfo4_t *mi);
1506 extern void     nfs4_inactive_otw(vnode_t *, cred_t *);
1507 extern int      nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
1508 
1509 extern int      nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
1510 extern void     nfs4_mnt_kstat_init(struct vfs *);
1511 
1512 extern void     rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
1513                         struct COMPOUND4res_clnt *, cred_t *, int *, int,
1514                         nfs4_error_t *);
1515 extern void     nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
1516 extern int      nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
1517                                 nfs4_ga_res_t *, bitmap4, cred_t *);
1518 
1519 extern void     nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
1520 extern void     nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
1521                                 hrtime_t, cred_t *, int,
1522                                 change_info4 *);
1523 extern void     nfs4_purge_rddir_cache(vnode_t *);
1524 extern void     nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
1525 extern void     nfs4_purge_caches(vnode_t *, int, cred_t *, int);
1526 extern void     nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
1527 extern void     nfs4_flush_pages(vnode_t *vp, cred_t *cr);
1528 
1529 extern void     nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
1530 extern void     nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
1531                         vnode_t *);
1532 
1533 extern void     nfs4args_lookup_free(nfs_argop4 *, int);
1534 extern void     nfs4args_copen_free(OPEN4cargs *);
1535 
1536 extern void     nfs4_printfhandle(nfs4_fhandle_t *);
1537 
1538 extern void     nfs_free_mi4(mntinfo4_t *);
1539 extern void     sv4_free(servinfo4_t *);
1540 extern void     nfs4_mi_zonelist_add(mntinfo4_t *);
1541 extern int      nfs4_mi_zonelist_remove(mntinfo4_t *);
1542 extern int      nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
1543 extern void     nfs4_secinfo_init(void);
1544 extern void     nfs4_secinfo_fini(void);
1545 extern int      nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
1546 extern int      nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
1547 extern void     secinfo_free(sv_secinfo_t *);
1548 extern void     save_mnt_secinfo(servinfo4_t *);
1549 extern void     check_mnt_secinfo(servinfo4_t *, vnode_t *);
1550 extern int      vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
1551                                 enum nfs_opnum4, bitmap4 supp_mask);
1552 extern int      nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
1553                         int, cred_t *);
1554 extern void     nfs4_write_error(vnode_t *, int, cred_t *);
1555 extern void     nfs4_lockcompletion(vnode_t *, int);
1556 extern bool_t   nfs4_map_lost_lock_conflict(vnode_t *);
1557 extern int      vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
1558 extern int      vtoname(vnode_t *, char *, ssize_t);
1559 extern void     nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
1560                     bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
1561                     nfs4_error_t *, int *);
1562 extern void     nfs4_error_zinit(nfs4_error_t *);
1563 extern void     nfs4_error_init(nfs4_error_t *, int);
1564 extern void     nfs4_free_args(struct nfs_args *);
1565 
1566 extern void     mi_hold(mntinfo4_t *);
1567 extern void     mi_rele(mntinfo4_t *);
1568 
1569 extern vnode_t  *find_referral_stubvp(vnode_t *, char *, cred_t *);
1570 extern int       nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);
1571 
1572 extern sec_data_t       *copy_sec_data(sec_data_t *);
1573 extern gss_clntdata_t   *copy_sec_data_gss(gss_clntdata_t *);
1574 
1575 #ifdef DEBUG
1576 extern int      nfs4_consistent_type(vnode_t *);
1577 #endif
1578 
1579 extern void     nfs4_init_dot_entries(void);
1580 extern void     nfs4_destroy_dot_entries(void);
1581 extern struct nfs4_callback_globals     *nfs4_get_callback_globals(void);
1582 
1583 extern struct nfs4_server nfs4_server_lst;
1584 
1585 extern clock_t nfs_write_error_interval;
1586 
1587 #endif /* _KERNEL */
1588 
1589 /*
1590  * Flags for nfs4getfh_otw.
1591  */
1592 
1593 #define NFS4_GETFH_PUBLIC       0x01
1594 #define NFS4_GETFH_NEEDSOP      0x02
1595 
1596 /*
1597  * Found through rnodes.
1598  *
 * The os_open_ref_count keeps track of the number of open file descriptor
 * references on this data structure.  It will be bumped for any successful
1601  * OTW OPEN call and any OPEN call that determines the OTW call is not
1602  * necessary and the open stream hasn't just been created (see
1603  * nfs4_is_otw_open_necessary).
1604  *
1605  * os_mapcnt is a count of the number of mmapped pages for a particular
1606  * open stream; this in conjunction w/ os_open_ref_count is used to
1607  * determine when to do a close to the server.  This is necessary because
 * of the semantics of doing open, mmap, close; the OTW close must wait
1609  * until all open and mmap references have vanished.
1610  *
1611  * 'os_valid' tells us whether this structure is about to be freed or not,
1612  * if it is then don't return it in find_open_stream().
1613  *
1614  * 'os_final_close' is set when a CLOSE OTW was attempted.  This is needed
1615  * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
1616  * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE.  It
1617  * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
1618  * that tried to close OTW but failed, and left the state cleanup to
1619  * nfs4_inactive/CLOSE_FORCE.
1620  *
1621  * 'os_force_close' is used to let us know if an intervening thread came
1622  * and reopened the open stream after we decided to issue a CLOSE_FORCE,
1623  * but before we could actually process the CLOSE_FORCE.
1624  *
1625  * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
1626  * lost state queue.
1627  *
 * 'open_stateid' is set to the last open stateid returned by the server unless
1629  * 'os_delegation' is 1, in which case 'open_stateid' refers to the
1630  * delegation stateid returned by the server.  This is used in cases where the
1631  * client tries to OPEN a file but already has a suitable delegation, so we
1632  * just stick the delegation stateid in the open stream.
1633  *
1634  * os_dc_openacc are open access bits which have been granted to the
1635  * open stream by virtue of a delegation, but which have not been seen
1636  * by the server.  This applies even if the open stream does not have
1637  * os_delegation set.  These bits are used when setting file locks to
1638  * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
1639  * before the lock request can be sent to the server.  See
1640  * nfs4frlock_check_deleg().
1641  *
1642  * 'os_mmap_read/write' keep track of the read and write access our memory
1643  * maps require.  We need to keep track of this so we can provide the proper
1644  * access bits in the open/mmap/close/reboot/reopen case.
1645  *
1646  * 'os_failed_reopen' tells us that we failed to successfully reopen this
1647  * open stream; therefore, we should not use this open stateid as it is
1648  * not valid anymore. This flag is also used to indicate an unsuccessful
1649  * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
1650  *
1651  * If 'os_orig_oo_name' is different than os_open_owner's oo_name
1652  * then this tells us that this open stream's open owner used a
1653  * bad seqid (that is, got NFS4ERR_BAD_SEQID).  If different, this open
1654  * stream will no longer be used for future OTW state releasing calls.
1655  *
1656  * Lock ordering:
1657  * rnode4_t::r_os_lock > os_sync_lock
1658  * os_sync_lock > rnode4_t::r_statelock
1659  * os_sync_lock > rnode4_t::r_statev4_lock
1660  * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
1661  *
1662  * The 'os_sync_lock' protects:
1663  *      open_stateid
1664  *      os_dc_openacc
1665  *      os_delegation
1666  *      os_failed_reopen
1667  *      os_final_close
1668  *      os_force_close
1669  *      os_mapcnt
1670  *      os_mmap_read
1671  *      os_mmap_write
1672  *      os_open_ref_count
1673  *      os_pending_close
1674  *      os_share_acc_read
1675  *      os_share_acc_write
1676  *      os_share_deny_none
1677  *      os_share_deny_read
1678  *      os_share_deny_write
1679  *      os_ref_count
1680  *      os_valid
1681  *
1682  * The rnode4_t::r_os_lock protects:
1683  *      os_node
1684  *
1685  * These fields are set at creation time and
1686  * read only after that:
1687  *      os_open_owner
1688  *      os_orig_oo_name
1689  */
typedef struct nfs4_open_stream {
        /* share reservation and mmap access bookkeeping (os_sync_lock) */
        uint64_t                os_share_acc_read;
        uint64_t                os_share_acc_write;
        uint64_t                os_mmap_read;   /* read access mmaps need */
        uint64_t                os_mmap_write;  /* write access mmaps need */
        uint32_t                os_share_deny_none;
        uint32_t                os_share_deny_read;
        uint32_t                os_share_deny_write;
        stateid4                open_stateid;   /* open or deleg stateid */
        int                     os_dc_openacc;  /* deleg-granted open bits */
        int                     os_ref_count;   /* refs to this structure */
        unsigned                os_valid:1;     /* 0 => about to be freed */
        unsigned                os_delegation:1; /* open_stateid is deleg sid */
        unsigned                os_final_close:1; /* CLOSE OTW was attempted */
        unsigned                os_pending_close:1; /* CLOSE on lost-state q */
        unsigned                os_failed_reopen:1; /* reopen failed; sid bad */
        unsigned                os_force_close:1; /* reopened vs CLOSE_FORCE */
        int                     os_open_ref_count; /* open fd references */
        long                    os_mapcnt;      /* mmapped pages on stream */
        list_node_t             os_node;        /* linkage; r_os_lock */
        struct nfs4_open_owner  *os_open_owner; /* set at create, then RO */
        uint64_t                os_orig_oo_name; /* oo_name at create, RO */
        kmutex_t                os_sync_lock;   /* see protection list above */
} nfs4_open_stream_t;
1714 
1715 /*
1716  * This structure describes the format of the lock_owner_name
1717  * field of the lock owner.
1718  */
1719 
1720 typedef struct nfs4_lo_name {
1721         uint64_t        ln_seq_num;
1722         pid_t           ln_pid;
1723 } nfs4_lo_name_t;
1724 
1725 /*
1726  * Flags for lo_flags.
1727  */
1728 #define NFS4_LOCK_SEQID_INUSE   0x1
1729 #define NFS4_BAD_SEQID_LOCK     0x2
1730 
1731 /*
1732  * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
1733  * off the rnode.  If the links are NULL it means this object is not on the
1734  * list.
1735  *
1736  * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
1737  * didn't get a response back.  This is used to figure out if we have
1738  * possible remote v4 locks, so that we can clean up at process exit.  In
1739  * theory, the client should be able to figure out if the server received
1740  * the request (based on what seqid works), so maybe we can get rid of this
1741  * flag someday.
1742  *
1743  * 'lo_ref_count' tells us how many processes/threads are using this data
1744  * structure.  The rnode's list accounts for one reference.
1745  *
1746  * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
1747  * data structure.  It is then set to NFS4_PERM_CREATED when a lock request
1748  * is successful using this lock owner structure.  We need to keep 'temporary'
1749  * lock owners around so we can properly keep the lock seqid synchronization
1750  * when multiple processes/threads are trying to create the lock owner for the
1751  * first time (especially with the DENIED error case).  Once
1752  * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
1753  *
1754  * 'lo_valid' tells us whether this structure is about to be freed or not,
1755  * if it is then don't return it from find_lock_owner().
1756  *
1757  * Retrieving and setting of 'lock_seqid' is protected by the
1758  * NFS4_LOCK_SEQID_INUSE flag.  Waiters for NFS4_LOCK_SEQID_INUSE should
1759  * use 'lo_cv_seqid_sync'.
1760  *
1761  * The setting of 'lock_stateid' is protected by the
1762  * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'.  The retrieving of the
1763  * 'lock_stateid' is protected by 'lo_lock', with the additional
1764  * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
 * NFS4ERR_BAD_STATEID as appropriate.
1766  *
1767  * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
1768  * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID).  With this set,
1769  * this lock owner will no longer be used for future OTW calls.  Once set,
1770  * it is never unset.
1771  *
1772  * Lock ordering:
1773  * rnode4_t::r_statev4_lock > lo_lock
1774  */
typedef struct nfs4_lock_owner {
        struct nfs4_lock_owner  *lo_next_rnode; /* circular rnode list; */
        struct nfs4_lock_owner  *lo_prev_rnode; /*  NULL => not on list */
        int                     lo_pid;         /* pid owning the locks */
        stateid4                lock_stateid;   /* see locking notes above */
        seqid4                  lock_seqid;     /* guarded by SEQID_INUSE */
        /*
         * Fix this to always be 12 bytes
         */
        nfs4_lo_name_t          lock_owner_name; /* see nfs4_lo_name_t */
        int                     lo_ref_count;   /* rnode list holds one ref */
        int                     lo_valid;       /* 0 => about to be freed */
        int                     lo_pending_rqsts; /* request may be in flight */
        int                     lo_just_created; /* JUST_/PERM_CREATED */
        int                     lo_flags;       /* NFS4_LOCK_SEQID_INUSE etc */
        kcondvar_t              lo_cv_seqid_sync; /* waiters for SEQID_INUSE */
        kmutex_t                lo_lock;        /* r_statev4_lock > lo_lock */
        kthread_t               *lo_seqid_holder; /* debugging aid */
} nfs4_lock_owner_t;
1794 
1795 /* for nfs4_lock_owner_t lookups */
1796 typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
1797 
1798 /* Number of times to retry a call that fails with state independent error */
1799 #define NFS4_NUM_RECOV_RETRIES  3
1800 
/*
 * Kind of stateid recorded in nfs4_stateid_types_t (cur_sid_type):
 * none, delegation, lock, open, or special.
 */
typedef enum {
        NO_SID,         /* no stateid */
        DEL_SID,        /* delegation stateid (d_sid) */
        LOCK_SID,       /* lock stateid (l_sid) */
        OPEN_SID,       /* open stateid (o_sid) */
        SPEC_SID        /* special stateid */
} nfs4_stateid_type_t;
1808 
/*
 * Cache of recently used stateids, plus which one is current.
 * NOTE(review): appears to let retry paths pick a different stateid
 * after OLD/BAD_STATEID errors -- confirm against callers of
 * nfs4_save_stateid()/nfs4_init_stateid_types().
 */
typedef struct nfs4_stateid_types {
        stateid4 d_sid;         /* delegation stateid */
        stateid4 l_sid;         /* lock stateid */
        stateid4 o_sid;         /* open stateid */
        nfs4_stateid_type_t cur_sid_type;       /* which sid is current */
} nfs4_stateid_types_t;
1815 
1816 /*
1817  * Per-zone data for dealing with callbacks.  Included here solely for the
1818  * benefit of MDB.
1819  */
1820 struct nfs4_callback_stats {
1821         kstat_named_t   delegations;
1822         kstat_named_t   cb_getattr;
1823         kstat_named_t   cb_recall;
1824         kstat_named_t   cb_null;
1825         kstat_named_t   cb_dispatch;
1826         kstat_named_t   delegaccept_r;
1827         kstat_named_t   delegaccept_rw;
1828         kstat_named_t   delegreturn;
1829         kstat_named_t   callbacks;
1830         kstat_named_t   claim_cur;
1831         kstat_named_t   claim_cur_ok;
1832         kstat_named_t   recall_trunc;
1833         kstat_named_t   recall_failed;
1834         kstat_named_t   return_limit_write;
1835         kstat_named_t   return_limit_addmap;
1836         kstat_named_t   deleg_recover;
1837         kstat_named_t   cb_illegal;
1838 };
1839 
/*
 * Global state for the NFSv4 callback service (obtained via
 * nfs4_get_callback_globals()).
 */
struct nfs4_callback_globals {
        kmutex_t nfs4_cb_lock;
        kmutex_t nfs4_dlist_lock;       /* presumably guards nfs4_dlist */
        int nfs4_program_hint;          /* NOTE(review): looks like a hint */
                                        /* for cb program allocation -- verify */
        /* this table maps the program number to the nfs4_server structure */
        struct nfs4_server **nfs4prog2server;
        list_t nfs4_dlist;
        list_t nfs4_cb_ports;
        struct nfs4_callback_stats nfs4_callback_stats; /* see above */
#ifdef DEBUG
        int nfs4_dlistadd_c;            /* debug: dlist add count */
        int nfs4_dlistclean_c;          /* debug: dlist clean count */
#endif
};
1854 
/*
 * Why/how a CLOSE is being performed (passed to nfs4close_one()).
 * Per-value meanings below are inferred from names and surrounding
 * comments (CLOSE_FORCE is driven by nfs4_inactive, per the open
 * stream discussion above) -- confirm in nfs4close_one().
 */
typedef enum {
        CLOSE_NORM,
        CLOSE_DELMAP,
        CLOSE_FORCE,
        CLOSE_RESEND,
        CLOSE_AFTER_RESEND
} nfs4_close_type_t;
1862 
1863 /*
1864  * Structure to hold the bad seqid information that is passed
1865  * to the recovery framework.
1866  */
1867 typedef struct nfs4_bseqid_entry {
1868         nfs4_open_owner_t       *bs_oop;
1869         nfs4_lock_owner_t       *bs_lop;
1870         vnode_t                 *bs_vp;
1871         pid_t                   bs_pid;
1872         nfs4_tag_type_t         bs_tag;
1873         seqid4                  bs_seqid;
1874         list_node_t             bs_node;
1875 } nfs4_bseqid_entry_t;
1876 
1877 #ifdef _KERNEL
1878 
1879 extern void     nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
1880                     nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
1881                     size_t, uint_t, uint_t);
1882 extern void     nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
1883 extern void     nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
1884 extern void     open_owner_hold(nfs4_open_owner_t *);
1885 extern void     open_owner_rele(nfs4_open_owner_t *);
1886 extern nfs4_open_stream_t       *find_or_create_open_stream(nfs4_open_owner_t *,
1887                                         struct rnode4 *, int *);
1888 extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
1889                                 struct rnode4 *);
1890 extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
1891                                 struct rnode4 *rp);
1892 extern void     open_stream_hold(nfs4_open_stream_t *);
1893 extern void     open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
1894 extern int      nfs4close_all(vnode_t *, cred_t *);
1895 extern void     lock_owner_hold(nfs4_lock_owner_t *);
1896 extern void     lock_owner_rele(nfs4_lock_owner_t *);
1897 extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
1898 extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
1899 extern void     nfs4_rnode_remove_lock_owner(struct rnode4 *,
1900                         nfs4_lock_owner_t *);
1901 extern void     nfs4_flush_lock_owners(struct rnode4 *);
1902 extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
1903 extern void     nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
1904                     nfs4_tag_type_t);
1905 extern void     nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
1906 extern void     nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
1907                     nfs4_tag_type_t);
1908 extern void     nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
1909 extern int      nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
1910 extern void     nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
1911 extern int      nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
1912 extern void     nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
1913                         nfs4_open_stream_t *, clientid4, locker4 *);
1914 extern void     nfs4_destroy_open_owner(nfs4_open_owner_t *);
1915 
1916 extern void             nfs4_renew_lease_thread(nfs4_server_t *);
1917 extern nfs4_server_t    *find_nfs4_server(mntinfo4_t *);
1918 extern nfs4_server_t    *find_nfs4_server_all(mntinfo4_t *, int all);
1919 extern nfs4_server_t    *new_nfs4_server(servinfo4_t *, cred_t *);
1920 extern void             nfs4_mark_srv_dead(nfs4_server_t *);
1921 extern nfs4_server_t    *servinfo4_to_nfs4_server(servinfo4_t *);
1922 extern void             nfs4_inc_state_ref_count(mntinfo4_t *);
1923 extern void             nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
1924                                 mntinfo4_t *);
1925 extern void             nfs4_dec_state_ref_count(mntinfo4_t *);
1926 extern void             nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
1927                                 mntinfo4_t *);
1928 extern clientid4        mi2clientid(mntinfo4_t *);
1929 extern int              nfs4_server_in_recovery(nfs4_server_t *);
1930 extern bool_t           nfs4_server_vlock(nfs4_server_t *, int);
1931 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
1932 extern uint64_t         nfs4_get_new_oo_name(void);
1933 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
1934 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
1935 extern void     nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
1936                         int, u_offset_t, cred_t *, nfs4_error_t *,
1937                         nfs4_lost_rqst_t *, int *);
1938 extern void     nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
1939                     nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
1940                     vnode_t *, int, int);
1941 extern void     nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
1942                     nfs4_open_stream_t *, vnode_t *, cred_t *,
1943                     nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
1944 extern seqid4   nfs4_get_open_seqid(nfs4_open_owner_t *);
1945 extern cred_t   *nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *);
1946 extern void     nfs4_init_stateid_types(nfs4_stateid_types_t *);
1947 extern void     nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);
1948 
1949 extern kmutex_t nfs4_server_lst_lock;
1950 
1951 extern void     nfs4callback_destroy(nfs4_server_t *);
1952 extern void     nfs4_callback_init(void);
1953 extern void     nfs4_callback_fini(void);
1954 extern void     nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
1955                         SETCLIENTID4args *);
1956 extern void     nfs4delegreturn_async(struct rnode4 *, int, bool_t);
1957 
1958 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;
1959 
1960 extern void     nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
1961 extern void     nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
1962 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *);
1963 extern bool_t   nfs4_fs_active(nfs4_server_t *);
1964 extern void     nfs4_server_rele(nfs4_server_t *);
1965 extern bool_t   inlease(nfs4_server_t *);
1966 extern bool_t   nfs4_has_pages(vnode_t *);
1967 extern void     nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);
1968 
1969 #endif /* _KERNEL */
1970 
1971 /*
1972  * Client State Recovery
1973  */
1974 
1975 /*
1976  * The following defines are used for rs_flags in
1977  * a nfs4_recov_state_t structure.
1978  *
1979  * NFS4_RS_RENAME_HELD          Indicates that the mi_rename_lock was held.
1980  * NFS4_RS_GRACE_MSG            Set once we have uprintf'ed a grace message.
1981  * NFS4_RS_DELAY_MSG            Set once we have uprintf'ed a delay message.
1982  * NFS4_RS_RECALL_HELD1         r_deleg_recall_lock for vp1 was held.
1983  * NFS4_RS_RECALL_HELD2         r_deleg_recall_lock for vp2 was held.
1984  */
1985 #define NFS4_RS_RENAME_HELD     0x000000001
1986 #define NFS4_RS_GRACE_MSG       0x000000002
1987 #define NFS4_RS_DELAY_MSG       0x000000004
1988 #define NFS4_RS_RECALL_HELD1    0x000000008
1989 #define NFS4_RS_RECALL_HELD2    0x000000010
1990 
1991 /*
1992  * Information that is retrieved from nfs4_start_op() and that is
1993  * passed into nfs4_end_op().
1994  *
1995  * rs_sp is a reference to the nfs4_server that was found, or NULL.
1996  *
1997  * rs_num_retry_despite_err is the number times client retried an
1998  * OTW op despite a recovery error.  It is only incremented for hints
 * exempt from normal R4RECOVERR processing
2000  * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN).  (XXX this special-case code
2001  * needs review for possible removal.)
2002  * It is initialized wherever nfs4_recov_state_t is declared -- usually
2003  * very near initialization of rs_flags.
2004  */
typedef struct {
        nfs4_server_t   *rs_sp;         /* server found, or NULL (above) */
        int             rs_flags;       /* NFS4_RS_* flags */
        int             rs_num_retry_despite_err; /* see comment above */
} nfs4_recov_state_t;
2010 
2011 /*
2012  * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
2013  */
2014 
2015 #define NFS4_REMAP_CKATTRS      1
2016 #define NFS4_REMAP_NEEDSOP      2
2017 
2018 #ifdef _KERNEL
2019 
2020 extern int      nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
2021                         vnode_t *, int, int *, int, nfs4_recov_state_t *);
2022 extern void     nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
2023                         nfs4_error_t *);
2024 extern void     nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
2025                         open_claim_type4, bool_t, bool_t);
2026 extern void     nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
2027 extern void     nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
2028                         nfs4_error_t *);
2029 extern void     nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
2030                         nfs4_error_t *);
2031 extern int      nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
2032                         vnode_t *, cred_t *, vnode_t **, int);
2033 extern void     nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);
2034 
2035 extern int      nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
2036 extern int      nfs4_recov_marks_dead(nfsstat4);
2037 extern bool_t   nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
2038                         vnode_t *, vnode_t *, stateid4 *,
2039                         nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *,
2040                         vnode_t *, char *);
2041 extern int      nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2042                         nfs4_recov_state_t *);
2043 extern void     nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2044                         nfs4_recov_state_t *, bool_t);
2045 extern int      nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2046                         nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
2047 extern void     nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2048                                 nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
2049 extern char     *nfs4_recov_action_to_str(nfs4_recov_t);
2050 
2051 /*
2052  * In sequence, code desiring to unmount an ephemeral tree must
2053  * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate,
2054  * and nfs4_ephemeral_umount_unlock. The _unlock must also be
2055  * called on all error paths that occur before it would naturally
2056  * be invoked.
2057  *
 * The caller must also provide a pointer to a boolean to keep track
 * of whether or not the code in _unlock is to be run.
2060  */
/*
 * Ephemeral (mirror-mount) teardown; follows the lock/unlock protocol
 * described in the block comment above.
 */
extern void     nfs4_ephemeral_umount_activate(mntinfo4_t *,
    bool_t *, nfs4_ephemeral_tree_t **);
extern int      nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
    bool_t *, nfs4_ephemeral_tree_t **);
extern void     nfs4_ephemeral_umount_unlock(bool_t *,
    nfs4_ephemeral_tree_t **);

extern int      nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);

/*
 * Referral support: map a server-supplied location string and fetch
 * fs_locations data (nfs4_callmapid presumably upcalls to the nfsmapid
 * daemon -- confirm against the implementation).
 */
extern int      nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
extern int      nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
    char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t);

/* Delegation recall handling around over-the-wire operations. */
extern int      wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
                        nfs4_recov_state_t *);
extern void     nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
extern void     nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
                    int, nfsstat4);

/* Server grace-period and NFS4ERR_DELAY back-off support. */
extern time_t   nfs4err_delay_time;
extern void     nfs4_set_grace_wait(mntinfo4_t *);
extern void     nfs4_set_delay_wait(vnode_t *);
extern int      nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
extern int      nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
                    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
                    seqid4);

/* Resend of lost (queued) requests after recovery. */
extern void     nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
                        nfs4_error_t *);
extern void     nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
                        nfs4_server_t *);

/* Miscellaneous recovery utilities, kstat helpers and pretty-printers. */
extern int      nfs4_rpc_retry_error(int);
extern int      nfs4_try_failover(nfs4_error_t *);
extern void     nfs4_free_msg(nfs4_debug_msg_t *);
extern void     nfs4_mnt_recov_kstat_init(vfs_t *);
extern void     nfs4_mi_kstat_inc_delay(mntinfo4_t *);
extern void     nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
extern char     *nfs4_stat_to_str(nfsstat4);
extern char     *nfs4_op_to_str(nfs_opnum4);

/* Event/fact queueing for the recovery message facility. */
extern void     nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
                    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
                    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
extern void     nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
                    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
/* These are cold paths; hint the compiler not to optimize for them. */
#pragma rarely_called(nfs4_queue_event)
#pragma rarely_called(nfs4_queue_fact)

/* Used for preformed "." and ".." dirents */
extern char     *nfs4_dot_entries;
extern char     *nfs4_dot_dot_entry;

#ifdef  DEBUG
/* Thread-specific-data key; DEBUG-only sanity checking. */
extern uint_t   nfs4_tsd_key;
#endif
2116 
2117 #endif /* _KERNEL */
2118 
2119 /*
2120  * Filehandle management.
2121  *
2122  * Filehandles can change in v4, so rather than storing the filehandle
2123  * directly in the rnode, etc., we manage the filehandle through one of
2124  * these objects.
 * Locking: sfh_fh and sfh_tree are protected by the filesystem's
 * mi_fh_lock.  The reference count and flags are protected by sfh_lock.
2127  * sfh_mi is read-only.
2128  *
2129  * mntinfo4_t::mi_fh_lock > sfh_lock.
2130  */
2131 
typedef struct nfs4_sharedfh {
        nfs_fh4 sfh_fh;                 /* key and current filehandle */
        kmutex_t sfh_lock;              /* protects sfh_refcnt and sfh_flags */
        uint_t sfh_refcnt;              /* reference count */
        uint_t sfh_flags;               /* SFH4_* flags, below */
        mntinfo4_t *sfh_mi;             /* backptr to filesystem */
        avl_node_t sfh_tree;            /* used by avl package */
} nfs4_sharedfh_t;

/*
 * Pointer comparison suffices here because filehandle objects are
 * shared per filesystem (sfh_fh is the AVL key), so one filehandle
 * value maps to at most one nfs4_sharedfh_t.
 */
#define SFH4_SAME(sfh1, sfh2)   ((sfh1) == (sfh2))

/*
 * Flags.
 */
#define SFH4_IN_TREE    0x1             /* currently in an AVL tree */
2147 
#ifdef _KERNEL

/*
 * Shared filehandle interface: table setup (sfh4_createtab), lookup
 * and insertion (sfh4_get/sfh4_put), in-place filehandle update
 * (sfh4_update), and reference counting (sfh4_hold/sfh4_rele).
 * NOTE(review): sfh4_rele takes a double pointer, presumably so it
 * can clear the caller's reference -- confirm in the implementation.
 */
extern void sfh4_createtab(avl_tree_t *);
extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
                                nfs4_sharedfh_t *);
extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
extern void sfh4_hold(nfs4_sharedfh_t *);
extern void sfh4_rele(nfs4_sharedfh_t **);
extern void sfh4_printfhandle(const nfs4_sharedfh_t *);

#endif
2161 
2162 /*
2163  * Path and file name management.
2164  *
2165  * This type stores the name of an entry in the filesystem and keeps enough
2166  * information that it can provide a complete path.  All fields are
2167  * protected by fn_lock, except for the reference count, which is managed
2168  * using atomic add/subtract.
2169  *
2170  * Additionally shared filehandle for this fname is stored.
2171  * Normally, fn_get() when it creates this fname stores the passed in
2172  * shared fh in fn_sfh by doing sfh_hold. Similarly the path which
2173  * destroys this fname releases the reference on this fh by doing sfh_rele.
2174  *
 * fn_get uses the fn_sfh to refine the comparison in cases
2176  * where we have matched the name but have differing file handles,
2177  * this normally happens due to
2178  *
2179  *      1. Server side rename of a file/directory.
2180  *      2. Another client renaming a file/directory on the server.
2181  *
2182  * Differing names but same filehandle is possible as in the case of hardlinks,
2183  * but differing filehandles with same name component will later confuse
2184  * the client and can cause various panics.
2185  *
2186  * Lock order: child and then parent.
2187  */
2188 
typedef struct nfs4_fname {
        struct nfs4_fname *fn_parent;   /* parent name; null if fs root */
        char *fn_name;                  /* the actual name */
        ssize_t fn_len;                 /* strlen(fn_name) */
        uint32_t fn_refcnt;             /* reference count (atomic updates) */
        kmutex_t fn_lock;               /* protects all fields but fn_refcnt */
        avl_node_t fn_tree;             /* linkage in parent's fn_children */
        avl_tree_t fn_children;         /* children, if any */
        nfs4_sharedfh_t *fn_sfh;        /* The fh for this fname */
} nfs4_fname_t;
2199 
#ifdef _KERNEL

/* Sentinel vnode whose address marks "extended attributes unsupported". */
extern vnode_t  nfs4_xattr_notsupp_vnode;
#define NFS4_XATTR_DIR_NOTSUPP  &nfs4_xattr_notsupp_vnode

/*
 * fname interface: fn_get creates (or finds) a child entry under the
 * given parent, taking a hold on the passed shared fh (see the block
 * comment above); fn_move re-parents an entry, e.g. on rename.
 * NOTE(review): fn_rele takes a double pointer, presumably to clear
 * the caller's reference; fn_name/fn_path look like they return
 * allocated strings the caller must free -- confirm both against the
 * implementation.
 */
extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
extern void fn_hold(nfs4_fname_t *);
extern void fn_rele(nfs4_fname_t **);
extern char *fn_name(nfs4_fname_t *);
extern char *fn_path(nfs4_fname_t *);
extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
extern nfs4_fname_t *fn_parent(nfs4_fname_t *);

/* Referral Support */
extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
    cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *);

#endif
2218 
2219 /*
2220  * Per-zone data for managing client handles, included in this file for the
2221  * benefit of MDB.
2222  */
2223 struct nfs4_clnt {
2224         struct chhead   *nfscl_chtable4;
2225         kmutex_t        nfscl_chtable4_lock;
2226         zoneid_t        nfscl_zoneid;
2227         list_node_t     nfscl_node;
2228         struct clstat4  nfscl_stat;
2229 };
2230 
2231 #ifdef  __cplusplus
2232 }
2233 #endif
2234 
2235 #endif /* _NFS4_CLNT_H */