1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 /*
  30  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  31  *      All Rights Reserved
  32  */
  33 
  34 #include <sys/param.h>
  35 #include <sys/types.h>
  36 #include <sys/systm.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/vtrace.h>
  39 #include <sys/session.h>
  40 #include <sys/thread.h>
  41 #include <sys/dnlc.h>
  42 #include <sys/cred.h>
  43 #include <sys/priv.h>
  44 #include <sys/list.h>
  45 #include <sys/sdt.h>
  46 #include <sys/policy.h>
  47 
  48 #include <rpc/types.h>
  49 #include <rpc/xdr.h>
  50 
  51 #include <nfs/nfs.h>
  52 
  53 #include <nfs/nfs_clnt.h>
  54 
  55 #include <nfs/nfs4.h>
  56 #include <nfs/rnode4.h>
  57 #include <nfs/nfs4_clnt.h>
  58 
  59 /*
  60  * client side statistics
  61  */
  62 static const struct clstat4 clstat4_tmpl = {
  63         { "calls",      KSTAT_DATA_UINT64 },
  64         { "badcalls",   KSTAT_DATA_UINT64 },
  65         { "referrals",  KSTAT_DATA_UINT64 },
  66         { "referlinks", KSTAT_DATA_UINT64 },
  67         { "clgets",     KSTAT_DATA_UINT64 },
  68         { "cltoomany",  KSTAT_DATA_UINT64 },
  69 #ifdef DEBUG
  70         { "clalloc",    KSTAT_DATA_UINT64 },
  71         { "noresponse", KSTAT_DATA_UINT64 },
  72         { "failover",   KSTAT_DATA_UINT64 },
  73         { "remap",      KSTAT_DATA_UINT64 },
  74 #endif
  75 };
  76 
  77 #ifdef DEBUG
  78 struct clstat4_debug clstat4_debug = {
  79         { "nrnode",     KSTAT_DATA_UINT64 },
  80         { "access",     KSTAT_DATA_UINT64 },
  81         { "dirent",     KSTAT_DATA_UINT64 },
  82         { "dirents",    KSTAT_DATA_UINT64 },
  83         { "reclaim",    KSTAT_DATA_UINT64 },
  84         { "clreclaim",  KSTAT_DATA_UINT64 },
  85         { "f_reclaim",  KSTAT_DATA_UINT64 },
  86         { "a_reclaim",  KSTAT_DATA_UINT64 },
  87         { "r_reclaim",  KSTAT_DATA_UINT64 },
  88         { "r_path",     KSTAT_DATA_UINT64 },
  89 };
  90 #endif
  91 
  92 /*
  93  * We keep a global list of per-zone client data, so we can clean up all zones
  94  * if we get low on memory.
  95  */
  96 static list_t nfs4_clnt_list;
  97 static kmutex_t nfs4_clnt_list_lock;
  98 zone_key_t nfs4clnt_zone_key;
  99 
 100 static struct kmem_cache *chtab4_cache;
 101 
 102 #ifdef DEBUG
 103 static int nfs4_rfscall_debug;
 104 static int nfs4_try_failover_any;
 105 int nfs4_utf8_debug = 0;
 106 #endif
 107 
 108 /*
 109  * NFSv4 readdir cache implementation
 110  */
 111 typedef struct rddir4_cache_impl {
 112         rddir4_cache    rc;             /* readdir cache element */
 113         kmutex_t        lock;           /* lock protects count */
 114         uint_t          count;          /* reference count */
 115         avl_node_t      tree;           /* AVL tree link */
 116 } rddir4_cache_impl;
 117 
 118 static int rddir4_cache_compar(const void *, const void *);
 119 static void rddir4_cache_free(rddir4_cache_impl *);
 120 static rddir4_cache *rddir4_cache_alloc(int);
 121 static void rddir4_cache_hold(rddir4_cache *);
 122 static int try_failover(enum clnt_stat);
 123 
 124 static int nfs4_readdir_cache_hits = 0;
 125 static int nfs4_readdir_cache_waits = 0;
 126 static int nfs4_readdir_cache_misses = 0;
 127 
 128 /*
 129  * Shared nfs4 functions
 130  */
 131 
 132 /*
 133  * Copy an nfs_fh4.  The destination storage (to->nfs_fh4_val) must already
 134  * be allocated.
 135  */
 136 
 137 void
 138 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
 139 {
 140         to->nfs_fh4_len = from->nfs_fh4_len;
 141         bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
 142 }
 143 
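     /*
      * Illustrative sketch only (not part of the build): the caller is
      * expected to allocate the destination storage first, e.g.
      *
      *        to->nfs_fh4_val = kmem_alloc(from->nfs_fh4_len, KM_SLEEP);
      *        nfs_fh4_copy(from, to);
      *
      * nfs_fh4_copy() fills in to->nfs_fh4_len from the source handle.
      */
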
 144 /*
 145  * nfs4cmpfh - compare 2 filehandles.
 146  * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
 147  * "less" than the second, +1 if the first is "greater" than the second.
 148  */
 149 
 150 int
 151 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
 152 {
 153         const char *c1, *c2;
 154 
 155         if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
 156                 return (-1);
 157         if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
 158                 return (1);
 159         for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
 160             c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
 161             c1++, c2++) {
 162                 if (*c1 < *c2)
 163                         return (-1);
 164                 if (*c1 > *c2)
 165                         return (1);
 166         }
 167 
 168         return (0);
 169 }
 170 
 171 /*
 172  * Compare two v4 filehandles.  Return zero if they're the same, non-zero
 173  * if they're not.  Like nfs4cmpfh(), but different filehandle
 174  * representation, and doesn't provide information about greater than or
 175  * less than.
 176  */
 177 
 178 int
 179 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
 180 {
 181         if (fh1->fh_len == fh2->fh_len)
 182                 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
 183 
 184         return (1);
 185 }
 186 
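     /*
      * stateid4_cmp - compare two stateid4 structures.
      * Note the sense of the return value: 1 if the stateids are
      * identical, 0 if they differ (the opposite of bcmp()).
      */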
 187 int
 188 stateid4_cmp(stateid4 *s1, stateid4 *s2)
 189 {
 190         if (bcmp(s1, s2, sizeof (stateid4)) == 0)
 191                 return (1);
 192         else
 193                 return (0);
 194 }
 195 
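     /*
      * puterrno4 - map a UNIX errno to an NFSv4 status (nfsstat4).
      * Errnos with no exact NFSv4 counterpart are mapped to a close
      * approximation (e.g. ENOMEM -> NFS4ERR_RESOURCE); unrecognized
      * values are passed through unchanged.
      */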
 196 nfsstat4
 197 puterrno4(int error)
 198 {
 199         switch (error) {
 200         case 0:
 201                 return (NFS4_OK);
 202         case EPERM:
 203                 return (NFS4ERR_PERM);
 204         case ENOENT:
 205                 return (NFS4ERR_NOENT);
 206         case EINTR:
 207                 return (NFS4ERR_IO);
 208         case EIO:
 209                 return (NFS4ERR_IO);
 210         case ENXIO:
 211                 return (NFS4ERR_NXIO);
 212         case ENOMEM:
 213                 return (NFS4ERR_RESOURCE);
 214         case EACCES:
 215                 return (NFS4ERR_ACCESS);
 216         case EBUSY:
 217                 return (NFS4ERR_IO);
 218         case EEXIST:
 219                 return (NFS4ERR_EXIST);
 220         case EXDEV:
 221                 return (NFS4ERR_XDEV);
 222         case ENODEV:
 223                 return (NFS4ERR_IO);
 224         case ENOTDIR:
 225                 return (NFS4ERR_NOTDIR);
 226         case EISDIR:
 227                 return (NFS4ERR_ISDIR);
 228         case EINVAL:
 229                 return (NFS4ERR_INVAL);
 230         case EMFILE:
 231                 return (NFS4ERR_RESOURCE);
 232         case EFBIG:
 233                 return (NFS4ERR_FBIG);
 234         case ENOSPC:
 235                 return (NFS4ERR_NOSPC);
 236         case EROFS:
 237                 return (NFS4ERR_ROFS);
 238         case EMLINK:
 239                 return (NFS4ERR_MLINK);
 240         case EDEADLK:
 241                 return (NFS4ERR_DEADLOCK);
 242         case ENOLCK:
 243                 return (NFS4ERR_DENIED);
 244         case EREMOTE:
 245                 return (NFS4ERR_SERVERFAULT);
 246         case ENOTSUP:
 247                 return (NFS4ERR_NOTSUPP);
 248         case EDQUOT:
 249                 return (NFS4ERR_DQUOT);
 250         case ENAMETOOLONG:
 251                 return (NFS4ERR_NAMETOOLONG);
 252         case EOVERFLOW:
 253                 return (NFS4ERR_INVAL);
 254         case ENOSYS:
 255                 return (NFS4ERR_NOTSUPP);
 256         case ENOTEMPTY:
 257                 return (NFS4ERR_NOTEMPTY);
 258         case EOPNOTSUPP:
 259                 return (NFS4ERR_NOTSUPP);
 260         case ESTALE:
 261                 return (NFS4ERR_STALE);
 262         case EAGAIN:
 263                 if (curthread->t_flag & T_WOULDBLOCK) {
 264                         curthread->t_flag &= ~T_WOULDBLOCK;
 265                         return (NFS4ERR_DELAY);
 266                 }
 267                 return (NFS4ERR_LOCKED);
 268         default:
 269                 return ((enum nfsstat4)error);
 270         }
 271 }
 272 
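     /*
      * geterrno4 - map an NFSv4 status to a UNIX errno; the rough
      * inverse of puterrno4().  Statuses with no natural errno
      * equivalent (state, lock and reclaim errors, for example)
      * collapse to EPROTO or EIO.
      */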
 273 int
 274 geterrno4(enum nfsstat4 status)
 275 {
 276         switch (status) {
 277         case NFS4_OK:
 278                 return (0);
 279         case NFS4ERR_PERM:
 280                 return (EPERM);
 281         case NFS4ERR_NOENT:
 282                 return (ENOENT);
 283         case NFS4ERR_IO:
 284                 return (EIO);
 285         case NFS4ERR_NXIO:
 286                 return (ENXIO);
 287         case NFS4ERR_ACCESS:
 288                 return (EACCES);
 289         case NFS4ERR_EXIST:
 290                 return (EEXIST);
 291         case NFS4ERR_XDEV:
 292                 return (EXDEV);
 293         case NFS4ERR_NOTDIR:
 294                 return (ENOTDIR);
 295         case NFS4ERR_ISDIR:
 296                 return (EISDIR);
 297         case NFS4ERR_INVAL:
 298                 return (EINVAL);
 299         case NFS4ERR_FBIG:
 300                 return (EFBIG);
 301         case NFS4ERR_NOSPC:
 302                 return (ENOSPC);
 303         case NFS4ERR_ROFS:
 304                 return (EROFS);
 305         case NFS4ERR_MLINK:
 306                 return (EMLINK);
 307         case NFS4ERR_NAMETOOLONG:
 308                 return (ENAMETOOLONG);
 309         case NFS4ERR_NOTEMPTY:
 310                 return (ENOTEMPTY);
 311         case NFS4ERR_DQUOT:
 312                 return (EDQUOT);
 313         case NFS4ERR_STALE:
 314                 return (ESTALE);
 315         case NFS4ERR_BADHANDLE:
 316                 return (ESTALE);
 317         case NFS4ERR_BAD_COOKIE:
 318                 return (EINVAL);
 319         case NFS4ERR_NOTSUPP:
 320                 return (EOPNOTSUPP);
 321         case NFS4ERR_TOOSMALL:
 322                 return (EINVAL);
 323         case NFS4ERR_SERVERFAULT:
 324                 return (EIO);
 325         case NFS4ERR_BADTYPE:
 326                 return (EINVAL);
 327         case NFS4ERR_DELAY:
 328                 return (ENXIO);
 329         case NFS4ERR_SAME:
 330                 return (EPROTO);
 331         case NFS4ERR_DENIED:
 332                 return (ENOLCK);
 333         case NFS4ERR_EXPIRED:
 334                 return (EPROTO);
 335         case NFS4ERR_LOCKED:
 336                 return (EACCES);
 337         case NFS4ERR_GRACE:
 338                 return (EAGAIN);
 339         case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */
 340                 return (ESTALE);
 341         case NFS4ERR_SHARE_DENIED:
 342                 return (EACCES);
 343         case NFS4ERR_WRONGSEC:
 344                 return (EPERM);
 345         case NFS4ERR_CLID_INUSE:
 346                 return (EAGAIN);
 347         case NFS4ERR_RESOURCE:
 348                 return (EAGAIN);
 349         case NFS4ERR_MOVED:
 350                 return (EPROTO);
 351         case NFS4ERR_NOFILEHANDLE:
 352                 return (EIO);
 353         case NFS4ERR_MINOR_VERS_MISMATCH:
 354                 return (ENOTSUP);
 355         case NFS4ERR_STALE_CLIENTID:
 356                 return (EIO);
 357         case NFS4ERR_STALE_STATEID:
 358                 return (EIO);
 359         case NFS4ERR_OLD_STATEID:
 360                 return (EIO);
 361         case NFS4ERR_BAD_STATEID:
 362                 return (EIO);
 363         case NFS4ERR_BAD_SEQID:
 364                 return (EIO);
 365         case NFS4ERR_NOT_SAME:
 366                 return (EPROTO);
 367         case NFS4ERR_LOCK_RANGE:
 368                 return (EPROTO);
 369         case NFS4ERR_SYMLINK:
 370                 return (EPROTO);
 371         case NFS4ERR_RESTOREFH:
 372                 return (EPROTO);
 373         case NFS4ERR_LEASE_MOVED:
 374                 return (EPROTO);
 375         case NFS4ERR_ATTRNOTSUPP:
 376                 return (ENOTSUP);
 377         case NFS4ERR_NO_GRACE:
 378                 return (EPROTO);
 379         case NFS4ERR_RECLAIM_BAD:
 380                 return (EPROTO);
 381         case NFS4ERR_RECLAIM_CONFLICT:
 382                 return (EPROTO);
 383         case NFS4ERR_BADXDR:
 384                 return (EINVAL);
 385         case NFS4ERR_LOCKS_HELD:
 386                 return (EIO);
 387         case NFS4ERR_OPENMODE:
 388                 return (EACCES);
 389         case NFS4ERR_BADOWNER:
 390                 /*
 391                  * Client and server are in different DNS domains
 392                  * and the NFSMAPID_DOMAIN in /etc/default/nfs
 393                  * doesn't match.  No good answer here.  Return
 394                  * EACCES, which translates to "permission denied".
 395                  */
 396                 return (EACCES);
 397         case NFS4ERR_BADCHAR:
 398                 return (EINVAL);
 399         case NFS4ERR_BADNAME:
 400                 return (EINVAL);
 401         case NFS4ERR_BAD_RANGE:
 402                 return (EIO);
 403         case NFS4ERR_LOCK_NOTSUPP:
 404                 return (ENOTSUP);
 405         case NFS4ERR_OP_ILLEGAL:
 406                 return (EINVAL);
 407         case NFS4ERR_DEADLOCK:
 408                 return (EDEADLK);
 409         case NFS4ERR_FILE_OPEN:
 410                 return (EACCES);
 411         case NFS4ERR_ADMIN_REVOKED:
 412                 return (EPROTO);
 413         case NFS4ERR_CB_PATH_DOWN:
 414                 return (EPROTO);
 415         default:
 416 #ifdef DEBUG
 417                 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
 418                     status);
 419 #endif
 420                 return ((int)status);
 421         }
 422 }
 423 
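     /*
      * nfs4_log_badowner - record that the server returned
      * NFS4ERR_BADOWNER for operation 'op'.  A console warning is
      * issued at most once per client/server pair, and an RF_BADOWNER
      * fact is queued at most once per mntinfo4_t.
      */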
 424 void
 425 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
 426 {
 427         nfs4_server_t *server;
 428 
 429         /*
 430          * Return if already printed/queued a msg
 431          * for this mount point.
 432          */
 433         if (mi->mi_flags & MI4_BADOWNER_DEBUG)
 434                 return;
 435         /*
 436          * Happens once per client <-> server pair.
 437          */
 438         if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
 439             mi->mi_flags & MI4_INT))
 440                 return;
 441 
 442         server = find_nfs4_server(mi);
 443         if (server == NULL) {
 444                 nfs_rw_exit(&mi->mi_recovlock);
 445                 return;
 446         }
 447 
 448         if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
 449                 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
 450                     "!NFSMAPID_DOMAIN does not match"
 451                     " the server: %s domain.\n"
 452                     "Please check configuration",
 453                     mi->mi_curr_serv->sv_hostname);
 454                 server->s_flags |= N4S_BADOWNER_DEBUG;
 455         }
 456         mutex_exit(&server->s_lock);
 457         nfs4_server_rele(server);
 458         nfs_rw_exit(&mi->mi_recovlock);
 459 
 460         /*
 461          * Happens once per mntinfo4_t.
 462          * This error is treated as the recovery fact RF_BADOWNER:
 463          * queue it in the message queue for this mntinfo4_t.  The
 464          * message is not printed (it is absent from id_to_dump_solo_fact()),
 465          * but it is there for inspection if the queue is ever dumped.
 466          */
 467         mutex_enter(&mi->mi_lock);
 468         if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
 469                 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
 470                     FALSE, NULL, 0, NULL);
 471                 mi->mi_flags |= MI4_BADOWNER_DEBUG;
 472         }
 473         mutex_exit(&mi->mi_lock);
 474 }
 475 
 476 int
 477 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
 478 {
 479         int64_t sec;
 480         int32_t nsec;
 481 
 482         /*
 483          * Check that the NFSv4 time is valid for this system.
 484          * nfsv4 time value is a signed 64-bit, and the system time
 485          * may be either int64_t or int32_t (depends on the kernel),
 486          * so if the kernel is 32-bit, the nfsv4 time value may not fit.
 487          */
 488 #ifndef _LP64
 489         if (! NFS4_TIME_OK(ntime->seconds)) {
 490                 return (EOVERFLOW);
 491         }
 492 #endif
 493 
 494         /* Invalid to specify 1 billion (or more) nsecs */
 495         if (ntime->nseconds >= 1000000000)
 496                 return (EINVAL);
 497 
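             /*
              * Normalize negative times.  As a sketch of the arithmetic
              * below (assuming the value is seconds plus nseconds / 1e9):
              * the wire pair {seconds = -2, nseconds = 750000000} becomes
              * {tv_sec = -1, tv_nsec = -250000000}, both denoting -1.25
              * seconds.
              */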
 498         if (ntime->seconds < 0) {
 499                 sec = ntime->seconds + 1;
 500                 nsec = -1000000000 + ntime->nseconds;
 501         } else {
 502                 sec = ntime->seconds;
 503                 nsec = ntime->nseconds;
 504         }
 505 
 506         vatime->tv_sec = sec;
 507         vatime->tv_nsec = nsec;
 508 
 509         return (0);
 510 }
 511 
 512 int
 513 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
 514 {
 515         int64_t sec;
 516         uint32_t nsec;
 517 
 518         /*
 519          * nfsv4 time value is a signed 64-bit, and the system time
 520          * may be either int64_t or int32_t (depends on the kernel),
 521          * so all system time values will fit.
 522          */
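             /*
              * For example, reversing the case noted in nfs4_time_ntov():
              * {tv_sec = -1, tv_nsec = -250000000} maps back to
              * {seconds = -2, nseconds = 750000000}.
              */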
 523         if (vatime->tv_nsec >= 0) {
 524                 sec = vatime->tv_sec;
 525                 nsec = vatime->tv_nsec;
 526         } else {
 527                 sec = vatime->tv_sec - 1;
 528                 nsec = 1000000000 + vatime->tv_nsec;
 529         }
 530         ntime->seconds = sec;
 531         ntime->nseconds = nsec;
 532 
 533         return (0);
 534 }
 535 
 536 /*
 537  * Converts a utf8 string to a valid null terminated filename string.
 538  *
 539  * XXX - Not actually translating the UTF-8 string as per RFC 2279.
 540  *       For now, just validate that the UTF-8 string off the wire
 541  *       does not have characters that will freak out UFS, and leave
 542  *       it at that.
 543  */
 544 char *
 545 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
 546 {
 547         ASSERT(lenp != NULL);
 548 
 549         if (u8s == NULL || u8s->utf8string_len <= 0 ||
 550             u8s->utf8string_val == NULL)
 551                 return (NULL);
 552 
 553         /*
 554          * Check for obvious illegal filename chars
 555          */
 556         if (utf8_strchr(u8s, '/') != NULL) {
 557 #ifdef DEBUG
 558                 if (nfs4_utf8_debug) {
 559                         char *path;
 560                         int len = u8s->utf8string_len;
 561 
 562                         path = kmem_alloc(len + 1, KM_SLEEP);
 563                         bcopy(u8s->utf8string_val, path, len);
 564                         path[len] = '\0';
 565 
 566                         zcmn_err(getzoneid(), CE_WARN,
 567                             "Invalid UTF-8 filename: %s", path);
 568 
 569                         kmem_free(path, len + 1);
 570                 }
 571 #endif
 572                 return (NULL);
 573         }
 574 
 575         return (utf8_to_str(u8s, lenp, s));
 576 }
 577 
 578 /*
 579  * Converts a utf8 string to a C string.
 580  * kmem_allocs a new string if not supplied
 581  */
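     /*
      * Illustrative sketch only (not part of the build): when no buffer
      * is supplied, the result is kmem_alloc()ed and the caller frees it
      * using the returned length, e.g.
      *
      *        uint_t len;
      *        char *nm = utf8_to_str(str, &len, NULL);
      *
      *        if (nm != NULL) {
      *                ... use nm, which is null terminated ...
      *                kmem_free(nm, len);
      *        }
      */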
 582 char *
 583 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
 584 {
 585         char    *sp;
 586         char    *u8p;
 587         int     len;
 588         int      i;
 589 
 590         ASSERT(lenp != NULL);
 591 
 592         if (str == NULL)
 593                 return (NULL);
 594 
 595         u8p = str->utf8string_val;
 596         len = str->utf8string_len;
 597         if (len <= 0 || u8p == NULL) {
 598                 if (s)
 599                         *s = '\0';
 600                 return (NULL);
 601         }
 602 
 603         sp = s;
 604         if (sp == NULL)
 605                 sp = kmem_alloc(len + 1, KM_SLEEP);
 606 
 607         /*
 608          * At least check for embedded nulls
 609          */
 610         for (i = 0; i < len; i++) {
 611                 sp[i] = u8p[i];
 612                 if (u8p[i] == '\0') {
 613 #ifdef  DEBUG
 614                         zcmn_err(getzoneid(), CE_WARN,
 615                             "Embedded NULL in UTF-8 string");
 616 #endif
 617                         if (s == NULL)
 618                                 kmem_free(sp, len + 1);
 619                         return (NULL);
 620                 }
 621         }
 622         sp[len] = '\0';
 623         *lenp = len + 1;
 624 
 625         return (sp);
 626 }
 627 
 628 /*
 629  * str_to_utf8 - converts a null-terminated C string to a utf8 string
 630  */
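     /*
      * Note that the resulting utf8string_val is not null terminated;
      * its length is carried only in utf8string_len.
      */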
 631 utf8string *
 632 str_to_utf8(char *nm, utf8string *str)
 633 {
 634         int len;
 635 
 636         if (str == NULL)
 637                 return (NULL);
 638 
 639         if (nm == NULL || *nm == '\0') {
 640                 str->utf8string_len = 0;
 641                 str->utf8string_val = NULL;
                     return (str);
 642         }
 643 
 644         len = strlen(nm);
 645 
 646         str->utf8string_val = kmem_alloc(len, KM_SLEEP);
 647         str->utf8string_len = len;
 648         bcopy(nm, str->utf8string_val, len);
 649 
 650         return (str);
 651 }
 652 
 653 utf8string *
 654 utf8_copy(utf8string *src, utf8string *dest)
 655 {
 656         if (src == NULL)
 657                 return (NULL);
 658         if (dest == NULL)
 659                 return (NULL);
 660 
 661         if (src->utf8string_len > 0) {
 662                 dest->utf8string_val = kmem_alloc(src->utf8string_len,
 663                     KM_SLEEP);
 664                 bcopy(src->utf8string_val, dest->utf8string_val,
 665                     src->utf8string_len);
 666                 dest->utf8string_len = src->utf8string_len;
 667         } else {
 668                 dest->utf8string_val = NULL;
 669                 dest->utf8string_len = 0;
 670         }
 671 
 672         return (dest);
 673 }
 674 
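     /*
      * utf8_compare - three-way, byte-wise comparison of two utf8strings.
      * A NULL or empty string sorts before a non-empty one, and a string
      * that is a prefix of a longer string sorts before it.
      */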
 675 int
 676 utf8_compare(const utf8string *a, const utf8string *b)
 677 {
 678         int mlen, cmp;
 679         int alen, blen;
 680         char *aval, *bval;
 681 
 682         if ((a == NULL) && (b == NULL))
 683                 return (0);
 684         else if (a == NULL)
 685                 return (-1);
 686         else if (b == NULL)
 687                 return (1);
 688 
 689         alen = a->utf8string_len;
 690         blen = b->utf8string_len;
 691         aval = a->utf8string_val;
 692         bval = b->utf8string_val;
 693 
 694         if (((alen == 0) || (aval == NULL)) &&
 695             ((blen == 0) || (bval == NULL)))
 696                 return (0);
 697         else if ((alen == 0) || (aval == NULL))
 698                 return (-1);
 699         else if ((blen == 0) || (bval == NULL))
 700                 return (1);
 701 
 702         mlen = MIN(alen, blen);
 703         cmp = strncmp(aval, bval, mlen);
 704 
 705         if ((cmp == 0) && (alen == blen))
 706                 return (0);
 707         else if ((cmp == 0) && (alen < blen))
 708                 return (-1);
 709         else if (cmp == 0)
 710                 return (1);
 711         else if (cmp < 0)
 712                 return (-1);
 713         return (1);
 714 }
 715 
 716 /*
 717  * utf8_dir_verify - verify that the utf8 string is a valid component
      * name: non-empty, not "." or "..", and free of '/' and NUL characters.
 718  */
 719 nfsstat4
 720 utf8_dir_verify(utf8string *str)
 721 {
 722         char *nm;
 723         int len;
 724 
 725         if (str == NULL)
 726                 return (NFS4ERR_INVAL);
 727 
 728         nm = str->utf8string_val;
 729         len = str->utf8string_len;
 730         if (nm == NULL || len == 0) {
 731                 return (NFS4ERR_INVAL);
 732         }
 733 
 734         if (len == 1 && nm[0] == '.')
 735                 return (NFS4ERR_BADNAME);
 736         if (len == 2 && nm[0] == '.' && nm[1] == '.')
 737                 return (NFS4ERR_BADNAME);
 738 
 739         if (utf8_strchr(str, '/') != NULL)
 740                 return (NFS4ERR_BADNAME);
 741 
 742         if (utf8_strchr(str, '\0') != NULL)
 743                 return (NFS4ERR_BADNAME);
 744 
 745         return (NFS4_OK);
 746 }
 747 
 748 /*
 749  * from rpcsec module (common/rpcsec)
 750  */
 751 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
 752 extern void sec_clnt_freeh(AUTH *);
 753 extern void sec_clnt_freeinfo(struct sec_data *);
 754 
 755 /*
 756  * authget() gets an auth handle based on the security
 757  * information from the servinfo in mountinfo.
 758  * The auth handle is stored in ch_client->cl_auth.
 759  *
 760  * The first security flavor of choice is sv_secdata, which
 761  * is initialized by the client.  If that fails, get the
 762  * secinfo list from the server and then select one from
 763  * that list.
 764  *
 765  * For RPCSEC_GSS flavor, upon success, a secure context is
 766  * established between client and server.
 767  */
 768 int
 769 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
 770 {
 771         int error, i;
 772 
 773         /*
 774          * SV4_TRYSECINFO indicates to try the secinfo list from
 775          * sv_secinfo until a successful one is reached. Point
 776          * sv_currsec to the selected security mechanism for
 777          * later sessions.
 778          */
 779         (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
 780         if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
 781                 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
 782                     i++) {
 783                         if (!(error = sec_clnt_geth(ch_client,
 784                             &svp->sv_secinfo->sdata[i],
 785                             cr, &ch_client->cl_auth))) {
 786 
 787                                 svp->sv_currsec = &svp->sv_secinfo->sdata[i];
 788                                 svp->sv_secinfo->index = i;
 789                                 /* done */
 790                                 svp->sv_flags &= ~SV4_TRYSECINFO;
 791                                 break;
 792                         }
 793 
 794                         /*
 795                          * Allow the caller to retry with the security
 796                          * flavor pointed to by svp->sv_secinfo->index when
 797                          * ETIMEDOUT/ECONNRESET occurs.
 798                          */
 799                         if (error == ETIMEDOUT || error == ECONNRESET) {
 800                                 svp->sv_secinfo->index = i;
 801                                 break;
 802                         }
 803                 }
 804         } else {
 805                 /* sv_currsec points to one of the entries in sv_secinfo */
 806                 if (svp->sv_currsec) {
 807                         error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
 808                             &ch_client->cl_auth);
 809                 } else {
 810                         /* If it's null, use sv_secdata. */
 811                         error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
 812                             &ch_client->cl_auth);
 813                 }
 814         }
 815         nfs_rw_exit(&svp->sv_lock);
 816 
 817         return (error);
 818 }
 819 
 820 /*
 821  * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 822  */
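     /*
      * On success, *newcl and *chp reference a client handle that the
      * caller later returns to the per-zone cache with clfree4().  Cached
      * handles left unused for cl_holdtime seconds are torn down by
      * clreclaim4_zone() when the system runs low on memory.
      */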
 823 int
 824 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
 825     struct chtab **chp, struct nfs4_clnt *nfscl)
 826 {
 827         struct chhead *ch, *newch;
 828         struct chhead **plistp;
 829         struct chtab *cp;
 830         int error;
 831         k_sigset_t smask;
 832 
 833         if (newcl == NULL || chp == NULL || ci == NULL)
 834                 return (EINVAL);
 835 
 836         *newcl = NULL;
 837         *chp = NULL;
 838 
 839         /*
 840          * Find an unused handle or create one
 841          */
 842         newch = NULL;
 843         nfscl->nfscl_stat.clgets.value.ui64++;
 844 top:
 845         /*
 846          * Find the correct entry in the cache to check for free
 847          * client handles.  The search is based on the RPC program
 848          * number, program version number, dev_t for the transport
 849          * device, and the protocol family.
 850          */
 851         mutex_enter(&nfscl->nfscl_chtable4_lock);
 852         plistp = &nfscl->nfscl_chtable4;
 853         for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
 854                 if (ch->ch_prog == ci->cl_prog &&
 855                     ch->ch_vers == ci->cl_vers &&
 856                     ch->ch_dev == svp->sv_knconf->knc_rdev &&
 857                     (strcmp(ch->ch_protofmly,
 858                     svp->sv_knconf->knc_protofmly) == 0))
 859                         break;
 860                 plistp = &ch->ch_next;
 861         }
 862 
 863         /*
 864          * If we didn't find a cache entry for this quadruple, then
 865          * create one.  If we don't have one already preallocated,
 866          * then drop the cache lock, create one, and then start over.
 867          * If we did have a preallocated entry, then just add it to
 868          * the front of the list.
 869          */
 870         if (ch == NULL) {
 871                 if (newch == NULL) {
 872                         mutex_exit(&nfscl->nfscl_chtable4_lock);
 873                         newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
 874                         newch->ch_timesused = 0;
 875                         newch->ch_prog = ci->cl_prog;
 876                         newch->ch_vers = ci->cl_vers;
 877                         newch->ch_dev = svp->sv_knconf->knc_rdev;
 878                         newch->ch_protofmly = kmem_alloc(
 879                             strlen(svp->sv_knconf->knc_protofmly) + 1,
 880                             KM_SLEEP);
 881                         (void) strcpy(newch->ch_protofmly,
 882                             svp->sv_knconf->knc_protofmly);
 883                         newch->ch_list = NULL;
 884                         goto top;
 885                 }
 886                 ch = newch;
 887                 newch = NULL;
 888                 ch->ch_next = nfscl->nfscl_chtable4;
 889                 nfscl->nfscl_chtable4 = ch;
 890         /*
 891          * We found a cache entry, but if it isn't on the front of the
 892          * list, then move it to the front of the list to try to take
 893          * advantage of locality of operations.
 894          */
 895         } else if (ch != nfscl->nfscl_chtable4) {
 896                 *plistp = ch->ch_next;
 897                 ch->ch_next = nfscl->nfscl_chtable4;
 898                 nfscl->nfscl_chtable4 = ch;
 899         }
 900 
 901         /*
 902          * If there was a free client handle cached, then remove it
 903          * from the list, init it, and use it.
 904          */
 905         if (ch->ch_list != NULL) {
 906                 cp = ch->ch_list;
 907                 ch->ch_list = cp->ch_list;
 908                 mutex_exit(&nfscl->nfscl_chtable4_lock);
 909                 if (newch != NULL) {
 910                         kmem_free(newch->ch_protofmly,
 911                             strlen(newch->ch_protofmly) + 1);
 912                         kmem_free(newch, sizeof (*newch));
 913                 }
 914                 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
 915                     &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
 916 
 917                 /*
 918                  * Get an auth handle.
 919                  */
 920                 error = authget(svp, cp->ch_client, cr);
 921                 if (error || cp->ch_client->cl_auth == NULL) {
 922                         CLNT_DESTROY(cp->ch_client);
 923                         kmem_cache_free(chtab4_cache, cp);
 924                         return ((error != 0) ? error : EINTR);
 925                 }
 926                 ch->ch_timesused++;
 927                 *newcl = cp->ch_client;
 928                 *chp = cp;
 929                 return (0);
 930         }
 931 
 932         /*
 933          * There weren't any free client handles which fit, so allocate
 934          * a new one and use that.
 935          */
 936 #ifdef DEBUG
 937         atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
 938 #endif
 939         mutex_exit(&nfscl->nfscl_chtable4_lock);
 940 
 941         nfscl->nfscl_stat.cltoomany.value.ui64++;
 942         if (newch != NULL) {
 943                 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
 944                 kmem_free(newch, sizeof (*newch));
 945         }
 946 
 947         cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
 948         cp->ch_head = ch;
 949 
 950         sigintr(&smask, (int)ci->cl_flags & MI4_INT);
 951         error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
 952             ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
 953         sigunintr(&smask);
 954 
 955         if (error != 0) {
 956                 kmem_cache_free(chtab4_cache, cp);
 957 #ifdef DEBUG
 958                 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
 959 #endif
 960                 /*
 961                  * Warning is unnecessary if error is EINTR.
 962                  */
 963                 if (error != EINTR) {
 964                         nfs_cmn_err(error, CE_WARN,
 965                             "clget: couldn't create handle: %m\n");
 966                 }
 967                 return (error);
 968         }
 969         (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
 970         auth_destroy(cp->ch_client->cl_auth);
 971 
 972         /*
 973          * Get an auth handle.
 974          */
 975         error = authget(svp, cp->ch_client, cr);
 976         if (error || cp->ch_client->cl_auth == NULL) {
 977                 CLNT_DESTROY(cp->ch_client);
 978                 kmem_cache_free(chtab4_cache, cp);
 979 #ifdef DEBUG
 980                 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
 981 #endif
 982                 return ((error != 0) ? error : EINTR);
 983         }
 984         ch->ch_timesused++;
 985         *newcl = cp->ch_client;
 986         ASSERT(cp->ch_client->cl_nosignal == FALSE);
 987         *chp = cp;
 988         return (0);
 989 }
 990 
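     /*
      * nfs_clget4 - wrapper around clget4() that builds the clinfo from
      * the mntinfo4_t and, while establishing a security context (e.g.
      * for RPCSEC_GSS), retries on ETIMEDOUT/ECONNRESET for hard,
      * non-failover mounts.
      */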
 991 static int
 992 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
 993     struct chtab **chp, struct nfs4_clnt *nfscl)
 994 {
 995         clinfo_t ci;
 996         bool_t is_recov;
 997         int firstcall, error = 0;
 998 
 999         /*
1000          * Set read buffer size to rsize
1001          * and add room for RPC headers.
1002          */
1003         ci.cl_readsize = mi->mi_tsize;
1004         if (ci.cl_readsize != 0)
1005                 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
1006 
1007         /*
1008          * If this is a soft mount and the server is down, just try
1009          * once; that is, do not retransmit.
1010          */
1011         if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
1012                 ci.cl_retrans = 0;
1013         else
1014                 ci.cl_retrans = mi->mi_retrans;
1015 
1016         ci.cl_prog = mi->mi_prog;
1017         ci.cl_vers = mi->mi_vers;
1018         ci.cl_flags = mi->mi_flags;
1019 
1020         /*
1021          * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
1022          * security flavor, the client tries to establish a security context
1023          * by contacting the server. If the connection is timed out or reset,
1024          * e.g. server reboot, we will try again.
1025          */
1026         is_recov = (curthread == mi->mi_recovthread);
1027         firstcall = 1;
1028 
1029         do {
1030                 error = clget4(&ci, svp, cr, newcl, chp, nfscl);
1031 
1032                 if (error == 0)
1033                         break;
1034 
1035                 /*
1036                  * For forced unmount and zone shutdown, bail out but
1037                  * let the recovery thread do one more transmission.
1038                  */
1039                 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
1040                     (!is_recov || !firstcall)) {
1041                         error = EIO;
1042                         break;
1043                 }
1044 
1045                 /* do not retry for soft mount */
1046                 if (!(mi->mi_flags & MI4_HARD))
1047                         break;
1048 
1049                 /* let the caller deal with the failover case */
1050                 if (FAILOVER_MOUNT4(mi))
1051                         break;
1052 
1053                 firstcall = 0;
1054 
1055         } while (error == ETIMEDOUT || error == ECONNRESET);
1056 
1057         return (error);
1058 }
1059 
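     /*
      * clfree4 - release a client handle obtained via clget4(): free its
      * auth handle, timestamp the chtab entry and put it back on the
      * front of its free list for reuse.
      */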
1060 void
1061 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
1062 {
1063         if (cl->cl_auth != NULL) {
1064                 sec_clnt_freeh(cl->cl_auth);
1065                 cl->cl_auth = NULL;
1066         }
1067 
1068         /*
1069          * Timestamp this cache entry so that we know when it was last
1070          * used.
1071          */
1072         cp->ch_freed = gethrestime_sec();
1073 
1074         /*
1075          * Add the free client handle to the front of the list.
1076          * This way, the list will be sorted in youngest to oldest
1077          * order.
1078          */
1079         mutex_enter(&nfscl->nfscl_chtable4_lock);
1080         cp->ch_list = cp->ch_head->ch_list;
1081         cp->ch_head->ch_list = cp;
1082         mutex_exit(&nfscl->nfscl_chtable4_lock);
1083 }
1084 
1085 #define CL_HOLDTIME     60      /* time to hold client handles */
1086 
1087 static void
1088 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
1089 {
1090         struct chhead *ch;
1091         struct chtab *cp;       /* list of objects that can be reclaimed */
1092         struct chtab *cpe;
1093         struct chtab *cpl;
1094         struct chtab **cpp;
1095 #ifdef DEBUG
1096         int n = 0;
1097         clstat4_debug.clreclaim.value.ui64++;
1098 #endif
1099 
1100         /*
1101          * Need to reclaim some memory, so step through the cache
1102          * looking through the lists for entries which can be freed.
1103          */
1104         cp = NULL;
1105 
1106         mutex_enter(&nfscl->nfscl_chtable4_lock);
1107 
1108         /*
1109          * Here we step through each non-NULL quadruple and start to
1110          * construct the reclaim list pointed to by cp.  Note that
1111          * cp will contain all eligible chtab entries.  When this traversal
1112          * completes, chtab entries from the last quadruple will be at the
1113          * front of cp and entries from previously inspected quadruples have
1114          * been appended to the rear of cp.
1115          */
1116         for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
1117                 if (ch->ch_list == NULL)
1118                         continue;
1119                 /*
1120                  * Search each list for entries older than
1121                  * cl_holdtime seconds.  The lists are maintained
1122                  * in youngest to oldest order so that when the
1123                  * first entry is found which is old enough, then
1124                  * all of the rest of the entries on the list will
1125                  * be old enough as well.
1126                  */
1127                 cpl = ch->ch_list;
1128                 cpp = &ch->ch_list;
1129                 while (cpl != NULL &&
1130                     cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
1131                         cpp = &cpl->ch_list;
1132                         cpl = cpl->ch_list;
1133                 }
1134                 if (cpl != NULL) {
1135                         *cpp = NULL;
1136                         if (cp != NULL) {
1137                                 cpe = cpl;
1138                                 while (cpe->ch_list != NULL)
1139                                         cpe = cpe->ch_list;
1140                                 cpe->ch_list = cp;
1141                         }
1142                         cp = cpl;
1143                 }
1144         }
1145 
1146         mutex_exit(&nfscl->nfscl_chtable4_lock);
1147 
1148         /*
1149          * If cp is empty, then there is nothing to reclaim here.
1150          */
1151         if (cp == NULL)
1152                 return;
1153 
1154         /*
1155          * Step through the list of entries to free, destroying each client
1156          * handle and kmem_free'ing the memory for each entry.
1157          */
1158         while (cp != NULL) {
1159 #ifdef DEBUG
1160                 n++;
1161 #endif
1162                 CLNT_DESTROY(cp->ch_client);
1163                 cpl = cp->ch_list;
1164                 kmem_cache_free(chtab4_cache, cp);
1165                 cp = cpl;
1166         }
1167 
1168 #ifdef DEBUG
1169         /*
1170          * Update clalloc so that nfsstat shows the current number
1171          * of allocated client handles.
1172          */
1173         atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
1174 #endif
1175 }
1176 
1177 /* ARGSUSED */
1178 static void
1179 clreclaim4(void *all)
1180 {
1181         struct nfs4_clnt *nfscl;
1182 
1183         /*
1184          * The system is low on memory; go through and try to reclaim some from
1185          * every zone on the system.
1186          */
1187         mutex_enter(&nfs4_clnt_list_lock);
1188         nfscl = list_head(&nfs4_clnt_list);
1189         for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
1190                 clreclaim4_zone(nfscl, CL_HOLDTIME);
1191         mutex_exit(&nfs4_clnt_list_lock);
1192 }
1193 
1194 /*
1195  * Minimum time-out values indexed by call type
1196  * These units are in "eighths" of a second to avoid multiplies
1197  */
1198 static unsigned int minimum_timeo[] = {
1199         6, 7, 10
1200 };
1201 
1202 #define SHORTWAIT       (NFS_COTS_TIMEO / 10)
1203 
1204 /*
1205  * Back off for retransmission timeout, MAXTIMO is in hz of a sec
1206  */
1207 #define MAXTIMO (20*hz)
1208 #define backoff(tim)    (((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
1209 #define dobackoff(tim)  ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
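     /*
      * For example (illustrative only, assuming hz = 100): a timeout of
      * 100 ticks (1 second) backs off to 200, 400, 800 and 1600 ticks on
      * successive retries, and is then capped at MAXTIMO (2000 ticks,
      * i.e. 20 seconds).
      */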
1210 
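     /*
      * nfs4_rfscall - common over-the-wire call path for the NFSv4
      * client: obtain a client handle, issue CLNT_CALL(), and, for hard
      * mounts, retry timed-out calls with backoff, queueing "server not
      * responding"/"server ok" facts as the server state changes.
      */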
1211 static int
1212 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1213     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
1214     enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
1215 {
1216         CLIENT *client;
1217         struct chtab *ch;
1218         cred_t *cr = icr;
1219         struct rpc_err rpcerr, rpcerr_tmp;
1220         enum clnt_stat status;
1221         int error;
1222         struct timeval wait;
1223         int timeo;              /* in units of hz */
1224         bool_t tryagain, is_recov;
1225         bool_t cred_cloned = FALSE;
1226         k_sigset_t smask;
1227         servinfo4_t *svp;
1228 #ifdef DEBUG
1229         char *bufp;
1230 #endif
1231         int firstcall;
1232 
1233         rpcerr.re_status = RPC_SUCCESS;
1234 
1235         /*
1236          * If we know that we are rebooting, then don't bother
1237          * doing any over-the-wire work.
1238          */
1239         mutex_enter(&mi->mi_lock);
1240         if (mi->mi_flags & MI4_SHUTDOWN) {
1241                 mutex_exit(&mi->mi_lock);
1242                 return (EIO);
1243         }
1244         mutex_exit(&mi->mi_lock);
1245 
1246         /* For TSOL, use a new cred which has net_mac_aware flag */
1247         if (!cred_cloned && is_system_labeled()) {
1248                 cred_cloned = TRUE;
1249                 cr = crdup(icr);
1250                 (void) setpflags(NET_MAC_AWARE, 1, cr);
1251         }
1252 
1253         /*
1254          * clget() calls clnt_tli_kinit() which clears the xid, so we
1255          * are guaranteed to reprocess the retry as a new request.
1256          */
1257         svp = mi->mi_curr_serv;
1258         rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
1259         if (rpcerr.re_errno != 0)
1260                 return (rpcerr.re_errno);
1261 
1262         timeo = (mi->mi_timeo * hz) / 10;
1263 
1264         /*
1265          * If hard mounted fs, retry call forever unless hard error
1266          * occurs.
1267          *
1268          * For forced unmount, let the recovery thread through but return
1269          * an error for all others.  This is so that user processes can
1270          * exit quickly.  The recovery thread bails out after one
1271          * transmission so that it can tell if it needs to continue.
1272          *
1273          * For zone shutdown, behave as above to encourage quick
1274          * process exit, but also fail quickly when servers have
1275          * timed out before and reduce the timeouts.
1276          */
1277         is_recov = (curthread == mi->mi_recovthread);
1278         firstcall = 1;
1279         do {
1280                 tryagain = FALSE;
1281 
1282                 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
1283                     "nfs4_rfscall: vfs_flag=0x%x, %s",
1284                     mi->mi_vfsp->vfs_flag,
1285                     is_recov ? "recov thread" : "not recov thread"));
1286 
1287                 /*
1288                  * It's possible that, while we were retrying, the
1289                  * admin decided to reboot.
1290                  */
1291                 mutex_enter(&mi->mi_lock);
1292                 if (mi->mi_flags & MI4_SHUTDOWN) {
1293                         mutex_exit(&mi->mi_lock);
1294                         clfree4(client, ch, nfscl);
1295                         if (cred_cloned)
1296                                 crfree(cr);
1297                         return (EIO);
1298                 }
1299                 mutex_exit(&mi->mi_lock);
1300 
1301                 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
1302                     (!is_recov || !firstcall)) {
1303                         clfree4(client, ch, nfscl);
1304                         if (cred_cloned)
1305                                 crfree(cr);
1306                         return (EIO);
1307                 }
1308 
1309                 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
1310                         mutex_enter(&mi->mi_lock);
1311                         if ((mi->mi_flags & MI4_TIMEDOUT) ||
1312                             !is_recov || !firstcall) {
1313                                 mutex_exit(&mi->mi_lock);
1314                                 clfree4(client, ch, nfscl);
1315                                 if (cred_cloned)
1316                                         crfree(cr);
1317                                 return (EIO);
1318                         }
1319                         mutex_exit(&mi->mi_lock);
1320                         timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
1321                 }
1322 
1323                 firstcall = 0;
1324                 TICK_TO_TIMEVAL(timeo, &wait);
1325 
1326                 /*
1327                  * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1328                  * and SIGTERM. (Preserving the existing masks).
1329                  * Mask out SIGINT if mount option nointr is specified.
1330                  */
1331                 sigintr(&smask, (int)mi->mi_flags & MI4_INT);
1332                 if (!(mi->mi_flags & MI4_INT))
1333                         client->cl_nosignal = TRUE;
1334 
1335                 /*
1336                  * If there is a current signal, then don't bother
1337                  * even trying to send out the request because we
1338                  * won't be able to block waiting for the response.
1339                  * Simply assume RPC_INTR and get on with it.
1340                  */
1341                 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1342                         status = RPC_INTR;
1343                 else {
1344                         status = CLNT_CALL(client, which, xdrargs, argsp,
1345                             xdrres, resp, wait);
1346                 }
1347 
1348                 if (!(mi->mi_flags & MI4_INT))
1349                         client->cl_nosignal = FALSE;
1350                 /*
1351                  * restore original signal mask
1352                  */
1353                 sigunintr(&smask);
1354 
1355                 switch (status) {
1356                 case RPC_SUCCESS:
1357                         break;
1358 
1359                 case RPC_INTR:
1360                         /*
1361                          * There is no way to recover from this error,
1362                          * even if mount option nointr is specified.
1363                          * SIGKILL, for example, cannot be blocked.
1364                          */
1365                         rpcerr.re_status = RPC_INTR;
1366                         rpcerr.re_errno = EINTR;
1367                         break;
1368 
1369                 case RPC_UDERROR:
1370                         /*
1371                          * If the NFS server is local (vold) and
1372                          * it goes away then we get RPC_UDERROR.
1373                          * This is a retryable error, so we would
1374                          * normally loop; check whether the specific
1375                          * error was ECONNRESET, indicating that the
1376                          * target did not exist at all.  If so,
1377                          * return with RPC_PROGUNAVAIL and
1378                          * ECONNRESET to indicate why.
1379                          */
1380                         CLNT_GETERR(client, &rpcerr);
1381                         if (rpcerr.re_errno == ECONNRESET) {
1382                                 rpcerr.re_status = RPC_PROGUNAVAIL;
1383                                 rpcerr.re_errno = ECONNRESET;
1384                                 break;
1385                         }
1386                         /*FALLTHROUGH*/
1387 
1388                 default:                /* probably RPC_TIMEDOUT */
1389 
1390                         if (IS_UNRECOVERABLE_RPC(status))
1391                                 break;
1392 
1393                         /*
1394                          * increment server not responding count
1395                          */
1396                         mutex_enter(&mi->mi_lock);
1397                         mi->mi_noresponse++;
1398                         mutex_exit(&mi->mi_lock);
1399 #ifdef DEBUG
1400                         nfscl->nfscl_stat.noresponse.value.ui64++;
1401 #endif
1402                         /*
1403                          * On zone shutdown, mark server dead and move on.
1404                          */
1405                         if (zone_status_get(curproc->p_zone) >=
1406                             ZONE_IS_SHUTTING_DOWN) {
1407                                 mutex_enter(&mi->mi_lock);
1408                                 mi->mi_flags |= MI4_TIMEDOUT;
1409                                 mutex_exit(&mi->mi_lock);
1410                                 clfree4(client, ch, nfscl);
1411                                 if (cred_cloned)
1412                                         crfree(cr);
1413                                 return (EIO);
1414                         }
1415 
1416                         /*
1417                          * NFS client failover support:
1418                          * return and let the caller take care of
1419                          * failover.  We only return for failover mounts
1420                          * because otherwise we want the "not responding"
1421                          * message, the timer updates, etc.
1422                          */
1423                         if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
1424                             (error = try_failover(status)) != 0) {
1425                                 clfree4(client, ch, nfscl);
1426                                 if (cred_cloned)
1427                                         crfree(cr);
1428                                 *rpc_statusp = status;
1429                                 return (error);
1430                         }
1431 
1432                         if (flags & RFSCALL_SOFT)
1433                                 break;
1434 
1435                         tryagain = TRUE;
1436 
1437                         /*
1438                          * The call is in progress (over COTS).
1439                          * Try the CLNT_CALL again, but don't
1440                          * print a noisy error message.
1441                          */
1442                         if (status == RPC_INPROGRESS)
1443                                 break;
1444 
1445                         timeo = backoff(timeo);
1446                         CLNT_GETERR(client, &rpcerr_tmp);
1447 
1448                         mutex_enter(&mi->mi_lock);
1449                         if (!(mi->mi_flags & MI4_PRINTED)) {
1450                                 mi->mi_flags |= MI4_PRINTED;
1451                                 mutex_exit(&mi->mi_lock);
1452                                 if ((status == RPC_CANTSEND) &&
1453                                     (rpcerr_tmp.re_errno == ENOBUFS))
1454                                         nfs4_queue_fact(RF_SENDQ_FULL, mi, 0,
1455                                             0, 0, FALSE, NULL, 0, NULL);
1456                                 else
1457                                         nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
1458                                             0, 0, 0, FALSE, NULL, 0, NULL);
1459                         } else
1460                                 mutex_exit(&mi->mi_lock);
1461 
1462                         if (*doqueue && nfs_has_ctty()) {
1463                                 *doqueue = 0;
1464                                 if (!(mi->mi_flags & MI4_NOPRINT)) {
1465                                         if ((status == RPC_CANTSEND) &&
1466                                             (rpcerr_tmp.re_errno == ENOBUFS))
1467                                                 nfs4_queue_fact(RF_SENDQ_FULL,
1468                                                     mi, 0, 0, 0, FALSE, NULL,
1469                                                     0, NULL);
1470                                         else
1471                                                 nfs4_queue_fact(
1472                                                     RF_SRV_NOT_RESPOND, mi, 0,
1473                                                     0, 0, FALSE, NULL, 0, NULL);
1474                                 }
1475                         }
1476                 }
1477         } while (tryagain);
1478 
1479         DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
1480             int, rpcerr.re_errno);
1481 
1482         if (status != RPC_SUCCESS) {
1483                 zoneid_t zoneid = mi->mi_zone->zone_id;
1484 
1485                 /*
1486                  * Let soft mounts use the timed out message.
1487                  */
1488                 if (status == RPC_INPROGRESS)
1489                         status = RPC_TIMEDOUT;
1490                 nfscl->nfscl_stat.badcalls.value.ui64++;
1491                 if (status != RPC_INTR) {
1492                         mutex_enter(&mi->mi_lock);
1493                         mi->mi_flags |= MI4_DOWN;
1494                         mutex_exit(&mi->mi_lock);
1495                         CLNT_GETERR(client, &rpcerr);
1496 #ifdef DEBUG
1497                         bufp = clnt_sperror(client, svp->sv_hostname);
1498                         zprintf(zoneid, "NFS%d %s failed for %s\n",
1499                             mi->mi_vers, mi->mi_rfsnames[which], bufp);
1500                         if (nfs_has_ctty()) {
1501                                 if (!(mi->mi_flags & MI4_NOPRINT)) {
1502                                         uprintf("NFS%d %s failed for %s\n",
1503                                             mi->mi_vers, mi->mi_rfsnames[which],
1504                                             bufp);
1505                                 }
1506                         }
1507                         kmem_free(bufp, MAXPATHLEN);
1508 #else
1509                         zprintf(zoneid,
1510                             "NFS %s failed for server %s: error %d (%s)\n",
1511                             mi->mi_rfsnames[which], svp->sv_hostname,
1512                             status, clnt_sperrno(status));
1513                         if (nfs_has_ctty()) {
1514                                 if (!(mi->mi_flags & MI4_NOPRINT)) {
1515                                         uprintf(
1516                                 "NFS %s failed for server %s: error %d (%s)\n",
1517                                             mi->mi_rfsnames[which],
1518                                             svp->sv_hostname, status,
1519                                             clnt_sperrno(status));
1520                                 }
1521                         }
1522 #endif
1523                         /*
1524                          * when CLNT_CALL() fails with RPC_AUTHERROR,
1525                          * re_errno is set appropriately depending on
1526                          * the authentication error
1527                          */
1528                         if (status == RPC_VERSMISMATCH ||
1529                             status == RPC_PROGVERSMISMATCH)
1530                                 rpcerr.re_errno = EIO;
1531                 }
1532         } else {
1533                 /*
1534                  * Test the MI4_DOWN and MI4_PRINTED flags without
1535                  * holding the mi_lock mutex.  If they are both clear,
1536                  * then it is okay to skip the down and printed
1537                  * processing.  This saves a mutex_enter/mutex_exit
1538                  * pair on every normal, successful RPC, where the
1539                  * lock round trip would be pure overhead.
1540                  */
1541                 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
1542                         mutex_enter(&mi->mi_lock);
1543                         mi->mi_flags &= ~MI4_DOWN;
1544                         if (mi->mi_flags & MI4_PRINTED) {
1545                                 mi->mi_flags &= ~MI4_PRINTED;
1546                                 mutex_exit(&mi->mi_lock);
1547                                 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1548                                         nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
1549                                             0, FALSE, NULL, 0, NULL);
1550                         } else
1551                                 mutex_exit(&mi->mi_lock);
1552                 }
1553 
1554                 if (*doqueue == 0) {
1555                         if (!(mi->mi_flags & MI4_NOPRINT) &&
1556                             !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1557                                 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
1558                                     FALSE, NULL, 0, NULL);
1559 
1560                         *doqueue = 1;
1561                 }
1562         }
1563 
1564         clfree4(client, ch, nfscl);
1565         if (cred_cloned)
1566                 crfree(cr);
1567 
1568         ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1569 
1570         TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
1571             rpcerr.re_errno);
1572 
1573         *rpc_statusp = status;
1574         return (rpcerr.re_errno);
1575 }
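
/*
 * A minimal, illustrative sketch of the retry backoff used by the loop in
 * nfs4_rfscall() above.  The real backoff() comes from the NFS client
 * headers; the doubling step and cap below are assumptions made for the
 * illustration, not the actual tunables.
 */
#if 0	/* illustrative sketch only, not built */
static int
sketch_backoff(int timeo, int maxtimeo)
{
        /* grow the timeout on each failed attempt, but never past the cap */
        if (timeo < maxtimeo)
                timeo = MIN(timeo * 2, maxtimeo);
        return (timeo);
}
#endif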
1576 
1577 /*
1578  * rfs4call - general wrapper for RPC calls initiated by the client
1579  */
1580 void
1581 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp,
1582     cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep)
1583 {
1584         int i, error;
1585         enum clnt_stat rpc_status = RPC_SUCCESS;
1586         int num_resops;
1587         struct nfs4_clnt *nfscl;
1588 
1589         ASSERT(nfs_zone() == mi->mi_zone);
1590         nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1591         ASSERT(nfscl != NULL);
1592 
1593         nfscl->nfscl_stat.calls.value.ui64++;
1594         mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;
1595 
1596         /* Set up the results struct for XDR usage */
1597         resp->argsp = argsp;
1598         resp->array = NULL;
1599         resp->status = 0;
1600         resp->decode_len = 0;
1601 
1602         error = nfs4_rfscall(mi, NFSPROC4_COMPOUND,
1603             xdr_COMPOUND4args_clnt, (caddr_t)argsp,
1604             xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
1605             doqueue, &rpc_status, flags, nfscl);
1606 
1607         /* Return now if it was an RPC error */
1608         if (error) {
1609                 ep->error = error;
1610                 ep->stat = resp->status;
1611                 ep->rpc_status = rpc_status;
1612                 return;
1613         }
1614 
1615         /* else we'll count the processed operations */
1616         num_resops = resp->decode_len;
1617         for (i = 0; i < num_resops; i++) {
1618                 /*
1619                  * Count the individual operations
1620                  * processed by the server.
1621                  */
1622                 if (resp->array[i].resop >= NFSPROC4_NULL &&
1623                     resp->array[i].resop <= OP_WRITE)
1624                         mi->mi_reqs[resp->array[i].resop].value.ui64++;
1625         }
1626 
1627         ep->error = 0;
1628         ep->stat = resp->status;
1629         ep->rpc_status = rpc_status;
1630 }
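
/*
 * A minimal sketch of the calling convention rfs4call() establishes; see
 * remap_lookup() below for a real caller.  It spells out the three error
 * channels (local errno, NFS4 status, raw RPC status) and the xdr_free()
 * obligation.  The "sketch_" name and the zero flags value are illustrative
 * assumptions.
 */
#if 0	/* illustrative sketch only, not built */
static void
sketch_compound_call(mntinfo4_t *mi, COMPOUND4args_clnt *args,
    COMPOUND4res_clnt *res, cred_t *cr)
{
        nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
        int doqueue = 1;

        rfs4call(mi, args, res, cr, &doqueue, 0, &e);

        if (e.error != 0) {
                /* local or RPC-level failure; no results were decoded */
                return;
        }
        if (e.stat != NFS4_OK) {
                /* the server processed the compound but returned an error */
        }
        /* decoded results must be freed once the caller is done with them */
        (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)res);
}
#endif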
1631 
1632 /*
1633  * nfs4rename_update - updates stored state after a rename.  Currently this
1634  * is the path of the object and anything under it, and the filehandle of
1635  * the renamed object.
1636  */
1637 void
1638 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
1639 {
1640         sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
1641         fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
1642 }
1643 
1644 /*
1645  * Routine to look up the filehandle for the given path and rootvp.
1646  *
1647  * Return values:
1648  * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
1649  *   updated.
1650  * - error: return value (errno value) and/or *statp is set appropriately.
1651  */
1652 #define RML_ORDINARY    1
1653 #define RML_NAMED_ATTR  2
1654 #define RML_ATTRDIR     3
1655 
1656 static void
1657 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
1658     int filetype, cred_t *cr,
1659     nfs_fh4 *fhp, nfs4_ga_res_t *garp,          /* fh, attrs for object */
1660     nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp,        /* fh, attrs for parent */
1661     nfs4_error_t *ep)
1662 {
1663         COMPOUND4args_clnt args;
1664         COMPOUND4res_clnt res;
1665         nfs_argop4 *argop;
1666         nfs_resop4 *resop;
1667         int num_argops;
1668         lookup4_param_t lookuparg;
1669         nfs_fh4 *tmpfhp;
1670         int doqueue = 1;
1671         char *path;
1672         mntinfo4_t *mi;
1673 
1674         ASSERT(fname != NULL);
1675         ASSERT(rootvp->v_type == VDIR);
1676 
1677         mi = VTOMI4(rootvp);
1678         path = fn_path(fname);
1679         switch (filetype) {
1680         case RML_NAMED_ATTR:
1681                 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
1682                 args.ctag = TAG_REMAP_LOOKUP_NA;
1683                 break;
1684         case RML_ATTRDIR:
1685                 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
1686                 args.ctag = TAG_REMAP_LOOKUP_AD;
1687                 break;
1688         case RML_ORDINARY:
1689                 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1690                 args.ctag = TAG_REMAP_LOOKUP;
1691                 break;
1692         default:
1693                 ep->error = EINVAL;
1694                 return;
1695         }
1696         lookuparg.argsp = &args;
1697         lookuparg.resp = &res;
1698         lookuparg.header_len = 1;       /* Putfh */
1699         lookuparg.trailer_len = 0;
1700         lookuparg.ga_bits = NFS4_VATTR_MASK;
1701         lookuparg.mi = VTOMI4(rootvp);
1702 
1703         (void) nfs4lookup_setup(path, &lookuparg, 1);
1704 
1705         /* 0: putfh directory */
1706         argop = args.array;
1707         argop[0].argop = OP_CPUTFH;
1708         argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;
1709 
1710         num_argops = args.array_len;
1711 
1712         rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1713 
1714         if (ep->error || res.status != NFS4_OK)
1715                 goto exit;
1716 
1717         /* get the object filehandle */
1718         resop = &res.array[res.array_len - 2];
1719         if (resop->resop != OP_GETFH) {
1720                 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1721                     0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1722                 ep->stat = NFS4ERR_SERVERFAULT;
1723                 goto exit;
1724         }
1725         tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1726         if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1727                 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1728                     tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1729                     TAG_NONE, 0, 0);
1730                 ep->stat = NFS4ERR_SERVERFAULT;
1731                 goto exit;
1732         }
1733         fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1734         nfs_fh4_copy(tmpfhp, fhp);
1735 
1736         /* get the object attributes */
1737         resop = &res.array[res.array_len - 1];
1738         if (garp && resop->resop == OP_GETATTR)
1739                 *garp = resop->nfs_resop4_u.opgetattr.ga_res;
1740 
1741         /* See if there are enough fields in the response for parent info */
1742         if ((int)res.array_len - 5 <= 0)
1743                 goto exit;
1744 
1745         /* get the parent filehandle */
1746         resop = &res.array[res.array_len - 5];
1747         if (resop->resop != OP_GETFH) {
1748                 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1749                     0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1750                 ep->stat = NFS4ERR_SERVERFAULT;
1751                 goto exit;
1752         }
1753         tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1754         if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1755                 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1756                     tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1757                     TAG_NONE, 0, 0);
1758                 ep->stat = NFS4ERR_SERVERFAULT;
1759                 goto exit;
1760         }
1761         pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1762         nfs_fh4_copy(tmpfhp, pfhp);
1763 
1764         /* get the parent attributes */
1765         resop = &res.array[res.array_len - 4];
1766         if (pgarp && resop->resop == OP_GETATTR)
1767                 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res;
1768 
1769 exit:
1770         /*
1771          * It is too hard to remember where all the OP_LOOKUPs are
1772          */
1773         nfs4args_lookup_free(argop, num_argops);
1774         kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1775 
1776         if (!ep->error)
1777                 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1778         kmem_free(path, strlen(path)+1);
1779 }
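
/*
 * The fixed negative indices used in remap_lookup() above assume that the
 * compound built by nfs4lookup_setup() ends with the sequence sketched
 * below (the leading PUTFH and intermediate LOOKUPs vary with the path
 * depth).  The LOOKUP at [-3] is inferred from the surrounding code, not
 * stated there.
 */
#if 0	/* illustrative index map only, not built */
        res.array[res.array_len - 5]    /* GETFH   - parent directory */
        res.array[res.array_len - 4]    /* GETATTR - parent directory */
        res.array[res.array_len - 3]    /* LOOKUP  - final component (inferred) */
        res.array[res.array_len - 2]    /* GETFH   - target object */
        res.array[res.array_len - 1]    /* GETATTR - target object */
#endif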
1780 
1781 /*
1782  * NFS client failover / volatile filehandle support
1783  *
1784  * Recover the filehandle for the given rnode.
1785  *
1786  * Errors are returned via the nfs4_error_t parameter.
1787  */
1788 
1789 void
1790 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
1791 {
1792         int is_stub;
1793         rnode4_t *rp = VTOR4(vp);
1794         vnode_t *rootvp = NULL;
1795         vnode_t *dvp = NULL;
1796         cred_t *cr, *cred_otw;
1797         nfs4_ga_res_t gar, pgar;
1798         nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
1799         int filetype = RML_ORDINARY;
1800         nfs4_recov_state_t recov = {NULL, 0, 0};
1801         int badfhcount = 0;
1802         nfs4_open_stream_t *osp = NULL;
1803         bool_t first_time = TRUE;       /* first time getting OTW cred */
1804         bool_t last_time = FALSE;       /* last time getting OTW cred */
1805 
1806         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1807             "nfs4_remap_file: remapping %s", rnode4info(rp)));
1808         ASSERT(nfs4_consistent_type(vp));
1809 
1810         if (vp->v_flag & VROOT) {
1811                 nfs4_remap_root(mi, ep, flags);
1812                 return;
1813         }
1814 
1815         /*
1816          * Given the root fh, use the path stored in
1817          * the rnode to find the fh for the new server.
1818          */
1819         ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1820         if (ep->error != 0)
1821                 return;
1822 
1823         cr = curthread->t_cred;
1824         ASSERT(cr != NULL);
1825 get_remap_cred:
1826         /*
1827          * Releases the osp, if it is provided.
1828          * Puts a hold on the cred_otw and the new osp (if found).
1829          */
1830         cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
1831             &first_time, &last_time);
1832         ASSERT(cred_otw != NULL);
1833 
1834         if (rp->r_flags & R4ISXATTR) {
1835                 filetype = RML_NAMED_ATTR;
1836                 (void) vtodv(vp, &dvp, cred_otw, FALSE);
1837         }
1838 
1839         if (vp->v_flag & V_XATTRDIR) {
1840                 filetype = RML_ATTRDIR;
1841         }
1842 
1843         if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
1844                 /* file mount, doesn't need a remap */
1845                 goto done;
1846         }
1847 
1848 again:
1849         remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
1850             &newfh, &gar, &newpfh, &pgar, ep);
1851 
1852         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1853             "nfs4_remap_file: remap_lookup returned %d/%d",
1854             ep->error, ep->stat));
1855 
1856         if (last_time == FALSE && ep->error == EACCES) {
1857                 crfree(cred_otw);
1858                 if (dvp != NULL)
1859                         VN_RELE(dvp);
1860                 goto get_remap_cred;
1861         }
1862         if (ep->error != 0)
1863                 goto done;
1864 
1865         switch (ep->stat) {
1866         case NFS4_OK:
1867                 badfhcount = 0;
1868                 if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
1869                         mutex_enter(&rp->r_statelock);
1870                         rp->r_delay_interval = 0;
1871                         mutex_exit(&rp->r_statelock);
1872                         uprintf("NFS File Available..\n");
1873                 }
1874                 break;
1875         case NFS4ERR_FHEXPIRED:
1876         case NFS4ERR_BADHANDLE:
1877         case NFS4ERR_STALE:
1878                 /*
1879                  * If we ran into filehandle problems, we should try to
1880                  * remap the root vnode first and hope life gets better.
1881                  * But we need to avoid loops.
1882                  */
1883                 if (badfhcount++ > 0)
1884                         goto done;
1885                 if (newfh.nfs_fh4_len != 0) {
1886                         kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1887                         newfh.nfs_fh4_len = 0;
1888                 }
1889                 if (newpfh.nfs_fh4_len != 0) {
1890                         kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1891                         newpfh.nfs_fh4_len = 0;
1892                 }
1893                 /* relative path - remap rootvp then retry */
1894                 VN_RELE(rootvp);
1895                 rootvp = NULL;
1896                 nfs4_remap_root(mi, ep, flags);
1897                 if (ep->error != 0 || ep->stat != NFS4_OK)
1898                         goto done;
1899                 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1900                 if (ep->error != 0)
1901                         goto done;
1902                 goto again;
1903         case NFS4ERR_DELAY:
1904                 badfhcount = 0;
1905                 nfs4_set_delay_wait(vp);
1906                 ep->error = nfs4_wait_for_delay(vp, &recov);
1907                 if (ep->error != 0)
1908                         goto done;
1909                 goto again;
1910         case NFS4ERR_ACCESS:
1911                 /* get new cred, try again */
1912                 if (last_time == TRUE)
1913                         goto done;
1914                 if (dvp != NULL)
1915                         VN_RELE(dvp);
1916                 crfree(cred_otw);
1917                 goto get_remap_cred;
1918         default:
1919                 goto done;
1920         }
1921 
1922         /*
1923          * Check on the new and old rnodes before updating;
1924          * if the vnode type or size changes, issue a warning
1925          * and mark the file dead.
1926          */
1927         mutex_enter(&rp->r_statelock);
1928         if (flags & NFS4_REMAP_CKATTRS) {
1929                 if (vp->v_type != gar.n4g_va.va_type ||
1930                     (vp->v_type != VDIR &&
1931                     rp->r_size != gar.n4g_va.va_size)) {
1932                         NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1933                             "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
1934                             (int)rp->r_size, (int)gar.n4g_va.va_size,
1935                             vp->v_type, gar.n4g_va.va_type));
1936                         mutex_exit(&rp->r_statelock);
1937                         nfs4_queue_event(RE_FILE_DIFF, mi,
1938                             rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
1939                             TAG_NONE, TAG_NONE, 0, 0);
1940                         nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
1941                         goto done;
1942                 }
1943         }
1944         ASSERT(gar.n4g_va.va_type != VNON);
1945         rp->r_server = mi->mi_curr_serv;
1946 
1947         /*
1948          * Turn this object into a "stub" object if we
1949          * crossed an underlying server fs boundary.
1950          *
1951          * This stub will be for a mirror-mount.
1952          * A referral would look like a boundary crossing
1953          * as well, but would not be the same type of object,
1954          * so we would expect to mark the object dead.
1955          *
1956          * See comment in r4_do_attrcache() for more details.
1957          */
1958         is_stub = 0;
1959         if (gar.n4g_fsid_valid) {
1960                 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
1961                 rp->r_srv_fsid = gar.n4g_fsid;
1962                 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
1963                         is_stub = 1;
1964                 nfs_rw_exit(&rp->r_server->sv_lock);
1965 #ifdef DEBUG
1966         } else {
1967                 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1968                     "remap_file: fsid attr not provided by server.  rp=%p",
1969                     (void *)rp));
1970 #endif
1971         }
1972         if (is_stub)
1973                 r4_stub_mirrormount(rp);
1974         else
1975                 r4_stub_none(rp);
1976         mutex_exit(&rp->r_statelock);
1977         nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
1978         sfh4_update(rp->r_fh, &newfh);
1979         ASSERT(nfs4_consistent_type(vp));
1980 
1981         /*
1982          * If we got parent info, use it to update the parent
1983          */
1984         if (newpfh.nfs_fh4_len != 0) {
1985                 if (rp->r_svnode.sv_dfh != NULL)
1986                         sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
1987                 if (dvp != NULL) {
1988                         /* force update of attrs */
1989                         nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
1990                 }
1991         }
1992 done:
1993         if (newfh.nfs_fh4_len != 0)
1994                 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1995         if (newpfh.nfs_fh4_len != 0)
1996                 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1997         if (cred_otw != NULL)
1998                 crfree(cred_otw);
1999         if (rootvp != NULL)
2000                 VN_RELE(rootvp);
2001         if (dvp != NULL)
2002                 VN_RELE(dvp);
2003         if (osp != NULL)
2004                 open_stream_rele(osp, rp);
2005 }
2006 
2007 /*
2008  * Client-side failover support: remap the filehandle for vp if it appears
2009  * necessary.  Errors are returned via the nfs4_error_t parameter;
2010  * if there is a problem, we will simply try again later.
2011  */
2012 
2013 void
2014 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2015 {
2016         if (vp == NULL)
2017                 return;
2018 
2019         if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY))
2020                 return;
2021 
2022         if (VTOR4(vp)->r_server == mi->mi_curr_serv)
2023                 return;
2024 
2025         nfs4_remap_file(mi, vp, flags, ep);
2026 }
2027 
2028 /*
2029  * nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
2030  *
2031  * Our caller has a filehandle for ".." relative to a particular
2032  * directory object.  We want to find or create a parent vnode
2033  * with that filehandle and return it.  We can of course create
2034  * a vnode from this filehandle, but we also need to make sure
2035  * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR),
2036  * we have a parent FH for future reopens as well.  If
2037  * we have a remap failure, we won't be able to reopen this
2038  * file, but we won't treat that as fatal because a reopen
2039  * is unlikely in any case.  Someday nfs4_reopen() should look
2040  * for a missing parent FH and try a remap to recover from it.
2041  *
2042  * need_start_op argument indicates whether this function should
2043  * do a start_op before calling remap_lookup().  This should
2044  * be FALSE, if you are the recovery thread or in an op; otherwise,
2045  * set it to TRUE.
2046  */
2047 int
2048 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
2049     cred_t *cr, vnode_t **vpp, int need_start_op)
2050 {
2051         mntinfo4_t *mi = VTOMI4(dvp);
2052         nfs4_fname_t *np = NULL, *pnp = NULL;
2053         vnode_t *vp = NULL, *rootvp = NULL;
2054         rnode4_t *rp;
2055         nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2056         nfs4_ga_res_t gar, pgar;
2057         vattr_t va, pva;
2058         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
2059         nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
2060         nfs4_recov_state_t recov_state;
2061 
2062 #ifdef DEBUG
2063         /*
2064          * ensure need_start_op is correct
2065          */
2066         {
2067                 int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
2068                     (curthread == mi->mi_recovthread));
2069                 /* C needs a ^^ operator! */
2070                 ASSERT(((need_start_op) && (!no_need_start_op)) ||
2071                     ((! need_start_op) && (no_need_start_op)));
2072         }
2073 #endif
2074         ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());
2075 
2076         NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
2077             "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
2078             rnode4info(VTOR4(dvp))));
2079 
2080         /*
2081          * rootvp might be needed eventually. Holding it now will
2082          * ensure that r4find_unlocked() will find it, if ".." is the root.
2083          */
2084         e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2085         if (e.error != 0)
2086                 goto out;
2087         rp = r4find_unlocked(fhp, mi->mi_vfsp);
2088         if (rp != NULL) {
2089                 *vpp = RTOV4(rp);
2090                 VN_RELE(rootvp);
2091                 return (0);
2092         }
2093 
2094         /*
2095          * Since we don't have the rnode, we have to go over the wire.
2096          * remap_lookup() can get all of the filehandles and attributes
2097          * we need in one operation.
2098          */
2099         np = fn_parent(VTOSV(dvp)->sv_name);
2100         /* if a parent was not found return an error */
2101         if (np == NULL) {
2102                 e.error = ENOENT;
2103                 goto out;
2104         }
2105 
2106         recov_state.rs_flags = 0;
2107         recov_state.rs_num_retry_despite_err = 0;
2108 recov_retry:
2109         if (need_start_op) {
2110                 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
2111                     &recov_state, NULL);
2112                 if (e.error != 0) {
2113                         goto out;
2114                 }
2115         }
2116 
2117         pgar.n4g_va.va_type = VNON;
2118         gar.n4g_va.va_type = VNON;
2119 
2120         remap_lookup(np, rootvp, RML_ORDINARY, cr,
2121             &newfh, &gar, &newpfh, &pgar, &e);
2122         if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
2123                 if (need_start_op) {
2124                         bool_t abort;
2125 
2126                         abort = nfs4_start_recovery(&e, mi,
2127                             rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL,
2128                             NULL);
2129                         if (abort) {
2130                                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2131                                     &recov_state, FALSE);
2132                                 if (e.error == 0)
2133                                         e.error = EIO;
2134                                 goto out;
2135                         }
2136                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2137                             &recov_state, TRUE);
2138                         goto recov_retry;
2139                 }
2140                 if (e.error == 0)
2141                         e.error = EIO;
2142                 goto out;
2143         }
2144 
2145         va = gar.n4g_va;
2146         pva = pgar.n4g_va;
2147 
2148         if ((e.error != 0) ||
2149             (va.va_type != VDIR)) {
2150                 if (need_start_op)
2151                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2152                             &recov_state, FALSE);
2153                 if (e.error == 0)
2154                         e.error = EIO;
2155                 goto out;
2156         }
2157 
2158         if (e.stat != NFS4_OK) {
2159                 if (need_start_op)
2160                         nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2161                             &recov_state, FALSE);
2162                 e.error = EIO;
2163                 goto out;
2164         }
2165 
2166         /*
2167          * It is possible for remap_lookup() to return with no error,
2168          * but without providing the parent filehandle and attrs.
2169          */
2170         if (pva.va_type != VDIR) {
2171                 /*
2172                  * Call remap_lookup() again, this time with the
2173                  * newpfh and pgar args in the first position.
2174                  */
2175                 pnp = fn_parent(np);
2176                 if (pnp != NULL) {
2177                         remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
2178                             &newpfh, &pgar, NULL, NULL, &e);
2179                         /*
2180                          * This remap_lookup call modifies pgar. The following
2181                          * line prevents trouble when checking the va_type of
2182                          * pva later in this code.
2183                          */
2184                         pva = pgar.n4g_va;
2185 
2186                         if (nfs4_needs_recovery(&e, FALSE,
2187                             mi->mi_vfsp)) {
2188                                 if (need_start_op) {
2189                                         bool_t abort;
2190 
2191                                         abort = nfs4_start_recovery(&e, mi,
2192                                             rootvp, NULL, NULL, NULL,
2193                                             OP_LOOKUP, NULL, NULL, NULL);
2194                                         if (abort) {
2195                                                 nfs4_end_fop(mi, rootvp, NULL,
2196                                                     OH_LOOKUP, &recov_state,
2197                                                     FALSE);
2198                                                 if (e.error == 0)
2199                                                         e.error = EIO;
2200                                                 goto out;
2201                                         }
2202                                         nfs4_end_fop(mi, rootvp, NULL,
2203                                             OH_LOOKUP, &recov_state, TRUE);
2204                                         goto recov_retry;
2205                                 }
2206                                 if (e.error == 0)
2207                                         e.error = EIO;
2208                                 goto out;
2209                         }
2210 
2211                         if (e.stat != NFS4_OK) {
2212                                 if (need_start_op)
2213                                         nfs4_end_fop(mi, rootvp, NULL,
2214                                             OH_LOOKUP, &recov_state, FALSE);
2215                                 e.error = EIO;
2216                                 goto out;
2217                         }
2218                 }
2219                 if ((pnp == NULL) ||
2220                     (e.error != 0) ||
2221                     (pva.va_type == VNON)) {
2222                         if (need_start_op)
2223                                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2224                                     &recov_state, FALSE);
2225                         if (e.error == 0)
2226                                 e.error = EIO;
2227                         goto out;
2228                 }
2229         }
2230         ASSERT(newpfh.nfs_fh4_len != 0);
2231         if (need_start_op)
2232                 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
2233         psfh = sfh4_get(&newpfh, mi);
2234 
2235         sfh = sfh4_get(&newfh, mi);
2236         vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);
2237 
2238 out:
2239         if (np != NULL)
2240                 fn_rele(&np);
2241         if (pnp != NULL)
2242                 fn_rele(&pnp);
2243         if (newfh.nfs_fh4_len != 0)
2244                 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2245         if (newpfh.nfs_fh4_len != 0)
2246                 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2247         if (sfh != NULL)
2248                 sfh4_rele(&sfh);
2249         if (psfh != NULL)
2250                 sfh4_rele(&psfh);
2251         if (rootvp != NULL)
2252                 VN_RELE(rootvp);
2253         *vpp = vp;
2254         return (e.error);
2255 }
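
/*
 * A minimal sketch of how a caller might choose the need_start_op argument
 * for nfs4_make_dotdot(), mirroring the DEBUG assertion inside it: pass
 * FALSE when running as the recovery thread or when already inside a
 * started op (nfs4_tsd_key set), TRUE otherwise.  The helper name is
 * hypothetical; this is an illustration, not a drop-in function.
 */
#if 0	/* illustrative sketch only, not built */
static int
sketch_need_start_op(mntinfo4_t *mi)
{
        return (!(tsd_get(nfs4_tsd_key) != NULL ||
            curthread == mi->mi_recovthread));
}
#endif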
2256 
2257 #ifdef DEBUG
2258 size_t r_path_memuse = 0;
2259 #endif
2260 
2261 /*
2262  * NFS client failover support
2263  *
2264  * sv4_free() frees the malloc'd portion of a "servinfo4_t".
2265  */
2266 void
2267 sv4_free(servinfo4_t *svp)
2268 {
2269         servinfo4_t *next;
2270         struct knetconfig *knconf;
2271 
2272         while (svp != NULL) {
2273                 next = svp->sv_next;
2274                 if (svp->sv_dhsec)
2275                         sec_clnt_freeinfo(svp->sv_dhsec);
2276                 if (svp->sv_secdata)
2277                         sec_clnt_freeinfo(svp->sv_secdata);
2278                 if (svp->sv_save_secinfo &&
2279                     svp->sv_save_secinfo != svp->sv_secinfo)
2280                         secinfo_free(svp->sv_save_secinfo);
2281                 if (svp->sv_secinfo)
2282                         secinfo_free(svp->sv_secinfo);
2283                 if (svp->sv_hostname && svp->sv_hostnamelen > 0)
2284                         kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
2285                 knconf = svp->sv_knconf;
2286                 if (knconf != NULL) {
2287                         if (knconf->knc_protofmly != NULL)
2288                                 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2289                         if (knconf->knc_proto != NULL)
2290                                 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2291                         kmem_free(knconf, sizeof (*knconf));
2292                 }
2293                 knconf = svp->sv_origknconf;
2294                 if (knconf != NULL) {
2295                         if (knconf->knc_protofmly != NULL)
2296                                 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2297                         if (knconf->knc_proto != NULL)
2298                                 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2299                         kmem_free(knconf, sizeof (*knconf));
2300                 }
2301                 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
2302                         kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
2303                 if (svp->sv_path != NULL) {
2304                         kmem_free(svp->sv_path, svp->sv_pathlen);
2305                 }
2306                 nfs_rw_destroy(&svp->sv_lock);
2307                 kmem_free(svp, sizeof (*svp));
2308                 svp = next;
2309         }
2310 }
2311 
2312 void
2313 nfs4_printfhandle(nfs4_fhandle_t *fhp)
2314 {
2315         int *ip;
2316         char *buf;
2317         size_t bufsize;
2318         char *cp;
2319 
2320         /*
2321          * 13 == "(file handle:"
2322          * maximum of NFS_FHANDLE_LEN / sizeof (*ip) elements in fh_buf times
2323          *      1 == ' '
2324          *      8 == maximum strlen of "%x"
2325          * 3 == ")\n\0"
2326          */
2327         bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
2328         buf = kmem_alloc(bufsize, KM_NOSLEEP);
2329         if (buf == NULL)
2330                 return;
2331 
2332         cp = buf;
2333         (void) strcpy(cp, "(file handle:");
2334         while (*cp != '\0')
2335                 cp++;
2336         for (ip = (int *)fhp->fh_buf;
2337             ip < (int *)&fhp->fh_buf[fhp->fh_len];
2338             ip++) {
2339                 (void) sprintf(cp, " %x", *ip);
2340                 while (*cp != '\0')
2341                         cp++;
2342         }
2343         (void) strcpy(cp, ")\n");
2344 
2345         zcmn_err(getzoneid(), CE_CONT, "%s", buf);
2346 
2347         kmem_free(buf, bufsize);
2348 }
2349 
2350 /*
2351  * The NFSv4 readdir cache subsystem.
2352  *
2353  * We provide a set of interfaces to allow the rest of the system to utilize
2354  * a caching mechanism while encapsulating the details of the actual
2355  * implementation.  This should allow for better maintainability and
2356  * extensibility by consolidating the implementation details in one location.
2357  */
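
/*
 * A rough sketch of how a consumer (e.g. the readdir vnode op) is expected
 * to drive these interfaces, pieced together from the locking assertions
 * and flag handling below.  The lock and flag names come from this file;
 * the control flow is an abbreviated assumption, not the actual readdir
 * implementation.
 */
#if 0	/* illustrative sketch only, not built */
        rddir4_cache *rdc;

        /* caller holds rp->r_rwlock as READER and rp->r_statelock */
        rdc = rddir4_cache_lookup(rp, cookie, count);
        if (rdc != NULL) {
                if (rdc->flags & RDDIRREQ) {
                        /* fresh or stale entry: mark it in progress, fill it */
                        rdc->flags &= ~RDDIRREQ;
                        rdc->flags |= RDDIR;
                        mutex_exit(&rp->r_statelock);
                        /* ... do the over-the-wire READDIR into rdc->entries ... */
                        mutex_enter(&rp->r_statelock);
                }
                /*
                 * Consume rdc->entries, then drop the hold taken by the
                 * lookup; rddir4_cache_rele() also wakes any RDDIRWAIT
                 * waiters.
                 */
                rddir4_cache_rele(rp, rdc);
        }
#endif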
2358 
2359 /*
2360  * Comparator used by AVL routines.
2361  */
2362 static int
2363 rddir4_cache_compar(const void *x, const void *y)
2364 {
2365         rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
2366         rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
2367         rddir4_cache *a = &ai->rc;
2368         rddir4_cache *b = &bi->rc;
2369 
2370         if (a->nfs4_cookie == b->nfs4_cookie) {
2371                 if (a->buflen == b->buflen)
2372                         return (0);
2373                 if (a->buflen < b->buflen)
2374                         return (-1);
2375                 return (1);
2376         }
2377 
2378         if (a->nfs4_cookie < b->nfs4_cookie)
2379                 return (-1);
2380 
2381         return (1);
2382 }
2383 
2384 /*
2385  * Allocate an opaque handle for the readdir cache.
2386  */
2387 void
2388 rddir4_cache_create(rnode4_t *rp)
2389 {
2390         ASSERT(rp->r_dir == NULL);
2391 
2392         rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
2393 
2394         avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
2395             offsetof(rddir4_cache_impl, tree));
2396 }
2397 
2398 /*
2399  *  Purge the cache of all cached readdir responses.
2400  */
2401 void
2402 rddir4_cache_purge(rnode4_t *rp)
2403 {
2404         rddir4_cache_impl       *rdip;
2405         rddir4_cache_impl       *nrdip;
2406 
2407         ASSERT(MUTEX_HELD(&rp->r_statelock));
2408 
2409         if (rp->r_dir == NULL)
2410                 return;
2411 
2412         rdip = avl_first(rp->r_dir);
2413 
2414         while (rdip != NULL) {
2415                 nrdip = AVL_NEXT(rp->r_dir, rdip);
2416                 avl_remove(rp->r_dir, rdip);
2417                 rdip->rc.flags &= ~RDDIRCACHED;
2418                 rddir4_cache_rele(rp, &rdip->rc);
2419                 rdip = nrdip;
2420         }
2421         ASSERT(avl_numnodes(rp->r_dir) == 0);
2422 }
2423 
2424 /*
2425  * Destroy the readdir cache.
2426  */
2427 void
2428 rddir4_cache_destroy(rnode4_t *rp)
2429 {
2430         ASSERT(MUTEX_HELD(&rp->r_statelock));
2431         if (rp->r_dir == NULL)
2432                 return;
2433 
2434         rddir4_cache_purge(rp);
2435         avl_destroy(rp->r_dir);
2436         kmem_free(rp->r_dir, sizeof (avl_tree_t));
2437         rp->r_dir = NULL;
2438 }
2439 
2440 /*
2441  * Locate a readdir response from the readdir cache.
2442  *
2443  * Return values:
2444  *
2445  * NULL - If an unrecoverable situation arises, for example the operation
2446  *        was interrupted.
2447  *
2448  * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
2449  *                  The flags are set appropriately, such that the caller knows
2450  *                  what state the entry is in.
2451  */
2452 rddir4_cache *
2453 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count)
2454 {
2455         rddir4_cache_impl       *rdip = NULL;
2456         rddir4_cache_impl       srdip;
2457         rddir4_cache            *srdc;
2458         rddir4_cache            *rdc = NULL;
2459         rddir4_cache            *nrdc = NULL;
2460         avl_index_t             where;
2461 
2462 top:
2463         ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2464         ASSERT(MUTEX_HELD(&rp->r_statelock));
2465         /*
2466          * Check to see if the readdir cache has been disabled.  If so, then
2467          * simply allocate an rddir4_cache entry and return it, since caching
2468          * operations do not apply.
2469          */
2470         if (rp->r_dir == NULL) {
2471                 if (nrdc == NULL) {
2472                         /*
2473                          * Drop the lock because we are doing a sleeping
2474                          * allocation.
2475                          */
2476                         mutex_exit(&rp->r_statelock);
2477                         rdc = rddir4_cache_alloc(KM_SLEEP);
2478                         rdc->nfs4_cookie = cookie;
2479                         rdc->buflen = count;
2480                         mutex_enter(&rp->r_statelock);
2481                         return (rdc);
2482                 }
2483                 return (nrdc);
2484         }
2485 
2486         srdc = &srdip.rc;
2487         srdc->nfs4_cookie = cookie;
2488         srdc->buflen = count;
2489 
2490         rdip = avl_find(rp->r_dir, &srdip, &where);
2491 
2492         /*
2493          * If we didn't find an entry then create one and insert it
2494          * into the cache.
2495          */
2496         if (rdip == NULL) {
2497                 /*
2498                  * Check for the case where we have made a second pass through
2499                  * the cache due to a lockless allocation.  If we find that no
2500                  * thread has already inserted this entry, do the insert now
2501                  * and return.
2502                  */
2503                 if (nrdc != NULL) {
2504                         avl_insert(rp->r_dir, nrdc->data, where);
2505                         nrdc->flags |= RDDIRCACHED;
2506                         rddir4_cache_hold(nrdc);
2507                         return (nrdc);
2508                 }
2509 
2510 #ifdef DEBUG
2511                 nfs4_readdir_cache_misses++;
2512 #endif
2513                 /*
2514                  * First, try to allocate an entry without sleeping.  If that
2515                  * fails then drop the lock and do a sleeping allocation.
2516                  */
2517                 nrdc = rddir4_cache_alloc(KM_NOSLEEP);
2518                 if (nrdc != NULL) {
2519                         nrdc->nfs4_cookie = cookie;
2520                         nrdc->buflen = count;
2521                         avl_insert(rp->r_dir, nrdc->data, where);
2522                         nrdc->flags |= RDDIRCACHED;
2523                         rddir4_cache_hold(nrdc);
2524                         return (nrdc);
2525                 }
2526 
2527                 /*
2528                  * Drop the lock and do a sleeping allocation.  We incur
2529                  * additional overhead by having to search the cache again,
2530                  * but this case should be rare.
2531                  */
2532                 mutex_exit(&rp->r_statelock);
2533                 nrdc = rddir4_cache_alloc(KM_SLEEP);
2534                 nrdc->nfs4_cookie = cookie;
2535                 nrdc->buflen = count;
2536                 mutex_enter(&rp->r_statelock);
2537                 /*
2538                  * We need to take another pass through the cache
2539                  * since we dropped our lock to perform the alloc.
2540                  * Another thread may have come by and inserted the
2541                  * entry we are interested in.
2542                  */
2543                 goto top;
2544         }
2545 
2546         /*
2547          * Check to see if we need to free our entry.  This can happen if
2548          * another thread came along and beat us to the insert.  We can
2549          * safely call rddir4_cache_free directly because no other thread
2550          * would have a reference to this entry.
2551          */
2552         if (nrdc != NULL)
2553                 rddir4_cache_free((rddir4_cache_impl *)nrdc->data);
2554 
2555 #ifdef DEBUG
2556         nfs4_readdir_cache_hits++;
2557 #endif
2558         /*
2559          * Found something.  Make sure it's ready to return.
2560          */
2561         rdc = &rdip->rc;
2562         rddir4_cache_hold(rdc);
2563         /*
2564          * If the cache entry is in the process of being filled in, wait
2565          * until this completes.  The RDDIRWAIT bit is set to indicate that
2566          * someone is waiting and when the thread currently filling the entry
2567          * is done, it should do a cv_broadcast to wake up all of the threads
2568          * waiting for it to finish. If the thread wakes up to find that
2569          * someone new is now trying to complete the entry, go back
2570          * to sleep.
2571          */
2572         while (rdc->flags & RDDIR) {
2573                 /*
2574                  * The entry is not complete.
2575                  */
2576                 nfs_rw_exit(&rp->r_rwlock);
2577                 rdc->flags |= RDDIRWAIT;
2578 #ifdef DEBUG
2579                 nfs4_readdir_cache_waits++;
2580 #endif
2581                 while (rdc->flags & RDDIRWAIT) {
2582                         if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
2583                                 /*
2584                                  * We got interrupted, probably the user
2585                                  * typed ^C or an alarm fired.  Release our
2586                                  * hold on the entry and return NULL.
2587                                  */
2588                                 rddir4_cache_rele(rp, rdc);
2589                                 mutex_exit(&rp->r_statelock);
2590                                 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2591                                     RW_READER, FALSE);
2592                                 mutex_enter(&rp->r_statelock);
2593                                 return (NULL);
2594                         }
2595                 }
2596                 mutex_exit(&rp->r_statelock);
2597                 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2598                     RW_READER, FALSE);
2599                 mutex_enter(&rp->r_statelock);
2600         }
2601 
2602         /*
2603          * The entry we were waiting on may have been purged from
2604          * the cache and should no longer be used; release it and
2605          * start over.
2606          */
2607         if (!(rdc->flags & RDDIRCACHED)) {
2608                 rddir4_cache_rele(rp, rdc);
2609                 goto top;
2610         }
2611 
2612         /*
2613          * The entry is completed.  Return it.
2614          */
2615         return (rdc);
2616 }
2617 
2618 /*
2619  * Allocate a cache element and return it.  Can return NULL if memory is
2620  * low.
2621  */
2622 static rddir4_cache *
2623 rddir4_cache_alloc(int flags)
2624 {
2625         rddir4_cache_impl       *rdip = NULL;
2626         rddir4_cache            *rc = NULL;
2627 
2628         rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);
2629 
2630         if (rdip != NULL) {
2631                 rc = &rdip->rc;
2632                 rc->data = (void *)rdip;
2633                 rc->nfs4_cookie = 0;
2634                 rc->nfs4_ncookie = 0;
2635                 rc->entries = NULL;
2636                 rc->eof = 0;
2637                 rc->entlen = 0;
2638                 rc->buflen = 0;
2639                 rc->actlen = 0;
2640                 /*
2641                  * A readdir is required so set the flag.
2642                  */
2643                 rc->flags = RDDIRREQ;
2644                 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
2645                 rc->error = 0;
2646                 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
2647                 rdip->count = 1;
2648 #ifdef DEBUG
2649                 atomic_inc_64(&clstat4_debug.dirent.value.ui64);
2650 #endif
2651         }
2652         return (rc);
2653 }
2654 
2655 /*
2656  * Increment the reference count to this cache element.
2657  */
2658 static void
2659 rddir4_cache_hold(rddir4_cache *rc)
2660 {
2661         rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;
2662 
2663         mutex_enter(&rdip->lock);
2664         rdip->count++;
2665         mutex_exit(&rdip->lock);
2666 }
2667 
2668 /*
2669  * Release a reference to this cache element.  If the count is zero then
2670  * free the element.
2671  */
2672 void
2673 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
2674 {
2675         rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;
2676 
2677         ASSERT(MUTEX_HELD(&rp->r_statelock));
2678 
2679         /*
2680          * Check to see if we have any waiters.  If so, we can wake them
2681          * so that they can proceed.
2682          */
2683         if (rdc->flags & RDDIRWAIT) {
2684                 rdc->flags &= ~RDDIRWAIT;
2685                 cv_broadcast(&rdc->cv);
2686         }
2687 
2688         mutex_enter(&rdip->lock);
2689         ASSERT(rdip->count > 0);
2690         if (--rdip->count == 0) {
2691                 mutex_exit(&rdip->lock);
2692                 rddir4_cache_free(rdip);
2693         } else
2694                 mutex_exit(&rdip->lock);
2695 }
2696 
2697 /*
2698  * Free a cache element.
2699  */
2700 static void
2701 rddir4_cache_free(rddir4_cache_impl *rdip)
2702 {
2703         rddir4_cache *rc = &rdip->rc;
2704 
2705 #ifdef DEBUG
2706         atomic_dec_64(&clstat4_debug.dirent.value.ui64);
2707 #endif
2708         if (rc->entries != NULL)
2709                 kmem_free(rc->entries, rc->buflen);
2710         cv_destroy(&rc->cv);
2711         mutex_destroy(&rdip->lock);
2712         kmem_free(rdip, sizeof (*rdip));
2713 }
2714 
2715 /*
2716  * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
2717  * framework.
2718  */
2719 static int
2720 cl4_snapshot(kstat_t *ksp, void *buf, int rw)
2721 {
2722         ksp->ks_snaptime = gethrtime();
2723         if (rw == KSTAT_WRITE) {
2724                 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
2725 #ifdef DEBUG
2726                 /*
2727                  * Currently only the global zone can write to kstats, but we
2728                  * add the check just for paranoia.
2729                  */
2730                 if (INGLOBALZONE(curproc))
2731                         bcopy((char *)buf + sizeof (clstat4_tmpl),
2732                             &clstat4_debug, sizeof (clstat4_debug));
2733 #endif
2734         } else {
2735                 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
2736 #ifdef DEBUG
2737                 /*
2738                  * If we're displaying the "global" debug kstat values, we
2739                  * display them as-is to all zones since in fact they apply to
2740                  * the system as a whole.
2741                  */
2742                 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl),
2743                     sizeof (clstat4_debug));
2744 #endif
2745         }
2746         return (0);
2747 }
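
/*
 * The kstat installed by clinit4_zone() below can be read from user space
 * with libkstat(3LIB).  A minimal example (compile with -lkstat); the
 * module/instance/name triple matches the kstat_create_zone() call below,
 * the rest is ordinary libkstat boilerplate.
 */
#if 0	/* user-space example, not part of the kernel module */
#include <kstat.h>
#include <stdio.h>

int
main(void)
{
        kstat_ctl_t *kc = kstat_open();
        kstat_t *ksp;
        kstat_named_t *kn;

        if (kc == NULL)
                return (1);
        ksp = kstat_lookup(kc, "nfs", 0, "nfs4_client");
        if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
                /* "calls" is one of the counters bumped in rfs4call() */
                kn = kstat_data_lookup(ksp, "calls");
                if (kn != NULL)
                        (void) printf("calls: %llu\n",
                            (u_longlong_t)kn->value.ui64);
        }
        (void) kstat_close(kc);
        return (0);
}
#endif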
2748 
2749 
2750 
2751 /*
2752  * Zone support
2753  */
2754 static void *
2755 clinit4_zone(zoneid_t zoneid)
2756 {
2757         kstat_t *nfs4_client_kstat;
2758         struct nfs4_clnt *nfscl;
2759         uint_t ndata;
2760 
2761         nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
2762         mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
2763         nfscl->nfscl_chtable4 = NULL;
2764         nfscl->nfscl_zoneid = zoneid;
2765 
2766         bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl));
2767         ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
2768 #ifdef DEBUG
2769         ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t);
2770 #endif
2771         if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
2772             "misc", KSTAT_TYPE_NAMED, ndata,
2773             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
2774                 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat;
2775                 nfs4_client_kstat->ks_snapshot = cl4_snapshot;
2776                 kstat_install(nfs4_client_kstat);
2777         }
2778         mutex_enter(&nfs4_clnt_list_lock);
2779         list_insert_head(&nfs4_clnt_list, nfscl);
2780         mutex_exit(&nfs4_clnt_list_lock);
2781 
2782         return (nfscl);
2783 }
2784 
2785 /*ARGSUSED*/
2786 static void
2787 clfini4_zone(zoneid_t zoneid, void *arg)
2788 {
2789         struct nfs4_clnt *nfscl = arg;
2790         chhead_t *chp, *next;
2791 
2792         if (nfscl == NULL)
2793                 return;
2794         mutex_enter(&nfs4_clnt_list_lock);
2795         list_remove(&nfs4_clnt_list, nfscl);
2796         mutex_exit(&nfs4_clnt_list_lock);
2797         clreclaim4_zone(nfscl, 0);
2798         for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
2799                 ASSERT(chp->ch_list == NULL);
2800                 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
2801                 next = chp->ch_next;
2802                 kmem_free(chp, sizeof (*chp));
2803         }
2804         kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
2805         mutex_destroy(&nfscl->nfscl_chtable4_lock);
2806         kmem_free(nfscl, sizeof (*nfscl));
2807 }
2808 
2809 /*
2810  * Called by endpnt_destructor to make sure the client handles are
2811  * cleaned up before the RPC endpoints.  This becomes a no-op if
2812  * clfini4_zone (above) is called first.  This function is needed
2813  * (rather than relying on clfini4_zone to clean up) because the ZSD
2814  * callbacks have no ordering mechanism, so we have no way to ensure
2815  * that clfini4_zone is called before endpnt_destructor.
2816  */
2817 void
2818 clcleanup4_zone(zoneid_t zoneid)
2819 {
2820         struct nfs4_clnt *nfscl;
2821 
2822         mutex_enter(&nfs4_clnt_list_lock);
2823         nfscl = list_head(&nfs4_clnt_list);
2824         for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) {
2825                 if (nfscl->nfscl_zoneid == zoneid) {
2826                         clreclaim4_zone(nfscl, 0);
2827                         break;
2828                 }
2829         }
2830         mutex_exit(&nfs4_clnt_list_lock);
2831 }
2832 
2833 int
2834 nfs4_subr_init(void)
2835 {
2836         /*
2837          * Allocate and initialize the client handle cache
2838          */
2839         chtab4_cache = kmem_cache_create("client_handle4_cache",
2840             sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL,
2841             NULL, 0);
2842 
2843         /*
2844          * Initialize the list of per-zone client handles (and associated data).
2845          * This needs to be done before we call zone_key_create().
2846          */
2847         list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt),
2848             offsetof(struct nfs4_clnt, nfscl_node));
2849 
2850         /*
2851          * Initialize the zone_key for per-zone client handle lists.
2852          */
2853         zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);
2854 
2855         if (nfs4err_delay_time == 0)
2856                 nfs4err_delay_time = NFS4ERR_DELAY_TIME;
2857 
2858         return (0);
2859 }
2860 
2861 int
2862 nfs4_subr_fini(void)
2863 {
2864         /*
2865          * Deallocate the client handle cache
2866          */
2867         kmem_cache_destroy(chtab4_cache);
2868 
2869         /*
2870          * Destroy the zone_key
2871          */
2872         (void) zone_key_delete(nfs4clnt_zone_key);
2873 
2874         return (0);
2875 }
2876 /*
2877  * Set or Clear direct I/O flag
2878  * VOP_RWLOCK() is held for write access to prevent a race condition
2879  * which would occur if a process is in the middle of a write when the
2880  * directio flag gets set.  It is possible that not all pages will get flushed.
2881  *
2882  * This is a copy of nfs_directio; changes here may need to be made
2883  * there and vice versa.
2884  */
2885 
2886 int
2887 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
2888 {
2889         int     error = 0;
2890         rnode4_t *rp;
2891 
2892         rp = VTOR4(vp);
2893 
2894         if (cmd == DIRECTIO_ON) {
2895 
2896                 if (rp->r_flags & R4DIRECTIO)
2897                         return (0);
2898 
2899                 /*
2900                  * Flush the page cache.
2901                  */
2902 
2903                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
2904 
2905                 if (rp->r_flags & R4DIRECTIO) {
2906                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2907                         return (0);
2908                 }
2909 
2910                 if (nfs4_has_pages(vp) &&
2911                     ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
2912                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
2913                             B_INVAL, cr, NULL);
2914                         if (error) {
2915                                 if (error == ENOSPC || error == EDQUOT) {
2916                                         mutex_enter(&rp->r_statelock);
2917                                         if (!rp->r_error)
2918                                                 rp->r_error = error;
2919                                         mutex_exit(&rp->r_statelock);
2920                                 }
2921                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2922                                 return (error);
2923                         }
2924                 }
2925 
2926                 mutex_enter(&rp->r_statelock);
2927                 rp->r_flags |= R4DIRECTIO;
2928                 mutex_exit(&rp->r_statelock);
2929                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2930                 return (0);
2931         }
2932 
2933         if (cmd == DIRECTIO_OFF) {
2934                 mutex_enter(&rp->r_statelock);
2935                 rp->r_flags &= ~R4DIRECTIO;      /* disable direct mode */
2936                 mutex_exit(&rp->r_statelock);
2937                 return (0);
2938         }
2939 
2940         return (EINVAL);
2941 }
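
/*
 * Illustrative sketch, an assumption rather than this file's actual
 * caller: a consumer (for example an ioctl handler) simply passes
 * DIRECTIO_ON or DIRECTIO_OFF through; nfs4_directio() does its own
 * locking and page-cache flushing as described above.
 *
 *        static int
 *        example_set_directio(vnode_t *vp, int enable, cred_t *cr)
 *        {
 *                return (nfs4_directio(vp,
 *                    enable ? DIRECTIO_ON : DIRECTIO_OFF, cr));
 *        }
 */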
2942 
2943 /*
2944  * Return TRUE if the file has any pages.  Always go back to
2945  * the master vnode to check v_pages since none of the shadows
2946  * can have pages.
2947  */
2948 
2949 bool_t
2950 nfs4_has_pages(vnode_t *vp)
2951 {
2952         rnode4_t *rp;
2953 
2954         rp = VTOR4(vp);
2955         if (IS_SHADOW(vp, rp))
2956                 vp = RTOV4(rp); /* RTOV4 always gives the master */
2957 
2958         return (vn_has_cached_data(vp));
2959 }
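
/*
 * Sketch of typical use, grounded in the DIRECTIO_ON path above:
 * nfs4_has_pages() lets a caller skip the page flush entirely when the
 * file has nothing cached, e.g.
 *
 *        if (nfs4_has_pages(vp) &&
 *            ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0))
 *                error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
 *                    B_INVAL, cr, NULL);
 */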
2960 
2961 /*
2962  * This table is used to determine whether the client should attempt
2963  * failover based on the clnt_stat value returned by CLNT_CALL.  The
2964  * clnt_stat is used as an index into the table.  If the error value
2965  * that corresponds to the clnt_stat value in the table is non-zero,
2966  * then that is the error to be returned AND it also signals that
2967  * failover should be attempted.
2968  *
2969  * Special note: If the RPC_ values change, then direct indexing of the
2970  * table is no longer valid, but having the RPC_ values in the table
2971  * allows the functions to detect the change and issue a warning.
2972  * In this case, the code will always attempt failover as a defensive
2973  * measure.
2974  */
2975 
2976 static struct try_failover_tab {
2977         enum clnt_stat  cstat;
2978         int             error;
2979 } try_failover_table [] = {
2980 
2981         { RPC_SUCCESS,          0 },
2982         { RPC_CANTENCODEARGS,   0 },
2983         { RPC_CANTDECODERES,    0 },
2984         { RPC_CANTSEND,         ECOMM },
2985         { RPC_CANTRECV,         ECOMM },
2986         { RPC_TIMEDOUT,         ETIMEDOUT },
2987         { RPC_VERSMISMATCH,     0 },
2988         { RPC_AUTHERROR,        0 },
2989         { RPC_PROGUNAVAIL,      0 },
2990         { RPC_PROGVERSMISMATCH, 0 },
2991         { RPC_PROCUNAVAIL,      0 },
2992         { RPC_CANTDECODEARGS,   0 },
2993         { RPC_SYSTEMERROR,      ENOSR },
2994         { RPC_UNKNOWNHOST,      EHOSTUNREACH },
2995         { RPC_RPCBFAILURE,      ENETUNREACH },
2996         { RPC_PROGNOTREGISTERED,        ECONNREFUSED },
2997         { RPC_FAILED,           ETIMEDOUT },
2998         { RPC_UNKNOWNPROTO,     EHOSTUNREACH },
2999         { RPC_INTR,             0 },
3000         { RPC_UNKNOWNADDR,      EHOSTUNREACH },
3001         { RPC_TLIERROR,         0 },
3002         { RPC_NOBROADCAST,      EHOSTUNREACH },
3003         { RPC_N2AXLATEFAILURE,  ECONNREFUSED },
3004         { RPC_UDERROR,          0 },
3005         { RPC_INPROGRESS,       0 },
3006         { RPC_STALERACHANDLE,   EINVAL },
3007         { RPC_CANTCONNECT,      ECONNREFUSED },
3008         { RPC_XPRTFAILED,       ECONNABORTED },
3009         { RPC_CANTCREATESTREAM, ECONNREFUSED },
3010         { RPC_CANTSTORE,        ENOBUFS }
3011 };
3012 
3013 /*
3014  * nfs4_try_failover - determine whether the client should
3015  * attempt failover based on the values stored in the nfs4_error_t.
3016  */
3017 int
3018 nfs4_try_failover(nfs4_error_t *ep)
3019 {
3020         if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
3021                 return (TRUE);
3022 
3023         if (ep->error && ep->rpc_status != RPC_SUCCESS)
3024                 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);
3025 
3026         return (FALSE);
3027 }
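
/*
 * Illustrative sketch, assuming a generic RPC wrapper; the variable and
 * helper names are hypothetical.  The wrapper records the outcome of a
 * call in an nfs4_error_t and only then asks nfs4_try_failover()
 * whether failover processing should begin.
 *
 *        nfs4_error_t e;
 *
 *        nfs4_error_zinit(&e);
 *        e.error = call_errno;
 *        e.stat = compound_status;
 *        e.rpc_status = clnt_call_status;
 *        if (nfs4_try_failover(&e))
 *                (void) example_failover_to_next_server(mi);
 */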
3028 
3029 /*
3030  * try_failover - internal version of nfs4_try_failover, called only
3031  * from within this file.  Determine if failover is warranted
3032  * based on the clnt_stat and return the error number if it is.
3033  */
3034 static int
3035 try_failover(enum clnt_stat rpc_status)
3036 {
3037         int err = 0;
3038 
3039         if (rpc_status == RPC_SUCCESS)
3040                 return (0);
3041 
3042 #ifdef  DEBUG
3043         if (rpc_status != 0 && nfs4_try_failover_any) {
3044                 err = ETIMEDOUT;
3045                 goto done;
3046         }
3047 #endif
3048         /*
3049          * The rpc status is used as an index into the table.
3050          * If the rpc status is outside of the range of the
3051          * table or if the rpc error numbers have been changed
3052          * since the table was constructed, then print a warning
3053          * (DEBUG only) and try failover anyway.  Otherwise, just
3054          * grab the resulting error number out of the table.
3055          */
3056         if (rpc_status < RPC_SUCCESS || rpc_status >=
3057             sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
3058             try_failover_table[rpc_status].cstat != rpc_status) {
3059 
3060                 err = ETIMEDOUT;
3061 #ifdef  DEBUG
3062                 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
3063                     rpc_status);
3064 #endif
3065         } else
3066                 err = try_failover_table[rpc_status].error;
3067 
3068 done:
3069         if (rpc_status)
3070                 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3071                     "nfs4_try_failover: %strying failover on error %d",
3072                     err ? "" : "NOT ", rpc_status));
3073 
3074         return (err);
3075 }
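
/*
 * Worked example of the lookup above (with the DEBUG override
 * nfs4_try_failover_any clear): RPC_TIMEDOUT is within the table's
 * range and try_failover_table[RPC_TIMEDOUT].cstat matches, so
 * try_failover(RPC_TIMEDOUT) returns ETIMEDOUT and failover is
 * attempted; try_failover(RPC_CANTENCODEARGS) returns 0, so it is not.
 * If the RPC_ numbering ever changed, the cstat sanity check would fail
 * and the defensive ETIMEDOUT path would be taken instead.
 */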
3076 
3077 void
3078 nfs4_error_zinit(nfs4_error_t *ep)
3079 {
3080         ep->error = 0;
3081         ep->stat = NFS4_OK;
3082         ep->rpc_status = RPC_SUCCESS;
3083 }
3084 
3085 void
3086 nfs4_error_init(nfs4_error_t *ep, int error)
3087 {
3088         ep->error = error;
3089         ep->stat = NFS4_OK;
3090         ep->rpc_status = RPC_SUCCESS;
3091 }
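
/*
 * Sketch of the intended difference between the two initializers,
 * based only on the assignments above:
 *
 *        nfs4_error_t e;
 *
 *        nfs4_error_zinit(&e);
 *        nfs4_error_init(&e, EINTR);
 *
 * After the first call e.error is 0; after the second it is EINTR.
 * Both reset e.stat to NFS4_OK and e.rpc_status to RPC_SUCCESS.
 */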
3092 
3093 
3094 #ifdef DEBUG
3095 
3096 /*
3097  * Return a 16-bit hash of a filehandle, stateid, clientid, or owner.
3098  * Uses the same algorithm as the NFSv3 client.
3099  */
3101 int
3102 hash16(void *p, int len)
3103 {
3104         int i, rem;
3105         uint_t *wp;
3106         uint_t key = 0;
3107 
3108         /* handle a length that is not a multiple of the word size */
3109         if ((rem = len & 3) != 0)
3110                 len &= ~3;
3111 
3112         for (i = 0, wp = (uint_t *)p; i < len; i += 4, wp++) {
3113                 key ^= (*wp >> 16) ^ *wp;
3114         }
3115 
3116         /* hash left-over bytes */
3117         for (i = 0; i < rem; i++)
3118                 key ^= *((uchar_t *)p + i);
3119 
3120         return (key & 0xffff);
3121 }
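
/*
 * Sketch of typical use, taken from rnode4info() below: the hash is
 * computed over a copied-out filehandle so DEBUG output can identify
 * a file without printing the entire handle.
 *
 *        nfs4_fhandle_t fhandle;
 *        int h;
 *
 *        sfh4_copyval(rp->r_fh, &fhandle);
 *        h = hash16((void *)&fhandle.fh_buf, fhandle.fh_len);
 */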
3122 
3123 /*
3124  * rnode4info - return filehandle and path information for an rnode.
3125  * XXX MT issues: uses a single static buffer, no locking of path.
3126  */
3127 char *
3128 rnode4info(rnode4_t *rp)
3129 {
3130         static char buf[80];
3131         nfs4_fhandle_t fhandle;
3132         char *path;
3133         char *type;
3134 
3135         if (rp == NULL)
3136                 return ("null");
3137         if (rp->r_flags & R4ISXATTR)
3138                 type = "attr";
3139         else if (RTOV4(rp)->v_flag & V_XATTRDIR)
3140                 type = "attrdir";
3141         else if (RTOV4(rp)->v_flag & VROOT)
3142                 type = "root";
3143         else if (RTOV4(rp)->v_type == VDIR)
3144                 type = "dir";
3145         else if (RTOV4(rp)->v_type == VREG)
3146                 type = "file";
3147         else
3148                 type = "other";
3149         sfh4_copyval(rp->r_fh, &fhandle);
3150         path = fn_path(rp->r_svnode.sv_name);
3151         (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n",
3152             (void *)rp, path, type, rp->r_flags,
3153             hash16((void *)&fhandle.fh_buf, fhandle.fh_len));
3154         kmem_free(path, strlen(path)+1);
3155         return (buf);
3156 }
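
/*
 * Sketch of intended use (an assumption; the debug flag name below is
 * hypothetical): rnode4info() is meant for DEBUG trace messages such as
 *
 *        NFS4_DEBUG(nfs4_example_debug, (CE_NOTE,
 *            "operation failed on %s", rnode4info(rp)));
 *
 * Because it formats into a single static buffer, at most one call
 * should appear per message and concurrent callers may interleave.
 */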
3157 #endif