1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All rights reserved.
  29  *      Use is subject to license terms.
  30  */
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/proc.h>
  37 #include <sys/user.h>
  38 #include <sys/buf.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vnode.h>
  41 #include <sys/pathname.h>
  42 #include <sys/uio.h>
  43 #include <sys/file.h>
  44 #include <sys/stat.h>
  45 #include <sys/errno.h>
  46 #include <sys/socket.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/siginfo.h>
  49 #include <sys/tiuser.h>
  50 #include <sys/statvfs.h>
  51 #include <sys/stream.h>
  52 #include <sys/strsubr.h>
  53 #include <sys/stropts.h>
  54 #include <sys/timod.h>
  55 #include <sys/t_kuser.h>
  56 #include <sys/kmem.h>
  57 #include <sys/kstat.h>
  58 #include <sys/dirent.h>
  59 #include <sys/cmn_err.h>
  60 #include <sys/debug.h>
  61 #include <sys/unistd.h>
  62 #include <sys/vtrace.h>
  63 #include <sys/mode.h>
  64 #include <sys/acl.h>
  65 #include <sys/sdt.h>
  66 
  67 #include <rpc/types.h>
  68 #include <rpc/auth.h>
  69 #include <rpc/auth_unix.h>
  70 #include <rpc/auth_des.h>
  71 #include <rpc/svc.h>
  72 #include <rpc/xdr.h>
  73 #include <rpc/rpc_rdma.h>
  74 
  75 #include <nfs/nfs.h>
  76 #include <nfs/export.h>
  77 #include <nfs/nfssys.h>
  78 #include <nfs/nfs_clnt.h>
  79 #include <nfs/nfs_acl.h>
  80 #include <nfs/nfs_log.h>
  81 #include <nfs/nfs_cmd.h>
  82 #include <nfs/lm.h>
  83 #include <nfs/nfs_dispatch.h>
  84 #include <nfs/nfs4_drc.h>
  85 
  86 #include <sys/modctl.h>
  87 #include <sys/cladm.h>
  88 #include <sys/clconf.h>
  89 
  90 #include <sys/tsol/label.h>
  91 
/*
 * NOTE(review): presumably the size of a host-name buffer; no use of
 * MAXHOST is visible in this part of the file -- confirm.
 */
#define MAXHOST 32
/*
 * Declared here, defined outside this part of the file.
 * NOTE(review): by its name this formats an IPv6 address into the caller's
 * buffer -- confirm against the definition.
 */
const char *kinet_ntop6(uchar_t *, char *, size_t);
  94 
/*
 * Module linkage information.
 *
 * modlinkage is what _init() hands to mod_install() and what _info()
 * hands to mod_info(); it references the single misc-module entry below.
 */

static struct modlmisc modlmisc = {
	&mod_miscops, "NFS server module"
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

/* kmem cache of nfs_xuio_t objects; created in _init(). */
kmem_cache_t *nfs_xuio_cache;
/*
 * NOTE(review): presumably a switch for loaned-buffer (copy reduction)
 * support; it is only defined here and not read in this part of the file.
 */
int nfs_loaned_buffers = 0;
 109 
 110 int
 111 _init(void)
 112 {
 113         int status;
 114 
 115         if ((status = nfs_srvinit()) != 0) {
 116                 cmn_err(CE_WARN, "_init: nfs_srvinit failed");
 117                 return (status);
 118         }
 119 
 120         status = mod_install((struct modlinkage *)&modlinkage);
 121         if (status != 0) {
 122                 /*
 123                  * Could not load module, cleanup previous
 124                  * initialization work.
 125                  */
 126                 nfs_srvfini();
 127         }
 128 
 129         /*
 130          * Initialise some placeholders for nfssys() calls. These have
 131          * to be declared by the nfs module, since that handles nfssys()
 132          * calls - also used by NFS clients - but are provided by this
 133          * nfssrv module. These also then serve as confirmation to the
 134          * relevant code in nfs that nfssrv has been loaded, as they're
 135          * initially NULL.
 136          */
 137         nfs_srv_quiesce_func = nfs_srv_quiesce_all;
 138         nfs_srv_dss_func = rfs4_dss_setpaths;
 139 
 140         /* setup DSS paths here; must be done before initial server startup */
 141         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
 142 
 143         /* initialize the copy reduction caches */
 144 
 145         nfs_xuio_cache = kmem_cache_create("nfs_xuio_cache",
 146             sizeof (nfs_xuio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 147 
 148         return (status);
 149 }
 150 
 151 int
 152 _fini()
 153 {
 154         return (EBUSY);
 155 }
 156 
/*
 * Module information entry point: passes this module's linkage and the
 * caller's modinfo buffer to mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 162 
 163 /*
 164  * PUBLICFH_CHECK() checks if the dispatch routine supports
 165  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 166  * incoming request is using the public filehandle. The check duplicates
 167  * the exportmatch() call done in checkexport(), and we should consider
 168  * modifying those routines to avoid the duplication. For now, we optimize
 169  * by calling exportmatch() only after checking that the dispatch routine
 170  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 171  * public (i.e., not the placeholder).
 172  */
 173 #define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
 174                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 175                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 176                 (exi == exi_public && exportmatch(exi_root, \
 177                 fsid, xfid))))
 178 
/*
 * Forward declarations for routines local to this file, followed by two
 * security helpers that are defined elsewhere.
 */
/* common worker behind nfs_srv_stop_all() and nfs_srv_quiesce_all() */
static void     nfs_srv_shutdown_all(int);
/* per-instance NFSv4 start-up; the argument is the delegation setting */
static void     rfs4_server_start(int);
/* no-op used as the result-free slot of dispatch table entries */
static void     nullfree(void);
/* dispatch routines registered in the SVC_CALLOUT tables */
static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
static void     acl_dispatch(struct svc_req *, SVCXPRT *);
static void     common_dispatch(struct svc_req *, SVCXPRT *,
                rpcvers_t, rpcvers_t, char *,
                struct rpc_disptable *);
/* called from rfs4_server_start() on a warm start of a booted cluster */
static void     hanfsv4_failover(void);
static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
                        bool_t);
static char     *client_name(struct svc_req *req);
static char     *client_addr(struct svc_req *req, char *buf);
/* defined outside this file */
extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
 194 
 195 #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
 196         (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
 197         (nb)->len = (xprt)->xp_rtaddr.len;                        \
 198         (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
 199         bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
 200         }
 201 
/*
 * Public Filehandle common nfs routines
 *
 * Both are file-local and defined outside this part of the file.
 * NOTE(review): by their names these handle multi-component lookup paths
 * and NFS URLs respectively -- confirm against the definitions.
 */
static int      MCLpath(char **);
static void     URLparse(char *);
 207 
/*
 * NFS callout table.
 * This table is used by svc_getreq() to dispatch a request with
 * a given prog/vers pair to an appropriate service provider
 * dispatch routine.
 *
 * NOTE: ordering is relied upon below when resetting the version min/max
 * for NFS_PROGRAM.  Be careful if this is ever changed: entry [0] must
 * stay NFS_PROGRAM and entry [1] NFS_ACL_PROGRAM, because
 * nfs_srv_set_sc_versions() and rdma_start() patch the version ranges
 * by index.
 *
 * There is one table per transport flavour; the three start out identical.
 */

/* Selected by nfs_srv_set_sc_versions() for T_CLTS transports. */
static SVC_CALLOUT __nfs_sc_clts[] = {
	{ NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_clts = {
	sizeof (__nfs_sc_clts) / sizeof (__nfs_sc_clts[0]), FALSE,
	__nfs_sc_clts
};

/* Selected by nfs_srv_set_sc_versions() for T_COTS and T_COTS_ORD. */
static SVC_CALLOUT __nfs_sc_cots[] = {
	{ NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_cots = {
	sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
};

/* Passed to svc_rdma_kcreate() by rdma_start(). */
static SVC_CALLOUT __nfs_sc_rdma[] = {
	{ NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_rdma = {
	sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
/*
 * Version range currently being served.  Overwritten by nfs_svc() and
 * rdma_start() from their arguments, falling back to these defaults when
 * the requested range is invalid.
 */
rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
 246 
/*
 * Used to track the state of the server so that initialization
 * can be done properly.
 *
 * Transitions visible in this file:
 *   rfs4_server_start():    anything but RUNNING -> RUNNING
 *   nfs_srv_offline():      RUNNING -> OFFLINE
 *   nfs_srv_shutdown_all(): OFFLINE -> STOPPING -> STOPPED (stop), or
 *                           RUNNING/OFFLINE -> QUIESCED (quiesce)
 */
typedef enum {
	NFS_SERVER_STOPPED,     /* server state destroyed */
	NFS_SERVER_STOPPING,    /* server state being destroyed */
	NFS_SERVER_RUNNING,     /* set at the end of rfs4_server_start() */
	NFS_SERVER_QUIESCED,    /* server state preserved */
	NFS_SERVER_OFFLINE      /* server pool offline */
} nfs_server_running_t;

/*
 * Current state.  nfs_server_upordown_lock is held around every access in
 * this file; nfs_server_upordown_cv is signalled after a state change and
 * waited on by rfs4_server_start().
 */
static nfs_server_running_t nfs_server_upordown;
static kmutex_t nfs_server_upordown_lock;
static  kcondvar_t nfs_server_upordown_cv;

/*
 * DSS: distributed stable storage
 * lists of all DSS paths: current, and before last warmstart
 * Both are reset to NULL in _init().
 */
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;

/* NFSv4 dispatch entry points; defined outside this part of the file. */
int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);

/*
 * RDMA wait variables.
 * NOTE(review): not referenced in this part of the file; presumably used
 * by the RDMA wait/stop path -- confirm.
 */
static kcondvar_t rdma_wait_cv;
static kmutex_t rdma_wait_mutex;
 277 
 278 /*
 279  * Will be called at the point the server pool is being unregistered
 280  * from the pool list. From that point onwards, the pool is waiting
 281  * to be drained and as such the server state is stale and pertains
 282  * to the old instantiation of the NFS server pool.
 283  */
 284 void
 285 nfs_srv_offline(void)
 286 {
 287         mutex_enter(&nfs_server_upordown_lock);
 288         if (nfs_server_upordown == NFS_SERVER_RUNNING) {
 289                 nfs_server_upordown = NFS_SERVER_OFFLINE;
 290         }
 291         mutex_exit(&nfs_server_upordown_lock);
 292 }
 293 
 294 /*
 295  * Will be called at the point the server pool is being destroyed so
 296  * all transports have been closed and no service threads are in
 297  * existence.
 298  *
 299  * If we quiesce the server, we're shutting it down without destroying the
 300  * server state. This allows it to warm start subsequently.
 301  */
 302 void
 303 nfs_srv_stop_all(void)
 304 {
 305         int quiesce = 0;
 306         nfs_srv_shutdown_all(quiesce);
 307 }
 308 
 309 /*
 310  * This alternative shutdown routine can be requested via nfssys()
 311  */
 312 void
 313 nfs_srv_quiesce_all(void)
 314 {
 315         int quiesce = 1;
 316         nfs_srv_shutdown_all(quiesce);
 317 }
 318 
 319 static void
 320 nfs_srv_shutdown_all(int quiesce) {
 321         mutex_enter(&nfs_server_upordown_lock);
 322         if (quiesce) {
 323                 if (nfs_server_upordown == NFS_SERVER_RUNNING ||
 324                         nfs_server_upordown == NFS_SERVER_OFFLINE) {
 325                         nfs_server_upordown = NFS_SERVER_QUIESCED;
 326                         cv_signal(&nfs_server_upordown_cv);
 327 
 328                         /* reset DSS state, for subsequent warm restart */
 329                         rfs4_dss_numnewpaths = 0;
 330                         rfs4_dss_newpaths = NULL;
 331 
 332                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 333                             "NFSv4 state has been preserved");
 334                 }
 335         } else {
 336                 if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
 337                         nfs_server_upordown = NFS_SERVER_STOPPING;
 338                         mutex_exit(&nfs_server_upordown_lock);
 339                         rfs4_state_fini();
 340                         rfs4_fini_drc(nfs4_drc);
 341                         mutex_enter(&nfs_server_upordown_lock);
 342                         nfs_server_upordown = NFS_SERVER_STOPPED;
 343                         cv_signal(&nfs_server_upordown_cv);
 344                 }
 345         }
 346         mutex_exit(&nfs_server_upordown_lock);
 347 }
 348 
 349 static int
 350 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 351                         rpcvers_t versmin, rpcvers_t versmax)
 352 {
 353         struct strioctl strioc;
 354         struct T_info_ack tinfo;
 355         int             error, retval;
 356 
 357         /*
 358          * Find out what type of transport this is.
 359          */
 360         strioc.ic_cmd = TI_GETINFO;
 361         strioc.ic_timout = -1;
 362         strioc.ic_len = sizeof (tinfo);
 363         strioc.ic_dp = (char *)&tinfo;
 364         tinfo.PRIM_type = T_INFO_REQ;
 365 
 366         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 367             CRED(), &retval);
 368         if (error || retval)
 369                 return (error);
 370 
 371         /*
 372          * Based on our query of the transport type...
 373          *
 374          * Reset the min/max versions based on the caller's request
 375          * NOTE: This assumes that NFS_PROGRAM is first in the array!!
 376          * And the second entry is the NFS_ACL_PROGRAM.
 377          */
 378         switch (tinfo.SERV_type) {
 379         case T_CLTS:
 380                 if (versmax == NFS_V4)
 381                         return (EINVAL);
 382                 __nfs_sc_clts[0].sc_versmin = versmin;
 383                 __nfs_sc_clts[0].sc_versmax = versmax;
 384                 __nfs_sc_clts[1].sc_versmin = versmin;
 385                 __nfs_sc_clts[1].sc_versmax = versmax;
 386                 *sctpp = &nfs_sct_clts;
 387                 break;
 388         case T_COTS:
 389         case T_COTS_ORD:
 390                 __nfs_sc_cots[0].sc_versmin = versmin;
 391                 __nfs_sc_cots[0].sc_versmax = versmax;
 392                 /* For the NFS_ACL program, check the max version */
 393                 if (versmax > NFS_ACL_VERSMAX)
 394                         versmax = NFS_ACL_VERSMAX;
 395                 __nfs_sc_cots[1].sc_versmin = versmin;
 396                 __nfs_sc_cots[1].sc_versmax = versmax;
 397                 *sctpp = &nfs_sct_cots;
 398                 break;
 399         default:
 400                 error = EINVAL;
 401         }
 402 
 403         return (error);
 404 }
 405 
 406 /*
 407  * NFS Server system call.
 408  * Does all of the work of running a NFS server.
 409  * uap->fd is the fd of an open transport provider
 410  */
 411 int
 412 nfs_svc(struct nfs_svc_args *arg, model_t model)
 413 {
 414         file_t *fp;
 415         SVCMASTERXPRT *xprt;
 416         int error;
 417         int readsize;
 418         char buf[KNC_STRSIZE];
 419         size_t len;
 420         STRUCT_HANDLE(nfs_svc_args, uap);
 421         struct netbuf addrmask;
 422         SVC_CALLOUT_TABLE *sctp = NULL;
 423 
 424 #ifdef lint
 425         model = model;          /* STRUCT macros don't always refer to it */
 426 #endif
 427 
 428         STRUCT_SET_HANDLE(uap, model, arg);
 429 
 430         /* Check privileges in nfssys() */
 431 
 432         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 433                 return (EBADF);
 434 
 435         /*
 436          * Set read buffer size to rsize
 437          * and add room for RPC headers.
 438          */
 439         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 440         if (readsize < RPC_MAXDATASIZE)
 441                 readsize = RPC_MAXDATASIZE;
 442 
 443         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 444             KNC_STRSIZE, &len);
 445         if (error) {
 446                 releasef(STRUCT_FGET(uap, fd));
 447                 return (error);
 448         }
 449 
 450         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 451         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 452         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 453         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 454             addrmask.len);
 455         if (error) {
 456                 releasef(STRUCT_FGET(uap, fd));
 457                 kmem_free(addrmask.buf, addrmask.maxlen);
 458                 return (error);
 459         }
 460 
 461         nfs_versmin = STRUCT_FGET(uap, versmin);
 462         nfs_versmax = STRUCT_FGET(uap, versmax);
 463 
 464         /* Double check the vers min/max ranges */
 465         if ((nfs_versmin > nfs_versmax) ||
 466             (nfs_versmin < NFS_VERSMIN) ||
 467             (nfs_versmax > NFS_VERSMAX)) {
 468                 nfs_versmin = NFS_VERSMIN_DEFAULT;
 469                 nfs_versmax = NFS_VERSMAX_DEFAULT;
 470         }
 471 
 472         if (error =
 473             nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
 474                 releasef(STRUCT_FGET(uap, fd));
 475                 kmem_free(addrmask.buf, addrmask.maxlen);
 476                 return (error);
 477         }
 478 
 479         /* Initialize nfsv4 server */
 480         if (nfs_versmax == (rpcvers_t)NFS_V4)
 481                 rfs4_server_start(STRUCT_FGET(uap, delegation));
 482 
 483         /* Create a transport handle. */
 484         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 485             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 486 
 487         if (error)
 488                 kmem_free(addrmask.buf, addrmask.maxlen);
 489 
 490         releasef(STRUCT_FGET(uap, fd));
 491 
 492         /* HA-NFSv4: save the cluster nodeid */
 493         if (cluster_bootflags & CLUSTER_BOOTED)
 494                 lm_global_nlmid = clconf_get_nodeid();
 495 
 496         return (error);
 497 }
 498 
 499 static void
 500 rfs4_server_start(int nfs4_srv_delegation)
 501 {
 502         /*
 503          * Determine if the server has previously been "started" and
 504          * if not, do the per instance initialization
 505          */
 506         mutex_enter(&nfs_server_upordown_lock);
 507 
 508         if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 509                 /* Do we need to stop and wait on the previous server? */
 510                 while (nfs_server_upordown == NFS_SERVER_STOPPING ||
 511                     nfs_server_upordown == NFS_SERVER_OFFLINE)
 512                         cv_wait(&nfs_server_upordown_cv,
 513                             &nfs_server_upordown_lock);
 514 
 515                 if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 516                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 517                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 518                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 519                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 520 
 521                         /* is this an nfsd warm start? */
 522                         if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
 523                                 cmn_err(CE_NOTE, "nfs_server: "
 524                                     "server was previously quiesced; "
 525                                     "existing NFSv4 state will be re-used");
 526 
 527                                 /*
 528                                  * HA-NFSv4: this is also the signal
 529                                  * that a Resource Group failover has
 530                                  * occurred.
 531                                  */
 532                                 if (cluster_bootflags & CLUSTER_BOOTED)
 533                                         hanfsv4_failover();
 534                         } else {
 535                                 /* cold start */
 536                                 rfs4_state_init();
 537                                 nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 538                                     nfs4_drc_hash);
 539                         }
 540 
 541                         /*
 542                          * Check to see if delegation is to be
 543                          * enabled at the server
 544                          */
 545                         if (nfs4_srv_delegation != FALSE)
 546                                 rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
 547 
 548                         nfs_server_upordown = NFS_SERVER_RUNNING;
 549                 }
 550                 cv_signal(&nfs_server_upordown_cv);
 551         }
 552         mutex_exit(&nfs_server_upordown_lock);
 553 }
 554 
 555 /*
 556  * If RDMA device available,
 557  * start RDMA listener.
 558  */
 559 int
 560 rdma_start(struct rdma_svc_args *rsa)
 561 {
 562         int error;
 563         rdma_xprt_group_t started_rdma_xprts;
 564         rdma_stat stat;
 565         int svc_state = 0;
 566 
 567         /* Double check the vers min/max ranges */
 568         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 569             (rsa->nfs_versmin < NFS_VERSMIN) ||
 570             (rsa->nfs_versmax > NFS_VERSMAX)) {
 571                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 572                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 573         }
 574         nfs_versmin = rsa->nfs_versmin;
 575         nfs_versmax = rsa->nfs_versmax;
 576 
 577         /* Set the versions in the callout table */
 578         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 579         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 580         /* For the NFS_ACL program, check the max version */
 581         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 582         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 583                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 584         else
 585                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 586 
 587         /* Initialize nfsv4 server */
 588         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 589                 rfs4_server_start(rsa->delegation);
 590 
 591         started_rdma_xprts.rtg_count = 0;
 592         started_rdma_xprts.rtg_listhead = NULL;
 593         started_rdma_xprts.rtg_poolid = rsa->poolid;
 594 
 595 restart:
 596         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 597             &started_rdma_xprts);
 598 
 599         svc_state = !error;
 600 
 601         while (!error) {
 602 
 603                 /*
 604                  * wait till either interrupted by a signal on
 605                  * nfs service stop/restart or signalled by a
 606                  * rdma plugin attach/detatch.
 607                  */
 608 
 609                 stat = rdma_kwait();
 610 
 611                 /*
 612                  * stop services if running -- either on a HCA detach event
 613                  * or if the nfs service is stopped/restarted.
 614                  */
 615 
 616                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 617                     svc_state) {
 618                         rdma_stop(&started_rdma_xprts);
 619                         svc_state = 0;
 620                 }
 621 
 622                 /*
 623                  * nfs service stop/restart, break out of the
 624                  * wait loop and return;
 625                  */
 626                 if (stat == RDMA_INTR)
 627                         return (0);
 628 
 629                 /*
 630                  * restart stopped services on a HCA attach event
 631                  * (if not already running)
 632                  */
 633 
 634                 if ((stat == RDMA_HCA_ATTACH) && (svc_state == 0))
 635                         goto restart;
 636 
 637                 /*
 638                  * loop until a nfs service stop/restart
 639                  */
 640         }
 641 
 642         return (error);
 643 }
 644 
/*
 * Handler used for the NULL procedure (entry 0 of rfsdisptab_v2).
 * Takes no arguments, produces no result and does nothing.
 */
/* ARGSUSED */
void
rpc_null(caddr_t *argp, caddr_t *resp)
{
}
 650 
/*
 * NFSv3 NULL procedure handler (entry 0 of rfsdisptab_v3).
 * Does no work of its own; it only fires the op-null start and done
 * DTrace probes, passing the request, the credential and a NULL vnode.
 */
/* ARGSUSED */
void
rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr)
{
	DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, NULL);
	DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, NULL);
}
 661 
/*
 * Placeholder handler for the NFSv2 procedures that are no longer
 * supported (RFS_ROOT and RFS_WRITECACHE in rfsdisptab_v2).
 * It does nothing and, being void, reports nothing to its caller.
 */
/* ARGSUSED */
static void
rfs_error(caddr_t *argp, caddr_t *resp)
{
	/* would be EOPNOTSUPP if this handler could return a status */
}
 668 
/*
 * No-op used in the dispatch tables for procedures whose results need no
 * clean-up.
 */
static void
nullfree(void)
{
}
 673 
/*
 * Printable names of the NFS version 2 procedures, listed in the same
 * order as the entries of rfsdisptab_v2 (index 0 is the NULL procedure).
 */
static char *rfscallnames_v2[] = {
	"RFS2_NULL",
	"RFS2_GETATTR",
	"RFS2_SETATTR",
	"RFS2_ROOT",
	"RFS2_LOOKUP",
	"RFS2_READLINK",
	"RFS2_READ",
	"RFS2_WRITECACHE",
	"RFS2_WRITE",
	"RFS2_CREATE",
	"RFS2_REMOVE",
	"RFS2_RENAME",
	"RFS2_LINK",
	"RFS2_SYMLINK",
	"RFS2_MKDIR",
	"RFS2_RMDIR",
	"RFS2_READDIR",
	"RFS2_STATFS"
};
 694 
/*
 * Dispatch table for NFS version 2, one entry per procedure number.
 *
 * NOTE(review): the field meanings below are inferred from the
 * initializers; confirm against the struct rpcdisp definition.  Each
 * entry gives, in order:
 *	the procedure handler,
 *	the argument XDR routine, its "fast" variant (or NULL_xdrproc_t)
 *	    and the argument size,
 *	the result XDR routine, its "fast" variant (or NULL_xdrproc_t)
 *	    and the result size,
 *	the routine that frees the result (nullfree when there is nothing
 *	    to free),
 *	the RPC_* flags (read as dis_flags by PUBLICFH_CHECK()),
 *	the routine that extracts the file handle from the arguments
 *	    (0 when the procedure takes none).
 *
 * Procedures 3 (ROOT) and 7 (WRITECACHE) are no longer supported and are
 * routed to rfs_error.
 */
static struct rpcdisp rfsdisptab_v2[] = {
	/*
	 * NFS VERSION 2
	 */

	/* RFS_NULL = 0 */
	{rpc_null,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_GETATTR = 1 */
	{rfs_getattr,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
	    rfs_getattr_getfh},

	/* RFS_SETATTR = 2 */
	{rfs_setattr,
	    xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_MAPRESP,
	    rfs_setattr_getfh},

	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
	{rfs_error,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_LOOKUP = 4 */
	{rfs_lookup,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK,
	    rfs_lookup_getfh},

	/* RFS_READLINK = 5 */
	{rfs_readlink,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres),
	    rfs_rlfree, RPC_IDEMPOTENT,
	    rfs_readlink_getfh},

	/* RFS_READ = 6 */
	{rfs_read,
	    xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs),
	    xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult),
	    rfs_rdfree, RPC_IDEMPOTENT,
	    rfs_read_getfh},

	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
	{rfs_error,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_WRITE = 8 */
	{rfs_write,
	    xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_MAPRESP,
	    rfs_write_getfh},

	/* RFS_CREATE = 9 */
	{rfs_create,
	    xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_MAPRESP,
	    rfs_create_getfh},

	/* RFS_REMOVE = 10 */
	{rfs_remove,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_remove_getfh},

	/* RFS_RENAME = 11 */
	{rfs_rename,
	    xdr_rnmargs, NULL_xdrproc_t, sizeof (struct nfsrnmargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_rename_getfh},

	/* RFS_LINK = 12 */
	{rfs_link,
	    xdr_linkargs, NULL_xdrproc_t, sizeof (struct nfslinkargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_link_getfh},

	/* RFS_SYMLINK = 13 */
	{rfs_symlink,
	    xdr_slargs, NULL_xdrproc_t, sizeof (struct nfsslargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_symlink_getfh},

	/* RFS_MKDIR = 14 */
	{rfs_mkdir,
	    xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_MAPRESP,
	    rfs_mkdir_getfh},

	/* RFS_RMDIR = 15 */
	{rfs_rmdir,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_rmdir_getfh},

	/* RFS_READDIR = 16 */
	{rfs_readdir,
	    xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs),
	    xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres),
	    rfs_rddirfree, RPC_IDEMPOTENT,
	    rfs_readdir_getfh},

	/* RFS_STATFS = 17 */
	{rfs_statfs,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs),
	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
	    rfs_statfs_getfh},
};
 846 
/*
 * Printable names of the NFS version 3 procedures.
 * NOTE(review): the order matches the visible start of rfsdisptab_v3
 * (NULL, GETATTR, SETATTR, LOOKUP, ...); the rest of that table is outside
 * this part of the file -- confirm the two stay in step.
 */
static char *rfscallnames_v3[] = {
	"RFS3_NULL",
	"RFS3_GETATTR",
	"RFS3_SETATTR",
	"RFS3_LOOKUP",
	"RFS3_ACCESS",
	"RFS3_READLINK",
	"RFS3_READ",
	"RFS3_WRITE",
	"RFS3_CREATE",
	"RFS3_MKDIR",
	"RFS3_SYMLINK",
	"RFS3_MKNOD",
	"RFS3_REMOVE",
	"RFS3_RMDIR",
	"RFS3_RENAME",
	"RFS3_LINK",
	"RFS3_READDIR",
	"RFS3_READDIRPLUS",
	"RFS3_FSSTAT",
	"RFS3_FSINFO",
	"RFS3_PATHCONF",
	"RFS3_COMMIT"
};
 871 
 /*
  * Dispatch table for NFS version 3.  common_dispatch() selects an entry
  * with dis_table[req->rq_proc], so entry N must describe procedure N and
  * the order must match rfscallnames_v3[].
  *
  * Judging by how common_dispatch() reads struct rpcdisp, each initializer
  * supplies, in order (the struct definition is not shown here -- confirm):
  *   - the service routine (dis_proc);
  *   - the argument XDR routine, its "fast" variant and the argument size
  *     (dis_xdrargs, dis_fastxdrargs, dis_argsz);
  *   - the result XDR routine, its "fast" variant and the result size
  *     (dis_xdrres, dis_fastxdrres, dis_ressz);
  *   - the result free routine (dis_resfree) and the RPC_* flags
  *     (dis_flags);
  *   - the routine that extracts the file handle from the decoded
  *     arguments (dis_getfh); when it is 0 common_dispatch() performs no
  *     export lookup or authentication check and passes a NULL credential.
  *
  * Every version 3 entry uses NULL_xdrproc_t in both "fast" slots and none
  * sets RPC_MAPRESP.  Entries without RPC_IDEMPOTENT are run through
  * SVC_DUP_EXT() by common_dispatch(); note that READDIRPLUS is flagged
  * RPC_AVOIDWORK rather than RPC_IDEMPOTENT.
  */
 872 static struct rpcdisp rfsdisptab_v3[] = {
 873         /*
 874          * NFS VERSION 3
 875          */
 876 
 877         /* RFS_NULL = 0 */
 878         {rpc_null_v3,
 879             xdr_void, NULL_xdrproc_t, 0,
 880             xdr_void, NULL_xdrproc_t, 0,
 881             nullfree, RPC_IDEMPOTENT,
 882             0},
 883 
 884         /* RFS3_GETATTR = 1 */
 885         {rfs3_getattr,
 886             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (GETATTR3args),
 887             xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res),
 888             nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON),
 889             rfs3_getattr_getfh},
 890 
 891         /* RFS3_SETATTR = 2 */
 892         {rfs3_setattr,
 893             xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args),
 894             xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res),
 895             nullfree, 0,
 896             rfs3_setattr_getfh},
 897 
 898         /* RFS3_LOOKUP = 3 */
 899         {rfs3_lookup,
 900             xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args),
 901             xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res),
 902             nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
 903             rfs3_lookup_getfh},
 904 
 905         /* RFS3_ACCESS = 4 */
 906         {rfs3_access,
 907             xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args),
 908             xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res),
 909             nullfree, RPC_IDEMPOTENT,
 910             rfs3_access_getfh},
 911 
 912         /* RFS3_READLINK = 5 */
 913         {rfs3_readlink,
 914             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (READLINK3args),
 915             xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res),
 916             rfs3_readlink_free, RPC_IDEMPOTENT,
 917             rfs3_readlink_getfh},
 918 
 919         /* RFS3_READ = 6 */
 920         {rfs3_read,
 921             xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args),
 922             xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res),
 923             rfs3_read_free, RPC_IDEMPOTENT,
 924             rfs3_read_getfh},
 925 
 926         /* RFS3_WRITE = 7 */
 927         {rfs3_write,
 928             xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args),
 929             xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res),
 930             nullfree, 0,
 931             rfs3_write_getfh},
 932 
 933         /* RFS3_CREATE = 8 */
 934         {rfs3_create,
 935             xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args),
 936             xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res),
 937             nullfree, 0,
 938             rfs3_create_getfh},
 939 
 940         /* RFS3_MKDIR = 9 */
 941         {rfs3_mkdir,
 942             xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args),
 943             xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res),
 944             nullfree, 0,
 945             rfs3_mkdir_getfh},
 946 
 947         /* RFS3_SYMLINK = 10 */
 948         {rfs3_symlink,
 949             xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args),
 950             xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res),
 951             nullfree, 0,
 952             rfs3_symlink_getfh},
 953 
 954         /* RFS3_MKNOD = 11 */
 955         {rfs3_mknod,
 956             xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args),
 957             xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res),
 958             nullfree, 0,
 959             rfs3_mknod_getfh},
 960 
 961         /* RFS3_REMOVE = 12 */
 962         {rfs3_remove,
 963             xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args),
 964             xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res),
 965             nullfree, 0,
 966             rfs3_remove_getfh},
 967 
 968         /* RFS3_RMDIR = 13 */
 969         {rfs3_rmdir,
 970             xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args),
 971             xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res),
 972             nullfree, 0,
 973             rfs3_rmdir_getfh},
 974 
 975         /* RFS3_RENAME = 14 */
 976         {rfs3_rename,
 977             xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args),
 978             xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res),
 979             nullfree, 0,
 980             rfs3_rename_getfh},
 981 
 982         /* RFS3_LINK = 15 */
 983         {rfs3_link,
 984             xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args),
 985             xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res),
 986             nullfree, 0,
 987             rfs3_link_getfh},
 988 
 989         /* RFS3_READDIR = 16 */
 990         {rfs3_readdir,
 991             xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args),
 992             xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res),
 993             rfs3_readdir_free, RPC_IDEMPOTENT,
 994             rfs3_readdir_getfh},
 995 
 996         /* RFS3_READDIRPLUS = 17 */
 997         {rfs3_readdirplus,
 998             xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args),
 999             xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res),
1000             rfs3_readdirplus_free, RPC_AVOIDWORK,
1001             rfs3_readdirplus_getfh},
1002 
1003         /* RFS3_FSSTAT = 18 */
1004         {rfs3_fsstat,
1005             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSSTAT3args),
1006             xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res),
1007             nullfree, RPC_IDEMPOTENT,
1008             rfs3_fsstat_getfh},
1009 
1010         /* RFS3_FSINFO = 19 */
1011         {rfs3_fsinfo,
1012             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSINFO3args),
1013             xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res),
1014             nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON,
1015             rfs3_fsinfo_getfh},
1016 
1017         /* RFS3_PATHCONF = 20 */
1018         {rfs3_pathconf,
1019             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (PATHCONF3args),
1020             xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res),
1021             nullfree, RPC_IDEMPOTENT,
1022             rfs3_pathconf_getfh},
1023 
1024         /* RFS3_COMMIT = 21 */
1025         {rfs3_commit,
1026             xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args),
1027             xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res),
1028             nullfree, RPC_IDEMPOTENT,
1029             rfs3_commit_getfh},
1030 };
1031 
/*
 * Printable names for the NFS version 4 procedures, published as the
 * procedure-name list for version 4 in rfs_disptable[].  rfsdisptab_v4[]
 * defines only procedures 0 (NULL) and 1 (COMPOUND), and common_dispatch()
 * rejects any procedure number at or beyond the dispatch table size, so
 * that path can only ever reach the first two strings.
 *
 * NOTE(review): the purpose of entries 2..8 (six further "RFS4_NULL"
 * strings and a final "RFS4_CREATE") is not evident from this part of the
 * file -- confirm whether another consumer depends on them before trimming.
 */
1032 static char *rfscallnames_v4[] = {
1033         "RFS4_NULL",
1034         "RFS4_COMPOUND",
1035         "RFS4_NULL",
1036         "RFS4_NULL",
1037         "RFS4_NULL",
1038         "RFS4_NULL",
1039         "RFS4_NULL",
1040         "RFS4_NULL",
1041         "RFS4_CREATE"
1042 };
1043 
/*
 * Dispatch table for NFS version 4.  It has two entries, NULL and
 * COMPOUND, laid out like the version 3 table (service routine, argument
 * XDR routines and size, result XDR routines and size, result free
 * routine, flags, file handle extractor).
 *
 * Once the arguments are decoded, common_dispatch() hands every version 4
 * request to rfs4_dispatch() together with the selected entry and then
 * jumps straight to its cleanup code, so the flags and the (absent) file
 * handle extractor recorded here are not examined by common_dispatch()
 * itself.
 */
1044 static struct rpcdisp rfsdisptab_v4[] = {
1045         /*
1046          * NFS VERSION 4
1047          */
1048 
1049         /* RFS_NULL = 0 */
1050         {rpc_null,
1051             xdr_void, NULL_xdrproc_t, 0,
1052             xdr_void, NULL_xdrproc_t, 0,
1053             nullfree, RPC_IDEMPOTENT, 0},
1054 
1055         /* RFS4_COMPOUND = 1 */
1056         {rfs4_compound,
1057             xdr_COMPOUND4args_srv, NULL_xdrproc_t, sizeof (COMPOUND4args),
1058             xdr_COMPOUND4res_srv, NULL_xdrproc_t, sizeof (COMPOUND4res),
1059             rfs4_compound_free, 0, 0},
1060 };
1061 
/*
 * Union of the decoded-argument types of every NFS version 2, 3 and 4
 * procedure.  common_dispatch() embeds this union in its on-stack args_buf
 * so that a single buffer is large enough for whichever procedure is being
 * decoded; no member is referenced by name in the code visible here.
 * Procedures that take no arguments (the NULL procedures) and the
 * procedures marked as no longer supported have no member.
 */
1062 union rfs_args {
1063         /*
1064          * NFS VERSION 2
1065          */
1066 
1067         /* RFS_NULL = 0 */
1068 
1069         /* RFS_GETATTR = 1 */
1070         fhandle_t nfs2_getattr_args;
1071 
1072         /* RFS_SETATTR = 2 */
1073         struct nfssaargs nfs2_setattr_args;
1074 
1075         /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1076 
1077         /* RFS_LOOKUP = 4 */
1078         struct nfsdiropargs nfs2_lookup_args;
1079 
1080         /* RFS_READLINK = 5 */
1081         fhandle_t nfs2_readlink_args;
1082 
1083         /* RFS_READ = 6 */
1084         struct nfsreadargs nfs2_read_args;
1085 
1086         /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1087 
1088         /* RFS_WRITE = 8 */
1089         struct nfswriteargs nfs2_write_args;
1090 
1091         /* RFS_CREATE = 9 */
1092         struct nfscreatargs nfs2_create_args;
1093 
1094         /* RFS_REMOVE = 10 */
1095         struct nfsdiropargs nfs2_remove_args;
1096 
1097         /* RFS_RENAME = 11 */
1098         struct nfsrnmargs nfs2_rename_args;
1099 
1100         /* RFS_LINK = 12 */
1101         struct nfslinkargs nfs2_link_args;
1102 
1103         /* RFS_SYMLINK = 13 */
1104         struct nfsslargs nfs2_symlink_args;
1105 
1106         /* RFS_MKDIR = 14 */
1107         struct nfscreatargs nfs2_mkdir_args;
1108 
1109         /* RFS_RMDIR = 15 */
1110         struct nfsdiropargs nfs2_rmdir_args;
1111 
1112         /* RFS_READDIR = 16 */
1113         struct nfsrddirargs nfs2_readdir_args;
1114 
1115         /* RFS_STATFS = 17 */
1116         fhandle_t nfs2_statfs_args;
1117 
1118         /*
1119          * NFS VERSION 3
1120          */
1121 
1122         /* RFS_NULL = 0 */
1123 
1124         /* RFS3_GETATTR = 1 */
1125         GETATTR3args nfs3_getattr_args;
1126 
1127         /* RFS3_SETATTR = 2 */
1128         SETATTR3args nfs3_setattr_args;
1129 
1130         /* RFS3_LOOKUP = 3 */
1131         LOOKUP3args nfs3_lookup_args;
1132 
1133         /* RFS3_ACCESS = 4 */
1134         ACCESS3args nfs3_access_args;
1135 
1136         /* RFS3_READLINK = 5 */
1137         READLINK3args nfs3_readlink_args;
1138 
1139         /* RFS3_READ = 6 */
1140         READ3args nfs3_read_args;
1141 
1142         /* RFS3_WRITE = 7 */
1143         WRITE3args nfs3_write_args;
1144 
1145         /* RFS3_CREATE = 8 */
1146         CREATE3args nfs3_create_args;
1147 
1148         /* RFS3_MKDIR = 9 */
1149         MKDIR3args nfs3_mkdir_args;
1150 
1151         /* RFS3_SYMLINK = 10 */
1152         SYMLINK3args nfs3_symlink_args;
1153 
1154         /* RFS3_MKNOD = 11 */
1155         MKNOD3args nfs3_mknod_args;
1156 
1157         /* RFS3_REMOVE = 12 */
1158         REMOVE3args nfs3_remove_args;
1159 
1160         /* RFS3_RMDIR = 13 */
1161         RMDIR3args nfs3_rmdir_args;
1162 
1163         /* RFS3_RENAME = 14 */
1164         RENAME3args nfs3_rename_args;
1165 
1166         /* RFS3_LINK = 15 */
1167         LINK3args nfs3_link_args;
1168 
1169         /* RFS3_READDIR = 16 */
1170         READDIR3args nfs3_readdir_args;
1171 
1172         /* RFS3_READDIRPLUS = 17 */
1173         READDIRPLUS3args nfs3_readdirplus_args;
1174 
1175         /* RFS3_FSSTAT = 18 */
1176         FSSTAT3args nfs3_fsstat_args;
1177 
1178         /* RFS3_FSINFO = 19 */
1179         FSINFO3args nfs3_fsinfo_args;
1180 
1181         /* RFS3_PATHCONF = 20 */
1182         PATHCONF3args nfs3_pathconf_args;
1183 
1184         /* RFS3_COMMIT = 21 */
1185         COMMIT3args nfs3_commit_args;
1186 
1187         /*
1188          * NFS VERSION 4
1189          */
1190 
1191         /* RFS_NULL = 0 */
1192 
1193         /* RFS4_COMPOUND = 1 */
1194         COMPOUND4args nfs4_compound_args;
1195 };
1196 
/*
 * Union of the result types of every NFS version 2, 3 and 4 procedure.
 * common_dispatch() embeds this union in its on-stack res_buf, which
 * receives the procedure's results whenever no transport-provided result
 * area is used, and which also holds the copy of the results kept for
 * logging.  No member is referenced by name in the code visible here.
 * Procedures that return nothing (the NULL procedures) and the procedures
 * marked as no longer supported have no member.
 */
1197 union rfs_res {
1198         /*
1199          * NFS VERSION 2
1200          */
1201 
1202         /* RFS_NULL = 0 */
1203 
1204         /* RFS_GETATTR = 1 */
1205         struct nfsattrstat nfs2_getattr_res;
1206 
1207         /* RFS_SETATTR = 2 */
1208         struct nfsattrstat nfs2_setattr_res;
1209 
1210         /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1211 
1212         /* RFS_LOOKUP = 4 */
1213         struct nfsdiropres nfs2_lookup_res;
1214 
1215         /* RFS_READLINK = 5 */
1216         struct nfsrdlnres nfs2_readlink_res;
1217 
1218         /* RFS_READ = 6 */
1219         struct nfsrdresult nfs2_read_res;
1220 
1221         /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1222 
1223         /* RFS_WRITE = 8 */
1224         struct nfsattrstat nfs2_write_res;
1225 
1226         /* RFS_CREATE = 9 */
1227         struct nfsdiropres nfs2_create_res;
1228 
1229         /* RFS_REMOVE = 10 */
1230         enum nfsstat nfs2_remove_res;
1231 
1232         /* RFS_RENAME = 11 */
1233         enum nfsstat nfs2_rename_res;
1234 
1235         /* RFS_LINK = 12 */
1236         enum nfsstat nfs2_link_res;
1237 
1238         /* RFS_SYMLINK = 13 */
1239         enum nfsstat nfs2_symlink_res;
1240 
1241         /* RFS_MKDIR = 14 */
1242         struct nfsdiropres nfs2_mkdir_res;
1243 
1244         /* RFS_RMDIR = 15 */
1245         enum nfsstat nfs2_rmdir_res;
1246 
1247         /* RFS_READDIR = 16 */
1248         struct nfsrddirres nfs2_readdir_res;
1249 
1250         /* RFS_STATFS = 17 */
1251         struct nfsstatfs nfs2_statfs_res;
1252 
1253         /*
1254          * NFS VERSION 3
1255          */
1256 
1257         /* RFS_NULL = 0 */
1258 
1259         /* RFS3_GETATTR = 1 */
1260         GETATTR3res nfs3_getattr_res;
1261 
1262         /* RFS3_SETATTR = 2 */
1263         SETATTR3res nfs3_setattr_res;
1264 
1265         /* RFS3_LOOKUP = 3 */
1266         LOOKUP3res nfs3_lookup_res;
1267 
1268         /* RFS3_ACCESS = 4 */
1269         ACCESS3res nfs3_access_res;
1270 
1271         /* RFS3_READLINK = 5 */
1272         READLINK3res nfs3_readlink_res;
1273 
1274         /* RFS3_READ = 6 */
1275         READ3res nfs3_read_res;
1276 
1277         /* RFS3_WRITE = 7 */
1278         WRITE3res nfs3_write_res;
1279 
1280         /* RFS3_CREATE = 8 */
1281         CREATE3res nfs3_create_res;
1282 
1283         /* RFS3_MKDIR = 9 */
1284         MKDIR3res nfs3_mkdir_res;
1285 
1286         /* RFS3_SYMLINK = 10 */
1287         SYMLINK3res nfs3_symlink_res;
1288 
1289         /* RFS3_MKNOD = 11 */
1290         MKNOD3res nfs3_mknod_res;
1291 
1292         /* RFS3_REMOVE = 12 */
1293         REMOVE3res nfs3_remove_res;
1294 
1295         /* RFS3_RMDIR = 13 */
1296         RMDIR3res nfs3_rmdir_res;
1297 
1298         /* RFS3_RENAME = 14 */
1299         RENAME3res nfs3_rename_res;
1300 
1301         /* RFS3_LINK = 15 */
1302         LINK3res nfs3_link_res;
1303 
1304         /* RFS3_READDIR = 16 */
1305         READDIR3res nfs3_readdir_res;
1306 
1307         /* RFS3_READDIRPLUS = 17 */
1308         READDIRPLUS3res nfs3_readdirplus_res;
1309 
1310         /* RFS3_FSSTAT = 18 */
1311         FSSTAT3res nfs3_fsstat_res;
1312 
1313         /* RFS3_FSINFO = 19 */
1314         FSINFO3res nfs3_fsinfo_res;
1315 
1316         /* RFS3_PATHCONF = 20 */
1317         PATHCONF3res nfs3_pathconf_res;
1318 
1319         /* RFS3_COMMIT = 21 */
1320         COMMIT3res nfs3_commit_res;
1321 
1322         /*
1323          * NFS VERSION 4
1324          */
1325 
1326         /* RFS_NULL = 0 */
1327 
1328         /* RFS4_COMPOUND = 1 */
1329         COMPOUND4res nfs4_compound_res;
1330 
1331 };
1332 
/*
 * Per-version dispatch descriptors for the NFS program, in the order
 * version 2, version 3, version 4.  common_dispatch() indexes the table it
 * is given with (req->rq_vers - min_vers), so the first entry has to
 * describe the lowest supported version.
 *
 * Each entry supplies, in order: the number of procedures (computed from
 * the size of that version's dispatch table), the procedure name strings,
 * the address of the pointer to the per-procedure call counters, and the
 * dispatch table itself.  common_dispatch() reads these as dis_nprocs,
 * dis_procnames, dis_proccntp and dis_table (field order presumed from the
 * initializers -- the struct definition is not shown here).
 */
1333 static struct rpc_disptable rfs_disptable[] = {
1334         {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1335             rfscallnames_v2,
1336             &rfsproccnt_v2_ptr, rfsdisptab_v2},
1337         {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1338             rfscallnames_v3,
1339             &rfsproccnt_v3_ptr, rfsdisptab_v3},
1340         {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1341             rfscallnames_v4,
1342             &rfsproccnt_v4_ptr, rfsdisptab_v4},
1343 };
1344 
1345 /*
1346  * If nfs_portmon is set, then clients are required to use privileged
1347  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1348  * It is initialized to 0, i.e. not set.
1349  * N.B.: this attempt to carry forward the already ill-conceived notion
1350  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1351  * is it transport-dependent, it's laughably easy to spoof.  If you're
1352  * really interested in security, you must start with secure RPC instead.
1353  */
1354 static int nfs_portmon = 0;
1355 
1356 #ifdef DEBUG
     /*
      * DEBUG-only counters maintained by common_dispatch().  cred_hits counts
      * the requests for which the credential cached on the transport
      * (xprt->xp_cred) had a reference count of 1 and was used as is;
      * cred_misses counts the requests for which that credential was freed
      * and replaced with a fresh one from crget().
      */
1357 static int cred_hits = 0;
1358 static int cred_misses = 0;
1359 #endif
1360 
1361 
1362 #ifdef DEBUG
1363 /*
1364  * Debug code to allow disabling of rfs_dispatch() use of
1365  * fastxdrargs() and fastxdrres() calls for testing purposes.
1366  * common_dispatch() skips the fast argument decode when
      * rfs_no_fast_xdrargs is non-zero, and uses the fast result encode only
      * while rfs_no_fast_xdrres is zero.  Both default to 0 (fast paths on).
      */
1367 static int rfs_no_fast_xdrargs = 0;
1368 static int rfs_no_fast_xdrres = 0;
1369 #endif
1370 
/*
 * Union of the decoded-argument types of the ACL version 2 and version 3
 * procedures.  common_dispatch() places it next to union rfs_args inside
 * its on-stack args_buf, so that the one buffer is large enough for the
 * arguments of any NFS or ACL procedure.  No member is referenced by name
 * in the code visible here; the NULL procedures have no member.
 *
 * NOTE(review): acl3_setacl is the only member without the "_args" suffix
 * used by its siblings.  Left as is because renaming a member changes the
 * union's interface -- confirm nothing refers to it before tidying.
 */
1371 union acl_args {
1372         /*
1373          * ACL VERSION 2
1374          */
1375 
1376         /* ACL2_NULL = 0 */
1377 
1378         /* ACL2_GETACL = 1 */
1379         GETACL2args acl2_getacl_args;
1380 
1381         /* ACL2_SETACL = 2 */
1382         SETACL2args acl2_setacl_args;
1383 
1384         /* ACL2_GETATTR = 3 */
1385         GETATTR2args acl2_getattr_args;
1386 
1387         /* ACL2_ACCESS = 4 */
1388         ACCESS2args acl2_access_args;
1389 
1390         /* ACL2_GETXATTRDIR = 5 */
1391         GETXATTRDIR2args acl2_getxattrdir_args;
1392 
1393         /*
1394          * ACL VERSION 3
1395          */
1396 
1397         /* ACL3_NULL = 0 */
1398 
1399         /* ACL3_GETACL = 1 */
1400         GETACL3args acl3_getacl_args;
1401 
1402         /* ACL3_SETACL = 2 */
1403         SETACL3args acl3_setacl;
1404 
1405         /* ACL3_GETXATTRDIR = 3 */
1406         GETXATTRDIR3args acl3_getxattrdir_args;
1407 
1408 };
1409 
/*
 * Union of the result types of the ACL version 2 and version 3
 * procedures.  common_dispatch() places it next to union rfs_res inside
 * its on-stack res_buf, so that the one buffer is large enough for the
 * results of any NFS or ACL procedure.  No member is referenced by name in
 * the code visible here; the NULL procedures have no member.
 *
 * Every member must be declared with the procedure's *result* type, since
 * the union exists to size the result buffer.  acl2_getxattrdir_res
 * therefore uses GETXATTRDIR2res (not the argument type), matching the
 * version 3 member acl3_getxattrdir_res.
 */
1410 union acl_res {
1411         /*
1412          * ACL VERSION 2
1413          */
1414 
1415         /* ACL2_NULL = 0 */
1416 
1417         /* ACL2_GETACL = 1 */
1418         GETACL2res acl2_getacl_res;
1419 
1420         /* ACL2_SETACL = 2 */
1421         SETACL2res acl2_setacl_res;
1422 
1423         /* ACL2_GETATTR = 3 */
1424         GETATTR2res acl2_getattr_res;
1425 
1426         /* ACL2_ACCESS = 4 */
1427         ACCESS2res acl2_access_res;
1428 
1429         /* ACL2_GETXATTRDIR = 5 */
1430         GETXATTRDIR2res acl2_getxattrdir_res;
1431 
1432         /*
1433          * ACL VERSION 3
1434          */
1435 
1436         /* ACL3_NULL = 0 */
1437 
1438         /* ACL3_GETACL = 1 */
1439         GETACL3res acl3_getacl_res;
1440 
1441         /* ACL3_SETACL = 2 */
1442         SETACL3res acl3_setacl_res;
1443 
1444         /* ACL3_GETXATTRDIR = 3 */
1445         GETXATTRDIR3res acl3_getxattrdir_res;
1446 
1447 };
1448 
/*
 * Report whether the reply to a LOOKUP request carries the status
 * WNFSERR_CLNT_FLAVOR.
 *
 * req identifies the RPC version and procedure being served; res points
 * to the result buffer already filled in by the service routine.  Only
 * NFS version 2 LOOKUP (struct nfsdiropres) and NFS version 3 LOOKUP
 * (LOOKUP3res) replies are inspected.
 *
 * Returns TRUE when the inspected status equals WNFSERR_CLNT_FLAVOR and
 * FALSE in every other case, including for all other versions and
 * procedures.  common_dispatch() answers a TRUE result with
 * svcerr_weakauth() instead of sending the reply.
 */
1449 static bool_t
1450 auth_tooweak(struct svc_req *req, char *res)
1451 {
1452         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1453                 struct nfsdiropres *v2res = (struct nfsdiropres *)res;
1454                 return (v2res->dr_status == WNFSERR_CLNT_FLAVOR ?
1455                     TRUE : FALSE);
1456         }
1457         if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1458                 LOOKUP3res *v3res = (LOOKUP3res *)res;
1459                 return (v3res->status == WNFSERR_CLNT_FLAVOR ? TRUE : FALSE);
1460         }
1461 
1462         return (FALSE);
1463 }
1464 
1465 
1466 static void
1467 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1468                 rpcvers_t max_vers, char *pgmname,
1469                 struct rpc_disptable *disptable)
1470 {
1471         int which;
1472         rpcvers_t vers;
1473         char *args;
1474         union {
1475                         union rfs_args ra;
1476                         union acl_args aa;
1477                 } args_buf;
1478         char *res;
1479         union {
1480                         union rfs_res rr;
1481                         union acl_res ar;
1482                 } res_buf;
1483         struct rpcdisp *disp = NULL;
1484         int dis_flags = 0;
1485         cred_t *cr;
1486         int error = 0;
1487         int anon_ok;
1488         struct exportinfo *exi = NULL;
1489         unsigned int nfslog_rec_id;
1490         int dupstat;
1491         struct dupreq *dr;
1492         int authres;
1493         bool_t publicfh_ok = FALSE;
1494         enum_t auth_flavor;
1495         bool_t dupcached = FALSE;
1496         struct netbuf   nb;
1497         bool_t logging_enabled = FALSE;
1498         struct exportinfo *nfslog_exi = NULL;
1499         char **procnames;
1500         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1501 
1502         vers = req->rq_vers;
1503 
1504         if (vers < min_vers || vers > max_vers) {
1505                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1506                 error++;
1507                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1508                 goto done;
1509         }
1510         vers -= min_vers;
1511 
1512         which = req->rq_proc;
1513         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1514                 svcerr_noproc(req->rq_xprt);
1515                 error++;
1516                 goto done;
1517         }
1518 
1519         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1520 
1521         disp = &disptable[(int)vers].dis_table[which];
1522         procnames = disptable[(int)vers].dis_procnames;
1523 
1524         auth_flavor = req->rq_cred.oa_flavor;
1525 
1526         /*
1527          * Deserialize into the args struct.
1528          */
1529         args = (char *)&args_buf;
1530 
1531 #ifdef DEBUG
1532         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1533             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1534             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1535 #else
1536         if ((auth_flavor == RPCSEC_GSS) ||
1537             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1538             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1539 #endif
1540         {
1541                 bzero(args, disp->dis_argsz);
1542                 if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) {
1543                         error++;
1544                         /*
1545                          * Check if we are outside our capabilities.
1546                          */
1547                         if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
1548                                 goto done;
1549 
1550                         svcerr_decode(xprt);
1551                         cmn_err(CE_NOTE,
1552                             "Failed to decode arguments for %s version %u "
1553                             "procedure %s client %s%s",
1554                             pgmname, vers + min_vers, procnames[which],
1555                             client_name(req), client_addr(req, cbuf));
1556                         goto done;
1557                 }
1558         }
1559 
1560         /*
1561          * If Version 4 use that specific dispatch function.
1562          */
1563         if (req->rq_vers == 4) {
1564                 error += rfs4_dispatch(disp, req, xprt, args);
1565                 goto done;
1566         }
1567 
1568         dis_flags = disp->dis_flags;
1569 
1570         /*
1571          * Find export information and check authentication,
1572          * setting the credential if everything is ok.
1573          */
1574         if (disp->dis_getfh != NULL) {
1575                 void *fh;
1576                 fsid_t *fsid;
1577                 fid_t *fid, *xfid;
1578                 fhandle_t *fh2;
1579                 nfs_fh3 *fh3;
1580 
1581                 fh = (*disp->dis_getfh)(args);
1582                 switch (req->rq_vers) {
1583                 case NFS_VERSION:
1584                         fh2 = (fhandle_t *)fh;
1585                         fsid = &fh2->fh_fsid;
1586                         fid = (fid_t *)&fh2->fh_len;
1587                         xfid = (fid_t *)&fh2->fh_xlen;
1588                         break;
1589                 case NFS_V3:
1590                         fh3 = (nfs_fh3 *)fh;
1591                         fsid = &fh3->fh3_fsid;
1592                         fid = FH3TOFIDP(fh3);
1593                         xfid = FH3TOXFIDP(fh3);
1594                         break;
1595                 }
1596 
1597                 /*
1598                  * Fix for bug 1038302 - corbin
1599                  * There is a problem here if anonymous access is
1600                  * disallowed.  If the current request is part of the
1601                  * client's mount process for the requested filesystem,
1602                  * then it will carry root (uid 0) credentials on it, and
1603                  * will be denied by checkauth if that client does not
1604                  * have explicit root=0 permission.  This will cause the
1605                  * client's mount operation to fail.  As a work-around,
1606                  * we check here to see if the request is a getattr or
1607                  * statfs operation on the exported vnode itself, and
1608                  * pass a flag to checkauth with the result of this test.
1609                  *
1610                  * The filehandle refers to the mountpoint itself if
1611                  * the fh_data and fh_xdata portions of the filehandle
1612                  * are equal.
1613                  *
1614                  * Added anon_ok argument to checkauth().
1615                  */
1616 
1617                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1618                         anon_ok = 1;
1619                 else
1620                         anon_ok = 0;
1621 
1622                 cr = xprt->xp_cred;
1623                 ASSERT(cr != NULL);
1624 #ifdef DEBUG
1625                 if (crgetref(cr) != 1) {
1626                         crfree(cr);
1627                         cr = crget();
1628                         xprt->xp_cred = cr;
1629                         cred_misses++;
1630                 } else
1631                         cred_hits++;
1632 #else
1633                 if (crgetref(cr) != 1) {
1634                         crfree(cr);
1635                         cr = crget();
1636                         xprt->xp_cred = cr;
1637                 }
1638 #endif
1639 
1640                 exi = checkexport(fsid, xfid);
1641 
1642                 if (exi != NULL) {
1643                         publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
1644 
1645                         /*
1646                          * Don't allow non-V4 clients access
1647                          * to pseudo exports
1648                          */
1649                         if (PSEUDO(exi)) {
1650                                 svcerr_weakauth(xprt);
1651                                 error++;
1652                                 goto done;
1653                         }
1654 
1655                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok);
1656                         /*
1657                          * authres >  0: authentication OK - proceed
1658                          * authres == 0: authentication weak - return error
1659                          * authres <  0: authentication timeout - drop
1660                          */
1661                         if (authres <= 0) {
1662                                 if (authres == 0) {
1663                                         svcerr_weakauth(xprt);
1664                                         error++;
1665                                 }
1666                                 goto done;
1667                         }
1668                 }
1669         } else
1670                 cr = NULL;
1671 
1672         if ((dis_flags & RPC_MAPRESP) && (auth_flavor != RPCSEC_GSS)) {
1673                 res = (char *)SVC_GETRES(xprt, disp->dis_ressz);
1674                 if (res == NULL)
1675                         res = (char *)&res_buf;
1676         } else
1677                 res = (char *)&res_buf;
1678 
1679         if (!(dis_flags & RPC_IDEMPOTENT)) {
1680                 dupstat = SVC_DUP_EXT(xprt, req, res, disp->dis_ressz, &dr,
1681                     &dupcached);
1682 
1683                 switch (dupstat) {
1684                 case DUP_ERROR:
1685                         svcerr_systemerr(xprt);
1686                         error++;
1687                         goto done;
1688                         /* NOTREACHED */
1689                 case DUP_INPROGRESS:
1690                         if (res != (char *)&res_buf)
1691                                 SVC_FREERES(xprt);
1692                         error++;
1693                         goto done;
1694                         /* NOTREACHED */
1695                 case DUP_NEW:
1696                 case DUP_DROP:
1697                         curthread->t_flag |= T_DONTPEND;
1698 
1699                         (*disp->dis_proc)(args, res, exi, req, cr);
1700 
1701                         curthread->t_flag &= ~T_DONTPEND;
1702                         if (curthread->t_flag & T_WOULDBLOCK) {
1703                                 curthread->t_flag &= ~T_WOULDBLOCK;
1704                                 SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1705                                     disp->dis_ressz, DUP_DROP);
1706                                 if (res != (char *)&res_buf)
1707                                         SVC_FREERES(xprt);
1708                                 error++;
1709                                 goto done;
1710                         }
1711                         if (dis_flags & RPC_AVOIDWORK) {
1712                                 SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1713                                     disp->dis_ressz, DUP_DROP);
1714                         } else {
1715                                 SVC_DUPDONE_EXT(xprt, dr, res,
1716                                     disp->dis_resfree == nullfree ? NULL :
1717                                     disp->dis_resfree,
1718                                     disp->dis_ressz, DUP_DONE);
1719                                 dupcached = TRUE;
1720                         }
1721                         break;
1722                 case DUP_DONE:
1723                         break;
1724                 }
1725 
1726         } else {
1727                 curthread->t_flag |= T_DONTPEND;
1728 
1729                 (*disp->dis_proc)(args, res, exi, req, cr);
1730 
1731                 curthread->t_flag &= ~T_DONTPEND;
1732                 if (curthread->t_flag & T_WOULDBLOCK) {
1733                         curthread->t_flag &= ~T_WOULDBLOCK;
1734                         if (res != (char *)&res_buf)
1735                                 SVC_FREERES(xprt);
1736                         error++;
1737                         goto done;
1738                 }
1739         }
1740 
1741         if (auth_tooweak(req, res)) {
1742                 svcerr_weakauth(xprt);
1743                 error++;
1744                 goto done;
1745         }
1746 
1747         /*
1748          * Check to see if logging has been enabled on the server.
1749          * If so, then obtain the export info struct to be used for
1750          * the later writing of the log record.  This is done for
1751          * the case that a lookup is done across a non-logged public
1752          * file system.
1753          */
1754         if (nfslog_buffer_list != NULL) {
1755                 nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
1756                 /*
1757                  * Is logging enabled?
1758                  */
1759                 logging_enabled = (nfslog_exi != NULL);
1760 
1761                 /*
1762                  * Copy the netbuf for logging purposes, before it is
1763                  * freed by svc_sendreply().
1764                  */
1765                 if (logging_enabled) {
1766                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1767                         /*
1768                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1769                          * res gets copied directly into the mbuf and
1770                          * may be freed soon after the sendreply. So we
1771                          * must copy it here to a safe place...
1772                          */
1773                         if (res != (char *)&res_buf) {
1774                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1775                         }
1776                 }
1777         }
1778 
1779         /*
1780          * Serialize and send results struct
1781          */
1782 #ifdef DEBUG
1783         if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1784 #else
1785         if (res != (char *)&res_buf)
1786 #endif
1787         {
1788                 if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1789                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1790                         svcerr_systemerr(xprt);
1791                         error++;
1792                 }
1793         } else {
1794                 if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1795                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1796                         svcerr_systemerr(xprt);
1797                         error++;
1798                 }
1799         }
1800 
1801         /*
1802          * Log if needed
1803          */
1804         if (logging_enabled) {
1805                 nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1806                     cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1807                 exi_rele(nfslog_exi);
1808                 kmem_free((&nb)->buf, (&nb)->len);
1809         }
1810 
1811         /*
1812          * Free results struct. With the addition of NFS V4 we can
1813          * have non-idempotent procedures with functions.
1814          */
1815         if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1816                 (*disp->dis_resfree)(res);
1817         }
1818 
1819 done:
1820         /*
1821          * Free arguments struct
1822          */
1823         if (disp) {
1824                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1825                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1826                         error++;
1827                 }
1828         } else {
1829                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1830                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1831                         error++;
1832                 }
1833         }
1834 
1835         if (exi != NULL)
1836                 exi_rele(exi);
1837 
1838         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1839 
1840         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1841 }
1842 
/*
 * RPC dispatch entry point for the NFS program.  Forwards the request to
 * common_dispatch() with the supported NFS version range
 * (NFS_VERSMIN..NFS_VERSMAX), the program name "NFS" (used by
 * common_dispatch() in its diagnostic messages), and the NFS dispatch
 * tables in rfs_disptable.
 */
static void
rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
            "NFS", rfs_disptable);
}
1849 
/*
 * Names of the NFS_ACL version 2 procedures.  The order matches the
 * procedure numbering used by acldisptab_v2[] (ACL2_NULL = 0 through
 * ACL2_GETXATTRDIR = 5), so the two tables must be kept in step.
 */
static char *aclcallnames_v2[] = {
        "ACL2_NULL",
        "ACL2_GETACL",
        "ACL2_SETACL",
        "ACL2_GETATTR",
        "ACL2_ACCESS",
        "ACL2_GETXATTRDIR"
};
1858 
/*
 * Dispatch table for NFS_ACL version 2: one struct rpcdisp entry per
 * procedure, in procedure-number order.
 *
 * Reading the initializers, each entry supplies: the service routine;
 * the argument XDR routine, its "fast" variant and the argument size;
 * the result XDR routine, its "fast" variant and the result size; the
 * result free routine; the RPC_* flags; and the routine that extracts
 * the filehandle from the arguments.  NULL_xdrproc_t marks a missing
 * fast variant and nullfree marks results that need no freeing.
 * NOTE(review): the field order is inferred from the initializer names;
 * confirm against the struct rpcdisp definition.
 *
 * Fast result encoders for SETACL, GETATTR and ACCESS are only wired in
 * when _LITTLE_ENDIAN is defined; otherwise the regular encoder is used.
 */
static struct rpcdisp acldisptab_v2[] = {
        /*
         * ACL VERSION 2
         */

        /* ACL2_NULL = 0 */
        {rpc_null,
            xdr_void, NULL_xdrproc_t, 0,
            xdr_void, NULL_xdrproc_t, 0,
            nullfree, RPC_IDEMPOTENT,
            0},

        /* ACL2_GETACL = 1 */
        {acl2_getacl,
            xdr_GETACL2args, xdr_fastGETACL2args, sizeof (GETACL2args),
            xdr_GETACL2res, NULL_xdrproc_t, sizeof (GETACL2res),
            acl2_getacl_free, RPC_IDEMPOTENT,
            acl2_getacl_getfh},

        /* ACL2_SETACL = 2 */
        {acl2_setacl,
            xdr_SETACL2args, NULL_xdrproc_t, sizeof (SETACL2args),
#ifdef _LITTLE_ENDIAN
            xdr_SETACL2res, xdr_fastSETACL2res, sizeof (SETACL2res),
#else
            xdr_SETACL2res, NULL_xdrproc_t, sizeof (SETACL2res),
#endif
            nullfree, RPC_MAPRESP,
            acl2_setacl_getfh},

        /* ACL2_GETATTR = 3 */
        {acl2_getattr,
            xdr_GETATTR2args, xdr_fastGETATTR2args, sizeof (GETATTR2args),
#ifdef _LITTLE_ENDIAN
            xdr_GETATTR2res, xdr_fastGETATTR2res, sizeof (GETATTR2res),
#else
            xdr_GETATTR2res, NULL_xdrproc_t, sizeof (GETATTR2res),
#endif
            nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
            acl2_getattr_getfh},

        /* ACL2_ACCESS = 4 */
        {acl2_access,
            xdr_ACCESS2args, xdr_fastACCESS2args, sizeof (ACCESS2args),
#ifdef _LITTLE_ENDIAN
            xdr_ACCESS2res, xdr_fastACCESS2res, sizeof (ACCESS2res),
#else
            xdr_ACCESS2res, NULL_xdrproc_t, sizeof (ACCESS2res),
#endif
            nullfree, RPC_IDEMPOTENT|RPC_MAPRESP,
            acl2_access_getfh},

        /* ACL2_GETXATTRDIR = 5 */
        {acl2_getxattrdir,
            xdr_GETXATTRDIR2args, NULL_xdrproc_t, sizeof (GETXATTRDIR2args),
            xdr_GETXATTRDIR2res, NULL_xdrproc_t, sizeof (GETXATTRDIR2res),
            nullfree, RPC_IDEMPOTENT,
            acl2_getxattrdir_getfh},
};
1918 
/*
 * Names of the NFS_ACL version 3 procedures.  The order matches the
 * procedure numbering used by acldisptab_v3[] (ACL3_NULL = 0 through
 * ACL3_GETXATTRDIR = 3), so the two tables must be kept in step.
 */
static char *aclcallnames_v3[] = {
        "ACL3_NULL",
        "ACL3_GETACL",
        "ACL3_SETACL",
        "ACL3_GETXATTRDIR"
};
1925 
/*
 * Dispatch table for NFS_ACL version 3: one struct rpcdisp entry per
 * procedure, in procedure-number order.  No version 3 entry provides a
 * "fast" XDR variant (every such slot is NULL_xdrproc_t), and only
 * GETACL has a result free routine other than nullfree.
 * NOTE(review): the meaning of each initializer position is inferred
 * from the names used; confirm against the struct rpcdisp definition.
 */
static struct rpcdisp acldisptab_v3[] = {
        /*
         * ACL VERSION 3
         */

        /* ACL3_NULL = 0 */
        {rpc_null,
            xdr_void, NULL_xdrproc_t, 0,
            xdr_void, NULL_xdrproc_t, 0,
            nullfree, RPC_IDEMPOTENT,
            0},

        /* ACL3_GETACL = 1 */
        {acl3_getacl,
            xdr_GETACL3args, NULL_xdrproc_t, sizeof (GETACL3args),
            xdr_GETACL3res, NULL_xdrproc_t, sizeof (GETACL3res),
            acl3_getacl_free, RPC_IDEMPOTENT,
            acl3_getacl_getfh},

        /* ACL3_SETACL = 2 */
        {acl3_setacl,
            xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
            xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
            nullfree, 0,
            acl3_setacl_getfh},

        /* ACL3_GETXATTRDIR = 3 */
        {acl3_getxattrdir,
            xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
            xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
            nullfree, RPC_IDEMPOTENT,
            acl3_getxattrdir_getfh},
};
1959 
/*
 * Per-version table for the NFS_ACL program, handed to common_dispatch()
 * by acl_dispatch().  The first entry describes version 2 and the second
 * version 3.  Each entry carries the number of procedures (computed from
 * the dispatch table so it cannot drift), the procedure name table, the
 * address of a per-version counter pointer, and the dispatch table
 * itself.
 * NOTE(review): aclproccnt_v2_ptr/aclproccnt_v3_ptr are presumably the
 * per-procedure call statistics; confirm where they are defined.
 */
static struct rpc_disptable acl_disptable[] = {
        {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
                aclcallnames_v2,
                &aclproccnt_v2_ptr, acldisptab_v2},
        {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
                aclcallnames_v3,
                &aclproccnt_v3_ptr, acldisptab_v3},
};
1968 
/*
 * RPC dispatch entry point for the NFS_ACL program.  Forwards the request
 * to common_dispatch() with the supported ACL version range
 * (NFS_ACL_VERSMIN..NFS_ACL_VERSMAX), the program name "ACL" (used by
 * common_dispatch() in its diagnostic messages), and the ACL dispatch
 * tables in acl_disptable.
 */
static void
acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
            "ACL", acl_disptable);
}
1975 
1976 int
1977 checkwin(int flavor, int window, struct svc_req *req)
1978 {
1979         struct authdes_cred *adc;
1980 
1981         switch (flavor) {
1982         case AUTH_DES:
1983                 adc = (struct authdes_cred *)req->rq_clntcred;
1984                 if (adc->adc_fullname.window > window)
1985                         return (0);
1986                 break;
1987 
1988         default:
1989                 break;
1990         }
1991         return (1);
1992 }
1993 
1994 
1995 /*
1996  * checkauth() will check the access permission against the export
1997  * information.  Then map root uid/gid to appropriate uid/gid.
1998  *
1999  * This routine is used by NFS V3 and V2 code.
2000  */
2001 static int
2002 checkauth(struct exportinfo *exi, struct svc_req *req, cred_t *cr, int anon_ok,
2003     bool_t publicfh_ok)
2004 {
2005         int i, nfsflavor, rpcflavor, stat, access;
2006         struct secinfo *secp;
2007         caddr_t principal;
2008         char buf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
2009         int anon_res = 0;
2010 
2011         /*
2012          * Check for privileged port number
2013          * N.B.:  this assumes that we know the format of a netbuf.
2014          */
2015         if (nfs_portmon) {
2016                 struct sockaddr *ca;
2017                 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2018 
2019                 if (ca == NULL)
2020                         return (0);
2021 
2022                 if ((ca->sa_family == AF_INET &&
2023                     ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2024                     IPPORT_RESERVED) ||
2025                     (ca->sa_family == AF_INET6 &&
2026                     ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2027                     IPPORT_RESERVED)) {
2028                         cmn_err(CE_NOTE,
2029                             "nfs_server: client %s%ssent NFS request from "
2030                             "unprivileged port",
2031                             client_name(req), client_addr(req, buf));
2032                         return (0);
2033                 }
2034         }
2035 
2036         /*
2037          *  return 1 on success or 0 on failure
2038          */
2039         stat = sec_svc_getcred(req, cr, &principal, &nfsflavor);
2040 
2041         /*
2042          * A failed AUTH_UNIX svc_get_cred() implies we couldn't set
2043          * the credentials; below we map that to anonymous.
2044          */
2045         if (!stat && nfsflavor != AUTH_UNIX) {
2046                 cmn_err(CE_NOTE,
2047                     "nfs_server: couldn't get unix cred for %s",
2048                     client_name(req));
2049                 return (0);
2050         }
2051 
2052         /*
2053          * Short circuit checkauth() on operations that support the
2054          * public filehandle, and if the request for that operation
2055          * is using the public filehandle. Note that we must call
2056          * sec_svc_getcred() first so that xp_cookie is set to the
2057          * right value. Normally xp_cookie is just the RPC flavor
2058          * of the the request, but in the case of RPCSEC_GSS it
2059          * could be a pseudo flavor.
2060          */
2061         if (publicfh_ok)
2062                 return (1);
2063 
2064         rpcflavor = req->rq_cred.oa_flavor;
2065         /*
2066          * Check if the auth flavor is valid for this export
2067          */
2068         access = nfsauth_access(exi, req);
2069         if (access & NFSAUTH_DROP)
2070                 return (-1);    /* drop the request */
2071 
2072         if (access & NFSAUTH_DENIED) {
2073                 /*
2074                  * If anon_ok == 1 and we got NFSAUTH_DENIED, it was
2075                  * probably due to the flavor not matching during the
2076                  * the mount attempt. So map the flavor to AUTH_NONE
2077                  * so that the credentials get mapped to the anonymous
2078                  * user.
2079                  */
2080                 if (anon_ok == 1)
2081                         rpcflavor = AUTH_NONE;
2082                 else
2083                         return (0);     /* deny access */
2084 
2085         } else if (access & NFSAUTH_MAPNONE) {
2086                 /*
2087                  * Access was granted even though the flavor mismatched
2088                  * because AUTH_NONE was one of the exported flavors.
2089                  */
2090                 rpcflavor = AUTH_NONE;
2091 
2092         } else if (access & NFSAUTH_WRONGSEC) {
2093                 /*
2094                  * NFSAUTH_WRONGSEC is used for NFSv4. If we get here,
2095                  * it means a client ignored the list of allowed flavors
2096                  * returned via the MOUNT protocol. So we just disallow it!
2097                  */
2098                 return (0);
2099         }
2100 
2101         switch (rpcflavor) {
2102         case AUTH_NONE:
2103                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2104                     exi->exi_export.ex_anon);
2105                 (void) crsetgroups(cr, 0, NULL);
2106                 break;
2107 
2108         case AUTH_UNIX:
2109                 if (!stat || crgetuid(cr) == 0 && !(access & NFSAUTH_ROOT)) {
2110                         anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2111                             exi->exi_export.ex_anon);
2112                         (void) crsetgroups(cr, 0, NULL);
2113                 } else if (!stat || crgetuid(cr) == 0 &&
2114                     access & NFSAUTH_ROOT) {
2115                         /*
2116                          * It is root, so apply rootid to get real UID
2117                          * Find the secinfo structure.  We should be able
2118                          * to find it by the time we reach here.
2119                          * nfsauth_access() has done the checking.
2120                          */
2121                         secp = NULL;
2122                         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2123                                 struct secinfo *sptr;
2124                                 sptr = &exi->exi_export.ex_secinfo[i];
2125                                 if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2126                                         secp = sptr;
2127                                         break;
2128                                 }
2129                         }
2130                         if (secp != NULL) {
2131                                 (void) crsetugid(cr, secp->s_rootid,
2132                                     secp->s_rootid);
2133                                 (void) crsetgroups(cr, 0, NULL);
2134                         }
2135                 }
2136                 break;
2137 
2138         case AUTH_DES:
2139         case RPCSEC_GSS:
2140                 /*
2141                  *  Find the secinfo structure.  We should be able
2142                  *  to find it by the time we reach here.
2143                  *  nfsauth_access() has done the checking.
2144                  */
2145                 secp = NULL;
2146                 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2147                         if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2148                             nfsflavor) {
2149                                 secp = &exi->exi_export.ex_secinfo[i];
2150                                 break;
2151                         }
2152                 }
2153 
2154                 if (!secp) {
2155                         cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2156                             "no secinfo data for flavor %d",
2157                             client_name(req), client_addr(req, buf),
2158                             nfsflavor);
2159                         return (0);
2160                 }
2161 
2162                 if (!checkwin(rpcflavor, secp->s_window, req)) {
2163                         cmn_err(CE_NOTE,
2164                             "nfs_server: client %s%sused invalid "
2165                             "auth window value",
2166                             client_name(req), client_addr(req, buf));
2167                         return (0);
2168                 }
2169 
2170                 /*
2171                  * Map root principals listed in the share's root= list to root,
2172                  * and map any others principals that were mapped to root by RPC
2173                  * to anon.
2174                  */
2175                 if (principal && sec_svc_inrootlist(rpcflavor, principal,
2176                     secp->s_rootcnt, secp->s_rootnames)) {
2177                         if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2178                                 return (1);
2179 
2180 
2181                         (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2182 
2183                         /*
2184                          * NOTE: If and when kernel-land privilege tracing is
2185                          * added this may have to be replaced with code that
2186                          * retrieves root's supplementary groups (e.g., using
2187                          * kgss_get_group_info().  In the meantime principals
2188                          * mapped to uid 0 get all privileges, so setting cr's
2189                          * supplementary groups for them does nothing.
2190                          */
2191                         (void) crsetgroups(cr, 0, NULL);
2192 
2193                         return (1);
2194                 }
2195 
2196                 /*
2197                  * Not a root princ, or not in root list, map UID 0/nobody to
2198                  * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2199                  * UID_NOBODY and GID_NOBODY, respectively.)
2200                  */
2201                 if (crgetuid(cr) != 0 &&
2202                     (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2203                         return (1);
2204 
2205                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2206                     exi->exi_export.ex_anon);
2207                 (void) crsetgroups(cr, 0, NULL);
2208                 break;
2209         default:
2210                 return (0);
2211         } /* switch on rpcflavor */
2212 
2213         /*
2214          * Even if anon access is disallowed via ex_anon == -1, we allow
2215          * this access if anon_ok is set.  So set creds to the default
2216          * "nobody" id.
2217          */
2218         if (anon_res != 0) {
2219                 if (anon_ok == 0) {
2220                         cmn_err(CE_NOTE,
2221                             "nfs_server: client %s%ssent wrong "
2222                             "authentication for %s",
2223                             client_name(req), client_addr(req, buf),
2224                             exi->exi_export.ex_path ?
2225                             exi->exi_export.ex_path : "?");
2226                         return (0);
2227                 }
2228 
2229                 if (crsetugid(cr, UID_NOBODY, GID_NOBODY) != 0)
2230                         return (0);
2231         }
2232 
2233         return (1);
2234 }
2235 
2236 /*
2237  * returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
2238  * and 1 on success
2239  */
2240 int
2241 checkauth4(struct compound_state *cs, struct svc_req *req)
2242 {
2243         int i, rpcflavor, access;
2244         struct secinfo *secp;
2245         char buf[MAXHOST + 1];
2246         int anon_res = 0, nfsflavor;
2247         struct exportinfo *exi;
2248         cred_t  *cr;
2249         caddr_t principal;
2250 
2251         exi = cs->exi;
2252         cr = cs->cr;
2253         principal = cs->principal;
2254         nfsflavor = cs->nfsflavor;
2255 
2256         ASSERT(cr != NULL);
2257 
2258         rpcflavor = req->rq_cred.oa_flavor;
2259         cs->access &= ~CS_ACCESS_LIMITED;
2260 
2261         /*
2262          * Check for privileged port number
2263          * N.B.:  this assumes that we know the format of a netbuf.
2264          */
2265         if (nfs_portmon) {
2266                 struct sockaddr *ca;
2267                 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2268 
2269                 if (ca == NULL)
2270                         return (0);
2271 
2272                 if ((ca->sa_family == AF_INET &&
2273                     ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2274                     IPPORT_RESERVED) ||
2275                     (ca->sa_family == AF_INET6 &&
2276                     ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2277                     IPPORT_RESERVED)) {
2278                         cmn_err(CE_NOTE,
2279                             "nfs_server: client %s%ssent NFSv4 request from "
2280                             "unprivileged port",
2281                             client_name(req), client_addr(req, buf));
2282                         return (0);
2283                 }
2284         }
2285 
2286         /*
2287          * Check the access right per auth flavor on the vnode of
2288          * this export for the given request.
2289          */
2290         access = nfsauth4_access(cs->exi, cs->vp, req);
2291 
2292         if (access & NFSAUTH_WRONGSEC)
2293                 return (-2);    /* no access for this security flavor */
2294 
2295         if (access & NFSAUTH_DROP)
2296                 return (-1);    /* drop the request */
2297 
2298         if (access & NFSAUTH_DENIED) {
2299 
2300                 if (exi->exi_export.ex_seccnt > 0)
2301                         return (0);     /* deny access */
2302 
2303         } else if (access & NFSAUTH_LIMITED) {
2304 
2305                 cs->access |= CS_ACCESS_LIMITED;
2306 
2307         } else if (access & NFSAUTH_MAPNONE) {
2308                 /*
2309                  * Access was granted even though the flavor mismatched
2310                  * because AUTH_NONE was one of the exported flavors.
2311                  */
2312                 rpcflavor = AUTH_NONE;
2313         }
2314 
2315         /*
2316          * XXX probably need to redo some of it for nfsv4?
2317          * return 1 on success or 0 on failure
2318          */
2319 
2320         switch (rpcflavor) {
2321         case AUTH_NONE:
2322                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2323                     exi->exi_export.ex_anon);
2324                 (void) crsetgroups(cr, 0, NULL);
2325                 break;
2326 
2327         case AUTH_UNIX:
2328                 if (crgetuid(cr) == 0 && !(access & NFSAUTH_ROOT)) {
2329                         anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2330                             exi->exi_export.ex_anon);
2331                         (void) crsetgroups(cr, 0, NULL);
2332                 } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2333                         /*
2334                          * It is root, so apply rootid to get real UID
2335                          * Find the secinfo structure.  We should be able
2336                          * to find it by the time we reach here.
2337                          * nfsauth_access() has done the checking.
2338                          */
2339                         secp = NULL;
2340                         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2341                                 struct secinfo *sptr;
2342                                 sptr = &exi->exi_export.ex_secinfo[i];
2343                                 if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2344                                         secp = &exi->exi_export.ex_secinfo[i];
2345                                         break;
2346                                 }
2347                         }
2348                         if (secp != NULL) {
2349                                 (void) crsetugid(cr, secp->s_rootid,
2350                                     secp->s_rootid);
2351                                 (void) crsetgroups(cr, 0, NULL);
2352                         }
2353                 }
2354                 break;
2355 
2356         default:
2357                 /*
2358                  *  Find the secinfo structure.  We should be able
2359                  *  to find it by the time we reach here.
2360                  *  nfsauth_access() has done the checking.
2361                  */
2362                 secp = NULL;
2363                 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2364                         if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2365                             nfsflavor) {
2366                                 secp = &exi->exi_export.ex_secinfo[i];
2367                                 break;
2368                         }
2369                 }
2370 
2371                 if (!secp) {
2372                         cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2373                             "no secinfo data for flavor %d",
2374                             client_name(req), client_addr(req, buf),
2375                             nfsflavor);
2376                         return (0);
2377                 }
2378 
2379                 if (!checkwin(rpcflavor, secp->s_window, req)) {
2380                         cmn_err(CE_NOTE,
2381                             "nfs_server: client %s%sused invalid "
2382                             "auth window value",
2383                             client_name(req), client_addr(req, buf));
2384                         return (0);
2385                 }
2386 
2387                 /*
2388                  * Map root principals listed in the share's root= list to root,
2389                  * and map any others principals that were mapped to root by RPC
2390                  * to anon. If not going to anon, set to rootid (root_mapping).
2391                  */
2392                 if (principal && sec_svc_inrootlist(rpcflavor, principal,
2393                     secp->s_rootcnt, secp->s_rootnames)) {
2394                         if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2395                                 return (1);
2396 
2397                         (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2398 
2399                         /*
2400                          * NOTE: If and when kernel-land privilege tracing is
2401                          * added this may have to be replaced with code that
2402                          * retrieves root's supplementary groups (e.g., using
2403                          * kgss_get_group_info().  In the meantime principals
2404                          * mapped to uid 0 get all privileges, so setting cr's
2405                          * supplementary groups for them does nothing.
2406                          */
2407                         (void) crsetgroups(cr, 0, NULL);
2408 
2409                         return (1);
2410                 }
2411 
2412                 /*
2413                  * Not a root princ, or not in root list, map UID 0/nobody to
2414                  * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2415                  * UID_NOBODY and GID_NOBODY, respectively.)
2416                  */
2417                 if (crgetuid(cr) != 0 &&
2418                     (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2419                         return (1);
2420 
2421                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2422                     exi->exi_export.ex_anon);
2423                 (void) crsetgroups(cr, 0, NULL);
2424                 break;
2425         } /* switch on rpcflavor */
2426 
2427         /*
2428          * Even if anon access is disallowed via ex_anon == -1, we allow
2429          * this access if anon_ok is set.  So set creds to the default
2430          * "nobody" id.
2431          */
2432 
2433         if (anon_res != 0) {
2434                 cmn_err(CE_NOTE,
2435                     "nfs_server: client %s%ssent wrong "
2436                     "authentication for %s",
2437                     client_name(req), client_addr(req, buf),
2438                     exi->exi_export.ex_path ?
2439                     exi->exi_export.ex_path : "?");
2440                 return (0);
2441         }
2442 
2443         return (1);
2444 }
2445 
2446 
2447 static char *
2448 client_name(struct svc_req *req)
2449 {
2450         char *hostname = NULL;
2451 
2452         /*
2453          * If it's a Unix cred then use the
2454          * hostname from the credential.
2455          */
2456         if (req->rq_cred.oa_flavor == AUTH_UNIX) {
2457                 hostname = ((struct authunix_parms *)
2458                     req->rq_clntcred)->aup_machname;
2459         }
2460         if (hostname == NULL)
2461                 hostname = "";
2462 
2463         return (hostname);
2464 }
2465 
2466 static char *
2467 client_addr(struct svc_req *req, char *buf)
2468 {
2469         struct sockaddr *ca;
2470         uchar_t *b;
2471         char *frontspace = "";
2472 
2473         /*
2474          * We assume we are called in tandem with client_name and the
2475          * format string looks like "...client %s%sblah blah..."
2476          *
2477          * If it's a Unix cred then client_name returned
2478          * a host name, so we need insert a space between host name
2479          * and IP address.
2480          */
2481         if (req->rq_cred.oa_flavor == AUTH_UNIX)
2482                 frontspace = " ";
2483 
2484         /*
2485          * Convert the caller's IP address to a dotted string
2486          */
2487         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2488 
2489         if (ca->sa_family == AF_INET) {
2490                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
2491                 (void) sprintf(buf, "%s(%d.%d.%d.%d) ", frontspace,
2492                     b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
2493         } else if (ca->sa_family == AF_INET6) {
2494                 struct sockaddr_in6 *sin6;
2495                 sin6 = (struct sockaddr_in6 *)ca;
2496                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
2497                     buf, INET6_ADDRSTRLEN);
2498 
2499         } else {
2500 
2501                 /*
2502                  * No IP address to print. If there was a host name
2503                  * printed, then we print a space.
2504                  */
2505                 (void) sprintf(buf, frontspace);
2506         }
2507 
2508         return (buf);
2509 }
2510 
2511 /*
2512  * NFS Server initialization routine.  This routine should only be called
2513  * once.  It performs the following tasks:
2514  *      - Call sub-initialization routines (localize access to variables)
2515  *      - Initialize all locks
2516  *      - initialize the version 3 write verifier
2517  */
2518 int
2519 nfs_srvinit(void)
2520 {
2521         int error;
2522 
2523         error = nfs_exportinit();
2524         if (error != 0)
2525                 return (error);
2526         error = rfs4_srvrinit();
2527         if (error != 0) {
2528                 nfs_exportfini();
2529                 return (error);
2530         }
2531         rfs_srvrinit();
2532         rfs3_srvrinit();
2533         nfsauth_init();
2534 
2535         /* Init the stuff to control start/stop */
2536         nfs_server_upordown = NFS_SERVER_STOPPED;
2537         mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2538         cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2539         mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2540         cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2541 
2542         return (0);
2543 }
2544 
/*
 * NFS Server finalization routine. This routine is called to clean up the
 * initialization work previously performed if the NFS server module could
 * not be loaded correctly.
 *
 * The sub-systems are torn down in the reverse of the order in which
 * nfs_srvinit() brought them up, and the start/stop and RDMA locks and
 * condition variables created there are destroyed.
 *
 * NOTE(review): nfs_srvinit() calls rfs4_srvrinit(), but no NFSv4
 * counterpart is invoked here - confirm that this is intentional.
 */
void
nfs_srvfini(void)
{
        nfsauth_fini();
        rfs3_srvrfini();
        rfs_srvrfini();
        nfs_exportfini();

        mutex_destroy(&nfs_server_upordown_lock);
        cv_destroy(&nfs_server_upordown_cv);
        mutex_destroy(&rdma_wait_mutex);
        cv_destroy(&rdma_wait_cv);
}
2563 
2564 /*
2565  * Set up an iovec array of up to cnt pointers.
2566  */
2567 
2568 void
2569 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2570 {
2571         while (m != NULL && cnt-- > 0) {
2572                 iovp->iov_base = (caddr_t)m->b_rptr;
2573                 iovp->iov_len = (m->b_wptr - m->b_rptr);
2574                 iovp++;
2575                 m = m->b_cont;
2576         }
2577 }
2578 
2579 /*
2580  * Common code between NFS Version 2 and NFS Version 3 for the public
2581  * filehandle multicomponent lookups.
2582  */
2583 
2584 /*
2585  * Public filehandle evaluation of a multi-component lookup, following
2586  * symbolic links, if necessary. This may result in a vnode in another
2587  * filesystem, which is OK as long as the other filesystem is exported.
2588  *
2589  * Note that the exi will be set either to NULL or a new reference to the
2590  * exportinfo struct that corresponds to the vnode of the multi-component path.
2591  * It is the callers responsibility to release this reference.
2592  */
2593 int
2594 rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
2595     struct exportinfo **exi, struct sec_ol *sec)
2596 {
2597         int pathflag;
2598         vnode_t *mc_dvp = NULL;
2599         vnode_t *realvp;
2600         int error;
2601 
2602         *exi = NULL;
2603 
2604         /*
2605          * check if the given path is a url or native path. Since p is
2606          * modified by MCLpath(), it may be empty after returning from
2607          * there, and should be checked.
2608          */
2609         if ((pathflag = MCLpath(&p)) == -1)
2610                 return (EIO);
2611 
2612         /*
2613          * If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
2614          * on in sec->sec_flags. This bit will later serve as an
2615          * indication in makefh_ol() or makefh3_ol() to overload the
2616          * filehandle to contain the sec modes used by the server for
2617          * the path.
2618          */
2619         if (pathflag == SECURITY_QUERY) {
2620                 if ((sec->sec_index = (uint_t)(*p)) > 0) {
2621                         sec->sec_flags |= SEC_QUERY;
2622                         p++;
2623                         if ((pathflag = MCLpath(&p)) == -1)
2624                                 return (EIO);
2625                 } else {
2626                         cmn_err(CE_NOTE,
2627                             "nfs_server: invalid security index %d, "
2628                             "violating WebNFS SNEGO protocol.", sec->sec_index);
2629                         return (EIO);
2630                 }
2631         }
2632 
2633         if (p[0] == '\0') {
2634                 error = ENOENT;
2635                 goto publicfh_done;
2636         }
2637 
2638         error = rfs_pathname(p, &mc_dvp, vpp, dvp, cr, pathflag);
2639 
2640         /*
2641          * If name resolves to "/" we get EINVAL since we asked for
2642          * the vnode of the directory that the file is in. Try again
2643          * with NULL directory vnode.
2644          */
2645         if (error == EINVAL) {
2646                 error = rfs_pathname(p, NULL, vpp, dvp, cr, pathflag);
2647                 if (!error) {
2648                         ASSERT(*vpp != NULL);
2649                         if ((*vpp)->v_type == VDIR) {
2650                                 VN_HOLD(*vpp);
2651                                 mc_dvp = *vpp;
2652                         } else {
2653                                 /*
2654                                  * This should not happen, the filesystem is
2655                                  * in an inconsistent state. Fail the lookup
2656                                  * at this point.
2657                                  */
2658                                 VN_RELE(*vpp);
2659                                 error = EINVAL;
2660                         }
2661                 }
2662         }
2663 
2664         if (error)
2665                 goto publicfh_done;
2666 
2667         if (*vpp == NULL) {
2668                 error = ENOENT;
2669                 goto publicfh_done;
2670         }
2671 
2672         ASSERT(mc_dvp != NULL);
2673         ASSERT(*vpp != NULL);
2674 
2675         if ((*vpp)->v_type == VDIR) {
2676                 do {
2677                         /*
2678                          * *vpp may be an AutoFS node, so we perform
2679                          * a VOP_ACCESS() to trigger the mount of the intended
2680                          * filesystem, so we can perform the lookup in the
2681                          * intended filesystem.
2682                          */
2683                         (void) VOP_ACCESS(*vpp, 0, 0, cr, NULL);
2684 
2685                         /*
2686                          * If vnode is covered, get the
2687                          * the topmost vnode.
2688                          */
2689                         if (vn_mountedvfs(*vpp) != NULL) {
2690                                 error = traverse(vpp);
2691                                 if (error) {
2692                                         VN_RELE(*vpp);
2693                                         goto publicfh_done;
2694                                 }
2695                         }
2696 
2697                         if (VOP_REALVP(*vpp, &realvp, NULL) == 0 &&
2698                             realvp != *vpp) {
2699                                 /*
2700                                  * If realvp is different from *vpp
2701                                  * then release our reference on *vpp, so that
2702                                  * the export access check be performed on the
2703                                  * real filesystem instead.
2704                                  */
2705                                 VN_HOLD(realvp);
2706                                 VN_RELE(*vpp);
2707                                 *vpp = realvp;
2708                         } else {
2709                                 break;
2710                         }
2711                 /* LINTED */
2712                 } while (TRUE);
2713 
2714                 /*
2715                  * Let nfs_vptexi() figure what the real parent is.
2716                  */
2717                 VN_RELE(mc_dvp);
2718                 mc_dvp = NULL;
2719 
2720         } else {
2721                 /*
2722                  * If vnode is covered, get the
2723                  * the topmost vnode.
2724                  */
2725                 if (vn_mountedvfs(mc_dvp) != NULL) {
2726                         error = traverse(&mc_dvp);
2727                         if (error) {
2728                                 VN_RELE(*vpp);
2729                                 goto publicfh_done;
2730                         }
2731                 }
2732 
2733                 if (VOP_REALVP(mc_dvp, &realvp, NULL) == 0 &&
2734                     realvp != mc_dvp) {
2735                         /*
2736                          * *vpp is a file, obtain realvp of the parent
2737                          * directory vnode.
2738                          */
2739                         VN_HOLD(realvp);
2740                         VN_RELE(mc_dvp);
2741                         mc_dvp = realvp;
2742                 }
2743         }
2744 
2745         /*
2746          * The pathname may take us from the public filesystem to another.
2747          * If that's the case then just set the exportinfo to the new export
2748          * and build filehandle for it. Thanks to per-access checking there's
2749          * no security issues with doing this. If the client is not allowed
2750          * access to this new export then it will get an access error when it
2751          * tries to use the filehandle
2752          */
2753         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2754                 VN_RELE(*vpp);
2755                 goto publicfh_done;
2756         }
2757 
2758         /*
2759          * Not allowed access to pseudo exports.
2760          */
2761         if (PSEUDO(*exi)) {
2762                 error = ENOENT;
2763                 VN_RELE(*vpp);
2764                 goto publicfh_done;
2765         }
2766 
2767         /*
2768          * Do a lookup for the index file. We know the index option doesn't
2769          * allow paths through handling in the share command, so mc_dvp will
2770          * be the parent for the index file vnode, if its present. Use
2771          * temporary pointers to preserve and reuse the vnode pointers of the
2772          * original directory in case there's no index file. Note that the
2773          * index file is a native path, and should not be interpreted by
2774          * the URL parser in rfs_pathname()
2775          */
2776         if (((*exi)->exi_export.ex_flags & EX_INDEX) &&
2777             ((*vpp)->v_type == VDIR) && (pathflag == URLPATH)) {
2778                 vnode_t *tvp, *tmc_dvp; /* temporary vnode pointers */
2779 
2780                 tmc_dvp = mc_dvp;
2781                 mc_dvp = tvp = *vpp;
2782 
2783                 error = rfs_pathname((*exi)->exi_export.ex_index, NULL, vpp,
2784                     mc_dvp, cr, NATIVEPATH);
2785 
2786                 if (error == ENOENT) {
2787                         *vpp = tvp;
2788                         mc_dvp = tmc_dvp;
2789                         error = 0;
2790                 } else {        /* ok or error other than ENOENT */
2791                         if (tmc_dvp)
2792                                 VN_RELE(tmc_dvp);
2793                         if (error)
2794                                 goto publicfh_done;
2795 
2796                         /*
2797                          * Found a valid vp for index "filename". Sanity check
2798                          * for odd case where a directory is provided as index
2799                          * option argument and leads us to another filesystem
2800                          */
2801 
2802                         /* Release the reference on the old exi value */
2803                         ASSERT(*exi != NULL);
2804                         exi_rele(*exi);
2805 
2806                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2807                                 VN_RELE(*vpp);
2808                                 goto publicfh_done;
2809                         }
2810                 }
2811         }
2812 
2813 publicfh_done:
2814         if (mc_dvp)
2815                 VN_RELE(mc_dvp);
2816 
2817         return (error);
2818 }
2819 
2820 /*
2821  * Evaluate a multi-component path
2822  */
2823 int
2824 rfs_pathname(
2825         char *path,                     /* pathname to evaluate */
2826         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2827         vnode_t **compvpp,              /* ret for ptr to component vnode */
2828         vnode_t *startdvp,              /* starting vnode */
2829         cred_t *cr,                     /* user's credential */
2830         int pathflag)                   /* flag to identify path, e.g. URL */
2831 {
2832         char namebuf[TYPICALMAXPATHLEN];
2833         struct pathname pn;
2834         int error;
2835 
2836         /*
2837          * If pathname starts with '/', then set startdvp to root.
2838          */
2839         if (*path == '/') {
2840                 while (*path == '/')
2841                         path++;
2842 
2843                 startdvp = rootdir;
2844         }
2845 
2846         error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2847         if (error == 0) {
2848                 /*
2849                  * Call the URL parser for URL paths to modify the original
2850                  * string to handle any '%' encoded characters that exist.
2851                  * Done here to avoid an extra bcopy in the lookup.
2852                  * We need to be careful about pathlen's. We know that
2853                  * rfs_pathname() is called with a non-empty path. However,
2854                  * it could be emptied due to the path simply being all /'s,
2855                  * which is valid to proceed with the lookup, or due to the
2856                  * URL parser finding an encoded null character at the
2857                  * beginning of path which should not proceed with the lookup.
2858                  */
2859                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2860                         URLparse(pn.pn_path);
2861                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2862                                 return (ENOENT);
2863                 }
2864                 VN_HOLD(startdvp);
2865                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2866                     rootdir, startdvp, cr);
2867         }
2868         if (error == ENAMETOOLONG) {
2869                 /*
2870                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2871                  */
2872                 if (error = pn_get(path, UIO_SYSSPACE, &pn))
2873                         return (error);
2874                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2875                         URLparse(pn.pn_path);
2876                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2877                                 pn_free(&pn);
2878                                 return (ENOENT);
2879                         }
2880                 }
2881                 VN_HOLD(startdvp);
2882                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2883                     rootdir, startdvp, cr);
2884                 pn_free(&pn);
2885         }
2886 
2887         return (error);
2888 }
2889 
2890 /*
2891  * Adapt the multicomponent lookup path depending on the pathtype
2892  */
2893 static int
2894 MCLpath(char **path)
2895 {
2896         unsigned char c = (unsigned char)**path;
2897 
2898         /*
2899          * If the MCL path is between 0x20 and 0x7E (graphic printable
2900          * character of the US-ASCII coded character set), its a URL path,
2901          * per RFC 1738.
2902          */
2903         if (c >= 0x20 && c <= 0x7E)
2904                 return (URLPATH);
2905 
2906         /*
2907          * If the first octet of the MCL path is not an ASCII character
2908          * then it must be interpreted as a tag value that describes the
2909          * format of the remaining octets of the MCL path.
2910          *
2911          * If the first octet of the MCL path is 0x81 it is a query
2912          * for the security info.
2913          */
2914         switch (c) {
2915         case 0x80:      /* native path, i.e. MCL via mount protocol */
2916                 (*path)++;
2917                 return (NATIVEPATH);
2918         case 0x81:      /* security query */
2919                 (*path)++;
2920                 return (SECURITY_QUERY);
2921         default:
2922                 return (-1);
2923         }
2924 }
2925 
/*
 * Value of the hexadecimal digit c; anything that is not a hexadecimal
 * digit yields 0.  The argument is fully parenthesized so that any
 * expression may be passed, but it is evaluated more than once: do not
 * pass an expression with side effects.
 */
#define fromhex(c)  (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
                        (((c) >= 'A' && (c) <= 'F') ? ((c) - 'A' + 10) : \
                        (((c) >= 'a' && (c) <= 'f') ? ((c) - 'a' + 10) : 0)))

/*
 * Decode the '%' escapes of a URL path in place.
 *
 * Each '%' followed by two characters is replaced by the single character
 * whose value those two hexadecimal digits encode.  The result is never
 * longer than the input, so it always fits in the original string.
 *
 * Corner cases:
 *  - a '%' that is the last character of the string is copied unchanged;
 *  - a '%' followed by only one character decodes that character as the
 *    high nibble, with a low nibble of 0;
 *  - a character that is not a hexadecimal digit counts as 0, so a decoded
 *    NUL (e.g. "%00") ends the string as seen by the caller.
 */
static void
URLparse(char *str)
{
        char *p, *q;    /* p reads the input, q writes the decoded output */

        p = q = str;
        while (*p) {
                *q = *p;
                if (*p++ == '%') {
                        if (*p) {
                                /* first digit: high nibble */
                                *q = fromhex(*p) * 16;
                                p++;
                                if (*p) {
                                        /* second digit: low nibble */
                                        *q += fromhex(*p);
                                        p++;
                                }
                        }
                }
                q++;
        }
        *q = '\0';
}
2957 
2958 
2959 /*
2960  * Get the export information for the lookup vnode, and verify its
2961  * useable.
2962  */
2963 int
2964 nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
2965     struct exportinfo **exi)
2966 {
2967         int walk;
2968         int error = 0;
2969 
2970         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
2971         if (*exi == NULL)
2972                 error = EACCES;
2973         else {
2974                 /*
2975                  * If nosub is set for this export then
2976                  * a lookup relative to the public fh
2977                  * must not terminate below the
2978                  * exported directory.
2979                  */
2980                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
2981                         error = EACCES;
2982         }
2983 
2984         return (error);
2985 }
2986 
2987 /*
2988  * Do the main work of handling HA-NFSv4 Resource Group failover on
2989  * Sun Cluster.
2990  * We need to detect whether any RG admin paths have been added or removed,
2991  * and adjust resources accordingly.
2992  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
2993  * order to scale, the list and array of paths need to be held in more
2994  * suitable data structures.
2995  */
2996 static void
2997 hanfsv4_failover(void)
2998 {
2999         int i, start_grace, numadded_paths = 0;
3000         char **added_paths = NULL;
3001         rfs4_dss_path_t *dss_path;
3002 
3003         /*
3004          * Note: currently, rfs4_dss_pathlist cannot be NULL, since
3005          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
3006          * make the latter dynamically specified too, the following will
3007          * need to be adjusted.
3008          */
3009 
3010         /*
3011          * First, look for removed paths: RGs that have been failed-over
3012          * away from this node.
3013          * Walk the "currently-serving" rfs4_dss_pathlist and, for each
3014          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
3015          * from nfsd. If not, that RG path has been removed.
3016          *
3017          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
3018          * any duplicates.
3019          */
3020         dss_path = rfs4_dss_pathlist;
3021         do {
3022                 int found = 0;
3023                 char *path = dss_path->path;
3024 
3025                 /* used only for non-HA so may not be removed */
3026                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3027                         dss_path = dss_path->next;
3028                         continue;
3029                 }
3030 
3031                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3032                         int cmpret;
3033                         char *newpath = rfs4_dss_newpaths[i];
3034 
3035                         /*
3036                          * Since nfsd has sorted rfs4_dss_newpaths for us,
3037                          * once the return from strcmp is negative we know
3038                          * we've passed the point where "path" should be,
3039                          * and can stop searching: "path" has been removed.
3040                          */
3041                         cmpret = strcmp(path, newpath);
3042                         if (cmpret < 0)
3043                                 break;
3044                         if (cmpret == 0) {
3045                                 found = 1;
3046                                 break;
3047                         }
3048                 }
3049 
3050                 if (found == 0) {
3051                         unsigned index = dss_path->index;
3052                         rfs4_servinst_t *sip = dss_path->sip;
3053                         rfs4_dss_path_t *path_next = dss_path->next;
3054 
3055                         /*
3056                          * This path has been removed.
3057                          * We must clear out the servinst reference to
3058                          * it, since it's now owned by another
3059                          * node: we should not attempt to touch it.
3060                          */
3061                         ASSERT(dss_path == sip->dss_paths[index]);
3062                         sip->dss_paths[index] = NULL;
3063 
3064                         /* remove from "currently-serving" list, and destroy */
3065                         remque(dss_path);
3066                         /* allow for NUL */
3067                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
3068                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
3069 
3070                         dss_path = path_next;
3071                 } else {
3072                         /* path was found; not removed */
3073                         dss_path = dss_path->next;
3074                 }
3075         } while (dss_path != rfs4_dss_pathlist);
3076 
3077         /*
3078          * Now, look for added paths: RGs that have been failed-over
3079          * to this node.
3080          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
3081          * for each path, check if it is on the "currently-serving"
3082          * rfs4_dss_pathlist. If not, that RG path has been added.
3083          *
3084          * Note: we don't do duplicate detection here; nfsd does that for us.
3085          *
3086          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
3087          * an upper bound for the size needed for added_paths[numadded_paths].
3088          */
3089 
3090         /* probably more space than we need, but guaranteed to be enough */
3091         if (rfs4_dss_numnewpaths > 0) {
3092                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
3093                 added_paths = kmem_zalloc(sz, KM_SLEEP);
3094         }
3095 
3096         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
3097         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3098                 int found = 0;
3099                 char *newpath = rfs4_dss_newpaths[i];
3100 
3101                 dss_path = rfs4_dss_pathlist;
3102                 do {
3103                         char *path = dss_path->path;
3104 
3105                         /* used only for non-HA */
3106                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3107                                 dss_path = dss_path->next;
3108                                 continue;
3109                         }
3110 
3111                         if (strncmp(path, newpath, strlen(path)) == 0) {
3112                                 found = 1;
3113                                 break;
3114                         }
3115 
3116                         dss_path = dss_path->next;
3117                 } while (dss_path != rfs4_dss_pathlist);
3118 
3119                 if (found == 0) {
3120                         added_paths[numadded_paths] = newpath;
3121                         numadded_paths++;
3122                 }
3123         }
3124 
3125         /* did we find any added paths? */
3126         if (numadded_paths > 0) {
3127                 /* create a new server instance, and start its grace period */
3128                 start_grace = 1;
3129                 rfs4_servinst_create(start_grace, numadded_paths, added_paths);
3130 
3131                 /* read in the stable storage state from these paths */
3132                 rfs4_dss_readstate(numadded_paths, added_paths);
3133 
3134                 /*
3135                  * Multiple failovers during a grace period will cause
3136                  * clients of the same resource group to be partitioned
3137                  * into different server instances, with different
3138                  * grace periods.  Since clients of the same resource
3139                  * group must be subject to the same grace period,
3140                  * we need to reset all currently active grace periods.
3141                  */
3142                 rfs4_grace_reset_all();
3143         }
3144 
3145         if (rfs4_dss_numnewpaths > 0)
3146                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
3147 }
3148 
3149 /*
3150  * Used by NFSv3 and NFSv4 server to query label of
3151  * a pathname component during lookup/access ops.
3152  */
3153 ts_label_t *
3154 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3155 {
3156         zone_t *zone;
3157         ts_label_t *zone_label;
3158         char *path;
3159 
3160         mutex_enter(&vp->v_lock);
3161         if (vp->v_path != NULL) {
3162                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3163                 mutex_exit(&vp->v_lock);
3164         } else {
3165                 /*
3166                  * v_path not cached. Fall back on pathname of exported
3167                  * file system as we rely on pathname from which we can
3168                  * derive a label. The exported file system portion of
3169                  * path is sufficient to obtain a label.
3170                  */
3171                 path = exi->exi_export.ex_path;
3172                 if (path == NULL) {
3173                         mutex_exit(&vp->v_lock);
3174                         return (NULL);
3175                 }
3176                 zone = zone_find_by_any_path(path, B_FALSE);
3177                 mutex_exit(&vp->v_lock);
3178         }
3179         /*
3180          * Caller has verified that the file is either
3181          * exported or visible. So if the path falls in
3182          * global zone, admin_low is returned; otherwise
3183          * the zone's label is returned.
3184          */
3185         zone_label = zone->zone_slabel;
3186         label_hold(zone_label);
3187         zone_rele(zone);
3188         return (zone_label);
3189 }
3190 
3191 /*
3192  * TX NFS routine used by NFSv3 and NFSv4 to do label check
3193  * on client label and server's file object lable.
3194  */
3195 boolean_t
3196 do_rfs_label_check(bslabel_t *clabel, vnode_t *vp, int flag,
3197     struct exportinfo *exi)
3198 {
3199         bslabel_t *slabel;
3200         ts_label_t *tslabel;
3201         boolean_t result;
3202 
3203         if ((tslabel = nfs_getflabel(vp, exi)) == NULL) {
3204                 return (B_FALSE);
3205         }
3206         slabel = label2bslabel(tslabel);
3207         DTRACE_PROBE4(tx__rfs__log__info__labelcheck, char *,
3208             "comparing server's file label(1) with client label(2) (vp(3))",
3209             bslabel_t *, slabel, bslabel_t *, clabel, vnode_t *, vp);
3210 
3211         if (flag == EQUALITY_CHECK)
3212                 result = blequal(clabel, slabel);
3213         else
3214                 result = bldominates(clabel, slabel);
3215         label_rele(tslabel);
3216         return (result);
3217 }
3218 
3219 /*
3220  * Callback function to return the loaned buffers.
3221  * Calls VOP_RETZCBUF() only after all uio_iov[]
3222  * buffers are returned. nu_ref maintains the count.
3223  */
3224 void
3225 rfs_free_xuio(void *free_arg)
3226 {
3227         uint_t ref;
3228         nfs_xuio_t *nfsuiop = (nfs_xuio_t *)free_arg;
3229 
3230         ref = atomic_dec_uint_nv(&nfsuiop->nu_ref);
3231 
3232         /*
3233          * Call VOP_RETZCBUF() only when all the iov buffers
3234          * are sent OTW.
3235          */
3236         if (ref != 0)
3237                 return;
3238 
3239         if (((uio_t *)nfsuiop)->uio_extflg & UIO_XUIO) {
3240                 (void) VOP_RETZCBUF(nfsuiop->nu_vp, (xuio_t *)free_arg, NULL,
3241                     NULL);
3242                 VN_RELE(nfsuiop->nu_vp);
3243         }
3244 
3245         kmem_cache_free(nfs_xuio_cache, free_arg);
3246 }
3247 
3248 xuio_t *
3249 rfs_setup_xuio(vnode_t *vp)
3250 {
3251         nfs_xuio_t *nfsuiop;
3252 
3253         nfsuiop = kmem_cache_alloc(nfs_xuio_cache, KM_SLEEP);
3254 
3255         bzero(nfsuiop, sizeof (nfs_xuio_t));
3256         nfsuiop->nu_vp = vp;
3257 
3258         /*
3259          * ref count set to 1. more may be added
3260          * if multiple mblks refer to multiple iov's.
3261          * This is done in uio_to_mblk().
3262          */
3263 
3264         nfsuiop->nu_ref = 1;
3265 
3266         nfsuiop->nu_frtn.free_func = rfs_free_xuio;
3267         nfsuiop->nu_frtn.free_arg = (char *)nfsuiop;
3268 
3269         nfsuiop->nu_uio.xu_type = UIOTYPE_ZEROCOPY;
3270 
3271         return (&nfsuiop->nu_uio);
3272 }
3273 
3274 mblk_t *
3275 uio_to_mblk(uio_t *uiop)
3276 {
3277         struct iovec *iovp;
3278         int i;
3279         mblk_t *mp, *mp1;
3280         nfs_xuio_t *nfsuiop = (nfs_xuio_t *)uiop;
3281 
3282         if (uiop->uio_iovcnt == 0)
3283                 return (NULL);
3284 
3285         iovp = uiop->uio_iov;
3286         mp = mp1 = esballoca((uchar_t *)iovp->iov_base, iovp->iov_len,
3287             BPRI_MED, &nfsuiop->nu_frtn);
3288         ASSERT(mp != NULL);
3289 
3290         mp->b_wptr += iovp->iov_len;
3291         mp->b_datap->db_type = M_DATA;
3292 
3293         for (i = 1; i < uiop->uio_iovcnt; i++) {
3294                 iovp = (uiop->uio_iov + i);
3295 
3296                 mp1->b_cont = esballoca(
3297                     (uchar_t *)iovp->iov_base, iovp->iov_len, BPRI_MED,
3298                     &nfsuiop->nu_frtn);
3299 
3300                 mp1 = mp1->b_cont;
3301                 ASSERT(mp1 != NULL);
3302                 mp1->b_wptr += iovp->iov_len;
3303                 mp1->b_datap->db_type = M_DATA;
3304         }
3305 
3306         nfsuiop->nu_ref = uiop->uio_iovcnt;
3307 
3308         return (mp);
3309 }
3310 
3311 void
3312 rfs_rndup_mblks(mblk_t *mp, uint_t len, int buf_loaned)
3313 {
3314         int i, rndup;
3315         int alloc_err = 0;
3316         mblk_t *rmp;
3317 
3318         rndup = BYTES_PER_XDR_UNIT - (len % BYTES_PER_XDR_UNIT);
3319 
3320         /* single mblk_t non copy-reduction case */
3321         if (!buf_loaned) {
3322                 mp->b_wptr += len;
3323                 if (rndup != BYTES_PER_XDR_UNIT) {
3324                         for (i = 0; i < rndup; i++)
3325                                 *mp->b_wptr++ = '\0';
3326                 }
3327                 return;
3328         }
3329 
3330         /* no need for extra rndup */
3331         if (rndup == BYTES_PER_XDR_UNIT)
3332                 return;
3333 
3334         while (mp->b_cont)
3335                 mp = mp->b_cont;
3336 
3337         /*
3338          * In case of copy-reduction mblks, the size of the mblks
3339          * are fixed and are of the size of the loaned buffers.
3340          * Allocate a roundup mblk and chain it to the data
3341          * buffers. This is sub-optimal, but not expected to
3342          * happen in regular common workloads.
3343          */
3344 
3345         rmp = allocb_wait(rndup, BPRI_MED, STR_NOSIG, &alloc_err);
3346         ASSERT(rmp != NULL);
3347         ASSERT(alloc_err == 0);
3348 
3349         for (i = 0; i < rndup; i++)
3350                 *rmp->b_wptr++ = '\0';
3351 
3352         rmp->b_datap->db_type = M_DATA;
3353         mp->b_cont = rmp;
3354 }