1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  27  *      All rights reserved.
  28  */
  29 
  30 #include <sys/param.h>
  31 #include <sys/types.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/vfs.h>
  35 #include <sys/vfs_opreg.h>
  36 #include <sys/vnode.h>
  37 #include <sys/pathname.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/kmem.h>
  40 #include <sys/mkdev.h>
  41 #include <sys/mount.h>
  42 #include <sys/mntent.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/errno.h>
  45 #include <sys/debug.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/utsname.h>
  48 #include <sys/bootconf.h>
  49 #include <sys/modctl.h>
  50 #include <sys/acl.h>
  51 #include <sys/flock.h>
  52 #include <sys/policy.h>
  53 #include <sys/zone.h>
  54 #include <sys/class.h>
  55 #include <sys/socket.h>
  56 #include <sys/netconfig.h>
  57 #include <sys/tsol/tnet.h>
  58 
  59 #include <rpc/types.h>
  60 #include <rpc/auth.h>
  61 #include <rpc/clnt.h>
  62 
  63 #include <nfs/nfs.h>
  64 #include <nfs/nfs_clnt.h>
  65 #include <nfs/rnode.h>
  66 #include <nfs/mount.h>
  67 #include <nfs/nfs_acl.h>
  68 
  69 #include <fs/fs_subr.h>
  70 
  71 /*
  72  * From rpcsec module (common/rpcsec).
  73  */
  74 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
  75 extern void sec_clnt_freeinfo(struct sec_data *);
  76 
  77 /*
  78  * The order and contents of this structure must be kept in sync with that of
  79  * rfsreqcnt_v3_tmpl in nfs_stats.c
  80  */
  81 static char *rfsnames_v3[] = {
  82         "null", "getattr", "setattr", "lookup", "access", "readlink", "read",
  83         "write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir",
  84         "rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo",
  85         "pathconf", "commit"
  86 };
  87 
  88 /*
  89  * This table maps from NFS protocol number into call type.
  90  * Zero means a "Lookup" type call
  91  * One  means a "Read" type call
  92  * Two  means a "Write" type call
  93  * This is used to select a default time-out.
  94  */
  95 static uchar_t call_type_v3[] = {
  96         0, 0, 1, 0, 0, 0, 1,
  97         2, 2, 2, 2, 2, 2, 2,
  98         2, 2, 1, 2, 0, 0, 0,
  99         2 };
 100 
 101 /*
 102  * Similar table, but to determine which timer to use
 103  * (only real reads and writes!)
 104  */
 105 static uchar_t timer_type_v3[] = {
 106         0, 0, 0, 0, 0, 0, 1,
 107         2, 0, 0, 0, 0, 0, 0,
 108         0, 0, 1, 1, 0, 0, 0,
 109         0 };
 110 
 111 /*
 112  * This table maps from NFS protocol number into a call type
 113  * for the semisoft mount option.
 114  * Zero means do not repeat operation.
 115  * One  means repeat.
 116  */
 117 static uchar_t ss_call_type_v3[] = {
 118         0, 0, 1, 0, 0, 0, 0,
 119         1, 1, 1, 1, 1, 1, 1,
 120         1, 1, 0, 0, 0, 0, 0,
 121         1 };
 122 
 123 /*
 124  * nfs3 vfs operations.
 125  */
 126 static int      nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
 127 static int      nfs3_unmount(vfs_t *, int, cred_t *);
 128 static int      nfs3_root(vfs_t *, vnode_t **);
 129 static int      nfs3_statvfs(vfs_t *, struct statvfs64 *);
 130 static int      nfs3_sync(vfs_t *, short, cred_t *);
 131 static int      nfs3_vget(vfs_t *, vnode_t **, fid_t *);
 132 static int      nfs3_mountroot(vfs_t *, whymountroot_t);
 133 static void     nfs3_freevfs(vfs_t *);
 134 
 135 static int      nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *,
 136                     int, cred_t *, zone_t *);
 137 
/*
 * State set up by nfs3init(): nfs3fstyp records the fstyp argument it was
 * called with, and the address of nfs3_vfsops is handed to vfs_setfsops()
 * together with the NFS version 3 vfs operations template.
 */

static int nfs3fstyp;
vfsops_t *nfs3_vfsops;
 144 
 145 /*
 146  * Debug variable to check for rdma based
 147  * transport startup and cleanup. Controlled
 148  * through /etc/system. Off by default.
 149  */
 150 extern int rdma_debug;
 151 
 152 int
 153 nfs3init(int fstyp, char *name)
 154 {
 155         static const fs_operation_def_t nfs3_vfsops_template[] = {
 156                 VFSNAME_MOUNT,          { .vfs_mount = nfs3_mount },
 157                 VFSNAME_UNMOUNT,        { .vfs_unmount = nfs3_unmount },
 158                 VFSNAME_ROOT,           { .vfs_root = nfs3_root },
 159                 VFSNAME_STATVFS,        { .vfs_statvfs = nfs3_statvfs },
 160                 VFSNAME_SYNC,           { .vfs_sync = nfs3_sync },
 161                 VFSNAME_VGET,           { .vfs_vget = nfs3_vget },
 162                 VFSNAME_MOUNTROOT,      { .vfs_mountroot = nfs3_mountroot },
 163                 VFSNAME_FREEVFS,        { .vfs_freevfs = nfs3_freevfs },
 164                 NULL,                   NULL
 165         };
 166         int error;
 167 
 168         error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops);
 169         if (error != 0) {
 170                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
 171                     "nfs3init: bad vfs ops template");
 172                 return (error);
 173         }
 174 
 175         error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops);
 176         if (error != 0) {
 177                 (void) vfs_freevfsops_by_type(fstyp);
 178                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
 179                     "nfs3init: bad vnode ops template");
 180                 return (error);
 181         }
 182 
 183         nfs3fstyp = fstyp;
 184 
 185         return (0);
 186 }
 187 
/*
 * Counterpart of nfs3init().  Currently performs no work.
 */
void
nfs3fini(void)
{
}
 192 
 193 static void
 194 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh)
 195 {
 196 
 197         if (fh)
 198                 kmem_free(fh, sizeof (*fh));
 199 
 200         if (nargs->knconf) {
 201                 if (nargs->knconf->knc_protofmly)
 202                         kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE);
 203                 if (nargs->knconf->knc_proto)
 204                         kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
 205                 kmem_free(nargs->knconf, sizeof (*nargs->knconf));
 206                 nargs->knconf = NULL;
 207         }
 208 
 209         if (nargs->fh) {
 210                 kmem_free(nargs->fh, strlen(nargs->fh) + 1);
 211                 nargs->fh = NULL;
 212         }
 213 
 214         if (nargs->hostname) {
 215                 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
 216                 nargs->hostname = NULL;
 217         }
 218 
 219         if (nargs->addr) {
 220                 if (nargs->addr->buf) {
 221                         ASSERT(nargs->addr->len);
 222                         kmem_free(nargs->addr->buf, nargs->addr->len);
 223                 }
 224                 kmem_free(nargs->addr, sizeof (struct netbuf));
 225                 nargs->addr = NULL;
 226         }
 227 
 228         if (nargs->syncaddr) {
 229                 ASSERT(nargs->syncaddr->len);
 230                 if (nargs->syncaddr->buf) {
 231                         ASSERT(nargs->syncaddr->len);
 232                         kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
 233                 }
 234                 kmem_free(nargs->syncaddr, sizeof (struct netbuf));
 235                 nargs->syncaddr = NULL;
 236         }
 237 
 238         if (nargs->netname) {
 239                 kmem_free(nargs->netname, strlen(nargs->netname) + 1);
 240                 nargs->netname = NULL;
 241         }
 242 
 243         if (nargs->nfs_ext_u.nfs_extA.secdata) {
 244                 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata);
 245                 nargs->nfs_ext_u.nfs_extA.secdata = NULL;
 246         }
 247 }
 248 
 249 static int
 250 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh)
 251 {
 252 
 253         int error;
 254         size_t nlen;                    /* length of netname */
 255         size_t hlen;                    /* length of hostname */
 256         char netname[MAXNETNAMELEN+1];  /* server's netname */
 257         struct netbuf addr;             /* server's address */
 258         struct netbuf syncaddr;         /* AUTH_DES time sync addr */
 259         struct knetconfig *knconf;      /* transport knetconfig structure */
 260         struct sec_data *secdata = NULL;        /* security data */
 261         STRUCT_DECL(nfs_args, args);            /* nfs mount arguments */
 262         STRUCT_DECL(knetconfig, knconf_tmp);
 263         STRUCT_DECL(netbuf, addr_tmp);
 264         int flags;
 265         char *p, *pf;
 266         char *userbufptr;
 267 
 268 
 269         bzero(nargs, sizeof (*nargs));
 270 
 271         STRUCT_INIT(args, get_udatamodel());
 272         bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
 273         if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args))))
 274                 return (EFAULT);
 275 
 276         nargs->wsize = STRUCT_FGET(args, wsize);
 277         nargs->rsize = STRUCT_FGET(args, rsize);
 278         nargs->timeo = STRUCT_FGET(args, timeo);
 279         nargs->retrans = STRUCT_FGET(args, retrans);
 280         nargs->acregmin = STRUCT_FGET(args, acregmin);
 281         nargs->acregmax = STRUCT_FGET(args, acregmax);
 282         nargs->acdirmin = STRUCT_FGET(args, acdirmin);
 283         nargs->acdirmax = STRUCT_FGET(args, acdirmax);
 284 
 285         flags = STRUCT_FGET(args, flags);
 286         nargs->flags = flags;
 287 
 288         addr.buf = NULL;
 289         syncaddr.buf = NULL;
 290 
 291         /*
 292          * Allocate space for a knetconfig structure and
 293          * its strings and copy in from user-land.
 294          */
 295         knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
 296         STRUCT_INIT(knconf_tmp, get_udatamodel());
 297         if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
 298             STRUCT_SIZE(knconf_tmp))) {
 299                 kmem_free(knconf, sizeof (*knconf));
 300                 return (EFAULT);
 301         }
 302 
 303         knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
 304         knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
 305         knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
 306         if (get_udatamodel() != DATAMODEL_LP64) {
 307                 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
 308         } else {
 309                 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
 310         }
 311 
 312         pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 313         p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 314         error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
 315         if (error) {
 316                 kmem_free(pf, KNC_STRSIZE);
 317                 kmem_free(p, KNC_STRSIZE);
 318                 kmem_free(knconf, sizeof (*knconf));
 319                 return (error);
 320         }
 321 
 322         error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
 323         if (error) {
 324                 kmem_free(pf, KNC_STRSIZE);
 325                 kmem_free(p, KNC_STRSIZE);
 326                 kmem_free(knconf, sizeof (*knconf));
 327                 return (error);
 328         }
 329 
 330 
 331         knconf->knc_protofmly = pf;
 332         knconf->knc_proto = p;
 333 
 334         nargs->knconf = knconf;
 335         /*
 336          * Get server address
 337          */
 338         STRUCT_INIT(addr_tmp, get_udatamodel());
 339         if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
 340             STRUCT_SIZE(addr_tmp))) {
 341                 error = EFAULT;
 342                 goto errout;
 343         }
 344 
 345         nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
 346         userbufptr = STRUCT_FGETP(addr_tmp, buf);
 347         addr.len = STRUCT_FGET(addr_tmp, len);
 348         addr.buf = kmem_alloc(addr.len, KM_SLEEP);
 349         addr.maxlen = addr.len;
 350         if (copyin(userbufptr, addr.buf, addr.len)) {
 351                 kmem_free(addr.buf, addr.len);
 352                 error = EFAULT;
 353                 goto errout;
 354         }
 355         bcopy(&addr, nargs->addr, sizeof (struct netbuf));
 356 
 357         /*
 358          * Get the root fhandle
 359          */
 360 
 361         if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) {
 362                 error = EFAULT;
 363                 goto errout;
 364         }
 365 
 366 
 367         /*
 368          * Get server's hostname
 369          */
 370         if (flags & NFSMNT_HOSTNAME) {
 371                 error = copyinstr(STRUCT_FGETP(args, hostname), netname,
 372                     sizeof (netname), &hlen);
 373         if (error)
 374                 goto errout;
 375         nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
 376         (void) strcpy(nargs->hostname, netname);
 377         } else {
 378         nargs->hostname = NULL;
 379         }
 380 
 381 
 382         /*
 383          * If there are syncaddr and netname data, load them in. This is
 384          * to support data needed for NFSV4 when AUTH_DH is the negotiated
 385          * flavor via SECINFO. (instead of using MOUNT protocol in V3).
 386          */
 387         netname[0] = '\0';
 388         if (flags & NFSMNT_SECURE) {
 389                 if (STRUCT_FGETP(args, syncaddr) == NULL) {
 390                         error = EINVAL;
 391                         goto errout;
 392                 }
 393                 /* get syncaddr */
 394                 STRUCT_INIT(addr_tmp, get_udatamodel());
 395                 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
 396                     STRUCT_SIZE(addr_tmp))) {
 397                         error = EINVAL;
 398                         goto errout;
 399                 }
 400                 userbufptr = STRUCT_FGETP(addr_tmp, buf);
 401                 syncaddr.len = STRUCT_FGET(addr_tmp, len);
 402                 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
 403                 syncaddr.maxlen = syncaddr.len;
 404                 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
 405                         kmem_free(syncaddr.buf, syncaddr.len);
 406                         error = EFAULT;
 407                         goto errout;
 408                 }
 409 
 410                 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
 411                 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
 412 
 413                 ASSERT(STRUCT_FGETP(args, netname));
 414 
 415                 if (copyinstr(STRUCT_FGETP(args, netname), netname,
 416                     sizeof (netname), &nlen)) {
 417                         error = EFAULT;
 418                         goto errout;
 419                 }
 420 
 421                 netname[nlen] = '\0';
 422                 nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
 423                 (void) strcpy(nargs->netname, netname);
 424         }
 425 
 426         /*
 427          * Get the extention data which has the security data structure.
 428          * This includes data for AUTH_SYS as well.
 429          */
 430         if (flags & NFSMNT_NEWARGS) {
 431                 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
 432                 if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
 433                     nargs->nfs_args_ext == NFS_ARGS_EXTB) {
 434                         /*
 435                          * Indicating the application is using the new
 436                          * sec_data structure to pass in the security
 437                          * data.
 438                          */
 439                         if (STRUCT_FGETP(args,
 440                             nfs_ext_u.nfs_extA.secdata) != NULL) {
 441                                 error = sec_clnt_loadinfo(
 442                                     (struct sec_data *)STRUCT_FGETP(args,
 443                                     nfs_ext_u.nfs_extA.secdata), &secdata,
 444                                     get_udatamodel());
 445                         }
 446                         nargs->nfs_ext_u.nfs_extA.secdata = secdata;
 447                 }
 448         }
 449 
 450         if (error)
 451                 goto errout;
 452 
 453         /*
 454          * Failover support:
 455          *
 456          * We may have a linked list of nfs_args structures,
 457          * which means the user is looking for failover.  If
 458          * the mount is either not "read-only" or "soft",
 459          * we want to bail out with EINVAL.
 460          */
 461         if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
 462                 nargs->nfs_ext_u.nfs_extB.next =
 463                     STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
 464 
 465 errout:
 466         if (error)
 467                 nfs3_free_args(nargs, fh);
 468 
 469         return (error);
 470 }
 471 
 472 
 473 /*
 474  * nfs mount vfsop
 475  * Set up mount info record and attach it to vfs struct.
 476  */
 477 static int
 478 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 479 {
 480         struct nfs_args *args = NULL;
 481         nfs_fhandle     *fhandle = NULL;
 482         char *data = uap->dataptr;
 483         int error;
 484         vnode_t *rtvp;                  /* the server's root */
 485         mntinfo_t *mi;                  /* mount info, pointed at by vfs */
 486         size_t nlen;                    /* length of netname */
 487         struct knetconfig *knconf;      /* transport knetconfig structure */
 488         struct knetconfig *rdma_knconf; /* rdma transport structure */
 489         rnode_t *rp;
 490         struct servinfo *svp;           /* nfs server info */
 491         struct servinfo *svp_tail = NULL; /* previous nfs server info */
 492         struct servinfo *svp_head;      /* first nfs server info */
 493         struct servinfo *svp_2ndlast;   /* 2nd last in server info list */
 494         struct sec_data *secdata;       /* security data */
 495         int flags, addr_type;
 496         zone_t *zone = nfs_zone();
 497         zone_t *mntzone = NULL;
 498 
 499 
 500         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 501                 return (EPERM);
 502 
 503         if (mvp->v_type != VDIR)
 504                 return (ENOTDIR);
 505 
 506         /*
 507          * get arguments
 508          *
 509          * nfs_args is now versioned and is extensible, so
 510          * uap->datalen might be different from sizeof (args)
 511          * in a compatible situation.
 512          */
 513 
 514 more:
 515 
 516         if (!(uap->flags & MS_SYSSPACE)) {
 517                 if (args == NULL)
 518                         args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP);
 519                 else {
 520                         nfs3_free_args(args, fhandle);
 521                         fhandle = NULL;
 522                 }
 523                 if (fhandle == NULL)
 524                         fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP);
 525                 error = nfs3_copyin(data, uap->datalen, args, fhandle);
 526                 if (error) {
 527                         if (args)
 528                                 kmem_free(args, sizeof (*args));
 529                         return (error);
 530                 }
 531         } else {
 532                 args = (struct nfs_args *)data;
 533                 fhandle = (nfs_fhandle *)args->fh;
 534         }
 535 
 536 
 537         flags = args->flags;
 538 
 539         if (uap->flags & MS_REMOUNT) {
 540                 size_t  n;
 541                 char    name[FSTYPSZ];
 542 
 543                 if (uap->flags & MS_SYSSPACE) {
 544                         error = copystr(uap->fstype, name, FSTYPSZ, &n);
 545                 } else {
 546                         nfs3_free_args(args, fhandle);
 547                         kmem_free(args, sizeof (*args));
 548                         error = copyinstr(uap->fstype, name, FSTYPSZ, &n);
 549                 }
 550                 if (error) {
 551                         if (error == ENAMETOOLONG)
 552                                 return (EINVAL);
 553                         return (error);
 554                 }
 555 
 556                 /*
 557                  * This check is to ensure that the request is a
 558                  * genuine nfs remount request.
 559                  */
 560 
 561                 if (strncmp(name, "nfs", 3) != 0)
 562                         return (EINVAL);
 563 
 564                 /*
 565                  * If the request changes the locking type, disallow the
 566                  * remount,
 567                  * because it's questionable whether we can transfer the
 568                  * locking state correctly.
 569                  */
 570 
 571                 if ((mi = VFTOMI(vfsp)) != NULL) {
 572                         uint_t new_mi_llock;
 573                         uint_t old_mi_llock;
 574 
 575                         new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
 576                         old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0;
 577                         if (old_mi_llock != new_mi_llock)
 578                                 return (EBUSY);
 579                 }
 580                 return (0);
 581         }
 582 
 583         mutex_enter(&mvp->v_lock);
 584         if (!(uap->flags & MS_OVERLAY) &&
 585             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 586                 mutex_exit(&mvp->v_lock);
 587                 if (!(uap->flags & MS_SYSSPACE)) {
 588                         nfs3_free_args(args, fhandle);
 589                         kmem_free(args, sizeof (*args));
 590                 }
 591                 return (EBUSY);
 592         }
 593         mutex_exit(&mvp->v_lock);
 594 
 595         /* make sure things are zeroed for errout: */
 596         rtvp = NULL;
 597         mi = NULL;
 598         secdata = NULL;
 599 
 600         /*
 601          * A valid knetconfig structure is required.
 602          */
 603         if (!(flags & NFSMNT_KNCONF)) {
 604                 if (!(uap->flags & MS_SYSSPACE)) {
 605                         nfs3_free_args(args, fhandle);
 606                         kmem_free(args, sizeof (*args));
 607                 }
 608                 return (EINVAL);
 609         }
 610 
 611         if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
 612             (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
 613                 if (!(uap->flags & MS_SYSSPACE)) {
 614                         nfs3_free_args(args, fhandle);
 615                         kmem_free(args, sizeof (*args));
 616                 }
 617                 return (EINVAL);
 618         }
 619 
 620         /*
 621          * Allocate a servinfo struct.
 622          */
 623         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
 624         mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
 625         if (svp_tail) {
 626                 svp_2ndlast = svp_tail;
 627                 svp_tail->sv_next = svp;
 628         } else {
 629                 svp_head = svp;
 630                 svp_2ndlast = svp;
 631         }
 632 
 633         svp_tail = svp;
 634 
 635         svp->sv_knconf = args->knconf;
 636         args->knconf = NULL;
 637 
 638         if (args->addr == NULL || args->addr->buf == NULL) {
 639                 error = EINVAL;
 640                 goto errout;
 641         }
 642 
 643         svp->sv_addr.maxlen = args->addr->maxlen;
 644         svp->sv_addr.len = args->addr->len;
 645         svp->sv_addr.buf = args->addr->buf;
 646         args->addr->buf = NULL;
 647 
 648         /*
 649          * Check the root fhandle length
 650          */
 651         ASSERT(fhandle);
 652         if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) {
 653                 error = EINVAL;
 654 #ifdef DEBUG
 655                 zcmn_err(getzoneid(), CE_WARN,
 656                     "nfs3_mount: got an invalid fhandle. fh_len = %d",
 657                     fhandle->fh_len);
 658                 fhandle->fh_len = NFS_FHANDLE_LEN;
 659                 nfs_printfhandle(fhandle);
 660 #endif
 661                 goto errout;
 662         }
 663 
 664         bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len);
 665         svp->sv_fhandle.fh_len = fhandle->fh_len;
 666 
 667         /*
 668          * Get server's hostname
 669          */
 670         if (flags & NFSMNT_HOSTNAME) {
 671                 if (args->hostname == NULL) {
 672                         error = EINVAL;
 673                         goto errout;
 674                 }
 675                 svp->sv_hostnamelen = strlen(args->hostname) + 1;
 676                 svp->sv_hostname = args->hostname;
 677                 args->hostname = NULL;
 678         } else {
 679                 char *p = "unknown-host";
 680                 svp->sv_hostnamelen = strlen(p) + 1;
 681                 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
 682                 (void) strcpy(svp->sv_hostname, p);
 683         }
 684 
 685 
        /*
         * RDMA MOUNT SUPPORT FOR NFS v3:
         * Determine whether it is possible to use RDMA; if so, replace
         * the knconf with the RDMA-specific knconf and keep the original
         * in sv_origknconf.
         */
 691         if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
 692                 /*
 693                  * Determine the addr type for RDMA, IPv4 or v6.
 694                  */
 695                 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
 696                         addr_type = AF_INET;
 697                 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
 698                         addr_type = AF_INET6;
 699 
 700                 if (rdma_reachable(addr_type, &svp->sv_addr,
 701                     &rdma_knconf) == 0) {
                        /*
                         * If successful, hijack the original knconf and
                         * replace it with a new one, depending on the flags.
                         */
 706                         svp->sv_origknconf = svp->sv_knconf;
 707                         svp->sv_knconf = rdma_knconf;
 708                         knconf = rdma_knconf;
 709                 } else {
 710                         if (flags & NFSMNT_TRYRDMA) {
 711 #ifdef  DEBUG
 712                                 if (rdma_debug)
 713                                         zcmn_err(getzoneid(), CE_WARN,
 714                                             "no RDMA onboard, revert\n");
 715 #endif
 716                         }
 717 
 718                         if (flags & NFSMNT_DORDMA) {
                                /*
                                 * If proto=rdma is specified and no RDMA
                                 * path to this server is available, then
                                 * ditch this server.
                                 * It is not included in the mountable
                                 * server list or the replica list.
                                 * Check whether more servers are specified
                                 * (the failover case); otherwise bail out
                                 * of the mount.
                                 */
 728                                 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
 729                                     args->nfs_ext_u.nfs_extB.next != NULL) {
 730                                         data = (char *)
 731                                             args->nfs_ext_u.nfs_extB.next;
 732                                         if (uap->flags & MS_RDONLY &&
 733                                             !(flags & NFSMNT_SOFT)) {
 734                                                 if (svp_head->sv_next == NULL) {
 735                                                         svp_tail = NULL;
 736                                                         svp_2ndlast = NULL;
 737                                                         sv_free(svp_head);
 738                                                         goto more;
 739                                                 } else {
 740                                                         svp_tail = svp_2ndlast;
 741                                                         svp_2ndlast->sv_next =
 742                                                             NULL;
 743                                                         sv_free(svp);
 744                                                         goto more;
 745                                                 }
 746                                         }
 747                                 } else {
 748                                         /*
 749                                          * This is the last server specified
 750                                          * in the nfs_args list passed down
 751                                          * and its not rdma capable.
 752                                          */
 753                                         if (svp_head->sv_next == NULL) {
 754                                                 /*
 755                                                  * Is this the only one
 756                                                  */
 757                                                 error = EINVAL;
 758 #ifdef  DEBUG
 759                                                 if (rdma_debug)
 760                                                         zcmn_err(getzoneid(),
 761                                                             CE_WARN,
 762                                                             "No RDMA srv");
 763 #endif
 764                                                 goto errout;
 765                                         } else {
                                                /*
                                                 * There is a list, since some
                                                 * servers specified before
                                                 * this one passed all
                                                 * requirements.
                                                 */
 771                                                 svp_tail = svp_2ndlast;
 772                                                 svp_2ndlast->sv_next = NULL;
 773                                                 sv_free(svp);
 774                                                 goto proceed;
 775                                         }
 776                                 }
 777                         }
 778                 }
 779         }
 780 
        /*
         * Get the extension data which has the new security data structure.
         */
 784         if (flags & NFSMNT_NEWARGS) {
 785                 switch (args->nfs_args_ext) {
 786                 case NFS_ARGS_EXTA:
 787                 case NFS_ARGS_EXTB:
 788                         /*
 789                          * Indicating the application is using the new
 790                          * sec_data structure to pass in the security
 791                          * data.
 792                          */
 793                         secdata = args->nfs_ext_u.nfs_extA.secdata;
 794                         if (args->nfs_ext_u.nfs_extA.secdata == NULL) {
 795                                 error = EINVAL;
 796                         } else {
 797                                 /*
 798                                  * Need to validate the flavor here if
 799                                  * sysspace, userspace was already
 800                                  * validate from the nfs_copyin function.
 801                                  */
 802                                 switch (secdata->rpcflavor) {
 803                                 case AUTH_NONE:
 804                                 case AUTH_UNIX:
 805                                 case AUTH_LOOPBACK:
 806                                 case AUTH_DES:
 807                                 case RPCSEC_GSS:
 808                                         args->nfs_ext_u.nfs_extA.secdata = NULL;
 809                                         break;
 810                                 default:
 811                                         error = EINVAL;
 812                                         goto errout;
 813                                 }
 814                         }
 815                         break;
 816 
 817                 default:
 818                         error = EINVAL;
 819                         break;
 820                 }
 821         } else if (flags & NFSMNT_SECURE) {
 822                 /*
 823                  * Keep this for backward compatibility to support
 824                  * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags.
 825                  */
 826                 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) {
 827                         error = EINVAL;
 828                         goto errout;
 829                 }
 830                 /*
 831                  * Move security related data to the sec_data structure.
 832                  */
 833                 {
 834                         dh_k4_clntdata_t *data;
 835                         char *pf, *p;
 836                         secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
 837                         if (flags & NFSMNT_RPCTIMESYNC)
 838                                 secdata->flags |= AUTH_F_RPCTIMESYNC;
 839                         data = kmem_alloc(sizeof (*data), KM_SLEEP);
 840                         bcopy(args->syncaddr, &data->syncaddr,
 841                             sizeof (*args->syncaddr));
 842 
 843                         /*
 844                          * duplicate the knconf information for the
 845                          * new opaque data.
 846                          */
 847                         data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
 848                         *data->knconf = *knconf;
 849                         pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 850                         p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 851                         bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
 852                         bcopy(knconf->knc_proto, pf, KNC_STRSIZE);
 853                         data->knconf->knc_protofmly = pf;
 854                         data->knconf->knc_proto = p;
 855 
 856                         nlen = strlen(args->hostname) + 1;
 857                         /* move server netname to the sec_data structure */
 858                         if (nlen != 0) {
 859                                 data->netname = kmem_alloc(nlen, KM_SLEEP);
 860                                 bcopy(args->hostname, data->netname, nlen);
 861                                 data->netnamelen = nlen;
 862                         }
 863                         secdata->secmod = secdata->rpcflavor = AUTH_DES;
 864                         secdata->data = (caddr_t)data;
 865                 }
 866         } else  {
 867                 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
 868                 secdata->secmod = secdata->rpcflavor = AUTH_UNIX;
 869                 secdata->data = NULL;
 870         }
 871 
 872         svp->sv_secdata = secdata;
 873         if (error)
 874                 goto errout;
 875 
 876         /*
 877          * See bug 1180236.
 878          * If mount secure failed, we will fall back to AUTH_NONE
 879          * and try again.  nfs3rootvp() will turn this back off.
 880          *
 881          * The NFS Version 3 mount uses the FSINFO and GETATTR
 882          * procedures.  The server should not care if these procedures
 883          * have the proper security flavor, so if mount retries using
 884          * AUTH_NONE that does not require a credential setup for root
 885          * then the automounter would work without requiring root to be
 886          * keylogged into AUTH_DES.
 887          */
 888         if (secdata->rpcflavor != AUTH_UNIX &&
 889             secdata->rpcflavor != AUTH_LOOPBACK)
 890                 secdata->flags |= AUTH_F_TRYNONE;
 891 
 892         /*
 893          * Failover support:
 894          *
 895          * We may have a linked list of nfs_args structures,
 896          * which means the user is looking for failover.  If
 897          * the mount is either not "read-only" or "soft",
 898          * we want to bail out with EINVAL.
 899          */
 900         if (args->nfs_args_ext == NFS_ARGS_EXTB &&
 901             args->nfs_ext_u.nfs_extB.next != NULL) {
 902                 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
 903                         data = (char *)args->nfs_ext_u.nfs_extB.next;
 904                         goto more;
 905                 }
 906                 error = EINVAL;
 907                 goto errout;
 908         }
 909 
 910         /*
 911          * Determine the zone we're being mounted into.
 912          */
 913         zone_hold(mntzone = zone);              /* start with this assumption */
 914         if (getzoneid() == GLOBAL_ZONEID) {
 915                 zone_rele(mntzone);
 916                 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
 917                 ASSERT(mntzone != NULL);
 918                 if (mntzone != zone) {
 919                         error = EBUSY;
 920                         goto errout;
 921                 }
 922         }
 923 
 924         if (is_system_labeled()) {
 925                 error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
 926                     svp->sv_knconf, cr);
 927 
 928                 if (error > 0)
 929                         goto errout;
 930 
 931                 if (error == -1) {
 932                         /* change mount to read-only to prevent write-down */
 933                         vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 934                 }
 935         }
 936 
 937         /*
 938          * Stop the mount from going any further if the zone is going away.
 939          */
 940         if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
 941                 error = EBUSY;
 942                 goto errout;
 943         }
 944 
 945         /*
 946          * Get root vnode.
 947          */
 948 proceed:
 949         error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
 950 
 951         if (error)
 952                 goto errout;
 953 
 954         /*
 955          * Set option fields in the mount info record
 956          */
 957         mi = VTOMI(rtvp);
 958 
 959         if (svp_head->sv_next)
 960                 mi->mi_flags |= MI_LLOCK;
 961 
 962         error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args);
 963 
 964 errout:
 965         if (rtvp != NULL) {
 966                 if (error) {
 967                         rp = VTOR(rtvp);
 968                         if (rp->r_flags & RHASHED)
 969                                 rp_rmhash(rp);
 970                 }
 971                 VN_RELE(rtvp);
 972         }
 973 
 974         if (error) {
 975                 sv_free(svp_head);
 976                 if (mi != NULL) {
 977                         nfs_async_stop(vfsp);
 978                         nfs_async_manager_stop(vfsp);
 979                         if (mi->mi_io_kstats) {
 980                                 kstat_delete(mi->mi_io_kstats);
 981                                 mi->mi_io_kstats = NULL;
 982                         }
 983                         if (mi->mi_ro_kstats) {
 984                                 kstat_delete(mi->mi_ro_kstats);
 985                                 mi->mi_ro_kstats = NULL;
 986                         }
 987                         nfs_free_mi(mi);
 988                 }
 989         }
 990 
 991 
 992         if (!(uap->flags & MS_SYSSPACE)) {
 993                 nfs3_free_args(args, fhandle);
 994                 kmem_free(args, sizeof (*args));
 995         }
 996 
 997         if (mntzone != NULL)
 998                 zone_rele(mntzone);
 999 
1000         return (error);
1001 }
1002 
1003 static int nfs3_dynamic = 0;    /* nonzero: new mounts get MI_DYNAMIC (dynamic retrans.) */
1004 static ushort_t nfs3_max_threads = 8;   /* initial mi_max_threads: max active async threads */
1005 uint_t nfs3_bsize = 32 * 1024;  /* client `block' size; nfs3rootvp() forces a PAGESIZE multiple */
1006 static uint_t nfs3_async_clusters = 1;  /* # of reqs from each async queue */
1007 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; /* mi_timeo for COTS / COTS_ORD transports */
1008 
1009 static int
1010 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
1011         int flags, cred_t *cr, zone_t *zone)
1012 {
             /*
              * Build the per-mount state for an NFS Version 3 mount and
              * return its root vnode.
              *
              * Allocates a mntinfo_t, attaches it to vfsp, picks an unused
              * device number, creates the root vnode from the first server's
              * file handle, sends FSINFO to every server on the svp list to
              * settle transfer sizes and link capabilities, starts the async
              * manager thread and sets up the mount kstats.
              *
              *   rtvpp  out: root vnode on success, NULL on failure
              *   vfsp   vfs being mounted; vfs_dev, vfs_fsid, vfs_data,
              *          vfs_fstype and vfs_bsize are filled in here
              *   svp    head of the server list (sv_next chain)
              *   flags  NFSMNT_* mount flags
              *   cr     credential for the RPCs, unless replaced by lcr below
              *   zone   zone the mount belongs to; asserted to be nfs_zone()
              *
              * Returns 0 on success.  On failure returns an errno value after
              * releasing the root vnode and freeing the mntinfo (see "bad").
              */
1013         vnode_t *rtvp;
1014         mntinfo_t *mi;
1015         dev_t nfs_dev;
1016         struct vattr va;
1017         struct FSINFO3args args;
1018         struct FSINFO3res res;
1019         int error;
1020         int douprintf;
1021         rnode_t *rp;
1022         int i;
1023         uint_t max_transfer_size;
1024         struct nfs_stats *nfsstatsp;
1025         cred_t *lcr = NULL, *tcr = cr;
1026 
1027         nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
1028         ASSERT(nfsstatsp != NULL);
1029 
1030         ASSERT(nfs_zone() == zone);
1031         /*
1032          * Create a mount record and link it to the vfs struct.
1033          */
1034         mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
1035         mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
1036         mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL);
1037         mi->mi_flags = MI_ACL | MI_EXTATTR;
1038         if (!(flags & NFSMNT_SOFT))
1039                 mi->mi_flags |= MI_HARD;
1040         if ((flags & NFSMNT_SEMISOFT))
1041                 mi->mi_flags |= MI_SEMISOFT;
1042         if ((flags & NFSMNT_NOPRINT))
1043                 mi->mi_flags |= MI_NOPRINT;
1044         if (flags & NFSMNT_INT)
1045                 mi->mi_flags |= MI_INT;
1046         mi->mi_retrans = NFS_RETRIES;
             /* Connection-oriented transports use the tunable timeout. */
1047         if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
1048             svp->sv_knconf->knc_semantics == NC_TPI_COTS)
1049                 mi->mi_timeo = nfs3_cots_timeo;
1050         else
1051                 mi->mi_timeo = NFS_TIMEO;
1052         mi->mi_prog = NFS_PROGRAM;
1053         mi->mi_vers = NFS_V3;
1054         mi->mi_rfsnames = rfsnames_v3;
1055         mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr;
1056         mi->mi_call_type = call_type_v3;
1057         mi->mi_ss_call_type = ss_call_type_v3;
1058         mi->mi_timer_type = timer_type_v3;
1059         mi->mi_aclnames = aclnames_v3;
1060         mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr;
1061         mi->mi_acl_call_type = acl_call_type_v3;
1062         mi->mi_acl_ss_call_type = acl_ss_call_type_v3;
1063         mi->mi_acl_timer_type = acl_timer_type_v3;
1064         cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
1065         mi->mi_servers = svp;
1066         mi->mi_curr_serv = svp;
1067         mi->mi_acregmin = SEC2HR(ACREGMIN);
1068         mi->mi_acregmax = SEC2HR(ACREGMAX);
1069         mi->mi_acdirmin = SEC2HR(ACDIRMIN);
1070         mi->mi_acdirmax = SEC2HR(ACDIRMAX);
1071 
1072         if (nfs3_dynamic)
1073                 mi->mi_flags |= MI_DYNAMIC;
1074 
1075         if (flags & NFSMNT_DIRECTIO)
1076                 mi->mi_flags |= MI_DIRECTIO;
1077 
1078         /*
1079          * Make a vfs struct for nfs.  We do this here instead of below
1080          * because rtvp needs a vfs before we can do a getattr on it.
1081          *
1082          * Assign a unique device id to the mount
1083          */
1084         mutex_enter(&nfs_minor_lock);
1085         do {
1086                 nfs_minor = (nfs_minor + 1) & MAXMIN32;
1087                 nfs_dev = makedevice(nfs_major, nfs_minor);
1088         } while (vfs_devismounted(nfs_dev));
1089         mutex_exit(&nfs_minor_lock);
1090 
1091         vfsp->vfs_dev = nfs_dev;
1092         vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp);
1093         vfsp->vfs_data = (caddr_t)mi;
             /*
              * NOTE(review): the fsid above is built with nfs3fstyp but
              * vfs_fstype is set to nfsfstyp.  Looks deliberate, but the
              * reason is not visible here -- confirm before changing.
              */
1094         vfsp->vfs_fstype = nfsfstyp;
1095 
1096         /*
1097          * Verify that nfs3_bsize tuneable is set to an
1098          * acceptable value.  It must be a multiple of PAGESIZE or
1099          * file corruption can occur.
              *
              * Note that the global tunable itself is corrected in place:
              * rounded down to a page multiple, with PAGESIZE as the floor.
1100          */
1101         if (nfs3_bsize & PAGEOFFSET)
1102                 nfs3_bsize &= PAGEMASK;
1103         if (nfs3_bsize < PAGESIZE)
1104                 nfs3_bsize = PAGESIZE;
1105         vfsp->vfs_bsize = nfs3_bsize;
1106 
1107         /*
1108          * Initialize fields used to support async putpage operations.
1109          */
1110         for (i = 0; i < NFS_ASYNC_TYPES; i++)
1111                 mi->mi_async_clusters[i] = nfs3_async_clusters;
1112         mi->mi_async_init_clusters = nfs3_async_clusters;
1113         mi->mi_async_curr[NFS_ASYNC_QUEUE] =
1114             mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
1115         mi->mi_max_threads = nfs3_max_threads;
1116         mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
1117         cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
1118         cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL);
1119         cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL,
1120             CV_DEFAULT, NULL);
1121         cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
1122 
1123         mi->mi_vfsp = vfsp;
1124         mi->mi_zone = zone;
1125         zone_init_ref(&mi->mi_zone_ref);
1126         zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS);
1127         nfs_mi_zonelist_add(mi);
1128 
1129         /*
1130          * Make the root vnode, use it to get attributes,
1131          * then remake it with the attributes.
1132          */
1133         rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle,
1134             NULL, vfsp, gethrtime(), cr, NULL, NULL);
1135 
1136         /*
1137          * Make the FSINFO calls, primarily at this point to
1138          * determine the transfer size.  For client failover,
1139          * we'll want this to be the minimum bid from any
1140          * server, so that we don't overrun stated limits.
1141          *
1142          * While we're looping, we'll turn off AUTH_F_TRYNONE,
1143          * which is only for the mount operation.
1144          */
1145 
             /* Starting values; the loop below can only lower them (MIN). */
1146         mi->mi_tsize = nfs3_tsize(svp->sv_knconf);
1147         mi->mi_stsize = mi->mi_tsize;
1148 
1149         mi->mi_curread = nfs3_bsize;
1150         mi->mi_curwrite = mi->mi_curread;
1151 
1152         /*
1153          * If the uid is set then set the creds for secure mounts
1154          * by proxy processes such as automountd.
              *
              * lcr is a private copy of cr with the uid replaced; tcr is
              * whichever credential the RPCs below actually use.  lcr is
              * freed on both the success and the "bad" exit paths.
1155          */
1156         if (svp->sv_secdata->uid != 0 &&
1157             svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
1158                 lcr = crdup(cr);
1159                 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
1160                 tcr = lcr;
1161         }
1162 
             /*
              * Probe each server in turn.  mi_curr_serv is pointed at the
              * server being probed before rfs3call() is issued for it.
              */
1163         for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
1164                 douprintf = 1;
1165                 mi->mi_curr_serv = svp;
1166                 max_transfer_size = nfs3_tsize(svp->sv_knconf);
1167                 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize);
1168                 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize);
1169                 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread);
1170                 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite);
1171                 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle;
1172 
1173                 error = rfs3call(mi, NFSPROC3_FSINFO,
1174                     xdr_nfs_fh3, (caddr_t)&args,
1175                     xdr_FSINFO3res, (caddr_t)&res, tcr,
1176                     &douprintf, &res.status, 0, NULL);
1177                 if (error)
1178                         goto bad;
1179                 error = geterrno3(res.status);
1180                 if (error)
1181                         goto bad;
1182 
1183                 /* get type of root node */
1184                 if (res.resok.obj_attributes.attributes) {
1185                         if (res.resok.obj_attributes.attr.type < NF3REG ||
1186                             res.resok.obj_attributes.attr.type > NF3FIFO) {
1187 #ifdef DEBUG
1188                                 zcmn_err(getzoneid(), CE_WARN,
1189                             "NFS3 server %s returned a bad file type for root",
1190                                     svp->sv_hostname);
1191 #else
1192                                 zcmn_err(getzoneid(), CE_WARN,
1193                             "NFS server %s returned a bad file type for root",
1194                                     svp->sv_hostname);
1195 #endif
1196                                 error = EINVAL;
1197                                 goto bad;
1198                         } else {
                                     /*
                                      * Every server must report the same
                                      * root type as the ones before it.
                                      */
1199                                 if (rtvp->v_type != VNON && rtvp->v_type !=
1200                                     nf3_to_vt[res.resok.obj_attributes.attr.
1201                                     type]) {
1202 #ifdef DEBUG
1203                                         zcmn_err(getzoneid(), CE_WARN,
1204                 "NFS3 server %s returned a different file type for root",
1205                                             svp->sv_hostname);
1206 #else
1207                                         zcmn_err(getzoneid(), CE_WARN,
1208                 "NFS server %s returned a different file type for root",
1209                                             svp->sv_hostname);
1210 #endif
1211                                         error = EINVAL;
1212                                         goto bad;
1213                                 }
1214                                 rtvp->v_type =
1215                                     nf3_to_vt[res.resok.obj_attributes.attr.
1216                                     type];
1217                         }
1218                 }
1219 
                     /*
                      * Read sizes: rtmax caps mi_tsize, and rtpref (or rtmax
                      * when no preference is given) caps mi_curread.  If only
                      * rtpref is given it caps both.  A server that reports
                      * neither is rejected with EIO.
                      */
1220                 if (res.resok.rtmax != 0) {
1221                         mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize);
1222                         if (res.resok.rtpref != 0) {
1223                                 mi->mi_curread = MIN(res.resok.rtpref,
1224                                     mi->mi_curread);
1225                         } else {
1226                                 mi->mi_curread = MIN(res.resok.rtmax,
1227                                     mi->mi_curread);
1228                         }
1229                 } else if (res.resok.rtpref != 0) {
1230                         mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize);
1231                         mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread);
1232                 } else {
1233 #ifdef DEBUG
1234                         zcmn_err(getzoneid(), CE_WARN,
1235                             "NFS3 server %s returned 0 for read transfer sizes",
1236                             svp->sv_hostname);
1237 #else
1238                         zcmn_err(getzoneid(), CE_WARN,
1239                             "NFS server %s returned 0 for read transfer sizes",
1240                             svp->sv_hostname);
1241 #endif
1242                         error = EIO;
1243                         goto bad;
1244                 }
                     /*
                      * Write sizes: same scheme, with wtmax / wtpref applied
                      * to mi_stsize / mi_curwrite.
                      */
1245                 if (res.resok.wtmax != 0) {
1246                         mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize);
1247                         if (res.resok.wtpref != 0) {
1248                                 mi->mi_curwrite = MIN(res.resok.wtpref,
1249                                     mi->mi_curwrite);
1250                         } else {
1251                                 mi->mi_curwrite = MIN(res.resok.wtmax,
1252                                     mi->mi_curwrite);
1253                         }
1254                 } else if (res.resok.wtpref != 0) {
1255                         mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize);
1256                         mi->mi_curwrite = MIN(res.resok.wtpref,
1257                             mi->mi_curwrite);
1258                 } else {
1259 #ifdef DEBUG
1260                         zcmn_err(getzoneid(), CE_WARN,
1261                         "NFS3 server %s returned 0 for write transfer sizes",
1262                             svp->sv_hostname);
1263 #else
1264                         zcmn_err(getzoneid(), CE_WARN,
1265                         "NFS server %s returned 0 for write transfer sizes",
1266                             svp->sv_hostname);
1267 #endif
1268                         error = EIO;
1269                         goto bad;
1270                 }
1271 
1272                 /*
1273                  * These signal the ability of the server to create
1274                  * hard links and symbolic links, so they really
1275                  * aren't relevant if there is more than one server.
1276                  * We'll set them here, though it probably looks odd.
                      *
                      * The flags are only ever OR-ed in, so with several
                      * servers they end up set if any one server has them.
1277                  */
1278                 if (res.resok.properties & FSF3_LINK)
1279                         mi->mi_flags |= MI_LINK;
1280                 if (res.resok.properties & FSF3_SYMLINK)
1281                         mi->mi_flags |= MI_SYMLINK;
1282 
1283                 /* Pick up smallest non-zero maxfilesize value */
1284                 if (res.resok.maxfilesize) {
1285                         if (mi->mi_maxfilesize) {
1286                                 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize,
1287                                     res.resok.maxfilesize);
1288                         } else
1289                                 mi->mi_maxfilesize = res.resok.maxfilesize;
1290                 }
1291 
1292                 /*
1293                  * AUTH_F_TRYNONE is only for the mount operation,
1294                  * so turn it back off.
1295                  */
1296                 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE;
1297         }
             /* The loop left mi_curr_serv on the last server; go back to the first. */
1298         mi->mi_curr_serv = mi->mi_servers;
1299 
1300         /*
1301          * Start the thread responsible for handling async worker threads.
1302          */
1303         VFS_HOLD(vfsp); /* add reference for thread */
1304         mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager,
1305             vfsp, 0, minclsyspri);
1306         ASSERT(mi->mi_manager_thread != NULL);
1307 
1308         /*
1309          * Initialize kstats
1310          */
1311         nfs_mnt_kstat_init(vfsp);
1312 
1313         /* If we didn't get a type, get one now */
1314         if (rtvp->v_type == VNON) {
1315                 va.va_mask = AT_ALL;
1316 
1317                 error = nfs3getattr(rtvp, &va, tcr);
1318                 if (error)
1319                         goto bad;
1320                 rtvp->v_type = va.va_type;
1321         }
1322 
1323         mi->mi_type = rtvp->v_type;
1324 
             /* Success: our reference on the root vnode passes to the caller. */
1325         *rtvpp = rtvp;
1326         if (lcr != NULL)
1327                 crfree(lcr);
1328 
1329         return (0);
1330 bad:
1331         /*
1332          * An error occurred somewhere, need to clean up...
1333          * We need to release our reference to the root vnode and
1334          * destroy the mntinfo struct that we just created.
              *
              * This label is reached both from the FSINFO loop (before the
              * manager thread and kstats exist) and from the getattr
              * failure (after they exist).  The kstat pointers are checked
              * for that reason; mi came from kmem_zalloc() so they start
              * out NULL.
              * NOTE(review): nfs_async_stop() / nfs_async_manager_stop()
              * are presumably safe when no manager thread was started --
              * confirm.
1335          */
1336         if (lcr != NULL)
1337                 crfree(lcr);
1338         rp = VTOR(rtvp);
1339         if (rp->r_flags & RHASHED)
1340                 rp_rmhash(rp);
1341         VN_RELE(rtvp);
1342         nfs_async_stop(vfsp);
1343         nfs_async_manager_stop(vfsp);
1344         if (mi->mi_io_kstats) {
1345                 kstat_delete(mi->mi_io_kstats);
1346                 mi->mi_io_kstats = NULL;
1347         }
1348         if (mi->mi_ro_kstats) {
1349                 kstat_delete(mi->mi_ro_kstats);
1350                 mi->mi_ro_kstats = NULL;
1351         }
1352         nfs_free_mi(mi);
1353         *rtvpp = NULL;
1354         return (error);
1355 }
1356 
1357 /*
1358  * vfs operations
      *
      * nfs3_unmount: unmount the NFS Version 3 file system vfsp.
      *
      * Returns EPERM if secpolicy_fs_unmount() refuses the caller.
      *
      * With MS_FORCE set in flag the unmount cannot fail: the vfs is
      * marked VFS_UNMOUNTED, async work is shut off, the rnode table and
      * the kstats are destroyed, and 0 is returned.
      *
      * Without MS_FORCE the function returns EINTR if nfs_async_stop_sig()
      * reports non-zero, EBUSY if check_rtable() still finds something
      * after the flush, and otherwise tears down as above and returns 0.
      *
      * The mntinfo itself is not freed on any path here.
1359  */
1360 static int
1361 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr)
1362 {
1363         mntinfo_t *mi;
1364         ushort_t omax;
1365 
1366         if (secpolicy_fs_unmount(cr, vfsp) != 0)
1367                 return (EPERM);
1368 
1369         mi = VFTOMI(vfsp);
1370         if (flag & MS_FORCE) {
1371 
1372                 vfsp->vfs_flag |= VFS_UNMOUNTED;
1373 
1374                 /*
1375                  * We are about to stop the async manager.
1376                  * Let every one know not to schedule any
1377                  * more async requests
1378                  */
1379                 mutex_enter(&mi->mi_async_lock);
1380                 mi->mi_max_threads = 0;
1381                 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
1382                 mutex_exit(&mi->mi_async_lock);
1383 
1384                 /*
1385                  * We need to stop the manager thread explicitly; the worker
1386                  * threads can time out and exit on their own.
1387                  */
1388                 nfs_async_manager_stop(vfsp);
1389                 destroy_rtable(vfsp, cr);
1390                 if (mi->mi_io_kstats) {
1391                         kstat_delete(mi->mi_io_kstats);
1392                         mi->mi_io_kstats = NULL;
1393                 }
1394                 if (mi->mi_ro_kstats) {
1395                         kstat_delete(mi->mi_ro_kstats);
1396                         mi->mi_ro_kstats = NULL;
1397                 }
1398                 return (0);
1399         }
1400         /*
1401          * Wait until all asynchronous putpage operations on
1402          * this file system are complete before flushing rnodes
1403          * from the cache.
              *
              * mi_max_threads is saved first so that it can be put back if
              * the unmount is refused with EBUSY below.
              * NOTE(review): presumably nfs_async_stop_sig() zeroes
              * mi_max_threads -- confirm.  Note also that the EINTR return
              * does not restore omax.
1404          */
1405         omax = mi->mi_max_threads;
1406         if (nfs_async_stop_sig(vfsp)) {
1407                 return (EINTR);
1408         }
1409         rflush(vfsp, cr);
1410         /*
1411          * If there are any active vnodes on this file system,
1412          * then the file system is busy and can't be umounted.
1413          */
1414         if (check_rtable(vfsp)) {
1415                 mutex_enter(&mi->mi_async_lock);
1416                 mi->mi_max_threads = omax;
1417                 mutex_exit(&mi->mi_async_lock);
1418                 return (EBUSY);
1419         }
1420         /*
1421          * The unmount can't fail from now on; stop the worker thread manager.
1422          */
1423         nfs_async_manager_stop(vfsp);
1424         /*
1425          * Destroy all rnodes belonging to this file system from the
1426          * rnode hash queues and purge any resources allocated to
1427          * them.
1428          */
1429         destroy_rtable(vfsp, cr);
1430         if (mi->mi_io_kstats) {
1431                 kstat_delete(mi->mi_io_kstats);
1432                 mi->mi_io_kstats = NULL;
1433         }
1434         if (mi->mi_ro_kstats) {
1435                 kstat_delete(mi->mi_ro_kstats);
1436                 mi->mi_ro_kstats = NULL;
1437         }
1438         return (0);
1439 }
1440 
1441 /*
1442  * find root of nfs
      *
      * Builds the root vnode of vfsp from the current server's file handle
      * and stores it in *vpp.
      *
      * Returns 0 on success, EPERM when the caller's zone (nfs_zone()) is
      * not the zone that owns the mount, and ENOENT when the current server
      * had SV_ROOT_STALE set.  In the ENOENT case the stale markers are
      * cleared, the vnode is released and *vpp is left untouched.
1443  */
1444 static int
1445 nfs3_root(vfs_t *vfsp, vnode_t **vpp)
1446 {
1447         mntinfo_t *mi;
1448         vnode_t *vp;
1449         servinfo_t *svp;
1450         rnode_t *rp;
1451         int error = 0;
1452 
1453         mi = VFTOMI(vfsp);
1454 
1455         if (nfs_zone() != mi->mi_zone)
1456                 return (EPERM);
1457 
             /*
              * SV_ROOT_STALE is tested without sv_lock and then cleared
              * under it; the error is only remembered here because the
              * rnode must be looked up before RSTALE can be cleared too.
              */
1458         svp = mi->mi_curr_serv;
1459         if (svp && (svp->sv_flags & SV_ROOT_STALE)) {
1460                 mutex_enter(&svp->sv_lock);
1461                 svp->sv_flags &= ~SV_ROOT_STALE;
1462                 mutex_exit(&svp->sv_lock);
1463                 error = ENOENT;
1464         }
1465 
             /*
              * NOTE(review): svp is NULL-checked above, yet mi_curr_serv is
              * re-read and dereferenced unconditionally here -- confirm that
              * mi_curr_serv can never be NULL for a mounted file system.
              */
1466         vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle,
1467             NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1468 
1469         /*
1470          * if the SV_ROOT_STALE flag was reset above, reset the
1471          * RSTALE flag if needed and return an error
1472          */
1473         if (error == ENOENT) {
1474                 rp = VTOR(vp);
1475                 if (svp && rp->r_flags & RSTALE) {
1476                         mutex_enter(&rp->r_statelock);
1477                         rp->r_flags &= ~RSTALE;
1478                         mutex_exit(&rp->r_statelock);
1479                 }
1480                 VN_RELE(vp);
1481                 return (error);
1482         }
1483 
1484         ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
1485 
             /* The root always carries the type recorded in mi_type. */
1486         vp->v_type = mi->mi_type;
1487 
1488         *vpp = vp;
1489 
1490         return (0);
1491 }
1492 
1493 /*
1494  * Get file system statistics.
      *
      * Sends an FSSTAT request for the root file handle of vfsp and, on
      * success, fills in *sbp from the reply.
      *
      * Returns 0 on success.  Otherwise returns EPERM when called from a
      * zone other than the mount's, or the error from nfs3_root(),
      * rfs3call() or geterrno3(); *sbp is only written on success.
      *
      * The root vnode obtained from nfs3_root() is released before every
      * return that follows it.
1495  */
1496 static int
1497 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
1498 {
1499         int error;
1500         struct mntinfo *mi;
1501         struct FSSTAT3args args;
1502         struct FSSTAT3res res;
1503         int douprintf;
1504         failinfo_t fi;
1505         vnode_t *vp;
1506         cred_t *cr;
1507         hrtime_t t;
1508 
1509         mi = VFTOMI(vfsp);
1510         if (nfs_zone() != mi->mi_zone)
1511                 return (EPERM);
1512         error = nfs3_root(vfsp, &vp);
1513         if (error)
1514                 return (error);
1515 
1516         cr = CRED();
1517 
             /*
              * fi describes the root vnode and the file handle inside args;
              * it is handed to rfs3call() below.
              * NOTE(review): presumably used to redo the call against
              * another server on failover -- confirm.
              */
1518         args.fsroot = *VTOFH3(vp);
1519         fi.vp = vp;
1520         fi.fhp = (caddr_t)&args.fsroot;
1521         fi.copyproc = nfs3copyfh;
1522         fi.lookupproc = nfs3lookup;
1523         fi.xattrdirproc = acl_getxattrdir3;
1524 
1525         douprintf = 1;
1526 
             /* Timestamp taken before the call; passed with the returned attributes. */
1527         t = gethrtime();
1528 
1529         error = rfs3call(mi, NFSPROC3_FSSTAT,
1530             xdr_nfs_fh3, (caddr_t)&args,
1531             xdr_FSSTAT3res, (caddr_t)&res, cr,
1532             &douprintf, &res.status, 0, &fi);
1533 
1534         if (error) {
1535                 VN_RELE(vp);
1536                 return (error);
1537         }
1538 
1539         error = geterrno3(res.status);
1540         if (!error) {
1541                 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr);
1542                 sbp->f_bsize = MAXBSIZE;
1543                 sbp->f_frsize = DEV_BSIZE;
1544                 /*
1545                  * Allow -1 fields to pass through unconverted.  These
1546                  * indicate "don't know" fields.
                      *
                      * All other byte counts from the server are converted
                      * to counts of DEV_BSIZE units (the f_frsize above).
1547                  */
1548                 if (res.resok.tbytes == (size3)-1)
1549                         sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes;
1550                 else {
1551                         sbp->f_blocks = (fsblkcnt64_t)
1552                             (res.resok.tbytes / DEV_BSIZE);
1553                 }
1554                 if (res.resok.fbytes == (size3)-1)
1555                         sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes;
1556                 else {
1557                         sbp->f_bfree = (fsblkcnt64_t)
1558                             (res.resok.fbytes / DEV_BSIZE);
1559                 }
1560                 if (res.resok.abytes == (size3)-1)
1561                         sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes;
1562                 else {
1563                         sbp->f_bavail = (fsblkcnt64_t)
1564                             (res.resok.abytes / DEV_BSIZE);
1565                 }
1566                 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles;
1567                 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles;
1568                 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles;
1569                 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
                     /*
                      * NOTE(review): strncpy() leaves f_basetype without a
                      * terminating NUL if vsw_name is FSTYPSZ bytes or
                      * longer -- confirm registered names are shorter.
                      */
1570                 (void) strncpy(sbp->f_basetype,
1571                     vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
1572                 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
1573                 sbp->f_namemax = (ulong_t)-1;
1574         } else {
                     /*
                      * The failure reply also carries attributes; cache
                      * them, then let PURGE_STALE_FH act on the error.
                      */
1575                 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr);
1576                 PURGE_STALE_FH(error, vp, cr);
1577         }
1578 
1579         VN_RELE(vp);
1580 
1581         return (error);
1582 }
1583 
1584 static kmutex_t nfs3_syncbusy;
1585 
1586 /*
1587  * Flush dirty nfs files for file system vfsp.
1588  * If vfsp == NULL, all nfs files are flushed.
1589  */
1590 /* ARGSUSED */
1591 static int
1592 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr)
1593 {
1594         /*
1595          * Cross-zone calls are OK here, since this translates to a
1596          * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
1597          */
1598         if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) {
1599                 rflush(vfsp, cr);
1600                 mutex_exit(&nfs3_syncbusy);
1601         }
1602         return (0);
1603 }
1604 
1605 /* ARGSUSED */
1606 static int
1607 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1608 {
1609         int error;
1610         nfs_fh3 fh;
1611         vnode_t *vp;
1612         struct vattr va;
1613 
1614         if (fidp->fid_len > NFS3_FHSIZE) {
1615                 *vpp = NULL;
1616                 return (ESTALE);
1617         }
1618 
1619         if (nfs_zone() != VFTOMI(vfsp)->mi_zone)
1620                 return (EPERM);
1621         fh.fh3_length = fidp->fid_len;
1622         bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length);
1623 
1624         vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1625 
1626         if (VTOR(vp)->r_flags & RSTALE) {
1627                 VN_RELE(vp);
1628                 *vpp = NULL;
1629                 return (ENOENT);
1630         }
1631 
1632         if (vp->v_type == VNON) {
1633                 va.va_mask = AT_ALL;
1634                 error = nfs3getattr(vp, &va, CRED());
1635                 if (error) {
1636                         VN_RELE(vp);
1637                         *vpp = NULL;
1638                         return (error);
1639                 }
1640                 vp->v_type = va.va_type;
1641         }
1642 
1643         *vpp = vp;
1644 
1645         return (0);
1646 }
1647 
1648 /* ARGSUSED */
1649 static int
1650 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why)
1651 {
1652         vnode_t *rtvp;
1653         char root_hostname[SYS_NMLN+1];
1654         struct servinfo *svp;
1655         int error;
1656         int vfsflags;
1657         size_t size;
1658         char *root_path;
1659         struct pathname pn;
1660         char *name;
1661         cred_t *cr;
1662         struct nfs_args args;           /* nfs mount arguments */
1663         static char token[10];
1664 
1665         bzero(&args, sizeof (args));
1666 
1667         /* do this BEFORE getfile which causes xid stamps to be initialized */
1668         clkset(-1L);            /* hack for now - until we get time svc? */
1669 
1670         if (why == ROOT_REMOUNT) {
1671                 /*
1672                  * Shouldn't happen.
1673                  */
1674                 panic("nfs3_mountroot: why == ROOT_REMOUNT");
1675         }
1676 
1677         if (why == ROOT_UNMOUNT) {
1678                 /*
1679                  * Nothing to do for NFS.
1680                  */
1681                 return (0);
1682         }
1683 
1684         /*
1685          * why == ROOT_INIT
1686          */
1687 
1688         name = token;
1689         *name = 0;
1690         getfsname("root", name, sizeof (token));
1691 
1692         pn_alloc(&pn);
1693         root_path = pn.pn_path;
1694 
1695         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
1696         svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
1697         svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1698         svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1699 
1700         /*
1701          * Get server address
1702          * Get the root fhandle
1703          * Get server's transport
1704          * Get server's hostname
1705          * Get options
1706          */
1707         args.addr = &svp->sv_addr;
1708         args.fh = (char *)&svp->sv_fhandle;
1709         args.knconf = svp->sv_knconf;
1710         args.hostname = root_hostname;
1711         vfsflags = 0;
1712         if (error = mount_root(*name ? name : "root", root_path, NFS_V3,
1713             &args, &vfsflags)) {
1714                 if (error == EPROTONOSUPPORT)
1715                         nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: "
1716                             "mount_root failed: server doesn't support NFS V3");
1717                 else
1718                         nfs_cmn_err(error, CE_WARN,
1719                             "nfs3_mountroot: mount_root failed: %m");
1720                 sv_free(svp);
1721                 pn_free(&pn);
1722                 return (error);
1723         }
1724         svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
1725         svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
1726         (void) strcpy(svp->sv_hostname, root_hostname);
1727 
1728         /*
1729          * Force root partition to always be mounted with AUTH_UNIX for now
1730          */
1731         svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
1732         svp->sv_secdata->secmod = AUTH_UNIX;
1733         svp->sv_secdata->rpcflavor = AUTH_UNIX;
1734         svp->sv_secdata->data = NULL;
1735 
1736         cr = crgetcred();
1737         rtvp = NULL;
1738 
1739         error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
1740 
1741         crfree(cr);
1742 
1743         if (error) {
1744                 pn_free(&pn);
1745                 sv_free(svp);
1746                 return (error);
1747         }
1748 
1749         error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args);
1750         if (error) {
1751                 nfs_cmn_err(error, CE_WARN,
1752                     "nfs3_mountroot: invalid root mount options");
1753                 pn_free(&pn);
1754                 goto errout;
1755         }
1756 
1757         (void) vfs_lock_wait(vfsp);
1758         vfs_add(NULL, vfsp, vfsflags);
1759         vfs_unlock(vfsp);
1760 
1761         size = strlen(svp->sv_hostname);
1762         (void) strcpy(rootfs.bo_name, svp->sv_hostname);
1763         rootfs.bo_name[size] = ':';
1764         (void) strcpy(&rootfs.bo_name[size + 1], root_path);
1765 
1766         pn_free(&pn);
1767 
1768 errout:
1769         if (error) {
1770                 sv_free(svp);
1771                 nfs_async_stop(vfsp);
1772                 nfs_async_manager_stop(vfsp);
1773         }
1774 
1775         if (rtvp != NULL)
1776                 VN_RELE(rtvp);
1777 
1778         return (error);
1779 }
1780 
1781 /*
1782  * Initialization routine for VFS routines.  Should only be called once
1783  */
1784 int
1785 nfs3_vfsinit(void)
1786 {
1787         mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL);
1788         return (0);
1789 }
1790 
1791 void
1792 nfs3_vfsfini(void)
1793 {
1794         mutex_destroy(&nfs3_syncbusy);
1795 }
1796 
1797 void
1798 nfs3_freevfs(vfs_t *vfsp)
1799 {
1800         mntinfo_t *mi;
1801         servinfo_t *svp;
1802 
1803         /* free up the resources */
1804         mi = VFTOMI(vfsp);
1805         svp = mi->mi_servers;
1806         mi->mi_servers = mi->mi_curr_serv = NULL;
1807         sv_free(svp);
1808 
1809         /*
1810          * By this time we should have already deleted the
1811          * mi kstats in the unmount code. If they are still around
1812          * somethings wrong
1813          */
1814         ASSERT(mi->mi_io_kstats == NULL);
1815         nfs_free_mi(mi);
1816 }