1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  * Copyright 2013 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  *      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
  29  *              All rights reserved.
  30  */
  31 
  32 #include <sys/errno.h>
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/user.h>
  36 #include <sys/stat.h>
  37 #include <sys/time.h>
  38 #include <sys/utsname.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vfs_opreg.h>
  41 #include <sys/vnode.h>
  42 #include <sys/pathname.h>
  43 #include <sys/bootconf.h>
  44 #include <fs/fs_subr.h>
  45 #include <rpc/types.h>
  46 #include <nfs/nfs.h>
  47 #include <nfs/nfs4.h>
  48 #include <nfs/nfs_clnt.h>
  49 #include <nfs/rnode.h>
  50 #include <nfs/mount.h>
  51 #include <nfs/nfssys.h>
  52 #include <sys/debug.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/file.h>
  55 #include <sys/fcntl.h>
  56 #include <sys/zone.h>
  57 
  58 /*
  59  * This is the loadable module wrapper.
  60  */
  61 #include <sys/systm.h>
  62 #include <sys/modctl.h>
  63 #include <sys/syscall.h>
  64 #include <sys/ddi.h>
  65 
  66 #include <rpc/types.h>
  67 #include <rpc/auth.h>
  68 #include <rpc/clnt.h>
  69 #include <rpc/svc.h>
  70 
  71 /*
  72  * The pseudo NFS filesystem to allow diskless booting to dynamically
  73  * mount either a NFS V2, NFS V3, or NFS V4 filesystem.  This only implements
  74  * the VFS_MOUNTROOT op and is only intended to be used by the
  75  * diskless booting code until the real root filesystem is mounted.
  76  * Nothing else should ever call this!
  77  *
  78  * The strategy is that if the initial rootfs type is set to "nfsdyn"
  79  * by loadrootmodules() this filesystem is called to mount the
  80  * root filesystem.  It first attempts to mount a V4 filesystem, and if that
  81  * fails due to an RPC version mismatch it tries V3 and finally V2.
  82  * Once the real mount succeeds the vfsops and rootfs name are changed
  83  * to reflect the real filesystem type.
  84  */
  85 static int nfsdyninit(int, char *);
  86 static int nfsdyn_mountroot(vfs_t *, whymountroot_t);
  87 
  88 vfsops_t *nfsdyn_vfsops;
  89 
  90 /*
  91  * The following data structures are used to configure the NFS
  92  * system call, the NFS Version 2 client VFS, and the NFS Version
  93  * 3 client VFS into the system.  The NFS Version 4 structures are defined in
  94  * nfs4_common.c
  95  */
  96 
  97 /*
  98  * The NFS system call.
  99  */
 100 static struct sysent nfssysent = {
 101         2,
 102         SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
 103         nfssys
 104 };
 105 
 106 static struct modlsys modlsys = {
 107         &mod_syscallops,
 108         "NFS syscall, client, and common",
 109         &nfssysent
 110 };
 111 
 112 #ifdef _SYSCALL32_IMPL
 113 static struct modlsys modlsys32 = {
 114         &mod_syscallops32,
 115         "NFS syscall, client, and common (32-bit)",
 116         &nfssysent
 117 };
 118 #endif /* _SYSCALL32_IMPL */
 119 
 120 /*
 121  * The NFS Dynamic client VFS.
 122  */
 123 static vfsdef_t vfw = {
 124         VFSDEF_VERSION,
 125         "nfsdyn",
 126         nfsdyninit,
 127         0,
 128         NULL
 129 };
 130 
 131 static struct modlfs modlfs = {
 132         &mod_fsops,
 133         "network filesystem",
 134         &vfw
 135 };
 136 
 137 /*
 138  * The NFS Version 2 client VFS.
 139  */
 140 static vfsdef_t vfw2 = {
 141         VFSDEF_VERSION,
 142         "nfs",
 143         nfsinit,
 144         VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
 145         NULL
 146 };
 147 
 148 static struct modlfs modlfs2 = {
 149         &mod_fsops,
 150         "network filesystem version 2",
 151         &vfw2
 152 };
 153 
 154 /*
 155  * The NFS Version 3 client VFS.
 156  */
 157 static vfsdef_t vfw3 = {
 158         VFSDEF_VERSION,
 159         "nfs3",
 160         nfs3init,
 161         VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
 162         NULL
 163 };
 164 
 165 static struct modlfs modlfs3 = {
 166         &mod_fsops,
 167         "network filesystem version 3",
 168         &vfw3
 169 };
 170 
 171 extern struct modlfs modlfs4;
 172 
 173 /*
 174  * We have too many linkage structures so we define our own XXX
 175  */
 176 struct modlinkage_big {
 177         int             ml_rev;         /* rev of loadable modules system */
 178         void            *ml_linkage[7]; /* NULL terminated list of */
 179                                         /* linkage structures */
 180 };
 181 
 182 /*
 183  * All of the module configuration linkages required to configure
 184  * the system call and client VFS's into the system.
 185  */
 186 static struct modlinkage_big modlinkage = {
 187         MODREV_1,
 188         &modlsys,
 189 #ifdef _SYSCALL32_IMPL
 190         &modlsys32,
 191 #endif
 192         &modlfs,
 193         &modlfs2,
 194         &modlfs3,
 195         &modlfs4,
 196         NULL
 197 };
 198 
 199 /*
 200  * This routine is invoked automatically when the kernel module
 201  * containing this routine is loaded.  This allows module specific
 202  * initialization to be done when the module is loaded.
 203  */
 204 int
 205 _init(void)
 206 {
 207         int status;
 208 
 209         if ((status = nfs_clntinit()) != 0) {
 210                 cmn_err(CE_WARN, "_init: nfs_clntinit failed");
 211                 return (status);
 212         }
 213 
 214         /*
 215          * Create the version specific kstats.
 216          *
 217          * PSARC 2001/697 Contract Private Interface
 218          * All nfs kstats are under SunMC contract
 219          * Please refer to the PSARC listed above and contact
 220          * SunMC before making any changes!
 221          *
 222          * Changes must be reviewed by Solaris File Sharing
 223          * Changes must be communicated to contract-2001-697@sun.com
 224          *
 225          */
 226 
 227         zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL,
 228             nfsstat_zone_fini);
 229         status = mod_install((struct modlinkage *)&modlinkage);
 230 
 231         if (status)  {
 232                 (void) zone_key_delete(nfsstat_zone_key);
 233 
 234                 /*
 235                  * Failed to install module, cleanup previous
 236                  * initialization work.
 237                  */
 238                 nfs_clntfini();
 239 
 240                 /*
 241                  * Clean up work performed indirectly by mod_installfs()
 242                  * as a result of our call to mod_install().
 243                  */
 244                 nfs4fini();
 245                 nfs3fini();
 246                 nfsfini();
 247         }
 248         return (status);
 249 }
 250 
 251 int
 252 _fini(void)
 253 {
 254         /* Don't allow module to be unloaded */
 255         return (EBUSY);
 256 }
 257 
 258 int
 259 _info(struct modinfo *modinfop)
 260 {
 261         return (mod_info((struct modlinkage *)&modlinkage, modinfop));
 262 }
 263 
 264 /*
 265  * General utilities
 266  */
 267 
 268 /*
 269  * Returns the preferred transfer size in bytes based on
 270  * what network interfaces are available.
 271  */
 272 int
 273 nfstsize(void)
 274 {
 275         /*
 276          * For the moment, just return NFS_MAXDATA until we can query the
 277          * appropriate transport.
 278          */
 279         return (NFS_MAXDATA);
 280 }
 281 
 282 /*
 283  * Returns the preferred transfer size in bytes based on
 284  * what network interfaces are available.
 285  */
 286 
 287 /* this should reflect the largest transfer size possible */
 288 static int nfs3_max_transfer_size = 1024 * 1024;
 289 
 290 int
 291 nfs3tsize(void)
 292 {
 293         /*
 294          * For the moment, just return nfs3_max_transfer_size until we
 295          * can query the appropriate transport.
 296          */
 297         return (nfs3_max_transfer_size);
 298 }
 299 
 300 static uint_t nfs3_max_transfer_size_clts = 32 * 1024;
 301 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024;
 302 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024;
 303 
 304 uint_t
 305 nfs3_tsize(struct knetconfig *knp)
 306 {
 307 
 308         if (knp->knc_semantics == NC_TPI_COTS_ORD ||
 309             knp->knc_semantics == NC_TPI_COTS)
 310                 return (nfs3_max_transfer_size_cots);
 311         if (knp->knc_semantics == NC_TPI_RDMA)
 312                 return (nfs3_max_transfer_size_rdma);
 313         return (nfs3_max_transfer_size_clts);
 314 }
 315 
 316 uint_t
 317 rfs3_tsize(struct svc_req *req)
 318 {
 319 
 320         if (req->rq_xprt->xp_type == T_COTS_ORD ||
 321             req->rq_xprt->xp_type == T_COTS)
 322                 return (nfs3_max_transfer_size_cots);
 323         if (req->rq_xprt->xp_type == T_RDMA)
 324                 return (nfs3_max_transfer_size_rdma);
 325         return (nfs3_max_transfer_size_clts);
 326 }
 327 
 328 /* ARGSUSED */
 329 static int
 330 nfsdyninit(int fstyp, char *name)
 331 {
 332         static const fs_operation_def_t nfsdyn_vfsops_template[] = {
 333                 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot },
 334                 NULL, NULL
 335         };
 336         int error;
 337 
 338         error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops);
 339         if (error != 0)
 340                 return (error);
 341 
 342         return (0);
 343 }
 344 
 345 /* ARGSUSED */
 346 static int
 347 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why)
 348 {
 349         char root_hostname[SYS_NMLN+1];
 350         struct servinfo *svp;
 351         int error;
 352         int vfsflags;
 353         char *root_path;
 354         struct pathname pn;
 355         char *name;
 356         static char token[10];
 357         struct nfs_args args;           /* nfs mount arguments */
 358 
 359         bzero(&args, sizeof (args));
 360 
 361         /* do this BEFORE getfile which causes xid stamps to be initialized */
 362         clkset(-1L);            /* hack for now - until we get time svc? */
 363 
 364         if (why == ROOT_REMOUNT) {
 365                 /*
 366                  * Shouldn't happen.
 367                  */
 368                 panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
 369         }
 370 
 371         if (why == ROOT_UNMOUNT) {
 372                 /*
 373                  * Nothing to do for NFS.
 374                  */
 375                 return (0);
 376         }
 377 
 378         /*
 379          * why == ROOT_INIT
 380          */
 381 
 382         name = token;
 383         *name = 0;
 384         getfsname("root", name, sizeof (token));
 385 
 386         pn_alloc(&pn);
 387         root_path = pn.pn_path;
 388 
 389         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
 390         mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
 391         svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
 392         svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 393         svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 394 
 395         /*
 396          * First try version 4
 397          */
 398         vfs_setops(vfsp, nfs4_vfsops);
 399         args.addr = &svp->sv_addr;
 400         args.fh = (char *)&svp->sv_fhandle;
 401         args.knconf = svp->sv_knconf;
 402         args.hostname = root_hostname;
 403         vfsflags = 0;
 404 
 405         if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
 406             &args, &vfsflags)) {
 407                 if (error != EPROTONOSUPPORT) {
 408                         nfs_cmn_err(error, CE_WARN,
 409                             "Unable to mount NFS root filesystem: %m");
 410                         sv_free(svp);
 411                         pn_free(&pn);
 412                         vfs_setops(vfsp, nfsdyn_vfsops);
 413                         return (error);
 414                 }
 415 
 416                 /*
 417                  * Then try version 3
 418                  */
 419                 bzero(&args, sizeof (args));
 420                 vfs_setops(vfsp, nfs3_vfsops);
 421                 args.addr = &svp->sv_addr;
 422                 args.fh = (char *)&svp->sv_fhandle;
 423                 args.knconf = svp->sv_knconf;
 424                 args.hostname = root_hostname;
 425                 vfsflags = 0;
 426 
 427                 if (error = mount_root(*name ? name : "root", root_path,
 428                     NFS_V3, &args, &vfsflags)) {
 429                         if (error != EPROTONOSUPPORT) {
 430                                 nfs_cmn_err(error, CE_WARN,
 431                                     "Unable to mount NFS root filesystem: %m");
 432                                 sv_free(svp);
 433                                 pn_free(&pn);
 434                                 vfs_setops(vfsp, nfsdyn_vfsops);
 435                                 return (error);
 436                         }
 437 
 438                         /*
 439                          * Finally, try version 2
 440                          */
 441                         bzero(&args, sizeof (args));
 442                         args.addr = &svp->sv_addr;
 443                         args.fh = (char *)&svp->sv_fhandle.fh_buf;
 444                         args.knconf = svp->sv_knconf;
 445                         args.hostname = root_hostname;
 446                         vfsflags = 0;
 447 
 448                         vfs_setops(vfsp, nfs_vfsops);
 449 
 450                         if (error = mount_root(*name ? name : "root",
 451                             root_path, NFS_VERSION, &args, &vfsflags)) {
 452                                 nfs_cmn_err(error, CE_WARN,
 453                                     "Unable to mount NFS root filesystem: %m");
 454                                 sv_free(svp);
 455                                 pn_free(&pn);
 456                                 vfs_setops(vfsp, nfsdyn_vfsops);
 457                                 return (error);
 458                         }
 459                 }
 460         }
 461 
 462         sv_free(svp);
 463         pn_free(&pn);
 464         return (VFS_MOUNTROOT(vfsp, why));
 465 }
 466 
 467 int
 468 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf)
 469 {
 470         mntinfo_t *mi;                  /* mount info, pointed at by vfs */
 471         STRUCT_HANDLE(nfs_args, args);
 472         int flags;
 473 
 474 #ifdef lint
 475         model = model;
 476 #endif
 477 
 478         STRUCT_SET_HANDLE(args, model, buf);
 479 
 480         flags = STRUCT_FGET(args, flags);
 481 
 482         /*
 483          * Set option fields in mount info record
 484          */
 485         mi = VTOMI(vp);
 486 
 487         if (flags & NFSMNT_NOAC) {
 488                 mi->mi_flags |= MI_NOAC;
 489                 PURGE_ATTRCACHE(vp);
 490         }
 491         if (flags & NFSMNT_NOCTO)
 492                 mi->mi_flags |= MI_NOCTO;
 493         if (flags & NFSMNT_LLOCK)
 494                 mi->mi_flags |= MI_LLOCK;
 495         if (flags & NFSMNT_GRPID)
 496                 mi->mi_flags |= MI_GRPID;
 497         if (flags & NFSMNT_RETRANS) {
 498                 if (STRUCT_FGET(args, retrans) < 0)
 499                         return (EINVAL);
 500                 mi->mi_retrans = STRUCT_FGET(args, retrans);
 501         }
 502         if (flags & NFSMNT_TIMEO) {
 503                 if (STRUCT_FGET(args, timeo) <= 0)
 504                         return (EINVAL);
 505                 mi->mi_timeo = STRUCT_FGET(args, timeo);
 506                 /*
 507                  * The following scales the standard deviation and
 508                  * and current retransmission timer to match the
 509                  * initial value for the timeout specified.
 510                  */
 511                 mi->mi_timers[NFS_CALLTYPES].rt_deviate =
 512                     (mi->mi_timeo * hz * 2) / 5;
 513                 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur =
 514                     mi->mi_timeo * hz / 10;
 515         }
 516         if (flags & NFSMNT_RSIZE) {
 517                 if (STRUCT_FGET(args, rsize) <= 0)
 518                         return (EINVAL);
 519                 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize));
 520                 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize);
 521         }
 522         if (flags & NFSMNT_WSIZE) {
 523                 if (STRUCT_FGET(args, wsize) <= 0)
 524                         return (EINVAL);
 525                 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize));
 526                 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize);
 527         }
 528         if (flags & NFSMNT_ACREGMIN) {
 529                 if (STRUCT_FGET(args, acregmin) < 0)
 530                         mi->mi_acregmin = ACMINMAX;
 531                 else
 532                         mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin),
 533                             ACMINMAX);
 534                 mi->mi_acregmin = SEC2HR(mi->mi_acregmin);
 535         }
 536         if (flags & NFSMNT_ACREGMAX) {
 537                 if (STRUCT_FGET(args, acregmax) < 0)
 538                         mi->mi_acregmax = ACMAXMAX;
 539                 else
 540                         mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax),
 541                             ACMAXMAX);
 542                 mi->mi_acregmax = SEC2HR(mi->mi_acregmax);
 543         }
 544         if (flags & NFSMNT_ACDIRMIN) {
 545                 if (STRUCT_FGET(args, acdirmin) < 0)
 546                         mi->mi_acdirmin = ACMINMAX;
 547                 else
 548                         mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin),
 549                             ACMINMAX);
 550                 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin);
 551         }
 552         if (flags & NFSMNT_ACDIRMAX) {
 553                 if (STRUCT_FGET(args, acdirmax) < 0)
 554                         mi->mi_acdirmax = ACMAXMAX;
 555                 else
 556                         mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax),
 557                             ACMAXMAX);
 558                 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax);
 559         }
 560 
 561         if (flags & NFSMNT_LOOPBACK)
 562                 mi->mi_flags |= MI_LOOPBACK;
 563 
 564         return (0);
 565 }
 566 
 567 /*
 568  * Set or Clear direct I/O flag
 569  * VOP_RWLOCK() is held for write access to prevent a race condition
 570  * which would occur if a process is in the middle of a write when
 571  * directio flag gets set. It is possible that all pages may not get flushed.
 572  */
 573 
 574 /* ARGSUSED */
 575 int
 576 nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
 577 {
 578         int     error = 0;
 579         rnode_t *rp;
 580 
 581         rp = VTOR(vp);
 582 
 583         if (cmd == DIRECTIO_ON) {
 584 
 585                 if (rp->r_flags & RDIRECTIO)
 586                         return (0);
 587 
 588                 /*
 589                  * Flush the page cache.
 590                  */
 591 
 592                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 593 
 594                 if (rp->r_flags & RDIRECTIO) {
 595                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 596                         return (0);
 597                 }
 598 
 599                 if (vn_has_cached_data(vp) &&
 600                     ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
 601                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
 602                             B_INVAL, cr, NULL);
 603                         if (error) {
 604                                 if (error == ENOSPC || error == EDQUOT) {
 605                                         mutex_enter(&rp->r_statelock);
 606                                         if (!rp->r_error)
 607                                                 rp->r_error = error;
 608                                         mutex_exit(&rp->r_statelock);
 609                                 }
 610                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 611                                 return (error);
 612                         }
 613                 }
 614 
 615                 mutex_enter(&rp->r_statelock);
 616                 rp->r_flags |= RDIRECTIO;
 617                 mutex_exit(&rp->r_statelock);
 618                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 619                 return (0);
 620         }
 621 
 622         if (cmd == DIRECTIO_OFF) {
 623                 mutex_enter(&rp->r_statelock);
 624                 rp->r_flags &= ~RDIRECTIO;       /* disable direct mode */
 625                 mutex_exit(&rp->r_statelock);
 626                 return (0);
 627         }
 628 
 629         return (EINVAL);
 630 }