1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  * Copyright 2013 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  *      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
  29  *              All rights reserved.
  30  */
  31 
  32 #include <sys/errno.h>
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/user.h>
  36 #include <sys/stat.h>
  37 #include <sys/time.h>
  38 #include <sys/utsname.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vfs_opreg.h>
  41 #include <sys/vnode.h>
  42 #include <sys/pathname.h>
  43 #include <sys/bootconf.h>
  44 #include <fs/fs_subr.h>
  45 #include <rpc/types.h>
  46 #include <nfs/nfs.h>
  47 #include <nfs/nfs4.h>
  48 #include <nfs/nfs_clnt.h>
  49 #include <nfs/rnode.h>
  50 #include <nfs/mount.h>
  51 #include <nfs/nfssys.h>
  52 #include <sys/debug.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/file.h>
  55 #include <sys/fcntl.h>
  56 #include <sys/zone.h>
  57 
  58 /*
  59  * This is the loadable module wrapper.
  60  */
  61 #include <sys/systm.h>
  62 #include <sys/modctl.h>
  63 #include <sys/syscall.h>
  64 #include <sys/ddi.h>
  65 
  66 #include <rpc/types.h>
  67 #include <rpc/auth.h>
  68 #include <rpc/clnt.h>
  69 #include <rpc/svc.h>
  70 
  71 /*
  72  * The pseudo NFS filesystem to allow diskless booting to dynamically
  73  * mount either a NFS V2, NFS V3, or NFS V4 filesystem.  This only implements
  74  * the VFS_MOUNTROOT op and is only intended to be used by the
  75  * diskless booting code until the real root filesystem is mounted.
  76  * Nothing else should ever call this!
  77  *
  78  * The strategy is that if the initial rootfs type is set to "nfsdyn"
  79  * by loadrootmodules() this filesystem is called to mount the
  80  * root filesystem.  It first attempts to mount a V4 filesystem, and if that
  81  * fails due to an RPC version mismatch it tries V3 and finally V2.
  82  * Once the real mount succeeds the vfsops and rootfs name are changed
  83  * to reflect the real filesystem type.
  84  */
  85 static int nfsdyninit(int, char *);
  86 static int nfsdyn_mountroot(vfs_t *, whymountroot_t);
  87 
  88 vfsops_t *nfsdyn_vfsops;
  89 
  90 /*
  91  * The following data structures are used to configure the NFS
  92  * system call, the NFS Version 2 client VFS, and the NFS Version
  93  * 3 client VFS into the system.  The NFS Version 4 structures are defined in
  94  * nfs4_common.c
  95  */
  96 
  97 /*
  98  * The NFS system call.
  99  */
 100 static struct sysent nfssysent = {
 101         2,
 102         SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
 103         nfssys
 104 };
 105 
 106 static struct modlsys modlsys = {
 107         &mod_syscallops,
 108         "NFS syscall, client, and common",
 109         &nfssysent
 110 };
 111 
 112 #ifdef _SYSCALL32_IMPL
 113 static struct modlsys modlsys32 = {
 114         &mod_syscallops32,
 115         "NFS syscall, client, and common (32-bit)",
 116         &nfssysent
 117 };
 118 #endif /* _SYSCALL32_IMPL */
 119 
 120 /*
 121  * The NFS Dynamic client VFS.
 122  */
 123 static vfsdef_t vfw = {
 124         VFSDEF_VERSION,
 125         "nfsdyn",
 126         nfsdyninit,
 127         0,
 128         NULL
 129 };
 130 
 131 static struct modlfs modlfs = {
 132         &mod_fsops,
 133         "network filesystem",
 134         &vfw
 135 };
 136 
 137 /*
 138  * The NFS Version 2 client VFS.
 139  */
 140 static vfsdef_t vfw2 = {
 141         VFSDEF_VERSION,
 142         "nfs",
 143         nfsinit,
 144         VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
 145         NULL
 146 };
 147 
 148 static struct modlfs modlfs2 = {
 149         &mod_fsops,
 150         "network filesystem version 2",
 151         &vfw2
 152 };
 153 
 154 /*
 155  * The NFS Version 3 client VFS.
 156  */
 157 static vfsdef_t vfw3 = {
 158         VFSDEF_VERSION,
 159         "nfs3",
 160         nfs3init,
 161         VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
 162         NULL
 163 };
 164 
 165 static struct modlfs modlfs3 = {
 166         &mod_fsops,
 167         "network filesystem version 3",
 168         &vfw3
 169 };
 170 
 171 extern struct modlfs modlfs4;
 172 
 173 /*
 174  * We have too many linkage structures so we define our own XXX
 175  */
 176 struct modlinkage_big {
 177         int             ml_rev;         /* rev of loadable modules system */
 178         void            *ml_linkage[7]; /* NULL terminated list of */
 179                                         /* linkage structures */
 180 };
 181 
 182 /*
 183  * All of the module configuration linkages required to configure
 184  * the system call and client VFS's into the system.
 185  */
 186 static struct modlinkage_big modlinkage = {
 187         MODREV_1,
 188         {   &modlsys,
 189 #ifdef _SYSCALL32_IMPL
 190             &modlsys32,
 191 #endif
 192             &modlfs,
 193             &modlfs2,
 194             &modlfs3,
 195             &modlfs4,
 196             NULL
 197         }
 198 };
 199 
 200 /*
 201  * This routine is invoked automatically when the kernel module
 202  * containing this routine is loaded.  This allows module specific
 203  * initialization to be done when the module is loaded.
 204  */
 205 int
 206 _init(void)
 207 {
 208         int status;
 209 
 210         if ((status = nfs_clntinit()) != 0) {
 211                 cmn_err(CE_WARN, "_init: nfs_clntinit failed");
 212                 return (status);
 213         }
 214 
 215         /*
 216          * Create the version specific kstats.
 217          *
 218          * PSARC 2001/697 Contract Private Interface
 219          * All nfs kstats are under SunMC contract
 220          * Please refer to the PSARC listed above and contact
 221          * SunMC before making any changes!
 222          *
 223          * Changes must be reviewed by Solaris File Sharing
 224          * Changes must be communicated to contract-2001-697@sun.com
 225          *
 226          */
 227 
 228         zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL,
 229             nfsstat_zone_fini);
 230         status = mod_install((struct modlinkage *)&modlinkage);
 231 
 232         if (status)  {
 233                 (void) zone_key_delete(nfsstat_zone_key);
 234 
 235                 /*
 236                  * Failed to install module, cleanup previous
 237                  * initialization work.
 238                  */
 239                 nfs_clntfini();
 240 
 241                 /*
 242                  * Clean up work performed indirectly by mod_installfs()
 243                  * as a result of our call to mod_install().
 244                  */
 245                 nfs4fini();
 246                 nfs3fini();
 247                 nfsfini();
 248         }
 249         return (status);
 250 }
 251 
 252 int
 253 _fini(void)
 254 {
 255         /* Don't allow module to be unloaded */
 256         return (EBUSY);
 257 }
 258 
 259 int
 260 _info(struct modinfo *modinfop)
 261 {
 262         return (mod_info((struct modlinkage *)&modlinkage, modinfop));
 263 }
 264 
 265 /*
 266  * General utilities
 267  */
 268 
 269 /*
 270  * Returns the preferred transfer size in bytes based on
 271  * what network interfaces are available.
 272  */
 273 int
 274 nfstsize(void)
 275 {
 276         /*
 277          * For the moment, just return NFS_MAXDATA until we can query the
 278          * appropriate transport.
 279          */
 280         return (NFS_MAXDATA);
 281 }
 282 
 283 /*
 284  * Returns the preferred transfer size in bytes based on
 285  * what network interfaces are available.
 286  */
 287 
 288 /* this should reflect the largest transfer size possible */
 289 static int nfs3_max_transfer_size = 1024 * 1024;
 290 
 291 int
 292 nfs3tsize(void)
 293 {
 294         /*
 295          * For the moment, just return nfs3_max_transfer_size until we
 296          * can query the appropriate transport.
 297          */
 298         return (nfs3_max_transfer_size);
 299 }
 300 
 301 static uint_t nfs3_max_transfer_size_clts = 32 * 1024;
 302 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024;
 303 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024;
 304 
 305 uint_t
 306 nfs3_tsize(struct knetconfig *knp)
 307 {
 308 
 309         if (knp->knc_semantics == NC_TPI_COTS_ORD ||
 310             knp->knc_semantics == NC_TPI_COTS)
 311                 return (nfs3_max_transfer_size_cots);
 312         if (knp->knc_semantics == NC_TPI_RDMA)
 313                 return (nfs3_max_transfer_size_rdma);
 314         return (nfs3_max_transfer_size_clts);
 315 }
 316 
 317 uint_t
 318 rfs3_tsize(struct svc_req *req)
 319 {
 320 
 321         if (req->rq_xprt->xp_type == T_COTS_ORD ||
 322             req->rq_xprt->xp_type == T_COTS)
 323                 return (nfs3_max_transfer_size_cots);
 324         if (req->rq_xprt->xp_type == T_RDMA)
 325                 return (nfs3_max_transfer_size_rdma);
 326         return (nfs3_max_transfer_size_clts);
 327 }
 328 
 329 /* ARGSUSED */
 330 static int
 331 nfsdyninit(int fstyp, char *name)
 332 {
 333         static const fs_operation_def_t nfsdyn_vfsops_template[] = {
 334                 { VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot } },
 335                 { NULL,                 { NULL } }
 336         };
 337         int error;
 338 
 339         error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops);
 340         if (error != 0)
 341                 return (error);
 342 
 343         return (0);
 344 }
 345 
 346 /* ARGSUSED */
 347 static int
 348 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why)
 349 {
 350         char root_hostname[SYS_NMLN+1];
 351         struct servinfo *svp;
 352         int error;
 353         int vfsflags;
 354         char *root_path;
 355         struct pathname pn;
 356         char *name;
 357         static char token[10];
 358         struct nfs_args args;           /* nfs mount arguments */
 359 
 360         bzero(&args, sizeof (args));
 361 
 362         /* do this BEFORE getfile which causes xid stamps to be initialized */
 363         clkset(-1L);            /* hack for now - until we get time svc? */
 364 
 365         if (why == ROOT_REMOUNT) {
 366                 /*
 367                  * Shouldn't happen.
 368                  */
 369                 panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
 370         }
 371 
 372         if (why == ROOT_UNMOUNT) {
 373                 /*
 374                  * Nothing to do for NFS.
 375                  */
 376                 return (0);
 377         }
 378 
 379         /*
 380          * why == ROOT_INIT
 381          */
 382 
 383         name = token;
 384         *name = 0;
 385         getfsname("root", name, sizeof (token));
 386 
 387         pn_alloc(&pn);
 388         root_path = pn.pn_path;
 389 
 390         svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
 391         mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
 392         svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
 393         svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 394         svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 395 
 396         /*
 397          * First try version 4
 398          */
 399         vfs_setops(vfsp, nfs4_vfsops);
 400         args.addr = &svp->sv_addr;
 401         args.fh = (char *)&svp->sv_fhandle;
 402         args.knconf = svp->sv_knconf;
 403         args.hostname = root_hostname;
 404         vfsflags = 0;
 405 
 406         if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
 407             &args, &vfsflags)) {
 408                 if (error != EPROTONOSUPPORT) {
 409                         nfs_cmn_err(error, CE_WARN,
 410                             "Unable to mount NFS root filesystem: %m");
 411                         sv_free(svp);
 412                         pn_free(&pn);
 413                         vfs_setops(vfsp, nfsdyn_vfsops);
 414                         return (error);
 415                 }
 416 
 417                 /*
 418                  * Then try version 3
 419                  */
 420                 bzero(&args, sizeof (args));
 421                 vfs_setops(vfsp, nfs3_vfsops);
 422                 args.addr = &svp->sv_addr;
 423                 args.fh = (char *)&svp->sv_fhandle;
 424                 args.knconf = svp->sv_knconf;
 425                 args.hostname = root_hostname;
 426                 vfsflags = 0;
 427 
 428                 if (error = mount_root(*name ? name : "root", root_path,
 429                     NFS_V3, &args, &vfsflags)) {
 430                         if (error != EPROTONOSUPPORT) {
 431                                 nfs_cmn_err(error, CE_WARN,
 432                                     "Unable to mount NFS root filesystem: %m");
 433                                 sv_free(svp);
 434                                 pn_free(&pn);
 435                                 vfs_setops(vfsp, nfsdyn_vfsops);
 436                                 return (error);
 437                         }
 438 
 439                         /*
 440                          * Finally, try version 2
 441                          */
 442                         bzero(&args, sizeof (args));
 443                         args.addr = &svp->sv_addr;
 444                         args.fh = (char *)&svp->sv_fhandle.fh_buf;
 445                         args.knconf = svp->sv_knconf;
 446                         args.hostname = root_hostname;
 447                         vfsflags = 0;
 448 
 449                         vfs_setops(vfsp, nfs_vfsops);
 450 
 451                         if (error = mount_root(*name ? name : "root",
 452                             root_path, NFS_VERSION, &args, &vfsflags)) {
 453                                 nfs_cmn_err(error, CE_WARN,
 454                                     "Unable to mount NFS root filesystem: %m");
 455                                 sv_free(svp);
 456                                 pn_free(&pn);
 457                                 vfs_setops(vfsp, nfsdyn_vfsops);
 458                                 return (error);
 459                         }
 460                 }
 461         }
 462 
 463         sv_free(svp);
 464         pn_free(&pn);
 465         return (VFS_MOUNTROOT(vfsp, why));
 466 }
 467 
 468 int
 469 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf)
 470 {
 471         mntinfo_t *mi;                  /* mount info, pointed at by vfs */
 472         STRUCT_HANDLE(nfs_args, args);
 473         int flags;
 474 
 475 #ifdef lint
 476         model = model;
 477 #endif
 478 
 479         STRUCT_SET_HANDLE(args, model, buf);
 480 
 481         flags = STRUCT_FGET(args, flags);
 482 
 483         /*
 484          * Set option fields in mount info record
 485          */
 486         mi = VTOMI(vp);
 487 
 488         if (flags & NFSMNT_NOAC) {
 489                 mi->mi_flags |= MI_NOAC;
 490                 PURGE_ATTRCACHE(vp);
 491         }
 492         if (flags & NFSMNT_NOCTO)
 493                 mi->mi_flags |= MI_NOCTO;
 494         if (flags & NFSMNT_LLOCK)
 495                 mi->mi_flags |= MI_LLOCK;
 496         if (flags & NFSMNT_GRPID)
 497                 mi->mi_flags |= MI_GRPID;
 498         if (flags & NFSMNT_RETRANS) {
 499                 if (STRUCT_FGET(args, retrans) < 0)
 500                         return (EINVAL);
 501                 mi->mi_retrans = STRUCT_FGET(args, retrans);
 502         }
 503         if (flags & NFSMNT_TIMEO) {
 504                 if (STRUCT_FGET(args, timeo) <= 0)
 505                         return (EINVAL);
 506                 mi->mi_timeo = STRUCT_FGET(args, timeo);
 507                 /*
 508                  * The following scales the standard deviation and
 509                  * and current retransmission timer to match the
 510                  * initial value for the timeout specified.
 511                  */
 512                 mi->mi_timers[NFS_CALLTYPES].rt_deviate =
 513                     (mi->mi_timeo * hz * 2) / 5;
 514                 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur =
 515                     mi->mi_timeo * hz / 10;
 516         }
 517         if (flags & NFSMNT_RSIZE) {
 518                 if (STRUCT_FGET(args, rsize) <= 0)
 519                         return (EINVAL);
 520                 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize));
 521                 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize);
 522         }
 523         if (flags & NFSMNT_WSIZE) {
 524                 if (STRUCT_FGET(args, wsize) <= 0)
 525                         return (EINVAL);
 526                 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize));
 527                 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize);
 528         }
 529         if (flags & NFSMNT_ACREGMIN) {
 530                 if (STRUCT_FGET(args, acregmin) < 0)
 531                         mi->mi_acregmin = ACMINMAX;
 532                 else
 533                         mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin),
 534                             ACMINMAX);
 535                 mi->mi_acregmin = SEC2HR(mi->mi_acregmin);
 536         }
 537         if (flags & NFSMNT_ACREGMAX) {
 538                 if (STRUCT_FGET(args, acregmax) < 0)
 539                         mi->mi_acregmax = ACMAXMAX;
 540                 else
 541                         mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax),
 542                             ACMAXMAX);
 543                 mi->mi_acregmax = SEC2HR(mi->mi_acregmax);
 544         }
 545         if (flags & NFSMNT_ACDIRMIN) {
 546                 if (STRUCT_FGET(args, acdirmin) < 0)
 547                         mi->mi_acdirmin = ACMINMAX;
 548                 else
 549                         mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin),
 550                             ACMINMAX);
 551                 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin);
 552         }
 553         if (flags & NFSMNT_ACDIRMAX) {
 554                 if (STRUCT_FGET(args, acdirmax) < 0)
 555                         mi->mi_acdirmax = ACMAXMAX;
 556                 else
 557                         mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax),
 558                             ACMAXMAX);
 559                 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax);
 560         }
 561 
 562         if (flags & NFSMNT_LOOPBACK)
 563                 mi->mi_flags |= MI_LOOPBACK;
 564 
 565         return (0);
 566 }
 567 
 568 /*
 569  * Set or Clear direct I/O flag
 570  * VOP_RWLOCK() is held for write access to prevent a race condition
 571  * which would occur if a process is in the middle of a write when
 572  * directio flag gets set. It is possible that all pages may not get flushed.
 573  */
 574 
 575 /* ARGSUSED */
 576 int
 577 nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
 578 {
 579         int     error = 0;
 580         rnode_t *rp;
 581 
 582         rp = VTOR(vp);
 583 
 584         if (cmd == DIRECTIO_ON) {
 585 
 586                 if (rp->r_flags & RDIRECTIO)
 587                         return (0);
 588 
 589                 /*
 590                  * Flush the page cache.
 591                  */
 592 
 593                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
 594 
 595                 if (rp->r_flags & RDIRECTIO) {
 596                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 597                         return (0);
 598                 }
 599 
 600                 if (vn_has_cached_data(vp) &&
 601                     ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
 602                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
 603                             B_INVAL, cr, NULL);
 604                         if (error) {
 605                                 if (error == ENOSPC || error == EDQUOT) {
 606                                         mutex_enter(&rp->r_statelock);
 607                                         if (!rp->r_error)
 608                                                 rp->r_error = error;
 609                                         mutex_exit(&rp->r_statelock);
 610                                 }
 611                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 612                                 return (error);
 613                         }
 614                 }
 615 
 616                 mutex_enter(&rp->r_statelock);
 617                 rp->r_flags |= RDIRECTIO;
 618                 mutex_exit(&rp->r_statelock);
 619                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
 620                 return (0);
 621         }
 622 
 623         if (cmd == DIRECTIO_OFF) {
 624                 mutex_enter(&rp->r_statelock);
 625                 rp->r_flags &= ~RDIRECTIO;       /* disable direct mode */
 626                 mutex_exit(&rp->r_statelock);
 627                 return (0);
 628         }
 629 
 630         return (EINVAL);
 631 }