1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Bayard G. Bell. All rights reserved. 24 * Copyright 2013 Joyent, Inc. All rights reserved. 25 */ 26 27 /* 28 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 29 * All rights reserved. 30 */ 31 32 #include <sys/errno.h> 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/user.h> 36 #include <sys/stat.h> 37 #include <sys/time.h> 38 #include <sys/utsname.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/vnode.h> 42 #include <sys/pathname.h> 43 #include <sys/bootconf.h> 44 #include <fs/fs_subr.h> 45 #include <rpc/types.h> 46 #include <nfs/nfs.h> 47 #include <nfs/nfs4.h> 48 #include <nfs/nfs_clnt.h> 49 #include <nfs/rnode.h> 50 #include <nfs/mount.h> 51 #include <nfs/nfssys.h> 52 #include <sys/debug.h> 53 #include <sys/cmn_err.h> 54 #include <sys/file.h> 55 #include <sys/fcntl.h> 56 #include <sys/zone.h> 57 58 /* 59 * This is the loadable module wrapper. 
 */
#include <sys/systm.h>
#include <sys/modctl.h>
#include <sys/syscall.h>
#include <sys/ddi.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/svc.h>

/*
 * The pseudo NFS filesystem to allow diskless booting to dynamically
 * mount either an NFS V2, NFS V3, or NFS V4 filesystem.  This only implements
 * the VFS_MOUNTROOT op and is only intended to be used by the
 * diskless booting code until the real root filesystem is mounted.
 * Nothing else should ever call this!
 *
 * The strategy is that if the initial rootfs type is set to "nfsdyn"
 * by loadrootmodules() this filesystem is called to mount the
 * root filesystem.  It first attempts to mount a V4 filesystem, and if that
 * fails due to an RPC version mismatch it tries V3 and finally V2.
 * Once the real mount succeeds the vfsops and rootfs name are changed
 * to reflect the real filesystem type.
 */
static int nfsdyninit(int, char *);
static int nfsdyn_mountroot(vfs_t *, whymountroot_t);

/*
 * Operations vector for the "nfsdyn" pseudo filesystem.  It is filled in
 * by nfsdyninit() and is re-installed on the root vfs by nfsdyn_mountroot()
 * when every mount attempt has failed.
 */
vfsops_t *nfsdyn_vfsops;

/*
 * The following data structures are used to configure the NFS
 * system call, the NFS Version 2 client VFS, and the NFS Version
 * 3 client VFS into the system.  The NFS Version 4 structures are defined in
 * nfs4_common.c
 */

/*
 * The NFS system call.
 *
 * NOTE(review): the initializer is positional; the leading 2 is presumably
 * the argument count of nfssys() and the second member the syscall entry
 * flags -- confirm against the struct sysent declaration.
 */
static struct sysent nfssysent = {
	2,
	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
	nfssys
};

/*
 * Module linkage for the system call.  The 32-bit linkage below shares the
 * same nfssysent entry and only exists when _SYSCALL32_IMPL is defined.
 */
static struct modlsys modlsys = {
	&mod_syscallops,
	"NFS syscall, client, and common",
	&nfssysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"NFS syscall, client, and common (32-bit)",
	&nfssysent
};
#endif /* _SYSCALL32_IMPL */

/*
 * The NFS Dynamic client VFS.
122 */ 123 static vfsdef_t vfw = { 124 VFSDEF_VERSION, 125 "nfsdyn", 126 nfsdyninit, 127 0, 128 NULL 129 }; 130 131 static struct modlfs modlfs = { 132 &mod_fsops, 133 "network filesystem", 134 &vfw 135 }; 136 137 /* 138 * The NFS Version 2 client VFS. 139 */ 140 static vfsdef_t vfw2 = { 141 VFSDEF_VERSION, 142 "nfs", 143 nfsinit, 144 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 145 NULL 146 }; 147 148 static struct modlfs modlfs2 = { 149 &mod_fsops, 150 "network filesystem version 2", 151 &vfw2 152 }; 153 154 /* 155 * The NFS Version 3 client VFS. 156 */ 157 static vfsdef_t vfw3 = { 158 VFSDEF_VERSION, 159 "nfs3", 160 nfs3init, 161 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 162 NULL 163 }; 164 165 static struct modlfs modlfs3 = { 166 &mod_fsops, 167 "network filesystem version 3", 168 &vfw3 169 }; 170 171 extern struct modlfs modlfs4; 172 173 /* 174 * We have too many linkage structures so we define our own XXX 175 */ 176 struct modlinkage_big { 177 int ml_rev; /* rev of loadable modules system */ 178 void *ml_linkage[7]; /* NULL terminated list of */ 179 /* linkage structures */ 180 }; 181 182 /* 183 * All of the module configuration linkages required to configure 184 * the system call and client VFS's into the system. 185 */ 186 static struct modlinkage_big modlinkage = { 187 MODREV_1, 188 { &modlsys, 189 #ifdef _SYSCALL32_IMPL 190 &modlsys32, 191 #endif 192 &modlfs, 193 &modlfs2, 194 &modlfs3, 195 &modlfs4, 196 NULL 197 } 198 }; 199 200 /* 201 * This routine is invoked automatically when the kernel module 202 * containing this routine is loaded. This allows module specific 203 * initialization to be done when the module is loaded. 204 */ 205 int 206 _init(void) 207 { 208 int status; 209 210 if ((status = nfs_clntinit()) != 0) { 211 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 212 return (status); 213 } 214 215 /* 216 * Create the version specific kstats. 
217 * 218 * PSARC 2001/697 Contract Private Interface 219 * All nfs kstats are under SunMC contract 220 * Please refer to the PSARC listed above and contact 221 * SunMC before making any changes! 222 * 223 * Changes must be reviewed by Solaris File Sharing 224 * Changes must be communicated to contract-2001-697@sun.com 225 * 226 */ 227 228 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 229 nfsstat_zone_fini); 230 status = mod_install((struct modlinkage *)&modlinkage); 231 232 if (status) { 233 (void) zone_key_delete(nfsstat_zone_key); 234 235 /* 236 * Failed to install module, cleanup previous 237 * initialization work. 238 */ 239 nfs_clntfini(); 240 241 /* 242 * Clean up work performed indirectly by mod_installfs() 243 * as a result of our call to mod_install(). 244 */ 245 nfs4fini(); 246 nfs3fini(); 247 nfsfini(); 248 } 249 return (status); 250 } 251 252 int 253 _fini(void) 254 { 255 /* Don't allow module to be unloaded */ 256 return (EBUSY); 257 } 258 259 int 260 _info(struct modinfo *modinfop) 261 { 262 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 263 } 264 265 /* 266 * General utilities 267 */ 268 269 /* 270 * Returns the preferred transfer size in bytes based on 271 * what network interfaces are available. 272 */ 273 int 274 nfstsize(void) 275 { 276 /* 277 * For the moment, just return NFS_MAXDATA until we can query the 278 * appropriate transport. 279 */ 280 return (NFS_MAXDATA); 281 } 282 283 /* 284 * Returns the preferred transfer size in bytes based on 285 * what network interfaces are available. 286 */ 287 288 /* this should reflect the largest transfer size possible */ 289 static int nfs3_max_transfer_size = 1024 * 1024; 290 291 int 292 nfs3tsize(void) 293 { 294 /* 295 * For the moment, just return nfs3_max_transfer_size until we 296 * can query the appropriate transport. 
297 */ 298 return (nfs3_max_transfer_size); 299 } 300 301 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 302 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 303 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 304 305 uint_t 306 nfs3_tsize(struct knetconfig *knp) 307 { 308 309 if (knp->knc_semantics == NC_TPI_COTS_ORD || 310 knp->knc_semantics == NC_TPI_COTS) 311 return (nfs3_max_transfer_size_cots); 312 if (knp->knc_semantics == NC_TPI_RDMA) 313 return (nfs3_max_transfer_size_rdma); 314 return (nfs3_max_transfer_size_clts); 315 } 316 317 uint_t 318 rfs3_tsize(struct svc_req *req) 319 { 320 321 if (req->rq_xprt->xp_type == T_COTS_ORD || 322 req->rq_xprt->xp_type == T_COTS) 323 return (nfs3_max_transfer_size_cots); 324 if (req->rq_xprt->xp_type == T_RDMA) 325 return (nfs3_max_transfer_size_rdma); 326 return (nfs3_max_transfer_size_clts); 327 } 328 329 /* ARGSUSED */ 330 static int 331 nfsdyninit(int fstyp, char *name) 332 { 333 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 334 { VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot } }, 335 { NULL, { NULL } } 336 }; 337 int error; 338 339 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 340 if (error != 0) 341 return (error); 342 343 return (0); 344 } 345 346 /* ARGSUSED */ 347 static int 348 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 349 { 350 char root_hostname[SYS_NMLN+1]; 351 struct servinfo *svp; 352 int error; 353 int vfsflags; 354 char *root_path; 355 struct pathname pn; 356 char *name; 357 static char token[10]; 358 struct nfs_args args; /* nfs mount arguments */ 359 360 bzero(&args, sizeof (args)); 361 362 /* do this BEFORE getfile which causes xid stamps to be initialized */ 363 clkset(-1L); /* hack for now - until we get time svc? */ 364 365 if (why == ROOT_REMOUNT) { 366 /* 367 * Shouldn't happen. 368 */ 369 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 370 } 371 372 if (why == ROOT_UNMOUNT) { 373 /* 374 * Nothing to do for NFS. 
375 */ 376 return (0); 377 } 378 379 /* 380 * why == ROOT_INIT 381 */ 382 383 name = token; 384 *name = 0; 385 getfsname("root", name, sizeof (token)); 386 387 pn_alloc(&pn); 388 root_path = pn.pn_path; 389 390 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 391 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 392 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 393 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 394 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 395 396 /* 397 * First try version 4 398 */ 399 vfs_setops(vfsp, nfs4_vfsops); 400 args.addr = &svp->sv_addr; 401 args.fh = (char *)&svp->sv_fhandle; 402 args.knconf = svp->sv_knconf; 403 args.hostname = root_hostname; 404 vfsflags = 0; 405 406 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 407 &args, &vfsflags)) { 408 if (error != EPROTONOSUPPORT) { 409 nfs_cmn_err(error, CE_WARN, 410 "Unable to mount NFS root filesystem: %m"); 411 sv_free(svp); 412 pn_free(&pn); 413 vfs_setops(vfsp, nfsdyn_vfsops); 414 return (error); 415 } 416 417 /* 418 * Then try version 3 419 */ 420 bzero(&args, sizeof (args)); 421 vfs_setops(vfsp, nfs3_vfsops); 422 args.addr = &svp->sv_addr; 423 args.fh = (char *)&svp->sv_fhandle; 424 args.knconf = svp->sv_knconf; 425 args.hostname = root_hostname; 426 vfsflags = 0; 427 428 if (error = mount_root(*name ? name : "root", root_path, 429 NFS_V3, &args, &vfsflags)) { 430 if (error != EPROTONOSUPPORT) { 431 nfs_cmn_err(error, CE_WARN, 432 "Unable to mount NFS root filesystem: %m"); 433 sv_free(svp); 434 pn_free(&pn); 435 vfs_setops(vfsp, nfsdyn_vfsops); 436 return (error); 437 } 438 439 /* 440 * Finally, try version 2 441 */ 442 bzero(&args, sizeof (args)); 443 args.addr = &svp->sv_addr; 444 args.fh = (char *)&svp->sv_fhandle.fh_buf; 445 args.knconf = svp->sv_knconf; 446 args.hostname = root_hostname; 447 vfsflags = 0; 448 449 vfs_setops(vfsp, nfs_vfsops); 450 451 if (error = mount_root(*name ? 
name : "root", 452 root_path, NFS_VERSION, &args, &vfsflags)) { 453 nfs_cmn_err(error, CE_WARN, 454 "Unable to mount NFS root filesystem: %m"); 455 sv_free(svp); 456 pn_free(&pn); 457 vfs_setops(vfsp, nfsdyn_vfsops); 458 return (error); 459 } 460 } 461 } 462 463 sv_free(svp); 464 pn_free(&pn); 465 return (VFS_MOUNTROOT(vfsp, why)); 466 } 467 468 int 469 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 470 { 471 mntinfo_t *mi; /* mount info, pointed at by vfs */ 472 STRUCT_HANDLE(nfs_args, args); 473 int flags; 474 475 #ifdef lint 476 model = model; 477 #endif 478 479 STRUCT_SET_HANDLE(args, model, buf); 480 481 flags = STRUCT_FGET(args, flags); 482 483 /* 484 * Set option fields in mount info record 485 */ 486 mi = VTOMI(vp); 487 488 if (flags & NFSMNT_NOAC) { 489 mi->mi_flags |= MI_NOAC; 490 PURGE_ATTRCACHE(vp); 491 } 492 if (flags & NFSMNT_NOCTO) 493 mi->mi_flags |= MI_NOCTO; 494 if (flags & NFSMNT_LLOCK) 495 mi->mi_flags |= MI_LLOCK; 496 if (flags & NFSMNT_GRPID) 497 mi->mi_flags |= MI_GRPID; 498 if (flags & NFSMNT_RETRANS) { 499 if (STRUCT_FGET(args, retrans) < 0) 500 return (EINVAL); 501 mi->mi_retrans = STRUCT_FGET(args, retrans); 502 } 503 if (flags & NFSMNT_TIMEO) { 504 if (STRUCT_FGET(args, timeo) <= 0) 505 return (EINVAL); 506 mi->mi_timeo = STRUCT_FGET(args, timeo); 507 /* 508 * The following scales the standard deviation and 509 * and current retransmission timer to match the 510 * initial value for the timeout specified. 
511 */ 512 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 513 (mi->mi_timeo * hz * 2) / 5; 514 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 515 mi->mi_timeo * hz / 10; 516 } 517 if (flags & NFSMNT_RSIZE) { 518 if (STRUCT_FGET(args, rsize) <= 0) 519 return (EINVAL); 520 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 521 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 522 } 523 if (flags & NFSMNT_WSIZE) { 524 if (STRUCT_FGET(args, wsize) <= 0) 525 return (EINVAL); 526 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 527 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 528 } 529 if (flags & NFSMNT_ACREGMIN) { 530 if (STRUCT_FGET(args, acregmin) < 0) 531 mi->mi_acregmin = ACMINMAX; 532 else 533 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 534 ACMINMAX); 535 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 536 } 537 if (flags & NFSMNT_ACREGMAX) { 538 if (STRUCT_FGET(args, acregmax) < 0) 539 mi->mi_acregmax = ACMAXMAX; 540 else 541 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 542 ACMAXMAX); 543 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 544 } 545 if (flags & NFSMNT_ACDIRMIN) { 546 if (STRUCT_FGET(args, acdirmin) < 0) 547 mi->mi_acdirmin = ACMINMAX; 548 else 549 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 550 ACMINMAX); 551 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 552 } 553 if (flags & NFSMNT_ACDIRMAX) { 554 if (STRUCT_FGET(args, acdirmax) < 0) 555 mi->mi_acdirmax = ACMAXMAX; 556 else 557 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 558 ACMAXMAX); 559 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 560 } 561 562 if (flags & NFSMNT_LOOPBACK) 563 mi->mi_flags |= MI_LOOPBACK; 564 565 return (0); 566 } 567 568 /* 569 * Set or Clear direct I/O flag 570 * VOP_RWLOCK() is held for write access to prevent a race condition 571 * which would occur if a process is in the middle of a write when 572 * directio flag gets set. It is possible that all pages may not get flushed. 
573 */ 574 575 /* ARGSUSED */ 576 int 577 nfs_directio(vnode_t *vp, int cmd, cred_t *cr) 578 { 579 int error = 0; 580 rnode_t *rp; 581 582 rp = VTOR(vp); 583 584 if (cmd == DIRECTIO_ON) { 585 586 if (rp->r_flags & RDIRECTIO) 587 return (0); 588 589 /* 590 * Flush the page cache. 591 */ 592 593 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 594 595 if (rp->r_flags & RDIRECTIO) { 596 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 597 return (0); 598 } 599 600 if (vn_has_cached_data(vp) && 601 ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { 602 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 603 B_INVAL, cr, NULL); 604 if (error) { 605 if (error == ENOSPC || error == EDQUOT) { 606 mutex_enter(&rp->r_statelock); 607 if (!rp->r_error) 608 rp->r_error = error; 609 mutex_exit(&rp->r_statelock); 610 } 611 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 612 return (error); 613 } 614 } 615 616 mutex_enter(&rp->r_statelock); 617 rp->r_flags |= RDIRECTIO; 618 mutex_exit(&rp->r_statelock); 619 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 620 return (0); 621 } 622 623 if (cmd == DIRECTIO_OFF) { 624 mutex_enter(&rp->r_statelock); 625 rp->r_flags &= ~RDIRECTIO; /* disable direct mode */ 626 mutex_exit(&rp->r_statelock); 627 return (0); 628 } 629 630 return (EINVAL); 631 }