1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Bayard G. Bell. All rights reserved. 24 * Copyright 2013 Joyent, Inc. All rights reserved. 25 */ 26 27 /* 28 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 29 * All rights reserved. 30 */ 31 32 #include <sys/errno.h> 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/user.h> 36 #include <sys/stat.h> 37 #include <sys/time.h> 38 #include <sys/utsname.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/vnode.h> 42 #include <sys/pathname.h> 43 #include <sys/bootconf.h> 44 #include <fs/fs_subr.h> 45 #include <rpc/types.h> 46 #include <nfs/nfs.h> 47 #include <nfs/nfs4.h> 48 #include <nfs/nfs_clnt.h> 49 #include <nfs/rnode.h> 50 #include <nfs/mount.h> 51 #include <nfs/nfssys.h> 52 #include <sys/debug.h> 53 #include <sys/cmn_err.h> 54 #include <sys/file.h> 55 #include <sys/fcntl.h> 56 #include <sys/zone.h> 57 58 /* 59 * This is the loadable module wrapper. 60 */ 61 #include <sys/systm.h> 62 #include <sys/modctl.h> 63 #include <sys/syscall.h> 64 #include <sys/ddi.h> 65 66 #include <rpc/types.h> 67 #include <rpc/auth.h> 68 #include <rpc/clnt.h> 69 #include <rpc/svc.h> 70 71 /* 72 * The pseudo NFS filesystem to allow diskless booting to dynamically 73 * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements 74 * the VFS_MOUNTROOT op and is only intended to be used by the 75 * diskless booting code until the real root filesystem is mounted. 76 * Nothing else should ever call this! 77 * 78 * The strategy is that if the initial rootfs type is set to "nfsdyn" 79 * by loadrootmodules() this filesystem is called to mount the 80 * root filesystem. It first attempts to mount a V4 filesystem, and if that 81 * fails due to an RPC version mismatch it tries V3 and finally V2. 82 * Once the real mount succeeds the vfsops and rootfs name are changed 83 * to reflect the real filesystem type. 84 */ 85 static int nfsdyninit(int, char *); 86 static int nfsdyn_mountroot(vfs_t *, whymountroot_t); 87 88 vfsops_t *nfsdyn_vfsops; 89 90 /* 91 * The following data structures are used to configure the NFS 92 * system call, the NFS Version 2 client VFS, and the NFS Version 93 * 3 client VFS into the system. The NFS Version 4 structures are defined in 94 * nfs4_common.c 95 */ 96 97 /* 98 * The NFS system call. 99 */ 100 static struct sysent nfssysent = { 101 2, 102 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 103 nfssys 104 }; 105 106 static struct modlsys modlsys = { 107 &mod_syscallops, 108 "NFS syscall, client, and common", 109 &nfssysent 110 }; 111 112 #ifdef _SYSCALL32_IMPL 113 static struct modlsys modlsys32 = { 114 &mod_syscallops32, 115 "NFS syscall, client, and common (32-bit)", 116 &nfssysent 117 }; 118 #endif /* _SYSCALL32_IMPL */ 119 120 /* 121 * The NFS Dynamic client VFS. 122 */ 123 static vfsdef_t vfw = { 124 VFSDEF_VERSION, 125 "nfsdyn", 126 nfsdyninit, 127 0, 128 NULL 129 }; 130 131 static struct modlfs modlfs = { 132 &mod_fsops, 133 "network filesystem", 134 &vfw 135 }; 136 137 /* 138 * The NFS Version 2 client VFS. 139 */ 140 static vfsdef_t vfw2 = { 141 VFSDEF_VERSION, 142 "nfs", 143 nfsinit, 144 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 145 NULL 146 }; 147 148 static struct modlfs modlfs2 = { 149 &mod_fsops, 150 "network filesystem version 2", 151 &vfw2 152 }; 153 154 /* 155 * The NFS Version 3 client VFS. 156 */ 157 static vfsdef_t vfw3 = { 158 VFSDEF_VERSION, 159 "nfs3", 160 nfs3init, 161 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 162 NULL 163 }; 164 165 static struct modlfs modlfs3 = { 166 &mod_fsops, 167 "network filesystem version 3", 168 &vfw3 169 }; 170 171 extern struct modlfs modlfs4; 172 173 /* 174 * We have too many linkage structures so we define our own XXX 175 */ 176 struct modlinkage_big { 177 int ml_rev; /* rev of loadable modules system */ 178 void *ml_linkage[7]; /* NULL terminated list of */ 179 /* linkage structures */ 180 }; 181 182 /* 183 * All of the module configuration linkages required to configure 184 * the system call and client VFS's into the system. 185 */ 186 static struct modlinkage_big modlinkage = { 187 MODREV_1, 188 &modlsys, 189 #ifdef _SYSCALL32_IMPL 190 &modlsys32, 191 #endif 192 &modlfs, 193 &modlfs2, 194 &modlfs3, 195 &modlfs4, 196 NULL 197 }; 198 199 /* 200 * This routine is invoked automatically when the kernel module 201 * containing this routine is loaded. This allows module specific 202 * initialization to be done when the module is loaded. 203 */ 204 int 205 _init(void) 206 { 207 int status; 208 209 if ((status = nfs_clntinit()) != 0) { 210 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 211 return (status); 212 } 213 214 /* 215 * Create the version specific kstats. 216 * 217 * PSARC 2001/697 Contract Private Interface 218 * All nfs kstats are under SunMC contract 219 * Please refer to the PSARC listed above and contact 220 * SunMC before making any changes! 221 * 222 * Changes must be reviewed by Solaris File Sharing 223 * Changes must be communicated to contract-2001-697@sun.com 224 * 225 */ 226 227 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 228 nfsstat_zone_fini); 229 status = mod_install((struct modlinkage *)&modlinkage); 230 231 if (status) { 232 (void) zone_key_delete(nfsstat_zone_key); 233 234 /* 235 * Failed to install module, cleanup previous 236 * initialization work. 237 */ 238 nfs_clntfini(); 239 240 /* 241 * Clean up work performed indirectly by mod_installfs() 242 * as a result of our call to mod_install(). 243 */ 244 nfs4fini(); 245 nfs3fini(); 246 nfsfini(); 247 } 248 return (status); 249 } 250 251 int 252 _fini(void) 253 { 254 /* Don't allow module to be unloaded */ 255 return (EBUSY); 256 } 257 258 int 259 _info(struct modinfo *modinfop) 260 { 261 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 262 } 263 264 /* 265 * General utilities 266 */ 267 268 /* 269 * Returns the preferred transfer size in bytes based on 270 * what network interfaces are available. 271 */ 272 int 273 nfstsize(void) 274 { 275 /* 276 * For the moment, just return NFS_MAXDATA until we can query the 277 * appropriate transport. 278 */ 279 return (NFS_MAXDATA); 280 } 281 282 /* 283 * Returns the preferred transfer size in bytes based on 284 * what network interfaces are available. 285 */ 286 287 /* this should reflect the largest transfer size possible */ 288 static int nfs3_max_transfer_size = 1024 * 1024; 289 290 int 291 nfs3tsize(void) 292 { 293 /* 294 * For the moment, just return nfs3_max_transfer_size until we 295 * can query the appropriate transport. 296 */ 297 return (nfs3_max_transfer_size); 298 } 299 300 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 301 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 302 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 303 304 uint_t 305 nfs3_tsize(struct knetconfig *knp) 306 { 307 308 if (knp->knc_semantics == NC_TPI_COTS_ORD || 309 knp->knc_semantics == NC_TPI_COTS) 310 return (nfs3_max_transfer_size_cots); 311 if (knp->knc_semantics == NC_TPI_RDMA) 312 return (nfs3_max_transfer_size_rdma); 313 return (nfs3_max_transfer_size_clts); 314 } 315 316 uint_t 317 rfs3_tsize(struct svc_req *req) 318 { 319 320 if (req->rq_xprt->xp_type == T_COTS_ORD || 321 req->rq_xprt->xp_type == T_COTS) 322 return (nfs3_max_transfer_size_cots); 323 if (req->rq_xprt->xp_type == T_RDMA) 324 return (nfs3_max_transfer_size_rdma); 325 return (nfs3_max_transfer_size_clts); 326 } 327 328 /* ARGSUSED */ 329 static int 330 nfsdyninit(int fstyp, char *name) 331 { 332 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 333 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot }, 334 NULL, NULL 335 }; 336 int error; 337 338 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 339 if (error != 0) 340 return (error); 341 342 return (0); 343 } 344 345 /* ARGSUSED */ 346 static int 347 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 348 { 349 char root_hostname[SYS_NMLN+1]; 350 struct servinfo *svp; 351 int error; 352 int vfsflags; 353 char *root_path; 354 struct pathname pn; 355 char *name; 356 static char token[10]; 357 struct nfs_args args; /* nfs mount arguments */ 358 359 bzero(&args, sizeof (args)); 360 361 /* do this BEFORE getfile which causes xid stamps to be initialized */ 362 clkset(-1L); /* hack for now - until we get time svc? */ 363 364 if (why == ROOT_REMOUNT) { 365 /* 366 * Shouldn't happen. 367 */ 368 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 369 } 370 371 if (why == ROOT_UNMOUNT) { 372 /* 373 * Nothing to do for NFS. 374 */ 375 return (0); 376 } 377 378 /* 379 * why == ROOT_INIT 380 */ 381 382 name = token; 383 *name = 0; 384 getfsname("root", name, sizeof (token)); 385 386 pn_alloc(&pn); 387 root_path = pn.pn_path; 388 389 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 390 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 391 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 392 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 393 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 394 395 /* 396 * First try version 4 397 */ 398 vfs_setops(vfsp, nfs4_vfsops); 399 args.addr = &svp->sv_addr; 400 args.fh = (char *)&svp->sv_fhandle; 401 args.knconf = svp->sv_knconf; 402 args.hostname = root_hostname; 403 vfsflags = 0; 404 405 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 406 &args, &vfsflags)) { 407 if (error != EPROTONOSUPPORT) { 408 nfs_cmn_err(error, CE_WARN, 409 "Unable to mount NFS root filesystem: %m"); 410 sv_free(svp); 411 pn_free(&pn); 412 vfs_setops(vfsp, nfsdyn_vfsops); 413 return (error); 414 } 415 416 /* 417 * Then try version 3 418 */ 419 bzero(&args, sizeof (args)); 420 vfs_setops(vfsp, nfs3_vfsops); 421 args.addr = &svp->sv_addr; 422 args.fh = (char *)&svp->sv_fhandle; 423 args.knconf = svp->sv_knconf; 424 args.hostname = root_hostname; 425 vfsflags = 0; 426 427 if (error = mount_root(*name ? name : "root", root_path, 428 NFS_V3, &args, &vfsflags)) { 429 if (error != EPROTONOSUPPORT) { 430 nfs_cmn_err(error, CE_WARN, 431 "Unable to mount NFS root filesystem: %m"); 432 sv_free(svp); 433 pn_free(&pn); 434 vfs_setops(vfsp, nfsdyn_vfsops); 435 return (error); 436 } 437 438 /* 439 * Finally, try version 2 440 */ 441 bzero(&args, sizeof (args)); 442 args.addr = &svp->sv_addr; 443 args.fh = (char *)&svp->sv_fhandle.fh_buf; 444 args.knconf = svp->sv_knconf; 445 args.hostname = root_hostname; 446 vfsflags = 0; 447 448 vfs_setops(vfsp, nfs_vfsops); 449 450 if (error = mount_root(*name ? name : "root", 451 root_path, NFS_VERSION, &args, &vfsflags)) { 452 nfs_cmn_err(error, CE_WARN, 453 "Unable to mount NFS root filesystem: %m"); 454 sv_free(svp); 455 pn_free(&pn); 456 vfs_setops(vfsp, nfsdyn_vfsops); 457 return (error); 458 } 459 } 460 } 461 462 sv_free(svp); 463 pn_free(&pn); 464 return (VFS_MOUNTROOT(vfsp, why)); 465 } 466 467 int 468 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 469 { 470 mntinfo_t *mi; /* mount info, pointed at by vfs */ 471 STRUCT_HANDLE(nfs_args, args); 472 int flags; 473 474 #ifdef lint 475 model = model; 476 #endif 477 478 STRUCT_SET_HANDLE(args, model, buf); 479 480 flags = STRUCT_FGET(args, flags); 481 482 /* 483 * Set option fields in mount info record 484 */ 485 mi = VTOMI(vp); 486 487 if (flags & NFSMNT_NOAC) { 488 mi->mi_flags |= MI_NOAC; 489 PURGE_ATTRCACHE(vp); 490 } 491 if (flags & NFSMNT_NOCTO) 492 mi->mi_flags |= MI_NOCTO; 493 if (flags & NFSMNT_LLOCK) 494 mi->mi_flags |= MI_LLOCK; 495 if (flags & NFSMNT_GRPID) 496 mi->mi_flags |= MI_GRPID; 497 if (flags & NFSMNT_RETRANS) { 498 if (STRUCT_FGET(args, retrans) < 0) 499 return (EINVAL); 500 mi->mi_retrans = STRUCT_FGET(args, retrans); 501 } 502 if (flags & NFSMNT_TIMEO) { 503 if (STRUCT_FGET(args, timeo) <= 0) 504 return (EINVAL); 505 mi->mi_timeo = STRUCT_FGET(args, timeo); 506 /* 507 * The following scales the standard deviation and 508 * and current retransmission timer to match the 509 * initial value for the timeout specified. 510 */ 511 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 512 (mi->mi_timeo * hz * 2) / 5; 513 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 514 mi->mi_timeo * hz / 10; 515 } 516 if (flags & NFSMNT_RSIZE) { 517 if (STRUCT_FGET(args, rsize) <= 0) 518 return (EINVAL); 519 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 520 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 521 } 522 if (flags & NFSMNT_WSIZE) { 523 if (STRUCT_FGET(args, wsize) <= 0) 524 return (EINVAL); 525 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 526 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 527 } 528 if (flags & NFSMNT_ACREGMIN) { 529 if (STRUCT_FGET(args, acregmin) < 0) 530 mi->mi_acregmin = ACMINMAX; 531 else 532 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 533 ACMINMAX); 534 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 535 } 536 if (flags & NFSMNT_ACREGMAX) { 537 if (STRUCT_FGET(args, acregmax) < 0) 538 mi->mi_acregmax = ACMAXMAX; 539 else 540 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 541 ACMAXMAX); 542 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 543 } 544 if (flags & NFSMNT_ACDIRMIN) { 545 if (STRUCT_FGET(args, acdirmin) < 0) 546 mi->mi_acdirmin = ACMINMAX; 547 else 548 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 549 ACMINMAX); 550 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 551 } 552 if (flags & NFSMNT_ACDIRMAX) { 553 if (STRUCT_FGET(args, acdirmax) < 0) 554 mi->mi_acdirmax = ACMAXMAX; 555 else 556 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 557 ACMAXMAX); 558 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 559 } 560 561 if (flags & NFSMNT_LOOPBACK) 562 mi->mi_flags |= MI_LOOPBACK; 563 564 return (0); 565 } 566 567 /* 568 * Set or Clear direct I/O flag 569 * VOP_RWLOCK() is held for write access to prevent a race condition 570 * which would occur if a process is in the middle of a write when 571 * directio flag gets set. It is possible that all pages may not get flushed. 572 */ 573 574 /* ARGSUSED */ 575 int 576 nfs_directio(vnode_t *vp, int cmd, cred_t *cr) 577 { 578 int error = 0; 579 rnode_t *rp; 580 581 rp = VTOR(vp); 582 583 if (cmd == DIRECTIO_ON) { 584 585 if (rp->r_flags & RDIRECTIO) 586 return (0); 587 588 /* 589 * Flush the page cache. 590 */ 591 592 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 593 594 if (rp->r_flags & RDIRECTIO) { 595 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 596 return (0); 597 } 598 599 if (vn_has_cached_data(vp) && 600 ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { 601 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 602 B_INVAL, cr, NULL); 603 if (error) { 604 if (error == ENOSPC || error == EDQUOT) { 605 mutex_enter(&rp->r_statelock); 606 if (!rp->r_error) 607 rp->r_error = error; 608 mutex_exit(&rp->r_statelock); 609 } 610 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 611 return (error); 612 } 613 } 614 615 mutex_enter(&rp->r_statelock); 616 rp->r_flags |= RDIRECTIO; 617 mutex_exit(&rp->r_statelock); 618 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 619 return (0); 620 } 621 622 if (cmd == DIRECTIO_OFF) { 623 mutex_enter(&rp->r_statelock); 624 rp->r_flags &= ~RDIRECTIO; /* disable direct mode */ 625 mutex_exit(&rp->r_statelock); 626 return (0); 627 } 628 629 return (EINVAL); 630 }