1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 26 /* All rights reserved. */ 27 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/cmn_err.h> 32 #include <sys/debug.h> 33 #include <sys/dirent.h> 34 #include <sys/errno.h> 35 #include <sys/file.h> 36 #include <sys/inline.h> 37 #include <sys/kmem.h> 38 #include <sys/pathname.h> 39 #include <sys/resource.h> 40 #include <sys/statvfs.h> 41 #include <sys/mount.h> 42 #include <sys/sysmacros.h> 43 #include <sys/systm.h> 44 #include <sys/uio.h> 45 #include <sys/vfs.h> 46 #include <sys/vfs_opreg.h> 47 #include <sys/vnode.h> 48 #include <sys/cred.h> 49 #include <sys/mntent.h> 50 #include <sys/mount.h> 51 #include <sys/user.h> 52 #include <sys/t_lock.h> 53 #include <sys/modctl.h> 54 #include <sys/policy.h> 55 #include <fs/fs_subr.h> 56 #include <sys/atomic.h> 57 #include <sys/mkdev.h> 58 59 #define round(r) (((r)+sizeof (int)-1)&(~(sizeof (int)-1))) 60 #define fdtoi(n) ((n)+100) 61 62 #define FDDIRSIZE 14 63 struct fddirect { 64 short d_ino; 65 char d_name[FDDIRSIZE]; 66 }; 67 68 #define FDROOTINO 2 69 #define FDSDSIZE sizeof (struct fddirect) 70 #define FDNSIZE 10 71 72 static int fdfstype = 0; 73 static major_t fdfsmaj; 74 static minor_t fdfsmin; 75 static major_t fdrmaj; 76 static kmutex_t fd_minor_lock; 77 78 static int fdget(vnode_t *, char *, vnode_t **); 79 80 /* ARGSUSED */ 81 static int 82 fdopen(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct) 83 { 84 if ((*vpp)->v_type != VDIR) { 85 mutex_enter(&(*vpp)->v_lock); 86 (*vpp)->v_flag |= VDUP; 87 mutex_exit(&(*vpp)->v_lock); 88 } 89 return (0); 90 } 91 92 /* ARGSUSED */ 93 static int 94 fdclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 95 caller_context_t *ct) 96 { 97 return (0); 98 } 99 100 /* ARGSUSED */ 101 static int 102 fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct) 103 { 104 static struct fddirect dotbuf[] = { 105 { FDROOTINO, "." }, 106 { FDROOTINO, ".." } 107 }; 108 struct fddirect dirbuf; 109 int i, n; 110 int minfd, maxfd, modoff, error = 0; 111 int nentries; 112 rctl_qty_t fdno_ctl; 113 int endoff; 114 115 if (vp->v_type != VDIR) 116 return (ENOSYS); 117 118 mutex_enter(&curproc->p_lock); 119 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 120 curproc->p_rctls, curproc); 121 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 122 mutex_exit(&curproc->p_lock); 123 124 endoff = (nentries + 2) * FDSDSIZE; 125 126 /* 127 * Fake up ".", "..", and the /dev/fd directory entries. 128 */ 129 if (uiop->uio_loffset < (offset_t)0 || 130 uiop->uio_loffset >= (offset_t)endoff || 131 uiop->uio_resid <= 0) 132 return (0); 133 ASSERT(uiop->uio_loffset <= MAXOFF_T); 134 if (uiop->uio_offset < 2*FDSDSIZE) { 135 error = uiomove((caddr_t)dotbuf + uiop->uio_offset, 136 MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset), 137 UIO_READ, uiop); 138 if (uiop->uio_resid <= 0 || error) 139 return (error); 140 } 141 minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE; 142 maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE; 143 modoff = uiop->uio_offset % FDSDSIZE; 144 145 for (i = 0; i < FDDIRSIZE; i++) 146 dirbuf.d_name[i] = '\0'; 147 for (i = minfd; i < MIN(maxfd, nentries); i++) { 148 n = i; 149 dirbuf.d_ino = fdtoi(n); 150 numtos((ulong_t)n, dirbuf.d_name); 151 error = uiomove((caddr_t)&dirbuf + modoff, 152 MIN(uiop->uio_resid, FDSDSIZE - modoff), 153 UIO_READ, uiop); 154 if (uiop->uio_resid <= 0 || error) 155 return (error); 156 modoff = 0; 157 } 158 159 return (error); 160 } 161 162 /* ARGSUSED */ 163 static int 164 fdgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 165 caller_context_t *ct) 166 { 167 vfs_t *vfsp = vp->v_vfsp; 168 timestruc_t now; 169 170 if (vp->v_type == VDIR) { 171 vap->va_nlink = 2; 172 vap->va_size = (u_offset_t) 173 ((P_FINFO(curproc)->fi_nfiles + 2) * FDSDSIZE); 174 vap->va_mode = 0555; 175 vap->va_nodeid = (ino64_t)FDROOTINO; 176 } else { 177 vap->va_nlink = 1; 178 vap->va_size = (u_offset_t)0; 179 vap->va_mode = 0666; 180 vap->va_nodeid = (ino64_t)fdtoi(getminor(vp->v_rdev)); 181 } 182 vap->va_type = vp->v_type; 183 vap->va_rdev = vp->v_rdev; 184 vap->va_blksize = vfsp->vfs_bsize; 185 vap->va_nblocks = (fsblkcnt64_t)0; 186 gethrestime(&now); 187 vap->va_atime = vap->va_mtime = vap->va_ctime = now; 188 vap->va_uid = 0; 189 vap->va_gid = 0; 190 vap->va_fsid = vfsp->vfs_dev; 191 vap->va_seq = 0; 192 return (0); 193 } 194 195 /* ARGSUSED */ 196 static int 197 fdaccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 198 { 199 return (0); 200 } 201 202 /* ARGSUSED */ 203 static int 204 fdlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pnp, 205 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 206 int *direntflags, pathname_t *realpnp) 207 { 208 if (comp[0] == 0 || strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) { 209 VN_HOLD(dp); 210 *vpp = dp; 211 return (0); 212 } 213 return (fdget(dp, comp, vpp)); 214 } 215 216 /* ARGSUSED */ 217 static int 218 fdcreate(vnode_t *dvp, char *comp, vattr_t *vap, enum vcexcl excl, 219 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 220 vsecattr_t *vsecp) 221 { 222 return (fdget(dvp, comp, vpp)); 223 } 224 225 /* ARGSUSED */ 226 static int 227 fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, caller_context_t *ct, 228 int flags) 229 { 230 /* bp holds one dirent structure */ 231 u_offset_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (u_offset_t)]; 232 struct dirent64 *dirent = (struct dirent64 *)bp; 233 int reclen, nentries; 234 rctl_qty_t fdno_ctl; 235 int n; 236 int oresid; 237 off_t off; 238 239 if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 || 240 (uiop->uio_offset % FDSDSIZE) != 0) 241 return (ENOENT); 242 243 ASSERT(uiop->uio_loffset <= MAXOFF_T); 244 oresid = uiop->uio_resid; 245 bzero(bp, sizeof (bp)); 246 247 mutex_enter(&curproc->p_lock); 248 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 249 curproc->p_rctls, curproc); 250 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 251 mutex_exit(&curproc->p_lock); 252 253 while (uiop->uio_resid > 0) { 254 if ((off = uiop->uio_offset) == 0) { /* "." */ 255 dirent->d_ino = (ino64_t)FDROOTINO; 256 dirent->d_name[0] = '.'; 257 dirent->d_name[1] = '\0'; 258 reclen = DIRENT64_RECLEN(1); 259 } else if (off == FDSDSIZE) { /* ".." */ 260 dirent->d_ino = (ino64_t)FDROOTINO; 261 dirent->d_name[0] = '.'; 262 dirent->d_name[1] = '.'; 263 dirent->d_name[2] = '\0'; 264 reclen = DIRENT64_RECLEN(2); 265 } else { 266 /* 267 * Return entries corresponding to the allowable 268 * number of file descriptors for this process. 269 */ 270 if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries) 271 break; 272 dirent->d_ino = (ino64_t)fdtoi(n); 273 numtos((ulong_t)n, dirent->d_name); 274 reclen = DIRENT64_RECLEN(strlen(dirent->d_name)); 275 } 276 dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE); 277 dirent->d_reclen = (ushort_t)reclen; 278 279 if (reclen > uiop->uio_resid) { 280 /* 281 * Error if no entries have been returned yet. 282 */ 283 if (uiop->uio_resid == oresid) 284 return (EINVAL); 285 break; 286 } 287 /* 288 * uiomove() updates both resid and offset by the same 289 * amount. But we want offset to change in increments 290 * of FDSDSIZE, which is different from the number of bytes 291 * being returned to the user. So we set uio_offset 292 * separately, ignoring what uiomove() does. 293 */ 294 if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) 295 return (EFAULT); 296 uiop->uio_offset = off + FDSDSIZE; 297 } 298 if (eofp) 299 *eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries); 300 return (0); 301 } 302 303 /* ARGSUSED */ 304 static void 305 fdinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 306 { 307 mutex_enter(&vp->v_lock); 308 ASSERT(vp->v_count >= 1); 309 if (--vp->v_count != 0) { 310 mutex_exit(&vp->v_lock); 311 return; 312 } 313 mutex_exit(&vp->v_lock); 314 vn_invalid(vp); 315 vn_free(vp); 316 } 317 318 static struct vnodeops *fd_vnodeops; 319 320 static const fs_operation_def_t fd_vnodeops_template[] = { 321 { VOPNAME_OPEN, { .vop_open = fdopen } }, 322 { VOPNAME_CLOSE, { .vop_close = fdclose } }, 323 { VOPNAME_READ, { .vop_read = fdread } }, 324 { VOPNAME_GETATTR, { .vop_getattr = fdgetattr } }, 325 { VOPNAME_ACCESS, { .vop_access = fdaccess } }, 326 { VOPNAME_LOOKUP, { .vop_lookup = fdlookup } }, 327 { VOPNAME_CREATE, { .vop_create = fdcreate } }, 328 { VOPNAME_READDIR, { .vop_readdir = fdreaddir } }, 329 { VOPNAME_INACTIVE, { .vop_inactive = fdinactive } }, 330 { VOPNAME_FRLOCK, { .error = fs_error } }, 331 { VOPNAME_POLL, { .error = fs_error } }, 332 { VOPNAME_DISPOSE, { .error = fs_error } }, 333 { NULL, { NULL } } 334 }; 335 336 static int 337 fdget(struct vnode *dvp, char *comp, struct vnode **vpp) 338 { 339 int n = 0; 340 struct vnode *vp; 341 342 while (*comp) { 343 if (*comp < '0' || *comp > '9') 344 return (ENOENT); 345 n = 10 * n + *comp++ - '0'; 346 } 347 vp = vn_alloc(KM_SLEEP); 348 vp->v_type = VCHR; 349 vp->v_vfsp = dvp->v_vfsp; 350 vn_setops(vp, fd_vnodeops); 351 vp->v_data = NULL; 352 vp->v_flag = VNOMAP; 353 vp->v_rdev = makedevice(fdrmaj, n); 354 vn_exists(vp); 355 *vpp = vp; 356 return (0); 357 } 358 359 /* 360 * fdfs is mounted on /dev/fd, however, there are two interesting 361 * possibilities - two threads racing to do the same mount (protected 362 * by vfs locking), and two threads mounting fdfs in different places. 363 */ 364 /*ARGSUSED*/ 365 static int 366 fdmount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 367 { 368 struct vnode *vp; 369 370 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 371 return (EPERM); 372 if (mvp->v_type != VDIR) 373 return (ENOTDIR); 374 375 mutex_enter(&mvp->v_lock); 376 if ((uap->flags & MS_OVERLAY) == 0 && 377 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { 378 mutex_exit(&mvp->v_lock); 379 return (EBUSY); 380 } 381 mutex_exit(&mvp->v_lock); 382 383 /* 384 * Having the resource be anything but "fd" doesn't make sense 385 */ 386 vfs_setresource(vfsp, "fd", 0); 387 388 vp = vn_alloc(KM_SLEEP); 389 vp->v_vfsp = vfsp; 390 vn_setops(vp, fd_vnodeops); 391 vp->v_type = VDIR; 392 vp->v_data = NULL; 393 vp->v_flag |= VROOT; 394 vfsp->vfs_fstype = fdfstype; 395 vfsp->vfs_data = (char *)vp; 396 mutex_enter(&fd_minor_lock); 397 do { 398 fdfsmin = (fdfsmin + 1) & L_MAXMIN32; 399 vfsp->vfs_dev = makedevice(fdfsmaj, fdfsmin); 400 } while (vfs_devismounted(vfsp->vfs_dev)); 401 mutex_exit(&fd_minor_lock); 402 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, fdfstype); 403 vfsp->vfs_bsize = 1024; 404 return (0); 405 } 406 407 /* ARGSUSED */ 408 static int 409 fdunmount(vfs_t *vfsp, int flag, cred_t *cr) 410 { 411 vnode_t *rvp; 412 413 if (secpolicy_fs_unmount(cr, vfsp) != 0) 414 return (EPERM); 415 416 /* 417 * forced unmount is not supported by this file system 418 * and thus, ENOTSUP, is being returned. 419 */ 420 if (flag & MS_FORCE) 421 return (ENOTSUP); 422 423 rvp = (vnode_t *)vfsp->vfs_data; 424 if (rvp->v_count > 1) 425 return (EBUSY); 426 427 VN_RELE(rvp); 428 return (0); 429 } 430 431 /* ARGSUSED */ 432 static int 433 fdroot(vfs_t *vfsp, vnode_t **vpp) 434 { 435 vnode_t *vp = (vnode_t *)vfsp->vfs_data; 436 437 VN_HOLD(vp); 438 *vpp = vp; 439 return (0); 440 } 441 442 /* 443 * No locking required because I held the root vnode before calling this 444 * function so the vfs won't disappear on me. To be more explicit: 445 * fdvrootp->v_count will be greater than 1 so fdunmount will just return. 446 */ 447 static int 448 fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp) 449 { 450 dev32_t d32; 451 rctl_qty_t fdno_ctl; 452 453 mutex_enter(&curproc->p_lock); 454 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 455 curproc->p_rctls, curproc); 456 mutex_exit(&curproc->p_lock); 457 458 bzero(sp, sizeof (*sp)); 459 sp->f_bsize = 1024; 460 sp->f_frsize = 1024; 461 sp->f_blocks = (fsblkcnt64_t)0; 462 sp->f_bfree = (fsblkcnt64_t)0; 463 sp->f_bavail = (fsblkcnt64_t)0; 464 sp->f_files = (fsfilcnt64_t) 465 (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl + 2)); 466 sp->f_ffree = (fsfilcnt64_t)0; 467 sp->f_favail = (fsfilcnt64_t)0; 468 (void) cmpldev(&d32, vfsp->vfs_dev); 469 sp->f_fsid = d32; 470 (void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name); 471 sp->f_flag = vf_to_stf(vfsp->vfs_flag); 472 sp->f_namemax = FDNSIZE; 473 (void) strcpy(sp->f_fstr, "/dev/fd"); 474 (void) strcpy(&sp->f_fstr[8], "/dev/fd"); 475 return (0); 476 } 477 478 int 479 fdinit(int fstype, char *name) 480 { 481 static const fs_operation_def_t fd_vfsops_template[] = { 482 { VFSNAME_MOUNT, { .vfs_mount = fdmount } }, 483 { VFSNAME_UNMOUNT, { .vfs_unmount = fdunmount } }, 484 { VFSNAME_ROOT, { .vfs_root = fdroot } }, 485 { VFSNAME_STATVFS, { .vfs_statvfs = fdstatvfs } }, 486 { NULL, { NULL } } 487 }; 488 int error; 489 490 fdfstype = fstype; 491 ASSERT(fdfstype != 0); 492 493 /* 494 * Associate VFS ops vector with this fstype. 495 */ 496 error = vfs_setfsops(fstype, fd_vfsops_template, NULL); 497 if (error != 0) { 498 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 499 return (error); 500 } 501 502 error = vn_make_ops(name, fd_vnodeops_template, &fd_vnodeops); 503 if (error != 0) { 504 (void) vfs_freevfsops_by_type(fstype); 505 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 506 return (error); 507 } 508 509 /* 510 * Assign unique "device" numbers (reported by stat(2)). 511 */ 512 fdfsmaj = getudev(); 513 fdrmaj = getudev(); 514 if (fdfsmaj == (major_t)-1 || fdrmaj == (major_t)-1) { 515 cmn_err(CE_WARN, "fdinit: can't get unique device numbers"); 516 if (fdfsmaj == (major_t)-1) 517 fdfsmaj = 0; 518 if (fdrmaj == (major_t)-1) 519 fdrmaj = 0; 520 } 521 mutex_init(&fd_minor_lock, NULL, MUTEX_DEFAULT, NULL); 522 return (0); 523 } 524 525 /* 526 * FDFS Mount options table 527 */ 528 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 529 530 static mntopt_t mntopts[] = { 531 /* 532 * option name cancel option default arg flags 533 */ 534 { MNTOPT_RW, rw_cancel, NULL, MO_DEFAULT, 535 (void *)MNTOPT_NOINTR }, 536 { MNTOPT_IGNORE, NULL, NULL, 0, 537 (void *)0 }, 538 }; 539 540 static mntopts_t fdfs_mntopts = { 541 sizeof (mntopts) / sizeof (mntopt_t), 542 mntopts 543 }; 544 545 static vfsdef_t vfw = { 546 VFSDEF_VERSION, 547 "fd", 548 fdinit, 549 VSW_HASPROTO | VSW_ZMOUNT, 550 &fdfs_mntopts 551 }; 552 553 static struct modlfs modlfs = { 554 &mod_fsops, 555 "filesystem for fd", 556 &vfw 557 }; 558 559 static struct modlinkage modlinkage = { 560 MODREV_1, 561 { &modlfs, NULL } 562 }; 563 564 int 565 _init(void) 566 { 567 return (mod_install(&modlinkage)); 568 } 569 570 int 571 _info(struct modinfo *modinfop) 572 { 573 return (mod_info(&modlinkage, modinfop)); 574 }