1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2015 Joyent, Inc.  All rights reserved.
  25  */
  26 
  27 /*
  28  * This is the /dev (hence, the sdev_ prefix) filesystem.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/systm.h>
  35 #include <sys/kmem.h>
  36 #include <sys/time.h>
  37 #include <sys/pathname.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/file.h>
  42 #include <sys/stat.h>
  43 #include <sys/uio.h>
  44 #include <sys/stat.h>
  45 #include <sys/errno.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/cred.h>
  48 #include <sys/statvfs.h>
  49 #include <sys/policy.h>
  50 #include <sys/mount.h>
  51 #include <sys/debug.h>
  52 #include <sys/modctl.h>
  53 #include <sys/mkdev.h>
  54 #include <fs/fs_subr.h>
  55 #include <sys/fs/sdev_impl.h>
  56 #include <sys/fs/snode.h>
  57 #include <sys/fs/dv_node.h>
  58 #include <sys/sunndi.h>
  59 #include <sys/mntent.h>
  60 #include <sys/disp.h>
  61 
  62 /*
  63  * /dev vfs operations.
  64  */
  65 
  66 /*
  67  * globals
  68  */
  69 struct sdev_data *sdev_origins; /* mount info for origins under /dev */
  70 kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
  71 taskq_t *sdev_taskq = NULL;
  72 
  73 /*
  74  * static
  75  */
  76 static major_t devmajor;        /* the fictitious major we live on */
  77 static major_t devminor;        /* the fictitious minor of this instance */
  78 static struct sdev_data *sdev_mntinfo = NULL;   /* linked list of instances */
  79 
  80 /* LINTED E_STATIC_UNUSED */            /* useful for debugging */
  81 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
  82 
  83 static int sdev_mount(struct vfs *, struct vnode *, struct mounta *,
  84     struct cred *);
  85 static int sdev_unmount(struct vfs *, int, struct cred *);
  86 static int sdev_root(struct vfs *, struct vnode **);
  87 static int sdev_statvfs(struct vfs *, struct statvfs64 *);
  88 static void sdev_insert_mntinfo(struct sdev_data *);
  89 static int devinit(int, char *);
  90 
  91 static vfsdef_t sdev_vfssw = {
  92         VFSDEF_VERSION,
  93         "dev",          /* type name string */
  94         devinit,        /* init routine */
  95         VSW_CANREMOUNT, /* flags */
  96         NULL            /* mount options table prototype */
  97 };
  98 
  99 
 100 /*
 101  * Module linkage information
 102  */
 103 static struct modlfs modlfs = {
 104         &mod_fsops, "/dev filesystem", &sdev_vfssw
 105 };
 106 
 107 static struct modlinkage modlinkage = {
 108         MODREV_1, (void *)&modlfs, NULL
 109 };
 110 
 111 int
 112 _init(void)
 113 {
 114         int e;
 115 
 116         mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
 117         sdev_node_cache_init();
 118         sdev_devfsadm_lockinit();
 119         if ((e = mod_install(&modlinkage)) != 0) {
 120                 sdev_devfsadm_lockdestroy();
 121                 sdev_node_cache_fini();
 122                 mutex_destroy(&sdev_lock);
 123                 return (e);
 124         }
 125         return (0);
 126 }
 127 
 128 /*
 129  * dev module remained loaded for the global /dev instance
 130  */
 131 int
 132 _fini(void)
 133 {
 134         return (EBUSY);
 135 }
 136 
 137 int
 138 _info(struct modinfo *modinfop)
 139 {
 140         return (mod_info(&modlinkage, modinfop));
 141 }
 142 
 143 /*ARGSUSED*/
 144 static int
 145 devinit(int fstype, char *name)
 146 {
 147         static const fs_operation_def_t dev_vfsops_tbl[] = {
 148                 VFSNAME_MOUNT,          { .vfs_mount = sdev_mount },
 149                 VFSNAME_UNMOUNT,        { .vfs_unmount = sdev_unmount },
 150                 VFSNAME_ROOT,           { .vfs_root = sdev_root },
 151                 VFSNAME_STATVFS,        { .vfs_statvfs = sdev_statvfs },
 152                 NULL,                   NULL
 153         };
 154 
 155         int     error;
 156         extern major_t getudev(void);
 157 
 158         devtype = fstype;
 159 
 160         error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
 161         if (error != 0) {
 162                 cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
 163                 return (error);
 164         }
 165 
 166         error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
 167         if (error != 0) {
 168                 (void) vfs_freevfsops_by_type(fstype);
 169                 cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
 170                 return (error);
 171         }
 172 
 173         if ((devmajor = getudev()) == (major_t)-1) {
 174                 cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
 175                 return (1);
 176         }
 177 
 178         /* initialize negative cache */
 179         sdev_ncache_init();
 180 
 181         return (0);
 182 }
 183 
 184 /*
 185  * Both mount point and backing store directory name are
 186  * passed in from userland
 187  */
 188 static int
 189 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
 190     struct cred *cr)
 191 {
 192         struct sdev_data *sdev_data;
 193         struct vnode *avp;
 194         struct sdev_node *dv;
 195         struct sdev_mountargs *args = NULL;
 196         int     error = 0;
 197         dev_t   devdev;
 198 
 199         /*
 200          * security check
 201          */
 202         if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
 203             (secpolicy_sys_devices(cr) != 0))
 204                 return (EPERM);
 205 
 206         /*
 207          * Sanity check the mount point
 208          */
 209         if (mvp->v_type != VDIR)
 210                 return (ENOTDIR);
 211 
 212         /*
 213          * Sanity Check for overlay mount.
 214          */
 215         mutex_enter(&mvp->v_lock);
 216         if ((uap->flags & MS_OVERLAY) == 0 &&
 217             (uap->flags & MS_REMOUNT) == 0 &&
 218             (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
 219                 mutex_exit(&mvp->v_lock);
 220                 return (EBUSY);
 221         }
 222         mutex_exit(&mvp->v_lock);
 223 
 224         args = kmem_zalloc(sizeof (*args), KM_SLEEP);
 225 
 226         if ((uap->flags & MS_DATA) &&
 227             (uap->datalen != 0 && uap->dataptr != NULL)) {
 228                 /* copy in the arguments */
 229                 if (error = sdev_copyin_mountargs(uap, args))
 230                         goto cleanup;
 231         }
 232 
 233         /*
 234          * Sanity check the backing store
 235          */
 236         if (args->sdev_attrdir) {
 237                 /* user supplied an attribute store */
 238                 if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
 239                     UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
 240                         cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
 241                             "directory %s failed",
 242                             (char *)(uintptr_t)args->sdev_attrdir);
 243                         goto cleanup;
 244                 }
 245 
 246                 if (avp->v_type != VDIR) {
 247                         VN_RELE(avp);
 248                         error = ENOTDIR;
 249                         goto cleanup;
 250                 }
 251         } else {
 252                 /* use mountp as the attribute store */
 253                 avp = mvp;
 254                 VN_HOLD(avp);
 255         }
 256 
 257         mutex_enter(&sdev_lock);
 258 
 259         /*
 260          * Check that the taskq has been created. We can't do this in our
 261          * _init or devinit because they run too early for ddi_taskq_create.
 262          */
 263         if (sdev_taskq == NULL) {
 264                 sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0);
 265                 if (sdev_taskq == NULL) {
 266                         error = ENOMEM;
 267                         mutex_exit(&sdev_lock);
 268                         VN_RELE(avp);
 269                         goto cleanup;
 270                 }
 271         }
 272 
 273         /*
 274          * handling installation
 275          */
 276         if (uap->flags & MS_REMOUNT) {
 277                 sdev_data = (struct sdev_data *)vfsp->vfs_data;
 278                 ASSERT(sdev_data);
 279 
 280                 dv = sdev_data->sdev_root;
 281                 ASSERT(dv == dv->sdev_dotdot);
 282 
 283                 /*
 284                  * mark all existing sdev_nodes (except root node) stale
 285                  */
 286                 sdev_stale(dv);
 287 
 288                 /* Reset previous mountargs */
 289                 if (sdev_data->sdev_mountargs) {
 290                         kmem_free(sdev_data->sdev_mountargs,
 291                             sizeof (struct sdev_mountargs));
 292                 }
 293                 sdev_data->sdev_mountargs = args;
 294                 args = NULL;            /* so it won't be freed below */
 295 
 296                 sdev_stale_attrvp = dv->sdev_attrvp;
 297                 dv->sdev_attrvp = avp;
 298                 vfsp->vfs_mtime = ddi_get_time();
 299 
 300                 mutex_exit(&sdev_lock);
 301                 goto cleanup;                           /* we're done */
 302         }
 303 
 304         /*
 305          * Create and initialize the vfs-private data.
 306          */
 307         devdev = makedevice(devmajor, devminor);
 308         while (vfs_devismounted(devdev)) {
 309                 devminor = (devminor + 1) & MAXMIN32;
 310 
 311                 /*
 312                  * All the minor numbers are used up.
 313                  */
 314                 if (devminor == 0) {
 315                         mutex_exit(&sdev_lock);
 316                         VN_RELE(avp);
 317                         error = ENODEV;
 318                         goto cleanup;
 319                 }
 320 
 321                 devdev = makedevice(devmajor, devminor);
 322         }
 323 
 324         dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
 325         sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
 326         vfsp->vfs_dev = devdev;
 327         vfsp->vfs_data = (caddr_t)sdev_data;
 328         vfsp->vfs_fstype = devtype;
 329         vfsp->vfs_bsize = DEV_BSIZE;
 330         vfsp->vfs_mtime = ddi_get_time();
 331         vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
 332 
 333         ASSERT(dv == dv->sdev_dotdot);
 334 
 335         sdev_data->sdev_vfsp = vfsp;
 336         sdev_data->sdev_root = dv;
 337         sdev_data->sdev_mountargs = args;
 338 
 339         /* get acl flavor from attribute dir */
 340         if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
 341             kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
 342                 sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
 343 
 344         args = NULL;                    /* so it won't be freed below */
 345         sdev_insert_mntinfo(sdev_data);
 346         mutex_exit(&sdev_lock);
 347 
 348         if (!SDEV_IS_GLOBAL(dv)) {
 349                 ASSERT(sdev_origins);
 350                 dv->sdev_flags &= ~SDEV_GLOBAL;
 351                 dv->sdev_origin = sdev_origins->sdev_root;
 352         } else {
 353                 sdev_ncache_setup();
 354                 rw_enter(&dv->sdev_contents, RW_WRITER);
 355                 sdev_filldir_dynamic(dv);
 356                 rw_exit(&dv->sdev_contents);
 357         }
 358 
 359         sdev_update_timestamps(dv->sdev_attrvp,
 360             cr, AT_CTIME|AT_MTIME|AT_ATIME);
 361 
 362 cleanup:
 363         if (args)
 364                 kmem_free(args, sizeof (*args));
 365         return (error);
 366 }
 367 
 368 /*
 369  * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
 370  */
 371 static int
 372 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
 373 {
 374         struct sdev_node *dv;
 375         int error;
 376         struct sdev_data *sdev_data, *prev, *next;
 377 
 378         /*
 379          * enforce the security policies
 380          */
 381         if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
 382             (secpolicy_sys_devices(cr) != 0))
 383                 return (EPERM);
 384 
 385         if (flag & MS_FORCE)
 386                 return (ENOTSUP);
 387 
 388         mutex_enter(&sdev_lock);
 389         dv = VFSTOSDEVFS(vfsp)->sdev_root;
 390         ASSERT(dv == dv->sdev_dotdot);
 391         if (SDEVTOV(dv)->v_count > 1) {
 392                 mutex_exit(&sdev_lock);
 393                 return (EBUSY);
 394         }
 395 
 396         /*
 397          * global instance remains mounted
 398          */
 399         if (SDEV_IS_GLOBAL(dv)) {
 400                 mutex_exit(&sdev_lock);
 401                 return (EBUSY);
 402         }
 403         mutex_exit(&sdev_lock);
 404 
 405         /* verify the v_count */
 406         if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
 407                 return (error);
 408         }
 409         ASSERT(SDEVTOV(dv)->v_count == 1);
 410 
 411         /* release hold on root node and destroy it */
 412         SDEV_RELE(dv);
 413         dv->sdev_nlink -= 2;
 414         sdev_nodedestroy(dv, 0);
 415 
 416         sdev_data = (struct sdev_data *)vfsp->vfs_data;
 417         vfsp->vfs_data = (caddr_t)0;
 418 
 419         /*
 420          * XXX separate it into sdev_delete_mntinfo() if useful
 421          */
 422         mutex_enter(&sdev_lock);
 423         prev = sdev_data->sdev_prev;
 424         next = sdev_data->sdev_next;
 425         if (prev)
 426                 prev->sdev_next = next;
 427         else
 428                 sdev_mntinfo = next;
 429         if (next)
 430                 next->sdev_prev = prev;
 431         mutex_exit(&sdev_lock);
 432 
 433         if (sdev_data->sdev_mountargs) {
 434                 kmem_free(sdev_data->sdev_mountargs,
 435                     sizeof (struct sdev_mountargs));
 436         }
 437         kmem_free(sdev_data, sizeof (struct sdev_data));
 438         return (0);
 439 }
 440 
 441 /*
 442  * return root vnode for given vfs
 443  */
 444 static int
 445 sdev_root(struct vfs *vfsp, struct vnode **vpp)
 446 {
 447         *vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
 448         VN_HOLD(*vpp);
 449         return (0);
 450 }
 451 
 452 /*
 453  * return 'generic superblock' information to userland.
 454  *
 455  * not much that we can usefully admit to here
 456  */
 457 static int
 458 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
 459 {
 460         dev32_t d32;
 461 
 462         bzero(sbp, sizeof (*sbp));
 463         sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
 464         sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
 465 
 466         /* no illusions that free/avail files is relevant to dev */
 467         sbp->f_ffree = 0;
 468         sbp->f_favail = 0;
 469 
 470         /* no illusions that blocks are relevant to devfs */
 471         sbp->f_bfree = 0;
 472         sbp->f_bavail = 0;
 473         sbp->f_blocks = 0;
 474 
 475         (void) cmpldev(&d32, vfsp->vfs_dev);
 476         sbp->f_fsid = d32;
 477         (void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
 478         sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
 479         sbp->f_namemax = MAXNAMELEN - 1;
 480         (void) strcpy(sbp->f_fstr, "dev");
 481 
 482         return (0);
 483 }
 484 
 485 static void
 486 sdev_insert_mntinfo(struct sdev_data *data)
 487 {
 488         ASSERT(mutex_owned(&sdev_lock));
 489         data->sdev_next = sdev_mntinfo;
 490         data->sdev_prev = NULL;
 491         if (sdev_mntinfo) {
 492                 sdev_mntinfo->sdev_prev = data;
 493         } else {
 494                 sdev_origins = data;
 495         }
 496         sdev_mntinfo = data;
 497 }
 498 
 499 struct sdev_data *
 500 sdev_find_mntinfo(char *mntpt)
 501 {
 502         struct sdev_data *mntinfo;
 503 
 504         mutex_enter(&sdev_lock);
 505         mntinfo = sdev_mntinfo;
 506         while (mntinfo) {
 507                 if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
 508                         SDEVTOV(mntinfo->sdev_root)->v_count++;
 509                         break;
 510                 }
 511                 mntinfo = mntinfo->sdev_next;
 512         }
 513         mutex_exit(&sdev_lock);
 514         return (mntinfo);
 515 }
 516 
 517 void
 518 sdev_mntinfo_rele(struct sdev_data *mntinfo)
 519 {
 520         mutex_enter(&sdev_lock);
 521         SDEVTOV(mntinfo->sdev_root)->v_count--;
 522         mutex_exit(&sdev_lock);
 523 }