1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2015 Joyent, Inc.
  25  */
  26 
  27 #include <sys/param.h>
  28 #include <sys/systm.h>
  29 #include <sys/errno.h>
  30 #include <sys/vnode.h>
  31 #include <sys/vfs.h>
  32 #include <sys/vfs_opreg.h>
  33 #include <sys/uio.h>
  34 #include <sys/cred.h>
  35 #include <sys/pathname.h>
  36 #include <sys/debug.h>
  37 #include <sys/fs/lofs_node.h>
  38 #include <sys/fs/lofs_info.h>
  39 #include <fs/fs_subr.h>
  40 #include <vm/as.h>
  41 #include <vm/seg.h>
  42 
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
  48 
  49 static int
  50 lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
  51 {
  52         vnode_t *vp = *vpp;
  53         vnode_t *rvp;
  54         vnode_t *oldvp;
  55         int error;
  56 
  57 #ifdef LODEBUG
  58         lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
  59             vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
  60 #endif
  61 
  62         oldvp = vp;
  63         vp = rvp = realvp(vp);
  64         /*
  65          * Need to hold new reference to vp since VOP_OPEN() may
  66          * decide to release it.
  67          */
  68         VN_HOLD(vp);
  69         error = VOP_OPEN(&rvp, flag, cr, ct);
  70 
  71         if (!error && rvp != vp) {
  72                 /*
  73                  * the FS which we called should have released the
  74                  * new reference on vp
  75                  */
  76                 *vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
  77                 if ((*vpp)->v_type == VDIR) {
  78                         /*
  79                          * Copy over any looping flags to the new lnode.
  80                          */
  81                         (vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
  82                 }
  83                 if (IS_DEVVP(*vpp)) {
  84                         vnode_t *svp;
  85 
  86                         svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
  87                         VN_RELE(*vpp);
  88                         if (svp == NULL)
  89                                 error = ENOSYS;
  90                         else
  91                                 *vpp = svp;
  92                 }
  93                 VN_RELE(oldvp);
  94         } else {
  95                 ASSERT(rvp->v_count > 1);
  96                 VN_RELE(rvp);
  97         }
  98 
  99         return (error);
 100 }
 101 
 102 static int
 103 lo_close(
 104         vnode_t *vp,
 105         int flag,
 106         int count,
 107         offset_t offset,
 108         struct cred *cr,
 109         caller_context_t *ct)
 110 {
 111 #ifdef LODEBUG
 112         lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
 113 #endif
 114         vp = realvp(vp);
 115         return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
 116 }
 117 
 118 static int
 119 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
 120         caller_context_t *ct)
 121 {
 122 #ifdef LODEBUG
 123         lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
 124 #endif
 125         vp = realvp(vp);
 126         return (VOP_READ(vp, uiop, ioflag, cr, ct));
 127 }
 128 
 129 static int
 130 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
 131         caller_context_t *ct)
 132 {
 133 #ifdef LODEBUG
 134         lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
 135 #endif
 136         vp = realvp(vp);
 137         return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
 138 }
 139 
 140 static int
 141 lo_ioctl(
 142         vnode_t *vp,
 143         int cmd,
 144         intptr_t arg,
 145         int flag,
 146         struct cred *cr,
 147         int *rvalp,
 148         caller_context_t *ct)
 149 {
 150 #ifdef LODEBUG
 151         lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
 152 #endif
 153         vp = realvp(vp);
 154         return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
 155 }
 156 
 157 static int
 158 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
 159 {
 160         vp = realvp(vp);
 161         return (VOP_SETFL(vp, oflags, nflags, cr, ct));
 162 }
 163 
 164 static int
 165 lo_getattr(
 166         vnode_t *vp,
 167         struct vattr *vap,
 168         int flags,
 169         struct cred *cr,
 170         caller_context_t *ct)
 171 {
 172         int error;
 173 
 174 #ifdef LODEBUG
 175         lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
 176 #endif
 177         if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct))
 178                 return (error);
 179 
 180         return (0);
 181 }
 182 
 183 static int
 184 lo_setattr(
 185         vnode_t *vp,
 186         struct vattr *vap,
 187         int flags,
 188         struct cred *cr,
 189         caller_context_t *ct)
 190 {
 191 #ifdef LODEBUG
 192         lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
 193 #endif
 194         vp = realvp(vp);
 195         return (VOP_SETATTR(vp, vap, flags, cr, ct));
 196 }
 197 
 198 static int
 199 lo_access(
 200         vnode_t *vp,
 201         int mode,
 202         int flags,
 203         struct cred *cr,
 204         caller_context_t *ct)
 205 {
 206 #ifdef LODEBUG
 207         lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
 208 #endif
 209         if (mode & VWRITE) {
 210                 if (vp->v_type == VREG && vn_is_readonly(vp))
 211                         return (EROFS);
 212         }
 213         vp = realvp(vp);
 214         return (VOP_ACCESS(vp, mode, flags, cr, ct));
 215 }
 216 
 217 static int
 218 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
 219 {
 220 #ifdef LODEBUG
 221         lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
 222 #endif
 223         vp = realvp(vp);
 224         return (VOP_FSYNC(vp, syncflag, cr, ct));
 225 }
 226 
/*
 * Inactive: hand the lnode behind this lofs vnode to freelonode().
 * The cr and ct arguments are not used here.
 */
/*ARGSUSED*/
static void
lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
{
#ifdef LODEBUG
        lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
#endif
        /* vtol() maps the vnode to its lnode; freelonode() does the rest. */
        freelonode(vtol(vp));
}
 236 
 237 /* ARGSUSED */
 238 static int
 239 lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
 240 {
 241 #ifdef LODEBUG
 242         lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
 243 #endif
 244         vp = realvp(vp);
 245         return (VOP_FID(vp, fidp, ct));
 246 }
 247 
 248 /*
 249  * Given a vnode of lofs type, lookup nm name and
 250  * return a shadow vnode (of lofs type) of the
 251  * real vnode found.
 252  *
 253  * Due to the nature of lofs, there is a potential
 254  * looping in path traversal.
 255  *
 256  * starting from the mount point of an lofs;
 257  * a loop is defined to be a traversal path
 258  * where the mount point or the real vnode of
 259  * the root of this lofs is encountered twice.
 260  * Once at the start of traversal and second
 261  * when the looping is found.
 262  *
 263  * When a loop is encountered, a shadow of the
 264  * covered vnode is returned to stop the looping.
 265  *
 266  * This normally works, but with the advent of
 267  * the new automounter, returning the shadow of the
 268  * covered vnode (autonode, in this case) does not
 269  * stop the loop.  Because further lookup on this
 270  * lonode will cause the autonode to call lo_lookup()
 271  * on the lonode covering it.
 272  *
 273  * example "/net/jurassic/net/jurassic" is a loop.
 274  * returning the shadow of the autonode corresponding to
 275  * "/net/jurassic/net/jurassic" will not terminate the
 276  * loop.   To solve this problem we allow the loop to go
 277  * through one more level component lookup.  Whichever
 278  * directory is then looked up in "/net/jurassic/net/jurassic"
 279  * the vnode returned is the vnode covered by the autonode
 280  * "net" and this will terminate the loop.
 281  *
 282  * Lookup for dot dot has to be dealt with separately.
 283  * It will be nice to have a "one size fits all" kind
 284  * of solution, so that we don't have so many ifs statement
 285  * in the lo_lookup() to handle dotdot.  But, since
 286  * there are so many special cases to handle different
 287  * kinds looping above, we need special codes to handle
 288  * dotdot lookup as well.
 289  */
 290 static int
 291 lo_lookup(
 292         vnode_t *dvp,
 293         char *nm,
 294         vnode_t **vpp,
 295         struct pathname *pnp,
 296         int flags,
 297         vnode_t *rdir,
 298         struct cred *cr,
 299         caller_context_t *ct,
 300         int *direntflags,
 301         pathname_t *realpnp)
 302 {
 303         vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
 304         int error, is_indirectloop;
 305         vnode_t *realdvp = realvp(dvp);
 306         struct loinfo *li = vtoli(dvp->v_vfsp);
 307         int looping = 0;
 308         int autoloop = 0;
 309         int doingdotdot = 0;
 310         int nosub = 0;
 311         int mkflag = 0;
 312 
 313         /*
 314          * If name is empty and no XATTR flags are set, then return
 315          * dvp (empty name == lookup ".").  If an XATTR flag is set
 316          * then we need to call VOP_LOOKUP to get the xattr dir.
 317          */
 318         if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
 319                 VN_HOLD(dvp);
 320                 *vpp = dvp;
 321                 return (0);
 322         }
 323 
 324         if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
 325                 doingdotdot++;
 326                 /*
 327                  * Handle ".." out of mounted filesystem
 328                  */
 329                 while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
 330                         realdvp = realdvp->v_vfsp->vfs_vnodecovered;
 331                         ASSERT(realdvp != NULL);
 332                 }
 333         }
 334 
 335         *vpp = NULL;    /* default(error) case */
 336 
 337         /*
 338          * Do the normal lookup
 339          */
 340         if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
 341             ct, direntflags, realpnp)) {
 342                 vp = NULL;
 343                 goto out;
 344         }
 345 
 346         /*
 347          * We do this check here to avoid returning a stale file handle to the
 348          * caller.
 349          */
 350         if (nm[0] == '.' && nm[1] == '\0') {
 351                 ASSERT(vp == realdvp);
 352                 VN_HOLD(dvp);
 353                 VN_RELE(vp);
 354                 *vpp = dvp;
 355                 return (0);
 356         }
 357 
 358         if (doingdotdot) {
 359                 if ((vtol(dvp))->lo_looping & LO_LOOPING) {
 360                         vfs_t *vfsp;
 361 
 362                         error = vn_vfsrlock_wait(realdvp);
 363                         if (error)
 364                                 goto out;
 365                         vfsp = vn_mountedvfs(realdvp);
 366                         /*
 367                          * In the standard case if the looping flag is set and
 368                          * performing dotdot we would be returning from a
 369                          * covered vnode, implying vfsp could not be null. The
 370                          * exceptions being if we have looping and overlay
 371                          * mounts or looping and covered file systems.
 372                          */
 373                         if (vfsp == NULL) {
 374                                 /*
 375                                  * Overlay mount or covered file system,
 376                                  * so just make the shadow node.
 377                                  */
 378                                 vn_vfsunlock(realdvp);
 379                                 *vpp = makelonode(vp, li, 0);
 380                                 (vtol(*vpp))->lo_looping |= LO_LOOPING;
 381                                 return (0);
 382                         }
 383                         /*
 384                          * When looping get the actual found vnode
 385                          * instead of the vnode covered.
 386                          * Here we have to hold the lock for realdvp
 387                          * since an unmount during the traversal to the
 388                          * root vnode would turn *vfsp into garbage
 389                          * which would be fatal.
 390                          */
 391                         error = VFS_ROOT(vfsp, &tvp);
 392                         vn_vfsunlock(realdvp);
 393 
 394                         if (error)
 395                                 goto out;
 396 
 397                         if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
 398                                 /*
 399                                  * we're back at the real vnode
 400                                  * of the rootvp
 401                                  *
 402                                  * return the rootvp
 403                                  * Ex: /mnt/mnt/..
 404                                  * where / has been lofs-mounted
 405                                  * onto /mnt.  Return the lofs
 406                                  * node mounted at /mnt.
 407                                  */
 408                                 *vpp = tvp;
 409                                 VN_RELE(vp);
 410                                 return (0);
 411                         } else {
 412                                 /*
 413                                  * We are returning from a covered
 414                                  * node whose vfs_mountedhere is
 415                                  * not pointing to vfs of the current
 416                                  * root vnode.
 417                                  * This is a condn where in we
 418                                  * returned a covered node say Zc
 419                                  * but Zc is not the cover of current
 420                                  * root.
 421                                  * i.e.., if X is the root vnode
 422                                  * lookup(Zc,"..") is taking us to
 423                                  * X.
 424                                  * Ex: /net/X/net/X/Y
 425                                  *
 426                                  * If LO_AUTOLOOP (autofs/lofs looping detected)
 427                                  * has been set then we are encountering the
 428                                  * cover of Y (Y being any directory vnode
 429                                  * under /net/X/net/X/).
 430                                  * When performing a dotdot set the
 431                                  * returned vp to the vnode covered
 432                                  * by the mounted lofs, ie /net/X/net/X
 433                                  */
 434                                 VN_RELE(tvp);
 435                                 if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
 436                                         VN_RELE(vp);
 437                                         vp = li->li_rootvp;
 438                                         vp = vp->v_vfsp->vfs_vnodecovered;
 439                                         VN_HOLD(vp);
 440                                         *vpp = makelonode(vp, li, 0);
 441                                         (vtol(*vpp))->lo_looping |= LO_LOOPING;
 442                                         return (0);
 443                                 }
 444                         }
 445                 } else {
 446                         /*
 447                          * No frills just make the shadow node.
 448                          */
 449                         *vpp = makelonode(vp, li, 0);
 450                         return (0);
 451                 }
 452         }
 453 
 454         nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
 455 
 456         /*
 457          * If this vnode is mounted on, then we
 458          * traverse to the vnode which is the root of
 459          * the mounted file system.
 460          */
 461         if (!nosub && (error = traverse(&vp)))
 462                 goto out;
 463 
 464         /*
 465          * Make a lnode for the real vnode.
 466          */
 467         if (vp->v_type != VDIR || nosub) {
 468                 *vpp = makelonode(vp, li, 0);
 469                 if (IS_DEVVP(*vpp)) {
 470                         vnode_t *svp;
 471 
 472                         svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 473                         VN_RELE(*vpp);
 474                         if (svp == NULL)
 475                                 error = ENOSYS;
 476                         else
 477                                 *vpp = svp;
 478                 }
 479                 return (error);
 480         }
 481 
 482         /*
 483          * if the found vnode (vp) is not of type lofs
 484          * then we're just going to make a shadow of that
 485          * vp and get out.
 486          *
 487          * If the found vnode (vp) is of lofs type, and
 488          * we're not doing dotdot, check if we are
 489          * looping.
 490          */
 491         if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
 492                 /*
 493                  * Check if we're looping, i.e.
 494                  * vp equals the root vp of the lofs, directly
 495                  * or indirectly, return the covered node.
 496                  */
 497 
 498                 if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
 499                         if (vp == li->li_rootvp) {
 500                                 /*
 501                                  * Direct looping condn.
 502                                  * Ex:- X is / mounted directory so lookup of
 503                                  * /X/X is a direct looping condn.
 504                                  */
 505                                 tvp = vp;
 506                                 vp = vp->v_vfsp->vfs_vnodecovered;
 507                                 VN_HOLD(vp);
 508                                 VN_RELE(tvp);
 509                                 looping++;
 510                         } else {
 511                                 /*
 512                                  * Indirect looping can be defined as
 513                                  * real lookup returning rootvp of the current
 514                                  * tree in any level of recursion.
 515                                  *
 516                                  * This check is useful if there are multiple
 517                                  * levels of lofs indirections. Suppose vnode X
 518                                  * in the current lookup has as its real vnode
 519                                  * another lofs node. Y = realvp(X) Y should be
 520                                  * a lofs node for the check to continue or Y
 521                                  * is not the rootvp of X.
 522                                  * Ex:- say X and Y are two vnodes
 523                                  * say real(Y) is X and real(X) is Z
 524                                  * parent vnode for X and Y is Z
 525                                  * lookup(Y,"path") say we are looking for Y
 526                                  * again under Y and we have to return Yc.
 527                                  * but the lookup of Y under Y doesnot return
 528                                  * Y the root vnode again here is why.
 529                                  * 1. lookup(Y,"path of Y") will go to
 530                                  * 2. lookup(real(Y),"path of Y") and then to
 531                                  * 3. lookup(real(X),"path of Y").
 532                                  * and now what lookup level 1 sees is the
 533                                  * outcome of 2 but the vnode Y is due to
 534                                  * lookup(Z,"path of Y") so we have to skip
 535                                  * intermediate levels to find if in any level
 536                                  * there is a looping.
 537                                  */
 538                                 is_indirectloop = 0;
 539                                 nonlovp = vp;
 540                                 while (
 541                                     vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
 542                                     !(is_indirectloop)) {
 543                                         if (li->li_rootvp  == nonlovp) {
 544                                                 is_indirectloop++;
 545                                                 break;
 546                                         }
 547                                         nonlovp = realvp(nonlovp);
 548                                 }
 549 
 550                                 if (is_indirectloop) {
 551                                         VN_RELE(vp);
 552                                         vp = nonlovp;
 553                                         vp = vp->v_vfsp->vfs_vnodecovered;
 554                                         VN_HOLD(vp);
 555                                         looping++;
 556                                 }
 557                         }
 558                 } else {
 559                         /*
 560                          * come here only because of the interaction between
 561                          * the autofs and lofs.
 562                          *
 563                          * Lookup of "/net/X/net/X" will return a shadow of
 564                          * an autonode X_a which we call X_l.
 565                          *
 566                          * Lookup of anything under X_l, will trigger a call to
 567                          * auto_lookup(X_a,nm) which will eventually call
 568                          * lo_lookup(X_lr,nm) where X_lr is the root vnode of
 569                          * the current lofs.
 570                          *
 571                          * We come here only when we are called with X_l as dvp
 572                          * and look for something underneath.
 573                          *
 574                          * Now that an autofs/lofs looping condition has been
 575                          * identified any directory vnode contained within
 576                          * dvp will be set to the vnode covered by the
 577                          * mounted autofs. Thus all directories within dvp
 578                          * will appear empty hence teminating the looping.
 579                          * The LO_AUTOLOOP flag is set on the returned lonode
 580                          * to indicate the termination of the autofs/lofs
 581                          * looping. This is required for the correct behaviour
 582                          * when performing a dotdot.
 583                          */
 584                         realdvp = realvp(dvp);
 585                         while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
 586                                 realdvp = realvp(realdvp);
 587                         }
 588 
 589                         error = VFS_ROOT(realdvp->v_vfsp, &tvp);
 590                         if (error)
 591                                 goto out;
 592                         /*
 593                          * tvp now contains the rootvp of the vfs of the
 594                          * real vnode of dvp. The directory vnode vp is set
 595                          * to the covered vnode to terminate looping. No
 596                          * distinction is made between any vp as all directory
 597                          * vnodes contained in dvp are returned as the covered
 598                          * vnode.
 599                          */
 600                         VN_RELE(vp);
 601                         vp = tvp;       /* possibly is an autonode */
 602 
 603                         /*
 604                          * Need to find the covered vnode
 605                          */
 606                         if (vp->v_vfsp->vfs_vnodecovered == NULL) {
 607                                 /*
 608                                  * We don't have a covered vnode so this isn't
 609                                  * an autonode. To find the autonode simply
 610                                  * find the vnode covered by the lofs rootvp.
 611                                  */
 612                                 vp = li->li_rootvp;
 613                                 vp = vp->v_vfsp->vfs_vnodecovered;
 614                                 VN_RELE(tvp);
 615                                 error = VFS_ROOT(vp->v_vfsp, &tvp);
 616                                 if (error)
 617                                         goto out;
 618                                 vp = tvp;       /* now this is an autonode */
 619                                 if (vp->v_vfsp->vfs_vnodecovered == NULL) {
 620                                         /*
 621                                          * Still can't find a covered vnode.
 622                                          * Fail the lookup, or we'd loop.
 623                                          */
 624                                         error = ENOENT;
 625                                         goto out;
 626                                 }
 627                         }
 628                         vp = vp->v_vfsp->vfs_vnodecovered;
 629                         VN_HOLD(vp);
 630                         VN_RELE(tvp);
 631                         /*
 632                          * Force the creation of a new lnode even if the hash
 633                          * table contains a lnode that references this vnode.
 634                          */
 635                         mkflag = LOF_FORCE;
 636                         autoloop++;
 637                 }
 638         }
 639         *vpp = makelonode(vp, li, mkflag);
 640 
 641         if ((looping) ||
 642             (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
 643                 (vtol(*vpp))->lo_looping |= LO_LOOPING;
 644         }
 645 
 646         if (autoloop) {
 647                 (vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
 648         }
 649 
 650 out:
 651         if (error != 0 && vp != NULL)
 652                 VN_RELE(vp);
 653 #ifdef LODEBUG
 654         lo_dprint(4,
 655         "lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
 656             dvp, realvp(dvp), nm, *vpp, vp, error);
 657 #endif
 658         return (error);
 659 }
 660 
 661 /*ARGSUSED*/
 662 static int
 663 lo_create(
 664         vnode_t *dvp,
 665         char *nm,
 666         struct vattr *va,
 667         enum vcexcl exclusive,
 668         int mode,
 669         vnode_t **vpp,
 670         struct cred *cr,
 671         int flag,
 672         caller_context_t *ct,
 673         vsecattr_t *vsecp)
 674 {
 675         int error;
 676         vnode_t *vp = NULL;
 677 
 678 #ifdef LODEBUG
 679         lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
 680 #endif
 681         if (*nm == '\0') {
 682                 ASSERT(vpp && dvp == *vpp);
 683                 vp = realvp(*vpp);
 684         }
 685 
 686         error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag,
 687             ct, vsecp);
 688         if (!error) {
 689                 *vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
 690                 if (IS_DEVVP(*vpp)) {
 691                         vnode_t *svp;
 692 
 693                         svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 694                         VN_RELE(*vpp);
 695                         if (svp == NULL)
 696                                 error = ENOSYS;
 697                         else
 698                                 *vpp = svp;
 699                 }
 700         } else if (error == ENOSYS && exclusive == NONEXCL &&
 701             dvp == vtoli(dvp->v_vfsp)->li_rootvp &&
 702             realvp(dvp)->v_type == VREG) {
 703                 /*
 704                  * We have a single regular file lofs mounted, thus the file is
 705                  * the root vnode (the directory vp is the file vp). Some
 706                  * underlying file systems (e.g. tmpfs or ufs) properly handle
 707                  * this style of create but at least zfs won't support create
 708                  * this way (see zfs_fvnodeops_template which has fs_nosys for
 709                  * the vop_create entry because zfs_create doesn't work
 710                  * properly for this case).
 711                  */
 712                 if ((error = VOP_ACCESS(dvp, mode, 0, cr, NULL)) == 0) {
 713                         /*
 714                          * Since we already know the vnode for the existing
 715                          * file we can handle create as a no-op, as expected,
 716                          * truncating the file if necessary.
 717                          */
 718                         struct vattr vattr;
 719 
 720                         vattr.va_size = 0;
 721                         vattr.va_mask = AT_SIZE;
 722 
 723                         if ((va->va_mask & AT_SIZE) != 0 && va->va_size == 0 &&
 724                             VOP_SETATTR(dvp, &vattr, 0, CRED(), NULL) != 0)
 725                                 return (error);
 726 
 727                         /*
 728                          * vn_createat will do a vn_rele on the file if it is
 729                          * pre-existing, which it is in the case of a single
 730                          * file mounted as the root. Thus, when we eventually
 731                          * close the file the count will already be 1 so the
 732                          * vnode would be freed. To prevent that, we add an
 733                          * extra hold here.
 734                          */
 735                         VN_HOLD(dvp);
 736                         *vpp = dvp;
 737                         error = 0;
 738                 }
 739         }
 740 
 741         return (error);
 742 }
 743 
 744 static int
 745 lo_remove(
 746         vnode_t *dvp,
 747         char *nm,
 748         struct cred *cr,
 749         caller_context_t *ct,
 750         int flags)
 751 {
 752 #ifdef LODEBUG
 753         lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
 754 #endif
 755         dvp = realvp(dvp);
 756         return (VOP_REMOVE(dvp, nm, cr, ct, flags));
 757 }
 758 
 759 static int
 760 lo_link(
 761         vnode_t *tdvp,
 762         vnode_t *vp,
 763         char *tnm,
 764         struct cred *cr,
 765         caller_context_t *ct,
 766         int flags)
 767 {
 768         vnode_t *realvp;
 769 
 770 #ifdef LODEBUG
 771         lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
 772 #endif
 773 
 774         /*
 775          * The source and destination vnodes may be in different lofs
 776          * filesystems sharing the same underlying filesystem, so we need to
 777          * make sure that the filesystem containing the source vnode is not
 778          * mounted read-only (vn_link() has already checked the target vnode).
 779          *
 780          * In a situation such as:
 781          *
 782          * /data        - regular filesystem
 783          * /foo         - lofs mount of /data/foo
 784          * /bar         - read-only lofs mount of /data/bar
 785          *
 786          * This disallows a link from /bar/somefile to /foo/somefile,
 787          * which would otherwise allow changes to somefile on the read-only
 788          * mounted /bar.
 789          */
 790 
 791         if (vn_is_readonly(vp)) {
 792                 return (EROFS);
 793         }
 794         while (vn_matchops(vp, lo_vnodeops)) {
 795                 vp = realvp(vp);
 796         }
 797 
 798         /*
 799          * In the case where the source vnode is on another stacking
 800          * filesystem (such as specfs), the loop above will
 801          * terminate before finding the true underlying vnode.
 802          *
 803          * We use VOP_REALVP here to continue the search.
 804          */
 805         if (VOP_REALVP(vp, &realvp, ct) == 0)
 806                 vp = realvp;
 807 
 808         while (vn_matchops(tdvp, lo_vnodeops)) {
 809                 tdvp = realvp(tdvp);
 810         }
 811         if (vp->v_vfsp != tdvp->v_vfsp)
 812                 return (EXDEV);
 813         return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags));
 814 }
 815 
 816 static int
 817 lo_rename(
 818         vnode_t *odvp,
 819         char *onm,
 820         vnode_t *ndvp,
 821         char *nnm,
 822         struct cred *cr,
 823         caller_context_t *ct,
 824         int flags)
 825 {
 826         vnode_t *tnvp;
 827 
 828 #ifdef LODEBUG
 829         lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
 830 #endif
 831         /*
 832          * If we are coming from a loop back mounted fs, that has been
 833          * mounted in the same filesystem as where we want to move to,
 834          * and that filesystem is read/write, but the lofs filesystem is
 835          * read only, we don't want to allow a rename of the file. The
 836          * vn_rename code checks to be sure the target is read/write already
 837          * so that is not necessary here. However, consider the following
 838          * example:
 839          *              / - regular root fs
 840          *              /foo - directory in root
 841          *              /foo/bar - file in foo directory(in root fs)
 842          *              /baz - directory in root
 843          *              mount -F lofs -o ro /foo /baz - all still in root
 844          *                      directory
 845          * The fact that we mounted /foo on /baz read only should stop us
 846          * from renaming the file /foo/bar /bar, but it doesn't since
 847          * / is read/write. We are still renaming here since we are still
 848          * in the same filesystem, it is just that we do not check to see
 849          * if the filesystem we are coming from in this case is read only.
 850          */
 851         if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
 852                 return (EROFS);
 853         /*
 854          * We need to make sure we're not trying to remove a mount point for a
 855          * filesystem mounted on top of lofs, which only we know about.
 856          */
 857         if (vn_matchops(ndvp, lo_vnodeops))     /* Not our problem. */
 858                 goto rename;
 859 
 860         /*
 861          * XXXci - Once case-insensitive behavior is implemented, it should
 862          * be added here.
 863          */
 864         if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr,
 865             ct, NULL, NULL) != 0)
 866                 goto rename;
 867         if (tnvp->v_type != VDIR) {
 868                 VN_RELE(tnvp);
 869                 goto rename;
 870         }
 871         if (vn_mountedvfs(tnvp)) {
 872                 VN_RELE(tnvp);
 873                 return (EBUSY);
 874         }
 875         VN_RELE(tnvp);
 876 rename:
 877         /*
 878          * Since the case we're dealing with above can happen at any layer in
 879          * the stack of lofs filesystems, we need to recurse down the stack,
 880          * checking to see if there are any instances of a filesystem mounted on
 881          * top of lofs. In order to keep on using the lofs version of
 882          * VOP_RENAME(), we make sure that while the target directory is of type
 883          * lofs, the source directory (the one used for getting the fs-specific
 884          * version of VOP_RENAME()) is also of type lofs.
 885          */
 886         if (vn_matchops(ndvp, lo_vnodeops)) {
 887                 ndvp = realvp(ndvp);    /* Check the next layer */
 888         } else {
 889                 /*
 890                  * We can go fast here
 891                  */
 892                 while (vn_matchops(odvp, lo_vnodeops)) {
 893                         odvp = realvp(odvp);
 894                 }
 895                 if (odvp->v_vfsp != ndvp->v_vfsp)
 896                         return (EXDEV);
 897         }
 898         return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags));
 899 }
 900 
 901 static int
 902 lo_mkdir(
 903         vnode_t *dvp,
 904         char *nm,
 905         struct vattr *va,
 906         vnode_t **vpp,
 907         struct cred *cr,
 908         caller_context_t *ct,
 909         int flags,
 910         vsecattr_t *vsecp)
 911 {
 912         int error;
 913 
 914 #ifdef LODEBUG
 915         lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
 916 #endif
 917         error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp);
 918         if (!error)
 919                 *vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
 920         return (error);
 921 }
 922 
 923 static int
 924 lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
 925 {
 926 #ifdef LODEBUG
 927         lo_dprint(4, "lo_realvp %p\n", vp);
 928 #endif
 929         while (vn_matchops(vp, lo_vnodeops))
 930                 vp = realvp(vp);
 931 
 932         if (VOP_REALVP(vp, vpp, ct) != 0)
 933                 *vpp = vp;
 934         return (0);
 935 }
 936 
 937 static int
 938 lo_rmdir(
 939         vnode_t *dvp,
 940         char *nm,
 941         vnode_t *cdir,
 942         struct cred *cr,
 943         caller_context_t *ct,
 944         int flags)
 945 {
 946         vnode_t *rvp = cdir;
 947 
 948 #ifdef LODEBUG
 949         lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
 950 #endif
 951         /* if cdir is lofs vnode ptr get its real vnode ptr */
 952         if (vn_matchops(dvp, vn_getops(rvp)))
 953                 (void) lo_realvp(cdir, &rvp, ct);
 954         dvp = realvp(dvp);
 955         return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags));
 956 }
 957 
 958 static int
 959 lo_symlink(
 960         vnode_t *dvp,
 961         char *lnm,
 962         struct vattr *tva,
 963         char *tnm,
 964         struct cred *cr,
 965         caller_context_t *ct,
 966         int flags)
 967 {
 968 #ifdef LODEBUG
 969         lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
 970 #endif
 971         dvp = realvp(dvp);
 972         return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags));
 973 }
 974 
 975 static int
 976 lo_readlink(
 977         vnode_t *vp,
 978         struct uio *uiop,
 979         struct cred *cr,
 980         caller_context_t *ct)
 981 {
 982         vp = realvp(vp);
 983         return (VOP_READLINK(vp, uiop, cr, ct));
 984 }
 985 
 986 static int
 987 lo_readdir(
 988         vnode_t *vp,
 989         struct uio *uiop,
 990         struct cred *cr,
 991         int *eofp,
 992         caller_context_t *ct,
 993         int flags)
 994 {
 995 #ifdef LODEBUG
 996         lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
 997 #endif
 998         vp = realvp(vp);
 999         return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags));
1000 }
1001 
1002 static int
1003 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1004 {
1005         vp = realvp(vp);
1006         return (VOP_RWLOCK(vp, write_lock, ct));
1007 }
1008 
1009 static void
1010 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1011 {
1012         vp = realvp(vp);
1013         VOP_RWUNLOCK(vp, write_lock, ct);
1014 }
1015 
1016 static int
1017 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1018 {
1019         vp = realvp(vp);
1020         return (VOP_SEEK(vp, ooff, noffp, ct));
1021 }
1022 
1023 static int
1024 lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
1025 {
1026         while (vn_matchops(vp1, lo_vnodeops))
1027                 vp1 = realvp(vp1);
1028         while (vn_matchops(vp2, lo_vnodeops))
1029                 vp2 = realvp(vp2);
1030         return (VOP_CMP(vp1, vp2, ct));
1031 }
1032 
1033 static int
1034 lo_frlock(
1035         vnode_t *vp,
1036         int cmd,
1037         struct flock64 *bfp,
1038         int flag,
1039         offset_t offset,
1040         struct flk_callback *flk_cbp,
1041         cred_t *cr,
1042         caller_context_t *ct)
1043 {
1044         vp = realvp(vp);
1045         return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1046 }
1047 
1048 static int
1049 lo_space(
1050         vnode_t *vp,
1051         int cmd,
1052         struct flock64 *bfp,
1053         int flag,
1054         offset_t offset,
1055         struct cred *cr,
1056         caller_context_t *ct)
1057 {
1058         vp = realvp(vp);
1059         return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
1060 }
1061 
1062 static int
1063 lo_getpage(
1064         vnode_t *vp,
1065         offset_t off,
1066         size_t len,
1067         uint_t *prot,
1068         struct page *parr[],
1069         size_t psz,
1070         struct seg *seg,
1071         caddr_t addr,
1072         enum seg_rw rw,
1073         struct cred *cr,
1074         caller_context_t *ct)
1075 {
1076         vp = realvp(vp);
1077         return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
1078             ct));
1079 }
1080 
1081 static int
1082 lo_putpage(
1083         vnode_t *vp,
1084         offset_t off,
1085         size_t len,
1086         int flags,
1087         struct cred *cr,
1088         caller_context_t *ct)
1089 {
1090         vp = realvp(vp);
1091         return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
1092 }
1093 
1094 static int
1095 lo_map(
1096         vnode_t *vp,
1097         offset_t off,
1098         struct as *as,
1099         caddr_t *addrp,
1100         size_t len,
1101         uchar_t prot,
1102         uchar_t maxprot,
1103         uint_t flags,
1104         struct cred *cr,
1105         caller_context_t *ct)
1106 {
1107         vp = realvp(vp);
1108         return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
1109 }
1110 
1111 static int
1112 lo_addmap(
1113         vnode_t *vp,
1114         offset_t off,
1115         struct as *as,
1116         caddr_t addr,
1117         size_t len,
1118         uchar_t prot,
1119         uchar_t maxprot,
1120         uint_t flags,
1121         struct cred *cr,
1122         caller_context_t *ct)
1123 {
1124         vp = realvp(vp);
1125         return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1126             ct));
1127 }
1128 
1129 static int
1130 lo_delmap(
1131         vnode_t *vp,
1132         offset_t off,
1133         struct as *as,
1134         caddr_t addr,
1135         size_t len,
1136         uint_t prot,
1137         uint_t maxprot,
1138         uint_t flags,
1139         struct cred *cr,
1140         caller_context_t *ct)
1141 {
1142         vp = realvp(vp);
1143         return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1144             ct));
1145 }
1146 
1147 static int
1148 lo_poll(
1149         vnode_t *vp,
1150         short events,
1151         int anyyet,
1152         short *reventsp,
1153         struct pollhead **phpp,
1154         caller_context_t *ct)
1155 {
1156         vp = realvp(vp);
1157         return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
1158 }
1159 
1160 static int
1161 lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
1162     caller_context_t *ct)
1163 {
1164         vp = realvp(vp);
1165         return (VOP_DUMP(vp, addr, bn, count, ct));
1166 }
1167 
1168 static int
1169 lo_pathconf(
1170         vnode_t *vp,
1171         int cmd,
1172         ulong_t *valp,
1173         struct cred *cr,
1174         caller_context_t *ct)
1175 {
1176         vp = realvp(vp);
1177         return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
1178 }
1179 
1180 static int
1181 lo_pageio(
1182         vnode_t *vp,
1183         struct page *pp,
1184         u_offset_t io_off,
1185         size_t io_len,
1186         int flags,
1187         cred_t *cr,
1188         caller_context_t *ct)
1189 {
1190         vp = realvp(vp);
1191         return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
1192 }
1193 
1194 static void
1195 lo_dispose(
1196         vnode_t *vp,
1197         page_t *pp,
1198         int fl,
1199         int dn,
1200         cred_t *cr,
1201         caller_context_t *ct)
1202 {
1203         vp = realvp(vp);
1204         if (vp != NULL && !VN_ISKAS(vp))
1205                 VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
1206 }
1207 
1208 static int
1209 lo_setsecattr(
1210         vnode_t *vp,
1211         vsecattr_t *secattr,
1212         int flags,
1213         struct cred *cr,
1214         caller_context_t *ct)
1215 {
1216         if (vn_is_readonly(vp))
1217                 return (EROFS);
1218         vp = realvp(vp);
1219         return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
1220 }
1221 
1222 static int
1223 lo_getsecattr(
1224         vnode_t *vp,
1225         vsecattr_t *secattr,
1226         int flags,
1227         struct cred *cr,
1228         caller_context_t *ct)
1229 {
1230         vp = realvp(vp);
1231         return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
1232 }
1233 
1234 static int
1235 lo_shrlock(
1236         vnode_t *vp,
1237         int cmd,
1238         struct shrlock *shr,
1239         int flag,
1240         cred_t *cr,
1241         caller_context_t *ct)
1242 {
1243         vp = realvp(vp);
1244         return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
1245 }
1246 
1247 /*
1248  * Loopback vnode operations vector.
1249  */
1250 
/*
 * The lofs ops vector.  Routines in this file compare a vnode's ops against
 * this pointer with vn_matchops() to decide whether the vnode is a lofs
 * vnode.  It is not assigned in this part of the file; presumably it is
 * built from lo_vnodeops_template during filesystem initialization
 * (TODO confirm against the lofs init code).
 */
struct vnodeops *lo_vnodeops;

/*
 * Name-to-handler table for the lofs vnode operations.  Each entry pairs a
 * VOPNAME_* operation name with the lo_* routine implementing it; the
 * { NULL, { NULL } } entry terminates the list.
 */
const fs_operation_def_t lo_vnodeops_template[] = {
        { VOPNAME_OPEN,         { .vop_open = lo_open } },
        { VOPNAME_CLOSE,        { .vop_close = lo_close } },
        { VOPNAME_READ,         { .vop_read = lo_read } },
        { VOPNAME_WRITE,        { .vop_write = lo_write } },
        { VOPNAME_IOCTL,        { .vop_ioctl = lo_ioctl } },
        { VOPNAME_SETFL,        { .vop_setfl = lo_setfl } },
        { VOPNAME_GETATTR,      { .vop_getattr = lo_getattr } },
        { VOPNAME_SETATTR,      { .vop_setattr = lo_setattr } },
        { VOPNAME_ACCESS,       { .vop_access = lo_access } },
        { VOPNAME_LOOKUP,       { .vop_lookup = lo_lookup } },
        { VOPNAME_CREATE,       { .vop_create = lo_create } },
        { VOPNAME_REMOVE,       { .vop_remove = lo_remove } },
        { VOPNAME_LINK,         { .vop_link = lo_link } },
        { VOPNAME_RENAME,       { .vop_rename = lo_rename } },
        { VOPNAME_MKDIR,        { .vop_mkdir = lo_mkdir } },
        { VOPNAME_RMDIR,        { .vop_rmdir = lo_rmdir } },
        { VOPNAME_READDIR,      { .vop_readdir = lo_readdir } },
        { VOPNAME_SYMLINK,      { .vop_symlink = lo_symlink } },
        { VOPNAME_READLINK,     { .vop_readlink = lo_readlink } },
        { VOPNAME_FSYNC,        { .vop_fsync = lo_fsync } },
        { VOPNAME_INACTIVE,     { .vop_inactive = lo_inactive } },
        { VOPNAME_FID,          { .vop_fid = lo_fid } },
        { VOPNAME_RWLOCK,       { .vop_rwlock = lo_rwlock } },
        { VOPNAME_RWUNLOCK,     { .vop_rwunlock = lo_rwunlock } },
        { VOPNAME_SEEK,         { .vop_seek = lo_seek } },
        { VOPNAME_CMP,          { .vop_cmp = lo_cmp } },
        { VOPNAME_FRLOCK,       { .vop_frlock = lo_frlock } },
        { VOPNAME_SPACE,        { .vop_space = lo_space } },
        { VOPNAME_REALVP,       { .vop_realvp = lo_realvp } },
        { VOPNAME_GETPAGE,      { .vop_getpage = lo_getpage } },
        { VOPNAME_PUTPAGE,      { .vop_putpage = lo_putpage } },
        { VOPNAME_MAP,          { .vop_map = lo_map } },
        { VOPNAME_ADDMAP,       { .vop_addmap = lo_addmap } },
        { VOPNAME_DELMAP,       { .vop_delmap = lo_delmap } },
        { VOPNAME_POLL,         { .vop_poll = lo_poll } },
        { VOPNAME_DUMP,         { .vop_dump = lo_dump } },
        /* Unlike VOPNAME_DUMP, dumpctl is not passed through: fs_error. */
        { VOPNAME_DUMPCTL,      { .error = fs_error } },        /* XXX - why? */
        { VOPNAME_PATHCONF,     { .vop_pathconf = lo_pathconf } },
        { VOPNAME_PAGEIO,       { .vop_pageio = lo_pageio } },
        { VOPNAME_DISPOSE,      { .vop_dispose = lo_dispose } },
        { VOPNAME_SETSECATTR,   { .vop_setsecattr = lo_setsecattr } },
        { VOPNAME_GETSECATTR,   { .vop_getsecattr = lo_getsecattr } },
        { VOPNAME_SHRLOCK,      { .vop_shrlock = lo_shrlock } },
        /* End-of-table sentinel. */
        { NULL,                 { NULL } }
};