/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/*        All Rights Reserved   */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */


#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/fs/snode.h>
#include <sys/fs/fifonode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/autoconf.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>

struct vfs spec_vfs;
static dev_t specdev;
struct kmem_cache *snode_cache;
int spec_debug = 0;

static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
static void sinsert(struct snode *);

struct vnode *
specvp_devfs(
        struct vnode    *realvp,
        dev_t           dev,
        vtype_t         vtyp,
        struct cred     *cr,
        dev_info_t      *dip)
{
        struct vnode    *vp;

        ASSERT(realvp && dip);
        vp = specvp(realvp, dev, vtyp, cr);
        ASSERT(vp);

        /* associate a dip hold with the common snode's s_dip pointer */
        spec_assoc_vp_with_devi(vp, dip);
        return (vp);
}
/*
 * Return a shadow special vnode for the given dev.  If no snode exists
 * for this dev, create one and put it in a table hashed by <dev, realvp>.
 * If the snode for this dev is already in the table, return it (its
 * reference count is incremented by sfind).  The snode will be flushed
 * from the table when spec_inactive() calls sdelete().
 *
 * The fsid is inherited from the real vnode so that clones
 * can be found.
 */
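/*
 * Illustrative sketch (not part of this file): a file system that returns
 * a device node from its lookup path typically wraps it with the shadow
 * vnode like this; "dvp" and "cr" are hypothetical locals.
 *
 *	if (IS_DEVVP(dvp)) {
 *		struct vnode *svp;
 *
 *		svp = specvp(dvp, dvp->v_rdev, dvp->v_type, cr);
 *		VN_RELE(dvp);
 *		if (svp == NULL)
 *			return (ENOSYS);
 *		dvp = svp;
 *	}
 */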
struct vnode *
specvp(
        struct vnode    *vp,
        dev_t           dev,
        vtype_t         type,
        struct cred     *cr)
{
        struct snode *sp;
        struct snode *nsp;
        struct snode *csp;
        struct vnode *svp;
        struct vattr va;
        int     rc;
        int     used_csp = 0;           /* Did we use pre-allocated csp */

        if (vp == NULL)
                return (NULL);
        if (vp->v_type == VFIFO)
                return (fifovp(vp, cr));

        ASSERT(vp->v_type == type);
        ASSERT(vp->v_rdev == dev);

        /*
         * Pre-allocate snodes before holding any locks in case we block.
         */
        nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
        csp = kmem_cache_alloc(snode_cache, KM_SLEEP);

        /*
         * Get the time attributes outside of the stable lock since
         * this operation may block.  This work is wasted if the snode
         * turns out to already be in the cache.
         */
        va.va_mask = AT_FSID | AT_TIMES;
        rc = VOP_GETATTR(vp, &va, 0, cr, NULL);     /* XXX may block! */

        mutex_enter(&stable_lock);
        if ((sp = sfind(dev, type, vp)) == NULL) {
                struct vnode *cvp;

                sp = nsp;       /* Use pre-allocated snode */
                svp = STOV(sp);

                sp->s_realvp = vp;
                VN_HOLD(vp);
                sp->s_commonvp = NULL;
                sp->s_dev    = dev;
                sp->s_dip    = NULL;
                sp->s_nextr  = NULL;
                sp->s_list   = NULL;
                sp->s_plcy   = NULL;
                sp->s_size   = 0;
                sp->s_flag   = 0;
                if (rc == 0) {
                        /*
                         * Set times in snode to those in the vnode.
                         */
                        sp->s_fsid = va.va_fsid;
                        sp->s_atime = va.va_atime.tv_sec;
                        sp->s_mtime = va.va_mtime.tv_sec;
                        sp->s_ctime = va.va_ctime.tv_sec;
                } else {
                        sp->s_fsid = specdev;
                        sp->s_atime = 0;
                        sp->s_mtime = 0;
                        sp->s_ctime = 0;
                }
                sp->s_count  = 0;
                sp->s_mapcnt = 0;

                vn_reinit(svp);
                svp->v_flag  = (vp->v_flag & VROOT);
                svp->v_vfsp  = vp->v_vfsp;
                VFS_HOLD(svp->v_vfsp);
                svp->v_type  = type;
                svp->v_rdev  = dev;
                (void) vn_copypath(vp, svp);
                if (type == VBLK || type == VCHR) {
                        cvp = get_cvp(dev, type, csp, &used_csp);
                        svp->v_stream = cvp->v_stream;

                        sp->s_commonvp = cvp;
                }
                vn_exists(svp);
                sinsert(sp);
                mutex_exit(&stable_lock);
                if (used_csp == 0) {
                        /* Didn't use the pre-allocated common snode; free it */
                        kmem_cache_free(snode_cache, csp);
                }
        } else {
                mutex_exit(&stable_lock);
                /* free the unused pre-allocated snodes */
                kmem_cache_free(snode_cache, nsp);
                kmem_cache_free(snode_cache, csp);
        }
        return (STOV(sp));
}

/*
 * Return a special vnode for the given dev; no vnode is supplied
 * for it to shadow.  Always create a new snode and put it in the
 * table hashed by <dev, NULL>.  The snode will be flushed from the
 * table when spec_inactive() calls sdelete().  The association of
 * this node with an attached instance of hardware is not made until
 * spec_open time.
 *
 * N.B. Assumes the caller takes on the responsibility of making sure
 * no one else is creating a snode for (dev, type) at this time.
 */
struct vnode *
makespecvp(dev_t dev, vtype_t type)
{
        struct snode *sp;
        struct vnode *svp, *cvp;
        time_t now;

        sp = kmem_cache_alloc(snode_cache, KM_SLEEP);
        svp = STOV(sp);
        cvp = commonvp(dev, type);
        now = gethrestime_sec();

        sp->s_realvp = NULL;
        sp->s_commonvp = cvp;
        sp->s_dev    = dev;
        sp->s_dip    = NULL;
        sp->s_nextr  = NULL;
        sp->s_list   = NULL;
        sp->s_plcy   = NULL;
        sp->s_size   = 0;
        sp->s_flag   = 0;
        sp->s_fsid   = specdev;
        sp->s_atime  = now;
        sp->s_mtime  = now;
        sp->s_ctime  = now;
        sp->s_count  = 0;
        sp->s_mapcnt = 0;

        vn_reinit(svp);
        svp->v_vfsp  = &spec_vfs;
        svp->v_stream = cvp->v_stream;
        svp->v_type  = type;
        svp->v_rdev  = dev;

        vn_exists(svp);
        mutex_enter(&stable_lock);
        sinsert(sp);
        mutex_exit(&stable_lock);

        return (svp);
}


/*
 * This function is called from spec_assoc_vp_with_devi(). That function
 * associates a "new" dip with a common snode, releasing (any) old dip
 * in the process. This function (spec_assoc_fence()) looks at the "new dip"
 * and determines whether the snode should be fenced off or not. As the
 * table below indicates, the value of old-dip is a don't care for all cases.
 *
 * old-dip      new-dip         common-snode
 * =========================================
 * Don't care   NULL            unfence
 * Don't care   retired         fence
 * Don't care   not-retired     unfence
 *
 * Since the old-dip value is a "don't care", it is not passed into this
 * function.
 */
static void
spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
{
        int             fence;
        struct snode    *csp;

        ASSERT(vp);
        ASSERT(vn_matchops(vp, spec_getvnodeops()));

        fence = 0;
        if (ndip != NULL) {
                mutex_enter(&DEVI(ndip)->devi_lock);
                if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
                        fence = 1;
                mutex_exit(&DEVI(ndip)->devi_lock);
        }

        csp = VTOCS(vp);
        ASSERT(csp);

        /* SFENCED flag only set on common snode */
        mutex_enter(&csp->s_lock);
        if (fence)
                csp->s_flag |= SFENCED;
        else
                csp->s_flag &= ~SFENCED;
        mutex_exit(&csp->s_lock);

        FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
            fence ? "" : "un", (void *)csp, (void *)ndip));
}

/*
 * Associate the common snode with a devinfo node.  This is called from:
 *
 *   1) specvp_devfs to associate a specfs node with the dip attached
 *      by devfs.
 *
 *   2) spec_open after path reconstruction and attach.
 *
 *   3) From dacf processing to associate a makespecvp node with
 *      the dip that dacf postattach processing is being performed on.
 *      This association is made prior to open to avoid recursion issues.
 *
 *   4) From ddi_assoc_queue_with_devi to change vnode association as part of
 *      DL_ATTACH/DL_DETACH processing (SDIPSET already set).  The call
 *      from ddi_assoc_queue_with_devi may specify a NULL dip.
 *
 * We put an extra hold on the devinfo node passed in as we establish it as
 * the new s_dip pointer.  Any hold associated with the prior s_dip pointer
 * is released. The new hold will stay active until another call to
 * spec_assoc_vp_with_devi or until the common snode is destroyed by
 * spec_inactive after the last VN_RELE of the common node. This devinfo hold
 * transfers across a clone open except in the clone_dev case, where the clone
 * driver is no longer required after open.
 *
 * When SDIPSET is set and s_dip is NULL, the vnode has an association with
 * the driver even though there is currently no association with a specific
 * hardware instance.
 */
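/*
 * Illustrative sketch of the dacf-style usage (not part of this file;
 * "dev" and "dip" are hypothetical locals).  The association places a
 * hold on dip that lasts until the common snode is destroyed or the
 * association is changed:
 *
 *	struct vnode *vp = makespecvp(dev, VCHR);
 *
 *	spec_assoc_vp_with_devi(vp, dip);
 */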
void
spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
{
        struct snode    *csp;
        dev_info_t      *olddip;

        ASSERT(vp);

        /*
         * Don't establish a NULL association for a vnode associated with the
         * clone driver.  The qassociate(, -1) call from a streams driver's
         * open implementation to indicate support for qassociate has the
         * side-effect of this type of spec_assoc_vp_with_devi call. This
         * call should not change the association of the pre-clone
         * vnode associated with the clone driver; the post-clone newdev
         * association will be established later by spec_clone().
         */
        if ((dip == NULL) && (getmajor(vp->v_rdev) == clone_major))
                return;

        /* hold the new */
        if (dip)
                e_ddi_hold_devi(dip);

        csp = VTOS(VTOS(vp)->s_commonvp);
        mutex_enter(&csp->s_lock);
        olddip = csp->s_dip;
        csp->s_dip = dip;
        csp->s_flag |= SDIPSET;

        /* If the association changes then invalidate the cached size */
        if (olddip != dip)
                csp->s_flag &= ~SSIZEVALID;
        mutex_exit(&csp->s_lock);

        spec_assoc_fence(dip, vp);

        /* release the old */
        if (olddip)
                ddi_release_devi(olddip);
}

/*
 * Return the held dip associated with the specified vnode's common snode.
 */
dev_info_t *
spec_hold_devi_by_vp(struct vnode *vp)
{
        struct snode    *csp;
        dev_info_t      *dip;

        ASSERT(vn_matchops(vp, spec_getvnodeops()));

        csp = VTOS(VTOS(vp)->s_commonvp);
        dip = csp->s_dip;
        if (dip)
                e_ddi_hold_devi(dip);
        return (dip);
}
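/*
 * Illustrative caller pattern (a sketch, not taken from this file): the
 * hold returned above is dropped with ddi_release_devi() when the caller
 * is done with the dip:
 *
 *	dev_info_t *dip;
 *
 *	if ((dip = spec_hold_devi_by_vp(vp)) != NULL) {
 *		...
 *		ddi_release_devi(dip);
 *	}
 */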

/*
 * Find a special vnode that refers to the given device
 * of the given type.  Never return a "common" vnode.
 * Return NULL if a special vnode does not exist.
 * HOLD the vnode before returning it.
 */
struct vnode *
specfind(dev_t dev, vtype_t type)
{
        struct snode *st;
        struct vnode *nvp;

        mutex_enter(&stable_lock);
        st = stable[STABLEHASH(dev)];
        while (st != NULL) {
                if (st->s_dev == dev) {
                        nvp = STOV(st);
                        if (nvp->v_type == type && st->s_commonvp != nvp) {
                                VN_HOLD(nvp);
                                mutex_exit(&stable_lock);
                                return (nvp);
                        }
                }
                st = st->s_next;
        }
        mutex_exit(&stable_lock);
        return (NULL);
}

/*
 * Walk the snode table looking for snodes that reference dip.
 *
 * This function determines if a devinfo node is "BUSY" from the perspective
 * of having an active vnode associated with the device, which represents a
 * dependency on the device's services.  This function is needed because a
 * devinfo node can have a non-zero devi_ref and still NOT be "BUSY" when,
 * for instance, the framework is manipulating the node (has an open
 * ndi_hold_devi).
 *
 * Returns:
 *      DEVI_REFERENCED         - if dip is referenced
 *      DEVI_NOT_REFERENCED     - if dip is not referenced
 */
int
devi_stillreferenced(dev_info_t *dip)
{
        struct snode    *sp;
        int             i;

        /* if no hold then there can't be an snode with s_dip == dip */
        if (e_ddi_devi_holdcnt(dip) == 0)
                return (DEVI_NOT_REFERENCED);

        mutex_enter(&stable_lock);
        for (i = 0; i < STABLESIZE; i++) {
                for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
                        if (sp->s_dip == dip) {
                                mutex_exit(&stable_lock);
                                return (DEVI_REFERENCED);
                        }
                }
        }
        mutex_exit(&stable_lock);
        return (DEVI_NOT_REFERENCED);
}

/*
 * Given an snode, return the open count and the dip
 * associated with that snode.
 * Assumes the caller holds the appropriate locks
 * to prevent the snode and/or dip from going away.
 * Returns:
 *      -1      No associated dip
 *      >= 0    Number of opens.
 */
int
spec_devi_open_count(struct snode *sp, dev_info_t **dipp)
{
        dev_info_t *dip;
        uint_t count;
        struct vnode *vp;

        ASSERT(sp);
        ASSERT(dipp);

        vp = STOV(sp);

        *dipp = NULL;

        /*
         * We are only interested in common snodes. Only common snodes
         * get their s_count fields bumped up on opens.
         */
        if (sp->s_commonvp != vp || (dip = sp->s_dip) == NULL)
                return (-1);

        mutex_enter(&sp->s_lock);
        count = sp->s_count + sp->s_mapcnt;
        if (sp->s_flag & SLOCKED)
                count++;
        mutex_exit(&sp->s_lock);

        *dipp = dip;

        return (count);
}

/*
 * Given a device vnode, return the common
 * vnode associated with it.
 */
struct vnode *
common_specvp(struct vnode *vp)
{
        struct snode *sp;

        if (((vp->v_type != VBLK) && (vp->v_type != VCHR)) ||
            !vn_matchops(vp, spec_getvnodeops()))
                return (vp);
        sp = VTOS(vp);
        return (sp->s_commonvp);
}

/*
 * Returns a special vnode for the given dev.  The vnode is the
 * one which is "common" to all the snodes which represent the
 * same device.
 * Similar to commonvp() but doesn't acquire the stable_lock, and
 * may use a pre-allocated snode provided by the caller.
 */
static struct vnode *
get_cvp(
        dev_t           dev,
        vtype_t         type,
        struct snode    *nsp,           /* pre-allocated snode */
        int             *used_nsp)      /* flag indicating if we use nsp */
{
        struct snode *sp;
        struct vnode *svp;

        ASSERT(MUTEX_HELD(&stable_lock));
        if ((sp = sfind(dev, type, NULL)) == NULL) {
                sp = nsp;               /* Use pre-allocated snode */
                *used_nsp = 1;          /* return value */
                svp = STOV(sp);

                sp->s_realvp = NULL;
                sp->s_commonvp = svp;           /* points to itself */
                sp->s_dev    = dev;
                sp->s_dip    = NULL;
                sp->s_nextr  = NULL;
                sp->s_list   = NULL;
                sp->s_plcy   = NULL;
                sp->s_size   = UNKNOWN_SIZE;
                sp->s_flag   = 0;
                sp->s_fsid   = specdev;
                sp->s_atime  = 0;
                sp->s_mtime  = 0;
                sp->s_ctime  = 0;
                sp->s_count  = 0;
                sp->s_mapcnt = 0;

                vn_reinit(svp);
                svp->v_vfsp  = &spec_vfs;
                svp->v_type  = type;
                svp->v_rdev  = dev;
                vn_exists(svp);
                sinsert(sp);
        } else
                *used_nsp = 0;
        return (STOV(sp));
}

/*
 * Returns a special vnode for the given dev.  The vnode is the
 * one which is "common" to all the snodes which represent the
 * same device.  For use ONLY by SPECFS.
 */
struct vnode *
commonvp(dev_t dev, vtype_t type)
{
        struct snode *sp, *nsp;
        struct vnode *svp;

        /* Pre-allocate a snode in case we might block */
        nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);

        mutex_enter(&stable_lock);
        if ((sp = sfind(dev, type, NULL)) == NULL) {
                sp = nsp;               /* Use pre-allocated snode */
                svp = STOV(sp);

                sp->s_realvp = NULL;
                sp->s_commonvp = svp;           /* points to itself */
                sp->s_dev    = dev;
                sp->s_dip    = NULL;
                sp->s_nextr  = NULL;
                sp->s_list   = NULL;
                sp->s_plcy   = NULL;
                sp->s_size   = UNKNOWN_SIZE;
                sp->s_flag   = 0;
                sp->s_fsid   = specdev;
                sp->s_atime  = 0;
                sp->s_mtime  = 0;
                sp->s_ctime  = 0;
                sp->s_count  = 0;
                sp->s_mapcnt = 0;

                vn_reinit(svp);
                svp->v_vfsp  = &spec_vfs;
                svp->v_type  = type;
                svp->v_rdev  = dev;
                vn_exists(svp);
                sinsert(sp);
                mutex_exit(&stable_lock);
        } else {
                mutex_exit(&stable_lock);
                /* Didn't need the pre-allocated snode */
                kmem_cache_free(snode_cache, nsp);
        }
        return (STOV(sp));
}

/*
 * Snode lookup stuff.
 * These routines maintain a table of snodes hashed by dev so
 * that the snode for a dev can be found if it already exists.
 */
struct snode *stable[STABLESIZE];
int             stablesz = STABLESIZE;
kmutex_t        stable_lock;

/*
 * Put a snode in the table.
 */
static void
sinsert(struct snode *sp)
{
        ASSERT(MUTEX_HELD(&stable_lock));
        sp->s_next = stable[STABLEHASH(sp->s_dev)];
        stable[STABLEHASH(sp->s_dev)] = sp;
}

/*
 * Remove an snode from the hash table.
 * The realvp is not released here because spec_inactive() still
 * needs it to do a spec_fsync().
 */
void
sdelete(struct snode *sp)
{
        struct snode *st;
        struct snode *stprev = NULL;

        ASSERT(MUTEX_HELD(&stable_lock));
        st = stable[STABLEHASH(sp->s_dev)];
        while (st != NULL) {
                if (st == sp) {
                        if (stprev == NULL)
                                stable[STABLEHASH(sp->s_dev)] = st->s_next;
                        else
                                stprev->s_next = st->s_next;
                        break;
                }
                stprev = st;
                st = st->s_next;
        }
}

/*
 * Lookup an snode by <dev, type, vp>.
 * ONLY looks for snodes with non-NULL s_realvp members and
 * common snodes (with s_commonvp pointing to its vnode).
 *
 * If vp is NULL, only return commonvp. Otherwise return
 * shadow vp with both shadow and common vp's VN_HELD.
 */
static struct snode *
sfind(
        dev_t   dev,
        vtype_t type,
        struct vnode *vp)
{
        struct snode *st;
        struct vnode *svp;

        ASSERT(MUTEX_HELD(&stable_lock));
        st = stable[STABLEHASH(dev)];
        while (st != NULL) {
                svp = STOV(st);
                if (st->s_dev == dev && svp->v_type == type &&
                    VN_CMP(st->s_realvp, vp) &&
                    (vp != NULL || st->s_commonvp == svp) &&
                    (vp == NULL || st->s_realvp->v_vfsp == vp->v_vfsp)) {
                        VN_HOLD(svp);
                        return (st);
                }
                st = st->s_next;
        }
        return (NULL);
}

/*
 * Mark the accessed, updated, or changed times in an snode
 * with the current time.
 */
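/*
 * Illustrative usage (a sketch): read paths mark access time and write
 * paths mark modification and change times, e.g.
 *
 *	smark(sp, SACC);
 *	smark(sp, SUPD|SCHG);
 */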
void
smark(struct snode *sp, int flag)
{
        time_t  now = gethrestime_sec();

        /* check for change to avoid unnecessary locking */
        ASSERT((flag & ~(SACC|SUPD|SCHG)) == 0);
        if (((flag & sp->s_flag) != flag) ||
            ((flag & SACC) && (sp->s_atime != now)) ||
            ((flag & SUPD) && (sp->s_mtime != now)) ||
            ((flag & SCHG) && (sp->s_ctime != now))) {
                /* lock and update */
                mutex_enter(&sp->s_lock);
                sp->s_flag |= flag;
                if (flag & SACC)
                        sp->s_atime = now;
                if (flag & SUPD)
                        sp->s_mtime = now;
                if (flag & SCHG)
                        sp->s_ctime = now;
                mutex_exit(&sp->s_lock);
        }
}

/*
 * Return the maximum file offset permitted for this device.
 * -1 means unrestricted.  SLOFFSET is associated with D_64BIT.
 *
 * On a 32-bit kernel this will limit:
 *   o  D_64BIT devices to SPEC_MAXOFFSET_T.
 *   o  non-D_64BIT character drivers to a 32-bit offset (MAXOFF_T).
 */
offset_t
spec_maxoffset(struct vnode *vp)
{
        struct snode *sp = VTOS(vp);
        struct snode *csp = VTOS(sp->s_commonvp);

        if (vp->v_stream)
                return ((offset_t)-1);
        else if (csp->s_flag & SANYOFFSET)      /* D_U64BIT */
                return ((offset_t)-1);
#ifdef _ILP32
        if (csp->s_flag & SLOFFSET)             /* D_64BIT */
                return (SPEC_MAXOFFSET_T);
#endif  /* _ILP32 */
        return (MAXOFF_T);
}

/*ARGSUSED*/
static int
snode_constructor(void *buf, void *cdrarg, int kmflags)
{
        struct snode *sp = buf;
        struct vnode *vp;

        vp = sp->s_vnode = vn_alloc(kmflags);
        if (vp == NULL) {
                return (-1);
        }
        vn_setops(vp, spec_getvnodeops());
        vp->v_data = sp;

        mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&sp->s_cv, NULL, CV_DEFAULT, NULL);
        return (0);
}

/*ARGSUSED1*/
static void
snode_destructor(void *buf, void *cdrarg)
{
        struct snode *sp = buf;
        struct vnode *vp = STOV(sp);

        mutex_destroy(&sp->s_lock);
        cv_destroy(&sp->s_cv);

        vn_free(vp);
}


int
specinit(int fstype, char *name)
{
        static const fs_operation_def_t spec_vfsops_template[] = {
                VFSNAME_SYNC, { .vfs_sync = spec_sync },
                NULL, NULL
        };
        extern struct vnodeops *spec_vnodeops;
        extern const fs_operation_def_t spec_vnodeops_template[];
        struct vfsops *spec_vfsops;
        int error;
        dev_t dev;

        /*
         * Associate vfs and vnode operations.
         */
        error = vfs_setfsops(fstype, spec_vfsops_template, &spec_vfsops);
        if (error != 0) {
                cmn_err(CE_WARN, "specinit: bad vfs ops template");
                return (error);
        }

        error = vn_make_ops(name, spec_vnodeops_template, &spec_vnodeops);
        if (error != 0) {
                (void) vfs_freevfsops_by_type(fstype);
                cmn_err(CE_WARN, "specinit: bad vnode ops template");
                return (error);
        }

        mutex_init(&stable_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spec_syncbusy, NULL, MUTEX_DEFAULT, NULL);

        /*
         * Create snode cache
         */
        snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode),
            0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0);

        /*
         * Associate vfs operations with spec_vfs
         */
        VFS_INIT(&spec_vfs, spec_vfsops, (caddr_t)NULL);
        if ((dev = getudev()) == -1)
                dev = 0;
        specdev = makedevice(dev, 0);
        return (0);
}
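/*
 * For reference, specinit() is wired into the VFS framework as a vfsdef
 * init routine; a sketch of the registration, which lives outside this
 * file, might look like:
 *
 *	static vfsdef_t vfw = {
 *		VFSDEF_VERSION,
 *		"specfs",
 *		specinit,
 *		0,
 *		NULL
 *	};
 */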

int
device_close(struct vnode *vp, int flag, struct cred *cr)
{
        struct snode *sp = VTOS(vp);
        enum vtype type = vp->v_type;
        struct vnode *cvp;
        dev_t dev;
        int error = 0;  /* strclose() may be skipped, leaving error unset */

        dev = sp->s_dev;
        cvp = sp->s_commonvp;

        switch (type) {

        case VCHR:
                if (vp->v_stream) {
                        if (cvp->v_stream != NULL)
                                error = strclose(cvp, flag, cr);
                        vp->v_stream = NULL;
                } else
                        error = dev_close(dev, flag, OTYP_CHR, cr);
                break;

        case VBLK:
                /*
                 * On the last close of a block device we must
                 * invalidate any in-core blocks so that we
                 * can, for example, change floppy disks.
                 */
                (void) spec_putpage(cvp, (offset_t)0,
                    (size_t)0, B_INVAL|B_FORCE, cr, NULL);
                bflush(dev);
                binval(dev);
                error = dev_close(dev, flag, OTYP_BLK, cr);
                break;
        default:
                panic("device_close: not a device");
                /*NOTREACHED*/
        }

        return (error);
}

struct vnode *
makectty(vnode_t *ovp)
{
        vnode_t *vp;

        if ((vp = makespecvp(ovp->v_rdev, VCHR)) != NULL) {
                struct snode *sp;
                struct snode *csp;
                struct vnode *cvp;

                sp = VTOS(vp);
                cvp = sp->s_commonvp;
                csp = VTOS(cvp);
                mutex_enter(&csp->s_lock);
                csp->s_count++;
                mutex_exit(&csp->s_lock);
        }

        return (vp);
}

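/*
 * Apply "callback" to every snode in the table until it returns something
 * other than DDI_WALK_CONTINUE.  The walk holds stable_lock, so the
 * callback must not itself acquire stable_lock.
 *
 * Illustrative sketch of a callback ("count_snodes" and "n" are
 * hypothetical, not part of this file):
 *
 *	static int
 *	count_snodes(struct snode *sp, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return (DDI_WALK_CONTINUE);
 *	}
 *
 *	int n = 0;
 *	spec_snode_walk(count_snodes, &n);
 */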
void
spec_snode_walk(int (*callback)(struct snode *sp, void *arg), void *arg)
{
        struct snode    *sp;
        int             i;

        ASSERT(callback);

        mutex_enter(&stable_lock);
        for (i = 0; i < STABLESIZE; i++) {
                for (sp = stable[i]; sp; sp = sp->s_next) {
                        if (callback(sp, arg) != DDI_WALK_CONTINUE)
                                goto out;
                }
        }
out:
        mutex_exit(&stable_lock);
}

int
spec_is_clone(vnode_t *vp)
{
        struct snode *sp;

        if (vn_matchops(vp, spec_getvnodeops())) {
                sp = VTOS(vp);
                return ((sp->s_flag & SCLONE) ? 1 : 0);
        }

        return (0);
}

int
spec_is_selfclone(vnode_t *vp)
{
        struct snode *sp;

        if (vn_matchops(vp, spec_getvnodeops())) {
                sp = VTOS(vp);
                return ((sp->s_flag & SSELFCLONE) ? 1 : 0);
        }

        return (0);
}

/*
 * We may be invoked with a NULL vp, in which case we fence off
 * all snodes associated with the dip.
 */
int
spec_fence_snode(dev_info_t *dip, struct vnode *vp)
{
        struct snode    *sp;
        struct snode    *csp;
        int             retired;
        int             i;
        char            *path;
        int             emitted;

        ASSERT(dip);

        retired = 0;
        mutex_enter(&DEVI(dip)->devi_lock);
        if (DEVI(dip)->devi_flags & DEVI_RETIRED)
                retired = 1;
        mutex_exit(&DEVI(dip)->devi_lock);

        if (!retired)
                return (0);

        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        (void) ddi_pathname(dip, path);

        if (vp != NULL) {
                ASSERT(vn_matchops(vp, spec_getvnodeops()));
                csp = VTOCS(vp);
                ASSERT(csp);
                mutex_enter(&csp->s_lock);
                csp->s_flag |= SFENCED;
                mutex_exit(&csp->s_lock);
                FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
                    (void *)csp, path));
                kmem_free(path, MAXPATHLEN);
                return (0);
        }

        emitted = 0;
        mutex_enter(&stable_lock);
        for (i = 0; i < STABLESIZE; i++) {
                for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
                        ASSERT(sp->s_commonvp);
                        csp = VTOS(sp->s_commonvp);
                        if (csp->s_dip == dip) {
                                /* fence off the common snode */
                                mutex_enter(&csp->s_lock);
                                csp->s_flag |= SFENCED;
                                mutex_exit(&csp->s_lock);
                                if (!emitted) {
                                        FENDBG((CE_NOTE, "fenced 1 of N"));
                                        emitted++;
                                }
                        }
                }
        }
        mutex_exit(&stable_lock);

        FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
        kmem_free(path, MAXPATHLEN);

        return (0);
}


int
spec_unfence_snode(dev_info_t *dip)
{
        struct snode    *sp;
        struct snode    *csp;
        int             i;
        char            *path;
        int             emitted;

        ASSERT(dip);

        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        (void) ddi_pathname(dip, path);

        emitted = 0;
        mutex_enter(&stable_lock);
        for (i = 0; i < STABLESIZE; i++) {
                for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
                        ASSERT(sp->s_commonvp);
                        csp = VTOS(sp->s_commonvp);
                        ASSERT(csp);
                        if (csp->s_dip == dip) {
                                /* unfence the common snode */
                                mutex_enter(&csp->s_lock);
                                csp->s_flag &= ~SFENCED;
                                mutex_exit(&csp->s_lock);
                                if (!emitted) {
                                        FENDBG((CE_NOTE, "unfenced 1 of N"));
                                        emitted++;
                                }
                        }
                }
        }
        mutex_exit(&stable_lock);

        FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
        kmem_free(path, MAXPATHLEN);

        return (0);
}

void
spec_size_invalidate(dev_t dev, vtype_t type)
{
        struct snode *csp;

        mutex_enter(&stable_lock);
        if ((csp = sfind(dev, type, NULL)) != NULL) {
                mutex_enter(&csp->s_lock);
                csp->s_flag &= ~SSIZEVALID;
                VN_RELE_ASYNC(STOV(csp), system_taskq);
                mutex_exit(&csp->s_lock);
        }
        mutex_exit(&stable_lock);
}