1 /*
   2  * Copyright (c) 2000-2001 Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34 
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  */
  38 
  39 /*
  40  * Vnode operations
  41  *
  42  * This file is similar to nfs3_vnops.c
  43  */
  44 
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/cred.h>
  48 #include <sys/vnode.h>
  49 #include <sys/vfs.h>
  50 #include <sys/filio.h>
  51 #include <sys/uio.h>
  52 #include <sys/dirent.h>
  53 #include <sys/errno.h>
  54 #include <sys/sunddi.h>
  55 #include <sys/sysmacros.h>
  56 #include <sys/kmem.h>
  57 #include <sys/cmn_err.h>
  58 #include <sys/vfs_opreg.h>
  59 #include <sys/policy.h>
  60 #include <sys/sdt.h>
  61 #include <sys/zone.h>
  62 #include <sys/vmsystm.h>
  63 
  64 #include <vm/hat.h>
  65 #include <vm/as.h>
  66 #include <vm/page.h>
  67 #include <vm/pvn.h>
  68 #include <vm/seg.h>
  69 #include <vm/seg_map.h>
  70 #include <vm/seg_kpm.h>
  71 #include <vm/seg_vn.h>
  72 
  73 #include <netsmb/smb_osdep.h>
  74 #include <netsmb/smb.h>
  75 #include <netsmb/smb_conn.h>
  76 #include <netsmb/smb_subr.h>
  77 
  78 #include <smbfs/smbfs.h>
  79 #include <smbfs/smbfs_node.h>
  80 #include <smbfs/smbfs_subr.h>
  81 
  82 #include <sys/fs/smbfs_ioctl.h>
  83 #include <fs/fs_subr.h>
  84 
  85 /*
  86  * We assign directory offsets like the NFS client, where the
  87  * offset increments by _one_ after each directory entry.
  88  * Further, the entries "." and ".." are always at offsets
  89  * zero and one (respectively) and the "real" entries from
  90  * the server appear at offsets starting with two.  This
  91  * macro is used to initialize the n_dirofs field after
  92  * setting n_dirseq with a _findopen call.
  93  */
  94 #define FIRST_DIROFS    2
  95 
  96 /*
  97  * These characters are illegal in NTFS file names.
  98  * ref: http://support.microsoft.com/kb/147438
  99  *
 100  * Careful!  The check in the XATTR case skips the
 101  * first character to allow colon in XATTR names.
 102  */
 103 static const char illegal_chars[] = {
 104         ':',    /* colon - keep this first! */
 105         '\\',   /* back slash */
 106         '/',    /* slash */
 107         '*',    /* asterisk */
 108         '?',    /* question mark */
 109         '"',    /* double quote */
 110         '<', /* less than sign */
 111         '>', /* greater than sign */
 112         '|',    /* vertical bar */
 113         0
 114 };
 115 
 116 /*
 117  * Turning this on causes nodes to be created in the cache
 118  * during directory listings, normally avoiding a second
 119  * OtW attribute fetch just after a readdir.
 120  */
 121 int smbfs_fastlookup = 1;
 122 
 123 struct vnodeops *smbfs_vnodeops = NULL;
 124 
/* Local static function prototypes. */

/* Name lookup helpers (cached and over-the-wire). */
static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
                        cred_t *);
static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
                        int cache_ok, caller_context_t *);

/* Remove / rename helpers. */
static int      smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
                        int flags);
static int      smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp,
                        char *nnm, struct smb_cred *scred, int flags);

/* Attribute, access, and directory-read helpers. */
static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
static int      smbfs_accessx(void *, int, cred_t *);
static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
                        caller_context_t *);

/* FID reference release (see smbfs_rele_fid below). */
static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);

/* Buffered I/O helpers. */
static int      smbfs_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int,
                        cred_t *);
static int      smbfs_bio(struct buf *, int, cred_t *);
static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
                        struct uio *uiop, int pgcreated);

/* VM / paging entry points used before their definitions. */
static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
                        caller_context_t *);
static int      smbfs_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
                        page_t *[], size_t, struct seg *, caddr_t,
                        enum seg_rw, cred_t *);
static int      smbfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
                        int, cred_t *);
static void     smbfs_delmap_callback(struct as *, void *, uint_t);
 157 
 158 /*
 159  * Error flags used to pass information about certain special errors
 160  * which need to be handled specially.
 161  */
 162 #define SMBFS_EOF                       -98
 163 
 164 /* When implementing OtW locks, make this a real function. */
 165 #define smbfs_lm_has_sleep(vp) 0
 166 
 167 /*
 168  * These are the vnode ops routines which implement the vnode interface to
 169  * the networked file system.  These routines just take their parameters,
 170  * make them look networkish by putting the right info into interface structs,
 171  * and then calling the appropriate remote routine(s) to do the work.
 172  *
 173  * Note on directory name lookup cacheing:  If we detect a stale fhandle,
 174  * we purge the directory cache relative to that vnode.  This way, the
 175  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
 176  * more details on smbnode locking.
 177  */
 178 
 179 
 180 /*
 181  * XXX
 182  * When new and relevant functionality is enabled, we should be
 183  * calling vfs_set_feature() to inform callers that pieces of
 184  * functionality are available, per PSARC 2007/227.
 185  */
 186 /* ARGSUSED */
 187 static int
 188 smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 189 {
 190         smbnode_t       *np;
 191         vnode_t         *vp;
 192         smbfattr_t      fa;
 193         u_int32_t       rights, rightsrcvd;
 194         u_int16_t       fid, oldfid;
 195         int             oldgenid;
 196         struct smb_cred scred;
 197         smbmntinfo_t    *smi;
 198         smb_share_t     *ssp;
 199         cred_t          *oldcr;
 200         int             tmperror;
 201         int             error = 0;
 202 
 203         vp = *vpp;
 204         np = VTOSMB(vp);
 205         smi = VTOSMI(vp);
 206         ssp = smi->smi_share;
 207 
 208         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 209                 return (EIO);
 210 
 211         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 212                 return (EIO);
 213 
 214         if (vp->v_type != VREG && vp->v_type != VDIR) { /* XXX VLNK? */
 215                 SMBVDEBUG("open eacces vtype=%d\n", vp->v_type);
 216                 return (EACCES);
 217         }
 218 
 219         /*
 220          * Get exclusive access to n_fid and related stuff.
 221          * No returns after this until out.
 222          */
 223         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
 224                 return (EINTR);
 225         smb_credinit(&scred, cr);
 226 
 227         /*
 228          * Keep track of the vnode type at first open.
 229          * It may change later, and we need close to do
 230          * cleanup for the type we opened.  Also deny
 231          * open of new types until old type is closed.
 232          */
 233         if (np->n_ovtype == VNON) {
 234                 ASSERT(np->n_dirrefs == 0);
 235                 ASSERT(np->n_fidrefs == 0);
 236         } else if (np->n_ovtype != vp->v_type) {
 237                 SMBVDEBUG("open n_ovtype=%d v_type=%d\n",
 238                     np->n_ovtype, vp->v_type);
 239                 error = EACCES;
 240                 goto out;
 241         }
 242 
 243         /*
 244          * Directory open.  See smbfs_readvdir()
 245          */
 246         if (vp->v_type == VDIR) {
 247                 if (np->n_dirseq == NULL) {
 248                         /* first open */
 249                         error = smbfs_smb_findopen(np, "*", 1,
 250                             SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
 251                             &scred, &np->n_dirseq);
 252                         if (error != 0)
 253                                 goto out;
 254                 }
 255                 np->n_dirofs = FIRST_DIROFS;
 256                 np->n_dirrefs++;
 257                 goto have_fid;
 258         }
 259 
 260         /*
 261          * If caller specified O_TRUNC/FTRUNC, then be sure to set
 262          * FWRITE (to drive successful setattr(size=0) after open)
 263          */
 264         if (flag & FTRUNC)
 265                 flag |= FWRITE;
 266 
 267         /*
 268          * If we already have it open, and the FID is still valid,
 269          * check whether the rights are sufficient for FID reuse.
 270          */
 271         if (np->n_fidrefs > 0 &&
 272             np->n_vcgenid == ssp->ss_vcgenid) {
 273                 int upgrade = 0;
 274 
 275                 if ((flag & FWRITE) &&
 276                     !(np->n_rights & SA_RIGHT_FILE_WRITE_DATA))
 277                         upgrade = 1;
 278                 if ((flag & FREAD) &&
 279                     !(np->n_rights & SA_RIGHT_FILE_READ_DATA))
 280                         upgrade = 1;
 281                 if (!upgrade) {
 282                         /*
 283                          *  the existing open is good enough
 284                          */
 285                         np->n_fidrefs++;
 286                         goto have_fid;
 287                 }
 288         }
 289         rights = np->n_fidrefs ? np->n_rights : 0;
 290 
 291         /*
 292          * we always ask for READ_CONTROL so we can always get the
 293          * owner/group IDs to satisfy a stat.  Ditto attributes.
 294          */
 295         rights |= (STD_RIGHT_READ_CONTROL_ACCESS |
 296             SA_RIGHT_FILE_READ_ATTRIBUTES);
 297         if ((flag & FREAD))
 298                 rights |= SA_RIGHT_FILE_READ_DATA;
 299         if ((flag & FWRITE))
 300                 rights |= SA_RIGHT_FILE_WRITE_DATA |
 301                     SA_RIGHT_FILE_APPEND_DATA |
 302                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
 303 
 304         bzero(&fa, sizeof (fa));
 305         error = smbfs_smb_open(np,
 306             NULL, 0, 0, /* name nmlen xattr */
 307             rights, &scred,
 308             &fid, &rightsrcvd, &fa);
 309         if (error)
 310                 goto out;
 311         smbfs_attrcache_fa(vp, &fa);
 312 
 313         /*
 314          * We have a new FID and access rights.
 315          */
 316         oldfid = np->n_fid;
 317         oldgenid = np->n_vcgenid;
 318         np->n_fid = fid;
 319         np->n_vcgenid = ssp->ss_vcgenid;
 320         np->n_rights = rightsrcvd;
 321         np->n_fidrefs++;
 322         if (np->n_fidrefs > 1 &&
 323             oldgenid == ssp->ss_vcgenid) {
 324                 /*
 325                  * We already had it open (presumably because
 326                  * it was open with insufficient rights.)
 327                  * Close old wire-open.
 328                  */
 329                 tmperror = smbfs_smb_close(ssp,
 330                     oldfid, NULL, &scred);
 331                 if (tmperror)
 332                         SMBVDEBUG("error %d closing %s\n",
 333                             tmperror, np->n_rpath);
 334         }
 335 
 336         /*
 337          * This thread did the open.
 338          * Save our credentials too.
 339          */
 340         mutex_enter(&np->r_statelock);
 341         oldcr = np->r_cred;
 342         np->r_cred = cr;
 343         crhold(cr);
 344         if (oldcr)
 345                 crfree(oldcr);
 346         mutex_exit(&np->r_statelock);
 347 
 348 have_fid:
 349         /*
 350          * Keep track of the vnode type at first open.
 351          * (see comments above)
 352          */
 353         if (np->n_ovtype == VNON)
 354                 np->n_ovtype = vp->v_type;
 355 
 356 out:
 357         smb_credrele(&scred);
 358         smbfs_rw_exit(&np->r_lkserlock);
 359         return (error);
 360 }
 361 
/*
 * smbfs VOP_CLOSE entry point.
 *
 * Releases local (SYSV-style) record locks and shares when the mount
 * uses local locking, and on last close of a file opened for write,
 * flushes dirty pages synchronously.  Then drops this open's
 * reference on the SMB-level FID via smbfs_rele_fid(), which does
 * the over-the-wire close when the last reference goes away.
 *
 * Returns EIO only for the wrong-zone case; otherwise 0.  Note that
 * a page-flush error is NOT returned to the caller here — it only
 * suppresses the clearing of r_error/RSTALE below.
 */
/*ARGSUSED*/
static int
smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
        caller_context_t *ct)
{
        smbnode_t       *np;
        smbmntinfo_t    *smi;
        struct smb_cred scred;
        int error = 0;

        np = VTOSMB(vp);
        smi = VTOSMI(vp);

        /*
         * Don't "bail out" for VFS_UNMOUNTED here,
         * as we want to do cleanup, etc.
         */

        /*
         * zone_enter(2) prevents processes from changing zones with SMBFS files
         * open; if we happen to get here from the wrong zone we can't do
         * anything over the wire.
         */
        if (smi->smi_zone_ref.zref_zone != curproc->p_zone) {
                /*
                 * We could attempt to clean up locks, except we're sure
                 * that the current process didn't acquire any locks on
                 * the file: any attempt to lock a file belong to another zone
                 * will fail, and one can't lock an SMBFS file and then change
                 * zones, as that fails too.
                 *
                 * Returning an error here is the sane thing to do.  A
                 * subsequent call to VN_RELE() which translates to a
                 * smbfs_inactive() will clean up state: if the zone of the
                 * vnode's origin is still alive and kicking, an async worker
                 * thread will handle the request (from the correct zone), and
                 * everything (minus the final smbfs_getattr_otw() call) should
                 * be OK. If the zone is going away smbfs_async_inactive() will
                 * throw away cached pages inline.
                 */
                return (EIO);
        }

        /*
         * If we are using local locking for this filesystem, then
         * release all of the SYSV style record locks.  Otherwise,
         * we are doing network locking and we need to release all
         * of the network locks.  All of the locks held by this
         * process on this file are released no matter what the
         * incoming reference count is.
         */
        if (smi->smi_flags & SMI_LLOCK) {
                pid_t pid = ddi_get_pid();
                cleanlocks(vp, pid, 0);
                cleanshares(vp, pid);
        }
        /*
         * else doing OtW locking.  SMB servers drop all locks
         * on the file ID we close here, so no _lockrelease()
         */

        /*
         * This (passed in) count is the ref. count from the
         * user's file_t before the closef call (fio.c).
         * The rest happens only on last close.
         */
        if (count > 1)
                return (0);

        /* NFS has DNLC purge here. */

        /*
         * If the file was open for write and there are pages,
         * then make sure dirty pages written back.
         *
         * NFS does this async when "close-to-open" is off
         * (MI_NOCTO flag is set) to avoid blocking the caller.
         * For now, always do this synchronously (no B_ASYNC).
         */
        if ((flag & FWRITE) && vn_has_cached_data(vp)) {
                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
                if (error == EAGAIN)
                        error = 0;
        }
        /* Only clear stale state if the flush succeeded. */
        if (error == 0) {
                mutex_enter(&np->r_statelock);
                np->r_flags &= ~RSTALE;
                np->r_error = 0;
                mutex_exit(&np->r_statelock);
        }

        /*
         * Decrement the reference count for the FID
         * and possibly do the OtW close.
         *
         * Exclusive lock for modifying n_fid stuff.
         * Don't want this one ever interruptible.
         */
        (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
        smb_credinit(&scred, cr);

        smbfs_rele_fid(np, &scred);

        smb_credrele(&scred);
        smbfs_rw_exit(&np->r_lkserlock);

        return (0);
}
 470 
 471 /*
 472  * Helper for smbfs_close.  Decrement the reference count
 473  * for an SMB-level file or directory ID, and when the last
 474  * reference for the fid goes away, do the OtW close.
 475  * Also called in smbfs_inactive (defensive cleanup).
 476  */
 477 static void
 478 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 479 {
 480         smb_share_t     *ssp;
 481         cred_t          *oldcr;
 482         struct smbfs_fctx *fctx;
 483         int             error;
 484         uint16_t ofid;
 485 
 486         ssp = np->n_mount->smi_share;
 487         error = 0;
 488 
 489         /* Make sure we serialize for n_dirseq use. */
 490         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 491 
 492         /*
 493          * Note that vp->v_type may change if a remote node
 494          * is deleted and recreated as a different type, and
 495          * our getattr may change v_type accordingly.
 496          * Now use n_ovtype to keep track of the v_type
 497          * we had during open (see comments above).
 498          */
 499         switch (np->n_ovtype) {
 500         case VDIR:
 501                 ASSERT(np->n_dirrefs > 0);
 502                 if (--np->n_dirrefs)
 503                         return;
 504                 if ((fctx = np->n_dirseq) != NULL) {
 505                         np->n_dirseq = NULL;
 506                         np->n_dirofs = 0;
 507                         error = smbfs_smb_findclose(fctx, scred);
 508                 }
 509                 break;
 510 
 511         case VREG:
 512                 ASSERT(np->n_fidrefs > 0);
 513                 if (--np->n_fidrefs)
 514                         return;
 515                 if ((ofid = np->n_fid) != SMB_FID_UNUSED) {
 516                         np->n_fid = SMB_FID_UNUSED;
 517                         /* After reconnect, n_fid is invalid */
 518                         if (np->n_vcgenid == ssp->ss_vcgenid) {
 519                                 error = smbfs_smb_close(
 520                                     ssp, ofid, NULL, scred);
 521                         }
 522                 }
 523                 break;
 524 
 525         default:
 526                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
 527                 break;
 528         }
 529         if (error) {
 530                 SMBVDEBUG("error %d closing %s\n",
 531                     error, np->n_rpath);
 532         }
 533 
 534         /* Allow next open to use any v_type. */
 535         np->n_ovtype = VNON;
 536 
 537         /*
 538          * Other "last close" stuff.
 539          */
 540         mutex_enter(&np->r_statelock);
 541         if (np->n_flag & NATTRCHANGED)
 542                 smbfs_attrcache_rm_locked(np);
 543         oldcr = np->r_cred;
 544         np->r_cred = NULL;
 545         mutex_exit(&np->r_statelock);
 546         if (oldcr != NULL)
 547                 crfree(oldcr);
 548 }
 549 
/*
 * smbfs VOP_READ entry point.
 *
 * Validates zone/mount/type, refreshes attributes from the server,
 * and clamps the request to EOF by temporarily shrinking uio_resid
 * (restored before return).  I/O then goes one of two ways:
 *  - direct over-the-wire (smb_rwuio) when caching is disabled
 *    (VNOCACHE, RDIRECTIO, or SMI_DIRECTIO) and nothing is mapped
 *    or cached, or
 *  - through segmap/vpm page cache otherwise.
 *
 * Caller holds r_rwlock as reader (asserted).  Returns 0 or errno
 * (EIO, EISDIR, EINVAL, EINTR, ESTALE after reconnect, etc.).
 */
/* ARGSUSED */
static int
smbfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
        caller_context_t *ct)
{
        struct smb_cred scred;
        struct vattr    va;
        smbnode_t       *np;
        smbmntinfo_t    *smi;
        smb_share_t     *ssp;
        offset_t        endoff;
        ssize_t         past_eof;
        int             error;

        caddr_t         base;
        u_offset_t      off;
        size_t          n;
        int             on;
        uint_t          flags;

        np = VTOSMB(vp);
        smi = VTOSMI(vp);
        ssp = smi->smi_share;

        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));

        if (vp->v_type != VREG)
                return (EISDIR);

        if (uiop->uio_resid == 0)
                return (0);

        /*
         * Like NFS3, just check for 63-bit overflow.
         * Our SMB layer takes care to return EFBIG
         * when it has to fallback to a 32-bit call.
         */
        endoff = uiop->uio_loffset + uiop->uio_resid;
        if (uiop->uio_loffset < 0 || endoff < 0)
                return (EINVAL);

        /* get vnode attributes from server */
        va.va_mask = AT_SIZE | AT_MTIME;
        if (error = smbfsgetattr(vp, &va, cr))
                return (error);

        /* Update mtime with mtime from server here? */

        /* if offset is beyond EOF, read nothing */
        if (uiop->uio_loffset >= va.va_size)
                return (0);

        /*
         * Limit the read to the remaining file size.
         * Do this by temporarily reducing uio_resid
         * by the amount that lies beyond the EOF.
         */
        if (endoff > va.va_size) {
                past_eof = (ssize_t)(endoff - va.va_size);
                uiop->uio_resid -= past_eof;
        } else
                past_eof = 0;

        /*
         * Bypass VM if caching has been disabled (e.g., locking) or if
         * using client-side direct I/O and the file is not mmap'd and
         * there are no cached pages.
         */
        if ((vp->v_flag & VNOCACHE) ||
            (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
            np->r_mapcnt == 0 && np->r_inmap == 0 &&
            !vn_has_cached_data(vp))) {

                /* Shared lock for n_fid use in smb_rwuio */
                if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                        return (EINTR);
                smb_credinit(&scred, cr);

                /* After reconnect, n_fid is invalid */
                if (np->n_vcgenid != ssp->ss_vcgenid)
                        error = ESTALE;
                else
                        error = smb_rwuio(ssp, np->n_fid, UIO_READ,
                            uiop, &scred, smb_timo_read);

                smb_credrele(&scred);
                smbfs_rw_exit(&np->r_lkserlock);

                /* undo adjustment of resid */
                uiop->uio_resid += past_eof;

                return (error);
        }

        /* (else) Do I/O through segmap. */
        do {
                off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
                on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
                n = MIN(MAXBSIZE - on, uiop->uio_resid);

                error = smbfs_validate_caches(vp, cr);
                if (error)
                        break;

                /* NFS waits for RINCACHEPURGE here. */

                if (vpm_enable) {
                        /*
                         * Copy data.
                         */
                        error = vpm_data_copy(vp, off + on, n, uiop,
                            1, NULL, 0, S_READ);
                } else {
                        base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
                            S_READ);

                        error = uiomove(base + on, n, UIO_READ, uiop);
                }

                if (!error) {
                        /*
                         * If read a whole block or read to eof,
                         * won't need this buffer again soon.
                         */
                        mutex_enter(&np->r_statelock);
                        if (n + on == MAXBSIZE ||
                            uiop->uio_loffset == np->r_size)
                                flags = SM_DONTNEED;
                        else
                                flags = 0;
                        mutex_exit(&np->r_statelock);
                        if (vpm_enable) {
                                error = vpm_sync_pages(vp, off, n, flags);
                        } else {
                                error = segmap_release(segkmap, base, flags);
                        }
                } else {
                        /* Release the mapping without a hint on error. */
                        if (vpm_enable) {
                                (void) vpm_sync_pages(vp, off, n, 0);
                        } else {
                                (void) segmap_release(segkmap, base, 0);
                        }
                }
        } while (!error && uiop->uio_resid > 0);

        /* undo adjustment of resid */
        uiop->uio_resid += past_eof;

        return (error);
}
 706 
 707 
 708 /* ARGSUSED */
 709 static int
 710 smbfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 711         caller_context_t *ct)
 712 {
 713         struct smb_cred scred;
 714         struct vattr    va;
 715         smbnode_t       *np;
 716         smbmntinfo_t    *smi;
 717         smb_share_t     *ssp;
 718         offset_t        endoff, limit;
 719         ssize_t         past_limit;
 720         int             error, timo;
 721         caddr_t         base;
 722         u_offset_t      off;
 723         size_t          n;
 724         int             on;
 725         uint_t          flags;
 726         u_offset_t      last_off;
 727         size_t          last_resid;
 728         uint_t          bsize;
 729 
 730         np = VTOSMB(vp);
 731         smi = VTOSMI(vp);
 732         ssp = smi->smi_share;
 733 
 734         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 735                 return (EIO);
 736 
 737         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 738                 return (EIO);
 739 
 740         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
 741 
 742         if (vp->v_type != VREG)
 743                 return (EISDIR);
 744 
 745         if (uiop->uio_resid == 0)
 746                 return (0);
 747 
 748         /*
 749          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
 750          */
 751         if (ioflag & (FAPPEND | FSYNC)) {
 752                 if (np->n_flag & NMODIFIED) {
 753                         smbfs_attrcache_remove(np);
 754                 }
 755         }
 756         if (ioflag & FAPPEND) {
 757                 /*
 758                  * File size can be changed by another client
 759                  *
 760                  * Todo: Consider redesigning this to use a
 761                  * handle opened for append instead.
 762                  */
 763                 va.va_mask = AT_SIZE;
 764                 if (error = smbfsgetattr(vp, &va, cr))
 765                         return (error);
 766                 uiop->uio_loffset = va.va_size;
 767         }
 768 
 769         /*
 770          * Like NFS3, just check for 63-bit overflow.
 771          */
 772         endoff = uiop->uio_loffset + uiop->uio_resid;
 773         if (uiop->uio_loffset < 0 || endoff < 0)
 774                 return (EINVAL);
 775 
 776         /*
 777          * Check to make sure that the process will not exceed
 778          * its limit on file size.  It is okay to write up to
 779          * the limit, but not beyond.  Thus, the write which
 780          * reaches the limit will be short and the next write
 781          * will return an error.
 782          *
 783          * So if we're starting at or beyond the limit, EFBIG.
 784          * Otherwise, temporarily reduce resid to the amount
 785          * that is after the limit.
 786          */
 787         limit = uiop->uio_llimit;
 788         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 789                 limit = MAXOFFSET_T;
 790         if (uiop->uio_loffset >= limit) {
 791                 proc_t *p = ttoproc(curthread);
 792 
 793                 mutex_enter(&p->p_lock);
 794                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 795                     p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 796                 mutex_exit(&p->p_lock);
 797                 return (EFBIG);
 798         }
 799         if (endoff > limit) {
 800                 past_limit = (ssize_t)(endoff - limit);
 801                 uiop->uio_resid -= past_limit;
 802         } else
 803                 past_limit = 0;
 804 
 805         /*
 806          * Bypass VM if caching has been disabled (e.g., locking) or if
 807          * using client-side direct I/O and the file is not mmap'd and
 808          * there are no cached pages.
 809          */
 810         if ((vp->v_flag & VNOCACHE) ||
 811             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 812             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 813             !vn_has_cached_data(vp))) {
 814 
 815 smbfs_fwrite:
 816                 if (np->r_flags & RSTALE) {
 817                         last_resid = uiop->uio_resid;
 818                         last_off = uiop->uio_loffset;
 819                         error = np->r_error;
 820                         /*
 821                          * A close may have cleared r_error, if so,
 822                          * propagate ESTALE error return properly
 823                          */
 824                         if (error == 0)
 825                                 error = ESTALE;
 826                         goto bottom;
 827                 }
 828 
 829                 /* Timeout: longer for append. */
 830                 timo = smb_timo_write;
 831                 if (endoff > np->r_size)
 832                         timo = smb_timo_append;
 833 
 834                 /* Shared lock for n_fid use in smb_rwuio */
 835                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 836                         return (EINTR);
 837                 smb_credinit(&scred, cr);
 838 
 839                 /* After reconnect, n_fid is invalid */
 840                 if (np->n_vcgenid != ssp->ss_vcgenid)
 841                         error = ESTALE;
 842                 else
 843                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
 844                             uiop, &scred, timo);
 845 
 846                 if (error == 0) {
 847                         mutex_enter(&np->r_statelock);
 848                         np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
 849                         if (uiop->uio_loffset > (offset_t)np->r_size)
 850                                 np->r_size = (len_t)uiop->uio_loffset;
 851                         mutex_exit(&np->r_statelock);
 852                         if (ioflag & (FSYNC | FDSYNC)) {
 853                                 /* Don't error the I/O if this fails. */
 854                                 (void) smbfs_smb_flush(np, &scred);
 855                         }
 856                 }
 857 
 858                 smb_credrele(&scred);
 859                 smbfs_rw_exit(&np->r_lkserlock);
 860 
 861                 /* undo adjustment of resid */
 862                 uiop->uio_resid += past_limit;
 863 
 864                 return (error);
 865         }
 866 
 867         /* (else) Do I/O through segmap. */
 868         bsize = vp->v_vfsp->vfs_bsize;
 869 
 870         do {
 871                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 872                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 873                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 874 
 875                 last_resid = uiop->uio_resid;
 876                 last_off = uiop->uio_loffset;
 877 
 878                 if (np->r_flags & RSTALE) {
 879                         error = np->r_error;
 880                         /*
 881                          * A close may have cleared r_error, if so,
 882                          * propagate ESTALE error return properly
 883                          */
 884                         if (error == 0)
 885                                 error = ESTALE;
 886                         break;
 887                 }
 888 
 889                 /*
 890                  * From NFS: Don't create dirty pages faster than they
 891                  * can be cleaned.
 892                  *
 893                  * Here NFS also checks for async writes (np->r_awcount)
 894                  */
 895                 mutex_enter(&np->r_statelock);
 896                 while (np->r_gcount > 0) {
 897                         if (SMBINTR(vp)) {
 898                                 klwp_t *lwp = ttolwp(curthread);
 899 
 900                                 if (lwp != NULL)
 901                                         lwp->lwp_nostop++;
 902                                 if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
 903                                         mutex_exit(&np->r_statelock);
 904                                         if (lwp != NULL)
 905                                                 lwp->lwp_nostop--;
 906                                         error = EINTR;
 907                                         goto bottom;
 908                                 }
 909                                 if (lwp != NULL)
 910                                         lwp->lwp_nostop--;
 911                         } else
 912                                 cv_wait(&np->r_cv, &np->r_statelock);
 913                 }
 914                 mutex_exit(&np->r_statelock);
 915 
 916                 /*
 917                  * Touch the page and fault it in if it is not in core
 918                  * before segmap_getmapflt or vpm_data_copy can lock it.
 919                  * This is to avoid the deadlock if the buffer is mapped
 920                  * to the same file through mmap which we want to write.
 921                  */
 922                 uio_prefaultpages((long)n, uiop);
 923 
 924                 if (vpm_enable) {
 925                         /*
 926                          * It will use kpm mappings, so no need to
 927                          * pass an address.
 928                          */
 929                         error = smbfs_writenp(np, NULL, n, uiop, 0);
 930                 } else {
 931                         if (segmap_kpm) {
 932                                 int pon = uiop->uio_loffset & PAGEOFFSET;
 933                                 size_t pn = MIN(PAGESIZE - pon,
 934                                     uiop->uio_resid);
 935                                 int pagecreate;
 936 
 937                                 mutex_enter(&np->r_statelock);
 938                                 pagecreate = (pon == 0) && (pn == PAGESIZE ||
 939                                     uiop->uio_loffset + pn >= np->r_size);
 940                                 mutex_exit(&np->r_statelock);
 941 
 942                                 base = segmap_getmapflt(segkmap, vp, off + on,
 943                                     pn, !pagecreate, S_WRITE);
 944 
 945                                 error = smbfs_writenp(np, base + pon, n, uiop,
 946                                     pagecreate);
 947 
 948                         } else {
 949                                 base = segmap_getmapflt(segkmap, vp, off + on,
 950                                     n, 0, S_READ);
 951                                 error = smbfs_writenp(np, base + on, n, uiop, 0);
 952                         }
 953                 }
 954 
 955                 if (!error) {
 956                         if (smi->smi_flags & SMI_NOAC)
 957                                 flags = SM_WRITE;
 958                         else if ((uiop->uio_loffset % bsize) == 0 ||
 959                             IS_SWAPVP(vp)) {
 960                                 /*
 961                                  * Have written a whole block.
 962                                  * Start an asynchronous write
 963                                  * and mark the buffer to
 964                                  * indicate that it won't be
 965                                  * needed again soon.
 966                                  */
 967                                 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
 968                         } else
 969                                 flags = 0;
 970                         if ((ioflag & (FSYNC|FDSYNC)) ||
 971                             (np->r_flags & ROUTOFSPACE)) {
 972                                 flags &= ~SM_ASYNC;
 973                                 flags |= SM_WRITE;
 974                         }
 975                         if (vpm_enable) {
 976                                 error = vpm_sync_pages(vp, off, n, flags);
 977                         } else {
 978                                 error = segmap_release(segkmap, base, flags);
 979                         }
 980                 } else {
 981                         if (vpm_enable) {
 982                                 (void) vpm_sync_pages(vp, off, n, 0);
 983                         } else {
 984                                 (void) segmap_release(segkmap, base, 0);
 985                         }
 986                         /*
 987                          * In the event that we got an access error while
 988                          * faulting in a page for a write-only file just
 989                          * force a write.
 990                          */
 991                         if (error == EACCES)
 992                                 goto smbfs_fwrite;
 993                 }
 994         } while (!error && uiop->uio_resid > 0);
 995 
 996 bottom:
 997         /* undo adjustment of resid */
 998         if (error) {
 999                 uiop->uio_resid = last_resid + past_limit;
1000                 uiop->uio_loffset = last_off;
1001         } else {
1002                 uiop->uio_resid += past_limit;
1003         }
1004 
1005         return (error);
1006 }
1007 
1008 /*
1009  * Like nfs_client.c: writerp()
1010  *
1011  * Write by creating pages and uiomove data onto them.
1012  */
1013 
1014 int
1015 smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
1016     int pgcreated)
1017 {
1018         int             pagecreate;
1019         int             n;
1020         int             saved_n;
1021         caddr_t         saved_base;
1022         u_offset_t      offset;
1023         int             error;
1024         int             sm_error;
1025         vnode_t         *vp = SMBTOV(np);
1026 
1027         ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
1028         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
1029         if (!vpm_enable) {
1030                 ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
1031         }
1032 
1033         /*
1034          * Move bytes in at most PAGESIZE chunks. We must avoid
1035          * spanning pages in uiomove() because page faults may cause
1036          * the cache to be invalidated out from under us. The r_size is not
1037          * updated until after the uiomove. If we push the last page of a
1038          * file before r_size is correct, we will lose the data written past
1039          * the current (and invalid) r_size.
1040          */
1041         do {
1042                 offset = uio->uio_loffset;
1043                 pagecreate = 0;
1044 
1045                 /*
1046                  * n is the number of bytes required to satisfy the request
1047                  *   or the number of bytes to fill out the page.
1048                  */
1049                 n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
1050 
1051                 /*
1052                  * Check to see if we can skip reading in the page
1053                  * and just allocate the memory.  We can do this
1054                  * if we are going to rewrite the entire mapping
1055                  * or if we are going to write to or beyond the current
1056                  * end of file from the beginning of the mapping.
1057                  *
1058                  * The read of r_size is now protected by r_statelock.
1059                  */
1060                 mutex_enter(&np->r_statelock);
1061                 /*
1062                  * When pgcreated is nonzero the caller has already done
1063                  * a segmap_getmapflt with forcefault 0 and S_WRITE. With
1064                  * segkpm this means we already have at least one page
1065                  * created and mapped at base.
1066                  */
1067                 pagecreate = pgcreated ||
1068                     ((offset & PAGEOFFSET) == 0 &&
1069                     (n == PAGESIZE || ((offset + n) >= np->r_size)));
1070 
1071                 mutex_exit(&np->r_statelock);
1072                 if (!vpm_enable && pagecreate) {
1073                         /*
1074                          * The last argument tells segmap_pagecreate() to
1075                          * always lock the page, as opposed to sometimes
1076                          * returning with the page locked. This way we avoid a
1077                          * fault on the ensuing uiomove(), but also
1078                          * more importantly (to fix bug 1094402) we can
1079                          * call segmap_fault() to unlock the page in all
1080                          * cases. An alternative would be to modify
1081                          * segmap_pagecreate() to tell us when it is
1082                          * locking a page, but that's a fairly major
1083                          * interface change.
1084                          */
1085                         if (pgcreated == 0)
1086                                 (void) segmap_pagecreate(segkmap, base,
1087                                     (uint_t)n, 1);
1088                         saved_base = base;
1089                         saved_n = n;
1090                 }
1091 
1092                 /*
1093                  * The number of bytes of data in the last page can not
1094                  * be accurately be determined while page is being
1095                  * uiomove'd to and the size of the file being updated.
1096                  * Thus, inform threads which need to know accurately
1097                  * how much data is in the last page of the file.  They
1098                  * will not do the i/o immediately, but will arrange for
1099                  * the i/o to happen later when this modify operation
1100                  * will have finished.
1101                  */
1102                 ASSERT(!(np->r_flags & RMODINPROGRESS));
1103                 mutex_enter(&np->r_statelock);
1104                 np->r_flags |= RMODINPROGRESS;
1105                 np->r_modaddr = (offset & MAXBMASK);
1106                 mutex_exit(&np->r_statelock);
1107 
1108                 if (vpm_enable) {
1109                         /*
1110                          * Copy data. If new pages are created, part of
1111                          * the page that is not written will be initizliazed
1112                          * with zeros.
1113                          */
1114                         error = vpm_data_copy(vp, offset, n, uio,
1115                             !pagecreate, NULL, 0, S_WRITE);
1116                 } else {
1117                         error = uiomove(base, n, UIO_WRITE, uio);
1118                 }
1119 
1120                 /*
1121                  * r_size is the maximum number of
1122                  * bytes known to be in the file.
1123                  * Make sure it is at least as high as the
1124                  * first unwritten byte pointed to by uio_loffset.
1125                  */
1126                 mutex_enter(&np->r_statelock);
1127                 if (np->r_size < uio->uio_loffset)
1128                         np->r_size = uio->uio_loffset;
1129                 np->r_flags &= ~RMODINPROGRESS;
1130                 np->r_flags |= RDIRTY;
1131                 mutex_exit(&np->r_statelock);
1132 
1133                 /* n = # of bytes written */
1134                 n = (int)(uio->uio_loffset - offset);
1135 
1136                 if (!vpm_enable) {
1137                         base += n;
1138                 }
1139                 tcount -= n;
1140                 /*
1141                  * If we created pages w/o initializing them completely,
1142                  * we need to zero the part that wasn't set up.
1143                  * This happens on a most EOF write cases and if
1144                  * we had some sort of error during the uiomove.
1145                  */
1146                 if (!vpm_enable && pagecreate) {
1147                         if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
1148                                 (void) kzero(base, PAGESIZE - n);
1149 
1150                         if (pgcreated) {
1151                                 /*
1152                                  * Caller is responsible for this page,
1153                                  * it was not created in this loop.
1154                                  */
1155                                 pgcreated = 0;
1156                         } else {
1157                                 /*
1158                                  * For bug 1094402: segmap_pagecreate locks
1159                                  * page. Unlock it. This also unlocks the
1160                                  * pages allocated by page_create_va() in
1161                                  * segmap_pagecreate().
1162                                  */
1163                                 sm_error = segmap_fault(kas.a_hat, segkmap,
1164                                     saved_base, saved_n,
1165                                     F_SOFTUNLOCK, S_WRITE);
1166                                 if (error == 0)
1167                                         error = sm_error;
1168                         }
1169                 }
1170         } while (tcount > 0 && error == 0);
1171 
1172         return (error);
1173 }
1174 
1175 /*
1176  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
1177  * Like nfs3_rdwrlbn()
1178  */
1179 static int
1180 smbfs_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
1181         int flags, cred_t *cr)
1182 {
1183         smbmntinfo_t    *smi = VTOSMI(vp);
1184         struct buf *bp;
1185         int error;
1186         int sync;
1187 
1188         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1189                 return (EIO);
1190 
1191         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1192                 return (EIO);
1193 
1194         bp = pageio_setup(pp, len, vp, flags);
1195         ASSERT(bp != NULL);
1196 
1197         /*
1198          * pageio_setup should have set b_addr to 0.  This
1199          * is correct since we want to do I/O on a page
1200          * boundary.  bp_mapin will use this addr to calculate
1201          * an offset, and then set b_addr to the kernel virtual
1202          * address it allocated for us.
1203          */
1204         ASSERT(bp->b_un.b_addr == 0);
1205 
1206         bp->b_edev = 0;
1207         bp->b_dev = 0;
1208         bp->b_lblkno = lbtodb(off);
1209         bp->b_file = vp;
1210         bp->b_offset = (offset_t)off;
1211         bp_mapin(bp);
1212 
1213         /*
1214          * Calculate the desired level of stability to write data.
1215          */
1216         if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
1217             freemem > desfree) {
1218                 sync = 0;
1219         } else {
1220                 sync = 1;
1221         }
1222 
1223         error = smbfs_bio(bp, sync, cr);
1224 
1225         bp_mapout(bp);
1226         pageio_done(bp);
1227 
1228         return (error);
1229 }
1230 
1231 
1232 /*
1233  * Corresponds to nfs3_vnopc.c : nfs3_bio(), though the NFS code
1234  * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
1235  * NFS has this later in the file.  Move it up here closer to
1236  * the one call site just above.
1237  */
1238 
1239 static int
1240 smbfs_bio(struct buf *bp, int sync, cred_t *cr)
1241 {
1242         struct iovec aiov[1];
1243         struct uio  auio;
1244         struct smb_cred scred;
1245         smbnode_t *np = VTOSMB(bp->b_vp);
1246         smbmntinfo_t *smi = np->n_mount;
1247         smb_share_t *ssp = smi->smi_share;
1248         offset_t offset;
1249         offset_t endoff;
1250         size_t count;
1251         size_t past_eof;
1252         int error;
1253 
1254         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
1255 
1256         offset = ldbtob(bp->b_lblkno);
1257         count = bp->b_bcount;
1258         endoff = offset + count;
1259         if (offset < 0 || endoff < 0)
1260                 return (EINVAL);
1261 
1262         /*
1263          * Limit file I/O to the remaining file size, but see
1264          * the notes in smbfs_getpage about SMBFS_EOF.
1265          */
1266         mutex_enter(&np->r_statelock);
1267         if (offset >= np->r_size) {
1268                 mutex_exit(&np->r_statelock);
1269                 if (bp->b_flags & B_READ) {
1270                         return (SMBFS_EOF);
1271                 } else {
1272                         return (EINVAL);
1273                 }
1274         }
1275         if (endoff > np->r_size) {
1276                 past_eof = (size_t)(endoff - np->r_size);
1277                 count -= past_eof;
1278         } else
1279                 past_eof = 0;
1280         mutex_exit(&np->r_statelock);
1281         ASSERT(count > 0);
1282 
1283         /* Caller did bpmapin().  Mapped address is... */
1284         aiov[0].iov_base = bp->b_un.b_addr;
1285         aiov[0].iov_len = count;
1286         auio.uio_iov = aiov;
1287         auio.uio_iovcnt = 1;
1288         auio.uio_loffset = offset;
1289         auio.uio_segflg = UIO_SYSSPACE;
1290         auio.uio_fmode = 0;
1291         auio.uio_resid = count;
1292 
1293         /* Shared lock for n_fid use in smb_rwuio */
1294         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
1295             smi->smi_flags & SMI_INT))
1296                 return (EINTR);
1297         smb_credinit(&scred, cr);
1298 
1299         DTRACE_IO1(start, struct buf *, bp);
1300 
1301         if (bp->b_flags & B_READ) {
1302 
1303                 /* After reconnect, n_fid is invalid */
1304                 if (np->n_vcgenid != ssp->ss_vcgenid)
1305                         error = ESTALE;
1306                 else
1307                         error = smb_rwuio(ssp, np->n_fid, UIO_READ,
1308                             &auio, &scred, smb_timo_read);
1309 
1310                 /* Like NFS, only set b_error here. */
1311                 bp->b_error = error;
1312                 bp->b_resid = auio.uio_resid;
1313 
1314                 if (!error && auio.uio_resid != 0)
1315                         error = EIO;
1316                 if (!error && past_eof != 0) {
1317                         /* Zero the memory beyond EOF. */
1318                         bzero(bp->b_un.b_addr + count, past_eof);
1319                 }
1320         } else {
1321 
1322                 /* After reconnect, n_fid is invalid */
1323                 if (np->n_vcgenid != ssp->ss_vcgenid)
1324                         error = ESTALE;
1325                 else
1326                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
1327                             &auio, &scred, smb_timo_write);
1328 
1329                 /* Like NFS, only set b_error here. */
1330                 bp->b_error = error;
1331                 bp->b_resid = auio.uio_resid;
1332 
1333                 if (!error && auio.uio_resid != 0)
1334                         error = EIO;
1335                 if (!error && sync) {
1336                         (void) smbfs_smb_flush(np, &scred);
1337                 }
1338         }
1339 
1340         /*
1341          * This comes from nfs3_commit()
1342          */
1343         if (error != 0) {
1344                 mutex_enter(&np->r_statelock);
1345                 if (error == ESTALE)
1346                         np->r_flags |= RSTALE;
1347                 if (!np->r_error)
1348                         np->r_error = error;
1349                 mutex_exit(&np->r_statelock);
1350                 bp->b_flags |= B_ERROR;
1351         }
1352 
1353         DTRACE_IO1(done, struct buf *, bp);
1354 
1355         smb_credrele(&scred);
1356         smbfs_rw_exit(&np->r_lkserlock);
1357 
1358         if (error == ESTALE)
1359                 smbfs_attrcache_remove(np);
1360 
1361         return (error);
1362 }
1363 
1364 /*
1365  * Here NFS has: nfs3write, nfs3read
1366  * We use smb_rwuio instead.
1367  */
1368 
1369 /* ARGSUSED */
1370 static int
1371 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
1372         cred_t *cr, int *rvalp, caller_context_t *ct)
1373 {
1374         int             error;
1375         smbmntinfo_t    *smi;
1376 
1377         smi = VTOSMI(vp);
1378 
1379         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1380                 return (EIO);
1381 
1382         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1383                 return (EIO);
1384 
1385         switch (cmd) {
1386 
1387         case _FIOFFS:
1388                 error = smbfs_fsync(vp, 0, cr, ct);
1389                 break;
1390 
1391                 /*
1392                  * The following two ioctls are used by bfu.
1393                  * Silently ignore to avoid bfu errors.
1394                  */
1395         case _FIOGDIO:
1396         case _FIOSDIO:
1397                 error = 0;
1398                 break;
1399 
1400 #if 0   /* Todo - SMB ioctl query regions */
1401         case _FIO_SEEK_DATA:
1402         case _FIO_SEEK_HOLE:
1403 #endif
1404 
1405         case _FIODIRECTIO:
1406                 error = smbfs_directio(vp, (int)arg, cr);
1407                 break;
1408 
1409                 /*
1410                  * Allow get/set with "raw" security descriptor (SD) data.
1411                  * Useful for testing, diagnosing idmap problems, etc.
1412                  */
1413         case SMBFSIO_GETSD:
1414                 error = smbfs_acl_iocget(vp, arg, flag, cr);
1415                 break;
1416 
1417         case SMBFSIO_SETSD:
1418                 error = smbfs_acl_iocset(vp, arg, flag, cr);
1419                 break;
1420 
1421         default:
1422                 error = ENOTTY;
1423                 break;
1424         }
1425 
1426         return (error);
1427 }
1428 
1429 
1430 /*
1431  * Return either cached or remote attributes. If get remote attr
1432  * use them to check and invalidate caches, then cache the new attributes.
1433  */
1434 /* ARGSUSED */
1435 static int
1436 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1437         caller_context_t *ct)
1438 {
1439         smbnode_t *np;
1440         smbmntinfo_t *smi;
1441         int error;
1442 
1443         smi = VTOSMI(vp);
1444 
1445         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1446                 return (EIO);
1447 
1448         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1449                 return (EIO);
1450 
1451         /*
1452          * If it has been specified that the return value will
1453          * just be used as a hint, and we are only being asked
1454          * for size, fsid or rdevid, then return the client's
1455          * notion of these values without checking to make sure
1456          * that the attribute cache is up to date.
1457          * The whole point is to avoid an over the wire GETATTR
1458          * call.
1459          */
1460         np = VTOSMB(vp);
1461         if (flags & ATTR_HINT) {
1462                 if (vap->va_mask ==
1463                     (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
1464                         mutex_enter(&np->r_statelock);
1465                         if (vap->va_mask | AT_SIZE)
1466                                 vap->va_size = np->r_size;
1467                         if (vap->va_mask | AT_FSID)
1468                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
1469                         if (vap->va_mask | AT_RDEV)
1470                                 vap->va_rdev = vp->v_rdev;
1471                         mutex_exit(&np->r_statelock);
1472                         return (0);
1473                 }
1474         }
1475 
1476         /*
1477          * Only need to flush pages if asking for the mtime
1478          * and if there any dirty pages.
1479          *
1480          * Here NFS also checks for async writes (np->r_awcount)
1481          */
1482         if (vap->va_mask & AT_MTIME) {
1483                 if (vn_has_cached_data(vp) &&
1484                     ((np->r_flags & RDIRTY) != 0)) {
1485                         mutex_enter(&np->r_statelock);
1486                         np->r_gcount++;
1487                         mutex_exit(&np->r_statelock);
1488                         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1489                         mutex_enter(&np->r_statelock);
1490                         if (error && (error == ENOSPC || error == EDQUOT)) {
1491                                 if (!np->r_error)
1492                                         np->r_error = error;
1493                         }
1494                         if (--np->r_gcount == 0)
1495                                 cv_broadcast(&np->r_cv);
1496                         mutex_exit(&np->r_statelock);
1497                 }
1498         }
1499 
1500         return (smbfsgetattr(vp, vap, cr));
1501 }
1502 
1503 /* smbfsgetattr() in smbfs_client.c */
1504 
1505 /*ARGSUSED4*/
1506 static int
1507 smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1508                 caller_context_t *ct)
1509 {
1510         vfs_t           *vfsp;
1511         smbmntinfo_t    *smi;
1512         int             error;
1513         uint_t          mask;
1514         struct vattr    oldva;
1515 
1516         vfsp = vp->v_vfsp;
1517         smi = VFTOSMI(vfsp);
1518 
1519         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1520                 return (EIO);
1521 
1522         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1523                 return (EIO);
1524 
1525         mask = vap->va_mask;
1526         if (mask & AT_NOSET)
1527                 return (EINVAL);
1528 
1529         if (vfsp->vfs_flag & VFS_RDONLY)
1530                 return (EROFS);
1531 
1532         /*
1533          * This is a _local_ access check so that only the owner of
1534          * this mount can set attributes.  With ACLs enabled, the
1535          * file owner can be different from the mount owner, and we
1536          * need to check the _mount_ owner here.  See _access_rwx
1537          */
1538         bzero(&oldva, sizeof (oldva));
1539         oldva.va_mask = AT_TYPE | AT_MODE;
1540         error = smbfsgetattr(vp, &oldva, cr);
1541         if (error)
1542                 return (error);
1543         oldva.va_mask |= AT_UID | AT_GID;
1544         oldva.va_uid = smi->smi_uid;
1545         oldva.va_gid = smi->smi_gid;
1546 
1547         error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
1548             smbfs_accessx, vp);
1549         if (error)
1550                 return (error);
1551 
1552         if (mask & (AT_UID | AT_GID)) {
1553                 if (smi->smi_flags & SMI_ACL)
1554                         error = smbfs_acl_setids(vp, vap, cr);
1555                 else
1556                         error = ENOSYS;
1557                 if (error != 0) {
1558                         SMBVDEBUG("error %d seting UID/GID on %s",
1559                             error, VTOSMB(vp)->n_rpath);
1560                         /*
1561                          * It might be more correct to return the
1562                          * error here, but that causes complaints
1563                          * when root extracts a cpio archive, etc.
1564                          * So ignore this error, and go ahead with
1565                          * the rest of the setattr work.
1566                          */
1567                 }
1568         }
1569 
1570         error = smbfssetattr(vp, vap, flags, cr);
1571 
1572 #ifdef  SMBFS_VNEVENT
1573         if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0)
1574                 vnevent_truncate(vp, ct);
1575 #endif
1576 
1577         return (error);
1578 }
1579 
1580 /*
1581  * Mostly from Darwin smbfs_setattr()
1582  * but then modified a lot.
1583  */
1584 /* ARGSUSED */
static int
smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
{
	int		error = 0;
	smbnode_t	*np = VTOSMB(vp);
	uint_t		mask = vap->va_mask;
	struct timespec *mtime, *atime;
	struct smb_cred scred;
	int		cerror, modified = 0;
	unsigned short	fid;
	int have_fid = 0;
	uint32_t rights = 0;
	uint32_t dosattr = 0;

	ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

	/*
	 * There are no settable attributes on the XATTR dir,
	 * so just silently ignore these.  On XATTR files,
	 * you can set the size but nothing else.
	 */
	if (vp->v_flag & V_XATTRDIR)
		return (0);
	if (np->n_flag & N_XATTR) {
		if (mask & AT_TIMES)
			SMBVDEBUG("ignore set time on xattr\n");
		mask &= AT_SIZE;
	}

	/*
	 * Only need to flush pages if there are any pages and
	 * if the file is marked as dirty in some fashion.  The
	 * file must be flushed so that we can accurately
	 * determine the size of the file and the cached data
	 * after the SETATTR returns.  A file is considered to
	 * be dirty if it is either marked with RDIRTY, has
	 * outstanding i/o's active, or is mmap'd.  In this
	 * last case, we can't tell whether there are dirty
	 * pages, so we flush just to be sure.
	 */
	if (vn_has_cached_data(vp) &&
	    ((np->r_flags & RDIRTY) ||
	    np->r_count > 0 ||
	    np->r_mapcnt > 0)) {
		ASSERT(vp->v_type != VCHR);
		error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
		if (error && (error == ENOSPC || error == EDQUOT)) {
			/* Remember the first hard space error in r_error. */
			mutex_enter(&np->r_statelock);
			if (!np->r_error)
				np->r_error = error;
			mutex_exit(&np->r_statelock);
		}
	}

	/*
	 * If our caller is trying to set multiple attributes, they
	 * can make no assumption about what order they are done in.
	 * Here we try to do them in order of decreasing likelihood
	 * of failure, just to minimize the chance we'll wind up
	 * with a partially complete request.
	 */

	/* Shared lock for (possible) n_fid use. */
	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * If the caller has provided extensible attributes,
	 * map those into DOS attributes supported by SMB.
	 * Note: zero means "no change".
	 */
	if (mask & AT_XVATTR)
		dosattr = xvattr_to_dosattr(np, vap);

	/*
	 * Will we need an open handle for this setattr?
	 * If so, what rights will we need?
	 */
	if (dosattr || (mask & (AT_ATIME | AT_MTIME))) {
		rights |=
		    SA_RIGHT_FILE_WRITE_ATTRIBUTES;
	}
	if (mask & AT_SIZE) {
		rights |=
		    SA_RIGHT_FILE_WRITE_DATA |
		    SA_RIGHT_FILE_APPEND_DATA;
	}

	/*
	 * Only SIZE really requires a handle, but it's
	 * simpler and more reliable to set via a handle.
	 * Some servers like NT4 won't set times by path.
	 * Also, we're usually setting everything anyway.
	 */
	if (rights != 0) {
		error = smbfs_smb_tmpopen(np, rights, &scred, &fid);
		if (error) {
			SMBVDEBUG("error %d opening %s\n",
			    error, np->n_rpath);
			goto out;
		}
		have_fid = 1;
	}

	/*
	 * If the server supports the UNIX extensions, right here is where
	 * we'd support changes to uid, gid, mode, and possibly va_flags.
	 * For now we claim to have made any such changes.
	 */

	if (mask & AT_SIZE) {
		/*
		 * If the new file size is less than what the client sees as
		 * the file size, then just change the size and invalidate
		 * the pages.
		 */

		/*
		 * Set the file size to vap->va_size.
		 */
		ASSERT(have_fid);
		error = smbfs_smb_setfsize(np, fid, vap->va_size, &scred);
		if (error) {
			SMBVDEBUG("setsize error %d file %s\n",
			    error, np->n_rpath);
		} else {
			/*
			 * Darwin had code here to zero-extend.
			 * Tests indicate the server will zero-fill,
			 * so looks like we don't need to do that.
			 */
			mutex_enter(&np->r_statelock);
			np->r_size = vap->va_size;
			mutex_exit(&np->r_statelock);
			modified = 1;
		}
	}

	/*
	 * Todo: Implement setting create_time (which is
	 * different from ctime).
	 */
	/* A NULL pointer here means "no change", like dosattr == 0 above. */
	mtime = ((mask & AT_MTIME) ? &vap->va_mtime : 0);
	atime = ((mask & AT_ATIME) ? &vap->va_atime : 0);

	if (dosattr || mtime || atime) {
		/*
		 * Always use the handle-based set attr call now.
		 */
		ASSERT(have_fid);
		error = smbfs_smb_setfattr(np, fid,
		    dosattr, mtime, atime, &scred);
		if (error) {
			SMBVDEBUG("set times error %d file %s\n",
			    error, np->n_rpath);
		} else {
			modified = 1;
		}
	}

out:
	/* Common cleanup for both the success and failure paths. */
	if (have_fid) {
		cerror = smbfs_smb_tmpclose(np, fid, &scred);
		if (cerror)
			SMBVDEBUG("error %d closing %s\n",
			    cerror, np->n_rpath);
	}

	smb_credrele(&scred);
	smbfs_rw_exit(&np->r_lkserlock);

	if (modified) {
		/*
		 * Invalidate attribute cache in case the server
		 * doesn't set exactly the attributes we asked.
		 */
		smbfs_attrcache_remove(np);

		/*
		 * If changing the size of the file, invalidate
		 * any local cached data which is no longer part
		 * of the file.  We also possibly invalidate the
		 * last page in the file.  We could use
		 * pvn_vpzero(), but this would mark the page as
		 * modified and require it to be written back to
		 * the server for no particularly good reason.
		 * This way, if we access it, then we bring it
		 * back in.  A read should be cheaper than a
		 * write.
		 */
		if (mask & AT_SIZE) {
			smbfs_invalidate_pages(vp,
			    (vap->va_size & PAGEMASK), cr);
		}
	}

	return (error);
}
1784 
1785 /*
1786  * Helper function for extensible system attributes (PSARC 2007/315)
1787  * Compute the DOS attribute word to pass to _setfattr (see above).
1788  * This returns zero IFF no change is being made to attributes.
1789  * Otherwise return the new attributes or SMB_EFA_NORMAL.
1790  */
1791 static uint32_t
1792 xvattr_to_dosattr(smbnode_t *np, struct vattr *vap)
1793 {
1794         xvattr_t *xvap = (xvattr_t *)vap;
1795         xoptattr_t *xoap = NULL;
1796         uint32_t attr = np->r_attr.fa_attr;
1797         boolean_t anyset = B_FALSE;
1798 
1799         if ((xoap = xva_getxoptattr(xvap)) == NULL)
1800                 return (0);
1801 
1802         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1803                 if (xoap->xoa_archive)
1804                         attr |= SMB_FA_ARCHIVE;
1805                 else
1806                         attr &= ~SMB_FA_ARCHIVE;
1807                 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1808                 anyset = B_TRUE;
1809         }
1810         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1811                 if (xoap->xoa_system)
1812                         attr |= SMB_FA_SYSTEM;
1813                 else
1814                         attr &= ~SMB_FA_SYSTEM;
1815                 XVA_SET_RTN(xvap, XAT_SYSTEM);
1816                 anyset = B_TRUE;
1817         }
1818         if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1819                 if (xoap->xoa_readonly)
1820                         attr |= SMB_FA_RDONLY;
1821                 else
1822                         attr &= ~SMB_FA_RDONLY;
1823                 XVA_SET_RTN(xvap, XAT_READONLY);
1824                 anyset = B_TRUE;
1825         }
1826         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1827                 if (xoap->xoa_hidden)
1828                         attr |= SMB_FA_HIDDEN;
1829                 else
1830                         attr &= ~SMB_FA_HIDDEN;
1831                 XVA_SET_RTN(xvap, XAT_HIDDEN);
1832                 anyset = B_TRUE;
1833         }
1834 
1835         if (anyset == B_FALSE)
1836                 return (0);     /* no change */
1837         if (attr == 0)
1838                 attr = SMB_EFA_NORMAL;
1839 
1840         return (attr);
1841 }
1842 
1843 /*
1844  * smbfs_access_rwx()
1845  * Common function for smbfs_access, etc.
1846  *
1847  * The security model implemented by the FS is unusual
1848  * due to the current "single user mounts" restriction:
1849  * All access under a given mount point uses the CIFS
1850  * credentials established by the owner of the mount.
1851  *
1852  * Most access checking is handled by the CIFS server,
1853  * but we need sufficient Unix access checks here to
1854  * prevent other local Unix users from having access
1855  * to objects under this mount that the uid/gid/mode
1856  * settings in the mount would not allow.
1857  *
1858  * With this model, there is a case where we need the
1859  * ability to do an access check before we have the
1860  * vnode for an object.  This function takes advantage
1861  * of the fact that the uid/gid/mode is per mount, and
1862  * avoids the need for a vnode.
1863  *
1864  * We still (sort of) need a vnode when we call
1865  * secpolicy_vnode_access, but that only uses
1866  * the vtype field, so we can use a pair of fake
1867  * vnodes that have only v_type filled in.
1868  */
1869 static int
1870 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
1871 {
1872         /* See the secpolicy call below. */
1873         static const vnode_t tmpl_vdir = { .v_type = VDIR };
1874         static const vnode_t tmpl_vreg = { .v_type = VREG };
1875         vattr_t         va;
1876         vnode_t         *tvp;
1877         struct smbmntinfo *smi = VFTOSMI(vfsp);
1878         int shift = 0;
1879 
1880         /*
1881          * Build our (fabricated) vnode attributes.
1882          */
1883         bzero(&va, sizeof (va));
1884         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
1885         va.va_type = vtype;
1886         va.va_mode = (vtype == VDIR) ?
1887             smi->smi_dmode : smi->smi_fmode;
1888         va.va_uid = smi->smi_uid;
1889         va.va_gid = smi->smi_gid;
1890 
1891         /*
1892          * Disallow write attempts on read-only file systems,
1893          * unless the file is a device or fifo node.  Note:
1894          * Inline vn_is_readonly and IS_DEVVP here because
1895          * we may not have a vnode ptr.  Original expr. was:
1896          * (mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1897          */
1898         if ((mode & VWRITE) &&
1899             (vfsp->vfs_flag & VFS_RDONLY) &&
1900             !(vtype == VCHR || vtype == VBLK || vtype == VFIFO))
1901                 return (EROFS);
1902 
1903         /*
1904          * Disallow attempts to access mandatory lock files.
1905          * Similarly, expand MANDLOCK here.
1906          */
1907         if ((mode & (VWRITE | VREAD | VEXEC)) &&
1908             va.va_type == VREG && MANDMODE(va.va_mode))
1909                 return (EACCES);
1910 
1911         /*
1912          * Access check is based on only
1913          * one of owner, group, public.
1914          * If not owner, then check group.
1915          * If not a member of the group,
1916          * then check public access.
1917          */
1918         if (crgetuid(cr) != va.va_uid) {
1919                 shift += 3;
1920                 if (!groupmember(va.va_gid, cr))
1921                         shift += 3;
1922         }
1923 
1924         /*
1925          * We need a vnode for secpolicy_vnode_access,
1926          * but the only thing it looks at is v_type,
1927          * so pass one of the templates above.
1928          */
1929         tvp = (va.va_type == VDIR) ?
1930             (vnode_t *)&tmpl_vdir :
1931             (vnode_t *)&tmpl_vreg;
1932 
1933         return (secpolicy_vnode_access2(cr, tvp, va.va_uid,
1934             va.va_mode << shift, mode));
1935 }
1936 
1937 /*
1938  * See smbfs_setattr
1939  */
1940 static int
1941 smbfs_accessx(void *arg, int mode, cred_t *cr)
1942 {
1943         vnode_t *vp = arg;
1944         /*
1945          * Note: The caller has checked the current zone,
1946          * the SMI_DEAD and VFS_UNMOUNTED flags, etc.
1947          */
1948         return (smbfs_access_rwx(vp->v_vfsp, vp->v_type, mode, cr));
1949 }
1950 
1951 /*
1952  * XXX
1953  * This op should support PSARC 2007/403, Modified Access Checks for CIFS
1954  */
1955 /* ARGSUSED */
1956 static int
1957 smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1958 {
1959         vfs_t           *vfsp;
1960         smbmntinfo_t    *smi;
1961 
1962         vfsp = vp->v_vfsp;
1963         smi = VFTOSMI(vfsp);
1964 
1965         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1966                 return (EIO);
1967 
1968         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1969                 return (EIO);
1970 
1971         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
1972 }
1973 
1974 
1975 /* ARGSUSED */
1976 static int
1977 smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1978 {
1979         /* Not yet... */
1980         return (ENOSYS);
1981 }
1982 
1983 
1984 /*
1985  * Flush local dirty pages to stable storage on the server.
1986  *
1987  * If FNODSYNC is specified, then there is nothing to do because
1988  * metadata changes are not cached on the client before being
1989  * sent to the server.
1990  */
1991 /* ARGSUSED */
1992 static int
1993 smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1994 {
1995         int             error = 0;
1996         smbmntinfo_t    *smi;
1997         smbnode_t       *np;
1998         struct smb_cred scred;
1999 
2000         np = VTOSMB(vp);
2001         smi = VTOSMI(vp);
2002 
2003         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2004                 return (EIO);
2005 
2006         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2007                 return (EIO);
2008 
2009         if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
2010                 return (0);
2011 
2012         if ((syncflag & (FSYNC|FDSYNC)) == 0)
2013                 return (0);
2014 
2015         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
2016         if (error)
2017                 return (error);
2018 
2019         /* Shared lock for n_fid use in _flush */
2020         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2021                 return (EINTR);
2022         smb_credinit(&scred, cr);
2023 
2024         error = smbfs_smb_flush(np, &scred);
2025 
2026         smb_credrele(&scred);
2027         smbfs_rw_exit(&np->r_lkserlock);
2028 
2029         return (error);
2030 }
2031 
2032 /*
2033  * Last reference to vnode went away.
2034  */
2035 /* ARGSUSED */
static void
smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	struct smb_cred scred;
	smbnode_t	*np = VTOSMB(vp);
	int error;

	/*
	 * Don't "bail out" for VFS_UNMOUNTED here,
	 * as we want to do cleanup, etc.
	 * See also pcfs_inactive
	 */

	/*
	 * If this is coming from the wrong zone, we let someone in the right
	 * zone take care of it asynchronously.  We can get here due to
	 * VN_RELE() being called from pageout() or fsflush().  This call may
	 * potentially turn into an expensive no-op if, for instance, v_count
	 * gets incremented in the meantime, but it's still correct.
	 */

	/*
	 * From NFS:rinactive()
	 *
	 * Before freeing anything, wait until all asynchronous
	 * activity is done on this rnode.  This will allow all
	 * asynchronous read ahead and write behind i/o's to
	 * finish.
	 */
	mutex_enter(&np->r_statelock);
	while (np->r_count > 0)
		cv_wait(&np->r_cv, &np->r_statelock);
	mutex_exit(&np->r_statelock);

	/*
	 * Flush and invalidate all pages associated with the vnode.
	 */
	if (vn_has_cached_data(vp)) {
		if ((np->r_flags & RDIRTY) && !np->r_error) {
			error = smbfs_putpage(vp, (u_offset_t)0, 0, 0, cr, ct);
			if (error && (error == ENOSPC || error == EDQUOT)) {
				/* Remember the first hard space error. */
				mutex_enter(&np->r_statelock);
				if (!np->r_error)
					np->r_error = error;
				mutex_exit(&np->r_statelock);
			}
		}
		smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
	}
	/*
	 * This vnode should have lost all cached data.
	 */
	ASSERT(vn_has_cached_data(vp) == 0);

	/*
	 * Defend against the possibility that higher-level callers
	 * might not correctly balance open and close calls.  If we
	 * get here with open references remaining, it means there
	 * was a missing VOP_CLOSE somewhere.  If that happens, do
	 * the close here so we don't "leak" FIDs on the server.
	 *
	 * Exclusive lock for modifying n_fid stuff.
	 * Don't want this one ever interruptible.
	 */
	(void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
	smb_credinit(&scred, cr);

	/* n_ovtype records what kind of open (if any) is outstanding. */
	switch (np->n_ovtype) {
	case VNON:
		/* not open (OK) */
		break;

	case VDIR:
		if (np->n_dirrefs == 0)
			break;
		SMBVDEBUG("open dir: refs %d path %s\n",
		    np->n_dirrefs, np->n_rpath);
		/* Force last close. */
		np->n_dirrefs = 1;
		smbfs_rele_fid(np, &scred);
		break;

	case VREG:
		if (np->n_fidrefs == 0)
			break;
		SMBVDEBUG("open file: refs %d id 0x%x path %s\n",
		    np->n_fidrefs, np->n_fid, np->n_rpath);
		/* Force last close. */
		np->n_fidrefs = 1;
		smbfs_rele_fid(np, &scred);
		break;

	default:
		SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
		np->n_ovtype = VNON;
		break;
	}

	smb_credrele(&scred);
	smbfs_rw_exit(&np->r_lkserlock);

	/*
	 * XATTR directories (and the files under them) have
	 * little value for reclaim, so just remove them from
	 * the "hash" (AVL) as soon as they go inactive.
	 * Note that the node may already have been removed
	 * from the hash by smbfsremove.
	 */
	if ((np->n_flag & N_XATTR) != 0 &&
	    (np->r_flags & RHASHED) != 0)
		smbfs_rmhash(np);

	/* Release the node to the smbfs node free list. */
	smbfs_addfree(np);
}
2150 
2151 /*
2152  * Remote file system operations having to do with directory manipulation.
2153  */
2154 /* ARGSUSED */
2155 static int
2156 smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
2157         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2158         int *direntflags, pathname_t *realpnp)
2159 {
2160         vfs_t           *vfs;
2161         smbmntinfo_t    *smi;
2162         smbnode_t       *dnp;
2163         int             error;
2164 
2165         vfs = dvp->v_vfsp;
2166         smi = VFTOSMI(vfs);
2167 
2168         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2169                 return (EPERM);
2170 
2171         if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED)
2172                 return (EIO);
2173 
2174         dnp = VTOSMB(dvp);
2175 
2176         /*
2177          * Are we looking up extended attributes?  If so, "dvp" is
2178          * the file or directory for which we want attributes, and
2179          * we need a lookup of the (faked up) attribute directory
2180          * before we lookup the rest of the path.
2181          */
2182         if (flags & LOOKUP_XATTR) {
2183                 /*
2184                  * Require the xattr mount option.
2185                  */
2186                 if ((vfs->vfs_flag & VFS_XATTR) == 0)
2187                         return (EINVAL);
2188 
2189                 error = smbfs_get_xattrdir(dvp, vpp, cr, flags);
2190                 return (error);
2191         }
2192 
2193         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_READER, SMBINTR(dvp)))
2194                 return (EINTR);
2195 
2196         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
2197 
2198         smbfs_rw_exit(&dnp->r_rwlock);
2199 
2200         return (error);
2201 }
2202 
2203 /* ARGSUSED */
/*
 * Common lookup of "nm" under directory dvp, used by smbfs_lookup
 * and internal callers.  Handles "", "." and ".." locally; other
 * names may be satisfied from the node cache (when cache_ok) or
 * by an over-the-wire lookup.
 */
static int
smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
	int cache_ok, caller_context_t *ct)
{
	int		error;
	int		supplen; /* supported length */
	vnode_t		*vp;
	smbnode_t	*np;
	smbnode_t	*dnp;
	smbmntinfo_t	*smi;
	/* struct smb_vc	*vcp; */
	const char	*ill;
	const char	*name = (const char *)nm;
	int		nmlen = strlen(nm);
	int		rplen;
	struct smb_cred scred;
	struct smbfattr fa;

	smi = VTOSMI(dvp);
	dnp = VTOSMB(dvp);

	ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);

#ifdef NOT_YET
	vcp = SSTOVC(smi->smi_share);

	/* XXX: Should compute this once and store it in smbmntinfo_t */
	supplen = (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) ? 255 : 12;
#else
	supplen = 255;
#endif

	/*
	 * RWlock must be held, either reader or writer.
	 */
	ASSERT(dnp->r_rwlock.count != 0);

	/*
	 * If lookup is for "", just return dvp.
	 * No need to perform any access checks.
	 */
	if (nmlen == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/*
	 * Can't do lookups in non-directories.
	 */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * Need search permission in the directory.
	 */
	error = smbfs_access(dvp, VEXEC, 0, cr, ct);
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.
	 * Access check was done above.
	 */
	if (nmlen == 1 && name[0] == '.') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/*
	 * Now some sanity checks on the name.
	 * First check the length.
	 */
	if (nmlen > supplen)
		return (ENAMETOOLONG);

	/*
	 * Avoid surprises with characters that are
	 * illegal in Windows file names.
	 * Todo: CATIA mappings?
	 */
	ill = illegal_chars;
	if (dnp->n_flag & N_XATTR)
		ill++; /* allow colon */
	if (strpbrk(nm, ill))
		return (EINVAL);

	/*
	 * Special handling for lookup of ".."
	 *
	 * We keep full pathnames (as seen on the server)
	 * so we can just trim off the last component to
	 * get the full pathname of the parent.  Note:
	 * We don't actually copy and modify, but just
	 * compute the trimmed length and pass that with
	 * the current dir path (not null terminated).
	 *
	 * We don't go over-the-wire to get attributes
	 * for ".." because we know it's a directory,
	 * and we can just leave the rest "stale"
	 * until someone does a getattr.
	 */
	if (nmlen == 2 && name[0] == '.' && name[1] == '.') {
		if (dvp->v_flag & VROOT) {
			/*
			 * Already at the root.  This can happen
			 * with directory listings at the root,
			 * which lookup "." and ".." to get the
			 * inode numbers.  Let ".." be the same
			 * as "." in the FS root.
			 */
			VN_HOLD(dvp);
			*vpp = dvp;
			return (0);
		}

		/*
		 * Special case for XATTR directory
		 */
		if (dvp->v_flag & V_XATTRDIR) {
			error = smbfs_xa_parent(dvp, vpp);
			return (error);
		}

		/*
		 * Find the parent path length.
		 */
		rplen = dnp->n_rplen;
		ASSERT(rplen > 0);
		while (--rplen >= 0) {
			if (dnp->n_rpath[rplen] == '\\')
				break;
		}
		if (rplen <= 0) {
			/* Found our way to the root. */
			vp = SMBTOV(smi->smi_root);
			VN_HOLD(vp);
			*vpp = vp;
			return (0);
		}
		np = smbfs_node_findcreate(smi,
		    dnp->n_rpath, rplen, NULL, 0, 0,
		    &smbfs_fattr0); /* force create */
		ASSERT(np != NULL);
		vp = SMBTOV(np);
		vp->v_type = VDIR;

		/* Success! */
		*vpp = vp;
		return (0);
	}

	/*
	 * Normal lookup of a name under this directory.
	 * Note we handled "", ".", ".." above.
	 */
	if (cache_ok) {
		/*
		 * The caller indicated that it's OK to use a
		 * cached result for this lookup, so try to
		 * reclaim a node from the smbfs node cache.
		 */
		error = smbfslookup_cache(dvp, nm, nmlen, &vp, cr);
		if (error)
			return (error);
		if (vp != NULL) {
			/* hold taken in lookup_cache */
			*vpp = vp;
			return (0);
		}
	}

	/*
	 * OK, go over-the-wire to get the attributes,
	 * then create the node.
	 */
	smb_credinit(&scred, cr);
	/* Note: this can allocate a new "name" */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fa, &scred);
	smb_credrele(&scred);
	if (error == ENOTDIR) {
		/*
		 * Lookup failed because this directory was
		 * removed or renamed by another client.
		 * Remove any cached attributes under it.
		 */
		smbfs_attrcache_remove(dnp);
		smbfs_attrcache_prune(dnp);
	}
	if (error)
		goto out;

	error = smbfs_nget(dvp, name, nmlen, &fa, &vp);
	if (error)
		goto out;

	/* Success! */
	*vpp = vp;

out:
	/* smbfs_smb_lookup may have allocated name. */
	if (name != nm)
		smbfs_name_free(name, nmlen);

	return (error);
}
2411 
2412 /*
2413  * smbfslookup_cache
2414  *
2415  * Try to reclaim a node from the smbfs node cache.
2416  * Some statistics for DEBUG.
2417  *
2418  * This mechanism lets us avoid many of the five (or more)
2419  * OtW lookup calls per file seen with "ls -l" if we search
2420  * the smbfs node cache for recently inactive(ated) nodes.
2421  */
#ifdef DEBUG
int smbfs_lookup_cache_calls = 0;	/* cache lookups attempted */
int smbfs_lookup_cache_error = 0;	/* dir getattr failed */
int smbfs_lookup_cache_miss = 0;	/* node not found in cache */
int smbfs_lookup_cache_stale = 0;	/* node found, attrs expired */
int smbfs_lookup_cache_hits = 0;	/* node found and returned */
#endif /* DEBUG */
2429 
2430 /* ARGSUSED */
2431 static int
2432 smbfslookup_cache(vnode_t *dvp, char *nm, int nmlen,
2433         vnode_t **vpp, cred_t *cr)
2434 {
2435         struct vattr va;
2436         smbnode_t *dnp;
2437         smbnode_t *np;
2438         vnode_t *vp;
2439         int error;
2440         char sep;
2441 
2442         dnp = VTOSMB(dvp);
2443         *vpp = NULL;
2444 
2445 #ifdef DEBUG
2446         smbfs_lookup_cache_calls++;
2447 #endif
2448 
2449         /*
2450          * First make sure we can get attributes for the
2451          * directory.  Cached attributes are OK here.
2452          * If we removed or renamed the directory, this
2453          * will return ENOENT.  If someone else removed
2454          * this directory or file, we'll find out when we
2455          * try to open or get attributes.
2456          */
2457         va.va_mask = AT_TYPE | AT_MODE;
2458         error = smbfsgetattr(dvp, &va, cr);
2459         if (error) {
2460 #ifdef DEBUG
2461                 smbfs_lookup_cache_error++;
2462 #endif
2463                 return (error);
2464         }
2465 
2466         /*
2467          * Passing NULL smbfattr here so we will
2468          * just look, not create.
2469          */
2470         sep = SMBFS_DNP_SEP(dnp);
2471         np = smbfs_node_findcreate(dnp->n_mount,
2472             dnp->n_rpath, dnp->n_rplen,
2473             nm, nmlen, sep, NULL);
2474         if (np == NULL) {
2475 #ifdef DEBUG
2476                 smbfs_lookup_cache_miss++;
2477 #endif
2478                 return (0);
2479         }
2480 
2481         /*
2482          * Found it.  Attributes still valid?
2483          */
2484         vp = SMBTOV(np);
2485         if (np->r_attrtime <= gethrtime()) {
2486                 /* stale */
2487 #ifdef DEBUG
2488                 smbfs_lookup_cache_stale++;
2489 #endif
2490                 VN_RELE(vp);
2491                 return (0);
2492         }
2493 
2494         /*
2495          * Success!
2496          * Caller gets hold from smbfs_node_findcreate
2497          */
2498 #ifdef DEBUG
2499         smbfs_lookup_cache_hits++;
2500 #endif
2501         *vpp = vp;
2502         return (0);
2503 }
2504 
2505 
2506 /*
2507  * XXX
2508  * vsecattr_t is new to build 77, and we need to eventually support
2509  * it in order to create an ACL when an object is created.
2510  *
2511  * This op should support the new FIGNORECASE flag for case-insensitive
2512  * lookups, per PSARC 2007/244.
2513  */
/*
 * Create a regular file (VOP_CREATE).
 *
 *	dvp		directory in which to create the entry
 *	nm		name to create; "" means "use dvp itself"
 *	va		requested attributes; only AT_SIZE (truncate)
 *			is acted on here
 *	exclusive	EXCL means fail with EEXIST if the name exists
 *	mode		access the caller wants on the result
 *	lfaware		FOFFMAX set when caller handles large files
 *
 * On success, *vpp holds a reference to the created (or reopened)
 * vnode.  Returns 0 or an errno value.
 */
/* ARGSUSED */
static int
smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
	int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
	vsecattr_t *vsecp)
{
	int		error;
	int		cerror;
	vfs_t		*vfsp;
	vnode_t		*vp;
	smbnode_t	*np;
	smbnode_t	*dnp;
	smbmntinfo_t	*smi;
	struct vattr	vattr;
	struct smbfattr fattr;
	struct smb_cred scred;
	const char *name = (const char *)nm;
	int		nmlen = strlen(nm);
	uint32_t	disp;
	uint16_t	fid;
	int		xattr;

	vfsp = dvp->v_vfsp;
	smi = VFTOSMI(vfsp);
	dnp = VTOSMB(dvp);
	vp = NULL;

	/* Refuse cross-zone access and dead/unmounted file systems. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EPERM);

	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/*
	 * Note: this may break mknod(2) calls to create a directory,
	 * but that's obscure use.  Some other filesystems do this.
	 * Todo: redirect VDIR type here to _mkdir.
	 */
	if (va->va_type != VREG)
		return (EINVAL);

	/*
	 * If the pathname is "", just use dvp, no checks.
	 * Do this outside of the rwlock (like zfs).
	 */
	if (nmlen == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* Don't allow "." or ".." through here. */
	if ((nmlen == 1 && name[0] == '.') ||
	    (nmlen == 2 && name[0] == '.' && name[1] == '.'))
		return (EISDIR);

	/*
	 * We make a copy of the attributes because the caller does not
	 * expect us to change what va points to.
	 */
	vattr = *va;

	/* Serialize changes in this directory. */
	if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * NFS needs to go over the wire, just to be sure whether the
	 * file exists or not.  Using a cached result is dangerous in
	 * this case when making a decision regarding existence.
	 *
	 * The SMB protocol does NOT really need to go OTW here
	 * thanks to the expressive NTCREATE disposition values.
	 * Unfortunately, to do Unix access checks correctly,
	 * we need to know if the object already exists.
	 * When the object does not exist, we need VWRITE on
	 * the directory.  Note: smbfslookup() checks VEXEC.
	 */
	error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
	if (error == 0) {
		/*
		 * The file already exists.  Error?
		 * NB: have a hold from smbfslookup
		 */
		if (exclusive == EXCL) {
			error = EEXIST;
			VN_RELE(vp);
			goto out;
		}
		/*
		 * Verify requested access.
		 */
		error = smbfs_access(vp, mode, 0, cr, ct);
		if (error) {
			VN_RELE(vp);
			goto out;
		}

		/*
		 * Truncate (if requested).
		 */
		if ((vattr.va_mask & AT_SIZE) && vp->v_type == VREG) {
			np = VTOSMB(vp);
			/*
			 * Check here for large file truncation by
			 * LF-unaware process, like ufs_create().
			 */
			if (!(lfaware & FOFFMAX)) {
				mutex_enter(&np->r_statelock);
				if (np->r_size > MAXOFF32_T)
					error = EOVERFLOW;
				mutex_exit(&np->r_statelock);
			}
			if (error) {
				VN_RELE(vp);
				goto out;
			}
			/* Do the truncate via setattr. */
			vattr.va_mask = AT_SIZE;
			error = smbfssetattr(vp, &vattr, 0, cr);
			if (error) {
				VN_RELE(vp);
				goto out;
			}
#ifdef	SMBFS_VNEVENT
			/* Existing file was truncated */
			vnevent_create(vp, ct);
#endif
			/* invalidate pages done in smbfssetattr() */
		}
		/* Success! */
		*vpp = vp;
		goto out;
	}

	/*
	 * The file did not exist.  Need VWRITE in the directory.
	 */
	error = smbfs_access(dvp, VWRITE, 0, cr, ct);
	if (error)
		goto out;

	/*
	 * Now things get tricky.  We also need to check the
	 * requested open mode against the file we may create.
	 * See comments at smbfs_access_rwx
	 */
	error = smbfs_access_rwx(vfsp, VREG, mode, cr);
	if (error)
		goto out;

	/*
	 * Now the code derived from Darwin,
	 * but with greater use of NT_CREATE
	 * disposition options.  Much changed.
	 *
	 * Create (or open) a new child node.
	 * Note we handled "." and ".." above.
	 *
	 * Map the create/truncate semantics onto an NTCREATE
	 * disposition: EXCL -> fail if it exists (CREATE);
	 * truncate-to-zero -> OVERWRITE_IF; otherwise OPEN_IF.
	 */

	if (exclusive == EXCL)
		disp = NTCREATEX_DISP_CREATE;
	else {
		/* Truncate regular files if requested. */
		if ((va->va_type == VREG) &&
		    (va->va_mask & AT_SIZE) &&
		    (va->va_size == 0))
			disp = NTCREATEX_DISP_OVERWRITE_IF;
		else
			disp = NTCREATEX_DISP_OPEN_IF;
	}
	xattr = (dnp->n_flag & N_XATTR) ? 1 : 0;
	error = smbfs_smb_create(dnp,
	    name, nmlen, xattr,
	    disp, &scred, &fid);
	if (error)
		goto out;

	/*
	 * Should use the fid to get/set the size
	 * while we have it opened here.  See above.
	 */

	/* Close the create handle; failure is only logged. */
	cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);
	if (cerror)
		SMBVDEBUG("error %d closing %s\\%s\n",
		    cerror, dnp->n_rpath, name);

	/*
	 * In the open case, the name may differ a little
	 * from what we passed to create (case, etc.)
	 * so call lookup to get the (opened) name.
	 *
	 * XXX: Could avoid this extra lookup if the
	 * "createact" result from NT_CREATE says we
	 * created the object.
	 */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
	if (error)
		goto out;

	/* update attr and directory cache */
	smbfs_attr_touchdir(dnp);

	/* Get (or create) the vnode for the new object. */
	error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
	if (error)
		goto out;

	/* Success! */
	*vpp = vp;
	error = 0;

out:
	smb_credrele(&scred);
	smbfs_rw_exit(&dnp->r_rwlock);
	/* smbfs_smb_lookup may have replaced "name". */
	if (name != nm)
		smbfs_name_free(name, nmlen);
	return (error);
}
2732 
2733 /*
2734  * XXX
2735  * This op should support the new FIGNORECASE flag for case-insensitive
2736  * lookups, per PSARC 2007/244.
2737  */
2738 /* ARGSUSED */
2739 static int
2740 smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
2741         int flags)
2742 {
2743         struct smb_cred scred;
2744         vnode_t         *vp = NULL;
2745         smbnode_t       *dnp = VTOSMB(dvp);
2746         smbmntinfo_t    *smi = VTOSMI(dvp);
2747         int             error;
2748 
2749         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2750                 return (EPERM);
2751 
2752         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2753                 return (EIO);
2754 
2755         /*
2756          * Verify access to the dirctory.
2757          */
2758         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
2759         if (error)
2760                 return (error);
2761 
2762         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2763                 return (EINTR);
2764         smb_credinit(&scred, cr);
2765 
2766         /* Lookup the file to remove. */
2767         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2768         if (error == 0) {
2769                 /*
2770                  * Do the real remove work
2771                  */
2772                 error = smbfsremove(dvp, vp, &scred, flags);
2773                 VN_RELE(vp);
2774         }
2775 
2776         smb_credrele(&scred);
2777         smbfs_rw_exit(&dnp->r_rwlock);
2778 
2779         return (error);
2780 }
2781 
2782 /*
2783  * smbfsremove does the real work of removing in SMBFS
2784  * Caller has done dir access checks etc.
2785  *
2786  * The normal way to delete a file over SMB is open it (with DELETE access),
2787  * set the "delete-on-close" flag, and close the file.  The problem for Unix
2788  * applications is that they expect the file name to be gone once the unlink
2789  * completes, and the SMB server does not actually delete the file until ALL
2790  * opens of that file are closed.  We can't assume our open handles are the
2791  * only open handles on a file we're deleting, so to be safe we'll try to
2792  * rename the file to a temporary name and then set delete-on-close.  If we
2793  * fail to set delete-on-close (i.e. because other opens prevent it) then
2794  * undo the changes we made and give up with EBUSY.  Note that we might have
2795  * permission to delete a file but lack permission to rename, so we want to
2796  * continue in cases where rename fails.  As an optimization, only do the
2797  * rename when we have the file open.
2798  *
2799  * This is similar to what NFS does when deleting a file that has local opens,
2800  * but thanks to SMB delete-on-close, we don't need to keep track of when the
2801  * last local open goes away and send a delete.  The server does that for us.
2802  */
/* ARGSUSED */
static int
smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
    int flags)
{
	smbnode_t	*dnp = VTOSMB(dvp);
	smbnode_t	*np = VTOSMB(vp);
	char		*tmpname = NULL;
	int		tnlen;
	int		error;
	unsigned short	fid;
	boolean_t	have_fid = B_FALSE;
	boolean_t	renamed = B_FALSE;

	/*
	 * The dvp RWlock must be held as writer.
	 */
	ASSERT(dnp->r_rwlock.owner == curthread);

	/* Never allow link/unlink directories on SMB. */
	if (vp->v_type == VDIR)
		return (EPERM);

	/*
	 * We need to flush any dirty pages which happen to
	 * be hanging around before removing the file.  This
	 * shouldn't happen very often and mostly on file
	 * systems mounted "nocto".
	 */
	if (vn_has_cached_data(vp) &&
	    ((np->r_flags & RDIRTY) || np->r_count > 0)) {
		error = smbfs_putpage(vp, (offset_t)0, 0, 0,
		    scred->scr_cred, NULL);
		if (error && (error == ENOSPC || error == EDQUOT)) {
			/* Remember the space error on the node. */
			mutex_enter(&np->r_statelock);
			if (!np->r_error)
				np->r_error = error;
			mutex_exit(&np->r_statelock);
		}
	}

	/* Shared lock for n_fid use in smbfs_smb_setdisp etc. */
	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
		return (EINTR);

	/*
	 * Get a file handle with delete access.
	 * Close this FID before return.
	 */
	error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,
	    scred, &fid);
	if (error) {
		SMBVDEBUG("error %d opening %s\n",
		    error, np->n_rpath);
		goto out;
	}
	have_fid = B_TRUE;

	/*
	 * If we have the file open, try to rename it to a temporary name.
	 * If we can't rename, continue on and try setting DoC anyway.
	 */
	if ((vp->v_count > 1) && (np->n_fidrefs > 0)) {
		tmpname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		tnlen = smbfs_newname(tmpname, MAXNAMELEN);
		error = smbfs_smb_t2rename(np, tmpname, tnlen, scred, fid, 0);
		if (error != 0) {
			SMBVDEBUG("error %d renaming %s -> %s\n",
			    error, np->n_rpath, tmpname);
			/* Keep going without the rename. */
		} else {
			renamed = B_TRUE;
		}
	}

	/*
	 * Mark the file as delete-on-close.  If we can't,
	 * undo what we did and err out.
	 */
	error = smbfs_smb_setdisp(np, fid, 1, scred);
	if (error != 0) {
		SMBVDEBUG("error %d setting DoC on %s\n",
		    error, np->n_rpath);
		/*
		 * Failed to set DoC. If we renamed, undo that.
		 * Need np->n_rpath relative to parent (dnp).
		 * Use parent path name length plus one for
		 * the separator ('/' or ':')
		 */
		if (renamed) {
			char *oldname;
			int oldnlen;
			int err2;

			oldname = np->n_rpath + (dnp->n_rplen + 1);
			oldnlen = np->n_rplen - (dnp->n_rplen + 1);
			err2 = smbfs_smb_t2rename(np, oldname, oldnlen,
			    scred, fid, 0);
			SMBVDEBUG("error %d un-renaming %s -> %s\n",
			    err2, tmpname, np->n_rpath);
		}
		/* Other opens prevent the delete; report busy. */
		error = EBUSY;
		goto out;
	}
	/* Done! */
	smbfs_attrcache_prune(np);

#ifdef	SMBFS_VNEVENT
	/*
	 * NOTE(review): "nm" and "ct" are not parameters of this
	 * function, so this call cannot compile if SMBFS_VNEVENT
	 * is ever defined -- verify before enabling that define.
	 */
	vnevent_remove(vp, dvp, nm, ct);
#endif

out:
	if (tmpname != NULL)
		kmem_free(tmpname, MAXNAMELEN);

	/* Close the temporary delete handle, if we opened one. */
	if (have_fid)
		(void) smbfs_smb_tmpclose(np, fid, scred);
	smbfs_rw_exit(&np->r_lkserlock);

	if (error == 0) {
		/* Keep lookup from finding this node anymore. */
		smbfs_rmhash(np);
	}

	return (error);
}
2929 
2930 
2931 /* ARGSUSED */
2932 static int
2933 smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
2934         caller_context_t *ct, int flags)
2935 {
2936         /* Not yet... */
2937         return (ENOSYS);
2938 }
2939 
2940 
2941 /*
2942  * XXX
2943  * This op should support the new FIGNORECASE flag for case-insensitive
2944  * lookups, per PSARC 2007/244.
2945  */
/*
 * Rename an entry (VOP_RENAME): odvp/onm -> ndvp/nnm.
 * After access checks, takes both directory rwlocks (in address
 * order, to avoid deadlock) and calls smbfsrename for the work.
 */
/* ARGSUSED */
static int
smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
	caller_context_t *ct, int flags)
{
	struct smb_cred scred;
	smbnode_t	*odnp = VTOSMB(odvp);
	smbnode_t	*ndnp = VTOSMB(ndvp);
	vnode_t		*ovp;
	int error;

	/* Both directories must be in our zone, on live mounts. */
	if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
	    curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
		return (EPERM);

	if (VTOSMI(odvp)->smi_flags & SMI_DEAD ||
	    VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
	    odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
	    ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/* Renaming "." or ".." (as source or target) is never valid. */
	if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
	    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
		return (EINVAL);

	/*
	 * Check that everything is on the same filesystem.
	 * vn_rename checks the fsid's, but in case we don't
	 * fill those in correctly, check here too.
	 */
	if (odvp->v_vfsp != ndvp->v_vfsp)
		return (EXDEV);

	/*
	 * Need write access on source and target.
	 * Server takes care of most checks.
	 */
	error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
	if (error)
		return (error);
	if (odvp != ndvp) {
		error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
		if (error)
			return (error);
	}

	/*
	 * Need to lock both old/new dirs as writer.
	 *
	 * Avoid deadlock here on old vs new directory nodes
	 * by always taking the locks in order of address.
	 * The order is arbitrary, but must be consistent.
	 */
	if (odnp < ndnp) {
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp))) {
			smbfs_rw_exit(&odnp->r_rwlock);
			return (EINTR);
		}
	} else {
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp))) {
			smbfs_rw_exit(&ndnp->r_rwlock);
			return (EINTR);
		}
	}
	smb_credinit(&scred, cr);

	/* Lookup the "old" name */
	error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
	if (error == 0) {
		/*
		 * Do the real rename work
		 */
		error = smbfsrename(odvp, ovp, ndvp, nnm, &scred, flags);
		VN_RELE(ovp);
	}

	smb_credrele(&scred);
	smbfs_rw_exit(&odnp->r_rwlock);
	smbfs_rw_exit(&ndnp->r_rwlock);

	return (error);
}
3036 
3037 /*
3038  * smbfsrename does the real work of renaming in SMBFS
3039  * Caller has done dir access checks etc.
3040  */
/* ARGSUSED */
static int
smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp, char *nnm,
    struct smb_cred *scred, int flags)
{
	smbnode_t	*odnp = VTOSMB(odvp);
	smbnode_t	*onp = VTOSMB(ovp);
	smbnode_t	*ndnp = VTOSMB(ndvp);
	vnode_t		*nvp = NULL;
	int		error;
	int		nvp_locked = 0;

	/* Things our caller should have checked. */
	ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
	ASSERT(odvp->v_vfsp == ndvp->v_vfsp);
	ASSERT(odnp->r_rwlock.owner == curthread);
	ASSERT(ndnp->r_rwlock.owner == curthread);

	/*
	 * Lookup the target file.  If it exists, it needs to be
	 * checked to see whether it is a mount point and whether
	 * it is active (open).
	 */
	error = smbfslookup(ndvp, nnm, &nvp, scred->scr_cred, 0, NULL);
	if (!error) {
		/*
		 * Target (nvp) already exists.  Check that it
		 * has the same type as the source.  The server
		 * will check this also, (and more reliably) but
		 * this lets us return the correct error codes.
		 */
		if (ovp->v_type == VDIR) {
			if (nvp->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (nvp->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}

		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (ovp == nvp) {
			error = 0;
			goto out;
		}

		/*
		 * Also must ensure the target is not a mount point,
		 * and keep mount/umount away until we're done.
		 */
		if (vn_vfsrlock(nvp)) {
			error = EBUSY;
			goto out;
		}
		nvp_locked = 1;
		if (vn_mountedvfs(nvp) != NULL) {
			error = EBUSY;
			goto out;
		}

		/*
		 * CIFS may give a SHARING_VIOLATION error when
		 * trying to rename onto an existing object,
		 * so try to remove the target first.
		 * (Only for files, not directories.)
		 *
		 * Note: smbfsremove requires ndvp's r_rwlock as
		 * writer, which our caller holds (asserted above).
		 */
		if (nvp->v_type == VDIR) {
			error = EEXIST;
			goto out;
		}
		error = smbfsremove(ndvp, nvp, scred, flags);
		if (error != 0)
			goto out;

		/*
		 * OK, removed the target file.  Continue as if
		 * lookup target had failed (nvp == NULL).
		 */
		vn_vfsunlock(nvp);
		nvp_locked = 0;
		VN_RELE(nvp);
		nvp = NULL;
	} /* nvp */

	/* Old attributes become invalid once renamed. */
	smbfs_attrcache_remove(onp);
	error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), scred);

	/*
	 * If the old name should no longer exist,
	 * discard any cached attributes under it.
	 */
	if (error == 0) {
		smbfs_attrcache_prune(onp);
		/* SMBFS_VNEVENT... */
	}

out:
	if (nvp) {
		if (nvp_locked)
			vn_vfsunlock(nvp);
		VN_RELE(nvp);
	}

	return (error);
}
3153 
3154 /*
3155  * XXX
3156  * vsecattr_t is new to build 77, and we need to eventually support
3157  * it in order to create an ACL when an object is created.
3158  *
3159  * This op should support the new FIGNORECASE flag for case-insensitive
3160  * lookups, per PSARC 2007/244.
3161  */
3162 /* ARGSUSED */
3163 static int
3164 smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
3165         cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
3166 {
3167         vnode_t         *vp;
3168         struct smbnode  *dnp = VTOSMB(dvp);
3169         struct smbmntinfo *smi = VTOSMI(dvp);
3170         struct smb_cred scred;
3171         struct smbfattr fattr;
3172         const char              *name = (const char *) nm;
3173         int             nmlen = strlen(name);
3174         int             error, hiderr;
3175 
3176         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3177                 return (EPERM);
3178 
3179         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3180                 return (EIO);
3181 
3182         if ((nmlen == 1 && name[0] == '.') ||
3183             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
3184                 return (EEXIST);
3185 
3186         /* Only plain files are allowed in V_XATTRDIR. */
3187         if (dvp->v_flag & V_XATTRDIR)
3188                 return (EINVAL);
3189 
3190         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3191                 return (EINTR);
3192         smb_credinit(&scred, cr);
3193 
3194         /*
3195          * Require write access in the containing directory.
3196          */
3197         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
3198         if (error)
3199                 goto out;
3200 
3201         error = smbfs_smb_mkdir(dnp, name, nmlen, &scred);
3202         if (error)
3203                 goto out;
3204 
3205         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
3206         if (error)
3207                 goto out;
3208 
3209         smbfs_attr_touchdir(dnp);
3210 
3211         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
3212         if (error)
3213                 goto out;
3214 
3215         if (name[0] == '.')
3216                 if ((hiderr = smbfs_smb_hideit(VTOSMB(vp), NULL, 0, &scred)))
3217                         SMBVDEBUG("hide failure %d\n", hiderr);
3218 
3219         /* Success! */
3220         *vpp = vp;
3221         error = 0;
3222 out:
3223         smb_credrele(&scred);
3224         smbfs_rw_exit(&dnp->r_rwlock);
3225 
3226         if (name != nm)
3227                 smbfs_name_free(name, nmlen);
3228 
3229         return (error);
3230 }
3231 
3232 /*
3233  * XXX
3234  * This op should support the new FIGNORECASE flag for case-insensitive
3235  * lookups, per PSARC 2007/244.
3236  */
3237 /* ARGSUSED */
3238 static int
3239 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
3240         caller_context_t *ct, int flags)
3241 {
3242         vnode_t         *vp = NULL;
3243         int             vp_locked = 0;
3244         struct smbmntinfo *smi = VTOSMI(dvp);
3245         struct smbnode  *dnp = VTOSMB(dvp);
3246         struct smbnode  *np;
3247         struct smb_cred scred;
3248         int             error;
3249 
3250         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3251                 return (EPERM);
3252 
3253         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3254                 return (EIO);
3255 
3256         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3257                 return (EINTR);
3258         smb_credinit(&scred, cr);
3259 
3260         /*
3261          * Require w/x access in the containing directory.
3262          * Server handles all other access checks.
3263          */
3264         error = smbfs_access(dvp, VEXEC|VWRITE, 0, cr, ct);
3265         if (error)
3266                 goto out;
3267 
3268         /*
3269          * First lookup the entry to be removed.
3270          */
3271         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
3272         if (error)
3273                 goto out;
3274         np = VTOSMB(vp);
3275 
3276         /*
3277          * Disallow rmdir of "." or current dir, or the FS root.
3278          * Also make sure it's a directory, not a mount point,
3279          * and lock to keep mount/umount away until we're done.
3280          */
3281         if ((vp == dvp) || (vp == cdir) || (vp->v_flag & VROOT)) {
3282                 error = EINVAL;
3283                 goto out;
3284         }
3285         if (vp->v_type != VDIR) {
3286                 error = ENOTDIR;
3287                 goto out;
3288         }
3289         if (vn_vfsrlock(vp)) {
3290                 error = EBUSY;
3291                 goto out;
3292         }
3293         vp_locked = 1;
3294         if (vn_mountedvfs(vp) != NULL) {
3295                 error = EBUSY;
3296                 goto out;
3297         }
3298 
3299         smbfs_attrcache_remove(np);
3300         error = smbfs_smb_rmdir(np, &scred);
3301 
3302         /*
3303          * Similar to smbfs_remove
3304          */
3305         switch (error) {
3306         case 0:
3307         case ENOENT:
3308         case ENOTDIR:
3309                 smbfs_attrcache_prune(np);
3310                 break;
3311         }
3312 
3313         if (error)
3314                 goto out;
3315 
3316         mutex_enter(&np->r_statelock);
3317         dnp->n_flag |= NMODIFIED;
3318         mutex_exit(&np->r_statelock);
3319         smbfs_attr_touchdir(dnp);
3320         smbfs_rmhash(np);
3321 
3322 out:
3323         if (vp) {
3324                 if (vp_locked)
3325                         vn_vfsunlock(vp);
3326                 VN_RELE(vp);
3327         }
3328         smb_credrele(&scred);
3329         smbfs_rw_exit(&dnp->r_rwlock);
3330 
3331         return (error);
3332 }
3333 
3334 
3335 /* ARGSUSED */
3336 static int
3337 smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
3338         caller_context_t *ct, int flags)
3339 {
3340         /* Not yet... */
3341         return (ENOSYS);
3342 }
3343 
3344 
3345 /* ARGSUSED */
3346 static int
3347 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
3348         caller_context_t *ct, int flags)
3349 {
3350         struct smbnode  *np = VTOSMB(vp);
3351         int             error = 0;
3352         smbmntinfo_t    *smi;
3353 
3354         smi = VTOSMI(vp);
3355 
3356         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3357                 return (EIO);
3358 
3359         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3360                 return (EIO);
3361 
3362         /*
3363          * Require read access in the directory.
3364          */
3365         error = smbfs_access(vp, VREAD, 0, cr, ct);
3366         if (error)
3367                 return (error);
3368 
3369         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
3370 
3371         /*
3372          * Todo readdir cache here
3373          *
3374          * I am serializing the entire readdir opreation
3375          * now since we have not yet implemented readdir
3376          * cache. This fix needs to be revisited once
3377          * we implement readdir cache.
3378          */
3379         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
3380                 return (EINTR);
3381 
3382         error = smbfs_readvdir(vp, uiop, cr, eofp, ct);
3383 
3384         smbfs_rw_exit(&np->r_lkserlock);
3385 
3386         return (error);
3387 }
3388 
/*
 * Do the real work of VOP_READDIR: copy dirent64 records into the
 * caller's buffer (uio) for directory vp.  Synthesizes "." and ".."
 * locally, then walks the SMB-level FindFirst/FindNext stream held
 * in np->n_dirseq.  The directory "offset" is a cookie that simply
 * counts entries (not bytes); np->n_dirofs tracks how far the OtW
 * search context has been consumed.  Caller must hold r_lkserlock
 * as writer (see smbfs_readdir).  Sets *eofp at end of directory.
 */
/* ARGSUSED */
static int
smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
	caller_context_t *ct)
{
	/*
	 * Note: "limit" tells the SMB-level FindFirst/FindNext
	 * functions how many directory entries to request in
	 * each OtW call.  It needs to be large enough so that
	 * we don't make lots of tiny OtW requests, but there's
	 * no point making it larger than the maximum number of
	 * OtW entries that would fit in a maximum sized trans2
	 * response (64k / 48).  Beyond that, it's just tuning.
	 * WinNT used 512, Win2k used 1366.  We use 1000.
	 */
	static const int limit = 1000;
	/* Largest possible dirent size. */
	static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
	struct smb_cred scred;
	vnode_t		*newvp;
	struct smbnode	*np = VTOSMB(vp);
	struct smbfs_fctx *ctx;
	struct dirent64 *dp;
	ssize_t		save_resid;
	offset_t	save_offset; /* 64 bits */
	int		offset; /* yes, 32 bits */
	int		nmlen, error;
	ushort_t	reclen;

	ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

	/* Make sure we serialize for n_dirseq use. */
	ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));

	/*
	 * Make sure smbfs_open filled in n_dirseq
	 */
	if (np->n_dirseq == NULL)
		return (EBADF);

	/* Check for overflow of (32-bit) directory offset. */
	if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
	    (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
		return (EINVAL);

	/* Require space for at least one dirent. */
	if (uio->uio_resid < dbufsiz)
		return (EINVAL);

	SMBVDEBUG("dirname='%s'\n", np->n_rpath);
	smb_credinit(&scred, cr);
	/* Scratch dirent, reused for every entry we copy out. */
	dp = kmem_alloc(dbufsiz, KM_SLEEP);

	/* Saved so we can undo partial progress on early failure (below). */
	save_resid = uio->uio_resid;
	save_offset = uio->uio_loffset;
	offset = uio->uio_offset;
	SMBVDEBUG("in: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);
	error = 0;

	/*
	 * Generate the "." and ".." entries here so we can
	 * (1) make sure they appear (but only once), and
	 * (2) deal with getting their I numbers which the
	 * findnext below does only for normal names.
	 */
	while (offset < FIRST_DIROFS) {
		/*
		 * Tricky bit filling in the first two:
		 * offset 0 is ".", offset 1 is ".."
		 * so strlen of these is offset+1.
		 */
		reclen = DIRENT64_RECLEN(offset + 1);
		if (uio->uio_resid < reclen)
			goto out;
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		dp->d_name[0] = '.';
		dp->d_name[1] = '.';
		/* Terminate after 1 or 2 dots, per the trick above. */
		dp->d_name[offset + 1] = '\0';
		/*
		 * Want the real I-numbers for the "." and ".."
		 * entries.  For these two names, we know that
		 * smbfslookup can get the nodes efficiently.
		 */
		error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
		if (error) {
			dp->d_ino = np->n_ino + offset; /* fiction */
		} else {
			dp->d_ino = VTOSMB(newvp)->n_ino;
			VN_RELE(newvp);
		}
		/*
		 * Note: d_off is the offset that a user-level program
		 * should seek to for reading the NEXT directory entry.
		 * See libc: readdir, telldir, seekdir
		 */
		dp->d_off = offset + 1;
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/*
		 * Note: uiomove updates uio->uio_offset,
		 * but we want it to be our "cookie" value,
		 * which just counts dirents ignoring size.
		 */
		uio->uio_offset = ++offset;
	}

	/*
	 * If there was a backward seek, we have to reopen.
	 */
	if (offset < np->n_dirofs) {
		SMBVDEBUG("Reopening search %d:%d\n",
		    offset, np->n_dirofs);
		error = smbfs_smb_findopen(np, "*", 1,
		    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
		    &scred, &ctx);
		if (error) {
			SMBVDEBUG("can not open search, error = %d", error);
			goto out;
		}
		/* free the old one */
		(void) smbfs_smb_findclose(np->n_dirseq, &scred);
		/* save the new one */
		np->n_dirseq = ctx;
		np->n_dirofs = FIRST_DIROFS;
	} else {
		ctx = np->n_dirseq;
	}

	/*
	 * Skip entries before the requested offset.
	 */
	while (np->n_dirofs < offset) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;
	}

	/*
	 * While there's room in the caller's buffer:
	 *	get a directory entry from SMB,
	 *	convert to a dirent, copyout.
	 * We stop when there is no longer room for a
	 * maximum sized dirent because we must decide
	 * before we know anything about the next entry.
	 */
	while (uio->uio_resid >= dbufsiz) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;

		/* Sanity check the name length. */
		nmlen = ctx->f_nmlen;
		if (nmlen > SMB_MAXFNAMELEN) {
			nmlen = SMB_MAXFNAMELEN;
			SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
		}
		if (smbfs_fastlookup) {
			/* See comment at smbfs_fastlookup above. */
			if (smbfs_nget(vp, ctx->f_name, nmlen,
			    &ctx->f_attr, &newvp) == 0)
				VN_RELE(newvp);
		}

		reclen = DIRENT64_RECLEN(nmlen);
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		bcopy(ctx->f_name, dp->d_name, nmlen);
		dp->d_name[nmlen] = '\0';
		dp->d_ino = ctx->f_inum;
		dp->d_off = offset + 1;	/* See d_off comment above */
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/* See comment re. uio_offset above. */
		uio->uio_offset = ++offset;
	}

out:
	/*
	 * When we come to the end of a directory, the
	 * SMB-level functions return ENOENT, but the
	 * caller is not expecting an error return.
	 *
	 * Also note that we must delay the call to
	 * smbfs_smb_findclose(np->n_dirseq, ...)
	 * until smbfs_close so that all reads at the
	 * end of the directory will return no data.
	 */
	if (error == ENOENT) {
		error = 0;
		if (eofp)
			*eofp = 1;
	}
	/*
	 * If we encountered an error (i.e. "access denied")
	 * from the FindFirst call, we will have copied out
	 * the "." and ".." entries leaving offset == 2.
	 * In that case, restore the original offset/resid
	 * so the caller gets no data with the error.
	 */
	if (error != 0 && offset == FIRST_DIROFS) {
		uio->uio_loffset = save_offset;
		uio->uio_resid = save_resid;
	}
	SMBVDEBUG("out: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);

	kmem_free(dp, dbufsiz);
	smb_credrele(&scred);
	return (error);
}
3605 
3606 /*
3607  * Here NFS has: nfs3_bio
3608  * See smbfs_bio above.
3609  */
3610 
3611 /* ARGSUSED */
3612 static int
3613 smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3614 {
3615         return (ENOSYS);
3616 }
3617 
3618 
3619 /*
3620  * The pair of functions VOP_RWLOCK, VOP_RWUNLOCK
3621  * are optional functions that are called by:
3622  *    getdents, before/after VOP_READDIR
3623  *    pread, before/after ... VOP_READ
3624  *    pwrite, before/after ... VOP_WRITE
3625  *    (other places)
3626  *
3627  * Careful here: None of the above check for any
3628  * error returns from VOP_RWLOCK / VOP_RWUNLOCK!
3629  * In fact, the return value from _rwlock is NOT
3630  * an error code, but V_WRITELOCK_TRUE / _FALSE.
3631  *
3632  * Therefore, it's up to _this_ code to make sure
3633  * the lock state remains balanced, which means
3634  * we can't "bail out" on interrupts, etc.
3635  */
3636 
3637 /* ARGSUSED2 */
3638 static int
3639 smbfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3640 {
3641         smbnode_t       *np = VTOSMB(vp);
3642 
3643         if (!write_lock) {
3644                 (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_READER, FALSE);
3645                 return (V_WRITELOCK_FALSE);
3646         }
3647 
3648 
3649         (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, FALSE);
3650         return (V_WRITELOCK_TRUE);
3651 }
3652 
3653 /* ARGSUSED */
3654 static void
3655 smbfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3656 {
3657         smbnode_t       *np = VTOSMB(vp);
3658 
3659         smbfs_rw_exit(&np->r_rwlock);
3660 }
3661 
3662 
3663 /* ARGSUSED */
3664 static int
3665 smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3666 {
3667         smbmntinfo_t    *smi;
3668 
3669         smi = VTOSMI(vp);
3670 
3671         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3672                 return (EPERM);
3673 
3674         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3675                 return (EIO);
3676 
3677         /*
3678          * Because we stuff the readdir cookie into the offset field
3679          * someone may attempt to do an lseek with the cookie which
3680          * we want to succeed.
3681          */
3682         if (vp->v_type == VDIR)
3683                 return (0);
3684 
3685         /* Like NFS3, just check for 63-bit overflow. */
3686         if (*noffp < 0)
3687                 return (EINVAL);
3688 
3689         return (0);
3690 }
3691 
3692 /* mmap support ******************************************************** */
3693 
3694 #ifdef DEBUG
3695 static int smbfs_lostpage = 0;  /* number of times we lost original page */
3696 #endif
3697 
3698 /*
3699  * Return all the pages from [off..off+len) in file
3700  * Like nfs3_getpage
3701  */
3702 /* ARGSUSED */
3703 static int
3704 smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3705         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3706         enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3707 {
3708         smbnode_t       *np;
3709         smbmntinfo_t    *smi;
3710         int             error;
3711 
3712         np = VTOSMB(vp);
3713         smi = VTOSMI(vp);
3714 
3715         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3716                 return (EIO);
3717 
3718         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3719                 return (EIO);
3720 
3721         if (vp->v_flag & VNOMAP)
3722                 return (ENOSYS);
3723 
3724         if (protp != NULL)
3725                 *protp = PROT_ALL;
3726 
3727         /*
3728          * Now valididate that the caches are up to date.
3729          */
3730         error = smbfs_validate_caches(vp, cr);
3731         if (error)
3732                 return (error);
3733 
3734 retry:
3735         mutex_enter(&np->r_statelock);
3736 
3737         /*
3738          * Don't create dirty pages faster than they
3739          * can be cleaned ... (etc. see nfs)
3740          *
3741          * Here NFS also tests:
3742          *  (mi->mi_max_threads != 0 &&
3743          *  rp->r_awcount > 2 * mi->mi_max_threads)
3744          */
3745         if (rw == S_CREATE) {
3746                 while (np->r_gcount > 0)
3747                         cv_wait(&np->r_cv, &np->r_statelock);
3748         }
3749 
3750         /*
3751          * If we are getting called as a side effect of a write
3752          * operation the local file size might not be extended yet.
3753          * In this case we want to be able to return pages of zeroes.
3754          */
3755         if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
3756                 mutex_exit(&np->r_statelock);
3757                 return (EFAULT);                /* beyond EOF */
3758         }
3759 
3760         mutex_exit(&np->r_statelock);
3761 
3762         error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
3763             pl, plsz, seg, addr, rw, cr);
3764 
3765         switch (error) {
3766         case SMBFS_EOF:
3767                 smbfs_purge_caches(vp, cr);
3768                 goto retry;
3769         case ESTALE:
3770                 /*
3771                  * Here NFS has: PURGE_STALE_FH(error, vp, cr);
3772                  * In-line here as we only use it once.
3773                  */
3774                 mutex_enter(&np->r_statelock);
3775                 np->r_flags |= RSTALE;
3776                 if (!np->r_error)
3777                         np->r_error = (error);
3778                 mutex_exit(&np->r_statelock);
3779                 if (vn_has_cached_data(vp))
3780                         smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
3781                 smbfs_purge_caches(vp, cr);
3782                 break;
3783         default:
3784                 break;
3785         }
3786 
3787         return (error);
3788 }
3789 
3790 /*
3791  * Called from pvn_getpages to get a particular page.
3792  * Like nfs3_getapage
3793  */
3794 /* ARGSUSED */
3795 static int
3796 smbfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
3797         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3798         enum seg_rw rw, cred_t *cr)
3799 {
3800         smbnode_t       *np;
3801         smbmntinfo_t   *smi;
3802 
3803         uint_t          bsize;
3804         struct buf      *bp;
3805         page_t          *pp;
3806         u_offset_t      lbn;
3807         u_offset_t      io_off;
3808         u_offset_t      blkoff;
3809         size_t          io_len;
3810         uint_t blksize;
3811         int error;
3812         /* int readahead; */
3813         int readahead_issued = 0;
3814         /* int ra_window; * readahead window */
3815         page_t *pagefound;
3816 
3817         np = VTOSMB(vp);
3818         smi = VTOSMI(vp);
3819 
3820         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3821                 return (EIO);
3822 
3823         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3824                 return (EIO);
3825 
3826         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3827 
3828 reread:
3829         bp = NULL;
3830         pp = NULL;
3831         pagefound = NULL;
3832 
3833         if (pl != NULL)
3834                 pl[0] = NULL;
3835 
3836         error = 0;
3837         lbn = off / bsize;
3838         blkoff = lbn * bsize;
3839 
3840         /*
3841          * NFS queues up readahead work here.
3842          */
3843 
3844 again:
3845         if ((pagefound = page_exists(vp, off)) == NULL) {
3846                 if (pl == NULL) {
3847                         (void) 0; /* Todo: smbfs_async_readahead(); */
3848                 } else if (rw == S_CREATE) {
3849                         /*
3850                          * Block for this page is not allocated, or the offset
3851                          * is beyond the current allocation size, or we're
3852                          * allocating a swap slot and the page was not found,
3853                          * so allocate it and return a zero page.
3854                          */
3855                         if ((pp = page_create_va(vp, off,
3856                             PAGESIZE, PG_WAIT, seg, addr)) == NULL)
3857                                 cmn_err(CE_PANIC, "smbfs_getapage: page_create");
3858                         io_len = PAGESIZE;
3859                         mutex_enter(&np->r_statelock);
3860                         np->r_nextr = off + PAGESIZE;
3861                         mutex_exit(&np->r_statelock);
3862                 } else {
3863                         /*
3864                          * Need to go to server to get a BLOCK, exception to
3865                          * that being while reading at offset = 0 or doing
3866                          * random i/o, in that case read only a PAGE.
3867                          */
3868                         mutex_enter(&np->r_statelock);
3869                         if (blkoff < np->r_size &&
3870                             blkoff + bsize >= np->r_size) {
3871                                 /*
3872                                  * If only a block or less is left in
3873                                  * the file, read all that is remaining.
3874                                  */
3875                                 if (np->r_size <= off) {
3876                                         /*
3877                                          * Trying to access beyond EOF,
3878                                          * set up to get at least one page.
3879                                          */
3880                                         blksize = off + PAGESIZE - blkoff;
3881                                 } else
3882                                         blksize = np->r_size - blkoff;
3883                         } else if ((off == 0) ||
3884                             (off != np->r_nextr && !readahead_issued)) {
3885                                 blksize = PAGESIZE;
3886                                 blkoff = off; /* block = page here */
3887                         } else
3888                                 blksize = bsize;
3889                         mutex_exit(&np->r_statelock);
3890 
3891                         pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
3892                             &io_len, blkoff, blksize, 0);
3893 
3894                         /*
3895                          * Some other thread has entered the page,
3896                          * so just use it.
3897                          */
3898                         if (pp == NULL)
3899                                 goto again;
3900 
3901                         /*
3902                          * Now round the request size up to page boundaries.
3903                          * This ensures that the entire page will be
3904                          * initialized to zeroes if EOF is encountered.
3905                          */
3906                         io_len = ptob(btopr(io_len));
3907 
3908                         bp = pageio_setup(pp, io_len, vp, B_READ);
3909                         ASSERT(bp != NULL);
3910 
3911                         /*
3912                          * pageio_setup should have set b_addr to 0.  This
3913                          * is correct since we want to do I/O on a page
3914                          * boundary.  bp_mapin will use this addr to calculate
3915                          * an offset, and then set b_addr to the kernel virtual
3916                          * address it allocated for us.
3917                          */
3918                         ASSERT(bp->b_un.b_addr == 0);
3919 
3920                         bp->b_edev = 0;
3921                         bp->b_dev = 0;
3922                         bp->b_lblkno = lbtodb(io_off);
3923                         bp->b_file = vp;
3924                         bp->b_offset = (offset_t)off;
3925                         bp_mapin(bp);
3926 
3927                         /*
3928                          * If doing a write beyond what we believe is EOF,
3929                          * don't bother trying to read the pages from the
3930                          * server, we'll just zero the pages here.  We
3931                          * don't check that the rw flag is S_WRITE here
3932                          * because some implementations may attempt a
3933                          * read access to the buffer before copying data.
3934                          */
3935                         mutex_enter(&np->r_statelock);
3936                         if (io_off >= np->r_size && seg == segkmap) {
3937                                 mutex_exit(&np->r_statelock);
3938                                 bzero(bp->b_un.b_addr, io_len);
3939                         } else {
3940                                 mutex_exit(&np->r_statelock);
3941                                 error = smbfs_bio(bp, 0, cr);
3942                         }
3943 
3944                         /*
3945                          * Unmap the buffer before freeing it.
3946                          */
3947                         bp_mapout(bp);
3948                         pageio_done(bp);
3949 
3950                         /* Here NFS3 updates all pp->p_fsdata */
3951 
3952                         if (error == SMBFS_EOF) {
3953                                 /*
3954                                  * If doing a write system call just return
3955                                  * zeroed pages, else user tried to get pages
3956                                  * beyond EOF, return error.  We don't check
3957                                  * that the rw flag is S_WRITE here because
3958                                  * some implementations may attempt a read
3959                                  * access to the buffer before copying data.
3960                                  */
3961                                 if (seg == segkmap)
3962                                         error = 0;
3963                                 else
3964                                         error = EFAULT;
3965                         }
3966 
3967                         if (!readahead_issued && !error) {
3968                                 mutex_enter(&np->r_statelock);
3969                                 np->r_nextr = io_off + io_len;
3970                                 mutex_exit(&np->r_statelock);
3971                         }
3972                 }
3973         }
3974 
3975         if (pl == NULL)
3976                 return (error);
3977 
3978         if (error) {
3979                 if (pp != NULL)
3980                         pvn_read_done(pp, B_ERROR);
3981                 return (error);
3982         }
3983 
3984         if (pagefound) {
3985                 se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
3986 
3987                 /*
3988                  * Page exists in the cache, acquire the appropriate lock.
3989                  * If this fails, start all over again.
3990                  */
3991                 if ((pp = page_lookup(vp, off, se)) == NULL) {
3992 #ifdef DEBUG
3993                         smbfs_lostpage++;
3994 #endif
3995                         goto reread;
3996                 }
3997                 pl[0] = pp;
3998                 pl[1] = NULL;
3999                 return (0);
4000         }
4001 
4002         if (pp != NULL)
4003                 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
4004 
4005         return (error);
4006 }
4007 
4008 /*
4009  * Here NFS has: nfs3_readahead
4010  * No read-ahead in smbfs yet.
4011  */
4012 
4013 /*
4014  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
4015  * If len == 0, do from off to EOF.
4016  *
4017  * The normal cases should be len == 0 && off == 0 (entire vp list),
4018  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
4019  * (from pageout).
4020  *
4021  * Like nfs3_putpage + nfs_putpages
4022  */
4023 /* ARGSUSED */
4024 static int
4025 smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
4026         caller_context_t *ct)
4027 {
4028         smbnode_t *np;
4029         smbmntinfo_t *smi;
4030         page_t *pp;
4031         u_offset_t eoff;
4032         u_offset_t io_off;
4033         size_t io_len;
4034         int error;
4035         int rdirty;
4036         int err;
4037 
4038         np = VTOSMB(vp);
4039         smi = VTOSMI(vp);
4040 
4041         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4042                 return (EIO);
4043 
4044         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4045                 return (EIO);
4046 
4047         if (vp->v_flag & VNOMAP)
4048                 return (ENOSYS);
4049 
4050         /* Here NFS does rp->r_count (++/--) stuff. */
4051 
4052         /* Beginning of code from nfs_putpages. */
4053 
4054         if (!vn_has_cached_data(vp))
4055                 return (0);
4056 
4057         /*
4058          * If ROUTOFSPACE is set, then all writes turn into B_INVAL
4059          * writes.  B_FORCE is set to force the VM system to actually
4060          * invalidate the pages, even if the i/o failed.  The pages
4061          * need to get invalidated because they can't be written out
4062          * because there isn't any space left on either the server's
4063          * file system or in the user's disk quota.  The B_FREE bit
4064          * is cleared to avoid confusion as to whether this is a
4065          * request to place the page on the freelist or to destroy
4066          * it.
4067          */
4068         if ((np->r_flags & ROUTOFSPACE) ||
4069             (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
4070                 flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
4071 
4072         if (len == 0) {
4073                 /*
4074                  * If doing a full file synchronous operation, then clear
4075                  * the RDIRTY bit.  If a page gets dirtied while the flush
4076                  * is happening, then RDIRTY will get set again.  The
4077                  * RDIRTY bit must get cleared before the flush so that
4078                  * we don't lose this information.
4079                  *
4080                  * NFS has B_ASYNC vs sync stuff here.
4081                  */
4082                 if (off == (u_offset_t)0 &&
4083                     (np->r_flags & RDIRTY)) {
4084                         mutex_enter(&np->r_statelock);
4085                         rdirty = (np->r_flags & RDIRTY);
4086                         np->r_flags &= ~RDIRTY;
4087                         mutex_exit(&np->r_statelock);
4088                 } else
4089                         rdirty = 0;
4090 
4091                 /*
4092                  * Search the entire vp list for pages >= off, and flush
4093                  * the dirty pages.
4094                  */
4095                 error = pvn_vplist_dirty(vp, off, smbfs_putapage,
4096                     flags, cr);
4097 
4098                 /*
4099                  * If an error occurred and the file was marked as dirty
4100                  * before and we aren't forcibly invalidating pages, then
4101                  * reset the RDIRTY flag.
4102                  */
4103                 if (error && rdirty &&
4104                     (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
4105                         mutex_enter(&np->r_statelock);
4106                         np->r_flags |= RDIRTY;
4107                         mutex_exit(&np->r_statelock);
4108                 }
4109         } else {
4110                 /*
4111                  * Do a range from [off...off + len) looking for pages
4112                  * to deal with.
4113                  */
4114                 error = 0;
4115                 io_len = 1; /* quiet warnings */
4116                 eoff = off + len;
4117 
4118                 for (io_off = off; io_off < eoff; io_off += io_len) {
4119                         mutex_enter(&np->r_statelock);
4120                         if (io_off >= np->r_size) {
4121                                 mutex_exit(&np->r_statelock);
4122                                 break;
4123                         }
4124                         mutex_exit(&np->r_statelock);
4125                         /*
4126                          * If we are not invalidating, synchronously
4127                          * freeing or writing pages use the routine
4128                          * page_lookup_nowait() to prevent reclaiming
4129                          * them from the free list.
4130                          */
4131                         if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
4132                                 pp = page_lookup(vp, io_off,
4133                                     (flags & (B_INVAL | B_FREE)) ?
4134                                     SE_EXCL : SE_SHARED);
4135                         } else {
4136                                 pp = page_lookup_nowait(vp, io_off,
4137                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
4138                         }
4139 
4140                         if (pp == NULL || !pvn_getdirty(pp, flags))
4141                                 io_len = PAGESIZE;
4142                         else {
4143                                 err = smbfs_putapage(vp, pp, &io_off,
4144                                     &io_len, flags, cr);
4145                                 if (!error)
4146                                         error = err;
4147                                 /*
4148                                  * "io_off" and "io_len" are returned as
4149                                  * the range of pages we actually wrote.
4150                                  * This allows us to skip ahead more quickly
4151                                  * since several pages may've been dealt
4152                                  * with by this iteration of the loop.
4153                                  */
4154                         }
4155                 }
4156         }
4157 
4158         return (error);
4159 }
4160 
4161 /*
4162  * Write out a single page, possibly klustering adjacent dirty pages.
4163  *
4164  * Like nfs3_putapage / nfs3_sync_putapage
4165  */
4166 static int
4167 smbfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
4168         int flags, cred_t *cr)
4169 {
4170         smbnode_t *np;
4171         u_offset_t io_off;
4172         u_offset_t lbn_off;
4173         u_offset_t lbn;
4174         size_t io_len;
4175         uint_t bsize;
4176         int error;
4177 
4178         np = VTOSMB(vp);
4179 
4180         ASSERT(!vn_is_readonly(vp));
4181 
4182         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
4183         lbn = pp->p_offset / bsize;
4184         lbn_off = lbn * bsize;
4185 
4186         /*
4187          * Find a kluster that fits in one block, or in
4188          * one page if pages are bigger than blocks.  If
4189          * there is less file space allocated than a whole
4190          * page, we'll shorten the i/o request below.
4191          */
4192         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
4193             roundup(bsize, PAGESIZE), flags);
4194 
4195         /*
4196          * pvn_write_kluster shouldn't have returned a page with offset
4197          * behind the original page we were given.  Verify that.
4198          */
4199         ASSERT((pp->p_offset / bsize) >= lbn);
4200 
4201         /*
4202          * Now pp will have the list of kept dirty pages marked for
4203          * write back.  It will also handle invalidation and freeing
4204          * of pages that are not dirty.  Check for page length rounding
4205          * problems.
4206          */
4207         if (io_off + io_len > lbn_off + bsize) {
4208                 ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
4209                 io_len = lbn_off + bsize - io_off;
4210         }
4211         /*
4212          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4213          * consistent value of r_size. RMODINPROGRESS is set in writerp().
4214          * When RMODINPROGRESS is set it indicates that a uiomove() is in
4215          * progress and the r_size has not been made consistent with the
4216          * new size of the file. When the uiomove() completes the r_size is
4217          * updated and the RMODINPROGRESS flag is cleared.
4218          *
4219          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4220          * consistent value of r_size. Without this handshaking, it is
4221          * possible that smbfs_bio() picks  up the old value of r_size
4222          * before the uiomove() in writerp() completes. This will result
4223          * in the write through smbfs_bio() being dropped.
4224          *
4225          * More precisely, there is a window between the time the uiomove()
4226          * completes and the time the r_size is updated. If a VOP_PUTPAGE()
4227          * operation intervenes in this window, the page will be picked up,
4228          * because it is dirty (it will be unlocked, unless it was
4229          * pagecreate'd). When the page is picked up as dirty, the dirty
4230          * bit is reset (pvn_getdirty()). In smbfs_write(), r_size is
4231          * checked. This will still be the old size. Therefore the page will
4232          * not be written out. When segmap_release() calls VOP_PUTPAGE(),
4233          * the page will be found to be clean and the write will be dropped.
4234          */
4235         if (np->r_flags & RMODINPROGRESS) {
4236                 mutex_enter(&np->r_statelock);
4237                 if ((np->r_flags & RMODINPROGRESS) &&
4238                     np->r_modaddr + MAXBSIZE > io_off &&
4239                     np->r_modaddr < io_off + io_len) {
4240                         page_t *plist;
4241                         /*
4242                          * A write is in progress for this region of the file.
4243                          * If we did not detect RMODINPROGRESS here then this
4244                          * path through smbfs_putapage() would eventually go to
4245                          * smbfs_bio() and may not write out all of the data
4246                          * in the pages. We end up losing data. So we decide
4247                          * to set the modified bit on each page in the page
4248                          * list and mark the rnode with RDIRTY. This write
4249                          * will be restarted at some later time.
4250                          */
4251                         plist = pp;
4252                         while (plist != NULL) {
4253                                 pp = plist;
4254                                 page_sub(&plist, pp);
4255                                 hat_setmod(pp);
4256                                 page_io_unlock(pp);
4257                                 page_unlock(pp);
4258                         }
4259                         np->r_flags |= RDIRTY;
4260                         mutex_exit(&np->r_statelock);
4261                         if (offp)
4262                                 *offp = io_off;
4263                         if (lenp)
4264                                 *lenp = io_len;
4265                         return (0);
4266                 }
4267                 mutex_exit(&np->r_statelock);
4268         }
4269 
4270         /*
4271          * NFS handles (flags & B_ASYNC) here...
4272          * (See nfs_async_putapage())
4273          *
4274          * This code section from: nfs3_sync_putapage()
4275          */
4276 
4277         flags |= B_WRITE;
4278 
4279         error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4280 
4281         if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
4282             error == EACCES) &&
4283             (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
4284                 if (!(np->r_flags & ROUTOFSPACE)) {
4285                         mutex_enter(&np->r_statelock);
4286                         np->r_flags |= ROUTOFSPACE;
4287                         mutex_exit(&np->r_statelock);
4288                 }
4289                 flags |= B_ERROR;
4290                 pvn_write_done(pp, flags);
4291                 /*
4292                  * If this was not an async thread, then try again to
4293                  * write out the pages, but this time, also destroy
4294                  * them whether or not the write is successful.  This
4295                  * will prevent memory from filling up with these
4296                  * pages and destroying them is the only alternative
4297                  * if they can't be written out.
4298                  *
4299                  * Don't do this if this is an async thread because
4300                  * when the pages are unlocked in pvn_write_done,
4301                  * some other thread could have come along, locked
4302                  * them, and queued for an async thread.  It would be
4303                  * possible for all of the async threads to be tied
4304                  * up waiting to lock the pages again and they would
4305                  * all already be locked and waiting for an async
4306                  * thread to handle them.  Deadlock.
4307                  */
4308                 if (!(flags & B_ASYNC)) {
4309                         error = smbfs_putpage(vp, io_off, io_len,
4310                             B_INVAL | B_FORCE, cr, NULL);
4311                 }
4312         } else {
4313                 if (error)
4314                         flags |= B_ERROR;
4315                 else if (np->r_flags & ROUTOFSPACE) {
4316                         mutex_enter(&np->r_statelock);
4317                         np->r_flags &= ~ROUTOFSPACE;
4318                         mutex_exit(&np->r_statelock);
4319                 }
4320                 pvn_write_done(pp, flags);
4321         }
4322 
4323         /* Now more code from: nfs3_putapage */
4324 
4325         if (offp)
4326                 *offp = io_off;
4327         if (lenp)
4328                 *lenp = io_len;
4329 
4330         return (error);
4331 }
4332 
4333 /*
4334  * NFS has this in nfs_client.c (shared by v2,v3,...)
4335  * We have it here so smbfs_putapage can be file scope.
4336  */
4337 void
4338 smbfs_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
4339 {
4340         smbnode_t *np;
4341 
4342         np = VTOSMB(vp);
4343 
4344         mutex_enter(&np->r_statelock);
4345         while (np->r_flags & RTRUNCATE)
4346                 cv_wait(&np->r_cv, &np->r_statelock);
4347         np->r_flags |= RTRUNCATE;
4348 
4349         if (off == (u_offset_t)0) {
4350                 np->r_flags &= ~RDIRTY;
4351                 if (!(np->r_flags & RSTALE))
4352                         np->r_error = 0;
4353         }
4354         /* Here NFSv3 has np->r_truncaddr = off; */
4355         mutex_exit(&np->r_statelock);
4356 
4357         (void) pvn_vplist_dirty(vp, off, smbfs_putapage,
4358             B_INVAL | B_TRUNC, cr);
4359 
4360         mutex_enter(&np->r_statelock);
4361         np->r_flags &= ~RTRUNCATE;
4362         cv_broadcast(&np->r_cv);
4363         mutex_exit(&np->r_statelock);
4364 }
4365 
/* Like nfs3_map */

/*
 * VOP_MAP: establish a memory mapping of [off, off+len) on vp into the
 * caller's address space.  Refuses when the file is marked non-cachable
 * (VNOCACHE, set while byte-range locks exist) or when mandatory locking
 * would conflict with mmap.
 */
/* ARGSUSED */
static int
smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
	size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
	cred_t *cr, caller_context_t *ct)
{
	segvn_crargs_t	vn_a;
	struct vattr	va;
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	int		error;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	/* Refuse cross-zone access and dead/unmounted file systems. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/* Reject negative or overflowing offset ranges. */
	if (off < 0 || off + (ssize_t)len < 0)
		return (ENXIO);

	if (vp->v_type != VREG)
		return (ENODEV);

	/*
	 * NFS does close-to-open consistency stuff here.
	 * Just get (possibly cached) attributes.
	 */
	va.va_mask = AT_ALL;
	if ((error = smbfsgetattr(vp, &va, cr)) != 0)
		return (error);

	/*
	 * Check to see if the vnode is currently marked as not cachable.
	 * This means portions of the file are locked (through VOP_FRLOCK).
	 * In this case the map request must be refused.  We use
	 * rp->r_lkserlock to avoid a race with concurrent lock requests.
	 */
	/*
	 * Atomically increment r_inmap after acquiring r_rwlock. The
	 * idea here is to acquire r_rwlock to block read/write and
	 * not to protect r_inmap. r_inmap will inform smbfs_read/write()
	 * that we are in smbfs_map(). Now, r_rwlock is acquired in order
	 * and we can prevent the deadlock that would have occurred
	 * when smbfs_addmap() would have acquired it out of order.
	 *
	 * Since we are not protecting r_inmap by any lock, we do not
	 * hold any lock when we decrement it. We atomically decrement
	 * r_inmap after we release r_lkserlock.  Note that rwlock is
	 * re-entered as writer in smbfs_addmap (called via as_map).
	 */

	if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
		return (EINTR);
	atomic_inc_uint(&np->r_inmap);
	smbfs_rw_exit(&np->r_rwlock);

	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
		atomic_dec_uint(&np->r_inmap);
		return (EINTR);
	}

	if (vp->v_flag & VNOCACHE) {
		error = EAGAIN;
		goto done;
	}

	/*
	 * Don't allow concurrent locks and mapping if mandatory locking is
	 * enabled.
	 */
	if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
	    MANDLOCK(vp, va.va_mode)) {
		error = EAGAIN;
		goto done;
	}

	/* Pick a user address range, then create the segvn segment. */
	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		goto done;
	}

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = (flags & MAP_TYPE);
	vn_a.prot = (uchar_t)prot;
	vn_a.maxprot = (uchar_t)maxprot;
	vn_a.flags = (flags & ~MAP_TYPE);
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);

done:
	smbfs_rw_exit(&np->r_lkserlock);
	atomic_dec_uint(&np->r_inmap);
	return (error);
}
4477 
4478 /* ARGSUSED */
4479 static int
4480 smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4481         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4482         cred_t *cr, caller_context_t *ct)
4483 {
4484         smbnode_t *np = VTOSMB(vp);
4485         boolean_t inc_fidrefs = B_FALSE;
4486 
4487         /*
4488          * When r_mapcnt goes from zero to non-zero,
4489          * increment n_fidrefs
4490          */
4491         mutex_enter(&np->r_statelock);
4492         if (np->r_mapcnt == 0)
4493                 inc_fidrefs = B_TRUE;
4494         np->r_mapcnt += btopr(len);
4495         mutex_exit(&np->r_statelock);
4496 
4497         if (inc_fidrefs) {
4498                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4499                 np->n_fidrefs++;
4500                 smbfs_rw_exit(&np->r_lkserlock);
4501         }
4502 
4503         return (0);
4504 }
4505 
4506 /*
4507  * Use an address space callback to flush pages dirty pages after unmap,
4508  * which we can't do directly in smbfs_delmap due to locking issues.
4509  */
4510 typedef struct smbfs_delmap_args {
4511         vnode_t                 *vp;
4512         cred_t                  *cr;
4513         offset_t                off;
4514         caddr_t                 addr;
4515         size_t                  len;
4516         uint_t                  prot;
4517         uint_t                  maxprot;
4518         uint_t                  flags;
4519         boolean_t               dec_fidrefs;
4520 } smbfs_delmap_args_t;
4521 
/*
 * VOP_DELMAP: tear down part of a mapping of vp.  The real work
 * (flushing dirty pages, dropping the FID reference) must happen in
 * smbfs_delmap_callback after the unmap completes, because the address
 * space lock is held here.
 */
/* ARGSUSED */
static int
smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
	size_t len, uint_t prot, uint_t maxprot, uint_t flags,
	cred_t *cr, caller_context_t *ct)
{
	smbnode_t *np = VTOSMB(vp);
	smbfs_delmap_args_t	*dmapp;
	int error;

	/* Package up the arguments for the callback. */
	dmapp = kmem_zalloc(sizeof (*dmapp), KM_SLEEP);

	dmapp->vp = vp;
	dmapp->off = off;
	dmapp->addr = addr;
	dmapp->len = len;
	dmapp->prot = prot;
	dmapp->maxprot = maxprot;
	dmapp->flags = flags;
	dmapp->cr = cr;
	dmapp->dec_fidrefs = B_FALSE;

	/*
	 * When r_mapcnt returns to zero, arrange for the
	 * callback to decrement n_fidrefs
	 */
	mutex_enter(&np->r_statelock);
	np->r_mapcnt -= btopr(len);
	ASSERT(np->r_mapcnt >= 0);
	if (np->r_mapcnt == 0)
		dmapp->dec_fidrefs = B_TRUE;
	mutex_exit(&np->r_statelock);

	error = as_add_callback(as, smbfs_delmap_callback, dmapp,
	    AS_UNMAP_EVENT, addr, len, KM_SLEEP);
	if (error != 0) {
		/*
		 * So sad, no callback is coming. Can't flush pages
		 * in delmap (as locks).  Just handle n_fidrefs.
		 */
		cmn_err(CE_NOTE, "smbfs_delmap(%p) "
		    "as_add_callback err=%d",
		    (void *)vp, error);

		if (dmapp->dec_fidrefs) {
			struct smb_cred scred;

			(void) smbfs_rw_enter_sig(&np->r_lkserlock,
			    RW_WRITER, 0);
			smb_credinit(&scred, dmapp->cr);

			smbfs_rele_fid(np, &scred);

			smb_credrele(&scred);
			smbfs_rw_exit(&np->r_lkserlock);
		}
		kmem_free(dmapp, sizeof (*dmapp));
	}

	/* Note: delmap returns success even if the callback failed. */
	return (0);
}
4583 
4584 /*
4585  * Remove some pages from an mmap'd vnode.  Flush any
4586  * dirty pages in the unmapped range.
4587  */
4588 /* ARGSUSED */
4589 static void
4590 smbfs_delmap_callback(struct as *as, void *arg, uint_t event)
4591 {
4592         vnode_t                 *vp;
4593         smbnode_t               *np;
4594         smbmntinfo_t            *smi;
4595         smbfs_delmap_args_t     *dmapp = arg;
4596 
4597         vp = dmapp->vp;
4598         np = VTOSMB(vp);
4599         smi = VTOSMI(vp);
4600 
4601         /* Decremented r_mapcnt in smbfs_delmap */
4602 
4603         /*
4604          * Initiate a page flush and potential commit if there are
4605          * pages, the file system was not mounted readonly, the segment
4606          * was mapped shared, and the pages themselves were writeable.
4607          *
4608          * mark RDIRTY here, will be used to check if a file is dirty when
4609          * unmount smbfs
4610          */
4611         if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
4612             dmapp->flags == MAP_SHARED && (dmapp->maxprot & PROT_WRITE)) {
4613                 mutex_enter(&np->r_statelock);
4614                 np->r_flags |= RDIRTY;
4615                 mutex_exit(&np->r_statelock);
4616 
4617                 /*
4618                  * Need to finish the putpage before we
4619                  * close the OtW FID needed for I/O.
4620                  */
4621                 (void) smbfs_putpage(vp, dmapp->off, dmapp->len, 0,
4622                     dmapp->cr, NULL);
4623         }
4624 
4625         if ((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO))
4626                 (void) smbfs_putpage(vp, dmapp->off, dmapp->len,
4627                     B_INVAL, dmapp->cr, NULL);
4628 
4629         /*
4630          * If r_mapcnt went to zero, drop our FID ref now.
4631          * On the last fidref, this does an OtW close.
4632          */
4633         if (dmapp->dec_fidrefs) {
4634                 struct smb_cred scred;
4635 
4636                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4637                 smb_credinit(&scred, dmapp->cr);
4638 
4639                 smbfs_rele_fid(np, &scred);
4640 
4641                 smb_credrele(&scred);
4642                 smbfs_rw_exit(&np->r_lkserlock);
4643         }
4644 
4645         (void) as_delete_callback(as, arg);
4646         kmem_free(dmapp, sizeof (*dmapp));
4647 }
4648 
4649 /* No smbfs_pageio() or smbfs_dispose() ops. */
4650 
4651 /* misc. ******************************************************** */
4652 
4653 
4654 /*
4655  * XXX
4656  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
4657  */
4658 static int
4659 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4660         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
4661         caller_context_t *ct)
4662 {
4663         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4664                 return (EIO);
4665 
4666         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4667                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4668         else
4669                 return (ENOSYS);
4670 }
4671 
4672 /*
4673  * Free storage space associated with the specified vnode.  The portion
4674  * to be freed is specified by bfp->l_start and bfp->l_len (already
4675  * normalized to a "whence" of 0).
4676  *
4677  * Called by fcntl(fd, F_FREESP, lkp) for libc:ftruncate, etc.
4678  */
4679 /* ARGSUSED */
4680 static int
4681 smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4682         offset_t offset, cred_t *cr, caller_context_t *ct)
4683 {
4684         int             error;
4685         smbmntinfo_t    *smi;
4686 
4687         smi = VTOSMI(vp);
4688 
4689         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4690                 return (EIO);
4691 
4692         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4693                 return (EIO);
4694 
4695         /* Caller (fcntl) has checked v_type */
4696         ASSERT(vp->v_type == VREG);
4697         if (cmd != F_FREESP)
4698                 return (EINVAL);
4699 
4700         /*
4701          * Like NFS3, no 32-bit offset checks here.
4702          * Our SMB layer takes care to return EFBIG
4703          * when it has to fallback to a 32-bit call.
4704          */
4705 
4706         error = convoff(vp, bfp, 0, offset);
4707         if (!error) {
4708                 ASSERT(bfp->l_start >= 0);
4709                 if (bfp->l_len == 0) {
4710                         struct vattr va;
4711 
4712                         /*
4713                          * ftruncate should not change the ctime and
4714                          * mtime if we truncate the file to its
4715                          * previous size.
4716                          */
4717                         va.va_mask = AT_SIZE;
4718                         error = smbfsgetattr(vp, &va, cr);
4719                         if (error || va.va_size == bfp->l_start)
4720                                 return (error);
4721                         va.va_mask = AT_SIZE;
4722                         va.va_size = bfp->l_start;
4723                         error = smbfssetattr(vp, &va, 0, cr);
4724                         /* SMBFS_VNEVENT... */
4725                 } else
4726                         error = EINVAL;
4727         }
4728 
4729         return (error);
4730 }
4731 
4732 
4733 /* ARGSUSED */
4734 static int
4735 smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
4736 {
4737 
4738         return (ENOSYS);
4739 }
4740 
4741 
4742 /* ARGSUSED */
4743 static int
4744 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4745         caller_context_t *ct)
4746 {
4747         vfs_t *vfs;
4748         smbmntinfo_t *smi;
4749         struct smb_share *ssp;
4750 
4751         vfs = vp->v_vfsp;
4752         smi = VFTOSMI(vfs);
4753 
4754         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4755                 return (EIO);
4756 
4757         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4758                 return (EIO);
4759 
4760         switch (cmd) {
4761         case _PC_FILESIZEBITS:
4762                 ssp = smi->smi_share;
4763                 if (SSTOVC(ssp)->vc_sopt.sv_caps & SMB_CAP_LARGE_FILES)
4764                         *valp = 64;
4765                 else
4766                         *valp = 32;
4767                 break;
4768 
4769         case _PC_LINK_MAX:
4770                 /* We only ever report one link to an object */
4771                 *valp = 1;
4772                 break;
4773 
4774         case _PC_ACL_ENABLED:
4775                 /*
4776                  * Always indicate that ACLs are enabled and
4777                  * that we support ACE_T format, otherwise
4778                  * libsec will ask for ACLENT_T format data
4779                  * which we don't support.
4780                  */
4781                 *valp = _ACL_ACE_ENABLED;
4782                 break;
4783 
4784         case _PC_SYMLINK_MAX:   /* No symlinks until we do Unix extensions */
4785                 *valp = 0;
4786                 break;
4787 
4788         case _PC_XATTR_EXISTS:
4789                 if (vfs->vfs_flag & VFS_XATTR) {
4790                         *valp = smbfs_xa_exists(vp, cr);
4791                         break;
4792                 }
4793                 return (EINVAL);
4794 
4795         case _PC_SATTR_ENABLED:
4796         case _PC_SATTR_EXISTS:
4797                 *valp = 1;
4798                 break;
4799 
4800         case _PC_TIMESTAMP_RESOLUTION:
4801                 /*
4802                  * Windows times are tenths of microseconds
4803                  * (multiples of 100 nanoseconds).
4804                  */
4805                 *valp = 100L;
4806                 break;
4807 
4808         default:
4809                 return (fs_pathconf(vp, cmd, valp, cr, ct));
4810         }
4811         return (0);
4812 }
4813 
4814 /* ARGSUSED */
4815 static int
4816 smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4817         caller_context_t *ct)
4818 {
4819         vfs_t *vfsp;
4820         smbmntinfo_t *smi;
4821         int     error;
4822         uint_t  mask;
4823 
4824         vfsp = vp->v_vfsp;
4825         smi = VFTOSMI(vfsp);
4826 
4827         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4828                 return (EIO);
4829 
4830         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4831                 return (EIO);
4832 
4833         /*
4834          * Our _pathconf indicates _ACL_ACE_ENABLED,
4835          * so we should only see VSA_ACE, etc here.
4836          * Note: vn_create asks for VSA_DFACLCNT,
4837          * and it expects ENOSYS and empty data.
4838          */
4839         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT |
4840             VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
4841         if (mask == 0)
4842                 return (ENOSYS);
4843 
4844         if (smi->smi_flags & SMI_ACL)
4845                 error = smbfs_acl_getvsa(vp, vsa, flag, cr);
4846         else
4847                 error = ENOSYS;
4848 
4849         if (error == ENOSYS)
4850                 error = fs_fab_acl(vp, vsa, flag, cr, ct);
4851 
4852         return (error);
4853 }
4854 
4855 /* ARGSUSED */
4856 static int
4857 smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4858         caller_context_t *ct)
4859 {
4860         vfs_t *vfsp;
4861         smbmntinfo_t *smi;
4862         int     error;
4863         uint_t  mask;
4864 
4865         vfsp = vp->v_vfsp;
4866         smi = VFTOSMI(vfsp);
4867 
4868         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4869                 return (EIO);
4870 
4871         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4872                 return (EIO);
4873 
4874         /*
4875          * Our _pathconf indicates _ACL_ACE_ENABLED,
4876          * so we should only see VSA_ACE, etc here.
4877          */
4878         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT);
4879         if (mask == 0)
4880                 return (ENOSYS);
4881 
4882         if (vfsp->vfs_flag & VFS_RDONLY)
4883                 return (EROFS);
4884 
4885         /*
4886          * Allow only the mount owner to do this.
4887          * See comments at smbfs_access_rwx.
4888          */
4889         error = secpolicy_vnode_setdac(cr, smi->smi_uid);
4890         if (error != 0)
4891                 return (error);
4892 
4893         if (smi->smi_flags & SMI_ACL)
4894                 error = smbfs_acl_setvsa(vp, vsa, flag, cr);
4895         else
4896                 error = ENOSYS;
4897 
4898         return (error);
4899 }
4900 
4901 
4902 /*
4903  * XXX
4904  * This op should eventually support PSARC 2007/268.
4905  */
4906 static int
4907 smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
4908         caller_context_t *ct)
4909 {
4910         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4911                 return (EIO);
4912 
4913         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4914                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
4915         else
4916                 return (ENOSYS);
4917 }
4918 
4919 
4920 /*
4921  * Most unimplemented ops will return ENOSYS because of fs_nosys().
4922  * The only ops where that won't work are ACCESS (due to open(2)
4923  * failures) and ... (anything else left?)
4924  */
4925 const fs_operation_def_t smbfs_vnodeops_template[] = {
4926         VOPNAME_OPEN,           { .vop_open = smbfs_open },
4927         VOPNAME_CLOSE,          { .vop_close = smbfs_close },
4928         VOPNAME_READ,           { .vop_read = smbfs_read },
4929         VOPNAME_WRITE,          { .vop_write = smbfs_write },
4930         VOPNAME_IOCTL,          { .vop_ioctl = smbfs_ioctl },
4931         VOPNAME_GETATTR,        { .vop_getattr = smbfs_getattr },
4932         VOPNAME_SETATTR,        { .vop_setattr = smbfs_setattr },
4933         VOPNAME_ACCESS,         { .vop_access = smbfs_access },
4934         VOPNAME_LOOKUP,         { .vop_lookup = smbfs_lookup },
4935         VOPNAME_CREATE,         { .vop_create = smbfs_create },
4936         VOPNAME_REMOVE,         { .vop_remove = smbfs_remove },
4937         VOPNAME_LINK,           { .vop_link = smbfs_link },
4938         VOPNAME_RENAME,         { .vop_rename = smbfs_rename },
4939         VOPNAME_MKDIR,          { .vop_mkdir = smbfs_mkdir },
4940         VOPNAME_RMDIR,          { .vop_rmdir = smbfs_rmdir },
4941         VOPNAME_READDIR,        { .vop_readdir = smbfs_readdir },
4942         VOPNAME_SYMLINK,        { .vop_symlink = smbfs_symlink },
4943         VOPNAME_READLINK,       { .vop_readlink = smbfs_readlink },
4944         VOPNAME_FSYNC,          { .vop_fsync = smbfs_fsync },
4945         VOPNAME_INACTIVE,       { .vop_inactive = smbfs_inactive },
4946         VOPNAME_FID,            { .vop_fid = smbfs_fid },
4947         VOPNAME_RWLOCK,         { .vop_rwlock = smbfs_rwlock },
4948         VOPNAME_RWUNLOCK,       { .vop_rwunlock = smbfs_rwunlock },
4949         VOPNAME_SEEK,           { .vop_seek = smbfs_seek },
4950         VOPNAME_FRLOCK,         { .vop_frlock = smbfs_frlock },
4951         VOPNAME_SPACE,          { .vop_space = smbfs_space },
4952         VOPNAME_REALVP,         { .vop_realvp = smbfs_realvp },
4953         VOPNAME_GETPAGE,        { .vop_getpage = smbfs_getpage },
4954         VOPNAME_PUTPAGE,        { .vop_putpage = smbfs_putpage },
4955         VOPNAME_MAP,            { .vop_map = smbfs_map },
4956         VOPNAME_ADDMAP,         { .vop_addmap = smbfs_addmap },
4957         VOPNAME_DELMAP,         { .vop_delmap = smbfs_delmap },
4958         VOPNAME_DUMP,           { .error = fs_nosys }, /* smbfs_dump, */
4959         VOPNAME_PATHCONF,       { .vop_pathconf = smbfs_pathconf },
4960         VOPNAME_PAGEIO,         { .error = fs_nosys }, /* smbfs_pageio, */
4961         VOPNAME_SETSECATTR,     { .vop_setsecattr = smbfs_setsecattr },
4962         VOPNAME_GETSECATTR,     { .vop_getsecattr = smbfs_getsecattr },
4963         VOPNAME_SHRLOCK,        { .vop_shrlock = smbfs_shrlock },
4964 #ifdef  SMBFS_VNEVENT
4965         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
4966 #endif
4967         { NULL, NULL }
4968 };