1 /*
   2  * Copyright (c) 2000-2001 Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34 
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  */
  38 
  39 #include <sys/systm.h>
  40 #include <sys/cred.h>
  41 #include <sys/vnode.h>
  42 #include <sys/vfs.h>
  43 #include <sys/filio.h>
  44 #include <sys/uio.h>
  45 #include <sys/dirent.h>
  46 #include <sys/errno.h>
  47 #include <sys/sunddi.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/kmem.h>
  50 #include <sys/cmn_err.h>
  51 #include <sys/vfs_opreg.h>
  52 #include <sys/policy.h>
  53 
  54 #include <sys/param.h>
  55 #include <sys/vm.h>
  56 #include <vm/seg_vn.h>
  57 #include <vm/pvn.h>
  58 #include <vm/as.h>
  59 #include <vm/hat.h>
  60 #include <vm/page.h>
  61 #include <vm/seg.h>
  62 #include <vm/seg_map.h>
  63 #include <vm/seg_kmem.h>
  64 #include <vm/seg_kpm.h>
  65 
  66 #include <netsmb/smb_osdep.h>
  67 #include <netsmb/smb.h>
  68 #include <netsmb/smb_conn.h>
  69 #include <netsmb/smb_subr.h>
  70 
  71 #include <smbfs/smbfs.h>
  72 #include <smbfs/smbfs_node.h>
  73 #include <smbfs/smbfs_subr.h>
  74 
  75 #include <sys/fs/smbfs_ioctl.h>
  76 #include <fs/fs_subr.h>
  77 
  78 /*
  79  * We assign directory offsets like the NFS client, where the
  80  * offset increments by _one_ after each directory entry.
  81  * Further, the entries "." and ".." are always at offsets
  82  * zero and one (respectively) and the "real" entries from
  83  * the server appear at offsets starting with two.  This
  84  * macro is used to initialize the n_dirofs field after
  85  * setting n_dirseq with a _findopen call.
  86  */
  87 #define FIRST_DIROFS    2
  88 
  89 /*
  90  * These characters are illegal in NTFS file names.
  91  * ref: http://support.microsoft.com/kb/147438
  92  *
  93  * Careful!  The check in the XATTR case skips the
  94  * first character to allow colon in XATTR names.
  95  */
  96 static const char illegal_chars[] = {
  97         ':',    /* colon - keep this first! */
  98         '\\',   /* back slash */
  99         '/',    /* slash */
 100         '*',    /* asterisk */
 101         '?',    /* question mark */
 102         '"',    /* double quote */
 103         '<', /* less than sign */
 104         '>', /* greater than sign */
 105         '|',    /* vertical bar */
 106         0
 107 };
 108 
 109 /*
 110  * Turning this on causes nodes to be created in the cache
 111  * during directory listings, normally avoiding a second
 112  * OtW attribute fetch just after a readdir.
 113  */
 114 int smbfs_fastlookup = 1;
 115 
 116 /* local static function defines */
 117 
 118 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
 119                         cred_t *);
 120 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
 121                         int cache_ok, caller_context_t *);
 122 static int      smbfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm,
 123                         cred_t *cr, caller_context_t *);
 124 static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
 125 static int      smbfs_accessx(void *, int, cred_t *);
 126 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
 127                         caller_context_t *);
 128 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
 129 
 130 /*
 131  * These are the vnode ops routines which implement the vnode interface to
 132  * the networked file system.  These routines just take their parameters,
 133  * make them look networkish by putting the right info into interface structs,
 134  * and then calling the appropriate remote routine(s) to do the work.
 135  *
 136  * Note on directory name lookup cacheing:  If we detect a stale fhandle,
 137  * we purge the directory cache relative to that vnode.  This way, the
 138  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
 139  * more details on smbnode locking.
 140  */
 141 
 142 static int      smbfs_open(vnode_t **, int, cred_t *, caller_context_t *);
 143 static int      smbfs_close(vnode_t *, int, int, offset_t, cred_t *,
 144                         caller_context_t *);
 145 static int      smbfs_read(vnode_t *, struct uio *, int, cred_t *,
 146                         caller_context_t *);
 147 static int      smbfs_write(vnode_t *, struct uio *, int, cred_t *,
 148                         caller_context_t *);
 149 static int      smbfs_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
 150                         caller_context_t *);
 151 static int      smbfs_getattr(vnode_t *, struct vattr *, int, cred_t *,
 152                         caller_context_t *);
 153 static int      smbfs_setattr(vnode_t *, struct vattr *, int, cred_t *,
 154                         caller_context_t *);
 155 static int      smbfs_access(vnode_t *, int, int, cred_t *, caller_context_t *);
 156 static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
 157 static void     smbfs_inactive(vnode_t *, cred_t *, caller_context_t *);
 158 static int      smbfs_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
 159                         int, vnode_t *, cred_t *, caller_context_t *,
 160                         int *, pathname_t *);
 161 static int      smbfs_create(vnode_t *, char *, struct vattr *, enum vcexcl,
 162                         int, vnode_t **, cred_t *, int, caller_context_t *,
 163                         vsecattr_t *);
 164 static int      smbfs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
 165                         int);
 166 static int      smbfs_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
 167                         caller_context_t *, int);
 168 static int      smbfs_mkdir(vnode_t *, char *, struct vattr *, vnode_t **,
 169                         cred_t *, caller_context_t *, int, vsecattr_t *);
 170 static int      smbfs_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
 171                         caller_context_t *, int);
 172 static int      smbfs_readdir(vnode_t *, struct uio *, cred_t *, int *,
 173                         caller_context_t *, int);
 174 static int      smbfs_rwlock(vnode_t *, int, caller_context_t *);
 175 static void     smbfs_rwunlock(vnode_t *, int, caller_context_t *);
 176 static int      smbfs_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
 177 static int      smbfs_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
 178                         struct flk_callback *, cred_t *, caller_context_t *);
 179 static int      smbfs_space(vnode_t *, int, struct flock64 *, int, offset_t,
 180                         cred_t *, caller_context_t *);
 181 static int      smbfs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
 182                         caller_context_t *);
 183 static int      smbfs_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
 184                         caller_context_t *);
 185 static int      smbfs_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
 186                         caller_context_t *);
 187 static int      smbfs_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
 188                         caller_context_t *);
 189 
 190 static int uio_page_mapin(uio_t *uiop, page_t *pp);
 191 
 192 static void uio_page_mapout(uio_t *uiop, page_t *pp);
 193 
 194 static int smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
 195         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
 196         caller_context_t *ct);
 197 
 198 static int smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
 199         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
 200         caller_context_t *ct);
 201 
 202 static int smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
 203         size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
 204         caller_context_t *ct);
 205 
 206 static int smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags,
 207         cred_t *cr, caller_context_t *ct);
 208 
 209 static int smbfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
 210         int flags, cred_t *cr);
 211 
 212 static int smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
 213         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
 214         enum seg_rw rw, cred_t *cr, caller_context_t *ct);
 215 
 216 static int smbfs_getapage(vnode_t *vp, u_offset_t off, size_t len,
 217         uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
 218         enum seg_rw rw, cred_t *cr);
 219 
 220 static int writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uiop, int pgcreated);
 221 
 222 /* Dummy function to use until correct function is ported in */
 223 int noop_vnodeop() {
 224         return (0);
 225 }
 226 
 227 struct vnodeops *smbfs_vnodeops = NULL;
 228 
 229 /*
 230  * Most unimplemented ops will return ENOSYS because of fs_nosys().
 231  * The only ops where that won't work are ACCESS (due to open(2)
 232  * failures) and ... (anything else left?)
 233  */
 234 const fs_operation_def_t smbfs_vnodeops_template[] = {
 235         { VOPNAME_OPEN,         { .vop_open = smbfs_open } },
 236         { VOPNAME_CLOSE,        { .vop_close = smbfs_close } },
 237         { VOPNAME_READ,         { .vop_read = smbfs_read } },
 238         { VOPNAME_WRITE,        { .vop_write = smbfs_write } },
 239         { VOPNAME_IOCTL,        { .vop_ioctl = smbfs_ioctl } },
 240         { VOPNAME_GETATTR,      { .vop_getattr = smbfs_getattr } },
 241         { VOPNAME_SETATTR,      { .vop_setattr = smbfs_setattr } },
 242         { VOPNAME_ACCESS,       { .vop_access = smbfs_access } },
 243         { VOPNAME_LOOKUP,       { .vop_lookup = smbfs_lookup } },
 244         { VOPNAME_CREATE,       { .vop_create = smbfs_create } },
 245         { VOPNAME_REMOVE,       { .vop_remove = smbfs_remove } },
 246         { VOPNAME_LINK,         { .error = fs_nosys } }, /* smbfs_link, */
 247         { VOPNAME_RENAME,       { .vop_rename = smbfs_rename } },
 248         { VOPNAME_MKDIR,        { .vop_mkdir = smbfs_mkdir } },
 249         { VOPNAME_RMDIR,        { .vop_rmdir = smbfs_rmdir } },
 250         { VOPNAME_READDIR,      { .vop_readdir = smbfs_readdir } },
 251         { VOPNAME_SYMLINK,      { .error = fs_nosys } }, /* smbfs_symlink, */
 252         { VOPNAME_READLINK,     { .error = fs_nosys } }, /* smbfs_readlink, */
 253         { VOPNAME_FSYNC,        { .vop_fsync = smbfs_fsync } },
 254         { VOPNAME_INACTIVE,     { .vop_inactive = smbfs_inactive } },
 255         { VOPNAME_FID,          { .error = fs_nosys } }, /* smbfs_fid, */
 256         { VOPNAME_RWLOCK,       { .vop_rwlock = smbfs_rwlock } },
 257         { VOPNAME_RWUNLOCK,     { .vop_rwunlock = smbfs_rwunlock } },
 258         { VOPNAME_SEEK,         { .vop_seek = smbfs_seek } },
 259         { VOPNAME_FRLOCK,       { .vop_frlock = smbfs_frlock } },
 260         { VOPNAME_SPACE,        { .vop_space = smbfs_space } },
 261         { VOPNAME_REALVP,       { .error = fs_nosys } }, /* smbfs_realvp, */
 262         { VOPNAME_GETPAGE,      { .vop_getpage = smbfs_getpage } }, /* smbfs_getpage, */
 263         { VOPNAME_PUTPAGE,      { .vop_putpage = smbfs_putpage } }, /* smbfs_putpage, */
 264         { VOPNAME_MAP,          { .vop_map = smbfs_map } }, /* smbfs_map, */
 265         { VOPNAME_ADDMAP,       { .vop_addmap = smbfs_addmap } }, /* smbfs_addmap, */
 266         { VOPNAME_DELMAP,       { .vop_delmap = smbfs_delmap } }, /* smbfs_delmap, */
 267         { VOPNAME_DISPOSE,      { .vop_dispose = fs_dispose}},
 268         { VOPNAME_DUMP,         { .error = fs_nosys } }, /* smbfs_dump, */
 269         { VOPNAME_PATHCONF,     { .vop_pathconf = smbfs_pathconf } },
 270         { VOPNAME_PAGEIO,       { .error = fs_nosys } }, /* smbfs_pageio, */
 271         { VOPNAME_SETSECATTR,   { .vop_setsecattr = smbfs_setsecattr } },
 272         { VOPNAME_GETSECATTR,   { .vop_getsecattr = smbfs_getsecattr } },
 273         { VOPNAME_SHRLOCK,      { .vop_shrlock = smbfs_shrlock } },
 274         { NULL, NULL }
 275 };
 276 
 277 /*
 278  * XXX
 279  * When new and relevant functionality is enabled, we should be
 280  * calling vfs_set_feature() to inform callers that pieces of
 281  * functionality are available, per PSARC 2007/227.
 282  */
 283 /* ARGSUSED */
 284 static int
 285 smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 286 {
 287         smbnode_t       *np;
 288         vnode_t         *vp;
 289         smbfattr_t      fa;
 290         u_int32_t       rights, rightsrcvd;
 291         u_int16_t       fid, oldfid;
 292         int             oldgenid;
 293         struct smb_cred scred;
 294         smbmntinfo_t    *smi;
 295         smb_share_t     *ssp;
 296         cred_t          *oldcr;
 297         int             tmperror;
 298         int             error = 0;
 299 
 300         vp = *vpp;
 301         np = VTOSMB(vp);
 302         smi = VTOSMI(vp);
 303         ssp = smi->smi_share;
 304 
 305         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 306                 return (EIO);
 307 
 308         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 309                 return (EIO);
 310 
 311         if (vp->v_type != VREG && vp->v_type != VDIR) { /* XXX VLNK? */
 312                 SMBVDEBUG("open eacces vtype=%d\n", vp->v_type);
 313                 return (EACCES);
 314         }
 315 
 316         /*
 317          * Get exclusive access to n_fid and related stuff.
 318          * No returns after this until out.
 319          */
 320         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
 321                 return (EINTR);
 322         smb_credinit(&scred, cr);
 323 
 324         /*
 325          * Keep track of the vnode type at first open.
 326          * It may change later, and we need close to do
 327          * cleanup for the type we opened.  Also deny
 328          * open of new types until old type is closed.
 329          * XXX: Per-open instance nodes whould help.
 330          */
 331         if (np->n_ovtype == VNON) {
 332                 ASSERT(np->n_dirrefs == 0);
 333                 ASSERT(np->n_fidrefs == 0);
 334         } else if (np->n_ovtype != vp->v_type) {
 335                 SMBVDEBUG("open n_ovtype=%d v_type=%d\n",
 336                     np->n_ovtype, vp->v_type);
 337                 error = EACCES;
 338                 goto out;
 339         }
 340 
 341         /*
 342          * Directory open.  See smbfs_readvdir()
 343          */
 344         if (vp->v_type == VDIR) {
 345                 if (np->n_dirseq == NULL) {
 346                         /* first open */
 347                         error = smbfs_smb_findopen(np, "*", 1,
 348                             SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
 349                             &scred, &np->n_dirseq);
 350                         if (error != 0)
 351                                 goto out;
 352                 }
 353                 np->n_dirofs = FIRST_DIROFS;
 354                 np->n_dirrefs++;
 355                 goto have_fid;
 356         }
 357 
 358         /*
 359          * If caller specified O_TRUNC/FTRUNC, then be sure to set
 360          * FWRITE (to drive successful setattr(size=0) after open)
 361          */
 362         if (flag & FTRUNC)
 363                 flag |= FWRITE;
 364 
 365         /*
 366          * If we already have it open, and the FID is still valid,
 367          * check whether the rights are sufficient for FID reuse.
 368          */
 369         if (np->n_fidrefs > 0 &&
 370             np->n_vcgenid == ssp->ss_vcgenid) {
 371                 int upgrade = 0;
 372 
 373                 if ((flag & FWRITE) &&
 374                     !(np->n_rights & SA_RIGHT_FILE_WRITE_DATA))
 375                         upgrade = 1;
 376                 if ((flag & FREAD) &&
 377                     !(np->n_rights & SA_RIGHT_FILE_READ_DATA))
 378                         upgrade = 1;
 379                 if (!upgrade) {
 380                         /*
 381                          *  the existing open is good enough
 382                          */
 383                         np->n_fidrefs++;
 384                         goto have_fid;
 385                 }
 386         }
 387         rights = np->n_fidrefs ? np->n_rights : 0;
 388 
 389         /*
 390          * we always ask for READ_CONTROL so we can always get the
 391          * owner/group IDs to satisfy a stat.  Ditto attributes.
 392          */
 393         rights |= (STD_RIGHT_READ_CONTROL_ACCESS |
 394             SA_RIGHT_FILE_READ_ATTRIBUTES);
 395         if ((flag & FREAD))
 396                 rights |= SA_RIGHT_FILE_READ_DATA;
 397         if ((flag & FWRITE))
 398                 rights |= SA_RIGHT_FILE_WRITE_DATA |
 399                     SA_RIGHT_FILE_APPEND_DATA |
 400                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
 401 
 402         bzero(&fa, sizeof (fa));
 403         error = smbfs_smb_open(np,
 404             NULL, 0, 0, /* name nmlen xattr */
 405             rights, &scred,
 406             &fid, &rightsrcvd, &fa);
 407         if (error)
 408                 goto out;
 409         smbfs_attrcache_fa(vp, &fa);
 410 
 411         /*
 412          * We have a new FID and access rights.
 413          */
 414         oldfid = np->n_fid;
 415         oldgenid = np->n_vcgenid;
 416         np->n_fid = fid;
 417         np->n_vcgenid = ssp->ss_vcgenid;
 418         np->n_rights = rightsrcvd;
 419         np->n_fidrefs++;
 420         if (np->n_fidrefs > 1 &&
 421             oldgenid == ssp->ss_vcgenid) {
 422                 /*
 423                  * We already had it open (presumably because
 424                  * it was open with insufficient rights.)
 425                  * Close old wire-open.
 426                  */
 427                 tmperror = smbfs_smb_close(ssp,
 428                     oldfid, NULL, &scred);
 429                 if (tmperror)
 430                         SMBVDEBUG("error %d closing %s\n",
 431                             tmperror, np->n_rpath);
 432         }
 433 
 434         /*
 435          * This thread did the open.
 436          * Save our credentials too.
 437          */
 438         mutex_enter(&np->r_statelock);
 439         oldcr = np->r_cred;
 440         np->r_cred = cr;
 441         crhold(cr);
 442         if (oldcr)
 443                 crfree(oldcr);
 444         mutex_exit(&np->r_statelock);
 445 
 446 have_fid:
 447         /*
 448          * Keep track of the vnode type at first open.
 449          * (see comments above)
 450          */
 451         if (np->n_ovtype == VNON)
 452                 np->n_ovtype = vp->v_type;
 453 
 454 out:
 455         smb_credrele(&scred);
 456         smbfs_rw_exit(&np->r_lkserlock);
 457         return (error);
 458 }
 459 
/*
 * VOP_CLOSE for smbfs.
 *
 * Cleans up local locks (if using local locking), then, when the
 * last file_t reference goes away (count == 1), drops the FID
 * reference under r_lkserlock, possibly flushing dirty pages and
 * doing the OtW close via smbfs_rele_fid().  Always returns 0
 * except for the cross-zone case (EIO).
 */
/*ARGSUSED*/
static int
smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
	caller_context_t *ct)
{
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	struct smb_cred scred;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	/*
	 * Don't "bail out" for VFS_UNMOUNTED here,
	 * as we want to do cleanup, etc.
	 */

	/*
	 * zone_enter(2) prevents processes from changing zones with SMBFS files
	 * open; if we happen to get here from the wrong zone we can't do
	 * anything over the wire.
	 */
	if (smi->smi_zone_ref.zref_zone != curproc->p_zone) {
		/*
		 * We could attempt to clean up locks, except we're sure
		 * that the current process didn't acquire any locks on
		 * the file: any attempt to lock a file belong to another zone
		 * will fail, and one can't lock an SMBFS file and then change
		 * zones, as that fails too.
		 *
		 * Returning an error here is the sane thing to do.  A
		 * subsequent call to VN_RELE() which translates to a
		 * smbfs_inactive() will clean up state: if the zone of the
		 * vnode's origin is still alive and kicking, an async worker
		 * thread will handle the request (from the correct zone), and
		 * everything (minus the final smbfs_getattr_otw() call) should
		 * be OK. If the zone is going away smbfs_async_inactive() will
		 * throw away cached pages inline.
		 */
		return (EIO);
	}

	/*
	 * If we are using local locking for this filesystem, then
	 * release all of the SYSV style record locks.  Otherwise,
	 * we are doing network locking and we need to release all
	 * of the network locks.  All of the locks held by this
	 * process on this file are released no matter what the
	 * incoming reference count is.
	 */
	if (smi->smi_flags & SMI_LLOCK) {
		pid_t pid = ddi_get_pid();
		cleanlocks(vp, pid, 0);
		cleanshares(vp, pid);
	}

	/*
	 * This (passed in) count is the ref. count from the
	 * user's file_t before the closef call (fio.c).
	 * We only care when the reference goes away.
	 */
	if (count > 1)
		return (0);

	/*
	 * Decrement the reference count for the FID
	 * and possibly do the OtW close.
	 *
	 * Exclusive lock for modifying n_fid stuff.
	 * Don't want this one ever interruptible.
	 */
	(void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
	smb_credinit(&scred, cr);

	/*
	 * If the FID ref. count is 1 and the count of mmapped pages
	 * isn't 0, don't call smbfs_rele_fid(), because it would do
	 * the OtW close.  With mapped pages outstanding, the pages
	 * may still be accessed after close(), so keep the FID
	 * valid, i.e., don't do the OtW close here.
	 * Don't worry about leaking the FID: when the vnode's count
	 * becomes 0, smbfs_inactive() will release the FID and
	 * eventually do the OtW close.
	 */
	if (np->n_fidrefs > 1) {
		smbfs_rele_fid(np, &scred);
	} else if (np->r_mapcnt == 0) {
		/*
		 * Before the OtW close, make sure dirty pages are
		 * written back.
		 */
		if ((flag & FWRITE) && vn_has_cached_data(vp)) {
			/*
			 * smbfs_putapage() will acquire a shared lock,
			 * so release the exclusive lock temporarily.
			 */
			smbfs_rw_exit(&np->r_lkserlock);

			(void) smbfs_putpage(vp, (offset_t) 0, 0, B_INVAL | B_ASYNC, cr, ct);

			/* acquire exclusive lock again. */
			(void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
		}
		smbfs_rele_fid(np, &scred);
	}

	smb_credrele(&scred);
	smbfs_rw_exit(&np->r_lkserlock);

	return (0);
}
 569 
 570 /*
 571  * Helper for smbfs_close.  Decrement the reference count
 572  * for an SMB-level file or directory ID, and when the last
 573  * reference for the fid goes away, do the OtW close.
 574  * Also called in smbfs_inactive (defensive cleanup).
 575  */
 576 static void
 577 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 578 {
 579         smb_share_t     *ssp;
 580         cred_t          *oldcr;
 581         struct smbfs_fctx *fctx;
 582         int             error;
 583         uint16_t ofid;
 584 
 585         ssp = np->n_mount->smi_share;
 586         error = 0;
 587 
 588         /* Make sure we serialize for n_dirseq use. */
 589         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 590 
 591         /*
 592          * Note that vp->v_type may change if a remote node
 593          * is deleted and recreated as a different type, and
 594          * our getattr may change v_type accordingly.
 595          * Now use n_ovtype to keep track of the v_type
 596          * we had during open (see comments above).
 597          */
 598         switch (np->n_ovtype) {
 599         case VDIR:
 600                 ASSERT(np->n_dirrefs > 0);
 601                 if (--np->n_dirrefs)
 602                         return;
 603                 if ((fctx = np->n_dirseq) != NULL) {
 604                         np->n_dirseq = NULL;
 605                         np->n_dirofs = 0;
 606                         error = smbfs_smb_findclose(fctx, scred);
 607                 }
 608                 break;
 609 
 610         case VREG:
 611                 ASSERT(np->n_fidrefs > 0);
 612                 if (--np->n_fidrefs)
 613                         return;
 614                 if ((ofid = np->n_fid) != SMB_FID_UNUSED) {
 615                         np->n_fid = SMB_FID_UNUSED;
 616                         /* After reconnect, n_fid is invalid */
 617                         if (np->n_vcgenid == ssp->ss_vcgenid) {
 618                                 error = smbfs_smb_close(
 619                                     ssp, ofid, NULL, scred);
 620                         }
 621                 }
 622                 break;
 623 
 624         default:
 625                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
 626                 break;
 627         }
 628         if (error) {
 629                 SMBVDEBUG("error %d closing %s\n",
 630                     error, np->n_rpath);
 631         }
 632 
 633         /* Allow next open to use any v_type. */
 634         np->n_ovtype = VNON;
 635 
 636         /*
 637          * Other "last close" stuff.
 638          */
 639         mutex_enter(&np->r_statelock);
 640         if (np->n_flag & NATTRCHANGED)
 641                 smbfs_attrcache_rm_locked(np);
 642         oldcr = np->r_cred;
 643         np->r_cred = NULL;
 644         mutex_exit(&np->r_statelock);
 645         if (oldcr != NULL)
 646                 crfree(oldcr);
 647 }
 648 
/*
 * VOP_READ for smbfs.
 *
 * Fetches current size/mtime from the server, clamps the request to
 * EOF (by temporarily shrinking uio_resid), and then reads either:
 *   - directly over the wire (smb_rwuio) when the vnode is
 *     non-cacheable (VNOCACHE, or RDIRECTIO with no maps/pages), or
 *   - through segmap/vpm, one MAXBSIZE-aligned chunk at a time.
 *
 * Caller holds r_rwlock as reader (asserted below).
 * Returns 0 or errno (EIO, EISDIR, EINVAL, EINTR, ESTALE, ...).
 */
/* ARGSUSED */
static int
smbfs_read(vnode_t * vp, struct uio * uiop, int ioflag, cred_t * cr,
	   caller_context_t * ct)
{
	struct smb_cred scred;
	struct vattr	va;
	smbnode_t      *np;
	smbmntinfo_t   *smi;
	smb_share_t    *ssp;
	offset_t	endoff;
	ssize_t		past_eof;
	int		error;

	caddr_t		base;
	u_offset_t	blk;
	u_offset_t	boff;
	size_t		blen;
	uint_t		flags;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);
	ssp = smi->smi_share;

	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));

	if (vp->v_type != VREG)
		return (EISDIR);

	if (uiop->uio_resid == 0)
		return (0);

	/*
	 * Like NFS3, just check for 63-bit overflow. Our SMB layer takes
	 * care to return EFBIG when it has to fallback to a 32-bit call.
	 */
	endoff = uiop->uio_loffset + uiop->uio_resid;
	if (uiop->uio_loffset < 0 || endoff < 0)
		return (EINVAL);

	/* get vnode attributes from server */
	va.va_mask = AT_SIZE | AT_MTIME;
	if (error = smbfsgetattr(vp, &va, cr))
		return (error);

	/* Update mtime with mtime from server here? */

	/* if offset is beyond EOF, read nothing */
	if (uiop->uio_loffset >= va.va_size)
		return (0);

	/*
	 * Limit the read to the remaining file size.  Do this by
	 * temporarily reducing uio_resid by the amount that lies
	 * beyond the EOF (restored before returning).
	 */
	if (endoff > va.va_size) {
		past_eof = (ssize_t) (endoff - va.va_size);
		uiop->uio_resid -= past_eof;
	} else
		past_eof = 0;

	/* Bypass the VM if vnode is non-cacheable. */
	if ((vp->v_flag & VNOCACHE) ||
	    ((np->r_flags & RDIRECTIO) &&
	     np->r_mapcnt == 0 &&
	     !(vn_has_cached_data(vp)))) {

		/* Shared lock for n_fid use in smb_rwuio */
		if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
			return (EINTR);
		smb_credinit(&scred, cr);

		/* After reconnect, n_fid is invalid */
		if (np->n_vcgenid != ssp->ss_vcgenid)
			error = ESTALE;
		else
			error = smb_rwuio(ssp, np->n_fid, UIO_READ,
					  uiop, &scred, smb_timo_read);

		smb_credrele(&scred);
		smbfs_rw_exit(&np->r_lkserlock);

	} else {

		/* Do I/O through segmap. */
		do {
			/* MAXBSIZE-aligned chunk: base block + offset within it. */
			blk = uiop->uio_loffset & MAXBMASK;
			boff = uiop->uio_loffset & MAXBOFFSET;
			blen = MIN(MAXBSIZE - boff, uiop->uio_resid);

			if (vpm_enable) {

				error = vpm_data_copy(vp, blk + boff, blen, uiop, 1, NULL, 0, S_READ);

			} else {

				base = segmap_getmapflt(segkmap, vp, blk + boff, blen, 1, S_READ);

				error = uiomove(base + boff, blen, UIO_READ, uiop);
			}

			if (!error) {
				mutex_enter(&np->r_statelock);
				/*
				 * Hint SM_DONTNEED when we finished a full
				 * chunk or reached EOF - we won't reread it.
				 */
				if ((blen + boff == MAXBSIZE) || (uiop->uio_loffset == np->r_size)) {
					flags = SM_DONTNEED;
				} else {
					flags = 0;
				}
				mutex_exit(&np->r_statelock);
			} else {
				flags = 0;
			}
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, blk + boff, blen, flags);
			} else {
				(void) segmap_release(segkmap, base, flags);
			}
		} while (!error && uiop->uio_resid > 0);
	}

	/* undo adjustment of resid */
	uiop->uio_resid += past_eof;

	return (error);
}
 780 
 781 /* ARGSUSED */
 782 static int
 783 smbfs_write(vnode_t * vp, struct uio * uiop, int ioflag, cred_t * cr,
 784             caller_context_t * ct)
 785 {
 786         struct smb_cred scred;
 787         struct vattr    va;
 788         smbnode_t      *np;
 789         smbmntinfo_t   *smi;
 790         smb_share_t    *ssp;
 791         offset_t        endoff, limit;
 792         ssize_t         past_limit;
 793         int             error, timo;
 794 
 795         caddr_t         base;
 796         u_offset_t      blk;
 797         u_offset_t      boff;
 798         size_t          blen;
 799         uint_t          flags;
 800 
 801         u_offset_t      last_off;
 802         size_t          last_resid;
 803 
 804         np = VTOSMB(vp);
 805         smi = VTOSMI(vp);
 806         ssp = smi->smi_share;
 807 
 808         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 809                 return (EIO);
 810 
 811         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 812                 return (EIO);
 813 
 814         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
 815 
 816         if (vp->v_type != VREG)
 817                 return (EISDIR);
 818 
 819         if (uiop->uio_resid == 0)
 820                 return (0);
 821 
 822         /*
 823          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
 824          */
 825         if (ioflag & (FAPPEND | FSYNC)) {
 826                 if (np->n_flag & NMODIFIED) {
 827                         smbfs_attrcache_remove(np);
 828                         /* XXX: smbfs_vinvalbuf? */
 829                 }
 830         }
 831         if (ioflag & FAPPEND) {
 832                 /*
 833                  * File size can be changed by another client
 834                  */
 835                 va.va_mask = AT_SIZE;
 836                 if (error = smbfsgetattr(vp, &va, cr))
 837                         return (error);
 838                 uiop->uio_loffset = va.va_size;
 839         }
 840         /*
 841          * Like NFS3, just check for 63-bit overflow.
 842          */
 843         endoff = uiop->uio_loffset + uiop->uio_resid;
 844         if (uiop->uio_loffset < 0 || endoff < 0)
 845                 return (EINVAL);
 846 
 847         /*
 848          * Check to make sure that the process will not exceed its limit on
 849          * file size.  It is okay to write up to the limit, but not beyond.
 850          * Thus, the write which reaches the limit will be short and the next
 851          * write will return an error.
 852          * 
 853          * So if we're starting at or beyond the limit, EFBIG. Otherwise,
 854          * temporarily reduce resid to the amount the falls after the limit.
 855          */
 856         limit = uiop->uio_llimit;
 857         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 858                 limit = MAXOFFSET_T;
 859         if (uiop->uio_loffset >= limit)
 860                 return (EFBIG);
 861         if (endoff > limit) {
 862                 past_limit = (ssize_t) (endoff - limit);
 863                 uiop->uio_resid -= past_limit;
 864         } else
 865                 past_limit = 0;
 866 
 867         /* Bypass the VM if vnode is non-cacheable. */
 868         if ((vp->v_flag & VNOCACHE) ||
 869             ((np->r_flags & RDIRECTIO) &&
 870              np->r_mapcnt == 0 &&
 871              !(vn_has_cached_data(vp)))) {
 872 
 873                 /* Timeout: longer for append. */
 874                 timo = smb_timo_write;
 875                 if (endoff > np->r_size)
 876                         timo = smb_timo_append;
 877 
 878                 /* Shared lock for n_fid use in smb_rwuio */
 879                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 880                         return (EINTR);
 881                 smb_credinit(&scred, cr);
 882 
 883                 /* After reconnect, n_fid is invalid */
 884                 if (np->n_vcgenid != ssp->ss_vcgenid)
 885                         error = ESTALE;
 886                 else
 887                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
 888                                           uiop, &scred, timo);
 889 
 890                 if (error == 0) {
 891                         mutex_enter(&np->r_statelock);
 892                         np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
 893                         if (uiop->uio_loffset > (offset_t) np->r_size)
 894                                 np->r_size = (len_t) uiop->uio_loffset;
 895                         mutex_exit(&np->r_statelock);
 896                         if (ioflag & (FSYNC | FDSYNC)) {
 897                                 /* Don't error the I/O if this fails. */
 898                                 (void) smbfs_smb_flush(np, &scred);
 899                         }
 900                 }
 901                 smb_credrele(&scred);
 902                 smbfs_rw_exit(&np->r_lkserlock);
 903 
 904         } else {
 905 
 906                 /* Do I/O through segmap. */
 907                 size_t          bsize = vp->v_vfsp->vfs_bsize;
 908 
 909                 do {
 910                         blk = uiop->uio_loffset & MAXBMASK;
 911                         boff = uiop->uio_loffset & MAXBOFFSET;
 912                         blen = MIN(MAXBSIZE - boff, uiop->uio_resid);
 913 
 914                         last_off = uiop->uio_loffset;
 915                         last_resid = uiop->uio_resid;
 916 
 917                         uio_prefaultpages((ssize_t) blen, uiop);
 918 
 919                         if (vpm_enable) {
 920 
 921                                 error = writenp(np, NULL, blen, uiop, 0);
 922 
 923                         } else {
 924 
 925                                 if (segmap_kpm) {
 926                                         u_offset_t      poff = uiop->uio_loffset & PAGEOFFSET;
 927                                         size_t          plen = MIN(PAGESIZE - poff, uiop->uio_resid);
 928 
 929                                         int             pagecreate;
 930 
 931                                         mutex_enter(&np->r_statelock);
 932                                         pagecreate = (poff == 0) &&
 933                                                 ((plen == PAGESIZE) ||
 934                                                  (uiop->uio_loffset + plen >= np->r_size));
 935                                         mutex_exit(&np->r_statelock);
 936 
 937                                         base = segmap_getmapflt(segkmap, vp, blk + boff, plen, !pagecreate, S_WRITE);
 938                                         error = writenp(np, base + poff, blen, uiop, pagecreate);
 939 
 940                                 } else {
 941                                         base = segmap_getmapflt(segkmap, vp, blk + boff, blen, 0, S_READ);
 942                                         error = writenp(np, base + boff, blen, uiop, 0);
 943                                 }
 944                         }
 945 
 946                         if (!error) {
 947                                 if (uiop->uio_loffset % bsize == 0) {
 948                                         flags = SM_WRITE | SM_DONTNEED;
 949                                 } else {
 950                                         flags = 0;
 951                                 }
 952 
 953                                 if (ioflag & (FSYNC | FDSYNC)) {
 954                                         flags &= ~SM_ASYNC;
 955                                         flags |= SM_WRITE;
 956                                 }
 957                                 if (vpm_enable) {
 958                                         error = vpm_sync_pages(vp, blk, blen, flags);
 959                                 } else {
 960                                         error = segmap_release(segkmap, base, flags);
 961                                 }
 962                         } else {
 963                                 if (vpm_enable) {
 964                                         (void) vpm_sync_pages(vp, blk, blen, 0);
 965                                 } else {
 966                                         (void) segmap_release(segkmap, base, 0);
 967                                 }
 968                         }
 969                 } while (!error && uiop->uio_resid > 0);
 970         }
 971 
 972         /* undo adjustment of resid */
 973         if (error) {
 974                 uiop->uio_resid = last_resid + past_limit;
 975                 uiop->uio_loffset = last_off;
 976         } else {
 977                 uiop->uio_resid += past_limit;
 978         }
 979 
 980         return (error);
 981 }
 982 
/*
 * Copy "tcount" bytes from "uiop" into the file's cached pages mapped
 * at "base".  Corresponds to writerp() in nfs_client.c.
 *
 *   np        - smbfs node being written
 *   base      - kernel address of the segmap window (ignored when
 *               vpm_enable; vpm_data_copy maps pages itself)
 *   tcount    - bytes to move; must be <= MAXBSIZE and <= uio_resid
 *   uiop      - source of the data; loffset/resid advance as we copy
 *   pgcreated - nonzero if the caller already created and locked the
 *               first page at "base" (segmap_getmapflt, forcefault 0)
 *
 * Returns 0 or an errno from the copy / page unlock.
 * Caller must hold r_rwlock as writer.
 */
static int
writenp(smbnode_t * np, caddr_t base, int tcount, struct uio * uiop, int pgcreated)
{
	int             pagecreate;
	int             n;
	int             saved_n;
	caddr_t         saved_base;
	u_offset_t      offset;
	int             error;
	int             sm_error;

	vnode_t        *vp = SMBTOV(np);

	ASSERT(tcount <= MAXBSIZE && tcount <= uiop->uio_resid);
	ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
	if (!vpm_enable) {
		/* The mapping must not span a MAXBSIZE window. */
		ASSERT(((uintptr_t) base & MAXBOFFSET) + tcount <= MAXBSIZE);
	}
	/*
	 * Move bytes in at most PAGESIZE chunks. We must avoid spanning
	 * pages in uiomove() because page faults may cause the cache to be
	 * invalidated out from under us. The r_size is not updated until
	 * after the uiomove. If we push the last page of a file before
	 * r_size is correct, we will lose the data written past the current
	 * (and invalid) r_size.
	 */
	do {
		offset = uiop->uio_loffset;
		pagecreate = 0;

		/*
		 * n is the number of bytes required to satisfy the request
		 * or the number of bytes to fill out the page.
		 */
		n = (int) MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);

		/*
		 * Check to see if we can skip reading in the page and just
		 * allocate the memory. We can do this if we are going to
		 * rewrite the entire mapping or if we are going to write to
		 * or beyond the current end of file from the beginning of
		 * the mapping.
		 *
		 * The read of r_size is now protected by r_statelock.
		 */
		mutex_enter(&np->r_statelock);
		/*
		 * When pgcreated is nonzero the caller has already done a
		 * segmap_getmapflt with forcefault 0 and S_WRITE. With
		 * segkpm this means we already have at least one page
		 * created and mapped at base.
		 */
		pagecreate = pgcreated ||
			((offset & PAGEOFFSET) == 0 &&
			 (n == PAGESIZE || ((offset + n) >= np->r_size)));

		mutex_exit(&np->r_statelock);

		if (!vpm_enable && pagecreate) {
			/*
			 * The last argument tells segmap_pagecreate() to
			 * always lock the page, as opposed to sometimes
			 * returning with the page locked. This way we avoid
			 * a fault on the ensuing uiomove(), but also more
			 * importantly (to fix bug 1094402) we can call
			 * segmap_fault() to unlock the page in all cases. An
			 * alternative would be to modify segmap_pagecreate()
			 * to tell us when it is locking a page, but that's a
			 * fairly major interface change.
			 */
			if (pgcreated == 0)
				(void) segmap_pagecreate(segkmap, base,
							 (uint_t) n, 1);
			/* Remember what to unlock after the uiomove. */
			saved_base = base;
			saved_n = n;
		}
		/*
		 * The number of bytes of data in the last page can not be
		 * accurately determined while the page is being uiomove'd to
		 * and the size of the file being updated. Thus, inform
		 * threads which need to know accurately how much data is in
		 * the last page of the file. They will not do the i/o
		 * immediately, but will arrange for the i/o to happen later
		 * when this modify operation will have finished.
		 */
		ASSERT(!(np->r_flags & RMODINPROGRESS));
		mutex_enter(&np->r_statelock);
		np->r_flags |= RMODINPROGRESS;
		np->r_modaddr = (offset & MAXBMASK);
		mutex_exit(&np->r_statelock);

		if (vpm_enable) {
			/*
			 * Copy data. If new pages are created, part of the
			 * page that is not written will be initialized with
			 * zeros.
			 */
			error = vpm_data_copy(vp, offset, n, uiop,
					      !pagecreate, NULL, 0, S_WRITE);
		} else {
			error = uiomove(base, n, UIO_WRITE, uiop);
		}

		/*
		 * r_size is the maximum number of bytes known to be in the
		 * file. Make sure it is at least as high as the first
		 * unwritten byte pointed to by uio_loffset.
		 */
		mutex_enter(&np->r_statelock);
		if (np->r_size < uiop->uio_loffset)
			np->r_size = uiop->uio_loffset;
		np->r_flags &= ~RMODINPROGRESS;
		np->r_flags |= RDIRTY;
		mutex_exit(&np->r_statelock);

		/* n = # of bytes written */
		n = (int) (uiop->uio_loffset - offset);

		if (!vpm_enable) {
			base += n;
		}
		tcount -= n;
		/*
		 * If we created pages w/o initializing them completely, we
		 * need to zero the part that wasn't set up. This happens in
		 * most EOF-write cases and if we had some sort of error
		 * during the uiomove.
		 */
		if (!vpm_enable && pagecreate) {
			if ((uiop->uio_loffset & PAGEOFFSET) || n == 0)
				(void) kzero(base, PAGESIZE - n);

			if (pgcreated) {
				/*
				 * Caller is responsible for this page, it
				 * was not created in this loop.
				 */
				pgcreated = 0;
			} else {
				/*
				 * For bug 1094402: segmap_pagecreate locks
				 * page. Unlock it. This also unlocks the
				 * pages allocated by page_create_va() in
				 * segmap_pagecreate().
				 */
				sm_error = segmap_fault(kas.a_hat, segkmap,
							saved_base, saved_n,
						     F_SOFTUNLOCK, S_WRITE);
				/* Don't mask an earlier copy error. */
				if (error == 0)
					error = sm_error;
			}
		}
	} while (tcount > 0 && error == 0);

	return (error);
}
1140 
1141 /* ARGSUSED */
1142 static int
1143 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
1144         cred_t *cr, int *rvalp, caller_context_t *ct)
1145 {
1146         int             error;
1147         smbmntinfo_t    *smi;
1148 
1149         smi = VTOSMI(vp);
1150 
1151         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1152                 return (EIO);
1153 
1154         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1155                 return (EIO);
1156 
1157         switch (cmd) {
1158                 /* First three from ZFS. XXX - need these? */
1159 
1160         case _FIOFFS:
1161                 error = smbfs_fsync(vp, 0, cr, ct);
1162                 break;
1163 
1164                 /*
1165                  * The following two ioctls are used by bfu.
1166                  * Silently ignore to avoid bfu errors.
1167                  */
1168         case _FIOGDIO:
1169         case _FIOSDIO:
1170                 error = 0;
1171                 break;
1172 
1173 #ifdef NOT_YET  /* XXX - from the NFS code. */
1174         case _FIODIRECTIO:
1175                 error = smbfs_directio(vp, (int)arg, cr);
1176 #endif
1177 
1178                 /*
1179                  * Allow get/set with "raw" security descriptor (SD) data.
1180                  * Useful for testing, diagnosing idmap problems, etc.
1181                  */
1182         case SMBFSIO_GETSD:
1183                 error = smbfs_acl_iocget(vp, arg, flag, cr);
1184                 break;
1185 
1186         case SMBFSIO_SETSD:
1187                 error = smbfs_acl_iocset(vp, arg, flag, cr);
1188                 break;
1189 
1190         default:
1191                 error = ENOTTY;
1192                 break;
1193         }
1194 
1195         return (error);
1196 }
1197 
1198 
1199 /*
1200  * Return either cached or remote attributes. If get remote attr
1201  * use them to check and invalidate caches, then cache the new attributes.
1202  *
1203  * XXX
1204  * This op should eventually support PSARC 2007/315, Extensible Attribute
1205  * Interfaces, for richer metadata.
1206  */
1207 /* ARGSUSED */
1208 static int
1209 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1210         caller_context_t *ct)
1211 {
1212         smbnode_t *np;
1213         smbmntinfo_t *smi;
1214 
1215         smi = VTOSMI(vp);
1216 
1217         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1218                 return (EIO);
1219 
1220         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1221                 return (EIO);
1222 
1223         /*
1224          * If it has been specified that the return value will
1225          * just be used as a hint, and we are only being asked
1226          * for size, fsid or rdevid, then return the client's
1227          * notion of these values without checking to make sure
1228          * that the attribute cache is up to date.
1229          * The whole point is to avoid an over the wire GETATTR
1230          * call.
1231          */
1232         np = VTOSMB(vp);
1233         if (flags & ATTR_HINT) {
1234                 if (vap->va_mask ==
1235                     (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
1236                         mutex_enter(&np->r_statelock);
1237                         if (vap->va_mask | AT_SIZE)
1238                                 vap->va_size = np->r_size;
1239                         if (vap->va_mask | AT_FSID)
1240                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
1241                         if (vap->va_mask | AT_RDEV)
1242                                 vap->va_rdev = vp->v_rdev;
1243                         mutex_exit(&np->r_statelock);
1244                         return (0);
1245                 }
1246         }
1247 
1248         return (smbfsgetattr(vp, vap, cr));
1249 }
1250 
1251 /* smbfsgetattr() in smbfs_client.c */
1252 
1253 /*
1254  * XXX
1255  * This op should eventually support PSARC 2007/315, Extensible Attribute
1256  * Interfaces, for richer metadata.
1257  */
1258 /*ARGSUSED4*/
1259 static int
1260 smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1261                 caller_context_t *ct)
1262 {
1263         vfs_t           *vfsp;
1264         smbmntinfo_t    *smi;
1265         int             error;
1266         uint_t          mask;
1267         struct vattr    oldva;
1268 
1269         vfsp = vp->v_vfsp;
1270         smi = VFTOSMI(vfsp);
1271 
1272         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1273                 return (EIO);
1274 
1275         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1276                 return (EIO);
1277 
1278         mask = vap->va_mask;
1279         if (mask & AT_NOSET)
1280                 return (EINVAL);
1281 
1282         if (vfsp->vfs_flag & VFS_RDONLY)
1283                 return (EROFS);
1284 
1285         /*
1286          * This is a _local_ access check so that only the owner of
1287          * this mount can set attributes.  With ACLs enabled, the
1288          * file owner can be different from the mount owner, and we
1289          * need to check the _mount_ owner here.  See _access_rwx
1290          */
1291         bzero(&oldva, sizeof (oldva));
1292         oldva.va_mask = AT_TYPE | AT_MODE;
1293         error = smbfsgetattr(vp, &oldva, cr);
1294         if (error)
1295                 return (error);
1296         oldva.va_mask |= AT_UID | AT_GID;
1297         oldva.va_uid = smi->smi_uid;
1298         oldva.va_gid = smi->smi_gid;
1299 
1300         error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
1301             smbfs_accessx, vp);
1302         if (error)
1303                 return (error);
1304 
1305         if (mask & (AT_UID | AT_GID)) {
1306                 if (smi->smi_flags & SMI_ACL)
1307                         error = smbfs_acl_setids(vp, vap, cr);
1308                 else
1309                         error = ENOSYS;
1310                 if (error != 0) {
1311                         SMBVDEBUG("error %d seting UID/GID on %s",
1312                             error, VTOSMB(vp)->n_rpath);
1313                         /*
1314                          * It might be more correct to return the
1315                          * error here, but that causes complaints
1316                          * when root extracts a cpio archive, etc.
1317                          * So ignore this error, and go ahead with
1318                          * the rest of the setattr work.
1319                          */
1320                 }
1321         }
1322 
1323         return (smbfssetattr(vp, vap, flags, cr));
1324 }
1325 
1326 /*
1327  * Mostly from Darwin smbfs_setattr()
1328  * but then modified a lot.
1329  */
1330 /* ARGSUSED */
1331 static int
1332 smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
1333 {
1334         int             error = 0;
1335         smbnode_t       *np = VTOSMB(vp);
1336         uint_t          mask = vap->va_mask;
1337         struct timespec *mtime, *atime;
1338         struct smb_cred scred;
1339         int             cerror, modified = 0;
1340         unsigned short  fid;
1341         int have_fid = 0;
1342         uint32_t rights = 0;
1343 
1344         ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);
1345 
1346         /*
1347          * There are no settable attributes on the XATTR dir,
1348          * so just silently ignore these.  On XATTR files,
1349          * you can set the size but nothing else.
1350          */
1351         if (vp->v_flag & V_XATTRDIR)
1352                 return (0);
1353         if (np->n_flag & N_XATTR) {
1354                 if (mask & AT_TIMES)
1355                         SMBVDEBUG("ignore set time on xattr\n");
1356                 mask &= AT_SIZE;
1357         }
1358 
1359         /*
1360          * If our caller is trying to set multiple attributes, they
1361          * can make no assumption about what order they are done in.
1362          * Here we try to do them in order of decreasing likelihood
1363          * of failure, just to minimize the chance we'll wind up
1364          * with a partially complete request.
1365          */
1366 
1367         /* Shared lock for (possible) n_fid use. */
1368         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
1369                 return (EINTR);
1370         smb_credinit(&scred, cr);
1371 
1372         /*
1373          * Will we need an open handle for this setattr?
1374          * If so, what rights will we need?
1375          */
1376         if (mask & (AT_ATIME | AT_MTIME)) {
1377                 rights |=
1378                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
1379         }
1380         if (mask & AT_SIZE) {
1381                 rights |=
1382                     SA_RIGHT_FILE_WRITE_DATA |
1383                     SA_RIGHT_FILE_APPEND_DATA;
1384         }
1385 
1386         /*
1387          * Only SIZE really requires a handle, but it's
1388          * simpler and more reliable to set via a handle.
1389          * Some servers like NT4 won't set times by path.
1390          * Also, we're usually setting everything anyway.
1391          */
1392         if (mask & (AT_SIZE | AT_ATIME | AT_MTIME)) {
1393                 error = smbfs_smb_tmpopen(np, rights, &scred, &fid);
1394                 if (error) {
1395                         SMBVDEBUG("error %d opening %s\n",
1396                             error, np->n_rpath);
1397                         goto out;
1398                 }
1399                 have_fid = 1;
1400         }
1401 
1402         /*
1403          * If the server supports the UNIX extensions, right here is where
1404          * we'd support changes to uid, gid, mode, and possibly va_flags.
1405          * For now we claim to have made any such changes.
1406          */
1407 
1408         if (mask & AT_SIZE) {
1409                 /*
1410                  * If the new file size is less than what the client sees as
1411                  * the file size, then just change the size and invalidate
1412                  * the pages.
1413                  * I am commenting this code at present because the function
1414                  * smbfs_putapage() is not yet implemented.
1415                  */
1416 
1417                 /*
1418                  * Set the file size to vap->va_size.
1419                  */
1420                 ASSERT(have_fid);
1421                 error = smbfs_smb_setfsize(np, fid, vap->va_size, &scred);
1422                 if (error) {
1423                         SMBVDEBUG("setsize error %d file %s\n",
1424                             error, np->n_rpath);
1425                 } else {
1426                         /*
1427                          * Darwin had code here to zero-extend.
1428                          * Tests indicate the server will zero-fill,
1429                          * so looks like we don't need to do this.
1430                          * Good thing, as this could take forever.
1431                          *
1432                          * XXX: Reportedly, writing one byte of zero
1433                          * at the end offset avoids problems here.
1434                          */
1435                         mutex_enter(&np->r_statelock);
1436                         np->r_size = vap->va_size;
1437                         mutex_exit(&np->r_statelock);
1438                         modified = 1;
1439                 }
1440         }
1441 
1442         /*
1443          * XXX: When Solaris has create_time, set that too.
1444          * Note: create_time is different from ctime.
1445          */
1446         mtime = ((mask & AT_MTIME) ? &vap->va_mtime : 0);
1447         atime = ((mask & AT_ATIME) ? &vap->va_atime : 0);
1448 
1449         if (mtime || atime) {
1450                 /*
1451                  * Always use the handle-based set attr call now.
1452                  * Not trying to set DOS attributes here so pass zero.
1453                  */
1454                 ASSERT(have_fid);
1455                 error = smbfs_smb_setfattr(np, fid,
1456                     0, mtime, atime, &scred);
1457                 if (error) {
1458                         SMBVDEBUG("set times error %d file %s\n",
1459                             error, np->n_rpath);
1460                 } else {
1461                         modified = 1;
1462                 }
1463         }
1464 
1465 out:
1466         if (modified) {
1467                 /*
1468                  * Invalidate attribute cache in case the server
1469                  * doesn't set exactly the attributes we asked.
1470                  */
1471                 smbfs_attrcache_remove(np);
1472         }
1473 
1474         if (have_fid) {
1475                 cerror = smbfs_smb_tmpclose(np, fid, &scred);
1476                 if (cerror)
1477                         SMBVDEBUG("error %d closing %s\n",
1478                             cerror, np->n_rpath);
1479         }
1480 
1481         smb_credrele(&scred);
1482         smbfs_rw_exit(&np->r_lkserlock);
1483 
1484         return (error);
1485 }
1486 
1487 /*
1488  * smbfs_access_rwx()
1489  * Common function for smbfs_access, etc.
1490  *
1491  * The security model implemented by the FS is unusual
1492  * due to the current "single user mounts" restriction:
1493  * All access under a given mount point uses the CIFS
1494  * credentials established by the owner of the mount.
1495  *
1496  * Most access checking is handled by the CIFS server,
1497  * but we need sufficient Unix access checks here to
1498  * prevent other local Unix users from having access
1499  * to objects under this mount that the uid/gid/mode
1500  * settings in the mount would not allow.
1501  *
1502  * With this model, there is a case where we need the
1503  * ability to do an access check before we have the
1504  * vnode for an object.  This function takes advantage
1505  * of the fact that the uid/gid/mode is per mount, and
1506  * avoids the need for a vnode.
1507  *
1508  * We still (sort of) need a vnode when we call
1509  * secpolicy_vnode_access, but that only uses
1510  * the vtype field, so we can use a pair of fake
1511  * vnodes that have only v_type filled in.
1512  *
1513  * XXX: Later, add a new secpolicy_vtype_access()
1514  * that takes the vtype instead of a vnode, and
1515  * get rid of the tmpl_vxxx fake vnodes below.
1516  */
1517 static int
1518 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
1519 {
1520         /* See the secpolicy call below. */
1521         static const vnode_t tmpl_vdir = { .v_type = VDIR };
1522         static const vnode_t tmpl_vreg = { .v_type = VREG };
1523         vattr_t         va;
1524         vnode_t         *tvp;
1525         struct smbmntinfo *smi = VFTOSMI(vfsp);
1526         int shift = 0;
1527 
1528         /*
1529          * Build our (fabricated) vnode attributes.
1530          * XXX: Could make these templates in the
1531          * per-mount struct and use them here.
1532          */
1533         bzero(&va, sizeof (va));
1534         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
1535         va.va_type = vtype;
1536         va.va_mode = (vtype == VDIR) ?
1537             smi->smi_dmode : smi->smi_fmode;
1538         va.va_uid = smi->smi_uid;
1539         va.va_gid = smi->smi_gid;
1540 
1541         /*
1542          * Disallow write attempts on read-only file systems,
1543          * unless the file is a device or fifo node.  Note:
1544          * Inline vn_is_readonly and IS_DEVVP here because
1545          * we may not have a vnode ptr.  Original expr. was:
1546          * (mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1547          */
1548         if ((mode & VWRITE) &&
1549             (vfsp->vfs_flag & VFS_RDONLY) &&
1550             !(vtype == VCHR || vtype == VBLK || vtype == VFIFO))
1551                 return (EROFS);
1552 
1553         /*
1554          * Disallow attempts to access mandatory lock files.
1555          * Similarly, expand MANDLOCK here.
1556          * XXX: not sure we need this.
1557          */
1558         if ((mode & (VWRITE | VREAD | VEXEC)) &&
1559             va.va_type == VREG && MANDMODE(va.va_mode))
1560                 return (EACCES);
1561 
1562         /*
1563          * Access check is based on only
1564          * one of owner, group, public.
1565          * If not owner, then check group.
1566          * If not a member of the group,
1567          * then check public access.
1568          */
1569         if (crgetuid(cr) != va.va_uid) {
1570                 shift += 3;
1571                 if (!groupmember(va.va_gid, cr))
1572                         shift += 3;
1573         }
1574 
1575         /*
1576          * We need a vnode for secpolicy_vnode_access,
1577          * but the only thing it looks at is v_type,
1578          * so pass one of the templates above.
1579          */
1580         tvp = (va.va_type == VDIR) ?
1581             (vnode_t *)&tmpl_vdir :
1582             (vnode_t *)&tmpl_vreg;
1583 
1584         return (secpolicy_vnode_access2(cr, tvp, va.va_uid,
1585             va.va_mode << shift, mode));
1586 }
1587 
1588 /*
1589  * See smbfs_setattr
1590  */
1591 static int
1592 smbfs_accessx(void *arg, int mode, cred_t *cr)
1593 {
1594         vnode_t *vp = arg;
1595         /*
1596          * Note: The caller has checked the current zone,
1597          * the SMI_DEAD and VFS_UNMOUNTED flags, etc.
1598          */
1599         return (smbfs_access_rwx(vp->v_vfsp, vp->v_type, mode, cr));
1600 }
1601 
1602 /*
1603  * XXX
1604  * This op should support PSARC 2007/403, Modified Access Checks for CIFS
1605  */
1606 /* ARGSUSED */
1607 static int
1608 smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1609 {
1610         vfs_t           *vfsp;
1611         smbmntinfo_t    *smi;
1612 
1613         vfsp = vp->v_vfsp;
1614         smi = VFTOSMI(vfsp);
1615 
1616         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1617                 return (EIO);
1618 
1619         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1620                 return (EIO);
1621 
1622         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
1623 }
1624 
1625 
1626 /*
1627  * Flush local dirty pages to stable storage on the server.
1628  *
1629  * If FNODSYNC is specified, then there is nothing to do because
1630  * metadata changes are not cached on the client before being
1631  * sent to the server.
1632  */
1633 /* ARGSUSED */
1634 static int
1635 smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1636 {
1637         int             error = 0;
1638         smbmntinfo_t    *smi;
1639         smbnode_t       *np;
1640         struct smb_cred scred;
1641 
1642         np = VTOSMB(vp);
1643         smi = VTOSMI(vp);
1644 
1645         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1646                 return (EIO);
1647 
1648         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1649                 return (EIO);
1650 
1651         if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
1652                 return (0);
1653 
1654         if ((syncflag & (FSYNC|FDSYNC)) == 0)
1655                 return (0);
1656 
1657         /* Shared lock for n_fid use in _flush */
1658         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
1659                 return (EINTR);
1660         smb_credinit(&scred, cr);
1661 
1662         error = smbfs_smb_flush(np, &scred);
1663 
1664         smb_credrele(&scred);
1665         smbfs_rw_exit(&np->r_lkserlock);
1666 
1667         return (error);
1668 }
1669 
1670 /*
1671  * Last reference to vnode went away.
1672  */
1673 /* ARGSUSED */
1674 static void
1675 smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1676 {
1677         smbnode_t       *np;
1678         struct smb_cred scred;
1679 
1680         /*
1681          * Don't "bail out" for VFS_UNMOUNTED here,
1682          * as we want to do cleanup, etc.
1683          * See also pcfs_inactive
1684          */
1685 
1686         np = VTOSMB(vp);
1687 
1688         /*
1689          * If this is coming from the wrong zone, we let someone in the right
1690          * zone take care of it asynchronously.  We can get here due to
1691          * VN_RELE() being called from pageout() or fsflush().  This call may
1692          * potentially turn into an expensive no-op if, for instance, v_count
1693          * gets incremented in the meantime, but it's still correct.
1694          */
1695 
1696         /*
1697          * Defend against the possibility that higher-level callers
1698          * might not correctly balance open and close calls.  If we
1699          * get here with open references remaining, it means there
1700          * was a missing VOP_CLOSE somewhere.  If that happens, do
1701          * the close here so we don't "leak" FIDs on the server.
1702          *
1703          * Exclusive lock for modifying n_fid stuff.
1704          * Don't want this one ever interruptible.
1705          */
1706         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
1707         smb_credinit(&scred, cr);
1708 
1709         switch (np->n_ovtype) {
1710         case VNON:
1711                 /* not open (OK) */
1712                 break;
1713 
1714         case VDIR:
1715                 if (np->n_dirrefs == 0)
1716                         break;
1717                 SMBVDEBUG("open dir: refs %d path %s\n",
1718                     np->n_dirrefs, np->n_rpath);
1719                 /* Force last close. */
1720                 np->n_dirrefs = 1;
1721                 smbfs_rele_fid(np, &scred);
1722                 break;
1723 
1724         case VREG:
1725                 if (np->n_fidrefs == 0)
1726                         break;
1727                 SMBVDEBUG("open file: refs %d id 0x%x path %s\n",
1728                     np->n_fidrefs, np->n_fid, np->n_rpath);
1729                 /*
1730                  * Before otW close, make sure dirty pages written back.
1731                  */
1732                 if (vn_has_cached_data(vp)) {
1733                         /* smbfs_putapage() will acquire shared lock, so release
1734                          * exclusive lock temporally.
1735                          */
1736                         smbfs_rw_exit(&np->r_lkserlock);
1737 
1738                         (void) smbfs_putpage(vp, (offset_t) 0, 0, B_INVAL | B_ASYNC, cr, ct);
1739 
1740                         /* acquire exclusive lock again. */
1741                         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
1742                 }
1743                 /* Force last close. */
1744                 np->n_fidrefs = 1;
1745                 smbfs_rele_fid(np, &scred);
1746                 break;
1747 
1748         default:
1749                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
1750                 np->n_ovtype = VNON;
1751                 break;
1752         }
1753 
1754         smb_credrele(&scred);
1755         smbfs_rw_exit(&np->r_lkserlock);
1756 
1757         smbfs_addfree(np);
1758 }
1759 
1760 /*
1761  * Remote file system operations having to do with directory manipulation.
1762  */
1763 /* ARGSUSED */
1764 static int
1765 smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1766         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
1767         int *direntflags, pathname_t *realpnp)
1768 {
1769         vfs_t           *vfs;
1770         smbmntinfo_t    *smi;
1771         smbnode_t       *dnp;
1772         int             error;
1773 
1774         vfs = dvp->v_vfsp;
1775         smi = VFTOSMI(vfs);
1776 
1777         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1778                 return (EPERM);
1779 
1780         if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED)
1781                 return (EIO);
1782 
1783         dnp = VTOSMB(dvp);
1784 
1785         /*
1786          * Are we looking up extended attributes?  If so, "dvp" is
1787          * the file or directory for which we want attributes, and
1788          * we need a lookup of the (faked up) attribute directory
1789          * before we lookup the rest of the path.
1790          */
1791         if (flags & LOOKUP_XATTR) {
1792                 /*
1793                  * Require the xattr mount option.
1794                  */
1795                 if ((vfs->vfs_flag & VFS_XATTR) == 0)
1796                         return (EINVAL);
1797 
1798                 error = smbfs_get_xattrdir(dvp, vpp, cr, flags);
1799                 return (error);
1800         }
1801 
1802         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_READER, SMBINTR(dvp)))
1803                 return (EINTR);
1804 
1805         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
1806 
1807         smbfs_rw_exit(&dnp->r_rwlock);
1808 
1809         return (error);
1810 }
1811 
/*
 * Common lookup worker, used by smbfs_lookup() and by other
 * callers in this file (create, remove, ...).  Handles the
 * special names "", "." and "..", validates the name, optionally
 * tries the smbfs node cache (cache_ok != 0), and otherwise goes
 * over-the-wire for attributes and finds/creates the node.
 *
 * The directory's r_rwlock must be held (reader or writer) by
 * the caller.  On success, *vpp holds a reference the caller
 * must release.
 */
/* ARGSUSED */
static int
smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
	int cache_ok, caller_context_t *ct)
{
	int		error;
	int		supplen; /* supported length */
	vnode_t		*vp;
	smbnode_t	*np;
	smbnode_t	*dnp;
	smbmntinfo_t	*smi;
	/* struct smb_vc	*vcp; */
	const char	*ill;
	const char	*name = (const char *)nm;
	int		nmlen = strlen(nm);
	int		rplen;
	struct smb_cred scred;
	struct smbfattr fa;

	smi = VTOSMI(dvp);
	dnp = VTOSMB(dvp);

	ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);

#ifdef NOT_YET
	vcp = SSTOVC(smi->smi_share);

	/* XXX: Should compute this once and store it in smbmntinfo_t */
	supplen = (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) ? 255 : 12;
#else
	supplen = 255;
#endif

	/*
	 * RWlock must be held, either reader or writer.
	 * XXX: Can we check without looking directly
	 * inside the struct smbfs_rwlock_t?
	 */
	ASSERT(dnp->r_rwlock.count != 0);

	/*
	 * If lookup is for "", just return dvp.
	 * No need to perform any access checks.
	 */
	if (nmlen == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/*
	 * Can't do lookups in non-directories.
	 */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * Need search permission in the directory.
	 */
	error = smbfs_access(dvp, VEXEC, 0, cr, ct);
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.
	 * Access check was done above.
	 */
	if (nmlen == 1 && name[0] == '.') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/*
	 * Now some sanity checks on the name.
	 * First check the length.
	 */
	if (nmlen > supplen)
		return (ENAMETOOLONG);

	/*
	 * Avoid surprises with characters that are
	 * illegal in Windows file names.
	 * Todo: CATIA mappings  XXX
	 */
	ill = illegal_chars;
	if (dnp->n_flag & N_XATTR)
		ill++; /* allow colon */
	if (strpbrk(nm, ill))
		return (EINVAL);

	/*
	 * Special handling for lookup of ".."
	 *
	 * We keep full pathnames (as seen on the server)
	 * so we can just trim off the last component to
	 * get the full pathname of the parent.  Note:
	 * We don't actually copy and modify, but just
	 * compute the trimmed length and pass that with
	 * the current dir path (not null terminated).
	 *
	 * We don't go over-the-wire to get attributes
	 * for ".." because we know it's a directory,
	 * and we can just leave the rest "stale"
	 * until someone does a getattr.
	 */
	if (nmlen == 2 && name[0] == '.' && name[1] == '.') {
		if (dvp->v_flag & VROOT) {
			/*
			 * Already at the root.  This can happen
			 * with directory listings at the root,
			 * which lookup "." and ".." to get the
			 * inode numbers.  Let ".." be the same
			 * as "." in the FS root.
			 */
			VN_HOLD(dvp);
			*vpp = dvp;
			return (0);
		}

		/*
		 * Special case for XATTR directory
		 */
		if (dvp->v_flag & V_XATTRDIR) {
			error = smbfs_xa_parent(dvp, vpp);
			return (error);
		}

		/*
		 * Find the parent path length by scanning
		 * backwards for the last '\\' separator.
		 */
		rplen = dnp->n_rplen;
		ASSERT(rplen > 0);
		while (--rplen >= 0) {
			if (dnp->n_rpath[rplen] == '\\')
				break;
		}
		if (rplen <= 0) {
			/* Found our way to the root. */
			vp = SMBTOV(smi->smi_root);
			VN_HOLD(vp);
			*vpp = vp;
			return (0);
		}
		np = smbfs_node_findcreate(smi,
		    dnp->n_rpath, rplen, NULL, 0, 0,
		    &smbfs_fattr0); /* force create */
		ASSERT(np != NULL);
		vp = SMBTOV(np);
		vp->v_type = VDIR;

		/* Success! */
		*vpp = vp;
		return (0);
	}

	/*
	 * Normal lookup of a name under this directory.
	 * Note we handled "", ".", ".." above.
	 */
	if (cache_ok) {
		/*
		 * The caller indicated that it's OK to use a
		 * cached result for this lookup, so try to
		 * reclaim a node from the smbfs node cache.
		 */
		error = smbfslookup_cache(dvp, nm, nmlen, &vp, cr);
		if (error)
			return (error);
		if (vp != NULL) {
			/* hold taken in lookup_cache */
			*vpp = vp;
			return (0);
		}
	}

	/*
	 * OK, go over-the-wire to get the attributes,
	 * then create the node.
	 */
	smb_credinit(&scred, cr);
	/* Note: this can allocate a new "name" */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fa, &scred);
	smb_credrele(&scred);
	if (error == ENOTDIR) {
		/*
		 * Lookup failed because this directory was
		 * removed or renamed by another client.
		 * Remove any cached attributes under it.
		 */
		smbfs_attrcache_remove(dnp);
		smbfs_attrcache_prune(dnp);
	}
	if (error)
		goto out;

	error = smbfs_nget(dvp, name, nmlen, &fa, &vp);
	if (error)
		goto out;

	/* Success! */
	*vpp = vp;

out:
	/* smbfs_smb_lookup may have allocated name. */
	if (name != nm)
		smbfs_name_free(name, nmlen);

	return (error);
}
2022 
2023 /*
2024  * smbfslookup_cache
2025  *
2026  * Try to reclaim a node from the smbfs node cache.
2027  * Some statistics for DEBUG.
2028  *
2029  * This mechanism lets us avoid many of the five (or more)
2030  * OtW lookup calls per file seen with "ls -l" if we search
2031  * the smbfs node cache for recently inactive(ated) nodes.
2032  */
#ifdef DEBUG
int smbfs_lookup_cache_calls = 0;	/* total cache lookup attempts */
int smbfs_lookup_cache_error = 0;	/* getattr on the parent dir failed */
int smbfs_lookup_cache_miss = 0;	/* no node found in the cache */
int smbfs_lookup_cache_stale = 0;	/* node found, but attrs expired */
int smbfs_lookup_cache_hits = 0;	/* node found with valid attrs */
#endif /* DEBUG */
2040 
2041 /* ARGSUSED */
2042 static int
2043 smbfslookup_cache(vnode_t *dvp, char *nm, int nmlen,
2044         vnode_t **vpp, cred_t *cr)
2045 {
2046         struct vattr va;
2047         smbnode_t *dnp;
2048         smbnode_t *np;
2049         vnode_t *vp;
2050         int error;
2051         char sep;
2052 
2053         dnp = VTOSMB(dvp);
2054         *vpp = NULL;
2055 
2056 #ifdef DEBUG
2057         smbfs_lookup_cache_calls++;
2058 #endif
2059 
2060         /*
2061          * First make sure we can get attributes for the
2062          * directory.  Cached attributes are OK here.
2063          * If we removed or renamed the directory, this
2064          * will return ENOENT.  If someone else removed
2065          * this directory or file, we'll find out when we
2066          * try to open or get attributes.
2067          */
2068         va.va_mask = AT_TYPE | AT_MODE;
2069         error = smbfsgetattr(dvp, &va, cr);
2070         if (error) {
2071 #ifdef DEBUG
2072                 smbfs_lookup_cache_error++;
2073 #endif
2074                 return (error);
2075         }
2076 
2077         /*
2078          * Passing NULL smbfattr here so we will
2079          * just look, not create.
2080          */
2081         sep = SMBFS_DNP_SEP(dnp);
2082         np = smbfs_node_findcreate(dnp->n_mount,
2083             dnp->n_rpath, dnp->n_rplen,
2084             nm, nmlen, sep, NULL);
2085         if (np == NULL) {
2086 #ifdef DEBUG
2087                 smbfs_lookup_cache_miss++;
2088 #endif
2089                 return (0);
2090         }
2091 
2092         /*
2093          * Found it.  Attributes still valid?
2094          */
2095         vp = SMBTOV(np);
2096         if (np->r_attrtime <= gethrtime()) {
2097                 /* stale */
2098 #ifdef DEBUG
2099                 smbfs_lookup_cache_stale++;
2100 #endif
2101                 VN_RELE(vp);
2102                 return (0);
2103         }
2104 
2105         /*
2106          * Success!
2107          * Caller gets hold from smbfs_node_findcreate
2108          */
2109 #ifdef DEBUG
2110         smbfs_lookup_cache_hits++;
2111 #endif
2112         *vpp = vp;
2113         return (0);
2114 }
2115 
2116 /*
2117  * XXX
2118  * vsecattr_t is new to build 77, and we need to eventually support
2119  * it in order to create an ACL when an object is created.
2120  *
2121  * This op should support the new FIGNORECASE flag for case-insensitive
2122  * lookups, per PSARC 2007/244.
2123  */
/*
 * Create (or open) a regular file under directory dvp.
 * Only VREG is supported here (see the va_type check below).
 * If the file already exists and EXCL was not requested, this
 * behaves like an open: verify access, optionally truncate,
 * and return the existing node.  Otherwise create via NT_CREATE
 * with a disposition chosen from the exclusive/truncate flags.
 * On success, *vpp holds a reference the caller must release.
 */
/* ARGSUSED */
static int
smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
	int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
	vsecattr_t *vsecp)
{
	int		error;
	int		cerror;
	vfs_t		*vfsp;
	vnode_t		*vp;
#ifdef NOT_YET
	smbnode_t	*np;
#endif
	smbnode_t	*dnp;
	smbmntinfo_t	*smi;
	struct vattr	vattr;
	struct smbfattr fattr;
	struct smb_cred scred;
	const char *name = (const char *)nm;
	int		nmlen = strlen(nm);
	uint32_t	disp;
	uint16_t	fid;
	int		xattr;

	vfsp = dvp->v_vfsp;
	smi = VFTOSMI(vfsp);
	dnp = VTOSMB(dvp);
	vp = NULL;

	/* Zone and mount-state checks. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EPERM);

	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/*
	 * Note: this may break mknod(2) calls to create a directory,
	 * but that's obscure use.  Some other filesystems do this.
	 * XXX: Later, redirect VDIR type here to _mkdir.
	 */
	if (va->va_type != VREG)
		return (EINVAL);

	/*
	 * If the pathname is "", just use dvp, no checks.
	 * Do this outside of the rwlock (like zfs).
	 */
	if (nmlen == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* Don't allow "." or ".." through here. */
	if ((nmlen == 1 && name[0] == '.') ||
	    (nmlen == 2 && name[0] == '.' && name[1] == '.'))
		return (EISDIR);

	/*
	 * We make a copy of the attributes because the caller does not
	 * expect us to change what va points to.
	 */
	vattr = *va;

	if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * XXX: Do we need r_lkserlock too?
	 * No use of any shared fid or fctx...
	 */

	/*
	 * NFS needs to go over the wire, just to be sure whether the
	 * file exists or not.  Using a cached result is dangerous in
	 * this case when making a decision regarding existence.
	 *
	 * The SMB protocol does NOT really need to go OTW here
	 * thanks to the expressive NTCREATE disposition values.
	 * Unfortunately, to do Unix access checks correctly,
	 * we need to know if the object already exists.
	 * When the object does not exist, we need VWRITE on
	 * the directory.  Note: smbfslookup() checks VEXEC.
	 */
	error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
	if (error == 0) {
		/*
		 * The file already exists.  Error?
		 * NB: have a hold from smbfslookup
		 */
		if (exclusive == EXCL) {
			error = EEXIST;
			VN_RELE(vp);
			goto out;
		}
		/*
		 * Verify requested access.
		 */
		error = smbfs_access(vp, mode, 0, cr, ct);
		if (error) {
			VN_RELE(vp);
			goto out;
		}

		/*
		 * Truncate (if requested).
		 */
		if ((vattr.va_mask & AT_SIZE) && vattr.va_size == 0) {
			vattr.va_mask = AT_SIZE;
			error = smbfssetattr(vp, &vattr, 0, cr);
			if (error) {
				VN_RELE(vp);
				goto out;
			}
		}
		/* Success! */
#ifdef NOT_YET
		vnevent_create(vp, ct);
#endif
		*vpp = vp;
		goto out;
	}

	/*
	 * The file did not exist.  Need VWRITE in the directory.
	 */
	error = smbfs_access(dvp, VWRITE, 0, cr, ct);
	if (error)
		goto out;

	/*
	 * Now things get tricky.  We also need to check the
	 * requested open mode against the file we may create.
	 * See comments at smbfs_access_rwx
	 */
	error = smbfs_access_rwx(vfsp, VREG, mode, cr);
	if (error)
		goto out;

	/*
	 * Now the code derived from Darwin,
	 * but with greater use of NT_CREATE
	 * disposition options.  Much changed.
	 *
	 * Create (or open) a new child node.
	 * Note we handled "." and ".." above.
	 */

	/* Choose the NT_CREATE disposition from the create flags. */
	if (exclusive == EXCL)
		disp = NTCREATEX_DISP_CREATE;
	else {
		/* Truncate regular files if requested. */
		if ((va->va_type == VREG) &&
		    (va->va_mask & AT_SIZE) &&
		    (va->va_size == 0))
			disp = NTCREATEX_DISP_OVERWRITE_IF;
		else
			disp = NTCREATEX_DISP_OPEN_IF;
	}
	xattr = (dnp->n_flag & N_XATTR) ? 1 : 0;
	error = smbfs_smb_create(dnp,
	    name, nmlen, xattr,
	    disp, &scred, &fid);
	if (error)
		goto out;

	/*
	 * XXX: Missing some code here to deal with
	 * the case where we opened an existing file,
	 * it's size is larger than 32-bits, and we're
	 * setting the size from a process that's not
	 * aware of large file offsets.  i.e.
	 * from the NFS3 code:
	 */
#if NOT_YET /* XXX */
	if ((vattr.va_mask & AT_SIZE) &&
	    vp->v_type == VREG) {
		np = VTOSMB(vp);
		/*
		 * Check here for large file handled
		 * by LF-unaware process (as
		 * ufs_create() does)
		 */
		if (!(lfaware & FOFFMAX)) {
			mutex_enter(&np->r_statelock);
			if (np->r_size > MAXOFF32_T)
				error = EOVERFLOW;
			mutex_exit(&np->r_statelock);
		}
		if (!error) {
			vattr.va_mask = AT_SIZE;
			error = smbfssetattr(vp,
			    &vattr, 0, cr);
		}
	}
#endif /* XXX */
	/*
	 * Should use the fid to get/set the size
	 * while we have it opened here.  See above.
	 */

	cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);
	if (cerror)
		SMBVDEBUG("error %d closing %s\\%s\n",
		    cerror, dnp->n_rpath, name);

	/*
	 * In the open case, the name may differ a little
	 * from what we passed to create (case, etc.)
	 * so call lookup to get the (opened) name.
	 *
	 * XXX: Could avoid this extra lookup if the
	 * "createact" result from NT_CREATE says we
	 * created the object.
	 */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
	if (error)
		goto out;

	/* update attr and directory cache */
	smbfs_attr_touchdir(dnp);

	error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
	if (error)
		goto out;

	/* XXX invalidate pages if we truncated? */

	/* Success! */
	*vpp = vp;
	error = 0;

out:
	smb_credrele(&scred);
	smbfs_rw_exit(&dnp->r_rwlock);
	/* smbfs_smb_lookup may have allocated name. */
	if (name != nm)
		smbfs_name_free(name, nmlen);
	return (error);
}
2364 
2365 /*
2366  * XXX
2367  * This op should support the new FIGNORECASE flag for case-insensitive
2368  * lookups, per PSARC 2007/244.
2369  */
/* ARGSUSED */
static int
smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
	int flags)
{
	int		error;
	vnode_t		*vp;
	smbnode_t	*np;
	smbnode_t	*dnp;
	struct smb_cred	scred;
	/* enum smbfsstat status; */
	smbmntinfo_t	*smi;

	smi = VTOSMI(dvp);

	/* Disallow cross-zone access. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EPERM);

	/* Fail early if the mount is dead or going away. */
	if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/*
	 * Serialize directory modifications via the parent's r_rwlock.
	 * All error paths below must go through "out" to drop it.
	 */
	dnp = VTOSMB(dvp);
	if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * Verify access to the directory.
	 */
	error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
	if (error)
		goto out;

	/*
	 * NOTE:  the darwin code gets the "vp" passed in so it looks
	 * like the "vp" has probably been "lookup"ed by the VFS layer.
	 * It looks like we will need to lookup the vp to check the
	 * caches and check if the object being deleted is a directory.
	 */
	error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
	if (error)
		goto out;

	/* Never allow link/unlink directories on CIFS. */
	if (vp->v_type == VDIR) {
		VN_RELE(vp);
		error = EPERM;
		goto out;
	}

	/*
	 * Now we have the real reference count on the vnode
	 * Do we have the file open?
	 */
	np = VTOSMB(vp);
	mutex_enter(&np->r_statelock);
	if ((vp->v_count > 1) && (np->n_fidrefs > 0)) {
		/*
		 * NFS does a rename on remove here.
		 * Probably not applicable for SMB.
		 * Like Darwin, just return EBUSY.
		 *
		 * XXX: Todo - Use Trans2rename, and
		 * if that fails, ask the server to
		 * set the delete-on-close flag.
		 */
		mutex_exit(&np->r_statelock);
		error = EBUSY;
	} else {
		/*
		 * Not held open elsewhere.  Drop cached attributes
		 * before the over-the-wire delete so we never serve
		 * stale data for a file that may be gone.
		 */
		smbfs_attrcache_rm_locked(np);
		mutex_exit(&np->r_statelock);

		error = smbfs_smb_delete(np, &scred, NULL, 0, 0);

		/*
		 * If the file should no longer exist, discard
		 * any cached attributes under this node.
		 */
		switch (error) {
		case 0:
		case ENOENT:
		case ENOTDIR:
			smbfs_attrcache_prune(np);
			break;
		}
	}

	/* Release the hold from smbfslookup above. */
	VN_RELE(vp);

out:
	smb_credrele(&scred);
	smbfs_rw_exit(&dnp->r_rwlock);

	return (error);
}
2465 
2466 
2467 /*
2468  * XXX
2469  * This op should support the new FIGNORECASE flag for case-insensitive
2470  * lookups, per PSARC 2007/244.
2471  */
2472 /* ARGSUSED */
2473 static int
2474 smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
2475         caller_context_t *ct, int flags)
2476 {
2477         /* vnode_t              *realvp; */
2478 
2479         if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
2480             curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
2481                 return (EPERM);
2482 
2483         if (VTOSMI(odvp)->smi_flags & SMI_DEAD ||
2484             VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
2485             odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
2486             ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2487                 return (EIO);
2488 
2489         return (smbfsrename(odvp, onm, ndvp, nnm, cr, ct));
2490 }
2491 
2492 /*
2493  * smbfsrename does the real work of renaming in SMBFS
2494  */
/* ARGSUSED */
static int
smbfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
	caller_context_t *ct)
{
	int		error;
	int		nvp_locked = 0;
	vnode_t		*nvp = NULL;
	vnode_t		*ovp = NULL;
	smbnode_t	*onp;
	smbnode_t	*nnp;
	smbnode_t	*odnp;
	smbnode_t	*ndnp;
	struct smb_cred	scred;
	/* enum smbfsstat	status; */

	/* Caller (smbfs_rename) has already done the zone check. */
	ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);

	/* Renaming "." or ".." (as source or target) is never legal. */
	if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
	    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
		return (EINVAL);

	/*
	 * Check that everything is on the same filesystem.
	 * vn_rename checks the fsid's, but in case we don't
	 * fill those in correctly, check here too.
	 */
	if (odvp->v_vfsp != ndvp->v_vfsp)
		return (EXDEV);

	odnp = VTOSMB(odvp);
	ndnp = VTOSMB(ndvp);

	/*
	 * Avoid deadlock here on old vs new directory nodes
	 * by always taking the locks in order of address.
	 * The order is arbitrary, but must be consistent.
	 * (Note: odnp == ndnp when renaming within one dir;
	 * the else branch handles that by taking it once...
	 * actually both branches take both locks -- for the
	 * same-directory case r_rwlock must be re-entrant or
	 * this relies on odnp != ndnp; TODO confirm callers
	 * never pass odvp == ndvp with distinct smbnodes.)
	 */
	if (odnp < ndnp) {
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp))) {
			smbfs_rw_exit(&odnp->r_rwlock);
			return (EINTR);
		}
	} else {
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp))) {
			smbfs_rw_exit(&ndnp->r_rwlock);
			return (EINTR);
		}
	}
	smb_credinit(&scred, cr);
	/*
	 * No returns after this point (goto out)
	 */

	/*
	 * Need write access on source and target.
	 * Server takes care of most checks.
	 */
	error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
	if (error)
		goto out;
	if (odvp != ndvp) {
		error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
		if (error)
			goto out;
	}

	/*
	 * Lookup the source name.  Must already exist.
	 */
	error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
	if (error)
		goto out;

	/*
	 * Lookup the target file.  If it exists, it needs to be
	 * checked to see whether it is a mount point and whether
	 * it is active (open).
	 */
	error = smbfslookup(ndvp, nnm, &nvp, cr, 0, ct);
	if (!error) {
		/*
		 * Target (nvp) already exists.  Check that it
		 * has the same type as the source.  The server
		 * will check this also, (and more reliably) but
		 * this lets us return the correct error codes.
		 */
		if (ovp->v_type == VDIR) {
			if (nvp->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (nvp->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}

		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (ovp == nvp) {
			error = 0;
			goto out;
		}

		/*
		 * Also must ensure the target is not a mount point,
		 * and keep mount/umount away until we're done.
		 */
		if (vn_vfsrlock(nvp)) {
			error = EBUSY;
			goto out;
		}
		nvp_locked = 1;
		if (vn_mountedvfs(nvp) != NULL) {
			error = EBUSY;
			goto out;
		}

		/*
		 * CIFS gives a SHARING_VIOLATION error when
		 * trying to rename onto an existing object,
		 * so try to remove the target first.
		 * (Only for files, not directories.)
		 */
		if (nvp->v_type == VDIR) {
			error = EEXIST;
			goto out;
		}

		/*
		 * Nodes that are "not active" here have v_count=2
		 * because vn_renameat (our caller) did a lookup on
		 * both the source and target before this call.
		 * Otherwise this similar to smbfs_remove.
		 */
		nnp = VTOSMB(nvp);
		mutex_enter(&nnp->r_statelock);
		if ((nvp->v_count > 2) && (nnp->n_fidrefs > 0)) {
			/*
			 * The target file exists, is not the same as
			 * the source file, and is active.  Other FS
			 * implementations unlink the target here.
			 * For SMB, we don't assume we can remove an
			 * open file.  Return an error instead.
			 */
			mutex_exit(&nnp->r_statelock);
			error = EBUSY;
			goto out;
		}

		/*
		 * Target file is not active. Try to remove it.
		 */
		smbfs_attrcache_rm_locked(nnp);
		mutex_exit(&nnp->r_statelock);

		error = smbfs_smb_delete(nnp, &scred, NULL, 0, 0);

		/*
		 * Similar to smbfs_remove
		 */
		switch (error) {
		case 0:
		case ENOENT:
		case ENOTDIR:
			smbfs_attrcache_prune(nnp);
			break;
		}

		if (error)
			goto out;
		/*
		 * OK, removed the target file.  Continue as if
		 * lookup target had failed (nvp == NULL).
		 */
		vn_vfsunlock(nvp);
		nvp_locked = 0;
		VN_RELE(nvp);
		nvp = NULL;
	} /* nvp */

	/*
	 * Drop cached attributes on the source before the OtW
	 * rename; its path (n_rpath) is about to change.
	 */
	onp = VTOSMB(ovp);
	smbfs_attrcache_remove(onp);

	error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), &scred);

	/*
	 * If the old name should no longer exist,
	 * discard any cached attributes under it.
	 */
	if (error == 0)
		smbfs_attrcache_prune(onp);

out:
	/* Unwind in reverse order of acquisition. */
	if (nvp) {
		if (nvp_locked)
			vn_vfsunlock(nvp);
		VN_RELE(nvp);
	}
	if (ovp)
		VN_RELE(ovp);

	smb_credrele(&scred);
	smbfs_rw_exit(&odnp->r_rwlock);
	smbfs_rw_exit(&ndnp->r_rwlock);

	return (error);
}
2716 
2717 /*
2718  * XXX
2719  * vsecattr_t is new to build 77, and we need to eventually support
2720  * it in order to create an ACL when an object is created.
2721  *
2722  * This op should support the new FIGNORECASE flag for case-insensitive
2723  * lookups, per PSARC 2007/244.
2724  */
/* ARGSUSED */
static int
smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
	cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	vnode_t		*vp;
	struct smbnode	*dnp = VTOSMB(dvp);
	struct smbmntinfo *smi = VTOSMI(dvp);
	struct smb_cred	scred;
	struct smbfattr	fattr;
	const char		*name = (const char *) nm;
	int		nmlen = strlen(name);
	int		error, hiderr;

	/* Disallow cross-zone access. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EPERM);

	/* Fail early if the mount is dead or going away. */
	if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/* "." and ".." always exist; mkdir of either is EEXIST. */
	if ((nmlen == 1 && name[0] == '.') ||
	    (nmlen == 2 && name[0] == '.' && name[1] == '.'))
		return (EEXIST);

	/* Only plain files are allowed in V_XATTRDIR. */
	if (dvp->v_flag & V_XATTRDIR)
		return (EINVAL);

	/* Serialize modifications of the parent directory. */
	if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * XXX: Do we need r_lkserlock too?
	 * No use of any shared fid or fctx...
	 */

	/*
	 * Require write access in the containing directory.
	 */
	error = smbfs_access(dvp, VWRITE, 0, cr, ct);
	if (error)
		goto out;

	/* Create the directory over the wire. */
	error = smbfs_smb_mkdir(dnp, name, nmlen, &scred);
	if (error)
		goto out;

	/*
	 * Re-lookup to get the server's canonical name (case, etc.)
	 * and fresh attributes.  Note: may replace "name" with an
	 * allocated copy, freed at the bottom.
	 */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
	if (error)
		goto out;

	/* Parent directory contents changed. */
	smbfs_attr_touchdir(dnp);

	/* Find or create the smbnode/vnode for the new directory. */
	error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
	if (error)
		goto out;

	/*
	 * Dot-files get the DOS "hidden" attribute set, following
	 * the usual SMB convention.  Failure is only logged.
	 */
	if (name[0] == '.')
		if ((hiderr = smbfs_smb_hideit(VTOSMB(vp), NULL, 0, &scred)))
			SMBVDEBUG("hide failure %d\n", hiderr);

	/* Success! */
	*vpp = vp;
	error = 0;
out:
	smb_credrele(&scred);
	smbfs_rw_exit(&dnp->r_rwlock);

	/* smbfs_smb_lookup may have replaced "name"; free the copy. */
	if (name != nm)
		smbfs_name_free(name, nmlen);

	return (error);
}
2799 
2800 /*
2801  * XXX
2802  * This op should support the new FIGNORECASE flag for case-insensitive
2803  * lookups, per PSARC 2007/244.
2804  */
2805 /* ARGSUSED */
2806 static int
2807 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
2808         caller_context_t *ct, int flags)
2809 {
2810         vnode_t         *vp = NULL;
2811         int             vp_locked = 0;
2812         struct smbmntinfo *smi = VTOSMI(dvp);
2813         struct smbnode  *dnp = VTOSMB(dvp);
2814         struct smbnode  *np;
2815         struct smb_cred scred;
2816         int             error;
2817 
2818         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2819                 return (EPERM);
2820 
2821         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2822                 return (EIO);
2823 
2824         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2825                 return (EINTR);
2826         smb_credinit(&scred, cr);
2827 
2828         /*
2829          * Require w/x access in the containing directory.
2830          * Server handles all other access checks.
2831          */
2832         error = smbfs_access(dvp, VEXEC|VWRITE, 0, cr, ct);
2833         if (error)
2834                 goto out;
2835 
2836         /*
2837          * First lookup the entry to be removed.
2838          */
2839         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2840         if (error)
2841                 goto out;
2842         np = VTOSMB(vp);
2843 
2844         /*
2845          * Disallow rmdir of "." or current dir, or the FS root.
2846          * Also make sure it's a directory, not a mount point,
2847          * and lock to keep mount/umount away until we're done.
2848          */
2849         if ((vp == dvp) || (vp == cdir) || (vp->v_flag & VROOT)) {
2850                 error = EINVAL;
2851                 goto out;
2852         }
2853         if (vp->v_type != VDIR) {
2854                 error = ENOTDIR;
2855                 goto out;
2856         }
2857         if (vn_vfsrlock(vp)) {
2858                 error = EBUSY;
2859                 goto out;
2860         }
2861         vp_locked = 1;
2862         if (vn_mountedvfs(vp) != NULL) {
2863                 error = EBUSY;
2864                 goto out;
2865         }
2866 
2867         smbfs_attrcache_remove(np);
2868         error = smbfs_smb_rmdir(np, &scred);
2869 
2870         /*
2871          * Similar to smbfs_remove
2872          */
2873         switch (error) {
2874         case 0:
2875         case ENOENT:
2876         case ENOTDIR:
2877                 smbfs_attrcache_prune(np);
2878                 break;
2879         }
2880 
2881         if (error)
2882                 goto out;
2883 
2884         mutex_enter(&np->r_statelock);
2885         dnp->n_flag |= NMODIFIED;
2886         mutex_exit(&np->r_statelock);
2887         smbfs_attr_touchdir(dnp);
2888         smbfs_rmhash(np);
2889 
2890 out:
2891         if (vp) {
2892                 if (vp_locked)
2893                         vn_vfsunlock(vp);
2894                 VN_RELE(vp);
2895         }
2896         smb_credrele(&scred);
2897         smbfs_rw_exit(&dnp->r_rwlock);
2898 
2899         return (error);
2900 }
2901 
2902 
2903 /* ARGSUSED */
2904 static int
2905 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2906         caller_context_t *ct, int flags)
2907 {
2908         struct smbnode  *np = VTOSMB(vp);
2909         int             error = 0;
2910         smbmntinfo_t    *smi;
2911 
2912         smi = VTOSMI(vp);
2913 
2914         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2915                 return (EIO);
2916 
2917         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2918                 return (EIO);
2919 
2920         /*
2921          * Require read access in the directory.
2922          */
2923         error = smbfs_access(vp, VREAD, 0, cr, ct);
2924         if (error)
2925                 return (error);
2926 
2927         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
2928 
2929         /*
2930          * XXX: Todo readdir cache here
2931          * Note: NFS code is just below this.
2932          *
2933          * I am serializing the entire readdir opreation
2934          * now since we have not yet implemented readdir
2935          * cache. This fix needs to be revisited once
2936          * we implement readdir cache.
2937          */
2938         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
2939                 return (EINTR);
2940 
2941         error = smbfs_readvdir(vp, uiop, cr, eofp, ct);
2942 
2943         smbfs_rw_exit(&np->r_lkserlock);
2944 
2945         return (error);
2946 }
2947 
/* ARGSUSED */
static int
smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
	caller_context_t *ct)
{
	/*
	 * Note: "limit" tells the SMB-level FindFirst/FindNext
	 * functions how many directory entries to request in
	 * each OtW call.  It needs to be large enough so that
	 * we don't make lots of tiny OtW requests, but there's
	 * no point making it larger than the maximum number of
	 * OtW entries that would fit in a maximum sized trans2
	 * response (64k / 48).  Beyond that, it's just tuning.
	 * WinNT used 512, Win2k used 1366.  We use 1000.
	 */
	static const int limit = 1000;
	/* Largest possible dirent size. */
	static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
	struct smb_cred scred;
	vnode_t		*newvp;
	struct smbnode	*np = VTOSMB(vp);
	struct smbfs_fctx *ctx;
	struct dirent64 *dp;
	ssize_t		save_resid;
	offset_t	save_offset; /* 64 bits */
	int		offset; /* yes, 32 bits */
	int		nmlen, error;
	ushort_t	reclen;

	ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

	/* Make sure we serialize for n_dirseq use. */
	ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));

	/*
	 * Make sure smbfs_open filled in n_dirseq
	 */
	if (np->n_dirseq == NULL)
		return (EBADF);

	/* Check for overflow of (32-bit) directory offset. */
	if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
	    (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
		return (EINVAL);

	/* Require space for at least one dirent. */
	if (uio->uio_resid < dbufsiz)
		return (EINVAL);

	SMBVDEBUG("dirname='%s'\n", np->n_rpath);
	smb_credinit(&scred, cr);
	/* Scratch dirent, reused for every entry we copy out. */
	dp = kmem_alloc(dbufsiz, KM_SLEEP);

	/* Saved so we can rewind the uio on certain errors (see out:). */
	save_resid = uio->uio_resid;
	save_offset = uio->uio_loffset;
	offset = uio->uio_offset;
	SMBVDEBUG("in: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);
	error = 0;

	/*
	 * Generate the "." and ".." entries here so we can
	 * (1) make sure they appear (but only once), and
	 * (2) deal with getting their I numbers which the
	 * findnext below does only for normal names.
	 */
	while (offset < FIRST_DIROFS) {
		/*
		 * Tricky bit filling in the first two:
		 * offset 0 is ".", offset 1 is ".."
		 * so strlen of these is offset+1.
		 */
		reclen = DIRENT64_RECLEN(offset + 1);
		if (uio->uio_resid < reclen)
			goto out;
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		dp->d_name[0] = '.';
		dp->d_name[1] = '.';
		dp->d_name[offset + 1] = '\0';
		/*
		 * Want the real I-numbers for the "." and ".."
		 * entries.  For these two names, we know that
		 * smbfslookup can get the nodes efficiently.
		 */
		error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
		if (error) {
			dp->d_ino = np->n_ino + offset; /* fiction */
		} else {
			dp->d_ino = VTOSMB(newvp)->n_ino;
			VN_RELE(newvp);
		}
		/*
		 * Note: d_off is the offset that a user-level program
		 * should seek to for reading the NEXT directory entry.
		 * See libc: readdir, telldir, seekdir
		 */
		dp->d_off = offset + 1;
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/*
		 * Note: uiomove updates uio->uio_offset,
		 * but we want it to be our "cookie" value,
		 * which just counts dirents ignoring size.
		 */
		uio->uio_offset = ++offset;
	}

	/*
	 * If there was a backward seek, we have to reopen.
	 * (The SMB search context can only move forward.)
	 */
	if (offset < np->n_dirofs) {
		SMBVDEBUG("Reopening search %d:%d\n",
		    offset, np->n_dirofs);
		error = smbfs_smb_findopen(np, "*", 1,
		    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
		    &scred, &ctx);
		if (error) {
			SMBVDEBUG("can not open search, error = %d", error);
			goto out;
		}
		/* free the old one */
		(void) smbfs_smb_findclose(np->n_dirseq, &scred);
		/* save the new one */
		np->n_dirseq = ctx;
		np->n_dirofs = FIRST_DIROFS;
	} else {
		ctx = np->n_dirseq;
	}

	/*
	 * Skip entries before the requested offset.
	 */
	while (np->n_dirofs < offset) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;
	}

	/*
	 * While there's room in the caller's buffer:
	 *	get a directory entry from SMB,
	 *	convert to a dirent, copyout.
	 * We stop when there is no longer room for a
	 * maximum sized dirent because we must decide
	 * before we know anything about the next entry.
	 */
	while (uio->uio_resid >= dbufsiz) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;

		/* Sanity check the name length. */
		nmlen = ctx->f_nmlen;
		if (nmlen > SMB_MAXFNAMELEN) {
			nmlen = SMB_MAXFNAMELEN;
			SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
		}
		if (smbfs_fastlookup) {
			/* See comment at smbfs_fastlookup above. */
			if (smbfs_nget(vp, ctx->f_name, nmlen,
			    &ctx->f_attr, &newvp) == 0)
				VN_RELE(newvp);
		}

		reclen = DIRENT64_RECLEN(nmlen);
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		bcopy(ctx->f_name, dp->d_name, nmlen);
		dp->d_name[nmlen] = '\0';
		dp->d_ino = ctx->f_inum;
		dp->d_off = offset + 1;	/* See d_off comment above */
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/* See comment re. uio_offset above. */
		uio->uio_offset = ++offset;
	}

out:
	/*
	 * When we come to the end of a directory, the
	 * SMB-level functions return ENOENT, but the
	 * caller is not expecting an error return.
	 *
	 * Also note that we must delay the call to
	 * smbfs_smb_findclose(np->n_dirseq, ...)
	 * until smbfs_close so that all reads at the
	 * end of the directory will return no data.
	 */
	if (error == ENOENT) {
		error = 0;
		if (eofp)
			*eofp = 1;
	}
	/*
	 * If we encountered an error (i.e. "access denied")
	 * from the FindFirst call, we will have copied out
	 * the "." and ".." entries leaving offset == 2.
	 * In that case, restore the original offset/resid
	 * so the caller gets no data with the error.
	 */
	if (error != 0 && offset == FIRST_DIROFS) {
		uio->uio_loffset = save_offset;
		uio->uio_resid = save_resid;
	}
	SMBVDEBUG("out: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);

	kmem_free(dp, dbufsiz);
	smb_credrele(&scred);
	return (error);
}
3164 
3165 
3166 /*
3167  * The pair of functions VOP_RWLOCK, VOP_RWUNLOCK
3168  * are optional functions that are called by:
3169  *    getdents, before/after VOP_READDIR
3170  *    pread, before/after ... VOP_READ
3171  *    pwrite, before/after ... VOP_WRITE
3172  *    (other places)
3173  *
3174  * Careful here: None of the above check for any
3175  * error returns from VOP_RWLOCK / VOP_RWUNLOCK!
3176  * In fact, the return value from _rwlock is NOT
3177  * an error code, but V_WRITELOCK_TRUE / _FALSE.
3178  *
3179  * Therefore, it's up to _this_ code to make sure
3180  * the lock state remains balanced, which means
3181  * we can't "bail out" on interrupts, etc.
3182  */
3183 
3184 /* ARGSUSED2 */
3185 static int
3186 smbfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3187 {
3188         smbnode_t       *np = VTOSMB(vp);
3189 
3190         if (!write_lock) {
3191                 (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_READER, FALSE);
3192                 return (V_WRITELOCK_FALSE);
3193         }
3194 
3195 
3196         (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, FALSE);
3197         return (V_WRITELOCK_TRUE);
3198 }
3199 
3200 /* ARGSUSED */
3201 static void
3202 smbfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3203 {
3204         smbnode_t       *np = VTOSMB(vp);
3205 
3206         smbfs_rw_exit(&np->r_rwlock);
3207 }
3208 
3209 
3210 /* ARGSUSED */
3211 static int
3212 smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3213 {
3214         smbmntinfo_t    *smi;
3215 
3216         smi = VTOSMI(vp);
3217 
3218         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3219                 return (EPERM);
3220 
3221         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3222                 return (EIO);
3223 
3224         /*
3225          * Because we stuff the readdir cookie into the offset field
3226          * someone may attempt to do an lseek with the cookie which
3227          * we want to succeed.
3228          */
3229         if (vp->v_type == VDIR)
3230                 return (0);
3231 
3232         /* Like NFS3, just check for 63-bit overflow. */
3233         if (*noffp < 0)
3234                 return (EINVAL);
3235 
3236         return (0);
3237 }
3238 
3239 
3240 /*
3241  * XXX
3242  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
3243  */
3244 static int
3245 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
3246         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
3247         caller_context_t *ct)
3248 {
3249         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
3250                 return (EIO);
3251 
3252         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
3253                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
3254         else
3255                 return (ENOSYS);
3256 }
3257 
3258 /*
3259  * Free storage space associated with the specified vnode.  The portion
3260  * to be freed is specified by bfp->l_start and bfp->l_len (already
3261  * normalized to a "whence" of 0).
3262  *
3263  * Called by fcntl(fd, F_FREESP, lkp) for libc:ftruncate, etc.
3264  */
3265 /* ARGSUSED */
3266 static int
3267 smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
3268         offset_t offset, cred_t *cr, caller_context_t *ct)
3269 {
3270         int             error;
3271         smbmntinfo_t    *smi;
3272 
3273         smi = VTOSMI(vp);
3274 
3275         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3276                 return (EIO);
3277 
3278         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3279                 return (EIO);
3280 
3281         /* Caller (fcntl) has checked v_type */
3282         ASSERT(vp->v_type == VREG);
3283         if (cmd != F_FREESP)
3284                 return (EINVAL);
3285 
3286         /*
3287          * Like NFS3, no 32-bit offset checks here.
3288          * Our SMB layer takes care to return EFBIG
3289          * when it has to fallback to a 32-bit call.
3290          */
3291 
3292         error = convoff(vp, bfp, 0, offset);
3293         if (!error) {
3294                 ASSERT(bfp->l_start >= 0);
3295                 if (bfp->l_len == 0) {
3296                         struct vattr va;
3297 
3298                         /*
3299                          * ftruncate should not change the ctime and
3300                          * mtime if we truncate the file to its
3301                          * previous size.
3302                          */
3303                         va.va_mask = AT_SIZE;
3304                         error = smbfsgetattr(vp, &va, cr);
3305                         if (error || va.va_size == bfp->l_start)
3306                                 return (error);
3307                         va.va_mask = AT_SIZE;
3308                         va.va_size = bfp->l_start;
3309                         error = smbfssetattr(vp, &va, 0, cr);
3310                 } else
3311                         error = EINVAL;
3312         }
3313 
3314         return (error);
3315 }
3316 
/*
 * Report configurable filesystem parameters (VOP_PATHCONF).
 * Commands not handled in the switch fall through to the generic
 * fs_pathconf().
 */
/* ARGSUSED */
static int
smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
        caller_context_t *ct)
{
        vfs_t *vfs;
        smbmntinfo_t *smi;
        struct smb_share *ssp;

        vfs = vp->v_vfsp;
        smi = VFTOSMI(vfs);

        /* Reject cross-zone callers and dead/unmounted filesystems. */
        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        switch (cmd) {
        case _PC_FILESIZEBITS:
                /* 64-bit sizes only if the server advertises large files. */
                ssp = smi->smi_share;
                if (SSTOVC(ssp)->vc_sopt.sv_caps & SMB_CAP_LARGE_FILES)
                        *valp = 64;
                else
                        *valp = 32;
                break;

        case _PC_LINK_MAX:
                /* We only ever report one link to an object */
                *valp = 1;
                break;

        case _PC_ACL_ENABLED:
                /*
                 * Always indicate that ACLs are enabled and
                 * that we support ACE_T format, otherwise
                 * libsec will ask for ACLENT_T format data
                 * which we don't support.
                 */
                *valp = _ACL_ACE_ENABLED;
                break;

        case _PC_SYMLINK_MAX:   /* No symlinks until we do Unix extensions */
                *valp = 0;
                break;

        case _PC_XATTR_EXISTS:
                /* Only meaningful when the mount allows extended attrs. */
                if (vfs->vfs_flag & VFS_XATTR) {
                        *valp = smbfs_xa_exists(vp, cr);
                        break;
                }
                return (EINVAL);

        case _PC_TIMESTAMP_RESOLUTION:
                /*
                 * Windows times are tenths of microseconds
                 * (multiples of 100 nanoseconds).
                 */
                *valp = 100L;
                break;

        default:
                return (fs_pathconf(vp, cmd, valp, cr, ct));
        }
        return (0);
}
3383 
/*
 * Get security attributes (ACL data) for vp (VOP_GETSECATTR).
 * Only ACE-format requests are served; when the mount doesn't do
 * real ACLs (or the share returns ENOSYS) a fabricated ACL based
 * on the mode bits is returned via fs_fab_acl().
 */
/* ARGSUSED */
static int
smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
        caller_context_t *ct)
{
        vfs_t *vfsp;
        smbmntinfo_t *smi;
        int     error;
        uint_t  mask;

        vfsp = vp->v_vfsp;
        smi = VFTOSMI(vfsp);

        /* Reject cross-zone callers and dead/unmounted filesystems. */
        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        /*
         * Our _pathconf indicates _ACL_ACE_ENABLED,
         * so we should only see VSA_ACE, etc here.
         * Note: vn_create asks for VSA_DFACLCNT,
         * and it expects ENOSYS and empty data.
         */
        mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT |
            VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
        if (mask == 0)
                return (ENOSYS);

        /* Fetch the real ACL only when the mount has ACL support. */
        if (smi->smi_flags & SMI_ACL)
                error = smbfs_acl_getvsa(vp, vsa, flag, cr);
        else
                error = ENOSYS;

        /* Fall back to an ACL fabricated from the mode bits. */
        if (error == ENOSYS)
                error = fs_fab_acl(vp, vsa, flag, cr, ct);

        return (error);
}
3424 
/*
 * Set security attributes (ACL data) on vp (VOP_SETSECATTR).
 * Only ACE-format data is accepted, only on writable mounts with
 * ACL support, and only by the mount owner (or privileged caller).
 */
/* ARGSUSED */
static int
smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
        caller_context_t *ct)
{
        vfs_t *vfsp;
        smbmntinfo_t *smi;
        int     error;
        uint_t  mask;

        vfsp = vp->v_vfsp;
        smi = VFTOSMI(vfsp);

        /* Reject cross-zone callers and dead/unmounted filesystems. */
        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        /*
         * Our _pathconf indicates _ACL_ACE_ENABLED,
         * so we should only see VSA_ACE, etc here.
         */
        mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT);
        if (mask == 0)
                return (ENOSYS);

        if (vfsp->vfs_flag & VFS_RDONLY)
                return (EROFS);

        /*
         * Allow only the mount owner to do this.
         * See comments at smbfs_access_rwx.
         */
        error = secpolicy_vnode_setdac(cr, smi->smi_uid);
        if (error != 0)
                return (error);

        if (smi->smi_flags & SMI_ACL)
                error = smbfs_acl_setvsa(vp, vsa, flag, cr);
        else
                error = ENOSYS;

        return (error);
}
3470 
3471 
3472 /*
3473  * XXX
3474  * This op should eventually support PSARC 2007/268.
3475  */
3476 static int
3477 smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
3478         caller_context_t *ct)
3479 {
3480         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
3481                 return (EIO);
3482 
3483         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
3484                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
3485         else
3486                 return (ENOSYS);
3487 }
3488 
/* correspond to bp_mapin() in bp_map.c */
/*
 * Map the pages on list "pp" into kernel virtual address space and
 * aim uiop's (single) iovec at the mapping so the page contents can
 * be moved with smb_rwuio().  The range covered is derived from
 * uiop->uio_loffset/uio_resid, rounded out to whole pages.
 *
 * A one-page request uses the cheap kpm mapping when available;
 * otherwise a heap_arena VA range is allocated and each page is
 * entered with hat_devload().  Undo with uio_page_mapout().
 *
 * Returns 0 on success, EFAULT if a mapping could not be obtained.
 */
static int 
uio_page_mapin(uio_t * uiop, page_t * pp)
{
        u_offset_t      off;
        size_t          size;
        pgcnt_t         npages;
        caddr_t         kaddr;
        pfn_t           pfnum;

        /* Page-interior offset of the I/O, and total mapped size. */
        off = (uintptr_t) uiop->uio_loffset & PAGEOFFSET;
        size = P2ROUNDUP(uiop->uio_resid + off, PAGESIZE);
        npages = btop(size);

        ASSERT(pp != NULL);

        if (npages == 1 && kpm_enable) {
                /* Single page: use the kernel physical map segment. */
                kaddr = hat_kpm_mapin(pp, NULL);
                if (kaddr == NULL)
                        return (EFAULT);

                uiop->uio_iov->iov_base = kaddr + off;
                uiop->uio_iov->iov_len = PAGESIZE - off;

        } else {
                /*
                 * Multiple pages: carve a VA range out of heap_arena.
                 * NOTE(review): with VM_SLEEP this presumably cannot
                 * return NULL; the check is defensive.
                 */
                kaddr = vmem_xalloc(heap_arena, size, PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
                if (kaddr == NULL)
                        return (EFAULT);

                uiop->uio_iov->iov_base = kaddr + off;
                uiop->uio_iov->iov_len = size - off;

                /* map pages into kaddr */
                uint_t          attr = PROT_READ | PROT_WRITE | HAT_NOSYNC;
                while (npages-- > 0) {
                        pfnum = pp->p_pagenum;
                        pp = pp->p_next;

                        hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum, attr, HAT_LOAD_LOCK);
                        kaddr += PAGESIZE;
                }
        }
        return (0);
}
3533 
/* correspond to bp_mapout() in bp_map.c */
/*
 * Undo a uio_page_mapin(): tear down the kernel mapping described by
 * uiop's iovec (which must still hold the base/len that mapin set up,
 * e.g. via the caller's saved uiov_bak copy) and clear the iovec.
 */
static void 
uio_page_mapout(uio_t * uiop, page_t * pp)
{
        u_offset_t      off;
        size_t          size;
        pgcnt_t         npages;
        caddr_t         kaddr;

        /* Recover the page-aligned base and total size from the iovec. */
        kaddr = uiop->uio_iov->iov_base;
        off = (uintptr_t) kaddr & PAGEOFFSET;
        size = P2ROUNDUP(uiop->uio_iov->iov_len + off, PAGESIZE);
        npages = btop(size);

        ASSERT(pp != NULL);

        kaddr = (caddr_t) ((uintptr_t) kaddr & MMU_PAGEMASK);

        if (npages == 1 && kpm_enable) {
                /* Single page was mapped via kpm. */
                hat_kpm_mapout(pp, NULL, kaddr);

        } else {
                /* Unload the HAT translations, then free the VA range. */
                hat_unload(kas.a_hat, (void *) kaddr, size,
                           HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
                vmem_free(heap_arena, (void *) kaddr, size);
        }
        /* Invalidate the iovec so stale mappings can't be reused. */
        uiop->uio_iov->iov_base = 0;
        uiop->uio_iov->iov_len = 0;
}
3563 
/*
 * Support mmap() on smbfs files (VOP_MAP): validate the request and
 * create a segvn segment backed by this vnode.  Mappings are refused
 * for non-regular files, no-map/no-cache vnodes, and files subject
 * to mandatory locking.
 */
static int 
smbfs_map(vnode_t * vp, offset_t off, struct as * as, caddr_t * addrp,
       size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t * cr,
          caller_context_t * ct)
{
        smbnode_t      *np;
        smbmntinfo_t   *smi;
        struct vattr    va;
        segvn_crargs_t  vn_a;
        int             error;

        np = VTOSMB(vp);
        smi = VTOSMI(vp);

        /* Reject cross-zone callers and dead/unmounted filesystems. */
        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        if (vp->v_flag & VNOMAP || vp->v_flag & VNOCACHE)
                return (EAGAIN);

        if (vp->v_type != VREG)
                return (ENODEV);

        /* Need current attributes for the MANDLOCK mode check below. */
        va.va_mask = AT_ALL;
        if (error = smbfsgetattr(vp, &va, cr))
                return (error);

        /* Serialize against other over-the-wire operations on this node. */
        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
                return (EINTR);

        /* Mandatory locking and mmap don't mix. */
        if (MANDLOCK(vp, va.va_mode)) {
                error = EAGAIN;
                goto out;
        }
        /* Pick a user address range, then build the segvn segment. */
        as_rangelock(as);
        error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);

        if (error != 0) {
                as_rangeunlock(as);
                goto out;
        }
        vn_a.vp = vp;
        vn_a.offset = off;
        vn_a.type = flags & MAP_TYPE;
        vn_a.prot = prot;
        vn_a.maxprot = maxprot;
        vn_a.flags = flags & ~MAP_TYPE;
        vn_a.cred = cr;
        vn_a.amp = NULL;
        vn_a.szc = 0;
        vn_a.lgrp_mem_policy_flags = 0;

        error = as_map(as, *addrp, len, segvn_create, &vn_a);

        as_rangeunlock(as);

out:
        smbfs_rw_exit(&np->r_lkserlock);

        return (error);
}
3628 
3629 static int 
3630 smbfs_addmap(vnode_t * vp, offset_t off, struct as * as, caddr_t addr,
3631        size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t * cr,
3632              caller_context_t * ct)
3633 {
3634         atomic_add_long((ulong_t *) & VTOSMB(vp)->r_mapcnt, btopr(len));
3635         return (0);
3636 }
3637 
3638 static int 
3639 smbfs_delmap(vnode_t * vp, offset_t off, struct as * as, caddr_t addr,
3640          size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t * cr,
3641              caller_context_t * ct)
3642 {
3643 
3644         smbnode_t      *np;
3645 
3646         atomic_add_long((ulong_t *) & VTOSMB(vp)->r_mapcnt, -btopr(len));
3647 
3648         /*
3649          * mark RDIRTY here, will be used to check if a file is dirty when
3650          * unmount smbfs
3651          */
3652         if (vn_has_cached_data(vp) && !vn_is_readonly(vp) && (maxprot & PROT_WRITE)
3653             && (flags == MAP_SHARED)) {
3654                 np = VTOSMB(vp);
3655                 mutex_enter(&np->r_statelock);
3656                 np->r_flags |= RDIRTY;
3657                 mutex_exit(&np->r_statelock);
3658         }
3659         return (0);
3660 }
3661 
/*
 * Flush dirty pages of vp in [off, off+len) back to the server
 * (VOP_PUTPAGE).  len == 0 means "the whole file" and is handled
 * via pvn_vplist_dirty(); otherwise each page in the range is
 * looked up and, if dirty, pushed through smbfs_putapage().
 */
static int 
smbfs_putpage(vnode_t * vp, offset_t off, size_t len, int flags,
              cred_t * cr, caller_context_t * ct)
{

        smbnode_t      *np;
        size_t          io_len;
        u_offset_t      io_off;
        u_offset_t      eoff;
        int             error = 0;
        page_t         *pp;
        int             rdirty;

        np = VTOSMB(vp);

        if (len == 0) {

                /* will flush the whole file, so clear RDIRTY */
                if (off == (u_offset_t) 0 && (np->r_flags & RDIRTY)) {
                        mutex_enter(&np->r_statelock);
                        rdirty = np->r_flags & RDIRTY;
                        np->r_flags &= ~RDIRTY;
                        mutex_exit(&np->r_statelock);
                } else
                        rdirty = 0;

                error = pvn_vplist_dirty(vp, off, smbfs_putapage, flags, cr);

                /*
                 * if failed and the vnode was dirty before and we aren't
                 * forcibly invalidating pages, then mark RDIRTY again.
                 */
                if (error && rdirty &&
                    (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
                        mutex_enter(&np->r_statelock);
                        np->r_flags |= RDIRTY;
                        mutex_exit(&np->r_statelock);
                }
        } else {

                /* Clamp the range to the current EOF. */
                eoff = off + len;

                mutex_enter(&np->r_statelock);
                if (eoff > np->r_size)
                        eoff = np->r_size;
                mutex_exit(&np->r_statelock);

                /* Walk the range a page at a time; putapage may kluster. */
                for (io_off = off; io_off < eoff; io_off += io_len) {
                        if ((flags & B_INVAL) || (flags & B_ASYNC) == 0) {
                                /* Synchronous lookup; may block on the page. */
                                pp = page_lookup(vp, io_off,
                                                 (flags & (B_INVAL | B_FREE) ? SE_EXCL : SE_SHARED));
                        } else {
                                /* Async: skip pages we can't lock right now. */
                                pp = page_lookup_nowait(vp, io_off,
                                    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
                        }

                        if (pp == NULL || !pvn_getdirty(pp, flags))
                                /* Missing or clean page: advance one page. */
                                io_len = PAGESIZE;
                        else {
                                /* Dirty: write it out; io_len set by callee. */
                                error = smbfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
                        }
                }

        }

        return (error);
}
3729 
3730 static int 
3731 smbfs_putapage(vnode_t * vp, page_t * pp, u_offset_t * offp, size_t * lenp,
3732                int flags, cred_t * cr)
3733 {
3734 
3735         struct smb_cred scred;
3736         smbnode_t      *np;
3737         smbmntinfo_t   *smi;
3738         smb_share_t    *ssp;
3739         uio_t           uio;
3740         iovec_t         uiov, uiov_bak;
3741 
3742         size_t          io_len;
3743         u_offset_t      io_off;
3744         size_t          limit;
3745         size_t          bsize;
3746         size_t          blksize;
3747         u_offset_t      blkoff;
3748         int             error;
3749 
3750         np = VTOSMB(vp);
3751         smi = VTOSMI(vp);
3752         ssp = smi->smi_share;
3753 
3754         /* do block io, get a kluster of dirty pages in a block. */
3755         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3756         blkoff = pp->p_offset / bsize;
3757         blkoff *= bsize;
3758         blksize = roundup(bsize, PAGESIZE);
3759 
3760         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, blkoff, blksize, flags);
3761 
3762         ASSERT(pp->p_offset >= blkoff);
3763 
3764         if (io_off + io_len > blkoff + blksize) {
3765                 ASSERT((io_off + io_len) - (blkoff + blksize) < PAGESIZE);
3766         }
3767 
3768         /* Don't allow put pages beyond EOF */
3769         mutex_enter(&np->r_statelock);
3770         limit=MIN(np->r_size, blkoff + blksize);
3771         mutex_exit(&np->r_statelock);
3772 
3773         if (io_off >= limit) {
3774                 error = 0;
3775                 goto out;
3776         } else if (io_off + io_len > limit) {
3777                 int             npages = btopr(limit - io_off);
3778                 page_t         *trunc;
3779                 page_list_break(&pp, &trunc, npages);
3780                 if (trunc)
3781                         pvn_write_done(trunc, flags);
3782                 io_len = limit - io_off;
3783         }
3784 
3785         /*
3786          * Taken from NFS4. The RMODINPROGRESS flag makes sure that
3787          * smbfs_putapage() sees a consistent value of r_size. RMODINPROGRESS
3788          * is set in writenp(). When RMODINPROGRESS is set it indicates that
3789          * a uiomove() is in progress and the r_size has not been made
3790          * consistent with the new size of the file. When the uiomove()
3791          * completes the r_size is updated and the RMODINPROGRESS flag is
3792          * cleared.
3793          * 
3794          * The RMODINPROGRESS flag makes sure that smbfs_putapage() sees a
3795          * consistent value of r_size. Without this handshaking, it is
3796          * possible that smbfs_putapage() picks  up the old value of r_size
3797          * before the uiomove() in writenp() completes. This will result in
3798          * the write through smbfs_putapage() being dropped.
3799          * 
3800          * More precisely, there is a window between the time the uiomove()
3801          * completes and the time the r_size is updated. If a VOP_PUTPAGE()
3802          * operation intervenes in this window, the page will be picked up,
3803          * because it is dirty (it will be unlocked, unless it was
3804          * pagecreate'd). When the page is picked up as dirty, the dirty bit
3805          * is reset (pvn_getdirty()). In smbfs_putapage(), r_size is checked.
3806          * This will still be the old size. Therefore the page will not be
3807          * written out. When segmap_release() calls VOP_PUTPAGE(), the page
3808          * will be found to be clean and the write will be dropped.
3809          */
3810         if (np->r_flags & RMODINPROGRESS) {
3811 
3812                 mutex_enter(&np->r_statelock);
3813                 if ((np->r_flags & RMODINPROGRESS) &&
3814                     np->r_modaddr + MAXBSIZE > io_off &&
3815                     np->r_modaddr < io_off + io_len) {
3816                         page_t         *plist;
3817                         /*
3818                          * A write is in progress for this region of the
3819                          * file. If we did not detect RMODINPROGRESS here,
3820                          * the data beyond the file size won't be write out.
3821                          * We end up losing data. So we decide to set the
3822                          * modified bit on each page in the page list and
3823                          * mark the smbnode with RDIRTY. This write will be
3824                          * restarted at some later time.
3825                          */
3826                         plist = pp;
3827                         while (plist != NULL) {
3828                                 pp = plist;
3829                                 page_sub(&plist, pp);
3830                                 hat_setmod(pp);
3831                                 page_io_unlock(pp);
3832                                 page_unlock(pp);
3833                         }
3834                         np->r_flags |= RDIRTY;
3835                         mutex_exit(&np->r_statelock);
3836                         if (offp)
3837                                 *offp = io_off;
3838                         if (lenp)
3839                                 *lenp = io_len;
3840                         return (0);
3841                 }
3842                 mutex_exit(&np->r_statelock);
3843         }
3844 
3845         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
3846                 return (EINTR);
3847         smb_credinit(&scred, cr);
3848 
3849         if (np->n_vcgenid != ssp->ss_vcgenid)
3850                 error = ESTALE;
3851         else {
3852                 /* just use uio instead of buf, since smb_rwuio need uio. */
3853                 uiov.iov_base = 0;
3854                 uiov.iov_len = 0;
3855                 uio.uio_iov = &uiov;
3856                 uio.uio_iovcnt = 1;
3857                 uio.uio_loffset = io_off;
3858                 uio.uio_resid = io_len;
3859                 uio.uio_segflg = UIO_SYSSPACE;
3860                 uio.uio_llimit = MAXOFFSET_T;
3861                 /* map pages into kernel address space, and setup uio. */
3862                 error = uio_page_mapin(&uio, pp);
3863                 if (error == 0) {
3864                         uiov_bak.iov_base = uiov.iov_base;
3865                         uiov_bak.iov_len = uiov.iov_len;
3866                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE, &uio, &scred, smb_timo_write);
3867                         if (error == 0) {
3868                                 mutex_enter(&np->r_statelock);
3869                                 np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
3870                                 mutex_exit(&np->r_statelock);
3871                                 (void) smbfs_smb_flush(np, &scred);
3872                         }
3873                         /* unmap pages from kernel address space. */
3874                         uio.uio_iov = &uiov_bak;
3875                         uio_page_mapout(&uio, pp);
3876                 }
3877         }
3878 
3879         smb_credrele(&scred);
3880         smbfs_rw_exit(&np->r_lkserlock);
3881 
3882 out:
3883         pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
3884 
3885         if (offp)
3886                 *offp = io_off;
3887         if (lenp)
3888                 *lenp = io_len;
3889 
3890         return (error);
3891 }
3892 
/*
 * Bring pages of vp covering [off, off+len) into memory (VOP_GETPAGE).
 * Single-page requests go straight to smbfs_getapage(); larger ones
 * go through pvn_getpages(), which calls smbfs_getapage() per page.
 */
static int 
smbfs_getpage(vnode_t * vp, offset_t off, size_t len, uint_t * protp,
              page_t * pl[], size_t plsz, struct seg * seg, caddr_t addr,
              enum seg_rw rw, cred_t * cr, caller_context_t * ct)
{

        smbnode_t      *np;
        int             error;

        /* these pages have all protections. */
        if (protp)
                *protp = PROT_ALL;

        np = VTOSMB(vp);

        /* Don't allow get pages beyond EOF, unless it's segkmap. */
        mutex_enter(&np->r_statelock);
        if (off + len > np->r_size + PAGESIZE && seg != segkmap){
                mutex_exit(&np->r_statelock);
                return (EFAULT);
        }
        mutex_exit(&np->r_statelock);

        if (len <= PAGESIZE) {
                error = smbfs_getapage(vp, off, len, protp, pl, plsz, seg, addr, rw,
                                       cr);
        } else {
                error = pvn_getpages(smbfs_getapage, vp, off, len, protp, pl, plsz, seg,
                                     addr, rw, cr);
        }

        return (error);
}
3926 
3927 static int 
3928 smbfs_getapage(vnode_t * vp, u_offset_t off, size_t len,
3929  uint_t * protp, page_t * pl[], size_t plsz, struct seg * seg, caddr_t addr,
3930                enum seg_rw rw, cred_t * cr)
3931 {
3932 
3933         smbnode_t      *np;
3934         smbmntinfo_t   *smi;
3935         smb_share_t    *ssp;
3936         smb_cred_t      scred;
3937 
3938         page_t         *pp;
3939         uio_t           uio;
3940         iovec_t         uiov, uiov_bak;
3941 
3942         u_offset_t      blkoff;
3943         size_t          bsize;
3944         size_t          blksize;
3945 
3946         u_offset_t      io_off;
3947         size_t          io_len;
3948         size_t          pages_len;
3949 
3950         int             error = 0;
3951 
3952         np = VTOSMB(vp);
3953         smi = VTOSMI(vp);
3954         ssp = smi->smi_share;
3955 
3956         /* if pl is null,it's meaningless */
3957         if (pl == NULL)
3958                 return (EFAULT);
3959 
3960 again:
3961         if (page_exists(vp, off) == NULL) {
3962                 if (rw == S_CREATE) {
3963                         /* just return a empty page if asked to create. */
3964                         if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT | PG_EXCL, seg, addr)) == NULL)
3965                                 goto again;
3966                         pages_len = PAGESIZE;
3967                 } else {
3968 
3969                         /*
3970                          * do block io, get a kluster of non-exist pages in a
3971                          * block.
3972                          */
3973                         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3974                         blkoff = off / bsize;
3975                         blkoff *= bsize;
3976                         blksize = roundup(bsize, PAGESIZE);
3977 
3978                         pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len, blkoff, blksize, 0);
3979 
3980                         if (pp == NULL)
3981                                 goto again;
3982 
3983                         pages_len = io_len;
3984 
3985                         /* Don't need to get pages from server if it's segkmap 
3986                          * that reads beyond EOF. */
3987                         mutex_enter(&np->r_statelock);
3988                         if (io_off >= np->r_size && seg == segkmap) {
3989                                 mutex_exit(&np->r_statelock);
3990                                 error = 0;
3991                                 goto out;
3992                         } else if (io_off + io_len > np->r_size) {
3993                                 int             npages = btopr(np->r_size - io_off);
3994                                 page_t         *trunc;
3995 
3996                                 page_list_break(&pp, &trunc, npages);
3997                                 if (trunc)
3998                                         pvn_read_done(trunc, 0);
3999                                 io_len = np->r_size - io_off;
4000                         }
4001                         mutex_exit(&np->r_statelock);
4002 
4003                         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
4004                                 return EINTR;
4005                         smb_credinit(&scred, cr);
4006 
4007                         /*
4008                          * just use uio instead of buf, since smb_rwuio need
4009                          * uio.
4010                          */
4011                         uiov.iov_base = 0;
4012                         uiov.iov_len = 0;
4013                         uio.uio_iov = &uiov;
4014                         uio.uio_iovcnt = 1;
4015                         uio.uio_loffset = io_off;
4016                         uio.uio_resid = io_len;
4017                         uio.uio_segflg = UIO_SYSSPACE;
4018                         uio.uio_llimit = MAXOFFSET_T;
4019 
4020                         /*
4021                          * map pages into kernel address space, and setup
4022                          * uio.
4023                          */
4024                         error = uio_page_mapin(&uio, pp);
4025                         if (error == 0) {
4026                                 uiov_bak.iov_base = uiov.iov_base;
4027                                 uiov_bak.iov_len = uiov.iov_len;
4028                                 error = smb_rwuio(ssp, np->n_fid, UIO_READ, &uio, &scred, smb_timo_read);
4029                                 /* unmap pages from kernel address space. */
4030                                 uio.uio_iov = &uiov_bak;
4031                                 uio_page_mapout(&uio, pp);
4032                         }
4033                         smb_credrele(&scred);
4034                         smbfs_rw_exit(&np->r_lkserlock);
4035                 }
4036         } else {
4037                 se_t            se = rw == S_CREATE ? SE_EXCL : SE_SHARED;
4038                 if ((pp = page_lookup(vp, off, se)) == NULL) {
4039                         goto again;
4040                 }
4041         }
4042 
4043 out:
4044         if (pp) {
4045                 if (error) {
4046                         pvn_read_done(pp, B_ERROR);
4047                 } else {
4048                         /* init page list, unlock pages. */
4049                         pvn_plist_init(pp, pl, plsz, off, pages_len, rw);
4050                 }
4051         }
4052         return (error);
4053 }
4054 
4055 /* correspond to nfs_invalidate_pages() in nfs_client.c */
4056 void 
4057 smbfs_invalidate_pages(vnode_t * vp, u_offset_t off, cred_t * cr)
4058 {
4059 
4060         smbnode_t      *np;
4061 
4062         np = VTOSMB(vp);
4063         /* will flush the whole file, so clear RDIRTY */
4064         if (off == (u_offset_t) 0 && (np->r_flags & RDIRTY)) {
4065                 mutex_enter(&np->r_statelock);
4066                 np->r_flags &= ~RDIRTY;
4067                 mutex_exit(&np->r_statelock);
4068         }
4069         (void) pvn_vplist_dirty(vp, off, smbfs_putapage, B_INVAL | B_TRUNC, cr);
4070 }