cstyle
Fix missing logic for ESTALE and NFS_EOF
Implement ioctl _FIODIRECTIO
Kill flags arg in smbfs_purge_caches
Lots of comment cleanup
5404 smbfs needs mmap support
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
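
Background (not part of this webrev): a minimal user-level sketch of the
mmap(2) access that 5404 enables, assuming a regular, non-empty file on an
smbfs mount at the hypothetical path /mnt/smb/file.txt.

    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <unistd.h>

    int
    main(void)
    {
            struct stat st;
            char *p;
            /* Hypothetical path on an smbfs mount. */
            int fd = open("/mnt/smb/file.txt", O_RDONLY);

            if (fd == -1 || fstat(fd, &st) == -1)
                    return (1);

            /* Backed by the smbfs VOP_MAP/VOP_GETPAGE added in this change. */
            p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED)
                    return (1);

            (void) write(STDOUT_FILENO, p, (size_t)st.st_size);
            (void) munmap(p, (size_t)st.st_size);
            (void) close(fd);
            return (0);
    }

Before this change, VOP_MAP/GETPAGE/PUTPAGE were fs_nosys (ENOSYS) in the
vnodeops template removed below, so the mmap() above would simply fail.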

@@ -34,10 +34,17 @@
 
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
+/*
+ * Vnode operations
+ *
+ * This file is similar to nfs3_vnops.c
+ */
+
+#include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cred.h>
 #include <sys/vnode.h>
 #include <sys/vfs.h>
 #include <sys/filio.h>

@@ -48,11 +55,23 @@
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/vfs_opreg.h>
 #include <sys/policy.h>
+#include <sys/sdt.h>
+#include <sys/zone.h>
+#include <sys/vmsystm.h>
 
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/page.h>
+#include <vm/pvn.h>
+#include <vm/seg.h>
+#include <vm/seg_map.h>
+#include <vm/seg_kpm.h>
+#include <vm/seg_vn.h>
+
 #include <netsmb/smb_osdep.h>
 #include <netsmb/smb.h>
 #include <netsmb/smb_conn.h>
 #include <netsmb/smb_subr.h>
 

@@ -99,10 +118,12 @@
  * during directory listings, normally avoiding a second
  * OtW attribute fetch just after a readdir.
  */
 int smbfs_fastlookup = 1;
 
+struct vnodeops *smbfs_vnodeops = NULL;
+
 /* local static function defines */
 
 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
                         cred_t *);
 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,

@@ -116,11 +137,36 @@
 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
                         caller_context_t *);
 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
 static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);
 
+static int      smbfs_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int,
+                        cred_t *);
+static int      smbfs_bio(struct buf *, int, cred_t *);
+static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
+                        struct uio *uiop, int pgcreated);
+
+static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
+static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
+                        caller_context_t *);
+static int      smbfs_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
+                        page_t *[], size_t, struct seg *, caddr_t,
+                        enum seg_rw, cred_t *);
+static int      smbfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
+                        int, cred_t *);
+static void     smbfs_delmap_callback(struct as *, void *, uint_t);
+
 /*
+ * Error flags used to pass information about certain errors
+ * that need special handling.
+ */
+#define SMBFS_EOF                       -98
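+/* See smbfs_bio(): returned for reads that start at or beyond EOF. */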
+
+/* When implementing OtW locks, make this a real function. */
+#define smbfs_lm_has_sleep(vp) 0
+
+/*
  * These are the vnode ops routines which implement the vnode interface to
  * the networked file system.  These routines just take their parameters,
  * make them look networkish by putting the right info into interface structs,
  * and then calling the appropriate remote routine(s) to do the work.
  *

@@ -128,113 +174,12 @@
  * we purge the directory cache relative to that vnode.  This way, the
  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
  * more details on smbnode locking.
  */
 
-static int      smbfs_open(vnode_t **, int, cred_t *, caller_context_t *);
-static int      smbfs_close(vnode_t *, int, int, offset_t, cred_t *,
-                        caller_context_t *);
-static int      smbfs_read(vnode_t *, struct uio *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_write(vnode_t *, struct uio *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
-                        caller_context_t *);
-static int      smbfs_getattr(vnode_t *, struct vattr *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_setattr(vnode_t *, struct vattr *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_access(vnode_t *, int, int, cred_t *, caller_context_t *);
-static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
-static void     smbfs_inactive(vnode_t *, cred_t *, caller_context_t *);
-static int      smbfs_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
-                        int, vnode_t *, cred_t *, caller_context_t *,
-                        int *, pathname_t *);
-static int      smbfs_create(vnode_t *, char *, struct vattr *, enum vcexcl,
-                        int, vnode_t **, cred_t *, int, caller_context_t *,
-                        vsecattr_t *);
-static int      smbfs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
-                        int);
-static int      smbfs_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
-                        caller_context_t *, int);
-static int      smbfs_mkdir(vnode_t *, char *, struct vattr *, vnode_t **,
-                        cred_t *, caller_context_t *, int, vsecattr_t *);
-static int      smbfs_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
-                        caller_context_t *, int);
-static int      smbfs_readdir(vnode_t *, struct uio *, cred_t *, int *,
-                        caller_context_t *, int);
-static int      smbfs_rwlock(vnode_t *, int, caller_context_t *);
-static void     smbfs_rwunlock(vnode_t *, int, caller_context_t *);
-static int      smbfs_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
-static int      smbfs_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
-                        struct flk_callback *, cred_t *, caller_context_t *);
-static int      smbfs_space(vnode_t *, int, struct flock64 *, int, offset_t,
-                        cred_t *, caller_context_t *);
-static int      smbfs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
-                        caller_context_t *);
-static int      smbfs_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
-                        caller_context_t *);
 
-/* Dummy function to use until correct function is ported in */
-int noop_vnodeop() {
-        return (0);
-}
-
-struct vnodeops *smbfs_vnodeops = NULL;
-
 /*
- * Most unimplemented ops will return ENOSYS because of fs_nosys().
- * The only ops where that won't work are ACCESS (due to open(2)
- * failures) and ... (anything else left?)
- */
-const fs_operation_def_t smbfs_vnodeops_template[] = {
-        { VOPNAME_OPEN,         { .vop_open = smbfs_open } },
-        { VOPNAME_CLOSE,        { .vop_close = smbfs_close } },
-        { VOPNAME_READ,         { .vop_read = smbfs_read } },
-        { VOPNAME_WRITE,        { .vop_write = smbfs_write } },
-        { VOPNAME_IOCTL,        { .vop_ioctl = smbfs_ioctl } },
-        { VOPNAME_GETATTR,      { .vop_getattr = smbfs_getattr } },
-        { VOPNAME_SETATTR,      { .vop_setattr = smbfs_setattr } },
-        { VOPNAME_ACCESS,       { .vop_access = smbfs_access } },
-        { VOPNAME_LOOKUP,       { .vop_lookup = smbfs_lookup } },
-        { VOPNAME_CREATE,       { .vop_create = smbfs_create } },
-        { VOPNAME_REMOVE,       { .vop_remove = smbfs_remove } },
-        { VOPNAME_LINK,         { .error = fs_nosys } }, /* smbfs_link, */
-        { VOPNAME_RENAME,       { .vop_rename = smbfs_rename } },
-        { VOPNAME_MKDIR,        { .vop_mkdir = smbfs_mkdir } },
-        { VOPNAME_RMDIR,        { .vop_rmdir = smbfs_rmdir } },
-        { VOPNAME_READDIR,      { .vop_readdir = smbfs_readdir } },
-        { VOPNAME_SYMLINK,      { .error = fs_nosys } }, /* smbfs_symlink, */
-        { VOPNAME_READLINK,     { .error = fs_nosys } }, /* smbfs_readlink, */
-        { VOPNAME_FSYNC,        { .vop_fsync = smbfs_fsync } },
-        { VOPNAME_INACTIVE,     { .vop_inactive = smbfs_inactive } },
-        { VOPNAME_FID,          { .error = fs_nosys } }, /* smbfs_fid, */
-        { VOPNAME_RWLOCK,       { .vop_rwlock = smbfs_rwlock } },
-        { VOPNAME_RWUNLOCK,     { .vop_rwunlock = smbfs_rwunlock } },
-        { VOPNAME_SEEK,         { .vop_seek = smbfs_seek } },
-        { VOPNAME_FRLOCK,       { .vop_frlock = smbfs_frlock } },
-        { VOPNAME_SPACE,        { .vop_space = smbfs_space } },
-        { VOPNAME_REALVP,       { .error = fs_nosys } }, /* smbfs_realvp, */
-        { VOPNAME_GETPAGE,      { .error = fs_nosys } }, /* smbfs_getpage, */
-        { VOPNAME_PUTPAGE,      { .error = fs_nosys } }, /* smbfs_putpage, */
-        { VOPNAME_MAP,          { .error = fs_nosys } }, /* smbfs_map, */
-        { VOPNAME_ADDMAP,       { .error = fs_nosys } }, /* smbfs_addmap, */
-        { VOPNAME_DELMAP,       { .error = fs_nosys } }, /* smbfs_delmap, */
-        { VOPNAME_DUMP,         { .error = fs_nosys } }, /* smbfs_dump, */
-        { VOPNAME_PATHCONF,     { .vop_pathconf = smbfs_pathconf } },
-        { VOPNAME_PAGEIO,       { .error = fs_nosys } }, /* smbfs_pageio, */
-        { VOPNAME_SETSECATTR,   { .vop_setsecattr = smbfs_setsecattr } },
-        { VOPNAME_GETSECATTR,   { .vop_getsecattr = smbfs_getsecattr } },
-        { VOPNAME_SHRLOCK,      { .vop_shrlock = smbfs_shrlock } },
-        { NULL, NULL }
-};
-
-/*
  * XXX
  * When new and relevant functionality is enabled, we should be
  * calling vfs_set_feature() to inform callers that pieces of
  * functionality are available, per PSARC 2007/227.
  */

@@ -282,11 +227,10 @@
         /*
          * Keep track of the vnode type at first open.
          * It may change later, and we need close to do
          * cleanup for the type we opened.  Also deny
          * open of new types until old type is closed.
-         * XXX: Per-open instance nodes whould help.
          */
         if (np->n_ovtype == VNON) {
                 ASSERT(np->n_dirrefs == 0);
                 ASSERT(np->n_fidrefs == 0);
         } else if (np->n_ovtype != vp->v_type) {

@@ -421,10 +365,11 @@
         caller_context_t *ct)
 {
         smbnode_t       *np;
         smbmntinfo_t    *smi;
         struct smb_cred scred;
+        int error = 0;
 
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
 
         /*

@@ -468,20 +413,46 @@
         if (smi->smi_flags & SMI_LLOCK) {
                 pid_t pid = ddi_get_pid();
                 cleanlocks(vp, pid, 0);
                 cleanshares(vp, pid);
         }
+        /*
+         * else doing OtW locking.  SMB servers drop all locks
+         * on the file ID we close here, so no _lockrelease()
+         */
 
         /*
          * This (passed in) count is the ref. count from the
          * user's file_t before the closef call (fio.c).
-         * We only care when the reference goes away.
+         * The rest happens only on last close.
          */
         if (count > 1)
                 return (0);
 
+        /* NFS has DNLC purge here. */
+
         /*
+         * If the file was open for write and there are pages,
+         * then make sure dirty pages are written back.
+         *
+         * NFS does this async when "close-to-open" is off
+         * (MI_NOCTO flag is set) to avoid blocking the caller.
+         * For now, always do this synchronously (no B_ASYNC).
+         */
+        if ((flag & FWRITE) && vn_has_cached_data(vp)) {
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+                if (error == EAGAIN)
+                        error = 0;
+        }
+        if (error == 0) {
+                mutex_enter(&np->r_statelock);
+                np->r_flags &= ~RSTALE;
+                np->r_error = 0;
+                mutex_exit(&np->r_statelock);
+        }
+
+        /*
          * Decrement the reference count for the FID
          * and possibly do the OtW close.
          *
          * Exclusive lock for modifying n_fid stuff.
          * Don't want this one ever interruptible.

@@ -588,10 +559,16 @@
         smb_share_t     *ssp;
         offset_t        endoff;
         ssize_t         past_eof;
         int             error;
 
+        caddr_t         base;
+        u_offset_t      off;
+        size_t          n;
+        int             on;
+        uint_t          flags;
+
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
         ssp = smi->smi_share;
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)

@@ -637,10 +614,20 @@
                 past_eof = (ssize_t)(endoff - va.va_size);
                 uiop->uio_resid -= past_eof;
         } else
                 past_eof = 0;
 
+        /*
+         * Bypass VM if caching has been disabled (e.g., locking) or if
+         * using client-side direct I/O and the file is not mmap'd and
+         * there are no cached pages.
+         */
+        if ((vp->v_flag & VNOCACHE) ||
+            (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
+            np->r_mapcnt == 0 && np->r_inmap == 0 &&
+            !vn_has_cached_data(vp))) {
+
         /* Shared lock for n_fid use in smb_rwuio */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 

@@ -656,10 +643,67 @@
 
         /* undo adjustment of resid */
         uiop->uio_resid += past_eof;
 
         return (error);
+        }
+
+        /* (else) Do I/O through segmap. */
+        do {
+                off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
+                on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
+                n = MIN(MAXBSIZE - on, uiop->uio_resid);
+
+                error = smbfs_validate_caches(vp, cr);
+                if (error)
+                        break;
+
+                /* NFS waits for RINCACHEPURGE here. */
+
+                if (vpm_enable) {
+                        /*
+                         * Copy data.
+                         */
+                        error = vpm_data_copy(vp, off + on, n, uiop,
+                            1, NULL, 0, S_READ);
+                } else {
+                        base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+                            S_READ);
+
+                        error = uiomove(base + on, n, UIO_READ, uiop);
+                }
+
+                if (!error) {
+                        /*
+                         * If read a whole block or read to eof,
+                         * won't need this buffer again soon.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (n + on == MAXBSIZE ||
+                            uiop->uio_loffset == np->r_size)
+                                flags = SM_DONTNEED;
+                        else
+                                flags = 0;
+                        mutex_exit(&np->r_statelock);
+                        if (vpm_enable) {
+                                error = vpm_sync_pages(vp, off, n, flags);
+                        } else {
+                                error = segmap_release(segkmap, base, flags);
+                        }
+                } else {
+                        if (vpm_enable) {
+                                (void) vpm_sync_pages(vp, off, n, 0);
+                        } else {
+                                (void) segmap_release(segkmap, base, 0);
+                        }
+                }
+        } while (!error && uiop->uio_resid > 0);
+
+        /* undo adjustment of resid */
+        uiop->uio_resid += past_eof;
+
+        return (error);
 }
 
 
 /* ARGSUSED */
 static int

@@ -672,10 +716,18 @@
         smbmntinfo_t    *smi;
         smb_share_t     *ssp;
         offset_t        endoff, limit;
         ssize_t         past_limit;
         int             error, timo;
+        caddr_t         base;
+        u_offset_t      off;
+        size_t          n;
+        int             on;
+        uint_t          flags;
+        u_offset_t      last_off;
+        size_t          last_resid;
+        uint_t          bsize;
 
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
         ssp = smi->smi_share;
 

@@ -697,16 +749,18 @@
          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
          */
         if (ioflag & (FAPPEND | FSYNC)) {
                 if (np->n_flag & NMODIFIED) {
                         smbfs_attrcache_remove(np);
-                        /* XXX: smbfs_vinvalbuf? */
                 }
         }
         if (ioflag & FAPPEND) {
                 /*
                  * File size can be changed by another client
+                 *
+                 * Todo: Consider redesigning this to use a
+                 * handle opened for append instead.
                  */
                 va.va_mask = AT_SIZE;
                 if (error = smbfsgetattr(vp, &va, cr))
                         return (error);
                 uiop->uio_loffset = va.va_size;

@@ -726,23 +780,54 @@
          * reaches the limit will be short and the next write
          * will return an error.
          *
          * So if we're starting at or beyond the limit, EFBIG.
          * Otherwise, temporarily reduce resid to the amount
-         * the falls after the limit.
+         * that is after the limit.
          */
         limit = uiop->uio_llimit;
         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
                 limit = MAXOFFSET_T;
-        if (uiop->uio_loffset >= limit)
+        if (uiop->uio_loffset >= limit) {
+                proc_t *p = ttoproc(curthread);
+
+                mutex_enter(&p->p_lock);
+                (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+                    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
+                mutex_exit(&p->p_lock);
                 return (EFBIG);
+        }
         if (endoff > limit) {
                 past_limit = (ssize_t)(endoff - limit);
                 uiop->uio_resid -= past_limit;
         } else
                 past_limit = 0;
 
+        /*
+         * Bypass VM if caching has been disabled (e.g., locking) or if
+         * using client-side direct I/O and the file is not mmap'd and
+         * there are no cached pages.
+         */
+        if ((vp->v_flag & VNOCACHE) ||
+            (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
+            np->r_mapcnt == 0 && np->r_inmap == 0 &&
+            !vn_has_cached_data(vp))) {
+
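+                /*
+                 * Direct (bypass VM) write path.  Also reached by
+                 * goto from the segmap path below when faulting in
+                 * a page for a write-only file returns EACCES.
+                 */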
+smbfs_fwrite:
+                if (np->r_flags & RSTALE) {
+                        last_resid = uiop->uio_resid;
+                        last_off = uiop->uio_loffset;
+                        error = np->r_error;
+                        /*
+                         * A close may have cleared r_error, if so,
+                         * propagate ESTALE error return properly
+                         */
+                        if (error == 0)
+                                error = ESTALE;
+                        goto bottom;
+                }
+
         /* Timeout: longer for append. */
         timo = smb_timo_write;
         if (endoff > np->r_size)
                 timo = smb_timo_append;
 

@@ -762,11 +847,11 @@
                 mutex_enter(&np->r_statelock);
                 np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
                 if (uiop->uio_loffset > (offset_t)np->r_size)
                         np->r_size = (len_t)uiop->uio_loffset;
                 mutex_exit(&np->r_statelock);
-                if (ioflag & (FSYNC|FDSYNC)) {
+                        if (ioflag & (FSYNC | FDSYNC)) {
                         /* Don't error the I/O if this fails. */
                         (void) smbfs_smb_flush(np, &scred);
                 }
         }
 

@@ -775,13 +860,514 @@
 
         /* undo adjustment of resid */
         uiop->uio_resid += past_limit;
 
         return (error);
+        }
+
+        /* (else) Do I/O through segmap. */
+        bsize = vp->v_vfsp->vfs_bsize;
+
+        do {
+                off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
+                on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
+                n = MIN(MAXBSIZE - on, uiop->uio_resid);
+
+                last_resid = uiop->uio_resid;
+                last_off = uiop->uio_loffset;
+
+                if (np->r_flags & RSTALE) {
+                        error = np->r_error;
+                        /*
+                         * A close may have cleared r_error, if so,
+                         * propagate ESTALE error return properly
+                         */
+                        if (error == 0)
+                                error = ESTALE;
+                        break;
+                }
+
+                /*
+                 * From NFS: Don't create dirty pages faster than they
+                 * can be cleaned.
+                 *
+                 * Here NFS also checks for async writes (np->r_awcount)
+                 */
+                mutex_enter(&np->r_statelock);
+                while (np->r_gcount > 0) {
+                        if (SMBINTR(vp)) {
+                                klwp_t *lwp = ttolwp(curthread);
+
+                                if (lwp != NULL)
+                                        lwp->lwp_nostop++;
+                                if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
+                                        mutex_exit(&np->r_statelock);
+                                        if (lwp != NULL)
+                                                lwp->lwp_nostop--;
+                                        error = EINTR;
+                                        goto bottom;
+                                }
+                                if (lwp != NULL)
+                                        lwp->lwp_nostop--;
+                        } else
+                                cv_wait(&np->r_cv, &np->r_statelock);
+                }
+                mutex_exit(&np->r_statelock);
+
+                /*
+                 * Touch the page and fault it in if it is not in core
+                 * before segmap_getmapflt or vpm_data_copy can lock it.
+                 * This avoids a deadlock when the user buffer is an
+                 * mmap'd region of the same file we are writing to.
+                 */
+                uio_prefaultpages((long)n, uiop);
+
+                if (vpm_enable) {
+                        /*
+                         * It will use kpm mappings, so no need to
+                         * pass an address.
+                         */
+                        error = smbfs_writenp(np, NULL, n, uiop, 0);
+                } else {
+                        if (segmap_kpm) {
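+                                /*
+                                 * With segkpm, skip faulting the page
+                                 * in (forcefault 0) when the write
+                                 * starts on a page boundary and either
+                                 * covers the whole page or extends to
+                                 * the current EOF.
+                                 */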
+                                int pon = uiop->uio_loffset & PAGEOFFSET;
+                                size_t pn = MIN(PAGESIZE - pon,
+                                    uiop->uio_resid);
+                                int pagecreate;
+
+                                mutex_enter(&np->r_statelock);
+                                pagecreate = (pon == 0) && (pn == PAGESIZE ||
+                                    uiop->uio_loffset + pn >= np->r_size);
+                                mutex_exit(&np->r_statelock);
+
+                                base = segmap_getmapflt(segkmap, vp, off + on,
+                                    pn, !pagecreate, S_WRITE);
+
+                                error = smbfs_writenp(np, base + pon, n, uiop,
+                                    pagecreate);
+
+                        } else {
+                                base = segmap_getmapflt(segkmap, vp, off + on,
+                                    n, 0, S_READ);
+                                error = smbfs_writenp(np, base + on, n, uiop, 0);
+                        }
+                }
+
+                if (!error) {
+                        if (smi->smi_flags & SMI_NOAC)
+                                flags = SM_WRITE;
+                        else if ((uiop->uio_loffset % bsize) == 0 ||
+                            IS_SWAPVP(vp)) {
+                                /*
+                                 * Have written a whole block.
+                                 * Start an asynchronous write
+                                 * and mark the buffer to
+                                 * indicate that it won't be
+                                 * needed again soon.
+                                 */
+                                flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
+                        } else
+                                flags = 0;
+                        if ((ioflag & (FSYNC|FDSYNC)) ||
+                            (np->r_flags & ROUTOFSPACE)) {
+                                flags &= ~SM_ASYNC;
+                                flags |= SM_WRITE;
+                        }
+                        if (vpm_enable) {
+                                error = vpm_sync_pages(vp, off, n, flags);
+                        } else {
+                                error = segmap_release(segkmap, base, flags);
+                        }
+                } else {
+                        if (vpm_enable) {
+                                (void) vpm_sync_pages(vp, off, n, 0);
+                        } else {
+                                (void) segmap_release(segkmap, base, 0);
+                        }
+                        /*
+                         * If we got an access error while faulting in
+                         * a page for a write-only file, just force a
+                         * write.
+                         */
+                        if (error == EACCES)
+                                goto smbfs_fwrite;
+                }
+        } while (!error && uiop->uio_resid > 0);
+
+bottom:
+        /* undo adjustment of resid */
+        if (error) {
+                uiop->uio_resid = last_resid + past_limit;
+                uiop->uio_loffset = last_off;
+        } else {
+                uiop->uio_resid += past_limit;
+        }
+
+        return (error);
 }
 
+/*
+ * Like nfs_client.c: writerp()
+ *
+ * Write by creating pages and uiomove data onto them.
+ */
 
+int
+smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
+    int pgcreated)
+{
+        int             pagecreate;
+        int             n;
+        int             saved_n;
+        caddr_t         saved_base;
+        u_offset_t      offset;
+        int             error;
+        int             sm_error;
+        vnode_t         *vp = SMBTOV(np);
+
+        ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
+        ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
+        if (!vpm_enable) {
+                ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+        }
+
+        /*
+         * Move bytes in at most PAGESIZE chunks. We must avoid
+         * spanning pages in uiomove() because page faults may cause
+         * the cache to be invalidated out from under us. The r_size is not
+         * updated until after the uiomove. If we push the last page of a
+         * file before r_size is correct, we will lose the data written past
+         * the current (and invalid) r_size.
+         */
+        do {
+                offset = uio->uio_loffset;
+                pagecreate = 0;
+
+                /*
+                 * n is the number of bytes required to satisfy the request
+                 *   or the number of bytes to fill out the page.
+                 */
+                n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
+
+                /*
+                 * Check to see if we can skip reading in the page
+                 * and just allocate the memory.  We can do this
+                 * if we are going to rewrite the entire mapping
+                 * or if we are going to write to or beyond the current
+                 * end of file from the beginning of the mapping.
+                 *
+                 * The read of r_size is now protected by r_statelock.
+                 */
+                mutex_enter(&np->r_statelock);
+                /*
+                 * When pgcreated is nonzero the caller has already done
+                 * a segmap_getmapflt with forcefault 0 and S_WRITE. With
+                 * segkpm this means we already have at least one page
+                 * created and mapped at base.
+                 */
+                pagecreate = pgcreated ||
+                    ((offset & PAGEOFFSET) == 0 &&
+                    (n == PAGESIZE || ((offset + n) >= np->r_size)));
+
+                mutex_exit(&np->r_statelock);
+                if (!vpm_enable && pagecreate) {
+                        /*
+                         * The last argument tells segmap_pagecreate() to
+                         * always lock the page, as opposed to sometimes
+                         * returning with the page locked. This way we avoid a
+                         * fault on the ensuing uiomove(), but also
+                         * more importantly (to fix bug 1094402) we can
+                         * call segmap_fault() to unlock the page in all
+                         * cases. An alternative would be to modify
+                         * segmap_pagecreate() to tell us when it is
+                         * locking a page, but that's a fairly major
+                         * interface change.
+                         */
+                        if (pgcreated == 0)
+                                (void) segmap_pagecreate(segkmap, base,
+                                    (uint_t)n, 1);
+                        saved_base = base;
+                        saved_n = n;
+                }
+
+                /*
+                 * The number of bytes of data in the last page cannot
+                 * be accurately determined while the page is being
+                 * uiomove'd to and the size of the file is being updated.
+                 * Thus, inform threads which need to know accurately
+                 * how much data is in the last page of the file.  They
+                 * will not do the i/o immediately, but will arrange for
+                 * the i/o to happen later when this modify operation
+                 * will have finished.
+                 */
+                ASSERT(!(np->r_flags & RMODINPROGRESS));
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RMODINPROGRESS;
+                np->r_modaddr = (offset & MAXBMASK);
+                mutex_exit(&np->r_statelock);
+
+                if (vpm_enable) {
+                        /*
+                         * Copy data. If new pages are created, part of
+                         * the page that is not written will be initialized
+                         * with zeros.
+                         */
+                        error = vpm_data_copy(vp, offset, n, uio,
+                            !pagecreate, NULL, 0, S_WRITE);
+                } else {
+                        error = uiomove(base, n, UIO_WRITE, uio);
+                }
+
+                /*
+                 * r_size is the maximum number of
+                 * bytes known to be in the file.
+                 * Make sure it is at least as high as the
+                 * first unwritten byte pointed to by uio_loffset.
+                 */
+                mutex_enter(&np->r_statelock);
+                if (np->r_size < uio->uio_loffset)
+                        np->r_size = uio->uio_loffset;
+                np->r_flags &= ~RMODINPROGRESS;
+                np->r_flags |= RDIRTY;
+                mutex_exit(&np->r_statelock);
+
+                /* n = # of bytes written */
+                n = (int)(uio->uio_loffset - offset);
+
+                if (!vpm_enable) {
+                        base += n;
+                }
+                tcount -= n;
+                /*
+                 * If we created pages w/o initializing them completely,
+                 * we need to zero the part that wasn't set up.
+                 * This happens on most EOF write cases and if
+                 * we had some sort of error during the uiomove.
+                 */
+                if (!vpm_enable && pagecreate) {
+                        if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
+                                (void) kzero(base, PAGESIZE - n);
+
+                        if (pgcreated) {
+                                /*
+                                 * Caller is responsible for this page,
+                                 * it was not created in this loop.
+                                 */
+                                pgcreated = 0;
+                        } else {
+                                /*
+                                 * For bug 1094402: segmap_pagecreate locks
+                                 * page. Unlock it. This also unlocks the
+                                 * pages allocated by page_create_va() in
+                                 * segmap_pagecreate().
+                                 */
+                                sm_error = segmap_fault(kas.a_hat, segkmap,
+                                    saved_base, saved_n,
+                                    F_SOFTUNLOCK, S_WRITE);
+                                if (error == 0)
+                                        error = sm_error;
+                        }
+                }
+        } while (tcount > 0 && error == 0);
+
+        return (error);
+}
+
+/*
+ * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
+ * Like nfs3_rdwrlbn()
+ */
+static int
+smbfs_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
+        int flags, cred_t *cr)
+{
+        smbmntinfo_t    *smi = VTOSMI(vp);
+        struct buf *bp;
+        int error;
+        int sync;
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        bp = pageio_setup(pp, len, vp, flags);
+        ASSERT(bp != NULL);
+
+        /*
+         * pageio_setup should have set b_addr to 0.  This
+         * is correct since we want to do I/O on a page
+         * boundary.  bp_mapin will use this addr to calculate
+         * an offset, and then set b_addr to the kernel virtual
+         * address it allocated for us.
+         */
+        ASSERT(bp->b_un.b_addr == 0);
+
+        bp->b_edev = 0;
+        bp->b_dev = 0;
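+        /* lbtodb() converts the byte offset to a DEV_BSIZE block number. */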
+        bp->b_lblkno = lbtodb(off);
+        bp->b_file = vp;
+        bp->b_offset = (offset_t)off;
+        bp_mapin(bp);
+
+        /*
+         * Calculate the desired level of stability to write data.
+         */
+        if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
+            freemem > desfree) {
+                sync = 0;
+        } else {
+                sync = 1;
+        }
+
+        error = smbfs_bio(bp, sync, cr);
+
+        bp_mapout(bp);
+        pageio_done(bp);
+
+        return (error);
+}
+
+
+/*
+ * Corresponds to nfs3_vnops.c : nfs3_bio(), though the NFS code
+ * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
+ * NFS has this later in the file; we keep it here, closer to
+ * its one call site just above.
+ */
+
+static int
+smbfs_bio(struct buf *bp, int sync, cred_t *cr)
+{
+        struct iovec aiov[1];
+        struct uio  auio;
+        struct smb_cred scred;
+        smbnode_t *np = VTOSMB(bp->b_vp);
+        smbmntinfo_t *smi = np->n_mount;
+        smb_share_t *ssp = smi->smi_share;
+        offset_t offset;
+        offset_t endoff;
+        size_t count;
+        size_t past_eof;
+        int error;
+
+        ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
+
+        offset = ldbtob(bp->b_lblkno);
+        count = bp->b_bcount;
+        endoff = offset + count;
+        if (offset < 0 || endoff < 0)
+                return (EINVAL);
+
+        /*
+         * Limit file I/O to the remaining file size, but see
+         * the notes in smbfs_getpage about SMBFS_EOF.
+         */
+        mutex_enter(&np->r_statelock);
+        if (offset >= np->r_size) {
+                mutex_exit(&np->r_statelock);
+                if (bp->b_flags & B_READ) {
+                        return (SMBFS_EOF);
+                } else {
+                        return (EINVAL);
+                }
+        }
+        if (endoff > np->r_size) {
+                past_eof = (size_t)(endoff - np->r_size);
+                count -= past_eof;
+        } else
+                past_eof = 0;
+        mutex_exit(&np->r_statelock);
+        ASSERT(count > 0);
+
+        /* Caller did bp_mapin().  Mapped address is... */
+        aiov[0].iov_base = bp->b_un.b_addr;
+        aiov[0].iov_len = count;
+        auio.uio_iov = aiov;
+        auio.uio_iovcnt = 1;
+        auio.uio_loffset = offset;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_fmode = 0;
+        auio.uio_resid = count;
+
+        /* Shared lock for n_fid use in smb_rwuio */
+        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
+            smi->smi_flags & SMI_INT))
+                return (EINTR);
+        smb_credinit(&scred, cr);
+
+        DTRACE_IO1(start, struct buf *, bp);
+
+        if (bp->b_flags & B_READ) {
+
+                /* After reconnect, n_fid is invalid */
+                if (np->n_vcgenid != ssp->ss_vcgenid)
+                        error = ESTALE;
+                else
+                        error = smb_rwuio(ssp, np->n_fid, UIO_READ,
+                            &auio, &scred, smb_timo_read);
+
+                /* Like NFS, only set b_error here. */
+                bp->b_error = error;
+                bp->b_resid = auio.uio_resid;
+
+                if (!error && auio.uio_resid != 0)
+                        error = EIO;
+                if (!error && past_eof != 0) {
+                        /* Zero the memory beyond EOF. */
+                        bzero(bp->b_un.b_addr + count, past_eof);
+                }
+        } else {
+
+                /* After reconnect, n_fid is invalid */
+                if (np->n_vcgenid != ssp->ss_vcgenid)
+                        error = ESTALE;
+                else
+                        error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
+                            &auio, &scred, smb_timo_write);
+
+                /* Like NFS, only set b_error here. */
+                bp->b_error = error;
+                bp->b_resid = auio.uio_resid;
+
+                if (!error && auio.uio_resid != 0)
+                        error = EIO;
+                if (!error && sync) {
+                        (void) smbfs_smb_flush(np, &scred);
+                }
+        }
+
+        /*
+         * This comes from nfs3_commit()
+         */
+        if (error != 0) {
+                mutex_enter(&np->r_statelock);
+                if (error == ESTALE)
+                        np->r_flags |= RSTALE;
+                if (!np->r_error)
+                        np->r_error = error;
+                mutex_exit(&np->r_statelock);
+                bp->b_flags |= B_ERROR;
+        }
+
+        DTRACE_IO1(done, struct buf *, bp);
+
+        smb_credrele(&scred);
+        smbfs_rw_exit(&np->r_lkserlock);
+
+        if (error == ESTALE)
+                smbfs_attrcache_remove(np);
+
+        return (error);
+}
+
+/*
+ * Here NFS has: nfs3write, nfs3read
+ * We use smb_rwuio instead.
+ */
+
 /* ARGSUSED */
 static int
 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
         cred_t *cr, int *rvalp, caller_context_t *ct)
 {

@@ -795,11 +1381,10 @@
 
         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                 return (EIO);
 
         switch (cmd) {
-                /* First three from ZFS. XXX - need these? */
 
         case _FIOFFS:
                 error = smbfs_fsync(vp, 0, cr, ct);
                 break;
 

@@ -810,14 +1395,18 @@
         case _FIOGDIO:
         case _FIOSDIO:
                 error = 0;
                 break;
 
-#ifdef NOT_YET  /* XXX - from the NFS code. */
+#if 0   /* Todo - SMB ioctl query regions */
+        case _FIO_SEEK_DATA:
+        case _FIO_SEEK_HOLE:
+#endif
+
         case _FIODIRECTIO:
                 error = smbfs_directio(vp, (int)arg, cr);
-#endif
+                break;
 
                 /*
                  * Allow get/set with "raw" security descriptor (SD) data.
                  * Useful for testing, diagnosing idmap problems, etc.
                  */

@@ -847,10 +1436,11 @@
 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
         caller_context_t *ct)
 {
         smbnode_t *np;
         smbmntinfo_t *smi;
+        int error;
 
         smi = VTOSMI(vp);
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EIO);

@@ -881,10 +1471,34 @@
                         mutex_exit(&np->r_statelock);
                         return (0);
                 }
         }
 
+        /*
+         * Only need to flush pages if asking for the mtime
+ * and if there are any dirty pages.
+         *
+         * Here NFS also checks for async writes (np->r_awcount)
+         */
+        if (vap->va_mask & AT_MTIME) {
+                if (vn_has_cached_data(vp) &&
+                    ((np->r_flags & RDIRTY) != 0)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_gcount++;
+                        mutex_exit(&np->r_statelock);
+                        error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+                        mutex_enter(&np->r_statelock);
+                        if (error && (error == ENOSPC || error == EDQUOT)) {
+                                if (!np->r_error)
+                                        np->r_error = error;
+                        }
+                        if (--np->r_gcount == 0)
+                                cv_broadcast(&np->r_cv);
+                        mutex_exit(&np->r_statelock);
+                }
+        }
+
         return (smbfsgetattr(vp, vap, cr));
 }
 
 /* smbfsgetattr() in smbfs_client.c */
 

@@ -951,11 +1565,18 @@
                          * the rest of the setattr work.
                          */
                 }
         }
 
-        return (smbfssetattr(vp, vap, flags, cr));
+        error = smbfssetattr(vp, vap, flags, cr);
+
+#ifdef  SMBFS_VNEVENT
+        if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0)
+                vnevent_truncate(vp, ct);
+#endif
+
+        return (error);
 }
 
 /*
  * Mostly from Darwin smbfs_setattr()
  * but then modified a lot.

@@ -989,10 +1610,35 @@
                         SMBVDEBUG("ignore set time on xattr\n");
                 mask &= AT_SIZE;
         }
 
         /*
+         * Only need to flush pages if there are any pages and
+         * if the file is marked as dirty in some fashion.  The
+         * file must be flushed so that we can accurately
+         * determine the size of the file and the cached data
+         * after the SETATTR returns.  A file is considered to
+         * be dirty if it is either marked with RDIRTY, has
+         * outstanding i/o's active, or is mmap'd.  In this
+         * last case, we can't tell whether there are dirty
+         * pages, so we flush just to be sure.
+         */
+        if (vn_has_cached_data(vp) &&
+            ((np->r_flags & RDIRTY) ||
+            np->r_count > 0 ||
+            np->r_mapcnt > 0)) {
+                ASSERT(vp->v_type != VCHR);
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
+                if (error && (error == ENOSPC || error == EDQUOT)) {
+                        mutex_enter(&np->r_statelock);
+                        if (!np->r_error)
+                                np->r_error = error;
+                        mutex_exit(&np->r_statelock);
+                }
+        }
+
+        /*
          * If our caller is trying to set multiple attributes, they
          * can make no assumption about what order they are done in.
          * Here we try to do them in order of decreasing likelihood
          * of failure, just to minimize the chance we'll wind up
          * with a partially complete request.

@@ -1050,12 +1696,10 @@
         if (mask & AT_SIZE) {
                 /*
                  * If the new file size is less than what the client sees as
                  * the file size, then just change the size and invalidate
                  * the pages.
-                 * I am commenting this code at present because the function
-                 * smbfs_putapage() is not yet implemented.
                  */
 
                 /*
                  * Set the file size to vap->va_size.
                  */

@@ -1066,26 +1710,22 @@
                             error, np->n_rpath);
                 } else {
                         /*
                          * Darwin had code here to zero-extend.
                          * Tests indicate the server will zero-fill,
-                         * so looks like we don't need to do this.
-                         * Good thing, as this could take forever.
-                         *
-                         * XXX: Reportedly, writing one byte of zero
-                         * at the end offset avoids problems here.
+                         * so looks like we don't need to do that.
                          */
                         mutex_enter(&np->r_statelock);
                         np->r_size = vap->va_size;
                         mutex_exit(&np->r_statelock);
                         modified = 1;
                 }
         }
 
         /*
-         * XXX: When Solaris has create_time, set that too.
-         * Note: create_time is different from ctime.
+         * Todo: Implement setting create_time (which is
+         * different from ctime).
          */
         mtime = ((mask & AT_MTIME) ? &vap->va_mtime : 0);
         atime = ((mask & AT_ATIME) ? &vap->va_atime : 0);
 
         if (dosattr || mtime || atime) {

@@ -1102,18 +1742,10 @@
                         modified = 1;
                 }
         }
 
 out:
-        if (modified) {
-                /*
-                 * Invalidate attribute cache in case the server
-                 * doesn't set exactly the attributes we asked.
-                 */
-                smbfs_attrcache_remove(np);
-        }
-
         if (have_fid) {
                 cerror = smbfs_smb_tmpclose(np, fid, &scred);
                 if (cerror)
                         SMBVDEBUG("error %d closing %s\n",
                             cerror, np->n_rpath);

@@ -1120,10 +1752,35 @@
         }
 
         smb_credrele(&scred);
         smbfs_rw_exit(&np->r_lkserlock);
 
+        if (modified) {
+                /*
+                 * Invalidate attribute cache in case the server
+                 * doesn't set exactly the attributes we asked.
+                 */
+                smbfs_attrcache_remove(np);
+
+                /*
+                 * If changing the size of the file, invalidate
+                 * any local cached data which is no longer part
+                 * of the file.  We also possibly invalidate the
+                 * last page in the file.  We could use
+                 * pvn_vpzero(), but this would mark the page as
+                 * modified and require it to be written back to
+                 * the server for no particularly good reason.
+                 * This way, if we access it, then we bring it
+                 * back in.  A read should be cheaper than a
+                 * write.
+                 */
+                if (mask & AT_SIZE) {
+                        smbfs_invalidate_pages(vp,
+                            (vap->va_size & PAGEMASK), cr);
+                }
+        }
+
         return (error);
 }
 
 /*
  * Helper function for extensible system attributes (PSARC 2007/315)

@@ -1206,14 +1863,10 @@
  *
  * We still (sort of) need a vnode when we call
  * secpolicy_vnode_access, but that only uses
  * the vtype field, so we can use a pair of fake
  * vnodes that have only v_type filled in.
- *
- * XXX: Later, add a new secpolicy_vtype_access()
- * that takes the vtype instead of a vnode, and
- * get rid of the tmpl_vxxx fake vnodes below.
  */
 static int
 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
 {
         /* See the secpolicy call below. */

@@ -1224,12 +1877,10 @@
         struct smbmntinfo *smi = VFTOSMI(vfsp);
         int shift = 0;
 
         /*
          * Build our (fabricated) vnode attributes.
-         * XXX: Could make these templates in the
-         * per-mount struct and use them here.
          */
         bzero(&va, sizeof (va));
         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
         va.va_type = vtype;
         va.va_mode = (vtype == VDIR) ?

@@ -1250,11 +1901,10 @@
                 return (EROFS);
 
         /*
          * Disallow attempts to access mandatory lock files.
          * Similarly, expand MANDLOCK here.
-         * XXX: not sure we need this.
          */
         if ((mode & (VWRITE | VREAD | VEXEC)) &&
             va.va_type == VREG && MANDMODE(va.va_mode))
                 return (EACCES);
 

@@ -1320,10 +1970,19 @@
 
         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
 }
 
 
+/* ARGSUSED */
+static int
+smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
 /*
  * Flush local dirty pages to stable storage on the server.
  *
  * If FNODSYNC is specified, then there is nothing to do because
  * metadata changes are not cached on the client before being

@@ -1351,10 +2010,14 @@
                 return (0);
 
         if ((syncflag & (FSYNC|FDSYNC)) == 0)
                 return (0);
 
+        error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+        if (error)
+                return (error);
+
         /* Shared lock for n_fid use in _flush */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 

@@ -1371,30 +2034,62 @@
  */
 /* ARGSUSED */
 static void
 smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 {
-        smbnode_t       *np;
         struct smb_cred scred;
+        smbnode_t       *np = VTOSMB(vp);
+        int error;
 
         /*
          * Don't "bail out" for VFS_UNMOUNTED here,
          * as we want to do cleanup, etc.
          * See also pcfs_inactive
          */
 
-        np = VTOSMB(vp);
-
         /*
          * If this is coming from the wrong zone, we let someone in the right
          * zone take care of it asynchronously.  We can get here due to
          * VN_RELE() being called from pageout() or fsflush().  This call may
          * potentially turn into an expensive no-op if, for instance, v_count
          * gets incremented in the meantime, but it's still correct.
          */
 
         /*
+         * From NFS:rinactive()
+         *
+         * Before freeing anything, wait until all asynchronous
+         * activity is done on this rnode.  This will allow all
+         * asynchronous read ahead and write behind i/o's to
+         * finish.
+         */
+        mutex_enter(&np->r_statelock);
+        while (np->r_count > 0)
+                cv_wait(&np->r_cv, &np->r_statelock);
+        mutex_exit(&np->r_statelock);
+
+        /*
+         * Flush and invalidate all pages associated with the vnode.
+         */
+        if (vn_has_cached_data(vp)) {
+                if ((np->r_flags & RDIRTY) && !np->r_error) {
+                        error = smbfs_putpage(vp, (u_offset_t)0, 0, 0, cr, ct);
+                        if (error && (error == ENOSPC || error == EDQUOT)) {
+                                mutex_enter(&np->r_statelock);
+                                if (!np->r_error)
+                                        np->r_error = error;
+                                mutex_exit(&np->r_statelock);
+                        }
+                }
+                smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
+        }
+        /*
+         * This vnode should have lost all cached data.
+         */
+        ASSERT(vn_has_cached_data(vp) == 0);
+
+        /*
          * Defend against the possibility that higher-level callers
          * might not correctly balance open and close calls.  If we
          * get here with open references remaining, it means there
          * was a missing VOP_CLOSE somewhere.  If that happens, do
          * the close here so we don't "leak" FIDs on the server.

@@ -1538,12 +2233,10 @@
         supplen = 255;
 #endif
 
         /*
          * RWlock must be held, either reader or writer.
-         * XXX: Can we check without looking directly
-         * inside the struct smbfs_rwlock_t?
          */
         ASSERT(dnp->r_rwlock.count != 0);
 
         /*
          * If lookup is for "", just return dvp.

@@ -1586,11 +2279,11 @@
                 return (ENAMETOOLONG);
 
         /*
          * Avoid surprises with characters that are
          * illegal in Windows file names.
-         * Todo: CATIA mappings  XXX
+         * Todo: CATIA mappings?
          */
         ill = illegal_chars;
         if (dnp->n_flag & N_XATTR)
                 ill++; /* allow colon */
         if (strpbrk(nm, ill))

@@ -1807,10 +2500,11 @@
 #endif
         *vpp = vp;
         return (0);
 }
 
+
 /*
  * XXX
  * vsecattr_t is new to build 77, and we need to eventually support
  * it in order to create an ACL when an object is created.
  *

@@ -1825,13 +2519,11 @@
 {
         int             error;
         int             cerror;
         vfs_t           *vfsp;
         vnode_t         *vp;
-#ifdef NOT_YET
         smbnode_t       *np;
-#endif
         smbnode_t       *dnp;
         smbmntinfo_t    *smi;
         struct vattr    vattr;
         struct smbfattr fattr;
         struct smb_cred scred;

@@ -1853,11 +2545,11 @@
                 return (EIO);
 
         /*
          * Note: this may break mknod(2) calls to create a directory,
          * but that's obscure use.  Some other filesystems do this.
-         * XXX: Later, redirect VDIR type here to _mkdir.
+         * Todo: redirect VDIR type here to _mkdir.
          */
         if (va->va_type != VREG)
                 return (EINVAL);
 
         /*

@@ -1918,22 +2610,39 @@
                 }
 
                 /*
                  * Truncate (if requested).
                  */
-                if ((vattr.va_mask & AT_SIZE) && vattr.va_size == 0) {
+                if ((vattr.va_mask & AT_SIZE) && vp->v_type == VREG) {
+                        np = VTOSMB(vp);
+                        /*
+                         * Check here for large file truncation by
+                         * LF-unaware process, like ufs_create().
+                         */
+                        if (!(lfaware & FOFFMAX)) {
+                                mutex_enter(&np->r_statelock);
+                                if (np->r_size > MAXOFF32_T)
+                                        error = EOVERFLOW;
+                                mutex_exit(&np->r_statelock);
+                        }
+                        if (error) {
+                                VN_RELE(vp);
+                                goto out;
+                        }
                         vattr.va_mask = AT_SIZE;
                         error = smbfssetattr(vp, &vattr, 0, cr);
                         if (error) {
                                 VN_RELE(vp);
                                 goto out;
                         }
-                }
-                /* Success! */
-#ifdef NOT_YET
+#ifdef  SMBFS_VNEVENT
+                        /* Existing file was truncated */
                 vnevent_create(vp, ct);
 #endif
+                        /* invalidate pages done in smbfssetattr() */
+                }
+                /* Success! */
                 *vpp = vp;
                 goto out;
         }
 
         /*

@@ -1978,40 +2687,10 @@
             disp, &scred, &fid);
         if (error)
                 goto out;
 
         /*
-         * XXX: Missing some code here to deal with
-         * the case where we opened an existing file,
-         * it's size is larger than 32-bits, and we're
-         * setting the size from a process that's not
-         * aware of large file offsets.  i.e.
-         * from the NFS3 code:
-         */
-#if NOT_YET /* XXX */
-        if ((vattr.va_mask & AT_SIZE) &&
-            vp->v_type == VREG) {
-                np = VTOSMB(vp);
-                /*
-                 * Check here for large file handled
-                 * by LF-unaware process (as
-                 * ufs_create() does)
-                 */
-                if (!(lfaware & FOFFMAX)) {
-                        mutex_enter(&np->r_statelock);
-                        if (np->r_size > MAXOFF32_T)
-                                error = EOVERFLOW;
-                        mutex_exit(&np->r_statelock);
-                }
-                if (!error) {
-                        vattr.va_mask = AT_SIZE;
-                        error = smbfssetattr(vp,
-                            &vattr, 0, cr);
-                }
-        }
-#endif /* XXX */
-        /*
          * Should use the fid to get/set the size
          * while we have it opened here.  See above.
          */
 
         cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);

@@ -2037,12 +2716,10 @@
 
         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
         if (error)
                 goto out;
 
-        /* XXX invalidate pages if we truncated? */
-
         /* Success! */
         *vpp = vp;
         error = 0;
 
 out:

@@ -2144,17 +2821,32 @@
 
         /* Never allow link/unlink directories on SMB. */
         if (vp->v_type == VDIR)
                 return (EPERM);
 
+        /*
+         * We need to flush any dirty pages which happen to
+         * be hanging around before removing the file.  This
+         * shouldn't happen very often and mostly on file
+         * systems mounted "nocto".
+         */
+        if (vn_has_cached_data(vp) &&
+            ((np->r_flags & RDIRTY) || np->r_count > 0)) {
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0,
+                    scred->scr_cred, NULL);
+                if (error && (error == ENOSPC || error == EDQUOT)) {
+                        mutex_enter(&np->r_statelock);
+                        if (!np->r_error)
+                                np->r_error = error;
+                        mutex_exit(&np->r_statelock);
+                }
+        }
+
         /* Shared lock for n_fid use in smbfs_smb_setdisp etc. */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
 
-        /* Force lookup to go OtW */
-        smbfs_attrcache_remove(np);
-
         /*
          * Get a file handle with delete access.
          * Close this FID before return.
          */
         error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,

@@ -2213,10 +2905,14 @@
                 goto out;
         }
         /* Done! */
         smbfs_attrcache_prune(np);
 
+#ifdef  SMBFS_VNEVENT
+        vnevent_remove(vp, dvp, nm, ct);
+#endif
+
 out:
         if (tmpname != NULL)
                 kmem_free(tmpname, MAXNAMELEN);
 
         if (have_fid)

@@ -2230,10 +2926,20 @@
 
         return (error);
 }
 
 
+/* ARGSUSED */
+static int
+smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
+        caller_context_t *ct, int flags)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
 /*
  * XXX
  * This op should support the new FIGNORECASE flag for case-insensitive
  * lookups, per PSARC 2007/244.
  */

@@ -2422,19 +3128,20 @@
                 VN_RELE(nvp);
                 nvp = NULL;
         } /* nvp */
 
         smbfs_attrcache_remove(onp);
-
         error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), scred);
 
         /*
          * If the old name should no longer exist,
          * discard any cached attributes under it.
          */
-        if (error == 0)
+        if (error == 0) {
                 smbfs_attrcache_prune(onp);
+                /* SMBFS_VNEVENT... */
+        }
 
 out:
         if (nvp) {
                 if (nvp_locked)
                         vn_vfsunlock(nvp);

@@ -2483,15 +3190,10 @@
         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
         /*
-         * XXX: Do we need r_lkserlock too?
-         * No use of any shared fid or fctx...
-         */
-
-        /*
          * Require write access in the containing directory.
          */
         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
         if (error)
                 goto out;

@@ -2630,10 +3332,20 @@
 }
 
 
 /* ARGSUSED */
 static int
+smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
+        caller_context_t *ct, int flags)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
+/* ARGSUSED */
+static int
 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
         caller_context_t *ct, int flags)
 {
         struct smbnode  *np = VTOSMB(vp);
         int             error = 0;

@@ -2655,12 +3367,11 @@
                 return (error);
 
         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
 
         /*
-         * XXX: Todo readdir cache here
-         * Note: NFS code is just below this.
+         * Todo: readdir cache here
          *
          * I am serializing the entire readdir operation
          * now since we have not yet implemented readdir
          * cache. This fix needs to be revisited once
          * we implement readdir cache.

@@ -2890,11 +3601,23 @@
         kmem_free(dp, dbufsiz);
         smb_credrele(&scred);
         return (error);
 }
 
+/*
+ * Here NFS has: nfs3_bio
+ * See smbfs_bio above.
+ */
 
+/* ARGSUSED */
+static int
+smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
+{
+        return (ENOSYS);
+}
+
+
 /*
  * The pair of functions VOP_RWLOCK, VOP_RWUNLOCK
  * are optional functions that are called by:
  *    getdents, before/after VOP_READDIR
  *    pread, before/after ... VOP_READ

@@ -2964,12 +3687,973 @@
                 return (EINVAL);
 
         return (0);
 }
 
+/* mmap support ******************************************************** */
 
+#ifdef DEBUG
+static int smbfs_lostpage = 0;  /* number of times we lost original page */
+#endif
+
 /*
+ * Return all the pages from [off..off+len) in file
+ * Like nfs3_getpage
+ */
+/* ARGSUSED */
+static int
+smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+        page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
+        enum seg_rw rw, cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t       *np;
+        smbmntinfo_t    *smi;
+        int             error;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        if (protp != NULL)
+                *protp = PROT_ALL;
+
+        /*
+         * Now validate that the caches are up to date.
+         */
+        error = smbfs_validate_caches(vp, cr);
+        if (error)
+                return (error);
+
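+        /* On SMBFS_EOF from the page list build, purge caches and retry here. */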
+retry:
+        mutex_enter(&np->r_statelock);
+
+        /*
+         * Don't create dirty pages faster than they
+         * can be cleaned ... (etc. see nfs)
+         *
+         * Here NFS also tests:
+         *  (mi->mi_max_threads != 0 &&
+         *  rp->r_awcount > 2 * mi->mi_max_threads)
+         */
+        if (rw == S_CREATE) {
+                while (np->r_gcount > 0)
+                        cv_wait(&np->r_cv, &np->r_statelock);
+        }
+
+        /*
+         * If we are getting called as a side effect of a write
+         * operation the local file size might not be extended yet.
+         * In this case we want to be able to return pages of zeroes.
+         */
+        if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
+                mutex_exit(&np->r_statelock);
+                return (EFAULT);                /* beyond EOF */
+        }
+
+        mutex_exit(&np->r_statelock);
+
+        error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
+            pl, plsz, seg, addr, rw, cr);
+
+        switch (error) {
+        case SMBFS_EOF:
+                smbfs_purge_caches(vp, cr);
+                goto retry;
+        case ESTALE:
+                /*
+                 * Here NFS has: PURGE_STALE_FH(error, vp, cr);
+                 * In-line here as we only use it once.
+                 */
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RSTALE;
+                if (!np->r_error)
+                        np->r_error = (error);
+                mutex_exit(&np->r_statelock);
+                if (vn_has_cached_data(vp))
+                        smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
+                smbfs_purge_caches(vp, cr);
+                break;
+        default:
+                break;
+        }
+
+        return (error);
+}
+
+/*
+ * Called from pvn_getpages to get a particular page.
+ * Like nfs3_getapage
+ */
+/* ARGSUSED */
+static int
+smbfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
+        page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
+        enum seg_rw rw, cred_t *cr)
+{
+        smbnode_t       *np;
+        smbmntinfo_t    *smi;
+
+        uint_t          bsize;
+        struct buf      *bp;
+        page_t          *pp;
+        u_offset_t      lbn;
+        u_offset_t      io_off;
+        u_offset_t      blkoff;
+        size_t          io_len;
+        uint_t blksize;
+        int error;
+        /* int readahead; */
+        int readahead_issued = 0;
+        /* int ra_window; * readahead window */
+        page_t *pagefound;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
+
+reread:
+        bp = NULL;
+        pp = NULL;
+        pagefound = NULL;
+
+        if (pl != NULL)
+                pl[0] = NULL;
+
+        error = 0;
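+        /* Logical block containing this offset, and that block's file offset. */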
+        lbn = off / bsize;
+        blkoff = lbn * bsize;
+
+        /*
+         * NFS queues up readahead work here.
+         */
+
+again:
+        if ((pagefound = page_exists(vp, off)) == NULL) {
+                if (pl == NULL) {
+                        (void) 0; /* Todo: smbfs_async_readahead(); */
+                } else if (rw == S_CREATE) {
+                        /*
+                         * Block for this page is not allocated, or the offset
+                         * is beyond the current allocation size, or we're
+                         * allocating a swap slot and the page was not found,
+                         * so allocate it and return a zero page.
+                         */
+                        if ((pp = page_create_va(vp, off,
+                            PAGESIZE, PG_WAIT, seg, addr)) == NULL)
+                                cmn_err(CE_PANIC, "smbfs_getapage: page_create");
+                        io_len = PAGESIZE;
+                        mutex_enter(&np->r_statelock);
+                        np->r_nextr = off + PAGESIZE;
+                        mutex_exit(&np->r_statelock);
+                } else {
+                        /*
+                         * Need to go to server to get a BLOCK, exception to
+                         * that being while reading at offset = 0 or doing
+                         * random i/o, in that case read only a PAGE.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (blkoff < np->r_size &&
+                            blkoff + bsize >= np->r_size) {
+                                /*
+                                 * If only a block or less is left in
+                                 * the file, read all that is remaining.
+                                 */
+                                if (np->r_size <= off) {
+                                        /*
+                                         * Trying to access beyond EOF,
+                                         * set up to get at least one page.
+                                         */
+                                        blksize = off + PAGESIZE - blkoff;
+                                } else
+                                        blksize = np->r_size - blkoff;
+                        } else if ((off == 0) ||
+                            (off != np->r_nextr && !readahead_issued)) {
+                                blksize = PAGESIZE;
+                                blkoff = off; /* block = page here */
+                        } else
+                                blksize = bsize;
+                        mutex_exit(&np->r_statelock);
+
+                        pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
+                            &io_len, blkoff, blksize, 0);
+
+                        /*
+                         * Some other thread has entered the page,
+                         * so just use it.
+                         */
+                        if (pp == NULL)
+                                goto again;
+
+                        /*
+                         * Now round the request size up to page boundaries.
+                         * This ensures that the entire page will be
+                         * initialized to zeroes if EOF is encountered.
+                         */
+                        io_len = ptob(btopr(io_len));
+
+                        bp = pageio_setup(pp, io_len, vp, B_READ);
+                        ASSERT(bp != NULL);
+
+                        /*
+                         * pageio_setup should have set b_addr to 0.  This
+                         * is correct since we want to do I/O on a page
+                         * boundary.  bp_mapin will use this addr to calculate
+                         * an offset, and then set b_addr to the kernel virtual
+                         * address it allocated for us.
+                         */
+                        ASSERT(bp->b_un.b_addr == 0);
+
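+                        /* No real device behind this buf. */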
+                        bp->b_edev = 0;
+                        bp->b_dev = 0;
+                        bp->b_lblkno = lbtodb(io_off);
+                        bp->b_file = vp;
+                        bp->b_offset = (offset_t)off;
+                        bp_mapin(bp);
+
+                        /*
+                         * If doing a write beyond what we believe is EOF,
+                         * don't bother trying to read the pages from the
+                         * server, we'll just zero the pages here.  We
+                         * don't check that the rw flag is S_WRITE here
+                         * because some implementations may attempt a
+                         * read access to the buffer before copying data.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (io_off >= np->r_size && seg == segkmap) {
+                                mutex_exit(&np->r_statelock);
+                                bzero(bp->b_un.b_addr, io_len);
+                        } else {
+                                mutex_exit(&np->r_statelock);
+                                error = smbfs_bio(bp, 0, cr);
+                        }
+
+                        /*
+                         * Unmap the buffer before freeing it.
+                         */
+                        bp_mapout(bp);
+                        pageio_done(bp);
+
+                        /* Here NFS3 updates all pp->p_fsdata */
+
+                        if (error == SMBFS_EOF) {
+                                /*
+                                 * If doing a write system call just return
+                                 * zeroed pages, else user tried to get pages
+                                 * beyond EOF, return error.  We don't check
+                                 * that the rw flag is S_WRITE here because
+                                 * some implementations may attempt a read
+                                 * access to the buffer before copying data.
+                                 */
+                                if (seg == segkmap)
+                                        error = 0;
+                                else
+                                        error = EFAULT;
+                        }
+
+                        if (!readahead_issued && !error) {
+                                mutex_enter(&np->r_statelock);
+                                np->r_nextr = io_off + io_len;
+                                mutex_exit(&np->r_statelock);
+                        }
+                }
+        }
+
+        if (pl == NULL)
+                return (error);
+
+        if (error) {
+                if (pp != NULL)
+                        pvn_read_done(pp, B_ERROR);
+                return (error);
+        }
+
+        if (pagefound) {
+                se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
+
+                /*
+                 * Page exists in the cache, acquire the appropriate lock.
+                 * If this fails, start all over again.
+                 */
+                if ((pp = page_lookup(vp, off, se)) == NULL) {
+#ifdef DEBUG
+                        smbfs_lostpage++;
+#endif
+                        goto reread;
+                }
+                pl[0] = pp;
+                pl[1] = NULL;
+                return (0);
+        }
+
+        if (pp != NULL)
+                pvn_plist_init(pp, pl, plsz, off, io_len, rw);
+
+        return (error);
+}
+
+/*
+ * Here NFS has: nfs3_readahead
+ * No read-ahead in smbfs yet.
+ */
+
+/*
+ * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
+ * If len == 0, do from off to EOF.
+ *
+ * The normal cases should be len == 0 && off == 0 (entire vp list),
+ * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
+ * (from pageout).
+ *
+ * Like nfs3_putpage + nfs_putpages
+ */
+/* ARGSUSED */
+static int
+smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
+        caller_context_t *ct)
+{
+        smbnode_t *np;
+        smbmntinfo_t *smi;
+        page_t *pp;
+        u_offset_t eoff;
+        u_offset_t io_off;
+        size_t io_len;
+        int error;
+        int rdirty;
+        int err;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        /* Here NFS does rp->r_count (++/--) stuff. */
+
+        /* Beginning of code from nfs_putpages. */
+
+        if (!vn_has_cached_data(vp))
+                return (0);
+
+        /*
+         * If ROUTOFSPACE is set, then all writes turn into B_INVAL
+         * writes.  B_FORCE is set to force the VM system to actually
+         * invalidate the pages, even if the i/o failed.  The pages
+         * need to get invalidated because they can't be written out
+         * because there isn't any space left on either the server's
+         * file system or in the user's disk quota.  The B_FREE bit
+         * is cleared to avoid confusion as to whether this is a
+         * request to place the page on the freelist or to destroy
+         * it.
+         */
+        if ((np->r_flags & ROUTOFSPACE) ||
+            (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
+                flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
+
+        if (len == 0) {
+                /*
+                 * If doing a full file synchronous operation, then clear
+                 * the RDIRTY bit.  If a page gets dirtied while the flush
+                 * is happening, then RDIRTY will get set again.  The
+                 * RDIRTY bit must get cleared before the flush so that
+                 * we don't lose this information.
+                 *
+                 * NFS has B_ASYNC vs sync stuff here.
+                 */
+                if (off == (u_offset_t)0 &&
+                    (np->r_flags & RDIRTY)) {
+                        mutex_enter(&np->r_statelock);
+                        rdirty = (np->r_flags & RDIRTY);
+                        np->r_flags &= ~RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                } else
+                        rdirty = 0;
+
+                /*
+                 * Search the entire vp list for pages >= off, and flush
+                 * the dirty pages.
+                 */
+                error = pvn_vplist_dirty(vp, off, smbfs_putapage,
+                    flags, cr);
+
+                /*
+                 * If an error occurred and the file was marked as dirty
+                 * before and we aren't forcibly invalidating pages, then
+                 * reset the RDIRTY flag.
+                 */
+                if (error && rdirty &&
+                    (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags |= RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                }
+        } else {
+                /*
+                 * Do a range from [off...off + len) looking for pages
+                 * to deal with.
+                 */
+                error = 0;
+                io_len = 1; /* quiet warnings */
+                eoff = off + len;
+
+                for (io_off = off; io_off < eoff; io_off += io_len) {
+                        mutex_enter(&np->r_statelock);
+                        if (io_off >= np->r_size) {
+                                mutex_exit(&np->r_statelock);
+                                break;
+                        }
+                        mutex_exit(&np->r_statelock);
+                        /*
+                         * If we are not invalidating, synchronously
+                         * freeing or writing pages use the routine
+                         * page_lookup_nowait() to prevent reclaiming
+                         * them from the free list.
+                         */
+                        if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
+                                pp = page_lookup(vp, io_off,
+                                    (flags & (B_INVAL | B_FREE)) ?
+                                    SE_EXCL : SE_SHARED);
+                        } else {
+                                pp = page_lookup_nowait(vp, io_off,
+                                    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
+                        }
+
+                        if (pp == NULL || !pvn_getdirty(pp, flags))
+                                io_len = PAGESIZE;
+                        else {
+                                err = smbfs_putapage(vp, pp, &io_off,
+                                    &io_len, flags, cr);
+                                if (!error)
+                                        error = err;
+                                /*
+                                 * "io_off" and "io_len" are returned as
+                                 * the range of pages we actually wrote.
+                                 * This allows us to skip ahead more quickly
+                                 * since several pages may've been dealt
+                                 * with by this iteration of the loop.
+                                 */
+                        }
+                }
+        }
+
+        return (error);
+}
+
+/*
+ * Write out a single page, possibly klustering adjacent dirty pages.
+ *
+ * Like nfs3_putapage / nfs3_sync_putapage
+ */
+static int
+smbfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
+        int flags, cred_t *cr)
+{
+        smbnode_t *np;
+        u_offset_t io_off;
+        u_offset_t lbn_off;
+        u_offset_t lbn;
+        size_t io_len;
+        uint_t bsize;
+        int error;
+
+        np = VTOSMB(vp);
+
+        ASSERT(!vn_is_readonly(vp));
+
+        bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
+        lbn = pp->p_offset / bsize;
+        lbn_off = lbn * bsize;
+
+        /*
+         * Find a kluster that fits in one block, or in
+         * one page if pages are bigger than blocks.  If
+         * there is less file space allocated than a whole
+         * page, we'll shorten the i/o request below.
+         */
+        pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
+            roundup(bsize, PAGESIZE), flags);
+
+        /*
+         * pvn_write_kluster shouldn't have returned a page with offset
+         * behind the original page we were given.  Verify that.
+         */
+        ASSERT((pp->p_offset / bsize) >= lbn);
+
+        /*
+         * Now pp will have the list of kept dirty pages marked for
+         * write back.  It will also handle invalidation and freeing
+         * of pages that are not dirty.  Check for page length rounding
+         * problems.
+         */
+        if (io_off + io_len > lbn_off + bsize) {
+                ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
+                io_len = lbn_off + bsize - io_off;
+        }
+        /*
+         * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
+         * consistent value of r_size. RMODINPROGRESS is set in smbfs_writenp().
+         * When RMODINPROGRESS is set it indicates that a uiomove() is in
+         * progress and the r_size has not been made consistent with the
+         * new size of the file. When the uiomove() completes the r_size is
+         * updated and the RMODINPROGRESS flag is cleared.
+         *
+         * Without this handshaking, it is possible that smbfs_bio()
+         * picks up the old value of r_size before the uiomove() in
+         * smbfs_writenp() completes. This will result in the write
+         * through smbfs_bio() being dropped.
+         *
+         * More precisely, there is a window between the time the uiomove()
+         * completes and the time the r_size is updated. If a VOP_PUTPAGE()
+         * operation intervenes in this window, the page will be picked up,
+         * because it is dirty (it will be unlocked, unless it was
+         * pagecreate'd). When the page is picked up as dirty, the dirty
+         * bit is reset (pvn_getdirty()). In smbfs_write(), r_size is
+         * checked. This will still be the old size. Therefore the page will
+         * not be written out. When segmap_release() calls VOP_PUTPAGE(),
+         * the page will be found to be clean and the write will be dropped.
+         */
+        if (np->r_flags & RMODINPROGRESS) {
+                mutex_enter(&np->r_statelock);
+                if ((np->r_flags & RMODINPROGRESS) &&
+                    np->r_modaddr + MAXBSIZE > io_off &&
+                    np->r_modaddr < io_off + io_len) {
+                        page_t *plist;
+                        /*
+                         * A write is in progress for this region of the file.
+                         * If we did not detect RMODINPROGRESS here then this
+                         * path through smbfs_putapage() would eventually go to
+                         * smbfs_bio() and may not write out all of the data
+                         * in the pages. We end up losing data. So we decide
+                         * to set the modified bit on each page in the page
+                         * list and mark the rnode with RDIRTY. This write
+                         * will be restarted at some later time.
+                         */
+                        plist = pp;
+                        while (plist != NULL) {
+                                pp = plist;
+                                page_sub(&plist, pp);
+                                hat_setmod(pp);
+                                page_io_unlock(pp);
+                                page_unlock(pp);
+                        }
+                        np->r_flags |= RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                        if (offp)
+                                *offp = io_off;
+                        if (lenp)
+                                *lenp = io_len;
+                        return (0);
+                }
+                mutex_exit(&np->r_statelock);
+        }
+
+        /*
+         * NFS handles (flags & B_ASYNC) here...
+         * (See nfs_async_putapage())
+         *
+         * This code section from: nfs3_sync_putapage()
+         */
+
+        flags |= B_WRITE;
+
+        error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
+
+        if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
+            error == EACCES) &&
+            (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
+                if (!(np->r_flags & ROUTOFSPACE)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags |= ROUTOFSPACE;
+                        mutex_exit(&np->r_statelock);
+                }
+                flags |= B_ERROR;
+                pvn_write_done(pp, flags);
+                /*
+                 * If this was not an async thread, then try again to
+                 * write out the pages, but this time, also destroy
+                 * them whether or not the write is successful.  This
+                 * will prevent memory from filling up with these
+                 * pages and destroying them is the only alternative
+                 * if they can't be written out.
+                 *
+                 * Don't do this if this is an async thread because
+                 * when the pages are unlocked in pvn_write_done,
+                 * some other thread could have come along, locked
+                 * them, and queued for an async thread.  It would be
+                 * possible for all of the async threads to be tied
+                 * up waiting to lock the pages again and they would
+                 * all already be locked and waiting for an async
+                 * thread to handle them.  Deadlock.
+                 */
+                if (!(flags & B_ASYNC)) {
+                        error = smbfs_putpage(vp, io_off, io_len,
+                            B_INVAL | B_FORCE, cr, NULL);
+                }
+        } else {
+                if (error)
+                        flags |= B_ERROR;
+                else if (np->r_flags & ROUTOFSPACE) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags &= ~ROUTOFSPACE;
+                        mutex_exit(&np->r_statelock);
+                }
+                pvn_write_done(pp, flags);
+        }
+
+        /* Now more code from: nfs3_putapage */
+
+        if (offp)
+                *offp = io_off;
+        if (lenp)
+                *lenp = io_len;
+
+        return (error);
+}
+
+/*
+ * NFS has this in nfs_client.c (shared by v2,v3,...)
+ * We have it here so smbfs_putapage can be file scope.
+ */
+void
+smbfs_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
+{
+        smbnode_t *np;
+
+        np = VTOSMB(vp);
+
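+        /* Serialize with any other truncate/invalidate on this node. */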
+        mutex_enter(&np->r_statelock);
+        while (np->r_flags & RTRUNCATE)
+                cv_wait(&np->r_cv, &np->r_statelock);
+        np->r_flags |= RTRUNCATE;
+
+        if (off == (u_offset_t)0) {
+                np->r_flags &= ~RDIRTY;
+                if (!(np->r_flags & RSTALE))
+                        np->r_error = 0;
+        }
+        /* Here NFSv3 has np->r_truncaddr = off; */
+        mutex_exit(&np->r_statelock);
+
+        (void) pvn_vplist_dirty(vp, off, smbfs_putapage,
+            B_INVAL | B_TRUNC, cr);
+
+        mutex_enter(&np->r_statelock);
+        np->r_flags &= ~RTRUNCATE;
+        cv_broadcast(&np->r_cv);
+        mutex_exit(&np->r_statelock);
+}
+
+/* Like nfs3_map */
+
+/* ARGSUSED */
+static int
+smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+        size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        segvn_crargs_t  vn_a;
+        struct vattr    va;
+        smbnode_t       *np;
+        smbmntinfo_t    *smi;
+        int             error;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        if (off < 0 || off + (ssize_t)len < 0)
+                return (ENXIO);
+
+        if (vp->v_type != VREG)
+                return (ENODEV);
+
+        /*
+         * NFS does close-to-open consistency stuff here.
+         * Just get (possibly cached) attributes.
+         */
+        va.va_mask = AT_ALL;
+        if ((error = smbfsgetattr(vp, &va, cr)) != 0)
+                return (error);
+
+        /*
+         * Check to see if the vnode is currently marked as not cachable.
+         * This means portions of the file are locked (through VOP_FRLOCK).
+         * In this case the map request must be refused.  We use
+         * np->r_lkserlock to avoid a race with concurrent lock requests.
+         */
+        /*
+         * Atomically increment r_inmap after acquiring r_rwlock. The
+         * idea here is to acquire r_rwlock to block read/write and
+         * not to protect r_inmap. r_inmap will inform smbfs_read/write()
+         * that we are in smbfs_map(). Now, r_rwlock is acquired in order
+         * and we can prevent the deadlock that would have occurred
+         * when smbfs_addmap() would have acquired it out of order.
+         *
+         * Since we are not protecting r_inmap by any lock, we do not
+         * hold any lock when we decrement it. We atomically decrement
+         * r_inmap after we release r_lkserlock.  Note that rwlock is
+         * re-entered as writer in smbfs_addmap (called via as_map).
+         */
+
+        if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
+                return (EINTR);
+        atomic_inc_uint(&np->r_inmap);
+        smbfs_rw_exit(&np->r_rwlock);
+
+        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
+                atomic_dec_uint(&np->r_inmap);
+                return (EINTR);
+        }
+
+        if (vp->v_flag & VNOCACHE) {
+                error = EAGAIN;
+                goto done;
+        }
+
+        /*
+         * Don't allow concurrent locks and mapping if mandatory locking is
+         * enabled.
+         */
+        if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
+            MANDLOCK(vp, va.va_mode)) {
+                error = EAGAIN;
+                goto done;
+        }
+
+        as_rangelock(as);
+        error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+        if (error != 0) {
+                as_rangeunlock(as);
+                goto done;
+        }
+
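+        /* Describe the requested mapping to segvn. */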
+        vn_a.vp = vp;
+        vn_a.offset = off;
+        vn_a.type = (flags & MAP_TYPE);
+        vn_a.prot = (uchar_t)prot;
+        vn_a.maxprot = (uchar_t)maxprot;
+        vn_a.flags = (flags & ~MAP_TYPE);
+        vn_a.cred = cr;
+        vn_a.amp = NULL;
+        vn_a.szc = 0;
+        vn_a.lgrp_mem_policy_flags = 0;
+
+        error = as_map(as, *addrp, len, segvn_create, &vn_a);
+        as_rangeunlock(as);
+
+done:
+        smbfs_rw_exit(&np->r_lkserlock);
+        atomic_dec_uint(&np->r_inmap);
+        return (error);
+}
+
+/* ARGSUSED */
+static int
+smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+        size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t *np = VTOSMB(vp);
+        boolean_t inc_fidrefs = B_FALSE;
+
+        /*
+         * When r_mapcnt goes from zero to non-zero,
+         * increment n_fidrefs
+         */
+        mutex_enter(&np->r_statelock);
+        if (np->r_mapcnt == 0)
+                inc_fidrefs = B_TRUE;
+        np->r_mapcnt += btopr(len);
+        mutex_exit(&np->r_statelock);
+
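+        /* Hold a FID ref so paging I/O keeps working while the file is mapped. */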
+        if (inc_fidrefs) {
+                (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
+                np->n_fidrefs++;
+                smbfs_rw_exit(&np->r_lkserlock);
+        }
+
+        return (0);
+}
+
+/*
+ * Use an address space callback to flush dirty pages after unmap,
+ * which we can't do directly in smbfs_delmap due to locking issues.
+ */
+typedef struct smbfs_delmap_args {
+        vnode_t                 *vp;
+        cred_t                  *cr;
+        offset_t                off;
+        caddr_t                 addr;
+        size_t                  len;
+        uint_t                  prot;
+        uint_t                  maxprot;
+        uint_t                  flags;
+        boolean_t               dec_fidrefs;
+} smbfs_delmap_args_t;
+
+/* ARGSUSED */
+static int
+smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+        size_t len, uint_t prot, uint_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t *np = VTOSMB(vp);
+        smbfs_delmap_args_t     *dmapp;
+        int error;
+
+        dmapp = kmem_zalloc(sizeof (*dmapp), KM_SLEEP);
+
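+        /* Save everything the unmap callback will need. */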
+        dmapp->vp = vp;
+        dmapp->off = off;
+        dmapp->addr = addr;
+        dmapp->len = len;
+        dmapp->prot = prot;
+        dmapp->maxprot = maxprot;
+        dmapp->flags = flags;
+        dmapp->cr = cr;
+        dmapp->dec_fidrefs = B_FALSE;
+
+        /*
+         * When r_mapcnt returns to zero, arrange for the
+         * callback to decrement n_fidrefs
+         */
+        mutex_enter(&np->r_statelock);
+        np->r_mapcnt -= btopr(len);
+        ASSERT(np->r_mapcnt >= 0);
+        if (np->r_mapcnt == 0)
+                dmapp->dec_fidrefs = B_TRUE;
+        mutex_exit(&np->r_statelock);
+
+        error = as_add_callback(as, smbfs_delmap_callback, dmapp,
+            AS_UNMAP_EVENT, addr, len, KM_SLEEP);
+        if (error != 0) {
+                /*
+                 * So sad, no callback is coming. Can't flush pages
+                 * in delmap (the AS lock is held).  Just handle n_fidrefs.
+                 */
+                cmn_err(CE_NOTE, "smbfs_delmap(%p) "
+                    "as_add_callback err=%d",
+                    (void *)vp, error);
+
+                if (dmapp->dec_fidrefs) {
+                        struct smb_cred scred;
+
+                        (void) smbfs_rw_enter_sig(&np->r_lkserlock,
+                            RW_WRITER, 0);
+                        smb_credinit(&scred, dmapp->cr);
+
+                        smbfs_rele_fid(np, &scred);
+
+                        smb_credrele(&scred);
+                        smbfs_rw_exit(&np->r_lkserlock);
+                }
+                kmem_free(dmapp, sizeof (*dmapp));
+        }
+
+        return (0);
+}
+
+/*
+ * Remove some pages from an mmap'd vnode.  Flush any
+ * dirty pages in the unmapped range.
+ */
+/* ARGSUSED */
+static void
+smbfs_delmap_callback(struct as *as, void *arg, uint_t event)
+{
+        vnode_t                 *vp;
+        smbnode_t               *np;
+        smbmntinfo_t            *smi;
+        smbfs_delmap_args_t     *dmapp = arg;
+
+        vp = dmapp->vp;
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        /* Decremented r_mapcnt in smbfs_delmap */
+
+        /*
+         * Initiate a page flush and potential commit if there are
+         * pages, the file system was not mounted readonly, the segment
+         * was mapped shared, and the pages themselves were writeable.
+         *
+         * Mark the node RDIRTY here; it is used later to check whether
+         * the file is dirty when smbfs is unmounted.
+         */
+        if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
+            dmapp->flags == MAP_SHARED && (dmapp->maxprot & PROT_WRITE)) {
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RDIRTY;
+                mutex_exit(&np->r_statelock);
+
+                /*
+                 * Need to finish the putpage before we
+                 * close the OtW FID needed for I/O.
+                 */
+                (void) smbfs_putpage(vp, dmapp->off, dmapp->len, 0,
+                    dmapp->cr, NULL);
+        }
+
+        if ((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO))
+                (void) smbfs_putpage(vp, dmapp->off, dmapp->len,
+                    B_INVAL, dmapp->cr, NULL);
+
+        /*
+         * If r_mapcnt went to zero, drop our FID ref now.
+         * On the last fidref, this does an OtW close.
+         */
+        if (dmapp->dec_fidrefs) {
+                struct smb_cred scred;
+
+                (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
+                smb_credinit(&scred, dmapp->cr);
+
+                smbfs_rele_fid(np, &scred);
+
+                smb_credrele(&scred);
+                smbfs_rw_exit(&np->r_lkserlock);
+        }
+
+        (void) as_delete_callback(as, arg);
+        kmem_free(dmapp, sizeof (*dmapp));
+}
+
+/* No smbfs_pageio() or smbfs_dispose() ops. */
+
+/* misc. ******************************************************** */
+
+
+/*
  * XXX
  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
  */
 static int
 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,

@@ -3035,19 +4719,30 @@
                         if (error || va.va_size == bfp->l_start)
                                 return (error);
                         va.va_mask = AT_SIZE;
                         va.va_size = bfp->l_start;
                         error = smbfssetattr(vp, &va, 0, cr);
+                        /* SMBFS_VNEVENT... */
                 } else
                         error = EINVAL;
         }
 
         return (error);
 }
 
+
 /* ARGSUSED */
 static int
+smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+        return (ENOSYS);
+}
+
+
+/* ARGSUSED */
+static int
 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
         caller_context_t *ct)
 {
         vfs_t *vfs;
         smbmntinfo_t *smi;

@@ -3218,5 +4913,56 @@
         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
         else
                 return (ENOSYS);
 }
+
+
+/*
+ * Most unimplemented ops will return ENOSYS because of fs_nosys().
+ * The only ops where that won't work are ACCESS (due to open(2)
+ * failures) and ... (anything else left?)
+ */
+const fs_operation_def_t smbfs_vnodeops_template[] = {
+        VOPNAME_OPEN,           { .vop_open = smbfs_open },
+        VOPNAME_CLOSE,          { .vop_close = smbfs_close },
+        VOPNAME_READ,           { .vop_read = smbfs_read },
+        VOPNAME_WRITE,          { .vop_write = smbfs_write },
+        VOPNAME_IOCTL,          { .vop_ioctl = smbfs_ioctl },
+        VOPNAME_GETATTR,        { .vop_getattr = smbfs_getattr },
+        VOPNAME_SETATTR,        { .vop_setattr = smbfs_setattr },
+        VOPNAME_ACCESS,         { .vop_access = smbfs_access },
+        VOPNAME_LOOKUP,         { .vop_lookup = smbfs_lookup },
+        VOPNAME_CREATE,         { .vop_create = smbfs_create },
+        VOPNAME_REMOVE,         { .vop_remove = smbfs_remove },
+        VOPNAME_LINK,           { .vop_link = smbfs_link },
+        VOPNAME_RENAME,         { .vop_rename = smbfs_rename },
+        VOPNAME_MKDIR,          { .vop_mkdir = smbfs_mkdir },
+        VOPNAME_RMDIR,          { .vop_rmdir = smbfs_rmdir },
+        VOPNAME_READDIR,        { .vop_readdir = smbfs_readdir },
+        VOPNAME_SYMLINK,        { .vop_symlink = smbfs_symlink },
+        VOPNAME_READLINK,       { .vop_readlink = smbfs_readlink },
+        VOPNAME_FSYNC,          { .vop_fsync = smbfs_fsync },
+        VOPNAME_INACTIVE,       { .vop_inactive = smbfs_inactive },
+        VOPNAME_FID,            { .vop_fid = smbfs_fid },
+        VOPNAME_RWLOCK,         { .vop_rwlock = smbfs_rwlock },
+        VOPNAME_RWUNLOCK,       { .vop_rwunlock = smbfs_rwunlock },
+        VOPNAME_SEEK,           { .vop_seek = smbfs_seek },
+        VOPNAME_FRLOCK,         { .vop_frlock = smbfs_frlock },
+        VOPNAME_SPACE,          { .vop_space = smbfs_space },
+        VOPNAME_REALVP,         { .vop_realvp = smbfs_realvp },
+        VOPNAME_GETPAGE,        { .vop_getpage = smbfs_getpage },
+        VOPNAME_PUTPAGE,        { .vop_putpage = smbfs_putpage },
+        VOPNAME_MAP,            { .vop_map = smbfs_map },
+        VOPNAME_ADDMAP,         { .vop_addmap = smbfs_addmap },
+        VOPNAME_DELMAP,         { .vop_delmap = smbfs_delmap },
+        VOPNAME_DUMP,           { .error = fs_nosys }, /* smbfs_dump, */
+        VOPNAME_PATHCONF,       { .vop_pathconf = smbfs_pathconf },
+        VOPNAME_PAGEIO,         { .error = fs_nosys }, /* smbfs_pageio, */
+        VOPNAME_SETSECATTR,     { .vop_setsecattr = smbfs_setsecattr },
+        VOPNAME_GETSECATTR,     { .vop_getsecattr = smbfs_getsecattr },
+        VOPNAME_SHRLOCK,        { .vop_shrlock = smbfs_shrlock },
+#ifdef  SMBFS_VNEVENT
+        VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
+#endif
+        { NULL, NULL }
+};
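
For context (not part of this change): the template above is only the op
table.  It is registered with the VFS framework during filesystem
initialization, which is not shown in this diff, with something like:

        /*
         * Illustrative only: the exact name string and error handling
         * live in the smbfs VFS init code, which is outside this diff.
         */
        if (vn_make_ops("smbfs", smbfs_vnodeops_template,
            &smbfs_vnodeops) != 0)
                cmn_err(CE_WARN, "smbfs: bad vnode ops template");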