1 /*
   2  * Copyright (c) 2000-2001, Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vfsops.c,v 1.73.64.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34 
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  38  * Copyright 2013, Joyent, Inc. All rights reserved.
  39  * Copyright (c) 2016 by Delphix. All rights reserved.
  40  */
  41 
  42 #include <sys/systm.h>
  43 #include <sys/cred.h>
  44 #include <sys/time.h>
  45 #include <sys/vfs.h>
  46 #include <sys/vnode.h>
  47 #include <fs/fs_subr.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/kmem.h>
  50 #include <sys/mkdev.h>
  51 #include <sys/mount.h>
  52 #include <sys/statvfs.h>
  53 #include <sys/errno.h>
  54 #include <sys/debug.h>
  55 #include <sys/cmn_err.h>
  56 #include <sys/modctl.h>
  57 #include <sys/policy.h>
  58 #include <sys/atomic.h>
  59 #include <sys/zone.h>
  60 #include <sys/vfs_opreg.h>
  61 #include <sys/mntent.h>
  62 #include <sys/priv.h>
  63 #include <sys/tsol/label.h>
  64 #include <sys/tsol/tndb.h>
  65 #include <inet/ip.h>
  66 
  67 #include <netsmb/smb_osdep.h>
  68 #include <netsmb/smb.h>
  69 #include <netsmb/smb_conn.h>
  70 #include <netsmb/smb_subr.h>
  71 #include <netsmb/smb_dev.h>
  72 
  73 #include <smbfs/smbfs.h>
  74 #include <smbfs/smbfs_node.h>
  75 #include <smbfs/smbfs_subr.h>
  76 
  77 /*
  78  * Should smbfs mount enable "-o acl" by default?  There are good
  79  * arguments for both.  The most common use case is individual users
  80  * accessing files on some SMB server, for which "noacl" is the more
  81  * convenient default.  A less common use case is data migration,
  82  * where the "acl" option might be a desirable default.  We'll make
  83  * the common use case the default.  This default can be changed via
  84  * /etc/system, and/or set per-mount via the "acl" mount option.
  85  */
  86 int smbfs_default_opt_acl = 0;
  87 
  88 /*
  89  * Local functions definitions.
  90  */
  91 int             smbfsinit(int fstyp, char *name);
  92 void            smbfsfini();
  93 static int      smbfs_mount_label_policy(vfs_t *, void *, int, cred_t *);
  94 
  95 /*
  96  * SMBFS Mount options table for MS_OPTIONSTR
  97  * Note: These are not all the options.
  98  * Some options come in via MS_DATA.
  99  * Others are generic (see vfs.c)
 100  */
 101 static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
 102 static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
 103 static char *acl_cancel[] = { MNTOPT_NOACL, NULL };
 104 static char *noacl_cancel[] = { MNTOPT_ACL, NULL };
 105 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
 106 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
 107 
 108 static mntopt_t mntopts[] = {
 109 /*
 110  *      option name             cancel option   default arg     flags
 111  *              ufs arg flag
 112  */
 113         { MNTOPT_INTR,          intr_cancel,    NULL,   MO_DEFAULT, 0 },
 114         { MNTOPT_NOINTR,        nointr_cancel,  NULL,   0,      0 },
 115         { MNTOPT_ACL,           acl_cancel,     NULL,   0,      0 },
 116         { MNTOPT_NOACL,         noacl_cancel,   NULL,   0,      0 },
 117         { MNTOPT_XATTR,         xattr_cancel,   NULL,   MO_DEFAULT, 0 },
 118         { MNTOPT_NOXATTR,       noxattr_cancel, NULL,   0,      0 }
 119 };
 120 
 121 static mntopts_t smbfs_mntopts = {
 122         sizeof (mntopts) / sizeof (mntopt_t),
 123         mntopts
 124 };
 125 
 126 static const char fs_type_name[FSTYPSZ] = "smbfs";
 127 
 128 static vfsdef_t vfw = {
 129         VFSDEF_VERSION,
 130         (char *)fs_type_name,
 131         smbfsinit,              /* init routine */
 132         VSW_HASPROTO|VSW_NOTZONESAFE,   /* flags */
 133         &smbfs_mntopts                      /* mount options table prototype */
 134 };
 135 
 136 static struct modlfs modlfs = {
 137         &mod_fsops,
 138         "SMBFS filesystem",
 139         &vfw
 140 };
 141 
 142 static struct modlinkage modlinkage = {
 143         MODREV_1, (void *)&modlfs, NULL
 144 };
 145 
 146 /*
 147  * Mutex to protect the following variables:
 148  *        smbfs_major
 149  *        smbfs_minor
 150  */
 151 extern  kmutex_t        smbfs_minor_lock;
 152 extern  int             smbfs_major;
 153 extern  int             smbfs_minor;
 154 
 155 /*
 156  * Prevent unloads while we have mounts
 157  */
 158 uint32_t        smbfs_mountcount;
 159 
 160 /*
 161  * smbfs vfs operations.
 162  */
 163 static int      smbfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
 164 static int      smbfs_unmount(vfs_t *, int, cred_t *);
 165 static int      smbfs_root(vfs_t *, vnode_t **);
 166 static int      smbfs_statvfs(vfs_t *, statvfs64_t *);
 167 static int      smbfs_sync(vfs_t *, short, cred_t *);
 168 static void     smbfs_freevfs(vfs_t *);
 169 
 170 /*
 171  * Module loading
 172  */
 173 
 174 /*
 175  * This routine is invoked automatically when the kernel module
 176  * containing this routine is loaded.  This allows module specific
 177  * initialization to be done when the module is loaded.
 178  */
 179 int
 180 _init(void)
 181 {
 182         int             error;
 183 
 184         /*
 185          * Check compiled-in version of "nsmb"
 186          * that we're linked with.  (paranoid)
 187          */
 188         if (nsmb_version != NSMB_VERSION) {
 189                 cmn_err(CE_WARN, "_init: nsmb version mismatch");
 190                 return (ENOTTY);
 191         }
 192 
 193         smbfs_mountcount = 0;
 194 
 195         /*
 196          * NFS calls these two in _clntinit
 197          * Easier to follow this way.
 198          */
 199         if ((error = smbfs_subrinit()) != 0) {
 200                 cmn_err(CE_WARN, "_init: smbfs_subrinit failed");
 201                 return (error);
 202         }
 203 
 204         if ((error = smbfs_vfsinit()) != 0) {
 205                 cmn_err(CE_WARN, "_init: smbfs_vfsinit failed");
 206                 smbfs_subrfini();
 207                 return (error);
 208         }
 209 
 210         if ((error = smbfs_clntinit()) != 0) {
 211                 cmn_err(CE_WARN, "_init: smbfs_clntinit failed");
 212                 smbfs_vfsfini();
 213                 smbfs_subrfini();
 214                 return (error);
 215         }
 216 
 217         error = mod_install((struct modlinkage *)&modlinkage);
 218         return (error);
 219 }
 220 
 221 /*
 222  * Free kernel module resources that were allocated in _init
 223  * and remove the linkage information into the kernel
 224  */
 225 int
 226 _fini(void)
 227 {
 228         int     error;
 229 
 230         /*
 231          * If a forcedly unmounted instance is still hanging around,
 232          * we cannot allow the module to be unloaded because that would
 233          * cause panics once the VFS framework decides it's time to call
 234          * into VFS_FREEVFS().
 235          */
 236         if (smbfs_mountcount)
 237                 return (EBUSY);
 238 
 239         error = mod_remove(&modlinkage);
 240         if (error)
 241                 return (error);
 242 
 243         /*
 244          * Free the allocated smbnodes, etc.
 245          */
 246         smbfs_clntfini();
 247 
 248         /* NFS calls these two in _clntfini */
 249         smbfs_vfsfini();
 250         smbfs_subrfini();
 251 
 252         /*
 253          * Free the ops vectors
 254          */
 255         smbfsfini();
 256         return (0);
 257 }
 258 
 259 /*
 260  * Return information about the module
 261  */
 262 int
 263 _info(struct modinfo *modinfop)
 264 {
 265         return (mod_info((struct modlinkage *)&modlinkage, modinfop));
 266 }
 267 
 268 /*
 269  * Initialize the vfs structure
 270  */
 271 
 272 int smbfsfstyp;
 273 vfsops_t *smbfs_vfsops = NULL;
 274 
 275 static const fs_operation_def_t smbfs_vfsops_template[] = {
 276         { VFSNAME_MOUNT, { .vfs_mount = smbfs_mount } },
 277         { VFSNAME_UNMOUNT, { .vfs_unmount = smbfs_unmount } },
 278         { VFSNAME_ROOT, { .vfs_root = smbfs_root } },
 279         { VFSNAME_STATVFS, { .vfs_statvfs = smbfs_statvfs } },
 280         { VFSNAME_SYNC, { .vfs_sync = smbfs_sync } },
 281         { VFSNAME_VGET, { .error = fs_nosys } },
 282         { VFSNAME_MOUNTROOT, { .error = fs_nosys } },
 283         { VFSNAME_FREEVFS, { .vfs_freevfs = smbfs_freevfs } },
 284         { NULL, NULL }
 285 };
 286 
 287 int
 288 smbfsinit(int fstyp, char *name)
 289 {
 290         int             error;
 291 
 292         error = vfs_setfsops(fstyp, smbfs_vfsops_template, &smbfs_vfsops);
 293         if (error != 0) {
 294                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
 295                     "smbfsinit: bad vfs ops template");
 296                 return (error);
 297         }
 298 
 299         error = vn_make_ops(name, smbfs_vnodeops_template, &smbfs_vnodeops);
 300         if (error != 0) {
 301                 (void) vfs_freevfsops_by_type(fstyp);
 302                 zcmn_err(GLOBAL_ZONEID, CE_WARN,
 303                     "smbfsinit: bad vnode ops template");
 304                 return (error);
 305         }
 306 
 307         smbfsfstyp = fstyp;
 308 
 309         return (0);
 310 }
 311 
 312 void
 313 smbfsfini()
 314 {
 315         if (smbfs_vfsops) {
 316                 (void) vfs_freevfsops_by_type(smbfsfstyp);
 317                 smbfs_vfsops = NULL;
 318         }
 319         if (smbfs_vnodeops) {
 320                 vn_freevnodeops(smbfs_vnodeops);
 321                 smbfs_vnodeops = NULL;
 322         }
 323 }
 324 
 325 void
 326 smbfs_free_smi(smbmntinfo_t *smi)
 327 {
 328         if (smi == NULL)
 329                 return;
 330 
 331         if (smi->smi_zone_ref.zref_zone != NULL)
 332                 zone_rele_ref(&smi->smi_zone_ref, ZONE_REF_SMBFS);
 333 
 334         if (smi->smi_share != NULL)
 335                 smb_share_rele(smi->smi_share);
 336 
 337         avl_destroy(&smi->smi_hash_avl);
 338         rw_destroy(&smi->smi_hash_lk);
 339         cv_destroy(&smi->smi_statvfs_cv);
 340         mutex_destroy(&smi->smi_lock);
 341 
 342         kmem_free(smi, sizeof (smbmntinfo_t));
 343 }
 344 
 345 /*
 346  * smbfs mount vfsop
 347  * Set up mount info record and attach it to vfs struct.
 348  */
 349 static int
 350 smbfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 351 {
 352         char            *data = uap->dataptr;
 353         int             error;
 354         smbnode_t       *rtnp = NULL;   /* root of this fs */
 355         smbmntinfo_t    *smi = NULL;
 356         dev_t           smbfs_dev;
 357         int             version;
 358         int             devfd;
 359         zone_t          *zone = curproc->p_zone;
 360         zone_t          *mntzone = NULL;
 361         smb_share_t     *ssp = NULL;
 362         smb_cred_t      scred;
 363         int             flags, sec;
 364 
 365         STRUCT_DECL(smbfs_args, args);          /* smbfs mount arguments */
 366 
 367         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 368                 return (error);
 369 
 370         if (mvp->v_type != VDIR)
 371                 return (ENOTDIR);
 372 
 373         /*
 374          * get arguments
 375          *
 376          * uap->datalen might be different from sizeof (args)
 377          * in a compatible situation.
 378          */
 379         STRUCT_INIT(args, get_udatamodel());
 380         bzero(STRUCT_BUF(args), SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE));
 381         if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
 382             SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE))))
 383                 return (EFAULT);
 384 
 385         /*
 386          * Check mount program version
 387          */
 388         version = STRUCT_FGET(args, version);
 389         if (version != SMBFS_VERSION) {
 390                 cmn_err(CE_WARN, "mount version mismatch:"
 391                     " kernel=%d, mount=%d\n",
 392                     SMBFS_VERSION, version);
 393                 return (EINVAL);
 394         }
 395 
 396         /*
 397          * Deal with re-mount requests.
 398          */
 399         if (uap->flags & MS_REMOUNT) {
 400                 cmn_err(CE_WARN, "MS_REMOUNT not implemented");
 401                 return (ENOTSUP);
 402         }
 403 
 404         /*
 405          * Check for busy
 406          */
 407         mutex_enter(&mvp->v_lock);
 408         if (!(uap->flags & MS_OVERLAY) &&
 409             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 410                 mutex_exit(&mvp->v_lock);
 411                 return (EBUSY);
 412         }
 413         mutex_exit(&mvp->v_lock);
 414 
 415         /*
 416          * Get the "share" from the netsmb driver (ssp).
 417          * It is returned with a "ref" (hold) for us.
 418          * Release this hold: at errout below, or in
 419          * smbfs_freevfs().
 420          */
 421         devfd = STRUCT_FGET(args, devfd);
 422         error = smb_dev2share(devfd, &ssp);
 423         if (error) {
 424                 cmn_err(CE_WARN, "invalid device handle %d (%d)\n",
 425                     devfd, error);
 426                 return (error);
 427         }
 428 
 429         /*
 430          * Use "goto errout" from here on.
 431          * See: ssp, smi, rtnp, mntzone
 432          */
 433 
 434         /*
 435          * Determine the zone we're being mounted into.
 436          */
 437         zone_hold(mntzone = zone);              /* start with this assumption */
 438         if (getzoneid() == GLOBAL_ZONEID) {
 439                 zone_rele(mntzone);
 440                 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
 441                 ASSERT(mntzone != NULL);
 442                 if (mntzone != zone) {
 443                         error = EBUSY;
 444                         goto errout;
 445                 }
 446         }
 447 
 448         /*
 449          * Stop the mount from going any further if the zone is going away.
 450          */
 451         if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
 452                 error = EBUSY;
 453                 goto errout;
 454         }
 455 
 456         /*
 457          * On a Trusted Extensions client, we may have to force read-only
 458          * for read-down mounts.
 459          */
 460         if (is_system_labeled()) {
 461                 void *addr;
 462                 int ipvers = 0;
 463                 struct smb_vc *vcp;
 464 
 465                 vcp = SSTOVC(ssp);
 466                 addr = smb_vc_getipaddr(vcp, &ipvers);
 467                 error = smbfs_mount_label_policy(vfsp, addr, ipvers, cr);
 468 
 469                 if (error > 0)
 470                         goto errout;
 471 
 472                 if (error == -1) {
 473                         /* change mount to read-only to prevent write-down */
 474                         vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 475                 }
 476         }
 477 
 478         /* Prevent unload. */
 479         atomic_inc_32(&smbfs_mountcount);
 480 
 481         /*
 482          * Create a mount record and link it to the vfs struct.
 483          * No more possiblities for errors from here on.
 484          * Tear-down of this stuff is in smbfs_free_smi()
 485          *
 486          * Compare with NFS: nfsrootvp()
 487          */
 488         smi = kmem_zalloc(sizeof (*smi), KM_SLEEP);
 489 
 490         mutex_init(&smi->smi_lock, NULL, MUTEX_DEFAULT, NULL);
 491         cv_init(&smi->smi_statvfs_cv, NULL, CV_DEFAULT, NULL);
 492 
 493         rw_init(&smi->smi_hash_lk, NULL, RW_DEFAULT, NULL);
 494         smbfs_init_hash_avl(&smi->smi_hash_avl);
 495 
 496         smi->smi_share = ssp;
 497         ssp = NULL;
 498 
 499         /*
 500          * Convert the anonymous zone hold acquired via zone_hold() above
 501          * into a zone reference.
 502          */
 503         zone_init_ref(&smi->smi_zone_ref);
 504         zone_hold_ref(mntzone, &smi->smi_zone_ref, ZONE_REF_SMBFS);
 505         zone_rele(mntzone);
 506         mntzone = NULL;
 507 
 508         /*
 509          * Initialize option defaults
 510          */
 511         smi->smi_flags       = SMI_LLOCK;
 512         smi->smi_acregmin = SEC2HR(SMBFS_ACREGMIN);
 513         smi->smi_acregmax = SEC2HR(SMBFS_ACREGMAX);
 514         smi->smi_acdirmin = SEC2HR(SMBFS_ACDIRMIN);
 515         smi->smi_acdirmax = SEC2HR(SMBFS_ACDIRMAX);
 516 
 517         /*
 518          * All "generic" mount options have already been
 519          * handled in vfs.c:domount() - see mntopts stuff.
 520          * Query generic options using vfs_optionisset().
 521          * Give ACL an adjustable system-wide default.
 522          */
 523         if (smbfs_default_opt_acl ||
 524             vfs_optionisset(vfsp, MNTOPT_ACL, NULL))
 525                 smi->smi_flags |= SMI_ACL;
 526         if (vfs_optionisset(vfsp, MNTOPT_NOACL, NULL))
 527                 smi->smi_flags &= ~SMI_ACL;
 528         if (vfs_optionisset(vfsp, MNTOPT_INTR, NULL))
 529                 smi->smi_flags |= SMI_INT;
 530 
 531         /*
 532          * Get the mount options that come in as smbfs_args,
 533          * starting with args.flags (SMBFS_MF_xxx)
 534          */
 535         flags = STRUCT_FGET(args, flags);
 536         smi->smi_uid         = STRUCT_FGET(args, uid);
 537         smi->smi_gid         = STRUCT_FGET(args, gid);
 538         smi->smi_fmode       = STRUCT_FGET(args, file_mode) & 0777;
 539         smi->smi_dmode       = STRUCT_FGET(args, dir_mode) & 0777;
 540 
 541         /*
 542          * Hande the SMBFS_MF_xxx flags.
 543          */
 544         if (flags & SMBFS_MF_NOAC)
 545                 smi->smi_flags |= SMI_NOAC;
 546         if (flags & SMBFS_MF_ACREGMIN) {
 547                 sec = STRUCT_FGET(args, acregmin);
 548                 if (sec < 0 || sec > SMBFS_ACMINMAX)
 549                         sec = SMBFS_ACMINMAX;
 550                 smi->smi_acregmin = SEC2HR(sec);
 551         }
 552         if (flags & SMBFS_MF_ACREGMAX) {
 553                 sec = STRUCT_FGET(args, acregmax);
 554                 if (sec < 0 || sec > SMBFS_ACMAXMAX)
 555                         sec = SMBFS_ACMAXMAX;
 556                 smi->smi_acregmax = SEC2HR(sec);
 557         }
 558         if (flags & SMBFS_MF_ACDIRMIN) {
 559                 sec = STRUCT_FGET(args, acdirmin);
 560                 if (sec < 0 || sec > SMBFS_ACMINMAX)
 561                         sec = SMBFS_ACMINMAX;
 562                 smi->smi_acdirmin = SEC2HR(sec);
 563         }
 564         if (flags & SMBFS_MF_ACDIRMAX) {
 565                 sec = STRUCT_FGET(args, acdirmax);
 566                 if (sec < 0 || sec > SMBFS_ACMAXMAX)
 567                         sec = SMBFS_ACMAXMAX;
 568                 smi->smi_acdirmax = SEC2HR(sec);
 569         }
 570 
 571         /*
 572          * Get attributes of the remote file system,
 573          * i.e. ACL support, named streams, etc.
 574          */
 575         smb_credinit(&scred, cr);
 576         error = smbfs_smb_qfsattr(smi->smi_share, &smi->smi_fsa, &scred);
 577         smb_credrele(&scred);
 578         if (error) {
 579                 SMBVDEBUG("smbfs_smb_qfsattr error %d\n", error);
 580         }
 581 
 582         /*
 583          * We enable XATTR by default (via smbfs_mntopts)
 584          * but if the share does not support named streams,
 585          * force the NOXATTR option (also clears XATTR).
 586          * Caller will set or clear VFS_XATTR after this.
 587          */
 588         if ((smi->smi_fsattr & FILE_NAMED_STREAMS) == 0)
 589                 vfs_setmntopt(vfsp, MNTOPT_NOXATTR, NULL, 0);
 590 
 591         /*
 592          * Ditto ACLs (disable if not supported on this share)
 593          */
 594         if ((smi->smi_fsattr & FILE_PERSISTENT_ACLS) == 0) {
 595                 vfs_setmntopt(vfsp, MNTOPT_NOACL, NULL, 0);
 596                 smi->smi_flags &= ~SMI_ACL;
 597         }
 598 
 599         /*
 600          * Assign a unique device id to the mount
 601          */
 602         mutex_enter(&smbfs_minor_lock);
 603         do {
 604                 smbfs_minor = (smbfs_minor + 1) & MAXMIN32;
 605                 smbfs_dev = makedevice(smbfs_major, smbfs_minor);
 606         } while (vfs_devismounted(smbfs_dev));
 607         mutex_exit(&smbfs_minor_lock);
 608 
 609         vfsp->vfs_dev        = smbfs_dev;
 610         vfs_make_fsid(&vfsp->vfs_fsid, smbfs_dev, smbfsfstyp);
 611         vfsp->vfs_data       = (caddr_t)smi;
 612         vfsp->vfs_fstype = smbfsfstyp;
 613         vfsp->vfs_bsize = MAXBSIZE;
 614         vfsp->vfs_bcount = 0;
 615 
 616         smi->smi_vfsp        = vfsp;
 617         smbfs_zonelist_add(smi);        /* undo in smbfs_freevfs */
 618 
 619         /* PSARC 2007/227 VFS Feature Registration */
 620         vfs_set_feature(vfsp, VFSFT_XVATTR);
 621         vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 622 
 623         /*
 624          * Create the root vnode, which we need in unmount
 625          * for the call to smbfs_check_table(), etc.
 626          * Release this hold in smbfs_unmount.
 627          */
 628         rtnp = smbfs_node_findcreate(smi, "\\", 1, NULL, 0, 0,
 629             &smbfs_fattr0);
 630         ASSERT(rtnp != NULL);
 631         rtnp->r_vnode->v_type = VDIR;
 632         rtnp->r_vnode->v_flag |= VROOT;
 633         smi->smi_root = rtnp;
 634 
 635         /*
 636          * NFS does other stuff here too:
 637          *   async worker threads
 638          *   init kstats
 639          *
 640          * End of code from NFS nfsrootvp()
 641          */
 642         return (0);
 643 
 644 errout:
 645         vfsp->vfs_data = NULL;
 646         if (smi != NULL)
 647                 smbfs_free_smi(smi);
 648 
 649         if (mntzone != NULL)
 650                 zone_rele(mntzone);
 651 
 652         if (ssp != NULL)
 653                 smb_share_rele(ssp);
 654 
 655         return (error);
 656 }
 657 
 658 /*
 659  * vfs operations
 660  */
 661 static int
 662 smbfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 663 {
 664         smbmntinfo_t    *smi;
 665         smbnode_t       *rtnp;
 666 
 667         smi = VFTOSMI(vfsp);
 668 
 669         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 670                 return (EPERM);
 671 
 672         if ((flag & MS_FORCE) == 0) {
 673                 smbfs_rflush(vfsp, cr);
 674 
 675                 /*
 676                  * If there are any active vnodes on this file system,
 677                  * (other than the root vnode) then the file system is
 678                  * busy and can't be umounted.
 679                  */
 680                 if (smbfs_check_table(vfsp, smi->smi_root))
 681                         return (EBUSY);
 682 
 683                 /*
 684                  * We normally hold a ref to the root vnode, so
 685                  * check for references beyond the one we expect:
 686                  *   smbmntinfo_t -> smi_root
 687                  * Note that NFS does not hold the root vnode.
 688                  */
 689                 if (smi->smi_root &&
 690                     smi->smi_root->r_vnode->v_count > 1)
 691                         return (EBUSY);
 692         }
 693 
 694         /*
 695          * common code for both forced and non-forced
 696          *
 697          * Setting VFS_UNMOUNTED prevents new operations.
 698          * Operations already underway may continue,
 699          * but not for long.
 700          */
 701         vfsp->vfs_flag |= VFS_UNMOUNTED;
 702 
 703         /*
 704          * Shutdown any outstanding I/O requests on this share,
 705          * and force a tree disconnect.  The share object will
 706          * continue to hang around until smb_share_rele().
 707          * This should also cause most active nodes to be
 708          * released as their operations fail with EIO.
 709          */
 710         smb_share_kill(smi->smi_share);
 711 
 712         /*
 713          * If we hold the root VP (and we normally do)
 714          * then it's safe to release it now.
 715          */
 716         if (smi->smi_root) {
 717                 rtnp = smi->smi_root;
 718                 smi->smi_root = NULL;
 719                 VN_RELE(rtnp->r_vnode);      /* release root vnode */
 720         }
 721 
 722         /*
 723          * Remove all nodes from the node hash tables.
 724          * This (indirectly) calls: smbfs_addfree, smbinactive,
 725          * which will try to flush dirty pages, etc. so
 726          * don't destroy the underlying share just yet.
 727          *
 728          * Also, with a forced unmount, some nodes may
 729          * remain active, and those will get cleaned up
 730          * after their last vn_rele.
 731          */
 732         smbfs_destroy_table(vfsp);
 733 
 734         /*
 735          * Delete our kstats...
 736          *
 737          * Doing it here, rather than waiting until
 738          * smbfs_freevfs so these are not visible
 739          * after the unmount.
 740          */
 741         if (smi->smi_io_kstats) {
 742                 kstat_delete(smi->smi_io_kstats);
 743                 smi->smi_io_kstats = NULL;
 744         }
 745         if (smi->smi_ro_kstats) {
 746                 kstat_delete(smi->smi_ro_kstats);
 747                 smi->smi_ro_kstats = NULL;
 748         }
 749 
 750         /*
 751          * The rest happens in smbfs_freevfs()
 752          */
 753         return (0);
 754 }
 755 
 756 
 757 /*
 758  * find root of smbfs
 759  */
 760 static int
 761 smbfs_root(vfs_t *vfsp, vnode_t **vpp)
 762 {
 763         smbmntinfo_t    *smi;
 764         vnode_t         *vp;
 765 
 766         smi = VFTOSMI(vfsp);
 767 
 768         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 769                 return (EPERM);
 770 
 771         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
 772                 return (EIO);
 773 
 774         /*
 775          * The root vp is created in mount and held
 776          * until unmount, so this is paranoia.
 777          */
 778         if (smi->smi_root == NULL)
 779                 return (EIO);
 780 
 781         /* Just take a reference and return it. */
 782         vp = SMBTOV(smi->smi_root);
 783         VN_HOLD(vp);
 784         *vpp = vp;
 785 
 786         return (0);
 787 }
 788 
 789 /*
 790  * Get file system statistics.
 791  */
 792 static int
 793 smbfs_statvfs(vfs_t *vfsp, statvfs64_t *sbp)
 794 {
 795         int             error;
 796         smbmntinfo_t    *smi = VFTOSMI(vfsp);
 797         smb_share_t     *ssp = smi->smi_share;
 798         statvfs64_t     stvfs;
 799         hrtime_t now;
 800         smb_cred_t      scred;
 801 
 802         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 803                 return (EPERM);
 804 
 805         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
 806                 return (EIO);
 807 
 808         mutex_enter(&smi->smi_lock);
 809 
 810         /*
 811          * Use cached result if still valid.
 812          */
 813 recheck:
 814         now = gethrtime();
 815         if (now < smi->smi_statfstime) {
 816                 error = 0;
 817                 goto cache_hit;
 818         }
 819 
 820         /*
 821          * FS attributes are stale, so someone
 822          * needs to do an OTW call to get them.
 823          * Serialize here so only one thread
 824          * does the OTW call.
 825          */
 826         if (smi->smi_status & SM_STATUS_STATFS_BUSY) {
 827                 smi->smi_status |= SM_STATUS_STATFS_WANT;
 828                 if (!cv_wait_sig(&smi->smi_statvfs_cv, &smi->smi_lock)) {
 829                         mutex_exit(&smi->smi_lock);
 830                         return (EINTR);
 831                 }
 832                 /* Hope status is valid now. */
 833                 goto recheck;
 834         }
 835         smi->smi_status |= SM_STATUS_STATFS_BUSY;
 836         mutex_exit(&smi->smi_lock);
 837 
 838         /*
 839          * Do the OTW call.  Note: lock NOT held.
 840          */
 841         smb_credinit(&scred, NULL);
 842         bzero(&stvfs, sizeof (stvfs));
 843         error = smbfs_smb_statfs(ssp, &stvfs, &scred);
 844         smb_credrele(&scred);
 845         if (error) {
 846                 SMBVDEBUG("statfs error=%d\n", error);
 847         } else {
 848 
 849                 /*
 850                  * Set a few things the OTW call didn't get.
 851                  */
 852                 stvfs.f_frsize = stvfs.f_bsize;
 853                 stvfs.f_favail = stvfs.f_ffree;
 854                 stvfs.f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
 855                 bcopy(fs_type_name, stvfs.f_basetype, FSTYPSZ);
 856                 stvfs.f_flag    = vf_to_stf(vfsp->vfs_flag);
 857                 stvfs.f_namemax = smi->smi_fsa.fsa_maxname;
 858 
 859                 /*
 860                  * Save the result, update lifetime
 861                  */
 862                 now = gethrtime();
 863                 smi->smi_statfstime = now +
 864                     (SM_MAX_STATFSTIME * (hrtime_t)NANOSEC);
 865                 smi->smi_statvfsbuf = stvfs; /* struct assign! */
 866         }
 867 
 868         mutex_enter(&smi->smi_lock);
 869         if (smi->smi_status & SM_STATUS_STATFS_WANT)
 870                 cv_broadcast(&smi->smi_statvfs_cv);
 871         smi->smi_status &= ~(SM_STATUS_STATFS_BUSY | SM_STATUS_STATFS_WANT);
 872 
 873         /*
 874          * Copy the statvfs data to caller's buf.
 875          * Note: struct assignment
 876          */
 877 cache_hit:
 878         if (error == 0)
 879                 *sbp = smi->smi_statvfsbuf;
 880         mutex_exit(&smi->smi_lock);
 881         return (error);
 882 }
 883 
 884 /*
 885  * Flush dirty smbfs files for file system vfsp.
 886  * If vfsp == NULL, all smbfs files are flushed.
 887  */
 888 /*ARGSUSED*/
 889 static int
 890 smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
 891 {
 892 
 893         /*
 894          * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
 895          * to sync metadata, which they would otherwise cache indefinitely.
 896          * Semantically, the only requirement is that the sync be initiated.
 897          * Assume the server-side takes care of attribute sync.
 898          */
 899         if (flag & SYNC_ATTR)
 900                 return (0);
 901 
 902         if (vfsp == NULL) {
 903                 /*
 904                  * Flush ALL smbfs mounts in this zone.
 905                  */
 906                 smbfs_flushall(cr);
 907                 return (0);
 908         }
 909 
 910         smbfs_rflush(vfsp, cr);
 911 
 912         return (0);
 913 }
 914 
 915 /*
 916  * Initialization routine for VFS routines.  Should only be called once
 917  */
 918 int
 919 smbfs_vfsinit(void)
 920 {
 921         return (0);
 922 }
 923 
 924 /*
 925  * Shutdown routine for VFS routines.  Should only be called once
 926  */
 927 void
 928 smbfs_vfsfini(void)
 929 {
 930 }
 931 
 932 void
 933 smbfs_freevfs(vfs_t *vfsp)
 934 {
 935         smbmntinfo_t    *smi;
 936 
 937         /* free up the resources */
 938         smi = VFTOSMI(vfsp);
 939 
 940         /*
 941          * By this time we should have already deleted the
 942          * smi kstats in the unmount code.  If they are still around
 943          * something is wrong
 944          */
 945         ASSERT(smi->smi_io_kstats == NULL);
 946 
 947         smbfs_zonelist_remove(smi);
 948 
 949         smbfs_free_smi(smi);
 950 
 951         /*
 952          * Allow _fini() to succeed now, if so desired.
 953          */
 954         atomic_dec_32(&smbfs_mountcount);
 955 }
 956 
 957 /*
 958  * smbfs_mount_label_policy:
 959  *      Determine whether the mount is allowed according to MAC check,
 960  *      by comparing (where appropriate) label of the remote server
 961  *      against the label of the zone being mounted into.
 962  *
 963  *      Returns:
 964  *               0 :    access allowed
 965  *              -1 :    read-only access allowed (i.e., read-down)
 966  *              >0 : error code, such as EACCES
 967  *
 968  * NB:
 969  * NFS supports Cipso labels by parsing the vfs_resource
 970  * to see what the Solaris server global zone has shared.
 971  * We can't support that for CIFS since resource names
 972  * contain share names, not paths.
 973  */
 974 static int
 975 smbfs_mount_label_policy(vfs_t *vfsp, void *ipaddr, int addr_type, cred_t *cr)
 976 {
 977         bslabel_t       *server_sl, *mntlabel;
 978         zone_t          *mntzone = NULL;
 979         ts_label_t      *zlabel;
 980         tsol_tpc_t      *tp;
 981         ts_label_t      *tsl = NULL;
 982         int             retv;
 983 
 984         /*
 985          * Get the zone's label.  Each zone on a labeled system has a label.
 986          */
 987         mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
 988         zlabel = mntzone->zone_slabel;
 989         ASSERT(zlabel != NULL);
 990         label_hold(zlabel);
 991 
 992         retv = EACCES;                          /* assume the worst */
 993 
 994         /*
 995          * Next, get the assigned label of the remote server.
 996          */
 997         tp = find_tpc(ipaddr, addr_type, B_FALSE);
 998         if (tp == NULL)
 999                 goto out;                       /* error getting host entry */
1000 
1001         if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
1002                 goto rel_tpc;                   /* invalid domain */
1003         if ((tp->tpc_tp.host_type != UNLABELED))
1004                 goto rel_tpc;                   /* invalid hosttype */
1005 
1006         server_sl = &tp->tpc_tp.tp_def_label;
1007         mntlabel = label2bslabel(zlabel);
1008 
1009         /*
1010          * Now compare labels to complete the MAC check.  If the labels
1011          * are equal or if the requestor is in the global zone and has
1012          * NET_MAC_AWARE, then allow read-write access.   (Except for
1013          * mounts into the global zone itself; restrict these to
1014          * read-only.)
1015          *
1016          * If the requestor is in some other zone, but their label
1017          * dominates the server, then allow read-down.
1018          *
1019          * Otherwise, access is denied.
1020          */
1021         if (blequal(mntlabel, server_sl) ||
1022             (crgetzoneid(cr) == GLOBAL_ZONEID &&
1023             getpflags(NET_MAC_AWARE, cr) != 0)) {
1024                 if ((mntzone == global_zone) ||
1025                     !blequal(mntlabel, server_sl))
1026                         retv = -1;              /* read-only */
1027                 else
1028                         retv = 0;               /* access OK */
1029         } else if (bldominates(mntlabel, server_sl)) {
1030                 retv = -1;                      /* read-only */
1031         } else {
1032                 retv = EACCES;
1033         }
1034 
1035         if (tsl != NULL)
1036                 label_rele(tsl);
1037 
1038 rel_tpc:
1039         /*LINTED*/
1040         TPC_RELE(tp);
1041 out:
1042         if (mntzone)
1043                 zone_rele(mntzone);
1044         label_rele(zlabel);
1045         return (retv);
1046 }