1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 /*
  27  * Copyright (c) 2017 by Delphix. All rights reserved.
  28  */
  29 
  30 #include <sys/param.h>
  31 #include <sys/systm.h>
  32 #include <sys/kmem.h>
  33 #include <sys/user.h>
  34 #include <sys/proc.h>
  35 #include <sys/cred.h>
  36 #include <sys/disp.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/fdio.h>
  42 #include <sys/file.h>
  43 #include <sys/uio.h>
  44 #include <sys/conf.h>
  45 #include <sys/statvfs.h>
  46 #include <sys/mount.h>
  47 #include <sys/pathname.h>
  48 #include <sys/cmn_err.h>
  49 #include <sys/debug.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/conf.h>
  52 #include <sys/mkdev.h>
  53 #include <sys/swap.h>
  54 #include <sys/sunddi.h>
  55 #include <sys/sunldi.h>
  56 #include <sys/dktp/fdisk.h>
  57 #include <sys/fs/pc_label.h>
  58 #include <sys/fs/pc_fs.h>
  59 #include <sys/fs/pc_dir.h>
  60 #include <sys/fs/pc_node.h>
  61 #include <fs/fs_subr.h>
  62 #include <sys/modctl.h>
  63 #include <sys/dkio.h>
  64 #include <sys/open.h>
  65 #include <sys/mntent.h>
  66 #include <sys/policy.h>
  67 #include <sys/atomic.h>
  68 #include <sys/sdt.h>
  69 
/*
 * The majority of PC media use a 512-byte sector size, but
 * occasionally you will run across a 1k sector size.
 * For media with a 1k sector size, fd_strategy() requires
 * the I/O size to be a 1k multiple; so when the sector size
 * is not yet known, always read 1k (that is, two PC_SECSIZE units).
 */
#define PC_SAFESECSIZE  (PC_SECSIZE * 2)
  78 
/*
 * Forward declarations of file-local functions.
 */
static int pcfs_pseudo_floppy(dev_t);

/*
 * pcfsinit() is the init routine named in the vfsdef_t below; it registers
 * pcfs_mount() through pcfs_freevfs() as the VFS operations of this
 * filesystem type. pc_syncfsnodes() is declared alongside them but is not
 * itself part of the registered operations template.
 */
static int pcfsinit(int, char *);
static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
        struct cred *);
static int pcfs_unmount(struct vfs *, int, struct cred *);
static int pcfs_root(struct vfs *, struct vnode **);
static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
static int pc_syncfsnodes(struct pcfs *);
static int pcfs_sync(struct vfs *, short, struct cred *);
static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
static void pcfs_freevfs(vfs_t *vfsp);

/* NOTE(review): presumably FAT table I/O, judging by the names - confirm. */
static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
static int pc_writefat(struct pcfs *fsp, daddr_t start);

/* Both are called from pcfs_mount() while setting up a new instance. */
static int pc_getfattype(struct pcfs *fsp);
static void pcfs_parse_mntopts(struct pcfs *fsp);
  97 
  98 
  99 /*
 100  * pcfs mount options table
 101  */
 102 
 103 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
 104 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
 105 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
 106 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
 107 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
 108 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
 109 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
 110 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
 111 
 112 static mntopt_t mntopts[] = {
 113 /*
 114  *      option name     cancel option   default arg     flags   opt data
 115  */
 116         { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
 117         { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
 118         { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
 119         { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
 120         { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
 121         { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
 122         { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
 123         { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
 124         { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
 125         { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
 126 };
 127 
 128 static mntopts_t pcfs_mntopts = {
 129         sizeof (mntopts) / sizeof (mntopt_t),
 130         mntopts
 131 };
 132 
/* Debug level, initially 0. It is not consulted in this part of the file. */
int pcfsdebuglevel = 0;

/*
 * pcfslock:    protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:   (inside per filesystem structure "pcfs")
 *              per filesystem lock. Most of the vfsops and vnodeops are
 *              protected by this lock.
 * pcnodes_lock: protects the pcnode hash tables "pcdhead" and "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:     used to prevent module unloads while there is still
 *                      pcfs state from a former mount hanging around. With
 *                      forced umount support, the filesystem module must not
 *                      be allowed to go away before the last VFS_FREEVFS()
 *                      call has been made.
 *                      Since this is just an atomic counter, there's no need
 *                      for locking.
 */
kmutex_t        pcfslock;
krwlock_t       pcnodes_lock;
uint32_t        pcfs_mountcount;
 155 
/*
 * Filesystem type index passed to pcfsinit(); saved there and used
 * later by pcfs_mount() and _fini().
 */
static int pcfstype;

/*
 * VFS definition for this filesystem: its name ("pcfs"), the init routine
 * pcfsinit(), the VSW_* capability flags and the mount option prototype.
 */
static vfsdef_t vfw = {
        VFSDEF_VERSION,
        "pcfs",
        pcfsinit,
        VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
        &pcfs_mntopts
};

/* Module operations vector for filesystem modules; defined elsewhere. */
extern struct mod_ops mod_fsops;

/* Module linkage element tying the description string to the vfsdef_t. */
static struct modlfs modlfs = {
        &mod_fsops,
        "PC filesystem",
        &vfw
};

/*
 * Linkage structure passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        &modlfs,
        NULL
};
 179 
 180 int
 181 _init(void)
 182 {
 183         int     error;
 184 
 185 #if !defined(lint)
 186         /* make sure the on-disk structures are sane */
 187         ASSERT(sizeof (struct pcdir) == 32);
 188         ASSERT(sizeof (struct pcdir_lfn) == 32);
 189 #endif
 190         mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
 191         rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
 192         error = mod_install(&modlinkage);
 193         if (error) {
 194                 mutex_destroy(&pcfslock);
 195                 rw_destroy(&pcnodes_lock);
 196         }
 197         return (error);
 198 }
 199 
 200 int
 201 _fini(void)
 202 {
 203         int     error;
 204 
 205         /*
 206          * If a forcedly unmounted instance is still hanging around,
 207          * we cannot allow the module to be unloaded because that would
 208          * cause panics once the VFS framework decides it's time to call
 209          * into VFS_FREEVFS().
 210          */
 211         if (pcfs_mountcount)
 212                 return (EBUSY);
 213 
 214         error = mod_remove(&modlinkage);
 215         if (error)
 216                 return (error);
 217         mutex_destroy(&pcfslock);
 218         rw_destroy(&pcnodes_lock);
 219         /*
 220          * Tear down the operations vectors
 221          */
 222         (void) vfs_freevfsops_by_type(pcfstype);
 223         vn_freevnodeops(pcfs_fvnodeops);
 224         vn_freevnodeops(pcfs_dvnodeops);
 225         return (0);
 226 }
 227 
 228 int
 229 _info(struct modinfo *modinfop)
 230 {
 231         return (mod_info(&modlinkage, modinfop));
 232 }
 233 
 234 /* ARGSUSED1 */
 235 static int
 236 pcfsinit(int fstype, char *name)
 237 {
 238         static const fs_operation_def_t pcfs_vfsops_template[] = {
 239                 VFSNAME_MOUNT,          { .vfs_mount = pcfs_mount },
 240                 VFSNAME_UNMOUNT,        { .vfs_unmount = pcfs_unmount },
 241                 VFSNAME_ROOT,           { .vfs_root = pcfs_root },
 242                 VFSNAME_STATVFS,        { .vfs_statvfs = pcfs_statvfs },
 243                 VFSNAME_SYNC,           { .vfs_sync = pcfs_sync },
 244                 VFSNAME_VGET,           { .vfs_vget = pcfs_vget },
 245                 VFSNAME_FREEVFS,        { .vfs_freevfs = pcfs_freevfs },
 246                 NULL,                   NULL
 247         };
 248         int error;
 249 
 250         error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
 251         if (error != 0) {
 252                 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
 253                 return (error);
 254         }
 255 
 256         error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
 257         if (error != 0) {
 258                 (void) vfs_freevfsops_by_type(fstype);
 259                 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
 260                 return (error);
 261         }
 262 
 263         error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
 264         if (error != 0) {
 265                 (void) vfs_freevfsops_by_type(fstype);
 266                 vn_freevnodeops(pcfs_fvnodeops);
 267                 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
 268                 return (error);
 269         }
 270 
 271         pcfstype = fstype;
 272         (void) pc_init();
 273         pcfs_mountcount = 0;
 274         return (0);
 275 }
 276 
/*
 * Head of the singly linked list (chained through pcfs_nxt) of mounted
 * PCFS instances. Protected by pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

/* Defined elsewhere; not referenced in this part of the file. */
extern struct pcfs_args pc_tz;

/*
 *  Define some special logical drives we use internal to this file.
 *
 *  BOOT_PARTITION_DRIVE is selected by a ":boot" device name suffix.
 *  UNPARTITIONED_DRIVE is used when the device name resolves directly to a
 *  device node, or when an ":a" / ":b" suffix is given.
 *  Drive letters 'c'..'z' map to logical drives starting at 1, which is
 *  the value of PRIMARY_DOS_DRIVE.
 */
#define BOOT_PARTITION_DRIVE    99
#define PRIMARY_DOS_DRIVE       1
#define UNPARTITIONED_DRIVE     0
 287 
 288 static int
 289 pcfs_device_identify(
 290         struct vfs *vfsp,
 291         struct mounta *uap,
 292         struct cred *cr,
 293         int *dos_ldrive,
 294         dev_t *xdev)
 295 {
 296         struct pathname special;
 297         char *c;
 298         struct vnode *svp = NULL;
 299         struct vnode *lvp = NULL;
 300         int oflag, aflag;
 301         int error;
 302 
 303         /*
 304          * Resolve path name of special file being mounted.
 305          */
 306         if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
 307                 return (error);
 308         }
 309 
 310         *dos_ldrive = -1;
 311 
 312         if (error =
 313             lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
 314                 /*
 315                  * If there's no device node, the name specified most likely
 316                  * maps to a PCFS-style "partition specifier" to select a
 317                  * harddisk primary/logical partition. Disable floppy-specific
 318                  * checks in such cases unless an explicit :A or :B is
 319                  * requested.
 320                  */
 321 
 322                 /*
 323                  * Split the pathname string at the last ':' separator.
 324                  * If there's no ':' in the device name, or the ':' is the
 325                  * last character in the string, the name is invalid and
 326                  * the error from the previous lookup will be returned.
 327                  */
 328                 c = strrchr(special.pn_path, ':');
 329                 if (c == NULL || strlen(c) == 0)
 330                         goto devlookup_done;
 331 
 332                 *c++ = '\0';
 333 
 334                 /*
 335                  * PCFS partition name suffixes can be:
 336                  *      - "boot" to indicate the X86BOOT partition
 337                  *      - a drive letter [c-z] for the "DOS logical drive"
 338                  *      - a drive number 1..24 for the "DOS logical drive"
 339                  *      - a "floppy name letter", 'a' or 'b' (just strip this)
 340                  */
 341                 if (strcasecmp(c, "boot") == 0) {
 342                         /*
 343                          * The Solaris boot partition is requested.
 344                          */
 345                         *dos_ldrive = BOOT_PARTITION_DRIVE;
 346                 } else if (strspn(c, "0123456789") == strlen(c)) {
 347                         /*
 348                          * All digits - parse the partition number.
 349                          */
 350                         long drvnum = 0;
 351 
 352                         if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
 353                                 /*
 354                                  * A number alright - in the allowed range ?
 355                                  */
 356                                 if (drvnum > 24 || drvnum == 0)
 357                                         error = ENXIO;
 358                         }
 359                         if (error)
 360                                 goto devlookup_done;
 361                         *dos_ldrive = (int)drvnum;
 362                 } else if (strlen(c) == 1) {
 363                         /*
 364                          * A single trailing character was specified.
 365                          *      - [c-zC-Z] means a harddisk partition, and
 366                          *        we retrieve the partition number.
 367                          *      - [abAB] means a floppy drive, so we swallow
 368                          *        the "drive specifier" and test later
 369                          *        whether the physical device is a floppy.
 370                          */
 371                         *c = tolower(*c);
 372                         if (*c == 'a' || *c == 'b') {
 373                                 *dos_ldrive = UNPARTITIONED_DRIVE;
 374                         } else if (*c < 'c' || *c > 'z') {
 375                                 error = ENXIO;
 376                                 goto devlookup_done;
 377                         } else {
 378                                 *dos_ldrive = 1 + *c - 'c';
 379                         }
 380                 } else {
 381                         /*
 382                          * Can't parse this - pass through previous error.
 383                          */
 384                         goto devlookup_done;
 385                 }
 386 
 387 
 388                 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
 389                     NULLVPP, &svp);
 390         } else {
 391                 *dos_ldrive = UNPARTITIONED_DRIVE;
 392         }
 393 devlookup_done:
 394         pn_free(&special);
 395         if (error)
 396                 return (error);
 397 
 398         ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
 399 
 400         /*
 401          * Verify caller's permission to open the device special file.
 402          */
 403         if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 404             ((uap->flags & MS_RDONLY) != 0)) {
 405                 oflag = FREAD;
 406                 aflag = VREAD;
 407         } else {
 408                 oflag = FREAD | FWRITE;
 409                 aflag = VREAD | VWRITE;
 410         }
 411 
 412         error = vfs_get_lofi(vfsp, &lvp);
 413 
 414         if (error > 0) {
 415                 if (error == ENOENT)
 416                         error = ENODEV;
 417                 goto out;
 418         } else if (error == 0) {
 419                 *xdev = lvp->v_rdev;
 420         } else {
 421                 *xdev = svp->v_rdev;
 422 
 423                 if (svp->v_type != VBLK) {
 424                         error = ENOTBLK;
 425                         goto out;
 426                 }
 427 
 428                 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
 429                         goto out;
 430         }
 431 
 432         if (getmajor(*xdev) >= devcnt) {
 433                 error = ENXIO;
 434                 goto out;
 435         }
 436 
 437         if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 438                 goto out;
 439 
 440 out:
 441         if (svp != NULL)
 442                 VN_RELE(svp);
 443         if (lvp != NULL)
 444                 VN_RELE(lvp);
 445         return (error);
 446 }
 447 
 448 static int
 449 pcfs_device_ismounted(
 450         struct vfs *vfsp,
 451         int dos_ldrive,
 452         dev_t xdev,
 453         int *remounting,
 454         dev_t *pseudodev)
 455 {
 456         struct pcfs *fsp;
 457         int remount = *remounting;
 458 
 459         /*
 460          * Ensure that this logical drive isn't already mounted, unless
 461          * this is a REMOUNT request.
 462          * Note: The framework will perform this check if the "...:c"
 463          * PCFS-style "logical drive" syntax has not been used and an
 464          * actually existing physical device is backing this filesystem.
 465          * Once all block device drivers support PC-style partitioning,
 466          * this codeblock can be dropped.
 467          */
 468         *pseudodev = xdev;
 469 
 470         if (dos_ldrive) {
 471                 mutex_enter(&pcfslock);
 472                 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
 473                         if (fsp->pcfs_xdev == xdev &&
 474                             fsp->pcfs_ldrive == dos_ldrive) {
 475                                 mutex_exit(&pcfslock);
 476                                 if (remount) {
 477                                         return (0);
 478                                 } else {
 479                                         return (EBUSY);
 480                                 }
 481                         }
 482                 /*
 483                  * Assign a unique device number for the vfs
 484                  * The old way (getudev() + a constantly incrementing
 485                  * major number) was wrong because it changes vfs_dev
 486                  * across mounts and reboots, which breaks nfs file handles.
 487                  * UFS just uses the real dev_t. We can't do that because
 488                  * of the way pcfs opens fdisk partitons (the :c and :d
 489                  * partitions are on the same dev_t). Though that _might_
 490                  * actually be ok, since the file handle contains an
 491                  * absolute block number, it's probably better to make them
 492                  * different. So I think we should retain the original
 493                  * dev_t, but come up with a different minor number based
 494                  * on the logical drive that will _always_ come up the same.
 495                  * For now, we steal the upper 6 bits.
 496                  */
 497 #ifdef notdef
 498                 /* what should we do here? */
 499                 if (((getminor(xdev) >> 12) & 0x3F) != 0)
 500                         printf("whoops - upper bits used!\n");
 501 #endif
 502                 *pseudodev = makedevice(getmajor(xdev),
 503                     ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
 504                 if (vfs_devmounting(*pseudodev, vfsp)) {
 505                         mutex_exit(&pcfslock);
 506                         return (EBUSY);
 507                 }
 508                 if (vfs_devismounted(*pseudodev)) {
 509                         mutex_exit(&pcfslock);
 510                         if (remount) {
 511                                 return (0);
 512                         } else {
 513                                 return (EBUSY);
 514                         }
 515                 }
 516                 mutex_exit(&pcfslock);
 517         } else {
 518                 *pseudodev = xdev;
 519                 if (vfs_devmounting(*pseudodev, vfsp)) {
 520                         return (EBUSY);
 521                 }
 522                 if (vfs_devismounted(*pseudodev))
 523                         if (remount) {
 524                                 return (0);
 525                         } else {
 526                                 return (EBUSY);
 527                         }
 528         }
 529 
 530         /*
 531          * This is not a remount. Even if MS_REMOUNT was requested,
 532          * the caller needs to proceed as it would on an ordinary
 533          * mount.
 534          */
 535         *remounting = 0;
 536 
 537         ASSERT(*pseudodev);
 538         return (0);
 539 }
 540 
 541 /*
 542  * Get the PCFS-specific mount options from the VFS framework.
 543  * For "timezone" and "secsize", we need to parse the number
 544  * ourselves and ensure its validity.
 545  * Note: "secsize" is deliberately undocumented at this time,
 546  * it's a workaround for devices (particularly: lofi image files)
 547  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
 548  */
 549 static void
 550 pcfs_parse_mntopts(struct pcfs *fsp)
 551 {
 552         char *c;
 553         char *endptr;
 554         long l;
 555         struct vfs *vfsp = fsp->pcfs_vfs;
 556 
 557         ASSERT(fsp->pcfs_secondswest == 0);
 558         ASSERT(fsp->pcfs_secsize == 0);
 559 
 560         if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
 561                 fsp->pcfs_flags |= PCFS_HIDDEN;
 562         if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
 563                 fsp->pcfs_flags |= PCFS_FOLDCASE;
 564         if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
 565                 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
 566         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
 567                 fsp->pcfs_flags |= PCFS_NOATIME;
 568 
 569         if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
 570                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 571                     endptr == c + strlen(c)) {
 572                         /*
 573                          * A number alright - in the allowed range ?
 574                          */
 575                         if (l <= -12*3600 || l >= 12*3600) {
 576                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 577                                     "'timezone' mount option - %ld "
 578                                     "is out of range. Assuming 0.", l);
 579                                 l = 0;
 580                         }
 581                 } else {
 582                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 583                             "'timezone' mount option - argument %s "
 584                             "is not a valid number. Assuming 0.", c);
 585                         l = 0;
 586                 }
 587                 fsp->pcfs_secondswest = l;
 588         }
 589 
 590         /*
 591          * The "secsize=..." mount option is a workaround for the lack of
 592          * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
 593          * partition table of a disk image and it has been partitioned with
 594          * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
 595          * images.
 596          * That should really be fixed in lofi ... this is a workaround.
 597          */
 598         if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
 599                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 600                     endptr == c + strlen(c)) {
 601                         /*
 602                          * A number alright - a valid sector size as well ?
 603                          */
 604                         if (!VALID_SECSIZE(l)) {
 605                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 606                                     "'secsize' mount option - %ld is "
 607                                     "unsupported. Autodetecting.", l);
 608                                 l = 0;
 609                         }
 610                 } else {
 611                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 612                             "'secsize' mount option - argument %s "
 613                             "is not a valid number. Autodetecting.", c);
 614                         l = 0;
 615                 }
 616                 fsp->pcfs_secsize = l;
 617                 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
 618         }
 619 }
 620 
 621 /*
 622  * vfs operations
 623  */
 624 
 625 /*
 626  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
 627  */
 628 static int
 629 pcfs_mount(
 630         struct vfs *vfsp,
 631         struct vnode *mvp,
 632         struct mounta *uap,
 633         struct cred *cr)
 634 {
 635         struct pcfs *fsp;
 636         struct vnode *devvp;
 637         dev_t pseudodev;
 638         dev_t xdev;
 639         int dos_ldrive = 0;
 640         int error;
 641         int remounting;
 642 
 643         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 644                 return (error);
 645 
 646         if (mvp->v_type != VDIR)
 647                 return (ENOTDIR);
 648 
 649         mutex_enter(&mvp->v_lock);
 650         if ((uap->flags & MS_REMOUNT) == 0 &&
 651             (uap->flags & MS_OVERLAY) == 0 &&
 652             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 653                 mutex_exit(&mvp->v_lock);
 654                 return (EBUSY);
 655         }
 656         mutex_exit(&mvp->v_lock);
 657 
 658         /*
 659          * PCFS doesn't do mount arguments anymore - everything's a mount
 660          * option these days. In order not to break existing callers, we
 661          * don't reject it yet, just warn that the data (if any) is ignored.
 662          */
 663         if (uap->datalen != 0)
 664                 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
 665                     "mount argument structures instead of mount options. "
 666                     "Ignoring mount(2) 'dataptr' argument.");
 667 
 668         /*
 669          * This is needed early, to make sure the access / open calls
 670          * are done using the correct mode. Processing this mount option
 671          * only when calling pcfs_parse_mntopts() would lead us to attempt
 672          * a read/write access to a possibly writeprotected device, and
 673          * a readonly mount attempt might fail because of that.
 674          */
 675         if (uap->flags & MS_RDONLY) {
 676                 vfsp->vfs_flag |= VFS_RDONLY;
 677                 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 678         }
 679 
 680         /*
 681          * For most filesystems, this is just a lookupname() on the
 682          * mount pathname string. PCFS historically has to do its own
 683          * partition table parsing because not all Solaris architectures
 684          * support all styles of partitioning that PC media can have, and
 685          * hence PCFS understands "device names" that don't map to actual
 686          * physical device nodes. Parsing the "PCFS syntax" for device
 687          * names is done in pcfs_device_identify() - see there.
 688          *
 689          * Once all block device drivers that can host FAT filesystems have
 690          * been enhanced to create device nodes for all PC-style partitions,
 691          * this code can go away.
 692          */
 693         if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
 694                 return (error);
 695 
 696         /*
 697          * As with looking up the actual device to mount, PCFS cannot rely
 698          * on just the checks done by vfs_ismounted() whether a given device
 699          * is mounted already. The additional check against the "PCFS syntax"
 700          * is done in  pcfs_device_ismounted().
 701          */
 702         remounting = (uap->flags & MS_REMOUNT);
 703 
 704         if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
 705             &pseudodev))
 706                 return (error);
 707 
 708         if (remounting)
 709                 return (0);
 710 
 711         /*
 712          * Mount the filesystem.
 713          * An instance structure is required before the attempt to locate
 714          * and parse the FAT BPB. This is because mount options may change
 715          * the behaviour of the filesystem type matching code. Precreate
 716          * it and fill it in to a degree that allows parsing the mount
 717          * options.
 718          */
 719         devvp = makespecvp(xdev, VBLK);
 720         if (IS_SWAPVP(devvp)) {
 721                 VN_RELE(devvp);
 722                 return (EBUSY);
 723         }
 724         error = VOP_OPEN(&devvp,
 725             (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
 726         if (error) {
 727                 VN_RELE(devvp);
 728                 return (error);
 729         }
 730 
 731         fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
 732         fsp->pcfs_vfs = vfsp;
 733         fsp->pcfs_xdev = xdev;
 734         fsp->pcfs_devvp = devvp;
 735         fsp->pcfs_ldrive = dos_ldrive;
 736         mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
 737 
 738         pcfs_parse_mntopts(fsp);
 739 
 740         /*
 741          * This is the actual "mount" - the PCFS superblock check.
 742          *
 743          * Find the requested logical drive and the FAT BPB therein.
 744          * Check device type and flag the instance if media is removeable.
 745          *
 746          * Initializes most members of the filesystem instance structure.
 747          * Returns EINVAL if no valid BPB can be found. Other errors may
 748          * occur after I/O failures, or when invalid / unparseable partition
 749          * tables are encountered.
 750          */
 751         if (error = pc_getfattype(fsp))
 752                 goto errout;
 753 
 754         /*
 755          * Now that the BPB has been parsed, this structural information
 756          * is available and known to be valid. Initialize the VFS.
 757          */
 758         vfsp->vfs_data = fsp;
 759         vfsp->vfs_dev = pseudodev;
 760         vfsp->vfs_fstype = pcfstype;
 761         vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
 762         vfsp->vfs_bcount = 0;
 763         vfsp->vfs_bsize = fsp->pcfs_clsize;
 764 
 765         /*
 766          * Validate that we can access the FAT and that it is, to the
 767          * degree we can verify here, self-consistent.
 768          */
 769         if (error = pc_verify(fsp))
 770                 goto errout;
 771 
 772         /*
 773          * Record the time of the mount, to return as an "approximate"
 774          * timestamp for the FAT root directory. Since FAT roots don't
 775          * have timestamps, this is less confusing to the user than
 776          * claiming "zero" / Jan/01/1970.
 777          */
 778         gethrestime(&fsp->pcfs_mounttime);
 779 
 780         /*
 781          * Fix up the mount options. Because "noatime" is made default on
 782          * removeable media only, a fixed disk will have neither "atime"
 783          * nor "noatime" set. We set the options explicitly depending on
 784          * the PCFS_NOATIME flag, to inform the user of what applies.
 785          * Mount option cancellation will take care that the mutually
 786          * exclusive 'other' is cleared.
 787          */
 788         vfs_setmntopt(vfsp,
 789             fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
 790             NULL, 0);
 791 
 792         /*
 793          * All clear - insert the FS instance into PCFS' list.
 794          */
 795         mutex_enter(&pcfslock);
 796         fsp->pcfs_nxt = pc_mounttab;
 797         pc_mounttab = fsp;
 798         mutex_exit(&pcfslock);
 799         atomic_inc_32(&pcfs_mountcount);
 800         return (0);
 801 
 802 errout:
 803         (void) VOP_CLOSE(devvp,
 804             vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
 805             1, (offset_t)0, cr, NULL);
 806         VN_RELE(devvp);
 807         mutex_destroy(&fsp->pcfs_lock);
 808         kmem_free(fsp, sizeof (*fsp));
 809         return (error);
 810 
 811 }
 812 
 813 static int
 814 pcfs_unmount(
 815         struct vfs *vfsp,
 816         int flag,
 817         struct cred *cr)
 818 {
 819         struct pcfs *fsp, *fsp1;
 820 
 821         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 822                 return (EPERM);
 823 
 824         fsp = VFSTOPCFS(vfsp);
 825 
 826         /*
 827          * We don't have to lock fsp because the VVFSLOCK in vfs layer will
 828          * prevent lookuppn from crossing the mount point.
 829          * If this is not a forced umount request and there's ongoing I/O,
 830          * don't allow the mount to proceed.
 831          */
 832         if (flag & MS_FORCE)
 833                 vfsp->vfs_flag |= VFS_UNMOUNTED;
 834         else if (fsp->pcfs_nrefs)
 835                 return (EBUSY);
 836 
 837         mutex_enter(&pcfslock);
 838 
 839         /*
 840          * If this is a forced umount request or if the fs instance has
 841          * been marked as beyond recovery, allow the umount to proceed
 842          * regardless of state. pc_diskchanged() forcibly releases all
 843          * inactive vnodes/pcnodes.
 844          */
 845         if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
 846                 rw_enter(&pcnodes_lock, RW_WRITER);
 847                 pc_diskchanged(fsp);
 848                 rw_exit(&pcnodes_lock);
 849         }
 850 
 851         /* now there should be no pcp node on pcfhead or pcdhead. */
 852 
 853         if (fsp == pc_mounttab) {
 854                 pc_mounttab = fsp->pcfs_nxt;
 855         } else {
 856                 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
 857                         if (fsp1->pcfs_nxt == fsp)
 858                                 fsp1->pcfs_nxt = fsp->pcfs_nxt;
 859         }
 860 
 861         mutex_exit(&pcfslock);
 862 
 863         /*
 864          * Since we support VFS_FREEVFS(), there's no need to
 865          * free the fsp right now. The framework will tell us
 866          * when the right time to do so has arrived by calling
 867          * into pcfs_freevfs.
 868          */
 869         return (0);
 870 }
 871 
 872 /*
 873  * find root of pcfs
 874  */
 875 static int
 876 pcfs_root(
 877         struct vfs *vfsp,
 878         struct vnode **vpp)
 879 {
 880         struct pcfs *fsp;
 881         struct pcnode *pcp;
 882         int error;
 883 
 884         fsp = VFSTOPCFS(vfsp);
 885         if (error = pc_lockfs(fsp, 0, 0))
 886                 return (error);
 887 
 888         pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
 889         pc_unlockfs(fsp);
 890         *vpp = PCTOV(pcp);
 891         pcp->pc_flags |= PC_EXTERNAL;
 892         return (0);
 893 }
 894 
 895 /*
 896  * Get file system statistics.
 897  */
 898 static int
 899 pcfs_statvfs(
 900         struct vfs *vfsp,
 901         struct statvfs64 *sp)
 902 {
 903         struct pcfs *fsp;
 904         int error;
 905         dev32_t d32;
 906 
 907         fsp = VFSTOPCFS(vfsp);
 908         error = pc_getfat(fsp);
 909         if (error)
 910                 return (error);
 911         bzero(sp, sizeof (*sp));
 912         sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
 913         sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
 914         sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
 915         sp->f_files = (fsfilcnt64_t)-1;
 916         sp->f_ffree = (fsfilcnt64_t)-1;
 917         sp->f_favail = (fsfilcnt64_t)-1;
 918 #ifdef notdef
 919         (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
 920 #endif /* notdef */
 921         (void) cmpldev(&d32, vfsp->vfs_dev);
 922         sp->f_fsid = d32;
 923         (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 924         sp->f_flag = vf_to_stf(vfsp->vfs_flag);
 925         sp->f_namemax = PCMAXNAMLEN;
 926         return (0);
 927 }
 928 
 929 static int
 930 pc_syncfsnodes(struct pcfs *fsp)
 931 {
 932         struct pchead *hp;
 933         struct pcnode *pcp;
 934         int error;
 935 
 936         if (error = pc_lockfs(fsp, 0, 0))
 937                 return (error);
 938 
 939         if (!(error = pc_syncfat(fsp))) {
 940                 hp = pcfhead;
 941                 while (hp < & pcfhead [ NPCHASH ]) {
 942                         rw_enter(&pcnodes_lock, RW_READER);
 943                         pcp = hp->pch_forw;
 944                         while (pcp != (struct pcnode *)hp) {
 945                                 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
 946                                         if (error = pc_nodesync(pcp))
 947                                                 break;
 948                                 pcp = pcp -> pc_forw;
 949                         }
 950                         rw_exit(&pcnodes_lock);
 951                         if (error)
 952                                 break;
 953                         hp++;
 954                 }
 955         }
 956         pc_unlockfs(fsp);
 957         return (error);
 958 }
 959 
 960 /*
 961  * Flush any pending I/O.
 962  */
 963 /*ARGSUSED*/
 964 static int
 965 pcfs_sync(
 966         struct vfs *vfsp,
 967         short flag,
 968         struct cred *cr)
 969 {
 970         struct pcfs *fsp;
 971         int error = 0;
 972 
 973         /* this prevents the filesystem from being umounted. */
 974         mutex_enter(&pcfslock);
 975         if (vfsp != NULL) {
 976                 fsp = VFSTOPCFS(vfsp);
 977                 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
 978                         error = pc_syncfsnodes(fsp);
 979                 } else {
 980                         rw_enter(&pcnodes_lock, RW_WRITER);
 981                         pc_diskchanged(fsp);
 982                         rw_exit(&pcnodes_lock);
 983                         error = EIO;
 984                 }
 985         } else {
 986                 fsp = pc_mounttab;
 987                 while (fsp != NULL) {
 988                         if (fsp->pcfs_flags & PCFS_IRRECOV) {
 989                                 rw_enter(&pcnodes_lock, RW_WRITER);
 990                                 pc_diskchanged(fsp);
 991                                 rw_exit(&pcnodes_lock);
 992                                 error = EIO;
 993                                 break;
 994                         }
 995                         error = pc_syncfsnodes(fsp);
 996                         if (error) break;
 997                         fsp = fsp->pcfs_nxt;
 998                 }
 999         }
1000         mutex_exit(&pcfslock);
1001         return (error);
1002 }
1003 
1004 int
1005 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1006 {
1007         int err;
1008 
1009         if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1010                 return (EIO);
1011 
1012         if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1013                 fsp->pcfs_count++;
1014         } else {
1015                 mutex_enter(&fsp->pcfs_lock);
1016                 if (fsp->pcfs_flags & PCFS_LOCKED)
1017                         panic("pc_lockfs");
1018                 /*
1019                  * We check the IRRECOV bit again just in case somebody
1020                  * snuck past the initial check but then got held up before
1021                  * they could grab the lock.  (And in the meantime someone
1022                  * had grabbed the lock and set the bit)
1023                  */
1024                 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1025                         if ((err = pc_getfat(fsp))) {
1026                                 mutex_exit(&fsp->pcfs_lock);
1027                                 return (err);
1028                         }
1029                 }
1030                 fsp->pcfs_flags |= PCFS_LOCKED;
1031                 fsp->pcfs_owner = curthread;
1032                 fsp->pcfs_count++;
1033         }
1034         return (0);
1035 }
1036 
1037 void
1038 pc_unlockfs(struct pcfs *fsp)
1039 {
1040 
1041         if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1042                 panic("pc_unlockfs");
1043         if (--fsp->pcfs_count < 0)
1044                 panic("pc_unlockfs: count");
1045         if (fsp->pcfs_count == 0) {
1046                 fsp->pcfs_flags &= ~PCFS_LOCKED;
1047                 fsp->pcfs_owner = 0;
1048                 mutex_exit(&fsp->pcfs_lock);
1049         }
1050 }
1051 
1052 int
1053 pc_syncfat(struct pcfs *fsp)
1054 {
1055         struct buf *bp;
1056         int nfat;
1057         int     error = 0;
1058         struct fat_od_fsi *fsinfo_disk;
1059 
1060         if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1061             !(fsp->pcfs_flags & PCFS_FATMOD))
1062                 return (0);
1063         /*
1064          * write out all copies of FATs
1065          */
1066         fsp->pcfs_flags &= ~PCFS_FATMOD;
1067         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1068         for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1069                 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1070                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1071                 if (error) {
1072                         pc_mark_irrecov(fsp);
1073                         return (EIO);
1074                 }
1075         }
1076         pc_clear_fatchanges(fsp);
1077 
1078         /*
1079          * Write out fsinfo sector.
1080          */
1081         if (IS_FAT32(fsp)) {
1082                 bp = bread(fsp->pcfs_xdev,
1083                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1084                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1085                         error = geterror(bp);
1086                 }
1087                 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1088                 if (!error && FSISIG_OK(fsinfo_disk)) {
1089                         fsinfo_disk->fsi_incore.fs_free_clusters =
1090                             LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1091                         fsinfo_disk->fsi_incore.fs_next_free =
1092                             LE_32(FSINFO_UNKNOWN);
1093                         bwrite2(bp);
1094                         error = geterror(bp);
1095                 }
1096                 brelse(bp);
1097                 if (error) {
1098                         pc_mark_irrecov(fsp);
1099                         return (EIO);
1100                 }
1101         }
1102         return (0);
1103 }
1104 
1105 void
1106 pc_invalfat(struct pcfs *fsp)
1107 {
1108         struct pcfs *xfsp;
1109         int mount_cnt = 0;
1110 
1111         if (fsp->pcfs_fatp == (uchar_t *)0)
1112                 panic("pc_invalfat");
1113         /*
1114          * Release FAT
1115          */
1116         kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1117         fsp->pcfs_fatp = NULL;
1118         kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1119         fsp->pcfs_fat_changemap = NULL;
1120         /*
1121          * Invalidate all the blocks associated with the device.
1122          * Not needed if stateless.
1123          */
1124         for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1125                 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1126                         mount_cnt++;
1127 
1128         if (!mount_cnt)
1129                 binval(fsp->pcfs_xdev);
1130         /*
1131          * close mounted device
1132          */
1133         (void) VOP_CLOSE(fsp->pcfs_devvp,
1134             (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1135             1, (offset_t)0, CRED(), NULL);
1136 }
1137 
1138 void
1139 pc_badfs(struct pcfs *fsp)
1140 {
1141         cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1142             getmajor(fsp->pcfs_devvp->v_rdev),
1143             getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1144 }
1145 
1146 /*
1147  * The problem with supporting NFS on the PCFS filesystem is that there
1148  * is no good place to keep the generation number. The only possible
1149  * place is inside a directory entry. There are a few words that we
1150  * don't use - they store NT & OS/2 attributes, and the creation/last access
1151  * time of the file - but it seems wrong to use them. In addition, directory
1152  * entries come and go. If a directory is removed completely, its directory
1153  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1154  * inode blocks are dedicated for inodes, so the generation numbers are
1155  * permanently kept on the disk.
1156  */
1157 static int
1158 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1159 {
1160         struct pcnode *pcp;
1161         struct pc_fid *pcfid;
1162         struct pcfs *fsp;
1163         struct pcdir *ep;
1164         daddr_t eblkno;
1165         int eoffset;
1166         struct buf *bp;
1167         int error;
1168         pc_cluster32_t  cn;
1169 
1170         pcfid = (struct pc_fid *)fidp;
1171         fsp = VFSTOPCFS(vfsp);
1172 
1173         error = pc_lockfs(fsp, 0, 0);
1174         if (error) {
1175                 *vpp = NULL;
1176                 return (error);
1177         }
1178 
1179         if (pcfid->pcfid_block == 0) {
1180                 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1181                 pcp->pc_flags |= PC_EXTERNAL;
1182                 *vpp = PCTOV(pcp);
1183                 pc_unlockfs(fsp);
1184                 return (0);
1185         }
1186         eblkno = pcfid->pcfid_block;
1187         eoffset = pcfid->pcfid_offset;
1188 
1189         if ((pc_dbtocl(fsp,
1190             eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1191             (eoffset > fsp->pcfs_clsize)) {
1192                 pc_unlockfs(fsp);
1193                 *vpp = NULL;
1194                 return (EINVAL);
1195         }
1196 
1197         if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1198             < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1199                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1200                     fsp->pcfs_clsize);
1201         } else {
1202                 /*
1203                  * This is an access "backwards" into the FAT12/FAT16
1204                  * root directory. A better code structure would
1205                  * significantly improve maintainability here ...
1206                  */
1207                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1208                     (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1209         }
1210         if (bp->b_flags & (B_ERROR | B_STALE)) {
1211                 error = geterror(bp);
1212                 brelse(bp);
1213                 if (error)
1214                         pc_mark_irrecov(fsp);
1215                 *vpp = NULL;
1216                 pc_unlockfs(fsp);
1217                 return (error);
1218         }
1219         ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1220         /*
1221          * Ok, if this is a valid file handle that we gave out,
1222          * then simply ensuring that the creation time matches,
1223          * the entry has not been deleted, and it has a valid first
1224          * character should be enough.
1225          *
1226          * Unfortunately, verifying that the <blkno, offset> _still_
1227          * refers to a directory entry is not easy, since we'd have
1228          * to search _all_ directories starting from root to find it.
1229          * That's a high price to pay just in case somebody is forging
1230          * file handles. So instead we verify that as much of the
1231          * entry is valid as we can:
1232          *
1233          * 1. The starting cluster is 0 (unallocated) or valid
1234          * 2. It is not an LFN entry
1235          * 3. It is not hidden (unless mounted as such)
1236          * 4. It is not the label
1237          */
1238         cn = pc_getstartcluster(fsp, ep);
1239         /*
1240          * if the starting cluster is valid, but not valid according
1241          * to pc_validcl(), force it to be to simplify the following if.
1242          */
1243         if (cn == 0)
1244                 cn = PCF_FIRSTCLUSTER;
1245         if (IS_FAT32(fsp)) {
1246                 if (cn >= PCF_LASTCLUSTER32)
1247                         cn = PCF_FIRSTCLUSTER;
1248         } else {
1249                 if (cn >= PCF_LASTCLUSTER)
1250                         cn = PCF_FIRSTCLUSTER;
1251         }
1252         if ((!pc_validcl(fsp, cn)) ||
1253             (PCDL_IS_LFN(ep)) ||
1254             (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1255             ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1256                 bp->b_flags |= B_STALE | B_AGE;
1257                 brelse(bp);
1258                 pc_unlockfs(fsp);
1259                 return (EINVAL);
1260         }
1261         if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1262             (ep->pcd_filename[0] != PCD_ERASED) &&
1263             (pc_validchar(ep->pcd_filename[0]) ||
1264             (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1265                 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1266                 pcp->pc_flags |= PC_EXTERNAL;
1267                 *vpp = PCTOV(pcp);
1268         } else {
1269                 *vpp = NULL;
1270         }
1271         bp->b_flags |= B_STALE | B_AGE;
1272         brelse(bp);
1273         pc_unlockfs(fsp);
1274         return (0);
1275 }
1276 
1277 /*
1278  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1279  * a meg), so we can't bread() it all in at once. This routine reads a
1280  * fat a chunk at a time.
1281  */
1282 static int
1283 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1284 {
1285         struct buf *bp;
1286         size_t off;
1287         size_t readsize;
1288         daddr_t diskblk;
1289         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1290         daddr_t start = fsp->pcfs_fatstart;
1291 
1292         readsize = fsp->pcfs_clsize;
1293         for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1294                 if (readsize > (fatsize - off))
1295                         readsize = fatsize - off;
1296                 diskblk = pc_dbdaddr(fsp, start +
1297                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1298                 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1299                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1300                         brelse(bp);
1301                         return (EIO);
1302                 }
1303                 bp->b_flags |= B_STALE | B_AGE;
1304                 bcopy(bp->b_un.b_addr, fatp, readsize);
1305                 brelse(bp);
1306         }
1307         return (0);
1308 }
1309 
1310 /*
1311  * We write the FAT out a _lot_, in order to make sure that it
1312  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1313  * the FAT might be a couple of megabytes, and writing it all out just
1314  * because we created or deleted a small file is painful (especially
1315  * since we do it for each alternate FAT too). So instead, for FAT16 and
1316  * FAT32 we only write out the bit that has changed. We don't clear
1317  * the 'updated' fields here because the caller might be writing out
1318  * several FATs, so the caller must use pc_clear_fatchanges() after
1319  * all FATs have been updated.
1320  * This function doesn't take "start" from fsp->pcfs_dosstart because
1321  * callers can use it to write either the primary or any of the alternate
1322  * FAT tables.
1323  */
1324 static int
1325 pc_writefat(struct pcfs *fsp, daddr_t start)
1326 {
1327         struct buf *bp;
1328         size_t off;
1329         size_t writesize;
1330         int     error;
1331         uchar_t *fatp = fsp->pcfs_fatp;
1332         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1333 
1334         writesize = fsp->pcfs_clsize;
1335         for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1336                 if (writesize > (fatsize - off))
1337                         writesize = fatsize - off;
1338                 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1339                         continue;
1340                 }
1341                 bp = ngeteblk(writesize);
1342                 bp->b_edev = fsp->pcfs_xdev;
1343                 bp->b_dev = cmpdev(bp->b_edev);
1344                 bp->b_blkno = pc_dbdaddr(fsp, start +
1345                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1346                 bcopy(fatp, bp->b_un.b_addr, writesize);
1347                 bwrite2(bp);
1348                 error = geterror(bp);
1349                 brelse(bp);
1350                 if (error) {
1351                         return (error);
1352                 }
1353         }
1354         return (0);
1355 }
1356 
1357 /*
1358  * Mark the FAT cluster that 'cn' is stored in as modified.
1359  */
1360 void
1361 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1362 {
1363         pc_cluster32_t  bn;
1364         size_t          size;
1365 
1366         /* which fat block is the cluster number stored in? */
1367         if (IS_FAT32(fsp)) {
1368                 size = sizeof (pc_cluster32_t);
1369                 bn = pc_lblkno(fsp, cn * size);
1370                 fsp->pcfs_fat_changemap[bn] = 1;
1371         } else if (IS_FAT16(fsp)) {
1372                 size = sizeof (pc_cluster16_t);
1373                 bn = pc_lblkno(fsp, cn * size);
1374                 fsp->pcfs_fat_changemap[bn] = 1;
1375         } else {
1376                 offset_t off;
1377                 pc_cluster32_t nbn;
1378 
1379                 ASSERT(IS_FAT12(fsp));
1380                 off = cn + (cn >> 1);
1381                 bn = pc_lblkno(fsp, off);
1382                 fsp->pcfs_fat_changemap[bn] = 1;
1383                 /* does this field wrap into the next fat cluster? */
1384                 nbn = pc_lblkno(fsp, off + 1);
1385                 if (nbn != bn) {
1386                         fsp->pcfs_fat_changemap[nbn] = 1;
1387                 }
1388         }
1389 }
1390 
1391 /*
1392  * return whether the FAT cluster 'bn' is updated and needs to
1393  * be written out.
1394  */
1395 int
1396 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1397 {
1398         return (fsp->pcfs_fat_changemap[bn] == 1);
1399 }
1400 
1401 /*
1402  * Implementation of VFS_FREEVFS() to support forced umounts.
1403  * This is called by the vfs framework after umount, to trigger
1404  * the release of any resources still associated with the given
1405  * vfs_t once the need to keep them has gone away.
1406  */
1407 void
1408 pcfs_freevfs(vfs_t *vfsp)
1409 {
1410         struct pcfs *fsp = VFSTOPCFS(vfsp);
1411 
1412         mutex_enter(&pcfslock);
1413         /*
1414          * Purging the FAT closes the device - can't do any more
1415          * I/O after this.
1416          */
1417         if (fsp->pcfs_fatp != (uchar_t *)0)
1418                 pc_invalfat(fsp);
1419         mutex_exit(&pcfslock);
1420 
1421         VN_RELE(fsp->pcfs_devvp);
1422         mutex_destroy(&fsp->pcfs_lock);
1423         kmem_free(fsp, sizeof (*fsp));
1424 
1425         /*
1426          * Allow _fini() to succeed now, if so desired.
1427          */
1428         atomic_dec_32(&pcfs_mountcount);
1429 }
1430 
1431 
1432 /*
1433  * PC-style partition parsing and FAT BPB identification/validation code.
1434  * The partition parsers here assume:
1435  *      - a FAT filesystem will be in a partition that has one of a set of
1436  *        recognized partition IDs
1437  *      - the user wants the 'numbering' (C:, D:, ...) that one would get
1438  *        on MSDOS 6.x.
1439  *        That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1440  *        will not factor in the enumeration.
1441  * These days, such assumptions should be revisited. FAT is no longer the
1442  * only game in 'PC town'.
1443  */
1444 /*
1445  * isDosDrive()
1446  *      Boolean function.  Give it the systid field for an fdisk partition
1447  *      and it decides if that's a systid that describes a DOS drive.  We
1448  *      use systid values defined in sys/dktp/fdisk.h.
1449  */
1450 static int
1451 isDosDrive(uchar_t checkMe)
1452 {
1453         return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1454             (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1455             (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1456             (checkMe == DIAGPART));
1457 }
1458 
1459 
1460 /*
1461  * isDosExtended()
1462  *      Boolean function.  Give it the systid field for an fdisk partition
1463  *      and it decides if that's a systid that describes an extended DOS
1464  *      partition.
1465  */
1466 static int
1467 isDosExtended(uchar_t checkMe)
1468 {
1469         return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1470 }
1471 
1472 
1473 /*
1474  * isBootPart()
1475  *      Boolean function.  Give it the systid field for an fdisk partition
1476  *      and it decides if that's a systid that describes a Solaris boot
1477  *      partition.
1478  */
1479 static int
1480 isBootPart(uchar_t checkMe)
1481 {
1482         return (checkMe == X86BOOT);
1483 }
1484 
1485 
1486 /*
1487  * noLogicalDrive()
1488  *      Display error message about not being able to find a logical
1489  *      drive.
1490  */
1491 static void
1492 noLogicalDrive(int ldrive)
1493 {
1494         if (ldrive == BOOT_PARTITION_DRIVE) {
1495                 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1496         } else {
1497                 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1498         }
1499 }
1500 
1501 
1502 /*
1503  * findTheDrive()
1504  *      Discover offset of the requested logical drive, and return
1505  *      that offset (startSector), the systid of that drive (sysid),
1506  *      and a buffer pointer (bp), with the buffer contents being
1507  *      the first sector of the logical drive (i.e., the sector that
1508  *      contains the BPB for that drive).
1509  *
1510  * Note: this code is not capable of addressing >2TB disks, as it uses
1511  *       daddr_t not diskaddr_t, some of the calculations would overflow
1512  */
/*
 * COPY_PTBL(mbr, ptblp)
 *      Copy the FD_NUMPART partition entries ('parts' member) out of the
 *      struct mboot image at 'mbr' into the struct ipart array at 'ptblp'.
 */
#define COPY_PTBL(mbr, ptblp)                                   \
        bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),        \
            FD_NUMPART * sizeof (struct ipart))
1516 
1517 static int
1518 findTheDrive(struct pcfs *fsp, buf_t **bp)
1519 {
1520         int ldrive = fsp->pcfs_ldrive;
1521         dev_t dev = fsp->pcfs_devvp->v_rdev;
1522 
1523         struct ipart dosp[FD_NUMPART];  /* incore fdisk partition structure */
1524         daddr_t lastseek = 0;           /* Disk block we sought previously */
1525         daddr_t diskblk = 0;            /* Disk block to get */
1526         daddr_t xstartsect;             /* base of Extended DOS partition */
1527         int logicalDriveCount = 0;      /* Count of logical drives seen */
1528         int extendedPart = -1;          /* index of extended dos partition */
1529         int primaryPart = -1;           /* index of primary dos partition */
1530         int bootPart = -1;              /* index of a Solaris boot partition */
1531         uint32_t xnumsect = 0;          /* length of extended DOS partition */
1532         int driveIndex;                 /* computed FDISK table index */
1533         daddr_t startsec;
1534         len_t mediasize;
1535         int i;
1536         /*
1537          * Count of drives in the current extended partition's
1538          * FDISK table, and indexes of the drives themselves.
1539          */
1540         int extndDrives[FD_NUMPART];
1541         int numDrives = 0;
1542 
1543         /*
1544          * Count of drives (beyond primary) in master boot record's
1545          * FDISK table, and indexes of the drives themselves.
1546          */
1547         int extraDrives[FD_NUMPART];
1548         int numExtraDrives = 0;
1549 
1550         /*
1551          * "ldrive == 0" should never happen, as this is a request to
1552          * mount the physical device (and ignore partitioning). The code
1553          * in pcfs_mount() should have made sure that a logical drive number
1554          * is at least 1, meaning we're looking for drive "C:". It is not
1555          * safe (and a bug in the callers of this function) to request logical
1556          * drive number 0; we could ASSERT() but a graceful EIO is a more
1557          * polite way.
1558          */
1559         if (ldrive == 0) {
1560                 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1561                 noLogicalDrive(ldrive);
1562                 return (EIO);
1563         }
1564 
1565         /*
1566          *  Copy from disk block into memory aligned structure for fdisk usage.
1567          */
1568         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1569 
1570         /*
1571          * This check is ok because a FAT BPB and a master boot record (MBB)
1572          * have the same signature, in the same position within the block.
1573          */
1574         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1575                 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1576                     "device (%x.%x):%d\n",
1577                     getmajor(dev), getminor(dev), ldrive);
1578                 return (EINVAL);
1579         }
1580 
1581         /*
1582          * Get a summary of what is in the Master FDISK table.
1583          * Normally we expect to find one partition marked as a DOS drive.
1584          * This partition is the one Windows calls the primary dos partition.
1585          * If the machine has any logical drives then we also expect
1586          * to find a partition marked as an extended DOS partition.
1587          *
1588          * Sometimes we'll find multiple partitions marked as DOS drives.
1589          * The Solaris fdisk program allows these partitions
1590          * to be created, but Windows fdisk no longer does.  We still need
1591          * to support these, though, since Windows does.  We also need to fix
1592          * our fdisk to behave like the Windows version.
1593          *
1594          * It turns out that some off-the-shelf media have *only* an
1595          * Extended partition, so we need to deal with that case as well.
1596          *
1597          * Only a single (the first) Extended or Boot Partition will
1598          * be recognized.  Any others will be ignored.
1599          */
1600         for (i = 0; i < FD_NUMPART; i++) {
1601                 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1602                     uint_t, (uint_t)dosp[i].systid,
1603                     uint_t, LE_32(dosp[i].relsect),
1604                     uint_t, LE_32(dosp[i].numsect));
1605 
1606                 if (isDosDrive(dosp[i].systid)) {
1607                         if (primaryPart < 0) {
1608                                 logicalDriveCount++;
1609                                 primaryPart = i;
1610                         } else {
1611                                 extraDrives[numExtraDrives++] = i;
1612                         }
1613                         continue;
1614                 }
1615                 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1616                         extendedPart = i;
1617                         continue;
1618                 }
1619                 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1620                         bootPart = i;
1621                         continue;
1622                 }
1623         }
1624 
1625         if (ldrive == BOOT_PARTITION_DRIVE) {
1626                 if (bootPart < 0) {
1627                         noLogicalDrive(ldrive);
1628                         return (EINVAL);
1629                 }
1630                 startsec = LE_32(dosp[bootPart].relsect);
1631                 mediasize = LE_32(dosp[bootPart].numsect);
1632                 goto found;
1633         }
1634 
1635         if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1636                 startsec = LE_32(dosp[primaryPart].relsect);
1637                 mediasize = LE_32(dosp[primaryPart].numsect);
1638                 goto found;
1639         }
1640 
1641         /*
1642          * We are not looking for the C: drive (or the primary drive
1643          * was not found), so we had better have an extended partition
1644          * or extra drives in the Master FDISK table.
1645          */
1646         if ((extendedPart < 0) && (numExtraDrives == 0)) {
1647                 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1648                 noLogicalDrive(ldrive);
1649                 return (EINVAL);
1650         }
1651 
1652         if (extendedPart >= 0) {
1653                 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1654                 xnumsect = LE_32(dosp[extendedPart].numsect);
1655                 do {
1656                         /*
1657                          *  If the seek would not cause us to change
1658                          *  position on the drive, then we're out of
1659                          *  extended partitions to examine.
1660                          */
1661                         if (diskblk == lastseek)
1662                                 break;
1663                         logicalDriveCount += numDrives;
1664                         /*
1665                          *  Seek the next extended partition, and find
1666                          *  logical drives within it.
1667                          */
1668                         brelse(*bp);
1669                         /*
1670                          * bread() block numbers are multiples of DEV_BSIZE
1671                          * but the device sector size (the unit of partitioning)
1672                          * might be larger than that; pcfs_get_device_info()
1673                          * has calculated the multiplicator for us.
1674                          */
1675                         *bp = bread(dev,
1676                             pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1677                         if ((*bp)->b_flags & B_ERROR) {
1678                                 return (EIO);
1679                         }
1680 
1681                         lastseek = diskblk;
1682                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1683                         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1684                                 cmn_err(CE_NOTE, "!pcfs: "
1685                                     "extended partition table signature err, "
1686                                     "device (%x.%x):%d, LBA %u",
1687                                     getmajor(dev), getminor(dev), ldrive,
1688                                     (uint_t)pc_dbdaddr(fsp, diskblk));
1689                                 return (EINVAL);
1690                         }
1691                         /*
1692                          *  Count up drives, and track where the next
1693                          *  extended partition is in case we need it.  We
1694                          *  are expecting only one extended partition.  If
1695                          *  there is more than one we'll only go to the
1696                          *  first one we see, but warn about ignoring.
1697                          */
1698                         numDrives = 0;
1699                         for (i = 0; i < FD_NUMPART; i++) {
1700                                 DTRACE_PROBE4(extendedpart,
1701                                     struct pcfs *, fsp,
1702                                     uint_t, (uint_t)dosp[i].systid,
1703                                     uint_t, LE_32(dosp[i].relsect),
1704                                     uint_t, LE_32(dosp[i].numsect));
1705                                 if (isDosDrive(dosp[i].systid)) {
1706                                         extndDrives[numDrives++] = i;
1707                                 } else if (isDosExtended(dosp[i].systid)) {
1708                                         if (diskblk != lastseek) {
1709                                                 /*
1710                                                  * Already found an extended
1711                                                  * partition in this table.
1712                                                  */
1713                                                 cmn_err(CE_NOTE,
1714                                                     "!pcfs: ignoring unexpected"
1715                                                     " additional extended"
1716                                                     " partition");
1717                                         } else {
1718                                                 diskblk = xstartsect +
1719                                                     LE_32(dosp[i].relsect);
1720                                         }
1721                                 }
1722                         }
1723                 } while (ldrive > logicalDriveCount + numDrives);
1724 
1725                 ASSERT(numDrives <= FD_NUMPART);
1726 
1727                 if (ldrive <= logicalDriveCount + numDrives) {
1728                         /*
1729                          * The number of logical drives we've found thus
1730                          * far is enough to get us to the one we were
1731                          * searching for.
1732                          */
1733                         driveIndex = logicalDriveCount + numDrives - ldrive;
1734                         mediasize =
1735                             LE_32(dosp[extndDrives[driveIndex]].numsect);
1736                         startsec =
1737                             LE_32(dosp[extndDrives[driveIndex]].relsect) +
1738                             lastseek;
1739                         if (startsec > (xstartsect + xnumsect)) {
1740                                 cmn_err(CE_NOTE, "!pcfs: extended partition "
1741                                     "values bad");
1742                                 return (EINVAL);
1743                         }
1744                         goto found;
1745                 } else {
1746                         /*
1747                          * We ran out of extended dos partition
1748                          * drives.  The only hope now is to go
1749                          * back to extra drives defined in the master
1750                          * fdisk table.  But we overwrote that table
1751                          * already, so we must load it in again.
1752                          */
1753                         logicalDriveCount += numDrives;
1754                         brelse(*bp);
1755                         ASSERT(fsp->pcfs_dosstart == 0);
1756                         *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1757                             fsp->pcfs_secsize);
1758                         if ((*bp)->b_flags & B_ERROR) {
1759                                 return (EIO);
1760                         }
1761                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1762                 }
1763         }
1764         /*
1765          *  Still haven't found the drive, is it an extra
1766          *  drive defined in the main FDISK table?
1767          */
1768         if (ldrive <= logicalDriveCount + numExtraDrives) {
1769                 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1770                 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1771                 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1772                 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1773                 goto found;
1774         }
1775         /*
1776          *  Still haven't found the drive, and there is
1777          *  nowhere else to look.
1778          */
1779         noLogicalDrive(ldrive);
1780         return (EINVAL);
1781 
1782 found:
1783         /*
1784          * We need this value in units of sectorsize, because PCFS' internal
1785          * offset calculations go haywire for > 512Byte sectors unless all
1786          * pcfs_.*start values are in units of sectors.
1787          * So, assign before the capacity check (that's done in DEV_BSIZE)
1788          */
1789         fsp->pcfs_dosstart = startsec;
1790 
1791         /*
1792          * convert from device sectors to proper units:
1793          *      - starting sector: DEV_BSIZE (as argument to bread())
1794          *      - media size: Bytes
1795          */
1796         startsec = pc_dbdaddr(fsp, startsec);
1797         mediasize *= fsp->pcfs_secsize;
1798 
1799         /*
1800          * some additional validation / warnings in case the partition table
1801          * and the actual media capacity are not in accordance ...
1802          */
1803         if (fsp->pcfs_mediasize != 0) {
1804                 diskaddr_t startoff =
1805                     (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1806 
1807                 if (startoff >= fsp->pcfs_mediasize ||
1808                     startoff + mediasize > fsp->pcfs_mediasize) {
1809                         cmn_err(CE_WARN,
1810                             "!pcfs: partition size (LBA start %u, %lld bytes, "
1811                             "device (%x.%x):%d) smaller than "
1812                             "mediasize (%lld bytes).\n"
1813                             "filesystem may be truncated, access errors "
1814                             "may result.\n",
1815                             (uint_t)startsec, (long long)mediasize,
1816                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1817                             fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1818                 }
1819         } else {
1820                 fsp->pcfs_mediasize = mediasize;
1821         }
1822 
1823         return (0);
1824 }
1825 
1826 
1827 static fattype_t
1828 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1829 {
1830         uint32_t ncl = fsp->pcfs_ncluster;
1831 
1832         if (ncl <= 4096) {
1833                 if (bpb_get_FatSz16(bpb) == 0)
1834                         return (FAT_UNKNOWN);
1835 
1836                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1837                     bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1838                         return (FAT12);
1839                 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1840                         return (FAT12);
1841                 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1842                         return (FAT16);
1843 
1844                 switch (bpb_get_Media(bpb)) {
1845                         case SS8SPT:
1846                         case DS8SPT:
1847                         case SS9SPT:
1848                         case DS9SPT:
1849                         case DS18SPT:
1850                         case DS9_15SPT:
1851                                 /*
1852                                  * Is this reliable - all floppies are FAT12 ?
1853                                  */
1854                                 return (FAT12);
1855                         case MD_FIXED:
1856                                 /*
1857                                  * Is this reliable - disks are always FAT16 ?
1858                                  */
1859                                 return (FAT16);
1860                         default:
1861                                 break;
1862                 }
1863         } else if (ncl <= 65536) {
1864                 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1865                         return (FAT32);
1866                 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1867                         return (FAT32);
1868                 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1869                         return (FAT32);
1870 
1871                 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1872                         return (FAT16);
1873                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1874                         return (FAT16);
1875         }
1876 
1877         /*
1878          * We don't know
1879          */
1880         return (FAT_UNKNOWN);
1881 }
1882 
1883 /*
1884  * Check to see if the BPB we found is correct.
1885  *
1886  * This looks far more complicated that it needs to be for pure structural
1887  * validation. The reason for this is that parseBPB() is also used for
1888  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1889  * BPB fields (do not) have 'known good' values, even if we (do not) reject
1890  * the BPB when attempting to mount the filesystem.
1891  *
1892  * Real-world usage of FAT shows there are a lot of corner-case situations
1893  * and, following the specification strictly, invalid filesystems out there.
1894  * Known are situations such as:
1895  *      - FAT12/FAT16 filesystems with garbage in either totsec16/32
1896  *        instead of the zero in one of the fields mandated by the spec
1897  *      - filesystems that claim to be larger than the partition they're in
1898  *      - filesystems without valid media descriptor
1899  *      - FAT32 filesystems with RootEntCnt != 0
1900  *      - FAT32 filesystems with less than 65526 clusters
1901  *      - FAT32 filesystems without valid FSI sector
1902  *      - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1903  *
1904  * Such filesystems are accessible by PCFS - if it'd know to start with that
1905  * the filesystem should be treated as a specific FAT type. Before S10, it
1906  * relied on the PC/fdisk partition type for the purpose and almost completely
1907  * ignored the BPB; now it ignores the partition type for anything else but
1908  * logical drive enumeration, which can result in rejection of (invalid)
1909  * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1910  * has less than 65526 clusters.
1911  *
1912  * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1913  * not possible to allow all such mostly-compliant filesystems in unless one
1914  * accepts false positives (definitely invalid filesystems that cause problems
1915  * later). This at least allows to pinpoint why the mount failed.
1916  *
1917  * Due to the use of FAT on removeable media, all relaxations of the rules
1918  * here need to be carefully evaluated wrt. to potential effects on PCFS
1919  * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1920  * beware.
1921  */
1922 static int
1923 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1924 {
1925         fattype_t type;
1926 
1927         uint32_t        ncl;    /* number of clusters in file area */
1928         uint32_t        rec;
1929         uint32_t        reserved;
1930         uint32_t        fsisec, bkbootsec;
1931         blkcnt_t        totsec, totsec16, totsec32, datasec;
1932         size_t          fatsec, fatsec16, fatsec32, rdirsec;
1933         size_t          secsize;
1934         len_t           mediasize;
1935         uint64_t        validflags = 0;
1936 
1937         if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1938                 validflags |= BPB_BPBSIG_OK;
1939 
1940         rec = bpb_get_RootEntCnt(bpb);
1941         reserved = bpb_get_RsvdSecCnt(bpb);
1942         fsisec = bpb_get_FSInfo32(bpb);
1943         bkbootsec = bpb_get_BkBootSec32(bpb);
1944         totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1945         totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1946         fatsec16 = bpb_get_FatSz16(bpb);
1947         fatsec32 = bpb_get_FatSz32(bpb);
1948 
1949         totsec = totsec16 ? totsec16 : totsec32;
1950         fatsec = fatsec16 ? fatsec16 : fatsec32;
1951 
1952         secsize = bpb_get_BytesPerSec(bpb);
1953         if (!VALID_SECSIZE(secsize))
1954                 secsize = fsp->pcfs_secsize;
1955         if (secsize != fsp->pcfs_secsize) {
1956                 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1957                     getmajor(fsp->pcfs_xdev),
1958                     getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1959                 PC_DPRINTF2(3, "!BPB secsize %d != "
1960                     "autodetected media block size %d\n",
1961                     (int)secsize, (int)fsp->pcfs_secsize);
1962                 if (fsp->pcfs_ldrive) {
1963                         /*
1964                          * We've already attempted to parse the partition
1965                          * table. If the block size used for that don't match
1966                          * the PCFS sector size, we're hosed one way or the
1967                          * other. Just try what happens.
1968                          */
1969                         secsize = fsp->pcfs_secsize;
1970                         PC_DPRINTF1(3,
1971                             "!pcfs: Using autodetected secsize %d\n",
1972                             (int)secsize);
1973                 } else {
1974                         /*
1975                          * This allows mounting lofi images of PCFS partitions
1976                          * with sectorsize != DEV_BSIZE. We can't parse the
1977                          * partition table on whole-disk images unless the
1978                          * (undocumented) "secsize=..." mount option is used,
1979                          * but at least this allows us to mount if we have
1980                          * an image of a partition.
1981                          */
1982                         PC_DPRINTF1(3,
1983                             "!pcfs: Using BPB secsize %d\n", (int)secsize);
1984                 }
1985         }
1986 
1987         if (fsp->pcfs_mediasize == 0) {
1988                 mediasize = (len_t)totsec * (len_t)secsize;
1989                 /*
1990                  * This is not an error because not all devices support the
1991                  * dkio(7i) mediasize queries, and/or not all devices are
1992                  * partitioned. If we have not been able to figure out the
1993                  * size of the underlaying medium, we have to trust the BPB.
1994                  */
1995                 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1996                     "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1997                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1998                     fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1999         } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
2000                 cmn_err(CE_WARN,
2001                     "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
2002                     "FAT BPB mediasize (%lld Bytes).\n"
2003                     "truncated filesystem on device (%x.%x):%d, access errors "
2004                     "possible.\n",
2005                     (long long)fsp->pcfs_mediasize,
2006                     (long long)(totsec * (blkcnt_t)secsize),
2007                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2008                     fsp->pcfs_ldrive);
2009                 mediasize = fsp->pcfs_mediasize;
2010         } else {
2011                 /*
2012                  * This is actually ok. A FAT needs not occupy the maximum
2013                  * space available in its partition, it can be shorter.
2014                  */
2015                 mediasize = (len_t)totsec * (len_t)secsize;
2016         }
2017 
2018         /*
2019          * Since we let just about anything pass through this function,
2020          * fence against divide-by-zero here.
2021          */
2022         if (secsize)
2023                 rdirsec = roundup(rec * 32, secsize) / secsize;
2024         else
2025                 rdirsec = 0;
2026 
2027         /*
2028          * This assignment is necessary before pc_dbdaddr() can first be
2029          * used. Must initialize the value here.
2030          */
2031         fsp->pcfs_secsize = secsize;
2032         fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2033 
2034         fsp->pcfs_mediasize = mediasize;
2035 
2036         fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2037         fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2038         fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2039         fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2040         fsp->pcfs_rdirsec = rdirsec;
2041 
2042         /*
2043          * Remember: All PCFS offset calculations in sectors. Before I/O
2044          * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2045          * necessary so that media with > 512Byte sector sizes work correctly.
2046          */
2047         fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2048         fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2049         fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2050         datasec = totsec -
2051             (blkcnt_t)fatsec * fsp->pcfs_numfat -
2052             (blkcnt_t)rdirsec -
2053             (blkcnt_t)reserved;
2054 
2055         DTRACE_PROBE4(fatgeometry,
2056             blkcnt_t, totsec, size_t, fatsec,
2057             size_t, rdirsec, blkcnt_t, datasec);
2058 
2059         /*
2060          * 'totsec' is taken directly from the BPB and guaranteed to fit
2061          * into a 32bit unsigned integer. The calculation of 'datasec',
2062          * on the other hand, could underflow for incorrect values in
2063          * rdirsec/reserved/fatsec. Check for that.
2064          * We also check that the BPB conforms to the FAT specification's
2065          * requirement that either of the 16/32bit total sector counts
2066          * must be zero.
2067          */
2068         if (totsec != 0 &&
2069             (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2070             datasec < totsec && datasec <= UINT32_MAX)
2071                 validflags |= BPB_TOTSEC_OK;
2072 
2073         if ((len_t)totsec * (len_t)secsize <= mediasize)
2074                 validflags |= BPB_MEDIASZ_OK;
2075 
2076         if (VALID_SECSIZE(secsize))
2077                 validflags |= BPB_SECSIZE_OK;
2078         if (VALID_SPCL(fsp->pcfs_spcl))
2079                 validflags |= BPB_SECPERCLUS_OK;
2080         if (VALID_CLSIZE(fsp->pcfs_clsize))
2081                 validflags |= BPB_CLSIZE_OK;
2082         if (VALID_NUMFATS(fsp->pcfs_numfat))
2083                 validflags |= BPB_NUMFAT_OK;
2084         if (VALID_RSVDSEC(reserved) && reserved < totsec)
2085                 validflags |= BPB_RSVDSECCNT_OK;
2086         if (VALID_MEDIA(fsp->pcfs_mediadesc))
2087                 validflags |= BPB_MEDIADESC_OK;
2088         if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2089                 validflags |= BPB_BOOTSIG16_OK;
2090         if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2091                 validflags |= BPB_BOOTSIG32_OK;
2092         if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2093                 validflags |= BPB_FSTYPSTR16_OK;
2094         if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2095                 validflags |= BPB_FSTYPSTR32_OK;
2096         if (VALID_OEMNAME(bpb_OEMName(bpb)))
2097                 validflags |= BPB_OEMNAME_OK;
2098         if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2099                 validflags |= BPB_BKBOOTSEC_OK;
2100         if (fsisec > 0 && fsisec <= reserved)
2101                 validflags |= BPB_FSISEC_OK;
2102         if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2103                 validflags |= BPB_JMPBOOT_OK;
2104         if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2105                 validflags |= BPB_FSVER_OK;
2106         if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2107                 validflags |= BPB_VOLLAB16_OK;
2108         if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2109                 validflags |= BPB_VOLLAB32_OK;
2110         if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2111                 validflags |= BPB_EXTFLAGS_OK;
2112 
2113         /*
2114          * Try to determine which FAT format to use.
2115          *
2116          * Calculate the number of clusters in order to determine
2117          * the type of FAT we are looking at.  This is the only
2118          * recommended way of determining FAT type, though there
2119          * are other hints in the data, this is the best way.
2120          *
2121          * Since we let just about "anything" pass through this function
2122          * without early exits, fence against divide-by-zero here.
2123          *
2124          * datasec was already validated against UINT32_MAX so we know
2125          * the result will not overflow the 32bit calculation.
2126          */
2127         if (fsp->pcfs_spcl)
2128                 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2129         else
2130                 ncl = 0;
2131 
2132         fsp->pcfs_ncluster = ncl;
2133 
2134         /*
2135          * From the Microsoft FAT specification:
2136          * In the following example, when it says <, it does not mean <=.
2137          * Note also that the numbers are correct.  The first number for
2138          * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2139          * and the '<' signs are not wrong.
2140          *
2141          * We "specialdetect" the corner cases, and use at least one "extra"
2142          * criterion to decide whether it's FAT16 or FAT32 if the cluster
2143          * count is dangerously close to the boundaries.
2144          */
2145 
2146         if (ncl <= PCF_FIRSTCLUSTER) {
2147                 type = FAT_UNKNOWN;
2148         } else if (ncl < 4085) {
2149                 type = FAT12;
2150         } else if (ncl <= 4096) {
2151                 type = FAT_QUESTIONABLE;
2152         } else if (ncl < 65525) {
2153                 type = FAT16;
2154         } else if (ncl <= 65536) {
2155                 type = FAT_QUESTIONABLE;
2156         } else if (ncl < PCF_LASTCLUSTER32) {
2157                 type = FAT32;
2158         } else {
2159                 type = FAT_UNKNOWN;
2160         }
2161 
2162         DTRACE_PROBE4(parseBPB__initial,
2163             struct pcfs *, fsp, unsigned char *, bpb,
2164             int, validflags, fattype_t, type);
2165 
2166 recheck:
2167         fsp->pcfs_fatsec = fatsec;
2168 
2169         /* Do some final sanity checks for each specific type of FAT */
2170         switch (type) {
2171                 case FAT12:
2172                         if (rec != 0)
2173                                 validflags |= BPB_ROOTENTCNT_OK;
2174                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2175                             bpb_get_TotSec16(bpb) == 0)
2176                                 validflags |= BPB_TOTSEC16_OK;
2177                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2178                             bpb_get_TotSec32(bpb) == 0)
2179                                 validflags |= BPB_TOTSEC32_OK;
2180                         if (bpb_get_FatSz16(bpb) == fatsec)
2181                                 validflags |= BPB_FATSZ16_OK;
2182                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2183                             * 3 / 2)
2184                                 validflags |= BPB_FATSZ_OK;
2185                         if (ncl < 4085)
2186                                 validflags |= BPB_NCLUSTERS_OK;
2187 
2188                         fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2189                         fsp->pcfs_rootblksize =
2190                             fsp->pcfs_rdirsec * secsize;
2191                         fsp->pcfs_fsistart = 0;
2192 
2193                         if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2194                                 type = FAT_UNKNOWN;
2195                         break;
2196                 case FAT16:
2197                         if (rec != 0)
2198                                 validflags |= BPB_ROOTENTCNT_OK;
2199                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2200                             bpb_get_TotSec16(bpb) == 0)
2201                                 validflags |= BPB_TOTSEC16_OK;
2202                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2203                             bpb_get_TotSec32(bpb) == 0)
2204                                 validflags |= BPB_TOTSEC32_OK;
2205                         if (bpb_get_FatSz16(bpb) == fatsec)
2206                                 validflags |= BPB_FATSZ16_OK;
2207                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2208                                 validflags |= BPB_FATSZ_OK;
2209                         if (ncl >= 4085 && ncl < 65525)
2210                                 validflags |= BPB_NCLUSTERS_OK;
2211 
2212                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2213                         fsp->pcfs_rootblksize =
2214                             fsp->pcfs_rdirsec * secsize;
2215                         fsp->pcfs_fsistart = 0;
2216 
2217                         if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2218                                 type = FAT_UNKNOWN;
2219                         break;
2220                 case FAT32:
2221                         if (rec == 0)
2222                                 validflags |= BPB_ROOTENTCNT_OK;
2223                         if (bpb_get_TotSec16(bpb) == 0)
2224                                 validflags |= BPB_TOTSEC16_OK;
2225                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2226                                 validflags |= BPB_TOTSEC32_OK;
2227                         if (bpb_get_FatSz16(bpb) == 0)
2228                                 validflags |= BPB_FATSZ16_OK;
2229                         if (bpb_get_FatSz32(bpb) == fatsec)
2230                                 validflags |= BPB_FATSZ32_OK;
2231                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2232                                 validflags |= BPB_FATSZ_OK;
2233                         if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2234                                 validflags |= BPB_NCLUSTERS_OK;
2235 
2236                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2237                         fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2238                         fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2239                         if (validflags & BPB_FSISEC_OK)
2240                                 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2241                         fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2242                         if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2243                                 validflags |= BPB_ROOTCLUSTER_OK;
2244 
2245                         /*
2246                          * Current PCFS code only works if 'pcfs_rdirstart'
2247                          * contains the root cluster number on FAT32.
2248                          * That's a mis-use and would better be changed.
2249                          */
2250                         fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2251 
2252                         if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2253                                 type = FAT_UNKNOWN;
2254                         break;
2255                 case FAT_QUESTIONABLE:
2256                         type = secondaryBPBChecks(fsp, bpb, secsize);
2257                         goto recheck;
2258                 default:
2259                         ASSERT(type == FAT_UNKNOWN);
2260                         break;
2261         }
2262 
2263         ASSERT(type != FAT_QUESTIONABLE);
2264 
2265         fsp->pcfs_fattype = type;
2266 
2267         if (valid)
2268                 *valid = validflags;
2269 
2270         DTRACE_PROBE4(parseBPB__final,
2271             struct pcfs *, fsp, unsigned char *, bpb,
2272             int, validflags, fattype_t, type);
2273 
2274         if (type != FAT_UNKNOWN) {
2275                 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2276                 ASSERT(ISP2(secsize / DEV_BSIZE));
2277                 return (1);
2278         }
2279 
2280         return (0);
2281 }
2282 
2283 
2284 /*
2285  * Detect the device's native block size (sector size).
2286  *
2287  * Test whether the device is:
2288  *      - a floppy device from a known controller type via DKIOCINFO
2289  *      - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2290  *      - a USB floppy drive (identified by drive geometry)
2291  *
2292  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2293  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2294  *
2295  * This might be a bit wasteful on kernel stack space; if anyone's
2296  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2297  */
2298 static void
2299 pcfs_device_getinfo(struct pcfs *fsp)
2300 {
2301         dev_t                   rdev = fsp->pcfs_xdev;
2302         int                     error;
2303         union {
2304                 struct dk_minfo         mi;
2305                 struct dk_cinfo         ci;
2306                 struct dk_geom          gi;
2307                 struct fd_char          fc;
2308         } arg;                          /* save stackspace ... */
2309         intptr_t argp = (intptr_t)&arg;
2310         ldi_handle_t            lh;
2311         ldi_ident_t             li;
2312         int isfloppy, isremoveable, ishotpluggable;
2313         cred_t                  *cr = CRED();
2314 
2315         if (ldi_ident_from_dev(rdev, &li))
2316                 goto out;
2317 
2318         error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2319         ldi_ident_release(li);
2320         if (error)
2321                 goto out;
2322 
2323         /*
2324          * Not sure if this could possibly happen. It'd be a bit like
2325          * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2326          * expecting it, needs some thought if triggered ...
2327          */
2328         ASSERT(fsp->pcfs_xdev == rdev);
2329 
2330         /*
2331          * Check for removeable/hotpluggable media.
2332          */
2333         if (ldi_ioctl(lh, DKIOCREMOVABLE,
2334             (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2335                 isremoveable = 0;
2336         }
2337         if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2338             (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2339                 ishotpluggable = 0;
2340         }
2341 
2342         /*
2343          * Make sure we don't use "half-initialized" values if the ioctls fail.
2344          */
2345         if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2346                 bzero(&arg, sizeof (arg));
2347                 fsp->pcfs_mediasize = 0;
2348         } else {
2349                 fsp->pcfs_mediasize =
2350                     (len_t)arg.mi.dki_lbsize *
2351                     (len_t)arg.mi.dki_capacity;
2352         }
2353 
2354         if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2355                 if (fsp->pcfs_secsize == 0) {
2356                         fsp->pcfs_secsize = arg.mi.dki_lbsize;
2357                         fsp->pcfs_sdshift =
2358                             ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2359                 } else {
2360                         PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2361                             "%d, device (%x.%x), different from user-provided "
2362                             "%d. User override - ignoring autodetect result.\n",
2363                             arg.mi.dki_lbsize,
2364                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2365                             fsp->pcfs_secsize);
2366                 }
2367         } else if (arg.mi.dki_lbsize) {
2368                 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2369                     "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2370                     "Ignoring autodetect result.\n",
2371                     arg.mi.dki_lbsize,
2372                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2373         }
2374 
2375         /*
2376          * We treat the following media types as a floppy by default.
2377          */
2378         isfloppy =
2379             (arg.mi.dki_media_type == DK_FLOPPY ||
2380             arg.mi.dki_media_type == DK_ZIP ||
2381             arg.mi.dki_media_type == DK_JAZ);
2382 
2383         /*
2384          * if this device understands fdio(7I) requests it's
2385          * obviously a floppy drive.
2386          */
2387         if (!isfloppy &&
2388             !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2389                 isfloppy = 1;
2390 
2391         /*
2392          * some devices we like to treat as floppies, but they don't
2393          * understand fdio(7I) requests.
2394          */
2395         if (!isfloppy &&
2396             !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2397             (arg.ci.dki_ctype == DKC_WDC2880 ||
2398             arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2399             arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2400             arg.ci.dki_ctype == DKC_INTEL82077))
2401                 isfloppy = 1;
2402 
2403         /*
2404          * This is the "final fallback" test - media with
2405          * 2 heads and 80 cylinders are assumed to be floppies.
2406          * This is normally true for USB floppy drives ...
2407          */
2408         if (!isfloppy &&
2409             !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2410             (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2411                 isfloppy = 1;
2412 
2413         /*
2414          * This is similar to the "old" PCFS code that sets this flag
2415          * just based on the media descriptor being 0xf8 (MD_FIXED).
2416          * Should be re-worked. We really need some specialcasing for
2417          * removeable media.
2418          */
2419         if (!isfloppy) {
2420                 fsp->pcfs_flags |= PCFS_NOCHK;
2421         }
2422 
2423         /*
2424          * We automatically disable access time updates if the medium is
2425          * removeable and/or hotpluggable, and the admin did not explicitly
2426          * request access time updates (via the "atime" mount option).
2427          * The majority of flash-based media should fit this category.
2428          * Minimizing write access extends the lifetime of your memory stick !
2429          */
2430         if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2431             (isremoveable || ishotpluggable | isfloppy)) {
2432                 fsp->pcfs_flags |= PCFS_NOATIME;
2433         }
2434 
2435         (void) ldi_close(lh, FREAD, cr);
2436 out:
2437         if (fsp->pcfs_secsize == 0) {
2438                 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2439                     "device (%x.%x) failed, no user-provided fallback. "
2440                     "Using %d bytes.\n",
2441                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2442                     DEV_BSIZE);
2443                 fsp->pcfs_secsize = DEV_BSIZE;
2444                 fsp->pcfs_sdshift = 0;
2445         }
2446         ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2447         ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2448 }
2449 
2450 /*
2451  * Get the FAT type for the DOS medium.
2452  *
2453  * -------------------------
2454  * According to Microsoft:
2455  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2456  * count of clusters on the volume and nothing else.
2457  * -------------------------
2458  *
2459  */
2460 static int
2461 pc_getfattype(struct pcfs *fsp)
2462 {
2463         int error = 0;
2464         buf_t *bp = NULL;
2465         struct vnode *devvp = fsp->pcfs_devvp;
2466         dev_t   dev = devvp->v_rdev;
2467 
2468         /*
2469          * Detect the native block size of the medium, and attempt to
2470          * detect whether the medium is removeable.
2471          * We do treat removable media (floppies, USB and FireWire disks)
2472          * differently wrt. to the frequency and synchronicity of FAT updates.
2473          * We need to know the media block size in order to be able to
2474          * parse the partition table.
2475          */
2476         pcfs_device_getinfo(fsp);
2477 
2478         /*
2479          * Unpartitioned media (floppies and some removeable devices)
2480          * don't have a partition table, the FAT BPB is at disk block 0.
2481          * Start out by reading block 0.
2482          */
2483         fsp->pcfs_dosstart = 0;
2484         bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2485 
2486         if (error = geterror(bp))
2487                 goto out;
2488 
2489         /*
2490          * If a logical drive number is requested, parse the partition table
2491          * and attempt to locate it. Otherwise, proceed immediately to the
2492          * BPB check. findTheDrive(), if successful, returns the disk block
2493          * number where the requested partition starts in "startsec".
2494          */
2495         if (fsp->pcfs_ldrive != 0) {
2496                 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2497                     "device (%x,%x):%d to find BPB\n",
2498                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2499 
2500                 if (error = findTheDrive(fsp, &bp))
2501                         goto out;
2502 
2503                 ASSERT(fsp->pcfs_dosstart != 0);
2504 
2505                 brelse(bp);
2506                 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2507                     fsp->pcfs_secsize);
2508                 if (error = geterror(bp))
2509                         goto out;
2510         }
2511 
2512         /*
2513          * Validate the BPB and fill in the instance structure.
2514          */
2515         if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2516                 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2517                     "device (%x.%x):%d, disk LBA %u\n",
2518                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2519                     (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2520                 error = EINVAL;
2521                 goto out;
2522         }
2523 
2524         ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2525 
2526 out:
2527         /*
2528          * Release the buffer used
2529          */
2530         if (bp != NULL)
2531                 brelse(bp);
2532         return (error);
2533 }
2534 
2535 
2536 /*
2537  * Get the file allocation table.
2538  * If there is an old FAT, invalidate it.
2539  */
2540 int
2541 pc_getfat(struct pcfs *fsp)
2542 {
2543         struct buf *bp = NULL;
2544         uchar_t *fatp = NULL;
2545         uchar_t *fat_changemap = NULL;
2546         int error;
2547         int fat_changemapsize;
2548         int flags = 0;
2549         int nfat;
2550         int altfat_mustmatch = 0;
2551         int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2552 
2553         if (fsp->pcfs_fatp) {
2554                 /*
2555                  * There is a FAT in core.
2556                  * If there are open file pcnodes or we have modified it or
2557                  * it hasn't timed out yet use the in core FAT.
2558                  * Otherwise invalidate it and get a new one
2559                  */
2560 #ifdef notdef
2561                 if (fsp->pcfs_frefs ||
2562                     (fsp->pcfs_flags & PCFS_FATMOD) ||
2563                     (gethrestime_sec() < fsp->pcfs_fattime)) {
2564                         return (0);
2565                 } else {
2566                         mutex_enter(&pcfslock);
2567                         pc_invalfat(fsp);
2568                         mutex_exit(&pcfslock);
2569                 }
2570 #endif /* notdef */
2571                 return (0);
2572         }
2573 
2574         /*
2575          * Get FAT and check it for validity
2576          */
2577         fatp = kmem_alloc(fatsize, KM_SLEEP);
2578         error = pc_readfat(fsp, fatp);
2579         if (error) {
2580                 flags = B_ERROR;
2581                 goto out;
2582         }
2583         fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2584         fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2585         fsp->pcfs_fatp = fatp;
2586         fsp->pcfs_fat_changemapsize = fat_changemapsize;
2587         fsp->pcfs_fat_changemap = fat_changemap;
2588 
2589         /*
2590          * The only definite signature check is that the
2591          * media descriptor byte should match the first byte
2592          * of the FAT block.
2593          */
2594         if (fatp[0] != fsp->pcfs_mediadesc) {
2595                 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2596                     "media descriptor %x, FAT[0] lowbyte %x\n",
2597                     (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2598                 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2599                 altfat_mustmatch = 1;
2600         }
2601 
2602         /*
2603          * Get alternate FATs and check for consistency
2604          * This is an inlined version of pc_readfat().
2605          * Since we're only comparing FAT and alternate FAT,
2606          * there's no reason to let pc_readfat() copy data out
2607          * of the buf. Instead, compare in-situ, one cluster
2608          * at a time.
2609          */
2610         for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2611                 size_t startsec;
2612                 size_t off;
2613 
2614                 startsec = pc_dbdaddr(fsp,
2615                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2616 
2617                 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2618                         daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2619                             pc_cltodb(fsp, pc_lblkno(fsp, off)));
2620 
2621                         bp = bread(fsp->pcfs_xdev, fatblk,
2622                             MIN(fsp->pcfs_clsize, fatsize - off));
2623                         if (bp->b_flags & (B_ERROR | B_STALE)) {
2624                                 cmn_err(CE_NOTE,
2625                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2626                                     " read error at offset %ld on device"
2627                                     " (%x.%x):%d",
2628                                     nfat, (void *)(uintptr_t)startsec, off,
2629                                     getmajor(fsp->pcfs_xdev),
2630                                     getminor(fsp->pcfs_xdev),
2631                                     fsp->pcfs_ldrive);
2632                                 flags = B_ERROR;
2633                                 error = EIO;
2634                                 goto out;
2635                         }
2636                         bp->b_flags |= B_STALE | B_AGE;
2637                         if (bcmp(bp->b_un.b_addr, fatp + off,
2638                             MIN(fsp->pcfs_clsize, fatsize - off))) {
2639                                 cmn_err(CE_NOTE,
2640                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2641                                     " corrupted at offset %ld on device"
2642                                     " (%x.%x):%d",
2643                                     nfat, (void *)(uintptr_t)startsec, off,
2644                                     getmajor(fsp->pcfs_xdev),
2645                                     getminor(fsp->pcfs_xdev),
2646                                     fsp->pcfs_ldrive);
2647                                 if (altfat_mustmatch) {
2648                                         flags = B_ERROR;
2649                                         error = EIO;
2650                                         goto out;
2651                                 }
2652                         }
2653                         brelse(bp);
2654                         bp = NULL;      /* prevent double release */
2655                 }
2656         }
2657 
2658         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2659         fsp->pcfs_fatjustread = 1;
2660 
2661         /*
2662          * Retrieve FAT32 fsinfo sector.
2663          * A failure to read this is not fatal to accessing the volume.
2664          * It simply means operations that count or search free blocks
2665          * will have to do a full FAT walk, vs. a possibly quicker lookup
2666          * of the summary information.
2667          * Hence, we log a message but return success overall after this point.
2668          */
2669         if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2670                 struct fat_od_fsi *fsinfo_disk;
2671 
2672                 bp = bread(fsp->pcfs_xdev,
2673                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2674                 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2675                 if (bp->b_flags & (B_ERROR | B_STALE) ||
2676                     !FSISIG_OK(fsinfo_disk)) {
2677                         cmn_err(CE_NOTE,
2678                             "!pcfs: error reading fat32 fsinfo from "
2679                             "device (%x.%x):%d, block %lld",
2680                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2681                             fsp->pcfs_ldrive,
2682                             (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2683                         fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2684                         fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2685                         fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2686                 } else {
2687                         bp->b_flags |= B_STALE | B_AGE;
2688                         fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2689                         fsp->pcfs_fsinfo.fs_free_clusters =
2690                             LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2691                         fsp->pcfs_fsinfo.fs_next_free =
2692                             LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2693                 }
2694                 brelse(bp);
2695                 bp = NULL;
2696         }
2697 
2698         if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2699                 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2700         else
2701                 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2702 
2703         return (0);
2704 
2705 out:
2706         cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2707         if (bp)
2708                 brelse(bp);
2709         if (fatp)
2710                 kmem_free(fatp, fatsize);
2711         if (fat_changemap)
2712                 kmem_free(fat_changemap, fat_changemapsize);
2713 
2714         if (flags) {
2715                 pc_mark_irrecov(fsp);
2716         }
2717         return (error);
2718 }