illumos-gate New usr/src/uts/common/fs/pcfs/pc

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/param.h>
  28 #include <sys/systm.h>
  29 #include <sys/kmem.h>
  30 #include <sys/user.h>
  31 #include <sys/proc.h>
  32 #include <sys/cred.h>
  33 #include <sys/disp.h>
  34 #include <sys/buf.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vfs_opreg.h>
  37 #include <sys/vnode.h>
  38 #include <sys/fdio.h>
  39 #include <sys/file.h>
  40 #include <sys/uio.h>
  41 #include <sys/conf.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/mount.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/conf.h>
  49 #include <sys/mkdev.h>
  50 #include <sys/swap.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/sunldi.h>
  53 #include <sys/dktp/fdisk.h>
  54 #include <sys/fs/pc_label.h>
  55 #include <sys/fs/pc_fs.h>
  56 #include <sys/fs/pc_dir.h>
  57 #include <sys/fs/pc_node.h>
  58 #include <fs/fs_subr.h>
  59 #include <sys/modctl.h>
  60 #include <sys/dkio.h>
  61 #include <sys/open.h>
  62 #include <sys/mntent.h>
  63 #include <sys/policy.h>
  64 #include <sys/atomic.h>
  65 #include <sys/sdt.h>
  66 
  67 /*
  68  * The majority of PC media use a 512 sector size, but
  69  * occasionally you will run across a 1k sector size.
  70  * For media with a 1k sector size, fd_strategy() requires
  71  * the I/O size to be a 1k multiple; so when the sector size
  72  * is not yet known, always read 1k.
  73  */
  74 #define PC_SAFESECSIZE  (PC_SECSIZE * 2)
  75 
  76 static int pcfs_pseudo_floppy(dev_t);
  77 
  78 static int pcfsinit(int, char *);
  79 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
  80         struct cred *);
  81 static int pcfs_unmount(struct vfs *, int, struct cred *);
  82 static int pcfs_root(struct vfs *, struct vnode **);
  83 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
  84 static int pc_syncfsnodes(struct pcfs *);
  85 static int pcfs_sync(struct vfs *, short, struct cred *);
  86 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
  87 static void pcfs_freevfs(vfs_t *vfsp);
  88 
  89 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
  90 static int pc_writefat(struct pcfs *fsp, daddr_t start);
  91 
  92 static int pc_getfattype(struct pcfs *fsp);
  93 static void pcfs_parse_mntopts(struct pcfs *fsp);
  94 
  95 
  96 /*
  97  * pcfs mount options table
  98  */
  99 
 100 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
 101 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
 102 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
 103 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
 104 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
 105 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
 106 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
 107 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
 108 
 109 static mntopt_t mntopts[] = {
 110 /*
 111  *      option name     cancel option   default arg     flags   opt data
 112  */
 113         { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
 114         { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
 115         { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
 116         { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
 117         { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
 118         { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
 119         { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
 120         { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
 121         { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
 122         { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
 123 };
 124 
 125 static mntopts_t pcfs_mntopts = {
 126         sizeof (mntopts) / sizeof (mntopt_t),
 127         mntopts
 128 };
 129 
 130 int pcfsdebuglevel = 0;
 131 
 132 /*
 133  * pcfslock:    protects the list of mounted pc filesystems "pc_mounttab.
 134  * pcfs_lock:   (inside per filesystem structure "pcfs")
 135  *              per filesystem lock. Most of the vfsops and vnodeops are
 136  *              protected by this lock.
 137  * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 138  *
 139  * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 140  *
 141  * pcfs_mountcount:     used to prevent module unloads while there is still
 142  *                      pcfs state from a former mount hanging around. With
 143  *                      forced umount support, the filesystem module must not
 144  *                      be allowed to go away before the last VFS_FREEVFS()
 145  *                      call has been made.
 146  *                      Since this is just an atomic counter, there's no need
 147  *                      for locking.
 148  */
 149 kmutex_t        pcfslock;
 150 krwlock_t       pcnodes_lock;
 151 uint32_t        pcfs_mountcount;
 152 
 153 static int pcfstype;
 154 
 155 static vfsdef_t vfw = {
 156         VFSDEF_VERSION,
 157         "pcfs",
 158         pcfsinit,
 159         VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
 160         &pcfs_mntopts
 161 };
 162 
 163 extern struct mod_ops mod_fsops;
 164 
 165 static struct modlfs modlfs = {
 166         &mod_fsops,
 167         "PC filesystem",
 168         &vfw
 169 };
 170 
 171 static struct modlinkage modlinkage = {
 172         MODREV_1,
 173         { &modlfs, NULL }
 174 };
 175 
 176 int
 177 _init(void)
 178 {
 179         int     error;
 180 
 181 #if !defined(lint)
 182         /* make sure the on-disk structures are sane */
 183         ASSERT(sizeof (struct pcdir) == 32);
 184         ASSERT(sizeof (struct pcdir_lfn) == 32);
 185 #endif
 186         mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
 187         rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
 188         error = mod_install(&modlinkage);
 189         if (error) {
 190                 mutex_destroy(&pcfslock);
 191                 rw_destroy(&pcnodes_lock);
 192         }
 193         return (error);
 194 }
 195 
 196 int
 197 _fini(void)
 198 {
 199         int     error;
 200 
 201         /*
 202          * If a forcedly unmounted instance is still hanging around,
 203          * we cannot allow the module to be unloaded because that would
 204          * cause panics once the VFS framework decides it's time to call
 205          * into VFS_FREEVFS().
 206          */
 207         if (pcfs_mountcount)
 208                 return (EBUSY);
 209 
 210         error = mod_remove(&modlinkage);
 211         if (error)
 212                 return (error);
 213         mutex_destroy(&pcfslock);
 214         rw_destroy(&pcnodes_lock);
 215         /*
 216          * Tear down the operations vectors
 217          */
 218         (void) vfs_freevfsops_by_type(pcfstype);
 219         vn_freevnodeops(pcfs_fvnodeops);
 220         vn_freevnodeops(pcfs_dvnodeops);
 221         return (0);
 222 }
 223 
 224 int
 225 _info(struct modinfo *modinfop)
 226 {
 227         return (mod_info(&modlinkage, modinfop));
 228 }
 229 
 230 /* ARGSUSED1 */
 231 static int
 232 pcfsinit(int fstype, char *name)
 233 {
 234         static const fs_operation_def_t pcfs_vfsops_template[] = {
 235                 { VFSNAME_MOUNT,        { .vfs_mount = pcfs_mount } },
 236                 { VFSNAME_UNMOUNT,      { .vfs_unmount = pcfs_unmount } },
 237                 { VFSNAME_ROOT,         { .vfs_root = pcfs_root } },
 238                 { VFSNAME_STATVFS,      { .vfs_statvfs = pcfs_statvfs } },
 239                 { VFSNAME_SYNC,         { .vfs_sync = pcfs_sync } },
 240                 { VFSNAME_VGET,         { .vfs_vget = pcfs_vget } },
 241                 { VFSNAME_FREEVFS,      { .vfs_freevfs = pcfs_freevfs } },
 242                 { NULL,                 { NULL } }
 243         };
 244         int error;
 245 
 246         error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
 247         if (error != 0) {
 248                 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
 249                 return (error);
 250         }
 251 
 252         error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
 253         if (error != 0) {
 254                 (void) vfs_freevfsops_by_type(fstype);
 255                 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
 256                 return (error);
 257         }
 258 
 259         error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
 260         if (error != 0) {
 261                 (void) vfs_freevfsops_by_type(fstype);
 262                 vn_freevnodeops(pcfs_fvnodeops);
 263                 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
 264                 return (error);
 265         }
 266 
 267         pcfstype = fstype;
 268         (void) pc_init();
 269         pcfs_mountcount = 0;
 270         return (0);
 271 }
 272 
 /*
  * Head of the list of mounted pcfs instances, linked through
  * "pcfs_nxt" and protected by pcfslock. Starts out empty.
  */
 static struct pcfs *pc_mounttab;

 extern struct pcfs_args pc_tz;

 /*
  * Special logical drive numbers, used only inside this file.
  */
 #define UNPARTITIONED_DRIVE     0
 #define PRIMARY_DOS_DRIVE       1
 #define BOOT_PARTITION_DRIVE    99
 283 
 284 static int
 285 pcfs_device_identify(
 286         struct vfs *vfsp,
 287         struct mounta *uap,
 288         struct cred *cr,
 289         int *dos_ldrive,
 290         dev_t *xdev)
 291 {
 292         struct pathname special;
 293         char *c;
 294         struct vnode *svp = NULL;
 295         struct vnode *lvp = NULL;
 296         int oflag, aflag;
 297         int error;
 298 
 299         /*
 300          * Resolve path name of special file being mounted.
 301          */
 302         if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
 303                 return (error);
 304         }
 305 
 306         *dos_ldrive = -1;
 307 
 308         if (error =
 309             lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
 310                 /*
 311                  * If there's no device node, the name specified most likely
 312                  * maps to a PCFS-style "partition specifier" to select a
 313                  * harddisk primary/logical partition. Disable floppy-specific
 314                  * checks in such cases unless an explicit :A or :B is
 315                  * requested.
 316                  */
 317 
 318                 /*
 319                  * Split the pathname string at the last ':' separator.
 320                  * If there's no ':' in the device name, or the ':' is the
 321                  * last character in the string, the name is invalid and
 322                  * the error from the previous lookup will be returned.
 323                  */
 324                 c = strrchr(special.pn_path, ':');
 325                 if (c == NULL || strlen(c) == 0)
 326                         goto devlookup_done;
 327 
 328                 *c++ = '\0';
 329 
 330                 /*
 331                  * PCFS partition name suffixes can be:
 332                  *      - "boot" to indicate the X86BOOT partition
 333                  *      - a drive letter [c-z] for the "DOS logical drive"
 334                  *      - a drive number 1..24 for the "DOS logical drive"
 335                  *      - a "floppy name letter", 'a' or 'b' (just strip this)
 336                  */
 337                 if (strcasecmp(c, "boot") == 0) {
 338                         /*
 339                          * The Solaris boot partition is requested.
 340                          */
 341                         *dos_ldrive = BOOT_PARTITION_DRIVE;
 342                 } else if (strspn(c, "0123456789") == strlen(c)) {
 343                         /*
 344                          * All digits - parse the partition number.
 345                          */
 346                         long drvnum = 0;
 347 
 348                         if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
 349                                 /*
 350                                  * A number alright - in the allowed range ?
 351                                  */
 352                                 if (drvnum > 24 || drvnum == 0)
 353                                         error = ENXIO;
 354                         }
 355                         if (error)
 356                                 goto devlookup_done;
 357                         *dos_ldrive = (int)drvnum;
 358                 } else if (strlen(c) == 1) {
 359                         /*
 360                          * A single trailing character was specified.
 361                          *      - [c-zC-Z] means a harddisk partition, and
 362                          *        we retrieve the partition number.
 363                          *      - [abAB] means a floppy drive, so we swallow
 364                          *        the "drive specifier" and test later
 365                          *        whether the physical device is a floppy.
 366                          */
 367                         *c = tolower(*c);
 368                         if (*c == 'a' || *c == 'b') {
 369                                 *dos_ldrive = UNPARTITIONED_DRIVE;
 370                         } else if (*c < 'c' || *c > 'z') {
 371                                 error = ENXIO;
 372                                 goto devlookup_done;
 373                         } else {
 374                                 *dos_ldrive = 1 + *c - 'c';
 375                         }
 376                 } else {
 377                         /*
 378                          * Can't parse this - pass through previous error.
 379                          */
 380                         goto devlookup_done;
 381                 }
 382 
 383 
 384                 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
 385                     NULLVPP, &svp);
 386         } else {
 387                 *dos_ldrive = UNPARTITIONED_DRIVE;
 388         }
 389 devlookup_done:
 390         pn_free(&special);
 391         if (error)
 392                 return (error);
 393 
 394         ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
 395 
 396         /*
 397          * Verify caller's permission to open the device special file.
 398          */
 399         if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 400             ((uap->flags & MS_RDONLY) != 0)) {
 401                 oflag = FREAD;
 402                 aflag = VREAD;
 403         } else {
 404                 oflag = FREAD | FWRITE;
 405                 aflag = VREAD | VWRITE;
 406         }
 407 
 408         error = vfs_get_lofi(vfsp, &lvp);
 409 
 410         if (error > 0) {
 411                 if (error == ENOENT)
 412                         error = ENODEV;
 413                 goto out;
 414         } else if (error == 0) {
 415                 *xdev = lvp->v_rdev;
 416         } else {
 417                 *xdev = svp->v_rdev;
 418 
 419                 if (svp->v_type != VBLK) {
 420                         error = ENOTBLK;
 421                         goto out;
 422                 }
 423 
 424                 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
 425                         goto out;
 426         }
 427 
 428         if (getmajor(*xdev) >= devcnt) {
 429                 error = ENXIO;
 430                 goto out;
 431         }
 432 
 433         if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 434                 goto out;
 435 
 436 out:
 437         if (svp != NULL)
 438                 VN_RELE(svp);
 439         if (lvp != NULL)
 440                 VN_RELE(lvp);
 441         return (error);
 442 }
 443 
 444 static int
 445 pcfs_device_ismounted(
 446         struct vfs *vfsp,
 447         int dos_ldrive,
 448         dev_t xdev,
 449         int *remounting,
 450         dev_t *pseudodev)
 451 {
 452         struct pcfs *fsp;
 453         int remount = *remounting;
 454 
 455         /*
 456          * Ensure that this logical drive isn't already mounted, unless
 457          * this is a REMOUNT request.
 458          * Note: The framework will perform this check if the "...:c"
 459          * PCFS-style "logical drive" syntax has not been used and an
 460          * actually existing physical device is backing this filesystem.
 461          * Once all block device drivers support PC-style partitioning,
 462          * this codeblock can be dropped.
 463          */
 464         *pseudodev = xdev;
 465 
 466         if (dos_ldrive) {
 467                 mutex_enter(&pcfslock);
 468                 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
 469                         if (fsp->pcfs_xdev == xdev &&
 470                             fsp->pcfs_ldrive == dos_ldrive) {
 471                                 mutex_exit(&pcfslock);
 472                                 if (remount) {
 473                                         return (0);
 474                                 } else {
 475                                         return (EBUSY);
 476                                 }
 477                         }
 478                 /*
 479                  * Assign a unique device number for the vfs
 480                  * The old way (getudev() + a constantly incrementing
 481                  * major number) was wrong because it changes vfs_dev
 482                  * across mounts and reboots, which breaks nfs file handles.
 483                  * UFS just uses the real dev_t. We can't do that because
 484                  * of the way pcfs opens fdisk partitons (the :c and :d
 485                  * partitions are on the same dev_t). Though that _might_
 486                  * actually be ok, since the file handle contains an
 487                  * absolute block number, it's probably better to make them
 488                  * different. So I think we should retain the original
 489                  * dev_t, but come up with a different minor number based
 490                  * on the logical drive that will _always_ come up the same.
 491                  * For now, we steal the upper 6 bits.
 492                  */
 493 #ifdef notdef
 494                 /* what should we do here? */
 495                 if (((getminor(xdev) >> 12) & 0x3F) != 0)
 496                         printf("whoops - upper bits used!\n");
 497 #endif
 498                 *pseudodev = makedevice(getmajor(xdev),
 499                     ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
 500                 if (vfs_devmounting(*pseudodev, vfsp)) {
 501                         mutex_exit(&pcfslock);
 502                         return (EBUSY);
 503                 }
 504                 if (vfs_devismounted(*pseudodev)) {
 505                         mutex_exit(&pcfslock);
 506                         if (remount) {
 507                                 return (0);
 508                         } else {
 509                                 return (EBUSY);
 510                         }
 511                 }
 512                 mutex_exit(&pcfslock);
 513         } else {
 514                 *pseudodev = xdev;
 515                 if (vfs_devmounting(*pseudodev, vfsp)) {
 516                         return (EBUSY);
 517                 }
 518                 if (vfs_devismounted(*pseudodev))
 519                         if (remount) {
 520                                 return (0);
 521                         } else {
 522                                 return (EBUSY);
 523                         }
 524         }
 525 
 526         /*
 527          * This is not a remount. Even if MS_REMOUNT was requested,
 528          * the caller needs to proceed as it would on an ordinary
 529          * mount.
 530          */
 531         *remounting = 0;
 532 
 533         ASSERT(*pseudodev);
 534         return (0);
 535 }
 536 
 537 /*
 538  * Get the PCFS-specific mount options from the VFS framework.
 539  * For "timezone" and "secsize", we need to parse the number
 540  * ourselves and ensure its validity.
 541  * Note: "secsize" is deliberately undocumented at this time,
 542  * it's a workaround for devices (particularly: lofi image files)
 543  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
 544  */
 545 static void
 546 pcfs_parse_mntopts(struct pcfs *fsp)
 547 {
 548         char *c;
 549         char *endptr;
 550         long l;
 551         struct vfs *vfsp = fsp->pcfs_vfs;
 552 
 553         ASSERT(fsp->pcfs_secondswest == 0);
 554         ASSERT(fsp->pcfs_secsize == 0);
 555 
 556         if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
 557                 fsp->pcfs_flags |= PCFS_HIDDEN;
 558         if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
 559                 fsp->pcfs_flags |= PCFS_FOLDCASE;
 560         if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
 561                 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
 562         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
 563                 fsp->pcfs_flags |= PCFS_NOATIME;
 564 
 565         if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
 566                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 567                     endptr == c + strlen(c)) {
 568                         /*
 569                          * A number alright - in the allowed range ?
 570                          */
 571                         if (l <= -12*3600 || l >= 12*3600) {
 572                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 573                                     "'timezone' mount option - %ld "
 574                                     "is out of range. Assuming 0.", l);
 575                                 l = 0;
 576                         }
 577                 } else {
 578                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 579                             "'timezone' mount option - argument %s "
 580                             "is not a valid number. Assuming 0.", c);
 581                         l = 0;
 582                 }
 583                 fsp->pcfs_secondswest = l;
 584         }
 585 
 586         /*
 587          * The "secsize=..." mount option is a workaround for the lack of
 588          * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
 589          * partition table of a disk image and it has been partitioned with
 590          * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
 591          * images.
 592          * That should really be fixed in lofi ... this is a workaround.
 593          */
 594         if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
 595                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 596                     endptr == c + strlen(c)) {
 597                         /*
 598                          * A number alright - a valid sector size as well ?
 599                          */
 600                         if (!VALID_SECSIZE(l)) {
 601                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 602                                     "'secsize' mount option - %ld is "
 603                                     "unsupported. Autodetecting.", l);
 604                                 l = 0;
 605                         }
 606                 } else {
 607                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 608                             "'secsize' mount option - argument %s "
 609                             "is not a valid number. Autodetecting.", c);
 610                         l = 0;
 611                 }
 612                 fsp->pcfs_secsize = l;
 613                 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
 614         }
 615 }
 616 
 617 /*
 618  * vfs operations
 619  */
 620 
 621 /*
 622  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
 623  */
 624 static int
 625 pcfs_mount(
 626         struct vfs *vfsp,
 627         struct vnode *mvp,
 628         struct mounta *uap,
 629         struct cred *cr)
 630 {
 631         struct pcfs *fsp;
 632         struct vnode *devvp;
 633         dev_t pseudodev;
 634         dev_t xdev;
 635         int dos_ldrive = 0;
 636         int error;
 637         int remounting;
 638 
 639         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 640                 return (error);
 641 
 642         if (mvp->v_type != VDIR)
 643                 return (ENOTDIR);
 644 
 645         mutex_enter(&mvp->v_lock);
 646         if ((uap->flags & MS_REMOUNT) == 0 &&
 647             (uap->flags & MS_OVERLAY) == 0 &&
 648             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 649                 mutex_exit(&mvp->v_lock);
 650                 return (EBUSY);
 651         }
 652         mutex_exit(&mvp->v_lock);
 653 
 654         /*
 655          * PCFS doesn't do mount arguments anymore - everything's a mount
 656          * option these days. In order not to break existing callers, we
 657          * don't reject it yet, just warn that the data (if any) is ignored.
 658          */
 659         if (uap->datalen != 0)
 660                 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
 661                     "mount argument structures instead of mount options. "
 662                     "Ignoring mount(2) 'dataptr' argument.");
 663 
 664         /*
 665          * This is needed early, to make sure the access / open calls
 666          * are done using the correct mode. Processing this mount option
 667          * only when calling pcfs_parse_mntopts() would lead us to attempt
 668          * a read/write access to a possibly writeprotected device, and
 669          * a readonly mount attempt might fail because of that.
 670          */
 671         if (uap->flags & MS_RDONLY) {
 672                 vfsp->vfs_flag |= VFS_RDONLY;
 673                 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 674         }
 675 
 676         /*
 677          * For most filesystems, this is just a lookupname() on the
 678          * mount pathname string. PCFS historically has to do its own
 679          * partition table parsing because not all Solaris architectures
 680          * support all styles of partitioning that PC media can have, and
 681          * hence PCFS understands "device names" that don't map to actual
 682          * physical device nodes. Parsing the "PCFS syntax" for device
 683          * names is done in pcfs_device_identify() - see there.
 684          *
 685          * Once all block device drivers that can host FAT filesystems have
 686          * been enhanced to create device nodes for all PC-style partitions,
 687          * this code can go away.
 688          */
 689         if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
 690                 return (error);
 691 
 692         /*
 693          * As with looking up the actual device to mount, PCFS cannot rely
 694          * on just the checks done by vfs_ismounted() whether a given device
 695          * is mounted already. The additional check against the "PCFS syntax"
 696          * is done in  pcfs_device_ismounted().
 697          */
 698         remounting = (uap->flags & MS_REMOUNT);
 699 
 700         if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
 701             &pseudodev))
 702                 return (error);
 703 
 704         if (remounting)
 705                 return (0);
 706 
 707         /*
 708          * Mount the filesystem.
 709          * An instance structure is required before the attempt to locate
 710          * and parse the FAT BPB. This is because mount options may change
 711          * the behaviour of the filesystem type matching code. Precreate
 712          * it and fill it in to a degree that allows parsing the mount
 713          * options.
 714          */
 715         devvp = makespecvp(xdev, VBLK);
 716         if (IS_SWAPVP(devvp)) {
 717                 VN_RELE(devvp);
 718                 return (EBUSY);
 719         }
 720         error = VOP_OPEN(&devvp,
 721             (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
 722         if (error) {
 723                 VN_RELE(devvp);
 724                 return (error);
 725         }
 726 
 727         fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
 728         fsp->pcfs_vfs = vfsp;
 729         fsp->pcfs_xdev = xdev;
 730         fsp->pcfs_devvp = devvp;
 731         fsp->pcfs_ldrive = dos_ldrive;
 732         mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
 733 
 734         pcfs_parse_mntopts(fsp);
 735 
 736         /*
 737          * This is the actual "mount" - the PCFS superblock check.
 738          *
 739          * Find the requested logical drive and the FAT BPB therein.
 740          * Check device type and flag the instance if media is removeable.
 741          *
 742          * Initializes most members of the filesystem instance structure.
 743          * Returns EINVAL if no valid BPB can be found. Other errors may
 744          * occur after I/O failures, or when invalid / unparseable partition
 745          * tables are encountered.
 746          */
 747         if (error = pc_getfattype(fsp))
 748                 goto errout;
 749 
 750         /*
 751          * Now that the BPB has been parsed, this structural information
 752          * is available and known to be valid. Initialize the VFS.
 753          */
 754         vfsp->vfs_data = fsp;
 755         vfsp->vfs_dev = pseudodev;
 756         vfsp->vfs_fstype = pcfstype;
 757         vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
 758         vfsp->vfs_bcount = 0;
 759         vfsp->vfs_bsize = fsp->pcfs_clsize;
 760 
 761         /*
 762          * Validate that we can access the FAT and that it is, to the
 763          * degree we can verify here, self-consistent.
 764          */
 765         if (error = pc_verify(fsp))
 766                 goto errout;
 767 
 768         /*
 769          * Record the time of the mount, to return as an "approximate"
 770          * timestamp for the FAT root directory. Since FAT roots don't
 771          * have timestamps, this is less confusing to the user than
 772          * claiming "zero" / Jan/01/1970.
 773          */
 774         gethrestime(&fsp->pcfs_mounttime);
 775 
 776         /*
 777          * Fix up the mount options. Because "noatime" is made default on
 778          * removeable media only, a fixed disk will have neither "atime"
 779          * nor "noatime" set. We set the options explicitly depending on
 780          * the PCFS_NOATIME flag, to inform the user of what applies.
 781          * Mount option cancellation will take care that the mutually
 782          * exclusive 'other' is cleared.
 783          */
 784         vfs_setmntopt(vfsp,
 785             fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
 786             NULL, 0);
 787 
 788         /*
 789          * All clear - insert the FS instance into PCFS' list.
 790          */
 791         mutex_enter(&pcfslock);
 792         fsp->pcfs_nxt = pc_mounttab;
 793         pc_mounttab = fsp;
 794         mutex_exit(&pcfslock);
 795         atomic_inc_32(&pcfs_mountcount);
 796         return (0);
 797 
 798 errout:
 799         (void) VOP_CLOSE(devvp,
 800             vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
 801             1, (offset_t)0, cr, NULL);
 802         VN_RELE(devvp);
 803         mutex_destroy(&fsp->pcfs_lock);
 804         kmem_free(fsp, sizeof (*fsp));
 805         return (error);
 806 
 807 }
 808 
 809 static int
 810 pcfs_unmount(
 811         struct vfs *vfsp,
 812         int flag,
 813         struct cred *cr)
 814 {
 815         struct pcfs *fsp, *fsp1;
 816 
 817         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 818                 return (EPERM);
 819 
 820         fsp = VFSTOPCFS(vfsp);
 821 
 822         /*
 823          * We don't have to lock fsp because the VVFSLOCK in vfs layer will
 824          * prevent lookuppn from crossing the mount point.
 825          * If this is not a forced umount request and there's ongoing I/O,
 826          * don't allow the mount to proceed.
 827          */
 828         if (flag & MS_FORCE)
 829                 vfsp->vfs_flag |= VFS_UNMOUNTED;
 830         else if (fsp->pcfs_nrefs)
 831                 return (EBUSY);
 832 
 833         mutex_enter(&pcfslock);
 834 
 835         /*
 836          * If this is a forced umount request or if the fs instance has
 837          * been marked as beyond recovery, allow the umount to proceed
 838          * regardless of state. pc_diskchanged() forcibly releases all
 839          * inactive vnodes/pcnodes.
 840          */
 841         if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
 842                 rw_enter(&pcnodes_lock, RW_WRITER);
 843                 pc_diskchanged(fsp);
 844                 rw_exit(&pcnodes_lock);
 845         }
 846 
 847         /* now there should be no pcp node on pcfhead or pcdhead. */
 848 
 849         if (fsp == pc_mounttab) {
 850                 pc_mounttab = fsp->pcfs_nxt;
 851         } else {
 852                 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
 853                         if (fsp1->pcfs_nxt == fsp)
 854                                 fsp1->pcfs_nxt = fsp->pcfs_nxt;
 855         }
 856 
 857         mutex_exit(&pcfslock);
 858 
 859         /*
 860          * Since we support VFS_FREEVFS(), there's no need to
 861          * free the fsp right now. The framework will tell us
 862          * when the right time to do so has arrived by calling
 863          * into pcfs_freevfs.
 864          */
 865         return (0);
 866 }
 867 
 868 /*
 869  * find root of pcfs
 870  */
 871 static int
 872 pcfs_root(
 873         struct vfs *vfsp,
 874         struct vnode **vpp)
 875 {
 876         struct pcfs *fsp;
 877         struct pcnode *pcp;
 878         int error;
 879 
 880         fsp = VFSTOPCFS(vfsp);
 881         if (error = pc_lockfs(fsp, 0, 0))
 882                 return (error);
 883 
 884         pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
 885         pc_unlockfs(fsp);
 886         *vpp = PCTOV(pcp);
 887         pcp->pc_flags |= PC_EXTERNAL;
 888         return (0);
 889 }
 890 
 891 /*
 892  * Get file system statistics.
 893  */
 894 static int
 895 pcfs_statvfs(
 896         struct vfs *vfsp,
 897         struct statvfs64 *sp)
 898 {
 899         struct pcfs *fsp;
 900         int error;
 901         dev32_t d32;
 902 
 903         fsp = VFSTOPCFS(vfsp);
 904         error = pc_getfat(fsp);
 905         if (error)
 906                 return (error);
 907         bzero(sp, sizeof (*sp));
 908         sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
 909         sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
 910         sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
 911         sp->f_files = (fsfilcnt64_t)-1;
 912         sp->f_ffree = (fsfilcnt64_t)-1;
 913         sp->f_favail = (fsfilcnt64_t)-1;
 914 #ifdef notdef
 915         (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
 916 #endif /* notdef */
 917         (void) cmpldev(&d32, vfsp->vfs_dev);
 918         sp->f_fsid = d32;
 919         (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 920         sp->f_flag = vf_to_stf(vfsp->vfs_flag);
 921         sp->f_namemax = PCMAXNAMLEN;
 922         return (0);
 923 }
 924 
 925 static int
 926 pc_syncfsnodes(struct pcfs *fsp)
 927 {
 928         struct pchead *hp;
 929         struct pcnode *pcp;
 930         int error;
 931 
 932         if (error = pc_lockfs(fsp, 0, 0))
 933                 return (error);
 934 
 935         if (!(error = pc_syncfat(fsp))) {
 936                 hp = pcfhead;
 937                 while (hp < & pcfhead [ NPCHASH ]) {
 938                         rw_enter(&pcnodes_lock, RW_READER);
 939                         pcp = hp->pch_forw;
 940                         while (pcp != (struct pcnode *)hp) {
 941                                 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
 942                                         if (error = pc_nodesync(pcp))
 943                                                 break;
 944                                 pcp = pcp -> pc_forw;
 945                         }
 946                         rw_exit(&pcnodes_lock);
 947                         if (error)
 948                                 break;
 949                         hp++;
 950                 }
 951         }
 952         pc_unlockfs(fsp);
 953         return (error);
 954 }
 955 
 956 /*
 957  * Flush any pending I/O.
 958  */
 959 /*ARGSUSED*/
 960 static int
 961 pcfs_sync(
 962         struct vfs *vfsp,
 963         short flag,
 964         struct cred *cr)
 965 {
 966         struct pcfs *fsp;
 967         int error = 0;
 968 
 969         /* this prevents the filesystem from being umounted. */
 970         mutex_enter(&pcfslock);
 971         if (vfsp != NULL) {
 972                 fsp = VFSTOPCFS(vfsp);
 973                 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
 974                         error = pc_syncfsnodes(fsp);
 975                 } else {
 976                         rw_enter(&pcnodes_lock, RW_WRITER);
 977                         pc_diskchanged(fsp);
 978                         rw_exit(&pcnodes_lock);
 979                         error = EIO;
 980                 }
 981         } else {
 982                 fsp = pc_mounttab;
 983                 while (fsp != NULL) {
 984                         if (fsp->pcfs_flags & PCFS_IRRECOV) {
 985                                 rw_enter(&pcnodes_lock, RW_WRITER);
 986                                 pc_diskchanged(fsp);
 987                                 rw_exit(&pcnodes_lock);
 988                                 error = EIO;
 989                                 break;
 990                         }
 991                         error = pc_syncfsnodes(fsp);
 992                         if (error) break;
 993                         fsp = fsp->pcfs_nxt;
 994                 }
 995         }
 996         mutex_exit(&pcfslock);
 997         return (error);
 998 }
 999 
1000 int
1001 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1002 {
1003         int err;
1004 
1005         if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1006                 return (EIO);
1007 
1008         if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1009                 fsp->pcfs_count++;
1010         } else {
1011                 mutex_enter(&fsp->pcfs_lock);
1012                 if (fsp->pcfs_flags & PCFS_LOCKED)
1013                         panic("pc_lockfs");
1014                 /*
1015                  * We check the IRRECOV bit again just in case somebody
1016                  * snuck past the initial check but then got held up before
1017                  * they could grab the lock.  (And in the meantime someone
1018                  * had grabbed the lock and set the bit)
1019                  */
1020                 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1021                         if ((err = pc_getfat(fsp))) {
1022                                 mutex_exit(&fsp->pcfs_lock);
1023                                 return (err);
1024                         }
1025                 }
1026                 fsp->pcfs_flags |= PCFS_LOCKED;
1027                 fsp->pcfs_owner = curthread;
1028                 fsp->pcfs_count++;
1029         }
1030         return (0);
1031 }
1032 
1033 void
1034 pc_unlockfs(struct pcfs *fsp)
1035 {
1036 
1037         if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1038                 panic("pc_unlockfs");
1039         if (--fsp->pcfs_count < 0)
1040                 panic("pc_unlockfs: count");
1041         if (fsp->pcfs_count == 0) {
1042                 fsp->pcfs_flags &= ~PCFS_LOCKED;
1043                 fsp->pcfs_owner = 0;
1044                 mutex_exit(&fsp->pcfs_lock);
1045         }
1046 }
1047 
1048 int
1049 pc_syncfat(struct pcfs *fsp)
1050 {
1051         struct buf *bp;
1052         int nfat;
1053         int     error = 0;
1054         struct fat_od_fsi *fsinfo_disk;
1055 
1056         if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1057             !(fsp->pcfs_flags & PCFS_FATMOD))
1058                 return (0);
1059         /*
1060          * write out all copies of FATs
1061          */
1062         fsp->pcfs_flags &= ~PCFS_FATMOD;
1063         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1064         for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1065                 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1066                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1067                 if (error) {
1068                         pc_mark_irrecov(fsp);
1069                         return (EIO);
1070                 }
1071         }
1072         pc_clear_fatchanges(fsp);
1073 
1074         /*
1075          * Write out fsinfo sector.
1076          */
1077         if (IS_FAT32(fsp)) {
1078                 bp = bread(fsp->pcfs_xdev,
1079                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1080                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1081                         error = geterror(bp);
1082                 }
1083                 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1084                 if (!error && FSISIG_OK(fsinfo_disk)) {
1085                         fsinfo_disk->fsi_incore.fs_free_clusters =
1086                             LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1087                         fsinfo_disk->fsi_incore.fs_next_free =
1088                             LE_32(FSINFO_UNKNOWN);
1089                         bwrite2(bp);
1090                         error = geterror(bp);
1091                 }
1092                 brelse(bp);
1093                 if (error) {
1094                         pc_mark_irrecov(fsp);
1095                         return (EIO);
1096                 }
1097         }
1098         return (0);
1099 }
1100 
1101 void
1102 pc_invalfat(struct pcfs *fsp)
1103 {
1104         struct pcfs *xfsp;
1105         int mount_cnt = 0;
1106 
1107         if (fsp->pcfs_fatp == (uchar_t *)0)
1108                 panic("pc_invalfat");
1109         /*
1110          * Release FAT
1111          */
1112         kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1113         fsp->pcfs_fatp = NULL;
1114         kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1115         fsp->pcfs_fat_changemap = NULL;
1116         /*
1117          * Invalidate all the blocks associated with the device.
1118          * Not needed if stateless.
1119          */
1120         for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1121                 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1122                         mount_cnt++;
1123 
1124         if (!mount_cnt)
1125                 binval(fsp->pcfs_xdev);
1126         /*
1127          * close mounted device
1128          */
1129         (void) VOP_CLOSE(fsp->pcfs_devvp,
1130             (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1131             1, (offset_t)0, CRED(), NULL);
1132 }
1133 
1134 void
1135 pc_badfs(struct pcfs *fsp)
1136 {
1137         cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1138             getmajor(fsp->pcfs_devvp->v_rdev),
1139             getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1140 }
1141 
1142 /*
1143  * The problem with supporting NFS on the PCFS filesystem is that there
1144  * is no good place to keep the generation number. The only possible
1145  * place is inside a directory entry. There are a few words that we
1146  * don't use - they store NT & OS/2 attributes, and the creation/last access
1147  * time of the file - but it seems wrong to use them. In addition, directory
1148  * entries come and go. If a directory is removed completely, its directory
1149  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1150  * inode blocks are dedicated for inodes, so the generation numbers are
1151  * permanently kept on the disk.
1152  */
1153 static int
1154 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1155 {
1156         struct pcnode *pcp;
1157         struct pc_fid *pcfid;
1158         struct pcfs *fsp;
1159         struct pcdir *ep;
1160         daddr_t eblkno;
1161         int eoffset;
1162         struct buf *bp;
1163         int error;
1164         pc_cluster32_t  cn;
1165 
1166         pcfid = (struct pc_fid *)fidp;
1167         fsp = VFSTOPCFS(vfsp);
1168 
1169         error = pc_lockfs(fsp, 0, 0);
1170         if (error) {
1171                 *vpp = NULL;
1172                 return (error);
1173         }
1174 
1175         if (pcfid->pcfid_block == 0) {
1176                 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1177                 pcp->pc_flags |= PC_EXTERNAL;
1178                 *vpp = PCTOV(pcp);
1179                 pc_unlockfs(fsp);
1180                 return (0);
1181         }
1182         eblkno = pcfid->pcfid_block;
1183         eoffset = pcfid->pcfid_offset;
1184 
1185         if ((pc_dbtocl(fsp,
1186             eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1187             (eoffset > fsp->pcfs_clsize)) {
1188                 pc_unlockfs(fsp);
1189                 *vpp = NULL;
1190                 return (EINVAL);
1191         }
1192 
1193         if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1194             < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1195                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1196                     fsp->pcfs_clsize);
1197         } else {
1198                 /*
1199                  * This is an access "backwards" into the FAT12/FAT16
1200                  * root directory. A better code structure would
1201                  * significantly improve maintainability here ...
1202                  */
1203                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1204                     (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1205         }
1206         if (bp->b_flags & (B_ERROR | B_STALE)) {
1207                 error = geterror(bp);
1208                 brelse(bp);
1209                 if (error)
1210                         pc_mark_irrecov(fsp);
1211                 *vpp = NULL;
1212                 pc_unlockfs(fsp);
1213                 return (error);
1214         }
1215         ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1216         /*
1217          * Ok, if this is a valid file handle that we gave out,
1218          * then simply ensuring that the creation time matches,
1219          * the entry has not been deleted, and it has a valid first
1220          * character should be enough.
1221          *
1222          * Unfortunately, verifying that the <blkno, offset> _still_
1223          * refers to a directory entry is not easy, since we'd have
1224          * to search _all_ directories starting from root to find it.
1225          * That's a high price to pay just in case somebody is forging
1226          * file handles. So instead we verify that as much of the
1227          * entry is valid as we can:
1228          *
1229          * 1. The starting cluster is 0 (unallocated) or valid
1230          * 2. It is not an LFN entry
1231          * 3. It is not hidden (unless mounted as such)
1232          * 4. It is not the label
1233          */
1234         cn = pc_getstartcluster(fsp, ep);
1235         /*
1236          * if the starting cluster is valid, but not valid according
1237          * to pc_validcl(), force it to be to simplify the following if.
1238          */
1239         if (cn == 0)
1240                 cn = PCF_FIRSTCLUSTER;
1241         if (IS_FAT32(fsp)) {
1242                 if (cn >= PCF_LASTCLUSTER32)
1243                         cn = PCF_FIRSTCLUSTER;
1244         } else {
1245                 if (cn >= PCF_LASTCLUSTER)
1246                         cn = PCF_FIRSTCLUSTER;
1247         }
1248         if ((!pc_validcl(fsp, cn)) ||
1249             (PCDL_IS_LFN(ep)) ||
1250             (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1251             ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1252                 bp->b_flags |= B_STALE | B_AGE;
1253                 brelse(bp);
1254                 pc_unlockfs(fsp);
1255                 return (EINVAL);
1256         }
1257         if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1258             (ep->pcd_filename[0] != PCD_ERASED) &&
1259             (pc_validchar(ep->pcd_filename[0]) ||
1260             (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1261                 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1262                 pcp->pc_flags |= PC_EXTERNAL;
1263                 *vpp = PCTOV(pcp);
1264         } else {
1265                 *vpp = NULL;
1266         }
1267         bp->b_flags |= B_STALE | B_AGE;
1268         brelse(bp);
1269         pc_unlockfs(fsp);
1270         return (0);
1271 }
1272 
1273 /*
1274  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1275  * a meg), so we can't bread() it all in at once. This routine reads a
1276  * fat a chunk at a time.
1277  */
1278 static int
1279 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1280 {
1281         struct buf *bp;
1282         size_t off;
1283         size_t readsize;
1284         daddr_t diskblk;
1285         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1286         daddr_t start = fsp->pcfs_fatstart;
1287 
1288         readsize = fsp->pcfs_clsize;
1289         for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1290                 if (readsize > (fatsize - off))
1291                         readsize = fatsize - off;
1292                 diskblk = pc_dbdaddr(fsp, start +
1293                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1294                 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1295                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1296                         brelse(bp);
1297                         return (EIO);
1298                 }
1299                 bp->b_flags |= B_STALE | B_AGE;
1300                 bcopy(bp->b_un.b_addr, fatp, readsize);
1301                 brelse(bp);
1302         }
1303         return (0);
1304 }
1305 
1306 /*
1307  * We write the FAT out a _lot_, in order to make sure that it
1308  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1309  * the FAT might be a couple of megabytes, and writing it all out just
1310  * because we created or deleted a small file is painful (especially
1311  * since we do it for each alternate FAT too). So instead, for FAT16 and
1312  * FAT32 we only write out the bit that has changed. We don't clear
1313  * the 'updated' fields here because the caller might be writing out
1314  * several FATs, so the caller must use pc_clear_fatchanges() after
1315  * all FATs have been updated.
1316  * This function doesn't take "start" from fsp->pcfs_dosstart because
1317  * callers can use it to write either the primary or any of the alternate
1318  * FAT tables.
1319  */
1320 static int
1321 pc_writefat(struct pcfs *fsp, daddr_t start)
1322 {
1323         struct buf *bp;
1324         size_t off;
1325         size_t writesize;
1326         int     error;
1327         uchar_t *fatp = fsp->pcfs_fatp;
1328         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1329 
1330         writesize = fsp->pcfs_clsize;
1331         for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1332                 if (writesize > (fatsize - off))
1333                         writesize = fatsize - off;
1334                 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1335                         continue;
1336                 }
1337                 bp = ngeteblk(writesize);
1338                 bp->b_edev = fsp->pcfs_xdev;
1339                 bp->b_dev = cmpdev(bp->b_edev);
1340                 bp->b_blkno = pc_dbdaddr(fsp, start +
1341                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1342                 bcopy(fatp, bp->b_un.b_addr, writesize);
1343                 bwrite2(bp);
1344                 error = geterror(bp);
1345                 brelse(bp);
1346                 if (error) {
1347                         return (error);
1348                 }
1349         }
1350         return (0);
1351 }
1352 
1353 /*
1354  * Mark the FAT cluster that 'cn' is stored in as modified.
1355  */
1356 void
1357 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1358 {
1359         pc_cluster32_t  bn;
1360         size_t          size;
1361 
1362         /* which fat block is the cluster number stored in? */
1363         if (IS_FAT32(fsp)) {
1364                 size = sizeof (pc_cluster32_t);
1365                 bn = pc_lblkno(fsp, cn * size);
1366                 fsp->pcfs_fat_changemap[bn] = 1;
1367         } else if (IS_FAT16(fsp)) {
1368                 size = sizeof (pc_cluster16_t);
1369                 bn = pc_lblkno(fsp, cn * size);
1370                 fsp->pcfs_fat_changemap[bn] = 1;
1371         } else {
1372                 offset_t off;
1373                 pc_cluster32_t nbn;
1374 
1375                 ASSERT(IS_FAT12(fsp));
1376                 off = cn + (cn >> 1);
1377                 bn = pc_lblkno(fsp, off);
1378                 fsp->pcfs_fat_changemap[bn] = 1;
1379                 /* does this field wrap into the next fat cluster? */
1380                 nbn = pc_lblkno(fsp, off + 1);
1381                 if (nbn != bn) {
1382                         fsp->pcfs_fat_changemap[nbn] = 1;
1383                 }
1384         }
1385 }
1386 
1387 /*
1388  * return whether the FAT cluster 'bn' is updated and needs to
1389  * be written out.
1390  */
1391 int
1392 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1393 {
1394         return (fsp->pcfs_fat_changemap[bn] == 1);
1395 }
1396 
1397 /*
1398  * Implementation of VFS_FREEVFS() to support forced umounts.
1399  * This is called by the vfs framework after umount, to trigger
1400  * the release of any resources still associated with the given
1401  * vfs_t once the need to keep them has gone away.
1402  */
1403 void
1404 pcfs_freevfs(vfs_t *vfsp)
1405 {
1406         struct pcfs *fsp = VFSTOPCFS(vfsp);
1407 
1408         mutex_enter(&pcfslock);
1409         /*
1410          * Purging the FAT closes the device - can't do any more
1411          * I/O after this.
1412          */
1413         if (fsp->pcfs_fatp != (uchar_t *)0)
1414                 pc_invalfat(fsp);
1415         mutex_exit(&pcfslock);
1416 
1417         VN_RELE(fsp->pcfs_devvp);
1418         mutex_destroy(&fsp->pcfs_lock);
1419         kmem_free(fsp, sizeof (*fsp));
1420 
1421         /*
1422          * Allow _fini() to succeed now, if so desired.
1423          */
1424         atomic_dec_32(&pcfs_mountcount);
1425 }
1426 
1427 
1428 /*
1429  * PC-style partition parsing and FAT BPB identification/validation code.
1430  * The partition parsers here assume:
1431  *      - a FAT filesystem will be in a partition that has one of a set of
1432  *        recognized partition IDs
1433  *      - the user wants the 'numbering' (C:, D:, ...) that one would get
1434  *        on MSDOS 6.x.
1435  *        That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1436  *        will not factor in the enumeration.
1437  * These days, such assumptions should be revisited. FAT is no longer the
1438  * only game in 'PC town'.
1439  */
1440 /*
1441  * isDosDrive()
1442  *      Boolean function.  Give it the systid field for an fdisk partition
1443  *      and it decides if that's a systid that describes a DOS drive.  We
1444  *      use systid values defined in sys/dktp/fdisk.h.
1445  */
1446 static int
1447 isDosDrive(uchar_t checkMe)
1448 {
1449         return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1450             (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1451             (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1452             (checkMe == DIAGPART));
1453 }
1454 
1455 
1456 /*
1457  * isDosExtended()
1458  *      Boolean function.  Give it the systid field for an fdisk partition
1459  *      and it decides if that's a systid that describes an extended DOS
1460  *      partition.
1461  */
1462 static int
1463 isDosExtended(uchar_t checkMe)
1464 {
1465         return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1466 }
1467 
1468 
1469 /*
1470  * isBootPart()
1471  *      Boolean function.  Give it the systid field for an fdisk partition
1472  *      and it decides if that's a systid that describes a Solaris boot
1473  *      partition.
1474  */
1475 static int
1476 isBootPart(uchar_t checkMe)
1477 {
1478         return (checkMe == X86BOOT);
1479 }
1480 
1481 
1482 /*
1483  * noLogicalDrive()
1484  *      Display error message about not being able to find a logical
1485  *      drive.
1486  */
1487 static void
1488 noLogicalDrive(int ldrive)
1489 {
1490         if (ldrive == BOOT_PARTITION_DRIVE) {
1491                 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1492         } else {
1493                 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1494         }
1495 }
1496 
1497 
1498 /*
1499  * findTheDrive()
1500  *      Discover offset of the requested logical drive, and return
1501  *      that offset (startSector), the systid of that drive (sysid),
1502  *      and a buffer pointer (bp), with the buffer contents being
1503  *      the first sector of the logical drive (i.e., the sector that
1504  *      contains the BPB for that drive).
1505  *
1506  * Note: this code is not capable of addressing >2TB disks, as it uses
1507  *       daddr_t not diskaddr_t, some of the calculations would overflow
1508  */
/*
 * Copy the FD_NUMPART partition entries out of the boot block at 'mbr'
 * (accessed as a struct mboot) into the struct ipart array at 'ptblp'.
 */
#define	COPY_PTBL(mbr, ptblp)						\
	bcopy(&((struct mboot *)(mbr))->parts, (ptblp),			\
	    sizeof (struct ipart) * FD_NUMPART)
1512 
1513 static int
1514 findTheDrive(struct pcfs *fsp, buf_t **bp)
1515 {
1516         int ldrive = fsp->pcfs_ldrive;
1517         dev_t dev = fsp->pcfs_devvp->v_rdev;
1518 
1519         struct ipart dosp[FD_NUMPART];  /* incore fdisk partition structure */
1520         daddr_t lastseek = 0;           /* Disk block we sought previously */
1521         daddr_t diskblk = 0;            /* Disk block to get */
1522         daddr_t xstartsect;             /* base of Extended DOS partition */
1523         int logicalDriveCount = 0;      /* Count of logical drives seen */
1524         int extendedPart = -1;          /* index of extended dos partition */
1525         int primaryPart = -1;           /* index of primary dos partition */
1526         int bootPart = -1;              /* index of a Solaris boot partition */
1527         uint32_t xnumsect = 0;          /* length of extended DOS partition */
1528         int driveIndex;                 /* computed FDISK table index */
1529         daddr_t startsec;
1530         len_t mediasize;
1531         int i;
1532         /*
1533          * Count of drives in the current extended partition's
1534          * FDISK table, and indexes of the drives themselves.
1535          */
1536         int extndDrives[FD_NUMPART];
1537         int numDrives = 0;
1538 
1539         /*
1540          * Count of drives (beyond primary) in master boot record's
1541          * FDISK table, and indexes of the drives themselves.
1542          */
1543         int extraDrives[FD_NUMPART];
1544         int numExtraDrives = 0;
1545 
1546         /*
1547          * "ldrive == 0" should never happen, as this is a request to
1548          * mount the physical device (and ignore partitioning). The code
1549          * in pcfs_mount() should have made sure that a logical drive number
1550          * is at least 1, meaning we're looking for drive "C:". It is not
1551          * safe (and a bug in the callers of this function) to request logical
1552          * drive number 0; we could ASSERT() but a graceful EIO is a more
1553          * polite way.
1554          */
1555         if (ldrive == 0) {
1556                 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1557                 noLogicalDrive(ldrive);
1558                 return (EIO);
1559         }
1560 
1561         /*
1562          *  Copy from disk block into memory aligned structure for fdisk usage.
1563          */
1564         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1565 
1566         /*
1567          * This check is ok because a FAT BPB and a master boot record (MBB)
1568          * have the same signature, in the same position within the block.
1569          */
1570         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1571                 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1572                     "device (%x.%x):%d\n",
1573                     getmajor(dev), getminor(dev), ldrive);
1574                 return (EINVAL);
1575         }
1576 
1577         /*
1578          * Get a summary of what is in the Master FDISK table.
1579          * Normally we expect to find one partition marked as a DOS drive.
1580          * This partition is the one Windows calls the primary dos partition.
1581          * If the machine has any logical drives then we also expect
1582          * to find a partition marked as an extended DOS partition.
1583          *
1584          * Sometimes we'll find multiple partitions marked as DOS drives.
1585          * The Solaris fdisk program allows these partitions
1586          * to be created, but Windows fdisk no longer does.  We still need
1587          * to support these, though, since Windows does.  We also need to fix
1588          * our fdisk to behave like the Windows version.
1589          *
1590          * It turns out that some off-the-shelf media have *only* an
1591          * Extended partition, so we need to deal with that case as well.
1592          *
1593          * Only a single (the first) Extended or Boot Partition will
1594          * be recognized.  Any others will be ignored.
1595          */
1596         for (i = 0; i < FD_NUMPART; i++) {
1597                 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1598                     uint_t, (uint_t)dosp[i].systid,
1599                     uint_t, LE_32(dosp[i].relsect),
1600                     uint_t, LE_32(dosp[i].numsect));
1601 
1602                 if (isDosDrive(dosp[i].systid)) {
1603                         if (primaryPart < 0) {
1604                                 logicalDriveCount++;
1605                                 primaryPart = i;
1606                         } else {
1607                                 extraDrives[numExtraDrives++] = i;
1608                         }
1609                         continue;
1610                 }
1611                 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1612                         extendedPart = i;
1613                         continue;
1614                 }
1615                 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1616                         bootPart = i;
1617                         continue;
1618                 }
1619         }
1620 
1621         if (ldrive == BOOT_PARTITION_DRIVE) {
1622                 if (bootPart < 0) {
1623                         noLogicalDrive(ldrive);
1624                         return (EINVAL);
1625                 }
1626                 startsec = LE_32(dosp[bootPart].relsect);
1627                 mediasize = LE_32(dosp[bootPart].numsect);
1628                 goto found;
1629         }
1630 
1631         if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1632                 startsec = LE_32(dosp[primaryPart].relsect);
1633                 mediasize = LE_32(dosp[primaryPart].numsect);
1634                 goto found;
1635         }
1636 
1637         /*
1638          * We are not looking for the C: drive (or the primary drive
1639          * was not found), so we had better have an extended partition
1640          * or extra drives in the Master FDISK table.
1641          */
1642         if ((extendedPart < 0) && (numExtraDrives == 0)) {
1643                 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1644                 noLogicalDrive(ldrive);
1645                 return (EINVAL);
1646         }
1647 
1648         if (extendedPart >= 0) {
1649                 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1650                 xnumsect = LE_32(dosp[extendedPart].numsect);
1651                 do {
1652                         /*
1653                          *  If the seek would not cause us to change
1654                          *  position on the drive, then we're out of
1655                          *  extended partitions to examine.
1656                          */
1657                         if (diskblk == lastseek)
1658                                 break;
1659                         logicalDriveCount += numDrives;
1660                         /*
1661                          *  Seek the next extended partition, and find
1662                          *  logical drives within it.
1663                          */
1664                         brelse(*bp);
1665                         /*
1666                          * bread() block numbers are multiples of DEV_BSIZE
1667                          * but the device sector size (the unit of partitioning)
1668                          * might be larger than that; pcfs_get_device_info()
1669                          * has calculated the multiplicator for us.
1670                          */
1671                         *bp = bread(dev,
1672                             pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1673                         if ((*bp)->b_flags & B_ERROR) {
1674                                 return (EIO);
1675                         }
1676 
1677                         lastseek = diskblk;
1678                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1679                         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1680                                 cmn_err(CE_NOTE, "!pcfs: "
1681                                     "extended partition table signature err, "
1682                                     "device (%x.%x):%d, LBA %u",
1683                                     getmajor(dev), getminor(dev), ldrive,
1684                                     (uint_t)pc_dbdaddr(fsp, diskblk));
1685                                 return (EINVAL);
1686                         }
1687                         /*
1688                          *  Count up drives, and track where the next
1689                          *  extended partition is in case we need it.  We
1690                          *  are expecting only one extended partition.  If
1691                          *  there is more than one we'll only go to the
1692                          *  first one we see, but warn about ignoring.
1693                          */
1694                         numDrives = 0;
1695                         for (i = 0; i < FD_NUMPART; i++) {
1696                                 DTRACE_PROBE4(extendedpart,
1697                                     struct pcfs *, fsp,
1698                                     uint_t, (uint_t)dosp[i].systid,
1699                                     uint_t, LE_32(dosp[i].relsect),
1700                                     uint_t, LE_32(dosp[i].numsect));
1701                                 if (isDosDrive(dosp[i].systid)) {
1702                                         extndDrives[numDrives++] = i;
1703                                 } else if (isDosExtended(dosp[i].systid)) {
1704                                         if (diskblk != lastseek) {
1705                                                 /*
1706                                                  * Already found an extended
1707                                                  * partition in this table.
1708                                                  */
1709                                                 cmn_err(CE_NOTE,
1710                                                     "!pcfs: ignoring unexpected"
1711                                                     " additional extended"
1712                                                     " partition");
1713                                         } else {
1714                                                 diskblk = xstartsect +
1715                                                     LE_32(dosp[i].relsect);
1716                                         }
1717                                 }
1718                         }
1719                 } while (ldrive > logicalDriveCount + numDrives);
1720 
1721                 ASSERT(numDrives <= FD_NUMPART);
1722 
1723                 if (ldrive <= logicalDriveCount + numDrives) {
1724                         /*
1725                          * The number of logical drives we've found thus
1726                          * far is enough to get us to the one we were
1727                          * searching for.
1728                          */
1729                         driveIndex = logicalDriveCount + numDrives - ldrive;
1730                         mediasize =
1731                             LE_32(dosp[extndDrives[driveIndex]].numsect);
1732                         startsec =
1733                             LE_32(dosp[extndDrives[driveIndex]].relsect) +
1734                             lastseek;
1735                         if (startsec > (xstartsect + xnumsect)) {
1736                                 cmn_err(CE_NOTE, "!pcfs: extended partition "
1737                                     "values bad");
1738                                 return (EINVAL);
1739                         }
1740                         goto found;
1741                 } else {
1742                         /*
1743                          * We ran out of extended dos partition
1744                          * drives.  The only hope now is to go
1745                          * back to extra drives defined in the master
1746                          * fdisk table.  But we overwrote that table
1747                          * already, so we must load it in again.
1748                          */
1749                         logicalDriveCount += numDrives;
1750                         brelse(*bp);
1751                         ASSERT(fsp->pcfs_dosstart == 0);
1752                         *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1753                             fsp->pcfs_secsize);
1754                         if ((*bp)->b_flags & B_ERROR) {
1755                                 return (EIO);
1756                         }
1757                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1758                 }
1759         }
1760         /*
1761          *  Still haven't found the drive, is it an extra
1762          *  drive defined in the main FDISK table?
1763          */
1764         if (ldrive <= logicalDriveCount + numExtraDrives) {
1765                 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1766                 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1767                 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1768                 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1769                 goto found;
1770         }
1771         /*
1772          *  Still haven't found the drive, and there is
1773          *  nowhere else to look.
1774          */
1775         noLogicalDrive(ldrive);
1776         return (EINVAL);
1777 
1778 found:
1779         /*
1780          * We need this value in units of sectorsize, because PCFS' internal
1781          * offset calculations go haywire for > 512Byte sectors unless all
1782          * pcfs_.*start values are in units of sectors.
1783          * So, assign before the capacity check (that's done in DEV_BSIZE)
1784          */
1785         fsp->pcfs_dosstart = startsec;
1786 
1787         /*
1788          * convert from device sectors to proper units:
1789          *      - starting sector: DEV_BSIZE (as argument to bread())
1790          *      - media size: Bytes
1791          */
1792         startsec = pc_dbdaddr(fsp, startsec);
1793         mediasize *= fsp->pcfs_secsize;
1794 
1795         /*
1796          * some additional validation / warnings in case the partition table
1797          * and the actual media capacity are not in accordance ...
1798          */
1799         if (fsp->pcfs_mediasize != 0) {
1800                 diskaddr_t startoff =
1801                     (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1802 
1803                 if (startoff >= fsp->pcfs_mediasize ||
1804                     startoff + mediasize > fsp->pcfs_mediasize) {
1805                         cmn_err(CE_WARN,
1806                             "!pcfs: partition size (LBA start %u, %lld bytes, "
1807                             "device (%x.%x):%d) smaller than "
1808                             "mediasize (%lld bytes).\n"
1809                             "filesystem may be truncated, access errors "
1810                             "may result.\n",
1811                             (uint_t)startsec, (long long)mediasize,
1812                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1813                             fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1814                 }
1815         } else {
1816                 fsp->pcfs_mediasize = mediasize;
1817         }
1818 
1819         return (0);
1820 }
1821 
1822 
1823 static fattype_t
1824 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1825 {
1826         uint32_t ncl = fsp->pcfs_ncluster;
1827 
1828         if (ncl <= 4096) {
1829                 if (bpb_get_FatSz16(bpb) == 0)
1830                         return (FAT_UNKNOWN);
1831 
1832                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1833                     bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1834                         return (FAT12);
1835                 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1836                         return (FAT12);
1837                 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1838                         return (FAT16);
1839 
1840                 switch (bpb_get_Media(bpb)) {
1841                         case SS8SPT:
1842                         case DS8SPT:
1843                         case SS9SPT:
1844                         case DS9SPT:
1845                         case DS18SPT:
1846                         case DS9_15SPT:
1847                                 /*
1848                                  * Is this reliable - all floppies are FAT12 ?
1849                                  */
1850                                 return (FAT12);
1851                         case MD_FIXED:
1852                                 /*
1853                                  * Is this reliable - disks are always FAT16 ?
1854                                  */
1855                                 return (FAT16);
1856                         default:
1857                                 break;
1858                 }
1859         } else if (ncl <= 65536) {
1860                 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1861                         return (FAT32);
1862                 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1863                         return (FAT32);
1864                 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1865                         return (FAT32);
1866 
1867                 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1868                         return (FAT16);
1869                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1870                         return (FAT16);
1871         }
1872 
1873         /*
1874          * We don't know
1875          */
1876         return (FAT_UNKNOWN);
1877 }
1878 
1879 /*
1880  * Check to see if the BPB we found is correct.
1881  *
1882  * This looks far more complicated that it needs to be for pure structural
1883  * validation. The reason for this is that parseBPB() is also used for
1884  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1885  * BPB fields (do not) have 'known good' values, even if we (do not) reject
1886  * the BPB when attempting to mount the filesystem.
1887  *
1888  * Real-world usage of FAT shows there are a lot of corner-case situations
1889  * and, following the specification strictly, invalid filesystems out there.
1890  * Known are situations such as:
1891  *      - FAT12/FAT16 filesystems with garbage in either totsec16/32
1892  *        instead of the zero in one of the fields mandated by the spec
1893  *      - filesystems that claim to be larger than the partition they're in
1894  *      - filesystems without valid media descriptor
1895  *      - FAT32 filesystems with RootEntCnt != 0
1896  *      - FAT32 filesystems with less than 65526 clusters
1897  *      - FAT32 filesystems without valid FSI sector
1898  *      - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1899  *
1900  * Such filesystems are accessible by PCFS - if it'd know to start with that
1901  * the filesystem should be treated as a specific FAT type. Before S10, it
1902  * relied on the PC/fdisk partition type for the purpose and almost completely
1903  * ignored the BPB; now it ignores the partition type for anything else but
1904  * logical drive enumeration, which can result in rejection of (invalid)
1905  * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1906  * has less than 65526 clusters.
1907  *
1908  * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1909  * not possible to allow all such mostly-compliant filesystems in unless one
1910  * accepts false positives (definitely invalid filesystems that cause problems
1911  * later). This at least allows to pinpoint why the mount failed.
1912  *
1913  * Due to the use of FAT on removeable media, all relaxations of the rules
1914  * here need to be carefully evaluated wrt. to potential effects on PCFS
1915  * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1916  * beware.
1917  */
1918 static int
1919 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1920 {
1921         fattype_t type;
1922 
1923         uint32_t        ncl;    /* number of clusters in file area */
1924         uint32_t        rec;
1925         uint32_t        reserved;
1926         uint32_t        fsisec, bkbootsec;
1927         blkcnt_t        totsec, totsec16, totsec32, datasec;
1928         size_t          fatsec, fatsec16, fatsec32, rdirsec;
1929         size_t          secsize;
1930         len_t           mediasize;
1931         uint64_t        validflags = 0;
1932 
1933         if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1934                 validflags |= BPB_BPBSIG_OK;
1935 
1936         rec = bpb_get_RootEntCnt(bpb);
1937         reserved = bpb_get_RsvdSecCnt(bpb);
1938         fsisec = bpb_get_FSInfo32(bpb);
1939         bkbootsec = bpb_get_BkBootSec32(bpb);
1940         totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1941         totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1942         fatsec16 = bpb_get_FatSz16(bpb);
1943         fatsec32 = bpb_get_FatSz32(bpb);
1944 
1945         totsec = totsec16 ? totsec16 : totsec32;
1946         fatsec = fatsec16 ? fatsec16 : fatsec32;
1947 
1948         secsize = bpb_get_BytesPerSec(bpb);
1949         if (!VALID_SECSIZE(secsize))
1950                 secsize = fsp->pcfs_secsize;
1951         if (secsize != fsp->pcfs_secsize) {
1952                 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1953                     getmajor(fsp->pcfs_xdev),
1954                     getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1955                 PC_DPRINTF2(3, "!BPB secsize %d != "
1956                     "autodetected media block size %d\n",
1957                     (int)secsize, (int)fsp->pcfs_secsize);
1958                 if (fsp->pcfs_ldrive) {
1959                         /*
1960                          * We've already attempted to parse the partition
1961                          * table. If the block size used for that don't match
1962                          * the PCFS sector size, we're hosed one way or the
1963                          * other. Just try what happens.
1964                          */
1965                         secsize = fsp->pcfs_secsize;
1966                         PC_DPRINTF1(3,
1967                             "!pcfs: Using autodetected secsize %d\n",
1968                             (int)secsize);
1969                 } else {
1970                         /*
1971                          * This allows mounting lofi images of PCFS partitions
1972                          * with sectorsize != DEV_BSIZE. We can't parse the
1973                          * partition table on whole-disk images unless the
1974                          * (undocumented) "secsize=..." mount option is used,
1975                          * but at least this allows us to mount if we have
1976                          * an image of a partition.
1977                          */
1978                         PC_DPRINTF1(3,
1979                             "!pcfs: Using BPB secsize %d\n", (int)secsize);
1980                 }
1981         }
1982 
1983         if (fsp->pcfs_mediasize == 0) {
1984                 mediasize = (len_t)totsec * (len_t)secsize;
1985                 /*
1986                  * This is not an error because not all devices support the
1987                  * dkio(7i) mediasize queries, and/or not all devices are
1988                  * partitioned. If we have not been able to figure out the
1989                  * size of the underlaying medium, we have to trust the BPB.
1990                  */
1991                 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1992                     "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1993                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1994                     fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1995         } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1996                 cmn_err(CE_WARN,
1997                     "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1998                     "FAT BPB mediasize (%lld Bytes).\n"
1999                     "truncated filesystem on device (%x.%x):%d, access errors "
2000                     "possible.\n",
2001                     (long long)fsp->pcfs_mediasize,
2002                     (long long)(totsec * (blkcnt_t)secsize),
2003                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2004                     fsp->pcfs_ldrive);
2005                 mediasize = fsp->pcfs_mediasize;
2006         } else {
2007                 /*
2008                  * This is actually ok. A FAT needs not occupy the maximum
2009                  * space available in its partition, it can be shorter.
2010                  */
2011                 mediasize = (len_t)totsec * (len_t)secsize;
2012         }
2013 
2014         /*
2015          * Since we let just about anything pass through this function,
2016          * fence against divide-by-zero here.
2017          */
2018         if (secsize)
2019                 rdirsec = roundup(rec * 32, secsize) / secsize;
2020         else
2021                 rdirsec = 0;
2022 
2023         /*
2024          * This assignment is necessary before pc_dbdaddr() can first be
2025          * used. Must initialize the value here.
2026          */
2027         fsp->pcfs_secsize = secsize;
2028         fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2029 
2030         fsp->pcfs_mediasize = mediasize;
2031 
2032         fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2033         fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2034         fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2035         fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2036         fsp->pcfs_rdirsec = rdirsec;
2037 
2038         /*
2039          * Remember: All PCFS offset calculations in sectors. Before I/O
2040          * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2041          * necessary so that media with > 512Byte sector sizes work correctly.
2042          */
2043         fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2044         fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2045         fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2046         datasec = totsec -
2047             (blkcnt_t)fatsec * fsp->pcfs_numfat -
2048             (blkcnt_t)rdirsec -
2049             (blkcnt_t)reserved;
2050 
2051         DTRACE_PROBE4(fatgeometry,
2052             blkcnt_t, totsec, size_t, fatsec,
2053             size_t, rdirsec, blkcnt_t, datasec);
2054 
2055         /*
2056          * 'totsec' is taken directly from the BPB and guaranteed to fit
2057          * into a 32bit unsigned integer. The calculation of 'datasec',
2058          * on the other hand, could underflow for incorrect values in
2059          * rdirsec/reserved/fatsec. Check for that.
2060          * We also check that the BPB conforms to the FAT specification's
2061          * requirement that either of the 16/32bit total sector counts
2062          * must be zero.
2063          */
2064         if (totsec != 0 &&
2065             (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2066             datasec < totsec && datasec <= UINT32_MAX)
2067                 validflags |= BPB_TOTSEC_OK;
2068 
2069         if ((len_t)totsec * (len_t)secsize <= mediasize)
2070                 validflags |= BPB_MEDIASZ_OK;
2071 
2072         if (VALID_SECSIZE(secsize))
2073                 validflags |= BPB_SECSIZE_OK;
2074         if (VALID_SPCL(fsp->pcfs_spcl))
2075                 validflags |= BPB_SECPERCLUS_OK;
2076         if (VALID_CLSIZE(fsp->pcfs_clsize))
2077                 validflags |= BPB_CLSIZE_OK;
2078         if (VALID_NUMFATS(fsp->pcfs_numfat))
2079                 validflags |= BPB_NUMFAT_OK;
2080         if (VALID_RSVDSEC(reserved) && reserved < totsec)
2081                 validflags |= BPB_RSVDSECCNT_OK;
2082         if (VALID_MEDIA(fsp->pcfs_mediadesc))
2083                 validflags |= BPB_MEDIADESC_OK;
2084         if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2085                 validflags |= BPB_BOOTSIG16_OK;
2086         if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2087                 validflags |= BPB_BOOTSIG32_OK;
2088         if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2089                 validflags |= BPB_FSTYPSTR16_OK;
2090         if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2091                 validflags |= BPB_FSTYPSTR32_OK;
2092         if (VALID_OEMNAME(bpb_OEMName(bpb)))
2093                 validflags |= BPB_OEMNAME_OK;
2094         if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2095                 validflags |= BPB_BKBOOTSEC_OK;
2096         if (fsisec > 0 && fsisec <= reserved)
2097                 validflags |= BPB_FSISEC_OK;
2098         if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2099                 validflags |= BPB_JMPBOOT_OK;
2100         if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2101                 validflags |= BPB_FSVER_OK;
2102         if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2103                 validflags |= BPB_VOLLAB16_OK;
2104         if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2105                 validflags |= BPB_VOLLAB32_OK;
2106         if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2107                 validflags |= BPB_EXTFLAGS_OK;
2108 
2109         /*
2110          * Try to determine which FAT format to use.
2111          *
2112          * Calculate the number of clusters in order to determine
2113          * the type of FAT we are looking at.  This is the only
2114          * recommended way of determining FAT type, though there
2115          * are other hints in the data, this is the best way.
2116          *
2117          * Since we let just about "anything" pass through this function
2118          * without early exits, fence against divide-by-zero here.
2119          *
2120          * datasec was already validated against UINT32_MAX so we know
2121          * the result will not overflow the 32bit calculation.
2122          */
2123         if (fsp->pcfs_spcl)
2124                 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2125         else
2126                 ncl = 0;
2127 
2128         fsp->pcfs_ncluster = ncl;
2129 
2130         /*
2131          * From the Microsoft FAT specification:
2132          * In the following example, when it says <, it does not mean <=.
2133          * Note also that the numbers are correct.  The first number for
2134          * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2135          * and the '<' signs are not wrong.
2136          *
2137          * We "specialdetect" the corner cases, and use at least one "extra"
2138          * criterion to decide whether it's FAT16 or FAT32 if the cluster
2139          * count is dangerously close to the boundaries.
2140          */
2141 
2142         if (ncl <= PCF_FIRSTCLUSTER) {
2143                 type = FAT_UNKNOWN;
2144         } else if (ncl < 4085) {
2145                 type = FAT12;
2146         } else if (ncl <= 4096) {
2147                 type = FAT_QUESTIONABLE;
2148         } else if (ncl < 65525) {
2149                 type = FAT16;
2150         } else if (ncl <= 65536) {
2151                 type = FAT_QUESTIONABLE;
2152         } else if (ncl < PCF_LASTCLUSTER32) {
2153                 type = FAT32;
2154         } else {
2155                 type = FAT_UNKNOWN;
2156         }
2157 
2158         DTRACE_PROBE4(parseBPB__initial,
2159             struct pcfs *, fsp, unsigned char *, bpb,
2160             int, validflags, fattype_t, type);
2161 
2162 recheck:
2163         fsp->pcfs_fatsec = fatsec;
2164 
2165         /* Do some final sanity checks for each specific type of FAT */
2166         switch (type) {
2167                 case FAT12:
2168                         if (rec != 0)
2169                                 validflags |= BPB_ROOTENTCNT_OK;
2170                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2171                             bpb_get_TotSec16(bpb) == 0)
2172                                 validflags |= BPB_TOTSEC16_OK;
2173                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2174                             bpb_get_TotSec32(bpb) == 0)
2175                                 validflags |= BPB_TOTSEC32_OK;
2176                         if (bpb_get_FatSz16(bpb) == fatsec)
2177                                 validflags |= BPB_FATSZ16_OK;
2178                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2179                             * 3 / 2)
2180                                 validflags |= BPB_FATSZ_OK;
2181                         if (ncl < 4085)
2182                                 validflags |= BPB_NCLUSTERS_OK;
2183 
2184                         fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2185                         fsp->pcfs_rootblksize =
2186                             fsp->pcfs_rdirsec * secsize;
2187                         fsp->pcfs_fsistart = 0;
2188 
2189                         if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2190                                 type = FAT_UNKNOWN;
2191                         break;
2192                 case FAT16:
2193                         if (rec != 0)
2194                                 validflags |= BPB_ROOTENTCNT_OK;
2195                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2196                             bpb_get_TotSec16(bpb) == 0)
2197                                 validflags |= BPB_TOTSEC16_OK;
2198                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2199                             bpb_get_TotSec32(bpb) == 0)
2200                                 validflags |= BPB_TOTSEC32_OK;
2201                         if (bpb_get_FatSz16(bpb) == fatsec)
2202                                 validflags |= BPB_FATSZ16_OK;
2203                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2204                                 validflags |= BPB_FATSZ_OK;
2205                         if (ncl >= 4085 && ncl < 65525)
2206                                 validflags |= BPB_NCLUSTERS_OK;
2207 
2208                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2209                         fsp->pcfs_rootblksize =
2210                             fsp->pcfs_rdirsec * secsize;
2211                         fsp->pcfs_fsistart = 0;
2212 
2213                         if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2214                                 type = FAT_UNKNOWN;
2215                         break;
2216                 case FAT32:
2217                         if (rec == 0)
2218                                 validflags |= BPB_ROOTENTCNT_OK;
2219                         if (bpb_get_TotSec16(bpb) == 0)
2220                                 validflags |= BPB_TOTSEC16_OK;
2221                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2222                                 validflags |= BPB_TOTSEC32_OK;
2223                         if (bpb_get_FatSz16(bpb) == 0)
2224                                 validflags |= BPB_FATSZ16_OK;
2225                         if (bpb_get_FatSz32(bpb) == fatsec)
2226                                 validflags |= BPB_FATSZ32_OK;
2227                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2228                                 validflags |= BPB_FATSZ_OK;
2229                         if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2230                                 validflags |= BPB_NCLUSTERS_OK;
2231 
2232                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2233                         fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2234                         fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2235                         if (validflags & BPB_FSISEC_OK)
2236                                 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2237                         fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2238                         if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2239                                 validflags |= BPB_ROOTCLUSTER_OK;
2240 
2241                         /*
2242                          * Current PCFS code only works if 'pcfs_rdirstart'
2243                          * contains the root cluster number on FAT32.
2244                          * That's a mis-use and would better be changed.
2245                          */
2246                         fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2247 
2248                         if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2249                                 type = FAT_UNKNOWN;
2250                         break;
2251                 case FAT_QUESTIONABLE:
2252                         type = secondaryBPBChecks(fsp, bpb, secsize);
2253                         goto recheck;
2254                 default:
2255                         ASSERT(type == FAT_UNKNOWN);
2256                         break;
2257         }
2258 
2259         ASSERT(type != FAT_QUESTIONABLE);
2260 
2261         fsp->pcfs_fattype = type;
2262 
2263         if (valid)
2264                 *valid = validflags;
2265 
2266         DTRACE_PROBE4(parseBPB__final,
2267             struct pcfs *, fsp, unsigned char *, bpb,
2268             int, validflags, fattype_t, type);
2269 
2270         if (type != FAT_UNKNOWN) {
2271                 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2272                 ASSERT(ISP2(secsize / DEV_BSIZE));
2273                 return (1);
2274         }
2275 
2276         return (0);
2277 }
2278 
2279 
2280 /*
2281  * Detect the device's native block size (sector size).
2282  *
2283  * Test whether the device is:
2284  *      - a floppy device from a known controller type via DKIOCINFO
2285  *      - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2286  *      - a USB floppy drive (identified by drive geometry)
2287  *
2288  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2289  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2290  *
2291  * This might be a bit wasteful on kernel stack space; if anyone's
2292  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2293  */
2294 static void
2295 pcfs_device_getinfo(struct pcfs *fsp)
2296 {
2297         dev_t                   rdev = fsp->pcfs_xdev;
2298         int                     error;
2299         union {
2300                 struct dk_minfo         mi;
2301                 struct dk_cinfo         ci;
2302                 struct dk_geom          gi;
2303                 struct fd_char          fc;
2304         } arg;                          /* save stackspace ... */
2305         intptr_t argp = (intptr_t)&arg;
2306         ldi_handle_t            lh;
2307         ldi_ident_t             li;
2308         int isfloppy, isremoveable, ishotpluggable;
2309         cred_t                  *cr = CRED();
2310 
2311         if (ldi_ident_from_dev(rdev, &li))
2312                 goto out;
2313 
2314         error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2315         ldi_ident_release(li);
2316         if (error)
2317                 goto out;
2318 
2319         /*
2320          * Not sure if this could possibly happen. It'd be a bit like
2321          * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2322          * expecting it, needs some thought if triggered ...
2323          */
2324         ASSERT(fsp->pcfs_xdev == rdev);
2325 
2326         /*
2327          * Check for removeable/hotpluggable media.
2328          */
2329         if (ldi_ioctl(lh, DKIOCREMOVABLE,
2330             (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2331                 isremoveable = 0;
2332         }
2333         if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2334             (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2335                 ishotpluggable = 0;
2336         }
2337 
2338         /*
2339          * Make sure we don't use "half-initialized" values if the ioctls fail.
2340          */
2341         if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2342                 bzero(&arg, sizeof (arg));
2343                 fsp->pcfs_mediasize = 0;
2344         } else {
2345                 fsp->pcfs_mediasize =
2346                     (len_t)arg.mi.dki_lbsize *
2347                     (len_t)arg.mi.dki_capacity;
2348         }
2349 
2350         if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2351                 if (fsp->pcfs_secsize == 0) {
2352                         fsp->pcfs_secsize = arg.mi.dki_lbsize;
2353                         fsp->pcfs_sdshift =
2354                             ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2355                 } else {
2356                         PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2357                             "%d, device (%x.%x), different from user-provided "
2358                             "%d. User override - ignoring autodetect result.\n",
2359                             arg.mi.dki_lbsize,
2360                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2361                             fsp->pcfs_secsize);
2362                 }
2363         } else if (arg.mi.dki_lbsize) {
2364                 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2365                     "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2366                     "Ignoring autodetect result.\n",
2367                     arg.mi.dki_lbsize,
2368                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2369         }
2370 
2371         /*
2372          * We treat the following media types as a floppy by default.
2373          */
2374         isfloppy =
2375             (arg.mi.dki_media_type == DK_FLOPPY ||
2376             arg.mi.dki_media_type == DK_ZIP ||
2377             arg.mi.dki_media_type == DK_JAZ);
2378 
2379         /*
2380          * if this device understands fdio(7I) requests it's
2381          * obviously a floppy drive.
2382          */
2383         if (!isfloppy &&
2384             !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2385                 isfloppy = 1;
2386 
2387         /*
2388          * some devices we like to treat as floppies, but they don't
2389          * understand fdio(7I) requests.
2390          */
2391         if (!isfloppy &&
2392             !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2393             (arg.ci.dki_ctype == DKC_WDC2880 ||
2394             arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2395             arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2396             arg.ci.dki_ctype == DKC_INTEL82077))
2397                 isfloppy = 1;
2398 
2399         /*
2400          * This is the "final fallback" test - media with
2401          * 2 heads and 80 cylinders are assumed to be floppies.
2402          * This is normally true for USB floppy drives ...
2403          */
2404         if (!isfloppy &&
2405             !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2406             (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2407                 isfloppy = 1;
2408 
2409         /*
2410          * This is similar to the "old" PCFS code that sets this flag
2411          * just based on the media descriptor being 0xf8 (MD_FIXED).
2412          * Should be re-worked. We really need some specialcasing for
2413          * removeable media.
2414          */
2415         if (!isfloppy) {
2416                 fsp->pcfs_flags |= PCFS_NOCHK;
2417         }
2418 
2419         /*
2420          * We automatically disable access time updates if the medium is
2421          * removeable and/or hotpluggable, and the admin did not explicitly
2422          * request access time updates (via the "atime" mount option).
2423          * The majority of flash-based media should fit this category.
2424          * Minimizing write access extends the lifetime of your memory stick !
2425          */
2426         if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2427             (isremoveable || ishotpluggable | isfloppy)) {
2428                 fsp->pcfs_flags |= PCFS_NOATIME;
2429         }
2430 
2431         (void) ldi_close(lh, FREAD, cr);
2432 out:
2433         if (fsp->pcfs_secsize == 0) {
2434                 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2435                     "device (%x.%x) failed, no user-provided fallback. "
2436                     "Using %d bytes.\n",
2437                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2438                     DEV_BSIZE);
2439                 fsp->pcfs_secsize = DEV_BSIZE;
2440                 fsp->pcfs_sdshift = 0;
2441         }
2442         ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2443         ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2444 }
2445 
2446 /*
2447  * Get the FAT type for the DOS medium.
2448  *
2449  * -------------------------
2450  * According to Microsoft:
2451  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2452  * count of clusters on the volume and nothing else.
2453  * -------------------------
2454  *
2455  */
2456 static int
2457 pc_getfattype(struct pcfs *fsp)
2458 {
2459         int error = 0;
2460         buf_t *bp = NULL;
2461         struct vnode *devvp = fsp->pcfs_devvp;
2462         dev_t   dev = devvp->v_rdev;
2463 
2464         /*
2465          * Detect the native block size of the medium, and attempt to
2466          * detect whether the medium is removeable.
2467          * We do treat removable media (floppies, USB and FireWire disks)
2468          * differently wrt. to the frequency and synchronicity of FAT updates.
2469          * We need to know the media block size in order to be able to
2470          * parse the partition table.
2471          */
2472         pcfs_device_getinfo(fsp);
2473 
2474         /*
2475          * Unpartitioned media (floppies and some removeable devices)
2476          * don't have a partition table, the FAT BPB is at disk block 0.
2477          * Start out by reading block 0.
2478          */
2479         fsp->pcfs_dosstart = 0;
2480         bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2481 
2482         if (error = geterror(bp))
2483                 goto out;
2484 
2485         /*
2486          * If a logical drive number is requested, parse the partition table
2487          * and attempt to locate it. Otherwise, proceed immediately to the
2488          * BPB check. findTheDrive(), if successful, returns the disk block
2489          * number where the requested partition starts in "startsec".
2490          */
2491         if (fsp->pcfs_ldrive != 0) {
2492                 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2493                     "device (%x,%x):%d to find BPB\n",
2494                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2495 
2496                 if (error = findTheDrive(fsp, &bp))
2497                         goto out;
2498 
2499                 ASSERT(fsp->pcfs_dosstart != 0);
2500 
2501                 brelse(bp);
2502                 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2503                     fsp->pcfs_secsize);
2504                 if (error = geterror(bp))
2505                         goto out;
2506         }
2507 
2508         /*
2509          * Validate the BPB and fill in the instance structure.
2510          */
2511         if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2512                 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2513                     "device (%x.%x):%d, disk LBA %u\n",
2514                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2515                     (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2516                 error = EINVAL;
2517                 goto out;
2518         }
2519 
2520         ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2521 
2522 out:
2523         /*
2524          * Release the buffer used
2525          */
2526         if (bp != NULL)
2527                 brelse(bp);
2528         return (error);
2529 }
2530 
2531 
2532 /*
2533  * Get the file allocation table.
2534  * If there is an old FAT, invalidate it.
2535  */
2536 int
2537 pc_getfat(struct pcfs *fsp)
2538 {
2539         struct buf *bp = NULL;
2540         uchar_t *fatp = NULL;
2541         uchar_t *fat_changemap = NULL;
2542         int error;
2543         int fat_changemapsize;
2544         int flags = 0;
2545         int nfat;
2546         int altfat_mustmatch = 0;
2547         int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2548 
2549         if (fsp->pcfs_fatp) {
2550                 /*
2551                  * There is a FAT in core.
2552                  * If there are open file pcnodes or we have modified it or
2553                  * it hasn't timed out yet use the in core FAT.
2554                  * Otherwise invalidate it and get a new one
2555                  */
2556 #ifdef notdef
2557                 if (fsp->pcfs_frefs ||
2558                     (fsp->pcfs_flags & PCFS_FATMOD) ||
2559                     (gethrestime_sec() < fsp->pcfs_fattime)) {
2560                         return (0);
2561                 } else {
2562                         mutex_enter(&pcfslock);
2563                         pc_invalfat(fsp);
2564                         mutex_exit(&pcfslock);
2565                 }
2566 #endif /* notdef */
2567                 return (0);
2568         }
2569 
2570         /*
2571          * Get FAT and check it for validity
2572          */
2573         fatp = kmem_alloc(fatsize, KM_SLEEP);
2574         error = pc_readfat(fsp, fatp);
2575         if (error) {
2576                 flags = B_ERROR;
2577                 goto out;
2578         }
2579         fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2580         fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2581         fsp->pcfs_fatp = fatp;
2582         fsp->pcfs_fat_changemapsize = fat_changemapsize;
2583         fsp->pcfs_fat_changemap = fat_changemap;
2584 
2585         /*
2586          * The only definite signature check is that the
2587          * media descriptor byte should match the first byte
2588          * of the FAT block.
2589          */
2590         if (fatp[0] != fsp->pcfs_mediadesc) {
2591                 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2592                     "media descriptor %x, FAT[0] lowbyte %x\n",
2593                     (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2594                 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2595                 altfat_mustmatch = 1;
2596         }
2597 
2598         /*
2599          * Get alternate FATs and check for consistency
2600          * This is an inlined version of pc_readfat().
2601          * Since we're only comparing FAT and alternate FAT,
2602          * there's no reason to let pc_readfat() copy data out
2603          * of the buf. Instead, compare in-situ, one cluster
2604          * at a time.
2605          */
2606         for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2607                 size_t startsec;
2608                 size_t off;
2609 
2610                 startsec = pc_dbdaddr(fsp,
2611                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2612 
2613                 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2614                         daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2615                             pc_cltodb(fsp, pc_lblkno(fsp, off)));
2616 
2617                         bp = bread(fsp->pcfs_xdev, fatblk,
2618                             MIN(fsp->pcfs_clsize, fatsize - off));
2619                         if (bp->b_flags & (B_ERROR | B_STALE)) {
2620                                 cmn_err(CE_NOTE,
2621                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2622                                     " read error at offset %ld on device"
2623                                     " (%x.%x):%d",
2624                                     nfat, (void *)(uintptr_t)startsec, off,
2625                                     getmajor(fsp->pcfs_xdev),
2626                                     getminor(fsp->pcfs_xdev),
2627                                     fsp->pcfs_ldrive);
2628                                 flags = B_ERROR;
2629                                 error = EIO;
2630                                 goto out;
2631                         }
2632                         bp->b_flags |= B_STALE | B_AGE;
2633                         if (bcmp(bp->b_un.b_addr, fatp + off,
2634                             MIN(fsp->pcfs_clsize, fatsize - off))) {
2635                                 cmn_err(CE_NOTE,
2636                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2637                                     " corrupted at offset %ld on device"
2638                                     " (%x.%x):%d",
2639                                     nfat, (void *)(uintptr_t)startsec, off,
2640                                     getmajor(fsp->pcfs_xdev),
2641                                     getminor(fsp->pcfs_xdev),
2642                                     fsp->pcfs_ldrive);
2643                                 if (altfat_mustmatch) {
2644                                         flags = B_ERROR;
2645                                         error = EIO;
2646                                         goto out;
2647                                 }
2648                         }
2649                         brelse(bp);
2650                         bp = NULL;      /* prevent double release */
2651                 }
2652         }
2653 
2654         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2655         fsp->pcfs_fatjustread = 1;
2656 
2657         /*
2658          * Retrieve FAT32 fsinfo sector.
2659          * A failure to read this is not fatal to accessing the volume.
2660          * It simply means operations that count or search free blocks
2661          * will have to do a full FAT walk, vs. a possibly quicker lookup
2662          * of the summary information.
2663          * Hence, we log a message but return success overall after this point.
2664          */
2665         if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2666                 struct fat_od_fsi *fsinfo_disk;
2667 
2668                 bp = bread(fsp->pcfs_xdev,
2669                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2670                 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2671                 if (bp->b_flags & (B_ERROR | B_STALE) ||
2672                     !FSISIG_OK(fsinfo_disk)) {
2673                         cmn_err(CE_NOTE,
2674                             "!pcfs: error reading fat32 fsinfo from "
2675                             "device (%x.%x):%d, block %lld",
2676                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2677                             fsp->pcfs_ldrive,
2678                             (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2679                         fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2680                         fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2681                         fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2682                 } else {
2683                         bp->b_flags |= B_STALE | B_AGE;
2684                         fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2685                         fsp->pcfs_fsinfo.fs_free_clusters =
2686                             LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2687                         fsp->pcfs_fsinfo.fs_next_free =
2688                             LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2689                 }
2690                 brelse(bp);
2691                 bp = NULL;
2692         }
2693 
2694         if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2695                 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2696         else
2697                 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2698 
2699         return (0);
2700 
2701 out:
2702         cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2703         if (bp)
2704                 brelse(bp);
2705         if (fatp)
2706                 kmem_free(fatp, fatsize);
2707         if (fat_changemap)
2708                 kmem_free(fat_changemap, fat_changemapsize);
2709 
2710         if (flags) {
2711                 pc_mark_irrecov(fsp);
2712         }
2713         return (error);
2714 }