1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/param.h>
  28 #include <sys/systm.h>
  29 #include <sys/kmem.h>
  30 #include <sys/user.h>
  31 #include <sys/proc.h>
  32 #include <sys/cred.h>
  33 #include <sys/disp.h>
  34 #include <sys/buf.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vfs_opreg.h>
  37 #include <sys/vnode.h>
  38 #include <sys/fdio.h>
  39 #include <sys/file.h>
  40 #include <sys/uio.h>
  41 #include <sys/conf.h>
  42 #include <sys/statvfs.h>
  43 #include <sys/mount.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/conf.h>
  49 #include <sys/mkdev.h>
  50 #include <sys/swap.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/sunldi.h>
  53 #include <sys/dktp/fdisk.h>
  54 #include <sys/fs/pc_label.h>
  55 #include <sys/fs/pc_fs.h>
  56 #include <sys/fs/pc_dir.h>
  57 #include <sys/fs/pc_node.h>
  58 #include <fs/fs_subr.h>
  59 #include <sys/modctl.h>
  60 #include <sys/dkio.h>
  61 #include <sys/open.h>
  62 #include <sys/mntent.h>
  63 #include <sys/policy.h>
  64 #include <sys/atomic.h>
  65 #include <sys/sdt.h>
  66 
  67 /*
  68  * The majority of PC media use a 512 sector size, but
  69  * occasionally you will run across a 1k sector size.
  70  * For media with a 1k sector size, fd_strategy() requires
  71  * the I/O size to be a 1k multiple; so when the sector size
  72  * is not yet known, always read 1k.
  73  */
  74 #define PC_SAFESECSIZE  (PC_SECSIZE * 2)
  75 
  76 static int pcfs_pseudo_floppy(dev_t);
  77 
  78 static int pcfsinit(int, char *);
  79 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
  80         struct cred *);
  81 static int pcfs_unmount(struct vfs *, int, struct cred *);
  82 static int pcfs_root(struct vfs *, struct vnode **);
  83 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
  84 static int pc_syncfsnodes(struct pcfs *);
  85 static int pcfs_sync(struct vfs *, short, struct cred *);
  86 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
  87 static void pcfs_freevfs(vfs_t *vfsp);
  88 
  89 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
  90 static int pc_writefat(struct pcfs *fsp, daddr_t start);
  91 
  92 static int pc_getfattype(struct pcfs *fsp);
  93 static void pcfs_parse_mntopts(struct pcfs *fsp);
  94 
  95 
  96 /*
  97  * pcfs mount options table
  98  */
  99 
 100 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
 101 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
 102 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
 103 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
 104 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
 105 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
 106 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
 107 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
 108 
 109 static mntopt_t mntopts[] = {
 110 /*
 111  *      option name     cancel option   default arg     flags   opt data
 112  */
 113         { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
 114         { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
 115         { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
 116         { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
 117         { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
 118         { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
 119         { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
 120         { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
 121         { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
 122         { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
 123 };
 124 
 125 static mntopts_t pcfs_mntopts = {
 126         sizeof (mntopts) / sizeof (mntopt_t),
 127         mntopts
 128 };
 129 
 130 int pcfsdebuglevel = 0;
 131 
 132 /*
 * pcfslock:    protects the list of mounted pc filesystems "pc_mounttab".
 134  * pcfs_lock:   (inside per filesystem structure "pcfs")
 135  *              per filesystem lock. Most of the vfsops and vnodeops are
 136  *              protected by this lock.
 137  * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 138  *
 139  * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 140  *
 141  * pcfs_mountcount:     used to prevent module unloads while there is still
 142  *                      pcfs state from a former mount hanging around. With
 143  *                      forced umount support, the filesystem module must not
 144  *                      be allowed to go away before the last VFS_FREEVFS()
 145  *                      call has been made.
 146  *                      Since this is just an atomic counter, there's no need
 147  *                      for locking.
 148  */
 149 kmutex_t        pcfslock;
 150 krwlock_t       pcnodes_lock;
 151 uint32_t        pcfs_mountcount;
 152 
 153 static int pcfstype;
 154 
 155 static vfsdef_t vfw = {
 156         VFSDEF_VERSION,
 157         "pcfs",
 158         pcfsinit,
 159         VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
 160         &pcfs_mntopts
 161 };
 162 
 163 extern struct mod_ops mod_fsops;
 164 
 165 static struct modlfs modlfs = {
 166         &mod_fsops,
 167         "PC filesystem",
 168         &vfw
 169 };
 170 
 171 static struct modlinkage modlinkage = {
 172         MODREV_1,
 173         &modlfs,
 174         NULL
 175 };
 176 
 177 int
 178 _init(void)
 179 {
 180         int     error;
 181 
 182 #if !defined(lint)
 183         /* make sure the on-disk structures are sane */
 184         ASSERT(sizeof (struct pcdir) == 32);
 185         ASSERT(sizeof (struct pcdir_lfn) == 32);
 186 #endif
 187         mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
 188         rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
 189         error = mod_install(&modlinkage);
 190         if (error) {
 191                 mutex_destroy(&pcfslock);
 192                 rw_destroy(&pcnodes_lock);
 193         }
 194         return (error);
 195 }
 196 
 197 int
 198 _fini(void)
 199 {
 200         int     error;
 201 
 202         /*
 203          * If a forcedly unmounted instance is still hanging around,
 204          * we cannot allow the module to be unloaded because that would
 205          * cause panics once the VFS framework decides it's time to call
 206          * into VFS_FREEVFS().
 207          */
 208         if (pcfs_mountcount)
 209                 return (EBUSY);
 210 
 211         error = mod_remove(&modlinkage);
 212         if (error)
 213                 return (error);
 214         mutex_destroy(&pcfslock);
 215         rw_destroy(&pcnodes_lock);
 216         /*
 217          * Tear down the operations vectors
 218          */
 219         (void) vfs_freevfsops_by_type(pcfstype);
 220         vn_freevnodeops(pcfs_fvnodeops);
 221         vn_freevnodeops(pcfs_dvnodeops);
 222         return (0);
 223 }
 224 
 225 int
 226 _info(struct modinfo *modinfop)
 227 {
 228         return (mod_info(&modlinkage, modinfop));
 229 }
 230 
 231 /* ARGSUSED1 */
 232 static int
 233 pcfsinit(int fstype, char *name)
 234 {
 235         static const fs_operation_def_t pcfs_vfsops_template[] = {
 236                 VFSNAME_MOUNT,          { .vfs_mount = pcfs_mount },
 237                 VFSNAME_UNMOUNT,        { .vfs_unmount = pcfs_unmount },
 238                 VFSNAME_ROOT,           { .vfs_root = pcfs_root },
 239                 VFSNAME_STATVFS,        { .vfs_statvfs = pcfs_statvfs },
 240                 VFSNAME_SYNC,           { .vfs_sync = pcfs_sync },
 241                 VFSNAME_VGET,           { .vfs_vget = pcfs_vget },
 242                 VFSNAME_FREEVFS,        { .vfs_freevfs = pcfs_freevfs },
 243                 NULL,                   NULL
 244         };
 245         int error;
 246 
 247         error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
 248         if (error != 0) {
 249                 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
 250                 return (error);
 251         }
 252 
 253         error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
 254         if (error != 0) {
 255                 (void) vfs_freevfsops_by_type(fstype);
 256                 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
 257                 return (error);
 258         }
 259 
 260         error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
 261         if (error != 0) {
 262                 (void) vfs_freevfsops_by_type(fstype);
 263                 vn_freevnodeops(pcfs_fvnodeops);
 264                 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
 265                 return (error);
 266         }
 267 
 268         pcfstype = fstype;
 269         (void) pc_init();
 270         pcfs_mountcount = 0;
 271         return (0);
 272 }
 273 
 274 static struct pcfs *pc_mounttab = NULL;
 275 
 276 extern struct pcfs_args pc_tz;
 277 
 278 /*
 279  *  Define some special logical drives we use internal to this file.
 280  */
 281 #define BOOT_PARTITION_DRIVE    99
 282 #define PRIMARY_DOS_DRIVE       1
 283 #define UNPARTITIONED_DRIVE     0
 284 
 285 static int
 286 pcfs_device_identify(
 287         struct vfs *vfsp,
 288         struct mounta *uap,
 289         struct cred *cr,
 290         int *dos_ldrive,
 291         dev_t *xdev)
 292 {
 293         struct pathname special;
 294         char *c;
 295         struct vnode *svp = NULL;
 296         struct vnode *lvp = NULL;
 297         int oflag, aflag;
 298         int error;
 299 
 300         /*
 301          * Resolve path name of special file being mounted.
 302          */
 303         if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
 304                 return (error);
 305         }
 306 
 307         *dos_ldrive = -1;
 308 
 309         if (error =
 310             lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
 311                 /*
 312                  * If there's no device node, the name specified most likely
 313                  * maps to a PCFS-style "partition specifier" to select a
 314                  * harddisk primary/logical partition. Disable floppy-specific
 315                  * checks in such cases unless an explicit :A or :B is
 316                  * requested.
 317                  */
 318 
 319                 /*
 320                  * Split the pathname string at the last ':' separator.
 321                  * If there's no ':' in the device name, or the ':' is the
 322                  * last character in the string, the name is invalid and
 323                  * the error from the previous lookup will be returned.
 324                  */
 325                 c = strrchr(special.pn_path, ':');
 326                 if (c == NULL || strlen(c) == 0)
 327                         goto devlookup_done;
 328 
 329                 *c++ = '\0';
 330 
 331                 /*
 332                  * PCFS partition name suffixes can be:
 333                  *      - "boot" to indicate the X86BOOT partition
 334                  *      - a drive letter [c-z] for the "DOS logical drive"
 335                  *      - a drive number 1..24 for the "DOS logical drive"
 336                  *      - a "floppy name letter", 'a' or 'b' (just strip this)
 337                  */
 338                 if (strcasecmp(c, "boot") == 0) {
 339                         /*
 340                          * The Solaris boot partition is requested.
 341                          */
 342                         *dos_ldrive = BOOT_PARTITION_DRIVE;
 343                 } else if (strspn(c, "0123456789") == strlen(c)) {
 344                         /*
 345                          * All digits - parse the partition number.
 346                          */
 347                         long drvnum = 0;
 348 
 349                         if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
 350                                 /*
 351                                  * A number alright - in the allowed range ?
 352                                  */
 353                                 if (drvnum > 24 || drvnum == 0)
 354                                         error = ENXIO;
 355                         }
 356                         if (error)
 357                                 goto devlookup_done;
 358                         *dos_ldrive = (int)drvnum;
 359                 } else if (strlen(c) == 1) {
 360                         /*
 361                          * A single trailing character was specified.
 362                          *      - [c-zC-Z] means a harddisk partition, and
 363                          *        we retrieve the partition number.
 364                          *      - [abAB] means a floppy drive, so we swallow
 365                          *        the "drive specifier" and test later
 366                          *        whether the physical device is a floppy.
 367                          */
 368                         *c = tolower(*c);
 369                         if (*c == 'a' || *c == 'b') {
 370                                 *dos_ldrive = UNPARTITIONED_DRIVE;
 371                         } else if (*c < 'c' || *c > 'z') {
 372                                 error = ENXIO;
 373                                 goto devlookup_done;
 374                         } else {
 375                                 *dos_ldrive = 1 + *c - 'c';
 376                         }
 377                 } else {
 378                         /*
 379                          * Can't parse this - pass through previous error.
 380                          */
 381                         goto devlookup_done;
 382                 }
 383 
 384 
 385                 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
 386                     NULLVPP, &svp);
 387         } else {
 388                 *dos_ldrive = UNPARTITIONED_DRIVE;
 389         }
 390 devlookup_done:
 391         pn_free(&special);
 392         if (error)
 393                 return (error);
 394 
 395         ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
 396 
 397         /*
 398          * Verify caller's permission to open the device special file.
 399          */
 400         if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 401             ((uap->flags & MS_RDONLY) != 0)) {
 402                 oflag = FREAD;
 403                 aflag = VREAD;
 404         } else {
 405                 oflag = FREAD | FWRITE;
 406                 aflag = VREAD | VWRITE;
 407         }
 408 
 409         error = vfs_get_lofi(vfsp, &lvp);
 410 
 411         if (error > 0) {
 412                 if (error == ENOENT)
 413                         error = ENODEV;
 414                 goto out;
 415         } else if (error == 0) {
 416                 *xdev = lvp->v_rdev;
 417         } else {
 418                 *xdev = svp->v_rdev;
 419 
 420                 if (svp->v_type != VBLK) {
 421                         error = ENOTBLK;
 422                         goto out;
 423                 }
 424 
 425                 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
 426                         goto out;
 427         }
 428 
 429         if (getmajor(*xdev) >= devcnt) {
 430                 error = ENXIO;
 431                 goto out;
 432         }
 433 
 434         if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 435                 goto out;
 436 
 437 out:
 438         if (svp != NULL)
 439                 VN_RELE(svp);
 440         if (lvp != NULL)
 441                 VN_RELE(lvp);
 442         return (error);
 443 }
 444 
 445 static int
 446 pcfs_device_ismounted(
 447         struct vfs *vfsp,
 448         int dos_ldrive,
 449         dev_t xdev,
 450         int *remounting,
 451         dev_t *pseudodev)
 452 {
 453         struct pcfs *fsp;
 454         int remount = *remounting;
 455 
 456         /*
 457          * Ensure that this logical drive isn't already mounted, unless
 458          * this is a REMOUNT request.
 459          * Note: The framework will perform this check if the "...:c"
 460          * PCFS-style "logical drive" syntax has not been used and an
 461          * actually existing physical device is backing this filesystem.
 462          * Once all block device drivers support PC-style partitioning,
 463          * this codeblock can be dropped.
 464          */
 465         *pseudodev = xdev;
 466 
 467         if (dos_ldrive) {
 468                 mutex_enter(&pcfslock);
 469                 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
 470                         if (fsp->pcfs_xdev == xdev &&
 471                             fsp->pcfs_ldrive == dos_ldrive) {
 472                                 mutex_exit(&pcfslock);
 473                                 if (remount) {
 474                                         return (0);
 475                                 } else {
 476                                         return (EBUSY);
 477                                 }
 478                         }
 479                 /*
 480                  * Assign a unique device number for the vfs
 481                  * The old way (getudev() + a constantly incrementing
 482                  * major number) was wrong because it changes vfs_dev
 483                  * across mounts and reboots, which breaks nfs file handles.
 484                  * UFS just uses the real dev_t. We can't do that because
 485                  * of the way pcfs opens fdisk partitons (the :c and :d
 486                  * partitions are on the same dev_t). Though that _might_
 487                  * actually be ok, since the file handle contains an
 488                  * absolute block number, it's probably better to make them
 489                  * different. So I think we should retain the original
 490                  * dev_t, but come up with a different minor number based
 491                  * on the logical drive that will _always_ come up the same.
 492                  * For now, we steal the upper 6 bits.
 493                  */
 494 #ifdef notdef
 495                 /* what should we do here? */
 496                 if (((getminor(xdev) >> 12) & 0x3F) != 0)
 497                         printf("whoops - upper bits used!\n");
 498 #endif
 499                 *pseudodev = makedevice(getmajor(xdev),
 500                     ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
 501                 if (vfs_devmounting(*pseudodev, vfsp)) {
 502                         mutex_exit(&pcfslock);
 503                         return (EBUSY);
 504                 }
 505                 if (vfs_devismounted(*pseudodev)) {
 506                         mutex_exit(&pcfslock);
 507                         if (remount) {
 508                                 return (0);
 509                         } else {
 510                                 return (EBUSY);
 511                         }
 512                 }
 513                 mutex_exit(&pcfslock);
 514         } else {
 515                 *pseudodev = xdev;
 516                 if (vfs_devmounting(*pseudodev, vfsp)) {
 517                         return (EBUSY);
 518                 }
 519                 if (vfs_devismounted(*pseudodev))
 520                         if (remount) {
 521                                 return (0);
 522                         } else {
 523                                 return (EBUSY);
 524                         }
 525         }
 526 
 527         /*
 528          * This is not a remount. Even if MS_REMOUNT was requested,
 529          * the caller needs to proceed as it would on an ordinary
 530          * mount.
 531          */
 532         *remounting = 0;
 533 
 534         ASSERT(*pseudodev);
 535         return (0);
 536 }
 537 
 538 /*
 539  * Get the PCFS-specific mount options from the VFS framework.
 540  * For "timezone" and "secsize", we need to parse the number
 541  * ourselves and ensure its validity.
 542  * Note: "secsize" is deliberately undocumented at this time,
 543  * it's a workaround for devices (particularly: lofi image files)
 544  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
 545  */
 546 static void
 547 pcfs_parse_mntopts(struct pcfs *fsp)
 548 {
 549         char *c;
 550         char *endptr;
 551         long l;
 552         struct vfs *vfsp = fsp->pcfs_vfs;
 553 
 554         ASSERT(fsp->pcfs_secondswest == 0);
 555         ASSERT(fsp->pcfs_secsize == 0);
 556 
 557         if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
 558                 fsp->pcfs_flags |= PCFS_HIDDEN;
 559         if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
 560                 fsp->pcfs_flags |= PCFS_FOLDCASE;
 561         if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
 562                 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
 563         if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
 564                 fsp->pcfs_flags |= PCFS_NOATIME;
 565 
 566         if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
 567                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 568                     endptr == c + strlen(c)) {
 569                         /*
 570                          * A number alright - in the allowed range ?
 571                          */
 572                         if (l <= -12*3600 || l >= 12*3600) {
 573                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 574                                     "'timezone' mount option - %ld "
 575                                     "is out of range. Assuming 0.", l);
 576                                 l = 0;
 577                         }
 578                 } else {
 579                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 580                             "'timezone' mount option - argument %s "
 581                             "is not a valid number. Assuming 0.", c);
 582                         l = 0;
 583                 }
 584                 fsp->pcfs_secondswest = l;
 585         }
 586 
 587         /*
 588          * The "secsize=..." mount option is a workaround for the lack of
 589          * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
 590          * partition table of a disk image and it has been partitioned with
 591          * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
 592          * images.
 593          * That should really be fixed in lofi ... this is a workaround.
 594          */
 595         if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
 596                 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
 597                     endptr == c + strlen(c)) {
 598                         /*
 599                          * A number alright - a valid sector size as well ?
 600                          */
 601                         if (!VALID_SECSIZE(l)) {
 602                                 cmn_err(CE_WARN, "!pcfs: invalid use of "
 603                                     "'secsize' mount option - %ld is "
 604                                     "unsupported. Autodetecting.", l);
 605                                 l = 0;
 606                         }
 607                 } else {
 608                         cmn_err(CE_WARN, "!pcfs: invalid use of "
 609                             "'secsize' mount option - argument %s "
 610                             "is not a valid number. Autodetecting.", c);
 611                         l = 0;
 612                 }
 613                 fsp->pcfs_secsize = l;
 614                 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
 615         }
 616 }
 617 
 618 /*
 619  * vfs operations
 620  */
 621 
 622 /*
 623  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
 624  */
 625 static int
 626 pcfs_mount(
 627         struct vfs *vfsp,
 628         struct vnode *mvp,
 629         struct mounta *uap,
 630         struct cred *cr)
 631 {
 632         struct pcfs *fsp;
 633         struct vnode *devvp;
 634         dev_t pseudodev;
 635         dev_t xdev;
 636         int dos_ldrive = 0;
 637         int error;
 638         int remounting;
 639 
 640         if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 641                 return (error);
 642 
 643         if (mvp->v_type != VDIR)
 644                 return (ENOTDIR);
 645 
 646         mutex_enter(&mvp->v_lock);
 647         if ((uap->flags & MS_REMOUNT) == 0 &&
 648             (uap->flags & MS_OVERLAY) == 0 &&
 649             (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 650                 mutex_exit(&mvp->v_lock);
 651                 return (EBUSY);
 652         }
 653         mutex_exit(&mvp->v_lock);
 654 
 655         /*
 656          * PCFS doesn't do mount arguments anymore - everything's a mount
 657          * option these days. In order not to break existing callers, we
 658          * don't reject it yet, just warn that the data (if any) is ignored.
 659          */
 660         if (uap->datalen != 0)
 661                 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
 662                     "mount argument structures instead of mount options. "
 663                     "Ignoring mount(2) 'dataptr' argument.");
 664 
 665         /*
 666          * This is needed early, to make sure the access / open calls
 667          * are done using the correct mode. Processing this mount option
 668          * only when calling pcfs_parse_mntopts() would lead us to attempt
 669          * a read/write access to a possibly writeprotected device, and
 670          * a readonly mount attempt might fail because of that.
 671          */
 672         if (uap->flags & MS_RDONLY) {
 673                 vfsp->vfs_flag |= VFS_RDONLY;
 674                 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 675         }
 676 
 677         /*
 678          * For most filesystems, this is just a lookupname() on the
 679          * mount pathname string. PCFS historically has to do its own
 680          * partition table parsing because not all Solaris architectures
 681          * support all styles of partitioning that PC media can have, and
 682          * hence PCFS understands "device names" that don't map to actual
 683          * physical device nodes. Parsing the "PCFS syntax" for device
 684          * names is done in pcfs_device_identify() - see there.
 685          *
 686          * Once all block device drivers that can host FAT filesystems have
 687          * been enhanced to create device nodes for all PC-style partitions,
 688          * this code can go away.
 689          */
 690         if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
 691                 return (error);
 692 
 693         /*
 694          * As with looking up the actual device to mount, PCFS cannot rely
 695          * on just the checks done by vfs_ismounted() whether a given device
 696          * is mounted already. The additional check against the "PCFS syntax"
 697          * is done in  pcfs_device_ismounted().
 698          */
 699         remounting = (uap->flags & MS_REMOUNT);
 700 
 701         if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
 702             &pseudodev))
 703                 return (error);
 704 
 705         if (remounting)
 706                 return (0);
 707 
 708         /*
 709          * Mount the filesystem.
 710          * An instance structure is required before the attempt to locate
 711          * and parse the FAT BPB. This is because mount options may change
 712          * the behaviour of the filesystem type matching code. Precreate
 713          * it and fill it in to a degree that allows parsing the mount
 714          * options.
 715          */
 716         devvp = makespecvp(xdev, VBLK);
 717         if (IS_SWAPVP(devvp)) {
 718                 VN_RELE(devvp);
 719                 return (EBUSY);
 720         }
 721         error = VOP_OPEN(&devvp,
 722             (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
 723         if (error) {
 724                 VN_RELE(devvp);
 725                 return (error);
 726         }
 727 
 728         fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
 729         fsp->pcfs_vfs = vfsp;
 730         fsp->pcfs_xdev = xdev;
 731         fsp->pcfs_devvp = devvp;
 732         fsp->pcfs_ldrive = dos_ldrive;
 733         mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
 734 
 735         pcfs_parse_mntopts(fsp);
 736 
 737         /*
 738          * This is the actual "mount" - the PCFS superblock check.
 739          *
 740          * Find the requested logical drive and the FAT BPB therein.
 741          * Check device type and flag the instance if media is removeable.
 742          *
 743          * Initializes most members of the filesystem instance structure.
 744          * Returns EINVAL if no valid BPB can be found. Other errors may
 745          * occur after I/O failures, or when invalid / unparseable partition
 746          * tables are encountered.
 747          */
 748         if (error = pc_getfattype(fsp))
 749                 goto errout;
 750 
 751         /*
 752          * Now that the BPB has been parsed, this structural information
 753          * is available and known to be valid. Initialize the VFS.
 754          */
 755         vfsp->vfs_data = fsp;
 756         vfsp->vfs_dev = pseudodev;
 757         vfsp->vfs_fstype = pcfstype;
 758         vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
 759         vfsp->vfs_bcount = 0;
 760         vfsp->vfs_bsize = fsp->pcfs_clsize;
 761 
 762         /*
 763          * Validate that we can access the FAT and that it is, to the
 764          * degree we can verify here, self-consistent.
 765          */
 766         if (error = pc_verify(fsp))
 767                 goto errout;
 768 
 769         /*
 770          * Record the time of the mount, to return as an "approximate"
 771          * timestamp for the FAT root directory. Since FAT roots don't
 772          * have timestamps, this is less confusing to the user than
 773          * claiming "zero" / Jan/01/1970.
 774          */
 775         gethrestime(&fsp->pcfs_mounttime);
 776 
 777         /*
 778          * Fix up the mount options. Because "noatime" is made default on
 779          * removeable media only, a fixed disk will have neither "atime"
 780          * nor "noatime" set. We set the options explicitly depending on
 781          * the PCFS_NOATIME flag, to inform the user of what applies.
 782          * Mount option cancellation will take care that the mutually
 783          * exclusive 'other' is cleared.
 784          */
 785         vfs_setmntopt(vfsp,
 786             fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
 787             NULL, 0);
 788 
 789         /*
 790          * All clear - insert the FS instance into PCFS' list.
 791          */
 792         mutex_enter(&pcfslock);
 793         fsp->pcfs_nxt = pc_mounttab;
 794         pc_mounttab = fsp;
 795         mutex_exit(&pcfslock);
 796         atomic_inc_32(&pcfs_mountcount);
 797         return (0);
 798 
 799 errout:
 800         (void) VOP_CLOSE(devvp,
 801             vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
 802             1, (offset_t)0, cr, NULL);
 803         VN_RELE(devvp);
 804         mutex_destroy(&fsp->pcfs_lock);
 805         kmem_free(fsp, sizeof (*fsp));
 806         return (error);
 807 
 808 }
 809 
 810 static int
 811 pcfs_unmount(
 812         struct vfs *vfsp,
 813         int flag,
 814         struct cred *cr)
 815 {
 816         struct pcfs *fsp, *fsp1;
 817 
 818         if (secpolicy_fs_unmount(cr, vfsp) != 0)
 819                 return (EPERM);
 820 
 821         fsp = VFSTOPCFS(vfsp);
 822 
 823         /*
 824          * We don't have to lock fsp because the VVFSLOCK in vfs layer will
 825          * prevent lookuppn from crossing the mount point.
 826          * If this is not a forced umount request and there's ongoing I/O,
 827          * don't allow the mount to proceed.
 828          */
 829         if (flag & MS_FORCE)
 830                 vfsp->vfs_flag |= VFS_UNMOUNTED;
 831         else if (fsp->pcfs_nrefs)
 832                 return (EBUSY);
 833 
 834         mutex_enter(&pcfslock);
 835 
 836         /*
 837          * If this is a forced umount request or if the fs instance has
 838          * been marked as beyond recovery, allow the umount to proceed
 839          * regardless of state. pc_diskchanged() forcibly releases all
 840          * inactive vnodes/pcnodes.
 841          */
 842         if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
 843                 rw_enter(&pcnodes_lock, RW_WRITER);
 844                 pc_diskchanged(fsp);
 845                 rw_exit(&pcnodes_lock);
 846         }
 847 
 848         /* now there should be no pcp node on pcfhead or pcdhead. */
 849 
 850         if (fsp == pc_mounttab) {
 851                 pc_mounttab = fsp->pcfs_nxt;
 852         } else {
 853                 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
 854                         if (fsp1->pcfs_nxt == fsp)
 855                                 fsp1->pcfs_nxt = fsp->pcfs_nxt;
 856         }
 857 
 858         mutex_exit(&pcfslock);
 859 
 860         /*
 861          * Since we support VFS_FREEVFS(), there's no need to
 862          * free the fsp right now. The framework will tell us
 863          * when the right time to do so has arrived by calling
 864          * into pcfs_freevfs.
 865          */
 866         return (0);
 867 }
 868 
 869 /*
 870  * find root of pcfs
 871  */
 872 static int
 873 pcfs_root(
 874         struct vfs *vfsp,
 875         struct vnode **vpp)
 876 {
 877         struct pcfs *fsp;
 878         struct pcnode *pcp;
 879         int error;
 880 
 881         fsp = VFSTOPCFS(vfsp);
 882         if (error = pc_lockfs(fsp, 0, 0))
 883                 return (error);
 884 
 885         pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
 886         pc_unlockfs(fsp);
 887         *vpp = PCTOV(pcp);
 888         pcp->pc_flags |= PC_EXTERNAL;
 889         return (0);
 890 }
 891 
 892 /*
 893  * Get file system statistics.
 894  */
 895 static int
 896 pcfs_statvfs(
 897         struct vfs *vfsp,
 898         struct statvfs64 *sp)
 899 {
 900         struct pcfs *fsp;
 901         int error;
 902         dev32_t d32;
 903 
 904         fsp = VFSTOPCFS(vfsp);
 905         error = pc_getfat(fsp);
 906         if (error)
 907                 return (error);
 908         bzero(sp, sizeof (*sp));
 909         sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
 910         sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
 911         sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
 912         sp->f_files = (fsfilcnt64_t)-1;
 913         sp->f_ffree = (fsfilcnt64_t)-1;
 914         sp->f_favail = (fsfilcnt64_t)-1;
 915 #ifdef notdef
 916         (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
 917 #endif /* notdef */
 918         (void) cmpldev(&d32, vfsp->vfs_dev);
 919         sp->f_fsid = d32;
 920         (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 921         sp->f_flag = vf_to_stf(vfsp->vfs_flag);
 922         sp->f_namemax = PCMAXNAMLEN;
 923         return (0);
 924 }
 925 
 926 static int
 927 pc_syncfsnodes(struct pcfs *fsp)
 928 {
 929         struct pchead *hp;
 930         struct pcnode *pcp;
 931         int error;
 932 
 933         if (error = pc_lockfs(fsp, 0, 0))
 934                 return (error);
 935 
 936         if (!(error = pc_syncfat(fsp))) {
 937                 hp = pcfhead;
 938                 while (hp < & pcfhead [ NPCHASH ]) {
 939                         rw_enter(&pcnodes_lock, RW_READER);
 940                         pcp = hp->pch_forw;
 941                         while (pcp != (struct pcnode *)hp) {
 942                                 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
 943                                         if (error = pc_nodesync(pcp))
 944                                                 break;
 945                                 pcp = pcp -> pc_forw;
 946                         }
 947                         rw_exit(&pcnodes_lock);
 948                         if (error)
 949                                 break;
 950                         hp++;
 951                 }
 952         }
 953         pc_unlockfs(fsp);
 954         return (error);
 955 }
 956 
 957 /*
 958  * Flush any pending I/O.
 959  */
 960 /*ARGSUSED*/
 961 static int
 962 pcfs_sync(
 963         struct vfs *vfsp,
 964         short flag,
 965         struct cred *cr)
 966 {
 967         struct pcfs *fsp;
 968         int error = 0;
 969 
 970         /* this prevents the filesystem from being umounted. */
 971         mutex_enter(&pcfslock);
 972         if (vfsp != NULL) {
 973                 fsp = VFSTOPCFS(vfsp);
 974                 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
 975                         error = pc_syncfsnodes(fsp);
 976                 } else {
 977                         rw_enter(&pcnodes_lock, RW_WRITER);
 978                         pc_diskchanged(fsp);
 979                         rw_exit(&pcnodes_lock);
 980                         error = EIO;
 981                 }
 982         } else {
 983                 fsp = pc_mounttab;
 984                 while (fsp != NULL) {
 985                         if (fsp->pcfs_flags & PCFS_IRRECOV) {
 986                                 rw_enter(&pcnodes_lock, RW_WRITER);
 987                                 pc_diskchanged(fsp);
 988                                 rw_exit(&pcnodes_lock);
 989                                 error = EIO;
 990                                 break;
 991                         }
 992                         error = pc_syncfsnodes(fsp);
 993                         if (error) break;
 994                         fsp = fsp->pcfs_nxt;
 995                 }
 996         }
 997         mutex_exit(&pcfslock);
 998         return (error);
 999 }
1000 
1001 int
1002 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1003 {
1004         int err;
1005 
1006         if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1007                 return (EIO);
1008 
1009         if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1010                 fsp->pcfs_count++;
1011         } else {
1012                 mutex_enter(&fsp->pcfs_lock);
1013                 if (fsp->pcfs_flags & PCFS_LOCKED)
1014                         panic("pc_lockfs");
1015                 /*
1016                  * We check the IRRECOV bit again just in case somebody
1017                  * snuck past the initial check but then got held up before
1018                  * they could grab the lock.  (And in the meantime someone
1019                  * had grabbed the lock and set the bit)
1020                  */
1021                 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1022                         if ((err = pc_getfat(fsp))) {
1023                                 mutex_exit(&fsp->pcfs_lock);
1024                                 return (err);
1025                         }
1026                 }
1027                 fsp->pcfs_flags |= PCFS_LOCKED;
1028                 fsp->pcfs_owner = curthread;
1029                 fsp->pcfs_count++;
1030         }
1031         return (0);
1032 }
1033 
1034 void
1035 pc_unlockfs(struct pcfs *fsp)
1036 {
1037 
1038         if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1039                 panic("pc_unlockfs");
1040         if (--fsp->pcfs_count < 0)
1041                 panic("pc_unlockfs: count");
1042         if (fsp->pcfs_count == 0) {
1043                 fsp->pcfs_flags &= ~PCFS_LOCKED;
1044                 fsp->pcfs_owner = 0;
1045                 mutex_exit(&fsp->pcfs_lock);
1046         }
1047 }
1048 
1049 int
1050 pc_syncfat(struct pcfs *fsp)
1051 {
1052         struct buf *bp;
1053         int nfat;
1054         int     error = 0;
1055         struct fat_od_fsi *fsinfo_disk;
1056 
1057         if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1058             !(fsp->pcfs_flags & PCFS_FATMOD))
1059                 return (0);
1060         /*
1061          * write out all copies of FATs
1062          */
1063         fsp->pcfs_flags &= ~PCFS_FATMOD;
1064         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1065         for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1066                 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1067                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1068                 if (error) {
1069                         pc_mark_irrecov(fsp);
1070                         return (EIO);
1071                 }
1072         }
1073         pc_clear_fatchanges(fsp);
1074 
1075         /*
1076          * Write out fsinfo sector.
1077          */
1078         if (IS_FAT32(fsp)) {
1079                 bp = bread(fsp->pcfs_xdev,
1080                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1081                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1082                         error = geterror(bp);
1083                 }
1084                 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1085                 if (!error && FSISIG_OK(fsinfo_disk)) {
1086                         fsinfo_disk->fsi_incore.fs_free_clusters =
1087                             LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1088                         fsinfo_disk->fsi_incore.fs_next_free =
1089                             LE_32(FSINFO_UNKNOWN);
1090                         bwrite2(bp);
1091                         error = geterror(bp);
1092                 }
1093                 brelse(bp);
1094                 if (error) {
1095                         pc_mark_irrecov(fsp);
1096                         return (EIO);
1097                 }
1098         }
1099         return (0);
1100 }
1101 
1102 void
1103 pc_invalfat(struct pcfs *fsp)
1104 {
1105         struct pcfs *xfsp;
1106         int mount_cnt = 0;
1107 
1108         if (fsp->pcfs_fatp == (uchar_t *)0)
1109                 panic("pc_invalfat");
1110         /*
1111          * Release FAT
1112          */
1113         kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1114         fsp->pcfs_fatp = NULL;
1115         kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1116         fsp->pcfs_fat_changemap = NULL;
1117         /*
1118          * Invalidate all the blocks associated with the device.
1119          * Not needed if stateless.
1120          */
1121         for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1122                 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1123                         mount_cnt++;
1124 
1125         if (!mount_cnt)
1126                 binval(fsp->pcfs_xdev);
1127         /*
1128          * close mounted device
1129          */
1130         (void) VOP_CLOSE(fsp->pcfs_devvp,
1131             (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1132             1, (offset_t)0, CRED(), NULL);
1133 }
1134 
1135 void
1136 pc_badfs(struct pcfs *fsp)
1137 {
1138         cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1139             getmajor(fsp->pcfs_devvp->v_rdev),
1140             getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1141 }
1142 
1143 /*
1144  * The problem with supporting NFS on the PCFS filesystem is that there
1145  * is no good place to keep the generation number. The only possible
1146  * place is inside a directory entry. There are a few words that we
1147  * don't use - they store NT & OS/2 attributes, and the creation/last access
1148  * time of the file - but it seems wrong to use them. In addition, directory
1149  * entries come and go. If a directory is removed completely, its directory
1150  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1151  * inode blocks are dedicated for inodes, so the generation numbers are
1152  * permanently kept on the disk.
1153  */
1154 static int
1155 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1156 {
1157         struct pcnode *pcp;
1158         struct pc_fid *pcfid;
1159         struct pcfs *fsp;
1160         struct pcdir *ep;
1161         daddr_t eblkno;
1162         int eoffset;
1163         struct buf *bp;
1164         int error;
1165         pc_cluster32_t  cn;
1166 
1167         pcfid = (struct pc_fid *)fidp;
1168         fsp = VFSTOPCFS(vfsp);
1169 
1170         error = pc_lockfs(fsp, 0, 0);
1171         if (error) {
1172                 *vpp = NULL;
1173                 return (error);
1174         }
1175 
1176         if (pcfid->pcfid_block == 0) {
1177                 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1178                 pcp->pc_flags |= PC_EXTERNAL;
1179                 *vpp = PCTOV(pcp);
1180                 pc_unlockfs(fsp);
1181                 return (0);
1182         }
1183         eblkno = pcfid->pcfid_block;
1184         eoffset = pcfid->pcfid_offset;
1185 
1186         if ((pc_dbtocl(fsp,
1187             eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1188             (eoffset > fsp->pcfs_clsize)) {
1189                 pc_unlockfs(fsp);
1190                 *vpp = NULL;
1191                 return (EINVAL);
1192         }
1193 
1194         if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1195             < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1196                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1197                     fsp->pcfs_clsize);
1198         } else {
1199                 /*
1200                  * This is an access "backwards" into the FAT12/FAT16
1201                  * root directory. A better code structure would
1202                  * significantly improve maintainability here ...
1203                  */
1204                 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1205                     (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1206         }
1207         if (bp->b_flags & (B_ERROR | B_STALE)) {
1208                 error = geterror(bp);
1209                 brelse(bp);
1210                 if (error)
1211                         pc_mark_irrecov(fsp);
1212                 *vpp = NULL;
1213                 pc_unlockfs(fsp);
1214                 return (error);
1215         }
1216         ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1217         /*
1218          * Ok, if this is a valid file handle that we gave out,
1219          * then simply ensuring that the creation time matches,
1220          * the entry has not been deleted, and it has a valid first
1221          * character should be enough.
1222          *
1223          * Unfortunately, verifying that the <blkno, offset> _still_
1224          * refers to a directory entry is not easy, since we'd have
1225          * to search _all_ directories starting from root to find it.
1226          * That's a high price to pay just in case somebody is forging
1227          * file handles. So instead we verify that as much of the
1228          * entry is valid as we can:
1229          *
1230          * 1. The starting cluster is 0 (unallocated) or valid
1231          * 2. It is not an LFN entry
1232          * 3. It is not hidden (unless mounted as such)
1233          * 4. It is not the label
1234          */
1235         cn = pc_getstartcluster(fsp, ep);
1236         /*
1237          * if the starting cluster is valid, but not valid according
1238          * to pc_validcl(), force it to be to simplify the following if.
1239          */
1240         if (cn == 0)
1241                 cn = PCF_FIRSTCLUSTER;
1242         if (IS_FAT32(fsp)) {
1243                 if (cn >= PCF_LASTCLUSTER32)
1244                         cn = PCF_FIRSTCLUSTER;
1245         } else {
1246                 if (cn >= PCF_LASTCLUSTER)
1247                         cn = PCF_FIRSTCLUSTER;
1248         }
1249         if ((!pc_validcl(fsp, cn)) ||
1250             (PCDL_IS_LFN(ep)) ||
1251             (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1252             ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1253                 bp->b_flags |= B_STALE | B_AGE;
1254                 brelse(bp);
1255                 pc_unlockfs(fsp);
1256                 return (EINVAL);
1257         }
1258         if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1259             (ep->pcd_filename[0] != PCD_ERASED) &&
1260             (pc_validchar(ep->pcd_filename[0]) ||
1261             (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1262                 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1263                 pcp->pc_flags |= PC_EXTERNAL;
1264                 *vpp = PCTOV(pcp);
1265         } else {
1266                 *vpp = NULL;
1267         }
1268         bp->b_flags |= B_STALE | B_AGE;
1269         brelse(bp);
1270         pc_unlockfs(fsp);
1271         return (0);
1272 }
1273 
1274 /*
1275  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1276  * a meg), so we can't bread() it all in at once. This routine reads a
1277  * fat a chunk at a time.
1278  */
1279 static int
1280 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1281 {
1282         struct buf *bp;
1283         size_t off;
1284         size_t readsize;
1285         daddr_t diskblk;
1286         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1287         daddr_t start = fsp->pcfs_fatstart;
1288 
1289         readsize = fsp->pcfs_clsize;
1290         for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1291                 if (readsize > (fatsize - off))
1292                         readsize = fatsize - off;
1293                 diskblk = pc_dbdaddr(fsp, start +
1294                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1295                 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1296                 if (bp->b_flags & (B_ERROR | B_STALE)) {
1297                         brelse(bp);
1298                         return (EIO);
1299                 }
1300                 bp->b_flags |= B_STALE | B_AGE;
1301                 bcopy(bp->b_un.b_addr, fatp, readsize);
1302                 brelse(bp);
1303         }
1304         return (0);
1305 }
1306 
1307 /*
1308  * We write the FAT out a _lot_, in order to make sure that it
1309  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1310  * the FAT might be a couple of megabytes, and writing it all out just
1311  * because we created or deleted a small file is painful (especially
1312  * since we do it for each alternate FAT too). So instead, for FAT16 and
1313  * FAT32 we only write out the bit that has changed. We don't clear
1314  * the 'updated' fields here because the caller might be writing out
1315  * several FATs, so the caller must use pc_clear_fatchanges() after
1316  * all FATs have been updated.
1317  * This function doesn't take "start" from fsp->pcfs_dosstart because
1318  * callers can use it to write either the primary or any of the alternate
1319  * FAT tables.
1320  */
1321 static int
1322 pc_writefat(struct pcfs *fsp, daddr_t start)
1323 {
1324         struct buf *bp;
1325         size_t off;
1326         size_t writesize;
1327         int     error;
1328         uchar_t *fatp = fsp->pcfs_fatp;
1329         size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1330 
1331         writesize = fsp->pcfs_clsize;
1332         for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1333                 if (writesize > (fatsize - off))
1334                         writesize = fatsize - off;
1335                 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1336                         continue;
1337                 }
1338                 bp = ngeteblk(writesize);
1339                 bp->b_edev = fsp->pcfs_xdev;
1340                 bp->b_dev = cmpdev(bp->b_edev);
1341                 bp->b_blkno = pc_dbdaddr(fsp, start +
1342                     pc_cltodb(fsp, pc_lblkno(fsp, off)));
1343                 bcopy(fatp, bp->b_un.b_addr, writesize);
1344                 bwrite2(bp);
1345                 error = geterror(bp);
1346                 brelse(bp);
1347                 if (error) {
1348                         return (error);
1349                 }
1350         }
1351         return (0);
1352 }
1353 
1354 /*
1355  * Mark the FAT cluster that 'cn' is stored in as modified.
1356  */
1357 void
1358 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1359 {
1360         pc_cluster32_t  bn;
1361         size_t          size;
1362 
1363         /* which fat block is the cluster number stored in? */
1364         if (IS_FAT32(fsp)) {
1365                 size = sizeof (pc_cluster32_t);
1366                 bn = pc_lblkno(fsp, cn * size);
1367                 fsp->pcfs_fat_changemap[bn] = 1;
1368         } else if (IS_FAT16(fsp)) {
1369                 size = sizeof (pc_cluster16_t);
1370                 bn = pc_lblkno(fsp, cn * size);
1371                 fsp->pcfs_fat_changemap[bn] = 1;
1372         } else {
1373                 offset_t off;
1374                 pc_cluster32_t nbn;
1375 
1376                 ASSERT(IS_FAT12(fsp));
1377                 off = cn + (cn >> 1);
1378                 bn = pc_lblkno(fsp, off);
1379                 fsp->pcfs_fat_changemap[bn] = 1;
1380                 /* does this field wrap into the next fat cluster? */
1381                 nbn = pc_lblkno(fsp, off + 1);
1382                 if (nbn != bn) {
1383                         fsp->pcfs_fat_changemap[nbn] = 1;
1384                 }
1385         }
1386 }
1387 
1388 /*
1389  * return whether the FAT cluster 'bn' is updated and needs to
1390  * be written out.
1391  */
1392 int
1393 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1394 {
1395         return (fsp->pcfs_fat_changemap[bn] == 1);
1396 }
1397 
1398 /*
1399  * Implementation of VFS_FREEVFS() to support forced umounts.
1400  * This is called by the vfs framework after umount, to trigger
1401  * the release of any resources still associated with the given
1402  * vfs_t once the need to keep them has gone away.
1403  */
1404 void
1405 pcfs_freevfs(vfs_t *vfsp)
1406 {
1407         struct pcfs *fsp = VFSTOPCFS(vfsp);
1408 
1409         mutex_enter(&pcfslock);
1410         /*
1411          * Purging the FAT closes the device - can't do any more
1412          * I/O after this.
1413          */
1414         if (fsp->pcfs_fatp != (uchar_t *)0)
1415                 pc_invalfat(fsp);
1416         mutex_exit(&pcfslock);
1417 
1418         VN_RELE(fsp->pcfs_devvp);
1419         mutex_destroy(&fsp->pcfs_lock);
1420         kmem_free(fsp, sizeof (*fsp));
1421 
1422         /*
1423          * Allow _fini() to succeed now, if so desired.
1424          */
1425         atomic_dec_32(&pcfs_mountcount);
1426 }
1427 
1428 
1429 /*
1430  * PC-style partition parsing and FAT BPB identification/validation code.
1431  * The partition parsers here assume:
1432  *      - a FAT filesystem will be in a partition that has one of a set of
1433  *        recognized partition IDs
1434  *      - the user wants the 'numbering' (C:, D:, ...) that one would get
1435  *        on MSDOS 6.x.
1436  *        That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1437  *        will not factor in the enumeration.
1438  * These days, such assumptions should be revisited. FAT is no longer the
1439  * only game in 'PC town'.
1440  */
1441 /*
1442  * isDosDrive()
1443  *      Boolean function.  Give it the systid field for an fdisk partition
1444  *      and it decides if that's a systid that describes a DOS drive.  We
1445  *      use systid values defined in sys/dktp/fdisk.h.
1446  */
1447 static int
1448 isDosDrive(uchar_t checkMe)
1449 {
1450         return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1451             (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1452             (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1453             (checkMe == DIAGPART));
1454 }
1455 
1456 
1457 /*
1458  * isDosExtended()
1459  *      Boolean function.  Give it the systid field for an fdisk partition
1460  *      and it decides if that's a systid that describes an extended DOS
1461  *      partition.
1462  */
1463 static int
1464 isDosExtended(uchar_t checkMe)
1465 {
1466         return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1467 }
1468 
1469 
1470 /*
1471  * isBootPart()
1472  *      Boolean function.  Give it the systid field for an fdisk partition
1473  *      and it decides if that's a systid that describes a Solaris boot
1474  *      partition.
1475  */
1476 static int
1477 isBootPart(uchar_t checkMe)
1478 {
1479         return (checkMe == X86BOOT);
1480 }
1481 
1482 
1483 /*
1484  * noLogicalDrive()
1485  *      Display error message about not being able to find a logical
1486  *      drive.
1487  */
1488 static void
1489 noLogicalDrive(int ldrive)
1490 {
1491         if (ldrive == BOOT_PARTITION_DRIVE) {
1492                 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1493         } else {
1494                 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1495         }
1496 }
1497 
1498 
1499 /*
1500  * findTheDrive()
1501  *      Discover offset of the requested logical drive, and return
1502  *      that offset (startSector), the systid of that drive (sysid),
1503  *      and a buffer pointer (bp), with the buffer contents being
1504  *      the first sector of the logical drive (i.e., the sector that
1505  *      contains the BPB for that drive).
1506  *
1507  * Note: this code is not capable of addressing >2TB disks, as it uses
1508  *       daddr_t not diskaddr_t, some of the calculations would overflow
1509  */
/*
 * COPY_PTBL(sector, table)
 *      bcopy() the FD_NUMPART 'struct ipart' entries located at the
 *      'parts' member of the 'struct mboot' at 'sector' into 'table'.
 */
#define COPY_PTBL(sector, table)                                \
        bcopy(&(((struct mboot *)(sector))->parts), (table),    \
            sizeof (struct ipart) * FD_NUMPART)
1513 
1514 static int
1515 findTheDrive(struct pcfs *fsp, buf_t **bp)
1516 {
1517         int ldrive = fsp->pcfs_ldrive;
1518         dev_t dev = fsp->pcfs_devvp->v_rdev;
1519 
1520         struct ipart dosp[FD_NUMPART];  /* incore fdisk partition structure */
1521         daddr_t lastseek = 0;           /* Disk block we sought previously */
1522         daddr_t diskblk = 0;            /* Disk block to get */
1523         daddr_t xstartsect;             /* base of Extended DOS partition */
1524         int logicalDriveCount = 0;      /* Count of logical drives seen */
1525         int extendedPart = -1;          /* index of extended dos partition */
1526         int primaryPart = -1;           /* index of primary dos partition */
1527         int bootPart = -1;              /* index of a Solaris boot partition */
1528         uint32_t xnumsect = 0;          /* length of extended DOS partition */
1529         int driveIndex;                 /* computed FDISK table index */
1530         daddr_t startsec;
1531         len_t mediasize;
1532         int i;
1533         /*
1534          * Count of drives in the current extended partition's
1535          * FDISK table, and indexes of the drives themselves.
1536          */
1537         int extndDrives[FD_NUMPART];
1538         int numDrives = 0;
1539 
1540         /*
1541          * Count of drives (beyond primary) in master boot record's
1542          * FDISK table, and indexes of the drives themselves.
1543          */
1544         int extraDrives[FD_NUMPART];
1545         int numExtraDrives = 0;
1546 
1547         /*
1548          * "ldrive == 0" should never happen, as this is a request to
1549          * mount the physical device (and ignore partitioning). The code
1550          * in pcfs_mount() should have made sure that a logical drive number
1551          * is at least 1, meaning we're looking for drive "C:". It is not
1552          * safe (and a bug in the callers of this function) to request logical
1553          * drive number 0; we could ASSERT() but a graceful EIO is a more
1554          * polite way.
1555          */
1556         if (ldrive == 0) {
1557                 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1558                 noLogicalDrive(ldrive);
1559                 return (EIO);
1560         }
1561 
1562         /*
1563          *  Copy from disk block into memory aligned structure for fdisk usage.
1564          */
1565         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1566 
1567         /*
1568          * This check is ok because a FAT BPB and a master boot record (MBB)
1569          * have the same signature, in the same position within the block.
1570          */
1571         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1572                 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1573                     "device (%x.%x):%d\n",
1574                     getmajor(dev), getminor(dev), ldrive);
1575                 return (EINVAL);
1576         }
1577 
1578         /*
1579          * Get a summary of what is in the Master FDISK table.
1580          * Normally we expect to find one partition marked as a DOS drive.
1581          * This partition is the one Windows calls the primary dos partition.
1582          * If the machine has any logical drives then we also expect
1583          * to find a partition marked as an extended DOS partition.
1584          *
1585          * Sometimes we'll find multiple partitions marked as DOS drives.
1586          * The Solaris fdisk program allows these partitions
1587          * to be created, but Windows fdisk no longer does.  We still need
1588          * to support these, though, since Windows does.  We also need to fix
1589          * our fdisk to behave like the Windows version.
1590          *
1591          * It turns out that some off-the-shelf media have *only* an
1592          * Extended partition, so we need to deal with that case as well.
1593          *
1594          * Only a single (the first) Extended or Boot Partition will
1595          * be recognized.  Any others will be ignored.
1596          */
1597         for (i = 0; i < FD_NUMPART; i++) {
1598                 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1599                     uint_t, (uint_t)dosp[i].systid,
1600                     uint_t, LE_32(dosp[i].relsect),
1601                     uint_t, LE_32(dosp[i].numsect));
1602 
1603                 if (isDosDrive(dosp[i].systid)) {
1604                         if (primaryPart < 0) {
1605                                 logicalDriveCount++;
1606                                 primaryPart = i;
1607                         } else {
1608                                 extraDrives[numExtraDrives++] = i;
1609                         }
1610                         continue;
1611                 }
1612                 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1613                         extendedPart = i;
1614                         continue;
1615                 }
1616                 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1617                         bootPart = i;
1618                         continue;
1619                 }
1620         }
1621 
1622         if (ldrive == BOOT_PARTITION_DRIVE) {
1623                 if (bootPart < 0) {
1624                         noLogicalDrive(ldrive);
1625                         return (EINVAL);
1626                 }
1627                 startsec = LE_32(dosp[bootPart].relsect);
1628                 mediasize = LE_32(dosp[bootPart].numsect);
1629                 goto found;
1630         }
1631 
1632         if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1633                 startsec = LE_32(dosp[primaryPart].relsect);
1634                 mediasize = LE_32(dosp[primaryPart].numsect);
1635                 goto found;
1636         }
1637 
1638         /*
1639          * We are not looking for the C: drive (or the primary drive
1640          * was not found), so we had better have an extended partition
1641          * or extra drives in the Master FDISK table.
1642          */
1643         if ((extendedPart < 0) && (numExtraDrives == 0)) {
1644                 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1645                 noLogicalDrive(ldrive);
1646                 return (EINVAL);
1647         }
1648 
1649         if (extendedPart >= 0) {
1650                 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1651                 xnumsect = LE_32(dosp[extendedPart].numsect);
1652                 do {
1653                         /*
1654                          *  If the seek would not cause us to change
1655                          *  position on the drive, then we're out of
1656                          *  extended partitions to examine.
1657                          */
1658                         if (diskblk == lastseek)
1659                                 break;
1660                         logicalDriveCount += numDrives;
1661                         /*
1662                          *  Seek the next extended partition, and find
1663                          *  logical drives within it.
1664                          */
1665                         brelse(*bp);
1666                         /*
1667                          * bread() block numbers are multiples of DEV_BSIZE
1668                          * but the device sector size (the unit of partitioning)
1669                          * might be larger than that; pcfs_get_device_info()
1670                          * has calculated the multiplicator for us.
1671                          */
1672                         *bp = bread(dev,
1673                             pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1674                         if ((*bp)->b_flags & B_ERROR) {
1675                                 return (EIO);
1676                         }
1677 
1678                         lastseek = diskblk;
1679                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1680                         if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1681                                 cmn_err(CE_NOTE, "!pcfs: "
1682                                     "extended partition table signature err, "
1683                                     "device (%x.%x):%d, LBA %u",
1684                                     getmajor(dev), getminor(dev), ldrive,
1685                                     (uint_t)pc_dbdaddr(fsp, diskblk));
1686                                 return (EINVAL);
1687                         }
1688                         /*
1689                          *  Count up drives, and track where the next
1690                          *  extended partition is in case we need it.  We
1691                          *  are expecting only one extended partition.  If
1692                          *  there is more than one we'll only go to the
1693                          *  first one we see, but warn about ignoring.
1694                          */
1695                         numDrives = 0;
1696                         for (i = 0; i < FD_NUMPART; i++) {
1697                                 DTRACE_PROBE4(extendedpart,
1698                                     struct pcfs *, fsp,
1699                                     uint_t, (uint_t)dosp[i].systid,
1700                                     uint_t, LE_32(dosp[i].relsect),
1701                                     uint_t, LE_32(dosp[i].numsect));
1702                                 if (isDosDrive(dosp[i].systid)) {
1703                                         extndDrives[numDrives++] = i;
1704                                 } else if (isDosExtended(dosp[i].systid)) {
1705                                         if (diskblk != lastseek) {
1706                                                 /*
1707                                                  * Already found an extended
1708                                                  * partition in this table.
1709                                                  */
1710                                                 cmn_err(CE_NOTE,
1711                                                     "!pcfs: ignoring unexpected"
1712                                                     " additional extended"
1713                                                     " partition");
1714                                         } else {
1715                                                 diskblk = xstartsect +
1716                                                     LE_32(dosp[i].relsect);
1717                                         }
1718                                 }
1719                         }
1720                 } while (ldrive > logicalDriveCount + numDrives);
1721 
1722                 ASSERT(numDrives <= FD_NUMPART);
1723 
1724                 if (ldrive <= logicalDriveCount + numDrives) {
1725                         /*
1726                          * The number of logical drives we've found thus
1727                          * far is enough to get us to the one we were
1728                          * searching for.
1729                          */
1730                         driveIndex = logicalDriveCount + numDrives - ldrive;
1731                         mediasize =
1732                             LE_32(dosp[extndDrives[driveIndex]].numsect);
1733                         startsec =
1734                             LE_32(dosp[extndDrives[driveIndex]].relsect) +
1735                             lastseek;
1736                         if (startsec > (xstartsect + xnumsect)) {
1737                                 cmn_err(CE_NOTE, "!pcfs: extended partition "
1738                                     "values bad");
1739                                 return (EINVAL);
1740                         }
1741                         goto found;
1742                 } else {
1743                         /*
1744                          * We ran out of extended dos partition
1745                          * drives.  The only hope now is to go
1746                          * back to extra drives defined in the master
1747                          * fdisk table.  But we overwrote that table
1748                          * already, so we must load it in again.
1749                          */
1750                         logicalDriveCount += numDrives;
1751                         brelse(*bp);
1752                         ASSERT(fsp->pcfs_dosstart == 0);
1753                         *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1754                             fsp->pcfs_secsize);
1755                         if ((*bp)->b_flags & B_ERROR) {
1756                                 return (EIO);
1757                         }
1758                         COPY_PTBL((*bp)->b_un.b_addr, dosp);
1759                 }
1760         }
1761         /*
1762          *  Still haven't found the drive, is it an extra
1763          *  drive defined in the main FDISK table?
1764          */
1765         if (ldrive <= logicalDriveCount + numExtraDrives) {
1766                 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1767                 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1768                 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1769                 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1770                 goto found;
1771         }
1772         /*
1773          *  Still haven't found the drive, and there is
1774          *  nowhere else to look.
1775          */
1776         noLogicalDrive(ldrive);
1777         return (EINVAL);
1778 
1779 found:
1780         /*
1781          * We need this value in units of sectorsize, because PCFS' internal
1782          * offset calculations go haywire for > 512Byte sectors unless all
1783          * pcfs_.*start values are in units of sectors.
1784          * So, assign before the capacity check (that's done in DEV_BSIZE)
1785          */
1786         fsp->pcfs_dosstart = startsec;
1787 
1788         /*
1789          * convert from device sectors to proper units:
1790          *      - starting sector: DEV_BSIZE (as argument to bread())
1791          *      - media size: Bytes
1792          */
1793         startsec = pc_dbdaddr(fsp, startsec);
1794         mediasize *= fsp->pcfs_secsize;
1795 
1796         /*
1797          * some additional validation / warnings in case the partition table
1798          * and the actual media capacity are not in accordance ...
1799          */
1800         if (fsp->pcfs_mediasize != 0) {
1801                 diskaddr_t startoff =
1802                     (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1803 
1804                 if (startoff >= fsp->pcfs_mediasize ||
1805                     startoff + mediasize > fsp->pcfs_mediasize) {
1806                         cmn_err(CE_WARN,
1807                             "!pcfs: partition size (LBA start %u, %lld bytes, "
1808                             "device (%x.%x):%d) smaller than "
1809                             "mediasize (%lld bytes).\n"
1810                             "filesystem may be truncated, access errors "
1811                             "may result.\n",
1812                             (uint_t)startsec, (long long)mediasize,
1813                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1814                             fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1815                 }
1816         } else {
1817                 fsp->pcfs_mediasize = mediasize;
1818         }
1819 
1820         return (0);
1821 }
1822 
1823 
1824 static fattype_t
1825 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1826 {
1827         uint32_t ncl = fsp->pcfs_ncluster;
1828 
1829         if (ncl <= 4096) {
1830                 if (bpb_get_FatSz16(bpb) == 0)
1831                         return (FAT_UNKNOWN);
1832 
1833                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1834                     bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1835                         return (FAT12);
1836                 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1837                         return (FAT12);
1838                 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1839                         return (FAT16);
1840 
1841                 switch (bpb_get_Media(bpb)) {
1842                         case SS8SPT:
1843                         case DS8SPT:
1844                         case SS9SPT:
1845                         case DS9SPT:
1846                         case DS18SPT:
1847                         case DS9_15SPT:
1848                                 /*
1849                                  * Is this reliable - all floppies are FAT12 ?
1850                                  */
1851                                 return (FAT12);
1852                         case MD_FIXED:
1853                                 /*
1854                                  * Is this reliable - disks are always FAT16 ?
1855                                  */
1856                                 return (FAT16);
1857                         default:
1858                                 break;
1859                 }
1860         } else if (ncl <= 65536) {
1861                 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1862                         return (FAT32);
1863                 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1864                         return (FAT32);
1865                 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1866                         return (FAT32);
1867 
1868                 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1869                         return (FAT16);
1870                 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1871                         return (FAT16);
1872         }
1873 
1874         /*
1875          * We don't know
1876          */
1877         return (FAT_UNKNOWN);
1878 }
1879 
1880 /*
1881  * Check to see if the BPB we found is correct.
1882  *
1883  * This looks far more complicated that it needs to be for pure structural
1884  * validation. The reason for this is that parseBPB() is also used for
1885  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1886  * BPB fields (do not) have 'known good' values, even if we (do not) reject
1887  * the BPB when attempting to mount the filesystem.
1888  *
1889  * Real-world usage of FAT shows there are a lot of corner-case situations
1890  * and, following the specification strictly, invalid filesystems out there.
1891  * Known are situations such as:
1892  *      - FAT12/FAT16 filesystems with garbage in either totsec16/32
1893  *        instead of the zero in one of the fields mandated by the spec
1894  *      - filesystems that claim to be larger than the partition they're in
1895  *      - filesystems without valid media descriptor
1896  *      - FAT32 filesystems with RootEntCnt != 0
1897  *      - FAT32 filesystems with less than 65526 clusters
1898  *      - FAT32 filesystems without valid FSI sector
1899  *      - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1900  *
1901  * Such filesystems are accessible by PCFS - if it'd know to start with that
1902  * the filesystem should be treated as a specific FAT type. Before S10, it
1903  * relied on the PC/fdisk partition type for the purpose and almost completely
1904  * ignored the BPB; now it ignores the partition type for anything else but
1905  * logical drive enumeration, which can result in rejection of (invalid)
1906  * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1907  * has less than 65526 clusters.
1908  *
1909  * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1910  * not possible to allow all such mostly-compliant filesystems in unless one
1911  * accepts false positives (definitely invalid filesystems that cause problems
1912  * later). This at least allows to pinpoint why the mount failed.
1913  *
1914  * Due to the use of FAT on removeable media, all relaxations of the rules
1915  * here need to be carefully evaluated wrt. to potential effects on PCFS
1916  * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1917  * beware.
1918  */
1919 static int
1920 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1921 {
1922         fattype_t type;
1923 
1924         uint32_t        ncl;    /* number of clusters in file area */
1925         uint32_t        rec;
1926         uint32_t        reserved;
1927         uint32_t        fsisec, bkbootsec;
1928         blkcnt_t        totsec, totsec16, totsec32, datasec;
1929         size_t          fatsec, fatsec16, fatsec32, rdirsec;
1930         size_t          secsize;
1931         len_t           mediasize;
1932         uint64_t        validflags = 0;
1933 
1934         if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1935                 validflags |= BPB_BPBSIG_OK;
1936 
1937         rec = bpb_get_RootEntCnt(bpb);
1938         reserved = bpb_get_RsvdSecCnt(bpb);
1939         fsisec = bpb_get_FSInfo32(bpb);
1940         bkbootsec = bpb_get_BkBootSec32(bpb);
1941         totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1942         totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1943         fatsec16 = bpb_get_FatSz16(bpb);
1944         fatsec32 = bpb_get_FatSz32(bpb);
1945 
1946         totsec = totsec16 ? totsec16 : totsec32;
1947         fatsec = fatsec16 ? fatsec16 : fatsec32;
1948 
1949         secsize = bpb_get_BytesPerSec(bpb);
1950         if (!VALID_SECSIZE(secsize))
1951                 secsize = fsp->pcfs_secsize;
1952         if (secsize != fsp->pcfs_secsize) {
1953                 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1954                     getmajor(fsp->pcfs_xdev),
1955                     getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1956                 PC_DPRINTF2(3, "!BPB secsize %d != "
1957                     "autodetected media block size %d\n",
1958                     (int)secsize, (int)fsp->pcfs_secsize);
1959                 if (fsp->pcfs_ldrive) {
1960                         /*
1961                          * We've already attempted to parse the partition
1962                          * table. If the block size used for that don't match
1963                          * the PCFS sector size, we're hosed one way or the
1964                          * other. Just try what happens.
1965                          */
1966                         secsize = fsp->pcfs_secsize;
1967                         PC_DPRINTF1(3,
1968                             "!pcfs: Using autodetected secsize %d\n",
1969                             (int)secsize);
1970                 } else {
1971                         /*
1972                          * This allows mounting lofi images of PCFS partitions
1973                          * with sectorsize != DEV_BSIZE. We can't parse the
1974                          * partition table on whole-disk images unless the
1975                          * (undocumented) "secsize=..." mount option is used,
1976                          * but at least this allows us to mount if we have
1977                          * an image of a partition.
1978                          */
1979                         PC_DPRINTF1(3,
1980                             "!pcfs: Using BPB secsize %d\n", (int)secsize);
1981                 }
1982         }
1983 
1984         if (fsp->pcfs_mediasize == 0) {
1985                 mediasize = (len_t)totsec * (len_t)secsize;
1986                 /*
1987                  * This is not an error because not all devices support the
1988                  * dkio(7i) mediasize queries, and/or not all devices are
1989                  * partitioned. If we have not been able to figure out the
1990                  * size of the underlaying medium, we have to trust the BPB.
1991                  */
1992                 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1993                     "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1994                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1995                     fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1996         } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1997                 cmn_err(CE_WARN,
1998                     "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1999                     "FAT BPB mediasize (%lld Bytes).\n"
2000                     "truncated filesystem on device (%x.%x):%d, access errors "
2001                     "possible.\n",
2002                     (long long)fsp->pcfs_mediasize,
2003                     (long long)(totsec * (blkcnt_t)secsize),
2004                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2005                     fsp->pcfs_ldrive);
2006                 mediasize = fsp->pcfs_mediasize;
2007         } else {
2008                 /*
2009                  * This is actually ok. A FAT needs not occupy the maximum
2010                  * space available in its partition, it can be shorter.
2011                  */
2012                 mediasize = (len_t)totsec * (len_t)secsize;
2013         }
2014 
2015         /*
2016          * Since we let just about anything pass through this function,
2017          * fence against divide-by-zero here.
2018          */
2019         if (secsize)
2020                 rdirsec = roundup(rec * 32, secsize) / secsize;
2021         else
2022                 rdirsec = 0;
2023 
2024         /*
2025          * This assignment is necessary before pc_dbdaddr() can first be
2026          * used. Must initialize the value here.
2027          */
2028         fsp->pcfs_secsize = secsize;
2029         fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2030 
2031         fsp->pcfs_mediasize = mediasize;
2032 
2033         fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2034         fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2035         fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2036         fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2037         fsp->pcfs_rdirsec = rdirsec;
2038 
2039         /*
2040          * Remember: All PCFS offset calculations in sectors. Before I/O
2041          * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2042          * necessary so that media with > 512Byte sector sizes work correctly.
2043          */
2044         fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2045         fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2046         fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2047         datasec = totsec -
2048             (blkcnt_t)fatsec * fsp->pcfs_numfat -
2049             (blkcnt_t)rdirsec -
2050             (blkcnt_t)reserved;
2051 
2052         DTRACE_PROBE4(fatgeometry,
2053             blkcnt_t, totsec, size_t, fatsec,
2054             size_t, rdirsec, blkcnt_t, datasec);
2055 
2056         /*
2057          * 'totsec' is taken directly from the BPB and guaranteed to fit
2058          * into a 32bit unsigned integer. The calculation of 'datasec',
2059          * on the other hand, could underflow for incorrect values in
2060          * rdirsec/reserved/fatsec. Check for that.
2061          * We also check that the BPB conforms to the FAT specification's
2062          * requirement that either of the 16/32bit total sector counts
2063          * must be zero.
2064          */
2065         if (totsec != 0 &&
2066             (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2067             datasec < totsec && datasec <= UINT32_MAX)
2068                 validflags |= BPB_TOTSEC_OK;
2069 
2070         if ((len_t)totsec * (len_t)secsize <= mediasize)
2071                 validflags |= BPB_MEDIASZ_OK;
2072 
2073         if (VALID_SECSIZE(secsize))
2074                 validflags |= BPB_SECSIZE_OK;
2075         if (VALID_SPCL(fsp->pcfs_spcl))
2076                 validflags |= BPB_SECPERCLUS_OK;
2077         if (VALID_CLSIZE(fsp->pcfs_clsize))
2078                 validflags |= BPB_CLSIZE_OK;
2079         if (VALID_NUMFATS(fsp->pcfs_numfat))
2080                 validflags |= BPB_NUMFAT_OK;
2081         if (VALID_RSVDSEC(reserved) && reserved < totsec)
2082                 validflags |= BPB_RSVDSECCNT_OK;
2083         if (VALID_MEDIA(fsp->pcfs_mediadesc))
2084                 validflags |= BPB_MEDIADESC_OK;
2085         if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2086                 validflags |= BPB_BOOTSIG16_OK;
2087         if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2088                 validflags |= BPB_BOOTSIG32_OK;
2089         if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2090                 validflags |= BPB_FSTYPSTR16_OK;
2091         if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2092                 validflags |= BPB_FSTYPSTR32_OK;
2093         if (VALID_OEMNAME(bpb_OEMName(bpb)))
2094                 validflags |= BPB_OEMNAME_OK;
2095         if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2096                 validflags |= BPB_BKBOOTSEC_OK;
2097         if (fsisec > 0 && fsisec <= reserved)
2098                 validflags |= BPB_FSISEC_OK;
2099         if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2100                 validflags |= BPB_JMPBOOT_OK;
2101         if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2102                 validflags |= BPB_FSVER_OK;
2103         if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2104                 validflags |= BPB_VOLLAB16_OK;
2105         if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2106                 validflags |= BPB_VOLLAB32_OK;
2107         if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2108                 validflags |= BPB_EXTFLAGS_OK;
2109 
2110         /*
2111          * Try to determine which FAT format to use.
2112          *
2113          * Calculate the number of clusters in order to determine
2114          * the type of FAT we are looking at.  This is the only
2115          * recommended way of determining FAT type, though there
2116          * are other hints in the data, this is the best way.
2117          *
2118          * Since we let just about "anything" pass through this function
2119          * without early exits, fence against divide-by-zero here.
2120          *
2121          * datasec was already validated against UINT32_MAX so we know
2122          * the result will not overflow the 32bit calculation.
2123          */
2124         if (fsp->pcfs_spcl)
2125                 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2126         else
2127                 ncl = 0;
2128 
2129         fsp->pcfs_ncluster = ncl;
2130 
2131         /*
2132          * From the Microsoft FAT specification:
2133          * In the following example, when it says <, it does not mean <=.
2134          * Note also that the numbers are correct.  The first number for
2135          * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2136          * and the '<' signs are not wrong.
2137          *
2138          * We "specialdetect" the corner cases, and use at least one "extra"
2139          * criterion to decide whether it's FAT16 or FAT32 if the cluster
2140          * count is dangerously close to the boundaries.
2141          */
2142 
2143         if (ncl <= PCF_FIRSTCLUSTER) {
2144                 type = FAT_UNKNOWN;
2145         } else if (ncl < 4085) {
2146                 type = FAT12;
2147         } else if (ncl <= 4096) {
2148                 type = FAT_QUESTIONABLE;
2149         } else if (ncl < 65525) {
2150                 type = FAT16;
2151         } else if (ncl <= 65536) {
2152                 type = FAT_QUESTIONABLE;
2153         } else if (ncl < PCF_LASTCLUSTER32) {
2154                 type = FAT32;
2155         } else {
2156                 type = FAT_UNKNOWN;
2157         }
2158 
2159         DTRACE_PROBE4(parseBPB__initial,
2160             struct pcfs *, fsp, unsigned char *, bpb,
2161             int, validflags, fattype_t, type);
2162 
2163 recheck:
2164         fsp->pcfs_fatsec = fatsec;
2165 
2166         /* Do some final sanity checks for each specific type of FAT */
2167         switch (type) {
2168                 case FAT12:
2169                         if (rec != 0)
2170                                 validflags |= BPB_ROOTENTCNT_OK;
2171                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2172                             bpb_get_TotSec16(bpb) == 0)
2173                                 validflags |= BPB_TOTSEC16_OK;
2174                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2175                             bpb_get_TotSec32(bpb) == 0)
2176                                 validflags |= BPB_TOTSEC32_OK;
2177                         if (bpb_get_FatSz16(bpb) == fatsec)
2178                                 validflags |= BPB_FATSZ16_OK;
2179                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2180                             * 3 / 2)
2181                                 validflags |= BPB_FATSZ_OK;
2182                         if (ncl < 4085)
2183                                 validflags |= BPB_NCLUSTERS_OK;
2184 
2185                         fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2186                         fsp->pcfs_rootblksize =
2187                             fsp->pcfs_rdirsec * secsize;
2188                         fsp->pcfs_fsistart = 0;
2189 
2190                         if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2191                                 type = FAT_UNKNOWN;
2192                         break;
2193                 case FAT16:
2194                         if (rec != 0)
2195                                 validflags |= BPB_ROOTENTCNT_OK;
2196                         if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2197                             bpb_get_TotSec16(bpb) == 0)
2198                                 validflags |= BPB_TOTSEC16_OK;
2199                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2200                             bpb_get_TotSec32(bpb) == 0)
2201                                 validflags |= BPB_TOTSEC32_OK;
2202                         if (bpb_get_FatSz16(bpb) == fatsec)
2203                                 validflags |= BPB_FATSZ16_OK;
2204                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2205                                 validflags |= BPB_FATSZ_OK;
2206                         if (ncl >= 4085 && ncl < 65525)
2207                                 validflags |= BPB_NCLUSTERS_OK;
2208 
2209                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2210                         fsp->pcfs_rootblksize =
2211                             fsp->pcfs_rdirsec * secsize;
2212                         fsp->pcfs_fsistart = 0;
2213 
2214                         if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2215                                 type = FAT_UNKNOWN;
2216                         break;
2217                 case FAT32:
2218                         if (rec == 0)
2219                                 validflags |= BPB_ROOTENTCNT_OK;
2220                         if (bpb_get_TotSec16(bpb) == 0)
2221                                 validflags |= BPB_TOTSEC16_OK;
2222                         if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2223                                 validflags |= BPB_TOTSEC32_OK;
2224                         if (bpb_get_FatSz16(bpb) == 0)
2225                                 validflags |= BPB_FATSZ16_OK;
2226                         if (bpb_get_FatSz32(bpb) == fatsec)
2227                                 validflags |= BPB_FATSZ32_OK;
2228                         if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2229                                 validflags |= BPB_FATSZ_OK;
2230                         if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2231                                 validflags |= BPB_NCLUSTERS_OK;
2232 
2233                         fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2234                         fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2235                         fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2236                         if (validflags & BPB_FSISEC_OK)
2237                                 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2238                         fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2239                         if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2240                                 validflags |= BPB_ROOTCLUSTER_OK;
2241 
2242                         /*
2243                          * Current PCFS code only works if 'pcfs_rdirstart'
2244                          * contains the root cluster number on FAT32.
2245                          * That's a mis-use and would better be changed.
2246                          */
2247                         fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2248 
2249                         if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2250                                 type = FAT_UNKNOWN;
2251                         break;
2252                 case FAT_QUESTIONABLE:
2253                         type = secondaryBPBChecks(fsp, bpb, secsize);
2254                         goto recheck;
2255                 default:
2256                         ASSERT(type == FAT_UNKNOWN);
2257                         break;
2258         }
2259 
2260         ASSERT(type != FAT_QUESTIONABLE);
2261 
2262         fsp->pcfs_fattype = type;
2263 
2264         if (valid)
2265                 *valid = validflags;
2266 
2267         DTRACE_PROBE4(parseBPB__final,
2268             struct pcfs *, fsp, unsigned char *, bpb,
2269             int, validflags, fattype_t, type);
2270 
2271         if (type != FAT_UNKNOWN) {
2272                 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2273                 ASSERT(ISP2(secsize / DEV_BSIZE));
2274                 return (1);
2275         }
2276 
2277         return (0);
2278 }
2279 
2280 
2281 /*
2282  * Detect the device's native block size (sector size).
2283  *
2284  * Test whether the device is:
2285  *      - a floppy device from a known controller type via DKIOCINFO
2286  *      - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2287  *      - a USB floppy drive (identified by drive geometry)
2288  *
2289  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2290  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2291  *
2292  * This might be a bit wasteful on kernel stack space; if anyone's
2293  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2294  */
2295 static void
2296 pcfs_device_getinfo(struct pcfs *fsp)
2297 {
2298         dev_t                   rdev = fsp->pcfs_xdev;
2299         int                     error;
2300         union {
2301                 struct dk_minfo         mi;
2302                 struct dk_cinfo         ci;
2303                 struct dk_geom          gi;
2304                 struct fd_char          fc;
2305         } arg;                          /* save stackspace ... */
2306         intptr_t argp = (intptr_t)&arg;
2307         ldi_handle_t            lh;
2308         ldi_ident_t             li;
2309         int isfloppy, isremoveable, ishotpluggable;
2310         cred_t                  *cr = CRED();
2311 
2312         if (ldi_ident_from_dev(rdev, &li))
2313                 goto out;
2314 
2315         error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2316         ldi_ident_release(li);
2317         if (error)
2318                 goto out;
2319 
2320         /*
2321          * Not sure if this could possibly happen. It'd be a bit like
2322          * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2323          * expecting it, needs some thought if triggered ...
2324          */
2325         ASSERT(fsp->pcfs_xdev == rdev);
2326 
2327         /*
2328          * Check for removeable/hotpluggable media.
2329          */
2330         if (ldi_ioctl(lh, DKIOCREMOVABLE,
2331             (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2332                 isremoveable = 0;
2333         }
2334         if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2335             (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2336                 ishotpluggable = 0;
2337         }
2338 
2339         /*
2340          * Make sure we don't use "half-initialized" values if the ioctls fail.
2341          */
2342         if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2343                 bzero(&arg, sizeof (arg));
2344                 fsp->pcfs_mediasize = 0;
2345         } else {
2346                 fsp->pcfs_mediasize =
2347                     (len_t)arg.mi.dki_lbsize *
2348                     (len_t)arg.mi.dki_capacity;
2349         }
2350 
2351         if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2352                 if (fsp->pcfs_secsize == 0) {
2353                         fsp->pcfs_secsize = arg.mi.dki_lbsize;
2354                         fsp->pcfs_sdshift =
2355                             ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2356                 } else {
2357                         PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2358                             "%d, device (%x.%x), different from user-provided "
2359                             "%d. User override - ignoring autodetect result.\n",
2360                             arg.mi.dki_lbsize,
2361                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2362                             fsp->pcfs_secsize);
2363                 }
2364         } else if (arg.mi.dki_lbsize) {
2365                 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2366                     "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2367                     "Ignoring autodetect result.\n",
2368                     arg.mi.dki_lbsize,
2369                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2370         }
2371 
2372         /*
2373          * We treat the following media types as a floppy by default.
2374          */
2375         isfloppy =
2376             (arg.mi.dki_media_type == DK_FLOPPY ||
2377             arg.mi.dki_media_type == DK_ZIP ||
2378             arg.mi.dki_media_type == DK_JAZ);
2379 
2380         /*
2381          * if this device understands fdio(7I) requests it's
2382          * obviously a floppy drive.
2383          */
2384         if (!isfloppy &&
2385             !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2386                 isfloppy = 1;
2387 
2388         /*
2389          * some devices we like to treat as floppies, but they don't
2390          * understand fdio(7I) requests.
2391          */
2392         if (!isfloppy &&
2393             !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2394             (arg.ci.dki_ctype == DKC_WDC2880 ||
2395             arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2396             arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2397             arg.ci.dki_ctype == DKC_INTEL82077))
2398                 isfloppy = 1;
2399 
2400         /*
2401          * This is the "final fallback" test - media with
2402          * 2 heads and 80 cylinders are assumed to be floppies.
2403          * This is normally true for USB floppy drives ...
2404          */
2405         if (!isfloppy &&
2406             !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2407             (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2408                 isfloppy = 1;
2409 
2410         /*
2411          * This is similar to the "old" PCFS code that sets this flag
2412          * just based on the media descriptor being 0xf8 (MD_FIXED).
2413          * Should be re-worked. We really need some specialcasing for
2414          * removeable media.
2415          */
2416         if (!isfloppy) {
2417                 fsp->pcfs_flags |= PCFS_NOCHK;
2418         }
2419 
2420         /*
2421          * We automatically disable access time updates if the medium is
2422          * removeable and/or hotpluggable, and the admin did not explicitly
2423          * request access time updates (via the "atime" mount option).
2424          * The majority of flash-based media should fit this category.
2425          * Minimizing write access extends the lifetime of your memory stick !
2426          */
2427         if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2428             (isremoveable || ishotpluggable | isfloppy)) {
2429                 fsp->pcfs_flags |= PCFS_NOATIME;
2430         }
2431 
2432         (void) ldi_close(lh, FREAD, cr);
2433 out:
2434         if (fsp->pcfs_secsize == 0) {
2435                 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2436                     "device (%x.%x) failed, no user-provided fallback. "
2437                     "Using %d bytes.\n",
2438                     getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2439                     DEV_BSIZE);
2440                 fsp->pcfs_secsize = DEV_BSIZE;
2441                 fsp->pcfs_sdshift = 0;
2442         }
2443         ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2444         ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2445 }
2446 
2447 /*
2448  * Get the FAT type for the DOS medium.
2449  *
2450  * -------------------------
2451  * According to Microsoft:
2452  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2453  * count of clusters on the volume and nothing else.
2454  * -------------------------
2455  *
2456  */
2457 static int
2458 pc_getfattype(struct pcfs *fsp)
2459 {
2460         int error = 0;
2461         buf_t *bp = NULL;
2462         struct vnode *devvp = fsp->pcfs_devvp;
2463         dev_t   dev = devvp->v_rdev;
2464 
2465         /*
2466          * Detect the native block size of the medium, and attempt to
2467          * detect whether the medium is removeable.
2468          * We do treat removable media (floppies, USB and FireWire disks)
2469          * differently wrt. to the frequency and synchronicity of FAT updates.
2470          * We need to know the media block size in order to be able to
2471          * parse the partition table.
2472          */
2473         pcfs_device_getinfo(fsp);
2474 
2475         /*
2476          * Unpartitioned media (floppies and some removeable devices)
2477          * don't have a partition table, the FAT BPB is at disk block 0.
2478          * Start out by reading block 0.
2479          */
2480         fsp->pcfs_dosstart = 0;
2481         bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2482 
2483         if (error = geterror(bp))
2484                 goto out;
2485 
2486         /*
2487          * If a logical drive number is requested, parse the partition table
2488          * and attempt to locate it. Otherwise, proceed immediately to the
2489          * BPB check. findTheDrive(), if successful, returns the disk block
2490          * number where the requested partition starts in "startsec".
2491          */
2492         if (fsp->pcfs_ldrive != 0) {
2493                 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2494                     "device (%x,%x):%d to find BPB\n",
2495                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2496 
2497                 if (error = findTheDrive(fsp, &bp))
2498                         goto out;
2499 
2500                 ASSERT(fsp->pcfs_dosstart != 0);
2501 
2502                 brelse(bp);
2503                 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2504                     fsp->pcfs_secsize);
2505                 if (error = geterror(bp))
2506                         goto out;
2507         }
2508 
2509         /*
2510          * Validate the BPB and fill in the instance structure.
2511          */
2512         if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2513                 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2514                     "device (%x.%x):%d, disk LBA %u\n",
2515                     getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2516                     (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2517                 error = EINVAL;
2518                 goto out;
2519         }
2520 
2521         ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2522 
2523 out:
2524         /*
2525          * Release the buffer used
2526          */
2527         if (bp != NULL)
2528                 brelse(bp);
2529         return (error);
2530 }
2531 
2532 
2533 /*
2534  * Get the file allocation table.
2535  * If there is an old FAT, invalidate it.
2536  */
2537 int
2538 pc_getfat(struct pcfs *fsp)
2539 {
2540         struct buf *bp = NULL;
2541         uchar_t *fatp = NULL;
2542         uchar_t *fat_changemap = NULL;
2543         int error;
2544         int fat_changemapsize;
2545         int flags = 0;
2546         int nfat;
2547         int altfat_mustmatch = 0;
2548         int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2549 
2550         if (fsp->pcfs_fatp) {
2551                 /*
2552                  * There is a FAT in core.
2553                  * If there are open file pcnodes or we have modified it or
2554                  * it hasn't timed out yet use the in core FAT.
2555                  * Otherwise invalidate it and get a new one
2556                  */
2557 #ifdef notdef
2558                 if (fsp->pcfs_frefs ||
2559                     (fsp->pcfs_flags & PCFS_FATMOD) ||
2560                     (gethrestime_sec() < fsp->pcfs_fattime)) {
2561                         return (0);
2562                 } else {
2563                         mutex_enter(&pcfslock);
2564                         pc_invalfat(fsp);
2565                         mutex_exit(&pcfslock);
2566                 }
2567 #endif /* notdef */
2568                 return (0);
2569         }
2570 
2571         /*
2572          * Get FAT and check it for validity
2573          */
2574         fatp = kmem_alloc(fatsize, KM_SLEEP);
2575         error = pc_readfat(fsp, fatp);
2576         if (error) {
2577                 flags = B_ERROR;
2578                 goto out;
2579         }
2580         fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2581         fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2582         fsp->pcfs_fatp = fatp;
2583         fsp->pcfs_fat_changemapsize = fat_changemapsize;
2584         fsp->pcfs_fat_changemap = fat_changemap;
2585 
2586         /*
2587          * The only definite signature check is that the
2588          * media descriptor byte should match the first byte
2589          * of the FAT block.
2590          */
2591         if (fatp[0] != fsp->pcfs_mediadesc) {
2592                 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2593                     "media descriptor %x, FAT[0] lowbyte %x\n",
2594                     (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2595                 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2596                 altfat_mustmatch = 1;
2597         }
2598 
2599         /*
2600          * Get alternate FATs and check for consistency
2601          * This is an inlined version of pc_readfat().
2602          * Since we're only comparing FAT and alternate FAT,
2603          * there's no reason to let pc_readfat() copy data out
2604          * of the buf. Instead, compare in-situ, one cluster
2605          * at a time.
2606          */
2607         for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2608                 size_t startsec;
2609                 size_t off;
2610 
2611                 startsec = pc_dbdaddr(fsp,
2612                     fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2613 
2614                 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2615                         daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2616                             pc_cltodb(fsp, pc_lblkno(fsp, off)));
2617 
2618                         bp = bread(fsp->pcfs_xdev, fatblk,
2619                             MIN(fsp->pcfs_clsize, fatsize - off));
2620                         if (bp->b_flags & (B_ERROR | B_STALE)) {
2621                                 cmn_err(CE_NOTE,
2622                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2623                                     " read error at offset %ld on device"
2624                                     " (%x.%x):%d",
2625                                     nfat, (void *)(uintptr_t)startsec, off,
2626                                     getmajor(fsp->pcfs_xdev),
2627                                     getminor(fsp->pcfs_xdev),
2628                                     fsp->pcfs_ldrive);
2629                                 flags = B_ERROR;
2630                                 error = EIO;
2631                                 goto out;
2632                         }
2633                         bp->b_flags |= B_STALE | B_AGE;
2634                         if (bcmp(bp->b_un.b_addr, fatp + off,
2635                             MIN(fsp->pcfs_clsize, fatsize - off))) {
2636                                 cmn_err(CE_NOTE,
2637                                     "!pcfs: alternate FAT #%d (start LBA %p)"
2638                                     " corrupted at offset %ld on device"
2639                                     " (%x.%x):%d",
2640                                     nfat, (void *)(uintptr_t)startsec, off,
2641                                     getmajor(fsp->pcfs_xdev),
2642                                     getminor(fsp->pcfs_xdev),
2643                                     fsp->pcfs_ldrive);
2644                                 if (altfat_mustmatch) {
2645                                         flags = B_ERROR;
2646                                         error = EIO;
2647                                         goto out;
2648                                 }
2649                         }
2650                         brelse(bp);
2651                         bp = NULL;      /* prevent double release */
2652                 }
2653         }
2654 
2655         fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2656         fsp->pcfs_fatjustread = 1;
2657 
2658         /*
2659          * Retrieve FAT32 fsinfo sector.
2660          * A failure to read this is not fatal to accessing the volume.
2661          * It simply means operations that count or search free blocks
2662          * will have to do a full FAT walk, vs. a possibly quicker lookup
2663          * of the summary information.
2664          * Hence, we log a message but return success overall after this point.
2665          */
2666         if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2667                 struct fat_od_fsi *fsinfo_disk;
2668 
2669                 bp = bread(fsp->pcfs_xdev,
2670                     pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2671                 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2672                 if (bp->b_flags & (B_ERROR | B_STALE) ||
2673                     !FSISIG_OK(fsinfo_disk)) {
2674                         cmn_err(CE_NOTE,
2675                             "!pcfs: error reading fat32 fsinfo from "
2676                             "device (%x.%x):%d, block %lld",
2677                             getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2678                             fsp->pcfs_ldrive,
2679                             (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2680                         fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2681                         fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2682                         fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2683                 } else {
2684                         bp->b_flags |= B_STALE | B_AGE;
2685                         fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2686                         fsp->pcfs_fsinfo.fs_free_clusters =
2687                             LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2688                         fsp->pcfs_fsinfo.fs_next_free =
2689                             LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2690                 }
2691                 brelse(bp);
2692                 bp = NULL;
2693         }
2694 
2695         if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2696                 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2697         else
2698                 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2699 
2700         return (0);
2701 
2702 out:
2703         cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2704         if (bp)
2705                 brelse(bp);
2706         if (fatp)
2707                 kmem_free(fatp, fatsize);
2708         if (fat_changemap)
2709                 kmem_free(fat_changemap, fat_changemapsize);
2710 
2711         if (flags) {
2712                 pc_mark_irrecov(fsp);
2713         }
2714         return (error);
2715 }