1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/kmem.h>
30 #include <sys/user.h>
31 #include <sys/proc.h>
32 #include <sys/cred.h>
33 #include <sys/disp.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/fdio.h>
39 #include <sys/file.h>
40 #include <sys/uio.h>
41 #include <sys/conf.h>
42 #include <sys/statvfs.h>
43 #include <sys/mount.h>
44 #include <sys/pathname.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/sysmacros.h>
48 #include <sys/conf.h>
49 #include <sys/mkdev.h>
50 #include <sys/swap.h>
51 #include <sys/sunddi.h>
52 #include <sys/sunldi.h>
53 #include <sys/dktp/fdisk.h>
54 #include <sys/fs/pc_label.h>
55 #include <sys/fs/pc_fs.h>
56 #include <sys/fs/pc_dir.h>
57 #include <sys/fs/pc_node.h>
58 #include <fs/fs_subr.h>
59 #include <sys/modctl.h>
60 #include <sys/dkio.h>
61 #include <sys/open.h>
62 #include <sys/mntent.h>
63 #include <sys/policy.h>
64 #include <sys/atomic.h>
65 #include <sys/sdt.h>
66
/*
 * The majority of PC media use a 512 sector size, but
 * occasionally you will run across a 1k sector size.
 * For media with a 1k sector size, fd_strategy() requires
 * the I/O size to be a 1k multiple; so when the sector size
 * is not yet known, always read 1k.
 */
#define PC_SAFESECSIZE (PC_SECSIZE * 2)

static int pcfs_pseudo_floppy(dev_t);

/*
 * pcfsinit() is the init routine named in the vfsdef_t below; the
 * pcfs_mount() .. pcfs_freevfs() routines are the VFS operations that
 * pcfsinit() installs. pc_syncfsnodes() is a helper used by pcfs_sync().
 */
static int pcfsinit(int, char *);
static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int pcfs_unmount(struct vfs *, int, struct cred *);
static int pcfs_root(struct vfs *, struct vnode **);
static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
static int pc_syncfsnodes(struct pcfs *);
static int pcfs_sync(struct vfs *, short, struct cred *);
static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
static void pcfs_freevfs(vfs_t *vfsp);

/* Reading and writing of the in-core copy of the FAT. */
static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
static int pc_writefat(struct pcfs *fsp, daddr_t start);

/* Mount-time helpers: BPB / FAT type detection and mount option parsing. */
static int pc_getfattype(struct pcfs *fsp);
static void pcfs_parse_mntopts(struct pcfs *fsp);
94
95
96 /*
97 * pcfs mount options table
98 */
99
100 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
101 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
102 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
103 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
104 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
105 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
106 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
107 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
108
109 static mntopt_t mntopts[] = {
110 /*
111 * option name cancel option default arg flags opt data
112 */
113 { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
114 { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
115 { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
116 { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
117 { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
118 { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
119 { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
120 { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
121 { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
122 { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
123 };
124
125 static mntopts_t pcfs_mntopts = {
126 sizeof (mntopts) / sizeof (mntopt_t),
127 mntopts
128 };
129
int pcfsdebuglevel = 0;

/*
 * pcfslock: protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock: (inside per filesystem structure "pcfs")
 *	per filesystem lock. Most of the vfsops and vnodeops are
 *	protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount: used to prevent module unloads while there is still
 *	pcfs state from a former mount hanging around. With
 *	forced umount support, the filesystem module must not
 *	be allowed to go away before the last VFS_FREEVFS()
 *	call has been made.
 *	Since this is just an atomic counter, there's no need
 *	for locking.
 */
kmutex_t pcfslock;
krwlock_t pcnodes_lock;
uint32_t pcfs_mountcount;

/* Filesystem type index, recorded by pcfsinit(). */
static int pcfstype;

/* Filesystem definition: name, init routine, capabilities, mount options. */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"pcfs",
	pcfsinit,
	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
	&pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/* Loadable module linkage, used by _init(), _fini() and _info(). */
static struct modlfs modlfs = {
	&mod_fsops,
	"PC filesystem",
	&vfw
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{ &modlfs, NULL }
};
175
176 int
177 _init(void)
178 {
179 int error;
180
181 #if !defined(lint)
182 /* make sure the on-disk structures are sane */
183 ASSERT(sizeof (struct pcdir) == 32);
184 ASSERT(sizeof (struct pcdir_lfn) == 32);
185 #endif
186 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
187 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
188 error = mod_install(&modlinkage);
189 if (error) {
190 mutex_destroy(&pcfslock);
191 rw_destroy(&pcnodes_lock);
192 }
193 return (error);
194 }
195
196 int
197 _fini(void)
198 {
199 int error;
200
201 /*
202 * If a forcedly unmounted instance is still hanging around,
203 * we cannot allow the module to be unloaded because that would
204 * cause panics once the VFS framework decides it's time to call
205 * into VFS_FREEVFS().
206 */
207 if (pcfs_mountcount)
208 return (EBUSY);
209
210 error = mod_remove(&modlinkage);
211 if (error)
212 return (error);
213 mutex_destroy(&pcfslock);
214 rw_destroy(&pcnodes_lock);
215 /*
216 * Tear down the operations vectors
217 */
218 (void) vfs_freevfsops_by_type(pcfstype);
219 vn_freevnodeops(pcfs_fvnodeops);
220 vn_freevnodeops(pcfs_dvnodeops);
221 return (0);
222 }
223
224 int
225 _info(struct modinfo *modinfop)
226 {
227 return (mod_info(&modlinkage, modinfop));
228 }
229
230 /* ARGSUSED1 */
231 static int
232 pcfsinit(int fstype, char *name)
233 {
234 static const fs_operation_def_t pcfs_vfsops_template[] = {
235 { VFSNAME_MOUNT, { .vfs_mount = pcfs_mount } },
236 { VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount } },
237 { VFSNAME_ROOT, { .vfs_root = pcfs_root } },
238 { VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs } },
239 { VFSNAME_SYNC, { .vfs_sync = pcfs_sync } },
240 { VFSNAME_VGET, { .vfs_vget = pcfs_vget } },
241 { VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs } },
242 { NULL, { NULL } }
243 };
244 int error;
245
246 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
247 if (error != 0) {
248 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
249 return (error);
250 }
251
252 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
253 if (error != 0) {
254 (void) vfs_freevfsops_by_type(fstype);
255 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
256 return (error);
257 }
258
259 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
260 if (error != 0) {
261 (void) vfs_freevfsops_by_type(fstype);
262 vn_freevnodeops(pcfs_fvnodeops);
263 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
264 return (error);
265 }
266
267 pcfstype = fstype;
268 (void) pc_init();
269 pcfs_mountcount = 0;
270 return (0);
271 }
272
/*
 * Head of the list of mounted PCFS instances, linked through pcfs_nxt.
 * Instances are inserted by pcfs_mount() and removed by pcfs_unmount(),
 * both while holding pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

extern struct pcfs_args pc_tz;

/*
 * Define some special logical drives we use internal to this file.
 *
 * BOOT_PARTITION_DRIVE is selected by the ":boot" device name suffix,
 * UNPARTITIONED_DRIVE is used when the device name has no partition
 * suffix or a floppy letter (":a" / ":b"). Drive letter ":c" maps to
 * logical drive 1, which matches PRIMARY_DOS_DRIVE.
 */
#define BOOT_PARTITION_DRIVE 99
#define PRIMARY_DOS_DRIVE 1
#define UNPARTITIONED_DRIVE 0
283
284 static int
285 pcfs_device_identify(
286 struct vfs *vfsp,
287 struct mounta *uap,
288 struct cred *cr,
289 int *dos_ldrive,
290 dev_t *xdev)
291 {
292 struct pathname special;
293 char *c;
294 struct vnode *svp = NULL;
295 struct vnode *lvp = NULL;
296 int oflag, aflag;
297 int error;
298
299 /*
300 * Resolve path name of special file being mounted.
301 */
302 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
303 return (error);
304 }
305
306 *dos_ldrive = -1;
307
308 if (error =
309 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
310 /*
311 * If there's no device node, the name specified most likely
312 * maps to a PCFS-style "partition specifier" to select a
313 * harddisk primary/logical partition. Disable floppy-specific
314 * checks in such cases unless an explicit :A or :B is
315 * requested.
316 */
317
318 /*
319 * Split the pathname string at the last ':' separator.
320 * If there's no ':' in the device name, or the ':' is the
321 * last character in the string, the name is invalid and
322 * the error from the previous lookup will be returned.
323 */
324 c = strrchr(special.pn_path, ':');
325 if (c == NULL || strlen(c) == 0)
326 goto devlookup_done;
327
328 *c++ = '\0';
329
330 /*
331 * PCFS partition name suffixes can be:
332 * - "boot" to indicate the X86BOOT partition
333 * - a drive letter [c-z] for the "DOS logical drive"
334 * - a drive number 1..24 for the "DOS logical drive"
335 * - a "floppy name letter", 'a' or 'b' (just strip this)
336 */
337 if (strcasecmp(c, "boot") == 0) {
338 /*
339 * The Solaris boot partition is requested.
340 */
341 *dos_ldrive = BOOT_PARTITION_DRIVE;
342 } else if (strspn(c, "0123456789") == strlen(c)) {
343 /*
344 * All digits - parse the partition number.
345 */
346 long drvnum = 0;
347
348 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
349 /*
350 * A number alright - in the allowed range ?
351 */
352 if (drvnum > 24 || drvnum == 0)
353 error = ENXIO;
354 }
355 if (error)
356 goto devlookup_done;
357 *dos_ldrive = (int)drvnum;
358 } else if (strlen(c) == 1) {
359 /*
360 * A single trailing character was specified.
361 * - [c-zC-Z] means a harddisk partition, and
362 * we retrieve the partition number.
363 * - [abAB] means a floppy drive, so we swallow
364 * the "drive specifier" and test later
365 * whether the physical device is a floppy.
366 */
367 *c = tolower(*c);
368 if (*c == 'a' || *c == 'b') {
369 *dos_ldrive = UNPARTITIONED_DRIVE;
370 } else if (*c < 'c' || *c > 'z') {
371 error = ENXIO;
372 goto devlookup_done;
373 } else {
374 *dos_ldrive = 1 + *c - 'c';
375 }
376 } else {
377 /*
378 * Can't parse this - pass through previous error.
379 */
380 goto devlookup_done;
381 }
382
383
384 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
385 NULLVPP, &svp);
386 } else {
387 *dos_ldrive = UNPARTITIONED_DRIVE;
388 }
389 devlookup_done:
390 pn_free(&special);
391 if (error)
392 return (error);
393
394 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
395
396 /*
397 * Verify caller's permission to open the device special file.
398 */
399 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
400 ((uap->flags & MS_RDONLY) != 0)) {
401 oflag = FREAD;
402 aflag = VREAD;
403 } else {
404 oflag = FREAD | FWRITE;
405 aflag = VREAD | VWRITE;
406 }
407
408 error = vfs_get_lofi(vfsp, &lvp);
409
410 if (error > 0) {
411 if (error == ENOENT)
412 error = ENODEV;
413 goto out;
414 } else if (error == 0) {
415 *xdev = lvp->v_rdev;
416 } else {
417 *xdev = svp->v_rdev;
418
419 if (svp->v_type != VBLK) {
420 error = ENOTBLK;
421 goto out;
422 }
423
424 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
425 goto out;
426 }
427
428 if (getmajor(*xdev) >= devcnt) {
429 error = ENXIO;
430 goto out;
431 }
432
433 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
434 goto out;
435
436 out:
437 if (svp != NULL)
438 VN_RELE(svp);
439 if (lvp != NULL)
440 VN_RELE(lvp);
441 return (error);
442 }
443
444 static int
445 pcfs_device_ismounted(
446 struct vfs *vfsp,
447 int dos_ldrive,
448 dev_t xdev,
449 int *remounting,
450 dev_t *pseudodev)
451 {
452 struct pcfs *fsp;
453 int remount = *remounting;
454
455 /*
456 * Ensure that this logical drive isn't already mounted, unless
457 * this is a REMOUNT request.
458 * Note: The framework will perform this check if the "...:c"
459 * PCFS-style "logical drive" syntax has not been used and an
460 * actually existing physical device is backing this filesystem.
461 * Once all block device drivers support PC-style partitioning,
462 * this codeblock can be dropped.
463 */
464 *pseudodev = xdev;
465
466 if (dos_ldrive) {
467 mutex_enter(&pcfslock);
468 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
469 if (fsp->pcfs_xdev == xdev &&
470 fsp->pcfs_ldrive == dos_ldrive) {
471 mutex_exit(&pcfslock);
472 if (remount) {
473 return (0);
474 } else {
475 return (EBUSY);
476 }
477 }
478 /*
479 * Assign a unique device number for the vfs
480 * The old way (getudev() + a constantly incrementing
481 * major number) was wrong because it changes vfs_dev
482 * across mounts and reboots, which breaks nfs file handles.
483 * UFS just uses the real dev_t. We can't do that because
484 * of the way pcfs opens fdisk partitons (the :c and :d
485 * partitions are on the same dev_t). Though that _might_
486 * actually be ok, since the file handle contains an
487 * absolute block number, it's probably better to make them
488 * different. So I think we should retain the original
489 * dev_t, but come up with a different minor number based
490 * on the logical drive that will _always_ come up the same.
491 * For now, we steal the upper 6 bits.
492 */
493 #ifdef notdef
494 /* what should we do here? */
495 if (((getminor(xdev) >> 12) & 0x3F) != 0)
496 printf("whoops - upper bits used!\n");
497 #endif
498 *pseudodev = makedevice(getmajor(xdev),
499 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
500 if (vfs_devmounting(*pseudodev, vfsp)) {
501 mutex_exit(&pcfslock);
502 return (EBUSY);
503 }
504 if (vfs_devismounted(*pseudodev)) {
505 mutex_exit(&pcfslock);
506 if (remount) {
507 return (0);
508 } else {
509 return (EBUSY);
510 }
511 }
512 mutex_exit(&pcfslock);
513 } else {
514 *pseudodev = xdev;
515 if (vfs_devmounting(*pseudodev, vfsp)) {
516 return (EBUSY);
517 }
518 if (vfs_devismounted(*pseudodev))
519 if (remount) {
520 return (0);
521 } else {
522 return (EBUSY);
523 }
524 }
525
526 /*
527 * This is not a remount. Even if MS_REMOUNT was requested,
528 * the caller needs to proceed as it would on an ordinary
529 * mount.
530 */
531 *remounting = 0;
532
533 ASSERT(*pseudodev);
534 return (0);
535 }
536
537 /*
538 * Get the PCFS-specific mount options from the VFS framework.
539 * For "timezone" and "secsize", we need to parse the number
540 * ourselves and ensure its validity.
541 * Note: "secsize" is deliberately undocumented at this time,
542 * it's a workaround for devices (particularly: lofi image files)
543 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
544 */
545 static void
546 pcfs_parse_mntopts(struct pcfs *fsp)
547 {
548 char *c;
549 char *endptr;
550 long l;
551 struct vfs *vfsp = fsp->pcfs_vfs;
552
553 ASSERT(fsp->pcfs_secondswest == 0);
554 ASSERT(fsp->pcfs_secsize == 0);
555
556 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
557 fsp->pcfs_flags |= PCFS_HIDDEN;
558 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
559 fsp->pcfs_flags |= PCFS_FOLDCASE;
560 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
561 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
562 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
563 fsp->pcfs_flags |= PCFS_NOATIME;
564
565 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
566 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
567 endptr == c + strlen(c)) {
568 /*
569 * A number alright - in the allowed range ?
570 */
571 if (l <= -12*3600 || l >= 12*3600) {
572 cmn_err(CE_WARN, "!pcfs: invalid use of "
573 "'timezone' mount option - %ld "
574 "is out of range. Assuming 0.", l);
575 l = 0;
576 }
577 } else {
578 cmn_err(CE_WARN, "!pcfs: invalid use of "
579 "'timezone' mount option - argument %s "
580 "is not a valid number. Assuming 0.", c);
581 l = 0;
582 }
583 fsp->pcfs_secondswest = l;
584 }
585
586 /*
587 * The "secsize=..." mount option is a workaround for the lack of
588 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
589 * partition table of a disk image and it has been partitioned with
590 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
591 * images.
592 * That should really be fixed in lofi ... this is a workaround.
593 */
594 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
595 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
596 endptr == c + strlen(c)) {
597 /*
598 * A number alright - a valid sector size as well ?
599 */
600 if (!VALID_SECSIZE(l)) {
601 cmn_err(CE_WARN, "!pcfs: invalid use of "
602 "'secsize' mount option - %ld is "
603 "unsupported. Autodetecting.", l);
604 l = 0;
605 }
606 } else {
607 cmn_err(CE_WARN, "!pcfs: invalid use of "
608 "'secsize' mount option - argument %s "
609 "is not a valid number. Autodetecting.", c);
610 l = 0;
611 }
612 fsp->pcfs_secsize = l;
613 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
614 }
615 }
616
617 /*
618 * vfs operations
619 */
620
621 /*
622 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
623 */
624 static int
625 pcfs_mount(
626 struct vfs *vfsp,
627 struct vnode *mvp,
628 struct mounta *uap,
629 struct cred *cr)
630 {
631 struct pcfs *fsp;
632 struct vnode *devvp;
633 dev_t pseudodev;
634 dev_t xdev;
635 int dos_ldrive = 0;
636 int error;
637 int remounting;
638
639 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
640 return (error);
641
642 if (mvp->v_type != VDIR)
643 return (ENOTDIR);
644
645 mutex_enter(&mvp->v_lock);
646 if ((uap->flags & MS_REMOUNT) == 0 &&
647 (uap->flags & MS_OVERLAY) == 0 &&
648 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
649 mutex_exit(&mvp->v_lock);
650 return (EBUSY);
651 }
652 mutex_exit(&mvp->v_lock);
653
654 /*
655 * PCFS doesn't do mount arguments anymore - everything's a mount
656 * option these days. In order not to break existing callers, we
657 * don't reject it yet, just warn that the data (if any) is ignored.
658 */
659 if (uap->datalen != 0)
660 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
661 "mount argument structures instead of mount options. "
662 "Ignoring mount(2) 'dataptr' argument.");
663
664 /*
665 * This is needed early, to make sure the access / open calls
666 * are done using the correct mode. Processing this mount option
667 * only when calling pcfs_parse_mntopts() would lead us to attempt
668 * a read/write access to a possibly writeprotected device, and
669 * a readonly mount attempt might fail because of that.
670 */
671 if (uap->flags & MS_RDONLY) {
672 vfsp->vfs_flag |= VFS_RDONLY;
673 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
674 }
675
676 /*
677 * For most filesystems, this is just a lookupname() on the
678 * mount pathname string. PCFS historically has to do its own
679 * partition table parsing because not all Solaris architectures
680 * support all styles of partitioning that PC media can have, and
681 * hence PCFS understands "device names" that don't map to actual
682 * physical device nodes. Parsing the "PCFS syntax" for device
683 * names is done in pcfs_device_identify() - see there.
684 *
685 * Once all block device drivers that can host FAT filesystems have
686 * been enhanced to create device nodes for all PC-style partitions,
687 * this code can go away.
688 */
689 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
690 return (error);
691
692 /*
693 * As with looking up the actual device to mount, PCFS cannot rely
694 * on just the checks done by vfs_ismounted() whether a given device
695 * is mounted already. The additional check against the "PCFS syntax"
696 * is done in pcfs_device_ismounted().
697 */
698 remounting = (uap->flags & MS_REMOUNT);
699
700 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
701 &pseudodev))
702 return (error);
703
704 if (remounting)
705 return (0);
706
707 /*
708 * Mount the filesystem.
709 * An instance structure is required before the attempt to locate
710 * and parse the FAT BPB. This is because mount options may change
711 * the behaviour of the filesystem type matching code. Precreate
712 * it and fill it in to a degree that allows parsing the mount
713 * options.
714 */
715 devvp = makespecvp(xdev, VBLK);
716 if (IS_SWAPVP(devvp)) {
717 VN_RELE(devvp);
718 return (EBUSY);
719 }
720 error = VOP_OPEN(&devvp,
721 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
722 if (error) {
723 VN_RELE(devvp);
724 return (error);
725 }
726
727 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
728 fsp->pcfs_vfs = vfsp;
729 fsp->pcfs_xdev = xdev;
730 fsp->pcfs_devvp = devvp;
731 fsp->pcfs_ldrive = dos_ldrive;
732 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
733
734 pcfs_parse_mntopts(fsp);
735
736 /*
737 * This is the actual "mount" - the PCFS superblock check.
738 *
739 * Find the requested logical drive and the FAT BPB therein.
740 * Check device type and flag the instance if media is removeable.
741 *
742 * Initializes most members of the filesystem instance structure.
743 * Returns EINVAL if no valid BPB can be found. Other errors may
744 * occur after I/O failures, or when invalid / unparseable partition
745 * tables are encountered.
746 */
747 if (error = pc_getfattype(fsp))
748 goto errout;
749
750 /*
751 * Now that the BPB has been parsed, this structural information
752 * is available and known to be valid. Initialize the VFS.
753 */
754 vfsp->vfs_data = fsp;
755 vfsp->vfs_dev = pseudodev;
756 vfsp->vfs_fstype = pcfstype;
757 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
758 vfsp->vfs_bcount = 0;
759 vfsp->vfs_bsize = fsp->pcfs_clsize;
760
761 /*
762 * Validate that we can access the FAT and that it is, to the
763 * degree we can verify here, self-consistent.
764 */
765 if (error = pc_verify(fsp))
766 goto errout;
767
768 /*
769 * Record the time of the mount, to return as an "approximate"
770 * timestamp for the FAT root directory. Since FAT roots don't
771 * have timestamps, this is less confusing to the user than
772 * claiming "zero" / Jan/01/1970.
773 */
774 gethrestime(&fsp->pcfs_mounttime);
775
776 /*
777 * Fix up the mount options. Because "noatime" is made default on
778 * removeable media only, a fixed disk will have neither "atime"
779 * nor "noatime" set. We set the options explicitly depending on
780 * the PCFS_NOATIME flag, to inform the user of what applies.
781 * Mount option cancellation will take care that the mutually
782 * exclusive 'other' is cleared.
783 */
784 vfs_setmntopt(vfsp,
785 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
786 NULL, 0);
787
788 /*
789 * All clear - insert the FS instance into PCFS' list.
790 */
791 mutex_enter(&pcfslock);
792 fsp->pcfs_nxt = pc_mounttab;
793 pc_mounttab = fsp;
794 mutex_exit(&pcfslock);
795 atomic_inc_32(&pcfs_mountcount);
796 return (0);
797
798 errout:
799 (void) VOP_CLOSE(devvp,
800 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
801 1, (offset_t)0, cr, NULL);
802 VN_RELE(devvp);
803 mutex_destroy(&fsp->pcfs_lock);
804 kmem_free(fsp, sizeof (*fsp));
805 return (error);
806
807 }
808
809 static int
810 pcfs_unmount(
811 struct vfs *vfsp,
812 int flag,
813 struct cred *cr)
814 {
815 struct pcfs *fsp, *fsp1;
816
817 if (secpolicy_fs_unmount(cr, vfsp) != 0)
818 return (EPERM);
819
820 fsp = VFSTOPCFS(vfsp);
821
822 /*
823 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
824 * prevent lookuppn from crossing the mount point.
825 * If this is not a forced umount request and there's ongoing I/O,
826 * don't allow the mount to proceed.
827 */
828 if (flag & MS_FORCE)
829 vfsp->vfs_flag |= VFS_UNMOUNTED;
830 else if (fsp->pcfs_nrefs)
831 return (EBUSY);
832
833 mutex_enter(&pcfslock);
834
835 /*
836 * If this is a forced umount request or if the fs instance has
837 * been marked as beyond recovery, allow the umount to proceed
838 * regardless of state. pc_diskchanged() forcibly releases all
839 * inactive vnodes/pcnodes.
840 */
841 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
842 rw_enter(&pcnodes_lock, RW_WRITER);
843 pc_diskchanged(fsp);
844 rw_exit(&pcnodes_lock);
845 }
846
847 /* now there should be no pcp node on pcfhead or pcdhead. */
848
849 if (fsp == pc_mounttab) {
850 pc_mounttab = fsp->pcfs_nxt;
851 } else {
852 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
853 if (fsp1->pcfs_nxt == fsp)
854 fsp1->pcfs_nxt = fsp->pcfs_nxt;
855 }
856
857 mutex_exit(&pcfslock);
858
859 /*
860 * Since we support VFS_FREEVFS(), there's no need to
861 * free the fsp right now. The framework will tell us
862 * when the right time to do so has arrived by calling
863 * into pcfs_freevfs.
864 */
865 return (0);
866 }
867
868 /*
869 * find root of pcfs
870 */
871 static int
872 pcfs_root(
873 struct vfs *vfsp,
874 struct vnode **vpp)
875 {
876 struct pcfs *fsp;
877 struct pcnode *pcp;
878 int error;
879
880 fsp = VFSTOPCFS(vfsp);
881 if (error = pc_lockfs(fsp, 0, 0))
882 return (error);
883
884 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
885 pc_unlockfs(fsp);
886 *vpp = PCTOV(pcp);
887 pcp->pc_flags |= PC_EXTERNAL;
888 return (0);
889 }
890
891 /*
892 * Get file system statistics.
893 */
894 static int
895 pcfs_statvfs(
896 struct vfs *vfsp,
897 struct statvfs64 *sp)
898 {
899 struct pcfs *fsp;
900 int error;
901 dev32_t d32;
902
903 fsp = VFSTOPCFS(vfsp);
904 error = pc_getfat(fsp);
905 if (error)
906 return (error);
907 bzero(sp, sizeof (*sp));
908 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
909 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
910 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
911 sp->f_files = (fsfilcnt64_t)-1;
912 sp->f_ffree = (fsfilcnt64_t)-1;
913 sp->f_favail = (fsfilcnt64_t)-1;
914 #ifdef notdef
915 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
916 #endif /* notdef */
917 (void) cmpldev(&d32, vfsp->vfs_dev);
918 sp->f_fsid = d32;
919 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
920 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
921 sp->f_namemax = PCMAXNAMLEN;
922 return (0);
923 }
924
925 static int
926 pc_syncfsnodes(struct pcfs *fsp)
927 {
928 struct pchead *hp;
929 struct pcnode *pcp;
930 int error;
931
932 if (error = pc_lockfs(fsp, 0, 0))
933 return (error);
934
935 if (!(error = pc_syncfat(fsp))) {
936 hp = pcfhead;
937 while (hp < & pcfhead [ NPCHASH ]) {
938 rw_enter(&pcnodes_lock, RW_READER);
939 pcp = hp->pch_forw;
940 while (pcp != (struct pcnode *)hp) {
941 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
942 if (error = pc_nodesync(pcp))
943 break;
944 pcp = pcp -> pc_forw;
945 }
946 rw_exit(&pcnodes_lock);
947 if (error)
948 break;
949 hp++;
950 }
951 }
952 pc_unlockfs(fsp);
953 return (error);
954 }
955
956 /*
957 * Flush any pending I/O.
958 */
959 /*ARGSUSED*/
960 static int
961 pcfs_sync(
962 struct vfs *vfsp,
963 short flag,
964 struct cred *cr)
965 {
966 struct pcfs *fsp;
967 int error = 0;
968
969 /* this prevents the filesystem from being umounted. */
970 mutex_enter(&pcfslock);
971 if (vfsp != NULL) {
972 fsp = VFSTOPCFS(vfsp);
973 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
974 error = pc_syncfsnodes(fsp);
975 } else {
976 rw_enter(&pcnodes_lock, RW_WRITER);
977 pc_diskchanged(fsp);
978 rw_exit(&pcnodes_lock);
979 error = EIO;
980 }
981 } else {
982 fsp = pc_mounttab;
983 while (fsp != NULL) {
984 if (fsp->pcfs_flags & PCFS_IRRECOV) {
985 rw_enter(&pcnodes_lock, RW_WRITER);
986 pc_diskchanged(fsp);
987 rw_exit(&pcnodes_lock);
988 error = EIO;
989 break;
990 }
991 error = pc_syncfsnodes(fsp);
992 if (error) break;
993 fsp = fsp->pcfs_nxt;
994 }
995 }
996 mutex_exit(&pcfslock);
997 return (error);
998 }
999
1000 int
1001 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1002 {
1003 int err;
1004
1005 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1006 return (EIO);
1007
1008 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1009 fsp->pcfs_count++;
1010 } else {
1011 mutex_enter(&fsp->pcfs_lock);
1012 if (fsp->pcfs_flags & PCFS_LOCKED)
1013 panic("pc_lockfs");
1014 /*
1015 * We check the IRRECOV bit again just in case somebody
1016 * snuck past the initial check but then got held up before
1017 * they could grab the lock. (And in the meantime someone
1018 * had grabbed the lock and set the bit)
1019 */
1020 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1021 if ((err = pc_getfat(fsp))) {
1022 mutex_exit(&fsp->pcfs_lock);
1023 return (err);
1024 }
1025 }
1026 fsp->pcfs_flags |= PCFS_LOCKED;
1027 fsp->pcfs_owner = curthread;
1028 fsp->pcfs_count++;
1029 }
1030 return (0);
1031 }
1032
1033 void
1034 pc_unlockfs(struct pcfs *fsp)
1035 {
1036
1037 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1038 panic("pc_unlockfs");
1039 if (--fsp->pcfs_count < 0)
1040 panic("pc_unlockfs: count");
1041 if (fsp->pcfs_count == 0) {
1042 fsp->pcfs_flags &= ~PCFS_LOCKED;
1043 fsp->pcfs_owner = 0;
1044 mutex_exit(&fsp->pcfs_lock);
1045 }
1046 }
1047
1048 int
1049 pc_syncfat(struct pcfs *fsp)
1050 {
1051 struct buf *bp;
1052 int nfat;
1053 int error = 0;
1054 struct fat_od_fsi *fsinfo_disk;
1055
1056 if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1057 !(fsp->pcfs_flags & PCFS_FATMOD))
1058 return (0);
1059 /*
1060 * write out all copies of FATs
1061 */
1062 fsp->pcfs_flags &= ~PCFS_FATMOD;
1063 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1064 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1065 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1066 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1067 if (error) {
1068 pc_mark_irrecov(fsp);
1069 return (EIO);
1070 }
1071 }
1072 pc_clear_fatchanges(fsp);
1073
1074 /*
1075 * Write out fsinfo sector.
1076 */
1077 if (IS_FAT32(fsp)) {
1078 bp = bread(fsp->pcfs_xdev,
1079 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1080 if (bp->b_flags & (B_ERROR | B_STALE)) {
1081 error = geterror(bp);
1082 }
1083 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1084 if (!error && FSISIG_OK(fsinfo_disk)) {
1085 fsinfo_disk->fsi_incore.fs_free_clusters =
1086 LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1087 fsinfo_disk->fsi_incore.fs_next_free =
1088 LE_32(FSINFO_UNKNOWN);
1089 bwrite2(bp);
1090 error = geterror(bp);
1091 }
1092 brelse(bp);
1093 if (error) {
1094 pc_mark_irrecov(fsp);
1095 return (EIO);
1096 }
1097 }
1098 return (0);
1099 }
1100
1101 void
1102 pc_invalfat(struct pcfs *fsp)
1103 {
1104 struct pcfs *xfsp;
1105 int mount_cnt = 0;
1106
1107 if (fsp->pcfs_fatp == (uchar_t *)0)
1108 panic("pc_invalfat");
1109 /*
1110 * Release FAT
1111 */
1112 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1113 fsp->pcfs_fatp = NULL;
1114 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1115 fsp->pcfs_fat_changemap = NULL;
1116 /*
1117 * Invalidate all the blocks associated with the device.
1118 * Not needed if stateless.
1119 */
1120 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1121 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1122 mount_cnt++;
1123
1124 if (!mount_cnt)
1125 binval(fsp->pcfs_xdev);
1126 /*
1127 * close mounted device
1128 */
1129 (void) VOP_CLOSE(fsp->pcfs_devvp,
1130 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1131 1, (offset_t)0, CRED(), NULL);
1132 }
1133
1134 void
1135 pc_badfs(struct pcfs *fsp)
1136 {
1137 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1138 getmajor(fsp->pcfs_devvp->v_rdev),
1139 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1140 }
1141
1142 /*
1143 * The problem with supporting NFS on the PCFS filesystem is that there
1144 * is no good place to keep the generation number. The only possible
1145 * place is inside a directory entry. There are a few words that we
1146 * don't use - they store NT & OS/2 attributes, and the creation/last access
1147 * time of the file - but it seems wrong to use them. In addition, directory
1148 * entries come and go. If a directory is removed completely, its directory
1149 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1150 * inode blocks are dedicated for inodes, so the generation numbers are
1151 * permanently kept on the disk.
1152 */
1153 static int
1154 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1155 {
1156 struct pcnode *pcp;
1157 struct pc_fid *pcfid;
1158 struct pcfs *fsp;
1159 struct pcdir *ep;
1160 daddr_t eblkno;
1161 int eoffset;
1162 struct buf *bp;
1163 int error;
1164 pc_cluster32_t cn;
1165
1166 pcfid = (struct pc_fid *)fidp;
1167 fsp = VFSTOPCFS(vfsp);
1168
1169 error = pc_lockfs(fsp, 0, 0);
1170 if (error) {
1171 *vpp = NULL;
1172 return (error);
1173 }
1174
1175 if (pcfid->pcfid_block == 0) {
1176 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1177 pcp->pc_flags |= PC_EXTERNAL;
1178 *vpp = PCTOV(pcp);
1179 pc_unlockfs(fsp);
1180 return (0);
1181 }
1182 eblkno = pcfid->pcfid_block;
1183 eoffset = pcfid->pcfid_offset;
1184
1185 if ((pc_dbtocl(fsp,
1186 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1187 (eoffset > fsp->pcfs_clsize)) {
1188 pc_unlockfs(fsp);
1189 *vpp = NULL;
1190 return (EINVAL);
1191 }
1192
1193 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1194 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1195 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1196 fsp->pcfs_clsize);
1197 } else {
1198 /*
1199 * This is an access "backwards" into the FAT12/FAT16
1200 * root directory. A better code structure would
1201 * significantly improve maintainability here ...
1202 */
1203 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1204 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1205 }
1206 if (bp->b_flags & (B_ERROR | B_STALE)) {
1207 error = geterror(bp);
1208 brelse(bp);
1209 if (error)
1210 pc_mark_irrecov(fsp);
1211 *vpp = NULL;
1212 pc_unlockfs(fsp);
1213 return (error);
1214 }
1215 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1216 /*
1217 * Ok, if this is a valid file handle that we gave out,
1218 * then simply ensuring that the creation time matches,
1219 * the entry has not been deleted, and it has a valid first
1220 * character should be enough.
1221 *
1222 * Unfortunately, verifying that the <blkno, offset> _still_
1223 * refers to a directory entry is not easy, since we'd have
1224 * to search _all_ directories starting from root to find it.
1225 * That's a high price to pay just in case somebody is forging
1226 * file handles. So instead we verify that as much of the
1227 * entry is valid as we can:
1228 *
1229 * 1. The starting cluster is 0 (unallocated) or valid
1230 * 2. It is not an LFN entry
1231 * 3. It is not hidden (unless mounted as such)
1232 * 4. It is not the label
1233 */
1234 cn = pc_getstartcluster(fsp, ep);
1235 /*
1236 * if the starting cluster is valid, but not valid according
1237 * to pc_validcl(), force it to be to simplify the following if.
1238 */
1239 if (cn == 0)
1240 cn = PCF_FIRSTCLUSTER;
1241 if (IS_FAT32(fsp)) {
1242 if (cn >= PCF_LASTCLUSTER32)
1243 cn = PCF_FIRSTCLUSTER;
1244 } else {
1245 if (cn >= PCF_LASTCLUSTER)
1246 cn = PCF_FIRSTCLUSTER;
1247 }
1248 if ((!pc_validcl(fsp, cn)) ||
1249 (PCDL_IS_LFN(ep)) ||
1250 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1251 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1252 bp->b_flags |= B_STALE | B_AGE;
1253 brelse(bp);
1254 pc_unlockfs(fsp);
1255 return (EINVAL);
1256 }
1257 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1258 (ep->pcd_filename[0] != PCD_ERASED) &&
1259 (pc_validchar(ep->pcd_filename[0]) ||
1260 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1261 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1262 pcp->pc_flags |= PC_EXTERNAL;
1263 *vpp = PCTOV(pcp);
1264 } else {
1265 *vpp = NULL;
1266 }
1267 bp->b_flags |= B_STALE | B_AGE;
1268 brelse(bp);
1269 pc_unlockfs(fsp);
1270 return (0);
1271 }
1272
1273 /*
1274 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1275 * a meg), so we can't bread() it all in at once. This routine reads a
1276 * fat a chunk at a time.
1277 */
1278 static int
1279 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1280 {
1281 struct buf *bp;
1282 size_t off;
1283 size_t readsize;
1284 daddr_t diskblk;
1285 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1286 daddr_t start = fsp->pcfs_fatstart;
1287
1288 readsize = fsp->pcfs_clsize;
1289 for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1290 if (readsize > (fatsize - off))
1291 readsize = fatsize - off;
1292 diskblk = pc_dbdaddr(fsp, start +
1293 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1294 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1295 if (bp->b_flags & (B_ERROR | B_STALE)) {
1296 brelse(bp);
1297 return (EIO);
1298 }
1299 bp->b_flags |= B_STALE | B_AGE;
1300 bcopy(bp->b_un.b_addr, fatp, readsize);
1301 brelse(bp);
1302 }
1303 return (0);
1304 }
1305
1306 /*
1307 * We write the FAT out a _lot_, in order to make sure that it
1308 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1309 * the FAT might be a couple of megabytes, and writing it all out just
1310 * because we created or deleted a small file is painful (especially
1311 * since we do it for each alternate FAT too). So instead, for FAT16 and
1312 * FAT32 we only write out the bit that has changed. We don't clear
1313 * the 'updated' fields here because the caller might be writing out
1314 * several FATs, so the caller must use pc_clear_fatchanges() after
1315 * all FATs have been updated.
1316 * This function doesn't take "start" from fsp->pcfs_dosstart because
1317 * callers can use it to write either the primary or any of the alternate
1318 * FAT tables.
1319 */
1320 static int
1321 pc_writefat(struct pcfs *fsp, daddr_t start)
1322 {
1323 struct buf *bp;
1324 size_t off;
1325 size_t writesize;
1326 int error;
1327 uchar_t *fatp = fsp->pcfs_fatp;
1328 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1329
1330 writesize = fsp->pcfs_clsize;
1331 for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1332 if (writesize > (fatsize - off))
1333 writesize = fatsize - off;
1334 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1335 continue;
1336 }
1337 bp = ngeteblk(writesize);
1338 bp->b_edev = fsp->pcfs_xdev;
1339 bp->b_dev = cmpdev(bp->b_edev);
1340 bp->b_blkno = pc_dbdaddr(fsp, start +
1341 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1342 bcopy(fatp, bp->b_un.b_addr, writesize);
1343 bwrite2(bp);
1344 error = geterror(bp);
1345 brelse(bp);
1346 if (error) {
1347 return (error);
1348 }
1349 }
1350 return (0);
1351 }
1352
1353 /*
1354 * Mark the FAT cluster that 'cn' is stored in as modified.
1355 */
1356 void
1357 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1358 {
1359 pc_cluster32_t bn;
1360 size_t size;
1361
1362 /* which fat block is the cluster number stored in? */
1363 if (IS_FAT32(fsp)) {
1364 size = sizeof (pc_cluster32_t);
1365 bn = pc_lblkno(fsp, cn * size);
1366 fsp->pcfs_fat_changemap[bn] = 1;
1367 } else if (IS_FAT16(fsp)) {
1368 size = sizeof (pc_cluster16_t);
1369 bn = pc_lblkno(fsp, cn * size);
1370 fsp->pcfs_fat_changemap[bn] = 1;
1371 } else {
1372 offset_t off;
1373 pc_cluster32_t nbn;
1374
1375 ASSERT(IS_FAT12(fsp));
1376 off = cn + (cn >> 1);
1377 bn = pc_lblkno(fsp, off);
1378 fsp->pcfs_fat_changemap[bn] = 1;
1379 /* does this field wrap into the next fat cluster? */
1380 nbn = pc_lblkno(fsp, off + 1);
1381 if (nbn != bn) {
1382 fsp->pcfs_fat_changemap[nbn] = 1;
1383 }
1384 }
1385 }
1386
1387 /*
1388 * return whether the FAT cluster 'bn' is updated and needs to
1389 * be written out.
1390 */
1391 int
1392 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1393 {
1394 return (fsp->pcfs_fat_changemap[bn] == 1);
1395 }
1396
1397 /*
1398 * Implementation of VFS_FREEVFS() to support forced umounts.
1399 * This is called by the vfs framework after umount, to trigger
1400 * the release of any resources still associated with the given
1401 * vfs_t once the need to keep them has gone away.
1402 */
1403 void
1404 pcfs_freevfs(vfs_t *vfsp)
1405 {
1406 struct pcfs *fsp = VFSTOPCFS(vfsp);
1407
1408 mutex_enter(&pcfslock);
1409 /*
1410 * Purging the FAT closes the device - can't do any more
1411 * I/O after this.
1412 */
1413 if (fsp->pcfs_fatp != (uchar_t *)0)
1414 pc_invalfat(fsp);
1415 mutex_exit(&pcfslock);
1416
1417 VN_RELE(fsp->pcfs_devvp);
1418 mutex_destroy(&fsp->pcfs_lock);
1419 kmem_free(fsp, sizeof (*fsp));
1420
1421 /*
1422 * Allow _fini() to succeed now, if so desired.
1423 */
1424 atomic_dec_32(&pcfs_mountcount);
1425 }
1426
1427
1428 /*
1429 * PC-style partition parsing and FAT BPB identification/validation code.
1430 * The partition parsers here assume:
1431 * - a FAT filesystem will be in a partition that has one of a set of
1432 * recognized partition IDs
1433 * - the user wants the 'numbering' (C:, D:, ...) that one would get
1434 * on MSDOS 6.x.
1435 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1436 * will not factor in the enumeration.
1437 * These days, such assumptions should be revisited. FAT is no longer the
1438 * only game in 'PC town'.
1439 */
1440 /*
1441 * isDosDrive()
1442 * Boolean function. Give it the systid field for an fdisk partition
1443 * and it decides if that's a systid that describes a DOS drive. We
1444 * use systid values defined in sys/dktp/fdisk.h.
1445 */
1446 static int
1447 isDosDrive(uchar_t checkMe)
1448 {
1449 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1450 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1451 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1452 (checkMe == DIAGPART));
1453 }
1454
1455
1456 /*
1457 * isDosExtended()
1458 * Boolean function. Give it the systid field for an fdisk partition
1459 * and it decides if that's a systid that describes an extended DOS
1460 * partition.
1461 */
1462 static int
1463 isDosExtended(uchar_t checkMe)
1464 {
1465 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1466 }
1467
1468
1469 /*
1470 * isBootPart()
1471 * Boolean function. Give it the systid field for an fdisk partition
1472 * and it decides if that's a systid that describes a Solaris boot
1473 * partition.
1474 */
1475 static int
1476 isBootPart(uchar_t checkMe)
1477 {
1478 return (checkMe == X86BOOT);
1479 }
1480
1481
1482 /*
1483 * noLogicalDrive()
1484 * Display error message about not being able to find a logical
1485 * drive.
1486 */
1487 static void
1488 noLogicalDrive(int ldrive)
1489 {
1490 if (ldrive == BOOT_PARTITION_DRIVE) {
1491 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1492 } else {
1493 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1494 }
1495 }
1496
1497
1498 /*
1499 * findTheDrive()
1500 * Discover offset of the requested logical drive, and return
1501 * that offset (startSector), the systid of that drive (sysid),
1502 * and a buffer pointer (bp), with the buffer contents being
1503 * the first sector of the logical drive (i.e., the sector that
1504 * contains the BPB for that drive).
1505 *
1506 * Note: this code is not capable of addressing >2TB disks, as it uses
1507 * daddr_t not diskaddr_t, some of the calculations would overflow
1508 */
1509 #define COPY_PTBL(mbr, ptblp) \
1510 bcopy(&(((struct mboot *)(mbr))->parts), (ptblp), \
1511 FD_NUMPART * sizeof (struct ipart))
1512
1513 static int
1514 findTheDrive(struct pcfs *fsp, buf_t **bp)
1515 {
1516 int ldrive = fsp->pcfs_ldrive;
1517 dev_t dev = fsp->pcfs_devvp->v_rdev;
1518
1519 struct ipart dosp[FD_NUMPART]; /* incore fdisk partition structure */
1520 daddr_t lastseek = 0; /* Disk block we sought previously */
1521 daddr_t diskblk = 0; /* Disk block to get */
1522 daddr_t xstartsect; /* base of Extended DOS partition */
1523 int logicalDriveCount = 0; /* Count of logical drives seen */
1524 int extendedPart = -1; /* index of extended dos partition */
1525 int primaryPart = -1; /* index of primary dos partition */
1526 int bootPart = -1; /* index of a Solaris boot partition */
1527 uint32_t xnumsect = 0; /* length of extended DOS partition */
1528 int driveIndex; /* computed FDISK table index */
1529 daddr_t startsec;
1530 len_t mediasize;
1531 int i;
1532 /*
1533 * Count of drives in the current extended partition's
1534 * FDISK table, and indexes of the drives themselves.
1535 */
1536 int extndDrives[FD_NUMPART];
1537 int numDrives = 0;
1538
1539 /*
1540 * Count of drives (beyond primary) in master boot record's
1541 * FDISK table, and indexes of the drives themselves.
1542 */
1543 int extraDrives[FD_NUMPART];
1544 int numExtraDrives = 0;
1545
1546 /*
1547 * "ldrive == 0" should never happen, as this is a request to
1548 * mount the physical device (and ignore partitioning). The code
1549 * in pcfs_mount() should have made sure that a logical drive number
1550 * is at least 1, meaning we're looking for drive "C:". It is not
1551 * safe (and a bug in the callers of this function) to request logical
1552 * drive number 0; we could ASSERT() but a graceful EIO is a more
1553 * polite way.
1554 */
1555 if (ldrive == 0) {
1556 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1557 noLogicalDrive(ldrive);
1558 return (EIO);
1559 }
1560
1561 /*
1562 * Copy from disk block into memory aligned structure for fdisk usage.
1563 */
1564 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1565
1566 /*
1567 * This check is ok because a FAT BPB and a master boot record (MBB)
1568 * have the same signature, in the same position within the block.
1569 */
1570 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1571 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1572 "device (%x.%x):%d\n",
1573 getmajor(dev), getminor(dev), ldrive);
1574 return (EINVAL);
1575 }
1576
1577 /*
1578 * Get a summary of what is in the Master FDISK table.
1579 * Normally we expect to find one partition marked as a DOS drive.
1580 * This partition is the one Windows calls the primary dos partition.
1581 * If the machine has any logical drives then we also expect
1582 * to find a partition marked as an extended DOS partition.
1583 *
1584 * Sometimes we'll find multiple partitions marked as DOS drives.
1585 * The Solaris fdisk program allows these partitions
1586 * to be created, but Windows fdisk no longer does. We still need
1587 * to support these, though, since Windows does. We also need to fix
1588 * our fdisk to behave like the Windows version.
1589 *
1590 * It turns out that some off-the-shelf media have *only* an
1591 * Extended partition, so we need to deal with that case as well.
1592 *
1593 * Only a single (the first) Extended or Boot Partition will
1594 * be recognized. Any others will be ignored.
1595 */
1596 for (i = 0; i < FD_NUMPART; i++) {
1597 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1598 uint_t, (uint_t)dosp[i].systid,
1599 uint_t, LE_32(dosp[i].relsect),
1600 uint_t, LE_32(dosp[i].numsect));
1601
1602 if (isDosDrive(dosp[i].systid)) {
1603 if (primaryPart < 0) {
1604 logicalDriveCount++;
1605 primaryPart = i;
1606 } else {
1607 extraDrives[numExtraDrives++] = i;
1608 }
1609 continue;
1610 }
1611 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1612 extendedPart = i;
1613 continue;
1614 }
1615 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1616 bootPart = i;
1617 continue;
1618 }
1619 }
1620
1621 if (ldrive == BOOT_PARTITION_DRIVE) {
1622 if (bootPart < 0) {
1623 noLogicalDrive(ldrive);
1624 return (EINVAL);
1625 }
1626 startsec = LE_32(dosp[bootPart].relsect);
1627 mediasize = LE_32(dosp[bootPart].numsect);
1628 goto found;
1629 }
1630
1631 if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1632 startsec = LE_32(dosp[primaryPart].relsect);
1633 mediasize = LE_32(dosp[primaryPart].numsect);
1634 goto found;
1635 }
1636
1637 /*
1638 * We are not looking for the C: drive (or the primary drive
1639 * was not found), so we had better have an extended partition
1640 * or extra drives in the Master FDISK table.
1641 */
1642 if ((extendedPart < 0) && (numExtraDrives == 0)) {
1643 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1644 noLogicalDrive(ldrive);
1645 return (EINVAL);
1646 }
1647
1648 if (extendedPart >= 0) {
1649 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1650 xnumsect = LE_32(dosp[extendedPart].numsect);
1651 do {
1652 /*
1653 * If the seek would not cause us to change
1654 * position on the drive, then we're out of
1655 * extended partitions to examine.
1656 */
1657 if (diskblk == lastseek)
1658 break;
1659 logicalDriveCount += numDrives;
1660 /*
1661 * Seek the next extended partition, and find
1662 * logical drives within it.
1663 */
1664 brelse(*bp);
1665 /*
1666 * bread() block numbers are multiples of DEV_BSIZE
1667 * but the device sector size (the unit of partitioning)
1668 * might be larger than that; pcfs_get_device_info()
1669 * has calculated the multiplicator for us.
1670 */
1671 *bp = bread(dev,
1672 pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1673 if ((*bp)->b_flags & B_ERROR) {
1674 return (EIO);
1675 }
1676
1677 lastseek = diskblk;
1678 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1679 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1680 cmn_err(CE_NOTE, "!pcfs: "
1681 "extended partition table signature err, "
1682 "device (%x.%x):%d, LBA %u",
1683 getmajor(dev), getminor(dev), ldrive,
1684 (uint_t)pc_dbdaddr(fsp, diskblk));
1685 return (EINVAL);
1686 }
1687 /*
1688 * Count up drives, and track where the next
1689 * extended partition is in case we need it. We
1690 * are expecting only one extended partition. If
1691 * there is more than one we'll only go to the
1692 * first one we see, but warn about ignoring.
1693 */
1694 numDrives = 0;
1695 for (i = 0; i < FD_NUMPART; i++) {
1696 DTRACE_PROBE4(extendedpart,
1697 struct pcfs *, fsp,
1698 uint_t, (uint_t)dosp[i].systid,
1699 uint_t, LE_32(dosp[i].relsect),
1700 uint_t, LE_32(dosp[i].numsect));
1701 if (isDosDrive(dosp[i].systid)) {
1702 extndDrives[numDrives++] = i;
1703 } else if (isDosExtended(dosp[i].systid)) {
1704 if (diskblk != lastseek) {
1705 /*
1706 * Already found an extended
1707 * partition in this table.
1708 */
1709 cmn_err(CE_NOTE,
1710 "!pcfs: ignoring unexpected"
1711 " additional extended"
1712 " partition");
1713 } else {
1714 diskblk = xstartsect +
1715 LE_32(dosp[i].relsect);
1716 }
1717 }
1718 }
1719 } while (ldrive > logicalDriveCount + numDrives);
1720
1721 ASSERT(numDrives <= FD_NUMPART);
1722
1723 if (ldrive <= logicalDriveCount + numDrives) {
1724 /*
1725 * The number of logical drives we've found thus
1726 * far is enough to get us to the one we were
1727 * searching for.
1728 */
1729 driveIndex = logicalDriveCount + numDrives - ldrive;
1730 mediasize =
1731 LE_32(dosp[extndDrives[driveIndex]].numsect);
1732 startsec =
1733 LE_32(dosp[extndDrives[driveIndex]].relsect) +
1734 lastseek;
1735 if (startsec > (xstartsect + xnumsect)) {
1736 cmn_err(CE_NOTE, "!pcfs: extended partition "
1737 "values bad");
1738 return (EINVAL);
1739 }
1740 goto found;
1741 } else {
1742 /*
1743 * We ran out of extended dos partition
1744 * drives. The only hope now is to go
1745 * back to extra drives defined in the master
1746 * fdisk table. But we overwrote that table
1747 * already, so we must load it in again.
1748 */
1749 logicalDriveCount += numDrives;
1750 brelse(*bp);
1751 ASSERT(fsp->pcfs_dosstart == 0);
1752 *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1753 fsp->pcfs_secsize);
1754 if ((*bp)->b_flags & B_ERROR) {
1755 return (EIO);
1756 }
1757 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1758 }
1759 }
1760 /*
1761 * Still haven't found the drive, is it an extra
1762 * drive defined in the main FDISK table?
1763 */
1764 if (ldrive <= logicalDriveCount + numExtraDrives) {
1765 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1766 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1767 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1768 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1769 goto found;
1770 }
1771 /*
1772 * Still haven't found the drive, and there is
1773 * nowhere else to look.
1774 */
1775 noLogicalDrive(ldrive);
1776 return (EINVAL);
1777
1778 found:
1779 /*
1780 * We need this value in units of sectorsize, because PCFS' internal
1781 * offset calculations go haywire for > 512Byte sectors unless all
1782 * pcfs_.*start values are in units of sectors.
1783 * So, assign before the capacity check (that's done in DEV_BSIZE)
1784 */
1785 fsp->pcfs_dosstart = startsec;
1786
1787 /*
1788 * convert from device sectors to proper units:
1789 * - starting sector: DEV_BSIZE (as argument to bread())
1790 * - media size: Bytes
1791 */
1792 startsec = pc_dbdaddr(fsp, startsec);
1793 mediasize *= fsp->pcfs_secsize;
1794
1795 /*
1796 * some additional validation / warnings in case the partition table
1797 * and the actual media capacity are not in accordance ...
1798 */
1799 if (fsp->pcfs_mediasize != 0) {
1800 diskaddr_t startoff =
1801 (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1802
1803 if (startoff >= fsp->pcfs_mediasize ||
1804 startoff + mediasize > fsp->pcfs_mediasize) {
1805 cmn_err(CE_WARN,
1806 "!pcfs: partition size (LBA start %u, %lld bytes, "
1807 "device (%x.%x):%d) smaller than "
1808 "mediasize (%lld bytes).\n"
1809 "filesystem may be truncated, access errors "
1810 "may result.\n",
1811 (uint_t)startsec, (long long)mediasize,
1812 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1813 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1814 }
1815 } else {
1816 fsp->pcfs_mediasize = mediasize;
1817 }
1818
1819 return (0);
1820 }
1821
1822
1823 static fattype_t
1824 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1825 {
1826 uint32_t ncl = fsp->pcfs_ncluster;
1827
1828 if (ncl <= 4096) {
1829 if (bpb_get_FatSz16(bpb) == 0)
1830 return (FAT_UNKNOWN);
1831
1832 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1833 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1834 return (FAT12);
1835 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1836 return (FAT12);
1837 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1838 return (FAT16);
1839
1840 switch (bpb_get_Media(bpb)) {
1841 case SS8SPT:
1842 case DS8SPT:
1843 case SS9SPT:
1844 case DS9SPT:
1845 case DS18SPT:
1846 case DS9_15SPT:
1847 /*
1848 * Is this reliable - all floppies are FAT12 ?
1849 */
1850 return (FAT12);
1851 case MD_FIXED:
1852 /*
1853 * Is this reliable - disks are always FAT16 ?
1854 */
1855 return (FAT16);
1856 default:
1857 break;
1858 }
1859 } else if (ncl <= 65536) {
1860 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1861 return (FAT32);
1862 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1863 return (FAT32);
1864 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1865 return (FAT32);
1866
1867 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1868 return (FAT16);
1869 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1870 return (FAT16);
1871 }
1872
1873 /*
1874 * We don't know
1875 */
1876 return (FAT_UNKNOWN);
1877 }
1878
1879 /*
1880 * Check to see if the BPB we found is correct.
1881 *
 * This looks far more complicated than it needs to be for pure structural
 * validation. The reason for this is that parseBPB() is also used for
 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
 * BPB fields (do not) have 'known good' values, even if we (do not) reject
 * the BPB when attempting to mount the filesystem.
1887 *
1888 * Real-world usage of FAT shows there are a lot of corner-case situations
1889 * and, following the specification strictly, invalid filesystems out there.
1890 * Known are situations such as:
1891 * - FAT12/FAT16 filesystems with garbage in either totsec16/32
1892 * instead of the zero in one of the fields mandated by the spec
1893 * - filesystems that claim to be larger than the partition they're in
1894 * - filesystems without valid media descriptor
1895 * - FAT32 filesystems with RootEntCnt != 0
1896 * - FAT32 filesystems with less than 65526 clusters
1897 * - FAT32 filesystems without valid FSI sector
1898 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1899 *
 * PCFS could access such filesystems - if it knew from the start that
 * the filesystem should be treated as a specific FAT type. Before S10, it
 * relied on the PC/fdisk partition type for that purpose and almost
 * completely ignored the BPB; now it ignores the partition type for
 * everything except logical drive enumeration, which can result in the
 * rejection of an (invalid) FAT32 filesystem - for example, if the
 * partition ID says FAT32 but the filesystem has fewer than 65526 clusters.
1907 *
1908 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1909 * not possible to allow all such mostly-compliant filesystems in unless one
1910 * accepts false positives (definitely invalid filesystems that cause problems
1911 * later). This at least allows to pinpoint why the mount failed.
1912 *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated with respect to potential effects on
 * PCFS resilience. A faulty/"mis-crafted" filesystem must not cause a panic,
 * so beware.
1917 */
1918 static int
1919 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1920 {
1921 fattype_t type;
1922
1923 uint32_t ncl; /* number of clusters in file area */
1924 uint32_t rec;
1925 uint32_t reserved;
1926 uint32_t fsisec, bkbootsec;
1927 blkcnt_t totsec, totsec16, totsec32, datasec;
1928 size_t fatsec, fatsec16, fatsec32, rdirsec;
1929 size_t secsize;
1930 len_t mediasize;
1931 uint64_t validflags = 0;
1932
1933 if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1934 validflags |= BPB_BPBSIG_OK;
1935
1936 rec = bpb_get_RootEntCnt(bpb);
1937 reserved = bpb_get_RsvdSecCnt(bpb);
1938 fsisec = bpb_get_FSInfo32(bpb);
1939 bkbootsec = bpb_get_BkBootSec32(bpb);
1940 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1941 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1942 fatsec16 = bpb_get_FatSz16(bpb);
1943 fatsec32 = bpb_get_FatSz32(bpb);
1944
1945 totsec = totsec16 ? totsec16 : totsec32;
1946 fatsec = fatsec16 ? fatsec16 : fatsec32;
1947
1948 secsize = bpb_get_BytesPerSec(bpb);
1949 if (!VALID_SECSIZE(secsize))
1950 secsize = fsp->pcfs_secsize;
1951 if (secsize != fsp->pcfs_secsize) {
1952 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1953 getmajor(fsp->pcfs_xdev),
1954 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1955 PC_DPRINTF2(3, "!BPB secsize %d != "
1956 "autodetected media block size %d\n",
1957 (int)secsize, (int)fsp->pcfs_secsize);
1958 if (fsp->pcfs_ldrive) {
1959 /*
1960 * We've already attempted to parse the partition
1961 * table. If the block size used for that don't match
1962 * the PCFS sector size, we're hosed one way or the
1963 * other. Just try what happens.
1964 */
1965 secsize = fsp->pcfs_secsize;
1966 PC_DPRINTF1(3,
1967 "!pcfs: Using autodetected secsize %d\n",
1968 (int)secsize);
1969 } else {
1970 /*
1971 * This allows mounting lofi images of PCFS partitions
1972 * with sectorsize != DEV_BSIZE. We can't parse the
1973 * partition table on whole-disk images unless the
1974 * (undocumented) "secsize=..." mount option is used,
1975 * but at least this allows us to mount if we have
1976 * an image of a partition.
1977 */
1978 PC_DPRINTF1(3,
1979 "!pcfs: Using BPB secsize %d\n", (int)secsize);
1980 }
1981 }
1982
1983 if (fsp->pcfs_mediasize == 0) {
1984 mediasize = (len_t)totsec * (len_t)secsize;
1985 /*
1986 * This is not an error because not all devices support the
1987 * dkio(7i) mediasize queries, and/or not all devices are
1988 * partitioned. If we have not been able to figure out the
1989 * size of the underlaying medium, we have to trust the BPB.
1990 */
1991 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1992 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1993 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1994 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1995 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1996 cmn_err(CE_WARN,
1997 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1998 "FAT BPB mediasize (%lld Bytes).\n"
1999 "truncated filesystem on device (%x.%x):%d, access errors "
2000 "possible.\n",
2001 (long long)fsp->pcfs_mediasize,
2002 (long long)(totsec * (blkcnt_t)secsize),
2003 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2004 fsp->pcfs_ldrive);
2005 mediasize = fsp->pcfs_mediasize;
2006 } else {
2007 /*
2008 * This is actually ok. A FAT needs not occupy the maximum
2009 * space available in its partition, it can be shorter.
2010 */
2011 mediasize = (len_t)totsec * (len_t)secsize;
2012 }
2013
2014 /*
2015 * Since we let just about anything pass through this function,
2016 * fence against divide-by-zero here.
2017 */
2018 if (secsize)
2019 rdirsec = roundup(rec * 32, secsize) / secsize;
2020 else
2021 rdirsec = 0;
2022
2023 /*
2024 * This assignment is necessary before pc_dbdaddr() can first be
2025 * used. Must initialize the value here.
2026 */
2027 fsp->pcfs_secsize = secsize;
2028 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2029
2030 fsp->pcfs_mediasize = mediasize;
2031
2032 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2033 fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2034 fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2035 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2036 fsp->pcfs_rdirsec = rdirsec;
2037
2038 /*
2039 * Remember: All PCFS offset calculations in sectors. Before I/O
2040 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2041 * necessary so that media with > 512Byte sector sizes work correctly.
2042 */
2043 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2044 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2045 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2046 datasec = totsec -
2047 (blkcnt_t)fatsec * fsp->pcfs_numfat -
2048 (blkcnt_t)rdirsec -
2049 (blkcnt_t)reserved;
2050
2051 DTRACE_PROBE4(fatgeometry,
2052 blkcnt_t, totsec, size_t, fatsec,
2053 size_t, rdirsec, blkcnt_t, datasec);
2054
2055 /*
2056 * 'totsec' is taken directly from the BPB and guaranteed to fit
2057 * into a 32bit unsigned integer. The calculation of 'datasec',
2058 * on the other hand, could underflow for incorrect values in
2059 * rdirsec/reserved/fatsec. Check for that.
2060 * We also check that the BPB conforms to the FAT specification's
2061 * requirement that either of the 16/32bit total sector counts
2062 * must be zero.
2063 */
2064 if (totsec != 0 &&
2065 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2066 datasec < totsec && datasec <= UINT32_MAX)
2067 validflags |= BPB_TOTSEC_OK;
2068
2069 if ((len_t)totsec * (len_t)secsize <= mediasize)
2070 validflags |= BPB_MEDIASZ_OK;
2071
2072 if (VALID_SECSIZE(secsize))
2073 validflags |= BPB_SECSIZE_OK;
2074 if (VALID_SPCL(fsp->pcfs_spcl))
2075 validflags |= BPB_SECPERCLUS_OK;
2076 if (VALID_CLSIZE(fsp->pcfs_clsize))
2077 validflags |= BPB_CLSIZE_OK;
2078 if (VALID_NUMFATS(fsp->pcfs_numfat))
2079 validflags |= BPB_NUMFAT_OK;
2080 if (VALID_RSVDSEC(reserved) && reserved < totsec)
2081 validflags |= BPB_RSVDSECCNT_OK;
2082 if (VALID_MEDIA(fsp->pcfs_mediadesc))
2083 validflags |= BPB_MEDIADESC_OK;
2084 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2085 validflags |= BPB_BOOTSIG16_OK;
2086 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2087 validflags |= BPB_BOOTSIG32_OK;
2088 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2089 validflags |= BPB_FSTYPSTR16_OK;
2090 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2091 validflags |= BPB_FSTYPSTR32_OK;
2092 if (VALID_OEMNAME(bpb_OEMName(bpb)))
2093 validflags |= BPB_OEMNAME_OK;
2094 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2095 validflags |= BPB_BKBOOTSEC_OK;
2096 if (fsisec > 0 && fsisec <= reserved)
2097 validflags |= BPB_FSISEC_OK;
2098 if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2099 validflags |= BPB_JMPBOOT_OK;
2100 if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2101 validflags |= BPB_FSVER_OK;
2102 if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2103 validflags |= BPB_VOLLAB16_OK;
2104 if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2105 validflags |= BPB_VOLLAB32_OK;
2106 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2107 validflags |= BPB_EXTFLAGS_OK;
2108
2109 /*
2110 * Try to determine which FAT format to use.
2111 *
2112 * Calculate the number of clusters in order to determine
2113 * the type of FAT we are looking at. This is the only
2114 * recommended way of determining FAT type, though there
2115 * are other hints in the data, this is the best way.
2116 *
2117 * Since we let just about "anything" pass through this function
2118 * without early exits, fence against divide-by-zero here.
2119 *
2120 * datasec was already validated against UINT32_MAX so we know
2121 * the result will not overflow the 32bit calculation.
2122 */
2123 if (fsp->pcfs_spcl)
2124 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2125 else
2126 ncl = 0;
2127
2128 fsp->pcfs_ncluster = ncl;
2129
2130 /*
2131 * From the Microsoft FAT specification:
2132 * In the following example, when it says <, it does not mean <=.
2133 * Note also that the numbers are correct. The first number for
2134 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2135 * and the '<' signs are not wrong.
2136 *
2137 * We "specialdetect" the corner cases, and use at least one "extra"
2138 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2139 * count is dangerously close to the boundaries.
2140 */
2141
2142 if (ncl <= PCF_FIRSTCLUSTER) {
2143 type = FAT_UNKNOWN;
2144 } else if (ncl < 4085) {
2145 type = FAT12;
2146 } else if (ncl <= 4096) {
2147 type = FAT_QUESTIONABLE;
2148 } else if (ncl < 65525) {
2149 type = FAT16;
2150 } else if (ncl <= 65536) {
2151 type = FAT_QUESTIONABLE;
2152 } else if (ncl < PCF_LASTCLUSTER32) {
2153 type = FAT32;
2154 } else {
2155 type = FAT_UNKNOWN;
2156 }
2157
2158 DTRACE_PROBE4(parseBPB__initial,
2159 struct pcfs *, fsp, unsigned char *, bpb,
2160 int, validflags, fattype_t, type);
2161
2162 recheck:
2163 fsp->pcfs_fatsec = fatsec;
2164
2165 /* Do some final sanity checks for each specific type of FAT */
2166 switch (type) {
2167 case FAT12:
2168 if (rec != 0)
2169 validflags |= BPB_ROOTENTCNT_OK;
2170 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2171 bpb_get_TotSec16(bpb) == 0)
2172 validflags |= BPB_TOTSEC16_OK;
2173 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2174 bpb_get_TotSec32(bpb) == 0)
2175 validflags |= BPB_TOTSEC32_OK;
2176 if (bpb_get_FatSz16(bpb) == fatsec)
2177 validflags |= BPB_FATSZ16_OK;
2178 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2179 * 3 / 2)
2180 validflags |= BPB_FATSZ_OK;
2181 if (ncl < 4085)
2182 validflags |= BPB_NCLUSTERS_OK;
2183
2184 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2185 fsp->pcfs_rootblksize =
2186 fsp->pcfs_rdirsec * secsize;
2187 fsp->pcfs_fsistart = 0;
2188
2189 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2190 type = FAT_UNKNOWN;
2191 break;
2192 case FAT16:
2193 if (rec != 0)
2194 validflags |= BPB_ROOTENTCNT_OK;
2195 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2196 bpb_get_TotSec16(bpb) == 0)
2197 validflags |= BPB_TOTSEC16_OK;
2198 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2199 bpb_get_TotSec32(bpb) == 0)
2200 validflags |= BPB_TOTSEC32_OK;
2201 if (bpb_get_FatSz16(bpb) == fatsec)
2202 validflags |= BPB_FATSZ16_OK;
2203 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2204 validflags |= BPB_FATSZ_OK;
2205 if (ncl >= 4085 && ncl < 65525)
2206 validflags |= BPB_NCLUSTERS_OK;
2207
2208 fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2209 fsp->pcfs_rootblksize =
2210 fsp->pcfs_rdirsec * secsize;
2211 fsp->pcfs_fsistart = 0;
2212
2213 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2214 type = FAT_UNKNOWN;
2215 break;
2216 case FAT32:
2217 if (rec == 0)
2218 validflags |= BPB_ROOTENTCNT_OK;
2219 if (bpb_get_TotSec16(bpb) == 0)
2220 validflags |= BPB_TOTSEC16_OK;
2221 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2222 validflags |= BPB_TOTSEC32_OK;
2223 if (bpb_get_FatSz16(bpb) == 0)
2224 validflags |= BPB_FATSZ16_OK;
2225 if (bpb_get_FatSz32(bpb) == fatsec)
2226 validflags |= BPB_FATSZ32_OK;
2227 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2228 validflags |= BPB_FATSZ_OK;
2229 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2230 validflags |= BPB_NCLUSTERS_OK;
2231
2232 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2233 fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2234 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2235 if (validflags & BPB_FSISEC_OK)
2236 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2237 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2238 if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2239 validflags |= BPB_ROOTCLUSTER_OK;
2240
2241 /*
2242 * Current PCFS code only works if 'pcfs_rdirstart'
2243 * contains the root cluster number on FAT32.
2244 * That's a mis-use and would better be changed.
2245 */
2246 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2247
2248 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2249 type = FAT_UNKNOWN;
2250 break;
2251 case FAT_QUESTIONABLE:
2252 type = secondaryBPBChecks(fsp, bpb, secsize);
2253 goto recheck;
2254 default:
2255 ASSERT(type == FAT_UNKNOWN);
2256 break;
2257 }
2258
2259 ASSERT(type != FAT_QUESTIONABLE);
2260
2261 fsp->pcfs_fattype = type;
2262
2263 if (valid)
2264 *valid = validflags;
2265
2266 DTRACE_PROBE4(parseBPB__final,
2267 struct pcfs *, fsp, unsigned char *, bpb,
2268 int, validflags, fattype_t, type);
2269
2270 if (type != FAT_UNKNOWN) {
2271 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2272 ASSERT(ISP2(secsize / DEV_BSIZE));
2273 return (1);
2274 }
2275
2276 return (0);
2277 }
2278
2279
2280 /*
2281 * Detect the device's native block size (sector size).
2282 *
2283 * Test whether the device is:
2284 * - a floppy device from a known controller type via DKIOCINFO
2285 * - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2286 * - a USB floppy drive (identified by drive geometry)
2287 *
2288 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2289 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2290 *
2291 * This might be a bit wasteful on kernel stack space; if anyone's
2292 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2293 */
2294 static void
2295 pcfs_device_getinfo(struct pcfs *fsp)
2296 {
2297 dev_t rdev = fsp->pcfs_xdev;
2298 int error;
2299 union {
2300 struct dk_minfo mi;
2301 struct dk_cinfo ci;
2302 struct dk_geom gi;
2303 struct fd_char fc;
2304 } arg; /* save stackspace ... */
2305 intptr_t argp = (intptr_t)&arg;
2306 ldi_handle_t lh;
2307 ldi_ident_t li;
2308 int isfloppy, isremoveable, ishotpluggable;
2309 cred_t *cr = CRED();
2310
2311 if (ldi_ident_from_dev(rdev, &li))
2312 goto out;
2313
2314 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2315 ldi_ident_release(li);
2316 if (error)
2317 goto out;
2318
2319 /*
2320 * Not sure if this could possibly happen. It'd be a bit like
2321 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2322 * expecting it, needs some thought if triggered ...
2323 */
2324 ASSERT(fsp->pcfs_xdev == rdev);
2325
2326 /*
2327 * Check for removeable/hotpluggable media.
2328 */
2329 if (ldi_ioctl(lh, DKIOCREMOVABLE,
2330 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2331 isremoveable = 0;
2332 }
2333 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2334 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2335 ishotpluggable = 0;
2336 }
2337
2338 /*
2339 * Make sure we don't use "half-initialized" values if the ioctls fail.
2340 */
2341 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2342 bzero(&arg, sizeof (arg));
2343 fsp->pcfs_mediasize = 0;
2344 } else {
2345 fsp->pcfs_mediasize =
2346 (len_t)arg.mi.dki_lbsize *
2347 (len_t)arg.mi.dki_capacity;
2348 }
2349
2350 if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2351 if (fsp->pcfs_secsize == 0) {
2352 fsp->pcfs_secsize = arg.mi.dki_lbsize;
2353 fsp->pcfs_sdshift =
2354 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2355 } else {
2356 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2357 "%d, device (%x.%x), different from user-provided "
2358 "%d. User override - ignoring autodetect result.\n",
2359 arg.mi.dki_lbsize,
2360 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2361 fsp->pcfs_secsize);
2362 }
2363 } else if (arg.mi.dki_lbsize) {
2364 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2365 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2366 "Ignoring autodetect result.\n",
2367 arg.mi.dki_lbsize,
2368 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2369 }
2370
2371 /*
2372 * We treat the following media types as a floppy by default.
2373 */
2374 isfloppy =
2375 (arg.mi.dki_media_type == DK_FLOPPY ||
2376 arg.mi.dki_media_type == DK_ZIP ||
2377 arg.mi.dki_media_type == DK_JAZ);
2378
2379 /*
2380 * if this device understands fdio(7I) requests it's
2381 * obviously a floppy drive.
2382 */
2383 if (!isfloppy &&
2384 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2385 isfloppy = 1;
2386
2387 /*
2388 * some devices we like to treat as floppies, but they don't
2389 * understand fdio(7I) requests.
2390 */
2391 if (!isfloppy &&
2392 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2393 (arg.ci.dki_ctype == DKC_WDC2880 ||
2394 arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2395 arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2396 arg.ci.dki_ctype == DKC_INTEL82077))
2397 isfloppy = 1;
2398
2399 /*
2400 * This is the "final fallback" test - media with
2401 * 2 heads and 80 cylinders are assumed to be floppies.
2402 * This is normally true for USB floppy drives ...
2403 */
2404 if (!isfloppy &&
2405 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2406 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2407 isfloppy = 1;
2408
2409 /*
2410 * This is similar to the "old" PCFS code that sets this flag
2411 * just based on the media descriptor being 0xf8 (MD_FIXED).
2412 * Should be re-worked. We really need some specialcasing for
2413 * removeable media.
2414 */
2415 if (!isfloppy) {
2416 fsp->pcfs_flags |= PCFS_NOCHK;
2417 }
2418
2419 /*
2420 * We automatically disable access time updates if the medium is
2421 * removeable and/or hotpluggable, and the admin did not explicitly
2422 * request access time updates (via the "atime" mount option).
2423 * The majority of flash-based media should fit this category.
2424 * Minimizing write access extends the lifetime of your memory stick !
2425 */
2426 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2427 (isremoveable || ishotpluggable | isfloppy)) {
2428 fsp->pcfs_flags |= PCFS_NOATIME;
2429 }
2430
2431 (void) ldi_close(lh, FREAD, cr);
2432 out:
2433 if (fsp->pcfs_secsize == 0) {
2434 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2435 "device (%x.%x) failed, no user-provided fallback. "
2436 "Using %d bytes.\n",
2437 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2438 DEV_BSIZE);
2439 fsp->pcfs_secsize = DEV_BSIZE;
2440 fsp->pcfs_sdshift = 0;
2441 }
2442 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2443 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2444 }
2445
2446 /*
2447 * Get the FAT type for the DOS medium.
2448 *
2449 * -------------------------
2450 * According to Microsoft:
2451 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2452 * count of clusters on the volume and nothing else.
2453 * -------------------------
2454 *
2455 */
2456 static int
2457 pc_getfattype(struct pcfs *fsp)
2458 {
2459 int error = 0;
2460 buf_t *bp = NULL;
2461 struct vnode *devvp = fsp->pcfs_devvp;
2462 dev_t dev = devvp->v_rdev;
2463
2464 /*
2465 * Detect the native block size of the medium, and attempt to
2466 * detect whether the medium is removeable.
2467 * We do treat removable media (floppies, USB and FireWire disks)
2468 * differently wrt. to the frequency and synchronicity of FAT updates.
2469 * We need to know the media block size in order to be able to
2470 * parse the partition table.
2471 */
2472 pcfs_device_getinfo(fsp);
2473
2474 /*
2475 * Unpartitioned media (floppies and some removeable devices)
2476 * don't have a partition table, the FAT BPB is at disk block 0.
2477 * Start out by reading block 0.
2478 */
2479 fsp->pcfs_dosstart = 0;
2480 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2481
2482 if (error = geterror(bp))
2483 goto out;
2484
2485 /*
2486 * If a logical drive number is requested, parse the partition table
2487 * and attempt to locate it. Otherwise, proceed immediately to the
2488 * BPB check. findTheDrive(), if successful, returns the disk block
2489 * number where the requested partition starts in "startsec".
2490 */
2491 if (fsp->pcfs_ldrive != 0) {
2492 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2493 "device (%x,%x):%d to find BPB\n",
2494 getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2495
2496 if (error = findTheDrive(fsp, &bp))
2497 goto out;
2498
2499 ASSERT(fsp->pcfs_dosstart != 0);
2500
2501 brelse(bp);
2502 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2503 fsp->pcfs_secsize);
2504 if (error = geterror(bp))
2505 goto out;
2506 }
2507
2508 /*
2509 * Validate the BPB and fill in the instance structure.
2510 */
2511 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2512 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2513 "device (%x.%x):%d, disk LBA %u\n",
2514 getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2515 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2516 error = EINVAL;
2517 goto out;
2518 }
2519
2520 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2521
2522 out:
2523 /*
2524 * Release the buffer used
2525 */
2526 if (bp != NULL)
2527 brelse(bp);
2528 return (error);
2529 }
2530
2531
2532 /*
2533 * Get the file allocation table.
2534 * If there is an old FAT, invalidate it.
2535 */
2536 int
2537 pc_getfat(struct pcfs *fsp)
2538 {
2539 struct buf *bp = NULL;
2540 uchar_t *fatp = NULL;
2541 uchar_t *fat_changemap = NULL;
2542 int error;
2543 int fat_changemapsize;
2544 int flags = 0;
2545 int nfat;
2546 int altfat_mustmatch = 0;
2547 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2548
2549 if (fsp->pcfs_fatp) {
2550 /*
2551 * There is a FAT in core.
2552 * If there are open file pcnodes or we have modified it or
2553 * it hasn't timed out yet use the in core FAT.
2554 * Otherwise invalidate it and get a new one
2555 */
2556 #ifdef notdef
2557 if (fsp->pcfs_frefs ||
2558 (fsp->pcfs_flags & PCFS_FATMOD) ||
2559 (gethrestime_sec() < fsp->pcfs_fattime)) {
2560 return (0);
2561 } else {
2562 mutex_enter(&pcfslock);
2563 pc_invalfat(fsp);
2564 mutex_exit(&pcfslock);
2565 }
2566 #endif /* notdef */
2567 return (0);
2568 }
2569
2570 /*
2571 * Get FAT and check it for validity
2572 */
2573 fatp = kmem_alloc(fatsize, KM_SLEEP);
2574 error = pc_readfat(fsp, fatp);
2575 if (error) {
2576 flags = B_ERROR;
2577 goto out;
2578 }
2579 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2580 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2581 fsp->pcfs_fatp = fatp;
2582 fsp->pcfs_fat_changemapsize = fat_changemapsize;
2583 fsp->pcfs_fat_changemap = fat_changemap;
2584
2585 /*
2586 * The only definite signature check is that the
2587 * media descriptor byte should match the first byte
2588 * of the FAT block.
2589 */
2590 if (fatp[0] != fsp->pcfs_mediadesc) {
2591 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2592 "media descriptor %x, FAT[0] lowbyte %x\n",
2593 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2594 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2595 altfat_mustmatch = 1;
2596 }
2597
2598 /*
2599 * Get alternate FATs and check for consistency
2600 * This is an inlined version of pc_readfat().
2601 * Since we're only comparing FAT and alternate FAT,
2602 * there's no reason to let pc_readfat() copy data out
2603 * of the buf. Instead, compare in-situ, one cluster
2604 * at a time.
2605 */
2606 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2607 size_t startsec;
2608 size_t off;
2609
2610 startsec = pc_dbdaddr(fsp,
2611 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2612
2613 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2614 daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2615 pc_cltodb(fsp, pc_lblkno(fsp, off)));
2616
2617 bp = bread(fsp->pcfs_xdev, fatblk,
2618 MIN(fsp->pcfs_clsize, fatsize - off));
2619 if (bp->b_flags & (B_ERROR | B_STALE)) {
2620 cmn_err(CE_NOTE,
2621 "!pcfs: alternate FAT #%d (start LBA %p)"
2622 " read error at offset %ld on device"
2623 " (%x.%x):%d",
2624 nfat, (void *)(uintptr_t)startsec, off,
2625 getmajor(fsp->pcfs_xdev),
2626 getminor(fsp->pcfs_xdev),
2627 fsp->pcfs_ldrive);
2628 flags = B_ERROR;
2629 error = EIO;
2630 goto out;
2631 }
2632 bp->b_flags |= B_STALE | B_AGE;
2633 if (bcmp(bp->b_un.b_addr, fatp + off,
2634 MIN(fsp->pcfs_clsize, fatsize - off))) {
2635 cmn_err(CE_NOTE,
2636 "!pcfs: alternate FAT #%d (start LBA %p)"
2637 " corrupted at offset %ld on device"
2638 " (%x.%x):%d",
2639 nfat, (void *)(uintptr_t)startsec, off,
2640 getmajor(fsp->pcfs_xdev),
2641 getminor(fsp->pcfs_xdev),
2642 fsp->pcfs_ldrive);
2643 if (altfat_mustmatch) {
2644 flags = B_ERROR;
2645 error = EIO;
2646 goto out;
2647 }
2648 }
2649 brelse(bp);
2650 bp = NULL; /* prevent double release */
2651 }
2652 }
2653
2654 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2655 fsp->pcfs_fatjustread = 1;
2656
2657 /*
2658 * Retrieve FAT32 fsinfo sector.
2659 * A failure to read this is not fatal to accessing the volume.
2660 * It simply means operations that count or search free blocks
2661 * will have to do a full FAT walk, vs. a possibly quicker lookup
2662 * of the summary information.
2663 * Hence, we log a message but return success overall after this point.
2664 */
2665 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2666 struct fat_od_fsi *fsinfo_disk;
2667
2668 bp = bread(fsp->pcfs_xdev,
2669 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2670 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2671 if (bp->b_flags & (B_ERROR | B_STALE) ||
2672 !FSISIG_OK(fsinfo_disk)) {
2673 cmn_err(CE_NOTE,
2674 "!pcfs: error reading fat32 fsinfo from "
2675 "device (%x.%x):%d, block %lld",
2676 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2677 fsp->pcfs_ldrive,
2678 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2679 fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2680 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2681 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2682 } else {
2683 bp->b_flags |= B_STALE | B_AGE;
2684 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2685 fsp->pcfs_fsinfo.fs_free_clusters =
2686 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2687 fsp->pcfs_fsinfo.fs_next_free =
2688 LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2689 }
2690 brelse(bp);
2691 bp = NULL;
2692 }
2693
2694 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2695 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2696 else
2697 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2698
2699 return (0);
2700
2701 out:
2702 cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2703 if (bp)
2704 brelse(bp);
2705 if (fatp)
2706 kmem_free(fatp, fatsize);
2707 if (fat_changemap)
2708 kmem_free(fat_changemap, fat_changemapsize);
2709
2710 if (flags) {
2711 pc_mark_irrecov(fsp);
2712 }
2713 return (error);
2714 }