Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/dcfs/dc_vnops.c
+++ new/usr/src/uts/common/fs/dcfs/dc_vnops.c
1 1
2 2 /*
3 3 * CDDL HEADER START
4 4 *
5 5 * The contents of this file are subject to the terms of the
6 6 * Common Development and Distribution License (the "License").
7 7 * You may not use this file except in compliance with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 31 * The Regents of the University of California
32 32 * All Rights Reserved
33 33 *
34 34 * University Acknowledgment- Portions of this document are derived from
35 35 * software developed by the University of California, Berkeley, and its
36 36 * contributors.
37 37 */
38 38
39 39 #include <sys/types.h>
40 40 #include <sys/thread.h>
41 41 #include <sys/t_lock.h>
42 42 #include <sys/param.h>
43 43 #include <sys/systm.h>
44 44 #include <sys/bitmap.h>
45 45 #include <sys/buf.h>
46 46 #include <sys/cmn_err.h>
47 47 #include <sys/conf.h>
48 48 #include <sys/ddi.h>
49 49 #include <sys/debug.h>
50 50 #include <sys/errno.h>
51 51 #include <sys/time.h>
52 52 #include <sys/fcntl.h>
53 53 #include <sys/flock.h>
54 54 #include <sys/file.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/mman.h>
57 57 #include <sys/vmsystm.h>
58 58 #include <sys/open.h>
59 59 #include <sys/swap.h>
60 60 #include <sys/sysmacros.h>
61 61 #include <sys/uio.h>
62 62 #include <sys/vfs.h>
63 63 #include <sys/vfs_opreg.h>
64 64 #include <sys/vnode.h>
65 65 #include <sys/stat.h>
66 66 #include <sys/poll.h>
67 67 #include <sys/zmod.h>
68 68 #include <sys/fs/decomp.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/page.h>
73 73 #include <vm/pvn.h>
74 74 #include <vm/seg_vn.h>
75 75 #include <vm/seg_kmem.h>
76 76 #include <vm/seg_map.h>
77 77
78 78 #include <fs/fs_subr.h>
79 79
80 80 /*
81 81 * dcfs - A filesystem for automatic decompressing of fiocompressed files
82 82 *
83 83 * This filesystem is a layered filesystem that sits on top of a normal
84 84 * persistent filesystem and provides automatic decompression of files
85 85 * that have been previously compressed and stored on the host file system.
86 86 * This is a pseudo filesystem in that it does not persist data, rather it
87 87 * intercepts file lookup requests on the host filesystem and provides
88 88 * transparent decompression of those files. Currently the only supported
89 89 * host filesystem is ufs.
90 90 *
91 91 * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
92 92 * and marked by fiocompress as a compressed file via a flag in the on-disk
 93 93 * inode (set via a ufs ioctl() - see ufs_vnops.c:ufs_ioctl(), _FIO_COMPRESSED).
94 94 * ufs_lookup checks for this flag and if set, passes control to decompvp
 95 95 * a function defined in this (dcfs) filesystem. decompvp uncompresses the file
96 96 * and returns a dcfs vnode to the VFS layer.
97 97 *
98 98 * dcfs is layered on top of ufs and passes requests involving persistence
99 99 * to the underlying ufs filesystem. The compressed files currently cannot be
100 100 * written to.
101 101 */
102 102
103 103
104 104 /*
105 105 * Define data structures within this file.
106 106 */
107 107 #define DCSHFT 5
108 108 #define DCTABLESIZE 16
109 109
110 110 #if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0)
111 111 #define DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1))
112 112 #else
113 113 #define DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) % DTABLESIZEC)
114 114 #endif
115 115
116 116 #define DCLRUSIZE 16
117 117
118 118 #define DCCACHESIZE 4
119 119
120 120 #define rounddown(x, y) ((x) & ~((y) - 1))
121 121
122 122 struct dcnode *dctable[DCTABLESIZE];
123 123
124 124 struct dcnode *dclru;
125 125 static int dclru_len;
126 126
127 127 kmutex_t dctable_lock;
128 128
129 129 dev_t dcdev;
130 130 struct vfs dc_vfs;
131 131
132 132 struct kmem_cache *dcnode_cache;
133 133 struct kmem_cache *dcbuf_cache[DCCACHESIZE];
134 134
135 135 kmutex_t dccache_lock;
136 136
137 137 static int dcinit(int, char *);
138 138
139 139 static struct dcnode *dcnode_alloc(void);
140 140 static void dcnode_free(struct dcnode *);
141 141 static void dcnode_recycle(struct dcnode *);
142 142
143 143 static void dcinsert(struct dcnode *);
144 144 static void dcdelete(struct dcnode *);
145 145 static struct dcnode *dcfind(struct vnode *);
146 146 static void dclru_add(struct dcnode *);
147 147 static void dclru_sub(struct dcnode *);
148 148
149 149
150 150 /*
151 151 * This is the loadable module wrapper.
152 152 */
153 153 #include <sys/modctl.h>
154 154
155 155 struct vfsops *dc_vfsops;
156 156
157 157 static vfsdef_t vfw = {
158 158 VFSDEF_VERSION,
159 159 "dcfs",
160 160 dcinit,
161 161 VSW_ZMOUNT,
162 162 NULL
163 163 };
164 164
↓ open down ↓ |
164 lines elided |
↑ open up ↑ |
165 165 /*
166 166 * Module linkage information for the kernel.
167 167 */
168 168 extern struct mod_ops mod_fsops;
169 169
170 170 static struct modlfs modlfs = {
171 171 &mod_fsops, "compressed filesystem", &vfw
172 172 };
173 173
174 174 static struct modlinkage modlinkage = {
175 - MODREV_1, (void *)&modlfs, NULL
175 + MODREV_1, { (void *)&modlfs, NULL }
176 176 };
177 177
178 178 int
179 179 _init()
180 180 {
181 181 return (mod_install(&modlinkage));
182 182 }
183 183
184 184 int
185 185 _info(struct modinfo *modinfop)
186 186 {
187 187 return (mod_info(&modlinkage, modinfop));
188 188 }
189 189
190 190
191 191 static int dc_open(struct vnode **, int, struct cred *, caller_context_t *);
192 192 static int dc_close(struct vnode *, int, int, offset_t,
193 193 struct cred *, caller_context_t *);
194 194 static int dc_read(struct vnode *, struct uio *, int, struct cred *,
195 195 struct caller_context *);
196 196 static int dc_getattr(struct vnode *, struct vattr *, int,
197 197 struct cred *, caller_context_t *);
198 198 static int dc_setattr(struct vnode *, struct vattr *, int, struct cred *,
199 199 struct caller_context *);
200 200 static int dc_access(struct vnode *, int, int,
201 201 struct cred *, caller_context_t *);
202 202 static int dc_fsync(struct vnode *, int, struct cred *, caller_context_t *);
203 203 static void dc_inactive(struct vnode *, struct cred *, caller_context_t *);
204 204 static int dc_fid(struct vnode *, struct fid *, caller_context_t *);
205 205 static int dc_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
206 206 static int dc_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
207 207 struct flk_callback *, struct cred *, caller_context_t *);
208 208 static int dc_realvp(struct vnode *, struct vnode **, caller_context_t *);
209 209 static int dc_getpage(struct vnode *, offset_t, size_t, uint_t *,
210 210 struct page **, size_t, struct seg *, caddr_t, enum seg_rw,
211 211 struct cred *, caller_context_t *);
212 212 static int dc_putpage(struct vnode *, offset_t, size_t, int,
213 213 struct cred *, caller_context_t *);
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
214 214 static int dc_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
215 215 uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
216 216 static int dc_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
217 217 uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
218 218 static int dc_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
219 219 uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
220 220
221 221 struct vnodeops *dc_vnodeops;
222 222
223 223 const fs_operation_def_t dc_vnodeops_template[] = {
224 - VOPNAME_OPEN, { .vop_open = dc_open },
225 - VOPNAME_CLOSE, { .vop_close = dc_close },
226 - VOPNAME_READ, { .vop_read = dc_read },
227 - VOPNAME_GETATTR, { .vop_getattr = dc_getattr },
228 - VOPNAME_SETATTR, { .vop_setattr = dc_setattr },
229 - VOPNAME_ACCESS, { .vop_access = dc_access },
230 - VOPNAME_FSYNC, { .vop_fsync = dc_fsync },
231 - VOPNAME_INACTIVE, { .vop_inactive = dc_inactive },
232 - VOPNAME_FID, { .vop_fid = dc_fid },
233 - VOPNAME_SEEK, { .vop_seek = dc_seek },
234 - VOPNAME_FRLOCK, { .vop_frlock = dc_frlock },
235 - VOPNAME_REALVP, { .vop_realvp = dc_realvp },
236 - VOPNAME_GETPAGE, { .vop_getpage = dc_getpage },
237 - VOPNAME_PUTPAGE, { .vop_putpage = dc_putpage },
238 - VOPNAME_MAP, { .vop_map = dc_map },
239 - VOPNAME_ADDMAP, { .vop_addmap = dc_addmap },
240 - VOPNAME_DELMAP, { .vop_delmap = dc_delmap },
241 - NULL, NULL
224 + { VOPNAME_OPEN, { .vop_open = dc_open } },
225 + { VOPNAME_CLOSE, { .vop_close = dc_close } },
226 + { VOPNAME_READ, { .vop_read = dc_read } },
227 + { VOPNAME_GETATTR, { .vop_getattr = dc_getattr } },
228 + { VOPNAME_SETATTR, { .vop_setattr = dc_setattr } },
229 + { VOPNAME_ACCESS, { .vop_access = dc_access } },
230 + { VOPNAME_FSYNC, { .vop_fsync = dc_fsync } },
231 + { VOPNAME_INACTIVE, { .vop_inactive = dc_inactive } },
232 + { VOPNAME_FID, { .vop_fid = dc_fid } },
233 + { VOPNAME_SEEK, { .vop_seek = dc_seek } },
234 + { VOPNAME_FRLOCK, { .vop_frlock = dc_frlock } },
235 + { VOPNAME_REALVP, { .vop_realvp = dc_realvp } },
236 + { VOPNAME_GETPAGE, { .vop_getpage = dc_getpage } },
237 + { VOPNAME_PUTPAGE, { .vop_putpage = dc_putpage } },
238 + { VOPNAME_MAP, { .vop_map = dc_map } },
239 + { VOPNAME_ADDMAP, { .vop_addmap = dc_addmap } },
240 + { VOPNAME_DELMAP, { .vop_delmap = dc_delmap } },
241 + { NULL, { NULL } }
242 242 };
243 243
244 244 /*ARGSUSED*/
245 245 static int
246 246 dc_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *ctp)
247 247 {
248 248 return (0);
249 249 }
250 250
251 251 /*ARGSUSED*/
252 252 static int
253 253 dc_close(struct vnode *vp, int flag, int count, offset_t off,
254 254 struct cred *cr, caller_context_t *ctp)
255 255 {
256 256 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
257 257 cleanshares(vp, ttoproc(curthread)->p_pid);
258 258 return (0);
259 259 }
260 260
261 261 /*ARGSUSED*/
262 262 static int
263 263 dc_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
264 264 struct caller_context *ct)
265 265 {
266 266 struct dcnode *dp = VTODC(vp);
267 267 size_t rdsize = MAX(MAXBSIZE, dp->dc_hdr->ch_blksize);
268 268 size_t fsize = dp->dc_hdr->ch_fsize;
269 269 int error;
270 270
271 271 /*
272 272 * Loop through file with segmap, decompression will occur
273 273 * in dc_getapage
274 274 */
275 275 do {
276 276 caddr_t base;
277 277 size_t n;
278 278 offset_t mapon;
279 279
280 280 /*
281 281 * read to end of block or file
282 282 */
283 283 mapon = uiop->uio_loffset & (rdsize - 1);
284 284 n = MIN(rdsize - mapon, uiop->uio_resid);
285 285 n = MIN(n, fsize - uiop->uio_loffset);
286 286 if (n == 0)
287 287 return (0); /* at EOF */
288 288
289 289 base = segmap_getmapflt(segkmap, vp, uiop->uio_loffset, n, 1,
290 290 S_READ);
291 291 error = uiomove(base + mapon, n, UIO_READ, uiop);
292 292 if (!error) {
293 293 uint_t flags;
294 294
295 295 if (n + mapon == rdsize || uiop->uio_loffset == fsize)
296 296 flags = SM_DONTNEED;
297 297 else
298 298 flags = 0;
299 299 error = segmap_release(segkmap, base, flags);
300 300 } else
301 301 (void) segmap_release(segkmap, base, 0);
302 302 } while (!error && uiop->uio_resid);
303 303
304 304 return (error);
305 305 }
306 306
307 307 static int
308 308 dc_getattr(struct vnode *vp, struct vattr *vap, int flags,
309 309 cred_t *cred, caller_context_t *ctp)
310 310 {
311 311 struct dcnode *dp = VTODC(vp);
312 312 struct vnode *subvp = dp->dc_subvp;
313 313 int error;
314 314
315 315 error = VOP_GETATTR(subvp, vap, flags, cred, ctp);
316 316
317 317 /* substitute uncompressed size */
318 318 vap->va_size = dp->dc_hdr->ch_fsize;
319 319 return (error);
320 320 }
321 321
322 322 static int
323 323 dc_setattr(struct vnode *vp, struct vattr *vap, int flags, cred_t *cred,
324 324 caller_context_t *ctp)
325 325 {
326 326 struct dcnode *dp = VTODC(vp);
327 327 struct vnode *subvp = dp->dc_subvp;
328 328
329 329 return (VOP_SETATTR(subvp, vap, flags, cred, ctp));
330 330 }
331 331
332 332 static int
333 333 dc_access(struct vnode *vp, int mode, int flags,
334 334 cred_t *cred, caller_context_t *ctp)
335 335 {
336 336 struct dcnode *dp = VTODC(vp);
337 337 struct vnode *subvp = dp->dc_subvp;
338 338
339 339 return (VOP_ACCESS(subvp, mode, flags, cred, ctp));
340 340 }
341 341
342 342 /*ARGSUSED*/
343 343 static int
344 344 dc_fsync(vnode_t *vp, int syncflag, cred_t *cred, caller_context_t *ctp)
345 345 {
346 346 return (0);
347 347 }
348 348
349 349 /*ARGSUSED*/
350 350 static void
351 351 dc_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ctp)
352 352 {
353 353 struct dcnode *dp = VTODC(vp);
354 354
355 355 mutex_enter(&dctable_lock);
356 356 mutex_enter(&vp->v_lock);
357 357 ASSERT(vp->v_count >= 1);
358 358 if (--vp->v_count != 0) {
359 359 /*
360 360 * Somebody accessed the dcnode before we got a chance to
361 361 * remove it. They will remove it when they do a vn_rele.
362 362 */
363 363 mutex_exit(&vp->v_lock);
364 364 mutex_exit(&dctable_lock);
365 365 return;
366 366 }
367 367 mutex_exit(&vp->v_lock);
368 368
369 369 dcnode_free(dp);
370 370
371 371 mutex_exit(&dctable_lock);
372 372 }
373 373
374 374 static int
375 375 dc_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ctp)
376 376 {
377 377 struct dcnode *dp = VTODC(vp);
378 378 struct vnode *subvp = dp->dc_subvp;
379 379
380 380 return (VOP_FID(subvp, fidp, ctp));
381 381 }
382 382
383 383 static int
384 384 dc_seek(struct vnode *vp, offset_t oof, offset_t *noffp, caller_context_t *ctp)
385 385 {
386 386 struct dcnode *dp = VTODC(vp);
387 387 struct vnode *subvp = dp->dc_subvp;
388 388
389 389 return (VOP_SEEK(subvp, oof, noffp, ctp));
390 390 }
391 391
392 392 static int
393 393 dc_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
394 394 offset_t offset, struct flk_callback *flk_cbp,
395 395 cred_t *cr, caller_context_t *ctp)
396 396 {
397 397 struct dcnode *dp = VTODC(vp);
398 398 int error;
399 399 struct vattr vattr;
400 400
401 401 /*
402 402 * If file is being mapped, disallow frlock.
403 403 */
404 404 vattr.va_mask = AT_MODE;
405 405 if (error = VOP_GETATTR(dp->dc_subvp, &vattr, 0, cr, ctp))
406 406 return (error);
407 407 if (dp->dc_mapcnt > 0 && MANDLOCK(vp, vattr.va_mode))
408 408 return (EAGAIN);
409 409
410 410 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ctp));
411 411 }
412 412
413 413 /*ARGSUSED*/
414 414 static int
415 415 dc_getblock_miss(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
416 416 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
417 417 {
418 418 struct dcnode *dp = VTODC(vp);
419 419 struct comphdr *hdr = dp->dc_hdr;
420 420 struct page *pp;
421 421 struct buf *bp;
422 422 caddr_t saddr;
423 423 off_t cblkno;
424 424 size_t rdoff, rdsize, dsize;
425 425 long xlen;
426 426 int error, zerr;
427 427
428 428 ASSERT(len == hdr->ch_blksize);
429 429 /*
430 430 * Get destination pages and make them addressable
431 431 */
432 432 pp = page_create_va(vp, off, len, PG_WAIT, seg, addr);
433 433 bp = pageio_setup(pp, len, vp, B_READ);
434 434 bp_mapin(bp);
435 435
436 436 /*
437 437 * read compressed data from subordinate vnode
438 438 */
439 439 saddr = kmem_cache_alloc(dp->dc_bufcache, KM_SLEEP);
440 440 cblkno = off / len;
441 441 rdoff = hdr->ch_blkmap[cblkno];
442 442 rdsize = hdr->ch_blkmap[cblkno + 1] - rdoff;
443 443 error = vn_rdwr(UIO_READ, dp->dc_subvp, saddr, rdsize, rdoff,
444 444 UIO_SYSSPACE, 0, 0, cr, NULL);
445 445 if (error)
446 446 goto cleanup;
447 447
448 448 /*
449 449 * Uncompress
450 450 */
451 451 dsize = len;
452 452 zerr = z_uncompress(bp->b_un.b_addr, &dsize, saddr, dp->dc_zmax);
453 453 if (zerr != Z_OK) {
454 454 error = EIO;
455 455 goto cleanup;
456 456 }
457 457
458 458 /*
459 459 * Handle EOF
460 460 */
461 461 xlen = hdr->ch_fsize - off;
462 462 if (xlen < len) {
463 463 bzero(bp->b_un.b_addr + xlen, len - xlen);
464 464 if (dsize != xlen)
465 465 error = EIO;
466 466 } else if (dsize != len)
467 467 error = EIO;
468 468
469 469 /*
470 470 * Clean up
471 471 */
472 472 cleanup:
473 473 kmem_cache_free(dp->dc_bufcache, saddr);
474 474 pageio_done(bp);
475 475 *ppp = pp;
476 476 return (error);
477 477 }
478 478
479 479 static int
480 480 dc_getblock(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
481 481 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
482 482 {
483 483 struct page *pp, *plist = NULL;
484 484 offset_t pgoff;
485 485 int rdblk;
486 486
487 487 /*
488 488 * pvn_read_kluster() doesn't quite do what we want, since it
489 489 * thinks sub block reads are ok. Here we always decompress
490 490 * a full block.
491 491 */
492 492
493 493 /*
494 494 * Check page cache
495 495 */
496 496 rdblk = 0;
497 497 for (pgoff = off; pgoff < off + len; pgoff += PAGESIZE) {
498 498 pp = page_lookup(vp, pgoff, SE_EXCL);
499 499 if (pp == NULL) {
500 500 rdblk = 1;
501 501 break;
502 502 }
503 503 page_io_lock(pp);
504 504 page_add(&plist, pp);
505 505 plist = plist->p_next;
506 506 }
507 507 if (!rdblk) {
508 508 *ppp = plist;
509 509 return (0); /* all pages in cache */
510 510 }
511 511
512 512 /*
513 513 * Undo any locks so getblock_miss has an open field
514 514 */
515 515 if (plist != NULL)
516 516 pvn_io_done(plist);
517 517
518 518 return (dc_getblock_miss(vp, off, len, ppp, seg, addr, rw, cr));
519 519 }
520 520
521 521 static int
522 522 dc_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
523 523 {
524 524 struct vnode *rvp;
525 525
526 526 vp = VTODC(vp)->dc_subvp;
527 527 if (VOP_REALVP(vp, &rvp, ct) == 0)
528 528 vp = rvp;
529 529 *vpp = vp;
530 530 return (0);
531 531 }
532 532
533 533 /*ARGSUSED10*/
534 534 static int
535 535 dc_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
536 536 struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
537 537 enum seg_rw rw, struct cred *cr, caller_context_t *ctp)
538 538 {
539 539 struct dcnode *dp = VTODC(vp);
540 540 struct comphdr *hdr = dp->dc_hdr;
541 541 struct page *pp, *plist = NULL;
542 542 caddr_t vp_baddr;
543 543 offset_t vp_boff, vp_bend;
544 544 size_t bsize = hdr->ch_blksize;
545 545 int nblks, error = 0; /* fix: was uninitialized; read below even when nblks == 0 (len == 0, off aligned) */
546 546
547 547 /* does not support write */
548 548 if (rw == S_WRITE) {
549 549 panic("write attempt on compressed file");
550 550 /*NOTREACHED*/
551 551 }
552 552
553 553 if (protp)
554 554 *protp = PROT_ALL;
555 555 /*
556 556 * We don't support asynchronous operation at the moment, so
557 557 * just pretend we did it. If the pages are ever actually
558 558 * needed, they'll get brought in then.
559 559 */
560 560 if (pl == NULL)
561 561 return (0);
562 562
563 563 /*
564 564 * Calc block start and end offsets
565 565 */
566 566 vp_boff = rounddown(off, bsize);
567 567 vp_bend = roundup(off + len, bsize);
568 568 vp_baddr = (caddr_t)rounddown((uintptr_t)addr, bsize);
569 569
570 570 nblks = (vp_bend - vp_boff) / bsize;
571 571 while (nblks--) { /* NOTE(review): only the last block's error survives the loop -- confirm intended */
572 572 error = dc_getblock(vp, vp_boff, bsize, &pp, seg, vp_baddr,
573 573 rw, cr);
574 574 page_list_concat(&plist, &pp);
575 575 vp_boff += bsize;
576 576 vp_baddr += bsize;
577 577 }
578 578 if (!error)
579 579 pvn_plist_init(plist, pl, plsz, off, len, rw);
580 580 else
581 581 pvn_read_done(plist, B_ERROR);
582 582 return (error);
583 583 }
584 584
585 585 /*
586 586 * This function should never be called. We need to have it to pass
587 587 * it as an argument to other functions.
588 588 */
589 589 /*ARGSUSED*/
590 590 static int
591 591 dc_putapage(struct vnode *vp, struct page *pp, u_offset_t *offp, size_t *lenp,
592 592 int flags, struct cred *cr)
593 593 {
594 594 /* should never happen */
595 595 cmn_err(CE_PANIC, "dcfs: dc_putapage: dirty page");
596 596 /*NOTREACHED*/
597 597 return (0);
598 598 }
599 599
600 600
601 601 /*
602 602 * The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
603 603 * B_INVAL is set by:
604 604 *
605 605 * 1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
606 606 * 2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
607 607 * which translates to an MC_SYNC with the MS_INVALIDATE flag.
608 608 *
609 609 * The B_FREE (as well as the B_DONTNEED) flag is set when the
610 610 * MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked
611 611 * from SEGVN to release pages behind a pagefault.
612 612 */
613 613 /*ARGSUSED5*/
614 614 static int
615 615 dc_putpage(struct vnode *vp, offset_t off, size_t len, int flags,
616 616 struct cred *cr, caller_context_t *ctp)
617 617 {
618 618 int error = 0;
619 619
620 620 if (vp->v_count == 0) {
621 621 panic("dcfs_putpage: bad v_count");
622 622 /*NOTREACHED*/
623 623 }
624 624
625 625 if (vp->v_flag & VNOMAP)
626 626 return (ENOSYS);
627 627
628 628 if (!vn_has_cached_data(vp)) /* no pages mapped */
629 629 return (0);
630 630
631 631 if (len == 0) /* from 'off' to EOF */
632 632 error = pvn_vplist_dirty(vp, off, dc_putapage, flags, cr);
633 633 else {
634 634 offset_t io_off;
635 635 se_t se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;
636 636
637 637 for (io_off = off; io_off < off + len; io_off += PAGESIZE) {
638 638 page_t *pp;
639 639
640 640 /*
641 641 * We insist on getting the page only if we are
642 642 * about to invalidate, free or write it and
643 643 * the B_ASYNC flag is not set.
644 644 */
645 645 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0))
646 646 pp = page_lookup(vp, io_off, se);
647 647 else
648 648 pp = page_lookup_nowait(vp, io_off, se);
649 649
650 650 if (pp == NULL)
651 651 continue;
652 652 /*
653 653 * Normally pvn_getdirty() should return 0, which
 653 653 * implies that it has done the job for us.
655 655 * The shouldn't-happen scenario is when it returns 1.
656 656 * This means that the page has been modified and
657 657 * needs to be put back.
658 658 * Since we can't write to a dcfs compressed file,
659 659 * we fake a failed I/O and force pvn_write_done()
660 660 * to destroy the page.
661 661 */
662 662 if (pvn_getdirty(pp, flags) == 1) {
663 663 cmn_err(CE_NOTE, "dc_putpage: dirty page");
664 664 pvn_write_done(pp, flags |
665 665 B_ERROR | B_WRITE | B_INVAL | B_FORCE);
666 666 }
667 667 }
668 668 }
669 669 return (error);
670 670 }
671 671
672 672 static int
673 673 dc_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
674 674 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
675 675 struct cred *cred, caller_context_t *ctp)
676 676 {
677 677 struct vattr vattr;
678 678 struct segvn_crargs vn_a;
679 679 int error;
680 680
681 681 if (vp->v_flag & VNOMAP)
682 682 return (ENOSYS);
683 683
684 684 if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0)
685 685 return (ENXIO);
686 686
687 687 /*
688 688 * If file is being locked, disallow mapping.
689 689 */
690 690 if (error = VOP_GETATTR(VTODC(vp)->dc_subvp, &vattr, 0, cred, ctp))
691 691 return (error);
692 692 if (vn_has_mandatory_locks(vp, vattr.va_mode))
693 693 return (EAGAIN);
694 694
695 695 as_rangelock(as);
696 696
697 697 if ((flags & MAP_FIXED) == 0) {
698 698 map_addr(addrp, len, off, 1, flags);
699 699 if (*addrp == NULL) {
700 700 as_rangeunlock(as);
701 701 return (ENOMEM);
702 702 }
703 703 } else {
704 704 /*
705 705 * User specified address - blow away any previous mappings
706 706 */
707 707 (void) as_unmap(as, *addrp, len);
708 708 }
709 709
710 710 vn_a.vp = vp;
711 711 vn_a.offset = off;
712 712 vn_a.type = flags & MAP_TYPE;
713 713 vn_a.prot = prot;
714 714 vn_a.maxprot = maxprot;
715 715 vn_a.flags = flags & ~MAP_TYPE;
716 716 vn_a.cred = cred;
717 717 vn_a.amp = NULL;
718 718 vn_a.szc = 0;
719 719 vn_a.lgrp_mem_policy_flags = 0;
720 720
721 721 error = as_map(as, *addrp, len, segvn_create, &vn_a);
722 722 as_rangeunlock(as);
723 723 return (error);
724 724 }
725 725
726 726 /*ARGSUSED*/
727 727 static int
728 728 dc_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
729 729 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
730 730 struct cred *cr, caller_context_t *ctp)
731 731 {
732 732 struct dcnode *dp;
733 733
734 734 if (vp->v_flag & VNOMAP)
735 735 return (ENOSYS);
736 736
737 737 dp = VTODC(vp);
738 738 mutex_enter(&dp->dc_lock);
739 739 dp->dc_mapcnt += btopr(len);
740 740 mutex_exit(&dp->dc_lock);
741 741 return (0);
742 742 }
743 743
744 744 /*ARGSUSED*/
745 745 static int
746 746 dc_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
747 747 size_t len, uint_t prot, uint_t maxprot, uint_t flags,
748 748 struct cred *cr, caller_context_t *ctp)
749 749 {
750 750 struct dcnode *dp;
751 751
752 752 if (vp->v_flag & VNOMAP)
753 753 return (ENOSYS);
754 754
755 755 dp = VTODC(vp);
756 756 mutex_enter(&dp->dc_lock);
757 757 dp->dc_mapcnt -= btopr(len);
758 758 ASSERT(dp->dc_mapcnt >= 0);
759 759 mutex_exit(&dp->dc_lock);
760 760 return (0);
761 761 }
762 762
763 763 /*
764 764 * Constructor/destructor routines for dcnodes
765 765 */
766 766 /*ARGSUSED1*/
767 767 static int
768 768 dcnode_constructor(void *buf, void *cdrarg, int kmflags)
769 769 {
770 770 struct dcnode *dp = buf;
771 771 struct vnode *vp;
772 772
773 773 vp = dp->dc_vp = vn_alloc(kmflags);
774 774 if (vp == NULL) {
775 775 return (-1);
776 776 }
777 777 vp->v_data = dp;
778 778 vp->v_type = VREG;
779 779 vp->v_flag = VNOSWAP;
780 780 vp->v_vfsp = &dc_vfs;
781 781 vn_setops(vp, dc_vnodeops);
782 782 vn_exists(vp);
783 783
784 784 mutex_init(&dp->dc_lock, NULL, MUTEX_DEFAULT, NULL);
785 785 dp->dc_mapcnt = 0;
786 786 dp->dc_lrunext = dp->dc_lruprev = NULL;
787 787 dp->dc_hdr = NULL;
788 788 dp->dc_subvp = NULL;
789 789 return (0);
790 790 }
791 791
792 792 /*ARGSUSED*/
793 793 static void
794 794 dcnode_destructor(void *buf, void *cdrarg)
795 795 {
796 796 struct dcnode *dp = buf;
797 797 struct vnode *vp = DCTOV(dp);
798 798
799 799 mutex_destroy(&dp->dc_lock);
800 800
801 801 VERIFY(dp->dc_hdr == NULL);
802 802 VERIFY(dp->dc_subvp == NULL);
803 803 vn_invalid(vp);
804 804 vn_free(vp);
805 805 }
806 806
807 807 static struct dcnode *
808 808 dcnode_alloc(void)
809 809 {
810 810 struct dcnode *dp;
811 811
812 812 /*
813 813 * If the free list is above DCLRUSIZE
814 814 * re-use one from it
815 815 */
816 816 mutex_enter(&dctable_lock);
817 817 if (dclru_len < DCLRUSIZE) {
818 818 mutex_exit(&dctable_lock);
819 819 dp = kmem_cache_alloc(dcnode_cache, KM_SLEEP); /* LRU short: allocate fresh (KM_SLEEP may block) */
820 820 } else {
821 821 ASSERT(dclru != NULL);
822 822 dp = dclru; /* reclaim the node at the LRU head */
823 823 dclru_sub(dp); /* unlink from LRU list */
824 824 dcdelete(dp); /* unhash from dctable before reuse */
825 825 mutex_exit(&dctable_lock);
826 826 dcnode_recycle(dp); /* drop subvp hold, free header, reinit vnode */
827 827 }
828 828 return (dp);
829 829 }
830 830
831 831 static void
832 832 dcnode_free(struct dcnode *dp)
833 833 {
834 834 struct vnode *vp = DCTOV(dp);
835 835
836 836 ASSERT(MUTEX_HELD(&dctable_lock));
837 837
838 838 /*
839 839 * If no cached pages, no need to put it on lru
840 840 */
841 841 if (!vn_has_cached_data(vp)) {
842 842 dcdelete(dp);
843 843 dcnode_recycle(dp);
844 844 kmem_cache_free(dcnode_cache, dp);
845 845 return;
846 846 }
847 847
848 848 /*
849 849 * Add to lru, if it's over the limit, free from head
850 850 */
851 851 dclru_add(dp);
852 852 if (dclru_len > DCLRUSIZE) {
853 853 dp = dclru;
854 854 dclru_sub(dp);
855 855 dcdelete(dp);
856 856 dcnode_recycle(dp);
857 857 kmem_cache_free(dcnode_cache, dp);
858 858 }
859 859 }
860 860
861 861 static void
862 862 dcnode_recycle(struct dcnode *dp)
863 863 {
864 864 struct vnode *vp;
865 865
866 866 vp = DCTOV(dp);
867 867
868 868 VN_RELE(dp->dc_subvp);
869 869 dp->dc_subvp = NULL;
870 870 (void) pvn_vplist_dirty(vp, 0, dc_putapage, B_INVAL, NULL);
871 871 kmem_free(dp->dc_hdr, dp->dc_hdrsize);
872 872 dp->dc_hdr = NULL;
873 873 dp->dc_hdrsize = dp->dc_zmax = 0;
874 874 dp->dc_bufcache = NULL;
875 875 dp->dc_mapcnt = 0;
↓ open down ↓ |
624 lines elided |
↑ open up ↑ |
876 876 vn_reinit(vp);
877 877 vp->v_type = VREG;
878 878 vp->v_flag = VNOSWAP;
879 879 vp->v_vfsp = &dc_vfs;
880 880 }
881 881
882 882 static int
883 883 dcinit(int fstype, char *name)
884 884 {
885 885 static const fs_operation_def_t dc_vfsops_template[] = {
886 - NULL, NULL
886 + { NULL, { NULL } }
887 887 };
888 888 int error;
889 889 major_t dev;
890 890
891 891 error = vfs_setfsops(fstype, dc_vfsops_template, &dc_vfsops);
892 892 if (error) {
893 893 cmn_err(CE_WARN, "dcinit: bad vfs ops template");
894 894 return (error);
895 895 }
896 896 VFS_INIT(&dc_vfs, dc_vfsops, NULL);
897 897 dc_vfs.vfs_flag = VFS_RDONLY;
898 898 dc_vfs.vfs_fstype = fstype;
899 899 if ((dev = getudev()) == (major_t)-1)
900 900 dev = 0;
901 901 dcdev = makedevice(dev, 0);
902 902 dc_vfs.vfs_dev = dcdev;
903 903
904 904 error = vn_make_ops(name, dc_vnodeops_template, &dc_vnodeops);
905 905 if (error != 0) {
906 906 (void) vfs_freevfsops_by_type(fstype);
907 907 cmn_err(CE_WARN, "dcinit: bad vnode ops template");
908 908 return (error);
909 909 }
910 910
911 911 mutex_init(&dctable_lock, NULL, MUTEX_DEFAULT, NULL);
912 912 mutex_init(&dccache_lock, NULL, MUTEX_DEFAULT, NULL);
913 913 dcnode_cache = kmem_cache_create("dcnode_cache", sizeof (struct dcnode),
914 914 0, dcnode_constructor, dcnode_destructor, NULL, NULL, NULL, 0);
915 915
916 916 return (0);
917 917 }
918 918
919 919 /*
 * Return the shadow (decompressing) vnode whose subordinate is the given
 * vp, manufacturing a new dcnode if no shadow exists yet.  Returns NULL
 * if vp does not look like a valid compressed file or its attributes
 * cannot be obtained.
 */
struct vnode *
decompvp(struct vnode *vp, cred_t *cred, caller_context_t *ctp)
{
	struct dcnode *dp, *ndp;
	struct comphdr thdr, *hdr;
	struct kmem_cache **cpp;
	struct vattr vattr;
	size_t hdrsize, bsize;
	int error;

	/*
	 * See if we have an existing shadow
	 * If none, we have to manufacture one
	 */
	mutex_enter(&dctable_lock);
	dp = dcfind(vp);
	mutex_exit(&dctable_lock);
	if (dp != NULL)
		return (DCTOV(dp));

	/*
	 * Make sure it's a valid compressed file: read the fixed-size
	 * header into a stack buffer and check magic, version, algorithm,
	 * and that the block size is a power of two between PAGESIZE and
	 * the decompression cache limit.
	 */
	hdr = &thdr;
	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, sizeof (struct comphdr), 0,
	    UIO_SYSSPACE, 0, 0, cred, NULL);
	if (error || hdr->ch_magic != CH_MAGIC_ZLIB ||
	    hdr->ch_version != CH_VERSION || hdr->ch_algorithm != CH_ALG_ZLIB ||
	    hdr->ch_fsize == 0 || hdr->ch_blksize < PAGESIZE ||
	    hdr->ch_blksize > ptob(DCCACHESIZE) || !ISP2(hdr->ch_blksize))
		return (NULL);

	/* get underlying file size */
	if (VOP_GETATTR(vp, &vattr, 0, cred, ctp) != 0)
		return (NULL);

	/*
	 * Re-read entire header: ch_blkmap[0] is the offset of the first
	 * data block, so the header occupies everything before it, plus
	 * room for one extra map entry appended below.
	 * NOTE(review): hdrsize comes straight from the on-disk header
	 * with no upper-bound check -- presumably only trusted files
	 * reach here; confirm against callers.
	 */
	hdrsize = hdr->ch_blkmap[0] + sizeof (uint64_t);
	hdr = kmem_alloc(hdrsize, KM_SLEEP);
	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, hdrsize, 0, UIO_SYSSPACE,
	    0, 0, cred, NULL);
	if (error) {
		kmem_free(hdr, hdrsize);
		return (NULL);
	}

	/*
	 * add extra blkmap entry to make dc_getblock()'s
	 * life easier: the entry one past the last block points at the
	 * end of the compressed file
	 */
	bsize = hdr->ch_blksize;
	hdr->ch_blkmap[((hdr->ch_fsize-1) / bsize) + 1] = vattr.va_size;

	ndp = dcnode_alloc();
	ndp->dc_subvp = vp;
	VN_HOLD(vp);		/* shadow keeps a hold on its subordinate */
	ndp->dc_hdr = hdr;
	ndp->dc_hdrsize = hdrsize;

	/*
	 * Allocate kmem cache if none there already
	 * (one cache per block size, shared by all dcnodes that use it;
	 * creation is serialized by dccache_lock)
	 */
	ndp->dc_zmax = ZMAXBUF(bsize);
	cpp = &dcbuf_cache[btop(bsize)];
	mutex_enter(&dccache_lock);
	if (*cpp == NULL)
		*cpp = kmem_cache_create("dcbuf_cache", ndp->dc_zmax, 0, NULL,
		    NULL, NULL, NULL, NULL, 0);
	mutex_exit(&dccache_lock);
	ndp->dc_bufcache = *cpp;

	/*
	 * Recheck table in case someone else created shadow
	 * while we were blocked above.
	 */
	mutex_enter(&dctable_lock);
	dp = dcfind(vp);
	if (dp != NULL) {
		/* lost the race: discard our node, return the winner's */
		mutex_exit(&dctable_lock);
		dcnode_recycle(ndp);
		kmem_cache_free(dcnode_cache, ndp);
		return (DCTOV(dp));
	}
	dcinsert(ndp);
	mutex_exit(&dctable_lock);

	return (DCTOV(ndp));
}
1012 1012
1013 1013
1014 1014 /*
1015 1015 * dcnode lookup table
1016 1016 * These routines maintain a table of dcnodes hashed by their
1017 1017 * subordinate vnode so that they can be found if they already
1018 1018 * exist in the vnode cache
1019 1019 */
1020 1020
1021 1021 /*
1022 1022 * Put a dcnode in the table.
1023 1023 */
1024 1024 static void
1025 1025 dcinsert(struct dcnode *newdp)
1026 1026 {
1027 1027 int idx = DCHASH(newdp->dc_subvp);
1028 1028
1029 1029 ASSERT(MUTEX_HELD(&dctable_lock));
1030 1030 newdp->dc_hash = dctable[idx];
1031 1031 dctable[idx] = newdp;
1032 1032 }
1033 1033
1034 1034 /*
1035 1035 * Remove a dcnode from the hash table.
1036 1036 */
1037 1037 void
1038 1038 dcdelete(struct dcnode *deldp)
1039 1039 {
1040 1040 int idx = DCHASH(deldp->dc_subvp);
1041 1041 struct dcnode *dp, *prevdp;
1042 1042
1043 1043 ASSERT(MUTEX_HELD(&dctable_lock));
1044 1044 dp = dctable[idx];
1045 1045 if (dp == deldp)
1046 1046 dctable[idx] = dp->dc_hash;
1047 1047 else {
1048 1048 for (prevdp = dp, dp = dp->dc_hash; dp != NULL;
1049 1049 prevdp = dp, dp = dp->dc_hash) {
1050 1050 if (dp == deldp) {
1051 1051 prevdp->dc_hash = dp->dc_hash;
1052 1052 break;
1053 1053 }
1054 1054 }
1055 1055 }
1056 1056 ASSERT(dp != NULL);
1057 1057 }
1058 1058
1059 1059 /*
1060 1060 * Find a shadow vnode in the dctable hash list.
1061 1061 */
1062 1062 static struct dcnode *
1063 1063 dcfind(struct vnode *vp)
1064 1064 {
1065 1065 struct dcnode *dp;
1066 1066
1067 1067 ASSERT(MUTEX_HELD(&dctable_lock));
1068 1068 for (dp = dctable[DCHASH(vp)]; dp != NULL; dp = dp->dc_hash)
1069 1069 if (dp->dc_subvp == vp) {
1070 1070 VN_HOLD(DCTOV(dp));
1071 1071 if (dp->dc_lrunext)
1072 1072 dclru_sub(dp);
1073 1073 return (dp);
1074 1074 }
1075 1075 return (NULL);
1076 1076 }
1077 1077
#ifdef DEBUG
/*
 * Count the nodes on the circular LRU list; used only to cross-check
 * dclru_len in ASSERTs.
 */
static int
dclru_count(void)
{
	struct dcnode *dp = dclru;
	int n = 0;

	if (dp == NULL)
		return (0);
	do {
		n++;
		dp = dp->dc_lrunext;
	} while (dp != dclru);
	return (n);
}
#endif
1092 1092
1093 1093 static void
1094 1094 dclru_add(struct dcnode *dp)
1095 1095 {
1096 1096 /*
1097 1097 * Add to dclru as double-link chain
1098 1098 */
1099 1099 ASSERT(MUTEX_HELD(&dctable_lock));
1100 1100 if (dclru == NULL) {
1101 1101 dclru = dp;
1102 1102 dp->dc_lruprev = dp->dc_lrunext = dp;
1103 1103 } else {
1104 1104 struct dcnode *last = dclru->dc_lruprev;
1105 1105
1106 1106 dclru->dc_lruprev = dp;
1107 1107 last->dc_lrunext = dp;
1108 1108 dp->dc_lruprev = last;
1109 1109 dp->dc_lrunext = dclru;
1110 1110 }
1111 1111 dclru_len++;
1112 1112 ASSERT(dclru_len == dclru_count());
1113 1113 }
1114 1114
1115 1115 static void
1116 1116 dclru_sub(struct dcnode *dp)
1117 1117 {
1118 1118 ASSERT(MUTEX_HELD(&dctable_lock));
1119 1119 dp->dc_lrunext->dc_lruprev = dp->dc_lruprev;
1120 1120 dp->dc_lruprev->dc_lrunext = dp->dc_lrunext;
1121 1121 if (dp == dclru)
1122 1122 dclru = dp->dc_lrunext == dp ? NULL : dp->dc_lrunext;
1123 1123 dp->dc_lrunext = dp->dc_lruprev = NULL;
1124 1124 dclru_len--;
1125 1125 ASSERT(dclru_len == dclru_count());
1126 1126 }
↓ open down ↓ |
230 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX