Print this page
*** NO COMMENTS ***
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/lofi.c
+++ new/usr/src/uts/common/io/lofi.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * lofi (loopback file) driver - allows you to attach a file to a device,
27 27 * which can then be accessed through that device. The simple model is that
28 28 * you tell lofi to open a file, and then use the block device you get as
29 29 * you would any block device. lofi translates access to the block device
30 30 * into I/O on the underlying file. This is mostly useful for
31 31 * mounting images of filesystems.
32 32 *
33 33 * lofi is controlled through /dev/lofictl - this is the only device exported
34 34 * during attach, and is minor number 0. lofiadm communicates with lofi through
35 35 * ioctls on this device. When a file is attached to lofi, block and character
36 36 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices
37 37 * are identified by their minor number, and the minor number is also used
38 38 * as the name in /dev/lofi. If we ever decide to support virtual disks,
39 39 * we'll have to divide the minor number space to identify fdisk partitions
40 40 * and slices, and the name will then be the minor number shifted down a
41 41 * few bits. Minor devices are tracked with state structures handled with
42 42 * ddi_soft_state(9F) for simplicity.
43 43 *
44 44 * A file attached to lofi is opened when attached and not closed until
45 45 * explicitly detached from lofi. This seems more sensible than deferring
46 46 * the open until the /dev/lofi device is opened, for a number of reasons.
47 47 * One is that any failure is likely to be noticed by the person (or script)
48 48 * running lofiadm. Another is that it would be a security problem if the
49 49 * file was replaced by another one after being added but before being opened.
50 50 *
51 51 * The only hard part about lofi is the ioctls. In order to support things
52 52 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
53 53 * So it has to fake disk geometry and partition information. More may need
54 54 * to be faked if your favorite utility doesn't work and you think it should
55 55 * (fdformat doesn't work because it really wants to know the type of floppy
56 56 * controller to talk to, and that didn't seem easy to fake. Or possibly even
57 57 * necessary, since we have mkfs_pcfs now).
58 58 *
59 59 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
60 60 * support simulation of hotplug events, an optional force flag is provided.
61 61 * If a lofi device is open when a force detach is requested, then the
62 62 * underlying file is closed and any subsequent operations return EIO. When the
63 63 * device is closed for the last time, it will be cleaned up at that time. In
64 64 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
65 65 * detached but not removed.
66 66 *
67 67 * Known problems:
68 68 *
69 69 * UFS logging. Mounting a UFS filesystem image "logging"
70 70 * works for basic copy testing but wedges during a build of ON through
71 71 * that image. Some deadlock in lufs holding the log mutex and then
72 72 * getting stuck on a buf. So for now, don't do that.
73 73 *
74 74 * Direct I/O. Since the filesystem data is being cached in the buffer
75 75 * cache, _and_ again in the underlying filesystem, it's tempting to
76 76 * enable direct I/O on the underlying file. Don't, because that deadlocks.
77 77 * I think to fix the cache-twice problem we might need filesystem support.
78 78 *
79 79 * Interesting things to do:
80 80 *
81 81 * Allow multiple files for each device. A poor-man's metadisk, basically.
82 82 *
83 83 * Pass-through ioctls on block devices. You can (though it's not
84 84 * documented), give lofi a block device as a file name. Then we shouldn't
85 85 * need to fake a geometry, however, it may be relevant if you're replacing
86 86 * metadisk, or using lofi to get crypto.
87 87 * It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
88 88 * and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
89 89 * In fact this even makes sense if you have lofi "above" metadisk.
90 90 *
91 91 * Encryption:
92 92 * Each lofi device can have its own symmetric key and cipher.
93 93 * They are passed to us by lofiadm(1m) in the correct format for use
94 94 * with the misc/kcf crypto_* routines.
95 95 *
96 96 * Each block has its own IV, that is calculated in lofi_blk_mech(), based
97 97 * on the "master" key held in the lsp and the block number of the buffer.
98 98 */
99 99
100 100 #include <sys/types.h>
101 101 #include <netinet/in.h>
102 102 #include <sys/sysmacros.h>
103 103 #include <sys/uio.h>
104 104 #include <sys/kmem.h>
105 105 #include <sys/cred.h>
106 106 #include <sys/mman.h>
107 107 #include <sys/errno.h>
108 108 #include <sys/aio_req.h>
109 109 #include <sys/stat.h>
110 110 #include <sys/file.h>
111 111 #include <sys/modctl.h>
112 112 #include <sys/conf.h>
113 113 #include <sys/debug.h>
114 114 #include <sys/vnode.h>
115 115 #include <sys/lofi.h>
116 116 #include <sys/fcntl.h>
117 117 #include <sys/pathname.h>
118 118 #include <sys/filio.h>
119 119 #include <sys/fdio.h>
120 120 #include <sys/open.h>
121 121 #include <sys/disp.h>
122 122 #include <vm/seg_map.h>
123 123 #include <sys/ddi.h>
124 124 #include <sys/sunddi.h>
125 125 #include <sys/zmod.h>
126 126 #include <sys/id_space.h>
127 127 #include <sys/mkdev.h>
128 128 #include <sys/crypto/common.h>
129 129 #include <sys/crypto/api.h>
130 130 #include <sys/rctl.h>
131 131 #include <LzmaDec.h>
132 132
133 133 /*
134 134 * The basis for CRYOFF is derived from usr/src/uts/common/sys/fs/ufs_fs.h.
135 135 * Crypto metadata, if it exists, is located at the end of the boot block
136 136 * (BBOFF + BBSIZE, which is SBOFF). The super block and everything after
137 137 * is offset by the size of the crypto metadata which is handled by
138 138 * lsp->ls_crypto_offset.
139 139 */
140 140 #define CRYOFF ((off_t)8192)
141 141
142 142 #define NBLOCKS_PROP_NAME "Nblocks"
143 143 #define SIZE_PROP_NAME "Size"
144 144 #define ZONE_PROP_NAME "zone"
145 145
146 146 #define SETUP_C_DATA(cd, buf, len) \
147 147 (cd).cd_format = CRYPTO_DATA_RAW; \
148 148 (cd).cd_offset = 0; \
149 149 (cd).cd_miscdata = NULL; \
150 150 (cd).cd_length = (len); \
151 151 (cd).cd_raw.iov_base = (buf); \
152 152 (cd).cd_raw.iov_len = (len);
153 153
154 154 #define UIO_CHECK(uio) \
155 155 if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
156 156 ((uio)->uio_resid % DEV_BSIZE) != 0) { \
157 157 return (EINVAL); \
158 158 }
159 159
160 160 static dev_info_t *lofi_dip = NULL;
161 161 static void *lofi_statep = NULL;
162 162 static kmutex_t lofi_lock; /* state lock */
163 163 static id_space_t *lofi_minor_id;
164 164 static list_t lofi_list;
165 165 static zone_key_t lofi_zone_key;
166 166
167 167 /*
168 168 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
169 169 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
170 170 * high. If we want to be assured that the underlying device is always busy,
171 171 * we must be sure that the number of bytes enqueued when the number of
172 172 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
173 173 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
174 174 * set maxalloc to be the maximum throughput (in bytes per second) of the
175 175 * underlying device divided by the minimum I/O size. We assume a realistic
176 176 * maximum throughput of one hundred megabytes per second; we set maxalloc on
177 177 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
178 178 */
179 179 static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
180 180 static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */
181 181
182 182 const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;
183 183
184 184 /*
185 185 * To avoid decompressing data in a compressed segment multiple times
186 186 * when accessing small parts of a segment's data, we cache and reuse
187 187 * the uncompressed segment's data.
188 188 *
189 189 * A single cached segment is sufficient to avoid lots of duplicate
190 190 * segment decompress operations. A small cache size also reduces the
191 191 * memory footprint.
192 192 *
193 193 * lofi_max_comp_cache is the maximum number of decompressed data segments
194 194 * cached for each compressed lofi image. It can be set to 0 to disable
195 195 * caching.
196 196 */
197 197
198 198 uint32_t lofi_max_comp_cache = 1;
199 199
200 200 static int gzip_decompress(void *src, size_t srclen, void *dst,
201 201 size_t *destlen, int level);
202 202
203 203 static int lzma_decompress(void *src, size_t srclen, void *dst,
204 204 size_t *dstlen, int level);
205 205
206 206 lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
207 207 {gzip_decompress, NULL, 6, "gzip"}, /* default */
208 208 {gzip_decompress, NULL, 6, "gzip-6"},
209 209 {gzip_decompress, NULL, 9, "gzip-9"},
210 210 {lzma_decompress, NULL, 0, "lzma"}
211 211 };
212 212
213 213 /*ARGSUSED*/
214 214 static void
215 215 *SzAlloc(void *p, size_t size)
216 216 {
217 217 return (kmem_alloc(size, KM_SLEEP));
218 218 }
219 219
/*
 * Free hook handed to the LZMA decoder (via g_Alloc); returns memory
 * obtained from SzAlloc() to the kernel allocator.  The allocator
 * context 'p' is unused.
 */
/*ARGSUSED*/
static void
SzFree(void *p, void *address, size_t size)
{
	kmem_free(address, size);
}
226 226
227 227 static ISzAlloc g_Alloc = { SzAlloc, SzFree };
228 228
229 229 /*
230 230 * Free data referenced by the linked list of cached uncompressed
231 231 * segments.
232 232 */
233 233 static void
234 234 lofi_free_comp_cache(struct lofi_state *lsp)
235 235 {
236 236 struct lofi_comp_cache *lc;
237 237
238 238 while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
239 239 kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
240 240 kmem_free(lc, sizeof (struct lofi_comp_cache));
241 241 lsp->ls_comp_cache_count--;
242 242 }
243 243 ASSERT(lsp->ls_comp_cache_count == 0);
244 244 }
245 245
246 246 static int
247 247 is_opened(struct lofi_state *lsp)
248 248 {
249 249 ASSERT(MUTEX_HELD(&lofi_lock));
250 250 return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
251 251 }
252 252
253 253 static int
254 254 mark_opened(struct lofi_state *lsp, int otyp)
255 255 {
256 256 ASSERT(MUTEX_HELD(&lofi_lock));
257 257 switch (otyp) {
258 258 case OTYP_CHR:
259 259 lsp->ls_chr_open = 1;
260 260 break;
261 261 case OTYP_BLK:
262 262 lsp->ls_blk_open = 1;
263 263 break;
264 264 case OTYP_LYR:
265 265 lsp->ls_lyr_open_count++;
266 266 break;
267 267 default:
268 268 return (-1);
269 269 }
270 270 return (0);
271 271 }
272 272
273 273 static void
274 274 mark_closed(struct lofi_state *lsp, int otyp)
275 275 {
276 276 ASSERT(MUTEX_HELD(&lofi_lock));
277 277 switch (otyp) {
278 278 case OTYP_CHR:
279 279 lsp->ls_chr_open = 0;
280 280 break;
281 281 case OTYP_BLK:
282 282 lsp->ls_blk_open = 0;
283 283 break;
284 284 case OTYP_LYR:
285 285 lsp->ls_lyr_open_count--;
286 286 break;
287 287 default:
288 288 break;
289 289 }
290 290 }
291 291
/*
 * Release all crypto state held by a lofi device: zeroize and free the
 * key, free the cipher and IV mechanism parameters, and destroy the
 * crypto lock.  No-op unless crypto was enabled for this device.
 * Caller must hold lofi_lock.
 */
static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
		 */
		if (lsp->ls_key.ck_data != NULL) {
			/*
			 * Zeroize the key material before freeing so the
			 * key doesn't linger in freed kernel memory.
			 */
			bzero(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			kmem_free(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			lsp->ls_key.ck_data = NULL;
			lsp->ls_key.ck_length = 0;
		}

		/* cm_param holds the per-block IV set up by lofi_blk_mech() */
		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}
328 328
/*
 * Fully tear down a lofi device: unlink it from the global device
 * list, release crypto and compression resources, close and release
 * the backing vnode(s), destroy its taskq/kstat/locks, and finally
 * free the soft state and return the minor number to the id space.
 * Caller must hold lofi_lock; lsp is freed on return.
 */
static void
lofi_destroy(struct lofi_state *lsp, cred_t *credp)
{
	minor_t minor = getminor(lsp->ls_dev);
	int i;

	ASSERT(MUTEX_HELD(&lofi_lock));

	list_remove(&lofi_list, lsp);

	lofi_free_crypto(lsp);

	/*
	 * Free pre-allocated compressed buffers
	 */
	if (lsp->ls_comp_bufs != NULL) {
		for (i = 0; i < lofi_taskq_nthreads; i++) {
			/* bufsize == 0 means this slot was never used */
			if (lsp->ls_comp_bufs[i].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[i].buf,
				    lsp->ls_comp_bufs[i].bufsize);
		}
		kmem_free(lsp->ls_comp_bufs,
		    sizeof (struct compbuf) * lofi_taskq_nthreads);
	}

	/*
	 * Close the backing file and drop our holds; a stacked vnode
	 * (e.g. from file-descriptor attach) carries a separate hold.
	 */
	(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
	    1, 0, credp, NULL);
	VN_RELE(lsp->ls_vp);
	if (lsp->ls_stacked_vp != lsp->ls_vp)
		VN_RELE(lsp->ls_stacked_vp);

	taskq_destroy(lsp->ls_taskq);

	if (lsp->ls_kstat != NULL)
		kstat_delete(lsp->ls_kstat);

	/*
	 * Free cached decompressed segment data
	 */
	lofi_free_comp_cache(lsp);
	list_destroy(&lsp->ls_comp_cache);

	/* Nonzero ls_uncomp_seg_sz implies a compression index was loaded. */
	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}

	/* Give back the zone's lofi resource-control count and reference. */
	rctl_decr_lofi(lsp->ls_zone.zref_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);

	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);

	ASSERT(ddi_get_soft_state(lofi_statep, minor) == lsp);
	ddi_soft_state_free(lofi_statep, minor);
	id_free(lofi_minor_id, minor);
}
388 388
389 389 static void
390 390 lofi_free_dev(dev_t dev)
391 391 {
392 392 minor_t minor = getminor(dev);
393 393 char namebuf[50];
394 394
395 395 ASSERT(MUTEX_HELD(&lofi_lock));
396 396
397 397 (void) ddi_prop_remove(dev, lofi_dip, ZONE_PROP_NAME);
398 398 (void) ddi_prop_remove(dev, lofi_dip, SIZE_PROP_NAME);
399 399 (void) ddi_prop_remove(dev, lofi_dip, NBLOCKS_PROP_NAME);
400 400
401 401 (void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
402 402 ddi_remove_minor_node(lofi_dip, namebuf);
403 403 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
404 404 ddi_remove_minor_node(lofi_dip, namebuf);
405 405 }
406 406
/*
 * Zone shutdown callback (presumably registered against lofi_zone_key
 * via zone_key_create — confirm at registration site).  Walks the
 * global device list and destroys every lofi device belonging to the
 * dying zone; devices that are still open are only flagged with
 * ls_cleanup so the final close performs the teardown.
 */
/*ARGSUSED*/
static void
lofi_zone_shutdown(zoneid_t zoneid, void *arg)
{
	struct lofi_state *lsp;
	struct lofi_state *next;

	mutex_enter(&lofi_lock);

	for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) {

		/* lofi_destroy() frees lsp */
		next = list_next(&lofi_list, lsp);

		if (lsp->ls_zone.zref_zone->zone_id != zoneid)
			continue;

		/*
		 * No in-zone processes are running, but something has this
		 * open. It's either a global zone process, or a lofi
		 * mount. In either case we set ls_cleanup so the last
		 * user destroys the device.
		 */
		if (is_opened(lsp)) {
			lsp->ls_cleanup = 1;
		} else {
			lofi_free_dev(lsp->ls_dev);
			lofi_destroy(lsp, kcred);
		}
	}

	mutex_exit(&lofi_lock);
}
440 440
441 441 /*ARGSUSED*/
442 442 static int
443 443 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
444 444 {
445 445 minor_t minor;
446 446 struct lofi_state *lsp;
447 447
448 448 /*
449 449 * lofiadm -a /dev/lofi/1 gets us here.
450 450 */
451 451 if (mutex_owner(&lofi_lock) == curthread)
452 452 return (EINVAL);
453 453
454 454 mutex_enter(&lofi_lock);
455 455
456 456 minor = getminor(*devp);
457 457
458 458 /* master control device */
459 459 if (minor == 0) {
460 460 mutex_exit(&lofi_lock);
461 461 return (0);
462 462 }
463 463
464 464 /* otherwise, the mapping should already exist */
465 465 lsp = ddi_get_soft_state(lofi_statep, minor);
466 466 if (lsp == NULL) {
467 467 mutex_exit(&lofi_lock);
468 468 return (EINVAL);
469 469 }
470 470
↓ open down ↓ |
470 lines elided |
↑ open up ↑ |
471 471 if (lsp->ls_vp == NULL) {
472 472 mutex_exit(&lofi_lock);
473 473 return (ENXIO);
474 474 }
475 475
476 476 if (mark_opened(lsp, otyp) == -1) {
477 477 mutex_exit(&lofi_lock);
478 478 return (EINVAL);
479 479 }
480 480
481 + if (lsp->ls_readonly && (flag & FWRITE)) {
482 + mutex_exit(&lofi_lock);
483 + return (EROFS);
484 + }
485 +
481 486 mutex_exit(&lofi_lock);
482 487 return (0);
483 488 }
484 489
/*
 * close(9E) entry point.  Clears the open state for the given otyp
 * and, when this was the last open and either cleanup was requested
 * (ls_cleanup) or the backing vnode was force-detached (ls_vp ==
 * NULL), destroys the device.
 */
/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(dev);
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	/* Master control device keeps no open-state bookkeeping. */
	if (minor == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	mark_closed(lsp, otyp);

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
	 */
	if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) {
		lofi_free_dev(lsp->ls_dev);
		lofi_destroy(lsp, credp);
	}

	mutex_exit(&lofi_lock);
	return (0);
}
520 525
/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number. lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
 *
 * Returns a CRYPTO_* status code (CRYPTO_SUCCESS on success).
 * Caller must hold lsp->ls_crypto_lock.
 */
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock));

	/*
	 * NOTE(review): lsp is already dereferenced by the ASSERT above,
	 * so this NULL check appears to be dead — confirm against callers.
	 */
	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem already alloced from previous call and it's the same size
	 * we need now, just recycle it; allocate new kmem only if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = 0;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		/* only free iv if it was freshly allocated, not recycled */
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}
606 611
/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
 * DEV_BSIZE.
 *
 * When ciphertext is NULL the operation is done in place on
 * "plaintext"; otherwise output goes to "ciphertext".  The starting
 * block number is taken from bp->b_lblkno and each DEV_BSIZE slice
 * gets its own IV via lofi_blk_mech().  Returns a CRYPTO_* status.
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt entire "len" chunk of data, we need
	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {	/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;	/* loop condition exits on failure */

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		/* advance both windows one disk block, bump the block no. */
		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}
665 670
666 671 #define RDWR_RAW 1
667 672 #define RDWR_BCOPY 2
668 673
/*
 * Common read/write engine for both plain and encrypted lofi.
 *
 * method selects the transfer mechanism:
 *	RDWR_RAW   - vn_rdwr() against the backing vnode at "offset";
 *	             bp->b_resid is updated from the residual count.
 *	RDWR_BCOPY - bcopy() to/from "bcopy_locn" (a segmap mapping set
 *	             up by lofi_mapped_rdwr()); bp->b_resid is NOT
 *	             touched here — the caller maintains it.
 * For encrypted devices, reads are decrypted in place after the
 * transfer and writes are encrypted into a temporary buffer first so
 * "bufaddr" is left intact.  Returns 0 or an errno.
 */
static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
{
	ssize_t resid;
	int isread;
	int error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		/* decrypt in place after a successful transfer */
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		/* free the temporary ciphertext buffer, if one was used */
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}
739 744
/*
 * Transfer bp->b_bcount bytes between "bufaddr" and the backing file
 * starting at "offset", using segmap mappings of the file instead of
 * vn_rdwr().  Pages are soft-locked before the copy so faults (e.g. a
 * file deleted over NFS) surface as errors rather than panics.
 * Returns 0 or an errno; bp->b_resid reflects what was not moved.
 */
static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |       len
	 *    v    v        v
	 * ===|====X========|====...======|========X====|====
	 *    /-------------...---------------/
	 *        ^ bp->b_bcount/bp->b_resid at start
	 *    /----/--------/----...------/--------/
	 *   ^       ^  ^   ^               ^
	 *   |       |  |   |               nth xfersize (<= MAXBSIZE)
	 *   |       |  2nd thru n-1st xfersize (= MAXBSIZE)
	 *   |       1st xfersize (<= MAXBSIZE)
	 *   mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary.  "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		/* cap the transfer at EOF, chunk end, and bytes remaining */
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			/*
			 * Write back good pages, it is okay to
			 * always release asynchronous here as we'll
			 * follow with VOP_FSYNC for B_SYNC buffers.
			 */
			if (error == 0)
				smflags |= SM_WRITE | SM_ASYNC;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		/* don't let a release error mask an earlier copy error */
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}
860 865
861 866 /*
862 867 * Check if segment seg_index is present in the decompressed segment
863 868 * data cache.
864 869 *
865 870 * Returns a pointer to the decompressed segment data cache entry if
866 871 * found, and NULL when decompressed data for this segment is not yet
867 872 * cached.
868 873 */
869 874 static struct lofi_comp_cache *
870 875 lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
871 876 {
872 877 struct lofi_comp_cache *lc;
873 878
874 879 ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));
875 880
876 881 for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
877 882 lc = list_next(&lsp->ls_comp_cache, lc)) {
878 883 if (lc->lc_index == seg_index) {
879 884 /*
880 885 * Decompressed segment data was found in the
881 886 * cache.
882 887 *
883 888 * The cache uses an LRU replacement strategy;
884 889 * move the entry to head of list.
885 890 */
886 891 list_remove(&lsp->ls_comp_cache, lc);
887 892 list_insert_head(&lsp->ls_comp_cache, lc);
888 893 return (lc);
889 894 }
890 895 }
891 896 return (NULL);
892 897 }
893 898
/*
 * Add the data for a decompressed segment at segment index
 * seg_index to the cache of the decompressed segments.
 *
 * Returns a pointer to the cache element structure in case
 * the data was added to the cache; returns NULL when the data
 * wasn't cached.
 *
 * On success the cache takes ownership of "data" (freed with
 * ls_uncomp_seg_sz when evicted).  On a NULL return the caller
 * presumably remains responsible for "data" — confirm at call site.
 * Caller must hold ls_comp_cache_lock.
 */
static struct lofi_comp_cache *
lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    uchar_t *data)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	/* Shrink the cache if the tunable was lowered since last time. */
	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		ASSERT(lc != NULL);
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}

	/*
	 * Do not cache when disabled by tunable variable
	 */
	if (lofi_max_comp_cache == 0)
		return (NULL);

	/*
	 * When the cache has not yet reached the maximum allowed
	 * number of segments, allocate a new cache element.
	 * Otherwise the cache is full; reuse the last list element
	 * (LRU) for caching the decompressed segment data.
	 *
	 * The cache element for the new decompressed segment data is
	 * added to the head of the list.
	 */
	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
		lc->lc_data = NULL;
		list_insert_head(&lsp->ls_comp_cache, lc);
		lsp->ls_comp_cache_count++;
	} else {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		if (lc == NULL)
			return (NULL);
		list_insert_head(&lsp->ls_comp_cache, lc);
	}

	/*
	 * Free old uncompressed segment data when reusing a cache
	 * entry.
	 */
	if (lc->lc_data != NULL)
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);

	lc->lc_data = data;
	lc->lc_index = seg_index;
	return (lc);
}
956 961
957 962
958 963 /*ARGSUSED*/
959 964 static int
960 965 gzip_decompress(void *src, size_t srclen, void *dst,
961 966 size_t *dstlen, int level)
962 967 {
963 968 ASSERT(*dstlen >= srclen);
964 969
965 970 if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
966 971 return (-1);
967 972 return (0);
968 973 }
969 974
/*
 * An LZMA segment starts with the LZMA properties block followed by
 * 8 bytes (presumably the 64-bit uncompressed size field of the LZMA
 * stream header -- TODO confirm against the compression writer).
 */
#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
/*
 * Decompress an LZMA compressed segment.  'level' is unused for
 * decompression.  Returns 0 on success, -1 on decode failure.
 */
/*ARGSUSED*/
static int
lzma_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	/* The compressed payload follows the header. */
	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	/*
	 * The properties needed by LzmaDecode are read from the start
	 * of 'src' (first LZMA_PROPS_SIZE bytes).
	 */
	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}
991 996
992 997 /*
993 998 * This is basically what strategy used to be before we found we
994 999 * needed task queues.
995 1000 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	int syncflag = 0;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}
	if (lsp->ls_kstat) {
		/*
		 * The request was put on the wait queue by
		 * lofi_strategy(); mark it as being serviced now.
		 */
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	/* The mapping may have been torn down since dispatch. */
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * If we're writing and the buffer was not B_ASYNC
	 * we'll follow up with a VOP_FSYNC() to force any
	 * asynchronous I/O to stable storage.
	 */
	if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC))
		syncflag = FSYNC;

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mapable or it's a character device or it's an encrypted lofi.
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		uchar_t *compressed_seg = NULL, *cmpbuf;
		uchar_t *uncompressed_seg = NULL;
		lofi_compress_info_t *li;
		size_t oblkcount;
		ulong_t seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		uint64_t uncompressed_seg_index;
		struct lofi_comp_cache *lc;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;
		int j;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Check the decompressed segment cache.
		 *
		 * The cache is used only when the requested data
		 * is within a segment. Requests that cross
		 * segment boundaries bypass the cache.
		 */
		if (sblkno == eblkno ||
		    (sblkno + 1 == eblkno && eblkoff == 0)) {
			/*
			 * Request doesn't cross a segment boundary,
			 * now check the cache.
			 */
			mutex_enter(&lsp->ls_comp_cache_lock);
			lc = lofi_find_comp_data(lsp, sblkno);
			if (lc != NULL) {
				/*
				 * We've found the decompressed segment
				 * data in the cache; reuse it.
				 */
				bcopy(lc->lc_data + sblkoff, bufaddr,
				    bp->b_bcount);
				mutex_exit(&lsp->ls_comp_cache_lock);
				bp->b_resid = 0;
				error = 0;
				goto done;
			}
			mutex_exit(&lsp->ls_comp_cache_lock);
		}

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request paramaters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Buffers to hold compressed segments are pre-allocated
		 * on a per-thread basis. Find a pre-allocated buffer
		 * that is not currently in use and mark it for use.
		 */
		mutex_enter(&lsp->ls_comp_bufs_lock);
		for (j = 0; j < lofi_taskq_nthreads; j++) {
			if (lsp->ls_comp_bufs[j].inuse == 0) {
				lsp->ls_comp_bufs[j].inuse = 1;
				break;
			}
		}

		mutex_exit(&lsp->ls_comp_bufs_lock);
		/*
		 * NOTE(review): if all lofi_taskq_nthreads buffers were in
		 * use, j would index past ls_comp_bufs below.  The ASSERT
		 * implies concurrency is expected to be bounded by the
		 * taskq thread count -- confirm lofi_taskq_nthreads cannot
		 * change while the taskq is live.
		 */
		ASSERT(j < lofi_taskq_nthreads);

		/*
		 * If the pre-allocated buffer size does not match
		 * the size of the I/O request, re-allocate it with
		 * the appropriate size
		 */
		if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) {
			if (lsp->ls_comp_bufs[j].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[j].buf,
				    lsp->ls_comp_bufs[j].bufsize);
			lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount,
			    KM_SLEEP);
			lsp->ls_comp_bufs[j].bufsize = bp->b_bcount;
		}
		compressed_seg = lsp->ls_comp_bufs[j].buf;

		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		/* Restore caller-visible request parameters. */
		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * decompress compressed blocks start
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);
			uchar_t *useg;

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all.
			 *
			 * The variable 'useg' is used (instead of
			 * uncompressed_seg) in this loop to keep a
			 * reference to the uncompressed segment.
			 *
			 * N.B. If 'useg' is replaced with uncompressed_seg,
			 * it leads to memory leaks and heap corruption in
			 * corner cases where compressed segments lie
			 * adjacent to uncompressed segments.
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				useg = cmpbuf + SEGHDR;
			} else {
				/* One scratch buffer serves the whole loop. */
				if (uncompressed_seg == NULL)
					uncompressed_seg =
					    kmem_alloc(lsp->ls_uncomp_seg_sz,
					    KM_SLEEP);
				useg = uncompressed_seg;
				uncompressed_seg_index = i;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((useg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			/* Only the first segment starts mid-segment. */
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		} /* decompress compressed blocks ends */

		/*
		 * Skip to done if there is no uncompressed data to cache
		 */
		if (uncompressed_seg == NULL)
			goto done;

		/*
		 * Add the data for the last decompressed segment to
		 * the cache.
		 *
		 * In case the uncompressed segment data was added to (and
		 * is referenced by) the cache, make sure we don't free it
		 * here.
		 */
		mutex_enter(&lsp->ls_comp_cache_lock);
		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
		    uncompressed_seg)) != NULL) {
			uncompressed_seg = NULL;
		}
		mutex_exit(&lsp->ls_comp_cache_lock);

done:
		/* Return the per-thread compressed-data buffer to the pool. */
		if (compressed_seg != NULL) {
			mutex_enter(&lsp->ls_comp_bufs_lock);
			lsp->ls_comp_bufs[j].inuse = 0;
			mutex_exit(&lsp->ls_comp_bufs_lock);
		}
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

	if ((error == 0) && (syncflag != 0))
		error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL);

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	/*
	 * Drop the I/O hold taken in lofi_strategy(); wake any thread
	 * waiting for outstanding I/O to drain (e.g. on unmap/close).
	 */
	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}
1343 1348
static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t	offset;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */
	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	mutex_enter(&lsp->ls_vp_lock);
	/* Fail fast if the backing file is gone or being closed. */
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		bioerror(bp, EIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	if (offset == lsp->ls_vp_size) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			/* a read at exactly EOF succeeds with zero bytes */
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	/* I/O entirely beyond the end of the backing file */
	if (offset > lsp->ls_vp_size) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	/*
	 * Hold the vnode open across the asynchronous task; the matching
	 * decrement and cv_broadcast happen in lofi_strategy_task().
	 */
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}
1411 1416
/*
 * Character-device read entry point; funnels through lofi_strategy().
 */
/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	/* minor 0 is the lofi control device; it has no data to read */
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}
1421 1426
/*
 * Character-device write entry point; funnels through lofi_strategy().
 */
/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	/* minor 0 is the lofi control device; writing to it is invalid */
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}
1431 1436
/*
 * Async read entry point (aio); funnels through lofi_strategy().
 */
/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	/* minor 0 is the lofi control device; it has no data to read */
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}
1441 1446
/*
 * Async write entry point (aio); funnels through lofi_strategy().
 */
/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	/* minor 0 is the lofi control device; writing to it is invalid */
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}
1451 1456
1452 1457 /*ARGSUSED*/
1453 1458 static int
1454 1459 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1455 1460 {
1456 1461 switch (infocmd) {
1457 1462 case DDI_INFO_DEVT2DEVINFO:
1458 1463 *result = lofi_dip;
1459 1464 return (DDI_SUCCESS);
1460 1465 case DDI_INFO_DEVT2INSTANCE:
1461 1466 *result = 0;
1462 1467 return (DDI_SUCCESS);
1463 1468 }
1464 1469 return (DDI_FAILURE);
1465 1470 }
1466 1471
/*
 * attach(9E): set up the single lofi instance -- the minor-number id
 * space, the soft state for minor 0, and the /dev/lofictl control node.
 * Each failure path unwinds everything acquired before it.
 */
static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int error;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/* minors start at 1; minor 0 is reserved for the control device */
	lofi_minor_id = id_space_create("lofi_minor_id", 1, L_MAXMIN32 + 1);

	if (!lofi_minor_id)
		return (DDI_FAILURE);

	error = ddi_soft_state_zalloc(lofi_statep, 0);
	if (error == DDI_FAILURE) {
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}
	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
	    DDI_PSEUDO, NULL);
	if (error == DDI_FAILURE) {
		ddi_soft_state_free(lofi_statep, 0);
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}
	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		ddi_remove_minor_node(dip, NULL);
		ddi_soft_state_free(lofi_statep, 0);
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}

	/* lofi_zone_shutdown tears down a zone's mappings at shutdown */
	zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL);

	lofi_dip = dip;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}
1507 1512
/*
 * detach(9E): tear down the lofi instance.  Refuses to detach while any
 * file mapping still exists.
 */
static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&lofi_lock);

	/* busy: at least one file is still attached */
	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (DDI_FAILURE);
	}

	lofi_dip = NULL;
	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);

	mutex_exit(&lofi_lock);

	if (zone_key_delete(lofi_zone_key) != 0)
		cmn_err(CE_WARN, "failed to delete zone key");

	ddi_soft_state_free(lofi_statep, 0);

	id_space_destroy(lofi_minor_id);

	return (DDI_SUCCESS);
}
1536 1541
1537 1542 /*
1538 1543 * With addition of encryption, be careful that encryption key is wiped before
1539 1544 * kernel memory structures are freed, and also that key is not accidentally
1540 1545 * passed out into userland structures.
1541 1546 */
/*
 * Free a kernel copy of a lofi_ioctl, scrubbing the embedded crypto
 * key first so it does not linger in freed kernel memory.
 */
static void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	/* Make sure this encryption key doesn't stick around */
	bzero(klip->li_key, sizeof (klip->li_key));
	kmem_free(klip, sizeof (struct lofi_ioctl));
}
1549 1554
1550 1555 /*
1551 1556 * These two just simplify the rest of the ioctls that need to copyin/out
1552 1557 * the lofi_ioctl structure.
1553 1558 */
1554 1559 int
1555 1560 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp,
1556 1561 int flag)
1557 1562 {
1558 1563 struct lofi_ioctl *klip;
1559 1564 int error;
1560 1565
1561 1566 klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
1562 1567 error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
1563 1568 if (error)
1564 1569 goto err;
1565 1570
1566 1571 /* ensure NULL termination */
1567 1572 klip->li_filename[MAXPATHLEN-1] = '\0';
1568 1573 klip->li_algorithm[MAXALGLEN-1] = '\0';
1569 1574 klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';
1570 1575 klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';
1571 1576
1572 1577 if (klip->li_minor > L_MAXMIN32) {
1573 1578 error = EINVAL;
1574 1579 goto err;
1575 1580 }
1576 1581
1577 1582 return (0);
1578 1583
1579 1584 err:
1580 1585 free_lofi_ioctl(klip);
1581 1586 return (error);
1582 1587 }
1583 1588
1584 1589 int
1585 1590 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
1586 1591 int flag)
1587 1592 {
1588 1593 int error;
1589 1594
1590 1595 /*
1591 1596 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
1592 1597 * This ensures that an attacker can't trivially find the
1593 1598 * key for a mapping just by issuing the ioctl.
1594 1599 *
1595 1600 * It can still be found by poking around in kmem with mdb(1),
1596 1601 * but there is no point in making it easy when the info isn't
1597 1602 * of any use in this direction anyway.
1598 1603 *
1599 1604 * Either way we don't actually have the raw key stored in
1600 1605 * a form that we can get it anyway, since we just used it
1601 1606 * to create a ctx template and didn't keep "the original".
1602 1607 */
1603 1608 error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
1604 1609 if (error)
1605 1610 return (EFAULT);
1606 1611 return (0);
1607 1612 }
1608 1613
/*
 * Zone access check: the global zone may operate on any lofi mapping,
 * a non-global zone only on mappings it owns.  Returns 0 or EPERM.
 */
static int
lofi_access(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (INGLOBALZONE(curproc) || lsp->ls_zone.zref_zone == curzone)
		return (0);
	return (EPERM);
}
1617 1622
1618 1623 /*
1619 1624 * Find the lofi state for the given filename. We compare by vnode to
1620 1625 * allow the global zone visibility into NGZ lofi nodes.
1621 1626 */
static int
file_to_lofi_nocheck(char *filename, boolean_t readonly,
    struct lofi_state **lspp)
{
	struct lofi_state *lsp;
	vnode_t *vp = NULL;
	int err = 0;
	int rdfiles = 0;	/* read-only mappings seen for this file */

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &vp)) != 0)
		goto out;

	/*
	 * Compare against the real underlying vnode so lookups through
	 * different paths (e.g. lofs) match the same backing file.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;
		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			VN_HOLD(realvp);
			VN_RELE(vp);
			vp = realvp;
		}
	}

	for (lsp = list_head(&lofi_list); lsp != NULL;
	    lsp = list_next(&lofi_list, lsp)) {
		if (lsp->ls_vp == vp) {
			if (lspp != NULL)
				*lspp = lsp;
			if (lsp->ls_readonly) {
				rdfiles++;
				/* Skip if '-r' is specified */
				if (readonly)
					continue;
			}
			goto out;
		}
	}

	err = ENOENT;

	/*
	 * If a filename is given as an argument for lofi_unmap, we shouldn't
	 * allow unmap if there are multiple read-only lofi devices associated
	 * with this file.
	 */
	if (lspp != NULL) {
		if (rdfiles == 1)
			err = 0;
		else if (rdfiles > 1)
			err = EBUSY;
	}

out:
	if (vp != NULL)
		VN_RELE(vp);
	return (err);
}
1660 1685
1661 1686 /*
1662 1687 * Find the minor for the given filename, checking the zone can access
1663 1688 * it.
1664 1689 */
static int
file_to_lofi(char *filename, boolean_t readonly, struct lofi_state **lspp)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	/* find the mapping first, then verify zone visibility */
	if ((err = file_to_lofi_nocheck(filename, readonly, lspp)) != 0)
		return (err);

	if ((err = lofi_access(*lspp)) != 0)
		return (err);

	return (0);
}
1680 1705
1681 1706 /*
1682 1707 * Fakes up a disk geometry, and one big partition, based on the size
1683 1708 * of the file. This is needed because we allow newfs'ing the device,
1684 1709 * and newfs will do several disk ioctls to figure out the geometry and
1685 1710 * partition information. It uses that information to determine the parameters
1686 1711 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
1687 1712 * have to support it.
1688 1713 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	/* usable payload size: any crypto header space is excluded */
	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;

	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheezy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	if (dsize < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
	/* in case the file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;
	lsp->ls_dkg.dkg_acyl = 0;
	lsp->ls_dkg.dkg_bcyl = 0;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_obs1 = 0;
	lsp->ls_dkg.dkg_intrlv = 0;
	lsp->ls_dkg.dkg_obs2 = 0;
	lsp->ls_dkg.dkg_obs3 = 0;
	lsp->ls_dkg.dkg_apc = 0;
	lsp->ls_dkg.dkg_rpm = 7200;
	/* pcyl and nsect are derived from ncyl; keep these after it */
	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
	lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
	lsp->ls_dkg.dkg_write_reinstruct = 0;
	lsp->ls_dkg.dkg_read_reinstruct = 0;

	/* vtoc - see dkio(7I) */
	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
	lsp->ls_vtoc.v_sanity = VTOC_SANE;
	lsp->ls_vtoc.v_version = V_VERSION;
	(void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME,
	    sizeof (lsp->ls_vtoc.v_volume));
	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
	lsp->ls_vtoc.v_nparts = 1;
	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
	}
	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;

	/* dk_cinfo - see dkio(7I) */
	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_ctype = DKC_MD;
	lsp->ls_ci.dki_flags = 0;
	lsp->ls_ci.dki_cnum = 0;
	lsp->ls_ci.dki_addr = 0;
	lsp->ls_ci.dki_space = 0;
	lsp->ls_ci.dki_prio = 0;
	lsp->ls_ci.dki_vec = 0;
	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_unit = 0;
	lsp->ls_ci.dki_slave = 0;
	lsp->ls_ci.dki_partition = 0;
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * maxcontig is 0.
	 */
	lsp->ls_ci.dki_maxtransfer = 16;
}
1778 1803
1779 1804 /*
1780 1805 * map in a compressed file
1781 1806 *
1782 1807 * Read in the header and the index that follows.
1783 1808 *
1784 1809 * The header is as follows -
1785 1810 *
1786 1811 * Signature (name of the compression algorithm)
1787 1812 * Compression segment size (a multiple of 512)
1788 1813 * Number of index entries
1789 1814 * Size of the last block
1790 1815 * The array containing the index entries
1791 1816 *
1792 1817 * The header information is always stored in
1793 1818 * network byte order on disk.
1794 1819 */
1795 1820 static int
1796 1821 lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
1797 1822 {
1798 1823 uint32_t index_sz, header_len, i;
1799 1824 ssize_t resid;
1800 1825 enum uio_rw rw;
1801 1826 char *tbuf = buf;
1802 1827 int error;
1803 1828
1804 1829 /* The signature has already been read */
1805 1830 tbuf += sizeof (lsp->ls_comp_algorithm);
1806 1831 bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
1807 1832 lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);
1808 1833
1809 1834 /*
1810 1835 * The compressed segment size must be a power of 2
1811 1836 */
1812 1837 if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
1813 1838 !ISP2(lsp->ls_uncomp_seg_sz))
1814 1839 return (EINVAL);
1815 1840
1816 1841 for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
1817 1842 ;
1818 1843
1819 1844 lsp->ls_comp_seg_shift = i;
1820 1845
1821 1846 tbuf += sizeof (lsp->ls_uncomp_seg_sz);
1822 1847 bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
1823 1848 lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);
1824 1849
1825 1850 tbuf += sizeof (lsp->ls_comp_index_sz);
1826 1851 bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
1827 1852 sizeof (lsp->ls_uncomp_last_seg_sz));
1828 1853 lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);
1829 1854
1830 1855 /*
1831 1856 * Compute the total size of the uncompressed data
1832 1857 * for use in fake_disk_geometry and other calculations.
1833 1858 * Disk geometry has to be faked with respect to the
1834 1859 * actual uncompressed data size rather than the
1835 1860 * compressed file size.
1836 1861 */
1837 1862 lsp->ls_vp_size =
1838 1863 (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
1839 1864 + lsp->ls_uncomp_last_seg_sz;
1840 1865
1841 1866 /*
1842 1867 * Index size is rounded up to DEV_BSIZE for ease
1843 1868 * of segmapping
1844 1869 */
1845 1870 index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
1846 1871 header_len = sizeof (lsp->ls_comp_algorithm) +
1847 1872 sizeof (lsp->ls_uncomp_seg_sz) +
1848 1873 sizeof (lsp->ls_comp_index_sz) +
1849 1874 sizeof (lsp->ls_uncomp_last_seg_sz);
1850 1875 lsp->ls_comp_offbase = header_len + index_sz;
1851 1876
1852 1877 index_sz += header_len;
1853 1878 index_sz = roundup(index_sz, DEV_BSIZE);
1854 1879
1855 1880 lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
1856 1881 lsp->ls_comp_index_data_sz = index_sz;
1857 1882
1858 1883 /*
1859 1884 * Read in the index -- this has a side-effect
1860 1885 * of reading in the header as well
1861 1886 */
1862 1887 rw = UIO_READ;
1863 1888 error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
1864 1889 0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
1865 1890
1866 1891 if (error != 0)
1867 1892 return (error);
1868 1893
1869 1894 /* Skip the header, this is where the index really begins */
1870 1895 lsp->ls_comp_seg_index =
1871 1896 /*LINTED*/
1872 1897 (uint64_t *)(lsp->ls_comp_index_data + header_len);
1873 1898
1874 1899 /*
1875 1900 * Now recompute offsets in the index to account for
1876 1901 * the header length
1877 1902 */
1878 1903 for (i = 0; i < lsp->ls_comp_index_sz; i++) {
1879 1904 lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
1880 1905 BE_64(lsp->ls_comp_seg_index[i]);
1881 1906 }
1882 1907
1883 1908 return (error);
1884 1909 }
1885 1910
/*
 * Set up encryption state for a lofi mapping, if requested via klip.
 *
 * Validates the cipher/key/IV parameters, installs the raw key after a
 * weak-key check, then reads the sector at CRYOFF.  If it carries the
 * lofi crypto magic, the existing on-disk header (stored in network
 * byte order) is parsed; otherwise that sector must be all zeros (a
 * fresh image) and a new header is written out.
 *
 * Returns 0 on success (or when crypto is not enabled), else an errno.
 * State hung off lsp on a failed return is released by lofi_destroy().
 */
static int
lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip)
{
	struct crypto_meta chead;
	char buf[DEV_BSIZE];
	ssize_t resid;
	char *marker;
	int error;
	int ret;
	int i;

	/* Nothing to do unless the caller asked for encryption. */
	if (!klip->li_crypto_enabled)
		return (0);

	/*
	 * All current algorithms have a max of 448 bits.
	 * NOTE(review): the check below actually permits up to
	 * CRYPTO_BITS2BYTES(512) -- confirm which bound is intended.
	 */
	if (klip->li_iv_len > CRYPTO_BITS2BYTES(512))
		return (EINVAL);

	/* Key length (in bits) must fit the ioctl's key buffer. */
	if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key))
		return (EINVAL);

	lsp->ls_crypto_enabled = klip->li_crypto_enabled;

	mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

	lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
	if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid cipher %s requested for %s",
		    klip->li_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* this is just initialization here */
	lsp->ls_mech.cm_param = NULL;
	lsp->ls_mech.cm_param_len = 0;

	lsp->ls_iv_type = klip->li_iv_type;
	lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
	if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid iv cipher %s requested"
		    " for %s", klip->li_iv_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* iv mech must itself take a null iv */
	lsp->ls_iv_mech.cm_param = NULL;
	lsp->ls_iv_mech.cm_param_len = 0;
	lsp->ls_iv_len = klip->li_iv_len;

	/*
	 * Create ctx using li_cipher & the raw li_key after checking
	 * that it isn't a weak key.
	 */
	lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
	lsp->ls_key.ck_length = klip->li_key_len;
	lsp->ls_key.ck_data = kmem_alloc(
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
	bcopy(klip->li_key, lsp->ls_key.ck_data,
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));

	ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "weak key check failed for cipher "
		    "%s on file %s (0x%x)", klip->li_cipher,
		    klip->li_filename, ret);
		return (EINVAL);
	}

	/* Read the sector that holds (or will hold) the crypto header. */
	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/*
	 * This is the case where the header in the lofi image is already
	 * initialized to indicate it is encrypted.
	 */
	if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) {
		/*
		 * The encryption header information is laid out this way:
		 *	6 bytes:	hex "CFLOFI"
		 *	2 bytes:	version = 0 ... for now
		 *	96 bytes:	reserved1 (not implemented yet)
		 *	4 bytes:	data_sector = 2 ... for now
		 *	more...		not implemented yet
		 */

		marker = buf;

		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		return (0);
	}

	/*
	 * We've requested encryption, but no magic was found, so it must be
	 * a new image.  Refuse to overwrite anything that already has data
	 * where the header belongs.
	 */

	for (i = 0; i < sizeof (struct crypto_meta); i++) {
		if (buf[i] != '\0')
			return (EINVAL);
	}

	/* Build the on-disk header (network byte order) in buf. */
	marker = buf;
	bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
	marker += sizeof (lofi_crypto_magic);
	chead.version = htons(LOFI_CRYPTO_VERSION);
	bcopy(&(chead.version), marker, sizeof (chead.version));
	marker += sizeof (chead.version);
	marker += sizeof (chead.reserved1);
	chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
	bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

	/* write the header */
	error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* fix things up so it looks like we read this info */
	bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
	    sizeof (lofi_crypto_magic));
	lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
	lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
	lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
	return (0);
}
2039 2064
2040 2065 /*
2041 2066 * Check to see if the passed in signature is a valid one. If it is
2042 2067 * valid, return the index into lofi_compress_table.
2043 2068 *
2044 2069 * Return -1 if it is invalid
2045 2070 */
2046 2071 static int
2047 2072 lofi_compress_select(const char *signature)
2048 2073 {
2049 2074 int i;
2050 2075
2051 2076 for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
2052 2077 if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
2053 2078 return (i);
2054 2079 }
2055 2080
2056 2081 return (-1);
2057 2082 }
2058 2083
2059 2084 static int
2060 2085 lofi_init_compress(struct lofi_state *lsp)
2061 2086 {
2062 2087 char buf[DEV_BSIZE];
2063 2088 int compress_index;
2064 2089 ssize_t resid;
2065 2090 int error;
2066 2091
2067 2092 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
2068 2093 0, RLIM64_INFINITY, kcred, &resid);
2069 2094
2070 2095 if (error != 0)
2071 2096 return (error);
2072 2097
2073 2098 if ((compress_index = lofi_compress_select(buf)) == -1)
2074 2099 return (0);
2075 2100
2076 2101 /* compression and encryption are mutually exclusive */
2077 2102 if (lsp->ls_crypto_enabled)
2078 2103 return (ENOTSUP);
2079 2104
2080 2105 /* initialize compression info for compressed lofi */
2081 2106 lsp->ls_comp_algorithm_index = compress_index;
2082 2107 (void) strlcpy(lsp->ls_comp_algorithm,
2083 2108 lofi_compress_table[compress_index].l_name,
2084 2109 sizeof (lsp->ls_comp_algorithm));
2085 2110
2086 2111 /* Finally setup per-thread pre-allocated buffers */
2087 2112 lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads *
2088 2113 sizeof (struct compbuf), KM_SLEEP);
2089 2114
2090 2115 return (lofi_map_compressed_file(lsp, buf));
2091 2116 }
2092 2117
/*
 * map a file to a minor number. Return the minor number.
 *
 * This is the LOFI_MAP_FILE{,_MINOR} ioctl backend: it copies in the
 * user's lofi_ioctl, charges the per-zone lofi rctl, allocates a minor
 * (caller-picked or first-free), opens the backing file (read-write,
 * falling back to read-only), builds and initializes the lofi_state,
 * creates the block and raw minor nodes plus DDI size properties, and
 * finally copies the chosen minor back out to the user.
 *
 * Runs entirely under lofi_lock.  All failure paths funnel through the
 * err/nodeerr labels: once lsp exists, lofi_destroy() owns all cleanup
 * (vnode, minor id, rctl); before that, each resource is undone here.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	minor_t	minor = (minor_t)-1;
	struct lofi_state *lsp = NULL;
	struct lofi_ioctl *klip;
	int	error;
	struct vnode *vp = NULL;
	vattr_t	vattr;
	int	flag;
	dev_t	newdev;
	char	namebuf[50];

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	mutex_enter(&lofi_lock);

	/* Charge the zone's lofi resource control before committing. */
	mutex_enter(&curproc->p_lock);
	if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) {
		mutex_exit(&curproc->p_lock);
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (error);
	}
	mutex_exit(&curproc->p_lock);

	/* Refuse to map the same file twice. */
	if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly,
	    NULL) == 0) {
		error = EBUSY;
		goto err;
	}

	if (pickminor) {
		minor = (minor_t)id_allocff_nosleep(lofi_minor_id);
		if (minor == (minor_t)-1) {
			error = EAGAIN;
			goto err;
		}
	} else {
		/* Caller asked for a specific minor; it must be free. */
		if (ddi_get_soft_state(lofi_statep, klip->li_minor) != NULL) {
			error = EEXIST;
			goto err;
		}

		minor = (minor_t)
		    id_alloc_specific_nosleep(lofi_minor_id, klip->li_minor);
		ASSERT(minor != (minor_t)-1);
	}

	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error)
			goto err;
	}

	if (!V_ISLOFIABLE(vp->v_type)) {
		error = EINVAL;
		goto err;
	}

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
	if (error)
		goto err;

	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto err;
	}

	/* lsp alloc+init */

	error = ddi_soft_state_zalloc(lofi_statep, minor);
	if (error == DDI_FAILURE) {
		error = ENOMEM;
		goto err;
	}

	/* From here on, error cleanup is lofi_destroy()'s job. */
	lsp = ddi_get_soft_state(lofi_statep, minor);
	list_insert_tail(&lofi_list, lsp);

	newdev = makedevice(getmajor(dev), minor);
	lsp->ls_dev = newdev;
	zone_init_ref(&lsp->ls_zone);
	zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI);
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	/* Per-device taskq used to dispatch buffered I/O. */
	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, minor);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;
	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, minor,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid());

	if (lsp->ls_kstat == NULL) {
		error = ENOMEM;
		goto err;
	}

	lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
	kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID);

	lsp->ls_readonly = klip->li_readonly;

	/* These may read/write the backing file (header probing). */
	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	/* create minor nodes */

	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		error = ENXIO;
		goto err;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		/* remove block node */
		(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
		ddi_remove_minor_node(lofi_dip, namebuf);
		error = ENXIO;
		goto err;
	}

	/* create DDI properties */

	if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
	    lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
	    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE))
	    != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if (ddi_prop_update_string(newdev, lofi_dip, ZONE_PROP_NAME,
	    (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	kstat_install(lsp->ls_kstat);

	mutex_exit(&lofi_lock);

	if (rvalp)
		*rvalp = (int)minor;
	klip->li_minor = minor;
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

nodeerr:
	lofi_free_dev(newdev);
err:
	if (lsp != NULL) {
		/* lofi_destroy() releases vp, minor id and the rctl. */
		lofi_destroy(lsp, credp);
	} else {
		if (vp != NULL) {
			(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
			VN_RELE(vp);
		}

		if (minor != (minor_t)-1)
			id_free(lofi_minor_id, minor);

		rctl_decr_lofi(curproc->p_zone, 1);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}
2328 2356
2329 2357 /*
2330 2358 * unmap a file.
2331 2359 */
2332 2360 static int
2333 2361 lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
2334 2362 struct cred *credp, int ioctl_flag)
2335 2363 {
↓ open down ↓ |
80 lines elided |
↑ open up ↑ |
2336 2364 struct lofi_state *lsp;
2337 2365 struct lofi_ioctl *klip;
2338 2366 int err;
2339 2367
2340 2368 err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
2341 2369 if (err != 0)
2342 2370 return (err);
2343 2371
2344 2372 mutex_enter(&lofi_lock);
2345 2373 if (byfilename) {
2346 - if ((err = file_to_lofi(klip->li_filename, &lsp)) != 0) {
2374 + if ((err = file_to_lofi(klip->li_filename, klip->li_readonly,
2375 + &lsp)) != 0) {
2347 2376 mutex_exit(&lofi_lock);
2348 2377 return (err);
2349 2378 }
2350 2379 } else if (klip->li_minor == 0) {
2351 2380 mutex_exit(&lofi_lock);
2352 2381 free_lofi_ioctl(klip);
2353 2382 return (ENXIO);
2354 2383 } else {
2355 2384 lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
2356 2385 }
2357 2386
2358 2387 if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
2359 2388 mutex_exit(&lofi_lock);
2360 2389 free_lofi_ioctl(klip);
2361 2390 return (ENXIO);
2362 2391 }
2363 2392
2364 2393 klip->li_minor = getminor(lsp->ls_dev);
2365 2394
2366 2395 /*
2367 2396 * If it's still held open, we'll do one of three things:
2368 2397 *
2369 2398 * If no flag is set, just return EBUSY.
2370 2399 *
2371 2400 * If the 'cleanup' flag is set, unmap and remove the device when
2372 2401 * the last user finishes.
2373 2402 *
2374 2403 * If the 'force' flag is set, then we forcibly close the underlying
2375 2404 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl
2376 2405 * will return DKIO_DEV_GONE. When the device is last closed, the
2377 2406 * device will be cleaned up appropriately.
2378 2407 *
2379 2408 * This is complicated by the fact that we may have outstanding
2380 2409 * dispatched I/Os. Rather than having a single mutex to serialize all
2381 2410 * I/O, we keep a count of the number of outstanding I/O requests
2382 2411 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
2383 2412 * should be dispatched (ls_vp_closereq).
2384 2413 *
2385 2414 * We set the flag, wait for the number of outstanding I/Os to reach 0,
2386 2415 * and then close the underlying vnode.
2387 2416 */
2388 2417 if (is_opened(lsp)) {
2389 2418 if (klip->li_force) {
2390 2419 mutex_enter(&lsp->ls_vp_lock);
2391 2420 lsp->ls_vp_closereq = B_TRUE;
2392 2421 /* wake up any threads waiting on dkiocstate */
2393 2422 cv_broadcast(&lsp->ls_vp_cv);
2394 2423 while (lsp->ls_vp_iocount > 0)
2395 2424 cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
2396 2425 mutex_exit(&lsp->ls_vp_lock);
2397 2426
2398 2427 goto out;
2399 2428 } else if (klip->li_cleanup) {
2400 2429 lsp->ls_cleanup = 1;
2401 2430 mutex_exit(&lofi_lock);
2402 2431 free_lofi_ioctl(klip);
2403 2432 return (0);
2404 2433 }
2405 2434
2406 2435 mutex_exit(&lofi_lock);
2407 2436 free_lofi_ioctl(klip);
2408 2437 return (EBUSY);
2409 2438 }
2410 2439
2411 2440 out:
2412 2441 lofi_free_dev(lsp->ls_dev);
2413 2442 lofi_destroy(lsp, credp);
2414 2443
2415 2444 mutex_exit(&lofi_lock);
2416 2445 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2417 2446 free_lofi_ioctl(klip);
2418 2447 return (0);
2419 2448 }
2420 2449
/*
 * get the filename given the minor number, or the minor number given
 * the name.
 *
 * Backend for the LOFI_GET_FILENAME, LOFI_GET_MINOR and
 * LOFI_CHECK_COMPRESSED query ioctls.  Each case looks up the device
 * under lofi_lock (honoring zone visibility via lofi_access()), fills
 * in the relevant klip fields, and copies the result back out.
 */
/*ARGSUSED*/
static int
lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_ioctl *klip;
	struct lofi_state *lsp;
	int	error;

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	switch (which) {
	case LOFI_GET_FILENAME:
		/* minor 0 is the control device, never a mapping */
		if (klip->li_minor == 0) {
			free_lofi_ioctl(klip);
			return (EINVAL);
		}

		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
		if (lsp == NULL || lofi_access(lsp) != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}

		/*
		 * This may fail if, for example, we're trying to look
		 * up a zoned NFS path from the global zone.
		 */
		if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename,
		    sizeof (klip->li_filename), CRED()) != 0) {
			(void) strlcpy(klip->li_filename, "?",
			    sizeof (klip->li_filename));
		}

		klip->li_readonly = lsp->ls_readonly;

		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_GET_MINOR:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error == 0)
			klip->li_minor = getminor(lsp->ls_dev);
		mutex_exit(&lofi_lock);

		if (error == 0)
			error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);

		free_lofi_ioctl(klip);
		return (error);
	case LOFI_CHECK_COMPRESSED:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}

		/* report minor and the compression algorithm, if any */
		klip->li_minor = getminor(lsp->ls_dev);
		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));

		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	default:
		free_lofi_ioctl(klip);
		return (EINVAL);
	}
}
2504 2537
/*
 * ioctl(9E) entry point.
 *
 * Minor 0 is the lofi control device: it handles the LOFI_* management
 * ioctls (map/unmap require FWRITE; queries do not).  All other minors
 * are mapped devices and accept a small set of disk ioctls (DKIOC*)
 * sufficient to fake out utilities like newfs and format.
 */
static int
lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int	error;
	enum dkio_state dkstate;
	struct lofi_state *lsp;
	minor_t	minor;

	minor = getminor(dev);
	/* lofi ioctls only apply to the master device */
	if (minor == 0) {
		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;

		/*
		 * the query commands only need read-access - i.e., normal
		 * users are allowed to do those on the ctl device as
		 * long as they can open it read-only.
		 */
		switch (cmd) {
		case LOFI_MAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
		case LOFI_MAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
		case LOFI_UNMAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 1, credp, flag));
		case LOFI_UNMAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 0, credp, flag));
		case LOFI_GET_FILENAME:
			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
			    credp, flag));
		case LOFI_GET_MINOR:
			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
			    credp, flag));

		/*
		 * This API made limited sense when this value was fixed
		 * at LOFI_MAX_FILES.  However, its use to iterate
		 * across all possible devices in lofiadm means we don't
		 * want to return L_MAXMIN32, but the highest
		 * *allocated* minor.
		 */
		case LOFI_GET_MAXMINOR:
			minor = 0;

			mutex_enter(&lofi_lock);

			/* scan only the devices visible to this zone */
			for (lsp = list_head(&lofi_list); lsp != NULL;
			    lsp = list_next(&lofi_list, lsp)) {
				if (lofi_access(lsp) != 0)
					continue;

				if (getminor(lsp->ls_dev) > minor)
					minor = getminor(lsp->ls_dev);
			}

			mutex_exit(&lofi_lock);

			error = ddi_copyout(&minor, &lip->li_minor,
			    sizeof (minor), flag);
			if (error)
				return (EFAULT);
			return (0);

		case LOFI_CHECK_COMPRESSED:
			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
			    credp, flag));
		default:
			return (EINVAL);
		}
	}

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL || lsp->ls_vp_closereq) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}
	mutex_exit(&lofi_lock);

	/*
	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with
	 * EIO as if the device was no longer present.
	 */
	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
		return (EIO);

	/* these are for faking out utilities like newfs */
	switch (cmd) {
	case DKIOCGVTOC:
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			/* 32-bit caller: convert the vtoc before copyout */
			vtoctovtoc32(lsp->ls_vtoc, vtoc32);
			if (ddi_copyout(&vtoc32, (void *)arg,
			    sizeof (struct vtoc32), flag))
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&lsp->ls_vtoc, (void *)arg,
			    sizeof (struct vtoc), flag))
				return (EFAULT);
			break;
		}
		return (0);
	case DKIOCINFO:
		error = ddi_copyout(&lsp->ls_ci, (void *)arg,
		    sizeof (struct dk_cinfo), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
		    sizeof (struct dk_geom), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCSTATE:
		/*
		 * Normally, lofi devices are always in the INSERTED state.  If
		 * a device is forcefully unmapped, then the device transitions
		 * to the DKIO_DEV_GONE state.
		 */
		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
		    flag) != 0)
			return (EFAULT);

		/*
		 * Block until the state differs from the one the caller
		 * passed in (or a forced close is requested).  The iocount
		 * keeps lofi_unmap_file's forced path from closing the
		 * vnode out from under us.
		 */
		mutex_enter(&lsp->ls_vp_lock);
		lsp->ls_vp_iocount++;
		while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) &&
		    !lsp->ls_vp_closereq) {
			/*
			 * By virtue of having the device open, we know that
			 * 'lsp' will remain valid when we return.
			 */
			if (!cv_wait_sig(&lsp->ls_vp_cv,
			    &lsp->ls_vp_lock)) {
				lsp->ls_vp_iocount--;
				cv_broadcast(&lsp->ls_vp_cv);
				mutex_exit(&lsp->ls_vp_lock);
				return (EINTR);
			}
		}

		dkstate = (!lsp->ls_vp_closereq && lsp->ls_vp != NULL ?
		    DKIO_INSERTED : DKIO_DEV_GONE);
		lsp->ls_vp_iocount--;
		cv_broadcast(&lsp->ls_vp_cv);
		mutex_exit(&lsp->ls_vp_lock);

		if (ddi_copyout(&dkstate, (void *)arg,
		    sizeof (dkstate), flag) != 0)
			return (EFAULT);
		return (0);
	default:
		return (ENOTTY);
	}
}
2677 2710
/*
 * cb_ops(9S): character/block driver entry points for lofi.
 */
static struct cb_ops lofi_cb_ops = {
	lofi_open,		/* open */
	lofi_close,		/* close */
	lofi_strategy,		/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lofi_read,		/* read */
	lofi_write,		/* write */
	lofi_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	0,			/* streamtab */
	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	lofi_aread,		/* async read */
	lofi_awrite		/* async write */
};
2698 2731
/*
 * dev_ops(9S): autoconfiguration entry points for lofi.
 */
static struct dev_ops lofi_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	lofi_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lofi_attach,		/* attach */
	lofi_detach,		/* detach */
	nodev,			/* reset */
	&lofi_cb_ops,		/* driver operations */
	NULL,			/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
2713 2746
/*
 * Module linkage: register lofi as a loadable driver module.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"loopback file driver",
	&lofi_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
2725 2758
/*
 * _init(9E): initialize global state (device list, soft state handle,
 * global lock) and register the module.  If mod_install() fails, the
 * setup performed here is torn down again before returning.
 */
int
_init(void)
{
	int error;

	list_create(&lofi_list, sizeof (struct lofi_state),
	    offsetof(struct lofi_state, ls_list));

	error = ddi_soft_state_init(&lofi_statep,
	    sizeof (struct lofi_state), 0);
	if (error)
		return (error);

	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error) {
		/* undo the setup above; nothing else references it yet */
		mutex_destroy(&lofi_lock);
		ddi_soft_state_fini(&lofi_statep);
		list_destroy(&lofi_list);
	}

	return (error);
}
2750 2783
/*
 * _fini(9E): refuse to unload while any lofi device is still mapped,
 * then unregister the module and release global state.
 */
int
_fini(void)
{
	int	error;

	mutex_enter(&lofi_lock);

	/* any entry on lofi_list is an active mapping */
	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	mutex_exit(&lofi_lock);

	error = mod_remove(&modlinkage);
	if (error)
		return (error);

	mutex_destroy(&lofi_lock);
	ddi_soft_state_fini(&lofi_statep);
	list_destroy(&lofi_list);

	return (error);
}
2775 2808
2776 2809 int
2777 2810 _info(struct modinfo *modinfop)
2778 2811 {
2779 2812 return (mod_info(&modlinkage, modinfop));
2780 2813 }
↓ open down ↓ |
286 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX