7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/avs/ns/sv/sv.c
+++ new/usr/src/uts/common/avs/ns/sv/sv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 *
25 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Storage Volume Character and Block Driver (SV)
30 30 *
31 31 * This driver implements a simplistic /dev/{r}dsk/ interface to a
32 32 * specified disk volume that is otherwise managed by the Prism
33 33 * software. The SV driver layers itself onto the underlying disk
34 34 * device driver by changing function pointers in the cb_ops
35 35 * structure.
36 36 *
37 37 * CONFIGURATION:
38 38 *
39 39 * 1. Configure the driver using the svadm utility.
40 40 * 2. Access the device as before through /dev/rdsk/c?t?d?s?
41 41 *
42 42 * LIMITATIONS:
43 43 *
44 44 * This driver should NOT be used to share a device between another
45 45 * DataServices user interface module (e.g., STE) and a user accessing
46 46 * the device through the block device in O_WRITE mode. This is because
47 47 * writes through the block device are asynchronous (due to the page
48 48 * cache) and so consistency between the block device user and the
49 49 * STE user cannot be guaranteed.
50 50 *
51 51 * Data is copied between system struct buf(9s) and nsc_vec_t. This is
52 52 * wasteful and slow.
53 53 */
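
The cb_ops interposition mentioned above is easiest to see in miniature. Below is a minimal sketch of the technique, not taken from this file: the xx_ names are hypothetical, and sv_enable() later in this file performs the full version of this swap for strategy, open, close, read, write and ioctl.

	#include <sys/conf.h>
	#include <sys/buf.h>

	static int (*xx_real_strategy)(struct buf *);	/* saved entry point */

	static int
	xx_strategy(struct buf *bp)
	{
		/* observe or redirect the I/O here ... */
		return (xx_real_strategy(bp));	/* then call through */
	}

	/* splice xx_strategy() in ahead of the target driver's strategy */
	static void
	xx_interpose(struct cb_ops *cb)
	{
		xx_real_strategy = cb->cb_strategy;
		cb->cb_strategy = xx_strategy;
	}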
54 54
55 55 #include <sys/debug.h>
56 56 #include <sys/types.h>
57 57
58 58 #include <sys/ksynch.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/errno.h>
61 61 #include <sys/varargs.h>
62 62 #include <sys/file.h>
63 63 #include <sys/open.h>
64 64 #include <sys/conf.h>
65 65 #include <sys/cred.h>
66 66 #include <sys/buf.h>
67 67 #include <sys/uio.h>
68 68 #ifndef DS_DDICT
69 69 #include <sys/pathname.h>
70 70 #endif
71 71 #include <sys/aio_req.h>
72 72 #include <sys/dkio.h>
73 73 #include <sys/vtoc.h>
74 74 #include <sys/cmn_err.h>
75 75 #include <sys/modctl.h>
76 76 #include <sys/ddi.h>
77 77 #include <sys/sysmacros.h>
78 78 #include <sys/sunddi.h>
79 79 #include <sys/sunldi.h>
80 80 #include <sys/nsctl/nsvers.h>
81 81
82 82 #include <sys/nsc_thread.h>
83 83 #include <sys/unistat/spcs_s.h>
84 84 #include <sys/unistat/spcs_s_k.h>
85 85 #include <sys/unistat/spcs_errors.h>
86 86
87 87 #ifdef DS_DDICT
88 88 #include "../contract.h"
89 89 #endif
90 90
91 91 #include "../nsctl.h"
92 92
93 93
94 94 #include <sys/sdt.h> /* dtrace is S10 or later */
95 95
96 96 #include "sv.h"
97 97 #include "sv_impl.h"
98 98 #include "sv_efi.h"
99 99
100 100 #define MAX_EINTR_COUNT 1000
101 101
102 102 /*
103 103 * sv_mod_status
104 104 */
105 105 #define SV_PREVENT_UNLOAD 1
106 106 #define SV_ALLOW_UNLOAD 2
107 107
108 108 static const int sv_major_rev = ISS_VERSION_MAJ; /* Major number */
109 109 static const int sv_minor_rev = ISS_VERSION_MIN; /* Minor number */
110 110 static const int sv_micro_rev = ISS_VERSION_MIC; /* Micro number */
111 111 static const int sv_baseline_rev = ISS_VERSION_NUM; /* Baseline number */
112 112
113 113 #ifdef DKIOCPARTITION
114 114 /*
115 115 * CRC32 polynomial table needed for computing the checksums
116 116 * in an EFI vtoc.
117 117 */
118 118 static const uint32_t sv_crc32_table[256] = { CRC32_TABLE };
119 119 #endif
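
As background on how a table like this is consumed: the byte-at-a-time loop below is a minimal sketch, assuming the table was generated for the reflected 0xEDB88320 polynomial on which EFI checksums are built; the xx_ helper name is hypothetical and the kernel's EFI code remains the authoritative implementation.

	static uint32_t
	xx_crc32(uint32_t crc, const uint8_t *buf, size_t len)
	{
		crc = ~crc;		/* conventional pre-inversion */
		while (len--)
			crc = sv_crc32_table[(crc ^ *buf++) & 0xff] ^
			    (crc >> 8);
		return (~crc);		/* and post-inversion */
	}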
120 120
121 121 static clock_t sv_config_time; /* Time of successful {en,dis}able */
122 122 static int sv_debug; /* Set non-zero for debug to syslog */
123 123 static int sv_mod_status; /* Set to prevent modunload */
124 124
125 125 static dev_info_t *sv_dip; /* Single DIP for driver */
126 126 static kmutex_t sv_mutex; /* Protect global lists, etc. */
127 127
128 128 static nsc_mem_t *sv_mem; /* nsctl memory allocator token */
129 129
130 130
131 131 /*
132 132 * Per device and per major state.
133 133 */
134 134
135 135 #ifndef _SunOS_5_6
136 136 #define UNSAFE_ENTER()
137 137 #define UNSAFE_EXIT()
138 138 #else
139 139 #define UNSAFE_ENTER() mutex_enter(&unsafe_driver)
140 140 #define UNSAFE_EXIT() mutex_exit(&unsafe_driver)
141 141 #endif
142 142
143 143 /* hash table of major dev structures */
144 144 static sv_maj_t *sv_majors[SV_MAJOR_HASH_CNT] = {0};
145 145 static sv_dev_t *sv_devs; /* array of per device structures */
146 146 static int sv_max_devices; /* SV version of nsc_max_devices() */
147 147 static int sv_ndevices; /* number of SV enabled devices */
148 148
149 149 /*
150 150 * Threading.
151 151 */
152 152
153 153 int sv_threads_max = 1024; /* maximum # to dynamically alloc */
154 154 int sv_threads = 32; /* # to pre-allocate (see sv.conf) */
155 155 int sv_threads_extra = 0; /* addl # we would have alloc'ed */
156 156
157 157 static nstset_t *sv_tset; /* the threadset pointer */
158 158
159 159 static int sv_threads_hysteresis = 4; /* hysteresis for threadset resizing */
160 160 static int sv_threads_dev = 2; /* # of threads to alloc per device */
161 161 static int sv_threads_inc = 8; /* increment for changing the set */
162 162 static int sv_threads_needed; /* number of threads needed */
163 163 static int sv_no_threads; /* number of nsc_create errors */
164 164 static int sv_max_nlive; /* max number of threads running */
165 165
166 166
167 167
168 168 /*
169 169 * nsctl fd callbacks.
170 170 */
171 171
172 172 static int svattach_fd(blind_t);
173 173 static int svdetach_fd(blind_t);
174 174
175 175 static nsc_def_t sv_fd_def[] = {
176 176 { "Attach", (uintptr_t)svattach_fd, },
177 177 { "Detach", (uintptr_t)svdetach_fd, },
178 178 { 0, 0, }
179 179 };
180 180
181 181 /*
182 182 * cb_ops functions.
183 183 */
184 184
185 185 static int svopen(dev_t *, int, int, cred_t *);
186 186 static int svclose(dev_t, int, int, cred_t *);
187 187 static int svioctl(dev_t, int, intptr_t, int, cred_t *, int *);
188 188 static int svprint(dev_t, char *);
189 189
190 190 /*
191 191 * These next functions are layered into the underlying driver's devops.
192 192 */
193 193
194 194 static int sv_lyr_open(dev_t *, int, int, cred_t *);
195 195 static int sv_lyr_close(dev_t, int, int, cred_t *);
196 196 static int sv_lyr_strategy(struct buf *);
197 197 static int sv_lyr_read(dev_t, struct uio *, cred_t *);
198 198 static int sv_lyr_write(dev_t, struct uio *, cred_t *);
199 199 static int sv_lyr_aread(dev_t, struct aio_req *, cred_t *);
200 200 static int sv_lyr_awrite(dev_t, struct aio_req *, cred_t *);
201 201 static int sv_lyr_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
202 202
203 203 static struct cb_ops sv_cb_ops = {
204 204 svopen, /* open */
205 205 svclose, /* close */
206 206 nulldev, /* strategy */
207 207 svprint,
208 208 nodev, /* dump */
209 209 nodev, /* read */
210 210 nodev, /* write */
211 211 svioctl,
212 212 nodev, /* devmap */
213 213 nodev, /* mmap */
214 214 nodev, /* segmap */
215 215 nochpoll, /* poll */
216 216 ddi_prop_op,
217 217 NULL, /* NOT a stream */
218 218 D_NEW | D_MP | D_64BIT,
219 219 CB_REV,
220 220 nodev, /* aread */
221 221 nodev, /* awrite */
222 222 };
223 223
224 224
225 225 /*
226 226 * dev_ops functions.
227 227 */
228 228
229 229 static int sv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
230 230 static int sv_attach(dev_info_t *, ddi_attach_cmd_t);
231 231 static int sv_detach(dev_info_t *, ddi_detach_cmd_t);
232 232
233 233 static struct dev_ops sv_ops = {
234 234 DEVO_REV,
235 235 0,
236 236 sv_getinfo,
237 237 nulldev, /* identify */
238 238 nulldev, /* probe */
239 239 sv_attach,
240 240 sv_detach,
241 241 nodev, /* reset */
242 242 &sv_cb_ops,
243 243 (struct bus_ops *)0
244 244 };
245 245
246 246 /*
247 247 * Module linkage.
248 248 */
249 249
250 250 extern struct mod_ops mod_driverops;
251 251
252 252 static struct modldrv modldrv = {
253 253 &mod_driverops,
254 254 "nws:Storage Volume:" ISS_VERSION_STR,
255 255 &sv_ops
256 256 };
257 257
258 258 static struct modlinkage modlinkage = {
259 259 MODREV_1,
260 - &modldrv,
261 - 0
260 + { &modldrv, NULL }
262 261 };
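
The hunk above is the substance of this changeset: ml_linkage inside struct modlinkage is an array of pointers, so the old flat initializer relied on brace elision, which GCC's -Wmissing-braces (no longer suppressed in Makefile.uts) reports. A standalone reproduction of the warning, using hypothetical xx_ names:

	#include <stddef.h>

	struct xx_modlinkage {
		int	rev;
		void	*linkage[4];	/* nested aggregate member */
	};

	static int xx_drv;

	/* brace elision: legal C, but -Wmissing-braces warns here */
	static struct xx_modlinkage xx_old = { 1, &xx_drv, 0 };

	/* fully braced, matching the fix above: no warning */
	static struct xx_modlinkage xx_new = { 1, { &xx_drv, NULL } };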
263 262
264 263
265 264 int
266 265 _init(void)
267 266 {
268 267 int error;
269 268
270 269 mutex_init(&sv_mutex, NULL, MUTEX_DRIVER, NULL);
271 270
272 271 if ((error = mod_install(&modlinkage)) != 0) {
273 272 mutex_destroy(&sv_mutex);
274 273 return (error);
275 274 }
276 275
277 276 #ifdef DEBUG
278 277 cmn_err(CE_CONT, "!sv (revision %d.%d.%d.%d, %s, %s)\n",
279 278 sv_major_rev, sv_minor_rev, sv_micro_rev, sv_baseline_rev,
280 279 ISS_VERSION_STR, BUILD_DATE_STR);
281 280 #else
282 281 if (sv_micro_rev) {
283 282 cmn_err(CE_CONT, "!sv (revision %d.%d.%d, %s, %s)\n",
284 283 sv_major_rev, sv_minor_rev, sv_micro_rev,
285 284 ISS_VERSION_STR, BUILD_DATE_STR);
286 285 } else {
287 286 cmn_err(CE_CONT, "!sv (revision %d.%d, %s, %s)\n",
288 287 sv_major_rev, sv_minor_rev,
289 288 ISS_VERSION_STR, BUILD_DATE_STR);
290 289 }
291 290 #endif
292 291
293 292 return (error);
294 293 }
295 294
296 295
297 296 int
298 297 _fini(void)
299 298 {
300 299 int error;
301 300
302 301 if ((error = mod_remove(&modlinkage)) != 0)
303 302 return (error);
304 303
305 304 mutex_destroy(&sv_mutex);
306 305
307 306 return (error);
308 307 }
309 308
310 309
311 310 int
312 311 _info(struct modinfo *modinfop)
313 312 {
314 313 return (mod_info(&modlinkage, modinfop));
315 314 }
316 315
317 316
318 317 /*
319 318 * Locking & State.
320 319 *
321 320 * sv_mutex protects config information - sv_maj_t and sv_dev_t lists;
322 321 * threadset creation and sizing; sv_ndevices.
323 322 *
324 323 * If we need to hold both sv_mutex and sv_lock, then the sv_mutex
325 324 * must be acquired first.
326 325 *
327 326 * sv_lock protects the sv_dev_t structure for an individual device.
328 327 *
329 328 * sv_olock protects the otyp/open members of the sv_dev_t. If we need
330 329 * to hold both sv_lock and sv_olock, then the sv_lock must be acquired
331 330 * first.
332 331 *
333 332 * nsc_reserve/nsc_release are used in NSC_MULTI mode to allow multiple
334 333 * I/O operations to a device simultaneously, as above.
335 334 *
336 335 * All nsc_open/nsc_close/nsc_reserve/nsc_release operations that occur
337 336 * with sv_lock write-locked must be done with (sv_state == SV_PENDING)
338 337 * and (sv_pending == curthread) so that any recursion through
339 338 * sv_lyr_open/sv_lyr_close can be detected.
340 339 */
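
A minimal sketch of the acquisition order that the comment above mandates, with releases in the reverse order; svp stands for any sv_dev_t:

	mutex_enter(&sv_mutex);			/* 1: global config lock */
	rw_enter(&svp->sv_lock, RW_WRITER);	/* 2: per-device state */
	mutex_enter(&svp->sv_olock);		/* 3: otyp/open members */

	/* ... manipulate config, device state and open counts ... */

	mutex_exit(&svp->sv_olock);
	rw_exit(&svp->sv_lock);
	mutex_exit(&sv_mutex);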
341 340
342 341
343 342 static int
344 343 sv_init_devs(void)
345 344 {
346 345 int i;
347 346
348 347 ASSERT(MUTEX_HELD(&sv_mutex));
349 348
350 349 if (sv_max_devices > 0)
351 350 return (0);
352 351
353 352 sv_max_devices = nsc_max_devices();
354 353
355 354 if (sv_max_devices <= 0) {
356 355 /* nsctl is not attached (nskernd not running) */
357 356 if (sv_debug > 0)
358 357 cmn_err(CE_CONT, "!sv: nsc_max_devices = 0\n");
359 358 return (EAGAIN);
360 359 }
361 360
362 361 sv_devs = nsc_kmem_zalloc((sv_max_devices * sizeof (*sv_devs)),
363 362 KM_NOSLEEP, sv_mem);
364 363
365 364 if (sv_devs == NULL) {
366 365 cmn_err(CE_WARN, "!sv: could not allocate sv_devs array");
367 366 return (ENOMEM);
368 367 }
369 368
370 369 for (i = 0; i < sv_max_devices; i++) {
371 370 mutex_init(&sv_devs[i].sv_olock, NULL, MUTEX_DRIVER, NULL);
372 371 rw_init(&sv_devs[i].sv_lock, NULL, RW_DRIVER, NULL);
373 372 }
374 373
375 374 if (sv_debug > 0)
376 375 cmn_err(CE_CONT, "!sv: sv_init_devs successful\n");
377 376
378 377 return (0);
379 378 }
380 379
381 380
382 381 static int
383 382 sv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
384 383 {
385 384 int rc;
386 385
387 386 switch (cmd) {
388 387
389 388 case DDI_ATTACH:
390 389 sv_dip = dip;
391 390
392 391 if (ddi_create_minor_node(dip, "sv", S_IFCHR,
393 392 0, DDI_PSEUDO, 0) != DDI_SUCCESS)
394 393 goto failed;
395 394
396 395 mutex_enter(&sv_mutex);
397 396
398 397 sv_mem = nsc_register_mem("SV", NSC_MEM_LOCAL, 0);
399 398 if (sv_mem == NULL) {
400 399 mutex_exit(&sv_mutex);
401 400 goto failed;
402 401 }
403 402
404 403 rc = sv_init_devs();
405 404 if (rc != 0 && rc != EAGAIN) {
406 405 mutex_exit(&sv_mutex);
407 406 goto failed;
408 407 }
409 408
410 409 mutex_exit(&sv_mutex);
411 410
412 411
413 412 ddi_report_dev(dip);
414 413
415 414 sv_threads = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
416 415 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
417 416 "sv_threads", sv_threads);
418 417
419 418 if (sv_debug > 0)
420 419 cmn_err(CE_CONT, "!sv: sv_threads=%d\n", sv_threads);
421 420
422 421 if (sv_threads > sv_threads_max)
423 422 sv_threads_max = sv_threads;
424 423
425 424 return (DDI_SUCCESS);
426 425
427 426 default:
428 427 return (DDI_FAILURE);
429 428 }
430 429
431 430 failed:
432 431 DTRACE_PROBE(sv_attach_failed);
433 432 (void) sv_detach(dip, DDI_DETACH);
434 433 return (DDI_FAILURE);
435 434 }
436 435
437 436
438 437 static int
439 438 sv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
440 439 {
441 440 sv_dev_t *svp;
442 441 int i;
443 442
444 443 switch (cmd) {
445 444
446 445 case DDI_DETACH:
447 446
448 447 /*
449 448 * Check that everything is disabled.
450 449 */
451 450
452 451 mutex_enter(&sv_mutex);
453 452
454 453 if (sv_mod_status == SV_PREVENT_UNLOAD) {
455 454 mutex_exit(&sv_mutex);
456 455 DTRACE_PROBE(sv_detach_err_prevent);
457 456 return (DDI_FAILURE);
458 457 }
459 458
460 459 for (i = 0; sv_devs && i < sv_max_devices; i++) {
461 460 svp = &sv_devs[i];
462 461
463 462 if (svp->sv_state != SV_DISABLE) {
464 463 mutex_exit(&sv_mutex);
465 464 DTRACE_PROBE(sv_detach_err_busy);
466 465 return (DDI_FAILURE);
467 466 }
468 467 }
469 468
470 469
471 470 for (i = 0; sv_devs && i < sv_max_devices; i++) {
472 471 mutex_destroy(&sv_devs[i].sv_olock);
473 472 rw_destroy(&sv_devs[i].sv_lock);
474 473 }
475 474
476 475 if (sv_devs) {
477 476 nsc_kmem_free(sv_devs,
478 477 (sv_max_devices * sizeof (*sv_devs)));
479 478 sv_devs = NULL;
480 479 }
481 480 sv_max_devices = 0;
482 481
483 482 if (sv_mem) {
484 483 nsc_unregister_mem(sv_mem);
485 484 sv_mem = NULL;
486 485 }
487 486
488 487 mutex_exit(&sv_mutex);
489 488
490 489 /*
491 490 * Remove all minor nodes.
492 491 */
493 492
494 493 ddi_remove_minor_node(dip, NULL);
495 494 sv_dip = NULL;
496 495
497 496 return (DDI_SUCCESS);
498 497
499 498 default:
500 499 return (DDI_FAILURE);
501 500 }
502 501 }
503 502
504 503 static sv_maj_t *
505 504 sv_getmajor(const dev_t dev)
506 505 {
507 506 sv_maj_t **insert, *maj;
508 507 major_t umaj = getmajor(dev);
509 508
510 509 /*
511 510 * See if the hash table entry, or one of the hash chains
512 511 * is already allocated for this major number
513 512 */
514 513 if ((maj = sv_majors[SV_MAJOR_HASH(umaj)]) != 0) {
515 514 do {
516 515 if (maj->sm_major == umaj)
517 516 return (maj);
518 517 } while ((maj = maj->sm_next) != 0);
519 518 }
520 519
521 520 /*
522 521	 * If the sv_mutex is held, there is a design flaw, as the only
523 522	 * callers that do not hold the mutex should be sv_enable() or
524 523	 * sv_dev_to_sv(). Return an error instead of panicking the system.
525 524 */
526 525 if (MUTEX_HELD(&sv_mutex)) {
527 526 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
528 527 return (NULL);
529 528 }
530 529
531 530 /*
532 531 * Determine where to allocate a new element in the hash table
533 532 */
534 533 mutex_enter(&sv_mutex);
535 534 insert = &(sv_majors[SV_MAJOR_HASH(umaj)]);
536 535 for (maj = *insert; maj; maj = maj->sm_next) {
537 536
538 537 /* Did another thread beat us to it? */
539 538 if (maj->sm_major == umaj)
540 539 return (maj);
541 540
542 541 /* Find a NULL insert point? */
543 542 if (maj->sm_next == NULL)
544 543 insert = &maj->sm_next;
545 544 }
546 545
547 546 /*
548 547 * Located the new insert point
549 548 */
550 549 *insert = nsc_kmem_zalloc(sizeof (*maj), KM_NOSLEEP, sv_mem);
551 550 if ((maj = *insert) != 0)
552 551 maj->sm_major = umaj;
553 552 else
554 553 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
555 554
556 555 mutex_exit(&sv_mutex);
557 556
558 557 return (maj);
559 558 }
560 559
561 560 /* ARGSUSED */
562 561
563 562 static int
564 563 sv_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
565 564 {
566 565 int rc = DDI_FAILURE;
567 566
568 567 switch (infocmd) {
569 568
570 569 case DDI_INFO_DEVT2DEVINFO:
571 570 *result = sv_dip;
572 571 rc = DDI_SUCCESS;
573 572 break;
574 573
575 574 case DDI_INFO_DEVT2INSTANCE:
576 575 /*
577 576 * We only have a single instance.
578 577 */
579 578 *result = 0;
580 579 rc = DDI_SUCCESS;
581 580 break;
582 581
583 582 default:
584 583 break;
585 584 }
586 585
587 586 return (rc);
588 587 }
589 588
590 589
591 590 /*
592 591 * Hashing of devices onto major device structures.
593 592 *
594 593 * Individual device structures are hashed onto one of the sm_hash[]
595 594 * buckets in the relevant major device structure.
596 595 *
597 596 * Hash insertion and deletion -must- be done with sv_mutex held. Hash
598 597 * searching does not require the mutex because of the sm_seq member.
599 598 * sm_seq is incremented on each insertion (-after- hash chain pointer
600 599 * manipulation) and each deletion (-before- hash chain pointer
601 600 * manipulation). When searching the hash chain, the seq number is
602 601 * checked before accessing each device structure, if the seq number has
603 602 * changed, then we restart the search from the top of the hash chain.
604 603 * If we restart more than SV_HASH_RETRY times, we take sv_mutex and search
605 604 * the hash chain (we are guaranteed that this search cannot be
606 605 * interrupted).
607 606 */
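
Condensed, the protocol pairs writer-side sequence bumps with reader-side re-checks. A sketch distilled from sv_get_state(), sv_rm_hash() and sv_dev_to_sv() below, abbreviated to the ordering that matters:

	/* writer, inserting (under sv_mutex): link first, bump after */
	*insert = svp;
	maj->sm_seq++;

	/* writer, deleting (under sv_mutex): bump first, unlink after */
	maj->sm_seq++;
	*svpp = svp->sv_hash;

	/* lock-free reader: restart whenever the sequence moves */
	seq = maj->sm_seq;
	for (svp = *hb; svp != NULL; svp = next) {
		next = svp->sv_hash;
		nsc_membar_stld();	/* order loads vs. the re-check */
		if (maj->sm_seq != seq)
			goto retry;	/* chain changed: search again */
	}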
608 607
609 608 #define SV_HASH_RETRY 16
610 609
611 610 static sv_dev_t *
612 611 sv_dev_to_sv(const dev_t dev, sv_maj_t **majpp)
613 612 {
614 613 minor_t umin = getminor(dev);
615 614 sv_dev_t **hb, *next, *svp;
616 615 sv_maj_t *maj;
617 616 int seq;
618 617 int try;
619 618
620 619 /* Get major hash table */
621 620 maj = sv_getmajor(dev);
622 621 if (majpp)
623 622 *majpp = maj;
624 623 if (maj == NULL)
625 624 return (NULL);
626 625
627 626 if (maj->sm_inuse == 0) {
628 627 DTRACE_PROBE1(
629 628 sv_dev_to_sv_end,
630 629 dev_t, dev);
631 630 return (NULL);
632 631 }
633 632
634 633 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
635 634 try = 0;
636 635
637 636 retry:
638 637 if (try > SV_HASH_RETRY)
639 638 mutex_enter(&sv_mutex);
640 639
641 640 seq = maj->sm_seq;
642 641 for (svp = *hb; svp; svp = next) {
643 642 next = svp->sv_hash;
644 643
645 644 nsc_membar_stld(); /* preserve register load order */
646 645
647 646 if (maj->sm_seq != seq) {
648 647 DTRACE_PROBE1(sv_dev_to_sv_retry, dev_t, dev);
649 648 try++;
650 649 goto retry;
651 650 }
652 651
653 652 if (svp->sv_dev == dev)
654 653 break;
655 654 }
656 655
657 656 if (try > SV_HASH_RETRY)
658 657 mutex_exit(&sv_mutex);
659 658
660 659 return (svp);
661 660 }
662 661
663 662
664 663 /*
665 664 * Must be called with sv_mutex held.
666 665 */
667 666
668 667 static int
669 668 sv_get_state(const dev_t udev, sv_dev_t **svpp)
670 669 {
671 670 sv_dev_t **hb, **insert, *svp;
672 671 sv_maj_t *maj;
673 672 minor_t umin;
674 673 int i;
675 674
676 675 /* Get major hash table */
677 676 if ((maj = sv_getmajor(udev)) == NULL)
678 677 return (NULL);
679 678
680 679 /* Determine which minor hash table */
681 680 umin = getminor(udev);
682 681 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
683 682
684 683 /* look for clash */
685 684
686 685 insert = hb;
687 686
688 687 for (svp = *hb; svp; svp = svp->sv_hash) {
689 688 if (svp->sv_dev == udev)
690 689 break;
691 690
692 691 if (svp->sv_hash == NULL)
693 692 insert = &svp->sv_hash;
694 693 }
695 694
696 695 if (svp) {
697 696 DTRACE_PROBE1(
698 697 sv_get_state_enabled,
699 698 dev_t, udev);
700 699 return (SV_EENABLED);
701 700 }
702 701
703 702 /* look for spare sv_devs slot */
704 703
705 704 for (i = 0; i < sv_max_devices; i++) {
706 705 svp = &sv_devs[i];
707 706
708 707 if (svp->sv_state == SV_DISABLE)
709 708 break;
710 709 }
711 710
712 711 if (i >= sv_max_devices) {
713 712 DTRACE_PROBE1(
714 713 sv_get_state_noslots,
715 714 dev_t, udev);
716 715 return (SV_ENOSLOTS);
717 716 }
718 717
719 718 svp->sv_state = SV_PENDING;
720 719 svp->sv_pending = curthread;
721 720
722 721 *insert = svp;
723 722 svp->sv_hash = NULL;
724 723 maj->sm_seq++; /* must be after the store to the hash chain */
725 724
726 725 *svpp = svp;
727 726
728 727 /*
729 728 * We do not know the size of the underlying device at
730 729 * this stage, so initialise "nblocks" property to
731 730 * zero, and update it whenever we succeed in
732 731 * nsc_reserve'ing the underlying nsc_fd_t.
733 732 */
734 733
735 734 svp->sv_nblocks = 0;
736 735
737 736 return (0);
738 737 }
739 738
740 739
741 740 /*
742 741	 * Remove a device structure from its hash chain.
743 742 * Must be called with sv_mutex held.
744 743 */
745 744
746 745 static void
747 746 sv_rm_hash(sv_dev_t *svp)
748 747 {
749 748 sv_dev_t **svpp;
750 749 sv_maj_t *maj;
751 750
752 751 /* Get major hash table */
753 752 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
754 753 return;
755 754
756 755 /* remove svp from hash chain */
757 756
758 757 svpp = &(maj->sm_hash[SV_MINOR_HASH(getminor(svp->sv_dev))]);
759 758 while (*svpp) {
760 759 if (*svpp == svp) {
761 760 /*
762 761 * increment of sm_seq must be before the
763 762 * removal from the hash chain
764 763 */
765 764 maj->sm_seq++;
766 765 *svpp = svp->sv_hash;
767 766 break;
768 767 }
769 768
770 769 svpp = &(*svpp)->sv_hash;
771 770 }
772 771
773 772 svp->sv_hash = NULL;
774 773 }
775 774
776 775 /*
777 776 * Free (disable) a device structure.
778 777 * Must be called with sv_lock(RW_WRITER) and sv_mutex held, and will
779 778 * perform the exits during its processing.
780 779 */
781 780
782 781 static int
783 782 sv_free(sv_dev_t *svp, const int error)
784 783 {
785 784 struct cb_ops *cb_ops;
786 785 sv_maj_t *maj;
787 786
788 787 /* Get major hash table */
789 788 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
790 789 return (NULL);
791 790
792 791 svp->sv_state = SV_PENDING;
793 792 svp->sv_pending = curthread;
794 793
795 794 /*
796 795 * Close the fd's before removing from the hash or swapping
797 796 * back the cb_ops pointers so that the cache flushes before new
798 797 * io can come in.
799 798 */
800 799
801 800 if (svp->sv_fd) {
802 801 (void) nsc_close(svp->sv_fd);
803 802 svp->sv_fd = 0;
804 803 }
805 804
806 805 sv_rm_hash(svp);
807 806
808 807 if (error != SV_ESDOPEN &&
809 808 error != SV_ELYROPEN && --maj->sm_inuse == 0) {
810 809
811 810 if (maj->sm_dev_ops)
812 811 cb_ops = maj->sm_dev_ops->devo_cb_ops;
813 812 else
814 813 cb_ops = NULL;
815 814
816 815 if (cb_ops && maj->sm_strategy != NULL) {
817 816 cb_ops->cb_strategy = maj->sm_strategy;
818 817 cb_ops->cb_close = maj->sm_close;
819 818 cb_ops->cb_ioctl = maj->sm_ioctl;
820 819 cb_ops->cb_write = maj->sm_write;
821 820 cb_ops->cb_open = maj->sm_open;
822 821 cb_ops->cb_read = maj->sm_read;
823 822 cb_ops->cb_flag = maj->sm_flag;
824 823
825 824 if (maj->sm_awrite)
826 825 cb_ops->cb_awrite = maj->sm_awrite;
827 826
828 827 if (maj->sm_aread)
829 828 cb_ops->cb_aread = maj->sm_aread;
830 829
831 830 /*
832 831 * corbin XXX
833 832 * Leave backing device ops in maj->sm_*
834 833 * to handle any requests that might come
835 834 * in during the disable. This could be
836 835 * a problem however if the backing device
837 836 * driver is changed while we process these
838 837 * requests.
839 838 *
840 839 * maj->sm_strategy = 0;
841 840 * maj->sm_awrite = 0;
842 841 * maj->sm_write = 0;
843 842 * maj->sm_ioctl = 0;
844 843 * maj->sm_close = 0;
845 844 * maj->sm_aread = 0;
846 845 * maj->sm_read = 0;
847 846 * maj->sm_open = 0;
848 847 * maj->sm_flag = 0;
849 848 *
850 849 */
851 850 }
852 851
853 852 if (maj->sm_dev_ops) {
854 853 maj->sm_dev_ops = 0;
855 854 }
856 855 }
857 856
858 857 if (svp->sv_lh) {
859 858 cred_t *crp = ddi_get_cred();
860 859
861 860 /*
862 861 * Close the protective layered driver open using the
863 862 * Sun Private layered driver i/f.
864 863 */
865 864
866 865 (void) ldi_close(svp->sv_lh, FREAD|FWRITE, crp);
867 866 svp->sv_lh = NULL;
868 867 }
869 868
870 869 svp->sv_timestamp = nsc_lbolt();
871 870 svp->sv_state = SV_DISABLE;
872 871 svp->sv_pending = NULL;
873 872 rw_exit(&svp->sv_lock);
874 873 mutex_exit(&sv_mutex);
875 874
876 875 return (error);
877 876 }
878 877
879 878 /*
880 879 * Reserve the device, taking into account the possibility that
881 880 * the reserve might have to be retried.
882 881 */
883 882 static int
884 883 sv_reserve(nsc_fd_t *fd, int flags)
885 884 {
886 885 int eintr_count;
887 886 int rc;
888 887
889 888 eintr_count = 0;
890 889 do {
891 890 rc = nsc_reserve(fd, flags);
892 891 if (rc == EINTR) {
893 892 ++eintr_count;
894 893 delay(2);
895 894 }
896 895 } while ((rc == EINTR) && (eintr_count < MAX_EINTR_COUNT));
897 896
898 897 return (rc);
899 898 }
900 899
901 900 static int
902 901 sv_enable(const caddr_t path, const int flag,
903 902 const dev_t udev, spcs_s_info_t kstatus)
904 903 {
905 904 struct dev_ops *dev_ops;
906 905 struct cb_ops *cb_ops;
907 906 sv_dev_t *svp;
908 907 sv_maj_t *maj;
909 908 nsc_size_t nblocks;
910 909 int rc;
911 910 cred_t *crp;
912 911 ldi_ident_t li;
913 912
914 913 if (udev == (dev_t)-1 || udev == 0) {
915 914 DTRACE_PROBE1(
916 915 sv_enable_err_baddev,
917 916 dev_t, udev);
918 917 return (SV_EBADDEV);
919 918 }
920 919
921 920 if ((flag & ~(NSC_CACHE|NSC_DEVICE)) != 0) {
922 921 DTRACE_PROBE1(sv_enable_err_amode, dev_t, udev);
923 922 return (SV_EAMODE);
924 923 }
925 924
926 925 /* Get major hash table */
927 926 if ((maj = sv_getmajor(udev)) == NULL)
928 927 return (SV_EBADDEV);
929 928
930 929 mutex_enter(&sv_mutex);
931 930
932 931 rc = sv_get_state(udev, &svp);
933 932 if (rc) {
934 933 mutex_exit(&sv_mutex);
935 934 DTRACE_PROBE1(sv_enable_err_state, dev_t, udev);
936 935 return (rc);
937 936 }
938 937
939 938 rw_enter(&svp->sv_lock, RW_WRITER);
940 939
941 940 /*
942 941 * Get real fd used for io
943 942 */
944 943
945 944 svp->sv_dev = udev;
946 945 svp->sv_flag = flag;
947 946
948 947 /*
949 948 * OR in NSC_DEVICE to ensure that nskern grabs the real strategy
950 949 * function pointer before sv swaps them out.
951 950 */
952 951
953 952 svp->sv_fd = nsc_open(path, (svp->sv_flag | NSC_DEVICE),
954 953 sv_fd_def, (blind_t)udev, &rc);
955 954
956 955 if (svp->sv_fd == NULL) {
957 956 if (kstatus)
958 957 spcs_s_add(kstatus, rc);
959 958 DTRACE_PROBE1(sv_enable_err_fd, dev_t, udev);
960 959 return (sv_free(svp, SV_ESDOPEN));
961 960 }
962 961
963 962 /*
964 963 * Perform a layered driver open using the Sun Private layered
965 964 * driver i/f to ensure that the cb_ops structure for the driver
966 965 * is not detached out from under us whilst sv is enabled.
967 966 *
968 967 */
969 968
970 969 crp = ddi_get_cred();
971 970 svp->sv_lh = NULL;
972 971
973 972 if ((rc = ldi_ident_from_dev(svp->sv_dev, &li)) == 0) {
974 973 rc = ldi_open_by_dev(&svp->sv_dev,
975 974 OTYP_BLK, FREAD|FWRITE, crp, &svp->sv_lh, li);
976 975 }
977 976
978 977 if (rc != 0) {
979 978 if (kstatus)
980 979 spcs_s_add(kstatus, rc);
981 980 DTRACE_PROBE1(sv_enable_err_lyr_open, dev_t, udev);
982 981 return (sv_free(svp, SV_ELYROPEN));
983 982 }
984 983
985 984 /*
986 985 * Do layering if required - must happen after nsc_open().
987 986 */
988 987
989 988 if (maj->sm_inuse++ == 0) {
990 989 maj->sm_dev_ops = nsc_get_devops(getmajor(udev));
991 990
992 991 if (maj->sm_dev_ops == NULL ||
993 992 maj->sm_dev_ops->devo_cb_ops == NULL) {
994 993 DTRACE_PROBE1(sv_enable_err_load, dev_t, udev);
995 994 return (sv_free(svp, SV_ELOAD));
996 995 }
997 996
998 997 dev_ops = maj->sm_dev_ops;
999 998 cb_ops = dev_ops->devo_cb_ops;
1000 999
1001 1000 if (cb_ops->cb_strategy == NULL ||
1002 1001 cb_ops->cb_strategy == nodev ||
1003 1002 cb_ops->cb_strategy == nulldev) {
1004 1003 DTRACE_PROBE1(sv_enable_err_nostrategy, dev_t, udev);
1005 1004 return (sv_free(svp, SV_ELOAD));
1006 1005 }
1007 1006
1008 1007 if (cb_ops->cb_strategy == sv_lyr_strategy) {
1009 1008 DTRACE_PROBE1(sv_enable_err_svstrategy, dev_t, udev);
1010 1009 return (sv_free(svp, SV_ESTRATEGY));
1011 1010 }
1012 1011
1013 1012 maj->sm_strategy = cb_ops->cb_strategy;
1014 1013 maj->sm_close = cb_ops->cb_close;
1015 1014 maj->sm_ioctl = cb_ops->cb_ioctl;
1016 1015 maj->sm_write = cb_ops->cb_write;
1017 1016 maj->sm_open = cb_ops->cb_open;
1018 1017 maj->sm_read = cb_ops->cb_read;
1019 1018 maj->sm_flag = cb_ops->cb_flag;
1020 1019
1021 1020 cb_ops->cb_flag = cb_ops->cb_flag | D_MP;
1022 1021 cb_ops->cb_strategy = sv_lyr_strategy;
1023 1022 cb_ops->cb_close = sv_lyr_close;
1024 1023 cb_ops->cb_ioctl = sv_lyr_ioctl;
1025 1024 cb_ops->cb_write = sv_lyr_write;
1026 1025 cb_ops->cb_open = sv_lyr_open;
1027 1026 cb_ops->cb_read = sv_lyr_read;
1028 1027
1029 1028 /*
1030 1029 * Check that the driver has async I/O entry points
1031 1030 * before changing them.
1032 1031 */
1033 1032
1034 1033 if (dev_ops->devo_rev < 3 || cb_ops->cb_rev < 1) {
1035 1034 maj->sm_awrite = 0;
1036 1035 maj->sm_aread = 0;
1037 1036 } else {
1038 1037 maj->sm_awrite = cb_ops->cb_awrite;
1039 1038 maj->sm_aread = cb_ops->cb_aread;
1040 1039
1041 1040 cb_ops->cb_awrite = sv_lyr_awrite;
1042 1041 cb_ops->cb_aread = sv_lyr_aread;
1043 1042 }
1044 1043
1045 1044 /*
1046 1045 * Bug 4645743
1047 1046 *
1048 1047 * Prevent sv from ever unloading after it has interposed
1049 1048 * on a major device because there is a race between
1050 1049 * sv removing its layered entry points from the target
1051 1050 * dev_ops, a client coming in and accessing the driver,
1052 1051 * and the kernel modunloading the sv text.
1053 1052 *
1054 1053 * To allow unload, do svboot -u, which only happens in
1055 1054 * pkgrm time.
1056 1055 */
1057 1056 ASSERT(MUTEX_HELD(&sv_mutex));
1058 1057 sv_mod_status = SV_PREVENT_UNLOAD;
1059 1058 }
1060 1059
1061 1060
1062 1061 svp->sv_timestamp = nsc_lbolt();
1063 1062 svp->sv_state = SV_ENABLE;
1064 1063 svp->sv_pending = NULL;
1065 1064 rw_exit(&svp->sv_lock);
1066 1065
1067 1066 sv_ndevices++;
1068 1067 mutex_exit(&sv_mutex);
1069 1068
1070 1069 nblocks = 0;
1071 1070 if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) {
1072 1071 nblocks = svp->sv_nblocks;
1073 1072 nsc_release(svp->sv_fd);
1074 1073 }
1075 1074
1076 1075 cmn_err(CE_CONT, "!sv: rdev 0x%lx, nblocks %" NSC_SZFMT "\n",
1077 1076 svp->sv_dev, nblocks);
1078 1077
1079 1078 return (0);
1080 1079 }
1081 1080
1082 1081
1083 1082 static int
1084 1083 sv_prepare_unload()
1085 1084 {
1086 1085 int rc = 0;
1087 1086
1088 1087 mutex_enter(&sv_mutex);
1089 1088
1090 1089 if (sv_mod_status == SV_PREVENT_UNLOAD) {
1091 1090 if ((sv_ndevices != 0) || (sv_tset != NULL)) {
1092 1091 rc = EBUSY;
1093 1092 } else {
1094 1093 sv_mod_status = SV_ALLOW_UNLOAD;
1095 1094 delay(SV_WAIT_UNLOAD * drv_usectohz(1000000));
1096 1095 }
1097 1096 }
1098 1097
1099 1098 mutex_exit(&sv_mutex);
1100 1099 return (rc);
1101 1100 }
1102 1101
1103 1102 static int
1104 1103 svattach_fd(blind_t arg)
1105 1104 {
1106 1105 dev_t dev = (dev_t)arg;
1107 1106 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1108 1107 int rc;
1109 1108
1110 1109 if (sv_debug > 0)
1111 1110 cmn_err(CE_CONT, "!svattach_fd(%p, %p)\n", arg, (void *)svp);
1112 1111
1113 1112 if (svp == NULL) {
1114 1113 cmn_err(CE_WARN, "!svattach_fd: no state (arg %p)", arg);
1115 1114 return (0);
1116 1115 }
1117 1116
1118 1117 if ((rc = nsc_partsize(svp->sv_fd, &svp->sv_nblocks)) != 0) {
1119 1118 cmn_err(CE_WARN,
1120 1119 "!svattach_fd: nsc_partsize() failed, rc %d", rc);
1121 1120 svp->sv_nblocks = 0;
1122 1121 }
1123 1122
1124 1123 if ((rc = nsc_maxfbas(svp->sv_fd, 0, &svp->sv_maxfbas)) != 0) {
1125 1124 cmn_err(CE_WARN,
1126 1125 "!svattach_fd: nsc_maxfbas() failed, rc %d", rc);
1127 1126 svp->sv_maxfbas = 0;
1128 1127 }
1129 1128
1130 1129 if (sv_debug > 0) {
1131 1130 cmn_err(CE_CONT,
1132 1131 "!svattach_fd(%p): size %" NSC_SZFMT ", "
1133 1132 "maxfbas %" NSC_SZFMT "\n",
1134 1133 arg, svp->sv_nblocks, svp->sv_maxfbas);
1135 1134 }
1136 1135
1137 1136 return (0);
1138 1137 }
1139 1138
1140 1139
1141 1140 static int
1142 1141 svdetach_fd(blind_t arg)
1143 1142 {
1144 1143 dev_t dev = (dev_t)arg;
1145 1144 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1146 1145
1147 1146 if (sv_debug > 0)
1148 1147 cmn_err(CE_CONT, "!svdetach_fd(%p, %p)\n", arg, (void *)svp);
1149 1148
1150 1149 /* svp can be NULL during disable of an sv */
1151 1150 if (svp == NULL)
1152 1151 return (0);
1153 1152
1154 1153 svp->sv_maxfbas = 0;
1155 1154 svp->sv_nblocks = 0;
1156 1155 return (0);
1157 1156 }
1158 1157
1159 1158
1160 1159 /*
1161 1160	 * Side effect: sv_disable() takes both sv_mutex and
1162 1161	 * sv_lock(RW_WRITER), and its call to sv_free() releases them before returning.
1163 1162 */
1164 1163
1165 1164 /* ARGSUSED */
1166 1165 static int
1167 1166 sv_disable(dev_t dev, spcs_s_info_t kstatus)
1168 1167 {
1169 1168 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1170 1169
1171 1170 if (svp == NULL) {
1172 1171
1173 1172 DTRACE_PROBE1(sv_disable_err_nodev, sv_dev_t *, svp);
1174 1173 return (SV_ENODEV);
1175 1174 }
1176 1175
1177 1176 mutex_enter(&sv_mutex);
1178 1177 rw_enter(&svp->sv_lock, RW_WRITER);
1179 1178
1180 1179 if (svp->sv_fd == NULL || svp->sv_state != SV_ENABLE) {
1181 1180 rw_exit(&svp->sv_lock);
1182 1181 mutex_exit(&sv_mutex);
1183 1182
1184 1183 DTRACE_PROBE1(sv_disable_err_disabled, sv_dev_t *, svp);
1185 1184 return (SV_EDISABLED);
1186 1185 }
1187 1186
1188 1187
1189 1188 sv_ndevices--;
1190 1189 return (sv_free(svp, 0));
1191 1190 }
1192 1191
1193 1192
1194 1193
1195 1194 static int
1196 1195 sv_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp)
1197 1196 {
1198 1197 nsc_buf_t *tmph;
1199 1198 sv_dev_t *svp;
1200 1199 sv_maj_t *maj;
1201 1200 int (*fn)();
1202 1201 dev_t odev;
1203 1202 int ret;
1204 1203 int rc;
1205 1204
1206 1205 svp = sv_dev_to_sv(*devp, &maj);
1207 1206
1208 1207 if (svp) {
1209 1208 if (svp->sv_state == SV_PENDING &&
1210 1209 svp->sv_pending == curthread) {
1211 1210 /*
1212 1211 * This is a recursive open from a call to
1213 1212 * ddi_lyr_open_by_devt and so we just want
1214 1213 * to pass it straight through to the
1215 1214 * underlying driver.
1216 1215 */
1217 1216 DTRACE_PROBE2(sv_lyr_open_recursive,
1218 1217 sv_dev_t *, svp,
1219 1218 dev_t, *devp);
1220 1219 svp = NULL;
1221 1220 } else
1222 1221 rw_enter(&svp->sv_lock, RW_READER);
1223 1222 }
1224 1223
1225 1224 odev = *devp;
1226 1225
1227 1226 if (maj && (fn = maj->sm_open) != 0) {
1228 1227 if (!(maj->sm_flag & D_MP)) {
1229 1228 UNSAFE_ENTER();
1230 1229 ret = (*fn)(devp, flag, otyp, crp);
1231 1230 UNSAFE_EXIT();
1232 1231 } else {
1233 1232 ret = (*fn)(devp, flag, otyp, crp);
1234 1233 }
1235 1234
1236 1235 if (ret == 0) {
1237 1236 /*
1238 1237 * Re-acquire svp if the driver changed *devp.
1239 1238 */
1240 1239
1241 1240 if (*devp != odev) {
1242 1241 if (svp != NULL)
1243 1242 rw_exit(&svp->sv_lock);
1244 1243
1245 1244 svp = sv_dev_to_sv(*devp, NULL);
1246 1245
1247 1246 if (svp) {
1248 1247 rw_enter(&svp->sv_lock, RW_READER);
1249 1248 }
1250 1249 }
1251 1250 }
1252 1251 } else {
1253 1252 ret = ENODEV;
1254 1253 }
1255 1254
1256 1255 if (svp && ret != 0 && svp->sv_state == SV_ENABLE) {
1257 1256 /*
1258 1257 * Underlying DDI open failed, but we have this
1259 1258 * device SV enabled. If we can read some data
1260 1259 * from the device, fake a successful open (this
1261 1260 * probably means that this device is RDC'd and we
1262 1261 * are getting the data from the secondary node).
1263 1262 *
1264 1263 * The reserve must be done with NSC_TRY|NSC_NOWAIT to
1265 1264 * ensure that it does not deadlock if this open is
1266 1265 * coming from nskernd:get_bsize().
1267 1266 */
1268 1267 rc = sv_reserve(svp->sv_fd,
1269 1268 NSC_TRY | NSC_NOWAIT | NSC_MULTI | NSC_PCATCH);
1270 1269 if (rc == 0) {
1271 1270 tmph = NULL;
1272 1271
1273 1272 rc = nsc_alloc_buf(svp->sv_fd, 0, 1, NSC_READ, &tmph);
1274 1273 if (rc <= 0) {
1275 1274 /* success */
1276 1275 ret = 0;
1277 1276 }
1278 1277
1279 1278 if (tmph) {
1280 1279 (void) nsc_free_buf(tmph);
1281 1280 tmph = NULL;
1282 1281 }
1283 1282
1284 1283 nsc_release(svp->sv_fd);
1285 1284
1286 1285 /*
1287 1286 * Count the number of layered opens that we
1288 1287 * fake since we have to fake a matching number
1289 1288 * of closes (OTYP_LYR open/close calls must be
1290 1289 * paired).
1291 1290 */
1292 1291
1293 1292 if (ret == 0 && otyp == OTYP_LYR) {
1294 1293 mutex_enter(&svp->sv_olock);
1295 1294 svp->sv_openlcnt++;
1296 1295 mutex_exit(&svp->sv_olock);
1297 1296 }
1298 1297 }
1299 1298 }
1300 1299
1301 1300 if (svp) {
1302 1301 rw_exit(&svp->sv_lock);
1303 1302 }
1304 1303
1305 1304 return (ret);
1306 1305 }
1307 1306
1308 1307
1309 1308 static int
1310 1309 sv_lyr_close(dev_t dev, int flag, int otyp, cred_t *crp)
1311 1310 {
1312 1311 sv_dev_t *svp;
1313 1312 sv_maj_t *maj;
1314 1313 int (*fn)();
1315 1314 int ret;
1316 1315
1317 1316 svp = sv_dev_to_sv(dev, &maj);
1318 1317
1319 1318 if (svp &&
1320 1319 svp->sv_state == SV_PENDING &&
1321 1320 svp->sv_pending == curthread) {
1322 1321 /*
1323 1322	 * This is a recursive close from a call to
1324 1323 * ddi_lyr_close and so we just want
1325 1324 * to pass it straight through to the
1326 1325 * underlying driver.
1327 1326 */
1328 1327 DTRACE_PROBE2(sv_lyr_close_recursive, sv_dev_t *, svp,
1329 1328 dev_t, dev);
1330 1329 svp = NULL;
1331 1330 }
1332 1331
1333 1332 if (svp) {
1334 1333 rw_enter(&svp->sv_lock, RW_READER);
1335 1334
1336 1335 if (otyp == OTYP_LYR) {
1337 1336 mutex_enter(&svp->sv_olock);
1338 1337
1339 1338 if (svp->sv_openlcnt) {
1340 1339 /*
1341 1340 * Consume sufficient layered closes to
1342 1341 * account for the opens that we faked
1343 1342 * whilst the device was failed.
1344 1343 */
1345 1344 svp->sv_openlcnt--;
1346 1345 mutex_exit(&svp->sv_olock);
1347 1346 rw_exit(&svp->sv_lock);
1348 1347
1349 1348 DTRACE_PROBE1(sv_lyr_close_end, dev_t, dev);
1350 1349
1351 1350 return (0);
1352 1351 }
1353 1352
1354 1353 mutex_exit(&svp->sv_olock);
1355 1354 }
1356 1355 }
1357 1356
1358 1357 if (maj && (fn = maj->sm_close) != 0) {
1359 1358 if (!(maj->sm_flag & D_MP)) {
1360 1359 UNSAFE_ENTER();
1361 1360 ret = (*fn)(dev, flag, otyp, crp);
1362 1361 UNSAFE_EXIT();
1363 1362 } else {
1364 1363 ret = (*fn)(dev, flag, otyp, crp);
1365 1364 }
1366 1365 } else {
1367 1366 ret = ENODEV;
1368 1367 }
1369 1368
1370 1369 if (svp) {
1371 1370 rw_exit(&svp->sv_lock);
1372 1371 }
1373 1372
1374 1373 return (ret);
1375 1374 }
1376 1375
1377 1376
1378 1377 /*
1379 1378 * Convert the specified dev_t into a locked and enabled sv_dev_t, or
1380 1379 * return NULL.
1381 1380 */
1382 1381 static sv_dev_t *
1383 1382 sv_find_enabled(const dev_t dev, sv_maj_t **majpp)
1384 1383 {
1385 1384 sv_dev_t *svp;
1386 1385
1387 1386 while ((svp = sv_dev_to_sv(dev, majpp)) != NULL) {
1388 1387 rw_enter(&svp->sv_lock, RW_READER);
1389 1388
1390 1389 if (svp->sv_state == SV_ENABLE) {
1391 1390 /* locked and enabled */
1392 1391 break;
1393 1392 }
1394 1393
1395 1394 /*
1396 1395 * State was changed while waiting on the lock.
1397 1396 * Wait for a stable state.
1398 1397 */
1399 1398 rw_exit(&svp->sv_lock);
1400 1399
1401 1400 DTRACE_PROBE1(sv_find_enabled_retry, dev_t, dev);
1402 1401
1403 1402 delay(2);
1404 1403 }
1405 1404
1406 1405 return (svp);
1407 1406 }
1408 1407
1409 1408
1410 1409 static int
1411 1410 sv_lyr_uio(dev_t dev, uio_t *uiop, cred_t *crp, int rw)
1412 1411 {
1413 1412 sv_dev_t *svp;
1414 1413 sv_maj_t *maj;
1415 1414 int (*fn)();
1416 1415 int rc;
1417 1416
1418 1417 svp = sv_find_enabled(dev, &maj);
1419 1418 if (svp == NULL) {
1420 1419 if (maj) {
1421 1420 if (rw == NSC_READ)
1422 1421 fn = maj->sm_read;
1423 1422 else
1424 1423 fn = maj->sm_write;
1425 1424
1426 1425 if (fn != 0) {
1427 1426 if (!(maj->sm_flag & D_MP)) {
1428 1427 UNSAFE_ENTER();
1429 1428 rc = (*fn)(dev, uiop, crp);
1430 1429 UNSAFE_EXIT();
1431 1430 } else {
1432 1431 rc = (*fn)(dev, uiop, crp);
1433 1432 }
1434 1433 }
1435 1434
1436 1435 return (rc);
1437 1436 } else {
1438 1437 return (ENODEV);
1439 1438 }
1440 1439 }
1441 1440
1442 1441 ASSERT(RW_READ_HELD(&svp->sv_lock));
1443 1442
1444 1443 if (svp->sv_flag == 0) {
1445 1444 /*
1446 1445 * guard access mode
1447 1446 * - prevent user level access to the device
1448 1447 */
1449 1448 DTRACE_PROBE1(sv_lyr_uio_err_guard, uio_t *, uiop);
1450 1449 rc = EPERM;
1451 1450 goto out;
1452 1451 }
1453 1452
1454 1453 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
1455 1454 DTRACE_PROBE1(sv_lyr_uio_err_rsrv, uio_t *, uiop);
1456 1455 goto out;
1457 1456 }
1458 1457
1459 1458 if (rw == NSC_READ)
1460 1459 rc = nsc_uread(svp->sv_fd, uiop, crp);
1461 1460 else
1462 1461 rc = nsc_uwrite(svp->sv_fd, uiop, crp);
1463 1462
1464 1463 nsc_release(svp->sv_fd);
1465 1464
1466 1465 out:
1467 1466 rw_exit(&svp->sv_lock);
1468 1467
1469 1468 return (rc);
1470 1469 }
1471 1470
1472 1471
1473 1472 static int
1474 1473 sv_lyr_read(dev_t dev, uio_t *uiop, cred_t *crp)
1475 1474 {
1476 1475 return (sv_lyr_uio(dev, uiop, crp, NSC_READ));
1477 1476 }
1478 1477
1479 1478
1480 1479 static int
1481 1480 sv_lyr_write(dev_t dev, uio_t *uiop, cred_t *crp)
1482 1481 {
1483 1482 return (sv_lyr_uio(dev, uiop, crp, NSC_WRITE));
1484 1483 }
1485 1484
1486 1485
1487 1486 /* ARGSUSED */
1488 1487
1489 1488 static int
1490 1489 sv_lyr_aread(dev_t dev, struct aio_req *aio, cred_t *crp)
1491 1490 {
1492 1491 return (aphysio(sv_lyr_strategy,
1493 1492 anocancel, dev, B_READ, minphys, aio));
1494 1493 }
1495 1494
1496 1495
1497 1496 /* ARGSUSED */
1498 1497
1499 1498 static int
1500 1499 sv_lyr_awrite(dev_t dev, struct aio_req *aio, cred_t *crp)
1501 1500 {
1502 1501 return (aphysio(sv_lyr_strategy,
1503 1502 anocancel, dev, B_WRITE, minphys, aio));
1504 1503 }
1505 1504
1506 1505
1507 1506 /*
1508 1507 * Set up an array containing the list of raw path names
1509 1508	 * The array for the paths is svn and the size of the array is
1510 1509	 * given by size.
1511 1510 *
1512 1511 * If there are more layered devices than will fit in the array,
1513 1512 * the number of extra layered devices is returned. Otherwise
1514 1513	 * zero is returned.
1515 1514 *
1516 1515 * Input:
1517 1516 * svn : array for paths
1518 1517 * size : size of the array
1519 1518 *
1520 1519 * Output (extra):
1521 1520 * zero : All paths fit in array
1522 1521	 * >0   : Number of layered devices that did not fit in the array
1523 1522 */
1524 1523
1525 1524 static int
1526 1525 sv_list(void *ptr, const int size, int *extra, const int ilp32)
1527 1526 {
1528 1527 sv_name32_t *svn32;
1529 1528 sv_name_t *svn;
1530 1529 sv_dev_t *svp;
1531 1530 int *mode, *nblocks;
1532 1531 int i, index;
1533 1532 char *path;
1534 1533
1535 1534 *extra = 0;
1536 1535 index = 0;
1537 1536
1538 1537 if (ilp32)
1539 1538 svn32 = ptr;
1540 1539 else
1541 1540 svn = ptr;
1542 1541
1543 1542 mutex_enter(&sv_mutex);
1544 1543 for (i = 0; i < sv_max_devices; i++) {
1545 1544 svp = &sv_devs[i];
1546 1545
1547 1546 rw_enter(&svp->sv_lock, RW_READER);
1548 1547
1549 1548 if (svp->sv_state != SV_ENABLE) {
1550 1549 rw_exit(&svp->sv_lock);
1551 1550 continue;
1552 1551 }
1553 1552
1554 1553 if ((*extra) != 0 || ptr == NULL) {
1555 1554 /* Another overflow entry */
1556 1555 rw_exit(&svp->sv_lock);
1557 1556 (*extra)++;
1558 1557 continue;
1559 1558 }
1560 1559
1561 1560 if (ilp32) {
1562 1561 nblocks = &svn32->svn_nblocks;
1563 1562 mode = &svn32->svn_mode;
1564 1563 path = svn32->svn_path;
1565 1564
1566 1565 svn32->svn_timestamp = (uint32_t)svp->sv_timestamp;
1567 1566 svn32++;
1568 1567 } else {
1569 1568 nblocks = &svn->svn_nblocks;
1570 1569 mode = &svn->svn_mode;
1571 1570 path = svn->svn_path;
1572 1571
1573 1572 svn->svn_timestamp = svp->sv_timestamp;
1574 1573 svn++;
1575 1574 }
1576 1575
1577 1576 (void) strcpy(path, nsc_pathname(svp->sv_fd));
1578 1577 *nblocks = svp->sv_nblocks;
1579 1578 *mode = svp->sv_flag;
1580 1579
1581 1580 if (*nblocks == 0) {
1582 1581 if (sv_debug > 3)
1583 1582 cmn_err(CE_CONT, "!sv_list: need to reserve\n");
1584 1583
1585 1584 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
1586 1585 *nblocks = svp->sv_nblocks;
1587 1586 nsc_release(svp->sv_fd);
1588 1587 }
1589 1588 }
1590 1589
1591 1590 if (++index >= size) {
1592 1591 /* Out of space */
1593 1592 (*extra)++;
1594 1593 }
1595 1594
1596 1595 rw_exit(&svp->sv_lock);
1597 1596 }
1598 1597 mutex_exit(&sv_mutex);
1599 1598
1600 1599 if (index < size) {
1601 1600 /* NULL terminated list */
1602 1601 if (ilp32)
1603 1602 svn32->svn_path[0] = '\0';
1604 1603 else
1605 1604 svn->svn_path[0] = '\0';
1606 1605 }
1607 1606
1608 1607 return (0);
1609 1608 }
1610 1609
1611 1610
1612 1611 static void
1613 1612 sv_thread_tune(int threads)
1614 1613 {
1615 1614 int incr = (threads > 0) ? 1 : -1;
1616 1615 int change = 0;
1617 1616 int nthreads;
1618 1617
1619 1618 ASSERT(MUTEX_HELD(&sv_mutex));
1620 1619
1621 1620 if (sv_threads_extra) {
1622 1621 /* keep track of any additional threads requested */
1623 1622 if (threads > 0) {
1624 1623 sv_threads_extra += threads;
1625 1624 return;
1626 1625 }
1627 1626 threads = -threads;
1628 1627 if (threads >= sv_threads_extra) {
1629 1628 threads -= sv_threads_extra;
1630 1629 sv_threads_extra = 0;
1631 1630 /* fall through to while loop */
1632 1631 } else {
1633 1632 sv_threads_extra -= threads;
1634 1633 return;
1635 1634 }
1636 1635 } else if (threads > 0) {
1637 1636 /*
1638 1637 * do not increase the number of threads beyond
1639 1638 * sv_threads_max when doing dynamic thread tuning
1640 1639 */
1641 1640 nthreads = nst_nthread(sv_tset);
1642 1641 if ((nthreads + threads) > sv_threads_max) {
1643 1642 sv_threads_extra = nthreads + threads - sv_threads_max;
1644 1643 threads = sv_threads_max - nthreads;
1645 1644 if (threads <= 0)
1646 1645 return;
1647 1646 }
1648 1647 }
1649 1648
1650 1649 if (threads < 0)
1651 1650 threads = -threads;
1652 1651
1653 1652 while (threads--) {
1654 1653 nthreads = nst_nthread(sv_tset);
1655 1654 sv_threads_needed += incr;
1656 1655
1657 1656 if (sv_threads_needed >= nthreads)
1658 1657 change += nst_add_thread(sv_tset, sv_threads_inc);
1659 1658 else if ((sv_threads_needed <
1660 1659 (nthreads - (sv_threads_inc + sv_threads_hysteresis))) &&
1661 1660 ((nthreads - sv_threads_inc) >= sv_threads))
1662 1661 change -= nst_del_thread(sv_tset, sv_threads_inc);
1663 1662 }
1664 1663
1665 1664 #ifdef DEBUG
1666 1665 if (change) {
1667 1666 cmn_err(CE_NOTE,
1668 1667 "!sv_thread_tune: threads needed %d, nthreads %d, "
1669 1668 "nthreads change %d",
1670 1669 sv_threads_needed, nst_nthread(sv_tset), change);
1671 1670 }
1672 1671 #endif
1673 1672 }
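
A worked example with the defaults above (sv_threads == 32, sv_threads_inc == 8, sv_threads_hysteresis == 4): with 48 live threads the set grows by 8 as soon as sv_threads_needed reaches 48, but shrinks by 8 only once sv_threads_needed drops below 48 - (8 + 4) == 36 and the result would stay at or above the preallocated floor of 32. The 4-thread hysteresis gap keeps the set from oscillating around a single boundary.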
1674 1673
1675 1674
1676 1675 /* ARGSUSED */
1677 1676 static int
1678 1677 svopen(dev_t *devp, int flag, int otyp, cred_t *crp)
1679 1678 {
1680 1679 int rc;
1681 1680
1682 1681 mutex_enter(&sv_mutex);
1683 1682 rc = sv_init_devs();
1684 1683 mutex_exit(&sv_mutex);
1685 1684
1686 1685 return (rc);
1687 1686 }
1688 1687
1689 1688
1690 1689 /* ARGSUSED */
1691 1690 static int
1692 1691 svclose(dev_t dev, int flag, int otyp, cred_t *crp)
1693 1692 {
1694 1693 const int secs = HZ * 5;
1695 1694 const int ticks = HZ / 10;
1696 1695 int loops = secs / ticks;
1697 1696
1698 1697 mutex_enter(&sv_mutex);
1699 1698 while (sv_ndevices <= 0 && sv_tset != NULL && loops > 0) {
1700 1699 if (nst_nlive(sv_tset) <= 0) {
1701 1700 nst_destroy(sv_tset);
1702 1701 sv_tset = NULL;
1703 1702 break;
1704 1703 }
1705 1704
1706 1705 /* threads still active - wait for them to exit */
1707 1706 mutex_exit(&sv_mutex);
1708 1707 delay(ticks);
1709 1708 loops--;
1710 1709 mutex_enter(&sv_mutex);
1711 1710 }
1712 1711 mutex_exit(&sv_mutex);
1713 1712
1714 1713 if (loops <= 0) {
1715 1714 cmn_err(CE_WARN,
1716 1715 #ifndef DEBUG
1717 1716 /* do not write to console when non-DEBUG */
1718 1717 "!"
1719 1718 #endif
1720 1719 "sv:svclose: threads still active "
1721 1720 "after %d sec - leaking thread set", secs);
1722 1721 }
1723 1722
1724 1723 return (0);
1725 1724 }
1726 1725
1727 1726
1728 1727 static int
1729 1728 svioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvalp)
1730 1729 {
1731 1730 char itmp1[12], itmp2[12]; /* temp char array for editing ints */
1732 1731 spcs_s_info_t kstatus; /* Kernel version of spcs status */
1733 1732 spcs_s_info_t ustatus; /* Address of user version of spcs status */
1734 1733 sv_list32_t svl32; /* 32 bit Initial structure for SVIOC_LIST */
1735 1734 sv_version_t svv; /* Version structure */
1736 1735 sv_conf_t svc; /* User config structure */
1737 1736 sv_list_t svl; /* Initial structure for SVIOC_LIST */
1738 1737 void *usvn; /* Address of user sv_name_t */
1739 1738 void *svn = NULL; /* Array for SVIOC_LIST */
1740 1739 uint64_t phash; /* pathname hash */
1741 1740 int rc = 0; /* Return code -- errno */
1742 1741 int size; /* Number of items in array */
1743 1742 int bytes; /* Byte size of array */
1744 1743 int ilp32; /* Convert data structures for ilp32 userland */
1745 1744
1746 1745 *rvalp = 0;
1747 1746
1748 1747 /*
1749 1748	 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, the ioctl proceeds.
1750 1749	 * Otherwise it was previously SV_PREVENT_UNLOAD and is now
1751 1750	 * SV_ALLOW_UNLOAD, meaning the driver is expected to unload soon.
1752 1751 *
1753 1752 * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex.
1754 1753 */
1755 1754 if (sv_mod_status == SV_ALLOW_UNLOAD) {
1756 1755 return (EBUSY);
1757 1756 }
1758 1757
1759 1758 if ((cmd != SVIOC_LIST) && ((rc = drv_priv(crp)) != 0))
1760 1759 return (rc);
1761 1760
1762 1761 kstatus = spcs_s_kcreate();
1763 1762 if (!kstatus) {
1764 1763 DTRACE_PROBE1(sv_ioctl_err_kcreate, dev_t, dev);
1765 1764 return (ENOMEM);
1766 1765 }
1767 1766
1768 1767 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
1769 1768
1770 1769 switch (cmd) {
1771 1770
1772 1771 case SVIOC_ENABLE:
1773 1772
1774 1773 if (ilp32) {
1775 1774 sv_conf32_t svc32;
1776 1775
1777 1776 if (ddi_copyin((void *)arg, &svc32,
1778 1777 sizeof (svc32), mode) < 0) {
1779 1778 spcs_s_kfree(kstatus);
1780 1779 return (EFAULT);
1781 1780 }
1782 1781
1783 1782 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1784 1783 (void) strcpy(svc.svc_path, svc32.svc_path);
1785 1784 svc.svc_flag = svc32.svc_flag;
1786 1785 svc.svc_major = svc32.svc_major;
1787 1786 svc.svc_minor = svc32.svc_minor;
1788 1787 } else {
1789 1788 if (ddi_copyin((void *)arg, &svc,
1790 1789 sizeof (svc), mode) < 0) {
1791 1790 spcs_s_kfree(kstatus);
1792 1791 return (EFAULT);
1793 1792 }
1794 1793 }
1795 1794
1796 1795 /* force to raw access */
1797 1796 svc.svc_flag = NSC_DEVICE;
1798 1797
1799 1798 if (sv_tset == NULL) {
1800 1799 mutex_enter(&sv_mutex);
1801 1800
1802 1801 if (sv_tset == NULL) {
1803 1802 sv_tset = nst_init("sv_thr", sv_threads);
1804 1803 }
1805 1804
1806 1805 mutex_exit(&sv_mutex);
1807 1806
1808 1807 if (sv_tset == NULL) {
1809 1808 cmn_err(CE_WARN,
1810 1809 "!sv: could not allocate %d threads",
1811 1810 sv_threads);
1812 1811 }
1813 1812 }
1814 1813
1815 1814 rc = sv_enable(svc.svc_path, svc.svc_flag,
1816 1815 makedevice(svc.svc_major, svc.svc_minor), kstatus);
1817 1816
1818 1817 if (rc == 0) {
1819 1818 sv_config_time = nsc_lbolt();
1820 1819
1821 1820 mutex_enter(&sv_mutex);
1822 1821 sv_thread_tune(sv_threads_dev);
1823 1822 mutex_exit(&sv_mutex);
1824 1823 }
1825 1824
1826 1825 DTRACE_PROBE3(sv_ioctl_end, dev_t, dev, int, *rvalp, int, rc);
1827 1826
1828 1827 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1829 1828 /* NOTREACHED */
1830 1829
1831 1830 case SVIOC_DISABLE:
1832 1831
1833 1832 if (ilp32) {
1834 1833 sv_conf32_t svc32;
1835 1834
1836 1835 if (ddi_copyin((void *)arg, &svc32,
1837 1836 sizeof (svc32), mode) < 0) {
1838 1837 spcs_s_kfree(kstatus);
1839 1838 return (EFAULT);
1840 1839 }
1841 1840
1842 1841 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1843 1842 svc.svc_major = svc32.svc_major;
1844 1843 svc.svc_minor = svc32.svc_minor;
1845 1844 (void) strcpy(svc.svc_path, svc32.svc_path);
1846 1845 svc.svc_flag = svc32.svc_flag;
1847 1846 } else {
1848 1847 if (ddi_copyin((void *)arg, &svc,
1849 1848 sizeof (svc), mode) < 0) {
1850 1849 spcs_s_kfree(kstatus);
1851 1850 return (EFAULT);
1852 1851 }
1853 1852 }
1854 1853
1855 1854 if (svc.svc_major == (major_t)-1 &&
1856 1855 svc.svc_minor == (minor_t)-1) {
1857 1856 sv_dev_t *svp;
1858 1857 int i;
1859 1858
1860 1859 /*
1861 1860 * User level could not find the minor device
1862 1861 * node, so do this the slow way by searching
1863 1862 * the entire sv config for a matching pathname.
1864 1863 */
1865 1864
1866 1865 phash = nsc_strhash(svc.svc_path);
1867 1866
1868 1867 mutex_enter(&sv_mutex);
1869 1868
1870 1869 for (i = 0; i < sv_max_devices; i++) {
1871 1870 svp = &sv_devs[i];
1872 1871
1873 1872 if (svp->sv_state == SV_DISABLE ||
1874 1873 svp->sv_fd == NULL)
1875 1874 continue;
1876 1875
1877 1876 if (nsc_fdpathcmp(svp->sv_fd, phash,
1878 1877 svc.svc_path) == 0) {
1879 1878 svc.svc_major = getmajor(svp->sv_dev);
1880 1879 svc.svc_minor = getminor(svp->sv_dev);
1881 1880 break;
1882 1881 }
1883 1882 }
1884 1883
1885 1884 mutex_exit(&sv_mutex);
1886 1885
1887 1886 if (svc.svc_major == (major_t)-1 &&
1888 1887 svc.svc_minor == (minor_t)-1)
1889 1888 return (spcs_s_ocopyoutf(&kstatus,
1890 1889 svc.svc_error, SV_ENODEV));
1891 1890 }
1892 1891
1893 1892 rc = sv_disable(makedevice(svc.svc_major, svc.svc_minor),
1894 1893 kstatus);
1895 1894
1896 1895 if (rc == 0) {
1897 1896 sv_config_time = nsc_lbolt();
1898 1897
1899 1898 mutex_enter(&sv_mutex);
1900 1899 sv_thread_tune(-sv_threads_dev);
1901 1900 mutex_exit(&sv_mutex);
1902 1901 }
1903 1902
1904 1903 DTRACE_PROBE3(sv_ioctl_2, dev_t, dev, int, *rvalp, int, rc);
1905 1904
1906 1905 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1907 1906 /* NOTREACHED */
1908 1907
1909 1908 case SVIOC_LIST:
1910 1909
1911 1910 if (ilp32) {
1912 1911 if (ddi_copyin((void *)arg, &svl32,
1913 1912 sizeof (svl32), mode) < 0) {
1914 1913 spcs_s_kfree(kstatus);
1915 1914 return (EFAULT);
1916 1915 }
1917 1916
1918 1917 ustatus = (spcs_s_info_t)svl32.svl_error;
1919 1918 size = svl32.svl_count;
1920 1919 usvn = (void *)(unsigned long)svl32.svl_names;
1921 1920 } else {
1922 1921 if (ddi_copyin((void *)arg, &svl,
1923 1922 sizeof (svl), mode) < 0) {
1924 1923 spcs_s_kfree(kstatus);
1925 1924 return (EFAULT);
1926 1925 }
1927 1926
1928 1927 ustatus = svl.svl_error;
1929 1928 size = svl.svl_count;
1930 1929 usvn = svl.svl_names;
1931 1930 }
1932 1931
1933 1932 /* Do some boundary checking */
1934 1933 if ((size < 0) || (size > sv_max_devices)) {
1935 1934 /* Array size is out of range */
1936 1935 return (spcs_s_ocopyoutf(&kstatus, ustatus,
1937 1936 SV_EARRBOUNDS, "0",
1938 1937 spcs_s_inttostring(sv_max_devices, itmp1,
1939 1938 sizeof (itmp1), 0),
1940 1939 spcs_s_inttostring(size, itmp2,
1941 1940 sizeof (itmp2), 0)));
1942 1941 }
1943 1942
1944 1943 if (ilp32)
1945 1944 bytes = size * sizeof (sv_name32_t);
1946 1945 else
1947 1946 bytes = size * sizeof (sv_name_t);
1948 1947
1949 1948 /* Allocate memory for the array of structures */
1950 1949 if (bytes != 0) {
1951 1950 svn = kmem_zalloc(bytes, KM_SLEEP);
1952 1951 if (!svn) {
1953 1952 return (spcs_s_ocopyoutf(&kstatus,
1954 1953 ustatus, ENOMEM));
1955 1954 }
1956 1955 }
1957 1956
1958 1957 rc = sv_list(svn, size, rvalp, ilp32);
1959 1958 if (rc) {
1960 1959 if (svn != NULL)
1961 1960 kmem_free(svn, bytes);
1962 1961 return (spcs_s_ocopyoutf(&kstatus, ustatus, rc));
1963 1962 }
1964 1963
1965 1964 if (ilp32) {
1966 1965 svl32.svl_timestamp = (uint32_t)sv_config_time;
1967 1966 svl32.svl_maxdevs = (int32_t)sv_max_devices;
1968 1967
1969 1968 /* Return the list structure */
1970 1969 if (ddi_copyout(&svl32, (void *)arg,
1971 1970 sizeof (svl32), mode) < 0) {
1972 1971 spcs_s_kfree(kstatus);
1973 1972 if (svn != NULL)
1974 1973 kmem_free(svn, bytes);
1975 1974 return (EFAULT);
1976 1975 }
1977 1976 } else {
1978 1977 svl.svl_timestamp = sv_config_time;
1979 1978 svl.svl_maxdevs = sv_max_devices;
1980 1979
1981 1980 /* Return the list structure */
1982 1981 if (ddi_copyout(&svl, (void *)arg,
1983 1982 sizeof (svl), mode) < 0) {
1984 1983 spcs_s_kfree(kstatus);
1985 1984 if (svn != NULL)
1986 1985 kmem_free(svn, bytes);
1987 1986 return (EFAULT);
1988 1987 }
1989 1988 }
1990 1989
1991 1990 /* Return the array */
1992 1991 if (svn != NULL) {
1993 1992 if (ddi_copyout(svn, usvn, bytes, mode) < 0) {
1994 1993 kmem_free(svn, bytes);
1995 1994 spcs_s_kfree(kstatus);
1996 1995 return (EFAULT);
1997 1996 }
1998 1997 kmem_free(svn, bytes);
1999 1998 }
2000 1999
2001 2000 DTRACE_PROBE3(sv_ioctl_3, dev_t, dev, int, *rvalp, int, 0);
2002 2001
2003 2002 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2004 2003 /* NOTREACHED */
2005 2004
2006 2005 case SVIOC_VERSION:
2007 2006
2008 2007 if (ilp32) {
2009 2008 sv_version32_t svv32;
2010 2009
2011 2010 if (ddi_copyin((void *)arg, &svv32,
2012 2011 sizeof (svv32), mode) < 0) {
2013 2012 spcs_s_kfree(kstatus);
2014 2013 return (EFAULT);
2015 2014 }
2016 2015
2017 2016 svv32.svv_major_rev = sv_major_rev;
2018 2017 svv32.svv_minor_rev = sv_minor_rev;
2019 2018 svv32.svv_micro_rev = sv_micro_rev;
2020 2019 svv32.svv_baseline_rev = sv_baseline_rev;
2021 2020
2022 2021 if (ddi_copyout(&svv32, (void *)arg,
2023 2022 sizeof (svv32), mode) < 0) {
2024 2023 spcs_s_kfree(kstatus);
2025 2024 return (EFAULT);
2026 2025 }
2027 2026
2028 2027 ustatus = (spcs_s_info_t)svv32.svv_error;
2029 2028 } else {
2030 2029 if (ddi_copyin((void *)arg, &svv,
2031 2030 sizeof (svv), mode) < 0) {
2032 2031 spcs_s_kfree(kstatus);
2033 2032 return (EFAULT);
2034 2033 }
2035 2034
2036 2035 svv.svv_major_rev = sv_major_rev;
2037 2036 svv.svv_minor_rev = sv_minor_rev;
2038 2037 svv.svv_micro_rev = sv_micro_rev;
2039 2038 svv.svv_baseline_rev = sv_baseline_rev;
2040 2039
2041 2040 if (ddi_copyout(&svv, (void *)arg,
2042 2041 sizeof (svv), mode) < 0) {
2043 2042 spcs_s_kfree(kstatus);
2044 2043 return (EFAULT);
2045 2044 }
2046 2045
2047 2046 ustatus = svv.svv_error;
2048 2047 }
2049 2048
2050 2049 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, 0);
2051 2050
2052 2051 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2053 2052 /* NOTREACHED */
2054 2053
2055 2054 case SVIOC_UNLOAD:
2056 2055 rc = sv_prepare_unload();
2057 2056
2058 2057 if (ddi_copyout(&rc, (void *)arg, sizeof (rc), mode) < 0) {
2059 2058 rc = EFAULT;
2060 2059 }
2061 2060
2062 2061 spcs_s_kfree(kstatus);
2063 2062 return (rc);
2064 2063
2065 2064 default:
2066 2065 spcs_s_kfree(kstatus);
2067 2066
2068 2067 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, EINVAL);
2069 2068
2070 2069 return (EINVAL);
2071 2070 /* NOTREACHED */
2072 2071 }
2073 2072
2074 2073 /* NOTREACHED */
2075 2074 }
2076 2075
2077 2076
2078 2077 /* ARGSUSED */
2079 2078 static int
2080 2079 svprint(dev_t dev, char *str)
2081 2080 {
2082 2081 int instance = ddi_get_instance(sv_dip);
2083 2082 cmn_err(CE_WARN, "!%s%d: %s", ddi_get_name(sv_dip), instance, str);
2084 2083 return (0);
2085 2084 }
2086 2085
2087 2086
2088 2087 static void
2089 2088 _sv_lyr_strategy(struct buf *bp)
2090 2089 {
2091 2090 caddr_t buf_addr; /* pointer to linear buffer in bp */
2092 2091 nsc_buf_t *bufh = NULL;
2093 2092 nsc_buf_t *hndl = NULL;
2094 2093 sv_dev_t *svp;
2095 2094 nsc_vec_t *v;
2096 2095 sv_maj_t *maj;
2097 2096 nsc_size_t fba_req, fba_len; /* FBA lengths */
2098 2097 nsc_off_t fba_off; /* FBA offset */
2099 2098 size_t tocopy, nbytes; /* byte lengths */
2100 2099 int rw, rc; /* flags and return codes */
2101 2100 int (*fn)();
2102 2101
2103 2102 rc = 0;
2104 2103
2105 2104 if (sv_debug > 5)
2106 2105 cmn_err(CE_CONT, "!_sv_lyr_strategy(%p)\n", (void *)bp);
2107 2106
2108 2107 svp = sv_find_enabled(bp->b_edev, &maj);
2109 2108 if (svp == NULL) {
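		/*
		 * Not an sv enabled device: pass the request straight
		 * through to the underlying driver's strategy routine,
		 * honoring its multi-threading (D_MP) status.
		 */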
2110 2109 if (maj && (fn = maj->sm_strategy) != 0) {
2111 2110 if (!(maj->sm_flag & D_MP)) {
2112 2111 UNSAFE_ENTER();
2113 2112 rc = (*fn)(bp);
2114 2113 UNSAFE_EXIT();
2115 2114 } else {
2116 2115 rc = (*fn)(bp);
2117 2116 }
2118 2117 return;
2119 2118 } else {
2120 2119 bioerror(bp, ENODEV);
2121 2120 biodone(bp);
2122 2121 return;
2123 2122 }
2124 2123 }
2125 2124
2126 2125 ASSERT(RW_READ_HELD(&svp->sv_lock));
2127 2126
2128 2127 if (svp->sv_flag == 0) {
2129 2128 /*
2130 2129 * guard access mode
2131 2130 * - prevent user level access to the device
2132 2131 */
2133 2132 DTRACE_PROBE1(sv_lyr_strategy_err_guard, struct buf *, bp);
2134 2133 bioerror(bp, EPERM);
2135 2134 goto out;
2136 2135 }
2137 2136
2138 2137 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
2139 2138 DTRACE_PROBE1(sv_lyr_strategy_err_rsrv, struct buf *, bp);
2140 2139
2141 2140 if (rc == EINTR)
2142 2141 cmn_err(CE_WARN, "!nsc_reserve() returned EINTR");
2143 2142 bioerror(bp, rc);
2144 2143 goto out;
2145 2144 }
2146 2145
2147 2146 if (bp->b_lblkno >= (diskaddr_t)svp->sv_nblocks) {
2148 2147 DTRACE_PROBE1(sv_lyr_strategy_eof, struct buf *, bp);
2149 2148
2150 2149 if (bp->b_flags & B_READ) {
2151 2150 /* return EOF, not an error */
2152 2151 bp->b_resid = bp->b_bcount;
2153 2152 bioerror(bp, 0);
2154 2153 } else
2155 2154 bioerror(bp, EINVAL);
2156 2155
2157 2156 goto done;
2158 2157 }
2159 2158
2160 2159 /*
2161 2160 * Preallocate a handle once per call to strategy.
2162 2161 	 * If this fails, then nsc_alloc_buf() will allocate
2163 2162 * a temporary handle per allocation/free pair.
2164 2163 */
2165 2164
2166 2165 DTRACE_PROBE1(sv_dbg_alloch_start, sv_dev_t *, svp);
2167 2166
2168 2167 bufh = nsc_alloc_handle(svp->sv_fd, NULL, NULL, NULL);
2169 2168
2170 2169 DTRACE_PROBE1(sv_dbg_alloch_end, sv_dev_t *, svp);
2171 2170
2172 2171 if (bufh && (bufh->sb_flag & NSC_HACTIVE) != 0) {
2173 2172 DTRACE_PROBE1(sv_lyr_strategy_err_hactive, struct buf *, bp);
2174 2173
2175 2174 cmn_err(CE_WARN,
2176 2175 "!sv: allocated active handle (bufh %p, flags %x)",
2177 2176 (void *)bufh, bufh->sb_flag);
2178 2177
2179 2178 bioerror(bp, ENXIO);
2180 2179 goto done;
2181 2180 }
2182 2181
2183 2182 fba_req = FBA_LEN(bp->b_bcount);
2184 2183 if (fba_req + bp->b_lblkno > (diskaddr_t)svp->sv_nblocks)
2185 2184 fba_req = (nsc_size_t)(svp->sv_nblocks - bp->b_lblkno);
2186 2185
2187 2186 rw = (bp->b_flags & B_READ) ? NSC_READ : NSC_WRITE;
2188 2187
2189 2188 bp_mapin(bp);
2190 2189
2191 2190 bp->b_resid = bp->b_bcount;
2192 2191 buf_addr = bp->b_un.b_addr;
2193 2192 fba_off = 0;
2194 2193
2195 2194 /*
2196 2195 * fba_req - requested size of transfer in FBAs after
2197 2196 * truncation to device extent, and allowing for
2198 2197 * possible non-FBA bounded final chunk.
2199 2198 * fba_off - offset of start of chunk from start of bp in FBAs.
2200 2199 * fba_len - size of this chunk in FBAs.
2201 2200 */
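
	/*
	 * For example, a 1MB request to a device with
	 * sv_maxfbas == 256 makes 8 trips around the loop below,
	 * each moving one 256 FBA (128KB) chunk.
	 */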
2202 2201
2203 2202 loop:
2204 2203 fba_len = min(fba_req, svp->sv_maxfbas);
2205 2204 hndl = bufh;
2206 2205
2207 2206 DTRACE_PROBE4(sv_dbg_allocb_start,
2208 2207 sv_dev_t *, svp,
2209 2208 uint64_t, (uint64_t)(bp->b_lblkno + fba_off),
2210 2209 uint64_t, (uint64_t)fba_len,
2211 2210 int, rw);
2212 2211
2213 2212 rc = nsc_alloc_buf(svp->sv_fd, (nsc_off_t)(bp->b_lblkno + fba_off),
2214 2213 fba_len, rw, &hndl);
2215 2214
2216 2215 DTRACE_PROBE1(sv_dbg_allocb_end, sv_dev_t *, svp);
2217 2216
2218 2217 if (rc > 0) {
2219 2218 DTRACE_PROBE1(sv_lyr_strategy_err_alloc, struct buf *, bp);
2220 2219 bioerror(bp, rc);
2221 2220 if (hndl != bufh)
2222 2221 (void) nsc_free_buf(hndl);
2223 2222 hndl = NULL;
2224 2223 goto done;
2225 2224 }
2226 2225
2227 2226 tocopy = min(FBA_SIZE(fba_len), bp->b_resid);
2228 2227 v = hndl->sb_vec;
2229 2228
2230 2229 if (rw == NSC_WRITE && FBA_OFF(tocopy) != 0) {
2231 2230 /*
2232 2231 * Not overwriting all of the last FBA, so read in the
2233 2232 * old contents now before we overwrite it with the new
2234 2233 * data.
2235 2234 */
2236 2235
2237 2236 DTRACE_PROBE2(sv_dbg_read_start, sv_dev_t *, svp,
2238 2237 uint64_t, (uint64_t)(hndl->sb_pos + hndl->sb_len - 1));
2239 2238
2240 2239 rc = nsc_read(hndl, (hndl->sb_pos + hndl->sb_len - 1), 1, 0);
2241 2240 if (rc > 0) {
2242 2241 bioerror(bp, rc);
2243 2242 goto done;
2244 2243 }
2245 2244
2246 2245 DTRACE_PROBE1(sv_dbg_read_end, sv_dev_t *, svp);
2247 2246 }
2248 2247
2249 2248 DTRACE_PROBE1(sv_dbg_bcopy_start, sv_dev_t *, svp);
2250 2249
2251 2250 while (tocopy > 0) {
2252 2251 nbytes = min(tocopy, (nsc_size_t)v->sv_len);
2253 2252
2254 2253 if (bp->b_flags & B_READ)
2255 2254 (void) bcopy(v->sv_addr, buf_addr, nbytes);
2256 2255 else
2257 2256 (void) bcopy(buf_addr, v->sv_addr, nbytes);
2258 2257
2259 2258 bp->b_resid -= nbytes;
2260 2259 buf_addr += nbytes;
2261 2260 tocopy -= nbytes;
2262 2261 v++;
2263 2262 }
2264 2263
2265 2264 DTRACE_PROBE1(sv_dbg_bcopy_end, sv_dev_t *, svp);
2266 2265
2267 2266 if ((bp->b_flags & B_READ) == 0) {
2268 2267 DTRACE_PROBE3(sv_dbg_write_start, sv_dev_t *, svp,
2269 2268 uint64_t, (uint64_t)hndl->sb_pos,
2270 2269 uint64_t, (uint64_t)hndl->sb_len);
2271 2270
2272 2271 rc = nsc_write(hndl, hndl->sb_pos, hndl->sb_len, 0);
2273 2272
2274 2273 DTRACE_PROBE1(sv_dbg_write_end, sv_dev_t *, svp);
2275 2274
2276 2275 if (rc > 0) {
2277 2276 bioerror(bp, rc);
2278 2277 goto done;
2279 2278 }
2280 2279 }
2281 2280
2282 2281 /*
2283 2282 	 * Adjust the FBA offset and the requested (i.e. remaining)
2284 2283 	 * length, and loop if there is more data to transfer.
2285 2284 */
2286 2285
2287 2286 fba_off += fba_len;
2288 2287 fba_req -= fba_len;
2289 2288
2290 2289 if (fba_req > 0) {
2291 2290 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2292 2291
2293 2292 rc = nsc_free_buf(hndl);
2294 2293
2295 2294 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2296 2295
2297 2296 if (rc > 0) {
2298 2297 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2299 2298 struct buf *, bp);
2300 2299 bioerror(bp, rc);
2301 2300 }
2302 2301
2303 2302 hndl = NULL;
2304 2303
2305 2304 if (rc <= 0)
2306 2305 goto loop;
2307 2306 }
2308 2307
2309 2308 done:
2310 2309 if (hndl != NULL) {
2311 2310 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2312 2311
2313 2312 rc = nsc_free_buf(hndl);
2314 2313
2315 2314 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2316 2315
2317 2316 if (rc > 0) {
2318 2317 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2319 2318 struct buf *, bp);
2320 2319 bioerror(bp, rc);
2321 2320 }
2322 2321
2323 2322 hndl = NULL;
2324 2323 }
2325 2324
2326 2325 if (bufh)
2327 2326 (void) nsc_free_handle(bufh);
2328 2327
2329 2328 DTRACE_PROBE1(sv_dbg_rlse_start, sv_dev_t *, svp);
2330 2329
2331 2330 nsc_release(svp->sv_fd);
2332 2331
2333 2332 DTRACE_PROBE1(sv_dbg_rlse_end, sv_dev_t *, svp);
2334 2333
2335 2334 out:
2336 2335 if (sv_debug > 5) {
2337 2336 cmn_err(CE_CONT,
2338 2337 "!_sv_lyr_strategy: bp %p, bufh %p, bp->b_error %d\n",
2339 2338 (void *)bp, (void *)bufh, bp->b_error);
2340 2339 }
2341 2340
2342 2341 DTRACE_PROBE2(sv_lyr_strategy_end, struct buf *, bp, int, bp->b_error);
2343 2342
2344 2343 rw_exit(&svp->sv_lock);
2345 2344 biodone(bp);
2346 2345 }
2347 2346
2348 2347
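/*
 * Thread entry point: runs the synchronous strategy code on an
 * sv threadset thread (see sv_lyr_strategy() below).
 */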
2349 2348 static void
2350 2349 sv_async_strategy(blind_t arg)
2351 2350 {
2352 2351 struct buf *bp = (struct buf *)arg;
2353 2352 _sv_lyr_strategy(bp);
2354 2353 }
2355 2354
2356 2355
2357 2356 static int
2358 2357 sv_lyr_strategy(struct buf *bp)
2359 2358 {
2360 2359 nsthread_t *tp;
2361 2360 int nlive;
2362 2361
2363 2362 /*
2364 2363 * If B_ASYNC was part of the DDI we could use it as a hint to
2365 2364 * not create a thread for synchronous i/o.
2366 2365 */
2367 2366 if (sv_dev_to_sv(bp->b_edev, NULL) == NULL) {
2368 2367 /* not sv enabled - just pass through */
2369 2368 DTRACE_PROBE1(sv_lyr_strategy_notsv, struct buf *, bp);
2370 2369 _sv_lyr_strategy(bp);
2371 2370 return (0);
2372 2371 }
2373 2372
2374 2373 if (sv_debug > 4) {
2375 2374 cmn_err(CE_CONT, "!sv_lyr_strategy: nthread %d nlive %d\n",
2376 2375 nst_nthread(sv_tset), nst_nlive(sv_tset));
2377 2376 }
2378 2377
2379 2378 /*
2380 2379 	 * If there are only guard devices enabled, there
2381 2380 	 * won't be a threadset, so don't try to use it.
2382 2381 */
2383 2382 tp = NULL;
2384 2383 if (sv_tset != NULL) {
2385 2384 tp = nst_create(sv_tset, sv_async_strategy, (blind_t)bp, 0);
2386 2385 }
2387 2386
2388 2387 if (tp == NULL) {
2389 2388 /*
2390 2389 		 * Out of threads, so fall back to synchronous i/o.
2391 2390 */
2392 2391 if (sv_debug > 0) {
2393 2392 cmn_err(CE_CONT,
2394 2393 "!sv_lyr_strategy: thread alloc failed\n");
2395 2394 }
2396 2395
2397 2396 DTRACE_PROBE1(sv_lyr_strategy_no_thread,
2398 2397 struct buf *, bp);
2399 2398
2400 2399 _sv_lyr_strategy(bp);
2401 2400 sv_no_threads++;
2402 2401 } else {
2403 2402 nlive = nst_nlive(sv_tset);
2404 2403 if (nlive > sv_max_nlive) {
2405 2404 if (sv_debug > 0) {
2406 2405 cmn_err(CE_CONT,
2407 2406 "!sv_lyr_strategy: "
2408 2407 "new max nlive %d (nthread %d)\n",
2409 2408 nlive, nst_nthread(sv_tset));
2410 2409 }
2411 2410
2412 2411 sv_max_nlive = nlive;
2413 2412 }
2414 2413 }
2415 2414
2416 2415 return (0);
2417 2416 }
2418 2417
2419 2418 /*
2420 2419  * Rewrite the reported size of the current partition in the user's vtoc
2421 2420 */
2422 2421 static int
2423 2422 sv_fix_dkiocgvtoc(const intptr_t arg, const int mode, sv_dev_t *svp)
2424 2423 {
2425 2424 size_t offset;
2426 2425 int ilp32;
2427 2426 int pnum;
2428 2427 int rc;
2429 2428
2430 2429 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
2431 2430
2432 2431 rc = nskern_partition(svp->sv_dev, &pnum);
2433 2432 if (rc != 0) {
2434 2433 return (rc);
2435 2434 }
2436 2435
2437 2436 if (pnum < 0 || pnum >= V_NUMPAR) {
2438 2437 cmn_err(CE_WARN,
2439 2438 "!sv_gvtoc: unable to determine partition number "
2440 2439 "for dev %lx", svp->sv_dev);
2441 2440 return (EINVAL);
2442 2441 }
2443 2442
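	/*
	 * Patch only the p_size field of the current partition in
	 * the vtoc that the underlying driver has already copied
	 * out; the rest of the user's buffer is left untouched.
	 */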
2444 2443 if (ilp32) {
2445 2444 int32_t p_size;
2446 2445
2447 2446 #ifdef _SunOS_5_6
2448 2447 offset = offsetof(struct vtoc, v_part);
2449 2448 offset += sizeof (struct partition) * pnum;
2450 2449 offset += offsetof(struct partition, p_size);
2451 2450 #else
2452 2451 offset = offsetof(struct vtoc32, v_part);
2453 2452 offset += sizeof (struct partition32) * pnum;
2454 2453 offset += offsetof(struct partition32, p_size);
2455 2454 #endif
2456 2455
2457 2456 p_size = (int32_t)svp->sv_nblocks;
2458 2457 if (p_size == 0) {
2459 2458 if (sv_reserve(svp->sv_fd,
2460 2459 NSC_MULTI|NSC_PCATCH) == 0) {
2461 2460 p_size = (int32_t)svp->sv_nblocks;
2462 2461 nsc_release(svp->sv_fd);
2463 2462 } else {
2464 2463 rc = EINTR;
2465 2464 }
2466 2465 }
2467 2466
2468 2467 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2469 2468 sizeof (p_size), mode) != 0) {
2470 2469 rc = EFAULT;
2471 2470 }
2472 2471 } else {
2473 2472 long p_size;
2474 2473
2475 2474 offset = offsetof(struct vtoc, v_part);
2476 2475 offset += sizeof (struct partition) * pnum;
2477 2476 offset += offsetof(struct partition, p_size);
2478 2477
2479 2478 p_size = (long)svp->sv_nblocks;
2480 2479 if (p_size == 0) {
2481 2480 if (sv_reserve(svp->sv_fd,
2482 2481 NSC_MULTI|NSC_PCATCH) == 0) {
2483 2482 p_size = (long)svp->sv_nblocks;
2484 2483 nsc_release(svp->sv_fd);
2485 2484 } else {
2486 2485 rc = EINTR;
2487 2486 }
2488 2487 }
2489 2488
2490 2489 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2491 2490 sizeof (p_size), mode) != 0) {
2492 2491 rc = EFAULT;
2493 2492 }
2494 2493 }
2495 2494
2496 2495 return (rc);
2497 2496 }
2498 2497
2499 2498
2500 2499 #ifdef DKIOCPARTITION
2501 2500 /*
2502 2501  * Rewrite the reported size of the current partition in the EFI label
2503 2502 *
2504 2503 * arg is dk_efi_t.
2505 2504 *
2506 2505 * dk_efi_t->dki_data = (void *)(uintptr_t)efi.dki_data_64;
2507 2506 *
2508 2507 * dk_efi_t->dki_data --> efi_gpt_t (label header)
2509 2508 * dk_efi_t->dki_data + 1 --> efi_gpe_t[] (array of partitions)
2510 2509 *
2511 2510 * efi_gpt_t->efi_gpt_PartitionEntryArrayCRC32 --> CRC32 of array of parts
2512 2511 * efi_gpt_t->efi_gpt_HeaderCRC32 --> CRC32 of header itself
2513 2512 *
2514 2513 * This assumes that sizeof (efi_gpt_t) is the same as the size of a
2515 2514 * logical block on the disk.
2516 2515 *
2517 2516 * Everything is little endian (i.e. disk format).
2518 2517 */
2519 2518 static int
2520 2519 sv_fix_dkiocgetefi(const intptr_t arg, const int mode, sv_dev_t *svp)
2521 2520 {
2522 2521 dk_efi_t efi;
2523 2522 efi_gpt_t gpt;
2524 2523 efi_gpe_t *gpe = NULL;
2525 2524 size_t sgpe;
2526 2525 uint64_t p_size; /* virtual partition size from nsctl */
2527 2526 uint32_t crc;
2528 2527 int unparts; /* number of parts in user's array */
2529 2528 int pnum;
2530 2529 int rc;
2531 2530
2532 2531 rc = nskern_partition(svp->sv_dev, &pnum);
2533 2532 if (rc != 0) {
2534 2533 return (rc);
2535 2534 }
2536 2535
2537 2536 if (pnum < 0) {
2538 2537 cmn_err(CE_WARN,
2539 2538 "!sv_efi: unable to determine partition number for dev %lx",
2540 2539 svp->sv_dev);
2541 2540 return (EINVAL);
2542 2541 }
2543 2542
2544 2543 if (ddi_copyin((void *)arg, &efi, sizeof (efi), mode)) {
2545 2544 return (EFAULT);
2546 2545 }
2547 2546
2548 2547 efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
2549 2548
2550 2549 	if (efi.dki_length < sizeof (gpt) + sizeof (*gpe)) {
2551 2550 return (EINVAL);
2552 2551 }
2553 2552
2554 2553 if (ddi_copyin((void *)efi.dki_data, &gpt, sizeof (gpt), mode)) {
2555 2554 rc = EFAULT;
2556 2555 goto out;
2557 2556 }
2558 2557
2559 2558 if ((unparts = LE_32(gpt.efi_gpt_NumberOfPartitionEntries)) == 0)
2560 2559 unparts = 1;
2561 2560 else if (pnum >= unparts) {
2562 2561 cmn_err(CE_WARN,
2563 2562 "!sv_efi: partition# beyond end of user array (%d >= %d)",
2564 2563 pnum, unparts);
2565 2564 return (EINVAL);
2566 2565 }
2567 2566
2568 2567 sgpe = sizeof (*gpe) * unparts;
2569 2568 gpe = kmem_alloc(sgpe, KM_SLEEP);
2570 2569
2571 2570 if (ddi_copyin((void *)(efi.dki_data + 1), gpe, sgpe, mode)) {
2572 2571 rc = EFAULT;
2573 2572 goto out;
2574 2573 }
2575 2574
2576 2575 p_size = svp->sv_nblocks;
2577 2576 if (p_size == 0) {
2578 2577 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2579 2578 p_size = (diskaddr_t)svp->sv_nblocks;
2580 2579 nsc_release(svp->sv_fd);
2581 2580 } else {
2582 2581 rc = EINTR;
2583 2582 }
2584 2583 }
2585 2584
2586 2585 gpe[pnum].efi_gpe_EndingLBA = LE_64(
2587 2586 LE_64(gpe[pnum].efi_gpe_StartingLBA) + p_size - 1);
2588 2587
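	/*
	 * Recompute both checksums: the partition array CRC first,
	 * since the header CRC covers the updated array CRC field.
	 * Each CRC field must be zero while its own CRC is computed.
	 */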
2589 2588 gpt.efi_gpt_PartitionEntryArrayCRC32 = 0;
2590 2589 CRC32(crc, gpe, sgpe, -1U, sv_crc32_table);
2591 2590 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
2592 2591
2593 2592 gpt.efi_gpt_HeaderCRC32 = 0;
2594 2593 CRC32(crc, &gpt, sizeof (gpt), -1U, sv_crc32_table);
2595 2594 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
2596 2595
2597 2596 if ((rc == 0) && ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), mode)) {
2598 2597 rc = EFAULT;
2599 2598 goto out;
2600 2599 }
2601 2600
2602 2601 if ((rc == 0) && ddi_copyout(gpe, efi.dki_data + 1, sgpe, mode)) {
2603 2602 rc = EFAULT;
2604 2603 goto out;
2605 2604 }
2606 2605
2607 2606 out:
2608 2607 if (gpe) {
2609 2608 kmem_free(gpe, sgpe);
2610 2609 }
2611 2610
2612 2611 return (rc);
2613 2612 }
2614 2613
2615 2614
2616 2615 /*
2617 2616  * Rewrite the size of the partition specified by p_partno
2618 2617 *
2619 2618 * Note that if a DKIOCPARTITION is issued to an fd opened against a
2620 2619 * non-sv'd device, but p_partno requests the size for a different
2621 2620 * device that is sv'd, this function will *not* be called as sv is
2622 2621 * not interposed on the original device (the fd).
2623 2622 *
2624 2623 * It would not be easy to change this as we cannot get the partition
2625 2624 * number for the non-sv'd device, so cannot compute the dev_t of the
2626 2625 * (sv'd) p_partno device, and so cannot find out if it is sv'd or get
2627 2626 * its size from nsctl.
2628 2627 *
2629 2628 * See also the "Bug 4755783" comment in sv_lyr_ioctl().
2630 2629 */
2631 2630 static int
2632 2631 sv_fix_dkiocpartition(const intptr_t arg, const int mode, sv_dev_t *svp)
2633 2632 {
2634 2633 struct partition64 p64;
2635 2634 sv_dev_t *nsvp = NULL;
2636 2635 diskaddr_t p_size;
2637 2636 minor_t nminor;
2638 2637 int pnum, rc;
2639 2638 dev_t ndev;
2640 2639
2641 2640 rc = nskern_partition(svp->sv_dev, &pnum);
2642 2641 if (rc != 0) {
2643 2642 return (rc);
2644 2643 }
2645 2644
2646 2645 if (ddi_copyin((void *)arg, &p64, sizeof (p64), mode)) {
2647 2646 return (EFAULT);
2648 2647 }
2649 2648
2650 2649 if (p64.p_partno != pnum) {
2651 2650 /* switch to requested partition, not the current one */
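		/* slices are assumed to occupy consecutive minor numbers */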
2652 2651 nminor = getminor(svp->sv_dev) + (p64.p_partno - pnum);
2653 2652 ndev = makedevice(getmajor(svp->sv_dev), nminor);
2654 2653 nsvp = sv_find_enabled(ndev, NULL);
2655 2654 if (nsvp == NULL) {
2656 2655 /* not sv device - just return */
2657 2656 return (0);
2658 2657 }
2659 2658
2660 2659 svp = nsvp;
2661 2660 }
2662 2661
2663 2662 p_size = svp->sv_nblocks;
2664 2663 if (p_size == 0) {
2665 2664 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2666 2665 p_size = (diskaddr_t)svp->sv_nblocks;
2667 2666 nsc_release(svp->sv_fd);
2668 2667 } else {
2669 2668 rc = EINTR;
2670 2669 }
2671 2670 }
2672 2671
2673 2672 if (nsvp != NULL) {
2674 2673 rw_exit(&nsvp->sv_lock);
2675 2674 }
2676 2675
2677 2676 if ((rc == 0) && ddi_copyout(&p_size,
2678 2677 (void *)(arg + offsetof(struct partition64, p_size)),
2679 2678 sizeof (p_size), mode) != 0) {
2680 2679 return (EFAULT);
2681 2680 }
2682 2681
2683 2682 return (rc);
2684 2683 }
2685 2684 #endif /* DKIOCPARTITION */
2686 2685
2687 2686
2688 2687 static int
2689 2688 sv_lyr_ioctl(const dev_t dev, const int cmd, const intptr_t arg,
2690 2689 const int mode, cred_t *crp, int *rvalp)
2691 2690 {
2692 2691 sv_dev_t *svp;
2693 2692 sv_maj_t *maj;
2694 2693 int (*fn)();
2695 2694 int rc = 0;
2696 2695
2697 2696 maj = 0;
2698 2697 fn = 0;
2699 2698
2700 2699 	/*
2701 2700 	 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, continue as normal.
2702 2701 	 * Otherwise it has moved from SV_PREVENT_UNLOAD to SV_ALLOW_UNLOAD
2703 2702 	 * and the driver is expected to unload shortly, so refuse new work.
2704 2703 	 *
2705 2704 	 * SV_ALLOW_UNLOAD is a final state, so no need to grab sv_mutex.
2706 2705 	 */
2707 2706 if (sv_mod_status == SV_ALLOW_UNLOAD) {
2708 2707 return (EBUSY);
2709 2708 }
2710 2709
2711 2710 svp = sv_find_enabled(dev, &maj);
2712 2711 if (svp != NULL) {
2713 2712 if (nskernd_isdaemon()) {
2714 2713 /*
2715 2714 * This is nskernd which always needs to see
2716 2715 * the underlying disk device accurately.
2717 2716 *
2718 2717 * So just pass the ioctl straight through
2719 2718 * to the underlying driver as though the device
2720 2719 * was not sv enabled.
2721 2720 */
2722 2721 DTRACE_PROBE2(sv_lyr_ioctl_nskernd, sv_dev_t *, svp,
2723 2722 dev_t, dev);
2724 2723
2725 2724 rw_exit(&svp->sv_lock);
2726 2725 svp = NULL;
2727 2726 } else {
2728 2727 ASSERT(RW_READ_HELD(&svp->sv_lock));
2729 2728 }
2730 2729 }
2731 2730
2732 2731 /*
2733 2732 * We now have a locked and enabled SV device, or a non-SV device.
2734 2733 */
2735 2734
2736 2735 switch (cmd) {
2737 2736 /*
2738 2737 * DKIOCGVTOC, DKIOCSVTOC, DKIOCPARTITION, DKIOCGETEFI
2739 2738 * and DKIOCSETEFI are intercepted and faked up as some
2740 2739 * i/o providers emulate volumes of a different size to
2741 2740 * the underlying volume.
2742 2741 *
2743 2742 * Setting the size by rewriting the vtoc is not permitted.
2744 2743 */
2745 2744
2746 2745 case DKIOCSVTOC:
2747 2746 #ifdef DKIOCPARTITION
2748 2747 case DKIOCSETEFI:
2749 2748 #endif
2750 2749 if (svp == NULL) {
2751 2750 /* not intercepted -- allow ioctl through */
2752 2751 break;
2753 2752 }
2754 2753
2755 2754 rw_exit(&svp->sv_lock);
2756 2755
2757 2756 DTRACE_PROBE2(sv_lyr_ioctl_svtoc, dev_t, dev, int, EPERM);
2758 2757
2759 2758 return (EPERM);
2760 2759
2761 2760 default:
2762 2761 break;
2763 2762 }
2764 2763
2765 2764 /*
2766 2765 * Pass through the real ioctl command.
2767 2766 */
2768 2767
2769 2768 if (maj && (fn = maj->sm_ioctl) != 0) {
2770 2769 if (!(maj->sm_flag & D_MP)) {
2771 2770 UNSAFE_ENTER();
2772 2771 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2773 2772 UNSAFE_EXIT();
2774 2773 } else {
2775 2774 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2776 2775 }
2777 2776 } else {
2778 2777 rc = ENODEV;
2779 2778 }
2780 2779
2781 2780 /*
2782 2781 * Bug 4755783
2783 2782 * Fix up the size of the current partition to allow
2784 2783 * for the virtual volume to be a different size to the
2785 2784 * physical volume (e.g. for II compact dependent shadows).
2786 2785 *
2787 2786 * Note that this only attempts to fix up the current partition
2788 2787 * - the one that the ioctl was issued against. There could be
2789 2788 * other sv'd partitions in the same vtoc, but we cannot tell
2790 2789 * so we don't attempt to fix them up.
2791 2790 */
2792 2791
2793 2792 if (svp != NULL && rc == 0) {
2794 2793 switch (cmd) {
2795 2794 case DKIOCGVTOC:
2796 2795 rc = sv_fix_dkiocgvtoc(arg, mode, svp);
2797 2796 break;
2798 2797
2799 2798 #ifdef DKIOCPARTITION
2800 2799 case DKIOCGETEFI:
2801 2800 rc = sv_fix_dkiocgetefi(arg, mode, svp);
2802 2801 break;
2803 2802
2804 2803 case DKIOCPARTITION:
2805 2804 rc = sv_fix_dkiocpartition(arg, mode, svp);
2806 2805 break;
2807 2806 #endif /* DKIOCPARTITION */
2808 2807 }
2809 2808 }
2810 2809
2811 2810 if (svp != NULL) {
2812 2811 rw_exit(&svp->sv_lock);
2813 2812 }
2814 2813
2815 2814 return (rc);
2816 2815 }