2976 remove useless offsetof() macros
--- old/usr/src/uts/common/avs/ns/sv/sv.c
+++ new/usr/src/uts/common/avs/ns/sv/sv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 *
25 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Storage Volume Character and Block Driver (SV)
30 30 *
31 31 * This driver implements a simplistic /dev/{r}dsk/ interface to a
32 32 * specified disk volume that is otherwise managed by the Prism
33 33 * software. The SV driver layers itself onto the underlying disk
34 34 * device driver by changing function pointers in the cb_ops
35 35 * structure.
36 36 *
37 37 * CONFIGURATION:
38 38 *
39 39 * 1. Configure the driver using the svadm utility.
40 40 * 2. Access the device as before through /dev/rdsk/c?t?d?s?
41 41 *
42 42 * LIMITATIONS:
43 43 *
44 44 * This driver should NOT be used to share a device between another
45 45 * DataServices user interface module (e.g., STE) and a user accessing
46 46 * the device through the block device in O_WRITE mode. This is because
47 47 * writes through the block device are asynchronous (due to the page
48 48 * cache) and so consistency between the block device user and the
49 49 * STE user cannot be guaranteed.
50 50 *
51 51 * Data is copied between system struct buf(9s) and nsc_vec_t. This is
52 52 * wasteful and slow.
53 53 */
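
An aside on the mechanism: the cb_ops interposition described above is
the core of this driver. A minimal sketch of the idea (illustrative
only, not part of this change; the real swap is performed in sv_enable()
below, which saves the original pointers in the sv_maj_t, and the
example_* names are hypothetical):

	static int (*example_saved_strategy)(struct buf *);

	static void
	example_interpose(struct cb_ops *cb)
	{
		/* save the underlying driver's entry point ... */
		example_saved_strategy = cb->cb_strategy;
		/* ... and splice the layered routine into its place */
		cb->cb_strategy = sv_lyr_strategy;
	}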
54 54
55 55 #include <sys/debug.h>
56 56 #include <sys/types.h>
57 57
58 58 #include <sys/ksynch.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/errno.h>
61 61 #include <sys/varargs.h>
62 62 #include <sys/file.h>
63 63 #include <sys/open.h>
64 64 #include <sys/conf.h>
65 65 #include <sys/cred.h>
66 66 #include <sys/buf.h>
67 67 #include <sys/uio.h>
68 68 #ifndef DS_DDICT
69 69 #include <sys/pathname.h>
70 70 #endif
71 71 #include <sys/aio_req.h>
72 72 #include <sys/dkio.h>
73 73 #include <sys/vtoc.h>
74 74 #include <sys/cmn_err.h>
75 75 #include <sys/modctl.h>
76 76 #include <sys/ddi.h>
77 +#include <sys/sysmacros.h>
77 78 #include <sys/sunddi.h>
78 79 #include <sys/sunldi.h>
79 80 #include <sys/nsctl/nsvers.h>
80 81
81 82 #include <sys/nsc_thread.h>
82 83 #include <sys/unistat/spcs_s.h>
83 84 #include <sys/unistat/spcs_s_k.h>
84 85 #include <sys/unistat/spcs_errors.h>
85 86
86 87 #ifdef DS_DDICT
87 88 #include "../contract.h"
88 89 #endif
89 90
90 91 #include "../nsctl.h"
91 92
92 93
93 94 #include <sys/sdt.h> /* dtrace is S10 or later */
94 95
95 96 #include "sv.h"
96 97 #include "sv_impl.h"
97 98 #include "sv_efi.h"
98 99
99 100 #define MAX_EINTR_COUNT 1000
100 101
101 102 /*
102 103 * sv_mod_status
103 104 */
104 105 #define SV_PREVENT_UNLOAD 1
105 106 #define SV_ALLOW_UNLOAD 2
106 107
107 108 static const int sv_major_rev = ISS_VERSION_MAJ; /* Major number */
108 109 static const int sv_minor_rev = ISS_VERSION_MIN; /* Minor number */
109 110 static const int sv_micro_rev = ISS_VERSION_MIC; /* Micro number */
110 111 static const int sv_baseline_rev = ISS_VERSION_NUM; /* Baseline number */
111 112
112 113 #ifdef DKIOCPARTITION
113 114 /*
114 115 * CRC32 polynomial table needed for computing the checksums
115 116 * in an EFI vtoc.
116 117 */
117 118 static const uint32_t sv_crc32_table[256] = { CRC32_TABLE };
118 119 #endif
119 120
120 121 static clock_t sv_config_time; /* Time of successful {en,dis}able */
121 122 static int sv_debug; /* Set non-zero for debug to syslog */
122 123 static int sv_mod_status; /* Set to prevent modunload */
123 124
124 125 static dev_info_t *sv_dip; /* Single DIP for driver */
125 126 static kmutex_t sv_mutex; /* Protect global lists, etc. */
126 127
127 128 static nsc_mem_t *sv_mem; /* nsctl memory allocator token */
128 129
129 130
130 131 /*
131 132 * Per device and per major state.
132 133 */
133 134
134 135 #ifndef _SunOS_5_6
135 136 #define UNSAFE_ENTER()
136 137 #define UNSAFE_EXIT()
137 138 #else
138 139 #define UNSAFE_ENTER() mutex_enter(&unsafe_driver)
139 140 #define UNSAFE_EXIT() mutex_exit(&unsafe_driver)
140 141 #endif
141 142
142 143 /* hash table of major dev structures */
143 144 static sv_maj_t *sv_majors[SV_MAJOR_HASH_CNT] = {0};
144 145 static sv_dev_t *sv_devs; /* array of per device structures */
145 146 static int sv_max_devices; /* SV version of nsc_max_devices() */
146 147 static int sv_ndevices; /* number of SV enabled devices */
147 148
148 149 /*
149 150 * Threading.
150 151 */
151 152
152 153 int sv_threads_max = 1024; /* maximum # to dynamically alloc */
153 154 int sv_threads = 32; /* # to pre-allocate (see sv.conf) */
154 155 int sv_threads_extra = 0; /* addl # we would have alloc'ed */
155 156
156 157 static nstset_t *sv_tset; /* the threadset pointer */
157 158
158 159 static int sv_threads_hysteresis = 4; /* hysteresis for threadset resizing */
159 160 static int sv_threads_dev = 2; /* # of threads to alloc per device */
160 161 static int sv_threads_inc = 8; /* increment for changing the set */
161 162 static int sv_threads_needed; /* number of threads needed */
162 163 static int sv_no_threads; /* number of nsc_create errors */
163 164 static int sv_max_nlive; /* max number of threads running */
164 165
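A worked example of the tuning parameters above: with nst_nthread() == 48,
sv_threads_inc == 8 and sv_threads_hysteresis == 4, sv_thread_tune()
(below) removes threads only once sv_threads_needed falls below
48 - (8 + 4) = 36, and never shrinks the set below the preallocated
sv_threads, so the threadset does not thrash as devices are enabled and
disabled.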
165 166
166 167
167 168 /*
168 169 * nsctl fd callbacks.
169 170 */
170 171
171 172 static int svattach_fd(blind_t);
172 173 static int svdetach_fd(blind_t);
173 174
174 175 static nsc_def_t sv_fd_def[] = {
175 176 { "Attach", (uintptr_t)svattach_fd, },
176 177 { "Detach", (uintptr_t)svdetach_fd, },
177 178 { 0, 0, }
178 179 };
179 180
180 181 /*
181 182 * cb_ops functions.
182 183 */
183 184
184 185 static int svopen(dev_t *, int, int, cred_t *);
185 186 static int svclose(dev_t, int, int, cred_t *);
186 187 static int svioctl(dev_t, int, intptr_t, int, cred_t *, int *);
187 188 static int svprint(dev_t, char *);
188 189
189 190 /*
190 191 * These next functions are layered into the underlying driver's devops.
191 192 */
192 193
193 194 static int sv_lyr_open(dev_t *, int, int, cred_t *);
194 195 static int sv_lyr_close(dev_t, int, int, cred_t *);
195 196 static int sv_lyr_strategy(struct buf *);
196 197 static int sv_lyr_read(dev_t, struct uio *, cred_t *);
197 198 static int sv_lyr_write(dev_t, struct uio *, cred_t *);
198 199 static int sv_lyr_aread(dev_t, struct aio_req *, cred_t *);
199 200 static int sv_lyr_awrite(dev_t, struct aio_req *, cred_t *);
200 201 static int sv_lyr_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
201 202
202 203 static struct cb_ops sv_cb_ops = {
203 204 svopen, /* open */
204 205 svclose, /* close */
205 206 nulldev, /* strategy */
206 207 svprint,
207 208 nodev, /* dump */
208 209 nodev, /* read */
209 210 nodev, /* write */
210 211 svioctl,
211 212 nodev, /* devmap */
212 213 nodev, /* mmap */
213 214 nodev, /* segmap */
214 215 nochpoll, /* poll */
215 216 ddi_prop_op,
216 217 NULL, /* NOT a stream */
217 218 D_NEW | D_MP | D_64BIT,
218 219 CB_REV,
219 220 nodev, /* aread */
220 221 nodev, /* awrite */
221 222 };
222 223
223 224
224 225 /*
225 226 * dev_ops functions.
226 227 */
227 228
228 229 static int sv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
229 230 static int sv_attach(dev_info_t *, ddi_attach_cmd_t);
230 231 static int sv_detach(dev_info_t *, ddi_detach_cmd_t);
231 232
232 233 static struct dev_ops sv_ops = {
233 234 DEVO_REV,
234 235 0,
235 236 sv_getinfo,
236 237 nulldev, /* identify */
237 238 nulldev, /* probe */
238 239 sv_attach,
239 240 sv_detach,
240 241 nodev, /* reset */
241 242 &sv_cb_ops,
242 243 (struct bus_ops *)0
243 244 };
244 245
245 246 /*
246 247 * Module linkage.
247 248 */
248 249
249 250 extern struct mod_ops mod_driverops;
250 251
251 252 static struct modldrv modldrv = {
252 253 &mod_driverops,
253 254 "nws:Storage Volume:" ISS_VERSION_STR,
254 255 &sv_ops
255 256 };
256 257
257 258 static struct modlinkage modlinkage = {
258 259 MODREV_1,
259 260 &modldrv,
260 261 0
261 262 };
262 263
263 264
264 265 int
265 266 _init(void)
266 267 {
267 268 int error;
268 269
269 270 mutex_init(&sv_mutex, NULL, MUTEX_DRIVER, NULL);
270 271
271 272 if ((error = mod_install(&modlinkage)) != 0) {
272 273 mutex_destroy(&sv_mutex);
273 274 return (error);
274 275 }
275 276
276 277 #ifdef DEBUG
277 278 cmn_err(CE_CONT, "!sv (revision %d.%d.%d.%d, %s, %s)\n",
278 279 sv_major_rev, sv_minor_rev, sv_micro_rev, sv_baseline_rev,
279 280 ISS_VERSION_STR, BUILD_DATE_STR);
280 281 #else
281 282 if (sv_micro_rev) {
282 283 cmn_err(CE_CONT, "!sv (revision %d.%d.%d, %s, %s)\n",
283 284 sv_major_rev, sv_minor_rev, sv_micro_rev,
284 285 ISS_VERSION_STR, BUILD_DATE_STR);
285 286 } else {
286 287 cmn_err(CE_CONT, "!sv (revision %d.%d, %s, %s)\n",
287 288 sv_major_rev, sv_minor_rev,
288 289 ISS_VERSION_STR, BUILD_DATE_STR);
289 290 }
290 291 #endif
291 292
292 293 return (error);
293 294 }
294 295
295 296
296 297 int
297 298 _fini(void)
298 299 {
299 300 int error;
300 301
301 302 if ((error = mod_remove(&modlinkage)) != 0)
302 303 return (error);
303 304
304 305 mutex_destroy(&sv_mutex);
305 306
306 307 return (error);
307 308 }
308 309
309 310
310 311 int
311 312 _info(struct modinfo *modinfop)
312 313 {
313 314 return (mod_info(&modlinkage, modinfop));
314 315 }
315 316
316 317
317 318 /*
318 319 * Locking & State.
319 320 *
320 321 * sv_mutex protects config information - sv_maj_t and sv_dev_t lists;
321 322 * threadset creation and sizing; sv_ndevices.
322 323 *
323 324 * If we need to hold both sv_mutex and sv_lock, then the sv_mutex
324 325 * must be acquired first.
325 326 *
326 327 * sv_lock protects the sv_dev_t structure for an individual device.
327 328 *
328 329 * sv_olock protects the otyp/open members of the sv_dev_t. If we need
329 330 * to hold both sv_lock and sv_olock, then the sv_lock must be acquired
330 331 * first.
331 332 *
332 333 * nsc_reserve/nsc_release are used in NSC_MULTI mode to allow multiple
333 334 * I/O operations to a device simultaneously, as above.
334 335 *
335 336 * All nsc_open/nsc_close/nsc_reserve/nsc_release operations that occur
336 337 * with sv_lock write-locked must be done with (sv_state == SV_PENDING)
337 338 * and (sv_pending == curthread) so that any recursion through
338 339 * sv_lyr_open/sv_lyr_close can be detected.
339 340 */
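
A sketch of the ordering rules above (illustrative, not code from this
file): a path that needed all three locks would have to nest them as

	mutex_enter(&sv_mutex);			/* 1. global config */
	rw_enter(&svp->sv_lock, RW_WRITER);	/* 2. per-device state */
	mutex_enter(&svp->sv_olock);		/* 3. otyp/open counts */
	/* ... critical section ... */
	mutex_exit(&svp->sv_olock);
	rw_exit(&svp->sv_lock);
	mutex_exit(&sv_mutex);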
340 341
341 342
342 343 static int
343 344 sv_init_devs(void)
344 345 {
345 346 int i;
346 347
347 348 ASSERT(MUTEX_HELD(&sv_mutex));
348 349
349 350 if (sv_max_devices > 0)
350 351 return (0);
351 352
352 353 sv_max_devices = nsc_max_devices();
353 354
354 355 if (sv_max_devices <= 0) {
355 356 /* nsctl is not attached (nskernd not running) */
356 357 if (sv_debug > 0)
357 358 cmn_err(CE_CONT, "!sv: nsc_max_devices = 0\n");
358 359 return (EAGAIN);
359 360 }
360 361
361 362 sv_devs = nsc_kmem_zalloc((sv_max_devices * sizeof (*sv_devs)),
362 363 KM_NOSLEEP, sv_mem);
363 364
364 365 if (sv_devs == NULL) {
365 366 cmn_err(CE_WARN, "!sv: could not allocate sv_devs array");
366 367 return (ENOMEM);
367 368 }
368 369
369 370 for (i = 0; i < sv_max_devices; i++) {
370 371 mutex_init(&sv_devs[i].sv_olock, NULL, MUTEX_DRIVER, NULL);
371 372 rw_init(&sv_devs[i].sv_lock, NULL, RW_DRIVER, NULL);
372 373 }
373 374
374 375 if (sv_debug > 0)
375 376 cmn_err(CE_CONT, "!sv: sv_init_devs successful\n");
376 377
377 378 return (0);
378 379 }
379 380
380 381
381 382 static int
382 383 sv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
383 384 {
384 385 int rc;
385 386
386 387 switch (cmd) {
387 388
388 389 case DDI_ATTACH:
389 390 sv_dip = dip;
390 391
391 392 if (ddi_create_minor_node(dip, "sv", S_IFCHR,
392 393 0, DDI_PSEUDO, 0) != DDI_SUCCESS)
393 394 goto failed;
394 395
395 396 mutex_enter(&sv_mutex);
396 397
397 398 sv_mem = nsc_register_mem("SV", NSC_MEM_LOCAL, 0);
398 399 if (sv_mem == NULL) {
399 400 mutex_exit(&sv_mutex);
400 401 goto failed;
401 402 }
402 403
403 404 rc = sv_init_devs();
404 405 if (rc != 0 && rc != EAGAIN) {
405 406 mutex_exit(&sv_mutex);
406 407 goto failed;
407 408 }
408 409
409 410 mutex_exit(&sv_mutex);
410 411
411 412
412 413 ddi_report_dev(dip);
413 414
414 415 sv_threads = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
415 416 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
416 417 "sv_threads", sv_threads);
417 418
418 419 if (sv_debug > 0)
419 420 cmn_err(CE_CONT, "!sv: sv_threads=%d\n", sv_threads);
420 421
421 422 if (sv_threads > sv_threads_max)
422 423 sv_threads_max = sv_threads;
423 424
424 425 return (DDI_SUCCESS);
425 426
426 427 default:
427 428 return (DDI_FAILURE);
428 429 }
429 430
430 431 failed:
431 432 DTRACE_PROBE(sv_attach_failed);
432 433 (void) sv_detach(dip, DDI_DETACH);
433 434 return (DDI_FAILURE);
434 435 }
435 436
436 437
437 438 static int
438 439 sv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
439 440 {
440 441 sv_dev_t *svp;
441 442 int i;
442 443
443 444 switch (cmd) {
444 445
445 446 case DDI_DETACH:
446 447
447 448 /*
448 449 * Check that everything is disabled.
449 450 */
450 451
451 452 mutex_enter(&sv_mutex);
452 453
453 454 if (sv_mod_status == SV_PREVENT_UNLOAD) {
454 455 mutex_exit(&sv_mutex);
455 456 DTRACE_PROBE(sv_detach_err_prevent);
456 457 return (DDI_FAILURE);
457 458 }
458 459
459 460 for (i = 0; sv_devs && i < sv_max_devices; i++) {
460 461 svp = &sv_devs[i];
461 462
462 463 if (svp->sv_state != SV_DISABLE) {
463 464 mutex_exit(&sv_mutex);
464 465 DTRACE_PROBE(sv_detach_err_busy);
465 466 return (DDI_FAILURE);
466 467 }
467 468 }
468 469
469 470
470 471 for (i = 0; sv_devs && i < sv_max_devices; i++) {
471 472 mutex_destroy(&sv_devs[i].sv_olock);
472 473 rw_destroy(&sv_devs[i].sv_lock);
473 474 }
474 475
475 476 if (sv_devs) {
476 477 nsc_kmem_free(sv_devs,
477 478 (sv_max_devices * sizeof (*sv_devs)));
478 479 sv_devs = NULL;
479 480 }
480 481 sv_max_devices = 0;
481 482
482 483 if (sv_mem) {
483 484 nsc_unregister_mem(sv_mem);
484 485 sv_mem = NULL;
485 486 }
486 487
487 488 mutex_exit(&sv_mutex);
488 489
489 490 /*
490 491 * Remove all minor nodes.
491 492 */
492 493
493 494 ddi_remove_minor_node(dip, NULL);
494 495 sv_dip = NULL;
495 496
496 497 return (DDI_SUCCESS);
497 498
498 499 default:
499 500 return (DDI_FAILURE);
500 501 }
501 502 }
502 503
503 504 static sv_maj_t *
504 505 sv_getmajor(const dev_t dev)
505 506 {
506 507 sv_maj_t **insert, *maj;
507 508 major_t umaj = getmajor(dev);
508 509
509 510 /*
510 511 * See if the hash table entry, or one of the hash chains
 511 512 	 * is already allocated for this major number.
512 513 */
513 514 if ((maj = sv_majors[SV_MAJOR_HASH(umaj)]) != 0) {
514 515 do {
515 516 if (maj->sm_major == umaj)
516 517 return (maj);
517 518 } while ((maj = maj->sm_next) != 0);
518 519 }
519 520
520 521 /*
 521 522 	 * If the sv_mutex is held, there is a design flaw, as the only
 522 523 	 * callers that may legitimately arrive here without it are sv_enable()
 523 524 	 * and sv_dev_to_sv(). Return an error instead of panicking the system.
524 525 */
525 526 if (MUTEX_HELD(&sv_mutex)) {
526 527 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
527 528 return (NULL);
528 529 }
529 530
530 531 /*
531 532 * Determine where to allocate a new element in the hash table
532 533 */
533 534 mutex_enter(&sv_mutex);
534 535 insert = &(sv_majors[SV_MAJOR_HASH(umaj)]);
535 536 for (maj = *insert; maj; maj = maj->sm_next) {
536 537
537 538 /* Did another thread beat us to it? */
538 539 if (maj->sm_major == umaj)
539 540 return (maj);
540 541
541 542 /* Find a NULL insert point? */
542 543 if (maj->sm_next == NULL)
543 544 insert = &maj->sm_next;
544 545 }
545 546
546 547 /*
547 548 * Located the new insert point
548 549 */
549 550 *insert = nsc_kmem_zalloc(sizeof (*maj), KM_NOSLEEP, sv_mem);
550 551 if ((maj = *insert) != 0)
551 552 maj->sm_major = umaj;
552 553 else
553 554 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
554 555
555 556 mutex_exit(&sv_mutex);
556 557
557 558 return (maj);
558 559 }
559 560
560 561 /* ARGSUSED */
561 562
562 563 static int
563 564 sv_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
564 565 {
565 566 int rc = DDI_FAILURE;
566 567
567 568 switch (infocmd) {
568 569
569 570 case DDI_INFO_DEVT2DEVINFO:
570 571 *result = sv_dip;
571 572 rc = DDI_SUCCESS;
572 573 break;
573 574
574 575 case DDI_INFO_DEVT2INSTANCE:
575 576 /*
576 577 * We only have a single instance.
577 578 */
578 579 *result = 0;
579 580 rc = DDI_SUCCESS;
580 581 break;
581 582
582 583 default:
583 584 break;
584 585 }
585 586
586 587 return (rc);
587 588 }
588 589
589 590
590 591 /*
591 592 * Hashing of devices onto major device structures.
592 593 *
593 594 * Individual device structures are hashed onto one of the sm_hash[]
594 595 * buckets in the relevant major device structure.
595 596 *
596 597 * Hash insertion and deletion -must- be done with sv_mutex held. Hash
597 598 * searching does not require the mutex because of the sm_seq member.
598 599 * sm_seq is incremented on each insertion (-after- hash chain pointer
599 600 * manipulation) and each deletion (-before- hash chain pointer
600 601 * manipulation). When searching the hash chain, the seq number is
601 602 * checked before accessing each device structure, if the seq number has
602 603 * changed, then we restart the search from the top of the hash chain.
603 604 * If we restart more than SV_HASH_RETRY times, we take sv_mutex and search
604 605 * the hash chain (we are guaranteed that this search cannot be
605 606 * interrupted).
606 607 */
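
Distilled, the lockless search described above is a sequence-counter
validation loop (a sketch; the full version, including the sv_mutex
fallback after SV_HASH_RETRY restarts, is sv_dev_to_sv() below):

	retry:
		seq = maj->sm_seq;
		for (svp = *hb; svp != NULL; svp = next) {
			next = svp->sv_hash;
			if (maj->sm_seq != seq) {
				try++;		/* chain changed under us */
				goto retry;	/* restart from the head */
			}
			if (svp->sv_dev == dev)
				break;		/* found the device */
		}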
607 608
608 609 #define SV_HASH_RETRY 16
609 610
610 611 static sv_dev_t *
611 612 sv_dev_to_sv(const dev_t dev, sv_maj_t **majpp)
612 613 {
613 614 minor_t umin = getminor(dev);
614 615 sv_dev_t **hb, *next, *svp;
615 616 sv_maj_t *maj;
616 617 int seq;
617 618 int try;
618 619
619 620 /* Get major hash table */
620 621 maj = sv_getmajor(dev);
621 622 if (majpp)
622 623 *majpp = maj;
623 624 if (maj == NULL)
624 625 return (NULL);
625 626
626 627 if (maj->sm_inuse == 0) {
627 628 DTRACE_PROBE1(
628 629 sv_dev_to_sv_end,
629 630 dev_t, dev);
630 631 return (NULL);
631 632 }
632 633
633 634 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
634 635 try = 0;
635 636
636 637 retry:
637 638 if (try > SV_HASH_RETRY)
638 639 mutex_enter(&sv_mutex);
639 640
640 641 seq = maj->sm_seq;
641 642 for (svp = *hb; svp; svp = next) {
642 643 next = svp->sv_hash;
643 644
644 645 nsc_membar_stld(); /* preserve register load order */
645 646
646 647 if (maj->sm_seq != seq) {
647 648 DTRACE_PROBE1(sv_dev_to_sv_retry, dev_t, dev);
648 649 try++;
649 650 goto retry;
650 651 }
651 652
652 653 if (svp->sv_dev == dev)
653 654 break;
654 655 }
655 656
656 657 if (try > SV_HASH_RETRY)
657 658 mutex_exit(&sv_mutex);
658 659
659 660 return (svp);
660 661 }
661 662
662 663
663 664 /*
664 665 * Must be called with sv_mutex held.
665 666 */
666 667
667 668 static int
668 669 sv_get_state(const dev_t udev, sv_dev_t **svpp)
669 670 {
670 671 sv_dev_t **hb, **insert, *svp;
671 672 sv_maj_t *maj;
672 673 minor_t umin;
673 674 int i;
674 675
675 676 /* Get major hash table */
676 677 if ((maj = sv_getmajor(udev)) == NULL)
677 678 return (NULL);
678 679
679 680 /* Determine which minor hash table */
680 681 umin = getminor(udev);
681 682 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
682 683
683 684 /* look for clash */
684 685
685 686 insert = hb;
686 687
687 688 for (svp = *hb; svp; svp = svp->sv_hash) {
688 689 if (svp->sv_dev == udev)
689 690 break;
690 691
691 692 if (svp->sv_hash == NULL)
692 693 insert = &svp->sv_hash;
693 694 }
694 695
695 696 if (svp) {
696 697 DTRACE_PROBE1(
697 698 sv_get_state_enabled,
698 699 dev_t, udev);
699 700 return (SV_EENABLED);
700 701 }
701 702
702 703 /* look for spare sv_devs slot */
703 704
704 705 for (i = 0; i < sv_max_devices; i++) {
705 706 svp = &sv_devs[i];
706 707
707 708 if (svp->sv_state == SV_DISABLE)
708 709 break;
709 710 }
710 711
711 712 if (i >= sv_max_devices) {
712 713 DTRACE_PROBE1(
713 714 sv_get_state_noslots,
714 715 dev_t, udev);
715 716 return (SV_ENOSLOTS);
716 717 }
717 718
718 719 svp->sv_state = SV_PENDING;
719 720 svp->sv_pending = curthread;
720 721
721 722 *insert = svp;
722 723 svp->sv_hash = NULL;
723 724 maj->sm_seq++; /* must be after the store to the hash chain */
724 725
725 726 *svpp = svp;
726 727
727 728 /*
728 729 * We do not know the size of the underlying device at
729 730 * this stage, so initialise "nblocks" property to
730 731 * zero, and update it whenever we succeed in
731 732 * nsc_reserve'ing the underlying nsc_fd_t.
732 733 */
733 734
734 735 svp->sv_nblocks = 0;
735 736
736 737 return (0);
737 738 }
738 739
739 740
740 741 /*
 741 742 	 * Remove a device structure from its hash chain.
742 743 * Must be called with sv_mutex held.
743 744 */
744 745
745 746 static void
746 747 sv_rm_hash(sv_dev_t *svp)
747 748 {
748 749 sv_dev_t **svpp;
749 750 sv_maj_t *maj;
750 751
751 752 /* Get major hash table */
752 753 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
753 754 return;
754 755
755 756 /* remove svp from hash chain */
756 757
757 758 svpp = &(maj->sm_hash[SV_MINOR_HASH(getminor(svp->sv_dev))]);
758 759 while (*svpp) {
759 760 if (*svpp == svp) {
760 761 /*
761 762 * increment of sm_seq must be before the
762 763 * removal from the hash chain
763 764 */
764 765 maj->sm_seq++;
765 766 *svpp = svp->sv_hash;
766 767 break;
767 768 }
768 769
769 770 svpp = &(*svpp)->sv_hash;
770 771 }
771 772
772 773 svp->sv_hash = NULL;
773 774 }
774 775
775 776 /*
776 777 * Free (disable) a device structure.
777 778 * Must be called with sv_lock(RW_WRITER) and sv_mutex held, and will
778 779 * perform the exits during its processing.
779 780 */
780 781
781 782 static int
782 783 sv_free(sv_dev_t *svp, const int error)
783 784 {
784 785 struct cb_ops *cb_ops;
785 786 sv_maj_t *maj;
786 787
787 788 /* Get major hash table */
788 789 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
789 790 return (NULL);
790 791
791 792 svp->sv_state = SV_PENDING;
792 793 svp->sv_pending = curthread;
793 794
794 795 /*
 795 796 	 * Close the fds before removing from the hash or swapping
796 797 * back the cb_ops pointers so that the cache flushes before new
797 798 * io can come in.
798 799 */
799 800
800 801 if (svp->sv_fd) {
801 802 (void) nsc_close(svp->sv_fd);
802 803 svp->sv_fd = 0;
803 804 }
804 805
805 806 sv_rm_hash(svp);
806 807
807 808 if (error != SV_ESDOPEN &&
808 809 error != SV_ELYROPEN && --maj->sm_inuse == 0) {
809 810
810 811 if (maj->sm_dev_ops)
811 812 cb_ops = maj->sm_dev_ops->devo_cb_ops;
812 813 else
813 814 cb_ops = NULL;
814 815
815 816 if (cb_ops && maj->sm_strategy != NULL) {
816 817 cb_ops->cb_strategy = maj->sm_strategy;
817 818 cb_ops->cb_close = maj->sm_close;
818 819 cb_ops->cb_ioctl = maj->sm_ioctl;
819 820 cb_ops->cb_write = maj->sm_write;
820 821 cb_ops->cb_open = maj->sm_open;
821 822 cb_ops->cb_read = maj->sm_read;
822 823 cb_ops->cb_flag = maj->sm_flag;
823 824
824 825 if (maj->sm_awrite)
825 826 cb_ops->cb_awrite = maj->sm_awrite;
826 827
827 828 if (maj->sm_aread)
828 829 cb_ops->cb_aread = maj->sm_aread;
829 830
830 831 /*
831 832 * corbin XXX
832 833 * Leave backing device ops in maj->sm_*
833 834 * to handle any requests that might come
834 835 * in during the disable. This could be
835 836 * a problem however if the backing device
836 837 * driver is changed while we process these
837 838 * requests.
838 839 *
839 840 * maj->sm_strategy = 0;
840 841 * maj->sm_awrite = 0;
841 842 * maj->sm_write = 0;
842 843 * maj->sm_ioctl = 0;
843 844 * maj->sm_close = 0;
844 845 * maj->sm_aread = 0;
845 846 * maj->sm_read = 0;
846 847 * maj->sm_open = 0;
847 848 * maj->sm_flag = 0;
848 849 *
849 850 */
850 851 }
851 852
852 853 if (maj->sm_dev_ops) {
853 854 maj->sm_dev_ops = 0;
854 855 }
855 856 }
856 857
857 858 if (svp->sv_lh) {
858 859 cred_t *crp = ddi_get_cred();
859 860
860 861 /*
861 862 * Close the protective layered driver open using the
862 863 * Sun Private layered driver i/f.
863 864 */
864 865
865 866 (void) ldi_close(svp->sv_lh, FREAD|FWRITE, crp);
866 867 svp->sv_lh = NULL;
867 868 }
868 869
869 870 svp->sv_timestamp = nsc_lbolt();
870 871 svp->sv_state = SV_DISABLE;
871 872 svp->sv_pending = NULL;
872 873 rw_exit(&svp->sv_lock);
873 874 mutex_exit(&sv_mutex);
874 875
875 876 return (error);
876 877 }
877 878
878 879 /*
879 880 * Reserve the device, taking into account the possibility that
880 881 * the reserve might have to be retried.
881 882 */
882 883 static int
883 884 sv_reserve(nsc_fd_t *fd, int flags)
884 885 {
885 886 int eintr_count;
886 887 int rc;
887 888
888 889 eintr_count = 0;
889 890 do {
890 891 rc = nsc_reserve(fd, flags);
891 892 if (rc == EINTR) {
892 893 ++eintr_count;
893 894 delay(2);
894 895 }
895 896 } while ((rc == EINTR) && (eintr_count < MAX_EINTR_COUNT));
896 897
897 898 return (rc);
898 899 }
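
Callers pair a successful sv_reserve() with nsc_release() around the
I/O, as in this pattern used repeatedly later in the file (sketch):

	if (sv_reserve(svp->sv_fd, NSC_MULTI | NSC_PCATCH) == 0) {
		/* ... access the device through svp->sv_fd ... */
		nsc_release(svp->sv_fd);
	}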
899 900
900 901 static int
901 902 sv_enable(const caddr_t path, const int flag,
902 903 const dev_t udev, spcs_s_info_t kstatus)
903 904 {
904 905 struct dev_ops *dev_ops;
905 906 struct cb_ops *cb_ops;
906 907 sv_dev_t *svp;
907 908 sv_maj_t *maj;
908 909 nsc_size_t nblocks;
909 910 int rc;
910 911 cred_t *crp;
911 912 ldi_ident_t li;
912 913
913 914 if (udev == (dev_t)-1 || udev == 0) {
914 915 DTRACE_PROBE1(
915 916 sv_enable_err_baddev,
916 917 dev_t, udev);
917 918 return (SV_EBADDEV);
918 919 }
919 920
920 921 if ((flag & ~(NSC_CACHE|NSC_DEVICE)) != 0) {
921 922 DTRACE_PROBE1(sv_enable_err_amode, dev_t, udev);
922 923 return (SV_EAMODE);
923 924 }
924 925
925 926 /* Get major hash table */
926 927 if ((maj = sv_getmajor(udev)) == NULL)
927 928 return (SV_EBADDEV);
928 929
929 930 mutex_enter(&sv_mutex);
930 931
931 932 rc = sv_get_state(udev, &svp);
932 933 if (rc) {
933 934 mutex_exit(&sv_mutex);
934 935 DTRACE_PROBE1(sv_enable_err_state, dev_t, udev);
935 936 return (rc);
936 937 }
937 938
938 939 rw_enter(&svp->sv_lock, RW_WRITER);
939 940
940 941 /*
941 942 * Get real fd used for io
942 943 */
943 944
944 945 svp->sv_dev = udev;
945 946 svp->sv_flag = flag;
946 947
947 948 /*
948 949 * OR in NSC_DEVICE to ensure that nskern grabs the real strategy
949 950 * function pointer before sv swaps them out.
950 951 */
951 952
952 953 svp->sv_fd = nsc_open(path, (svp->sv_flag | NSC_DEVICE),
953 954 sv_fd_def, (blind_t)udev, &rc);
954 955
955 956 if (svp->sv_fd == NULL) {
956 957 if (kstatus)
957 958 spcs_s_add(kstatus, rc);
958 959 DTRACE_PROBE1(sv_enable_err_fd, dev_t, udev);
959 960 return (sv_free(svp, SV_ESDOPEN));
960 961 }
961 962
962 963 /*
963 964 * Perform a layered driver open using the Sun Private layered
964 965 * driver i/f to ensure that the cb_ops structure for the driver
965 966 * is not detached out from under us whilst sv is enabled.
966 967 *
967 968 */
968 969
969 970 crp = ddi_get_cred();
970 971 svp->sv_lh = NULL;
971 972
972 973 if ((rc = ldi_ident_from_dev(svp->sv_dev, &li)) == 0) {
973 974 rc = ldi_open_by_dev(&svp->sv_dev,
974 975 OTYP_BLK, FREAD|FWRITE, crp, &svp->sv_lh, li);
975 976 }
976 977
977 978 if (rc != 0) {
978 979 if (kstatus)
979 980 spcs_s_add(kstatus, rc);
980 981 DTRACE_PROBE1(sv_enable_err_lyr_open, dev_t, udev);
981 982 return (sv_free(svp, SV_ELYROPEN));
982 983 }
983 984
984 985 /*
985 986 * Do layering if required - must happen after nsc_open().
986 987 */
987 988
988 989 if (maj->sm_inuse++ == 0) {
989 990 maj->sm_dev_ops = nsc_get_devops(getmajor(udev));
990 991
991 992 if (maj->sm_dev_ops == NULL ||
992 993 maj->sm_dev_ops->devo_cb_ops == NULL) {
993 994 DTRACE_PROBE1(sv_enable_err_load, dev_t, udev);
994 995 return (sv_free(svp, SV_ELOAD));
995 996 }
996 997
997 998 dev_ops = maj->sm_dev_ops;
998 999 cb_ops = dev_ops->devo_cb_ops;
999 1000
1000 1001 if (cb_ops->cb_strategy == NULL ||
1001 1002 cb_ops->cb_strategy == nodev ||
1002 1003 cb_ops->cb_strategy == nulldev) {
1003 1004 DTRACE_PROBE1(sv_enable_err_nostrategy, dev_t, udev);
1004 1005 return (sv_free(svp, SV_ELOAD));
1005 1006 }
1006 1007
1007 1008 if (cb_ops->cb_strategy == sv_lyr_strategy) {
1008 1009 DTRACE_PROBE1(sv_enable_err_svstrategy, dev_t, udev);
1009 1010 return (sv_free(svp, SV_ESTRATEGY));
1010 1011 }
1011 1012
1012 1013 maj->sm_strategy = cb_ops->cb_strategy;
1013 1014 maj->sm_close = cb_ops->cb_close;
1014 1015 maj->sm_ioctl = cb_ops->cb_ioctl;
1015 1016 maj->sm_write = cb_ops->cb_write;
1016 1017 maj->sm_open = cb_ops->cb_open;
1017 1018 maj->sm_read = cb_ops->cb_read;
1018 1019 maj->sm_flag = cb_ops->cb_flag;
1019 1020
1020 1021 cb_ops->cb_flag = cb_ops->cb_flag | D_MP;
1021 1022 cb_ops->cb_strategy = sv_lyr_strategy;
1022 1023 cb_ops->cb_close = sv_lyr_close;
1023 1024 cb_ops->cb_ioctl = sv_lyr_ioctl;
1024 1025 cb_ops->cb_write = sv_lyr_write;
1025 1026 cb_ops->cb_open = sv_lyr_open;
1026 1027 cb_ops->cb_read = sv_lyr_read;
1027 1028
1028 1029 /*
1029 1030 * Check that the driver has async I/O entry points
1030 1031 * before changing them.
1031 1032 */
1032 1033
1033 1034 if (dev_ops->devo_rev < 3 || cb_ops->cb_rev < 1) {
1034 1035 maj->sm_awrite = 0;
1035 1036 maj->sm_aread = 0;
1036 1037 } else {
1037 1038 maj->sm_awrite = cb_ops->cb_awrite;
1038 1039 maj->sm_aread = cb_ops->cb_aread;
1039 1040
1040 1041 cb_ops->cb_awrite = sv_lyr_awrite;
1041 1042 cb_ops->cb_aread = sv_lyr_aread;
1042 1043 }
1043 1044
1044 1045 /*
1045 1046 * Bug 4645743
1046 1047 *
1047 1048 * Prevent sv from ever unloading after it has interposed
1048 1049 * on a major device because there is a race between
1049 1050 * sv removing its layered entry points from the target
1050 1051 * dev_ops, a client coming in and accessing the driver,
1051 1052 * and the kernel modunloading the sv text.
1052 1053 *
1053 1054 * To allow unload, do svboot -u, which only happens in
1054 1055 * pkgrm time.
1055 1056 */
1056 1057 ASSERT(MUTEX_HELD(&sv_mutex));
1057 1058 sv_mod_status = SV_PREVENT_UNLOAD;
1058 1059 }
1059 1060
1060 1061
1061 1062 svp->sv_timestamp = nsc_lbolt();
1062 1063 svp->sv_state = SV_ENABLE;
1063 1064 svp->sv_pending = NULL;
1064 1065 rw_exit(&svp->sv_lock);
1065 1066
1066 1067 sv_ndevices++;
1067 1068 mutex_exit(&sv_mutex);
1068 1069
1069 1070 nblocks = 0;
1070 1071 if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) {
1071 1072 nblocks = svp->sv_nblocks;
1072 1073 nsc_release(svp->sv_fd);
1073 1074 }
1074 1075
1075 1076 cmn_err(CE_CONT, "!sv: rdev 0x%lx, nblocks %" NSC_SZFMT "\n",
1076 1077 svp->sv_dev, nblocks);
1077 1078
1078 1079 return (0);
1079 1080 }
1080 1081
1081 1082
1082 1083 static int
1083 1084 sv_prepare_unload()
1084 1085 {
1085 1086 int rc = 0;
1086 1087
1087 1088 mutex_enter(&sv_mutex);
1088 1089
1089 1090 if (sv_mod_status == SV_PREVENT_UNLOAD) {
1090 1091 if ((sv_ndevices != 0) || (sv_tset != NULL)) {
1091 1092 rc = EBUSY;
1092 1093 } else {
1093 1094 sv_mod_status = SV_ALLOW_UNLOAD;
1094 1095 delay(SV_WAIT_UNLOAD * drv_usectohz(1000000));
1095 1096 }
1096 1097 }
1097 1098
1098 1099 mutex_exit(&sv_mutex);
1099 1100 return (rc);
1100 1101 }
1101 1102
1102 1103 static int
1103 1104 svattach_fd(blind_t arg)
1104 1105 {
1105 1106 dev_t dev = (dev_t)arg;
1106 1107 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1107 1108 int rc;
1108 1109
1109 1110 if (sv_debug > 0)
1110 1111 cmn_err(CE_CONT, "!svattach_fd(%p, %p)\n", arg, (void *)svp);
1111 1112
1112 1113 if (svp == NULL) {
1113 1114 cmn_err(CE_WARN, "!svattach_fd: no state (arg %p)", arg);
1114 1115 return (0);
1115 1116 }
1116 1117
1117 1118 if ((rc = nsc_partsize(svp->sv_fd, &svp->sv_nblocks)) != 0) {
1118 1119 cmn_err(CE_WARN,
1119 1120 "!svattach_fd: nsc_partsize() failed, rc %d", rc);
1120 1121 svp->sv_nblocks = 0;
1121 1122 }
1122 1123
1123 1124 if ((rc = nsc_maxfbas(svp->sv_fd, 0, &svp->sv_maxfbas)) != 0) {
1124 1125 cmn_err(CE_WARN,
1125 1126 "!svattach_fd: nsc_maxfbas() failed, rc %d", rc);
1126 1127 svp->sv_maxfbas = 0;
1127 1128 }
1128 1129
1129 1130 if (sv_debug > 0) {
1130 1131 cmn_err(CE_CONT,
1131 1132 "!svattach_fd(%p): size %" NSC_SZFMT ", "
1132 1133 "maxfbas %" NSC_SZFMT "\n",
1133 1134 arg, svp->sv_nblocks, svp->sv_maxfbas);
1134 1135 }
1135 1136
1136 1137 return (0);
1137 1138 }
1138 1139
1139 1140
1140 1141 static int
1141 1142 svdetach_fd(blind_t arg)
1142 1143 {
1143 1144 dev_t dev = (dev_t)arg;
1144 1145 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1145 1146
1146 1147 if (sv_debug > 0)
1147 1148 cmn_err(CE_CONT, "!svdetach_fd(%p, %p)\n", arg, (void *)svp);
1148 1149
1149 1150 /* svp can be NULL during disable of an sv */
1150 1151 if (svp == NULL)
1151 1152 return (0);
1152 1153
1153 1154 svp->sv_maxfbas = 0;
1154 1155 svp->sv_nblocks = 0;
1155 1156 return (0);
1156 1157 }
1157 1158
1158 1159
1159 1160 /*
1160 1161 * Side effect: if called with (guard != 0), then expects both sv_mutex
1161 1162 * and sv_lock(RW_WRITER) to be held, and will release them before returning.
1162 1163 */
1163 1164
1164 1165 /* ARGSUSED */
1165 1166 static int
1166 1167 sv_disable(dev_t dev, spcs_s_info_t kstatus)
1167 1168 {
1168 1169 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1169 1170
1170 1171 if (svp == NULL) {
1171 1172
1172 1173 DTRACE_PROBE1(sv_disable_err_nodev, sv_dev_t *, svp);
1173 1174 return (SV_ENODEV);
1174 1175 }
1175 1176
1176 1177 mutex_enter(&sv_mutex);
1177 1178 rw_enter(&svp->sv_lock, RW_WRITER);
1178 1179
1179 1180 if (svp->sv_fd == NULL || svp->sv_state != SV_ENABLE) {
1180 1181 rw_exit(&svp->sv_lock);
1181 1182 mutex_exit(&sv_mutex);
1182 1183
1183 1184 DTRACE_PROBE1(sv_disable_err_disabled, sv_dev_t *, svp);
1184 1185 return (SV_EDISABLED);
1185 1186 }
1186 1187
1187 1188
1188 1189 sv_ndevices--;
1189 1190 return (sv_free(svp, 0));
1190 1191 }
1191 1192
1192 1193
1193 1194
1194 1195 static int
1195 1196 sv_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp)
1196 1197 {
1197 1198 nsc_buf_t *tmph;
1198 1199 sv_dev_t *svp;
1199 1200 sv_maj_t *maj;
1200 1201 int (*fn)();
1201 1202 dev_t odev;
1202 1203 int ret;
1203 1204 int rc;
1204 1205
1205 1206 svp = sv_dev_to_sv(*devp, &maj);
1206 1207
1207 1208 if (svp) {
1208 1209 if (svp->sv_state == SV_PENDING &&
1209 1210 svp->sv_pending == curthread) {
1210 1211 /*
1211 1212 * This is a recursive open from a call to
1212 1213 * ddi_lyr_open_by_devt and so we just want
1213 1214 * to pass it straight through to the
1214 1215 * underlying driver.
1215 1216 */
1216 1217 DTRACE_PROBE2(sv_lyr_open_recursive,
1217 1218 sv_dev_t *, svp,
1218 1219 dev_t, *devp);
1219 1220 svp = NULL;
1220 1221 } else
1221 1222 rw_enter(&svp->sv_lock, RW_READER);
1222 1223 }
1223 1224
1224 1225 odev = *devp;
1225 1226
1226 1227 if (maj && (fn = maj->sm_open) != 0) {
1227 1228 if (!(maj->sm_flag & D_MP)) {
1228 1229 UNSAFE_ENTER();
1229 1230 ret = (*fn)(devp, flag, otyp, crp);
1230 1231 UNSAFE_EXIT();
1231 1232 } else {
1232 1233 ret = (*fn)(devp, flag, otyp, crp);
1233 1234 }
1234 1235
1235 1236 if (ret == 0) {
1236 1237 /*
1237 1238 * Re-acquire svp if the driver changed *devp.
1238 1239 */
1239 1240
1240 1241 if (*devp != odev) {
1241 1242 if (svp != NULL)
1242 1243 rw_exit(&svp->sv_lock);
1243 1244
1244 1245 svp = sv_dev_to_sv(*devp, NULL);
1245 1246
1246 1247 if (svp) {
1247 1248 rw_enter(&svp->sv_lock, RW_READER);
1248 1249 }
1249 1250 }
1250 1251 }
1251 1252 } else {
1252 1253 ret = ENODEV;
1253 1254 }
1254 1255
1255 1256 if (svp && ret != 0 && svp->sv_state == SV_ENABLE) {
1256 1257 /*
1257 1258 * Underlying DDI open failed, but we have this
1258 1259 * device SV enabled. If we can read some data
1259 1260 * from the device, fake a successful open (this
1260 1261 * probably means that this device is RDC'd and we
1261 1262 * are getting the data from the secondary node).
1262 1263 *
1263 1264 * The reserve must be done with NSC_TRY|NSC_NOWAIT to
1264 1265 * ensure that it does not deadlock if this open is
1265 1266 * coming from nskernd:get_bsize().
1266 1267 */
1267 1268 rc = sv_reserve(svp->sv_fd,
1268 1269 NSC_TRY | NSC_NOWAIT | NSC_MULTI | NSC_PCATCH);
1269 1270 if (rc == 0) {
1270 1271 tmph = NULL;
1271 1272
1272 1273 rc = nsc_alloc_buf(svp->sv_fd, 0, 1, NSC_READ, &tmph);
1273 1274 if (rc <= 0) {
1274 1275 /* success */
1275 1276 ret = 0;
1276 1277 }
1277 1278
1278 1279 if (tmph) {
1279 1280 (void) nsc_free_buf(tmph);
1280 1281 tmph = NULL;
1281 1282 }
1282 1283
1283 1284 nsc_release(svp->sv_fd);
1284 1285
1285 1286 /*
1286 1287 * Count the number of layered opens that we
1287 1288 * fake since we have to fake a matching number
1288 1289 * of closes (OTYP_LYR open/close calls must be
1289 1290 * paired).
1290 1291 */
1291 1292
1292 1293 if (ret == 0 && otyp == OTYP_LYR) {
1293 1294 mutex_enter(&svp->sv_olock);
1294 1295 svp->sv_openlcnt++;
1295 1296 mutex_exit(&svp->sv_olock);
1296 1297 }
1297 1298 }
1298 1299 }
1299 1300
1300 1301 if (svp) {
1301 1302 rw_exit(&svp->sv_lock);
1302 1303 }
1303 1304
1304 1305 return (ret);
1305 1306 }
1306 1307
1307 1308
1308 1309 static int
1309 1310 sv_lyr_close(dev_t dev, int flag, int otyp, cred_t *crp)
1310 1311 {
1311 1312 sv_dev_t *svp;
1312 1313 sv_maj_t *maj;
1313 1314 int (*fn)();
1314 1315 int ret;
1315 1316
1316 1317 svp = sv_dev_to_sv(dev, &maj);
1317 1318
1318 1319 if (svp &&
1319 1320 svp->sv_state == SV_PENDING &&
1320 1321 svp->sv_pending == curthread) {
1321 1322 /*
 1322 1323 		 * This is a recursive close from a call to
1323 1324 * ddi_lyr_close and so we just want
1324 1325 * to pass it straight through to the
1325 1326 * underlying driver.
1326 1327 */
1327 1328 DTRACE_PROBE2(sv_lyr_close_recursive, sv_dev_t *, svp,
1328 1329 dev_t, dev);
1329 1330 svp = NULL;
1330 1331 }
1331 1332
1332 1333 if (svp) {
1333 1334 rw_enter(&svp->sv_lock, RW_READER);
1334 1335
1335 1336 if (otyp == OTYP_LYR) {
1336 1337 mutex_enter(&svp->sv_olock);
1337 1338
1338 1339 if (svp->sv_openlcnt) {
1339 1340 /*
1340 1341 * Consume sufficient layered closes to
1341 1342 * account for the opens that we faked
1342 1343 * whilst the device was failed.
1343 1344 */
1344 1345 svp->sv_openlcnt--;
1345 1346 mutex_exit(&svp->sv_olock);
1346 1347 rw_exit(&svp->sv_lock);
1347 1348
1348 1349 DTRACE_PROBE1(sv_lyr_close_end, dev_t, dev);
1349 1350
1350 1351 return (0);
1351 1352 }
1352 1353
1353 1354 mutex_exit(&svp->sv_olock);
1354 1355 }
1355 1356 }
1356 1357
1357 1358 if (maj && (fn = maj->sm_close) != 0) {
1358 1359 if (!(maj->sm_flag & D_MP)) {
1359 1360 UNSAFE_ENTER();
1360 1361 ret = (*fn)(dev, flag, otyp, crp);
1361 1362 UNSAFE_EXIT();
1362 1363 } else {
1363 1364 ret = (*fn)(dev, flag, otyp, crp);
1364 1365 }
1365 1366 } else {
1366 1367 ret = ENODEV;
1367 1368 }
1368 1369
1369 1370 if (svp) {
1370 1371 rw_exit(&svp->sv_lock);
1371 1372 }
1372 1373
1373 1374 return (ret);
1374 1375 }
1375 1376
1376 1377
1377 1378 /*
1378 1379 * Convert the specified dev_t into a locked and enabled sv_dev_t, or
1379 1380 * return NULL.
1380 1381 */
1381 1382 static sv_dev_t *
1382 1383 sv_find_enabled(const dev_t dev, sv_maj_t **majpp)
1383 1384 {
1384 1385 sv_dev_t *svp;
1385 1386
1386 1387 while ((svp = sv_dev_to_sv(dev, majpp)) != NULL) {
1387 1388 rw_enter(&svp->sv_lock, RW_READER);
1388 1389
1389 1390 if (svp->sv_state == SV_ENABLE) {
1390 1391 /* locked and enabled */
1391 1392 break;
1392 1393 }
1393 1394
1394 1395 /*
1395 1396 * State was changed while waiting on the lock.
1396 1397 * Wait for a stable state.
1397 1398 */
1398 1399 rw_exit(&svp->sv_lock);
1399 1400
1400 1401 DTRACE_PROBE1(sv_find_enabled_retry, dev_t, dev);
1401 1402
1402 1403 delay(2);
1403 1404 }
1404 1405
1405 1406 return (svp);
1406 1407 }
1407 1408
1408 1409
1409 1410 static int
1410 1411 sv_lyr_uio(dev_t dev, uio_t *uiop, cred_t *crp, int rw)
1411 1412 {
1412 1413 sv_dev_t *svp;
1413 1414 sv_maj_t *maj;
1414 1415 int (*fn)();
1415 1416 int rc;
1416 1417
1417 1418 svp = sv_find_enabled(dev, &maj);
1418 1419 if (svp == NULL) {
1419 1420 if (maj) {
1420 1421 if (rw == NSC_READ)
1421 1422 fn = maj->sm_read;
1422 1423 else
1423 1424 fn = maj->sm_write;
1424 1425
1425 1426 if (fn != 0) {
1426 1427 if (!(maj->sm_flag & D_MP)) {
1427 1428 UNSAFE_ENTER();
1428 1429 rc = (*fn)(dev, uiop, crp);
1429 1430 UNSAFE_EXIT();
1430 1431 } else {
1431 1432 rc = (*fn)(dev, uiop, crp);
1432 1433 }
1433 1434 }
1434 1435
1435 1436 return (rc);
1436 1437 } else {
1437 1438 return (ENODEV);
1438 1439 }
1439 1440 }
1440 1441
1441 1442 ASSERT(RW_READ_HELD(&svp->sv_lock));
1442 1443
1443 1444 if (svp->sv_flag == 0) {
1444 1445 /*
1445 1446 * guard access mode
1446 1447 * - prevent user level access to the device
1447 1448 */
1448 1449 DTRACE_PROBE1(sv_lyr_uio_err_guard, uio_t *, uiop);
1449 1450 rc = EPERM;
1450 1451 goto out;
1451 1452 }
1452 1453
1453 1454 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
1454 1455 DTRACE_PROBE1(sv_lyr_uio_err_rsrv, uio_t *, uiop);
1455 1456 goto out;
1456 1457 }
1457 1458
1458 1459 if (rw == NSC_READ)
1459 1460 rc = nsc_uread(svp->sv_fd, uiop, crp);
1460 1461 else
1461 1462 rc = nsc_uwrite(svp->sv_fd, uiop, crp);
1462 1463
1463 1464 nsc_release(svp->sv_fd);
1464 1465
1465 1466 out:
1466 1467 rw_exit(&svp->sv_lock);
1467 1468
1468 1469 return (rc);
1469 1470 }
1470 1471
1471 1472
1472 1473 static int
1473 1474 sv_lyr_read(dev_t dev, uio_t *uiop, cred_t *crp)
1474 1475 {
1475 1476 return (sv_lyr_uio(dev, uiop, crp, NSC_READ));
1476 1477 }
1477 1478
1478 1479
1479 1480 static int
1480 1481 sv_lyr_write(dev_t dev, uio_t *uiop, cred_t *crp)
1481 1482 {
1482 1483 return (sv_lyr_uio(dev, uiop, crp, NSC_WRITE));
1483 1484 }
1484 1485
1485 1486
1486 1487 /* ARGSUSED */
1487 1488
1488 1489 static int
1489 1490 sv_lyr_aread(dev_t dev, struct aio_req *aio, cred_t *crp)
1490 1491 {
1491 1492 return (aphysio(sv_lyr_strategy,
1492 1493 anocancel, dev, B_READ, minphys, aio));
1493 1494 }
1494 1495
1495 1496
1496 1497 /* ARGSUSED */
1497 1498
1498 1499 static int
1499 1500 sv_lyr_awrite(dev_t dev, struct aio_req *aio, cred_t *crp)
1500 1501 {
1501 1502 return (aphysio(sv_lyr_strategy,
1502 1503 anocancel, dev, B_WRITE, minphys, aio));
1503 1504 }
1504 1505
1505 1506
1506 1507 /*
1507 1508 * Set up an array containing the list of raw path names
 1508 1509 	 * The array for the paths is svn and the size of the array is
 1509 1510 	 * in size.
 1510 1511 	 *
 1511 1512 	 * If there are more layered devices than will fit in the array,
 1512 1513 	 * the number of extra layered devices is returned in *extra.
 1513 1514 	 * Otherwise zero is returned.
1514 1515 *
1515 1516 * Input:
1516 1517 * svn : array for paths
1517 1518 * size : size of the array
1518 1519 *
1519 1520 * Output (extra):
1520 1521 * zero : All paths fit in array
 1521 1522 	 *   >0   : Number of defined layered devices that do not fit in the array
1522 1523 */
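
For context, a userland consumer reaches sv_list() through the
SVIOC_LIST ioctl, roughly as below. This is a hedged sketch: the
"/dev/sv" control-node path and the capacity of 16 are assumptions for
illustration, while the svl_*/svn_* fields match their use in sv_list()
here and in svioctl() below.

	sv_list_t svl;
	sv_name_t names[16];

	(void) memset(&svl, 0, sizeof (svl));
	svl.svl_count = 16;		/* capacity of names[] */
	svl.svl_names = names;

	/* fd is an open descriptor on the sv control node, e.g. /dev/sv */
	if (ioctl(fd, SVIOC_LIST, &svl) == 0) {
		/* the list is NULL terminated when it fits in the array */
		for (int i = 0; i < 16 && names[i].svn_path[0] != '\0'; i++)
			(void) printf("%s: %d blocks, mode %d\n",
			    names[i].svn_path, names[i].svn_nblocks,
			    names[i].svn_mode);
	}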
1523 1524
1524 1525 static int
1525 1526 sv_list(void *ptr, const int size, int *extra, const int ilp32)
1526 1527 {
1527 1528 sv_name32_t *svn32;
1528 1529 sv_name_t *svn;
1529 1530 sv_dev_t *svp;
1530 1531 int *mode, *nblocks;
1531 1532 int i, index;
1532 1533 char *path;
1533 1534
1534 1535 *extra = 0;
1535 1536 index = 0;
1536 1537
1537 1538 if (ilp32)
1538 1539 svn32 = ptr;
1539 1540 else
1540 1541 svn = ptr;
1541 1542
1542 1543 mutex_enter(&sv_mutex);
1543 1544 for (i = 0; i < sv_max_devices; i++) {
1544 1545 svp = &sv_devs[i];
1545 1546
1546 1547 rw_enter(&svp->sv_lock, RW_READER);
1547 1548
1548 1549 if (svp->sv_state != SV_ENABLE) {
1549 1550 rw_exit(&svp->sv_lock);
1550 1551 continue;
1551 1552 }
1552 1553
1553 1554 if ((*extra) != 0 || ptr == NULL) {
1554 1555 /* Another overflow entry */
1555 1556 rw_exit(&svp->sv_lock);
1556 1557 (*extra)++;
1557 1558 continue;
1558 1559 }
1559 1560
1560 1561 if (ilp32) {
1561 1562 nblocks = &svn32->svn_nblocks;
1562 1563 mode = &svn32->svn_mode;
1563 1564 path = svn32->svn_path;
1564 1565
1565 1566 svn32->svn_timestamp = (uint32_t)svp->sv_timestamp;
1566 1567 svn32++;
1567 1568 } else {
1568 1569 nblocks = &svn->svn_nblocks;
1569 1570 mode = &svn->svn_mode;
1570 1571 path = svn->svn_path;
1571 1572
1572 1573 svn->svn_timestamp = svp->sv_timestamp;
1573 1574 svn++;
1574 1575 }
1575 1576
1576 1577 (void) strcpy(path, nsc_pathname(svp->sv_fd));
1577 1578 *nblocks = svp->sv_nblocks;
1578 1579 *mode = svp->sv_flag;
1579 1580
1580 1581 if (*nblocks == 0) {
1581 1582 if (sv_debug > 3)
1582 1583 cmn_err(CE_CONT, "!sv_list: need to reserve\n");
1583 1584
1584 1585 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
1585 1586 *nblocks = svp->sv_nblocks;
1586 1587 nsc_release(svp->sv_fd);
1587 1588 }
1588 1589 }
1589 1590
1590 1591 if (++index >= size) {
1591 1592 /* Out of space */
1592 1593 (*extra)++;
1593 1594 }
1594 1595
1595 1596 rw_exit(&svp->sv_lock);
1596 1597 }
1597 1598 mutex_exit(&sv_mutex);
1598 1599
1599 1600 if (index < size) {
1600 1601 /* NULL terminated list */
1601 1602 if (ilp32)
1602 1603 svn32->svn_path[0] = '\0';
1603 1604 else
1604 1605 svn->svn_path[0] = '\0';
1605 1606 }
1606 1607
1607 1608 return (0);
1608 1609 }
1609 1610
1610 1611
1611 1612 static void
1612 1613 sv_thread_tune(int threads)
1613 1614 {
1614 1615 int incr = (threads > 0) ? 1 : -1;
1615 1616 int change = 0;
1616 1617 int nthreads;
1617 1618
1618 1619 ASSERT(MUTEX_HELD(&sv_mutex));
1619 1620
1620 1621 if (sv_threads_extra) {
1621 1622 /* keep track of any additional threads requested */
1622 1623 if (threads > 0) {
1623 1624 sv_threads_extra += threads;
1624 1625 return;
1625 1626 }
1626 1627 threads = -threads;
1627 1628 if (threads >= sv_threads_extra) {
1628 1629 threads -= sv_threads_extra;
1629 1630 sv_threads_extra = 0;
1630 1631 /* fall through to while loop */
1631 1632 } else {
1632 1633 sv_threads_extra -= threads;
1633 1634 return;
1634 1635 }
1635 1636 } else if (threads > 0) {
1636 1637 /*
1637 1638 * do not increase the number of threads beyond
1638 1639 * sv_threads_max when doing dynamic thread tuning
1639 1640 */
1640 1641 nthreads = nst_nthread(sv_tset);
1641 1642 if ((nthreads + threads) > sv_threads_max) {
1642 1643 sv_threads_extra = nthreads + threads - sv_threads_max;
1643 1644 threads = sv_threads_max - nthreads;
1644 1645 if (threads <= 0)
1645 1646 return;
1646 1647 }
1647 1648 }
1648 1649
1649 1650 if (threads < 0)
1650 1651 threads = -threads;
1651 1652
1652 1653 while (threads--) {
1653 1654 nthreads = nst_nthread(sv_tset);
1654 1655 sv_threads_needed += incr;
1655 1656
1656 1657 if (sv_threads_needed >= nthreads)
1657 1658 change += nst_add_thread(sv_tset, sv_threads_inc);
1658 1659 else if ((sv_threads_needed <
1659 1660 (nthreads - (sv_threads_inc + sv_threads_hysteresis))) &&
1660 1661 ((nthreads - sv_threads_inc) >= sv_threads))
1661 1662 change -= nst_del_thread(sv_tset, sv_threads_inc);
1662 1663 }
1663 1664
1664 1665 #ifdef DEBUG
1665 1666 if (change) {
1666 1667 cmn_err(CE_NOTE,
1667 1668 "!sv_thread_tune: threads needed %d, nthreads %d, "
1668 1669 "nthreads change %d",
1669 1670 sv_threads_needed, nst_nthread(sv_tset), change);
1670 1671 }
1671 1672 #endif
1672 1673 }
1673 1674
1674 1675
1675 1676 /* ARGSUSED */
1676 1677 static int
1677 1678 svopen(dev_t *devp, int flag, int otyp, cred_t *crp)
1678 1679 {
1679 1680 int rc;
1680 1681
1681 1682 mutex_enter(&sv_mutex);
1682 1683 rc = sv_init_devs();
1683 1684 mutex_exit(&sv_mutex);
1684 1685
1685 1686 return (rc);
1686 1687 }
1687 1688
1688 1689
1689 1690 /* ARGSUSED */
1690 1691 static int
1691 1692 svclose(dev_t dev, int flag, int otyp, cred_t *crp)
1692 1693 {
1693 1694 const int secs = HZ * 5;
1694 1695 const int ticks = HZ / 10;
1695 1696 int loops = secs / ticks;
1696 1697
1697 1698 mutex_enter(&sv_mutex);
1698 1699 while (sv_ndevices <= 0 && sv_tset != NULL && loops > 0) {
1699 1700 if (nst_nlive(sv_tset) <= 0) {
1700 1701 nst_destroy(sv_tset);
1701 1702 sv_tset = NULL;
1702 1703 break;
1703 1704 }
1704 1705
1705 1706 /* threads still active - wait for them to exit */
1706 1707 mutex_exit(&sv_mutex);
1707 1708 delay(ticks);
1708 1709 loops--;
1709 1710 mutex_enter(&sv_mutex);
1710 1711 }
1711 1712 mutex_exit(&sv_mutex);
1712 1713
1713 1714 if (loops <= 0) {
1714 1715 cmn_err(CE_WARN,
1715 1716 #ifndef DEBUG
1716 1717 /* do not write to console when non-DEBUG */
1717 1718 "!"
1718 1719 #endif
1719 1720 "sv:svclose: threads still active "
1720 1721 "after %d sec - leaking thread set", secs);
1721 1722 }
1722 1723
1723 1724 return (0);
1724 1725 }
1725 1726
1726 1727
1727 1728 static int
1728 1729 svioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvalp)
1729 1730 {
1730 1731 char itmp1[12], itmp2[12]; /* temp char array for editing ints */
1731 1732 spcs_s_info_t kstatus; /* Kernel version of spcs status */
1732 1733 spcs_s_info_t ustatus; /* Address of user version of spcs status */
1733 1734 sv_list32_t svl32; /* 32 bit Initial structure for SVIOC_LIST */
1734 1735 sv_version_t svv; /* Version structure */
1735 1736 sv_conf_t svc; /* User config structure */
1736 1737 sv_list_t svl; /* Initial structure for SVIOC_LIST */
1737 1738 void *usvn; /* Address of user sv_name_t */
1738 1739 void *svn = NULL; /* Array for SVIOC_LIST */
1739 1740 uint64_t phash; /* pathname hash */
1740 1741 int rc = 0; /* Return code -- errno */
1741 1742 int size; /* Number of items in array */
1742 1743 int bytes; /* Byte size of array */
1743 1744 int ilp32; /* Convert data structures for ilp32 userland */
1744 1745
1745 1746 *rvalp = 0;
1746 1747
1747 1748 /*
 1748 1749 	 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, then processing continues.
 1749 1750 	 * Otherwise it previously was SV_PREVENT_UNLOAD and is now
 1750 1751 	 * SV_ALLOW_UNLOAD, expecting the driver to eventually unload.
1751 1752 *
1752 1753 * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex.
1753 1754 */
1754 1755 if (sv_mod_status == SV_ALLOW_UNLOAD) {
1755 1756 return (EBUSY);
1756 1757 }
1757 1758
1758 1759 if ((cmd != SVIOC_LIST) && ((rc = drv_priv(crp)) != 0))
1759 1760 return (rc);
1760 1761
1761 1762 kstatus = spcs_s_kcreate();
1762 1763 if (!kstatus) {
1763 1764 DTRACE_PROBE1(sv_ioctl_err_kcreate, dev_t, dev);
1764 1765 return (ENOMEM);
1765 1766 }
1766 1767
1767 1768 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
1768 1769
1769 1770 switch (cmd) {
1770 1771
1771 1772 case SVIOC_ENABLE:
1772 1773
1773 1774 if (ilp32) {
1774 1775 sv_conf32_t svc32;
1775 1776
1776 1777 if (ddi_copyin((void *)arg, &svc32,
1777 1778 sizeof (svc32), mode) < 0) {
1778 1779 spcs_s_kfree(kstatus);
1779 1780 return (EFAULT);
1780 1781 }
1781 1782
1782 1783 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1783 1784 (void) strcpy(svc.svc_path, svc32.svc_path);
1784 1785 svc.svc_flag = svc32.svc_flag;
1785 1786 svc.svc_major = svc32.svc_major;
1786 1787 svc.svc_minor = svc32.svc_minor;
1787 1788 } else {
1788 1789 if (ddi_copyin((void *)arg, &svc,
1789 1790 sizeof (svc), mode) < 0) {
1790 1791 spcs_s_kfree(kstatus);
1791 1792 return (EFAULT);
1792 1793 }
1793 1794 }
1794 1795
1795 1796 /* force to raw access */
1796 1797 svc.svc_flag = NSC_DEVICE;
1797 1798
1798 1799 if (sv_tset == NULL) {
1799 1800 mutex_enter(&sv_mutex);
1800 1801
1801 1802 if (sv_tset == NULL) {
1802 1803 sv_tset = nst_init("sv_thr", sv_threads);
1803 1804 }
1804 1805
1805 1806 mutex_exit(&sv_mutex);
1806 1807
1807 1808 if (sv_tset == NULL) {
1808 1809 cmn_err(CE_WARN,
1809 1810 "!sv: could not allocate %d threads",
1810 1811 sv_threads);
1811 1812 }
1812 1813 }
1813 1814
1814 1815 rc = sv_enable(svc.svc_path, svc.svc_flag,
1815 1816 makedevice(svc.svc_major, svc.svc_minor), kstatus);
1816 1817
1817 1818 if (rc == 0) {
1818 1819 sv_config_time = nsc_lbolt();
1819 1820
1820 1821 mutex_enter(&sv_mutex);
1821 1822 sv_thread_tune(sv_threads_dev);
1822 1823 mutex_exit(&sv_mutex);
1823 1824 }
1824 1825
1825 1826 DTRACE_PROBE3(sv_ioctl_end, dev_t, dev, int, *rvalp, int, rc);
1826 1827
1827 1828 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1828 1829 /* NOTREACHED */
1829 1830
1830 1831 case SVIOC_DISABLE:
1831 1832
1832 1833 if (ilp32) {
1833 1834 sv_conf32_t svc32;
1834 1835
1835 1836 if (ddi_copyin((void *)arg, &svc32,
1836 1837 sizeof (svc32), mode) < 0) {
1837 1838 spcs_s_kfree(kstatus);
1838 1839 return (EFAULT);
1839 1840 }
1840 1841
1841 1842 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1842 1843 svc.svc_major = svc32.svc_major;
1843 1844 svc.svc_minor = svc32.svc_minor;
1844 1845 (void) strcpy(svc.svc_path, svc32.svc_path);
1845 1846 svc.svc_flag = svc32.svc_flag;
1846 1847 } else {
1847 1848 if (ddi_copyin((void *)arg, &svc,
1848 1849 sizeof (svc), mode) < 0) {
1849 1850 spcs_s_kfree(kstatus);
1850 1851 return (EFAULT);
1851 1852 }
1852 1853 }
1853 1854
1854 1855 if (svc.svc_major == (major_t)-1 &&
1855 1856 svc.svc_minor == (minor_t)-1) {
1856 1857 sv_dev_t *svp;
1857 1858 int i;
1858 1859
1859 1860 /*
1860 1861 * User level could not find the minor device
1861 1862 * node, so do this the slow way by searching
1862 1863 * the entire sv config for a matching pathname.
1863 1864 */
1864 1865
1865 1866 phash = nsc_strhash(svc.svc_path);
1866 1867
1867 1868 mutex_enter(&sv_mutex);
1868 1869
1869 1870 for (i = 0; i < sv_max_devices; i++) {
1870 1871 svp = &sv_devs[i];
1871 1872
1872 1873 if (svp->sv_state == SV_DISABLE ||
1873 1874 svp->sv_fd == NULL)
1874 1875 continue;
1875 1876
1876 1877 if (nsc_fdpathcmp(svp->sv_fd, phash,
1877 1878 svc.svc_path) == 0) {
1878 1879 svc.svc_major = getmajor(svp->sv_dev);
1879 1880 svc.svc_minor = getminor(svp->sv_dev);
1880 1881 break;
1881 1882 }
1882 1883 }
1883 1884
1884 1885 mutex_exit(&sv_mutex);
1885 1886
1886 1887 if (svc.svc_major == (major_t)-1 &&
1887 1888 svc.svc_minor == (minor_t)-1)
1888 1889 return (spcs_s_ocopyoutf(&kstatus,
1889 1890 svc.svc_error, SV_ENODEV));
1890 1891 }
1891 1892
1892 1893 rc = sv_disable(makedevice(svc.svc_major, svc.svc_minor),
1893 1894 kstatus);
1894 1895
1895 1896 if (rc == 0) {
1896 1897 sv_config_time = nsc_lbolt();
1897 1898
1898 1899 mutex_enter(&sv_mutex);
1899 1900 sv_thread_tune(-sv_threads_dev);
1900 1901 mutex_exit(&sv_mutex);
1901 1902 }
1902 1903
1903 1904 DTRACE_PROBE3(sv_ioctl_2, dev_t, dev, int, *rvalp, int, rc);
1904 1905
1905 1906 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1906 1907 /* NOTREACHED */
1907 1908
1908 1909 case SVIOC_LIST:
1909 1910
1910 1911 if (ilp32) {
1911 1912 if (ddi_copyin((void *)arg, &svl32,
1912 1913 sizeof (svl32), mode) < 0) {
1913 1914 spcs_s_kfree(kstatus);
1914 1915 return (EFAULT);
1915 1916 }
1916 1917
1917 1918 ustatus = (spcs_s_info_t)svl32.svl_error;
1918 1919 size = svl32.svl_count;
1919 1920 usvn = (void *)(unsigned long)svl32.svl_names;
1920 1921 } else {
1921 1922 if (ddi_copyin((void *)arg, &svl,
1922 1923 sizeof (svl), mode) < 0) {
1923 1924 spcs_s_kfree(kstatus);
1924 1925 return (EFAULT);
1925 1926 }
1926 1927
1927 1928 ustatus = svl.svl_error;
1928 1929 size = svl.svl_count;
1929 1930 usvn = svl.svl_names;
1930 1931 }
1931 1932
1932 1933 /* Do some boundary checking */
1933 1934 if ((size < 0) || (size > sv_max_devices)) {
1934 1935 /* Array size is out of range */
1935 1936 return (spcs_s_ocopyoutf(&kstatus, ustatus,
1936 1937 SV_EARRBOUNDS, "0",
1937 1938 spcs_s_inttostring(sv_max_devices, itmp1,
1938 1939 sizeof (itmp1), 0),
1939 1940 spcs_s_inttostring(size, itmp2,
1940 1941 sizeof (itmp2), 0)));
1941 1942 }
1942 1943
1943 1944 if (ilp32)
1944 1945 bytes = size * sizeof (sv_name32_t);
1945 1946 else
1946 1947 bytes = size * sizeof (sv_name_t);
1947 1948
1948 1949 /* Allocate memory for the array of structures */
1949 1950 if (bytes != 0) {
1950 1951 svn = kmem_zalloc(bytes, KM_SLEEP);
1951 1952 if (!svn) {
1952 1953 return (spcs_s_ocopyoutf(&kstatus,
1953 1954 ustatus, ENOMEM));
1954 1955 }
1955 1956 }
1956 1957
1957 1958 rc = sv_list(svn, size, rvalp, ilp32);
1958 1959 if (rc) {
1959 1960 if (svn != NULL)
1960 1961 kmem_free(svn, bytes);
1961 1962 return (spcs_s_ocopyoutf(&kstatus, ustatus, rc));
1962 1963 }
1963 1964
1964 1965 if (ilp32) {
1965 1966 svl32.svl_timestamp = (uint32_t)sv_config_time;
1966 1967 svl32.svl_maxdevs = (int32_t)sv_max_devices;
1967 1968
1968 1969 /* Return the list structure */
1969 1970 if (ddi_copyout(&svl32, (void *)arg,
1970 1971 sizeof (svl32), mode) < 0) {
1971 1972 spcs_s_kfree(kstatus);
1972 1973 if (svn != NULL)
1973 1974 kmem_free(svn, bytes);
1974 1975 return (EFAULT);
1975 1976 }
1976 1977 } else {
1977 1978 svl.svl_timestamp = sv_config_time;
1978 1979 svl.svl_maxdevs = sv_max_devices;
1979 1980
1980 1981 /* Return the list structure */
1981 1982 if (ddi_copyout(&svl, (void *)arg,
1982 1983 sizeof (svl), mode) < 0) {
1983 1984 spcs_s_kfree(kstatus);
1984 1985 if (svn != NULL)
1985 1986 kmem_free(svn, bytes);
1986 1987 return (EFAULT);
1987 1988 }
1988 1989 }
1989 1990
1990 1991 /* Return the array */
1991 1992 if (svn != NULL) {
1992 1993 if (ddi_copyout(svn, usvn, bytes, mode) < 0) {
1993 1994 kmem_free(svn, bytes);
1994 1995 spcs_s_kfree(kstatus);
1995 1996 return (EFAULT);
1996 1997 }
1997 1998 kmem_free(svn, bytes);
1998 1999 }
1999 2000
2000 2001 DTRACE_PROBE3(sv_ioctl_3, dev_t, dev, int, *rvalp, int, 0);
2001 2002
2002 2003 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2003 2004 /* NOTREACHED */
2004 2005
2005 2006 case SVIOC_VERSION:
2006 2007
2007 2008 if (ilp32) {
2008 2009 sv_version32_t svv32;
2009 2010
2010 2011 if (ddi_copyin((void *)arg, &svv32,
2011 2012 sizeof (svv32), mode) < 0) {
2012 2013 spcs_s_kfree(kstatus);
2013 2014 return (EFAULT);
2014 2015 }
2015 2016
2016 2017 svv32.svv_major_rev = sv_major_rev;
2017 2018 svv32.svv_minor_rev = sv_minor_rev;
2018 2019 svv32.svv_micro_rev = sv_micro_rev;
2019 2020 svv32.svv_baseline_rev = sv_baseline_rev;
2020 2021
2021 2022 if (ddi_copyout(&svv32, (void *)arg,
2022 2023 sizeof (svv32), mode) < 0) {
2023 2024 spcs_s_kfree(kstatus);
2024 2025 return (EFAULT);
2025 2026 }
2026 2027
2027 2028 ustatus = (spcs_s_info_t)svv32.svv_error;
2028 2029 } else {
2029 2030 if (ddi_copyin((void *)arg, &svv,
2030 2031 sizeof (svv), mode) < 0) {
2031 2032 spcs_s_kfree(kstatus);
2032 2033 return (EFAULT);
2033 2034 }
2034 2035
2035 2036 svv.svv_major_rev = sv_major_rev;
2036 2037 svv.svv_minor_rev = sv_minor_rev;
2037 2038 svv.svv_micro_rev = sv_micro_rev;
2038 2039 svv.svv_baseline_rev = sv_baseline_rev;
2039 2040
2040 2041 if (ddi_copyout(&svv, (void *)arg,
2041 2042 sizeof (svv), mode) < 0) {
2042 2043 spcs_s_kfree(kstatus);
2043 2044 return (EFAULT);
2044 2045 }
2045 2046
2046 2047 ustatus = svv.svv_error;
2047 2048 }
2048 2049
2049 2050 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, 0);
2050 2051
2051 2052 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2052 2053 /* NOTREACHED */
2053 2054
2054 2055 case SVIOC_UNLOAD:
2055 2056 rc = sv_prepare_unload();
2056 2057
2057 2058 if (ddi_copyout(&rc, (void *)arg, sizeof (rc), mode) < 0) {
2058 2059 rc = EFAULT;
2059 2060 }
2060 2061
2061 2062 spcs_s_kfree(kstatus);
2062 2063 return (rc);
2063 2064
2064 2065 default:
2065 2066 spcs_s_kfree(kstatus);
2066 2067
2067 2068 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, EINVAL);
2068 2069
2069 2070 return (EINVAL);
2070 2071 /* NOTREACHED */
2071 2072 }
2072 2073
2073 2074 /* NOTREACHED */
2074 2075 }
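
Reviewer note: the SVIOC_LIST and SVIOC_VERSION arms above keep separate 32-bit argument structures (svl32, svv32) because an ILP32 process may issue these ioctls against an LP64 kernel, and a user pointer stored in the 32-bit form must be widened through an integer type before use, as in the driver's `usvn = (void *)(unsigned long)svl32.svl_names;`. A minimal userland sketch of that idiom, using hypothetical demo_list32/demo_list structures rather than the driver's own types:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Hypothetical 32-bit and native forms of an ioctl argument,
     * mirroring the svl32/svl pairing (names are illustrative only).
     */
    struct demo_list32 {
            uint32_t dl_names;      /* user pointer, stored as 32 bits */
            int32_t  dl_count;
    };

    struct demo_list {
            void    *dl_names;      /* native user pointer */
            int      dl_count;
    };

    int
    main(void)
    {
            struct demo_list32 dl32 = { 0x8004f000U, 16 };
            struct demo_list dl;

            /*
             * Widen through an integer type, never straight to a
             * pointer: the same double cast the driver uses.
             */
            dl.dl_names = (void *)(unsigned long)dl32.dl_names;
            dl.dl_count = dl32.dl_count;

            printf("names=%p count=%d\n", dl.dl_names, dl.dl_count);
            return (0);
    }
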
2075 2076
2076 2077
2077 2078 /* ARGSUSED */
2078 2079 static int
2079 2080 svprint(dev_t dev, char *str)
2080 2081 {
2081 2082 int instance = ddi_get_instance(sv_dip);
2082 2083 cmn_err(CE_WARN, "!%s%d: %s", ddi_get_name(sv_dip), instance, str);
2083 2084 return (0);
2084 2085 }
2085 2086
2086 2087
2087 2088 static void
2088 2089 _sv_lyr_strategy(struct buf *bp)
2089 2090 {
2090 2091 caddr_t buf_addr; /* pointer to linear buffer in bp */
2091 2092 nsc_buf_t *bufh = NULL;
2092 2093 nsc_buf_t *hndl = NULL;
2093 2094 sv_dev_t *svp;
2094 2095 nsc_vec_t *v;
2095 2096 sv_maj_t *maj;
2096 2097 nsc_size_t fba_req, fba_len; /* FBA lengths */
2097 2098 nsc_off_t fba_off; /* FBA offset */
2098 2099 size_t tocopy, nbytes; /* byte lengths */
2099 2100 int rw, rc; /* flags and return codes */
2100 2101 int (*fn)();
2101 2102
2102 2103 rc = 0;
2103 2104
2104 2105 if (sv_debug > 5)
2105 2106 cmn_err(CE_CONT, "!_sv_lyr_strategy(%p)\n", (void *)bp);
2106 2107
2107 2108 svp = sv_find_enabled(bp->b_edev, &maj);
2108 2109 if (svp == NULL) {
2109 2110 if (maj && (fn = maj->sm_strategy) != 0) {
2110 2111 if (!(maj->sm_flag & D_MP)) {
2111 2112 UNSAFE_ENTER();
2112 2113 rc = (*fn)(bp);
2113 2114 UNSAFE_EXIT();
2114 2115 } else {
2115 2116 rc = (*fn)(bp);
2116 2117 }
2117 2118 return;
2118 2119 } else {
2119 2120 bioerror(bp, ENODEV);
2120 2121 biodone(bp);
2121 2122 return;
2122 2123 }
2123 2124 }
2124 2125
2125 2126 ASSERT(RW_READ_HELD(&svp->sv_lock));
2126 2127
2127 2128 if (svp->sv_flag == 0) {
2128 2129 /*
2129 2130 * guard access mode
2130 2131 * - prevent user level access to the device
2131 2132 */
2132 2133 DTRACE_PROBE1(sv_lyr_strategy_err_guard, struct buf *, bp);
2133 2134 bioerror(bp, EPERM);
2134 2135 goto out;
2135 2136 }
2136 2137
2137 2138 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
2138 2139 DTRACE_PROBE1(sv_lyr_strategy_err_rsrv, struct buf *, bp);
2139 2140
2140 2141 if (rc == EINTR)
2141 2142 cmn_err(CE_WARN, "!nsc_reserve() returned EINTR");
2142 2143 bioerror(bp, rc);
2143 2144 goto out;
2144 2145 }
2145 2146
2146 2147 if (bp->b_lblkno >= (diskaddr_t)svp->sv_nblocks) {
2147 2148 DTRACE_PROBE1(sv_lyr_strategy_eof, struct buf *, bp);
2148 2149
2149 2150 if (bp->b_flags & B_READ) {
2150 2151 /* return EOF, not an error */
2151 2152 bp->b_resid = bp->b_bcount;
2152 2153 bioerror(bp, 0);
2153 2154 } else
2154 2155 bioerror(bp, EINVAL);
2155 2156
2156 2157 goto done;
2157 2158 }
2158 2159
2159 2160 /*
2160 2161 * Preallocate a handle once per call to strategy.
2161 2162 * If this fails, nsc_alloc_buf() will allocate a
2162 2163 * temporary handle for each allocation/free pair.
2163 2164 */
2164 2165
2165 2166 DTRACE_PROBE1(sv_dbg_alloch_start, sv_dev_t *, svp);
2166 2167
2167 2168 bufh = nsc_alloc_handle(svp->sv_fd, NULL, NULL, NULL);
2168 2169
2169 2170 DTRACE_PROBE1(sv_dbg_alloch_end, sv_dev_t *, svp);
2170 2171
2171 2172 if (bufh && (bufh->sb_flag & NSC_HACTIVE) != 0) {
2172 2173 DTRACE_PROBE1(sv_lyr_strategy_err_hactive, struct buf *, bp);
2173 2174
2174 2175 cmn_err(CE_WARN,
2175 2176 "!sv: allocated active handle (bufh %p, flags %x)",
2176 2177 (void *)bufh, bufh->sb_flag);
2177 2178
2178 2179 bioerror(bp, ENXIO);
2179 2180 goto done;
2180 2181 }
2181 2182
2182 2183 fba_req = FBA_LEN(bp->b_bcount);
2183 2184 if (fba_req + bp->b_lblkno > (diskaddr_t)svp->sv_nblocks)
2184 2185 fba_req = (nsc_size_t)(svp->sv_nblocks - bp->b_lblkno);
2185 2186
2186 2187 rw = (bp->b_flags & B_READ) ? NSC_READ : NSC_WRITE;
2187 2188
2188 2189 bp_mapin(bp);
2189 2190
2190 2191 bp->b_resid = bp->b_bcount;
2191 2192 buf_addr = bp->b_un.b_addr;
2192 2193 fba_off = 0;
2193 2194
2194 2195 /*
2195 2196 * fba_req - requested size of transfer in FBAs after
2196 2197 * truncation to device extent, and allowing for
2197 2198 * possible non-FBA bounded final chunk.
2198 2199 * fba_off - offset of start of chunk from start of bp in FBAs.
2199 2200 * fba_len - size of this chunk in FBAs.
2200 2201 */
2201 2202
2202 2203 loop:
2203 2204 fba_len = min(fba_req, svp->sv_maxfbas);
2204 2205 hndl = bufh;
2205 2206
2206 2207 DTRACE_PROBE4(sv_dbg_allocb_start,
2207 2208 sv_dev_t *, svp,
2208 2209 uint64_t, (uint64_t)(bp->b_lblkno + fba_off),
2209 2210 uint64_t, (uint64_t)fba_len,
2210 2211 int, rw);
2211 2212
2212 2213 rc = nsc_alloc_buf(svp->sv_fd, (nsc_off_t)(bp->b_lblkno + fba_off),
2213 2214 fba_len, rw, &hndl);
2214 2215
2215 2216 DTRACE_PROBE1(sv_dbg_allocb_end, sv_dev_t *, svp);
2216 2217
2217 2218 if (rc > 0) {
2218 2219 DTRACE_PROBE1(sv_lyr_strategy_err_alloc, struct buf *, bp);
2219 2220 bioerror(bp, rc);
2220 2221 if (hndl != bufh)
2221 2222 (void) nsc_free_buf(hndl);
2222 2223 hndl = NULL;
2223 2224 goto done;
2224 2225 }
2225 2226
2226 2227 tocopy = min(FBA_SIZE(fba_len), bp->b_resid);
2227 2228 v = hndl->sb_vec;
2228 2229
2229 2230 if (rw == NSC_WRITE && FBA_OFF(tocopy) != 0) {
2230 2231 /*
2231 2232 * Not overwriting all of the last FBA, so read in the
2232 2233 * old contents now before we overwrite it with the new
2233 2234 * data.
2234 2235 */
2235 2236
2236 2237 DTRACE_PROBE2(sv_dbg_read_start, sv_dev_t *, svp,
2237 2238 uint64_t, (uint64_t)(hndl->sb_pos + hndl->sb_len - 1));
2238 2239
2239 2240 rc = nsc_read(hndl, (hndl->sb_pos + hndl->sb_len - 1), 1, 0);
2240 2241 if (rc > 0) {
2241 2242 bioerror(bp, rc);
2242 2243 goto done;
2243 2244 }
2244 2245
2245 2246 DTRACE_PROBE1(sv_dbg_read_end, sv_dev_t *, svp);
2246 2247 }
2247 2248
2248 2249 DTRACE_PROBE1(sv_dbg_bcopy_start, sv_dev_t *, svp);
2249 2250
2250 2251 while (tocopy > 0) {
2251 2252 nbytes = min(tocopy, (nsc_size_t)v->sv_len);
2252 2253
2253 2254 if (bp->b_flags & B_READ)
2254 2255 (void) bcopy(v->sv_addr, buf_addr, nbytes);
2255 2256 else
2256 2257 (void) bcopy(buf_addr, v->sv_addr, nbytes);
2257 2258
2258 2259 bp->b_resid -= nbytes;
2259 2260 buf_addr += nbytes;
2260 2261 tocopy -= nbytes;
2261 2262 v++;
2262 2263 }
2263 2264
2264 2265 DTRACE_PROBE1(sv_dbg_bcopy_end, sv_dev_t *, svp);
2265 2266
2266 2267 if ((bp->b_flags & B_READ) == 0) {
2267 2268 DTRACE_PROBE3(sv_dbg_write_start, sv_dev_t *, svp,
2268 2269 uint64_t, (uint64_t)hndl->sb_pos,
2269 2270 uint64_t, (uint64_t)hndl->sb_len);
2270 2271
2271 2272 rc = nsc_write(hndl, hndl->sb_pos, hndl->sb_len, 0);
2272 2273
2273 2274 DTRACE_PROBE1(sv_dbg_write_end, sv_dev_t *, svp);
2274 2275
2275 2276 if (rc > 0) {
2276 2277 bioerror(bp, rc);
2277 2278 goto done;
2278 2279 }
2279 2280 }
2280 2281
2281 2282 /*
2282 2283 * Adjust FBA offset and requested (i.e. remaining) length,
2283 2284 * loop if more data to transfer.
2284 2285 */
2285 2286
2286 2287 fba_off += fba_len;
2287 2288 fba_req -= fba_len;
2288 2289
2289 2290 if (fba_req > 0) {
2290 2291 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2291 2292
2292 2293 rc = nsc_free_buf(hndl);
2293 2294
2294 2295 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2295 2296
2296 2297 if (rc > 0) {
2297 2298 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2298 2299 struct buf *, bp);
2299 2300 bioerror(bp, rc);
2300 2301 }
2301 2302
2302 2303 hndl = NULL;
2303 2304
2304 2305 if (rc <= 0)
2305 2306 goto loop;
2306 2307 }
2307 2308
2308 2309 done:
2309 2310 if (hndl != NULL) {
2310 2311 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2311 2312
2312 2313 rc = nsc_free_buf(hndl);
2313 2314
2314 2315 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2315 2316
2316 2317 if (rc > 0) {
2317 2318 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2318 2319 struct buf *, bp);
2319 2320 bioerror(bp, rc);
2320 2321 }
2321 2322
2322 2323 hndl = NULL;
2323 2324 }
2324 2325
2325 2326 if (bufh)
2326 2327 (void) nsc_free_handle(bufh);
2327 2328
2328 2329 DTRACE_PROBE1(sv_dbg_rlse_start, sv_dev_t *, svp);
2329 2330
2330 2331 nsc_release(svp->sv_fd);
2331 2332
2332 2333 DTRACE_PROBE1(sv_dbg_rlse_end, sv_dev_t *, svp);
2333 2334
2334 2335 out:
2335 2336 if (sv_debug > 5) {
2336 2337 cmn_err(CE_CONT,
2337 2338 "!_sv_lyr_strategy: bp %p, bufh %p, bp->b_error %d\n",
2338 2339 (void *)bp, (void *)bufh, bp->b_error);
2339 2340 }
2340 2341
2341 2342 DTRACE_PROBE2(sv_lyr_strategy_end, struct buf *, bp, int, bp->b_error);
2342 2343
2343 2344 rw_exit(&svp->sv_lock);
2344 2345 biodone(bp);
2345 2346 }
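
Reviewer note: the fba_req/fba_off/fba_len comment in _sv_lyr_strategy() above is easier to follow with the arithmetic pulled out of the driver. A minimal userland sketch, assuming the conventional 512-byte FBA; the FBA_* macros below are simplified stand-ins for the nsctl versions, and the final chunk shows the non-FBA bounded tail that triggers the read-modify-write of the last block:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the nsctl FBA macros, assuming 512-byte FBAs. */
    #define FBA_SHIFT       9
    #define FBA_SIZE(n)     ((uint64_t)(n) << FBA_SHIFT)
    #define FBA_LEN(b)      (((b) + FBA_SIZE(1) - 1) >> FBA_SHIFT)

    int
    main(void)
    {
            uint64_t nblocks = 100;          /* device size (FBAs) */
            uint64_t maxfbas = 16;           /* largest single handle */
            uint64_t lblkno = 90;            /* request start block */
            uint64_t bcount = 6 * 512 + 100; /* bytes, non-FBA tail */

            uint64_t fba_req = FBA_LEN(bcount);   /* FBAs, rounded up */
            uint64_t fba_off = 0;                 /* chunk offset */
            uint64_t resid = bcount;              /* bytes remaining */

            /* truncate to the device extent, as the driver does */
            if (lblkno + fba_req > nblocks)
                    fba_req = nblocks - lblkno;

            while (fba_req > 0) {
                    uint64_t fba_len = fba_req < maxfbas ? fba_req : maxfbas;
                    uint64_t tocopy = FBA_SIZE(fba_len);

                    if (tocopy > resid)   /* non-FBA bounded final chunk */
                            tocopy = resid;

                    printf("chunk at fba %llu: %llu fbas, %llu bytes\n",
                        (unsigned long long)(lblkno + fba_off),
                        (unsigned long long)fba_len,
                        (unsigned long long)tocopy);

                    resid -= tocopy;
                    fba_off += fba_len;
                    fba_req -= fba_len;
            }
            return (0);
    }
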
2346 2347
2347 2348
2348 2349 static void
2349 2350 sv_async_strategy(blind_t arg)
2350 2351 {
2351 2352 struct buf *bp = (struct buf *)arg;
2352 2353 _sv_lyr_strategy(bp);
2353 2354 }
2354 2355
2355 2356
2356 2357 static int
2357 2358 sv_lyr_strategy(struct buf *bp)
2358 2359 {
2359 2360 nsthread_t *tp;
2360 2361 int nlive;
2361 2362
2362 2363 /*
2363 2364 * If B_ASYNC was part of the DDI we could use it as a hint to
2364 2365 * not create a thread for synchronous i/o.
2365 2366 */
2366 2367 if (sv_dev_to_sv(bp->b_edev, NULL) == NULL) {
2367 2368 /* not sv enabled - just pass through */
2368 2369 DTRACE_PROBE1(sv_lyr_strategy_notsv, struct buf *, bp);
2369 2370 _sv_lyr_strategy(bp);
2370 2371 return (0);
2371 2372 }
2372 2373
2373 2374 if (sv_debug > 4) {
2374 2375 cmn_err(CE_CONT, "!sv_lyr_strategy: nthread %d nlive %d\n",
2375 2376 nst_nthread(sv_tset), nst_nlive(sv_tset));
2376 2377 }
2377 2378
2378 2379 /*
2379 2380 * If there are only guard devices enabled there
2380 2381 * won't be a threadset, so don't try and use it.
2381 2382 */
2382 2383 tp = NULL;
2383 2384 if (sv_tset != NULL) {
2384 2385 tp = nst_create(sv_tset, sv_async_strategy, (blind_t)bp, 0);
2385 2386 }
2386 2387
2387 2388 if (tp == NULL) {
2388 2389 /*
2389 2390 * out of threads, so fall back to synchronous io.
2390 2391 * out of threads, so fall back to synchronous i/o.
2391 2392 if (sv_debug > 0) {
2392 2393 cmn_err(CE_CONT,
2393 2394 "!sv_lyr_strategy: thread alloc failed\n");
2394 2395 }
2395 2396
2396 2397 DTRACE_PROBE1(sv_lyr_strategy_no_thread,
2397 2398 struct buf *, bp);
2398 2399
2399 2400 _sv_lyr_strategy(bp);
2400 2401 sv_no_threads++;
2401 2402 } else {
2402 2403 nlive = nst_nlive(sv_tset);
2403 2404 if (nlive > sv_max_nlive) {
2404 2405 if (sv_debug > 0) {
2405 2406 cmn_err(CE_CONT,
2406 2407 "!sv_lyr_strategy: "
2407 2408 "new max nlive %d (nthread %d)\n",
2408 2409 nlive, nst_nthread(sv_tset));
2409 2410 }
2410 2411
2411 2412 sv_max_nlive = nlive;
2412 2413 }
2413 2414 }
2414 2415
2415 2416 return (0);
2416 2417 }
2417 2418
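
Reviewer note: sv_lyr_strategy() above shows a common shape: try to hand the request to a worker thread, and run it synchronously if the pool is exhausted. A minimal sketch of that fallback using POSIX threads; the names here (async_work, dispatch, fallback_count) are illustrative stand-ins for nst_create()/sv_no_threads, not driver APIs:

    #include <pthread.h>
    #include <stdio.h>

    static int fallback_count;      /* analogous to sv_no_threads */

    static void *
    async_work(void *arg)
    {
            printf("async: handling request %d\n", *(int *)arg);
            return (NULL);
    }

    static void
    dispatch(int *req)
    {
            pthread_t tid;

            /*
             * Hand the request to a worker; if thread creation fails,
             * do the work synchronously, the same shape as the
             * nst_create() == NULL path above.
             */
            if (pthread_create(&tid, NULL, async_work, req) != 0) {
                    fallback_count++;
                    (void) async_work(req);
            } else {
                    (void) pthread_join(tid, NULL);
            }
    }

    int
    main(void)
    {
            int req = 42;

            dispatch(&req);
            return (0);
    }
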
2418 -
2419 -#ifndef offsetof
2420 -#define offsetof(s, m) ((size_t)(&((s *)0)->m))
2421 -#endif
2422 -
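
Reviewer note: the fallback deleted above duplicated the offsetof() that <sys/sysmacros.h> (now included) already provides, and sv_fix_dkiocgvtoc() below relies on it to locate a single field inside the user's vtoc. A minimal userland sketch of that offset computation, using <stddef.h> and simplified stand-in structs rather than the real <sys/vtoc.h> layout:

    #include <stddef.h>  /* userland offsetof; the kernel uses <sys/sysmacros.h> */
    #include <stdio.h>

    /* Simplified stand-ins for struct partition / struct vtoc. */
    struct part {
            unsigned short  p_tag;
            unsigned short  p_flag;
            long            p_start;
            long            p_size;
    };

    struct vt {
            unsigned long   v_sanity;
            struct part     v_part[8];
    };

    int
    main(void)
    {
            int pnum = 3;

            /* the same three-step sum sv_fix_dkiocgvtoc() performs */
            size_t offset = offsetof(struct vt, v_part)
                + sizeof (struct part) * pnum
                + offsetof(struct part, p_size);

            printf("p_size of slice %d is %zu bytes into the vtoc\n",
                pnum, offset);
            return (0);
    }
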
2423 2419 /*
2424 2420 * re-write the size of the current partition
2425 2421 */
2426 2422 static int
2427 2423 sv_fix_dkiocgvtoc(const intptr_t arg, const int mode, sv_dev_t *svp)
2428 2424 {
2429 2425 size_t offset;
2430 2426 int ilp32;
2431 2427 int pnum;
2432 2428 int rc;
2433 2429
2434 2430 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
2435 2431
2436 2432 rc = nskern_partition(svp->sv_dev, &pnum);
2437 2433 if (rc != 0) {
2438 2434 return (rc);
2439 2435 }
2440 2436
2441 2437 if (pnum < 0 || pnum >= V_NUMPAR) {
2442 2438 cmn_err(CE_WARN,
2443 2439 "!sv_gvtoc: unable to determine partition number "
2444 2440 "for dev %lx", svp->sv_dev);
2445 2441 return (EINVAL);
2446 2442 }
2447 2443
2448 2444 if (ilp32) {
2449 2445 int32_t p_size;
2450 2446
2451 2447 #ifdef _SunOS_5_6
2452 2448 offset = offsetof(struct vtoc, v_part);
2453 2449 offset += sizeof (struct partition) * pnum;
2454 2450 offset += offsetof(struct partition, p_size);
2455 2451 #else
2456 2452 offset = offsetof(struct vtoc32, v_part);
2457 2453 offset += sizeof (struct partition32) * pnum;
2458 2454 offset += offsetof(struct partition32, p_size);
2459 2455 #endif
2460 2456
2461 2457 p_size = (int32_t)svp->sv_nblocks;
2462 2458 if (p_size == 0) {
2463 2459 if (sv_reserve(svp->sv_fd,
2464 2460 NSC_MULTI|NSC_PCATCH) == 0) {
2465 2461 p_size = (int32_t)svp->sv_nblocks;
2466 2462 nsc_release(svp->sv_fd);
2467 2463 } else {
2468 2464 rc = EINTR;
2469 2465 }
2470 2466 }
2471 2467
2472 2468 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2473 2469 sizeof (p_size), mode) != 0) {
2474 2470 rc = EFAULT;
2475 2471 }
2476 2472 } else {
2477 2473 long p_size;
2478 2474
2479 2475 offset = offsetof(struct vtoc, v_part);
2480 2476 offset += sizeof (struct partition) * pnum;
2481 2477 offset += offsetof(struct partition, p_size);
2482 2478
2483 2479 p_size = (long)svp->sv_nblocks;
2484 2480 if (p_size == 0) {
2485 2481 if (sv_reserve(svp->sv_fd,
2486 2482 NSC_MULTI|NSC_PCATCH) == 0) {
2487 2483 p_size = (long)svp->sv_nblocks;
2488 2484 nsc_release(svp->sv_fd);
2489 2485 } else {
2490 2486 rc = EINTR;
2491 2487 }
2492 2488 }
2493 2489
2494 2490 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2495 2491 sizeof (p_size), mode) != 0) {
2496 2492 rc = EFAULT;
2497 2493 }
2498 2494 }
2499 2495
2500 2496 return (rc);
2501 2497 }
2502 2498
2503 2499
2504 2500 #ifdef DKIOCPARTITION
2505 2501 /*
2506 2502 * re-write the size of the current partition
2507 2503 *
2508 2504 * arg is dk_efi_t.
2509 2505 *
2510 2506 * dk_efi_t->dki_data = (void *)(uintptr_t)efi.dki_data_64;
2511 2507 *
2512 2508 * dk_efi_t->dki_data --> efi_gpt_t (label header)
2513 2509 * dk_efi_t->dki_data + 1 --> efi_gpe_t[] (array of partitions)
2514 2510 *
2515 2511 * efi_gpt_t->efi_gpt_PartitionEntryArrayCRC32 --> CRC32 of array of parts
2516 2512 * efi_gpt_t->efi_gpt_HeaderCRC32 --> CRC32 of header itself
2517 2513 *
2518 2514 * This assumes that sizeof (efi_gpt_t) is the same as the size of a
2519 2515 * logical block on the disk.
2520 2516 *
2521 2517 * Everything is little endian (i.e. disk format).
2522 2518 */
2523 2519 static int
2524 2520 sv_fix_dkiocgetefi(const intptr_t arg, const int mode, sv_dev_t *svp)
2525 2521 {
2526 2522 dk_efi_t efi;
2527 2523 efi_gpt_t gpt;
2528 2524 efi_gpe_t *gpe = NULL;
2529 2525 size_t sgpe;
2530 2526 uint64_t p_size; /* virtual partition size from nsctl */
2531 2527 uint32_t crc;
2532 2528 int unparts; /* number of parts in user's array */
2533 2529 int pnum;
2534 2530 int rc;
2535 2531
2536 2532 rc = nskern_partition(svp->sv_dev, &pnum);
2537 2533 if (rc != 0) {
2538 2534 return (rc);
2539 2535 }
2540 2536
2541 2537 if (pnum < 0) {
2542 2538 cmn_err(CE_WARN,
2543 2539 "!sv_efi: unable to determine partition number for dev %lx",
2544 2540 svp->sv_dev);
2545 2541 return (EINVAL);
2546 2542 }
2547 2543
2548 2544 if (ddi_copyin((void *)arg, &efi, sizeof (efi), mode)) {
2549 2545 return (EFAULT);
2550 2546 }
2551 2547
2552 2548 efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
2553 2549
2554 2550 	if (efi.dki_length < sizeof (gpt) + sizeof (*gpe)) {
2555 2551 return (EINVAL);
2556 2552 }
2557 2553
2558 2554 if (ddi_copyin((void *)efi.dki_data, &gpt, sizeof (gpt), mode)) {
2559 2555 rc = EFAULT;
2560 2556 goto out;
2561 2557 }
2562 2558
2563 2559 if ((unparts = LE_32(gpt.efi_gpt_NumberOfPartitionEntries)) == 0)
2564 2560 unparts = 1;
2565 2561 else if (pnum >= unparts) {
2566 2562 cmn_err(CE_WARN,
2567 2563 "!sv_efi: partition# beyond end of user array (%d >= %d)",
2568 2564 pnum, unparts);
2569 2565 return (EINVAL);
2570 2566 }
2571 2567
2572 2568 sgpe = sizeof (*gpe) * unparts;
2573 2569 gpe = kmem_alloc(sgpe, KM_SLEEP);
2574 2570
2575 2571 if (ddi_copyin((void *)(efi.dki_data + 1), gpe, sgpe, mode)) {
2576 2572 rc = EFAULT;
2577 2573 goto out;
2578 2574 }
2579 2575
2580 2576 p_size = svp->sv_nblocks;
2581 2577 if (p_size == 0) {
2582 2578 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2583 2579 p_size = (diskaddr_t)svp->sv_nblocks;
2584 2580 nsc_release(svp->sv_fd);
2585 2581 } else {
2586 2582 rc = EINTR;
2587 2583 }
2588 2584 }
2589 2585
2590 2586 gpe[pnum].efi_gpe_EndingLBA = LE_64(
2591 2587 LE_64(gpe[pnum].efi_gpe_StartingLBA) + p_size - 1);
2592 2588
2593 2589 gpt.efi_gpt_PartitionEntryArrayCRC32 = 0;
2594 2590 CRC32(crc, gpe, sgpe, -1U, sv_crc32_table);
2595 2591 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
2596 2592
2597 2593 gpt.efi_gpt_HeaderCRC32 = 0;
2598 2594 CRC32(crc, &gpt, sizeof (gpt), -1U, sv_crc32_table);
2599 2595 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
2600 2596
2601 2597 if ((rc == 0) && ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), mode)) {
2602 2598 rc = EFAULT;
2603 2599 goto out;
2604 2600 }
2605 2601
2606 2602 if ((rc == 0) && ddi_copyout(gpe, efi.dki_data + 1, sgpe, mode)) {
2607 2603 rc = EFAULT;
2608 2604 goto out;
2609 2605 }
2610 2606
2611 2607 out:
2612 2608 if (gpe) {
2613 2609 kmem_free(gpe, sgpe);
2614 2610 }
2615 2611
2616 2612 return (rc);
2617 2613 }
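
Reviewer note: after patching the ending LBA, sv_fix_dkiocgetefi() must recompute both label CRCs. EFI labels use the reflected CRC-32 polynomial, seeded with -1U and complemented on output, which is what the CRC32() macro and sv_crc32_table implement in the driver. A self-contained sketch of that convention, building the lookup table locally since the driver's precomputed table is not shown in this hunk:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    static uint32_t crc_tab[256];

    static void
    crc32_build_table(void)
    {
            for (uint32_t i = 0; i < 256; i++) {
                    uint32_t c = i;
                    for (int k = 0; k < 8; k++)
                            c = (c & 1) ? 0xedb88320U ^ (c >> 1) : c >> 1;
                    crc_tab[i] = c;
            }
    }

    /* Driver convention: seed with -1U, complement the result. */
    static uint32_t
    efi_crc32(const void *buf, size_t len)
    {
            const uint8_t *p = buf;
            uint32_t crc = (uint32_t)-1;

            while (len--)
                    crc = crc_tab[(crc ^ *p++) & 0xff] ^ (crc >> 8);
            return (~crc);
    }

    int
    main(void)
    {
            uint8_t hdr[92] = { 'E', 'F', 'I', ' ', 'P', 'A', 'R', 'T' };

            crc32_build_table();
            /* The driver zeroes the CRC field before recomputing. */
            printf("header crc = 0x%08x\n", efi_crc32(hdr, sizeof (hdr)));
            return (0);
    }
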
2618 2614
2619 2615
2620 2616 /*
2621 2617 * Re-write the size of the partition specified by p_partno
2622 2618 *
2623 2619 * Note that if a DKIOCPARTITION is issued to an fd opened against a
2624 2620 * non-sv'd device, but p_partno requests the size for a different
2625 2621 * device that is sv'd, this function will *not* be called as sv is
2626 2622 * not interposed on the original device (the fd).
2627 2623 *
2628 2624 * It would not be easy to change this as we cannot get the partition
2629 2625 * number for the non-sv'd device, so cannot compute the dev_t of the
2630 2626 * (sv'd) p_partno device, and so cannot find out if it is sv'd or get
2631 2627 * its size from nsctl.
2632 2628 *
2633 2629 * See also the "Bug 4755783" comment in sv_lyr_ioctl().
2634 2630 */
2635 2631 static int
2636 2632 sv_fix_dkiocpartition(const intptr_t arg, const int mode, sv_dev_t *svp)
2637 2633 {
2638 2634 struct partition64 p64;
2639 2635 sv_dev_t *nsvp = NULL;
2640 2636 diskaddr_t p_size;
2641 2637 minor_t nminor;
2642 2638 int pnum, rc;
2643 2639 dev_t ndev;
2644 2640
2645 2641 rc = nskern_partition(svp->sv_dev, &pnum);
2646 2642 if (rc != 0) {
2647 2643 return (rc);
2648 2644 }
2649 2645
2650 2646 if (ddi_copyin((void *)arg, &p64, sizeof (p64), mode)) {
2651 2647 return (EFAULT);
2652 2648 }
2653 2649
2654 2650 if (p64.p_partno != pnum) {
2655 2651 /* switch to requested partition, not the current one */
2656 2652 nminor = getminor(svp->sv_dev) + (p64.p_partno - pnum);
2657 2653 ndev = makedevice(getmajor(svp->sv_dev), nminor);
2658 2654 nsvp = sv_find_enabled(ndev, NULL);
2659 2655 if (nsvp == NULL) {
2660 2656 /* not sv device - just return */
2661 2657 return (0);
2662 2658 }
2663 2659
2664 2660 svp = nsvp;
2665 2661 }
2666 2662
2667 2663 p_size = svp->sv_nblocks;
2668 2664 if (p_size == 0) {
2669 2665 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2670 2666 p_size = (diskaddr_t)svp->sv_nblocks;
2671 2667 nsc_release(svp->sv_fd);
2672 2668 } else {
2673 2669 rc = EINTR;
2674 2670 }
2675 2671 }
2676 2672
2677 2673 if (nsvp != NULL) {
2678 2674 rw_exit(&nsvp->sv_lock);
2679 2675 }
2680 2676
2681 2677 if ((rc == 0) && ddi_copyout(&p_size,
2682 2678 (void *)(arg + offsetof(struct partition64, p_size)),
2683 2679 sizeof (p_size), mode) != 0) {
2684 2680 return (EFAULT);
2685 2681 }
2686 2682
2687 2683 return (rc);
2688 2684 }
2689 2685 #endif /* DKIOCPARTITION */
2690 2686
2691 2687
2692 2688 static int
2693 2689 sv_lyr_ioctl(const dev_t dev, const int cmd, const intptr_t arg,
2694 2690 const int mode, cred_t *crp, int *rvalp)
2695 2691 {
2696 2692 sv_dev_t *svp;
2697 2693 sv_maj_t *maj;
2698 2694 int (*fn)();
2699 2695 int rc = 0;
2700 2696
2701 2697 maj = 0;
2702 2698 fn = 0;
2703 2699
2704 2700 /*
2705 2701 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, carry on as normal.
2706 2702 * Otherwise it was previously SV_PREVENT_UNLOAD and is now
2707 2703 * SV_ALLOW_UNLOAD, and the driver is expected to unload shortly.
2708 2704 *
2709 2705 * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex.
2710 2706 */
2711 2707 if (sv_mod_status == SV_ALLOW_UNLOAD) {
2712 2708 return (EBUSY);
2713 2709 }
2714 2710
2715 2711 svp = sv_find_enabled(dev, &maj);
2716 2712 if (svp != NULL) {
2717 2713 if (nskernd_isdaemon()) {
2718 2714 /*
2719 2715 * This is nskernd which always needs to see
2720 2716 * the underlying disk device accurately.
2721 2717 *
2722 2718 * So just pass the ioctl straight through
2723 2719 * to the underlying driver as though the device
2724 2720 * was not sv enabled.
2725 2721 */
2726 2722 DTRACE_PROBE2(sv_lyr_ioctl_nskernd, sv_dev_t *, svp,
2727 2723 dev_t, dev);
2728 2724
2729 2725 rw_exit(&svp->sv_lock);
2730 2726 svp = NULL;
2731 2727 } else {
2732 2728 ASSERT(RW_READ_HELD(&svp->sv_lock));
2733 2729 }
2734 2730 }
2735 2731
2736 2732 /*
2737 2733 * We now have a locked and enabled SV device, or a non-SV device.
2738 2734 */
2739 2735
2740 2736 switch (cmd) {
2741 2737 /*
2742 2738 * DKIOCGVTOC, DKIOCSVTOC, DKIOCPARTITION, DKIOCGETEFI
2743 2739 * and DKIOCSETEFI are intercepted and faked up as some
2744 2740 * i/o providers emulate volumes of a different size to
2745 2741 * the underlying volume.
2746 2742 *
2747 2743 * Setting the size by rewriting the vtoc is not permitted.
2748 2744 */
2749 2745
2750 2746 case DKIOCSVTOC:
2751 2747 #ifdef DKIOCPARTITION
2752 2748 case DKIOCSETEFI:
2753 2749 #endif
2754 2750 if (svp == NULL) {
2755 2751 /* not intercepted -- allow ioctl through */
2756 2752 break;
2757 2753 }
2758 2754
2759 2755 rw_exit(&svp->sv_lock);
2760 2756
2761 2757 DTRACE_PROBE2(sv_lyr_ioctl_svtoc, dev_t, dev, int, EPERM);
2762 2758
2763 2759 return (EPERM);
2764 2760
2765 2761 default:
2766 2762 break;
2767 2763 }
2768 2764
2769 2765 /*
2770 2766 * Pass through the real ioctl command.
2771 2767 */
2772 2768
2773 2769 if (maj && (fn = maj->sm_ioctl) != 0) {
2774 2770 if (!(maj->sm_flag & D_MP)) {
2775 2771 UNSAFE_ENTER();
2776 2772 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2777 2773 UNSAFE_EXIT();
2778 2774 } else {
2779 2775 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2780 2776 }
2781 2777 } else {
2782 2778 rc = ENODEV;
2783 2779 }
2784 2780
2785 2781 /*
2786 2782 * Bug 4755783
2787 2783 * Fix up the size of the current partition to allow
2788 2784 * for the virtual volume to be a different size to the
2789 2785 * physical volume (e.g. for II compact dependent shadows).
2790 2786 *
2791 2787 * Note that this only attempts to fix up the current partition
2792 2788 * - the one that the ioctl was issued against. There could be
2793 2789 * other sv'd partitions in the same vtoc, but we cannot tell
2794 2790 * so we don't attempt to fix them up.
2795 2791 */
2796 2792
2797 2793 if (svp != NULL && rc == 0) {
2798 2794 switch (cmd) {
2799 2795 case DKIOCGVTOC:
2800 2796 rc = sv_fix_dkiocgvtoc(arg, mode, svp);
2801 2797 break;
2802 2798
2803 2799 #ifdef DKIOCPARTITION
2804 2800 case DKIOCGETEFI:
2805 2801 rc = sv_fix_dkiocgetefi(arg, mode, svp);
2806 2802 break;
2807 2803
2808 2804 case DKIOCPARTITION:
2809 2805 rc = sv_fix_dkiocpartition(arg, mode, svp);
2810 2806 break;
2811 2807 #endif /* DKIOCPARTITION */
2812 2808 }
2813 2809 }
2814 2810
2815 2811 if (svp != NULL) {
2816 2812 rw_exit(&svp->sv_lock);
2817 2813 }
2818 2814
2819 2815 return (rc);
2820 2816 }