7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/avs/ns/rdc/rdc_dev.c
+++ new/usr/src/uts/common/avs/ns/rdc/rdc_dev.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/ksynch.h>
29 29 #include <sys/kmem.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/cmn_err.h>
32 32 #include <sys/debug.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/file.h>
35 35 #include <sys/ddi.h>
36 36 #include <sys/nsc_thread.h>
37 37 #include <sys/unistat/spcs_s.h>
38 38 #include <sys/unistat/spcs_errors.h>
39 39
40 40 #include <sys/unistat/spcs_s_k.h>
41 41 #ifdef DS_DDICT
42 42 #include "../contract.h"
43 43 #endif
44 44
45 45 #include <sys/nsctl/nsctl.h>
46 46
47 47 #include <sys/sdt.h> /* dtrace is S10 or later */
48 48
49 49 #include "rdc.h"
50 50 #include "rdc_io.h"
51 51 #include "rdc_bitmap.h"
52 52
53 53 /*
54 54 * Remote Dual Copy
55 55 *
56 56 * This file contains the nsctl io provider functionality for RDC.
57 57 *
58 58 * RDC is implemented as a simple filter module that pushes itself between
59 59 * user (SIMCKD, STE, etc.) and SDBC.
60 60 */
61 61
62 62
63 63 static int _rdc_open_count;
64 64 int rdc_eio_nobmp = 0;
65 65
66 66 nsc_io_t *_rdc_io_hc;
67 67 static nsc_io_t *_rdc_io_hr;
68 68 static nsc_def_t _rdc_fd_def[], _rdc_io_def[], _rdc_ior_def[];
69 69
70 70 void _rdc_deinit_dev();
71 71 int rdc_diskq_enqueue(rdc_k_info_t *, rdc_aio_t *);
72 72 extern void rdc_unintercept_diskq(rdc_group_t *);
73 73 rdc_aio_t *rdc_aio_tbuf_get(void *, void *, int, int, int, int, int);
74 74
75 75 static nsc_buf_t *_rdc_alloc_handle(void (*)(), void (*)(),
76 76 void (*)(), rdc_fd_t *);
77 77 static int _rdc_free_handle(rdc_buf_t *, rdc_fd_t *);
78 78
79 79 #ifdef DEBUG
80 80 int rdc_overlap_cnt;
81 81 int rdc_overlap_hnd_cnt;
82 82 #endif
83 83
84 84 static rdc_info_dev_t *rdc_devices;
85 85
86 86 extern int _rdc_rsrv_diskq(rdc_group_t *group);
87 87 extern void _rdc_rlse_diskq(rdc_group_t *group);
88 88
89 89 /*
90 90 * _rdc_init_dev
91 91 * Initialise the io provider.
92 92 */
93 93
94 94 int
95 95 _rdc_init_dev()
96 96 {
97 97 _rdc_io_hc = nsc_register_io("rdc-high-cache",
98 98 NSC_RDCH_ID|NSC_REFCNT|NSC_FILTER, _rdc_io_def);
99 99 if (_rdc_io_hc == NULL)
100 100 cmn_err(CE_WARN, "!rdc: nsc_register_io (high, cache) failed.");
101 101
102 102 _rdc_io_hr = nsc_register_io("rdc-high-raw",
103 103 NSC_RDCHR_ID|NSC_REFCNT|NSC_FILTER, _rdc_ior_def);
104 104 if (_rdc_io_hr == NULL)
105 105 cmn_err(CE_WARN, "!rdc: nsc_register_io (high, raw) failed.");
106 106
107 107 if (!_rdc_io_hc || !_rdc_io_hr) {
108 108 _rdc_deinit_dev();
109 109 return (ENOMEM);
110 110 }
111 111
112 112 return (0);
113 113 }
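
The init/deinit pair above is a register-both-or-unwind pattern: both registrations are attempted, and if either fails, _rdc_deinit_dev() releases whichever one succeeded. A minimal standalone sketch of the same shape (plain C; malloc stands in for nsc_register_io(), and all names are illustrative, not the driver's):

	#include <errno.h>
	#include <stdlib.h>

	static void *io_hc, *io_hr;	/* stand-ins for the nsc_io_t pair */

	static void
	deinit_pair(void)
	{
		free(io_hc);		/* tolerates partially-built state */
		free(io_hr);
		io_hc = io_hr = NULL;
	}

	static int
	init_pair(void)
	{
		io_hc = malloc(32);	/* stands in for nsc_register_io() */
		io_hr = malloc(32);
		if (io_hc == NULL || io_hr == NULL) {
			deinit_pair();
			return (ENOMEM);
		}
		return (0);
	}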
114 114
115 115
116 116 /*
117 117 * _rdc_deinit_dev
118 118 * De-initialise the io provider.
119 119 *
120 120 */
121 121
122 122 void
123 123 _rdc_deinit_dev()
124 124 {
125 125 int rc;
126 126
127 127 if (_rdc_io_hc) {
128 128 if ((rc = nsc_unregister_io(_rdc_io_hc, 0)) != 0)
129 129 cmn_err(CE_WARN,
130 130 "!rdc: nsc_unregister_io (high, cache) failed: %d",
131 131 rc);
132 132 }
133 133
134 134 if (_rdc_io_hr) {
135 135 if ((rc = nsc_unregister_io(_rdc_io_hr, 0)) != 0)
136 136 cmn_err(CE_WARN,
137 137 "!rdc: nsc_unregister_io (high, raw) failed: %d",
138 138 rc);
139 139 }
140 140 }
141 141
142 142
143 143 /*
144 144 * rdc_idev_open
145 145 * - Open the nsctl file descriptors for the data devices.
146 146 *
147 147 * Must be called with rdc_conf_lock held.
148 148 * id_sets is protected by rdc_conf_lock.
149 149 */
150 150 static rdc_info_dev_t *
151 151 rdc_idev_open(rdc_k_info_t *krdc, char *pathname, int *rc)
152 152 {
153 153 rdc_info_dev_t *dp;
154 154
155 155 ASSERT(MUTEX_HELD(&rdc_conf_lock));
156 156
157 157 for (dp = rdc_devices; dp; dp = dp->id_next) {
158 158 if (dp->id_cache_dev.bi_fd &&
159 159 strcmp(pathname, nsc_pathname(dp->id_cache_dev.bi_fd)) == 0)
160 160 break;
161 161 }
162 162
163 163 if (!dp) {
164 164 dp = kmem_zalloc(sizeof (*dp), KM_SLEEP);
165 165 if (!dp)
166 166 return (NULL);
167 167
168 168 dp->id_cache_dev.bi_krdc = krdc;
169 169 dp->id_cache_dev.bi_fd = nsc_open(pathname,
170 170 NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE,
171 171 _rdc_fd_def, (blind_t)&dp->id_cache_dev, rc);
172 172 if (!dp->id_cache_dev.bi_fd) {
173 173 kmem_free(dp, sizeof (*dp));
174 174 return (NULL);
175 175 }
176 176
177 177 dp->id_raw_dev.bi_krdc = krdc;
178 178 dp->id_raw_dev.bi_fd = nsc_open(pathname,
179 179 NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE,
180 180 _rdc_fd_def, (blind_t)&dp->id_raw_dev, rc);
181 181 if (!dp->id_raw_dev.bi_fd) {
182 182 (void) nsc_close(dp->id_cache_dev.bi_fd);
183 183 kmem_free(dp, sizeof (*dp));
184 184 return (NULL);
185 185 }
186 186
187 187 mutex_init(&dp->id_rlock, NULL, MUTEX_DRIVER, NULL);
188 188 cv_init(&dp->id_rcv, NULL, CV_DRIVER, NULL);
189 189
190 190 dp->id_next = rdc_devices;
191 191 rdc_devices = dp;
192 192 }
193 193
194 194 dp->id_sets++;
195 195 return (dp);
196 196 }
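
rdc_idev_open() is a find-or-create lookup: the device chain is scanned by pathname under rdc_conf_lock, a fresh entry is allocated and pushed on the front of the chain if nothing matches, and either way the id_sets reference count is bumped. A user-land sketch of the same pattern, assuming hypothetical names and pthreads in place of kernel mutexes:

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct dev_info {
		char path[256];
		int refs;			/* like id_sets */
		struct dev_info *next;
	};

	static struct dev_info *devices;	/* head of chain */
	static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;

	struct dev_info *
	dev_open(const char *path)
	{
		struct dev_info *dp;

		pthread_mutex_lock(&conf_lock);
		for (dp = devices; dp != NULL; dp = dp->next) {
			if (strcmp(dp->path, path) == 0)
				break;		/* already open: share it */
		}
		if (dp == NULL) {
			dp = calloc(1, sizeof (*dp));
			if (dp == NULL) {
				pthread_mutex_unlock(&conf_lock);
				return (NULL);
			}
			(void) strncpy(dp->path, path, sizeof (dp->path) - 1);
			dp->next = devices;	/* push on the front */
			devices = dp;
		}
		dp->refs++;			/* like dp->id_sets++ */
		pthread_mutex_unlock(&conf_lock);
		return (dp);
	}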
197 197
198 198
199 199 /*
200 200 * rdc_idev_close
201 201 * - Close the nsctl file descriptors for the data devices.
202 202 *
203 203 * Must be called with rdc_conf_lock and dp->id_rlock held.
204 204 * Will release dp->id_rlock before returning.
205 205 *
206 206 * id_sets is protected by rdc_conf_lock.
207 207 */
208 208 static void
209 209 rdc_idev_close(rdc_k_info_t *krdc, rdc_info_dev_t *dp)
210 210 {
211 211 rdc_info_dev_t **dpp;
212 212 #ifdef DEBUG
213 213 int count = 0;
214 214 #endif
215 215
216 216 ASSERT(MUTEX_HELD(&rdc_conf_lock));
217 217 ASSERT(MUTEX_HELD(&dp->id_rlock));
218 218
219 219 dp->id_sets--;
220 220 if (dp->id_sets > 0) {
221 221 mutex_exit(&dp->id_rlock);
222 222 return;
223 223 }
224 224
225 225 /* external references must have gone */
226 226 ASSERT((krdc->c_ref + krdc->r_ref + krdc->b_ref) == 0);
227 227
228 228 /* unlink from chain */
229 229
230 230 for (dpp = &rdc_devices; *dpp; dpp = &((*dpp)->id_next)) {
231 231 if (*dpp == dp) {
232 232 /* unlink */
233 233 *dpp = dp->id_next;
234 234 break;
235 235 }
236 236 }
237 237
238 238 /*
239 239 * Wait for all reserves to go away - the rpc server is
240 240 * running asynchronously with this close, and so we
241 241 * have to wait for it to spot that the krdc is !IS_ENABLED()
242 242 * and throw away the nsc_buf_t's that it has allocated
243 243 * and release the device.
244 244 */
245 245
246 246 while (IS_CRSRV(krdc) || IS_RRSRV(krdc)) {
247 247 #ifdef DEBUG
248 248 if (!(++count % 16)) {
249 249 cmn_err(CE_NOTE,
250 250 "!_rdc_idev_close(%s): waiting for nsc_release",
251 251 rdc_u_info[krdc->index].primary.file);
252 252 }
253 253 if (count > (16*20)) {
254 254 /* waited for 20 seconds - too long - panic */
255 255 cmn_err(CE_PANIC,
256 256 "!_rdc_idev_close(%s, %p): lost nsc_release",
257 257 rdc_u_info[krdc->index].primary.file, (void *)krdc);
258 258 }
259 259 #endif
260 260 mutex_exit(&dp->id_rlock);
261 261 delay(HZ>>4);
262 262 mutex_enter(&dp->id_rlock);
263 263 }
264 264
265 265 if (dp->id_cache_dev.bi_fd) {
266 266 (void) nsc_close(dp->id_cache_dev.bi_fd);
267 267 dp->id_cache_dev.bi_fd = NULL;
268 268 }
269 269
270 270 if (dp->id_raw_dev.bi_fd) {
271 271 (void) nsc_close(dp->id_raw_dev.bi_fd);
272 272 dp->id_raw_dev.bi_fd = NULL;
273 273 }
274 274
275 275 mutex_exit(&dp->id_rlock);
276 276 mutex_destroy(&dp->id_rlock);
277 277 cv_destroy(&dp->id_rcv);
278 278
279 279 kmem_free(dp, sizeof (*dp));
280 280 }
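
The unlink loop in rdc_idev_close() uses the pointer-to-pointer idiom: dpp walks over the addresses of the next fields rather than the nodes themselves, so removing the head of the chain needs no special case. Reusing the hypothetical dev_info list from the sketch above:

	static void
	dev_unlink(struct dev_info *dp)
	{
		struct dev_info **dpp;

		for (dpp = &devices; *dpp != NULL; dpp = &(*dpp)->next) {
			if (*dpp == dp) {
				*dpp = dp->next; /* head and interior alike */
				break;
			}
		}
	}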
281 281
282 282
283 283 /*
284 284 * This function provokes an nsc_reserve() for the device which
285 285 * if successful will populate krdc->maxfbas and urdc->volume_size
286 286 * via the _rdc_attach_fd() callback.
287 287 */
288 288 void
289 289 rdc_get_details(rdc_k_info_t *krdc)
290 290 {
291 291 int rc;
292 292 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
293 293 nsc_size_t vol_size, maxfbas;
294 294
295 295 if (_rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL) == 0) {
296 296 /*
297 297 * if the vol is already reserved,
298 298 * volume_size won't be populated on enable because
299 299 * it is a *fake* reserve and does not make it to
300 300 * _rdc_attach_fd(). So do it here.
301 301 */
302 302 rc = nsc_partsize(RDC_U_FD(krdc), &vol_size);
303 303 if (rc != 0) {
304 304 #ifdef DEBUG
305 305 cmn_err(CE_WARN,
306 306 "!rdc_get_details: partsize failed (%d)", rc);
307 307 #endif /* DEBUG */
308 308 urdc->volume_size = vol_size = 0;
309 309 }
310 310
311 311 urdc->volume_size = vol_size;
312 312 rc = nsc_maxfbas(RDC_U_FD(krdc), 0, &maxfbas);
313 313 if (rc != 0) {
314 314 #ifdef DEBUG
315 315 cmn_err(CE_WARN,
316 316 "!rdc_get_details: maxfbas failed (%d)", rc);
317 317 #endif /* DEBUG */
318 318 maxfbas = 0;
319 319 }
320 320 krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas);
321 321
322 322 _rdc_rlse_devs(krdc, RDC_RAW);
323 323 }
324 324 }
325 325
326 326
327 327 /*
328 328 * Should only be used by the config code.
329 329 */
330 330
331 331 int
332 332 rdc_dev_open(rdc_set_t *rdc_set, int options)
333 333 {
334 334 rdc_k_info_t *krdc;
335 335 int index;
336 336 int rc;
337 337 char *pathname;
338 338
339 339 ASSERT(MUTEX_HELD(&rdc_conf_lock));
340 340
341 341 if (options & RDC_OPT_PRIMARY)
342 342 pathname = rdc_set->primary.file;
343 343 else
344 344 pathname = rdc_set->secondary.file;
345 345
346 346 for (index = 0; index < rdc_max_sets; index++) {
347 347 krdc = &rdc_k_info[index];
348 348
349 349 if (!IS_CONFIGURED(krdc))
350 350 break;
351 351 }
352 352
353 353 if (index == rdc_max_sets) {
354 354 #ifdef DEBUG
355 355 cmn_err(CE_WARN, "!rdc_dev_open: out of cd\'s");
356 356 #endif
357 357 index = -EINVAL;
358 358 goto out;
359 359 }
360 360
361 361 if (krdc->devices && (krdc->c_fd || krdc->r_fd)) {
362 362 #ifdef DEBUG
363 363 cmn_err(CE_WARN, "!rdc_dev_open: %s already open", pathname);
364 364 #endif
365 365 index = -EINVAL;
366 366 goto out;
367 367 }
368 368
369 369 _rdc_open_count++;
370 370
371 371 krdc->devices = rdc_idev_open(krdc, pathname, &rc);
372 372 if (!krdc->devices) {
373 373 index = -rc;
374 374 goto open_fail;
375 375 }
376 376
377 377 /*
378 378 * Grab the device size and maxfbas now.
379 379 */
380 380
381 381 rdc_get_details(krdc);
382 382
383 383 out:
384 384 return (index);
385 385
386 386 open_fail:
387 387 _rdc_open_count--;
388 388
389 389 return (index);
390 390 }
391 391
392 392
393 393 void
394 394 rdc_dev_close(rdc_k_info_t *krdc)
395 395 {
396 396 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
397 397
398 398 mutex_enter(&rdc_conf_lock);
399 399
400 400 if (krdc->devices)
401 401 mutex_enter(&krdc->devices->id_rlock);
402 402
403 403 #ifdef DEBUG
404 404 if (!krdc->devices || !krdc->c_fd || !krdc->r_fd) {
405 405 cmn_err(CE_WARN,
406 406 "!rdc_dev_close(%p): c_fd %p r_fd %p", (void *)krdc,
407 407 (void *) (krdc->devices ? krdc->c_fd : 0),
408 408 (void *) (krdc->devices ? krdc->r_fd : 0));
409 409 }
410 410 #endif
411 411
412 412 if (krdc->devices) {
413 413 /* rdc_idev_close will release id_rlock */
414 414 rdc_idev_close(krdc, krdc->devices);
415 415 krdc->devices = NULL;
416 416 }
417 417
418 418 urdc->primary.file[0] = '\0';
419 419
420 420 if (_rdc_open_count <= 0) {
421 421 cmn_err(CE_WARN, "!rdc: _rdc_open_count corrupt: %d",
422 422 _rdc_open_count);
423 423 }
424 424
425 425 _rdc_open_count--;
426 426
427 427 mutex_exit(&rdc_conf_lock);
428 428 }
429 429
430 430
431 431 /*
432 432 * rdc_intercept
433 433 *
434 434 * Register for IO on this device with nsctl.
435 435 *
436 436 * For a 1-to-many primary we register for each krdc and let nsctl sort
437 437 * out which it wants to be using. This means that we cannot tell which
438 438 * krdc will receive the incoming io from nsctl, though we do know that
439 439 * at any one time only one krdc will be 'attached' and so get io from
440 440 * nsctl.
441 441 *
442 442 * So the krdc->many_next pointer is maintained as a circular list. The
443 443 * result of these multiple nsc_register_paths is that we will see a
444 444 * few more attach and detach io provider calls during enable/resume
445 445 * and disable/suspend of the 1-to-many whilst nsctl settles down to
446 446 * using a single krdc.
447 447 *
448 448 * The major advantage of this scheme is that nsctl sorts out all the
449 449 * rdc_fd_t's so that they can only point to krdc's that are currently
450 450 * active.
451 451 */
452 452 int
453 453 rdc_intercept(rdc_k_info_t *krdc)
454 454 {
455 455 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
456 456 char *pathname;
457 457 char *bitmap;
458 458
459 459 if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
460 460 pathname = urdc->primary.file;
461 461 bitmap = urdc->primary.bitmap;
462 462 } else {
463 463 pathname = urdc->secondary.file;
464 464 bitmap = urdc->secondary.bitmap;
465 465 }
466 466
467 467 if (!krdc->b_tok)
468 468 krdc->b_tok = nsc_register_path(bitmap, NSC_CACHE | NSC_DEVICE,
469 469 _rdc_io_hc);
470 470
471 471 if (!krdc->c_tok)
472 472 krdc->c_tok = nsc_register_path(pathname, NSC_CACHE,
473 473 _rdc_io_hc);
474 474
475 475 if (!krdc->r_tok)
476 476 krdc->r_tok = nsc_register_path(pathname, NSC_DEVICE,
477 477 _rdc_io_hr);
478 478
479 479 if (!krdc->c_tok || !krdc->r_tok) {
480 480 (void) rdc_unintercept(krdc);
481 481 return (ENXIO);
482 482 }
483 483
484 484 return (0);
485 485 }
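
The circular many_next list described in the comment above is walked the same way throughout this file (see _rdc_remote_read() and _rdc_multi_write() below): start at the successor of the requested krdc and stop on wrap-around. A self-contained sketch of that traversal, with illustrative type and field names:

	struct many_node {
		int enabled;
		struct many_node *many_next; /* circular: last links to first */
	};

	static struct many_node *
	first_enabled_sibling(struct many_node *this)
	{
		struct many_node *k;

		for (k = this->many_next; k != this; k = k->many_next) {
			if (k->enabled)
				return (k);	/* first enabled set after 'this' */
		}
		return (NULL);			/* no other enabled set in the ring */
	}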
486 486
487 487
488 488 static void
489 489 wait_unregistering(rdc_k_info_t *krdc)
490 490 {
491 491 while (krdc->group->unregistering > 0)
492 492 (void) cv_wait_sig(&krdc->group->unregistercv, &rdc_conf_lock);
493 493 }
494 494
495 495 static void
496 496 set_unregistering(rdc_k_info_t *krdc)
497 497 {
498 498 wait_unregistering(krdc);
499 499
500 500 krdc->group->unregistering++;
501 501 }
502 502
503 503 static void
504 504 wakeup_unregistering(rdc_k_info_t *krdc)
505 505 {
506 506 if (krdc->group->unregistering <= 0)
507 507 return;
508 508
509 509 krdc->group->unregistering--;
510 510 cv_broadcast(&krdc->group->unregistercv);
511 511 }
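
wait/set/wakeup_unregistering() form a counting gate over rdc_conf_lock: set_unregistering() blocks while other unregisters are in flight and then claims the gate, and wakeup_unregistering() releases it and broadcasts to waiters. A user-land pthreads analogue of the same gate (illustrative only; in the driver the caller already holds rdc_conf_lock around these calls):

	#include <pthread.h>

	static pthread_mutex_t unreg_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t unreg_cv = PTHREAD_COND_INITIALIZER;
	static int unregistering;

	void
	set_unregistering_sketch(void)
	{
		pthread_mutex_lock(&unreg_lock);
		while (unregistering > 0)	/* wait_unregistering() */
			pthread_cond_wait(&unreg_cv, &unreg_lock);
		unregistering++;		/* claim the gate */
		pthread_mutex_unlock(&unreg_lock);
	}

	void
	wakeup_unregistering_sketch(void)
	{
		pthread_mutex_lock(&unreg_lock);
		if (unregistering > 0) {
			unregistering--;
			pthread_cond_broadcast(&unreg_cv);
		}
		pthread_mutex_unlock(&unreg_lock);
	}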
512 512
513 513
514 514 /*
515 515 * rdc_unintercept
516 516 *
517 517 * Unregister for IO on this device.
518 518 *
519 519 * See comments above rdc_intercept.
520 520 */
521 521 int
522 522 rdc_unintercept(rdc_k_info_t *krdc)
523 523 {
524 524 int err = 0;
525 525 int rc;
526 526 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
527 527
528 528 mutex_enter(&rdc_conf_lock);
529 529 set_unregistering(krdc);
530 530 krdc->type_flag |= RDC_UNREGISTER;
531 531 mutex_exit(&rdc_conf_lock);
532 532
533 533 if (krdc->r_tok) {
534 534 rc = nsc_unregister_path(krdc->r_tok, 0);
535 535 if (rc) {
536 536 cmn_err(CE_WARN, "!rdc: unregister rawfd %d", rc);
537 537 err = rc;
538 538 }
539 539 krdc->r_tok = NULL;
540 540 }
541 541
542 542 if (krdc->c_tok) {
543 543 rc = nsc_unregister_path(krdc->c_tok, 0);
544 544 if (rc) {
545 545 cmn_err(CE_WARN, "!rdc: unregister cachefd %d", rc);
546 546 if (!err)
547 547 err = rc;
548 548 }
549 549 krdc->c_tok = NULL;
550 550 }
551 551
552 552 if (krdc->b_tok) {
553 553 rc = nsc_unregister_path(krdc->b_tok, 0);
554 554 if (rc) {
555 555 cmn_err(CE_WARN, "!rdc: unregister bitmap %d", rc);
556 556 err = rc;
557 557 }
558 558 krdc->b_tok = NULL;
559 559 }
560 560
561 561 rdc_group_enter(krdc);
562 562
563 563 /* Wait for all necessary _rdc_close() calls to complete */
564 564 while ((krdc->c_ref + krdc->r_ref + krdc->b_ref) != 0) {
565 565 krdc->closing++;
566 566 cv_wait(&krdc->closingcv, &krdc->group->lock);
567 567 krdc->closing--;
568 568 }
569 569
570 570 rdc_clr_flags(urdc, RDC_ENABLED);
571 571 rdc_group_exit(krdc);
572 572
573 573
574 574 /*
575 575 * Check there are no outstanding writes in progress.
576 576 * This can happen when a set is being disabled which
577 577 * is one of the 'one_to_many' chain, that did not
578 578 * intercept the original write call.
579 579 */
580 580
581 581 for (;;) {
582 582 rdc_group_enter(krdc);
583 583 if (krdc->aux_state & RDC_AUXWRITE) {
584 584 rdc_group_exit(krdc);
585 585 /*
586 586 * This doesn't happen very often,
587 587 * just delay a bit and re-look.
588 588 */
589 589 delay(50);
590 590 } else {
591 591 rdc_group_exit(krdc);
592 592 break;
593 593 }
594 594 }
595 595
596 596 mutex_enter(&rdc_conf_lock);
597 597 krdc->type_flag &= ~RDC_UNREGISTER;
598 598 wakeup_unregistering(krdc);
599 599 mutex_exit(&rdc_conf_lock);
600 600
601 601 return (err);
602 602 }
603 603
604 604
605 605 /*
606 606 * _rdc_rlse_d
607 607 * Internal version of _rdc_rlse_devs(), only concerned with the
608 608 * data device, not the bitmap.
609 609 */
610 610
611 611 static void
612 612 _rdc_rlse_d(rdc_k_info_t *krdc, int devs)
613 613 {
614 614 _rdc_info_dev_t *cip;
615 615 _rdc_info_dev_t *rip;
616 616 int raw = (devs & RDC_RAW);
617 617
618 618 if (!krdc) {
619 619 cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs null krdc");
620 620 return;
621 621 }
622 622
623 623 ASSERT((devs & (~RDC_BMP)) != 0);
624 624
625 625 cip = &krdc->devices->id_cache_dev;
626 626 rip = &krdc->devices->id_raw_dev;
627 627
628 628 if (IS_RSRV(cip)) {
629 629 /* decrement count */
630 630
631 631 if (raw) {
632 632 if (cip->bi_ofailed > 0) {
633 633 cip->bi_ofailed--;
634 634 } else if (cip->bi_orsrv > 0) {
635 635 cip->bi_orsrv--;
636 636 }
637 637 } else {
638 638 if (cip->bi_failed > 0) {
639 639 cip->bi_failed--;
640 640 } else if (cip->bi_rsrv > 0) {
641 641 cip->bi_rsrv--;
642 642 }
643 643 }
644 644
645 645 /*
646 646 * reset nsc_fd ownership back link, it is only set if
647 647 * we have really done an underlying reserve, not for
648 648 * failed (faked) reserves.
649 649 */
650 650
651 651 if (cip->bi_rsrv > 0 || cip->bi_orsrv > 0) {
652 652 nsc_set_owner(cip->bi_fd, krdc->iodev);
653 653 } else {
654 654 nsc_set_owner(cip->bi_fd, NULL);
655 655 }
656 656
657 657 /* release nsc_fd */
658 658
659 659 if (!IS_RSRV(cip)) {
660 660 nsc_release(cip->bi_fd);
661 661 }
662 662 } else if (IS_RSRV(rip)) {
663 663 /* decrement count */
664 664
665 665 if (raw) {
666 666 if (rip->bi_failed > 0) {
667 667 rip->bi_failed--;
668 668 } else if (rip->bi_rsrv > 0) {
669 669 rip->bi_rsrv--;
670 670 }
671 671 } else {
672 672 if (rip->bi_ofailed > 0) {
673 673 rip->bi_ofailed--;
674 674 } else if (rip->bi_orsrv > 0) {
675 675 rip->bi_orsrv--;
676 676 }
677 677 }
678 678
679 679 /*
680 680 * reset nsc_fd ownership back link, it is only set if
681 681 * we have really done an underlying reserve, not for
682 682 * failed (faked) reserves.
683 683 */
684 684
685 685 if (rip->bi_rsrv > 0 || rip->bi_orsrv > 0) {
686 686 nsc_set_owner(rip->bi_fd, krdc->iodev);
687 687 } else {
688 688 nsc_set_owner(rip->bi_fd, NULL);
689 689 }
690 690
691 691 /* release nsc_fd and any waiters */
692 692
693 693 if (!IS_RSRV(rip)) {
694 694 rip->bi_flag = 0;
695 695 nsc_release(rip->bi_fd);
696 696 cv_broadcast(&krdc->devices->id_rcv);
697 697 }
698 698 } else {
699 699 cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs no reserve? krdc %p",
700 700 (void *) krdc);
701 701 }
702 702 }
703 703
704 704 /*
705 705 * _rdc_rlse_devs
706 706 * Release named underlying devices and take care of setting the
707 707 * back link on the nsc_fd to the correct parent iodev.
708 708 *
709 709 * NOTE: the 'devs' argument must be the same as that passed to
710 710 * the preceding _rdc_rsrv_devs call.
711 711 */
712 712
713 713 void
714 714 _rdc_rlse_devs(rdc_k_info_t *krdc, int devs)
715 715 {
716 716
717 717 DTRACE_PROBE(_rdc_rlse_devs_start);
718 718 mutex_enter(&krdc->devices->id_rlock);
719 719
720 720 ASSERT(!(devs & RDC_CACHE));
721 721
722 722 if ((devs & (~RDC_BMP)) != 0) {
723 723 _rdc_rlse_d(krdc, devs);
724 724 }
725 725
726 726 if ((devs & RDC_BMP) != 0) {
727 727 if (krdc->bmaprsrv > 0 && --krdc->bmaprsrv == 0) {
728 728 nsc_release(krdc->bitmapfd);
729 729 }
730 730 }
731 731
732 732 mutex_exit(&krdc->devices->id_rlock);
733 733
734 734 }
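
The NOTE above matters in practice: the devs mask must match across the pair. rdc_get_details() earlier in this file shows the canonical pairing:

	if (_rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL) == 0) {
		/* ... interrogate RDC_U_FD(krdc) ... */
		_rdc_rlse_devs(krdc, RDC_RAW);
	}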
735 735
736 736 /*
737 737 * _rdc_rsrv_d
 738 738  *	Reserve the flagged device, unless its companion is already reserved,
 739 739  *	in which case increase the reserve on the companion. Take care
740 740 * of setting the nsc_fd ownership back link to the correct parent
741 741 * iodev pointer.
742 742 */
743 743
744 744 static int
745 745 _rdc_rsrv_d(int raw, _rdc_info_dev_t *rid, _rdc_info_dev_t *cid, int flag,
746 746 rdc_k_info_t *krdc)
747 747 {
748 748 _rdc_info_dev_t *p = NULL;
749 749 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
750 750 int other = 0;
751 751 int rc;
752 752
753 753
754 754 #ifdef DEBUG
755 755 if ((rid->bi_rsrv < 0) ||
756 756 (cid->bi_rsrv < 0) ||
757 757 (rid->bi_orsrv < 0) ||
758 758 (cid->bi_orsrv < 0) ||
759 759 (rid->bi_failed < 0) ||
760 760 (cid->bi_failed < 0) ||
761 761 (rid->bi_ofailed < 0) ||
762 762 (cid->bi_ofailed < 0)) {
763 763 cmn_err(CE_WARN,
764 764 "!_rdc_rsrv_d: negative counts (rsrv %d %d orsrv %d %d)",
765 765 rid->bi_rsrv, cid->bi_rsrv,
766 766 rid->bi_orsrv, cid->bi_orsrv);
767 767 cmn_err(CE_WARN,
768 768 "!_rdc_rsrv_d: negative counts (fail %d %d ofail %d %d)",
769 769 rid->bi_failed, cid->bi_failed,
770 770 rid->bi_ofailed, cid->bi_ofailed);
771 771 cmn_err(CE_PANIC, "_rdc_rsrv_d: negative counts (krdc %p)",
772 772 (void *) krdc);
773 773 }
774 774 #endif
775 775
776 776 /*
777 777 * If user wants to do a cache reserve and it's already
778 778 * raw reserved internally, we need to do a real nsc_reserve, so wait
779 779 * until the release has been done.
780 780 */
781 781 if (IS_RSRV(rid) && (flag == RDC_EXTERNAL) &&
782 782 (raw == 0) && (rid->bi_flag != RDC_EXTERNAL)) {
783 783 krdc->devices->id_release++;
784 784 while (IS_RSRV(rid))
785 785 cv_wait(&krdc->devices->id_rcv,
786 786 &krdc->devices->id_rlock);
787 787 krdc->devices->id_release--;
788 788 }
789 789
790 790 /* select underlying device to use */
791 791
792 792 if (IS_RSRV(rid)) {
793 793 p = rid;
794 794 if (!raw) {
795 795 other = 1;
796 796 }
797 797 } else if (IS_RSRV(cid)) {
798 798 p = cid;
799 799 if (raw) {
800 800 other = 1;
801 801 }
802 802 }
803 803
804 804 /* just increment count and return if already reserved */
805 805
806 806 if (p && !RFAILED(p)) {
807 807 if (other) {
808 808 p->bi_orsrv++;
809 809 } else {
810 810 p->bi_rsrv++;
811 811 }
812 812
813 813 /* set nsc_fd ownership back link */
814 814 nsc_set_owner(p->bi_fd, krdc->iodev);
815 815 return (0);
816 816 }
817 817
818 818 /* attempt reserve */
819 819
820 820 if (!p) {
821 821 p = raw ? rid : cid;
822 822 }
823 823
824 824 if (!p->bi_fd) {
825 825 /* rpc server raced with rdc_dev_close() */
826 826 return (EIO);
827 827 }
828 828 if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
829 829 /*
830 830 * convert failed counts into reserved counts, and add
831 831 * in this reserve.
832 832 */
833 833
834 834 p->bi_orsrv = p->bi_ofailed;
835 835 p->bi_rsrv = p->bi_failed;
836 836
837 837 if (other) {
838 838 p->bi_orsrv++;
839 839 } else {
840 840 p->bi_rsrv++;
841 841 }
842 842
843 843 p->bi_ofailed = 0;
844 844 p->bi_failed = 0;
845 845
846 846 /* set nsc_fd ownership back link */
847 847
848 848 nsc_set_owner(p->bi_fd, krdc->iodev);
849 849 } else if (rc != EINTR) {
850 850 /*
851 851 * If this is the master, and the secondary is not
852 852 * failed, then just fake this external reserve so that
853 853 * we can do remote io to the secondary and continue to
854 854 * provide service to the client.
855 855 *
856 856 * Subsequent calls to _rdc_rsrv_d() will re-try the
857 857 * nsc_reserve() until it succeeds.
858 858 */
859 859
860 860 if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
861 861 !(rdc_get_vflags(urdc) & RDC_LOGGING) &&
862 862 !((rdc_get_vflags(urdc) & RDC_SLAVE) &&
863 863 (rdc_get_vflags(urdc) & RDC_SYNCING))) {
864 864 if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
865 865 rdc_many_enter(krdc);
866 866 /* Primary, so reverse sync needed */
867 867 rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
868 868 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
869 869 "nsc_reserve failed");
870 870 rdc_many_exit(krdc);
871 871 rc = -1;
872 872 #ifdef DEBUG
873 873 cmn_err(CE_NOTE, "!nsc_reserve failed "
874 874 "with rc == %d\n", rc);
875 875 #endif
876 876 } else {
877 877 rc = 0;
878 878 }
879 879
880 880 if (other) {
881 881 p->bi_ofailed++;
882 882 } else {
883 883 p->bi_failed++;
884 884 }
885 885
886 886 if (krdc->maxfbas == 0) {
887 887 /*
888 888 * fake a maxfbas value for remote i/o,
889 889 * this will get reset when the next
890 890 * successful reserve happens as part
891 891 * of the rdc_attach_fd() callback.
892 892 */
893 893 krdc->maxfbas = 128;
894 894 }
895 895 }
896 896 }
897 897
898 898 if (rc == 0 && raw) {
899 899 p->bi_flag = flag;
900 900 }
901 901
902 902
903 903 return (rc);
904 904 }
905 905
906 906 /*
907 907 * _rdc_rsrv_devs
908 908 * Reserve named underlying devices.
909 909 *
910 910 */
911 911
912 912 int
913 913 _rdc_rsrv_devs(rdc_k_info_t *krdc, int devs, int flag)
914 914 {
915 915 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
916 916 int write = 0;
917 917 int rc = 0;
918 918 int got = 0;
919 919
920 920 if (!krdc) {
921 921 return (EINVAL);
922 922 }
923 923
924 924 ASSERT(!(devs & RDC_CACHE));
925 925
926 926 mutex_enter(&krdc->devices->id_rlock);
927 927
928 928 if ((devs & (~RDC_BMP)) != 0) {
929 929 if ((rc = _rdc_rsrv_d((devs & RDC_CACHE) == 0,
930 930 &krdc->devices->id_raw_dev, &krdc->devices->id_cache_dev,
931 931 flag, krdc)) != 0) {
932 932 if (rc == -1) {
933 933 /*
934 934 * we need to call rdc_write_state()
935 935 * after we drop the mutex
936 936 */
937 937 write = 1;
938 938 rc = 0;
939 939 } else {
940 940 cmn_err(CE_WARN,
941 941 "!rdc: nsc_reserve(%s) failed %d\n",
942 942 nsc_pathname(krdc->c_fd), rc);
943 943 }
944 944 } else {
945 945 got |= (devs & (~RDC_BMP));
946 946 }
947 947 }
948 948
949 949 if (rc == 0 && (devs & RDC_BMP) != 0) {
950 950 if (krdc->bitmapfd == NULL)
951 951 rc = EIO;
952 952 else if ((krdc->bmaprsrv == 0) &&
953 953 (rc = nsc_reserve(krdc->bitmapfd, 0)) != 0) {
954 954 cmn_err(CE_WARN, "!rdc: nsc_reserve(%s) failed %d\n",
955 955 nsc_pathname(krdc->bitmapfd), rc);
956 956 } else {
957 957 krdc->bmaprsrv++;
958 958 got |= RDC_BMP;
959 959 }
960 960 if (!RDC_SUCCESS(rc)) {
961 961 /* Undo any previous reserve */
962 962 if (got != 0)
963 963 _rdc_rlse_d(krdc, got);
964 964 }
965 965 }
966 966
967 967 mutex_exit(&krdc->devices->id_rlock);
968 968
969 969 if (write) {
970 970 rdc_write_state(urdc);
971 971 }
972 972
973 973 return (rc);
974 974 }
975 975
976 976
977 977 /*
978 978 * Read from the remote end, ensuring that if this is a many group in
 979 979  * slave mode we only remote read from the secondary with the
980 980 * valid data.
981 981 */
982 982 int
983 983 _rdc_remote_read(rdc_k_info_t *krdc, nsc_buf_t *h, nsc_off_t pos,
984 984 nsc_size_t len, int flag)
985 985 {
986 986 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
987 987 rdc_k_info_t *this = krdc; /* krdc that was requested */
988 988 int rc;
989 989
990 990 if (flag & NSC_RDAHEAD) {
991 991 /*
992 992 * no point in doing readahead remotely,
993 993 * just say we did it ok - the client is about to
994 994 * throw this buffer away as soon as we return.
995 995 */
996 996 return (NSC_DONE);
997 997 }
998 998
999 999 /*
1000 1000 * If this is a many group with a reverse sync in progress and
1001 1001 * this is not the slave krdc/urdc, then search for the slave
1002 1002 * so that we can do the remote io from the correct secondary.
1003 1003 */
1004 1004 if ((rdc_get_mflags(urdc) & RDC_SLAVE) &&
1005 1005 !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
1006 1006 rdc_many_enter(krdc);
1007 1007 for (krdc = krdc->many_next; krdc != this;
1008 1008 krdc = krdc->many_next) {
1009 1009 urdc = &rdc_u_info[krdc->index];
1010 1010 if (!IS_ENABLED(urdc))
1011 1011 continue;
1012 1012 if (rdc_get_vflags(urdc) & RDC_SLAVE)
1013 1013 break;
1014 1014 }
1015 1015 rdc_many_exit(krdc);
1016 1016
1017 1017 this = krdc;
1018 1018 }
1019 1019
1020 1020 read1:
1021 1021 if (rdc_get_vflags(urdc) & RDC_LOGGING) {
1022 1022 /* cannot do remote io without the remote node! */
1023 1023 rc = ENETDOWN;
1024 1024 goto read2;
1025 1025 }
1026 1026
1027 1027
1028 1028 /* wait for the remote end to have the latest data */
1029 1029
1030 1030 if (IS_ASYNC(urdc)) {
1031 1031 while (krdc->group->ra_queue.blocks != 0) {
1032 1032 if (!krdc->group->rdc_writer)
1033 1033 (void) rdc_writer(krdc->index);
1034 1034
1035 1035 (void) rdc_drain_queue(krdc->index);
1036 1036 }
1037 1037 }
1038 1038
1039 1039 if (krdc->io_kstats) {
1040 1040 mutex_enter(krdc->io_kstats->ks_lock);
1041 1041 kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats));
1042 1042 mutex_exit(krdc->io_kstats->ks_lock);
1043 1043 }
1044 1044
1045 1045 rc = rdc_net_read(krdc->index, krdc->remote_index, h, pos, len);
1046 1046
1047 1047 if (krdc->io_kstats) {
1048 1048 mutex_enter(krdc->io_kstats->ks_lock);
1049 1049 kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats));
1050 1050 mutex_exit(krdc->io_kstats->ks_lock);
1051 1051 }
1052 1052
1053 1053 /* If read error keep trying every secondary until no more */
1054 1054 read2:
1055 1055 if (!RDC_SUCCESS(rc) && IS_MANY(krdc) &&
1056 1056 !(rdc_get_mflags(urdc) & RDC_SLAVE)) {
1057 1057 rdc_many_enter(krdc);
1058 1058 for (krdc = krdc->many_next; krdc != this;
1059 1059 krdc = krdc->many_next) {
1060 1060 urdc = &rdc_u_info[krdc->index];
1061 1061 if (!IS_ENABLED(urdc))
1062 1062 continue;
1063 1063 rdc_many_exit(krdc);
1064 1064 goto read1;
1065 1065 }
1066 1066 rdc_many_exit(krdc);
1067 1067 }
1068 1068
1069 1069 return (rc);
1070 1070 }
1071 1071
1072 1072
1073 1073 /*
1074 1074 * _rdc_alloc_buf
1075 1075 * Allocate a buffer of data
1076 1076 *
1077 1077 * Calling/Exit State:
1078 1078 * Returns NSC_DONE or NSC_HIT for success, NSC_PENDING for async
1079 1079 * I/O, > 0 is an error code.
1080 1080 *
1081 1081 * Description:
1082 1082 */
1083 1083 int rdcbufs = 0;
1084 1084
1085 1085 static int
1086 1086 _rdc_alloc_buf(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len, int flag,
1087 1087 rdc_buf_t **ptr)
1088 1088 {
1089 1089 rdc_k_info_t *krdc = rfd->rdc_info;
1090 1090 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
1091 1091 nsc_vec_t *vec = NULL;
1092 1092 rdc_buf_t *h;
1093 1093 size_t size;
1094 1094 int ioflag;
1095 1095 int rc = 0;
1096 1096
1097 1097 if (RDC_IS_BMP(rfd) || RDC_IS_QUE(rfd))
1098 1098 return (EIO);
1099 1099
1100 1100 if (len == 0)
1101 1101 return (EINVAL);
1102 1102
1103 1103 if (flag & NSC_WRBUF) {
1104 1104
1105 1105 if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) &&
1106 1106 !(rdc_get_vflags(urdc) & RDC_LOGGING)) {
1107 1107 /*
1108 1108 * Forbid writes to secondary unless logging.
1109 1109 */
1110 1110 return (EIO);
1111 1111 }
1112 1112 }
1113 1113
1114 1114 if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) &&
1115 1115 (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) {
1116 1116 /*
1117 1117 * Forbid any io to secondary if it needs a sync.
1118 1118 */
1119 1119 return (EIO);
1120 1120 }
1121 1121
1122 1122 if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
1123 1123 (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) &&
1124 1124 !(rdc_get_vflags(urdc) & RDC_VOL_FAILED) &&
1125 1125 !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
1126 1126 /*
1127 1127 * Forbid any io to primary if it needs a reverse sync
1128 1128 * and is not actively syncing.
1129 1129 */
1130 1130 return (EIO);
1131 1131 }
1132 1132
1133 1133 /* Bounds checking */
1134 1134 ASSERT(urdc->volume_size != 0);
1135 1135 if (pos + len > urdc->volume_size) {
1136 1136 #ifdef DEBUG
1137 1137 cmn_err(CE_NOTE,
1138 1138 "!rdc: Attempt to access beyond end of rdc volume");
1139 1139 #endif
1140 1140 return (EIO);
1141 1141 }
1142 1142
1143 1143 h = *ptr;
1144 1144 if (h == NULL) {
1145 1145 /* should never happen (nsctl does this for us) */
1146 1146 #ifdef DEBUG
1147 1147 cmn_err(CE_WARN, "!_rdc_alloc_buf entered without buffer!");
1148 1148 #endif
1149 1149 h = (rdc_buf_t *)_rdc_alloc_handle(NULL, NULL, NULL, rfd);
1150 1150 if (h == NULL)
1151 1151 return (ENOMEM);
1152 1152
1153 1153 h->rdc_bufh.sb_flag &= ~NSC_HALLOCATED;
1154 1154 *ptr = h;
1155 1155 }
1156 1156
1157 1157 if (flag & NSC_NOBLOCK) {
1158 1158 cmn_err(CE_WARN,
1159 1159 "!_rdc_alloc_buf: removing unsupported NSC_NOBLOCK flag");
1160 1160 flag &= ~(NSC_NOBLOCK);
1161 1161 }
1162 1162
1163 1163 h->rdc_bufh.sb_error = 0;
1164 1164 h->rdc_bufh.sb_flag |= flag;
1165 1165 h->rdc_bufh.sb_pos = pos;
1166 1166 h->rdc_bufh.sb_len = len;
1167 1167 ioflag = flag;
1168 1168
1169 1169 bzero(&h->rdc_sync, sizeof (h->rdc_sync));
1170 1170 mutex_init(&h->rdc_sync.lock, NULL, MUTEX_DRIVER, NULL);
1171 1171 cv_init(&h->rdc_sync.cv, NULL, CV_DRIVER, NULL);
1172 1172
1173 1173 if (flag & NSC_WRBUF)
1174 1174 _rdc_async_throttle(krdc, len); /* throttle incoming io */
1175 1175
1176 1176 /*
1177 1177 * Use remote io when:
1178 1178 * - local volume is failed
1179 1179 * - reserve status is failed
1180 1180 */
1181 1181 if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || IS_RFAILED(krdc)) {
1182 1182 rc = EIO;
1183 1183 } else {
1184 1184 rc = nsc_alloc_buf(RDC_U_FD(krdc), pos, len,
1185 1185 ioflag, &h->rdc_bufp);
1186 1186 if (!RDC_SUCCESS(rc)) {
1187 1187 rdc_many_enter(krdc);
1188 1188 if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
1189 1189 /* Primary, so reverse sync needed */
1190 1190 rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
1191 1191 } else {
1192 1192 /* Secondary, so forward sync needed */
1193 1193 rdc_set_flags(urdc, RDC_SYNC_NEEDED);
1194 1194 }
1195 1195 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
1196 1196 "nsc_alloc_buf failed");
1197 1197 rdc_many_exit(krdc);
1198 1198 rdc_write_state(urdc);
1199 1199 }
1200 1200 }
1201 1201
1202 1202 if (RDC_SUCCESS(rc)) {
1203 1203 h->rdc_bufh.sb_vec = h->rdc_bufp->sb_vec;
1204 1204 h->rdc_flags |= RDC_ALLOC;
1205 1205
1206 1206 /*
1207 1207 * If in slave and reading data, remote read on top of
1208 1208 * the buffer to ensure that we have the latest data.
1209 1209 */
1210 1210 if ((flag & NSC_READ) &&
1211 1211 (rdc_get_vflags(urdc) & RDC_PRIMARY) &&
1212 1212 (rdc_get_mflags(urdc) & RDC_SLAVE)) {
1213 1213 rc = _rdc_remote_read(krdc, &h->rdc_bufh,
1214 1214 pos, len, flag);
1215 1215 /*
1216 1216 * Set NSC_MIXED so that the
1217 1217 * cache will throw away this buffer when we free
1218 1218 * it since we have combined data from multiple
1219 1219 * sources into a single buffer.
1220 1220 */
1221 1221 h->rdc_bufp->sb_flag |= NSC_MIXED;
1222 1222 }
1223 1223 }
1224 1224
1225 1225 /*
1226 1226 	 * If nsc_alloc_buf above fails, or the local volume, bitmap or
1227 1227 	 * reserve has failed, then we fill the buf from the remote node
1228 1228 */
1229 1229
1230 1230 if ((!RDC_SUCCESS(rc)) && (rdc_get_vflags(urdc) & RDC_PRIMARY) &&
1231 1231 !(rdc_get_vflags(urdc) & RDC_LOGGING)) {
1232 1232 if (flag & NSC_NODATA) {
1233 1233 ASSERT(!(flag & NSC_READ));
1234 1234 h->rdc_flags |= RDC_REMOTE_BUF;
1235 1235 h->rdc_bufh.sb_vec = NULL;
1236 1236 } else {
1237 1237 size = sizeof (nsc_vec_t) * 2;
1238 1238 h->rdc_vsize = size + FBA_SIZE(len);
1239 1239 vec = kmem_zalloc(h->rdc_vsize, KM_SLEEP);
1240 1240
1241 1241 if (!vec) {
1242 1242 rc = ENOMEM;
1243 1243 goto error;
1244 1244 }
1245 1245
1246 1246 /* single flat buffer */
1247 1247
1248 1248 vec[0].sv_addr = (uchar_t *)vec + size;
1249 1249 vec[0].sv_len = FBA_SIZE(len);
1250 1250 vec[0].sv_vme = 0;
1251 1251
1252 1252 /* null terminator */
1253 1253
1254 1254 vec[1].sv_addr = NULL;
1255 1255 vec[1].sv_len = 0;
1256 1256 vec[1].sv_vme = 0;
1257 1257
1258 1258 h->rdc_bufh.sb_vec = vec;
1259 1259 h->rdc_flags |= RDC_REMOTE_BUF;
1260 1260 h->rdc_flags |= RDC_VEC_ALLOC;
1261 1261 }
1262 1262
1263 1263 if (flag & NSC_READ) {
1264 1264 rc = _rdc_remote_read(krdc, &h->rdc_bufh,
1265 1265 pos, len, flag);
1266 1266 } else {
1267 1267 rc = NSC_DONE;
1268 1268 }
1269 1269 }
1270 1270 error:
1271 1271 if (!RDC_SUCCESS(rc)) {
1272 1272 h->rdc_bufh.sb_error = rc;
1273 1273 }
1274 1274
1275 1275 return (rc);
1276 1276 }
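
Per the Calling/Exit State comment above, a hypothetical caller would distinguish the three outcomes like this (a sketch only; NSC_DONE, NSC_HIT and NSC_PENDING are nsctl status codes, and positive values are errnos):

	rdc_buf_t *h = NULL;
	int rc;

	rc = _rdc_alloc_buf(rfd, pos, len, NSC_READ, &h);
	if (rc > 0) {
		/* failure: rc is an errno such as EIO or ENOMEM */
	} else if (rc == NSC_PENDING) {
		/* async i/o outstanding; wait for the completion callback */
	} else {
		/* NSC_DONE or NSC_HIT: h->rdc_bufh is ready to use */
	}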
1277 1277
1278 1278
1279 1279 /*
1280 1280 * _rdc_free_buf
1281 1281 */
1282 1282
1283 1283 static int
1284 1284 _rdc_free_buf(rdc_buf_t *h)
1285 1285 {
1286 1286 int rc = 0;
1287 1287
1288 1288 if (h->rdc_flags & RDC_ALLOC) {
1289 1289 if (h->rdc_bufp) {
1290 1290 rc = nsc_free_buf(h->rdc_bufp);
1291 1291 }
1292 1292 h->rdc_flags &= ~(RDC_ALLOC);
1293 1293
1294 1294 if (!RDC_SUCCESS(rc)) {
1295 1295 #ifdef DEBUG
1296 1296 cmn_err(CE_WARN,
1297 1297 "!_rdc_free_buf(%p): nsc_free_buf(%p) returned %d",
1298 1298 (void *) h, (void *) h->rdc_bufp, rc);
1299 1299 #endif
1300 1300 return (rc);
1301 1301 }
1302 1302 }
1303 1303
1304 1304 if (h->rdc_flags & (RDC_REMOTE_BUF|RDC_VEC_ALLOC)) {
1305 1305 if (h->rdc_flags & RDC_VEC_ALLOC) {
1306 1306 kmem_free(h->rdc_bufh.sb_vec, h->rdc_vsize);
1307 1307 }
1308 1308 h->rdc_flags &= ~(RDC_REMOTE_BUF|RDC_VEC_ALLOC);
1309 1309 }
1310 1310
1311 1311 if (h->rdc_anon) {
1312 1312 /* anon buffers still pending */
1313 1313 DTRACE_PROBE1(rdc_free_buf_err, aio_buf_t, h->rdc_anon);
1314 1314 }
1315 1315
1316 1316 if ((h->rdc_bufh.sb_flag & NSC_HALLOCATED) == 0) {
1317 1317 rc = _rdc_free_handle(h, h->rdc_fd);
1318 1318 if (!RDC_SUCCESS(rc)) {
1319 1319 #ifdef DEBUG
1320 1320 cmn_err(CE_WARN,
1321 1321 "!_rdc_free_buf(%p): _rdc_free_handle returned %d",
1322 1322 (void *) h, rc);
1323 1323 #endif
1324 1324 return (rc);
1325 1325 }
1326 1326 } else {
1327 1327 h->rdc_bufh.sb_flag = NSC_HALLOCATED;
1328 1328 h->rdc_bufh.sb_vec = NULL;
1329 1329 h->rdc_bufh.sb_error = 0;
1330 1330 h->rdc_bufh.sb_pos = 0;
1331 1331 h->rdc_bufh.sb_len = 0;
1332 1332 h->rdc_anon = NULL;
1333 1333 h->rdc_vsize = 0;
1334 1334
1335 1335 cv_destroy(&h->rdc_sync.cv);
1336 1336 mutex_destroy(&h->rdc_sync.lock);
1337 1337
1338 1338 }
1339 1339
1340 1340 return (0);
1341 1341 }
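
_rdc_free_buf() releases exactly what the rdc_flags bits say the handle owns: RDC_ALLOC for the nsctl buffer, RDC_REMOTE_BUF/RDC_VEC_ALLOC for the locally built vector. A standalone sketch of that flag-tracked ownership pattern, with made-up names:

	#include <stdlib.h>

	#define	OWN_BUF	0x1			/* like RDC_ALLOC */
	#define	OWN_VEC	0x2			/* like RDC_VEC_ALLOC */

	struct buf_sketch {
		int flags;
		void *buf;
		void *vec;
	};

	static void
	free_sketch(struct buf_sketch *h)
	{
		if (h->flags & OWN_BUF) {
			free(h->buf);		/* release only what we own */
			h->flags &= ~OWN_BUF;
		}
		if (h->flags & OWN_VEC) {
			free(h->vec);
			h->flags &= ~OWN_VEC;
		}
	}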
1342 1342
1343 1343
1344 1344 /*
1345 1345 * _rdc_open
1346 1346 * Open a device
1347 1347 *
1348 1348 * Calling/Exit State:
1349 1349 * Returns a token to identify the device.
1350 1350 *
1351 1351 * Description:
1352 1352 * Performs the housekeeping operations associated with an upper layer
1353 1353 * of the nsctl stack opening a device.
1354 1354 */
1355 1355
1356 1356 /* ARGSUSED */
1357 1357
1358 1358 static int
1359 1359 _rdc_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
1360 1360 {
1361 1361 rdc_k_info_t *krdc;
1362 1362 #ifdef DEBUG
1363 1363 rdc_u_info_t *urdc;
1364 1364 #endif
1365 1365 rdc_fd_t *rfd;
1366 1366 int raw = ((flag & NSC_CACHE) == 0);
1367 1367 int index;
1368 1368 int bmp = 0;
1369 1369 int queue = 0;
1370 1370
1371 1371 rfd = kmem_zalloc(sizeof (*rfd), KM_SLEEP);
1372 1372 if (!rfd)
1373 1373 return (ENOMEM);
1374 1374
1375 1375 /*
1376 1376 * Take config lock to prevent a race with the
1377 1377 * (de)configuration code.
1378 1378 */
1379 1379
1380 1380 mutex_enter(&rdc_conf_lock);
1381 1381
1382 1382 index = rdc_lookup_enabled(path, 0);
1383 1383 if (index < 0) {
1384 1384 index = rdc_lookup_bitmap(path);
1385 1385 if (index >= 0)
1386 1386 bmp = 1;
1387 1387 }
1388 1388 if (index < 0) {
1389 1389 index = rdc_lookup_diskq(path);
1390 1390 if (index >= 0)
1391 1391 queue = 1;
1392 1392 }
1393 1393 if (index < 0) {
1394 1394 /* not found in config */
1395 1395 mutex_exit(&rdc_conf_lock);
1396 1396 kmem_free(rfd, sizeof (*rfd));
1397 1397 return (ENXIO);
1398 1398 }
1399 1399 #ifdef DEBUG
1400 1400 urdc = &rdc_u_info[index];
1401 1401 #endif
1402 1402 krdc = &rdc_k_info[index];
1403 1403
1404 1404 mutex_exit(&rdc_conf_lock);
1405 1405
1406 1406 rdc_group_enter(krdc);
1407 1407
1408 1408 ASSERT(IS_ENABLED(urdc));
1409 1409
1410 1410 if (bmp) {
1411 1411 krdc->b_ref++;
1412 1412 } else if (raw) {
1413 1413 krdc->r_ref++;
1414 1414 } else if (!queue) {
1415 1415 krdc->c_ref++;
1416 1416 }
1417 1417
1418 1418 rfd->rdc_info = krdc;
1419 1419 if (bmp)
1420 1420 rfd->rdc_type = RDC_BMP;
1421 1421 else if (queue)
1422 1422 rfd->rdc_type = RDC_QUE;
1423 1423 else
1424 1424 rfd->rdc_oflags = flag;
1425 1425
1426 1426 rdc_group_exit(krdc);
1427 1427
1428 1428 *cdp = (blind_t)rfd;
1429 1429
1430 1430 return (0);
1431 1431 }
1432 1432
1433 1433 static int
1434 1434 _rdc_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
1435 1435 {
1436 1436 return (_rdc_open(path, NSC_CACHE|flag, cdp, iodev));
1437 1437 }
1438 1438
1439 1439 static int
1440 1440 _rdc_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
1441 1441 {
1442 1442 return (_rdc_open(path, NSC_DEVICE|flag, cdp, iodev));
1443 1443 }
1444 1444
1445 1445
1446 1446 /*
1447 1447 * _rdc_close
1448 1448 * Close a device
1449 1449 *
1450 1450 * Calling/Exit State:
1451 1451 * Always succeeds - returns 0
1452 1452 *
1453 1453 * Description:
1454 1454 * Performs the housekeeping operations associated with an upper layer
1455 1455 * of the sd stack closing a shadowed device.
1456 1456 */
1457 1457
1458 1458 static int
1459 1459 _rdc_close(rfd)
1460 1460 rdc_fd_t *rfd;
1461 1461 {
1462 1462 rdc_k_info_t *krdc = rfd->rdc_info;
1463 1463 int bmp = RDC_IS_BMP(rfd);
1464 1464 int raw = RDC_IS_RAW(rfd);
1465 1465 int queue = RDC_IS_QUE(rfd);
1466 1466
1467 1467 /*
1468 1468 * we don't keep ref counts for the queue, so skip this stuff.
1469 1469 * we may not even have a valid krdc at this point
1470 1470 */
1471 1471 if (queue)
1472 1472 goto queue;
1473 1473 rdc_group_enter(krdc);
1474 1474
1475 1475 if (bmp) {
1476 1476 krdc->b_ref--;
1477 1477 } else if (raw && !queue) {
1478 1478 krdc->r_ref--;
1479 1479 } else if (!queue) {
1480 1480 krdc->c_ref--;
1481 1481 }
1482 1482
1483 1483 if (krdc->closing) {
1484 1484 cv_broadcast(&krdc->closingcv);
1485 1485 }
1486 1486
1487 1487 rdc_group_exit(krdc);
1488 1488 queue:
1489 1489 kmem_free(rfd, sizeof (*rfd));
1490 1490 return (0);
1491 1491 }
1492 1492
1493 1493 /*
1494 1494 * _rdc_alloc_handle
1495 1495 * Allocate a handle
1496 1496 *
1497 1497 */
1498 1498
1499 1499 static nsc_buf_t *
1500 1500 _rdc_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), rdc_fd_t *rfd)
1501 1501 {
1502 1502 rdc_buf_t *h;
1503 1503
1504 1504 h = kmem_zalloc(sizeof (*h), KM_SLEEP);
1505 1505 if (!h)
1506 1506 return (NULL);
1507 1507
1508 1508 h->rdc_bufp = nsc_alloc_handle(RDC_FD(rfd), d_cb, r_cb, w_cb);
1509 1509 if (!h->rdc_bufp) {
1510 1510 if (!IS_RFAILED(rfd->rdc_info)) {
1511 1511 /*
1512 1512 * This is a real failure from the io provider below.
1513 1513 */
1514 1514 kmem_free(h, sizeof (*h));
1515 1515 return (NULL);
1516 1516 } else {
1517 1517 /* EMPTY */
1518 1518 /*
1519 1519 * This is just a failed primary device where
1520 1520 * we can do remote io to the secondary.
1521 1521 */
1522 1522 }
1523 1523 }
1524 1524
1525 1525 h->rdc_bufh.sb_flag = NSC_HALLOCATED;
1526 1526 h->rdc_fd = rfd;
1527 1527 mutex_init(&h->aio_lock, NULL, MUTEX_DRIVER, NULL);
1528 1528
1529 1529 return (&h->rdc_bufh);
1530 1530 }
1531 1531
1532 1532
1533 1533 /*
1534 1534 * _rdc_free_handle
1535 1535 * Free a handle
1536 1536 *
1537 1537 */
1538 1538
1539 1539 /* ARGSUSED */
1540 1540 static int
1541 1541 _rdc_free_handle(rdc_buf_t *h, rdc_fd_t *rfd)
1542 1542 {
1543 1543 int rc;
1544 1544
1545 1545 mutex_destroy(&h->aio_lock);
1546 1546 if (h->rdc_bufp) {
1547 1547 rc = nsc_free_handle(h->rdc_bufp);
1548 1548 if (!RDC_SUCCESS(rc))
1549 1549 return (rc);
1550 1550 }
1551 1551 kmem_free(h, sizeof (rdc_buf_t));
1552 1552 return (0);
1553 1553 }
1554 1554
1555 1555
1556 1556 /*
1557 1557 * _rdc_attach
1558 1558 * Attach
1559 1559 *
1560 1560 * Calling/Exit State:
1561 1561 * Returns 0 for success, errno on failure.
1562 1562 *
1563 1563 * Description:
1564 1564 */
1565 1565
1566 1566 static int
1567 1567 _rdc_attach(rdc_fd_t *rfd, nsc_iodev_t *iodev)
1568 1568 {
1569 1569 rdc_k_info_t *krdc;
1570 1570 int raw = RDC_IS_RAW(rfd);
1571 1571 int rc;
1572 1572
1573 1573 if ((RDC_IS_BMP(rfd)) || RDC_IS_QUE(rfd))
1574 1574 return (EINVAL);
1575 1575
1576 1576 krdc = rfd->rdc_info;
1577 1577 if (krdc == NULL)
1578 1578 return (EINVAL);
1579 1579
1580 1580 mutex_enter(&krdc->devices->id_rlock);
1581 1581 krdc->iodev = iodev;
1582 1582 mutex_exit(&krdc->devices->id_rlock);
1583 1583
1584 1584 rc = _rdc_rsrv_devs(krdc, (raw ? RDC_RAW : RDC_CACHE), RDC_EXTERNAL);
1585 1585 return (rc);
1586 1586 }
1587 1587
1588 1588
1589 1589 /*
1590 1590 * _rdc_detach
1591 1591 * Detach
1592 1592 *
1593 1593 * Calling/Exit State:
1594 1594 * Returns 0 for success, always succeeds
1595 1595 *
1596 1596 * Description:
1597 1597 */
1598 1598
1599 1599 static int
1600 1600 _rdc_detach(rdc_fd_t *rfd, nsc_iodev_t *iodev)
1601 1601 {
1602 1602 rdc_k_info_t *krdc = rfd->rdc_info;
1603 1603 int raw = RDC_IS_RAW(rfd);
1604 1604
1605 1605 /*
1606 1606 * Flush the async queue if necessary.
1607 1607 */
1608 1608
1609 1609 if (IS_ASYNC(&rdc_u_info[krdc->index]) && !RDC_IS_DISKQ(krdc->group)) {
1610 1610 int tries = 1;
1611 1611
1612 1612 while (krdc->group->ra_queue.blocks != 0 && tries--) {
1613 1613 if (!krdc->group->rdc_writer)
1614 1614 (void) rdc_writer(krdc->index);
1615 1615
1616 1616 (void) rdc_drain_queue(krdc->index);
1617 1617 }
1618 1618
1619 1619 		/* force discard of possibly blocked flusher threads */
1620 1620 if (rdc_drain_queue(krdc->index) != 0) {
1621 1621 #ifdef DEBUG
1622 1622 net_queue *qp = &krdc->group->ra_queue;
1623 1623 #endif
1624 1624 do {
1625 1625 mutex_enter(&krdc->group->ra_queue.net_qlock);
1626 1626 krdc->group->asyncdis = 1;
1627 1627 cv_broadcast(&krdc->group->asyncqcv);
1628 1628 mutex_exit(&krdc->group->ra_queue.net_qlock);
1629 1629 cmn_err(CE_WARN,
1630 1630 "!RDC: async I/O pending and not drained "
1631 1631 "for %s during detach",
1632 1632 rdc_u_info[krdc->index].primary.file);
1633 1633 #ifdef DEBUG
1634 1634 cmn_err(CE_WARN,
1635 1635 "!nitems: %" NSC_SZFMT " nblocks: %"
1636 1636 NSC_SZFMT " head: 0x%p tail: 0x%p",
1637 1637 qp->nitems, qp->blocks,
1638 1638 (void *)qp->net_qhead,
1639 1639 (void *)qp->net_qtail);
1640 1640 #endif
1641 1641 } while (krdc->group->rdc_thrnum > 0);
1642 1642 }
1643 1643 }
1644 1644
1645 1645 mutex_enter(&krdc->devices->id_rlock);
1646 1646 if (krdc->iodev != iodev)
1647 1647 cmn_err(CE_WARN, "!_rdc_detach: iodev mismatch %p : %p",
1648 1648 (void *) krdc->iodev, (void *) iodev);
1649 1649
1650 1650 krdc->iodev = NULL;
1651 1651 mutex_exit(&krdc->devices->id_rlock);
1652 1652
1653 1653 _rdc_rlse_devs(krdc, (raw ? RDC_RAW : RDC_CACHE));
1654 1654
1655 1655 return (0);
1656 1656 }
1657 1657
1658 1658 /*
1659 1659 * _rdc_get_pinned
1660 1660 *
1661 1661 * only affects local node.
1662 1662 */
1663 1663
1664 1664 static int
1665 1665 _rdc_get_pinned(rdc_fd_t *rfd)
1666 1666 {
1667 1667 return (nsc_get_pinned(RDC_FD(rfd)));
1668 1668 }
1669 1669
1670 1670 /*
1671 1671 * _rdc_discard_pinned
1672 1672 *
1673 1673 * only affects local node.
1674 1674 */
1675 1675
1676 1676 static int
1677 1677 _rdc_discard_pinned(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len)
1678 1678 {
1679 1679 return (nsc_discard_pinned(RDC_FD(rfd), pos, len));
1680 1680 }
1681 1681
1682 1682 /*
1683 1683 * _rdc_partsize
1684 1684 *
1685 1685 * only affects the local node.
1686 1686 */
1687 1687
1688 1688 static int
1689 1689 _rdc_partsize(rdc_fd_t *rfd, nsc_size_t *ptr)
1690 1690 {
1691 1691 rdc_u_info_t *urdc;
1692 1692
1693 1693 urdc = &rdc_u_info[rfd->rdc_info->index];
1694 1694 /* Always return saved size */
1695 1695 ASSERT(urdc->volume_size != 0);
1696 1696 *ptr = urdc->volume_size;
1697 1697 return (0);
1698 1698 }
1699 1699
1700 1700 /*
1701 1701 * _rdc_maxfbas
1702 1702 *
1703 1703 * only affects local node
1704 1704 */
1705 1705
1706 1706 /* ARGSUSED */
1707 1707 static int
1708 1708 _rdc_maxfbas(rdc_fd_t *rfd, int flag, nsc_size_t *ptr)
1709 1709 {
1710 1710 rdc_k_info_t *krdc = rfd->rdc_info;
1711 1711 int raw = RDC_IS_RAW(rfd);
1712 1712 int rtype = raw ? RDC_RAW : RDC_CACHE;
1713 1713 int rc = 0;
1714 1714
1715 1715 if (krdc == NULL)
1716 1716 return (EINVAL);
1717 1717 if (flag == NSC_RDAHEAD || flag == NSC_CACHEBLK) {
1718 1718 rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL);
1719 1719 if (rc == 0) {
1720 1720 rc = nsc_maxfbas(RDC_U_FD(krdc), flag, ptr);
1721 1721 _rdc_rlse_devs(krdc, rtype);
1722 1722 }
1723 1723 } else {
1724 1724 /* Always return saved size */
1725 1725 ASSERT(krdc->maxfbas != 0);
1726 1726 *ptr = krdc->maxfbas - 1;
1727 1727 }
1728 1728
1729 1729 return (rc);
1730 1730 }
1731 1731
1732 1732 /* ARGSUSED */
1733 1733 static int
1734 1734 _rdc_control(rdc_fd_t *rfd, int cmd, void *ptr, int len)
1735 1735 {
1736 1736 return (nsc_control(RDC_FD(rfd), cmd, ptr, len));
1737 1737 }
1738 1738
1739 1739 /*
1740 1740 * _rdc_attach_fd
1741 1741 *
1742 1742 * called by nsctl as part of nsc_reserve() processing when one of
1743 1743 * SNDR's underlying file descriptors becomes available and metadata
1744 1744 * should be re-acquired.
1745 1745 */
1746 1746 static int
1747 1747 _rdc_attach_fd(blind_t arg)
1748 1748 {
1749 1749 _rdc_info_dev_t *dip = (_rdc_info_dev_t *)arg;
1750 1750 rdc_k_info_t *krdc;
1751 1751 rdc_u_info_t *urdc;
1752 1752 nsc_size_t maxfbas, partsize;
1753 1753 int rc;
1754 1754
1755 1755 krdc = dip->bi_krdc;
1756 1756 urdc = &rdc_u_info[krdc->index];
1757 1757
1758 1758 if ((rc = nsc_partsize(dip->bi_fd, &partsize)) != 0) {
1759 1759 cmn_err(CE_WARN,
1760 1760 "!SNDR: cannot get volume size of %s, error %d",
1761 1761 nsc_pathname(dip->bi_fd), rc);
1762 1762 } else if (urdc->volume_size == 0 && partsize > 0) {
1763 1763 /* set volume size for the first time */
1764 1764 urdc->volume_size = partsize;
1765 1765 } else if (urdc->volume_size != partsize) {
1766 1766 /*
1767 1767 * SNDR cannot yet cope with a volume being resized,
1768 1768 * so fail it.
1769 1769 */
1770 1770 if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
1771 1771 rdc_many_enter(krdc);
1772 1772 if (rdc_get_vflags(urdc) & RDC_PRIMARY)
1773 1773 rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
1774 1774 else
1775 1775 rdc_set_mflags(urdc, RDC_SYNC_NEEDED);
1776 1776 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
1777 1777 "volume resized");
1778 1778 rdc_many_exit(krdc);
1779 1779 rdc_write_state(urdc);
1780 1780 }
1781 1781
1782 1782 cmn_err(CE_WARN,
1783 1783 "!SNDR: %s changed size from %" NSC_SZFMT " to %" NSC_SZFMT,
1784 1784 nsc_pathname(dip->bi_fd), urdc->volume_size, partsize);
1785 1785 }
1786 1786
1787 1787 if ((rc = nsc_maxfbas(dip->bi_fd, 0, &maxfbas)) != 0) {
1788 1788 cmn_err(CE_WARN,
1789 1789 "!SNDR: cannot get max transfer size for %s, error %d",
1790 1790 nsc_pathname(dip->bi_fd), rc);
1791 1791 } else if (maxfbas > 0) {
1792 1792 krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas);
1793 1793 }
1794 1794
1795 1795 return (0);
1796 1796 }
1797 1797
1798 1798
1799 1799 /*
1800 1800 * _rdc_pinned
1801 1801 *
1802 1802 * only affects local node
1803 1803 */
1804 1804
1805 1805 static void
1806 1806 _rdc_pinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
1807 1807 {
1808 1808 nsc_pinned_data(dip->bi_krdc->iodev, pos, len);
1809 1809 }
1810 1810
1811 1811
1812 1812 /*
1813 1813 * _rdc_unpinned
1814 1814 *
1815 1815 * only affects local node.
1816 1816 */
1817 1817
1818 1818 static void
1819 1819 _rdc_unpinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
1820 1820 {
1821 1821 nsc_unpinned_data(dip->bi_krdc->iodev, pos, len);
1822 1822 }
1823 1823
1824 1824
1825 1825 /*
1826 1826 * _rdc_read
1827 1827 *
1828 1828 * read the specified data into the buffer - go remote if local down,
1829 1829  * or the remote end has more recent data because a reverse sync is
1830 1830 * in progress.
1831 1831 */
1832 1832
1833 1833 static int
1834 1834 _rdc_read(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
1835 1835 {
1836 1836 rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
1837 1837 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
1838 1838 int remote = (RDC_REMOTE(h) || (rdc_get_mflags(urdc) & RDC_SLAVE));
1839 1839 int rc1, rc2;
1840 1840
1841 1841 rc1 = rc2 = 0;
1842 1842
1843 1843 if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
1844 1844 cmn_err(CE_WARN,
1845 1845 "!_rdc_read: bounds check: io(handle) pos %" NSC_XSZFMT
1846 1846 "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
1847 1847 pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
1848 1848 h->rdc_bufh.sb_error = EINVAL;
1849 1849 return (h->rdc_bufh.sb_error);
1850 1850 }
1851 1851
1852 1852 if (flag & NSC_NOBLOCK) {
1853 1853 cmn_err(CE_WARN,
1854 1854 "!_rdc_read: removing unsupported NSC_NOBLOCK flag");
1855 1855 flag &= ~(NSC_NOBLOCK);
1856 1856 }
1857 1857
1858 1858
1859 1859 if (!remote) {
1860 1860 rc1 = nsc_read(h->rdc_bufp, pos, len, flag);
1861 1861 }
1862 1862
1863 1863 if (remote || !RDC_SUCCESS(rc1)) {
1864 1864 rc2 = _rdc_remote_read(krdc, &h->rdc_bufh, pos, len, flag);
1865 1865 }
1866 1866
1867 1867 if (remote && !RDC_SUCCESS(rc2))
1868 1868 h->rdc_bufh.sb_error = rc2;
1869 1869 else if (!RDC_SUCCESS(rc1) && !RDC_SUCCESS(rc2))
1870 1870 h->rdc_bufh.sb_error = rc1;
1871 1871
1872 1872 return (h->rdc_bufh.sb_error);
1873 1873 }
1874 1874
1875 1875
1876 1876 static int
1877 1877 _rdc_remote_write(rdc_k_info_t *krdc, rdc_buf_t *h, nsc_buf_t *nsc_h,
1878 1878 nsc_off_t pos, nsc_size_t len, int flag, uint_t bitmask)
1879 1879 {
1880 1880 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
1881 1881 int rc = 0;
1882 1882 nsc_size_t plen, syncblockpos;
1883 1883 aio_buf_t *anon = NULL;
1884 1884
1885 1885 if (!(rdc_get_vflags(urdc) & RDC_PRIMARY))
1886 1886 return (EINVAL);
1887 1887
1888 1888 if ((rdc_get_vflags(urdc) & RDC_LOGGING) &&
1889 1889 (!IS_STATE(urdc, RDC_QUEUING))) {
1890 1890 goto done;
1891 1891 }
1892 1892
1893 1893 /*
1894 1894 * this check for RDC_SYNCING may seem redundant, but there is a window
1895 1895 * in rdc_sync, where an async set has not yet been transformed into a
1896 1896 * sync set.
1897 1897 */
1898 1898 if ((!IS_ASYNC(urdc) || IS_STATE(urdc, RDC_SYNCING)) ||
1899 1899 RDC_REMOTE(h) ||
1900 1900 krdc->group->synccount > 0 ||
1901 1901 (rdc_get_vflags(urdc) & RDC_SLAVE) ||
1902 1902 (rdc_get_vflags(urdc) & RDC_VOL_FAILED) ||
1903 1903 (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) {
1904 1904
1905 1905 /* sync mode, or remote io mode, or local device is dead */
1906 1906 rc = rdc_net_write(krdc->index, krdc->remote_index,
1907 1907 nsc_h, pos, len, RDC_NOSEQ, RDC_NOQUE, NULL);
1908 1908
1909 1909 if ((rc == 0) &&
1910 1910 !(rdc_get_vflags(urdc) & RDC_BMP_FAILED) &&
1911 1911 !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
1912 1912 if (IS_STATE(urdc, RDC_SYNCING) &&
1913 1913 !IS_STATE(urdc, RDC_FULL) ||
1914 1914 !IS_STATE(urdc, RDC_SLAVE)) {
1915 1915 mutex_enter(&krdc->syncbitmutex);
1916 1916
1917 1917 syncblockpos = LOG_TO_FBA_NUM(krdc->syncbitpos);
1918 1918
1919 1919 DTRACE_PROBE4(rdc_remote_write,
1920 1920 nsc_off_t, krdc->syncbitpos,
1921 1921 nsc_off_t, syncblockpos,
1922 1922 nsc_off_t, pos,
1923 1923 nsc_size_t, len);
1924 1924
1925 1925 /*
1926 1926 * If the current I/O's position plus length is
1927 1927 * greater then the sync block position, only
1928 1928 * clear those blocks upto sync block position
1929 1929 */
1930 1930 if (pos < syncblockpos) {
1931 1931 if ((pos + len) > syncblockpos)
1932 1932 plen = syncblockpos - pos;
1933 1933 else
1934 1934 plen = len;
1935 1935 RDC_CLR_BITMAP(krdc, pos, plen, bitmask,
1936 1936 RDC_BIT_BUMP);
1937 1937 }
1938 1938 mutex_exit(&krdc->syncbitmutex);
1939 1939 } else {
1940 1940 RDC_CLR_BITMAP(krdc, pos, len, bitmask,
1941 1941 RDC_BIT_BUMP);
1942 1942 }
1943 1943 } else if (rc != 0) {
1944 1944 rdc_group_enter(krdc);
1945 1945 rdc_set_flags_log(urdc, RDC_LOGGING,
1946 1946 "net write failed");
1947 1947 rdc_write_state(urdc);
1948 1948 if (rdc_get_vflags(urdc) & RDC_SYNCING)
1949 1949 krdc->disk_status = 1;
1950 1950 rdc_group_exit(krdc);
1951 1951 }
1952 1952 } else if (!IS_STATE(urdc, RDC_SYNCING)) {
1953 1953 DTRACE_PROBE1(async_enque_start, rdc_buf_t *, h);
1954 1954
1955 1955 ASSERT(krdc->group->synccount == 0);
1956 1956 /* async mode */
1957 1957 if ((h == NULL) || ((h->rdc_flags & RDC_ASYNC_VEC) == 0)) {
1958 1958
1959 1959 rc = _rdc_enqueue_write(krdc, pos, len, flag, NULL);
1960 1960
1961 1961 } else {
1962 1962 anon = rdc_aio_buf_get(h, krdc->index);
1963 1963 if (anon == NULL) {
1964 1964 #ifdef DEBUG
1965 1965 cmn_err(CE_WARN,
1966 1966 "!enqueue write failed for handle %p",
1967 1967 (void *) h);
1968 1968 #endif
1969 1969 return (EINVAL);
1970 1970 }
1971 1971 rc = _rdc_enqueue_write(krdc, pos, len, flag,
1972 1972 anon->rdc_abufp);
1973 1973
1974 1974 /*
1975 1975 * get rid of the aio_buf_t now, as this
1976 1976 * may not be the set that this rdc_buf
1977 1977 			 * was allocated on. We are done with it anyway;
1978 1978 			 * the enqueuing code frees the nsc_abuf.
1979 1979 */
1980 1980 rdc_aio_buf_del(h, krdc);
1981 1981 }
1982 1982
1983 1983 } else {
1984 1984 ASSERT(IS_STATE(urdc, RDC_SYNCING));
1985 1985 ASSERT(0);
1986 1986 }
1987 1987
1988 1988 done:
1989 1989 if ((anon == NULL) && h && (h->rdc_flags & RDC_ASYNC_VEC)) {
1990 1990 /*
1991 1991 * Toss the anonymous buffer if we have one allocated.
1992 1992 */
1993 1993 anon = rdc_aio_buf_get(h, krdc->index);
1994 1994 if (anon) {
1995 1995 (void) nsc_free_buf(anon->rdc_abufp);
1996 1996 rdc_aio_buf_del(h, krdc);
1997 1997 }
1998 1998 }
1999 1999
2000 2000 return (rc);
2001 2001 }
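
/*
 * In outline (informal summary, not authoritative): sync mode, remote
 * io mode and failed-local-volume sets go straight to rdc_net_write()
 * and clear bitmap bits on success; async sets enqueue the write for
 * the flusher (memory queue or diskq); a logging, non-queuing set
 * skips to done: and only tidies up any anonymous buffer still
 * attached to the handle.
 */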
2002 2002
2003 2003 /*
2004 2004 * _rdc_multi_write
2005 2005 *
2006 2006 * Send to multihop remote. Obeys 1 to many if present and we are crazy
2007 2007 * enough to support it.
2008 2008 *
2009 2009 */
2010 2010 int
2011 2011 _rdc_multi_write(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag,
2012 2012 rdc_k_info_t *krdc)
2013 2013 {
2014 2014 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
2015 2015 rdc_k_info_t *this = krdc; /* krdc that was requested */
2016 2016 int rc, retval;
2017 2017 uint_t bitmask;
2018 2018
2019 2019 retval = rc = 0;
2020 2020 if (!RDC_HANDLE_LIMITS(h, pos, len)) {
2021 2021 cmn_err(CE_WARN,
2022 2022 "!_rdc_multi_write: bounds check: io(handle) pos %"
2023 2023 NSC_XSZFMT "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%"
2024 2024 NSC_XSZFMT ")", pos, h->sb_pos, len, h->sb_len);
2025 2025 return (EINVAL);
2026 2026 }
2027 2027
2028 2028 /* if this is a 1 to many, set all the bits for all the sets */
2029 2029 do {
2030 2030 if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
2031 2031 (void) nsc_uncommit(h, pos, len, flag);
2032 2032 /* set the error, but try other sets */
2033 2033 retval = EIO;
2034 2034 }
2035 2035 if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) {
2036 2036 rdc_many_enter(krdc);
2037 2037 for (krdc = krdc->many_next; krdc != this;
2038 2038 krdc = krdc->many_next) {
2039 2039 urdc = &rdc_u_info[krdc->index];
2040 2040 if (!IS_ENABLED(urdc))
2041 2041 continue;
2042 2042 break;
2043 2043 }
2044 2044 rdc_many_exit(krdc);
2045 2045 }
2046 2046 } while (krdc != this);
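
	/*
	 * Note (informal): the many_next pointers form a ring, so
	 * starting at the requested set ("this") and following
	 * many_next until it wraps around visits every set in the
	 * one-to-many configuration exactly once, e.g. A -> B -> C -> A
	 * for three sets (names illustrative).
	 */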
2047 2047
2048 2048 urdc = &rdc_u_info[krdc->index];
2049 2049
2050 2050 if (flag & NSC_NOBLOCK) {
2051 2051 cmn_err(CE_WARN,
2052 2052 "!_rdc_multi_write: removing unsupported NSC_NOBLOCK flag");
2053 2053 flag &= ~(NSC_NOBLOCK);
2054 2054 }
2055 2055
2056 2056 multiwrite1:
2057 2057 if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
2058 2058 (!IS_STATE(urdc, RDC_LOGGING) ||
2059 2059 (IS_STATE(urdc, RDC_LOGGING) &&
2060 2060 IS_STATE(urdc, RDC_QUEUING)))) {
2061 2061 rc = _rdc_remote_write(krdc, NULL, h, pos, len, flag, bitmask);
2062 2062 }
2063 2063
2064 2064 if (!RDC_SUCCESS(rc) && retval == 0) {
2065 2065 retval = rc;
2066 2066 }
2067 2067
2068 2068 multiwrite2:
2069 2069 	if (IS_MANY(krdc) && (rdc_get_vflags(urdc) & RDC_PRIMARY)) {
2070 2070 rdc_many_enter(krdc);
2071 2071 for (krdc = krdc->many_next; krdc != this;
2072 2072 krdc = krdc->many_next) {
2073 2073 urdc = &rdc_u_info[krdc->index];
2074 2074 if (!IS_ENABLED(urdc))
2075 2075 continue;
2076 2076 rc = 0;
2077 2077 rdc_many_exit(krdc);
2078 2078
2079 2079 goto multiwrite1;
2080 2080 }
2081 2081 rdc_many_exit(krdc);
2082 2082 }
2083 2083
2084 2084 return (retval);
2085 2085 }
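
/*
 * Error semantics above (informal): retval latches the first failure,
 * whether a bitmap EIO or a failed remote write, while the loop still
 * attempts the remaining sets, so one dead set does not stop IO to
 * its siblings.
 */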
2086 2086
2087 2087 void
2088 2088 _rdc_diskq_enqueue_thr(rdc_aio_t *p)
2089 2089 {
2090 2090 rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next;
2091 2091 rdc_k_info_t *krdc = &rdc_k_info[p->index];
2092 2092 int rc2;
2093 2093
2094 2094
2095 2095 rc2 = rdc_diskq_enqueue(krdc, p);
2096 2096
2097 2097 /*
2098 2098 * overload flag with error return if any
2099 2099 */
2100 2100 if (!RDC_SUCCESS(rc2)) {
2101 2101 p->flag = rc2;
2102 2102 } else {
2103 2103 p->flag = 0;
2104 2104 }
2105 2105 mutex_enter(&sync->lock);
2106 2106 sync->complete++;
2107 2107 cv_broadcast(&sync->cv);
2108 2108 mutex_exit(&sync->lock);
2109 2109 }
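
/*
 * Rendezvous sketch (informal): the spawner in _rdc_write bumps
 * sync->threads once per successful nst_create(), each worker bumps
 * sync->complete and broadcasts sync->cv when done, and the spawner
 * later waits until complete == threads before collecting per-thread
 * status from the rdc_aio_t flag fields.
 */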
2110 2110
2111 2111 /*
2112 2112 * _rdc_sync_write_thr
2113 2113 * syncronous write thread which writes to network while
2114 2114  * synchronous write thread which writes to the network while the
2115 2115  * local write is occurring
2116 2116 void
2117 2117 _rdc_sync_write_thr(rdc_aio_t *p)
2118 2118 {
2119 2119 rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next;
2120 2120 rdc_buf_t *h = (rdc_buf_t *)p->handle;
2121 2121 rdc_k_info_t *krdc = &rdc_k_info[p->index];
2122 2122 #ifdef DEBUG
2123 2123 rdc_u_info_t *urdc;
2124 2124 #endif
2125 2125 int rc2;
2126 2126 int bitmask;
2127 2127
2128 2128 rdc_group_enter(krdc);
2129 2129 krdc->aux_state |= RDC_AUXWRITE;
2130 2130 #ifdef DEBUG
2131 2131 urdc = &rdc_u_info[krdc->index];
2132 2132 if (!IS_ENABLED(urdc)) {
2133 2133 cmn_err(CE_WARN, "!rdc_sync_write_thr: set not enabled %s:%s",
2134 2134 urdc->secondary.file,
2135 2135 urdc->secondary.bitmap);
2136 2136 }
2137 2137 #endif
2138 2138 rdc_group_exit(krdc);
2139 2139 bitmask = p->iostatus; /* overload */
2140 2140 rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, p->pos, p->len,
2141 2141 p->flag, bitmask);
2142 2142
2143 2143
2144 2144 /*
2145 2145 * overload flag with error return if any
2146 2146 */
2147 2147 if (!RDC_SUCCESS(rc2)) {
2148 2148 p->flag = rc2;
2149 2149 } else {
2150 2150 p->flag = 0;
2151 2151 }
2152 2152
2153 2153 rdc_group_enter(krdc);
2154 2154 krdc->aux_state &= ~RDC_AUXWRITE;
2155 2155 rdc_group_exit(krdc);
2156 2156
2157 2157 mutex_enter(&sync->lock);
2158 2158 sync->complete++;
2159 2159 cv_broadcast(&sync->cv);
2160 2160 mutex_exit(&sync->lock);
2161 2161 }
2162 2162
2163 2163 /*
2164 2164 * _rdc_write
2165 2165 *
2166 2166 * Commit changes to the buffer locally and send remote.
2167 2167 *
2168 2168 * If this write is whilst the local primary volume is being synced,
2169 2169 * then we write the remote end first to ensure that the new data
2170 2170 * cannot be overwritten by a concurrent sync operation.
2171 2171 */
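
/*
 * Illustration (hypothetical ordering): a reverse sync copies blocks
 * from the remote end back onto this volume. If the local write
 * landed first, the concurrent sync could then copy the still-stale
 * remote copy of the same block over it; pushing the new data to the
 * remote end first means the sync can only ever copy the new
 * contents back.
 */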
2172 2172
2173 2173 static int
2174 2174 _rdc_write(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
2175 2175 {
2176 2176 rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
2177 2177 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
2178 2178 rdc_k_info_t *this;
2179 2179 rdc_k_info_t *multi = NULL;
2180 2180 int remote = RDC_REMOTE(h);
2181 2181 int rc1, rc2;
2182 2182 uint_t bitmask;
2183 2183 int first;
2184 2184 int rsync;
2185 2185 int nthr;
2186 2186 int winddown;
2187 2187 int thrrc = 0;
2188 2188 rdc_aio_t *bp[SNDR_MAXTHREADS];
2189 2189 aio_buf_t *anon;
2190 2190 nsthread_t *tp;
2191 2191 rdc_thrsync_t *sync = &h->rdc_sync;
2192 2192
2193 2193 /* If this is the multi-hop secondary, move along to the primary */
2194 2194 if (IS_MULTI(krdc) && !IS_PRIMARY(urdc)) {
2195 2195 multi = krdc;
2196 2196 krdc = krdc->multi_next;
2197 2197 urdc = &rdc_u_info[krdc->index];
2198 2198
2199 2199 if (!IS_ENABLED(urdc)) {
2200 2200 krdc = h->rdc_fd->rdc_info;
2201 2201 urdc = &rdc_u_info[krdc->index];
2202 2202 multi = NULL;
2203 2203 }
2204 2204 }
2205 2205 this = krdc;
2206 2206
2207 2207 rsync = (IS_PRIMARY(urdc)) && (IS_SLAVE(urdc));
2208 2208
2209 2209 /*
2210 2210 * If this is a many group with a reverse sync in progress and
2211 2211 * this is not the slave krdc/urdc, then search for the slave
2212 2212 * so that we can do the remote io to the correct secondary
2213 2213 * before the local io.
2214 2214 */
2215 2215 if (rsync && !(IS_SLAVE(urdc))) {
2216 2216 rdc_many_enter(krdc);
2217 2217 for (krdc = krdc->many_next; krdc != this;
2218 2218 krdc = krdc->many_next) {
2219 2219 urdc = &rdc_u_info[krdc->index];
2220 2220 if (!IS_ENABLED(urdc))
2221 2221 continue;
2222 2222 if (rdc_get_vflags(urdc) & RDC_SLAVE)
2223 2223 break;
2224 2224 }
2225 2225 rdc_many_exit(krdc);
2226 2226
2227 2227 this = krdc;
2228 2228 }
2229 2229
2230 2230 urdc = &rdc_u_info[krdc->index];
2231 2231
2232 2232 rc1 = rc2 = 0;
2233 2233 first = 1;
2234 2234 nthr = 0;
2235 2235 if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
2236 2236 cmn_err(CE_WARN,
2237 2237 "!_rdc_write: bounds check: io(handle) pos %" NSC_XSZFMT
2238 2238 "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2239 2239 pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
2240 2240 h->rdc_bufh.sb_error = EINVAL;
2241 2241 return (h->rdc_bufh.sb_error);
2242 2242 }
2243 2243
2244 2244 DTRACE_PROBE(rdc_write_bitmap_start);
2245 2245
2246 2246 /* if this is a 1 to many, set all the bits for all the sets */
2247 2247 do {
2248 2248 if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
2249 2249 if (rdc_eio_nobmp) {
2250 2250 				(void) nsc_uncommit(h->rdc_bufp,
2251 2251 				    pos, len, flag);
2252 2252 /* set the error, but try the other sets */
2253 2253 h->rdc_bufh.sb_error = EIO;
2254 2254 }
2255 2255 }
2256 2256
2257 2257 if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) {
2258 2258 rdc_many_enter(krdc);
2259 2259 for (krdc = krdc->many_next; krdc != this;
2260 2260 krdc = krdc->many_next) {
2261 2261 urdc = &rdc_u_info[krdc->index];
2262 2262 if (!IS_ENABLED(urdc))
2263 2263 continue;
2264 2264 break;
2265 2265 }
2266 2266 rdc_many_exit(krdc);
2267 2267 }
2268 2268
2269 2269 } while (krdc != this);
2270 2270
2271 2271 urdc = &rdc_u_info[krdc->index];
2272 2272
2273 2273 DTRACE_PROBE(rdc_write_bitmap_end);
2274 2274
2275 2275 write1:
2276 2276 /* just in case we switch mode during write */
2277 2277 if (IS_ASYNC(urdc) && (!IS_STATE(urdc, RDC_SYNCING)) &&
2278 2278 (!IS_STATE(urdc, RDC_LOGGING) ||
2279 2279 IS_STATE(urdc, RDC_QUEUING))) {
2280 2280 h->rdc_flags |= RDC_ASYNC_BUF;
2281 2281 }
2282 2282 if (BUF_IS_ASYNC(h)) {
2283 2283 /*
2284 2284 		 * We are in async mode
2285 2285 */
2286 2286 aio_buf_t *p;
2287 2287 DTRACE_PROBE(rdc_write_async_start);
2288 2288
2289 2289 if ((krdc->type_flag & RDC_DISABLEPEND) ||
2290 2290 ((IS_STATE(urdc, RDC_LOGGING) &&
2291 2291 !IS_STATE(urdc, RDC_QUEUING)))) {
2292 2292 goto localwrite;
2293 2293 }
2294 2294 if (IS_STATE(urdc, RDC_VOL_FAILED)) {
2295 2295 /*
2296 2296 * overload remote as we don't want to do local
2297 2297 * IO later. forge ahead with async
2298 2298 */
2299 2299 remote++;
2300 2300 }
2301 2301 if ((IS_STATE(urdc, RDC_SYNCING)) ||
2302 2302 (IS_STATE(urdc, RDC_LOGGING) &&
2303 2303 !IS_STATE(urdc, RDC_QUEUING))) {
2304 2304 goto localwrite;
2305 2305 }
2306 2306
2307 2307 p = rdc_aio_buf_add(krdc->index, h);
2308 2308 if (p == NULL) {
2309 2309 #ifdef DEBUG
2310 2310 cmn_err(CE_WARN,
2311 2311 "!rdc_alloc_buf aio_buf allocation failed");
2312 2312 #endif
2313 2313 goto localwrite;
2314 2314 }
2315 2315
2316 2316 mutex_enter(&h->aio_lock);
2317 2317
2318 2318 DTRACE_PROBE(rdc_write_async__allocabuf_start);
2319 2319 rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp);
2320 2320 DTRACE_PROBE(rdc_write_async__allocabuf_end);
2321 2321 if (!RDC_SUCCESS(rc1)) {
2322 2322 #ifdef DEBUG
2323 2323 cmn_err(CE_WARN,
2324 2324 "!rdc_alloc_buf NSC_ANON allocation failed rc %d",
2325 2325 rc1);
2326 2326 #endif
2327 2327 mutex_exit(&h->aio_lock);
2328 2328 goto localwrite;
2329 2329 }
2330 2330 h->rdc_flags |= RDC_ASYNC_VEC;
2331 2331 mutex_exit(&h->aio_lock);
2332 2332
2333 2333 /*
2334 2334 * Copy buffer into anonymous buffer
2335 2335 */
2336 2336
2337 2337 DTRACE_PROBE(rdc_write_async_nsccopy_start);
2338 2338 rc1 =
2339 2339 nsc_copy(&h->rdc_bufh, p->rdc_abufp, pos, pos, len);
2340 2340 DTRACE_PROBE(rdc_write_async_nsccopy_end);
2341 2341 if (!RDC_SUCCESS(rc1)) {
2342 2342 #ifdef DEBUG
2343 2343 cmn_err(CE_WARN,
2344 2344 "!_rdc_write: nsc_copy failed rc=%d state %x",
2345 2345 rc1, rdc_get_vflags(urdc));
2346 2346 #endif
2347 2347 rc1 = nsc_free_buf(p->rdc_abufp);
2348 2348 rdc_aio_buf_del(h, krdc);
2349 2349 rdc_group_enter(krdc);
2350 2350 rdc_group_log(krdc, RDC_FLUSH|RDC_OTHERREMOTE,
2351 2351 "nsc_copy failure");
2352 2352 rdc_group_exit(krdc);
2353 2353 }
2354 2354 DTRACE_PROBE(rdc_write_async_end);
2355 2355
2356 2356 /*
2357 2357 * using a diskq, launch a thread to queue it
2358 2358 * and free the aio->h and aio
2359 2359 * if the thread fails, do it the old way (see localwrite)
2360 2360 */
2361 2361
2362 2362 if (RDC_IS_DISKQ(krdc->group)) {
2363 2363
2364 2364 if (nthr >= SNDR_MAXTHREADS) {
2365 2365 #ifdef DEBUG
2366 2366 cmn_err(CE_NOTE, "!nthr overrun in _rdc_write");
2367 2367 #endif
2368 2368 thrrc = ENOEXEC;
2369 2369 goto localwrite;
2370 2370 }
2371 2371
2372 2372 anon = rdc_aio_buf_get(h, krdc->index);
2373 2373 if (anon == NULL) {
2374 2374 #ifdef DEBUG
2375 2375 cmn_err(CE_WARN, "!rdc_aio_buf_get failed for "
2376 2376 "%p", (void *)h);
2377 2377 #endif
2378 2378 thrrc = ENOEXEC;
2379 2379 goto localwrite;
2380 2380 }
2381 2381
2382 2382 /* get a populated rdc_aio_t */
2383 2383 bp[nthr] =
2384 2384 rdc_aio_tbuf_get(sync, anon->rdc_abufp, pos, len,
2385 2385 flag, krdc->index, bitmask);
2386 2386
2387 2387 if (bp[nthr] == NULL) {
2388 2388 #ifdef DEBUG
2389 2389 cmn_err(CE_NOTE, "!_rdcwrite: "
2390 2390 "kmem_alloc failed bp aio (1)");
2391 2391 #endif
2392 2392 thrrc = ENOEXEC;
2393 2393 goto localwrite;
2394 2394 }
2395 2395 /* start the queue io */
2396 2396 tp = nst_create(_rdc_ioset, _rdc_diskq_enqueue_thr,
2397 2397 (void *)bp[nthr], NST_SLEEP);
2398 2398
2399 2399 if (tp == NULL) {
2400 2400 #ifdef DEBUG
2401 2401 cmn_err(CE_NOTE,
2402 2402 "!_rdcwrite: nst_create failure");
2403 2403 #endif
2404 2404 thrrc = ENOEXEC;
2405 2405 } else {
2406 2406 mutex_enter(&(sync->lock));
2407 2407 sync->threads++;
2408 2408 mutex_exit(&(sync->lock));
2409 2409 nthr++;
2410 2410
2411 2411 }
2412 2412 /*
2413 2413 * the handle that is to be enqueued is now in
2414 2414 * the rdc_aio_t, and will be freed there.
2415 2415 			 * Dump the aio_buf_t now; if this is 1 to many,
2416 2416 			 * _rdc_free_buf() may not do this for us when
2417 2417 			 * this is not the index that the rdc_buf_t
2418 2418 			 * was allocated on.
2419 2419 */
2420 2420 rdc_aio_buf_del(h, krdc);
2421 2421
2422 2422 }
2423 2423 } /* end of async */
2424 2424
2425 2425 /*
2426 2426 	 * We try to overlap local and network IO for the sync case
2427 2427 	 * (we already do it for async).
2428 2428 	 * If one to many, we need to track the resulting nst_thread
2429 2429 	 * so that we don't trash the nsc_buf on a free.
2430 2430 	 * Start the network IO first, then do the local IO (sync only).
2431 2431 */
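
	/*
	 * Sketch of the overlap (informal): nst_create() below starts
	 * the network write in _rdc_sync_write_thr while this thread
	 * falls through to the local nsc_write() at localwrite:, and
	 * the sync->cv rendezvous near the end of this function joins
	 * the two before status is collected.
	 */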
2432 2432
2433 2433 if (IS_PRIMARY(urdc) && !IS_STATE(urdc, RDC_LOGGING) &&
2434 2434 !BUF_IS_ASYNC(h)) {
2435 2435 /*
2436 2436 * if forward syncing, we must do local IO first
2437 2437 * then remote io. Don't spawn thread
2438 2438 */
2439 2439 if (!rsync && (IS_STATE(urdc, RDC_SYNCING))) {
2440 2440 thrrc = ENOEXEC;
2441 2441 goto localwrite;
2442 2442 }
2443 2443 if (IS_MULTI(krdc)) {
2444 2444 rdc_k_info_t *ktmp;
2445 2445 rdc_u_info_t *utmp;
2446 2446
2447 2447 ktmp = krdc->multi_next;
2448 2448 utmp = &rdc_u_info[ktmp->index];
2449 2449 if (IS_ENABLED(utmp))
2450 2450 multi = ktmp;
2451 2451 }
2452 2452 if (nthr >= SNDR_MAXTHREADS) {
2453 2453 #ifdef DEBUG
2454 2454 cmn_err(CE_NOTE, "!nthr overrun in _rdc_write");
2455 2455 #endif
2456 2456 thrrc = ENOEXEC;
2457 2457 goto localwrite;
2458 2458 }
2459 2459
2460 2460 bp[nthr] = rdc_aio_tbuf_get(sync, h, pos, len,
2461 2461 flag, krdc->index, bitmask);
2462 2462
2463 2463 if (bp[nthr] == NULL) {
2464 2464 thrrc = ENOEXEC;
2465 2465 goto localwrite;
2466 2466 }
2467 2467 tp = nst_create(_rdc_ioset, _rdc_sync_write_thr,
2468 2468 (void *)bp[nthr], NST_SLEEP);
2469 2469 if (tp == NULL) {
2470 2470 #ifdef DEBUG
2471 2471 cmn_err(CE_NOTE, "!_rdcwrite: nst_create failure");
2472 2472 #endif
2473 2473 thrrc = ENOEXEC;
2474 2474 } else {
2475 2475 mutex_enter(&(sync->lock));
2476 2476 sync->threads++;
2477 2477 mutex_exit(&(sync->lock));
2478 2478 nthr++;
2479 2479 }
2480 2480 }
2481 2481 localwrite:
2482 2482 if (!remote && !rsync && first) {
2483 2483 DTRACE_PROBE(rdc_write_nscwrite_start);
2484 2484 rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
2485 2485 DTRACE_PROBE(rdc_write_nscwrite_end);
2486 2486 if (!RDC_SUCCESS(rc1)) {
2487 2487 rdc_many_enter(krdc);
2488 2488 if (IS_PRIMARY(urdc))
2489 2489 /* Primary, so reverse sync needed */
2490 2490 rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
2491 2491 else
2492 2492 /* Secondary, so sync needed */
2493 2493 rdc_set_flags(urdc, RDC_SYNC_NEEDED);
2494 2494 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
2495 2495 "local write failed");
2496 2496 rdc_many_exit(krdc);
2497 2497 rdc_write_state(urdc);
2498 2498 }
2499 2499 }
2500 2500
2501 2501 /*
2502 2502 	 * This is where we either enqueue async IO for the flusher,
2503 2503 	 * or do sync IO in the case of an error in thread creation,
2504 2504 	 * or we are doing a forward sync.
2505 2505 	 * NOTE: if we are async and using a diskq, we have
2506 2506 	 * already enqueued this write.
2507 2507 	 * _rdc_remote_write will end up enqueueing to memory, or, in
2508 2508 	 * the case of a thread creation error above (thrrc == ENOEXEC),
2509 2509 	 * it will retry the diskq enqueue.
2510 2510 */
2511 2511 	if (((IS_PRIMARY(urdc)) && (thrrc == ENOEXEC)) ||
2512 2512 	    (BUF_IS_ASYNC(h) && !RDC_IS_DISKQ(krdc->group))) {
2513 2513 thrrc = 0;
2514 2514 if (IS_MULTI(krdc)) {
2515 2515 rdc_k_info_t *ktmp;
2516 2516 rdc_u_info_t *utmp;
2517 2517
2518 2518 ktmp = krdc->multi_next;
2519 2519 utmp = &rdc_u_info[ktmp->index];
2520 2520 if (IS_ENABLED(utmp))
2521 2521 multi = ktmp;
2522 2522 }
2523 2523
2524 2524 DTRACE_PROBE(rdc_write_remote_start);
2525 2525
2526 2526 rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh,
2527 2527 pos, len, flag, bitmask);
2528 2528
2529 2529 DTRACE_PROBE(rdc_rdcwrite_remote_end);
2530 2530 }
2531 2531
2532 2532 if (!RDC_SUCCESS(rc1)) {
2533 2533 if ((IS_PRIMARY(urdc)) && !RDC_SUCCESS(rc2)) {
2534 2534 h->rdc_bufh.sb_error = rc1;
2535 2535 }
2536 2536 } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) {
2537 2537 h->rdc_bufh.sb_error = rc2;
2538 2538 }
2539 2539 write2:
2540 2540 /*
2541 2541 * If one to many, jump back into the loop to continue IO
2542 2542 */
2543 2543 if (IS_MANY(krdc) && (IS_PRIMARY(urdc))) {
2544 2544 rdc_many_enter(krdc);
2545 2545 for (krdc = krdc->many_next; krdc != this;
2546 2546 krdc = krdc->many_next) {
2547 2547 urdc = &rdc_u_info[krdc->index];
2548 2548 if (!IS_ENABLED(urdc))
2549 2549 continue;
2550 2550 rc2 = first = 0;
2551 2551 h->rdc_flags &= ~RDC_ASYNC_BUF;
2552 2552 rdc_many_exit(krdc);
2553 2553 goto write1;
2554 2554 }
2555 2555 rdc_many_exit(krdc);
2556 2556 }
2557 2557 urdc = &rdc_u_info[krdc->index];
2558 2558
2559 2559 /*
2560 2560 * collect all of our threads if any
2561 2561 */
2562 2562 if (nthr) {
2563 2563
2564 2564 mutex_enter(&(sync->lock));
2565 2565 /* wait for the threads */
2566 2566 while (sync->complete != sync->threads) {
2567 2567 cv_wait(&(sync->cv), &(sync->lock));
2568 2568 }
2569 2569 mutex_exit(&(sync->lock));
2570 2570
2571 2571 /* collect status */
2572 2572
2573 2573 winddown = 0;
2574 2574 while (winddown < nthr) {
2575 2575 /*
2576 2576 * Get any error return from thread
2577 2577 */
2578 2578 if ((remote || rsync) && bp[winddown]->flag) {
2579 2579 h->rdc_bufh.sb_error = bp[winddown]->flag;
2580 2580 }
2581 2581 if (bp[winddown])
2582 2582 kmem_free(bp[winddown], sizeof (rdc_aio_t));
2583 2583 winddown++;
2584 2584 }
2585 2585 }
2586 2586
2587 2587 if (rsync && !(IS_STATE(urdc, RDC_VOL_FAILED))) {
2588 2588 rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
2589 2589 if (!RDC_SUCCESS(rc1)) {
2590 2590 /* rsync, so reverse sync needed already set */
2591 2591 rdc_many_enter(krdc);
2592 2592 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
2593 2593 "rsync local write failed");
2594 2594 rdc_many_exit(krdc);
2595 2595 rdc_write_state(urdc);
2596 2596
2597 2597 /*
2598 2598 * only report the error if a remote error
2599 2599 * occurred as well.
2600 2600 */
2601 2601 if (h->rdc_bufh.sb_error)
2602 2602 h->rdc_bufh.sb_error = rc1;
2603 2603 }
2604 2604 }
2605 2605
2606 2606 if (multi) {
2607 2607 /* Multi-hop secondary, just set bits in the bitmap */
2608 2608 (void) RDC_SET_BITMAP(multi, pos, len, &bitmask);
2609 2609 }
2610 2610
2611 2611 return (h->rdc_bufh.sb_error);
2612 2612 }
2613 2613
2614 2614
2615 2615 static void
2616 2616 _rdc_bzero(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len)
2617 2617 {
2618 2618 nsc_vec_t *v;
2619 2619 uchar_t *a;
2620 2620 size_t sz;
2621 2621 int l;
2622 2622
2623 2623 if (!RDC_HANDLE_LIMITS(h, pos, len)) {
2624 2624 cmn_err(CE_WARN,
2625 2625 "!_rdc_bzero: bounds check: io(handle) pos %" NSC_XSZFMT
2626 2626 "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2627 2627 pos, h->sb_pos, len, h->sb_len);
2628 2628 return;
2629 2629 }
2630 2630
2631 2631 if (!len)
2632 2632 return;
2633 2633
2634 2634 /* find starting point */
2635 2635
2636 2636 v = h->sb_vec;
2637 2637 pos -= h->sb_pos;
2638 2638
2639 2639 for (; pos >= FBA_NUM(v->sv_len); v++)
2640 2640 pos -= FBA_NUM(v->sv_len);
2641 2641
2642 2642 a = v->sv_addr + FBA_SIZE(pos);
2643 2643 l = v->sv_len - FBA_SIZE(pos);
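
	/*
	 * For example (hypothetical vec sizes): if each sv_len is 8192
	 * bytes (16 FBAs) and pos is 20 FBAs into the handle, the loop
	 * above skips the first vec leaving pos = 4, so zeroing starts
	 * at a = sv_addr + 2048 with l = 6144 bytes left in this vec.
	 */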
2644 2644
2645 2645 /* zero */
2646 2646
2647 2647 len = FBA_SIZE(len); /* convert to bytes */
2648 2648
2649 2649 while (len) {
2650 2650 if (!a) /* end of vec */
2651 2651 break;
2652 2652
2653 2653 sz = (size_t)min((nsc_size_t)l, len);
2654 2654
2655 2655 bzero(a, sz);
2656 2656
2657 2657 len -= sz;
2658 2658 l -= sz;
2659 2659 a += sz;
2660 2660
2661 2661 if (!l) {
2662 2662 v++;
2663 2663 a = v->sv_addr;
2664 2664 l = v->sv_len;
2665 2665 }
2666 2666 }
2667 2667 }
2668 2668
2669 2669
2670 2670 /*
2671 2671 * _rdc_zero
2672 2672 *
2673 2673 * Zero and commit the specified area of the buffer.
2674 2674 *
2675 2675 * If this write is whilst the local primary volume is being synced,
2676 2676 * then we write the remote end first to ensure that the new data
2677 2677 * cannot be overwritten by a concurrent sync operation.
2678 2678 */
2679 2679
2680 2680 static int
2681 2681 _rdc_zero(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
2682 2682 {
2683 2683 rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
2684 2684 rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
2685 2685 rdc_k_info_t *this;
2686 2686 rdc_k_info_t *multi = NULL;
2687 2687 int remote = RDC_REMOTE(h);
2688 2688 int rc1, rc2;
2689 2689 uint_t bitmask;
2690 2690 int first;
2691 2691 int rsync;
2692 2692
2693 2693 /* If this is the multi-hop secondary, move along to the primary */
2694 2694 if (IS_MULTI(krdc) && !(rdc_get_vflags(urdc) & RDC_PRIMARY)) {
2695 2695 multi = krdc;
2696 2696 krdc = krdc->multi_next;
2697 2697 urdc = &rdc_u_info[krdc->index];
2698 2698
2699 2699 if (!IS_ENABLED(urdc)) {
2700 2700 krdc = h->rdc_fd->rdc_info;
2701 2701 urdc = &rdc_u_info[krdc->index];
2702 2702 multi = NULL;
2703 2703 }
2704 2704 }
2705 2705 this = krdc;
2706 2706
2707 2707 rsync = ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
2708 2708 (rdc_get_mflags(urdc) & RDC_SLAVE));
2709 2709
2710 2710 /*
2711 2711 * If this is a many group with a reverse sync in progress and
2712 2712 * this is not the slave krdc/urdc, then search for the slave
2713 2713 * so that we can do the remote io to the correct secondary
2714 2714 * before the local io.
2715 2715 */
2716 2716 if (rsync && !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
2717 2717 rdc_many_enter(krdc);
2718 2718 for (krdc = krdc->many_next; krdc != this;
2719 2719 krdc = krdc->many_next) {
2720 2720 urdc = &rdc_u_info[krdc->index];
2721 2721 if (!IS_ENABLED(urdc))
2722 2722 continue;
2723 2723 if (rdc_get_vflags(urdc) & RDC_SLAVE)
2724 2724 break;
2725 2725 }
2726 2726 rdc_many_exit(krdc);
2727 2727
2728 2728 this = krdc;
2729 2729 }
2730 2730
2731 2731 rc1 = rc2 = 0;
2732 2732 first = 1;
2733 2733
2734 2734 if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
2735 2735 cmn_err(CE_WARN,
2736 2736 "!_rdc_zero: bounds check: io(handle) pos %" NSC_XSZFMT
2737 2737 "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2738 2738 pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
2739 2739 h->rdc_bufh.sb_error = EINVAL;
2740 2740 return (h->rdc_bufh.sb_error);
2741 2741 }
2742 2742
2743 2743 zero1:
2744 2744 if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
2745 2745 (void) nsc_uncommit(h->rdc_bufp, pos, len, flag);
2746 2746 h->rdc_bufh.sb_error = EIO;
2747 2747 goto zero2;
2748 2748 }
2749 2749
2750 2750 if (IS_ASYNC(urdc)) {
2751 2751 /*
2752 2752 * We are async mode
2753 2753 		 * We are in async mode
2754 2754 aio_buf_t *p;
2755 2755
2756 2756 if ((krdc->type_flag & RDC_DISABLEPEND) ||
2757 2757 (rdc_get_vflags(urdc) & RDC_LOGGING)) {
2758 2758 mutex_exit(&krdc->group->ra_queue.net_qlock);
2759 2759 goto localzero;
2760 2760 }
2761 2761
2762 2762 if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) ||
2763 2763 (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) {
2764 2764 mutex_exit(&krdc->group->ra_queue.net_qlock);
2765 2765 goto zero2;
2766 2766 }
2767 2767 if (rdc_get_vflags(urdc) & RDC_LOGGING) {
2768 2768 mutex_exit(&krdc->group->ra_queue.net_qlock);
2769 2769 goto localzero;
2770 2770 }
2771 2771 p = rdc_aio_buf_add(krdc->index, h);
2772 2772 if (p == NULL) {
2773 2773 #ifdef DEBUG
2774 2774 cmn_err(CE_WARN,
2775 2775 "!rdc_alloc_buf aio_buf allocation failed");
2776 2776 #endif
2777 2777 goto localzero;
2778 2778 }
2779 2779 mutex_enter(&h->aio_lock);
2780 2780 rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp);
2781 2781 if (!RDC_SUCCESS(rc1)) {
2782 2782 #ifdef DEBUG
2783 2783 cmn_err(CE_WARN,
2784 2784 "!rdc_alloc_buf NSC_ANON allocation failed rc %d",
2785 2785 rc1);
2786 2786 #endif
2787 2787 mutex_exit(&h->aio_lock);
2788 2788 goto localzero;
2789 2789 }
2790 2790 h->rdc_flags |= RDC_ASYNC_VEC;
2791 2791 mutex_exit(&h->aio_lock);
2792 2792
2793 2793 /*
2794 2794 * Copy buffer into anonymous buffer
2795 2795 */
2796 2796
2797 2797 rc1 = nsc_zero(p->rdc_abufp, pos, len, flag);
2798 2798 if (!RDC_SUCCESS(rc1)) {
2799 2799 #ifdef DEBUG
2800 2800 cmn_err(CE_WARN,
2801 2801 "!_rdc_zero: nsc_zero failed rc=%d state %x",
2802 2802 rc1, rdc_get_vflags(urdc));
2803 2803 #endif
2804 2804 rc1 = nsc_free_buf(p->rdc_abufp);
2805 2805 rdc_aio_buf_del(h, krdc);
2806 2806 rdc_group_enter(krdc);
2807 2807 rdc_group_log(krdc, RDC_FLUSH | RDC_OTHERREMOTE,
2808 2808 "nsc_zero failed");
2809 2809 rdc_group_exit(krdc);
2810 2810 }
2811 2811 } /* end of async */
2812 2812
2813 2813 localzero:
2814 2814
2815 2815 if (flag & NSC_NOBLOCK) {
2816 2816 cmn_err(CE_WARN,
2817 2817 "!_rdc_zero: removing unsupported NSC_NOBLOCK flag");
2818 2818 flag &= ~(NSC_NOBLOCK);
2819 2819 }
2820 2820
2821 2821 if (!remote && !rsync && first) {
2822 2822 rc1 = nsc_zero(h->rdc_bufp, pos, len, flag);
2823 2823 if (!RDC_SUCCESS(rc1)) {
2824 2824 ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY);
2825 2825 rdc_many_enter(krdc);
2826 2826 /* Primary, so reverse sync needed */
2827 2827 rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
2828 2828 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
2829 2829 "nsc_zero failed");
2830 2830 rdc_many_exit(krdc);
2831 2831 rdc_write_state(urdc);
2832 2832 }
2833 2833 }
2834 2834
2835 2835 /*
2836 2836 * send new data to remote end - nsc_zero has zero'd
2837 2837 * the data in the buffer, or _rdc_bzero will be used below.
2838 2838 */
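
	/*
	 * Reasoning sketch: when the zero is remote-only (remote/rsync)
	 * or the local nsc_zero() failed, nothing has zeroed the handle
	 * memory yet, so _rdc_bzero() below scrubs it first and the
	 * remote write then transmits zeros rather than stale data.
	 */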
2839 2839
2840 2840 if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
2841 2841 if (first && (remote || rsync || !RDC_SUCCESS(rc1))) {
2842 2842 /* bzero so that we can send new data to remote node */
2843 2843 _rdc_bzero(&h->rdc_bufh, pos, len);
2844 2844 }
2845 2845
2846 2846 if (IS_MULTI(krdc)) {
2847 2847 rdc_k_info_t *ktmp;
2848 2848 rdc_u_info_t *utmp;
2849 2849
2850 2850 ktmp = krdc->multi_next;
2851 2851 utmp = &rdc_u_info[ktmp->index];
2852 2852 if (IS_ENABLED(utmp))
2853 2853 multi = ktmp;
2854 2854 }
2855 2855
2856 2856 rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh,
2857 2857 pos, len, flag, bitmask);
2858 2858 }
2859 2859
2860 2860 if (!RDC_SUCCESS(rc1)) {
2861 2861 if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && !RDC_SUCCESS(rc2)) {
2862 2862 h->rdc_bufh.sb_error = rc1;
2863 2863 }
2864 2864 } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) {
2865 2865 h->rdc_bufh.sb_error = rc2;
2866 2866 }
2867 2867
2868 2868 zero2:
2869 2869 	if (IS_MANY(krdc) && (rdc_get_vflags(urdc) & RDC_PRIMARY)) {
2870 2870 rdc_many_enter(krdc);
2871 2871 for (krdc = krdc->many_next; krdc != this;
2872 2872 krdc = krdc->many_next) {
2873 2873 urdc = &rdc_u_info[krdc->index];
2874 2874 if (!IS_ENABLED(urdc))
2875 2875 continue;
2876 2876 rc2 = first = 0;
2877 2877 rdc_many_exit(krdc);
2878 2878 goto zero1;
2879 2879 }
2880 2880 rdc_many_exit(krdc);
2881 2881 }
2882 2882
2883 2883 if (rsync && !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
2884 2884 rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
2885 2885 if (!RDC_SUCCESS(rc1)) {
2886 2886 /* rsync, so reverse sync needed already set */
2887 2887 rdc_many_enter(krdc);
2888 2888 rdc_set_flags_log(urdc, RDC_VOL_FAILED,
2889 2889 "nsc_write failed");
2890 2890 rdc_many_exit(krdc);
2891 2891 rdc_write_state(urdc);
2892 2892
2893 2893 /*
2894 2894 * only report the error if a remote error
2895 2895 * occurred as well.
2896 2896 */
2897 2897 if (h->rdc_bufh.sb_error)
2898 2898 h->rdc_bufh.sb_error = rc1;
2899 2899 }
2900 2900 }
2901 2901
2902 2902 if (multi) {
2903 2903 /* Multi-hop secondary, just set bits in the bitmap */
2904 2904 (void) RDC_SET_BITMAP(multi, pos, len, &bitmask);
2905 2905 }
2906 2906
2907 2907 return (h->rdc_bufh.sb_error);
2908 2908 }
2909 2909
2910 2910
2911 2911 /*
2912 2912 * _rdc_uncommit
2913 2913 * - refresh specified data region in the buffer to prevent the cache
2914 2914  *   serving the scribbled-on data back to another client.
2915 2915 *
2916 2916 * Only needs to happen on the local node. If in remote io mode, then
2917 2917 * just return 0 - we do not cache the data on the local node and the
2918 2918 * changed data will not have made it to the cache on the other node,
2919 2919 * so it has no need to uncommit.
2920 2920 */
2921 2921
2922 2922 static int
2923 2923 _rdc_uncommit(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
2924 2924 {
2925 2925 int remote = RDC_REMOTE(h);
2926 2926 int rc = 0;
2927 2927
2928 2928 if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
2929 2929 cmn_err(CE_WARN,
2930 2930 "!_rdc_uncommit: bounds check: io(handle) pos %" NSC_XSZFMT
2931 2931 "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2932 2932 pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
2933 2933 h->rdc_bufh.sb_error = EINVAL;
2934 2934 return (h->rdc_bufh.sb_error);
2935 2935 }
2936 2936
2937 2937 if (flag & NSC_NOBLOCK) {
2938 2938 cmn_err(CE_WARN,
2939 2939 "!_rdc_uncommit: removing unsupported NSC_NOBLOCK flag");
2940 2940 flag &= ~(NSC_NOBLOCK);
2941 2941 }
2942 2942
2943 2943 if (!remote) {
2944 2944 rc = nsc_uncommit(h->rdc_bufp, pos, len, flag);
2945 2945 }
2946 2946
2947 2947 if (!RDC_SUCCESS(rc))
2948 2948 h->rdc_bufh.sb_error = rc;
2949 2949
2950 2950 return (rc);
2951 2951 }
2952 2952
2953 2953
2954 2954 /*
2955 2955 * _rdc_trksize
2956 2956 *
2957 2957 * only needs to happen on local node.
2958 2958 */
2959 2959
2960 2960 static int
2961 2961 _rdc_trksize(rdc_fd_t *rfd, nsc_size_t trksize)
2962 2962 {
2963 2963 return (nsc_set_trksize(RDC_FD(rfd), trksize));
2964 2964 }
2965 2965
2966 2966
2967 2967 static nsc_def_t _rdc_fd_def[] = {
2968 - "Attach", (uintptr_t)_rdc_attach_fd, 0,
2969 - "Pinned", (uintptr_t)_rdc_pinned, 0,
2970 - "Unpinned", (uintptr_t)_rdc_unpinned, 0,
2971 - 0, 0, 0
2968 + { "Attach", (uintptr_t)_rdc_attach_fd, 0 },
2969 + { "Pinned", (uintptr_t)_rdc_pinned, 0 },
2970 + { "Unpinned", (uintptr_t)_rdc_unpinned, 0 },
2971 + { NULL, (uintptr_t)NULL, 0 }
2972 2972 };
2973 2973
2974 2974
2975 2975 static nsc_def_t _rdc_io_def[] = {
2976 - "Open", (uintptr_t)_rdc_openc, 0,
2977 - "Close", (uintptr_t)_rdc_close, 0,
2978 - "Attach", (uintptr_t)_rdc_attach, 0,
2979 - "Detach", (uintptr_t)_rdc_detach, 0,
2980 - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0,
2981 - "FreeHandle", (uintptr_t)_rdc_free_handle, 0,
2982 - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0,
2983 - "FreeBuf", (uintptr_t)_rdc_free_buf, 0,
2984 - "GetPinned", (uintptr_t)_rdc_get_pinned, 0,
2985 - "Discard", (uintptr_t)_rdc_discard_pinned, 0,
2986 - "PartSize", (uintptr_t)_rdc_partsize, 0,
2987 - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0,
2988 - "Control", (uintptr_t)_rdc_control, 0,
2989 - "Read", (uintptr_t)_rdc_read, 0,
2990 - "Write", (uintptr_t)_rdc_write, 0,
2991 - "Zero", (uintptr_t)_rdc_zero, 0,
2992 - "Uncommit", (uintptr_t)_rdc_uncommit, 0,
2993 - "TrackSize", (uintptr_t)_rdc_trksize, 0,
2994 - "Provide", 0, 0,
2995 - 0, 0, 0
2976 + { "Open", (uintptr_t)_rdc_openc, 0 },
2977 + { "Close", (uintptr_t)_rdc_close, 0 },
2978 + { "Attach", (uintptr_t)_rdc_attach, 0 },
2979 + { "Detach", (uintptr_t)_rdc_detach, 0 },
2980 + { "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0 },
2981 + { "FreeHandle", (uintptr_t)_rdc_free_handle, 0 },
2982 + { "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0 },
2983 + { "FreeBuf", (uintptr_t)_rdc_free_buf, 0 },
2984 + { "GetPinned", (uintptr_t)_rdc_get_pinned, 0 },
2985 + { "Discard", (uintptr_t)_rdc_discard_pinned, 0 },
2986 + { "PartSize", (uintptr_t)_rdc_partsize, 0 },
2987 + { "MaxFbas", (uintptr_t)_rdc_maxfbas, 0 },
2988 + { "Control", (uintptr_t)_rdc_control, 0 },
2989 + { "Read", (uintptr_t)_rdc_read, 0 },
2990 + { "Write", (uintptr_t)_rdc_write, 0 },
2991 + { "Zero", (uintptr_t)_rdc_zero, 0 },
2992 + { "Uncommit", (uintptr_t)_rdc_uncommit, 0 },
2993 + { "TrackSize", (uintptr_t)_rdc_trksize, 0 },
2994 + { "Provide", (uintptr_t)NULL, 0 },
2995 + { NULL, (uintptr_t)NULL, 0 }
2996 2996 };
2997 2997
2998 2998 static nsc_def_t _rdc_ior_def[] = {
2999 - "Open", (uintptr_t)_rdc_openr, 0,
3000 - "Close", (uintptr_t)_rdc_close, 0,
3001 - "Attach", (uintptr_t)_rdc_attach, 0,
3002 - "Detach", (uintptr_t)_rdc_detach, 0,
3003 - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0,
3004 - "FreeHandle", (uintptr_t)_rdc_free_handle, 0,
3005 - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0,
3006 - "FreeBuf", (uintptr_t)_rdc_free_buf, 0,
3007 - "GetPinned", (uintptr_t)_rdc_get_pinned, 0,
3008 - "Discard", (uintptr_t)_rdc_discard_pinned, 0,
3009 - "PartSize", (uintptr_t)_rdc_partsize, 0,
3010 - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0,
3011 - "Control", (uintptr_t)_rdc_control, 0,
3012 - "Read", (uintptr_t)_rdc_read, 0,
3013 - "Write", (uintptr_t)_rdc_write, 0,
3014 - "Zero", (uintptr_t)_rdc_zero, 0,
3015 - "Uncommit", (uintptr_t)_rdc_uncommit, 0,
3016 - "TrackSize", (uintptr_t)_rdc_trksize, 0,
3017 - "Provide", 0, 0,
3018 - 0, 0, 0
2999 + { "Open", (uintptr_t)_rdc_openr, 0 },
3000 + { "Close", (uintptr_t)_rdc_close, 0 },
3001 + { "Attach", (uintptr_t)_rdc_attach, 0 },
3002 + { "Detach", (uintptr_t)_rdc_detach, 0 },
3003 + { "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0 },
3004 + { "FreeHandle", (uintptr_t)_rdc_free_handle, 0 },
3005 + { "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0 },
3006 + { "FreeBuf", (uintptr_t)_rdc_free_buf, 0 },
3007 + { "GetPinned", (uintptr_t)_rdc_get_pinned, 0 },
3008 + { "Discard", (uintptr_t)_rdc_discard_pinned, 0 },
3009 + { "PartSize", (uintptr_t)_rdc_partsize, 0 },
3010 + { "MaxFbas", (uintptr_t)_rdc_maxfbas, 0 },
3011 + { "Control", (uintptr_t)_rdc_control, 0 },
3012 + { "Read", (uintptr_t)_rdc_read, 0 },
3013 + { "Write", (uintptr_t)_rdc_write, 0 },
3014 + { "Zero", (uintptr_t)_rdc_zero, 0 },
3015 + { "Uncommit", (uintptr_t)_rdc_uncommit, 0 },
3016 + { "TrackSize", (uintptr_t)_rdc_trksize, 0 },
3017 + { "Provide", (uintptr_t)NULL, 0 },
3018 + { NULL, (uintptr_t)NULL, 0 }
3019 3019 };
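
/*
 * Note on the table initializers above (informal): each nsc_def_t row
 * is written as a three-value initializer (a name string, a function
 * address cast to uintptr_t, and 0), so fully bracing every row makes
 * the aggregate nesting explicit, which is exactly what a
 * missing-braces warning asks for.
 */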