1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/time.h>
  28 #include <sys/ksynch.h>
  29 #include <sys/kmem.h>
  30 #include <sys/errno.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/debug.h>
  33 #include <sys/ddi.h>
  34 #include <sys/nsc_thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/unistat/spcs_s.h>
  37 #include <sys/unistat/spcs_errors.h>
  38 
  39 #include <sys/unistat/spcs_s_k.h>
  40 #include <sys/nsctl/nsctl.h>
  41 #include "dsw.h"
  42 #include "dsw_dev.h"
  43 #include "../rdc/rdc_update.h"
  44 #include <sys/nskernd.h>
  45 
  46 #include <sys/sdt.h>              /* dtrace is S10 or later */
  47 
  48 #ifdef DS_DDICT
  49 #include "../contract.h"
  50 #endif
  51 
  52 /*
  53  * Instant Image
  54  *
  55  * This file contains the core implementation of II.
  56  *
  57  * II is implemented as a simple filter module that pushes itself between
  58  * user (SV, STE, etc.) and SDBC or NET.
  59  *
  60  */
  61 
  62 
  63 #define REMOTE_VOL(s, ip)       (((s) && ((ip->bi_flags)&DSW_SHDEXPORT)) || \
  64                                     (!(s)&&((ip->bi_flags)&DSW_SHDIMPORT)))
  65 
  66 #define total_ref(ip)   ((ip->bi_shdref + ip->bi_shdrref + ip->bi_bmpref) + \
  67                             (NSHADOWS(ip) ? 0 : ip->bi_mstref + ip->bi_mstrref))
  68 
  69 
  70 #define II_TAIL_COPY(d, s, m, t)        bcopy(&(s.m), &(d.m), \
  71                                         sizeof (d) - (uintptr_t)&((t *)0)->m)
  72 extern dev_info_t *ii_dip;
  73 
  74 #define II_LINK_CLUSTER(ip, cluster) \
  75         _ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \
  76             &ip->bi_cluster)
  77 #define II_UNLINK_CLUSTER(ip) \
  78         _ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster)
  79 
  80 #define II_LINK_GROUP(ip, group) \
  81         _ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group)
  82 #define II_UNLINK_GROUP(ip) \
  83         _ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group)
  84 
/* head of the list of sets; entries are chained through bi_next */
_ii_info_t *_ii_info_top;
_ii_info_t *_ii_mst_top = 0;
/* head of the overflow volume list; entries are chained through ii_next */
_ii_overflow_t  *_ii_overflow_top;
/* list heads handed to _ii_ll_add()/_ii_ll_remove() by the II_LINK macros */
_ii_lsthead_t *_ii_cluster_top;
_ii_lsthead_t *_ii_group_top;

int     ii_debug;               /* level of cmn_err noise */
int     ii_bitmap;              /* bitmap operations switch */
uint_t  ii_header = 16;         /* Undocumented tunable (with adb!), start */
                                /* of area cleared in volume when a dependent */
                                /* shadow is disabled. */
                                /* max # of chunks in copy loop before delay */
int     ii_throttle_unit = MIN_THROTTLE_UNIT;
                                /* length of delay during update loop */
int     ii_throttle_delay = MIN_THROTTLE_DELAY;
int     ii_copy_direct = 1;
int     ii_nconcopy = 10;       /* default value when starting with no cache */
kmutex_t _ii_cluster_mutex;     /* passed with _ii_cluster_top */
kmutex_t _ii_group_mutex;       /* passed with _ii_group_top */

static int _ii_shutting_down = 0;
/* handles returned by nsc_register_io() for "ii" and "ii-raw" */
static nsc_io_t *_ii_io, *_ii_ior;
/* handle returned by nsc_register_mem() for "ii:kmem" */
static nsc_mem_t *_ii_local_mem;
static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[];
static kmutex_t _ii_info_mutex;         /* held while walking _ii_info_top */
static kmutex_t _ii_overflow_mutex;     /* held while walking _ii_overflow_top */
static kmutex_t _ii_config_mutex;
static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp;
static nsc_svc_t *ii_volume_update;     /* RDCVolumeUpdated service token */
static nsc_svc_t *ii_report_luns;       /* IIReportLuns token */
static nsc_svc_t *ii_get_initiators;    /* IIGetInitiators token */
static ksema_t  _ii_concopy_sema;
/* non-zero when _ii_concopy_sema must be destroyed by _ii_deinit_dev() */
static int      _ii_concopy_init = 0;
static int      _ii_instance = 0;
 119 
/*
 * Forward declarations.  Prototypes marked "static" are private to this
 * file; those marked "extern" are provided by other files.
 */
void _ii_deinit_dev();

static void _ii_info_free(_ii_info_t *ip);
static void _ii_info_freeshd(_ii_info_t *ip);
static void ii_sibling_free(_ii_info_t *ip);
ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp);
int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip,
    nsc_buf_t *tmp);
static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip,
    nsc_buf_t *tmp);
static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int);
static void _ii_stopvol(_ii_info_t *ip);
static int _ii_stopcopy(_ii_info_t *ip);
static _ii_info_t *_ii_find_set(char *volume);
static _ii_info_t *_ii_find_vol(char *, int);
static _ii_overflow_t *_ii_find_overflow(char *volume);
static void _ii_ioctl_done(_ii_info_t *ip);
static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t);
static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int);
void _ii_error(_ii_info_t *ip, int error_type);
static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(),
    void (*w_cb)(), ii_fd_t *bfd);
static int _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd);
extern nsc_size_t ii_btsize(nsc_size_t);
extern int ii_tinit(_ii_info_t *);
extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t);
extern void ii_tdelete(_ii_info_t *, chunkid_t);
extern void ii_reclaim_overflow(_ii_info_t *);
static void ii_overflow_free(_ii_info_t *ip, int disable);
static int ii_overflow_attach(_ii_info_t *, char *, int);
int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *,
        nsc_size_t);
static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io);
static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type);
static int _ii_reserve_begin(_ii_info_t *ip);
static int _ii_wait_for_it(_ii_info_t *ip);
static void _ii_reserve_end(_ii_info_t *ip);
static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op);
static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *,
    char **);
static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **);
/* single-chunk form of _ii_unlock_chunks(): passes 1 as the last argument */
#define _ii_unlock_chunk(ip, chunk)     _ii_unlock_chunks(ip, chunk, 1)
/* revision numbers, defined outside this file */
extern const int dsw_major_rev;
extern const int dsw_minor_rev;
extern const int dsw_micro_rev;
extern const int dsw_baseline_rev;
 166 
/*
 * These constants are passed to ii_overflow_free() (as its second
 * argument) to indicate how the reclamation should take place.
 *      NO_RECLAIM: just detach the overflow from the set; do not
 *              attempt to reclaim chunks, do not decrement the
 *              used-by count
 *      RECLAIM: reclaim all chunks before decrementing the used-by count
 *      INIT_OVR: decrement the used-by count only; do not reclaim chunks
 */

#define NO_RECLAIM 0
#define RECLAIM 1
#define INIT_OVR 2
 180 
/*
 * NOTE(review): the ip/flag/rtype/kstatus/wait members appear to line up
 * with the parameter types of _ii_copyvol(); confirm against the code
 * that fills this structure in.
 */
struct  copy_args {                     /* arguments passed to copy process */
        _ii_info_t *ip;
        int flag;
        int rtype;
        int wait;
        spcs_s_info_t kstatus;
        int rc;                         /* presumably the result - confirm */
};
 189 
/*
 * set-specific kstats info
 *
 * Each entry pairs a kstat name with its data type.  The first nine
 * entries are unsigned long counters; the remainder are character data.
 * NOTE(review): the master, set, bitmap and overflow names are each
 * spread over four CHAR entries (A-D), presumably because a single
 * KSTAT_DATA_CHAR value is too short for a full path - confirm.
 */
ii_kstat_set_t ii_kstat_set = {
        { DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG },
        { DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_SETA, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_SETB, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_SETC, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_SETD, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR },
        /* names of the io kstats created for each volume of the set */
        { DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR },
        { DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR },
};
 222 
 223 /*
 224  * _ii_init_dev
 225  *      Initialise the shadow driver
 226  *
 227  */
 228 
 229 int
 230 _ii_init_dev()
 231 {
 232         _ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER,
 233             _ii_io_def);
 234         if (_ii_io == NULL)
 235                 cmn_err(CE_WARN, "!ii: nsc_register_io failed.");
 236 
 237         _ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER,
 238             _ii_ior_def);
 239         if (_ii_ior == NULL)
 240                 cmn_err(CE_WARN, "!ii: nsc_register_io r failed.");
 241 
 242         _ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0);
 243         if (_ii_local_mem == NULL)
 244                 cmn_err(CE_WARN, "!ii: nsc_register_mem failed.");
 245 
 246 
 247         if (!_ii_io || !_ii_ior || !_ii_local_mem) {
 248                 _ii_deinit_dev();
 249                 return (ENOMEM);
 250         }
 251 
 252         mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL);
 253         mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL);
 254         mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL);
 255         mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL);
 256         mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL);
 257 
 258         ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0);
 259         ii_report_luns = nsc_register_svc("IIReportLuns", 0);
 260         ii_get_initiators = nsc_register_svc("IIGetInitiators", 0);
 261 
 262         if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) {
 263                 _ii_deinit_dev();
 264                 return (ENOMEM);
 265         }
 266 
 267         return (0);
 268 }
 269 
 270 
 271 /*
 272  * _ii_deinit_dev
 273  *      De-initialise the shadow driver
 274  *
 275  */
 276 
 277 void
 278 _ii_deinit_dev()
 279 {
 280 
 281         if (_ii_io)
 282                 (void) nsc_unregister_io(_ii_io, 0);
 283 
 284         if (_ii_ior)
 285                 (void) nsc_unregister_io(_ii_ior, 0);
 286 
 287         if (_ii_local_mem)
 288                 (void) nsc_unregister_mem(_ii_local_mem);
 289 
 290         if (ii_volume_update)
 291                 (void) nsc_unregister_svc(ii_volume_update);
 292 
 293         if (ii_report_luns)
 294                 (void) nsc_unregister_svc(ii_report_luns);
 295 
 296         if (ii_get_initiators)
 297                 (void) nsc_unregister_svc(ii_get_initiators);
 298 
 299         mutex_destroy(&_ii_info_mutex);
 300         mutex_destroy(&_ii_overflow_mutex);
 301         mutex_destroy(&_ii_config_mutex);
 302         mutex_destroy(&_ii_cluster_mutex);
 303         mutex_destroy(&_ii_group_mutex);
 304         if (_ii_concopy_init)
 305                 sema_destroy(&_ii_concopy_sema);
 306         _ii_concopy_init = 0;
 307 
 308 }
 309 
 310 static char *
 311 ii_pathname(nsc_fd_t *fd)
 312 {
 313         char *rc;
 314 
 315         if (fd == NULL || (rc = nsc_pathname(fd)) == NULL)
 316                 return ("");
 317         else
 318                 return (rc);
 319 }
 320 
 321 
 322 /*
 323  * _ii_rlse_d
 324  *      Internal mechanics of _ii_rlse_devs().  Takes care of
 325  *      resetting the ownership information as required.
 326  */
 327 
 328 static void
 329 _ii_rlse_d(ip, mst, raw)
 330 _ii_info_t *ip;
 331 int mst, raw;
 332 {
 333         _ii_info_dev_t *cip;
 334         _ii_info_dev_t *rip;
 335 
 336         rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev);
 337         cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev);
 338 
 339         DTRACE_PROBE2(_ii_rlse_d_type,
 340                         _ii_info_dev_t *, rip,
 341                         _ii_info_dev_t *, cip);
 342 
 343 
 344         if (RSRV(cip)) {
 345                 if (raw) {
 346                         ASSERT(cip->bi_orsrv > 0);
 347                         cip->bi_orsrv--;
 348                 } else {
 349                         ASSERT(cip->bi_rsrv > 0);
 350                         cip->bi_rsrv--;
 351                 }
 352 
 353                 if (cip->bi_rsrv > 0) {
 354                         nsc_set_owner(cip->bi_fd, cip->bi_iodev);
 355                 } else if (cip->bi_orsrv > 0) {
 356                         nsc_set_owner(cip->bi_fd, rip->bi_iodev);
 357                 } else {
 358                         nsc_set_owner(cip->bi_fd, NULL);
 359                 }
 360 
 361                 if (!RSRV(cip)) {
 362                         nsc_release(cip->bi_fd);
 363                 }
 364         } else {
 365                 if (raw) {
 366                         ASSERT(rip->bi_rsrv > 0);
 367                         rip->bi_rsrv--;
 368                 } else {
 369                         ASSERT(rip->bi_orsrv > 0);
 370                         rip->bi_orsrv--;
 371                 }
 372 
 373                 if (rip->bi_rsrv > 0) {
 374                         nsc_set_owner(rip->bi_fd, rip->bi_iodev);
 375                 } else if (rip->bi_orsrv > 0) {
 376                         nsc_set_owner(rip->bi_fd, cip->bi_iodev);
 377                 } else {
 378                         nsc_set_owner(rip->bi_fd, NULL);
 379                 }
 380 
 381                 if (!RSRV(rip)) {
 382                         rip->bi_flag = 0;
 383                         nsc_release(rip->bi_fd);
 384                         cv_broadcast(&ip->bi_releasecv);
 385                 }
 386         }
 387 
 388 }
 389 
 390 
 391 /*
 392  * _ii_rlse_devs
 393  *      Release named underlying devices.
 394  *
 395  *      NOTE: the 'devs' argument must be the same as that passed to
 396  *      the preceding _ii_rsrv_devs call.
 397  */
 398 
 399 void
 400 _ii_rlse_devs(ip, devs)
 401 _ii_info_t *ip;
 402 int devs;
 403 {
 404 
 405         ASSERT(!(devs & (MST|SHD)));
 406 
 407         ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef);
 408         if (!ip) {
 409                 cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip");
 410                 return;
 411         }
 412 
 413         mutex_enter(&ip->bi_rsrvmutex);
 414 
 415         DTRACE_PROBE(_ii_rlse_devs_mutex);
 416 
 417         if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
 418                 if (NSHADOWS(ip) && ip != ip->bi_master)
 419                         _ii_rlse_devs(ip->bi_master, devs&(MST|MSTR));
 420                 else
 421                         _ii_rlse_d(ip, 1, (devs&MSTR));
 422         }
 423 
 424         if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) {
 425                 _ii_rlse_d(ip, 0, (devs&SHDR));
 426         }
 427 
 428         if ((devs&BMP) != 0 && ip->bi_bmpfd) {
 429                 if (--(ip->bi_bmprsrv) == 0)
 430                         nsc_release(ip->bi_bmpfd);
 431         }
 432 
 433         ASSERT(ip->bi_bmprsrv >= 0);
 434         ASSERT(ip->bi_shdrsrv >= 0);
 435         ASSERT(ip->bi_shdrrsrv >= 0);
 436         mutex_exit(&ip->bi_rsrvmutex);
 437 
 438 }
 439 
 440 
 441 /*
 442  * _ii_rsrv_d
 443  *      Reserve device flagged, unless its companion is already reserved,
 444  *      in that case increase the reserve on the companion.
 445  */
 446 
 447 static int
 448 _ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag,
 449     _ii_info_t *ip)
 450 {
 451         _ii_info_dev_t *p = NULL;
 452         int other = 0;
 453         int rc;
 454 
 455         /*
 456          * If user wants to do a cache reserve and it's already
 457          * raw reserved, we need to do a real nsc_reserve, so wait
 458          * until the release has been done.
 459          */
 460         if (RSRV(rid) && (flag == II_EXTERNAL) &&
 461             (raw == 0) && (rid->bi_flag != II_EXTERNAL)) {
 462                 ip->bi_release++;
 463                 while (RSRV(rid)) {
 464                         DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid);
 465                         cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex);
 466                         DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid);
 467                 }
 468                 ip->bi_release--;
 469         }
 470 
 471         if (RSRV(rid)) {
 472                 p = rid;
 473                 if (!raw) {
 474                         other = 1;
 475                 }
 476         } else if (RSRV(cid)) {
 477                 p = cid;
 478                 if (raw) {
 479                         other = 1;
 480                 }
 481         }
 482 
 483         if (p) {
 484                 if (other) {
 485                         p->bi_orsrv++;
 486                 } else {
 487                         p->bi_rsrv++;
 488                 }
 489 
 490                 if (p->bi_iodev) {
 491                         nsc_set_owner(p->bi_fd, p->bi_iodev);
 492                 }
 493 
 494                 return (0);
 495         }
 496         p = raw ? rid : cid;
 497 
 498         if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
 499                 if (p->bi_iodev) {
 500                         nsc_set_owner(p->bi_fd, p->bi_iodev);
 501                 }
 502                 p->bi_rsrv++;
 503                 if (raw)
 504                         p->bi_flag = flag;
 505         }
 506 
 507         return (rc);
 508 }
 509 
 510 /*
 511  * _ii_rsrv_devs
 512  *      Reserve named underlying devices.
 513  *
 514  */
 515 
 516 int
 517 _ii_rsrv_devs(_ii_info_t *ip, int devs, int flag)
 518 {
 519         int rc = 0;
 520         int got = 0;
 521 
 522         ASSERT(!(devs & (MST|SHD)));
 523 
 524         if (!ip) {
 525                 cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip");
 526                 return (EINVAL);
 527         }
 528 
 529         mutex_enter(&ip->bi_rsrvmutex);
 530 
 531         DTRACE_PROBE(_ii_rsrv_devs_mutex);
 532 
 533         if (rc == 0 && (devs&(MST|MSTR)) != 0 &&
 534             (ip->bi_flags&DSW_SHDIMPORT) == 0) {
 535                 DTRACE_PROBE(_ii_rsrv_devs_master);
 536                 if (NSHADOWS(ip) && ip != ip->bi_master) {
 537                         if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR),
 538                             flag)) != 0) {
 539                                 cmn_err(CE_WARN,
 540                                     "!ii: nsc_reserve multi-master failed");
 541                         } else {
 542                                 got |= devs&(MST|MSTR);
 543                         }
 544                 } else {
 545                         if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev,
 546                             ip->bi_mstdev, flag, ip)) != 0) {
 547                                 cmn_err(CE_WARN,
 548                                     "!ii: nsc_reserve master failed %d", rc);
 549                         } else {
 550                                 got |= (devs&(MST|MSTR));
 551                         }
 552                 }
 553         }
 554 
 555         if (rc == 0 && (devs&(SHD|SHDR)) != 0 &&
 556             (ip->bi_flags&DSW_SHDEXPORT) == 0) {
 557                 DTRACE_PROBE(_ii_rsrv_devs_shadow);
 558                 if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev,
 559                     &ip->bi_shddev, flag, ip)) != 0) {
 560                         cmn_err(CE_WARN,
 561                             "!ii: nsc_reserve shadow failed %d", rc);
 562                 } else {
 563                         got |= (devs&(SHD|SHDR));
 564                 }
 565         }
 566 
 567         if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) {
 568                 DTRACE_PROBE(_ii_rsrv_devs_bitmap);
 569                 if ((ip->bi_bmprsrv == 0) &&
 570                     (rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) {
 571                         cmn_err(CE_WARN,
 572                             "!ii: nsc_reserve bitmap failed %d", rc);
 573                 } else {
 574                         (ip->bi_bmprsrv)++;
 575                         got |= BMP;
 576                 }
 577         }
 578         mutex_exit(&ip->bi_rsrvmutex);
 579         if (rc != 0 && got != 0)
 580                 _ii_rlse_devs(ip, got);
 581 
 582         return (rc);
 583 }
 584 
 585 static int
 586 _ii_reserve_begin(_ii_info_t *ip)
 587 {
 588         int rc;
 589 
 590         mutex_enter(&ip->bi_rlsemutex);
 591         if ((rc = _ii_wait_for_it(ip)) == 0) {
 592                 ++ip->bi_rsrvcnt;
 593         }
 594         mutex_exit(&ip->bi_rlsemutex);
 595 
 596         return (rc);
 597 }
 598 
 599 static int
 600 _ii_wait_for_it(_ii_info_t *ip)
 601 {
 602         int nosig;
 603 
 604         nosig = 1;
 605         while (ip->bi_rsrvcnt > 0) {
 606                 nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex);
 607                 if (!nosig) {
 608                         break;
 609                 }
 610         }
 611 
 612         return (nosig? 0 : EINTR);
 613 }
 614 
 615 static void
 616 _ii_reserve_end(_ii_info_t *ip)
 617 {
 618         mutex_enter(&ip->bi_rlsemutex);
 619         if (ip->bi_rsrvcnt <= 0) {
 620                 mutex_exit(&ip->bi_rlsemutex);
 621                 return;
 622         }
 623         --ip->bi_rsrvcnt;
 624         mutex_exit(&ip->bi_rlsemutex);
 625         cv_broadcast(&ip->bi_reservecv);
 626 
 627 }
 628 
 629 static int
 630 ii_fill_copy_bmp(_ii_info_t *ip)
 631 {
 632         int rc;
 633         chunkid_t max_chunk, chunk_num;
 634 
 635         if ((rc = II_FILL_COPY_BMP(ip)) != 0)
 636                 return (rc);
 637         /*
 638          * make certain that the last bits of the last byte of the bitmap
 639          * aren't filled as they may be copied out to the user.
 640          */
 641 
 642         chunk_num = ip->bi_size / DSW_SIZE;
 643         if ((ip->bi_size % DSW_SIZE) != 0)
 644                 ++chunk_num;
 645 
 646         max_chunk = chunk_num;
 647         if ((max_chunk & 0x7) != 0)
 648                 max_chunk = (max_chunk + 7) & ~7;
 649 
 650         DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num,
 651             chunkid_t, max_chunk);
 652 
 653         for (; chunk_num < max_chunk; chunk_num++) {
 654                 (void) II_CLR_COPY_BIT(ip, chunk_num);
 655         }
 656 
 657         return (0);
 658 }
 659 
 660 static int
 661 ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus,
 662                                 int direction, int all)
 663 {
 664         rdc_update_t update;
 665         int size;
 666         unsigned char *bmp;
 667 
 668         update.volume = direction == CV_SHD2MST ? ii_pathname(MSTFD(ip)) :
 669             ip->bi_keyname;
 670         update.denied = 0;
 671         update.protocol = RDC_SVC_ONRETURN;
 672         update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
 673         update.status = kstatus;
 674         update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP);
 675         if (bmp == NULL) {
 676                 spcs_s_add(kstatus, ENOMEM);
 677                 return (1);
 678         }
 679 
 680         DTRACE_PROBE2(_ii_update_denied, int, all, int, size);
 681 
 682         if (all) {
 683                 while (size-- > 0)
 684                         *bmp++ = (unsigned char)0xff;
 685         } else {
 686                 if (II_CHANGE_BMP(ip, update.bitmap) != 0) {
 687                         /* failed to read bitmap */
 688                         spcs_s_add(kstatus, EIO);
 689                         update.denied = 1;
 690                 }
 691         }
 692 
 693         /* check that no user of volume objects */
 694         if (update.denied == 0) {
 695                 (void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
 696         }
 697         kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)));
 698 
 699         return (update.denied);
 700 }
 701 
 702 static int
 703 ii_need_same_size(_ii_info_t *ip)
 704 {
 705         rdc_update_t update;
 706 
 707         update.volume = ip->bi_keyname;
 708         update.denied = 0;
 709         update.protocol = RDC_SVC_VOL_ENABLED;
 710 
 711         (void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
 712 
 713         return (update.denied);
 714 }
 715 
 716 /*
 717  * ii_volume:   check if vol is already known to Instant Image and return
 718  *      volume type if it is.
 719  */
 720 
 721 static int
 722 ii_volume(char *vol, int locked)
 723 {
 724         _ii_info_t *ip;
 725         _ii_overflow_t  *op;
 726         int rc = NONE;
 727 
 728         /* scan overflow volume list */
 729         mutex_enter(&_ii_overflow_mutex);
 730 
 731         DTRACE_PROBE(_ii_volume_mutex);
 732 
 733         for (op = _ii_overflow_top; op; op = op->ii_next) {
 734                 if (strcmp(vol, op->ii_volname) == 0)
 735                         break;
 736         }
 737         mutex_exit(&_ii_overflow_mutex);
 738         if (op) {
 739                 return (OVR);
 740         }
 741 
 742         if (!locked) {
 743                 mutex_enter(&_ii_info_mutex);
 744         }
 745 
 746         DTRACE_PROBE(_ii_volume_mutex2);
 747 
 748         for (ip = _ii_info_top; ip; ip = ip->bi_next) {
 749                 if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) {
 750                         rc = MST;
 751                         break;
 752                 }
 753                 if (strcmp(vol, ip->bi_keyname)  == 0) {
 754                         rc = SHD;
 755                         break;
 756                 }
 757                 if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) {
 758                         rc = BMP;
 759                         break;
 760                 }
 761         }
 762         DTRACE_PROBE1(_ii_volume_data, int, rc);
 763 
 764         if (!locked) {
 765                 mutex_exit(&_ii_info_mutex);
 766         }
 767 
 768         return (rc);
 769 }
 770 
 771 /*
 772  * ii_open_shadow: open shadow volume for both cached and raw access,
 773  *      if the normal device open fails attempt a file open to allow
 774  *      shadowing into a file.
 775  */
 776 
 777 static int
 778 ii_open_shadow(_ii_info_t *ip, char *shadow_vol)
 779 {
 780         int rc = 0;
 781         int file_rc = 0;
 782 
 783         ip->bi_shdfd = nsc_open(shadow_vol,
 784             NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
 785             (blind_t)&(ip->bi_shddev), &rc);
 786         if (!ip->bi_shdfd) {
 787                 ip->bi_shdfd = nsc_open(shadow_vol,
 788                     NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
 789                     (blind_t)&(ip->bi_shddev), &file_rc);
 790                 file_rc = 1;
 791                 if (!ip->bi_shdfd) {
 792                         return (rc);
 793                 }
 794                 DTRACE_PROBE(_ii_open_shadow);
 795         }
 796         else
 797                 DTRACE_PROBE(_ii_open_shadow);
 798 
 799         if (file_rc == 0) {
 800                 ip->bi_shdrfd = nsc_open(shadow_vol,
 801                     NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
 802                     (blind_t)&(ip->bi_shdrdev), &rc);
 803                 DTRACE_PROBE(_ii_open_shadow);
 804         } else {
 805                 ip->bi_shdrfd = nsc_open(shadow_vol,
 806                     NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
 807                     (blind_t)&(ip->bi_shdrdev), &rc);
 808                 DTRACE_PROBE(_ii_open_shadow);
 809         }
 810 
 811         if (!ip->bi_shdrfd) {
 812                 (void) nsc_close(ip->bi_shdfd);
 813                 DTRACE_PROBE(_ii_open_shadow);
 814                 return (rc);
 815         }
 816 
 817         return (0);
 818 }
 819 
 820 static void
 821 ii_register_shd(_ii_info_t *ip)
 822 {
 823         ip->bi_shd_tok = _ii_register_path(ip->bi_keyname,
 824             NSC_CACHE, _ii_io);
 825         ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname,
 826             NSC_DEVICE, _ii_ior);
 827 
 828 }
 829 
 830 static void
 831 ii_register_mst(_ii_info_t *ip)
 832 {
 833         ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd),
 834             NSC_CACHE, _ii_io);
 835         ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd),
 836             NSC_DEVICE, _ii_ior);
 837 
 838 }
 839 
 840 static int
 841 ii_register_ok(_ii_info_t *ip)
 842 {
 843         int rc;
 844         int sibling;
 845         int exported;
 846 
 847         rc = 1;
 848         sibling = NSHADOWS(ip) && ip != ip->bi_head;
 849         exported = ip->bi_flags & DSW_SHDEXPORT;
 850 
 851         if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && (
 852             !ip->bi_shd_tok || !ip->bi_shdr_tok)))
 853                 rc = 0;
 854         else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok))
 855                 rc = 0;
 856 
 857         return (rc);
 858 }
 859 
 860 #ifndef DISABLE_KSTATS
 861 
 862 /*
 863  * _ii_kstat_create
 864  *      Create and install kstat_io data
 865  *
 866  * Calling/Exit State:
 867  *      Returns 0 if kstats couldn't be created, otherwise it returns
 868  *      a pointer to the created kstat_t.
 869  */
 870 
 871 static kstat_t *
 872 _ii_kstat_create(_ii_info_t *ip, char *type)
 873 {
 874         kstat_t *result;
 875         char name[ IOSTAT_NAME_LEN ];
 876         int setnum;
 877         char *nptr;
 878         static int mstnum = 0;
 879         static int shdbmpnum = -1;
 880 
 881         switch (*type) {
 882         case 'm':
 883                 setnum = mstnum++;
 884                 nptr = ip->bi_kstat_io.mstio;
 885                 break;
 886         case 's':
 887                 /* assumption: shadow kstats created before bitmap */
 888                 setnum = ++shdbmpnum;
 889                 nptr = ip->bi_kstat_io.shdio;
 890                 break;
 891         case 'b':
 892                 setnum = shdbmpnum;
 893                 nptr = ip->bi_kstat_io.bmpio;
 894                 break;
 895         default:
 896                 cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type);
 897                 setnum = -1;
 898                 break;
 899         }
 900         /*
 901          * The name of the kstat, defined below, is designed to work
 902          * with the 'iostat -x' command.  This command leaves only
 903          * 9 characters for the name, and the kstats built in to Solaris
 904          * all seem to be of the form <service><number>.  For that
 905          * reason, we have chosen ii<type><number>, where <type> is
 906          * m, s, b, or o (for master, shadow, bitmap, and overflow
 907          * respectively), and the number is monotonically increasing from
 908          * 0 for each time one of those <type>s are created.  Note that
 909          * the shadow and bitmap are always created in pairs and so, for
 910          * any given set, they will have the same <number>.
 911          */
 912         (void) sprintf(name, "ii%c%d", *type, setnum);
 913         (void) strncpy(nptr, name, IOSTAT_NAME_LEN);
 914         result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0);
 915         if (result) {
 916                 result->ks_private = ip;
 917                 result->ks_lock = &ip->bi_kstat_io.statmutex;
 918                 kstat_install(result);
 919         } else {
 920                 cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type,
 921                     ip->bi_keyname);
 922         }
 923 
 924         return (result);
 925 }
 926 
 927 /*
 928  * _ii_overflow_kstat_create
 929  *      Create and install kstat_io data for an overflow volume
 930  *
 931  * Calling/Exit State:
 932  *      Returns 0 if kstats couldn't be created, otherwise it returns
 933  *      a pointer to the created kstat_t.
 934  *
 935  * See comments in _ii_kstat_create for additional information.
 936  *
 937  */
 938 static kstat_t *
 939 _ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op)
 940 {
 941         kstat_t *result;
 942         char *nptr;
 943         char name [IOSTAT_NAME_LEN];
 944         static int ovrnum = 0;
 945         int setnum = ovrnum++;
 946 
 947         nptr = ip->bi_kstat_io.ovrio;
 948 
 949         (void) sprintf(name, "iio%d", setnum);
 950         (void) strncpy(nptr, name, IOSTAT_NAME_LEN);
 951 
 952         mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
 953 
 954         if ((result =
 955             kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) {
 956                 result->ks_private = ip;
 957                 result->ks_lock = &op->ii_kstat_mutex;
 958                 kstat_install(result);
 959         } else {
 960                 mutex_destroy(&op->ii_kstat_mutex);
 961                 cmn_err(CE_WARN, "!Unabled to create overflow kstat for set "
 962                     "%s", ip->bi_keyname);
 963         }
 964 
 965         return (result);
 966 }
 967 
 968 #endif
 969 
 970 static void
 971 ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4)
 972 {
 973         static int whinged = 0;
 974         char *part[ 4 ];
 975         char fulldata[ DSW_NAMELEN ];
 976         int i, offset, remain;
 977         int num_parts;
 978         int leftover;
 979         int kscharsize = KSTAT_DATA_CHAR_LEN - 1;
 980 
 981         /*
 982          * NOTE: the following lines must be changed if DSW_NAMELEN
 983          * ever changes.  You'll need a part[] for every kscharsize
 984          * characters (or fraction thereof).  The ii_kstat_set_t
 985          * definition in dsw_dev.h will also need new ovr_? entries.
 986          */
 987         part[ 0 ] = p1;
 988         part[ 1 ] = p2;
 989         part[ 2 ] = p3;
 990         part[ 3 ] = p4;
 991 
 992         bzero(fulldata, DSW_NAMELEN);
 993         if (str) {
 994                 (void) strncpy(fulldata, str, DSW_NAMELEN);
 995         }
 996 
 997         num_parts = DSW_NAMELEN / kscharsize;
 998         leftover = DSW_NAMELEN % kscharsize;
 999         if (leftover) {
1000                 ++num_parts;
1001         }
1002 
1003         if (num_parts > sizeof (part) / sizeof (part[0])) {
1004                 /*
1005                  * DSW_NAMELEN is 64 and kscharsize is 15.
1006                  * It's always "whinged"
1007                  */
1008                 if (!whinged) {
1009 #ifdef DEBUG
1010                         cmn_err(CE_WARN, "!May not have enough room "
1011                             "to store volume name in kstats");
1012 #endif
1013                         whinged = 1;
1014                 }
1015                 num_parts = sizeof (part) / sizeof (part[0]);
1016         }
1017 
1018         offset = 0;
1019         remain = DSW_NAMELEN;
1020         for (i = 0; i < num_parts; i++) {
1021                 int to_copy = remain > kscharsize? kscharsize : remain;
1022                 bcopy(&fulldata[ offset ], part[ i ], to_copy);
1023                 offset += to_copy;
1024                 remain -= to_copy;
1025         }
1026 }
1027 
1028 static int
1029 ii_set_stats_update(kstat_t *ksp, int rw)
1030 {
1031         _ii_info_t *ip = (_ii_info_t *)ksp->ks_private;
1032         ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data;
1033 
1034         if (KSTAT_WRITE == rw) {
1035                 return (EACCES);
1036         }
1037 
1038         /* copy values over */
1039         kp->size.value.ul = ip->bi_size;
1040         kp->flags.value.ul = ip->bi_flags;
1041         kp->unit.value.ul = ip->bi_throttle_unit;
1042         kp->delay.value.ul = ip->bi_throttle_delay;
1043         kp->mtime.value.ul = ip->bi_mtime;
1044 
1045         /* update bitmap counters if necessary */
1046         if (ip->bi_state & DSW_CNTCPYBITS) {
1047                 ip->bi_copybits = 0;
1048                 if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1049                         ip->bi_state &= ~DSW_CNTCPYBITS;
1050                         II_CNT_BITS(ip, ip->bi_copyfba,
1051                             &ip->bi_copybits,
1052                             DSW_BM_SIZE_BYTES(ip));
1053                         _ii_rlse_devs(ip, BMP);
1054                 }
1055         }
1056 
1057         if (ip->bi_state & DSW_CNTSHDBITS) {
1058                 ip->bi_shdbits = 0;
1059                 if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1060                         ip->bi_state &= ~DSW_CNTSHDBITS;
1061                         II_CNT_BITS(ip, ip->bi_shdfba,
1062                             &ip->bi_shdbits,
1063                             DSW_BM_SIZE_BYTES(ip));
1064                         _ii_rlse_devs(ip, BMP);
1065                 }
1066         }
1067 
1068         kp->copybits.value.ul = ip->bi_copybits;
1069         kp->shdbits.value.ul = ip->bi_shdbits;
1070 
1071         /* copy volume names */
1072         ii_str_kstat_copy(ii_pathname(MSTFD(ip)),
1073             kp->mst_a.value.c, kp->mst_b.value.c,
1074             kp->mst_c.value.c, kp->mst_d.value.c);
1075 
1076         ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c,
1077             kp->set_c.value.c, kp->set_d.value.c);
1078 
1079         ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd),
1080             kp->bmp_a.value.c, kp->bmp_b.value.c,
1081             kp->bmp_c.value.c, kp->bmp_d.value.c);
1082 
1083         if (ip->bi_overflow) {
1084                 ii_str_kstat_copy(ip->bi_overflow->ii_volname,
1085                     kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c,
1086                     kp->ovr_d.value.c);
1087                 (void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio,
1088                     KSTAT_DATA_CHAR_LEN);
1089         } else {
1090                 ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c,
1091                     kp->ovr_c.value.c, kp->ovr_d.value.c);
1092                 bzero(kp->ovr_io.value.c, KSTAT_DATA_CHAR_LEN);
1093         }
1094         if ((ip->bi_flags) & DSW_TREEMAP) {
1095                 kp->shdchks.value.ul = ip->bi_shdchks;
1096                 kp->shdchkused.value.ul = ip->bi_shdchkused;
1097         } else {
1098                 kp->shdchks.value.ul = 0;
1099                 kp->shdchkused.value.ul = 0;
1100         }
1101         /* make sure value.c are always null terminated */
1102         (void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio,
1103             KSTAT_DATA_CHAR_LEN);
1104         (void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio,
1105             KSTAT_DATA_CHAR_LEN);
1106         (void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio,
1107             KSTAT_DATA_CHAR_LEN);
1108 
1109         return (0);
1110 }
1111 
1112 /*
1113  * _ii_config
1114  *      Configure an II device pair
1115  *
1116  * Calling/Exit State:
1117  *      Returns 0 if the pairing was configured, otherwise an
1118  *      error code. The ioctl data stucture is copied out to the user
1119  *      and contains any additional error information, and the master
1120  *      and shadow volume names if not supplied by the user.
1121  *
1122  * Description:
1123  *      Reads the user configuration structure and attempts
1124  *      to establish an II pairing. The snapshot of the master
1125  *      device is established at this point in time.
1126  */
1127 
1128 int
1129 _ii_config(intptr_t arg, int ilp32, int *rvp, int iflags)
1130 {
1131         dsw_config_t uconf;
1132         dsw_config32_t *uconf32;
1133         _ii_info_t *ip, *hip, **ipp;
1134         int rc;
1135         int type;
1136         int nshadows;
1137         int add_to_mst_top;
1138         int import;
1139         int existing;
1140         int resized;
1141         nsc_size_t mst_size, shd_size, bmp_size;
1142         nsc_off_t shdfba;
1143         nsc_off_t copyfba;
1144         int keylen, keyoffset;
1145         ii_header_t *bm_header;
1146         nsc_buf_t *tmp;
1147         spcs_s_info_t kstatus;
1148         spcs_s_info32_t ustatus32;
1149         int rtype;
1150         uint_t hints;
1151 
1152         /* Import is a once only operation like an enable */
1153         ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT));
1154         existing = (iflags&II_EXISTING) != 0;
1155         import = (iflags&II_IMPORT) != 0;
1156         *rvp = 0;
1157         if (ilp32) {
1158                 uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP);
1159                 if (uconf32 == NULL) {
1160                         return (ENOMEM);
1161                 }
1162                 if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0)
1163                         return (EFAULT);
1164                 II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t);
1165                 uconf.status = (spcs_s_info_t)uconf32->status;
1166                 ustatus32 = uconf32->status;
1167                 kmem_free(uconf32, sizeof (dsw_config32_t));
1168         } else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0)
1169                 return (EFAULT);
1170 
1171         DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol,
1172             char *, uconf.shadow_vol, char *, uconf.bitmap_vol);
1173 
1174         kstatus = spcs_s_kcreate();
1175         if (kstatus == NULL)
1176                 return (ENOMEM);
1177 
1178         if (_ii_shutting_down)
1179                 return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1180                     DSW_ESHUTDOWN));
1181 
1182         if (uconf.bitmap_vol[0] == 0)
1183                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1184 
1185         mutex_enter(&_ii_config_mutex);
1186         ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem);
1187         if (!ip) {
1188                 mutex_exit(&_ii_config_mutex);
1189                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1190         }
1191         ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1192             _ii_local_mem);
1193         ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1194             _ii_local_mem);
1195         if (ip->bi_mstdev == NULL || ip->bi_mstrdev == NULL) {
1196                 mutex_exit(&_ii_config_mutex);
1197                 _ii_info_free(ip);
1198                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1199         }
1200 
1201         ip->bi_disabled = 1; /* mark as disabled until we are ready to go */
1202         mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL);
1203         mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL);
1204         mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL);
1205         mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL);
1206         mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL);
1207         cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL);
1208         cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL);
1209         cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL);
1210         cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL);
1211         cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL);
1212         cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL);
1213         rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL);
1214         rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL);
1215         (void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN);
1216         ip->bi_keyname[DSW_NAMELEN-1] = '\0';
1217         ip->bi_throttle_unit = ii_throttle_unit;
1218         ip->bi_throttle_delay = ii_throttle_delay;
1219 
1220         /* First check the list to see if uconf.bitmap_vol's already there */
1221 
1222         if (ii_volume(uconf.bitmap_vol, 0) != NONE) {
1223                 DTRACE_PROBE(_ii_config_bmp_found);
1224                 mutex_exit(&_ii_config_mutex);
1225                 _ii_info_free(ip);
1226                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1227         }
1228 
1229         ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1230             NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc);
1231         if (!ip->bi_bmpfd)
1232                 ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1233                     NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL,
1234                     (blind_t)&(ip->bi_bmpdev), &rc);
1235         if (!ip->bi_bmpfd && !existing) {
1236                 mutex_exit(&_ii_config_mutex);
1237                 _ii_info_free(ip);
1238                 spcs_s_add(kstatus, rc);
1239                 DTRACE_PROBE(_ii_config_no_bmp);
1240                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1241         }
1242 
1243         if (import) {
1244                 uconf.flag = DSW_GOLDEN;
1245                 II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip);
1246         }
1247 
1248         if (existing) {
1249 
1250                 DTRACE_PROBE(_ii_config_existing);
1251                 /*
1252                  * ii_config is used by enable, import and resume (existing)
1253                  * If not importing or resuming, then this must be enable.
1254                  * Indicate this fact for SNMP use.
1255                  */
1256 
1257                 if (!ip->bi_bmpfd) {
1258                         /*
1259                          * Couldn't read bitmap, mark master and shadow as
1260                          * unusable.
1261                          */
1262                         II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE|
1263                             DSW_SHDOFFLINE, ip);
1264 
1265                         /*
1266                          * Set cluster tag for this element so it can
1267                          * be suspended later
1268                          */
1269                         (void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1270 
1271                         /* need to check on master, might be shared */
1272                         goto header_checked;
1273                 }
1274                 /* check the header */
1275                 (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1276 
1277                 /* get first block of bit map */
1278                 mutex_enter(&ip->bi_mutex);
1279                 bm_header = _ii_bm_header_get(ip, &tmp);
1280                 mutex_exit(&ip->bi_mutex);
1281                 if (bm_header == NULL) {
1282                         if (ii_debug > 0)
1283                                 cmn_err(CE_WARN,
1284                                     "!ii: _ii_bm_header_get returned NULL");
1285                         mutex_exit(&_ii_config_mutex);
1286                         _ii_info_free(ip);
1287                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1288                             DSW_EHDRBMP));
1289                 }
1290 
1291                 if (bm_header->ii_magic != DSW_DIRTY &&
1292                     bm_header->ii_magic != DSW_CLEAN) {
1293                         mutex_exit(&_ii_config_mutex);
1294                         _ii_bm_header_free(bm_header, ip, tmp);
1295                         _ii_info_free(ip);
1296                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1297                             DSW_EINVALBMP));
1298                 }
1299 
1300                 II_FLAG_ASSIGN(bm_header->ii_state, ip);
1301                 /* Restore copy throttle parameters, if header version is 3 */
1302                 if (bm_header->ii_version >= 3) { /* II_HEADER_VERSION */
1303                         ip->bi_throttle_delay = bm_header->ii_throttle_delay;
1304                         ip->bi_throttle_unit  = bm_header->ii_throttle_unit;
1305                 }
1306 
1307                 /* Restore cluster & group names, if header version is 4 */
1308                 if (bm_header->ii_version >= 4) {
1309                         /* cluster */
1310                         if (*bm_header->clstr_name) {
1311                                 (void) strncpy(uconf.cluster_tag,
1312                                     bm_header->clstr_name, DSW_NAMELEN);
1313                                 (void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1314                         }
1315 
1316                         /* group */
1317                         if (*bm_header->group_name) {
1318                                 (void) strncpy(uconf.group_name,
1319                                     bm_header->group_name, DSW_NAMELEN);
1320                                 (void) II_LINK_GROUP(ip, uconf.group_name);
1321                         }
1322                 }
1323                 /* restore latest modification time, if header version >= 5 */
1324                 if (bm_header->ii_version >= 5) {
1325                         ip->bi_mtime = bm_header->ii_mtime;
1326                 }
1327 
1328                 /* Fetch master and shadow names from bitmap header */
1329                 if (uconf.master_vol[0] == 0)
1330                         (void) strncpy(uconf.master_vol, bm_header->master_vol,
1331                             DSW_NAMELEN);
1332                 if (uconf.shadow_vol[0] == 0)
1333                         (void) strncpy(uconf.shadow_vol, bm_header->shadow_vol,
1334                             DSW_NAMELEN);
1335 
1336                 /* return the fetched names to the user */
1337                 if (ilp32) {
1338                         uconf32 = kmem_zalloc(sizeof (dsw_config32_t),
1339                             KM_SLEEP);
1340                         if (uconf32 == NULL) {
1341                                 mutex_exit(&_ii_config_mutex);
1342                                 _ii_bm_header_free(bm_header, ip, tmp);
1343                                 _ii_rlse_devs(ip, BMP);
1344                                 _ii_info_free(ip);
1345                                 return (ENOMEM);
1346                         }
1347                         uconf32->status = ustatus32;
1348                         II_TAIL_COPY((*uconf32), uconf, master_vol,
1349                             dsw_config32_t);
1350                         rc = copyout(uconf32, (void *)arg, sizeof (*uconf32));
1351                         kmem_free(uconf32, sizeof (dsw_config32_t));
1352                 } else {
1353                         rc = copyout(&uconf, (void *)arg, sizeof (uconf));
1354                 }
1355                 if (rc) {
1356                         mutex_exit(&_ii_config_mutex);
1357                         _ii_bm_header_free(bm_header, ip, tmp);
1358                         _ii_rlse_devs(ip, BMP);
1359                         _ii_info_free(ip);
1360                         return (EFAULT);
1361                 }
1362 
1363                 if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol,
1364                     DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) &&
1365                     strncmp(bm_header->master_vol, uconf.master_vol,
1366                     DSW_NAMELEN)) || strncmp(bm_header->shadow_vol,
1367                     uconf.shadow_vol, DSW_NAMELEN)) {
1368                         mutex_exit(&_ii_config_mutex);
1369                         _ii_bm_header_free(bm_header, ip, tmp);
1370                         _ii_rlse_devs(ip, BMP);
1371                         _ii_info_free(ip);
1372                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1373                             DSW_EMISMATCH));
1374                 }
1375                 shdfba = bm_header->ii_shdfba;
1376                 copyfba = bm_header->ii_copyfba;
1377                 if ((ip->bi_flags)&DSW_TREEMAP) {
1378                         if (ii_debug > 0)
1379                                 cmn_err(CE_NOTE,
1380                                     "!II: Resuming short shadow volume");
1381 
1382                         ip->bi_mstchks = bm_header->ii_mstchks;
1383                         ip->bi_shdchks = bm_header->ii_shdchks;
1384                         ip->bi_shdchkused = bm_header->ii_shdchkused;
1385                         ip->bi_shdfchk = bm_header->ii_shdfchk;
1386 
1387                         if (bm_header->overflow_vol[0] != 0)
1388                                 if ((rc = ii_overflow_attach(ip,
1389                                     bm_header->overflow_vol, 0)) != 0) {
1390                                         mutex_exit(&_ii_config_mutex);
1391                                         _ii_bm_header_free(bm_header, ip, tmp);
1392                                         _ii_rlse_devs(ip, BMP);
1393                                         _ii_info_free(ip);
1394                                         return (spcs_s_ocopyoutf(&kstatus,
1395                                             uconf.status, rc));
1396                         }
1397                 }
1398                 _ii_bm_header_free(bm_header, ip, tmp);
1399                 _ii_rlse_devs(ip, BMP);
1400         }
1401 header_checked:
1402 
1403         if (ip->bi_flags&DSW_SHDIMPORT)
1404                 (void) strcpy(uconf.master_vol, "<imported shadow>");
1405         if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) {
1406                 mutex_exit(&_ii_config_mutex);
1407                 _ii_info_free(ip);
1408                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1409         }
1410 
1411         /* check that no volume has been given twice */
1412         if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1413                 mutex_exit(&_ii_config_mutex);
1414                 _ii_info_free(ip);
1415                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1416         }
1417 
1418         if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) {
1419                 mutex_exit(&_ii_config_mutex);
1420                 _ii_info_free(ip);
1421                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1422         }
1423 
1424         if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1425                 mutex_exit(&_ii_config_mutex);
1426                 _ii_info_free(ip);
1427                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1428         }
1429 
1430         /* check that master is not already a bitmap, shadow or overflow */
1431         type = ii_volume(uconf.master_vol, 1);
1432         if (type != NONE && type != MST) {
1433                 mutex_exit(&_ii_config_mutex);
1434                 _ii_info_free(ip);
1435                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1436         }
1437 
1438         /* check that shadow is not used as anything else */
1439         type = ii_volume(uconf.shadow_vol, 1);
1440         if (type != NONE && type != SHD) {
1441                 mutex_exit(&_ii_config_mutex);
1442                 _ii_info_free(ip);
1443                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1444         }
1445 
1446         /* Setup the table bitmap operations table */
1447         switch (ii_bitmap) {
1448         case II_KMEM:
1449                 if (ii_debug > 0)
1450                         cmn_err(CE_NOTE, "!ii: using volatile bitmaps");
1451                 ip->bi_bitmap_ops = &kmem_buf_bmp;
1452                 break;
1453         case II_FWC:
1454                 hints = 0;
1455                 (void) nsc_node_hints(&hints);
1456                 if ((hints & NSC_FORCED_WRTHRU) == 0)
1457                         ip->bi_bitmap_ops = &kmem_buf_bmp;
1458                 else
1459                         ip->bi_bitmap_ops = &alloc_buf_bmp;
1460                 if (ii_debug > 0) {
1461                         cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps",
1462                             ip->bi_bitmap_ops == &kmem_buf_bmp ?
1463                             "volatile" : "persistent");
1464                 }
1465                 break;
1466         case II_WTHRU:
1467         default:
1468                 if (ii_debug > 0)
1469                         cmn_err(CE_NOTE, "!ii: using persistent bitmaps");
1470                 ip->bi_bitmap_ops = &alloc_buf_bmp;
1471                 break;
1472         }
1473 
1474         /*
1475          * If we found aother shadow volume with the same name,
1476          * If this is an resume operation,
1477          * If this shadow is in the exported state
1478          * then try an on the fly join instead
1479          */
1480         for (hip = _ii_info_top; hip; hip = hip->bi_next)
1481                 if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0)
1482                                 break;
1483         if ((hip) && (type == SHD) && existing &&
1484             (ip->bi_flags & DSW_SHDEXPORT)) {
1485 
1486                 /*
1487                  * Stop any copy in progress
1488                  */
1489                 while (_ii_stopcopy(hip) == EINTR)
1490                         ;
1491 
1492                 /*
1493                  * Start the imported shadow teardown
1494                  */
1495                 mutex_enter(&hip->bi_mutex);
1496 
1497                 /* disable accesss to imported shadow */
1498                 hip->bi_disabled = 1;
1499 
1500                 /* Wait for any I/O's to complete */
1501                 while (hip->bi_ioctl) {
1502                         hip->bi_state |= DSW_IOCTL;
1503                         cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex);
1504                 }
1505                 mutex_exit(&hip->bi_mutex);
1506 
1507                 /* this rw_enter forces us to drain all active IO */
1508                 rw_enter(&hip->bi_linkrw, RW_WRITER);
1509                 rw_exit(&hip->bi_linkrw);
1510 
1511                 /* remove ip from _ii_info_top linked list */
1512                 mutex_enter(&_ii_info_mutex);
1513                 for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
1514                         if (hip == *ipp) {
1515                                 *ipp = hip->bi_next;
1516                                 break;
1517                         }
1518                 }
1519                 if (hip->bi_kstat) {
1520                         kstat_delete(hip->bi_kstat);
1521                         hip->bi_kstat = NULL;
1522                 }
1523                 mutex_exit(&_ii_info_mutex);
1524 
1525                 /* Gain access to both bitmap volumes */
1526                 rtype = BMP;
1527                 if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) ||
1528                     ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) {
1529                         mutex_exit(&_ii_config_mutex);
1530                         _ii_info_free(ip);
1531                         return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1532                 }
1533 
1534                 /* Merge imported bitmap */
1535                 rc = II_JOIN_BMP(ip, hip);
1536 
1537                 /* Release access to bitmap volume */
1538                 _ii_rlse_devs(hip, rtype);
1539                 ii_sibling_free(hip);
1540 
1541                 /* Clear the fact that we are exported */
1542                 mutex_enter(&ip->bi_mutex);
1543                 II_FLAG_CLR(DSW_SHDEXPORT, ip);
1544 
1545                 /* Release resources */
1546                 mutex_exit(&ip->bi_mutex);
1547                 _ii_rlse_devs(ip, BMP);
1548 
1549         } else if (type != NONE) {
1550                 mutex_exit(&_ii_config_mutex);
1551                 _ii_info_free(ip);
1552                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1553         }
1554 
1555         /*
1556          * Handle non-exported shadow
1557          */
1558         if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
1559                 if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) {
1560                         mutex_exit(&_ii_config_mutex);
1561                         _ii_info_free(ip);
1562                         spcs_s_add(kstatus, rc);
1563                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1564                             DSW_EOPEN));
1565                 }
1566         }
1567 
1568         /*
1569          * allocate _ii_concopy_sema and set to a value that won't allow
1570          * all cache to be allocated by copy loops.
1571          */
1572 
1573         if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) {
1574                 int asize = 0, wsize;
1575                 nsc_size_t cfbas, maxfbas;
1576 
1577                 (void) nsc_cache_sizes(&asize, &wsize);
1578 
1579                 if (asize > 0) {
1580                         cfbas = FBA_NUM(asize);
1581                         (void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1582                         rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas);
1583                         _ii_rlse_devs(ip, BMP);
1584                         if (!II_SUCCESS(rc))
1585                                 maxfbas = 1024;         /* i.e. _SD_MAX_FBAS */
1586                         ii_nconcopy = cfbas / (maxfbas * 2) / 3;
1587                 }
1588                 if (ii_nconcopy < 2)
1589                         ii_nconcopy = 2;
1590                 ASSERT(ii_nconcopy > 0);
1591                 sema_init(&_ii_concopy_sema, ii_nconcopy, NULL,
1592                     SEMA_DRIVER, NULL);
1593                 _ii_concopy_init = 1;
1594         }
1595 
1596         /* check for shared master volume */
1597         for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst)
1598                 if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0)
1599                         break;
1600         add_to_mst_top = (hip == NULL);
1601         if (!hip)
1602                 for (hip = _ii_info_top; hip; hip = hip->bi_next)
1603                         if (strcmp(uconf.master_vol,
1604                             ii_pathname(hip->bi_mstfd)) == 0)
1605                                 break;
1606         nshadows = (hip != NULL);
1607 
1608         /* Check if master is offline */
1609         if (hip) {
1610                 if (hip->bi_flags & DSW_MSTOFFLINE) {
1611                         mutex_exit(&_ii_config_mutex);
1612                         _ii_info_free(ip);
1613                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1614                             DSW_EOFFLINE));
1615                 }
1616         }
1617 
1618         if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
1619                 ip->bi_mstfd = nsc_open(uconf.master_vol,
1620                     NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1621                     (blind_t)(ip->bi_mstdev), &rc);
1622                 if (!ip->bi_mstfd) {
1623                         mutex_exit(&_ii_config_mutex);
1624                         _ii_info_free(ip);
1625                         spcs_s_add(kstatus, rc);
1626                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1627                             DSW_EOPEN));
1628                 }
1629 
1630                 ip->bi_mstrfd = nsc_open(uconf.master_vol,
1631                     NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1632                     (blind_t)(ip->bi_mstrdev), &rc);
1633                 if (!ip->bi_mstrfd) {
1634                         mutex_exit(&_ii_config_mutex);
1635                         _ii_info_free(ip);
1636                         spcs_s_add(kstatus, rc);
1637                         return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1638                             DSW_EOPEN));
1639                 }
1640         }
1641 
1642         ip->bi_head = ip;
1643         ip->bi_master = ip;
1644 
1645         mutex_enter(&_ii_info_mutex);
1646         ip->bi_next = _ii_info_top;
1647         _ii_info_top = ip;
1648         if (nshadows) {
1649                 /* link new shadow group together with others sharing master */
1650                 if (ii_debug > 0)
1651                         cmn_err(CE_NOTE,
1652                             "!II: shadow %s shares master %s with other shadow"
1653                             " groups", uconf.shadow_vol, uconf.master_vol);
1654                 hip = hip->bi_head;
1655                 nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
1656                 nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
1657                 ip->bi_mstrdev = hip->bi_mstrdev;
1658                 ip->bi_mstdev = hip->bi_mstdev;
1659                 ip->bi_head = hip;
1660                 ip->bi_sibling = hip->bi_sibling;
1661                 if (add_to_mst_top) {
1662                         hip->bi_nextmst = _ii_mst_top;
1663                         _ii_mst_top = hip;
1664                 }
1665                 hip->bi_sibling = ip;
1666                 ip->bi_master = ip->bi_head->bi_master;
1667         }
1668         mutex_exit(&_ii_info_mutex);
1669         mutex_exit(&_ii_config_mutex);
1670 
1671         keylen = strlen(ip->bi_keyname);
1672         if (keylen > KSTAT_STRLEN - 1) {
1673                 keyoffset = keylen + 1 - KSTAT_STRLEN;
1674         } else {
1675                 keyoffset = 0;
1676         }
1677         ip->bi_kstat = kstat_create("ii", _ii_instance++,
1678             &ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED,
1679             sizeof (ii_kstat_set) / sizeof (kstat_named_t),
1680             KSTAT_FLAG_VIRTUAL);
1681         if (ip->bi_kstat) {
1682                 ip->bi_kstat->ks_data = &ii_kstat_set;
1683                 ip->bi_kstat->ks_update = ii_set_stats_update;
1684                 ip->bi_kstat->ks_private = ip;
1685                 kstat_install(ip->bi_kstat);
1686         } else {
1687                 cmn_err(CE_WARN, "!Unable to create set-specific kstats");
1688         }
1689 
1690 #ifndef DISABLE_KSTATS
1691         /* create kstats information */
1692         mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL);
1693         if (ip == ip->bi_master) {
1694                 ip->bi_kstat_io.master = _ii_kstat_create(ip, "master");
1695         } else {
1696                 ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master;
1697                 (void) strlcpy(ip->bi_kstat_io.mstio,
1698                     ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN);
1699         }
1700         ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow");
1701         ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap");
1702 #endif
1703 
1704         (void) _ii_reserve_begin(ip);
1705         rtype = MSTR|SHDR|BMP;
1706         if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
1707                 spcs_s_add(kstatus, rc);
1708                 rc = DSW_ERSRVFAIL;
1709                 goto fail;
1710         }
1711 
1712         if (ip->bi_flags&DSW_SHDIMPORT) {
1713                 rc = 0;         /* no master for imported volumes */
1714                 mst_size = 0;
1715         } else
1716                 rc = nsc_partsize(MSTFD(ip), &mst_size);
1717         if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0)
1718                 rc = nsc_partsize(SHDFD(ip), &shd_size);
1719         if (!ip->bi_bmpfd)
1720                 rc = EINVAL;
1721         if (rc == 0)
1722                 rc = nsc_partsize(ip->bi_bmpfd, &bmp_size);
1723 
1724         if (ip->bi_flags&DSW_SHDIMPORT)
1725                 ip->bi_size = shd_size;
1726         else
1727                 ip->bi_size = mst_size;
1728 
1729         if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) &&
1730             (mst_size < 1)) ||
1731             (((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) &&
1732             (shd_size < 1)) ||
1733             ((rc == 0) && (bmp_size < 1))) {
1734                 /* could be really zero, or could be > 1 TB; fail the enable */
1735                 rc = EINVAL;
1736         }
1737 
1738         if (rc != 0) {  /* rc set means an nsc_partsize() failed */
1739                 /*
1740                  * If existing group, mark bitmap as offline and set
1741                  * bmp_size to "right size".
1742                  */
1743                 if (existing) {
1744                         bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) +
1745                             DSW_SHD_BM_OFFSET;
1746                         goto no_more_bmp_tests;
1747                 }
1748                 spcs_s_add(kstatus, rc);
1749                 rc = DSW_EPARTSIZE;
1750                 _ii_rlse_devs(ip, rtype);
1751                 _ii_reserve_end(ip);
1752                 goto fail;
1753         }
1754 
1755         if (ip->bi_flags&DSW_SHDIMPORT)
1756                 mst_size = shd_size;
1757         if (ip->bi_flags&DSW_SHDEXPORT)
1758                 shd_size = mst_size;
1759         /*
1760          * Check with RDC if the master & shadow sizes are different.
1761          * Once II is enabled, the shadow size will be made to appear
1762          * the same as the master, and this will panic RDC if we're
1763          * changing sizes on it.
1764          */
1765         resized = (shd_size != mst_size);
1766         if (resized && ii_need_same_size(ip)) {
1767                 cmn_err(CE_WARN, "!Cannot enable II set: would change volume "
1768                     "size on RDC");
1769                 rc = DSW_EOPACKAGE;
1770                 _ii_rlse_devs(ip, rtype);
1771                 _ii_reserve_end(ip);
1772                 goto fail;
1773         }
1774         if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) {
1775                 /* bitmap volume too small */
1776                 if (ii_debug > 0)
1777                         cmn_err(CE_NOTE,
1778                             "!ii: invalid sizes: bmp %" NSC_SZFMT " mst %"
1779                             NSC_SZFMT " %" NSC_SZFMT "",
1780                             bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size));
1781                 rc = DSW_EBMPSIZE;
1782                 _ii_rlse_devs(ip, rtype);
1783                 _ii_reserve_end(ip);
1784                 goto fail;
1785         }
1786         if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) {
1787                 /* shadow volume too small */
1788                 if (ii_debug > 0)
1789                         cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT
1790                             ") for independent set's master (%" NSC_SZFMT ")",
1791                             shd_size, mst_size);
1792                 rc = DSW_ESHDSIZE;
1793                 _ii_rlse_devs(ip, rtype);
1794                 _ii_reserve_end(ip);
1795                 goto fail;
1796         }
1797 
1798         ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)),
1799             KM_SLEEP);
1800         if (!ip->bi_busy) {
1801                 rc = ENOMEM;
1802                 _ii_rlse_devs(ip, rtype);
1803                 _ii_reserve_end(ip);
1804                 goto fail;
1805         }
1806 
1807         if (existing == 0) {
1808 
1809                 DTRACE_PROBE(_ii_config);
1810 
1811                 /* first time this shadow has been set up */
1812                 mutex_enter(&ip->bi_mutex);
1813                 bm_header = _ii_bm_header_get(ip, &tmp);
1814                 mutex_exit(&ip->bi_mutex);
1815                 if (bm_header == NULL) {
1816                         if (ii_debug > 0)
1817                                 cmn_err(CE_WARN,
1818                                     "!ii: _ii_bm_header_get returned NULL");
1819                         rc = DSW_EHDRBMP;
1820                         _ii_rlse_devs(ip, rtype);
1821                         _ii_reserve_end(ip);
1822                         goto fail;
1823                 }
1824                 bzero(bm_header, sizeof (*bm_header));
1825                 /* copy pathnames into it */
1826                 (void) strncpy(bm_header->master_vol, uconf.master_vol,
1827                     DSW_NAMELEN);
1828                 (void) strncpy(bm_header->shadow_vol, uconf.shadow_vol,
1829                     DSW_NAMELEN);
1830                 (void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol,
1831                     DSW_NAMELEN);
1832                 (void) strncpy(bm_header->clstr_name, uconf.cluster_tag,
1833                     DSW_NAMELEN);
1834                 (void) strncpy(bm_header->group_name, uconf.group_name,
1835                     DSW_NAMELEN);
1836 
1837                 if (uconf.cluster_tag[0] != 0)
1838                         (void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1839 
1840                 if (uconf.group_name[0] != 0)
1841                         (void) II_LINK_GROUP(ip, uconf.group_name);
1842 
1843 
1844                 bm_header->ii_state = (uconf.flag & DSW_GOLDEN);
1845                 II_FLAG_ASSIGN(bm_header->ii_state, ip);
1846 
1847                 if (import) {
1848                         II_FLAG_SETX(DSW_SHDIMPORT, ip);
1849                         bm_header->ii_state |= DSW_SHDIMPORT;
1850                 }
1851                 if (resized) {
1852                         II_FLAG_SETX(DSW_RESIZED, ip);
1853                         bm_header->ii_state |= DSW_RESIZED;
1854                 }
1855                 bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ?
1856                     DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
1857                 bm_header->ii_magic = DSW_DIRTY;
1858                 bm_header->ii_version = II_HEADER_VERSION;
1859                 bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
1860                 bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
1861                 bm_header->ii_throttle_delay = ip->bi_throttle_delay;
1862                 bm_header->ii_throttle_unit = ip->bi_throttle_unit;
1863                 ip->bi_shdfba = bm_header->ii_shdfba;
1864                 ip->bi_copyfba = bm_header->ii_copyfba;
1865                 ip->bi_mtime = ddi_get_time();
1866 
1867                 /* write it to disk */
1868                 mutex_enter(&ip->bi_mutex);
1869                 rc = _ii_bm_header_put(bm_header, ip, tmp);
1870                 mutex_exit(&ip->bi_mutex);
1871                 if (!II_SUCCESS(rc)) {
1872                         spcs_s_add(kstatus, rc);
1873                         rc = DSW_EHDRBMP;
1874                         _ii_rlse_devs(ip, rtype);
1875                         _ii_reserve_end(ip);
1876                         goto fail;
1877                 }
1878                 if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) {
1879                 /*
1880                  * shadow volume smaller than master, must use a dependent
1881                  * copy with a bitmap file stored mapping for chunk locations.
1882                  */
1883                                         /* number of chunks in shadow volume */
1884                         nsc_size_t shd_chunks;
1885                         nsc_size_t bmp_chunks;
1886                         nsc_size_t tmp_chunks;
1887 
1888                         if (ii_debug > 1)
1889                                 cmn_err(CE_NOTE, "!ii: using tree index on %s",
1890                                     uconf.master_vol);
1891                         shd_chunks = shd_size / DSW_SIZE;
1892                         /* do not add in partial chunk at end */
1893 
1894                         ip->bi_mstchks = mst_size / DSW_SIZE;
1895                         if (mst_size % DSW_SIZE != 0)
1896                                 ip->bi_mstchks++;
1897                         bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba -
1898                             DSW_BM_FBA_LEN(ip->bi_size));
1899                         tmp_chunks = ip->bi_copyfba +
1900                             DSW_BM_FBA_LEN(ip->bi_size);
1901                         if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) {
1902                                 if (ii_debug > -1) {
1903                                         cmn_err(CE_NOTE, "!ii: bitmap vol too"
1904                                             "small: %" NSC_SZFMT " vs. %"
1905                                             NSC_SZFMT, bmp_size,
1906                                             tmp_chunks);
1907                                 }
1908                                 spcs_s_add(kstatus, rc);
1909                                 rc = DSW_EHDRBMP;
1910                                 _ii_rlse_devs(ip, rtype);
1911                                 _ii_reserve_end(ip);
1912                                 goto fail;
1913                         }
1914                         mutex_enter(&ip->bi_mutex);
1915                         II_FLAG_SET(DSW_TREEMAP, ip);
1916                         mutex_exit(&ip->bi_mutex);
1917 
1918                         /* following values are written to header by ii_tinit */
1919 #if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE))
1920                         ASSERT(shd_chunks <= INT32_MAX);
1921                         ASSERT(mst_size / DSW_SIZE <= INT32_MAX);
1922 #endif
1923                         ip->bi_mstchks = mst_size / DSW_SIZE;
1924                         if (mst_size % DSW_SIZE != 0)
1925                                 ip->bi_mstchks++;
1926 #ifdef  II_MULTIMULTI_TERABYTE
1927                         ip->bi_shdchks = shd_chunks;
1928 #else
1929                         /* still have 31 bit chunkid's */
1930                         ip->bi_shdchks = (chunkid_t)shd_chunks;
1931 #endif
1932                         ip->bi_shdchkused = 0;
1933                         rc = ii_tinit(ip);
1934                 } else {
1935                         ip->bi_shdchks = shd_size / DSW_SIZE;
1936                         ip->bi_shdchkused = 0;
1937                 }
1938                 if (rc == 0)
1939                         rc = II_LOAD_BMP(ip, 1);
1940                 if (rc == 0)
1941                         rc = II_ZEROBM(ip);
1942                 if (rc == 0)
1943                         rc = II_COPYBM(ip);     /* also clear copy bitmap */
1944                 if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import)
1945                         rc = ii_fill_copy_bmp(ip);
1946                 if (rc) {
1947                         spcs_s_add(kstatus, rc);
1948                         rc = DSW_EHDRBMP;
1949                         _ii_rlse_devs(ip, rtype);
1950                         goto fail;
1951                 }
1952                 /* check that changing shadow won't upset RDC */
1953                 if (ii_update_denied(ip, kstatus, 0, 1)) {
1954                         rc = DSW_EOPACKAGE;
1955                         _ii_rlse_devs(ip, rtype);
1956                         _ii_reserve_end(ip);
1957                         goto fail;
1958                 }
1959                 ip->bi_disabled = 0; /* all okay and ready, we can go now */
1960                 _ii_rlse_devs(ip, rtype);
1961                 /* no _ii_reserve_end() here - we must register first */
1962                 ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
1963                     NSC_CACHE|NSC_DEVICE, _ii_io);
1964                 if (!nshadows)
1965                         ii_register_mst(ip);
1966                 ii_register_shd(ip);
1967 
1968                 if (!ii_register_ok(ip)) {
1969                         ip->bi_disabled = 1; /* argh */
1970                         rc = DSW_EREGISTER;
1971                         goto fail;
1972                 }
1973                 /* no _ii_reserve_begin() here -- we're still in process */
1974                 (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
1975 
1976                 if (ii_debug > 0)
1977                         cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
1978                             uconf.master_vol, uconf.shadow_vol);
1979                 rc = 0;
1980                 if ((uconf.flag & DSW_GOLDEN) && !import) {
1981                         mutex_enter(&ip->bi_mutex);
1982                         II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
1983                         ip->bi_ioctl++;      /* we are effectively in an ioctl */
1984                         mutex_exit(&ip->bi_mutex);
1985                         rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
1986                 }
1987                 _ii_rlse_devs(ip, rtype);
1988                 _ii_reserve_end(ip);
1989 
1990                 ++iigkstat.num_sets.value.ul;
1991 
1992                 return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1993         }
1994 
1995         ip->bi_shdchks = shd_size / DSW_SIZE;
1996         ip->bi_shdfba = shdfba;
1997         ip->bi_copyfba = copyfba;
1998         rc = II_LOAD_BMP(ip, 0);                /* reload saved bitmap */
1999         mutex_enter(&ip->bi_mutex);
2000         if (rc == 0)
2001                 bm_header = _ii_bm_header_get(ip, &tmp);
2002         mutex_exit(&ip->bi_mutex);
2003         if (rc || bm_header == NULL) {
2004                 if (existing) {
2005                         goto no_more_bmp_tests;
2006                 }
2007                 rc = DSW_EHDRBMP;
2008                 goto fail;
2009         }
2010 
2011         /*
2012          * If the header is dirty and it wasn't kept on persistent storage
2013          * then the bitmaps must be assumed to be bad.
2014          */
2015         if (bm_header->ii_magic == DSW_DIRTY &&
2016             ip->bi_bitmap_ops != &alloc_buf_bmp) {
2017                 type = bm_header->ii_type;
2018                 _ii_bm_header_free(bm_header, ip, tmp);
2019                 if (type == DSW_GOLDEN_TYPE) {
2020                         if ((ip->bi_flags & DSW_COPYINGM) != 0)
2021                                 _ii_error(ip, DSW_SHDOFFLINE);
2022                         else if ((ip->bi_flags & DSW_COPYINGS) != 0)
2023                                 _ii_error(ip, DSW_MSTOFFLINE);
2024                         else {
2025                                 /* No copying, so they're just different */
2026                                 rc = ii_fill_copy_bmp(ip);
2027                                 if (rc) {
2028                                         spcs_s_add(kstatus, rc);
2029                                         rc = DSW_EHDRBMP;
2030                                         goto fail;
2031                                 }
2032                         }
2033                 } else
2034                         _ii_error(ip, DSW_SHDOFFLINE);
2035 
2036                 mutex_enter(&ip->bi_mutex);
2037                 bm_header = _ii_bm_header_get(ip, &tmp);
2038                 mutex_exit(&ip->bi_mutex);
2039                 if (bm_header == NULL) {
2040                         rc = DSW_EHDRBMP;
2041                         goto fail;
2042                 }
2043         }
2044 
2045         bm_header->ii_magic = DSW_DIRTY;
2046         mutex_enter(&ip->bi_mutex);
2047         rc = _ii_bm_header_put(bm_header, ip, tmp);
2048         mutex_exit(&ip->bi_mutex);
2049         if (!II_SUCCESS(rc)) {
2050                 spcs_s_add(kstatus, rc);
2051                 rc = DSW_EHDRBMP;
2052                 goto fail;
2053         }
2054 
2055         ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
2056             NSC_CACHE|NSC_DEVICE, _ii_io);
2057 no_more_bmp_tests:
2058         _ii_rlse_devs(ip, rtype);
2059         ip->bi_disabled = 0; /* all okay and ready, we can go now */
2060         if (!nshadows)
2061                 ii_register_mst(ip);
2062         if ((ip->bi_flags & DSW_SHDEXPORT) == 0)
2063                 ii_register_shd(ip);
2064 
2065         if (!ii_register_ok(ip)) {
2066                 rc = DSW_EREGISTER;
2067                 goto fail;
2068         }
2069         _ii_reserve_end(ip);
2070 
2071         if (ii_debug > 0)
2072                 cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
2073                     uconf.master_vol, uconf.shadow_vol);
2074 
2075         rc = 0;
2076         if (ip->bi_flags & DSW_COPYINGP) {
2077                 /* Copy was in progress, so continue it */
2078                 (void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
2079                 mutex_enter(&ip->bi_mutex);
2080                 ip->bi_ioctl++;              /* we are effectively in an ioctl */
2081                 mutex_exit(&ip->bi_mutex);
2082                 rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ?
2083                     CV_SHD2MST : 0, rtype, kstatus, 0);
2084         }
2085 
2086         ++iigkstat.num_sets.value.ul;
2087 
2088         return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2089 
2090 fail:
2091         /* remove ip from _ii_info_top linked list */
2092         mutex_enter(&_ii_info_mutex);
2093         for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
2094                 if (ip == *ipp) {
2095                         *ipp = ip->bi_next;
2096                         break;
2097                 }
2098         }
2099         mutex_exit(&_ii_info_mutex);
2100         ii_sibling_free(ip);
2101 
2102         return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2103 }
2104 
2105 static int
2106 _ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim)
2107 {
2108         _ii_info_t **xip, *ip;
2109         _ii_overflow_t *op;
2110         nsc_buf_t *tmp = NULL;
2111         int rc;
2112         ii_header_t *bm_header;
2113         int rtype;
2114 
2115         mutex_enter(&_ii_info_mutex);
2116         ip = _ii_find_set(setname);
2117         if (ip == NULL) {
2118                 mutex_exit(&_ii_info_mutex);
2119                 return (DSW_ENOTFOUND);
2120         }
2121 
2122         if ((ip->bi_flags & DSW_GOLDEN) &&
2123             ((ip->bi_flags & DSW_COPYINGP) != 0)) {
2124                 /*
2125                  * Cannot disable an independent copy while still copying
2126                  * as it means that a data dependency exists.
2127                  */
2128                 mutex_exit(&_ii_info_mutex);
2129                 _ii_ioctl_done(ip);
2130                 mutex_exit(&ip->bi_mutex);
2131                 DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY);
2132                 return (DSW_EDEPENDENCY);
2133         }
2134 
2135         if ((ip->bi_flags & DSW_GOLDEN) == 0 &&
2136             ii_update_denied(ip, *kstatusp, 0, 1)) {
2137                 /* Cannot disable a dependent shadow while RDC is unsure */
2138                 mutex_exit(&_ii_info_mutex);
2139                 _ii_ioctl_done(ip);
2140                 mutex_exit(&ip->bi_mutex);
2141                 DTRACE_PROBE(DSW_EOPACKAGE);
2142                 return (DSW_EOPACKAGE);
2143         }
2144 
2145         if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) &&
2146             ii_need_same_size(ip)) {
2147                 /* We can't disable the set whilst RDC is using it */
2148                 mutex_exit(&_ii_info_mutex);
2149                 _ii_ioctl_done(ip);
2150                 mutex_exit(&ip->bi_mutex);
2151                 cmn_err(CE_WARN, "!Cannot disable II set: would change "
2152                     "volume size on RDC");
2153                 DTRACE_PROBE(DSW_EOPACKAGE_resize);
2154                 return (DSW_EOPACKAGE);
2155         }
2156 
2157         ip->bi_disabled = 1;
2158         if (NSHADOWS(ip) && (ip->bi_master == ip)) {
2159                 ip->bi_flags &= (~DSW_COPYING);
2160                 ip->bi_state |= DSW_MULTIMST;
2161         }
2162         mutex_exit(&_ii_info_mutex);
2163 
2164         _ii_ioctl_done(ip);
2165         mutex_exit(&ip->bi_mutex);
2166 
2167         _ii_stopvol(ip);
2168 
2169         rtype = SHDR|BMP;
2170         if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
2171                 spcs_s_add(*kstatusp, rc);
2172                 DTRACE_PROBE(DSW_ERSRVFAIL);
2173                 return (DSW_ERSRVFAIL);
2174         }
2175 
2176         if ((ii_header < 128) &&
2177             (((ip->bi_flags & DSW_GOLDEN) == 0) ||
2178             (ip->bi_flags & DSW_COPYING))) {
2179                 /*
2180                  * Not a full copy so attempt to prevent use of partial copy
2181                  * by clearing where the first ufs super-block would be
2182                  * located. Solaris often incorporates the disk header into
2183                  * the start of the first slice, so avoid clearing the very
2184                  * first 16 blocks of the volume.
2185                  */
2186 
2187                 if (ii_debug > 1)
2188                         cmn_err(CE_NOTE, "!ii: Shadow copy invalidated");
2189                 II_READ_START(ip, shadow);
2190                 rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header,
2191                     NSC_RDWRBUF, &tmp);
2192                 II_READ_END(ip, shadow, rc, 128 - ii_header);
2193                 if (II_SUCCESS(rc)) {
2194                         rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0);
2195                         if (II_SUCCESS(rc)) {
2196                                 II_NSC_WRITE(ip, shadow, rc, tmp, ii_header,
2197                                     (128 - ii_header), 0);
2198                         }
2199                 }
2200                 if (tmp)
2201                         (void) nsc_free_buf(tmp);
2202                 if (!II_SUCCESS(rc))
2203                         _ii_error(ip, DSW_SHDOFFLINE);
2204         }
2205 
2206         /* this rw_enter forces us to drain all active IO */
2207         rw_enter(&ip->bi_linkrw, RW_WRITER);
2208         rw_exit(&ip->bi_linkrw);
2209 
2210         /* remove ip from _ii_info_top linked list */
2211         mutex_enter(&_ii_info_mutex);
2212         for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) {
2213                 if (ip == *xip) {
2214                         *xip = ip->bi_next;
2215                         break;
2216                 }
2217         }
2218         if (ip->bi_kstat) {
2219                 kstat_delete(ip->bi_kstat);
2220                 ip->bi_kstat = NULL;
2221         }
2222         mutex_exit(&_ii_info_mutex);
2223 
2224         rc = II_SAVE_BMP(ip, 1);
2225         mutex_enter(&ip->bi_mutex);
2226         if (rc == 0)
2227                 bm_header = _ii_bm_header_get(ip, &tmp);
2228         if (rc == 0 && bm_header) {
2229                 if (ii_debug > 1)
2230                         cmn_err(CE_NOTE, "!ii: Invalid header written");
2231                 bm_header->ii_magic = DSW_INVALID;
2232                 /* write it to disk */
2233                 (void) _ii_bm_header_put(bm_header, ip, tmp);
2234         }
2235         mutex_exit(&ip->bi_mutex);
2236 
2237         op = ip->bi_overflow;
2238         if (op && (reclaim == -1)) {
2239                 reclaim = (op->ii_drefcnt == 1? NO_RECLAIM : RECLAIM);
2240         }
2241 
2242         if ((op != NULL) && (op->ii_hversion >= 1) &&
2243             (op->ii_hmagic == II_OMAGIC)) {
2244                 mutex_enter(&_ii_overflow_mutex);
2245                 if (ip->bi_flags & DSW_OVRHDRDRTY) {
2246                         mutex_enter(&ip->bi_mutex);
2247                         ip->bi_flags &= ~DSW_OVRHDRDRTY;
2248                         mutex_exit(&ip->bi_mutex);
2249                         ASSERT(op->ii_urefcnt > 0);
2250                         op->ii_urefcnt--;
2251                 }
2252                 if (op->ii_urefcnt == 0) {
2253                         op->ii_flags &= ~IIO_CNTR_INVLD;
2254                         op->ii_unused = op->ii_nchunks - 1;
2255                 }
2256                 mutex_exit(&_ii_overflow_mutex);
2257         }
2258         ii_overflow_free(ip, reclaim);
2259         _ii_rlse_devs(ip, rtype);
2260 
2261         ii_sibling_free(ip);
2262 
2263         --iigkstat.num_sets.value.ul;
2264         return (0);
2265 }
2266 
2267 /*
2268  * _ii_disable
2269  *      Deconfigures an II pair
2270  *
2271  * Calling/Exit State:
2272  *      Returns 0 if the pair was disabled. Otherwise an error code
2273  *      is returned and any additional error information is copied
2274  *      out to the user.
2275  *
 * Description:
 *      Reads the user configuration structure and attempts to
 *      deconfigure the set named by its shadow_vol field or, when
 *      CV_IS_GROUP is set in flags, every member of the group of that name.
2279  */
2280 
2281 int
2282 _ii_disable(intptr_t arg, int ilp32, int *rvp)
2283 {
2284         dsw_ioctl_t uparms;
2285         dsw_ioctl32_t uparms32;
2286         _ii_overflow_t *op;
2287         int rc, rerr;
2288         spcs_s_info_t kstatus;
2289         uint64_t hash;
2290         int reclaim;
2291         _ii_lsthead_t *oldhead, **head;
2292         _ii_lstinfo_t *np, **xnp, *oldp;
2293 
2294         *rvp = 0;
2295 
2296         if (ilp32) {
2297                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2298                         return (EFAULT);
2299                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2300                 uparms.status = (spcs_s_info_t)uparms32.status;
2301         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2302                 return (EFAULT);
2303 
2304         kstatus = spcs_s_kcreate();
2305         if (kstatus == NULL)
2306                 return (ENOMEM);
2307 
2308         if (!uparms.shadow_vol[0])
2309                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2310 
2311         DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol,
2312             int, uparms.flags);
2313 
2314         /* group or single set? */
2315         if (uparms.flags & CV_IS_GROUP) {
2316                 hash = nsc_strhash(uparms.shadow_vol);
2317                 mutex_enter(&_ii_group_mutex);
2318                 for (head = &_ii_group_top; *head;
2319                     head = &((*head)->lst_next)) {
2320                         if ((hash == (*head)->lst_hash) &&
2321                             strncmp((*head)->lst_name, uparms.shadow_vol,
2322                             DSW_NAMELEN) == 0)
2323                                 break;
2324                 }
2325 
2326                 if (!*head) {
2327                         mutex_exit(&_ii_group_mutex);
2328                         return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2329                             DSW_EGNOTFOUND));
2330                 }
2331 
2332                 /* clear any overflow vol usage counts */
2333                 for (np = (*head)->lst_start; np; np = np->lst_next) {
2334                         if (np->lst_ip->bi_overflow) {
2335                                 np->lst_ip->bi_overflow->ii_detachcnt = 0;
2336                         }
2337                 }
2338 
2339                 /* now increment */
2340                 for (np = (*head)->lst_start; np; np = np->lst_next) {
2341                         if (np->lst_ip->bi_overflow) {
2342                                 ++np->lst_ip->bi_overflow->ii_detachcnt;
2343                         }
2344                 }
2345 
2346                 /* finally, disable all group members */
2347                 rerr = 0;
2348                 xnp = &(*head)->lst_start;
2349                 while (*xnp) {
2350                         op = (*xnp)->lst_ip->bi_overflow;
2351                         if (op) {
2352                                 reclaim = (op->ii_drefcnt == op->ii_detachcnt?
2353                                     NO_RECLAIM : RECLAIM);
2354                                 --op->ii_detachcnt;
2355                         }
2356 
2357                         /* clear out the group pointer */
2358                         (*xnp)->lst_ip->bi_group = NULL;
2359 
2360                         rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname,
2361                             &kstatus, reclaim);
2362                         if (rc) {
2363                                 /* restore group name */
2364                                 (*xnp)->lst_ip->bi_group = (*head)->lst_name;
2365 
2366                                 /* restore detachcnt */
2367                                 if (op) {
2368                                         ++op->ii_detachcnt;
2369                                 }
2370 
2371                                 /* don't delete branch */
2372                                 ++rerr;
2373                                 spcs_s_add(kstatus, rc);
2374 
2375                                 /* move forward in linked list */
2376                                 xnp = &(*xnp)->lst_next;
2377                         } else {
2378                                 oldp = (*xnp);
2379                                 *xnp = (*xnp)->lst_next;
2380                                 kmem_free(oldp, sizeof (_ii_lstinfo_t));
2381                         }
2382                 }
2383                 if (rerr) {
2384                         mutex_exit(&_ii_group_mutex);
2385                         return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2386                             DSW_EDISABLE));
2387                 }
2388                 /* no errors, all sets disabled, OK to free list head */
2389                 oldhead = *head;
2390                 *head = (*head)->lst_next;
2391                 kmem_free(oldhead, sizeof (_ii_lsthead_t));
2392                 mutex_exit(&_ii_group_mutex);
2393         } else {
2394                 /* only a single set is being disabled */
2395                 rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1);
2396                 if (rc)
2397                         return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
2398         }
2399 
2400         spcs_s_kfree(kstatus);
2401 
2402         return (0);
2403 }
2404 
2405 
2406 /*
2407  * _ii_stat
2408  *      Get state of the shadow.
2409  *
2410  * Calling/Exit State:
2411  *      Returns 0 on success, otherwise an error code is returned
2412  *      and any additional error information is copied out to the user.
2413  *      The size variable in the dsw_stat_t is set to the FBA size
2414  *      of the volume, the stat variable is set to the state, and
2415  *      the structure is copied out.
2416  */
2417 /*ARGSUSED*/
2418 int
2419 _ii_stat(intptr_t arg, int ilp32, int *rvp)
2420 {
2421         dsw_stat_t ustat;
2422         dsw_stat32_t ustat32;
2423         _ii_info_t *ip;
2424         spcs_s_info_t kstatus;
2425         char *group, *cluster;
2426 
2427         if (ilp32) {
2428                 if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
2429                         return (EFAULT);
2430                 II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t);
2431                 ustat.status = (spcs_s_info_t)ustat32.status;
2432         } else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
2433                 return (EFAULT);
2434 
2435         kstatus = spcs_s_kcreate();
2436         if (kstatus == NULL)
2437                 return (ENOMEM);
2438 
2439         if (!ustat.shadow_vol[0])
2440                 return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
2441 
2442         mutex_enter(&_ii_info_mutex);
2443         ip = _ii_find_set(ustat.shadow_vol);
2444         mutex_exit(&_ii_info_mutex);
2445         if (ip == NULL)
2446                 return (spcs_s_ocopyoutf(&kstatus, ustat.status,
2447                     DSW_ENOTFOUND));
2448 
2449         ustat.stat = ip->bi_flags;
2450         ustat.size = ip->bi_size;
2451         ustat.mtime = ip->bi_mtime;
2452 
2453         if (ilp32)
2454                 bzero(ustat32.overflow_vol, DSW_NAMELEN);
2455         else
2456                 bzero(ustat.overflow_vol, DSW_NAMELEN);
2457         if (ip->bi_overflow) {
2458                 (void) strncpy(ilp32 ? ustat32.overflow_vol :
2459                     ustat.overflow_vol, ip->bi_overflow->ii_volname,
2460                     DSW_NAMELEN);
2461         }
2462 
2463         ustat.shdsize = ip->bi_shdchks;
2464         if ((ip->bi_flags) & DSW_TREEMAP) {
2465                 ustat.shdused = ip->bi_shdchkused;
2466         } else {
2467                 ustat.shdused = 0;
2468         }
2469 
2470         /* copy over group and cluster associations */
2471         group = ilp32? ustat32.group_name : ustat.group_name;
2472         cluster = ilp32? ustat32.cluster_tag : ustat.cluster_tag;
2473         bzero(group, DSW_NAMELEN);
2474         bzero(cluster, DSW_NAMELEN);
2475         if (ip->bi_group)
2476                 (void) strncpy(group, ip->bi_group, DSW_NAMELEN);
2477         if (ip->bi_cluster)
2478                 (void) strncpy(cluster, ip->bi_cluster, DSW_NAMELEN);
2479 
2480         _ii_ioctl_done(ip);
2481         mutex_exit(&ip->bi_mutex);
2482 
2483         spcs_s_kfree(kstatus);
2484         if (ilp32) {
2485                 ustat32.stat = ustat.stat;
2486                 ustat32.size = ustat.size;
2487                 ustat32.shdsize = ustat.shdsize;
2488                 ustat32.shdused = ustat.shdused;
2489                 ustat32.mtime = ustat.mtime;
2490                 if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
2491                         return (EFAULT);
2492         } else if (copyout(&ustat, (void *)arg, sizeof (ustat)))
2493                 return (EFAULT);
2494 
2495         return (0);
2496 }
2497 
2498 
2499 /*
2500  * _ii_list
2501  *      List what shadow sets are currently configured.
2502  *
2503  * Calling/Exit State:
2504  *      Returns 0 on success, otherwise an error code is returned
2505  *      and any additional error information is copied out to the user.
2506  */
2507 /*ARGSUSED*/
2508 int
2509 _ii_list(intptr_t arg, int ilp32, int *rvp)
2510 {
2511         dsw_list_t ulist;
2512         dsw_list32_t ulist32;
2513         _ii_info_t *ip;
2514         dsw_config_t cf, *cfp;
2515         dsw_config32_t cf32, *cf32p;
2516         int rc;
2517         int used;
2518         spcs_s_info_t kstatus;
2519 
2520         if (ilp32) {
2521                 if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
2522                         return (EFAULT);
2523                 II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t);
2524                 ulist.status = (spcs_s_info_t)ulist32.status;
2525         } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
2526                 return (EFAULT);
2527 
2528         kstatus = spcs_s_kcreate();
2529         if (kstatus == NULL)
2530                 return (ENOMEM);
2531 
2532         cf32p = (dsw_config32_t *)(unsigned long)ulist32.list;
2533         cfp = ulist.list;
2534         ulist.list_used = 0;
2535         mutex_enter(&_ii_info_mutex);
2536         ip = _ii_info_top;
2537 
2538         DTRACE_PROBE1(_ii_list_count, int, ulist.list_size);
2539 
2540         for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) {
2541 
2542                 if (ip->bi_disabled)
2543                         continue;
2544 
2545                 mutex_enter(&ip->bi_mutex);
2546                 ip->bi_ioctl++;
2547                 if (ilp32) {
2548                         bzero(&cf32, sizeof (cf32));
2549                         cf32.flag = ip->bi_flags;
2550                         (void) strncpy(cf32.master_vol,
2551                             ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2552                         (void) strncpy(cf32.shadow_vol,
2553                             ip->bi_keyname, DSW_NAMELEN);
2554                         (void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd)
2555                             ? ii_pathname(ip->bi_bmpfd)
2556                             : "<offline_bitmap>", DSW_NAMELEN);
2557                         if (copyout(&cf32, (void *)cf32p, sizeof (cf32)))
2558                                 rc = EFAULT;
2559                         cf32p++;
2560                 } else {
2561                         bzero(&cf, sizeof (cf));
2562                         cf.flag = ip->bi_flags;
2563                         (void) strncpy(cf.master_vol,
2564                             ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2565                         (void) strncpy(cf.shadow_vol,
2566                             ip->bi_keyname, DSW_NAMELEN);
2567                         (void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd)
2568                             ? ii_pathname(ip->bi_bmpfd)
2569                             : "<offline_bitmap>", DSW_NAMELEN);
2570                         if (copyout(&cf, (void *)cfp, sizeof (cf)))
2571                                 rc = EFAULT;
2572                         cfp++;
2573                 }
2574                 _ii_ioctl_done(ip);
2575                 mutex_exit(&ip->bi_mutex);
2576                 used++;
2577         }
2578         mutex_exit(&_ii_info_mutex);
2579 
2580         spcs_s_kfree(kstatus);
2581         if (rc)
2582                 return (rc);
2583 
2584         ulist.list_used = used;
2585         if (ilp32) {
2586                 ulist32.list_used = ulist.list_used;
2587                 if (copyout(&ulist32, (void *)arg, sizeof (ulist32)))
2588                         return (EFAULT);
2589         } else if (copyout(&ulist, (void *)arg, sizeof (ulist)))
2590                 return (EFAULT);
2591 
2592         return (0);
2593 }
2594 
2595 /*
2596  * _ii_listlen
2597  *      Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST
2598  *      ioctl calls would return.
2599  *
2600  * Calling/Exit State:
2601  *      Returns 0 on success, otherwise an error code is returned.
2602  *      Result is returned as successful ioctl value.
2603  */
2604 /*ARGSUSED*/
2605 int
2606 _ii_listlen(int cmd, int ilp32, int *rvp)
2607 {
2608         _ii_info_t *ip;
2609         _ii_overflow_t *op;
2610         int count = 0;
2611 
2612         switch (cmd) {
2613 
2614         case DSWIOC_LISTLEN:
2615                 mutex_enter(&_ii_info_mutex);
2616                 for (ip = _ii_info_top; ip; ip = ip->bi_next) {
2617                         if (ip->bi_disabled == 0) {
2618                                 count++;
2619                         }
2620                 }
2621                 mutex_exit(&_ii_info_mutex);
2622                 break;
2623         case DSWIOC_OLISTLEN:
2624                 mutex_enter(&_ii_overflow_mutex);
2625                 for (op = _ii_overflow_top; op; op = op->ii_next)
2626                         count++;
2627                 mutex_exit(&_ii_overflow_mutex);
2628                 break;
2629         default:
2630                 return (EINVAL);
2631         }
2632         *rvp = count;
2633 
2634         return (0);
2635 }
2636 
2637 /*
2638  * _ii_report_bmp
2639  *
2640  *      Report to the user daemon that the bitmap has gone bad
2641  */
2642 static int
2643 _ii_report_bmp(_ii_info_t *ip)
2644 {
2645         int rc;
2646         struct nskernd *nsk;
2647 
2648         nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP);
2649         if (!nsk) {
2650                 return (ENOMEM);
2651         }
2652         nsk->command = NSKERND_IIBITMAP;
2653         nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE);
2654         (void) strncpy(nsk->char1, ip->bi_keyname,
2655             min(DSW_NAMELEN, NSC_MAXPATH));
2656 
2657         rc = nskernd_get(nsk);
2658         if (rc == 0) {
2659                 rc = (int)nsk->data1;
2660         }
2661         if (rc == 0) {
2662                 DTRACE_PROBE(_ii_report_bmp_end);
2663         } else {
2664                 DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc);
2665         }
2666         kmem_free(nsk, sizeof (*nsk));
2667         return (rc);
2668 }
2669 
2670 /*
2671  * _ii_offline
2672  *      Set volume offline flag(s) for a shadow.
2673  *
2674  * Calling/Exit State:
2675  *      Returns 0 on success, otherwise an error code is returned
2676  *      and any additional error information is copied out to the user.
2677  */
2678 /*ARGSUSED*/
2679 int
2680 _ii_offline(intptr_t arg, int ilp32, int *rvp)
2681 {
2682         dsw_ioctl_t uparms;
2683         dsw_ioctl32_t uparms32;
2684         _ii_info_t *ip;
2685         int rc;
2686         spcs_s_info_t kstatus;
2687 
2688         if (ilp32) {
2689                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2690                         return (EFAULT);
2691                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2692                 uparms.status = (spcs_s_info_t)uparms32.status;
2693         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2694                 return (EFAULT);
2695 
2696         kstatus = spcs_s_kcreate();
2697         if (kstatus == NULL)
2698                 return (ENOMEM);
2699 
2700         if (!uparms.shadow_vol[0])
2701                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2702 
2703         mutex_enter(&_ii_info_mutex);
2704         ip = _ii_find_set(uparms.shadow_vol);
2705         mutex_exit(&_ii_info_mutex);
2706         if (ip == NULL)
2707                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2708                     DSW_ENOTFOUND));
2709 
2710         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
2711                 _ii_ioctl_done(ip);
2712                 mutex_exit(&ip->bi_mutex);
2713                 spcs_s_add(kstatus, rc);
2714                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2715                     DSW_ERSRVFAIL));
2716         }
2717 
2718         mutex_exit(&ip->bi_mutex);
2719         _ii_error(ip, uparms.flags & DSW_OFFLINE);
2720         mutex_enter(&ip->bi_mutex);
2721         _ii_ioctl_done(ip);
2722         mutex_exit(&ip->bi_mutex);
2723 
2724         _ii_rlse_devs(ip, BMP);
2725 
2726         spcs_s_kfree(kstatus);
2727 
2728         return (0);
2729 }
2730 
2731 
2732 /*
2733  * _ii_wait
2734  *      Wait for a copy to complete.
2735  *
2736  * Calling/Exit State:
2737  *      Returns 0 if the copy completed, otherwise error code.
2738  *
2739  */
2740 /*ARGSUSED*/
2741 int
2742 _ii_wait(intptr_t arg, int ilp32, int *rvp)
2743 {
2744         dsw_ioctl_t uparms;
2745         dsw_ioctl32_t uparms32;
2746         _ii_info_t *ip;
2747         int rc = 0;
2748         spcs_s_info_t kstatus;
2749 
2750         if (ilp32) {
2751                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2752                         return (EFAULT);
2753                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2754                 uparms.status = (spcs_s_info_t)uparms32.status;
2755                 uparms.pid = uparms32.pid;
2756         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2757                 return (EFAULT);
2758 
2759         kstatus = spcs_s_kcreate();
2760         if (kstatus == NULL)
2761                 return (ENOMEM);
2762 
2763         if (!uparms.shadow_vol[0])
2764                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2765 
2766         mutex_enter(&_ii_info_mutex);
2767         ip = _ii_find_set(uparms.shadow_vol);
2768         mutex_exit(&_ii_info_mutex);
2769         if (ip == NULL)
2770                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2771                     DSW_ENOTFOUND));
2772 
2773         while (ip->bi_flags & DSW_COPYINGP) {
2774                 if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
2775                         /* Awoken by a signal */
2776                         rc = EINTR;
2777                         break;
2778                 }
2779         }
2780 
2781         /* Is this an attempt to unlock the copy/update PID? */
2782         if (uparms.flags & CV_LOCK_PID) {
2783                 if (ip->bi_locked_pid == 0) {
2784                         rc = DSW_ENOTLOCKED;
2785                 } else if (uparms.pid == -1) {
2786                         cmn_err(CE_WARN, "!ii: Copy/Update PID %d, cleared",
2787                             ip->bi_locked_pid);
2788                         ip->bi_locked_pid = 0;
2789                 } else if (uparms.pid != ip->bi_locked_pid) {
2790                         rc = DSW_EINUSE;
2791                 } else {
2792                         ip->bi_locked_pid = 0;
2793                 }
2794         }
2795 
2796         _ii_ioctl_done(ip);
2797         mutex_exit(&ip->bi_mutex);
2798 
2799         return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
2800 }
2801 
2802 
2803 static int
2804 _ii_reset_mstvol(_ii_info_t *ip)
2805 {
2806         _ii_info_t *xip;
2807 
2808         if (!NSHADOWS(ip))
2809                 return (DSW_COPYINGS | DSW_COPYINGP);
2810 
2811         /* check for siblings updating master */
2812         for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
2813                 if (xip == ip)
2814                         continue;
2815                 /* check if master is okay */
2816                 if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) {
2817                         return (0);
2818                 }
2819         }
2820 
2821         return (DSW_COPYINGS | DSW_COPYINGP);
2822 }
2823 
2824 /*
2825  * _ii_reset
2826  *      Reset offlined underlying volumes
2827  *
2828  * Calling/Exit State:
2829  *      Returns 0 on success, otherwise an error code is returned
2830  *      and any additional error information is copied out to the user.
2831  */
2832 /*ARGSUSED*/
2833 int
2834 _ii_reset(intptr_t arg, int ilp32, int *rvp)
2835 {
2836         dsw_ioctl_t uparms;
2837         dsw_ioctl32_t uparms32;
2838         _ii_info_t *ip;
2839         nsc_buf_t *tmp = NULL;
2840         int rc;
2841         int flags;
2842         ii_header_t *bm_header;
2843         spcs_s_info_t kstatus;
2844         int rtype;
2845 
2846         if (ilp32) {
2847                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2848                         return (EFAULT);
2849                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2850                 uparms.status = (spcs_s_info_t)uparms32.status;
2851         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2852                 return (EFAULT);
2853 
2854         kstatus = spcs_s_kcreate();
2855         if (kstatus == NULL)
2856                 return (ENOMEM);
2857 
2858         if (!uparms.shadow_vol[0])
2859                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2860 
2861         mutex_enter(&_ii_info_mutex);
2862         ip = _ii_find_set(uparms.shadow_vol);
2863         mutex_exit(&_ii_info_mutex);
2864         if (ip == NULL)
2865                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2866                     DSW_ENOTFOUND));
2867 
2868         mutex_exit(&ip->bi_mutex);
2869 
2870         /* Figure out what to do according to what was flagged as  */
2871 
2872         if ((ip->bi_flags & DSW_OFFLINE) == 0) {
2873                 /* Nothing offline, so no op */
2874                 mutex_enter(&ip->bi_mutex);
2875                 _ii_ioctl_done(ip);
2876                 mutex_exit(&ip->bi_mutex);
2877                 spcs_s_kfree(kstatus);
2878                 return (0);
2879         }
2880 
2881         if (!ip->bi_bmpfd) {
2882                 /* No bitmap fd, can't do anything */
2883                 mutex_enter(&ip->bi_mutex);
2884                 _ii_ioctl_done(ip);
2885                 mutex_exit(&ip->bi_mutex);
2886                 spcs_s_kfree(kstatus);
2887                 return (DSW_EHDRBMP);
2888         }
2889 
2890         rtype = MSTR|SHDR|BMP;
2891         if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
2892                 mutex_enter(&ip->bi_mutex);
2893                 _ii_ioctl_done(ip);
2894                 mutex_exit(&ip->bi_mutex);
2895                 spcs_s_add(kstatus, rc);
2896                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2897                     DSW_ERSRVFAIL));
2898         }
2899 
2900         /*
2901          * Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE
2902          */
2903         II_READ_START(ip, bitmap);
2904         rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)),
2905             NSC_RDWRBUF, &tmp);
2906         II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
2907         if (!II_SUCCESS(rc)) {
2908                 _ii_rlse_devs(ip, rtype);
2909                 mutex_enter(&ip->bi_mutex);
2910                 _ii_ioctl_done(ip);
2911                 mutex_exit(&ip->bi_mutex);
2912                 if (tmp)
2913                         (void) nsc_free_buf(tmp);
2914                 _ii_error(ip, DSW_BMPOFFLINE);
2915                 spcs_s_add(kstatus, rc);
2916                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
2917         }
2918 
2919         bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr;
2920         if (bm_header == NULL) {
2921                 _ii_rlse_devs(ip, rtype);
2922                 mutex_enter(&ip->bi_mutex);
2923                 _ii_ioctl_done(ip);
2924                 mutex_exit(&ip->bi_mutex);
2925                 if (tmp)
2926                         (void) nsc_free_buf(tmp);
2927                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
2928         }
2929 
2930         flags = ip->bi_flags & ~DSW_COPY_FLAGS;
2931         if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) {
2932                 if (((flags & DSW_SHDOFFLINE) == 0) &&
2933                     ((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) {
2934                         /* Shadow was OK but master was offline */
2935                         flags |= _ii_reset_mstvol(ip);
2936                 } else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
2937                         /* Shadow was offline, don't care what the master was */
2938                         flags |= (DSW_COPYINGM | DSW_COPYINGP);
2939                 }
2940         }
2941         if (ip->bi_flags & DSW_VOVERFLOW) {
2942                 ip->bi_flags &= ~DSW_VOVERFLOW;
2943                 ip->bi_flags |= DSW_FRECLAIM;
2944         }
2945         flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW);
2946         if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) {
2947                 /* free any overflow allocation */
2948                 ii_overflow_free(ip, INIT_OVR);
2949                 /* Bitmap now OK, so set up new bitmap header */
2950                 (void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd),
2951                     DSW_NAMELEN);
2952                 (void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd),
2953                     DSW_NAMELEN);
2954                 (void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd),
2955                     DSW_NAMELEN);
2956                 if (ip->bi_cluster) {
2957                         (void) strncpy(bm_header->clstr_name, ip->bi_cluster,
2958                             DSW_NAMELEN);
2959                 }
2960                 if (ip->bi_group) {
2961                         (void) strncpy(bm_header->group_name, ip->bi_group,
2962                             DSW_NAMELEN);
2963                 }
2964                 bm_header->ii_type = (flags & DSW_GOLDEN) ?
2965                     DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
2966                 bm_header->ii_magic = DSW_DIRTY;
2967                 bm_header->ii_version = II_HEADER_VERSION;
2968                 bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
2969                 bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
2970                 bm_header->ii_throttle_delay = ip->bi_throttle_delay;
2971                 bm_header->ii_throttle_unit = ip->bi_throttle_unit;
2972                 ip->bi_shdfba = bm_header->ii_shdfba;
2973                 ip->bi_copyfba = bm_header->ii_copyfba;
2974         } else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
2975                 /* bitmap didn't go offline, but shadow did */
2976                 if (ip->bi_overflow) {
2977                         ii_overflow_free(ip, RECLAIM);
2978                 }
2979         }
2980         _ii_lock_chunk(ip, II_NULLCHUNK);
2981         mutex_enter(&ip->bi_mutex);
2982         II_FLAG_ASSIGN(flags, ip);
2983 
2984         mutex_exit(&ip->bi_mutex);
2985         rc = ii_fill_copy_bmp(ip);
2986         if (rc == 0)
2987                 rc = II_ZEROBM(ip);
2988         if (rc == 0) {
2989                 if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
2990                         /* just clear bitmaps for dependent copy */
2991                         if (ip->bi_flags & DSW_TREEMAP) {
2992                                 bm_header->ii_state = ip->bi_flags;
2993                                 mutex_enter(&ip->bi_mutex);
2994                                 rc = _ii_bm_header_put(bm_header, ip, tmp);
2995                                 mutex_exit(&ip->bi_mutex);
2996                                 tmp = NULL;
2997                                 if (rc == 0) {
2998                                         rc = ii_tinit(ip);
2999                                         if (rc == 0) {
3000                                                 mutex_enter(&ip->bi_mutex);
3001                                                 bm_header =
3002                                                     _ii_bm_header_get(ip, &tmp);
3003                                                 mutex_exit(&ip->bi_mutex);
3004                                         }
3005                                 }
3006                         }
3007 
3008                         if (rc == 0)
3009                                 II_FLAG_CLRX(DSW_COPY_FLAGS, ip);
3010                         /*
3011                          * if copy flags were set, another process may be
3012                          * waiting
3013                          */
3014                         if (rc == 0 && (flags & DSW_COPYINGP))
3015                                 cv_broadcast(&ip->bi_copydonecv);
3016 
3017                         if (rc == 0)
3018                                 rc = II_COPYBM(ip);
3019                 }
3020         }
3021         _ii_unlock_chunk(ip, II_NULLCHUNK);
3022         if (rc) {
3023                 if (tmp)
3024                         _ii_bm_header_free(bm_header, ip, tmp);
3025                 mutex_enter(&ip->bi_mutex);
3026                 _ii_ioctl_done(ip);
3027                 mutex_exit(&ip->bi_mutex);
3028                 _ii_rlse_devs(ip, rtype);
3029                 spcs_s_add(kstatus, rc);
3030                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
3031         }
3032         bm_header->ii_state = ip->bi_flags;
3033         mutex_enter(&ip->bi_mutex);
3034         rc = _ii_bm_header_put(bm_header, ip, tmp);
3035         if (!II_SUCCESS(rc)) {
3036                 _ii_ioctl_done(ip);
3037                 mutex_exit(&ip->bi_mutex);
3038                 _ii_rlse_devs(ip, rtype);
3039                 spcs_s_add(kstatus, rc);
3040                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
3041         }
3042 
3043         /* check with RDC */
3044         if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ?
3045             CV_SHD2MST : 0, 1)) {
3046                 _ii_ioctl_done(ip);
3047                 mutex_exit(&ip->bi_mutex);
3048                 _ii_rlse_devs(ip, rtype);
3049                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3050         }
3051 
3052         /* don't perform copy for dependent shadows */
3053         if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
3054                 _ii_ioctl_done(ip);
3055                 mutex_exit(&ip->bi_mutex);
3056                 _ii_rlse_devs(ip, rtype);
3057                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3058         }
3059 
3060         mutex_exit(&ip->bi_mutex);
3061         /* _ii_copyvol calls _ii_ioctl_done() */
3062         if (ip->bi_flags & DSW_COPYINGS)
3063                 rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1);
3064         else if (ip->bi_flags & DSW_COPYINGM)
3065                 rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
3066         else {
3067                 mutex_enter(&ip->bi_mutex);
3068                 _ii_ioctl_done(ip);
3069                 mutex_exit(&ip->bi_mutex);
3070         }
3071 
3072         _ii_rlse_devs(ip, rtype);
3073 
3074         return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3075 }
3076 
3077 
3078 /*
3079  * _ii_version
3080  *      Get version of the InstantImage module.
3081  *
3082  * Calling/Exit State:
3083  *      Returns 0 on success, otherwise EFAULT is returned.
3084  *      The major and minor revisions are copied out to the user if
3085  *      successful.
3086  */
3087 /*ARGSUSED*/
3088 int
3089 _ii_version(intptr_t arg, int ilp32, int *rvp)
3090 {
3091         dsw_version_t uversion;
3092         dsw_version32_t uversion32;
3093 
3094         if (ilp32) {
3095                 if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0)
3096                         return (EFAULT);
3097 
3098                 uversion32.major = dsw_major_rev;
3099                 uversion32.minor = dsw_minor_rev;
3100                 uversion32.micro = dsw_micro_rev;
3101                 uversion32.baseline = dsw_baseline_rev;
3102 
3103                 if (copyout(&uversion32, (void *)arg, sizeof (uversion32)))
3104                         return (EFAULT);
3105         } else {
3106                 if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0)
3107                         return (EFAULT);
3108 
3109                 uversion.major = dsw_major_rev;
3110                 uversion.minor = dsw_minor_rev;
3111                 uversion.micro = dsw_micro_rev;
3112                 uversion.baseline = dsw_baseline_rev;
3113 
3114                 if (copyout(&uversion, (void *)arg, sizeof (uversion)))
3115                         return (EFAULT);
3116         }
3117 
3118         return (0);
3119 }
3120 
3121 /*
3122  * _ii_copyparm
3123  *      Get and set copy parameters.
3124  *
3125  * Calling/Exit State:
3126  *      Returns 0 on success, otherwise EFAULT is returned.
3127  *      The previous values are returned to the user.
3128  */
3129 /*ARGSUSED*/
3130 int
3131 _ii_copyparm(intptr_t arg, int ilp32, int *rvp)
3132 {
3133         dsw_copyp_t copyp;
3134         dsw_copyp32_t copyp32;
3135         spcs_s_info_t kstatus;
3136         _ii_info_t *ip;
3137         int rc = 0;
3138         int tmp;
3139 
3140         if (ilp32) {
3141                 if (copyin((void *)arg, &copyp32, sizeof (copyp32)) < 0)
3142                         return (EFAULT);
3143                 II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t);
3144                 copyp.status = (spcs_s_info_t)copyp32.status;
3145         } else if (copyin((void *)arg, &copyp, sizeof (copyp)) < 0)
3146                         return (EFAULT);
3147 
3148         kstatus = spcs_s_kcreate();
3149         if (kstatus == NULL)
3150                 return (ENOMEM);
3151 
3152         if (!copyp.shadow_vol[0])
3153                 return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY));
3154 
3155         mutex_enter(&_ii_info_mutex);
3156         ip = _ii_find_set(copyp.shadow_vol);
3157         mutex_exit(&_ii_info_mutex);
3158         if (ip == NULL)
3159                 return (spcs_s_ocopyoutf(&kstatus, copyp.status,
3160                     DSW_ENOTFOUND));
3161 
3162         tmp = ip->bi_throttle_delay;
3163         if (copyp.copy_delay != -1) {
3164                 if (copyp.copy_delay >= MIN_THROTTLE_DELAY &&
3165                     copyp.copy_delay <= MAX_THROTTLE_DELAY)
3166                         ip->bi_throttle_delay = copyp.copy_delay;
3167                 else {
3168                         cmn_err(CE_WARN, "!ii: delay out of range %d",
3169                             copyp.copy_delay);
3170                         rc = EINVAL;
3171                 }
3172         }
3173         copyp.copy_delay = tmp;
3174 
3175         tmp = ip->bi_throttle_unit;
3176         if (copyp.copy_unit != -1) {
3177                 if (copyp.copy_unit >= MIN_THROTTLE_UNIT &&
3178                     copyp.copy_unit <= MAX_THROTTLE_UNIT) {
3179                         if (rc != EINVAL)
3180                                 ip->bi_throttle_unit = copyp.copy_unit;
3181                 } else {
3182                         cmn_err(CE_WARN, "!ii: unit out of range %d",
3183                             copyp.copy_unit);
3184                         if (rc != EINVAL) {
3185                                 rc = EINVAL;
3186                                 ip->bi_throttle_delay = copyp.copy_delay;
3187                         }
3188                 }
3189         }
3190         copyp.copy_unit = tmp;
3191 
3192         _ii_ioctl_done(ip);
3193         mutex_exit(&ip->bi_mutex);
3194 
3195         if (ilp32) {
3196                 copyp32.copy_delay = copyp.copy_delay;
3197                 copyp32.copy_unit = copyp.copy_unit;
3198                 if (copyout(&copyp32, (void *)arg, sizeof (copyp32)) < 0)
3199                         return (EFAULT);
3200         } else if (copyout(&copyp, (void *)arg, sizeof (copyp)))
3201                         return (EFAULT);
3202 
3203         return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc));
3204 }
3205 
3206 
3207 /*
3208  * _ii_suspend_vol
3209  *      suspend an individual InstantImage group
3210  *
3211  * Calling/Exit State:
3212  *      Returns 0 on success, nonzero otherwise
3213  */
3214 
3215 int
3216 _ii_suspend_vol(_ii_info_t *ip)
3217 {
3218         _ii_info_t **xip;
3219         int copy_flag;
3220         int rc;
3221         nsc_buf_t *tmp = NULL;
3222         ii_header_t *bm_header;
3223 
3224         copy_flag = ip->bi_flags & DSW_COPY_FLAGS;
3225 
3226         _ii_stopvol(ip);
3227         ASSERT(total_ref(ip) == 0);
3228 
3229         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0)
3230                 return (rc);
3231 
3232         /* this rw_enter forces us to drain all active IO */
3233         rw_enter(&ip->bi_linkrw, RW_WRITER);
3234         rw_exit(&ip->bi_linkrw);
3235 
3236         mutex_enter(&_ii_info_mutex);
3237         for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3238                 if (ip == *xip)
3239                         break;
3240         }
3241         *xip = ip->bi_next;
3242         mutex_exit(&_ii_info_mutex);
3243 
3244         rc = II_SAVE_BMP(ip, 1);
3245         mutex_enter(&ip->bi_mutex);
3246         if (rc == 0)
3247                 bm_header = _ii_bm_header_get(ip, &tmp);
3248         if (rc == 0 && bm_header) {
3249                 bm_header->ii_magic = DSW_CLEAN;
3250                 bm_header->ii_state |= copy_flag;
3251                 bm_header->ii_throttle_delay = ip->bi_throttle_delay;
3252                 bm_header->ii_throttle_unit = ip->bi_throttle_unit;
3253                 /* copy over the mtime */
3254                 bm_header->ii_mtime = ip->bi_mtime;
3255                 /* write it to disk */
3256                 rc = _ii_bm_header_put(bm_header, ip, tmp);
3257         }
3258         --iigkstat.num_sets.value.ul;
3259         mutex_exit(&ip->bi_mutex);
3260 
3261         ii_overflow_free(ip, NO_RECLAIM);
3262         _ii_rlse_devs(ip, BMP);
3263 
3264         ii_sibling_free(ip);
3265 
3266         return (rc);
3267 }
3268 
3269 /*
3270  * _ii_suspend_cluster
3271  *      Cluster resource group is switching over to another node, so
3272  *      all shadowed volumes in that group are suspended.
3273  *
3274  * Returns 0 on success, or ESRCH if the name of the cluster resource
3275  * group couldn't be found.
3276  */
3277 int
3278 _ii_suspend_cluster(char *shadow_vol)
3279 {
3280         int found, last;
3281         uint64_t hash;
3282         _ii_info_t *ip;
3283         _ii_lsthead_t **cp, *xcp;
3284         _ii_lstinfo_t **np, *xnp;
3285 
3286         /* find appropriate cluster list */
3287         mutex_enter(&_ii_cluster_mutex);
3288         hash = nsc_strhash(shadow_vol);
3289         for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) {
3290                 if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol,
3291                     (*cp)->lst_name, DSW_NAMELEN) == 0)
3292                         break;
3293         }
3294 
3295         if (!*cp) {
3296                 mutex_exit(&_ii_cluster_mutex);
3297                 return (DSW_ECNOTFOUND);
3298         }
3299 
3300         found = 1;
3301         last = 0;
3302         while (found && !last) {
3303                 found = 0;
3304 
3305                 mutex_enter(&_ii_info_mutex);
3306                 for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) {
3307                         ip = (*np)->lst_ip;
3308 
3309                         if (ip->bi_disabled)
3310                                 continue;
3311 
3312                         found++;
3313 
3314                         ip->bi_disabled = 1;
3315                         if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3316                                 ip->bi_flags &= (~DSW_COPYING);
3317                                 ip->bi_state |= DSW_MULTIMST;
3318                         }
3319                         mutex_exit(&_ii_info_mutex);
3320 
3321                         xnp = *np;
3322                         *np = (*np)->lst_next;
3323                         kmem_free(xnp, sizeof (_ii_lstinfo_t));
3324                         ip->bi_cluster = NULL;
3325 
3326                         (void) _ii_suspend_vol(ip);
3327                         break;
3328                 }
3329                 if (found == 0)
3330                         mutex_exit(&_ii_info_mutex);
3331                 else if (!(*cp)->lst_start) {
3332                         xcp = *cp;
3333                         *cp = (*cp)->lst_next;
3334                         kmem_free(xcp, sizeof (_ii_lsthead_t));
3335                         last = 1;
3336                 }
3337         }
3338         mutex_exit(&_ii_cluster_mutex);
3339 
3340         return (0);
3341 }
3342 
3343 /*
3344  * _ii_shutdown
3345  *      System is shutting down, so all shadowed volumes are suspended.
3346  *
3347  *      This always succeeds, so always returns 0.
3348  */
3349 
3350 /* ARGSUSED */
3351 
3352 int
3353 _ii_shutdown(intptr_t arg, int *rvp)
3354 {
3355         _ii_info_t **xip, *ip;
3356         int found;
3357 
3358         *rvp = 0;
3359 
3360         _ii_shutting_down = 1;
3361 
3362         /* Go through the list until only disabled entries are found */
3363 
3364         found = 1;
3365         while (found) {
3366                 found = 0;
3367 
3368                 mutex_enter(&_ii_info_mutex);
3369                 for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3370                         ip = *xip;
3371                         if (ip->bi_disabled) {
3372                                 /* Also covers not fully configured yet */
3373                                 continue;
3374                         }
3375                         found++;
3376 
3377                         ip->bi_disabled = 1;
3378                         mutex_exit(&_ii_info_mutex);
3379 
3380                         (void) _ii_suspend_vol(ip);
3381 
3382                         break;
3383                 }
3384                 if (found == 0)
3385                         mutex_exit(&_ii_info_mutex);
3386         }
3387 
3388         _ii_shutting_down = 0;
3389 
3390         return (0);
3391 }
3392 
3393 /*
3394  * _ii_suspend
3395  *      Suspend an InstantImage, saving its state to allow a subsequent resume.
3396  *
3397  * Calling/Exit State:
3398  *      Returns 0 if the pair was suspended. Otherwise an error code
3399  *      is returned and any additional error information is copied
3400  *      out to the user.
3401  */
3402 
3403 /* ARGSUSED */
3404 
3405 int
3406 _ii_suspend(intptr_t arg, int ilp32, int *rvp)
3407 {
3408         dsw_ioctl_t uparms;
3409         dsw_ioctl32_t uparms32;
3410         _ii_info_t *ip;
3411         int rc;
3412         spcs_s_info_t kstatus;
3413 
3414         *rvp = 0;
3415 
3416         if (ilp32) {
3417                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3418                         return (EFAULT);
3419                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3420                 uparms.status = (spcs_s_info_t)uparms32.status;
3421         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3422                 return (EFAULT);
3423 
3424         kstatus = spcs_s_kcreate();
3425         if (kstatus == NULL)
3426                 return (ENOMEM);
3427 
3428         if (!uparms.shadow_vol[0])
3429                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3430 
3431         if ((uparms.flags & CV_IS_CLUSTER) != 0) {
3432                 rc = _ii_suspend_cluster(uparms.shadow_vol);
3433         } else {
3434                 mutex_enter(&_ii_info_mutex);
3435                 ip = _ii_find_set(uparms.shadow_vol);
3436                 if (ip == NULL) {
3437                         mutex_exit(&_ii_info_mutex);
3438                         return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3439                             DSW_ENOTFOUND));
3440                 }
3441 
3442                 ip->bi_disabled = 1;
3443                 if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3444                         ip->bi_flags &= (~DSW_COPYING);
3445                         ip->bi_state |= DSW_MULTIMST;
3446                 }
3447                 mutex_exit(&_ii_info_mutex);
3448 
3449                 _ii_ioctl_done(ip);
3450                 mutex_exit(&ip->bi_mutex);
3451 
3452                 rc = _ii_suspend_vol(ip);
3453         }
3454 
3455         return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3456 }
3457 
3458 
3459 /*
3460  * _ii_abort
3461  *      Stop any copying process for shadow.
3462  *
3463  * Calling/Exit State:
3464  *      Returns 0 if the abort succeeded. Otherwise an error code
3465  *      is returned and any additional error information is copied
3466  *      out to the user.
3467  */
3468 
3469 /* ARGSUSED */
3470 
3471 int
3472 _ii_abort(intptr_t arg, int ilp32, int *rvp)
3473 {
3474         dsw_ioctl_t uabort;
3475         dsw_ioctl32_t uabort32;
3476         _ii_info_t *ip;
3477         int rc;
3478         spcs_s_info_t kstatus;
3479 
3480         if (ilp32) {
3481                 if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0)
3482                         return (EFAULT);
3483                 II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t);
3484                 uabort.status = (spcs_s_info_t)uabort32.status;
3485         } else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0)
3486                 return (EFAULT);
3487 
3488         kstatus = spcs_s_kcreate();
3489         if (kstatus == NULL)
3490                 return (ENOMEM);
3491 
3492         if (!uabort.shadow_vol[0])
3493                 return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY));
3494 
3495         mutex_enter(&_ii_info_mutex);
3496         ip = _ii_find_set(uabort.shadow_vol);
3497         mutex_exit(&_ii_info_mutex);
3498         if (ip == NULL)
3499                 return (spcs_s_ocopyoutf(&kstatus, uabort.status,
3500                     DSW_ENOTFOUND));
3501 
3502         mutex_exit(&ip->bi_mutex);
3503 
3504         rc = _ii_stopcopy(ip);
3505 
3506         mutex_enter(&ip->bi_mutex);
3507         _ii_ioctl_done(ip);
3508         mutex_exit(&ip->bi_mutex);
3509 
3510         return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc));
3511 }
3512 
3513 
3514 /*
3515  * _ii_segment
3516  *      Copy out II pair bitmaps (cpy, shd, idx) in segments
3517  *
3518  * Calling/Exit State:
3519  *      Returns 0 if the operation succeeded. Otherwise an error code
3520  *      is returned and any additional error information is copied
3521  *      out to the user.
3522  *
3523  */
3524 int
3525 _ii_segment(intptr_t arg, int ilp32, int *rvp)
3526 {
3527         dsw_segment_t usegment;
3528         dsw_segment32_t usegment32;
3529         _ii_info_t *ip;
3530         int rc, size;
3531         spcs_s_info_t kstatus;
3532         int32_t bi_idxfba;
3533 
3534         *rvp = 0;
3535 
3536         if (ilp32) {
3537                 if (copyin((void *)arg, &usegment32, sizeof (usegment32)))
3538                         return (EFAULT);
3539                 usegment.status = (spcs_s_info_t)usegment32.status;
3540                 bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN);
3541                 usegment.seg_number = (unsigned)usegment32.seg_number;
3542                 usegment.shd_bitmap =
3543                     (unsigned char   *)(unsigned long)usegment32.shd_bitmap;
3544                 usegment.shd_size = usegment32.shd_size;
3545                 usegment.cpy_bitmap =
3546                     (unsigned char   *)(unsigned long)usegment32.cpy_bitmap;
3547                 usegment.cpy_size = usegment32.cpy_size;
3548                 usegment.idx_bitmap =
3549                     (unsigned char   *)(unsigned long)usegment32.idx_bitmap;
3550                 usegment.idx_size = usegment32.idx_size;
3551         } else if (copyin((void *)arg, &usegment, sizeof (usegment)))
3552                 return (EFAULT);
3553 
3554         kstatus = spcs_s_kcreate();
3555         if (kstatus == NULL)
3556                 return (ENOMEM);
3557 
3558         if (usegment.shadow_vol[0]) {
3559                 mutex_enter(&_ii_info_mutex);
3560                 ip = _ii_find_set(usegment.shadow_vol);
3561                 mutex_exit(&_ii_info_mutex);
3562                 if (ip == NULL)
3563                         return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3564                             DSW_ENOTFOUND));
3565         } else
3566                 return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3567                     DSW_EEMPTY));
3568 
3569         mutex_exit(&ip->bi_mutex);
3570 
3571         size = ((((ip->bi_size + (DSW_SIZE-1))
3572             / DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS;
3573         bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba);
3574         if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) ||
3575             (usegment.shd_size > size) ||
3576             (usegment.cpy_size > size) ||
3577             (!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) {
3578                 _ii_ioctl_done(ip);
3579                 return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3580                     DSW_EMISMATCH));
3581         }
3582 
3583         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3584                 mutex_enter(&ip->bi_mutex);
3585                 _ii_ioctl_done(ip);
3586                 mutex_exit(&ip->bi_mutex);
3587                 spcs_s_add(kstatus, rc);
3588                 return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3589                     DSW_ERSRVFAIL));
3590         }
3591 
3592         if (usegment.shd_bitmap && usegment.shd_size > 0)
3593                 rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number,
3594                     usegment.shd_bitmap, usegment.shd_size);
3595         if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0)
3596                 rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number,
3597                     usegment.cpy_bitmap, usegment.cpy_size);
3598         if (!(ip->bi_flags & DSW_GOLDEN)) {
3599                 if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0)
3600                         rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32,
3601                             usegment.idx_bitmap, usegment.idx_size);
3602         }
3603 
3604         _ii_rlse_devs(ip, BMP);
3605         mutex_enter(&ip->bi_mutex);
3606         _ii_ioctl_done(ip);
3607         mutex_exit(&ip->bi_mutex);
3608         if (rc) {
3609                 spcs_s_add(kstatus, rc);
3610                 return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO));
3611         }
3612 
3613         spcs_s_kfree(kstatus);
3614         return (0);
3615 }
3616 
3617 
3618 /*
3619  * _ii_bitmap
3620  *      Copy out II pair bitmaps to user program
3621  *
3622  * Calling/Exit State:
3623  *      Returns 0 if the operation succeeded. Otherwise an error code
3624  *      is returned and any additional error information is copied
3625  *      out to the user.
3626  */
3627 
3628 int
3629 _ii_bitmap(intptr_t arg, int ilp32, int *rvp)
3630 {
3631         dsw_bitmap_t ubitmap;
3632         dsw_bitmap32_t ubitmap32;
3633         _ii_info_t *ip;
3634         int rc;
3635         spcs_s_info_t kstatus;
3636 
3637         *rvp = 0;
3638 
3639         if (ilp32) {
3640                 if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)))
3641                         return (EFAULT);
3642                 ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3643                 bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN);
3644                 ubitmap.shd_bitmap =
3645                     (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3646                 ubitmap.shd_size = ubitmap32.shd_size;
3647                 ubitmap.copy_bitmap =
3648                     (unsigned char   *)(unsigned long)ubitmap32.copy_bitmap;
3649                 ubitmap.copy_size = ubitmap32.copy_size;
3650         } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)))
3651                 return (EFAULT);
3652 
3653         kstatus = spcs_s_kcreate();
3654         if (kstatus == NULL)
3655                 return (ENOMEM);
3656 
3657         if (!ubitmap.shadow_vol[0])
3658                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3659 
3660         mutex_enter(&_ii_info_mutex);
3661         ip = _ii_find_set(ubitmap.shadow_vol);
3662         mutex_exit(&_ii_info_mutex);
3663         if (ip == NULL)
3664                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3665                     DSW_ENOTFOUND));
3666 
3667         mutex_exit(&ip->bi_mutex);
3668 
3669         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3670                 mutex_enter(&ip->bi_mutex);
3671                 _ii_ioctl_done(ip);
3672                 mutex_exit(&ip->bi_mutex);
3673                 spcs_s_add(kstatus, rc);
3674                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3675                     DSW_ERSRVFAIL));
3676         }
3677 
3678         if (ubitmap.shd_bitmap && ubitmap.shd_size > 0)
3679                 rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3680                     ubitmap.shd_size);
3681         if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0)
3682                 rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap,
3683                     ubitmap.copy_size);
3684         _ii_rlse_devs(ip, BMP);
3685         mutex_enter(&ip->bi_mutex);
3686         _ii_ioctl_done(ip);
3687         mutex_exit(&ip->bi_mutex);
3688         if (rc) {
3689                 spcs_s_add(kstatus, rc);
3690                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3691         }
3692 
3693         spcs_s_kfree(kstatus);
3694 
3695         return (0);
3696 }
3697 
3698 /*
3699  * _ii_export
3700  *      Exports the shadow volume
3701  *
3702  * Calling/Exit State:
3703  *      Returns 0 if the shadow was exported. Otherwise an error code
3704  *      is returned and any additional error information is copied
3705  *      out to the user.
3706  *
3707  * Description:
3708  */
3709 
3710 int
3711 _ii_export(intptr_t arg, int ilp32, int *rvp)
3712 {
3713         dsw_ioctl_t uparms;
3714         dsw_ioctl32_t uparms32;
3715         _ii_info_t *ip;
3716         nsc_fd_t *fd;
3717         int rc = 0;
3718         spcs_s_info_t kstatus;
3719 
3720         *rvp = 0;
3721 
3722         if (ilp32) {
3723                 if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3724                         return (EFAULT);
3725                 II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3726                 uparms.status = (spcs_s_info_t)uparms32.status;
3727         } else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3728                 return (EFAULT);
3729 
3730         kstatus = spcs_s_kcreate();
3731         if (kstatus == NULL)
3732                 return (ENOMEM);
3733 
3734         if (!uparms.shadow_vol[0])
3735                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3736 
3737         mutex_enter(&_ii_info_mutex);
3738         ip = _ii_find_set(uparms.shadow_vol);
3739         mutex_exit(&_ii_info_mutex);
3740         if (ip == NULL)
3741                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3742                     DSW_ENOTFOUND));
3743 
3744         if ((ip->bi_flags & DSW_GOLDEN) == 0 ||
3745             ((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) {
3746                 /*
3747                  * Cannot export a dependent copy or while still copying or
3748                  * the shadow is already in an exported state
3749                  */
3750                 rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT)
3751                     ? DSW_EALREADY : DSW_EDEPENDENCY;
3752                 _ii_ioctl_done(ip);
3753                 mutex_exit(&ip->bi_mutex);
3754                 return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3755         }
3756         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3757                 _ii_ioctl_done(ip);
3758                 mutex_exit(&ip->bi_mutex);
3759                 spcs_s_add(kstatus, rc);
3760                 return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3761                     DSW_ERSRVFAIL));
3762         }
3763         II_FLAG_SET(DSW_SHDEXPORT, ip);
3764 
3765         mutex_exit(&ip->bi_mutex);
3766 
3767         /* this rw_enter forces us to drain all active IO */
3768         rw_enter(&ip->bi_linkrw, RW_WRITER);
3769         rw_exit(&ip->bi_linkrw);
3770 
3771         mutex_enter(&ip->bi_mutex);
3772 
3773         _ii_rlse_devs(ip, BMP);
3774 
3775         /* Shut shadow volume. */
3776         if (ip->bi_shdfd) {
3777                 if (ip->bi_shdrsrv) {
3778                         nsc_release(ip->bi_shdfd);
3779                         ip->bi_shdrsrv = NULL;
3780                 }
3781                 fd = ip->bi_shdfd;
3782                 ip->bi_shdfd = NULL;
3783                 mutex_exit(&ip->bi_mutex);
3784                 (void) nsc_close(fd);
3785                 mutex_enter(&ip->bi_mutex);
3786         }
3787 
3788         if (ip->bi_shdrfd) {
3789                 if (ip->bi_shdrrsrv) {
3790                         nsc_release(ip->bi_shdrfd);
3791                         ip->bi_shdrrsrv = NULL;
3792                 }
3793                 fd = ip->bi_shdrfd;
3794                 ip->bi_shdrfd = NULL;
3795                 mutex_exit(&ip->bi_mutex);
3796                 (void) nsc_close(fd);
3797                 mutex_enter(&ip->bi_mutex);
3798         }
3799         _ii_ioctl_done(ip);
3800         mutex_exit(&ip->bi_mutex);
3801 
3802         (void) _ii_reserve_begin(ip);
3803         if (ip->bi_shd_tok) {
3804                 (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
3805                 ip->bi_shd_tok = NULL;
3806         }
3807 
3808         if (ip->bi_shdr_tok) {
3809                 (void) _ii_unregister_path(ip->bi_shdr_tok, 0,
3810                     "raw shadow");
3811                 ip->bi_shdr_tok = NULL;
3812         }
3813         _ii_reserve_end(ip);
3814 
3815         spcs_s_kfree(kstatus);
3816 
3817         return (0);
3818 }
3819 
3820 /*
3821  * _ii_join
3822  *      Rejoins the shadow volume
3823  *
3824  * Calling/Exit State:
3825  *      Returns 0 if the shadow was exported. Otherwise an error code
3826  *      is returned and any additional error information is copied
3827  *      out to the user.
3828  *
3829  * Description:
3830  */
3831 
3832 int
3833 _ii_join(intptr_t arg, int ilp32, int *rvp)
3834 {
3835         dsw_bitmap_t ubitmap;
3836         dsw_bitmap32_t ubitmap32;
3837         _ii_info_t *ip;
3838         uint64_t bm_size;
3839         int rc = 0;
3840         int rtype = 0;
3841         spcs_s_info_t kstatus;
3842 
3843         *rvp = 0;
3844 
3845         if (ilp32) {
3846                 if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
3847                         return (EFAULT);
3848                 II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
3849                 ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3850                 ubitmap.shd_bitmap =
3851                     (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3852                 ubitmap.shd_size = ubitmap32.shd_size;
3853         } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
3854                 return (EFAULT);
3855 
3856         kstatus = spcs_s_kcreate();
3857         if (kstatus == NULL)
3858                 return (ENOMEM);
3859 
3860         if (!ubitmap.shadow_vol[0])
3861                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3862 
3863         mutex_enter(&_ii_info_mutex);
3864         ip = _ii_find_set(ubitmap.shadow_vol);
3865         mutex_exit(&_ii_info_mutex);
3866         if (ip == NULL)
3867                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3868                     DSW_ENOTFOUND));
3869 
3870         /*
3871          * Check that group has shadow exported.
3872          */
3873         if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
3874                 /*
3875                  * Cannot join if the shadow isn't exported.
3876                  */
3877                 _ii_ioctl_done(ip);
3878                 mutex_exit(&ip->bi_mutex);
3879                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3880                     DSW_ENOTEXPORTED));
3881         }
3882         /* check bitmap is at least large enough for master volume size */
3883         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
3884         if (ubitmap.shd_size < bm_size) {
3885                 /* bitmap is to small */
3886                 _ii_ioctl_done(ip);
3887                 mutex_exit(&ip->bi_mutex);
3888                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3889                     DSW_EINVALBMP));
3890         }
3891         /* read in bitmap and or with differences bitmap */
3892         rtype = BMP;
3893         if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
3894                 _ii_ioctl_done(ip);
3895                 mutex_exit(&ip->bi_mutex);
3896                 spcs_s_add(kstatus, rc);
3897                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3898                     DSW_ERSRVFAIL));
3899         }
3900         rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3901             ubitmap.shd_size);
3902         /* open up shadow */
3903         if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) {
3904                 _ii_ioctl_done(ip);
3905                 mutex_exit(&ip->bi_mutex);
3906                 spcs_s_add(kstatus, rc);
3907                 _ii_rlse_devs(ip, rtype);
3908                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN));
3909         }
3910         ii_register_shd(ip);
3911         if (!rc)
3912                 II_FLAG_CLR(DSW_SHDEXPORT, ip);
3913         _ii_ioctl_done(ip);
3914         mutex_exit(&ip->bi_mutex);
3915         _ii_rlse_devs(ip, rtype);
3916 
3917         if (rc) {
3918                 spcs_s_add(kstatus, rc);
3919                 return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3920         }
3921 
3922         spcs_s_kfree(kstatus);
3923 
3924         return (0);
3925 }
3926 
3927 
3928 /*
3929  * _ii_ocreate
3930  *      Configures a volume suitable for use as an overflow volume.
3931  *
3932  * Calling/Exit State:
3933  *      Returns 0 if the volume was configured successfully. Otherwise
3934  *       an error code is returned and any additional error information
3935  *      is copied out to the user.
3936  *
3937  * Description:
3938  */
3939 
3940 int
3941 _ii_ocreate(intptr_t arg, int ilp32, int *rvp)
3942 {
3943         dsw_ioctl_t uioctl;
3944         dsw_ioctl32_t uioctl32;
3945         _ii_overflow_t  ov;
3946         _ii_overflow_t  *op = &ov;
3947         int rc = 0;
3948         nsc_fd_t        *fd;
3949         nsc_iodev_t     *iodev;
3950         nsc_size_t vol_size;
3951         char *overflow_vol;
3952         spcs_s_info_t kstatus;
3953 
3954         *rvp = 0;
3955 
3956         if (ilp32) {
3957                 if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0)
3958                         return (EFAULT);
3959                 II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t);
3960                 uioctl.status = (spcs_s_info_t)uioctl32.status;
3961         } else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0)
3962                 return (EFAULT);
3963 
3964         overflow_vol = uioctl.shadow_vol;
3965         kstatus = spcs_s_kcreate();
3966         if (kstatus == NULL)
3967                 return (ENOMEM);
3968 
3969         if (!overflow_vol[0])
3970                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY));
3971 
3972         if (ii_volume(overflow_vol, 0) != NONE)
3973                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE));
3974 
3975         fd = nsc_open(overflow_vol,
3976             NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc);
3977         if (!fd)
3978                 fd = nsc_open(uioctl.shadow_vol,
3979                     NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
3980                     (blind_t)&(iodev), &rc);
3981         if (fd == NULL) {
3982                 spcs_s_add(kstatus, rc);
3983                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3984         }
3985         if ((rc = nsc_reserve(fd, 0)) != 0) {
3986                 spcs_s_add(kstatus, rc);
3987                 (void) nsc_close(fd);
3988                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status,
3989                     DSW_ERSRVFAIL));
3990         }
3991         /* setup magic number etc; */
3992         rc = nsc_partsize(fd, &vol_size);
3993         if (rc) {
3994                 spcs_s_add(kstatus, rc);
3995                 (void) nsc_close(fd);
3996                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3997         }
3998         op->ii_hmagic = II_OMAGIC;
3999                 /* take 1 off as chunk 0 contains header */
4000         op->ii_nchunks = (vol_size / DSW_SIZE) -1;
4001         op->ii_drefcnt = 0;
4002         op->ii_used = 1;                     /* we have used the header */
4003         op->ii_unused = op->ii_nchunks - op->ii_used;
4004         op->ii_freehead = II_NULLNODE;
4005         op->ii_hversion = OV_HEADER_VERSION;
4006         op->ii_flags = 0;
4007         op->ii_urefcnt = 0;
4008         (void) strncpy(op->ii_volname, uioctl.shadow_vol, DSW_NAMELEN);
4009         rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA,
4010             (unsigned char *)&op->ii_do, sizeof (op->ii_do));
4011         (void) nsc_release(fd);
4012         (void) nsc_close(fd);
4013         if (rc) {
4014                 spcs_s_add(kstatus, rc);
4015                 return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
4016         }
4017 
4018         spcs_s_kfree(kstatus);
4019 
4020         return (0);
4021 }
4022 
4023 
/*
 * _ii_oattach
 *      Attaches the volume named in the "bitmap_vol" field of the caller's
 *      dsw_config_t as the overflow volume of the set whose shadow volume
 *      is named in the "shadow_vol" field.
 *
 * Calling/Exit State:
 *      Returns 0 once the volume has been attached and the bitmap header
 *      has been handed back for rewriting (the result of that write is
 *      not checked).  EFAULT is returned if the argument cannot be copied
 *      in and ENOMEM if no status block can be created.  All other
 *      failures are reported through spcs_s_ocopyoutf():
 *              DSW_EEMPTY      no shadow volume name was supplied
 *              DSW_EINUSE      ii_volume() classes the volume as something
 *                              other than NONE or OVR
 *              DSW_ENOTFOUND   no set matches the shadow volume name
 *              DSW_EALREADY    the set already has an overflow volume
 *              DSW_EWRONGTYPE  the set does not have DSW_TREEMAP set
 *              DSW_ERSRVFAIL   the bitmap device could not be reserved
 *              DSW_EHDRBMP     the bitmap header could not be obtained
 *      or with whatever ii_overflow_attach() returned.
 *
 * Description:
 *      Every failure after the set lookup calls _ii_ioctl_done() and drops
 *      ip->bi_mutex, so _ii_find_set() presumably returns the set with
 *      that mutex held and an ioctl reference taken (confirm against
 *      _ii_find_set()).
 */

int
_ii_oattach(intptr_t arg, int ilp32, int *rvp)
{
        dsw_config_t uconfig;
        dsw_config32_t uconfig32;
        spcs_s_info_t kstatus;
        _ii_info_t *ip;
        ii_header_t *bm_header;
        nsc_buf_t *tmp = NULL;
        int rtype = BMP;
        int reserved = 0;       /* non-zero once the bitmap is reserved */
        int add_rc = 0;         /* extra code to record before reporting */
        int err = 0;            /* code handed to spcs_s_ocopyoutf() */
        int rc;

        *rvp = 0;

        /* fetch the request, widening the 32-bit layout when needed */
        if (!ilp32) {
                if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0)
                        return (EFAULT);
        } else {
                if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0)
                        return (EFAULT);
                II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t);
                uconfig.status = (spcs_s_info_t)uconfig32.status;
        }

        if ((kstatus = spcs_s_kcreate()) == NULL)
                return (ENOMEM);

        if (uconfig.shadow_vol[0] == '\0')
                return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY));

        /* the candidate must be unknown to II or already an overflow */
        switch (ii_volume(uconfig.bitmap_vol, 0)) {
        case NONE:
        case OVR:
                break;
        default:
                return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE));
        }

        mutex_enter(&_ii_info_mutex);
        ip = _ii_find_set(uconfig.shadow_vol);
        mutex_exit(&_ii_info_mutex);
        if (ip == NULL)
                return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
                    DSW_ENOTFOUND));

        /* a set can only have one overflow volume */
        if (ip->bi_overflow) {
                err = DSW_EALREADY;
                goto fail;
        }

        /* only a tree-mapped shadow can use an overflow volume */
        if ((ip->bi_flags & DSW_TREEMAP) == 0) {
                err = DSW_EWRONGTYPE;
                goto fail;
        }

        rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL);
        if (rc != 0) {
                add_rc = rc;
                err = DSW_ERSRVFAIL;
                goto fail;
        }
        reserved = 1;

        /* attach volume */
        rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1);
        if (rc != 0) {
                err = rc;
                goto fail;
        }

        /* re-write header so shadow can be restarted with overflow volume */
        bm_header = _ii_bm_header_get(ip, &tmp);
        if (bm_header == NULL) {
                /* no header to record it in: detach the volume again */
                ii_overflow_free(ip, RECLAIM);
                err = DSW_EHDRBMP;
                goto fail;
        }
        (void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol,
            DSW_NAMELEN);
        (void) _ii_bm_header_put(bm_header, ip, tmp);

        _ii_rlse_devs(ip, rtype);
        _ii_ioctl_done(ip);
        mutex_exit(&ip->bi_mutex);

        spcs_s_kfree(kstatus);

        return (0);

fail:
        /*
         * Common failure exit: finish the ioctl and drop the set mutex
         * first, then record any reserve error and release the bitmap if
         * it had been reserved.
         */
        _ii_ioctl_done(ip);
        mutex_exit(&ip->bi_mutex);
        if (add_rc != 0)
                spcs_s_add(kstatus, add_rc);
        if (reserved)
                _ii_rlse_devs(ip, rtype);
        return (spcs_s_ocopyoutf(&kstatus, uconfig.status, err));
}
4132 
4133 
/*
 * _ii_odetach
 *      Breaks the link between the set whose shadow volume is named in the
 *      "shadow_vol" field of the caller's dsw_bitmap_t and its overflow
 *      volume.
 *
 * Calling/Exit State:
 *      Returns 0 if the overflow volume was detached. Otherwise an error code
 *      is returned and any additional error information is copied
 *      out to the user:
 *              EFAULT            the argument could not be copied in
 *              ENOMEM            no status block could be created
 *              DSW_EEMPTY        no shadow volume name was supplied
 *              DSW_ENOTFOUND     no set matches the shadow volume name
 *              DSW_EODEPENDENCY  the set has DSW_VOVERFLOW set
 *              DSW_ERSRVFAIL     the bitmap device could not be reserved
 *              DSW_EHDRBMP       the bitmap header could not be obtained
 *
 * Description:
 *      The overflow volume is released with ii_overflow_free() before the
 *      bitmap header is fetched, so a DSW_EHDRBMP failure leaves the
 *      in-core link broken while the on-disk header still names the
 *      volume.  Every exit after the set lookup calls _ii_ioctl_done()
 *      and drops ip->bi_mutex, so _ii_find_set() presumably returns the
 *      set with that mutex held (confirm against _ii_find_set()).
 */

int
_ii_odetach(intptr_t arg, int ilp32, int *rvp)
{
        dsw_bitmap_t ubitmap;
        dsw_bitmap32_t ubitmap32;
        _ii_info_t *ip;
        int rc = 0;
        int rtype = 0;
        ii_header_t *bm_header;
        nsc_buf_t *tmp = NULL;
        spcs_s_info_t kstatus;

        *rvp = 0;

        if (ilp32) {
                if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
                        return (EFAULT);
                II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
                ubitmap.status = (spcs_s_info_t)ubitmap32.status;
        } else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
                return (EFAULT);

        kstatus = spcs_s_kcreate();
        if (kstatus == NULL)
                return (ENOMEM);

        if (!ubitmap.shadow_vol[0])
                return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));

        mutex_enter(&_ii_info_mutex);
        ip = _ii_find_set(ubitmap.shadow_vol);
        mutex_exit(&_ii_info_mutex);
        if (ip == NULL)
                return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
                    DSW_ENOTFOUND));

        /* refuse while the set is flagged DSW_VOVERFLOW */
        if ((ip->bi_flags&DSW_VOVERFLOW) != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
                    DSW_EODEPENDENCY));
        }
        rtype = BMP;
        if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                spcs_s_add(kstatus, rc);
                return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
                    DSW_ERSRVFAIL));
        }
        ii_overflow_free(ip, RECLAIM);

        /* re-write header to break link with overflow volume */
        bm_header = _ii_bm_header_get(ip, &tmp);
        if (bm_header == NULL) {
                _ii_rlse_devs(ip, rtype);
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
                    DSW_EHDRBMP));
        }
        bzero(bm_header->overflow_vol, DSW_NAMELEN);
        /*
         * The result of the header write is deliberately not examined.
         * An "if (rc)" test used to follow the release below, but rc is
         * only ever set by _ii_rsrv_devs() and is known to be zero by
         * this point, so that DSW_EIO path could never run and has been
         * removed.
         */
        (void) _ii_bm_header_put(bm_header, ip, tmp);

        _ii_rlse_devs(ip, rtype);
        _ii_ioctl_done(ip);

        mutex_exit(&ip->bi_mutex);

        spcs_s_kfree(kstatus);

        /* one fewer set associated with an overflow volume */
        --iigkstat.assoc_over.value.ul;

        return (0);
}
4225 
4226 
4227 /*
4228  * _ii_gc_list
4229  *      Returns a list of all lists, or all entries in a list
4230  *
4231  */
4232 int
4233 _ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex,
4234     _ii_lsthead_t *lst)
4235 {
4236         dsw_aioctl_t ulist;
4237         dsw_aioctl32_t ulist32;
4238         size_t name_offset;
4239         int i;
4240         spcs_s_info_t kstatus;
4241         char *carg = (char *)arg;
4242         uint64_t hash;
4243         _ii_lsthead_t *cp;
4244         _ii_lstinfo_t *np;
4245 
4246         *rvp = 0;
4247         name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
4248         if (ilp32) {
4249                 if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0)
4250                         return (EFAULT);
4251                 II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4252                 ulist.status = (spcs_s_info_t)ulist32.status;
4253                 name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
4254         } else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0)
4255                 return (EFAULT);
4256 
4257         kstatus = spcs_s_kcreate();
4258         if (kstatus == NULL)
4259                 return (ENOMEM);
4260 
4261         mutex_enter(mutex);
4262         if (ulist.shadow_vol[ 0 ] != 0) {
4263                 /* search for specific list */
4264                 hash = nsc_strhash(ulist.shadow_vol);
4265                 for (cp = lst; cp; cp = cp->lst_next) {
4266                         if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol,
4267                             cp->lst_name, DSW_NAMELEN) == 0) {
4268                                 break;
4269                         }
4270                 }
4271                 if (cp) {
4272                         for (i = 0, np = cp->lst_start; i < ulist.count && np;
4273                             np = np->lst_next, carg += DSW_NAMELEN, i++) {
4274                                 if (copyout(np->lst_ip->bi_keyname,
4275                                     carg + name_offset, DSW_NAMELEN)) {
4276                                         mutex_exit(mutex);
4277                                         return (spcs_s_ocopyoutf(&kstatus,
4278                                             ulist.status, EFAULT));
4279                                 }
4280                         }
4281                 } else {
4282                         i = 0;
4283                 }
4284         } else {
4285                 /* return full list */
4286                 for (i = 0, cp = lst; i < ulist.count && cp;
4287                     carg += DSW_NAMELEN, i++, cp = cp->lst_next) {
4288                         if (copyout(cp->lst_name, carg + name_offset,
4289                             DSW_NAMELEN)) {
4290                                 mutex_exit(mutex);
4291                                 return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4292                                     EFAULT));
4293                         }
4294                 }
4295         }
4296         mutex_exit(mutex);
4297         ulist32.count = ulist.count = i;
4298 
4299         if (ilp32) {
4300                 if (copyout(&ulist32, (void *) arg, name_offset))
4301                         return (EFAULT);
4302         } else {
4303                 if (copyout(&ulist, (void*) arg, name_offset))
4304                         return (EFAULT);
4305         }
4306 
4307         return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0));
4308 }
4309 
4310 /*
4311  * _ii_olist
4312  *      Breaks the link with the overflow volume.
4313  *
4314  * Calling/Exit State:
4315  *      Returns 0 if the overflow volume was detached. Otherwise an error code
4316  *      is returned and any additional error information is copied
4317  *      out to the user.
4318  *
4319  * Description:
4320  */
4321 
4322 int
4323 _ii_olist(intptr_t arg, int ilp32, int *rvp)
4324 {
4325         dsw_aioctl_t ulist;
4326         dsw_aioctl32_t ulist32;
4327         _ii_overflow_t *op;
4328         size_t name_offset;
4329         int rc = 0;
4330         int i;
4331         char *carg = (char *)arg;
4332         spcs_s_info_t kstatus;
4333 
4334         *rvp = 0;
4335 
4336         name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
4337         if (ilp32) {
4338                 if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
4339                         return (EFAULT);
4340                 II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4341                 ulist.status = (spcs_s_info_t)ulist32.status;
4342                 name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
4343         } else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
4344                 return (EFAULT);
4345 
4346         kstatus = spcs_s_kcreate();
4347         if (kstatus == NULL)
4348                 return (ENOMEM);
4349 
4350         i = 0;
4351 
4352         mutex_enter(&_ii_overflow_mutex);
4353         for (op = _ii_overflow_top; i < ulist.count && op;
4354             carg += DSW_NAMELEN) {
4355                 if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) {
4356                         mutex_exit(&_ii_overflow_mutex);
4357                         return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4358                             EFAULT));
4359                 }
4360                 i++;
4361                 op = op->ii_next;
4362         }
4363         mutex_exit(&_ii_overflow_mutex);
4364         ulist32.count = ulist.count = i;
4365         /* return count of items listed to user */
4366         if (ilp32) {
4367                 if (copyout(&ulist32, (void *)arg, name_offset))
4368                         return (EFAULT);
4369         } else {
4370                 if (copyout(&ulist, (void *)arg, name_offset))
4371                         return (EFAULT);
4372         }
4373 
4374         return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc));
4375 }
4376 
/*
 * _ii_ostat
 *      Reports the counters kept for the overflow volume named in the
 *      "overflow_vol" field of the caller's dsw_ostat_t.
 *
 * Calling/Exit State:
 *      On success the nchunks, used, unused, drefcnt and crefcnt fields
 *      are filled in from the in-core overflow structure; when is_iost_2
 *      is non-zero the hversion, flags and hmagic fields are filled in as
 *      well.  The structure (the 32-bit form when ilp32 is set) is then
 *      copied back to the user and 0 is returned.
 *
 *      Returns EFAULT if the argument cannot be copied in or out and
 *      ENOMEM if no status block can be created.  DSW_EEMPTY (no volume
 *      name supplied) and DSW_ENOTFOUND (_ii_find_overflow() found
 *      nothing) are reported through spcs_s_ocopyoutf().
 *
 * Description:
 */

int
_ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2)
{
        void *uaddr = (void *)arg;
        dsw_ostat_t ustat;
        dsw_ostat32_t ustat32;
        _ii_overflow_t *ovp;
        spcs_s_info_t kstatus;
        int failed;

        *rvp = 0;

        /* fetch the request, widening the 32-bit layout when needed */
        if (!ilp32) {
                if (copyin(uaddr, &ustat, sizeof (ustat)) < 0)
                        return (EFAULT);
        } else {
                if (copyin(uaddr, &ustat32, sizeof (ustat32)) < 0)
                        return (EFAULT);
                II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t);
                ustat.status = (spcs_s_info_t)ustat32.status;
        }

        if ((kstatus = spcs_s_kcreate()) == NULL)
                return (ENOMEM);

        if (ustat.overflow_vol[0] == '\0')
                return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));

        ovp = _ii_find_overflow(ustat.overflow_vol);
        if (ovp == NULL)
                return (spcs_s_ocopyoutf(&kstatus, ustat.status,
                    DSW_ENOTFOUND));

        /* counters reported by every version of the request */
        ustat.nchunks = ovp->ii_nchunks;
        ustat.used = ovp->ii_used;
        ustat.unused = ovp->ii_unused;
        ustat.drefcnt = ovp->ii_drefcnt;
        ustat.crefcnt = ovp->ii_crefcnt;

        /* header details are only reported when is_iost_2 is set */
        if (is_iost_2) {
                ustat.hversion = ovp->ii_hversion;
                ustat.flags = ovp->ii_flags;
                ustat.hmagic = ovp->ii_hmagic;
        }

        /* nothing further is reported through the status block */
        spcs_s_kfree(kstatus);

        if (!ilp32) {
                failed = copyout(&ustat, uaddr, sizeof (ustat));
        } else {
                /* narrow the results back into the 32-bit layout */
                ustat32.nchunks = ustat.nchunks;
                ustat32.used = ustat.used;
                ustat32.unused = ustat.unused;
                ustat32.drefcnt = ustat.drefcnt;
                ustat32.crefcnt = ustat.crefcnt;
                if (is_iost_2) {
                        ustat32.hversion = ustat.hversion;
                        ustat32.flags = ustat.flags;
                        ustat32.hmagic = ustat.hmagic;
                }
                failed = copyout(&ustat32, uaddr, sizeof (ustat32));
        }

        return (failed ? EFAULT : 0);
}
4449 
/*
 * _ii_move_grp()
 *      Move a set from one group to another, possibly creating the new
 *      group.
 *
 *      arg points at a user dsw_movegrp_t (dsw_movegrp32_t when ilp32 is
 *      set).  "shadow_vol" names the set and "new_group" the group it is
 *      to join; an empty "new_group" removes the set from its current
 *      group.  Once the membership change has succeeded the new group
 *      name is written into the bitmap header.
 *
 *      Returns EFAULT if the argument cannot be copied in and ENOMEM if
 *      no status block can be created.  All other results are reported
 *      through spcs_s_ocopyoutf(): DSW_EEMPTY (no shadow name),
 *      DSW_ENOTFOUND (no such set), the error from II_UNLINK_GROUP() /
 *      II_LINK_GROUP(), DSW_ERSRVFAIL (bitmap reserve failed), or 0.
 *
 *      The exit paths after the set lookup call _ii_ioctl_done() and drop
 *      ip->bi_mutex, so _ii_find_set() presumably returns the set with
 *      that mutex held (confirm against _ii_find_set()).
 */

int
_ii_move_grp(intptr_t arg, int ilp32, int *rvp)
{
        dsw_movegrp_t umove;
        dsw_movegrp32_t umove32;
        spcs_s_info_t kstatus;
        _ii_info_t *ip;
        int rc = 0;             /* result of the group link/unlink */
        int rsrv_rc;            /* result of reserving the bitmap */
        nsc_buf_t *tmp = NULL;
        ii_header_t *bm_header;

        *rvp = 0;

        if (ilp32) {
                if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
                        return (EFAULT);
                II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
                umove.status = (spcs_s_info_t)umove32.status;
        } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
                return (EFAULT);

        kstatus = spcs_s_kcreate();
        if (kstatus == NULL)
                return (ENOMEM);

        if (!umove.shadow_vol[0])
                return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));

        mutex_enter(&_ii_info_mutex);
        ip = _ii_find_set(umove.shadow_vol);
        mutex_exit(&_ii_info_mutex);

        if (!ip)
                return (spcs_s_ocopyoutf(&kstatus, umove.status,
                    DSW_ENOTFOUND));

        if (!umove.new_group[0]) {
                /* are we clearing the group association? */
                if (ip->bi_group) {
                        DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname,
                            char *, ip->bi_group);
                        rc = II_UNLINK_GROUP(ip);
                }
        } else if (!ip->bi_group) {
                /* are we adding it to a group for the first time? */
                rc = II_LINK_GROUP(ip, umove.new_group);
                DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname,
                    char *, ip->bi_group);
        } else {
                /* remove it from one group and add it to the other */
                DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname,
                    char *, ip->bi_group, char *, umove.new_group);
                rc = II_UNLINK_GROUP(ip);
                if (!rc)
                        rc = II_LINK_GROUP(ip, umove.new_group);
        }

        /*
         * If the membership change failed, report that error and leave
         * the bitmap header alone.  The reserve below used to reuse rc,
         * which discarded the link/unlink result and let the header be
         * rewritten with a group the set had not joined.
         */
        if (rc != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
        }

        /* ** BEGIN UPDATE BITMAP HEADER ** */
        if ((rsrv_rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                spcs_s_add(kstatus, rsrv_rc);
                return (spcs_s_ocopyoutf(&kstatus, umove.status,
                    DSW_ERSRVFAIL));
        }
        bm_header = _ii_bm_header_get(ip, &tmp);
        if (bm_header) {
                /* a missing header is tolerated; the name is just not saved */
                (void) strncpy(bm_header->group_name, umove.new_group,
                    DSW_NAMELEN);
                (void) _ii_bm_header_put(bm_header, ip, tmp);
        }
        _ii_rlse_devs(ip, BMP);
        /* ** END UPDATE BITMAP HEADER ** */

        _ii_ioctl_done(ip);
        mutex_exit(&ip->bi_mutex);

        return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
}
4534 
/*
 * _ii_change_tag()
 *      Change the cluster tag of a set: link it to the cluster list named
 *      in the request, unlink it from its current one, or both.
 *
 *      arg points at a user dsw_movegrp_t (dsw_movegrp32_t when ilp32 is
 *      set).  "shadow_vol" names the set and "new_group" carries the new
 *      cluster tag; an empty "new_group" clears the current tag.  Once
 *      the change has succeeded the new tag is written into the
 *      clstr_name field of the bitmap header.
 *
 *      Returns EFAULT if the argument cannot be copied in and ENOMEM if
 *      no status block can be created.  All other results are reported
 *      through spcs_s_ocopyoutf(): DSW_EEMPTY (no shadow name),
 *      DSW_ENOTFOUND (no such set), the error from II_UNLINK_CLUSTER() /
 *      II_LINK_CLUSTER(), DSW_ERSRVFAIL (bitmap reserve failed), or 0.
 *
 *      The exit paths after the set lookup call _ii_ioctl_done() and drop
 *      ip->bi_mutex, so _ii_find_set() presumably returns the set with
 *      that mutex held (confirm against _ii_find_set()).
 */

int
_ii_change_tag(intptr_t arg, int ilp32, int *rvp)
{
        dsw_movegrp_t umove;
        dsw_movegrp32_t umove32;
        spcs_s_info_t kstatus;
        _ii_info_t *ip;
        int rc = 0;             /* result of the cluster link/unlink */
        int rsrv_rc;            /* result of reserving the bitmap */
        nsc_buf_t *tmp = NULL;
        ii_header_t *bm_header;

        *rvp = 0;

        if (ilp32) {
                if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
                        return (EFAULT);
                II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
                umove.status = (spcs_s_info_t)umove32.status;
        } else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
                return (EFAULT);

        kstatus = spcs_s_kcreate();
        if (kstatus == NULL)
                return (ENOMEM);

        if (!umove.shadow_vol[0])
                return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));

        mutex_enter(&_ii_info_mutex);
        ip = _ii_find_set(umove.shadow_vol);
        mutex_exit(&_ii_info_mutex);

        if (!ip)
                return (spcs_s_ocopyoutf(&kstatus, umove.status,
                    DSW_ENOTFOUND));

        if (!umove.new_group[0]) {
                /* are we clearing the cluster tag? */
                if (ip->bi_cluster) {
                        DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
                            char *, ip->bi_cluster);
                        rc = II_UNLINK_CLUSTER(ip);
                }
        } else if (!ip->bi_cluster) {
                /* are we tagging it for the first time? */
                rc = II_LINK_CLUSTER(ip, umove.new_group);
                DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
                    char *, ip->bi_cluster);
        } else {
                /* remove it from one cluster list and add it to the other */
                DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname,
                    char *, ip->bi_cluster, char *, umove.new_group);
                rc = II_UNLINK_CLUSTER(ip);
                if (!rc)
                        rc = II_LINK_CLUSTER(ip, umove.new_group);
        }

        /*
         * If the tag change failed, report that error and leave the
         * bitmap header alone.  The reserve below used to reuse rc, which
         * discarded the link/unlink result and let the header be
         * rewritten with a tag the set had not been given.
         */
        if (rc != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
        }

        /* ** BEGIN UPDATE BITMAP HEADER ** */
        if ((rsrv_rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
                _ii_ioctl_done(ip);
                mutex_exit(&ip->bi_mutex);
                spcs_s_add(kstatus, rsrv_rc);
                return (spcs_s_ocopyoutf(&kstatus, umove.status,
                    DSW_ERSRVFAIL));
        }
        bm_header = _ii_bm_header_get(ip, &tmp);
        if (bm_header) {
                /* a missing header is tolerated; the tag is just not saved */
                (void) strncpy(bm_header->clstr_name, umove.new_group,
                    DSW_NAMELEN);
                (void) _ii_bm_header_put(bm_header, ip, tmp);
        }
        _ii_rlse_devs(ip, BMP);
        /* ** END UPDATE BITMAP HEADER ** */

        _ii_ioctl_done(ip);
        mutex_exit(&ip->bi_mutex);

        return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
}
4620 
4621 
4622 /*
4623  * _ii_spcs_s_ocopyoutf()
4624  * Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits
4625  * the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this
4626  * functionality as it is sometimes called by _ii_control_copy() which
4627  * has no user context to copy any errors into. At all other times a NULL
4628  * spcs_s_info_t argument would indicate a bug in the calling function.
4629  */
4630 
4631 static int
4632 _ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err)
4633 {
4634         if (ustatus)
4635                 return (spcs_s_ocopyoutf(kstatusp, ustatus, err));
4636         spcs_s_kfree(*kstatusp);
4637         return (err);
4638 }
4639 
4640 static int
4641 _ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid,
4642     spcs_s_info_t ustatus)
4643 {
4644         _ii_info_t *xip;
4645         int rc;
4646         int rtype;
4647 
4648         if ((ip->bi_flags & DSW_COPYINGP) != 0) {
4649                 _ii_ioctl_done(ip);
4650                 mutex_exit(&ip->bi_mutex);
4651                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4652         }
4653 
4654         if (ip->bi_flags & DSW_OFFLINE) {
4655                 _ii_ioctl_done(ip);
4656                 mutex_exit(&ip->bi_mutex);
4657                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE));
4658         }
4659 
4660         if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) {
4661                 _ii_ioctl_done(ip);
4662                 mutex_exit(&ip->bi_mutex);
4663                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4664                     DSW_EISEXPORTED));
4665         }
4666 
4667         if ((flags & CV_SHD2MST) == CV_SHD2MST) {
4668                 if ((ip->bi_flags & DSW_COPYINGM) != 0) {
4669                                 _ii_ioctl_done(ip);
4670                                 mutex_exit(&ip->bi_mutex);
4671                                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4672                                     DSW_ECOPYING));
4673                 }
4674                 /* check if any sibling shadow is copying towards this master */
4675                 for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
4676                         if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) {
4677                                 _ii_ioctl_done(ip);
4678                                 mutex_exit(&ip->bi_mutex);
4679                                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4680                                     DSW_ECOPYING));
4681                         }
4682                 }
4683         }
4684 
4685         if (((flags & CV_SHD2MST) == 0) &&
4686             ((ip->bi_flags & DSW_COPYINGS) != 0)) {
4687                 _ii_ioctl_done(ip);
4688                 mutex_exit(&ip->bi_mutex);
4689                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4690         }
4691 
4692         if (ip->bi_flags & DSW_TREEMAP) {
4693                 if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) {
4694                         _ii_ioctl_done(ip);
4695                         mutex_exit(&ip->bi_mutex);
4696                         return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4697                             DSW_EINCOMPLETE));
4698                 }
4699         }
4700 
4701         /* Assure that no other PID owns this copy/update */
4702         if (ip->bi_locked_pid == 0) {
4703                 if (flags & CV_LOCK_PID)
4704                         ip->bi_locked_pid = pid;
4705         } else if (ip->bi_locked_pid != pid) {
4706                 _ii_ioctl_done(ip);
4707                 mutex_exit(&ip->bi_mutex);
4708                 return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE));
4709         }
4710 
4711         mutex_exit(&ip->bi_mutex);
4712 
4713         rtype = MSTR|SHDR|BMP;
4714         if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4715                 mutex_enter(&ip->bi_mutex);
4716                 _ii_ioctl_done(ip);
4717                 mutex_exit(&ip->bi_mutex);
4718                 spcs_s_add(*kstatusp, rc);
4719                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4720                     DSW_ERSRVFAIL));
4721         }
4722 
4723         if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) {
4724                 mutex_enter(&ip->bi_mutex);
4725                 _ii_ioctl_done(ip);
4726                 mutex_exit(&ip->bi_mutex);
4727                 _ii_rlse_devs(ip, rtype);
4728                 return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4729                     DSW_EOPACKAGE));
4730         }
4731 
4732         return (0);
4733 }
4734 
4735 static int
4736 _ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag)
4737 {
4738         int rc = 0;
4739         int rtype = MSTR|SHDR|BMP;
4740         _ii_overflow_t *op;
4741         int quick_update = 0;
4742 
4743         waitflag = (waitflag != 0);
4744         /*
4745          * a copy of a tree-mapped device must be downgraded to
4746          * an update.
4747          */
4748         if (ip->bi_flags & DSW_TREEMAP)
4749                 flags |= CV_BMP_ONLY;
4750 
4751         /*
4752          * If we want to update the dependent shadow we only need to zero
4753          * the shadow bitmap.
4754          */
4755 
4756         if (((ip->bi_flags & DSW_GOLDEN) == 0) &&
4757             (flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) {
4758 
4759                 DTRACE_PROBE(DEPENDENT);
4760 
4761                 /* assign updating time */
4762                 ip->bi_mtime = ddi_get_time();
4763 
4764                 if (ip->bi_flags & DSW_TREEMAP) {
4765                         DTRACE_PROBE(COMPACT_DEPENDENT);
4766 
4767                         if (ip->bi_overflow &&
4768                             (ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) {
4769                                 /* attempt to do a quick update */
4770                                 quick_update = 1;
4771                                 ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
4772                                 ip->bi_overflow->ii_detachcnt = 1;
4773                         }
4774 
4775                         rc = ii_tinit(ip);
4776 
4777                         if (quick_update && ip->bi_overflow) {
4778                                 /* clean up */
4779                                 ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
4780                                 ip->bi_overflow->ii_detachcnt = 0;
4781                         }
4782                 }
4783 
4784                 if (rc == 0)
4785                         rc = II_ZEROBM(ip);     /* update copy of shadow */
4786                 if (((op = ip->bi_overflow) != NULL) &&
4787                     (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) {
4788                         mutex_enter(&_ii_overflow_mutex);
4789                         if (ip->bi_flags & DSW_OVRHDRDRTY) {
4790                                 mutex_enter(&ip->bi_mutex);
4791                                 ip->bi_flags &= ~DSW_OVRHDRDRTY;
4792                                 mutex_exit(&ip->bi_mutex);
4793                                 ASSERT(op->ii_urefcnt > 0);
4794                                 op->ii_urefcnt--;
4795                         }
4796                         if (op->ii_urefcnt == 0) {
4797                                 op->ii_flags &= ~IIO_CNTR_INVLD;
4798                                 op->ii_unused = op->ii_nchunks - 1;
4799                         }
4800                         mutex_exit(&_ii_overflow_mutex);
4801                 }
4802                 mutex_enter(&ip->bi_mutex);
4803                 II_FLAG_CLR(DSW_OVERFLOW, ip);
4804                 mutex_exit(&ip->bi_mutex);
4805 
4806                 _ii_unlock_chunk(ip, II_NULLCHUNK);
4807                 mutex_enter(&ip->bi_mutex);
4808                 _ii_ioctl_done(ip);
4809                 mutex_exit(&ip->bi_mutex);
4810                 _ii_rlse_devs(ip, rtype);
4811                 if (rc) {
4812                         spcs_s_add(kstatus, rc);
4813                         return (DSW_EIO);
4814                 } else {
4815                         DTRACE_PROBE(_ii_do_copy_end);
4816                         return (0);
4817                 }
4818         }
4819 
4820         /*
4821          * need to perform an actual copy.
4822          */
4823 
4824         /*
4825          * Perform bitmap copy if asked or from dependent shadow to master.
4826          */
4827         if ((flags & CV_BMP_ONLY) ||
4828             ((flags & CV_SHD2MST) &&
4829             ((ip->bi_flags & DSW_GOLDEN) == 0))) {
4830                 DTRACE_PROBE(INDEPENDENT_fast);
4831                 rc = II_ORBM(ip);               /* save shadow bits for copy */
4832         } else {
4833                 DTRACE_PROBE(INDEPENDENT_slow);
4834                 rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */
4835         }
4836         if (rc == 0)
4837                 rc = II_ZEROBM(ip);
4838         _ii_unlock_chunk(ip, II_NULLCHUNK);
4839         if (rc == 0) {
4840                 mutex_enter(&ip->bi_mutex);
4841                 if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) {
4842                         rc = (ip->bi_flags & DSW_COPYINGP)
4843                             ? DSW_ECOPYING : DSW_EISEXPORTED;
4844 
4845                         _ii_ioctl_done(ip);
4846                         mutex_exit(&ip->bi_mutex);
4847                         _ii_rlse_devs(ip, rtype);
4848                         return (rc);
4849                 }
4850 
4851                 /* assign copying time */
4852                 ip->bi_mtime = ddi_get_time();
4853 
4854                 if (flags & CV_SHD2MST)
4855                         II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip);
4856                 else
4857                         II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
4858                 mutex_exit(&ip->bi_mutex);
4859                 rc = _ii_copyvol(ip, (flags & CV_SHD2MST),
4860                     rtype, kstatus, waitflag);
4861         } else {
4862                 mutex_enter(&ip->bi_mutex);
4863                 _ii_ioctl_done(ip);
4864                 mutex_exit(&ip->bi_mutex);
4865         }
4866 
4867         if (waitflag)
4868                 _ii_rlse_devs(ip, rtype);
4869 
4870         return (rc);
4871 }
4872 
4873 /*
4874  * _ii_copy
4875  *      Copy or update (take snapshot) II volume.
4876  *
4877  * Calling/Exit State:
4878  *      Returns 0 if the operation succeeded. Otherwise an error code
4879  *      is returned and any additional error information is copied
4880  *      out to the user.
4881  */
4882 
4883 int
4884 _ii_copy(intptr_t arg, int ilp32, int *rvp)
4885 {
4886         dsw_ioctl_t ucopy;
4887         dsw_ioctl32_t ucopy32;
4888         _ii_info_t *ip;
4889         int rc = 0;
4890         spcs_s_info_t kstatus;
4891 
4892         *rvp = 0;
4893 
4894         if (ilp32) {
4895                 if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
4896                         return (EFAULT);
4897                 II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t);
4898                 ucopy.status = (spcs_s_info_t)ucopy32.status;
4899         } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
4900                 return (EFAULT);
4901 
4902         kstatus = spcs_s_kcreate();
4903         if (kstatus == NULL)
4904                 return (ENOMEM);
4905 
4906         if (!ucopy.shadow_vol[0])
4907                 return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY));
4908 
4909         mutex_enter(&_ii_info_mutex);
4910         ip = _ii_find_set(ucopy.shadow_vol);
4911         mutex_exit(&_ii_info_mutex);
4912         if (ip == NULL)
4913                 return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
4914                     DSW_ENOTFOUND));
4915 
4916         /* Check that the copy/update makes sense */
4917         if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid,
4918             ucopy.status)) == 0) {
4919                 /* perform the copy */
4920                 _ii_lock_chunk(ip, II_NULLCHUNK);
4921                 /* _ii_do_copy() calls _ii_ioctl_done() */
4922                 rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1);
4923                 return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
4924         }
4925 
4926         return (rc);
4927 }
4928 
4929 /*
4930  * _ii_mass_copy
4931  * Copies/updates the sets pointed to in the ipa array.
4932  *
4933  * Calling/Exit State:
4934  * Returns 0 if the operations was successful.  Otherwise an
4935  * error code.
4936  */
4937 int
4938 _ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait)
4939 {
4940         int i;
4941         int rc = 0;
4942         int failed;
4943         int rtype = MSTR|SHDR|BMP;
4944         _ii_info_t *ip;
4945         spcs_s_info_t kstatus;
4946 
4947         kstatus = spcs_s_kcreate();
4948         if (kstatus == NULL)
4949                 return (ENOMEM);
4950 
4951         /* Check copy validitity */
4952         for (i = 0; i < ucopy->count; i++) {
4953                 ip = ipa[i];
4954 
4955                 rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid,
4956                     ucopy->status);
4957 
4958                 if (rc) {
4959                         /* Clean up the mess */
4960 
4961                         DTRACE_PROBE1(_ii_mass_copy_end1, int, rc);
4962 
4963                         /*
4964                          * The array ipa now looks like:
4965                          *    0..(i-1): needs mutex_enter/ioctl_done/mutex_exit
4966                          *    i: needs nothing (_ii_chk_copy does cleanup)
4967                          *    (i+1)..n: needs just ioctl_done/mutex_exit
4968                          */
4969 
4970                         failed = i;
4971 
4972                         for (i = 0; i < failed; i++) {
4973                                 mutex_enter(&(ipa[i]->bi_mutex));
4974                                 _ii_ioctl_done(ipa[i]);
4975                                 mutex_exit(&(ipa[i]->bi_mutex));
4976                                 _ii_rlse_devs(ipa[i], rtype);
4977                         }
4978 
4979                         /* skip 'failed', start with failed + 1 */
4980 
4981                         for (i = failed + 1; i < ucopy->count; i++) {
4982                                 _ii_ioctl_done(ipa[i]);
4983                                 mutex_exit(&(ipa[i]->bi_mutex));
4984                         }
4985 
4986                         return (rc);
4987                 }
4988         }
4989 
4990         /* Check for duplicate shadows in same II group */
4991         if (ucopy->flags & CV_SHD2MST) {
4992                 /* Reset the state of all masters */
4993                 for (i = 0; i < ucopy->count; i++) {
4994                         ip = ipa[i];
4995                         ip->bi_master->bi_state &= ~DSW_MSTTARGET;
4996                 }
4997 
4998                 for (i = 0; i < ucopy->count; i++) {
4999                         ip = ipa[i];
5000                         /*
5001                          * Check the state of the master.  If DSW_MSTTARGET is
5002                          * set, it's because this master is attached to another
5003                          * shadow within this set.
5004                          */
5005                         if (ip->bi_master->bi_state & DSW_MSTTARGET) {
5006                                 rc = EINVAL;
5007                                 break;
5008                         }
5009 
5010                         /*
5011                          * Set the DSW_MSTTARGET bit on the master associated
5012                          * with this shadow.  This will allow us to detect
5013                          * multiple shadows pointing to this master within
5014                          * this loop.
5015                          */
5016                         ip->bi_master->bi_state |= DSW_MSTTARGET;
5017                 }
5018         }
5019 
5020         /* Handle error */
5021         if (rc) {
5022                 DTRACE_PROBE1(_ii_mass_copy_end2, int, rc);
5023                 for (i = 0; i < ucopy->count; i++) {
5024                         ip = ipa[i];
5025 
5026                         _ii_rlse_devs(ip, rtype);
5027 
5028                         mutex_enter(&ip->bi_mutex);
5029                         _ii_ioctl_done(ip);
5030                         mutex_exit(&ip->bi_mutex);
5031                 }
5032 
5033                 return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc));
5034         }
5035 
5036         /* Lock bitmaps & prepare counts */
5037         for (i = 0; i < ucopy->count; i++) {
5038                 ip = ipa[i];
5039                 _ii_lock_chunk(ip, II_NULLCHUNK);
5040                 if (ip->bi_overflow) {
5041                         ip->bi_overflow->ii_detachcnt = 0;
5042                 }
5043         }
5044 
5045         /* determine which volumes we're dealing with */
5046         for (i = 0; i < ucopy->count; i++) {
5047                 ip = ipa[i];
5048                 if (ip->bi_overflow) {
5049                         ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
5050                         if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) ==
5051                             CV_BMP_ONLY) {
5052                                 ++ip->bi_overflow->ii_detachcnt;
5053                         }
5054                 }
5055         }
5056 
5057         /* Perform copy */
5058         for (i = 0; i < ucopy->count; i++) {
5059                 ip = ipa[i];
5060                 rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait);
5061                 /* Hum... what to do if one of these fails? */
5062         }
5063 
5064         /* clear out flags so as to prevent any accidental reuse */
5065         for (i = 0; i < ucopy->count; i++) {
5066                 ip = ipa[i];
5067                 if (ip->bi_overflow)
5068                         ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
5069         }
5070 
5071         /*
5072          * We can only clean up the kstatus structure if there are
5073          * no waiters.  If someone's waiting for the information,
5074          * _ii_copyvolp() uses spcs_s_add to write to kstatus.  Panic
5075          * would ensue if we freed it up now.
5076          */
5077         if (!wait)
5078                 rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc);
5079 
5080         return (rc);
5081 }
5082 
5083 /*
5084  * _ii_list_copy
5085  * Retrieve a list from a character array and use _ii_mass_copy to
5086  * initiate a copy/update operation on all of the specified sets.
5087  *
5088  * Calling/Exit State:
5089  * Returns 0 if the operations was successful.  Otherwise an
5090  * error code.
5091  */
5092 int
5093 _ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait)
5094 {
5095         int i;
5096         int rc = 0;
5097         char *name;
5098         _ii_info_t *ip;
5099         _ii_info_t **ipa;
5100 
5101         ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5102 
5103         /* Reserve devices */
5104         name = list;
5105         mutex_enter(&_ii_info_mutex);
5106         for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) {
5107                 ip = _ii_find_set(name);
5108 
5109                 if (ip == NULL) {
5110                         rc = DSW_ENOTFOUND;
5111                         break;
5112                 }
5113 
5114                 ipa[i] = ip;
5115         }
5116 
5117         if (rc != 0) {
5118                 /* Failed to find all sets, release those we do have */
5119                 while (i-- > 0) {
5120                         ip = ipa[i];
5121                         mutex_enter(&ip->bi_mutex);
5122                         _ii_ioctl_done(ip);
5123                         mutex_exit(&ip->bi_mutex);
5124                 }
5125         } else {
5126                 /* Begin copy operation */
5127                 rc = _ii_mass_copy(ipa, ucopy, wait);
5128         }
5129 
5130         mutex_exit(&_ii_info_mutex);
5131 
5132         kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5133 
5134         return (rc);
5135 }
5136 
5137 /*
5138  * _ii_group_copy
5139  * Retrieve list of sets in a group and use _ii_mass_copy to initiate
5140  * a copy/update of all of them.
5141  *
5142  * Calling/Exit State:
5143  * Returns 0 if the operations was successful.  Otherwise an
5144  * error code.
5145  */
5146 int
5147 _ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait)
5148 {
5149         int             i;
5150         int             rc;
5151         uint64_t        hash;
5152         _ii_info_t      **ipa;
5153         _ii_lsthead_t   *head;
5154         _ii_lstinfo_t   *np;
5155 
5156         /* find group */
5157         hash = nsc_strhash(name);
5158 
5159         mutex_enter(&_ii_group_mutex);
5160 
5161         for (head = _ii_group_top; head; head = head->lst_next) {
5162                 if (hash == head->lst_hash && strncmp(head->lst_name,
5163                     name, DSW_NAMELEN) == 0)
5164                         break;
5165         }
5166 
5167         if (!head) {
5168                 mutex_exit(&_ii_group_mutex);
5169                 DTRACE_PROBE(_ii_group_copy);
5170                 return (DSW_EGNOTFOUND);
5171         }
5172 
5173         /* Count entries */
5174         for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next)
5175                 ++ucopy->count;
5176 
5177         if (ucopy->count == 0) {
5178                 mutex_exit(&_ii_group_mutex);
5179                 return (DSW_EGNOTFOUND);
5180         }
5181 
5182         ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5183         if (ipa == NULL) {
5184                 mutex_exit(&_ii_group_mutex);
5185                 return (ENOMEM);
5186         }
5187 
5188         /* Create list */
5189         mutex_enter(&_ii_info_mutex);
5190         np = head->lst_start;
5191         for (i = 0; i < ucopy->count; i++) {
5192                 ASSERT(np != 0);
5193 
5194                 ipa[i] = np->lst_ip;
5195 
5196                 mutex_enter(&ipa[i]->bi_mutex);
5197                 ipa[i]->bi_ioctl++;
5198 
5199                 np = np->lst_next;
5200         }
5201 
5202         /* Begin copy operation */
5203         rc = _ii_mass_copy(ipa, ucopy, wait);
5204 
5205         mutex_exit(&_ii_info_mutex);
5206         mutex_exit(&_ii_group_mutex);
5207 
5208         kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5209 
5210         return (rc);
5211 }
5212 
5213 /*
5214  * _ii_acopy
5215  *      Copy or update (take snapshot) II multiple volumes.
5216  *
5217  * Calling/Exit State:
5218  *      Returns 0 if the operation succeeded. Otherwise an error code
5219  *      is returned and any additional error information is copied
5220  *      out to the user.
5221  */
5222 int
5223 _ii_acopy(intptr_t arg, int ilp32, int *rvp)
5224 {
5225         int rc;
5226         size_t name_offset;
5227         char *list;
5228         char *nptr;
5229         char name[DSW_NAMELEN];
5230         dsw_aioctl_t ucopy;
5231         dsw_aioctl32_t ucopy32;
5232         spcs_s_info_t kstatus;
5233 
5234         *rvp = 0;
5235 
5236         name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
5237 
5238         if (ilp32) {
5239                 if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
5240                         return (EFAULT);
5241                 II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t);
5242                 ucopy.status = (spcs_s_info_t)ucopy32.status;
5243                 name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
5244         } else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
5245                 return (EFAULT);
5246 
5247         kstatus = spcs_s_kcreate();
5248 
5249         if (kstatus == NULL)
5250                 return (ENOMEM);
5251 
5252         nptr = (char *)arg + name_offset;
5253         rc = 0;
5254 
5255         if (ucopy.flags & CV_IS_GROUP) {
5256                 if (copyin(nptr, name, DSW_NAMELEN) < 0)
5257                         return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5258                             EFAULT));
5259 
5260                 /* kstatus information is handled within _ii_group_copy */
5261                 rc = _ii_group_copy(name, &ucopy, 0);
5262         } else if (ucopy.count > 0) {
5263                 list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP);
5264 
5265                 if (list == NULL)
5266                         return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5267                             ENOMEM));
5268 
5269                 if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0)
5270                         return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5271                             EFAULT));
5272 
5273                 rc = _ii_list_copy(list, &ucopy, 0);
5274                 kmem_free(list, DSW_NAMELEN * ucopy.count);
5275         }
5276 
5277         return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
5278 }
5279 
5280 /*
5281  * _ii_bitsset
5282  *      Copy out II pair bitmaps to user program
5283  *
5284  * Calling/Exit State:
5285  *      Returns 0 if the operation succeeded. Otherwise an error code
5286  *      is returned and any additional error information is copied
5287  *      out to the user.
5288  */
5289 int
5290 _ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp)
5291 {
5292         dsw_bitsset_t ubitsset;
5293         dsw_bitsset32_t ubitsset32;
5294         nsc_size_t nbitsset;
5295         _ii_info_t *ip;
5296         int rc;
5297         spcs_s_info_t kstatus;
5298         int bitmap_size;
5299 
5300         *rvp = 0;
5301 
5302         if (ilp32) {
5303                 if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32)))
5304                         return (EFAULT);
5305                 ubitsset.status = (spcs_s_info_t)ubitsset32.status;
5306                 bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN);
5307         } else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset)))
5308                 return (EFAULT);
5309 
5310         kstatus = spcs_s_kcreate();
5311         if (kstatus == NULL)
5312                 return (ENOMEM);
5313 
5314         if (!ubitsset.shadow_vol[0])
5315                 return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5316                     DSW_EEMPTY));
5317 
5318         mutex_enter(&_ii_info_mutex);
5319         ip = _ii_find_set(ubitsset.shadow_vol);
5320         mutex_exit(&_ii_info_mutex);
5321         if (ip == NULL)
5322                 return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5323                     DSW_ENOTFOUND));
5324 
5325         mutex_exit(&ip->bi_mutex);
5326 
5327         if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
5328                 mutex_enter(&ip->bi_mutex);
5329                 _ii_ioctl_done(ip);
5330                 mutex_exit(&ip->bi_mutex);
5331                 spcs_s_add(kstatus, rc);
5332                 return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5333                     DSW_ERSRVFAIL));
5334         }
5335 
5336         ubitsset.tot_size = ip->bi_size / DSW_SIZE;
5337         if ((ip->bi_size % DSW_SIZE) != 0)
5338                 ++ubitsset.tot_size;
5339         bitmap_size = (ubitsset.tot_size + 7) / 8;
5340         if (cmd == DSWIOC_SBITSSET)
5341                 rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size);
5342         else
5343                 rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size);
5344         ubitsset.tot_set = nbitsset;
5345         _ii_rlse_devs(ip, BMP);
5346         mutex_enter(&ip->bi_mutex);
5347         _ii_ioctl_done(ip);
5348         mutex_exit(&ip->bi_mutex);
5349         if (rc) {
5350                 spcs_s_add(kstatus, rc);
5351                 return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO));
5352         }
5353 
5354         spcs_s_kfree(kstatus);
5355         /* return the fetched names to the user */
5356         if (ilp32) {
5357                 ubitsset32.status = (spcs_s_info32_t)ubitsset.status;
5358                 ubitsset32.tot_size = ubitsset.tot_size;
5359                 ubitsset32.tot_set = ubitsset.tot_set;
5360                 rc = copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32));
5361         } else {
5362                 rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset));
5363         }
5364 
5365         return (rc);
5366 }
5367 
5368 /*
5369  * _ii_stopvol
5370  *      Stop any copying process for shadow, and stop shadowing
5371  *
5372  */
5373 
5374 static void
5375 _ii_stopvol(_ii_info_t *ip)
5376 {
5377         nsc_path_t *mst_tok;
5378         nsc_path_t *mstr_tok;
5379         nsc_path_t *shd_tok;
5380         nsc_path_t *shdr_tok;
5381         nsc_path_t *bmp_tok;
5382         int rc;
5383 
5384         while (_ii_stopcopy(ip) == EINTR)
5385                 ;
5386 
5387         DTRACE_PROBE(_ii_stopvol);
5388 
5389         mutex_enter(&ip->bi_mutex);
5390         mst_tok = ip->bi_mst_tok;
5391         mstr_tok = ip->bi_mstr_tok;
5392         shd_tok = ip->bi_shd_tok;
5393         shdr_tok = ip->bi_shdr_tok;
5394         bmp_tok = ip->bi_bmp_tok;
5395         ip->bi_shd_tok = 0;
5396         ip->bi_shdr_tok = 0;
5397         if (!NSHADOWS(ip)) {
5398                 ip->bi_mst_tok = 0;
5399                 ip->bi_mstr_tok = 0;
5400         }
5401         ip->bi_bmp_tok = 0;
5402 
5403         /* Wait for any _ii_open() calls to complete */
5404 
5405         while (ip->bi_ioctl) {
5406                 ip->bi_state |= DSW_IOCTL;
5407                 cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex);
5408         }
5409         mutex_exit(&ip->bi_mutex);
5410 
5411         rc = _ii_reserve_begin(ip);
5412         if (rc) {
5413                 cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc);
5414         }
5415         if (!NSHADOWS(ip)) {
5416                 if (mst_tok) {
5417                         rc = _ii_unregister_path(mst_tok, NSC_PCATCH,
5418                             "master");
5419                         if (rc)
5420                                 cmn_err(CE_WARN, "!ii: unregister master %d",
5421                                     rc);
5422                 }
5423 
5424                 if (mstr_tok) {
5425                         rc = _ii_unregister_path(mstr_tok, NSC_PCATCH,
5426                             "raw master");
5427                         if (rc)
5428                                 cmn_err(CE_WARN, "!ii: unregister raw "
5429                                     "master %d", rc);
5430                 }
5431         }
5432 
5433         if (shd_tok) {
5434                 rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow");
5435                 if (rc)
5436                         cmn_err(CE_WARN, "!ii: unregister shadow %d", rc);
5437         }
5438 
5439         if (shdr_tok) {
5440                 rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow");
5441                 if (rc)
5442                         cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc);
5443         }
5444 
5445         if (bmp_tok) {
5446                 rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap");
5447                 if (rc)
5448                         cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc);
5449         }
5450         _ii_reserve_end(ip);
5451 
5452         /* Wait for all necessary _ii_close() calls to complete */
5453         mutex_enter(&ip->bi_mutex);
5454 
5455         while (total_ref(ip) != 0) {
5456                 ip->bi_state |= DSW_CLOSING;
5457                 cv_wait(&ip->bi_closingcv, &ip->bi_mutex);
5458         }
5459         if (!NSHADOWS(ip)) {
5460                 nsc_set_owner(ip->bi_mstfd, NULL);
5461                 nsc_set_owner(ip->bi_mstrfd, NULL);
5462         }
5463         nsc_set_owner(ip->bi_shdfd, NULL);
5464         nsc_set_owner(ip->bi_shdrfd, NULL);
5465         mutex_exit(&ip->bi_mutex);
5466 
5467 }
5468 
5469 
5470 /*
5471  * _ii_ioctl_done
5472  *      If this is the last one to complete, wakeup all processes waiting
5473  *      for ioctls to complete
5474  *
5475  */
5476 
5477 static void
5478 _ii_ioctl_done(_ii_info_t *ip)
5479 {
5480         ASSERT(ip->bi_ioctl > 0);
5481         ip->bi_ioctl--;
5482         if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) {
5483                 ip->bi_state &= ~DSW_IOCTL;
5484                 cv_broadcast(&ip->bi_ioctlcv);
5485         }
5486 
5487 }
5488 
5489 /*
5490  * _ii_find_vol
5491  *      Search the configured shadows list for the supplied volume.
5492  *      If found, flag an ioctl in progress and return the locked _ii_info_t.
5493  *
5494  *      The caller must check to see if the bi_disable flag is set and
5495  *      treat it appropriately.
5496  *
5497  * ASSUMPTION:
5498  *      _ii_info_mutex must be locked prior to calling this function
5499  *
5500  */
5501 
5502 static _ii_info_t *
5503 _ii_find_vol(char *volume, int vol)
5504 {
5505         _ii_info_t **xip, *ip;
5506 
5507         for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
5508                 if ((*xip)->bi_disabled)
5509                         continue;
5510                 if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) :
5511                     (*xip)->bi_keyname) == 0) {
5512                         break;
5513                 }
5514         }
5515 
5516         if (!*xip) {
5517                 DTRACE_PROBE(VolNotFound);
5518                 return (NULL);
5519         }
5520 
5521         ip = *xip;
5522         if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) {
5523                 /* Not fully configured until bi_shd_tok is set */
5524                 DTRACE_PROBE(SetNotConfiged);
5525                 return (NULL);
5526 
5527         }
5528         mutex_enter(&ip->bi_mutex);
5529         ip->bi_ioctl++;
5530 
5531         return (ip);
5532 }
5533 
5534 static _ii_info_t *
5535 _ii_find_set(char *volume)
5536 {
5537         return (_ii_find_vol(volume, SHD));
5538 }
5539 
5540 /*
5541  * _ii_find_overflow
5542  *      Search the configured shadows list for the supplied overflow volume.
5543  *
5544  */
5545 
5546 static _ii_overflow_t *
5547 _ii_find_overflow(char *volume)
5548 {
5549         _ii_overflow_t **xop, *op;
5550 
5551         mutex_enter(&_ii_overflow_mutex);
5552 
5553         DTRACE_PROBE(_ii_find_overflowmutex);
5554 
5555         for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) {
5556                 if (strcmp(volume, (*xop)->ii_volname) == 0) {
5557                         break;
5558                 }
5559         }
5560 
5561         if (!*xop) {
5562                 mutex_exit(&_ii_overflow_mutex);
5563                 return (NULL);
5564         }
5565 
5566         op = *xop;
5567         mutex_exit(&_ii_overflow_mutex);
5568 
5569         return (op);
5570 }
5571 
5572 /*
5573  * _ii_bm_header_get
5574  *      Fetch the bitmap volume header
5575  *
5576  */
5577 
5578 ii_header_t *
5579 _ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp)
5580 {
5581         ii_header_t *hdr;
5582         nsc_off_t read_fba;
5583         int rc;
5584 
5585         ASSERT(ip->bi_bmprsrv);              /* assert bitmap is reserved */
5586         ASSERT(MUTEX_HELD(&ip->bi_mutex));
5587 
5588         if ((ip->bi_flags & DSW_BMPOFFLINE) != 0)
5589                 return (NULL);
5590 
5591         *tmp = NULL;
5592         read_fba = 0;
5593 
5594         II_READ_START(ip, bitmap);
5595         rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba,
5596             FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp);
5597         II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
5598         if (!II_SUCCESS(rc)) {
5599                 if (ii_debug > 2)
5600                         cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x",
5601                             rc);
5602                 if (*tmp)
5603                         (void) nsc_free_buf(*tmp);
5604                 *tmp = NULL;
5605                 mutex_exit(&ip->bi_mutex);
5606                 _ii_error(ip, DSW_BMPOFFLINE);
5607                 mutex_enter(&ip->bi_mutex);
5608                 return (NULL);
5609         }
5610 
5611         hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr;
5612 
5613         return (hdr);
5614 }
5615 
5616 
5617 /*
5618  * _ii_bm_header_free
5619  *      Free the bitmap volume header
5620  *
5621  */
5622 
5623 /* ARGSUSED */
5624 
5625 void
5626 _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5627 {
5628         (void) nsc_free_buf(tmp);
5629 
5630 }
5631 
5632 /*
5633  * _ii_bm_header_put
5634  *      Write out the modified bitmap volume header and free it
5635  *
5636  */
5637 
5638 /* ARGSUSED */
5639 
5640 int
5641 _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5642 {
5643         nsc_off_t write_fba;
5644         int rc;
5645 
5646         ASSERT(MUTEX_HELD(&ip->bi_mutex));
5647 
5648         write_fba = 0;
5649 
5650         II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba,
5651             FBA_LEN(sizeof (ii_header_t)), 0);
5652 
5653         (void) nsc_free_buf(tmp);
5654         if (!II_SUCCESS(rc)) {
5655                 mutex_exit(&ip->bi_mutex);
5656                 _ii_error(ip, DSW_BMPOFFLINE);
5657                 mutex_enter(&ip->bi_mutex);
5658                 DTRACE_PROBE(_ii_bm_header_put);
5659                 return (rc);
5660         } else {
5661                 DTRACE_PROBE(_ii_bm_header_put_end);
5662                 return (0);
5663         }
5664 }
5665 
5666 /*
5667  * _ii_flag_op
5668  *      Clear or set a flag in bi_flags and dsw_state.
5669  *      This relies on the ownership of the header block's nsc_buf
5670  *      for locking.
5671  *
5672  */
5673 
5674 void
5675 _ii_flag_op(and, or, ip, update)
5676 int     and, or;
5677 _ii_info_t *ip;
5678 int update;
5679 {
5680         ii_header_t *bm_header;
5681         nsc_buf_t *tmp;
5682 
5683         ip->bi_flags &= and;
5684         ip->bi_flags |= or;
5685 
5686         if (update == TRUE) {
5687 
5688                 /*
5689                  * No point trying to access bitmap header if it's offline
5690                  * or has been disassociated from set via DSW_HANGING
5691                  */
5692                 if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) {
5693                         bm_header = _ii_bm_header_get(ip, &tmp);
5694                         if (bm_header == NULL) {
5695                                 if (tmp)
5696                                         (void) nsc_free_buf(tmp);
5697                                 DTRACE_PROBE(_ii_flag_op_end);
5698                                 return;
5699                         }
5700                         bm_header->ii_state &= and;
5701                         bm_header->ii_state |= or;
5702                         /* copy over the mtime */
5703                         bm_header->ii_mtime = ip->bi_mtime;
5704                         (void) _ii_bm_header_put(bm_header, ip, tmp);
5705                 }
5706         }
5707 
5708 }
5709 
5710 /*
5711  * _ii_nsc_io
5712  *      Perform read or write on an underlying nsc device
5713  * fd           - nsc file descriptor
5714  * flag         - nsc io direction and characteristics flag
5715  * fba_pos      - offset from beginning of device in FBAs
5716  * io_addr      - pointer to data buffer
5717  * io_len       - length of io in bytes
5718  */
5719 
5720 int
5721 _ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos,
5722     unsigned char *io_addr, nsc_size_t io_len)
5723 {
5724         nsc_buf_t *tmp = NULL;
5725         nsc_vec_t *vecp;
5726         uchar_t *vaddr;
5727         size_t  copy_len;
5728         int64_t vlen;
5729         int     rc;
5730         nsc_size_t      fba_req, fba_len;
5731         nsc_size_t      maxfbas = 0;
5732         nsc_size_t      tocopy;
5733         unsigned char *toaddr;
5734 
5735         rc = nsc_maxfbas(fd, 0, &maxfbas);
5736         if (!II_SUCCESS(rc)) {
5737 #ifdef DEBUG
5738                 cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc);
5739 #endif
5740                 maxfbas = DSW_CBLK_FBA;
5741         }
5742 
5743         toaddr = io_addr;
5744         fba_req = FBA_LEN(io_len);
5745 
5746 #ifdef DEBUG_SPLIT_IO
5747         cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas);
5748         cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x",
5749             toaddr, io_len, fba_req);
5750 #endif
5751 
5752 loop:
5753         tmp = NULL;
5754         fba_len = min(fba_req, maxfbas);
5755         tocopy = min(io_len, FBA_SIZE(fba_len));
5756 
5757         DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos,
5758             nsc_size_t, fba_len);
5759 
5760 #ifdef DEBUG_SPLIT_IO
5761         cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x",
5762             fba_pos, fba_len);
5763 #endif
5764 
5765 #ifndef DISABLE_KSTATS
5766         if (flag & NSC_READ) {
5767                 switch (ks) {
5768                 case KS_MST:
5769                         II_READ_START(ip, master);
5770                         break;
5771                 case KS_SHD:
5772                         II_READ_START(ip, shadow);
5773                         break;
5774                 case KS_BMP:
5775                         II_READ_START(ip, bitmap);
5776                         break;
5777                 case KS_OVR:
5778                         II_READ_START(ip, overflow);
5779                         break;
5780                 default:
5781                         cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5782                         break;
5783                 }
5784         }
5785 #endif
5786 
5787         rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp);
5788 
5789 #ifndef DISABLE_KSTATS
5790         if (flag & NSC_READ) {
5791                 switch (ks) {
5792                 case KS_MST:
5793                         II_READ_END(ip, master, rc, fba_len);
5794                         break;
5795                 case KS_SHD:
5796                         II_READ_END(ip, shadow, rc, fba_len);
5797                         break;
5798                 case KS_BMP:
5799                         II_READ_END(ip, bitmap, rc, fba_len);
5800                         break;
5801                 case KS_OVR:
5802                         II_READ_END(ip, overflow, rc, fba_len);
5803                         break;
5804                 }
5805         }
5806 #endif
5807 
5808         if (!II_SUCCESS(rc)) {
5809                 if (tmp) {
5810                         (void) nsc_free_buf(tmp);
5811                 }
5812 
5813                 return (EIO);
5814         }
5815 
5816         if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE &&
5817             (FBA_OFF(io_len) != 0)) {
5818                 /*
5819                  * Not overwriting all of the last FBA, so read in the
5820                  * old contents now before we overwrite it with the new
5821                  * data.
5822                  */
5823 #ifdef DEBUG_SPLIT_IO
5824                 cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x",
5825                     fba_pos+FBA_NUM(io_len));
5826 #endif
5827 
5828 #ifdef DISABLE_KSTATS
5829                 rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5830 #else
5831                 switch (ks) {
5832                 case KS_MST:
5833                         II_NSC_READ(ip, master, rc, tmp,
5834                             fba_pos+FBA_NUM(io_len), 1, 0);
5835                         break;
5836                 case KS_SHD:
5837                         II_NSC_READ(ip, shadow, rc, tmp,
5838                             fba_pos+FBA_NUM(io_len), 1, 0);
5839                         break;
5840                 case KS_BMP:
5841                         II_NSC_READ(ip, bitmap, rc, tmp,
5842                             fba_pos+FBA_NUM(io_len), 1, 0);
5843                         break;
5844                 case KS_OVR:
5845                         II_NSC_READ(ip, overflow, rc, tmp,
5846                             fba_pos+FBA_NUM(io_len), 1, 0);
5847                         break;
5848                 case KS_NA:
5849                         rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5850                         break;
5851                 default:
5852                         cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5853                         rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5854                         break;
5855                 }
5856 #endif
5857                 if (!II_SUCCESS(rc)) {
5858                         (void) nsc_free_buf(tmp);
5859                         return (EIO);
5860                 }
5861         }
5862 
5863         vecp = tmp->sb_vec;
5864         vlen = vecp->sv_len;
5865         vaddr = vecp->sv_addr;
5866 
5867         while (tocopy > 0) {
5868                 if (vecp->sv_addr == 0 || vecp->sv_len == 0) {
5869 #ifdef DEBUG
5870                         cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle");
5871 #endif
5872                         break;
5873                 }
5874 
5875                 copy_len = (size_t)min(vlen, tocopy);
5876 
5877                 DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len);
5878 
5879                 if (flag & NSC_WRITE)
5880                         bcopy(io_addr, vaddr, copy_len);
5881                 else
5882                         bcopy(vaddr, io_addr, copy_len);
5883 
5884                 toaddr += copy_len;
5885                 tocopy -= copy_len;
5886                 io_addr += copy_len;
5887                 io_len -= copy_len;
5888                 vaddr += copy_len;
5889                 vlen -= copy_len;
5890 
5891                 if (vlen <= 0) {
5892                         vecp++;
5893                         vaddr = vecp->sv_addr;
5894                         vlen = vecp->sv_len;
5895                 }
5896         }
5897 
5898         if (flag & NSC_WRITE) {
5899 #ifdef DISABLE_KSTATS
5900                 rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5901 #else
5902                 switch (ks) {
5903                 case KS_MST:
5904                         II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos,
5905                             tmp->sb_len, 0);
5906                         break;
5907                 case KS_SHD:
5908                         II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos,
5909                             tmp->sb_len, 0);
5910                         break;
5911                 case KS_BMP:
5912                         II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos,
5913                             tmp->sb_len, 0);
5914                         break;
5915                 case KS_OVR:
5916                         II_NSC_WRITE(ip, overflow, rc, tmp, tmp->sb_pos,
5917                             tmp->sb_len, 0);
5918                         break;
5919                 case KS_NA:
5920                         rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5921                         break;
5922                 default:
5923                         cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5924                         rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5925                         break;
5926                 }
5927 #endif
5928                 if (!II_SUCCESS(rc)) {
5929                         (void) nsc_free_buf(tmp);
5930                         return (rc);
5931                 }
5932         }
5933 
5934         (void) nsc_free_buf(tmp);
5935 
5936         fba_pos += fba_len;
5937         fba_req -= fba_len;
5938         if (fba_req > 0)
5939                 goto loop;
5940 
5941         return (0);
5942 }
5943 
5944 
5945 /*
5946  * ii_overflow_attach
5947  */
5948 static int
5949 ii_overflow_attach(_ii_info_t *ip, char *name, int first)
5950 {
5951         _ii_overflow_t *op;
5952         int rc = 0;
5953         int reserved = 0;
5954         int mutex_set = 0;
5955         int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */
5956 
5957         mutex_enter(&_ii_overflow_mutex);
5958         /* search for name in list */
5959         for (op = _ii_overflow_top; op; op = op->ii_next) {
5960                 if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0)
5961                         break;
5962         }
5963         if (op) {
5964                 ip->bi_overflow = op;
5965                 op->ii_crefcnt++;
5966                 op->ii_drefcnt++;
5967                 if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) {
5968                         if (!first)
5969                                 mutex_enter(&ip->bi_mutex);
5970                         ip->bi_flags |= DSW_OVRHDRDRTY;
5971                         if (!first)
5972                                 mutex_exit(&ip->bi_mutex);
5973                         op->ii_urefcnt++;
5974                 }
5975 #ifndef DISABLE_KSTATS
5976                 ip->bi_kstat_io.overflow = op->ii_overflow;
5977                 (void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname,
5978                     KSTAT_DATA_CHAR_LEN);
5979 #endif
5980                 /* write header */
5981                 if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) {
5982                         rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
5983                             NSC_WRBUF, II_OHEADER_FBA,
5984                             (unsigned char *)&op->ii_do, sizeof (op->ii_do));
5985                         (void) nsc_release(op->ii_dev->bi_fd);
5986                         ++iigkstat.assoc_over.value.ul;
5987                 }
5988                 mutex_exit(&_ii_overflow_mutex);
5989                 return (rc);
5990         }
5991         if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) {
5992                 mutex_exit(&_ii_overflow_mutex);
5993                 return (ENOMEM);
5994         }
5995         if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP))
5996             == NULL) {
5997                 kmem_free(op, sizeof (*op));
5998                 mutex_exit(&_ii_overflow_mutex);
5999                 return (ENOMEM);
6000         }
6001 #ifndef DISABLE_KSTATS
6002         if ((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) {
6003                 ip->bi_kstat_io.overflow = op->ii_overflow;
6004                 (void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio,
6005                     KSTAT_DATA_CHAR_LEN);
6006         } else {
6007                 goto fail;
6008         }
6009 #endif
6010         /* open overflow volume */
6011         op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL,
6012             (blind_t)&(op->ii_dev->bi_iodev), &rc);
6013         if (!op->ii_dev->bi_fd)
6014                 op->ii_dev->bi_fd = nsc_open(name,
6015                     NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
6016                     (blind_t)&(op->ii_dev->bi_iodev), &rc);
6017         if (op->ii_dev->bi_fd == NULL) {
6018                 goto fail;
6019         }
6020         if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0)
6021                 goto fail;
6022         reserved = 1;
6023         /* register path */
6024         op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE,
6025             _ii_ior);
6026         if (!op->ii_dev->bi_tok) {
6027                 goto fail;
6028         }
6029         /* read header */
6030         rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF,
6031             II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6032         if (!II_SUCCESS(rc)) {
6033                 _ii_error(ip, DSW_OVROFFLINE);
6034                 goto fail;
6035         }
6036         /* On resume, check for old hmagic */
6037         if (strncmp(op->ii_volname, name, DSW_NAMELEN) ||
6038             ((op->ii_hmagic != II_OLD_OMAGIC) &&
6039             (op->ii_hmagic != II_OMAGIC))) {
6040                 rc = DSW_EOMAGIC;
6041                 goto fail;
6042         }
6043         /* set up counts */
6044         op->ii_crefcnt = 1;
6045         op->ii_drefcnt = 0;
6046         op->ii_urefcnt = 0;
6047         op->ii_hmagic = II_OMAGIC;
6048         if (!first) {
6049                 /* if header version > 0, check if header written */
6050                 if (((op->ii_flags & IIO_HDR_WRTN) == 0) &&
6051                     (op->ii_hversion >= 1)) {
6052                         op->ii_flags |= IIO_CNTR_INVLD;
6053                         mutex_enter(&ip->bi_mutex);
6054                         ip->bi_flags |= DSW_OVRHDRDRTY;
6055                         mutex_exit(&ip->bi_mutex);
6056                         op->ii_urefcnt++;
6057                 }
6058         }
6059         op->ii_flags &= ~IIO_HDR_WRTN;
6060         op->ii_drefcnt++;
6061         /* write header */
6062         rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6063             II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6064         nsc_release(op->ii_dev->bi_fd);
6065         reserved = 0;
6066         if (!II_SUCCESS(rc)) {
6067                 _ii_error(ip, DSW_OVROFFLINE);
6068                 goto fail;
6069         }
6070 
6071         mutex_init(&op->ii_mutex, NULL, MUTEX_DRIVER, NULL);
6072         mutex_set++;
6073 
6074         /* link onto list */
6075         op->ii_next = _ii_overflow_top;
6076         _ii_overflow_top = op;
6077         ip->bi_overflow = op;
6078 
6079         ++iigkstat.assoc_over.value.ul;
6080         mutex_exit(&_ii_overflow_mutex);
6081 
6082         DTRACE_PROBE(_ii_overflow_attach_end);
6083         return (0);
6084 fail:
6085 #ifndef DISABLE_KSTATS
6086         /* Clean-up kstat stuff */
6087         if (op->ii_overflow) {
6088                 kstat_delete(op->ii_overflow);
6089                 mutex_destroy(&op->ii_kstat_mutex);
6090         }
6091 #endif
6092         /* clean up mutex if we made it that far */
6093         if (mutex_set) {
6094                 mutex_destroy(&op->ii_mutex);
6095         }
6096 
6097         if (op->ii_dev) {
6098                 if (op->ii_dev->bi_tok) {
6099                         (void) _ii_unregister_path(op->ii_dev->bi_tok, 0,
6100                             "overflow");
6101                 }
6102                 if (reserved)
6103                         (void) nsc_release(op->ii_dev->bi_fd);
6104                 if (op->ii_dev->bi_fd)
6105                         (void) nsc_close(op->ii_dev->bi_fd);
6106                 kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6107         }
6108         kmem_free(op, sizeof (*op));
6109         mutex_exit(&_ii_overflow_mutex);
6110 
6111         return (rc);
6112 }
6113 
6114 /*
6115  * ii_overflow_free
6116  * Assumes that ip is locked for I/O
6117  */
6118 static void
6119 ii_overflow_free(_ii_info_t *ip, int reclaim)
6120 {
6121         _ii_overflow_t *op, **xp;
6122 
6123         if ((op = ip->bi_overflow) == NULL)
6124                 return;
6125         ip->bi_kstat_io.overflow = NULL;
6126         mutex_enter(&_ii_overflow_mutex);
6127         switch (reclaim) {
6128         case NO_RECLAIM:
6129                 if (--(op->ii_drefcnt) == 0) {
6130                         /* indicate header written */
6131                         op->ii_flags |= IIO_HDR_WRTN;
6132                         /* write out header */
6133                         ASSERT(op->ii_dev->bi_fd);
6134                         (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6135                         (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
6136                             NSC_WRBUF, II_OHEADER_FBA,
6137                             (unsigned char *)&op->ii_do,
6138                             sizeof (op->ii_do));
6139                         nsc_release(op->ii_dev->bi_fd);
6140                 }
6141                 break;
6142         case RECLAIM:
6143                 ii_reclaim_overflow(ip);
6144                 /* FALLTHRU */
6145         case INIT_OVR:
6146                 if (--(op->ii_drefcnt) == 0) {
6147                         /* reset to new condition, c.f. _ii_ocreate() */
6148                         op->ii_used = 1;
6149                         op->ii_unused = op->ii_nchunks - op->ii_used;
6150                         op->ii_freehead = II_NULLNODE;
6151                 }
6152 
6153                 /* write out header */
6154                 ASSERT(op->ii_dev->bi_fd);
6155                 (void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6156                 (void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6157                     II_OHEADER_FBA, (unsigned char *)&op->ii_do,
6158                     sizeof (op->ii_do));
6159                 nsc_release(op->ii_dev->bi_fd);
6160         }
6161 
6162         if (--(op->ii_crefcnt) == 0) {
6163                 /* Close fd and unlink from active chain; */
6164 
6165                 (void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow");
6166                 (void) nsc_close(op->ii_dev->bi_fd);
6167 
6168                 for (xp = &_ii_overflow_top; *xp && *xp != op;
6169                     xp = &((*xp)->ii_next))
6170                         /* NULL statement */;
6171                 *xp = op->ii_next;
6172 
6173                 if (op->ii_overflow) {
6174                         kstat_delete(op->ii_overflow);
6175                 }
6176 
6177                 /* Clean up ii_overflow_t mutexs */
6178                 mutex_destroy(&op->ii_kstat_mutex);
6179                 mutex_destroy(&op->ii_mutex);
6180 
6181                 if (op->ii_dev)
6182                         kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6183                 kmem_free(op, sizeof (*op));
6184         }
6185         ip->bi_overflow = NULL;
6186         --iigkstat.assoc_over.value.ul;
6187         mutex_exit(&_ii_overflow_mutex);
6188 
6189 }
6190 
6191 /*
6192  * ii_sibling_free
6193  *      Free resources and unlink the sibling chains etc.
6194  */
6195 
6196 static void
6197 ii_sibling_free(_ii_info_t *ip)
6198 {
6199         _ii_info_t *hip, *yip;
6200 
6201         if (!ip)
6202                 return;
6203 
6204         if (ip->bi_shdr_tok)
6205                 (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
6206 
6207         if (ip->bi_shd_tok)
6208                 (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
6209 
6210         rw_enter(&ip->bi_linkrw, RW_WRITER);
6211 
6212         ip->bi_shd_tok = NULL;
6213         ip->bi_shdr_tok = NULL;
6214 
6215         if (NSHADOWS(ip)) {
6216                 mutex_enter(&_ii_info_mutex);
6217                 if (ip->bi_head == ip) {     /* removing head of list */
6218                         hip = ip->bi_sibling;
6219                         for (yip = hip; yip; yip = yip->bi_sibling)
6220                                 yip->bi_head = hip;
6221 
6222                 } else {                /* removing member of list */
6223                         hip = ip->bi_head;
6224                         for (yip = ip->bi_head; yip; yip = yip->bi_sibling) {
6225                                 if (yip->bi_sibling == ip) {
6226                                         yip->bi_sibling = ip->bi_sibling;
6227                                         break;
6228                                 }
6229                         }
6230                 }
6231                 hip->bi_master->bi_head = hip;
6232                 if (ip->bi_master == ip) {    /* master I/O goes through this */
6233                         mutex_exit(&_ii_info_mutex);
6234                         _ii_info_freeshd(ip);
6235                         rw_exit(&ip->bi_linkrw);
6236                         return;
6237                 }
6238                 mutex_exit(&_ii_info_mutex);
6239         } else {
6240                 if (ip->bi_master != ip)     /* last ref to master side ip */
6241                         _ii_info_free(ip->bi_master);        /* ==A== */
6242         }
6243 
6244         if (ip->bi_master != ip) {   /* info_free ==A== will close these */
6245                 /*
6246                  * Null out any pointers to shared master side resources
6247                  * that should only be freed once when the last reference
6248                  * to this master is freed and calls _ii_info_free().
6249                  */
6250                 ip->bi_mstdev = NULL;
6251                 ip->bi_mstrdev = NULL;
6252                 ip->bi_kstat_io.master = NULL;
6253         }
6254         rw_exit(&ip->bi_linkrw);
6255         _ii_info_free(ip);
6256 
6257 }
6258 
6259 /*
6260  * _ii_info_freeshd
6261  *      Free shadow side resources
6262  *
6263  * Calling/Exit State:
6264  *      No mutexes should be held on entry to this function.
6265  *
6266  * Description:
6267  *      Frees the system resources associated with the shadow
6268  *      access, leaving the master side alone. This allows the
6269  *      original master side to continue in use while there are
6270  *      outstanding references to this _ii_info_t.
6271  */
6272 
6273 static void
6274 _ii_info_freeshd(_ii_info_t *ip)
6275 {
6276         if (!ip)
6277                 return;
6278         if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING)
6279                 return;         /* this work has already been completed */
6280 
6281         II_FLAG_SETX(DSW_HANGING, ip);
6282 
6283         if (ip->bi_cluster)
6284                 (void) II_UNLINK_CLUSTER(ip);
6285         if (ip->bi_group)
6286                 (void) II_UNLINK_GROUP(ip);
6287 
6288         if (ip->bi_shdfd && ip->bi_shdrsrv)
6289                 nsc_release(ip->bi_shdfd);
6290         if (ip->bi_shdrfd && ip->bi_shdrrsrv)
6291                 nsc_release(ip->bi_shdrfd);
6292         if (ip->bi_bmpfd && ip->bi_bmprsrv)
6293                 nsc_release(ip->bi_bmpfd);
6294 
6295         if (ip->bi_bmp_tok)
6296                 (void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap");
6297 
6298         if (ip->bi_shdr_tok)
6299                 (void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
6300 
6301         if (ip->bi_shd_tok)
6302                 (void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
6303         ip->bi_shd_tok = NULL;
6304         ip->bi_shdr_tok = NULL;
6305 
6306         if (ip->bi_shdfd)
6307                 (void) nsc_close(ip->bi_shdfd);
6308 
6309         if (ip->bi_shdrfd)
6310                 (void) nsc_close(ip->bi_shdrfd);
6311 
6312         if (ip->bi_bmpfd)
6313                 (void) nsc_close(ip->bi_bmpfd);
6314 
6315         ip->bi_shdfd = NULL;
6316         ip->bi_shdrfd = NULL;
6317         ip->bi_bmpfd = NULL;
6318 
6319         if (ip->bi_busy)
6320                 kmem_free(ip->bi_busy,
6321                     1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)));
6322         ip->bi_busy = NULL;
6323 
6324         if (ip->bi_kstat_io.shadow) {
6325                 kstat_delete(ip->bi_kstat_io.shadow);
6326                 ip->bi_kstat_io.shadow = NULL;
6327         }
6328         if (ip->bi_kstat_io.bitmap) {
6329                 kstat_delete(ip->bi_kstat_io.bitmap);
6330                 ip->bi_kstat_io.bitmap = NULL;
6331         }
6332         if (ip->bi_kstat) {
6333                 kstat_delete(ip->bi_kstat);
6334                 ip->bi_kstat = NULL;
6335         }
6336 
6337 }
6338 
6339 /*
6340  * _ii_info_free
6341  *      Free resources
6342  *
6343  * Calling/Exit State:
6344  *      No mutexes should be held on entry to this function.
6345  *
6346  * Description:
6347  *      Frees the system resources associated with the specified
6348  *      II information structure.
6349  */
6350 
6351 static void
6352 _ii_info_free(_ii_info_t *ip)
6353 {
6354         _ii_info_t **xip;
6355 
6356         if (!ip)
6357                 return;
6358 
6359         mutex_enter(&_ii_info_mutex);
6360         for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) {
6361                 if (ip == *xip) {
6362                         *xip = ip->bi_nextmst;
6363                         break;
6364                 }
6365         }
6366         mutex_exit(&_ii_info_mutex);
6367 
6368         /* this rw_enter forces us to wait until all nsc_buffers are freed */
6369         rw_enter(&ip->bi_linkrw, RW_WRITER);
6370         if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv)
6371                 nsc_release(ip->bi_mstfd);
6372         if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv)
6373                 nsc_release(ip->bi_mstrfd);
6374 
6375         if (ip->bi_mstdev && ip->bi_mst_tok)
6376                 (void) _ii_unregister_path(ip->bi_mst_tok, 0, "master");
6377         if (ip->bi_mstrdev && ip->bi_mstr_tok)
6378                 (void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master");
6379 
6380         if (ip->bi_mstdev && ip->bi_mstfd)
6381                 (void) nsc_close(ip->bi_mstfd);
6382         if (ip->bi_mstrdev && ip->bi_mstrfd)
6383                 (void) nsc_close(ip->bi_mstrfd);
6384         rw_exit(&ip->bi_linkrw);
6385 
6386         if (ip->bi_mstdev) {
6387                 nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
6388         }
6389         if (ip->bi_mstrdev) {
6390                 nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
6391         }
6392 
6393         if (ip->bi_kstat_io.master) {
6394                 kstat_delete(ip->bi_kstat_io.master);
6395         }
6396         if (ip->bi_kstat_io.shadow) {
6397                 kstat_delete(ip->bi_kstat_io.shadow);
6398                 ip->bi_kstat_io.shadow = 0;
6399         }
6400         if (ip->bi_kstat_io.bitmap) {
6401                 kstat_delete(ip->bi_kstat_io.bitmap);
6402                 ip->bi_kstat_io.bitmap = 0;
6403         }
6404         if (ip->bi_kstat) {
6405                 kstat_delete(ip->bi_kstat);
6406                 ip->bi_kstat = NULL;
6407         }
6408 
6409         /* this rw_enter forces us to wait until all nsc_buffers are freed */
6410         rw_enter(&ip->bi_linkrw, RW_WRITER);
6411         rw_exit(&ip->bi_linkrw);
6412 
6413         mutex_destroy(&ip->bi_mutex);
6414         mutex_destroy(&ip->bi_rsrvmutex);
6415         mutex_destroy(&ip->bi_rlsemutex);
6416         mutex_destroy(&ip->bi_bmpmutex);
6417         mutex_destroy(&ip->bi_chksmutex);
6418         cv_destroy(&ip->bi_copydonecv);
6419         cv_destroy(&ip->bi_reservecv);
6420         cv_destroy(&ip->bi_releasecv);
6421         cv_destroy(&ip->bi_ioctlcv);
6422         cv_destroy(&ip->bi_closingcv);
6423         cv_destroy(&ip->bi_busycv);
6424         rw_destroy(&ip->bi_busyrw);
6425         rw_destroy(&ip->bi_linkrw);
6426 
6427         _ii_info_freeshd(ip);
6428 
6429 #ifdef DEBUG
6430         ip->bi_head = (_ii_info_t *)0xdeadbeef;
6431 #endif
6432 
6433         nsc_kmem_free(ip, sizeof (*ip));
6434 
6435 }
6436 
6437 /*
6438  * _ii_copy_chunks
6439  *      Perform a copy of some chunks
6440  *
6441  * Calling/Exit State:
6442  *      Returns 0 if the data was copied successfully, otherwise
6443  *      error code.
6444  *
6445  * Description:
6446  *      flag is set to CV_SHD2MST if the data is to be copied from the shadow
6447  *      to the master, 0 if it is to be copied from the master to the shadow.
6448  */
6449 
6450 static int
6451 _ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
6452 {
6453         int     mst_flag;
6454         int     shd_flag;
6455         int     ovr_flag;
6456         nsc_off_t       pos;
6457         nsc_size_t      len;
6458         int     rc;
6459         nsc_off_t       shd_pos;
6460         chunkid_t       shd_chunk;
6461         nsc_buf_t *mst_tmp = NULL;
6462         nsc_buf_t *shd_tmp = NULL;
6463 
6464         if (ip->bi_flags & DSW_MSTOFFLINE) {
6465                 DTRACE_PROBE(_ii_copy_chunks_end);
6466                 return (EIO);
6467         }
6468 
6469         if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) {
6470                 DTRACE_PROBE(_ii_copy_chunks_end);
6471                 return (EIO);
6472         }
6473 
6474         if (flag == CV_SHD2MST) {
6475                 mst_flag = NSC_WRBUF|NSC_WRTHRU;
6476                 shd_flag = NSC_RDBUF;
6477         } else {
6478                 shd_flag = NSC_WRBUF|NSC_WRTHRU;
6479                 mst_flag = NSC_RDBUF;
6480         }
6481 
6482         pos = DSW_CHK2FBA(chunk_num);
6483         len = DSW_SIZE * nchunks;
6484         if (pos + len > ip->bi_size)
6485                 len = ip->bi_size - pos;
6486         if (ip->bi_flags & DSW_TREEMAP) {
6487                 ASSERT(nchunks == 1);
6488                 shd_chunk = ii_tsearch(ip, chunk_num);
6489                 if (shd_chunk == II_NULLNODE) {
6490                         /* shadow is full */
6491                         mutex_enter(&ip->bi_mutex);
6492                         II_FLAG_SET(DSW_OVERFLOW, ip);
6493                         mutex_exit(&ip->bi_mutex);
6494                         DTRACE_PROBE(_ii_copy_chunks_end);
6495                         return (EIO);
6496                 }
6497 
6498                 ovr_flag = II_ISOVERFLOW(shd_chunk);
6499                 shd_pos = DSW_CHK2FBA((ovr_flag) ?
6500                     II_2OVERFLOW(shd_chunk) : shd_chunk);
6501         } else {
6502                 ovr_flag = FALSE;
6503                 shd_chunk = chunk_num;
6504                 shd_pos = pos;
6505         }
6506 
6507         /*
6508          * Always allocate the master side before the shadow to
6509          * avoid deadlocks on the same chunk.
6510          */
6511 
6512         DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len);
6513 
6514         II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp);
6515         if (!II_SUCCESS(rc)) {
6516                 if (mst_tmp)
6517                         (void) nsc_free_buf(mst_tmp);
6518                 _ii_error(ip, DSW_MSTOFFLINE);
6519                 DTRACE_PROBE(_ii_copy_chunks_end);
6520                 return (rc);
6521         }
6522 
6523         if (ovr_flag) {
6524                 /* use overflow volume */
6525                 (void) nsc_reserve(OVRFD(ip), NSC_MULTI);
6526                 II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len,
6527                     shd_flag, &shd_tmp);
6528         } else {
6529                 II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag,
6530                     &shd_tmp);
6531         }
6532         if (!II_SUCCESS(rc)) {
6533                 (void) nsc_free_buf(mst_tmp);
6534                 if (shd_tmp)
6535                         (void) nsc_free_buf(shd_tmp);
6536                 if (ovr_flag)
6537                         nsc_release(OVRFD(ip));
6538                 _ii_error(ip, DSW_SHDOFFLINE);
6539                 if (ovr_flag)
6540                         _ii_error(ip, DSW_OVROFFLINE);
6541                 DTRACE_PROBE(_ii_copy_chunks_end);
6542                 return (rc);
6543         }
6544 
6545         /*
6546          * The direction of copy is determined by the mst_flag.
6547          */
6548         DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct,
6549             int, mst_flag);
6550 
6551         if (ii_copy_direct) {
6552                 if (mst_flag & NSC_WRBUF) {
6553                         if (ovr_flag) {
6554                                 II_NSC_COPY_DIRECT(ip, overflow, master, rc,
6555                                     shd_tmp, mst_tmp, shd_pos, pos, len)
6556                         } else {
6557                                 II_NSC_COPY_DIRECT(ip, shadow, master, rc,
6558                                     shd_tmp, mst_tmp, shd_pos, pos, len)
6559                         }
6560                         if (!II_SUCCESS(rc)) {
6561                                 /* A copy has failed - something is wrong */
6562                                 _ii_error(ip, DSW_MSTOFFLINE);
6563                                 _ii_error(ip, DSW_SHDOFFLINE);
6564                                 if (ovr_flag)
6565                                         _ii_error(ip, DSW_OVROFFLINE);
6566                         }
6567                 } else {
6568                         if (ovr_flag) {
6569                                 II_NSC_COPY_DIRECT(ip, master, overflow, rc,
6570                                     mst_tmp, shd_tmp, pos, shd_pos, len);
6571                         } else {
6572                                 II_NSC_COPY_DIRECT(ip, master, shadow, rc,
6573                                     mst_tmp, shd_tmp, pos, shd_pos, len);
6574                         }
6575                         if (!II_SUCCESS(rc)) {
6576                                 /*
6577                                  * A failure has occurred during the above copy.
6578                                  * The macro calls nsc_copy_direct, which will
6579                                  * never return a read failure, only a write
6580                                  * failure. With this assumption, we should
6581                                  * take only the target volume offline.
6582                                  */
6583                                 _ii_error(ip, DSW_SHDOFFLINE);
6584                                 if (ovr_flag)
6585                                         _ii_error(ip, DSW_OVROFFLINE);
6586                         }
6587                 }
6588         } else {
6589                 if (mst_flag & NSC_WRBUF) {
6590                         rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len);
6591                         if (II_SUCCESS(rc)) {
6592                                 II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len,
6593                                     0);
6594                                 if (!II_SUCCESS(rc))
6595                                         _ii_error(ip, DSW_MSTOFFLINE);
6596                         } else {
6597                                 /* A copy has failed - something is wrong */
6598                                 _ii_error(ip, DSW_MSTOFFLINE);
6599                                 _ii_error(ip, DSW_SHDOFFLINE);
6600                         }
6601                 } else {
6602                         rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len);
6603                         if (II_SUCCESS(rc)) {
6604                                 if (ovr_flag) {
6605                                         II_NSC_WRITE(ip, overflow, rc, shd_tmp,
6606                                             shd_pos, len, 0);
6607                                 } else {
6608                                         II_NSC_WRITE(ip, shadow, rc, shd_tmp,
6609                                             shd_pos, len, 0);
6610                                 }
6611                                 if (!II_SUCCESS(rc)) {
6612                                         _ii_error(ip, DSW_SHDOFFLINE);
6613                                         if (ovr_flag)
6614                                                 _ii_error(ip, DSW_OVROFFLINE);
6615                                 }
6616                         } else {
6617                                 /* A copy has failed - something is wrong */
6618                                 _ii_error(ip, DSW_MSTOFFLINE);
6619                                 _ii_error(ip, DSW_SHDOFFLINE);
6620                         }
6621                 }
6622         }
6623 
6624         (void) nsc_free_buf(mst_tmp);
6625         (void) nsc_free_buf(shd_tmp);
6626         if (ovr_flag)
6627                 nsc_release(OVRFD(ip));
6628 
6629         DTRACE_PROBE(_ii_copy_chunks);
6630 
6631         if (II_SUCCESS(rc)) {
6632                 (void) II_CLR_COPY_BITS(ip, chunk_num, nchunks);
6633                 rc = 0;
6634         }
6635 
6636         return (rc);
6637 }
6638 
6639 
6640 /*
6641  * _ii_copy_on_write
6642  *
6643  * Calling/Exit State:
6644  *      Returns 0 on success, otherwise error code.
6645  *
6646  * Description:
6647  *      Determines if a copy on write is necessary, and performs it.
6648  *      A copy on write is necessary in the following cases:
6649  *              - No copy is in progress and the shadow bit is clear, which
6650  *                means this is the first write to this track.
6651  *              - A copy is in progress and the copy bit is set, which means
6652  *                that a track copy is required.
6653  *      If a copy to the master is to be done, make a recursive call to this
6654  *      function to do any necessary copy on write on other InstantImage groups
6655  *      that share the same master volume.
6656  */
6657 
6658 static int
6659 _ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
6660 {
6661         int rc = 0;
6662         int rtype;
6663         int hanging =  (ip->bi_flags&DSW_HANGING);
6664 
6665         if (hanging ||
6666             (flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) {
6667                 _ii_info_t *xip;
6668                 /*
6669                  * Preserve copy of master for all other shadows of this master
6670                  * before writing our data onto the master.
6671                  */
6672 
6673                 /*
6674                  * Avoid deadlock with COW on same chunk of sibling shadow
6675                  * by unlocking this chunk before copying all other sibling
6676                  * chunks.
6677                  */
6678 
6679                 /*
6680                  * Only using a single chunk when copying to master avoids
6681                  * complex code here.
6682                  */
6683 
6684                 ASSERT(nchunks == 1);
6685                 if (!hanging)
6686                         _ii_unlock_chunk(ip, chunk_num);
6687                 for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
6688                         if (xip == ip)          /* don't copy ourselves again */
6689                                 continue;
6690 
6691                         DTRACE_PROBE(_ii_copy_on_write);
6692 
6693                         rw_enter(&xip->bi_linkrw, RW_READER);
6694                         mutex_enter(&xip->bi_mutex);
6695                         if (xip->bi_disabled) {
6696                                 mutex_exit(&xip->bi_mutex);
6697                                 rw_exit(&xip->bi_linkrw);
6698                                 continue;       /* this set is stopping */
6699                         }
6700                         xip->bi_shdref++;
6701                         mutex_exit(&xip->bi_mutex);
6702                         /* don't waste time asking for MST as ip shares it */
6703                         rtype = SHDR|BMP;
6704                         (void) _ii_rsrv_devs(xip, rtype, II_INTERNAL);
6705                         _ii_lock_chunk(xip, chunk_num);
6706                         rc = _ii_copy_on_write(xip, flag | CV_SIBLING,
6707                             chunk_num, 1);
6708 
6709                         /*
6710                          * See comments in _ii_shadow_write()
6711                          */
6712                         if (rc == 0 ||
6713                             (rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0))
6714                                 (void) II_SET_SHD_BIT(xip, chunk_num);
6715 
6716                         _ii_unlock_chunk(xip, chunk_num);
6717                         _ii_rlse_devs(xip, rtype);
6718                         mutex_enter(&xip->bi_mutex);
6719                         xip->bi_shdref--;
6720                         if (xip->bi_state & DSW_CLOSING) {
6721                                 if (total_ref(xip) == 0) {
6722                                         cv_signal(&xip->bi_closingcv);
6723                                 }
6724                         }
6725                         mutex_exit(&xip->bi_mutex);
6726                         rw_exit(&xip->bi_linkrw);
6727                 }
6728                 if (hanging) {
6729                         DTRACE_PROBE(_ii_copy_on_write_end);
6730                         return (0);
6731                 }
6732                 /*
6733                  * Reacquire chunk lock and check that a COW by a sibling
6734                  * has not already copied this chunk.
6735                  */
6736                 _ii_lock_chunk(ip, chunk_num);
6737                 rc = II_TST_SHD_BIT(ip, chunk_num);
6738                 if (rc < 0) {
6739                         DTRACE_PROBE(_ii_copy_on_write_end);
6740                         return (EIO);
6741                 }
6742                 if (rc != 0) {
6743                         DTRACE_PROBE(_ii_copy_on_write_end);
6744                         return (0);
6745                 }
6746         }
6747 
6748         if ((ip->bi_flags & DSW_COPYING) == 0) {
6749                 /* Not copying at all */
6750 
6751                 if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) {
6752                         /* No copy-on-write as it is independent */
6753                         DTRACE_PROBE(_ii_copy_on_write_end);
6754                         return (0);
6755                 }
6756 
6757                 /* Dependent, so depends on shadow bit */
6758 
6759                 if ((flag == CV_SHD2MST) &&
6760                     ((ip->bi_flags & DSW_SHDOFFLINE) != 0)) {
6761                         /*
6762                          * Writing master but shadow is offline, so
6763                          * no need to copy on write or set shadow bit
6764                          */
6765                         DTRACE_PROBE(_ii_copy_on_write_end);
6766                         return (0);
6767                 }
6768                 if (ip->bi_flags & DSW_BMPOFFLINE) {
6769                         DTRACE_PROBE(_ii_copy_on_write_end);
6770                         return (EIO);
6771                 }
6772                 rc = II_TST_SHD_BIT(ip, chunk_num);
6773                 if (rc < 0) {
6774                         DTRACE_PROBE(_ii_copy_on_write_end);
6775                         return (EIO);
6776                 }
6777                 if (rc == 0) {
6778                         /* Shadow bit clear, copy master to shadow */
6779                         rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6780                 }
6781         } else {
6782                 /* Copying one way or the other */
6783                 if (ip->bi_flags & DSW_BMPOFFLINE) {
6784                         DTRACE_PROBE(_ii_copy_on_write_end);
6785                         return (EIO);
6786                 }
6787                 rc = II_TST_COPY_BIT(ip, chunk_num);
6788                 if (rc < 0) {
6789                         DTRACE_PROBE(_ii_copy_on_write_end);
6790                         return (EIO);
6791                 }
6792                 if (rc) {
6793                         /* Copy bit set, do a copy */
6794                         if ((ip->bi_flags & DSW_COPYINGS) == 0) {
6795                                 /* Copy master to shadow */
6796                                 rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6797                         } else {
6798                                 /* Copy shadow to master */
6799                                 rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num,
6800                                     nchunks);
6801                         }
6802                 }
6803         }
6804         return (rc);
6805 }
6806 
6807 #ifdef  DEBUG
6808 int ii_maxchunks = 0;
6809 #endif
6810 
6811 /*
6812  * _ii_copyvolp()
6813  *      Copy volume process.
6814  *
6815  * Calling/Exit State:
6816  *      Passes 0 back to caller when the copy is complete or has been aborted,
6817  *      otherwise error code.
6818  *
6819  * Description:
6820  *      According to the flag, copy the master to the shadow volume or the
6821  *      shadow to the master volume. Upon return wakeup all processes waiting
6822  *      for this copy.
6823  *
6824  */
6825 
6826 static void
6827 _ii_copyvolp(struct copy_args *ca)
6828 {
6829         chunkid_t       chunk_num;
6830         int     rc = 0;
6831         chunkid_t       max_chunk;
6832         nsc_size_t      nc_max;
6833         int             nc_try, nc_got;
6834         nsc_size_t      mst_max, shd_max;
6835         _ii_info_t *ip;
6836         int     flag;
6837         nsc_size_t      bitmap_size;
6838         nsc_size_t      shadow_set, copy_set;
6839         int     chunkcount = 0;
6840         int     rsrv = 1;
6841         spcs_s_info_t kstatus;
6842 
6843         ip = ca->ip;
6844         flag = ca->flag;
6845         kstatus = ca->kstatus;
6846 
6847         if (ip->bi_disabled) {
6848                 rc = DSW_EABORTED;
6849                 goto skip;
6850         }
6851         max_chunk = ip->bi_size / DSW_SIZE;
6852         if ((ip->bi_size % DSW_SIZE) != 0)
6853                 ++max_chunk;
6854         if ((ip->bi_flags&DSW_TREEMAP))
6855                 nc_max = 1;
6856         else {
6857                 mst_max = shd_max = 0;
6858                 (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6859                 (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6860                 nc_max = (mst_max < shd_max) ? mst_max : shd_max;
6861                 nc_max /= DSW_SIZE;
6862                 ASSERT(nc_max > 0 && nc_max < 1000);
6863         }
6864 #ifdef  DEBUG
6865         if (ii_maxchunks > 0)
6866                 nc_max = ii_maxchunks;
6867 #endif
6868         for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) {
6869                 if ((flag & CV_SHD2MST) && NSHADOWS(ip))
6870                         nc_try = 1;
6871                 else
6872                         nc_try = (int)nc_max;
6873                 chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got,
6874                     max_chunk, nc_try, &nc_got);
6875 
6876                 if (chunk_num >= max_chunk)  /* loop complete */
6877                         break;
6878                 if (ip->bi_flags & DSW_COPYINGX) {
6879                         /* request to abort copy */
6880                         _ii_unlock_chunks(ip, chunk_num, nc_got);
6881                         rc = DSW_EABORTED;
6882                         break;
6883                 }
6884 
6885                 sema_p(&_ii_concopy_sema);
6886                 rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num,
6887                     nc_got);
6888                 sema_v(&_ii_concopy_sema);
6889                 if (ip->bi_flags & DSW_TREEMAP)
6890                         ii_tdelete(ip, chunk_num);
6891                 _ii_unlock_chunks(ip, chunk_num, nc_got);
6892                 if (!II_SUCCESS(rc)) {
6893                         if (ca->wait)
6894                                 spcs_s_add(kstatus, rc);
6895                         rc = DSW_EIO;
6896                         break;
6897                 }
6898                 if (ip->bi_release ||
6899                     (++chunkcount % ip->bi_throttle_unit) == 0) {
6900                         _ii_rlse_devs(ip, (ca->rtype&(~BMP)));
6901                         rsrv = 0;
6902                         delay(ip->bi_throttle_delay);
6903                         ca->rtype = MSTR|SHDR|(ca->rtype&BMP);
6904                         if ((rc = _ii_rsrv_devs(ip, (ca->rtype&(~BMP)),
6905                             II_INTERNAL)) != 0) {
6906                                 if (ca->wait)
6907                                         spcs_s_add(kstatus, rc);
6908                                 rc = DSW_EIO;
6909                                 break;
6910                         }
6911                         rsrv = 1;
6912                         if (nc_max > 1) {
6913                                 /*
6914                                  * maxfbas could have changed during the
6915                                  * release/reserve, so recalculate the size
6916                                  * of transfer we can do.
6917                                  */
6918                                 (void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6919                                 (void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6920                                 nc_max = (mst_max < shd_max) ?
6921                                     mst_max : shd_max;
6922                                 nc_max /= DSW_SIZE;
6923                         }
6924                 }
6925         }
6926 skip:
6927         mutex_enter(&ip->bi_mutex);
6928         if (ip->bi_flags & DSW_COPYINGX)
6929                 II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip);
6930         else
6931                 II_FLAG_CLR(DSW_COPY_FLAGS, ip);
6932 
6933         if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) &&
6934             (ip->bi_flags & DSW_VOVERFLOW)) {
6935                 int rs;
6936                 bitmap_size = ip->bi_size / DSW_SIZE;
6937                 if ((ip->bi_size % DSW_SIZE) != 0)
6938                         ++bitmap_size;
6939                 bitmap_size += 7;
6940                 bitmap_size /= 8;
6941 
6942                 /* Count the number of copy bits set */
6943                 rs = II_CNT_BITS(ip, ip->bi_copyfba, &copy_set, bitmap_size);
6944                 if ((rs == 0) && (copy_set == 0)) {
6945                         /*
6946                          * If we counted successfully and completed the copy
6947                          * see if any writes have forced the set into the
6948                          * overflow
6949                          */
6950                         rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set,
6951                             bitmap_size);
6952                         if ((rs == 0) && (shadow_set <
6953                             (nsc_size_t)ip->bi_shdchks)) {
6954                                 II_FLAG_CLR(DSW_VOVERFLOW, ip);
6955                                 --iigkstat.spilled_over.value.ul;
6956                         }
6957                 }
6958         }
6959 
6960         ca->rc = rc;
6961         cv_broadcast(&ip->bi_copydonecv);
6962         mutex_exit(&ip->bi_mutex);
6963         if (!ca->wait) {
6964                 if (rsrv)
6965                         _ii_rlse_devs(ip, ca->rtype);
6966                 kmem_free(ca, sizeof (*ca));
6967         }
6968 
6969 }
6970 
6971 /*
6972  * _ii_copyvol()
6973  *      Copy a volume.
6974  *
6975  * Calling/Exit State:
6976  *      Returns 0 when the copy is complete or has been aborted,
6977  *      otherwise error code.
6978  *
6979  * Description:
6980  *      According to the flag, copy the master to the shadow volume or the
6981  *      shadow to the master volume. Upon return wakeup all processes waiting
6982  *      for this copy. Uses a separate process (_ii_copyvolp) to allow the
6983  *      caller to be interrupted.
6984  */
6985 
6986 static int
6987 _ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus,
6988                                 int wait)
6989 {
6990         struct copy_args *ca;
6991         int rc;
6992 
6993         /*
6994          * start copy in separate process.
6995          */
6996 
6997         ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP);
6998         ca->ip = ip;
6999         ca->flag = flag;
7000         ca->rtype = rtype;
7001         ca->kstatus = kstatus;
7002         ca->wait = wait;
7003         ca->rc = 0;
7004 
7005         if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp,
7006             (void *)ca, FALSE)) {
7007                 mutex_enter(&ip->bi_mutex);
7008                 _ii_ioctl_done(ip);
7009                 mutex_exit(&ip->bi_mutex);
7010                 cmn_err(CE_NOTE, "!Can't create II copy process");
7011                 kmem_free(ca, sizeof (*ca));
7012                 return (rc);
7013         }
7014         mutex_enter(&ip->bi_mutex);
7015         if (wait == 0) {
7016                 _ii_ioctl_done(ip);
7017                 mutex_exit(&ip->bi_mutex);
7018                 return (0);
7019         }
7020         while (ip->bi_flags & DSW_COPYINGP) {
7021                 (void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex);
7022         }
7023         _ii_ioctl_done(ip);
7024         mutex_exit(&ip->bi_mutex);
7025         rc = ca->rc;
7026         kmem_free(ca, sizeof (*ca));
7027 
7028         return (rc);
7029 }
7030 
7031 /*
7032  * _ii_stopcopy
7033  *      Stops any copy process on ip.
7034  *
7035  * Calling/Exit State:
7036  *      Returns 0 if the copy was stopped, otherwise error code.
7037  *
7038  * Description:
7039  *      Stop an in-progress copy by setting the DSW_COPYINGX flag, then
7040  *      wait for the copy to complete.
7041  */
7042 
7043 static int
7044 _ii_stopcopy(_ii_info_t *ip)
7045 {
7046         mutex_enter(&ip->bi_mutex);
7047         DTRACE_PROBE1(_ii_stopcopy_flags,
7048             uint_t, ip->bi_flags);
7049 
7050         while (ip->bi_flags & DSW_COPYINGP) {
7051 
7052                 DTRACE_PROBE(_ii_stopcopy);
7053 
7054                 II_FLAG_SET(DSW_COPYINGX, ip);
7055 
7056                 if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
7057                         /* Awoken by a signal */
7058                         mutex_exit(&ip->bi_mutex);
7059                         DTRACE_PROBE(_ii_stopcopy);
7060                         return (EINTR);
7061                 }
7062         }
7063 
7064         mutex_exit(&ip->bi_mutex);
7065 
7066         return (0);
7067 }
7068 
7069 /*
7070  * _ii_error
7071  *      Given the error type that occurred, and the current state of the
7072  *      shadowing, set the appropriate error condition(s).
7073  *
7074  */
7075 
7076 void
7077 _ii_error(_ii_info_t *ip, int error_type)
7078 {
7079         int copy_flags;
7080         int golden;
7081         int flags;
7082         int recursive_call = (error_type & DSW_OVERFLOW) != 0;
7083         int offline_bits = DSW_OFFLINE;
7084         _ii_info_t *xip;
7085         int rc;
7086 
7087         error_type &= ~DSW_OVERFLOW;
7088 
7089         mutex_enter(&ip->bi_mutex);
7090         flags = (ip->bi_flags) & offline_bits;
7091         if ((flags ^ error_type) == 0) {
7092                 /* nothing new offline */
7093                 mutex_exit(&ip->bi_mutex);
7094                 return;
7095         }
7096 
7097         if (error_type == DSW_BMPOFFLINE &&
7098             (ip->bi_flags & DSW_BMPOFFLINE) == 0) {
7099                 /* first, let nskerd know */
7100                 rc = _ii_report_bmp(ip);
7101                 if (rc) {
7102                         if (ii_debug > 0) {
7103                                 cmn_err(CE_WARN, "!Unable to mark bitmap bad in"
7104                                     " config DB; rc = %d", rc);
7105                         }
7106                         ip->bi_flags |= DSW_CFGOFFLINE;
7107                 }
7108         }
7109 
7110         flags = ip->bi_flags;
7111         golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN);
7112         copy_flags = flags & DSW_COPYING;
7113 
7114         switch (error_type) {
7115 
7116         case DSW_BMPOFFLINE:
7117                 /* prevent further use of bitmap */
7118                 flags |= DSW_BMPOFFLINE;
7119                 if (ii_debug > 0)
7120                         cmn_err(CE_NOTE, "!ii: Bitmap offline");
7121 
7122                 switch (copy_flags) {
7123 
7124                 case DSW_COPYINGM:
7125                         /* Bitmap offline, copying master to shadow */
7126                         flags |= DSW_SHDOFFLINE;
7127                         if (ii_debug > 0)
7128                                 cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7129                         break;
7130 
7131                 case DSW_COPYINGS:
7132                         /* Bitmap offline, copying shadow to master */
7133                         if (golden) {
7134                                 /* Shadow is still usable */
7135                                 if (ii_debug > 0)
7136                                         cmn_err(CE_NOTE,
7137                                             "!ii: Implied master offline");
7138                                 flags |= DSW_MSTOFFLINE;
7139                         } else {
7140                                 /*
7141                                  * Snapshot restore from shadow to master
7142                                  * is a dumb thing to do anyway. Lose both.
7143                                  */
7144                                 flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE;
7145                                 if (ii_debug > 0)
7146                                         cmn_err(CE_NOTE,
7147                                             "ii: Implied master and "
7148                                             "shadow offline");
7149                         }
7150                         break;
7151 
7152                 case 0:
7153                         /* Bitmap offline, no copying in progress */
7154                         if (!golden) {
7155                                 if (ii_debug > 0)
7156                                         cmn_err(CE_NOTE,
7157                                             "!ii: Implied shadow offline");
7158                                 flags |= DSW_SHDOFFLINE;
7159                         }
7160                         break;
7161                 }
7162                 break;
7163 
7164         case DSW_OVROFFLINE:
7165                 flags |= DSW_OVROFFLINE;
7166                 ASSERT(ip->bi_overflow);
7167                 if (ii_debug > 0)
7168                         cmn_err(CE_NOTE, "!ii: Overflow offline");
7169                 /* FALLTHRU */
7170         case DSW_SHDOFFLINE:
7171                 flags |= DSW_SHDOFFLINE;
7172                 if (ii_debug > 0)
7173                         cmn_err(CE_NOTE, "!ii: Shadow offline");
7174 
7175                 if (copy_flags == DSW_COPYINGS) {
7176                         /* Shadow offline, copying shadow to master */
7177                         if (ii_debug > 0)
7178                                 cmn_err(CE_NOTE, "!ii: Implied master offline");
7179                         flags |= DSW_MSTOFFLINE;
7180                 }
7181                 break;
7182 
7183         case DSW_MSTOFFLINE:
7184                 flags |= DSW_MSTOFFLINE;
7185                 if (ii_debug > 0)
7186                         cmn_err(CE_NOTE, "!ii: Master offline");
7187 
7188                 switch (copy_flags) {
7189 
7190                 case DSW_COPYINGM:
7191                         /* Master offline, copying master to shadow */
7192                         flags |= DSW_SHDOFFLINE;
7193                         if (ii_debug > 0)
7194                                 cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7195                         break;
7196 
7197                 case DSW_COPYINGS:
7198                         /* Master offline, copying shadow to master */
7199                         if (!golden) {
7200                                 flags |= DSW_SHDOFFLINE;
7201                                 if (ii_debug > 0)
7202                                         cmn_err(CE_NOTE,
7203                                             "!ii: Implied shadow offline");
7204                         }
7205                         break;
7206 
7207                 case 0:
7208                         /* Master offline, no copying in progress */
7209                         if (!golden) {
7210                                 flags |= DSW_SHDOFFLINE;
7211                                 if (ii_debug > 0)
7212                                         cmn_err(CE_NOTE,
7213                                             "!ii: Implied shadow offline");
7214                         }
7215                         break;
7216                 }
7217                 break;
7218 
7219         default:
7220                 break;
7221         }
7222 
7223         II_FLAG_SET(flags, ip);
7224         mutex_exit(&ip->bi_mutex);
7225 
7226         if (!recursive_call &&
7227             NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) {
7228                 /* take master offline for all other sibling shadows */
7229                 for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
7230                         if (xip == ip)
7231                                 continue;
7232                         if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0)
7233                                 continue;
7234                                         /* overload DSW_OVERFLOW */
7235                         _ii_error(xip, DSW_MSTOFFLINE|DSW_OVERFLOW);
7236                         _ii_rlse_devs(xip, BMP);
7237                 }
7238         }
7239 
7240 }
7241 
7242 
7243 /*
7244  * _ii_lock_chunk
7245  *      Locks access to the specified chunk
7246  *
7247  */
7248 
7249 static void
7250 _ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk)
7251 {
7252         if (chunk == II_NULLCHUNK) {
7253 
7254                 DTRACE_PROBE(_ii_lock_chunk_type);
7255 
7256                 rw_enter(&ip->bi_busyrw, RW_WRITER);
7257 
7258         } else {
7259 
7260                 DTRACE_PROBE(_ii_lock_chunk_type);
7261 
7262                 if (ip->bi_busy == NULL) {
7263                         DTRACE_PROBE(_ii_lock_chunk_end);
7264                         return;
7265                 }
7266 
7267                 rw_enter(&ip->bi_busyrw, RW_READER);
7268                 mutex_enter(&ip->bi_mutex);
7269                 while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7270                     chunk % DSW_BITS))
7271                         cv_wait(&ip->bi_busycv, &ip->bi_mutex);
7272                 DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7273                 mutex_exit(&ip->bi_mutex);
7274         }
7275 
7276 }
7277 
7278 
7279 /*
7280  * _ii_trylock_chunk
7281  *      Tries to lock access to the specified chunk
7282  * Returns non-zero on success.
7283  *
7284  */
7285 
7286 static int
7287 _ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk)
7288 {
7289         int rc;
7290 
7291         ASSERT(chunk != II_NULLCHUNK);
7292         if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) {
7293                 DTRACE_PROBE(_ii_trylock_chunk);
7294                 return (0);
7295         }
7296 
7297         if (ip->bi_busy == NULL) {
7298                 DTRACE_PROBE(_ii_trylock_chunk_end);
7299                 return (0);
7300         }
7301 
7302         mutex_enter(&ip->bi_mutex);
7303         if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) {
7304                 rw_exit(&ip->bi_busyrw); /* RW_READER */
7305                 rc = 0;
7306         } else {
7307                 DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7308                 rc = 1;
7309         }
7310         mutex_exit(&ip->bi_mutex);
7311 
7312         return (rc);
7313 }
7314 
7315 /*
7316  * _ii_unlock_chunks
7317  *      Unlocks access to the specified chunks
7318  *
7319  */
7320 
7321 static void
7322 _ii_unlock_chunks(_ii_info_t *ip, chunkid_t  chunk, int n)
7323 {
7324         if (chunk == II_NULLCHUNK) {
7325 
7326                 DTRACE_PROBE(_ii_unlock_chunks);
7327 
7328                 rw_exit(&ip->bi_busyrw); /* RW_WRITER */
7329 
7330         } else {
7331 
7332                 if (ip->bi_busy == NULL) {
7333                         DTRACE_PROBE(_ii_unlock_chunks_end);
7334                         return;
7335                 }
7336                 mutex_enter(&ip->bi_mutex);
7337 
7338                 DTRACE_PROBE(_ii_unlock_chunks);
7339 
7340                 for (; n-- > 0; chunk++) {
7341                         ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7342                             chunk % DSW_BITS));
7343                         DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS],
7344                             chunk % DSW_BITS);
7345                         rw_exit(&ip->bi_busyrw); /* RW_READER */
7346                 }
7347                 cv_broadcast(&ip->bi_busycv);
7348                 mutex_exit(&ip->bi_mutex);
7349 
7350         }
7351 }
7352 
7353 /*
7354  * Copyout the bit map.
7355  */
7356 static int
7357 _ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7358     int user_bm_size)
7359 {
7360         nsc_off_t       last_fba;
7361         nsc_buf_t *tmp;
7362         nsc_vec_t *nsc_vecp;
7363         nsc_off_t       fba_pos;
7364         int     buf_fba_len;
7365         int     buf_byte_len;
7366         size_t  co_len;
7367         int     rc;
7368 
7369         DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset,
7370             nsc_size_t, user_bm_size);
7371 
7372         if (ip->bi_flags & DSW_BMPOFFLINE)
7373                 return (EIO);
7374 
7375         /* First calculate the size of the shadow and copy bitmaps */
7376         co_len = DSW_BM_FBA_LEN(ip->bi_size);
7377         ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
7378 
7379         /* Are we in the ranges of the various bitmaps/indexes? */
7380         if (bm_offset < ip->bi_shdfba)
7381                 return (EIO);
7382         else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
7383                 /*EMPTY*/;
7384         else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
7385                 /*EMPTY*/;
7386         else if ((ip->bi_flags & DSW_TREEMAP) &&
7387             (bm_offset < (last_fba = last_fba + (co_len * 32))))
7388                 /*EMPTY*/;
7389         else return (EIO);
7390 
7391         /* Are we within the size of the segment being copied? */
7392         if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
7393                 return (EIO);
7394 
7395         for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7396             fba_pos += DSW_CBLK_FBA) {
7397                 tmp = NULL;
7398                 buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7399                     DSW_CBLK_FBA : last_fba - fba_pos;
7400                 II_READ_START(ip, bitmap);
7401                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7402                     NSC_RDBUF, &tmp);
7403                 II_READ_END(ip, bitmap, rc, buf_fba_len);
7404                 if (!II_SUCCESS(rc)) {
7405                         if (tmp)
7406                                 (void) nsc_free_buf(tmp);
7407 
7408                         _ii_error(ip, DSW_BMPOFFLINE);
7409                         return (EIO);
7410                 }
7411 
7412                 /* copyout each nsc_vec's worth of data */
7413                 buf_byte_len = FBA_SIZE(buf_fba_len);
7414                 for (nsc_vecp = tmp->sb_vec;
7415                     buf_byte_len > 0 && user_bm_size > 0;
7416                     nsc_vecp++) {
7417                         co_len = (user_bm_size > nsc_vecp->sv_len) ?
7418                             nsc_vecp->sv_len : user_bm_size;
7419                         if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) {
7420                                 (void) nsc_free_buf(tmp);
7421                                 return (EFAULT);
7422                         }
7423                         user_bm += co_len;
7424                         user_bm_size -= co_len;
7425                         buf_byte_len -= co_len;
7426                 }
7427 
7428 
7429                 (void) nsc_free_buf(tmp);
7430         }
7431 
7432         return (0);
7433 }
7434 
7435 /*
7436  * Copyin a bit map and or with differences bitmap.
7437  */
7438 static int
7439 _ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7440 int user_bm_size)
7441 {
7442         nsc_off_t       last_fba;
7443         nsc_buf_t *tmp;
7444         nsc_vec_t *nsc_vecp;
7445         nsc_off_t       fba_pos;
7446         int     buf_fba_len;
7447         int     buf_byte_len;
7448         size_t  ci_len;
7449         int     rc;
7450         int     n;
7451         unsigned char *tmp_buf, *tmpp, *tmpq;
7452 
7453         DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset,
7454             nsc_size_t, user_bm_size);
7455 
7456         if (ip->bi_flags & DSW_BMPOFFLINE)
7457                 return (EIO);
7458 
7459         tmp_buf = NULL;
7460         last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
7461 
7462         for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7463             fba_pos += DSW_CBLK_FBA) {
7464                 tmp = NULL;
7465                 buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7466                     DSW_CBLK_FBA : last_fba - fba_pos;
7467                 II_READ_START(ip, bitmap);
7468                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7469                     NSC_RDWRBUF, &tmp);
7470                 II_READ_END(ip, bitmap, rc, buf_fba_len);
7471                 if (!II_SUCCESS(rc)) {
7472                         if (tmp)
7473                                 (void) nsc_free_buf(tmp);
7474 
7475                         _ii_error(ip, DSW_BMPOFFLINE);
7476                         return (EIO);
7477                 }
7478 
7479                 /* copyin each nsc_vec's worth of data */
7480                 buf_byte_len = FBA_SIZE(buf_fba_len);
7481                 for (nsc_vecp = tmp->sb_vec;
7482                     buf_byte_len > 0 && user_bm_size > 0;
7483                     nsc_vecp++) {
7484                         ci_len = (user_bm_size > nsc_vecp->sv_len) ?
7485                             nsc_vecp->sv_len : user_bm_size;
7486                         tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP);
7487                         tmpq = nsc_vecp->sv_addr;
7488                         if (copyin(user_bm, tmpp, ci_len)) {
7489                                 (void) nsc_free_buf(tmp);
7490                                 kmem_free(tmp_buf, ci_len);
7491                                 return (EFAULT);
7492                         }
7493                         for (n = ci_len; n-- > 0; /* CSTYLED */)
7494                                 *tmpq++ |= *tmpp++;
7495                         user_bm += ci_len;
7496                         user_bm_size -= ci_len;
7497                         buf_byte_len -= ci_len;
7498                         kmem_free(tmp_buf, ci_len);
7499                 }
7500 
7501                 II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0);
7502                 if (!II_SUCCESS(rc)) {
7503                         (void) nsc_free_buf(tmp);
7504                         _ii_error(ip, DSW_BMPOFFLINE);
7505                         return (EIO);
7506                 }
7507 
7508                 (void) nsc_free_buf(tmp);
7509         }
7510 
7511         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7512 
7513         return (0);
7514 }
7515 
7516 /*
7517  * Completely zero the bit map.
7518  *
7519  *      Returns 0 if no error
7520  *      Returns non-zero if there was an error
7521  */
7522 static int
7523 _ii_ab_zerobm(_ii_info_t *ip)
7524 {
7525         nsc_off_t fba_pos;
7526         int rc;
7527         nsc_size_t len;
7528         nsc_size_t size;
7529         nsc_buf_t *tmp;
7530 
7531         size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7532         for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) {
7533                 tmp = NULL;
7534                 len = fba_pos + DSW_CBLK_FBA < size ?
7535                     DSW_CBLK_FBA : size - fba_pos;
7536                 II_READ_START(ip, bitmap);
7537                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF,
7538                     &tmp);
7539                 II_READ_END(ip, bitmap, rc, len);
7540                 if (!II_SUCCESS(rc)) {
7541                         if (tmp)
7542                                 (void) nsc_free_buf(tmp);
7543 
7544                         _ii_error(ip, DSW_BMPOFFLINE);
7545                         return (rc);
7546                 }
7547 
7548                 rc = nsc_zero(tmp, fba_pos, len, 0);
7549                 if (II_SUCCESS(rc)) {
7550                         II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0);
7551                 }
7552 
7553                 (void) nsc_free_buf(tmp);
7554                 if (!II_SUCCESS(rc)) {
7555                         _ii_error(ip, DSW_BMPOFFLINE);
7556                         return (rc);
7557                 }
7558         }
7559 
7560         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7561 
7562         return (0);
7563 }
7564 
7565 
7566 /*
7567  * Copy shadow bitmap to copy bitmap
7568  */
7569 static int
7570 _ii_ab_copybm(_ii_info_t *ip)
7571 {
7572         nsc_off_t copy_fba_pos, shd_fba_pos;
7573         int rc;
7574         nsc_size_t len;
7575         nsc_off_t size;
7576         nsc_buf_t *copy_tmp, *shd_tmp;
7577 
7578         size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7579         copy_fba_pos = ip->bi_copyfba;
7580         for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7581             copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7582                 shd_tmp = NULL;
7583                 len = shd_fba_pos + DSW_CBLK_FBA < size ?
7584                     DSW_CBLK_FBA : size - shd_fba_pos;
7585                 II_READ_START(ip, bitmap);
7586                 rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF,
7587                     &shd_tmp);
7588                 II_READ_END(ip, bitmap, rc, len);
7589                 if (!II_SUCCESS(rc)) {
7590                         if (shd_tmp)
7591                                 (void) nsc_free_buf(shd_tmp);
7592 
7593                         _ii_error(ip, DSW_BMPOFFLINE);
7594                         if (ii_debug > 1)
7595                                 cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d",
7596                                     rc);
7597 
7598                         return (rc);
7599                 }
7600 
7601                 copy_tmp = NULL;
7602                 rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF,
7603                     &copy_tmp);
7604                 if (!II_SUCCESS(rc)) {
7605                         (void) nsc_free_buf(shd_tmp);
7606                         if (copy_tmp)
7607                                 (void) nsc_free_buf(copy_tmp);
7608 
7609                         _ii_error(ip, DSW_BMPOFFLINE);
7610                         if (ii_debug > 1)
7611                                 cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d",
7612                                     rc);
7613 
7614                         return (rc);
7615                 }
7616                 rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7617                     len);
7618                 if (II_SUCCESS(rc)) {
7619                         II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7620                             len, 0);
7621                 }
7622 
7623                 (void) nsc_free_buf(shd_tmp);
7624                 (void) nsc_free_buf(copy_tmp);
7625                 if (!II_SUCCESS(rc)) {
7626                         if (ii_debug > 1)
7627                                 cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d",
7628                                     rc);
7629                         _ii_error(ip, DSW_BMPOFFLINE);
7630                         return (rc);
7631                 }
7632         }
7633 
7634         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7635 
7636         return (0);
7637 }
7638 
7639 
/*
 * _ii_nsc_or
 *      OR the contents of one nsc buffer into another (stolen from
 *      nsc_copy_h(), with the byte copy replaced by a byte-wise OR).
 *
 *      h1, pos1        source buffer and the position within it to start at
 *      h2, pos2        destination buffer and the position within it
 *      len             amount to OR; pos1, pos2 and len use the same units
 *                      as sb_pos/sb_len (FBAs, going by the FBA_NUM() and
 *                      FBA_SIZE() conversions applied below)
 *
 *      Returns EINVAL if either range is not wholly inside its buffer,
 *      otherwise 0.
 */

static int
_ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2,
        nsc_size_t len)
{
        unsigned char *a1, *a2;
        unsigned char *b1, *b2;
        nsc_vec_t *v1, *v2;
        int i, sz, l1, l2;

        /* both ranges must lie within [sb_pos, sb_pos + sb_len) */
        if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len ||
            pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len)
                return (EINVAL);

        if (!len)
                return (0);

        /* find starting point in "from" vector */

        v1 = h1->sb_vec;
        pos1 -= h1->sb_pos;     /* now relative to the start of the buffer */

        /* skip every vector entry that ends before pos1 */
        for (; pos1 >= FBA_NUM(v1->sv_len); v1++)
                pos1 -= FBA_NUM(v1->sv_len);

        /* a1/l1: address of, and bytes left in, the current "from" entry */
        a1 = v1->sv_addr + FBA_SIZE(pos1);
        l1 = v1->sv_len - FBA_SIZE(pos1);

        /* find starting point in "to" vector */

        v2 = h2->sb_vec;
        pos2 -= h2->sb_pos;     /* now relative to the start of the buffer */

        /* skip every vector entry that ends before pos2 */
        for (; pos2 >= FBA_NUM(v2->sv_len); v2++)
                pos2 -= FBA_NUM(v2->sv_len);

        /* a2/l2: address of, and bytes left in, the current "to" entry */
        a2 = v2->sv_addr + FBA_SIZE(pos2);
        l2 = v2->sv_len - FBA_SIZE(pos2);

        /* or required data */

        len = FBA_SIZE(len);    /* from here on len is a byte count */

        while (len) {
                /* largest run that stays inside both current entries */
                sz = min(l1, l2);
                sz = (int)min((nsc_size_t)sz, len);

                b1 = a1;
                b2 = a2;
                for (i = sz; i-- > 0; /* CSTYLED */)
                        *b2++ |= *b1++;

                l1 -= sz;
                l2 -= sz;
                a1 += sz;
                a2 += sz;
                len -= sz;

                /* move to the next entry of whichever vector ran out */
                if (!l1) {
                        a1 = (++v1)->sv_addr;
                        l1 = v1->sv_len;
                }
                if (!l2) {
                        a2 = (++v2)->sv_addr;
                        l2 = v2->sv_len;
                }
        }

        return (0);
}
7713 
7714 
7715 /*
7716  * Or the shadow bitmap in to the copy bitmap, clear the
7717  * shadow bitmap.
7718  */
7719 static int
7720 _ii_ab_orbm(_ii_info_t *ip)
7721 {
7722         nsc_off_t copy_fba_pos, shd_fba_pos;
7723         int rc;
7724         nsc_size_t len;
7725         size_t size;
7726         nsc_buf_t *copy_tmp, *shd_tmp;
7727 
7728         if (ip->bi_flags & DSW_BMPOFFLINE)
7729                 return (EIO);
7730 
7731         size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7732         copy_fba_pos = ip->bi_copyfba;
7733         for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7734             copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7735                 shd_tmp = NULL;
7736                 len = shd_fba_pos + DSW_CBLK_FBA < size ?
7737                     DSW_CBLK_FBA : size - shd_fba_pos;
7738                 II_READ_START(ip, bitmap);
7739                 rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len,
7740                     NSC_RDBUF|NSC_WRBUF, &shd_tmp);
7741                 II_READ_END(ip, bitmap, rc, len);
7742                 if (!II_SUCCESS(rc)) {
7743                         if (shd_tmp)
7744                                 (void) nsc_free_buf(shd_tmp);
7745 
7746                         _ii_error(ip, DSW_BMPOFFLINE);
7747                         return (rc);
7748                 }
7749 
7750                 copy_tmp = NULL;
7751                 II_READ_START(ip, bitmap);
7752                 rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len,
7753                     NSC_RDBUF|NSC_WRBUF, &copy_tmp);
7754                 II_READ_END(ip, bitmap, rc, len);
7755                 if (!II_SUCCESS(rc)) {
7756                         (void) nsc_free_buf(shd_tmp);
7757                         if (copy_tmp)
7758                                 (void) nsc_free_buf(copy_tmp);
7759 
7760                         _ii_error(ip, DSW_BMPOFFLINE);
7761                         return (rc);
7762                 }
7763                 rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7764                     len);
7765                 if (II_SUCCESS(rc)) {
7766                         II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7767                             len, 0);
7768                 }
7769                 if (II_SUCCESS(rc))
7770                         rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0);
7771                 if (II_SUCCESS(rc)) {
7772                         II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len,
7773                             0);
7774                 }
7775 
7776                 (void) nsc_free_buf(shd_tmp);
7777                 (void) nsc_free_buf(copy_tmp);
7778                 if (!II_SUCCESS(rc)) {
7779                         _ii_error(ip, DSW_BMPOFFLINE);
7780                         return (rc);
7781                 }
7782         }
7783 
7784         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7785 
7786         return (0);
7787 }
7788 
7789 /*
7790  * _ii_ab_tst_shd_bit
7791  *      Determine if a chunk has been copied to the shadow device
7792  *      Relies on the alloc_buf/free_buf semantics for locking.
7793  *
7794  * Calling/Exit State:
7795  *      Returns 1 if the modified bit has been set for the shadow device,
7796  *      Returns 0 if the modified bit has not been set for the shadow device,
7797  *      Returns -1 if there was an error
7798  */
7799 
7800 static int
7801 _ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7802 {
7803         int rc;
7804         nsc_off_t fba;
7805         nsc_buf_t *tmp = NULL;
7806 
7807         if (ip->bi_flags & DSW_BMPOFFLINE)
7808                 return (EIO);
7809 
7810         fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7811         chunk %= FBA_SIZE(1) * DSW_BITS;
7812         II_READ_START(ip, bitmap);
7813         rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7814         II_READ_END(ip, bitmap, rc, 1);
7815         if (!II_SUCCESS(rc)) {
7816                 _ii_error(ip, DSW_BMPOFFLINE);
7817                 if (tmp)
7818                         (void) nsc_free_buf(tmp);
7819                 return (-1);
7820         }
7821         rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7822             chunk%DSW_BITS);
7823         (void) nsc_free_buf(tmp);
7824 
7825         return (rc);
7826 }
7827 
7828 
7829 /*
7830  * _ii_ab_set_shd_bit
7831  *      Records that a chunk has been copied to the shadow device
7832  *
7833  *      Returns non-zero if an error is encountered
7834  *      Returns 0 if no error
7835  */
7836 
7837 static int
7838 _ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7839 {
7840         int rc;
7841         nsc_off_t fba;
7842         nsc_buf_t *tmp = NULL;
7843 
7844         if (ip->bi_flags & DSW_BMPOFFLINE)
7845                 return (EIO);
7846 
7847         fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7848         chunk %= FBA_SIZE(1) * DSW_BITS;
7849         II_READ_START(ip, bitmap);
7850         rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7851         II_READ_END(ip, bitmap, rc, 1);
7852         if (!II_SUCCESS(rc)) {
7853                 _ii_error(ip, DSW_BMPOFFLINE);
7854                 if (tmp)
7855                         (void) nsc_free_buf(tmp);
7856                 return (rc);
7857         }
7858         if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7859             chunk%DSW_BITS) == 0) {
7860                 DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7861                     chunk%DSW_BITS);
7862                 II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7863                 if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
7864                         ip->bi_shdbits++;
7865         }
7866         (void) nsc_free_buf(tmp);
7867         if (!II_SUCCESS(rc)) {
7868                 _ii_error(ip, DSW_BMPOFFLINE);
7869                 return (rc);
7870         }
7871 
7872         return (0);
7873 }
7874 
7875 
7876 /*
7877  * _ii_ab_tst_copy_bit
7878  *      Determine if a chunk needs to be copied during updates.
7879  *
7880  * Calling/Exit State:
7881  *      Returns 1 if the copy bit for the chunk is set
7882  *      Returns 0 if the copy bit for the chunk is not set
7883  *      Returns -1 if an error is encountered
7884  */
7885 
7886 static int
7887 _ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7888 {
7889         int rc;
7890         nsc_off_t fba;
7891         nsc_buf_t *tmp = NULL;
7892 
7893         if (ip->bi_flags & DSW_BMPOFFLINE)
7894                 return (-1);
7895 
7896         fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7897         chunk %= FBA_SIZE(1) * DSW_BITS;
7898         II_READ_START(ip, bitmap);
7899         rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7900         II_READ_END(ip, bitmap, rc, 1);
7901         if (!II_SUCCESS(rc)) {
7902                 if (tmp)
7903                         (void) nsc_free_buf(tmp);
7904                 _ii_error(ip, DSW_BMPOFFLINE);
7905                 return (-1);
7906         }
7907         rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7908             chunk%DSW_BITS);
7909         (void) nsc_free_buf(tmp);
7910 
7911         return (rc);
7912 }
7913 
7914 
7915 /*
7916  * _ii_ab_set_copy_bit
7917  *      Records that a chunk has been copied to the shadow device
7918  *
7919  *      Returns non-zero if an error is encountered
7920  *      Returns 0 if no error
7921  */
7922 
7923 static int
7924 _ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7925 {
7926         int rc;
7927         nsc_off_t fba;
7928         nsc_buf_t *tmp = NULL;
7929 
7930         if (ip->bi_flags & DSW_BMPOFFLINE)
7931                 return (EIO);
7932 
7933         fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7934         chunk %= FBA_SIZE(1) * DSW_BITS;
7935         II_READ_START(ip, bitmap);
7936         rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7937         II_READ_END(ip, bitmap, rc, 1);
7938         if (!II_SUCCESS(rc)) {
7939                 if (tmp)
7940                         (void) nsc_free_buf(tmp);
7941                 _ii_error(ip, DSW_BMPOFFLINE);
7942                 return (rc);
7943         }
7944         if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7945             chunk%DSW_BITS) == 0) {
7946                 DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7947                     chunk%DSW_BITS);
7948                 if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
7949                         ip->bi_copybits++;
7950 
7951                 II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7952         }
7953         (void) nsc_free_buf(tmp);
7954         if (!II_SUCCESS(rc)) {
7955                 _ii_error(ip, DSW_BMPOFFLINE);
7956                 return (rc);
7957         }
7958 
7959         return (0);
7960 }
7961 
7962 
7963 /*
7964  * _ii_ab_clr_copy_bits
7965  *      Records that a chunk has been cleared on the shadow device, this
7966  *      function assumes that the bits to clear are all in the same fba,
7967  *      as is the case when they were generated by _ii_ab_next_copy_bit().
7968  *
7969  *      Returns non-zero if an error is encountered
7970  *      Returns 0 if no error
7971  */
7972 
7973 static int
7974 _ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
7975 {
7976         int rc;
7977         nsc_off_t fba;
7978         nsc_buf_t *tmp = NULL;
7979 
7980         if (ip->bi_flags & DSW_BMPOFFLINE)
7981                 return (EIO);
7982 
7983         fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7984         chunk %= FBA_SIZE(1) * DSW_BITS;
7985         II_READ_START(ip, bitmap);
7986         rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7987         II_READ_END(ip, bitmap, rc, 1);
7988         if (!II_SUCCESS(rc)) {
7989                 if (tmp)
7990                         (void) nsc_free_buf(tmp);
7991                 _ii_error(ip, DSW_BMPOFFLINE);
7992                 return (rc);
7993         }
7994         for (; nchunks-- > 0; chunk++) {
7995                 DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7996                     chunk%DSW_BITS);
7997                 if (ip->bi_copybits > 0)
7998                         ip->bi_copybits--;
7999         }
8000 
8001         II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
8002         (void) nsc_free_buf(tmp);
8003         if (!II_SUCCESS(rc)) {
8004                 _ii_error(ip, DSW_BMPOFFLINE);
8005                 return (rc);
8006         }
8007 
8008         return (0);
8009 }
8010 
8011 /*
8012  * _ii_ab_fill_copy_bmp
8013  *      Fills the copy bitmap with 1's.
8014  *
8015  *      Returns non-zero if an error is encountered
8016  *      Returns 0 if no error
8017  */
8018 
8019 static int
8020 _ii_ab_fill_copy_bmp(_ii_info_t *ip)
8021 {
8022         int rc;
8023         nsc_off_t fba;
8024         nsc_buf_t *tmp;
8025         unsigned char *p;
8026         int i, j;
8027 
8028         if (ip->bi_flags & DSW_BMPOFFLINE)
8029                 return (EIO);
8030 
8031         fba = ip->bi_copyfba;
8032         for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8033                 tmp = NULL;
8034                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp);
8035                 if (!II_SUCCESS(rc)) {
8036                         if (tmp)
8037                                 (void) nsc_free_buf(tmp);
8038                         _ii_error(ip, DSW_BMPOFFLINE);
8039                         return (rc);
8040                 }
8041                 p = (unsigned char *)tmp->sb_vec->sv_addr;
8042                 for (j = FBA_SIZE(1); j-- > 0; p++)
8043                         *p = (unsigned char)0xff;
8044                 II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
8045                 if (!II_SUCCESS(rc)) {
8046                         _ii_error(ip, DSW_BMPOFFLINE);
8047                         (void) nsc_free_buf(tmp);
8048                         return (rc);
8049                 }
8050                 (void) nsc_free_buf(tmp);
8051         }
8052 
8053         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8054 
8055         return (0);
8056 }
8057 
8058 /*
8059  * _ii_ab_load_bmp
8060  *      Load bitmap from persistent storage.
8061  */
8062 
8063 static int
8064 _ii_ab_load_bmp(_ii_info_t *ip, int flag)
8065 /* ARGSUSED */
8066 {
8067         if (ip->bi_flags & DSW_BMPOFFLINE)
8068                 return (EIO);
8069 
8070         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8071 
8072         return (0);
8073 }
8074 
/*
 * _ii_ab_next_copy_bit
 *      Find next set copy bit.
 *
 * Returns the next bits set in the copy bitmap, with the corresponding chunks
 * locked. Used to avoid having to reread the same bit map block as each bit
 * is tested.
 *
 *      startchunk      first chunk to examine
 *      maxchunk        chunk at which the search stops; it is not examined
 *      wanted          largest run of consecutive chunks the caller will take
 *      got             set to the number of chunks locked, 0 if none
 *
 * Returns the first chunk found with its copy bit set; that chunk and the
 * *got - 1 chunks following it are locked.  A run never extends past the
 * bitmap FBA holding the first chunk's bit.
 * Returns maxchunk + 1, with *got == 0, if no set bit was found, the bitmap
 * is offline or the bitmap could not be read.
 */

static chunkid_t
_ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk,
        int wanted, int *got)
{
        chunkid_t rc;           /* status from nsc_alloc_buf() */
        nsc_off_t fba;          /* bitmap FBA currently being scanned */
        chunkid_t chunk;        /* bit index of startchunk within that FBA */
        int bits_per_fba = FBA_SIZE(1) * DSW_BITS;
        int high;               /* first chunk past the current FBA */
        chunkid_t nextchunk;
        nsc_buf_t *tmp = NULL;

        *got = 0;
again:
        if (ip->bi_flags & DSW_BMPOFFLINE)
                return (maxchunk + 1);

        /* scan the copy bitmap one FBA at a time */
        while (startchunk < maxchunk) {
                tmp = NULL;
                fba = ip->bi_copyfba + startchunk / bits_per_fba;
                chunk = startchunk % bits_per_fba;
                II_READ_START(ip, bitmap);
                rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
                II_READ_END(ip, bitmap, rc, 1);
                if (!II_SUCCESS(rc)) {
                        if (tmp)
                                (void) nsc_free_buf(tmp);
                        _ii_error(ip, DSW_BMPOFFLINE);
                        return (maxchunk + 1);
                }
                /* end of this FBA's chunks, clipped to maxchunk */
                high = startchunk + bits_per_fba - startchunk%bits_per_fba;
                if (high > maxchunk)
                        high = maxchunk;
                for (; startchunk < high; chunk++, startchunk++) {
                        if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
                            chunk%DSW_BITS)) {
                                /*
                                 * trylock won't sleep so can use while
                                 * holding the buf.
                                 */
                                if (!_ii_trylock_chunk(ip, startchunk)) {
                                        /*
                                         * Have to use _ii_lock_chunk()
                                         * instead; the buf is released
                                         * before calling it, so the bit
                                         * must be tested again afterwards.
                                         */
                                        (void) nsc_free_buf(tmp);
                                        _ii_lock_chunk(ip, startchunk);
                                        if (_ii_ab_tst_copy_bit(ip, startchunk)
                                            != 1) {
                                                /*
                                                 * another process copied this
                                                 * chunk while we were acquiring
                                                 * the chunk lock.
                                                 */
                                                _ii_unlock_chunk(ip,
                                                    startchunk);
                                                DTRACE_PROBE(
                                                    _ii_ab_next_copy_bit_again);
                                                /* rescan from this chunk */
                                                goto again;
                                        }
                                        /* only this one chunk is returned */
                                        *got = 1;
                                        DTRACE_PROBE(_ii_ab_next_copy_bit_end);
                                        return (startchunk);
                                }
                                /*
                                 * First chunk locked.  Extend the run while
                                 * the following bits in this FBA are set,
                                 * more chunks are wanted and each chunk can
                                 * be locked without waiting.
                                 */
                                *got = 1;
                                nextchunk = startchunk + 1;
                                chunk++;
                                for (; --wanted > 0 && nextchunk < high;
                                    nextchunk++, chunk++) {
                                        if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr
                                            [chunk/DSW_BITS], chunk%DSW_BITS)) {
                                                break;  /* end of bit run */
                                        }
                                        if (_ii_trylock_chunk(ip, nextchunk))
                                                (*got)++;
                                        else
                                                break;
                                }
                                (void) nsc_free_buf(tmp);
                                DTRACE_PROBE(_ii_ab_next_copy_bit);
                                return (startchunk);
                        }
                }
                /* no bit set in the rest of this FBA; move to the next */
                (void) nsc_free_buf(tmp);
        }

        return (maxchunk + 1);
}
8168 
8169 /*
8170  * _ii_ab_save_bmp
8171  *      Save bitmap to persistent storage.
8172  */
8173 
8174 static int
8175 _ii_ab_save_bmp(_ii_info_t *ip, int flag)
8176 /* ARGSUSED */
8177 {
8178         if (ip->bi_flags & DSW_BMPOFFLINE)
8179                 return (EIO);
8180 
8181         return (0);
8182 }
8183 
8184 /*
8185  * _ii_ab_change_bmp
8186  *      copy change bitmap to memory
8187  */
8188 
8189 static int
8190 _ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8191 /* ARGSUSED */
8192 {
8193         int     bm_size;
8194         int     i, j, fba;
8195         int     rc;
8196         unsigned char *p;
8197         nsc_buf_t *tmp = NULL;
8198 
8199         if (ip->bi_flags & DSW_BMPOFFLINE)
8200                 return (EIO);
8201         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8202 
8203         rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8204             ptr, bm_size);
8205         if (!II_SUCCESS(rc)) {
8206                 _ii_error(ip, DSW_BMPOFFLINE);
8207                 return (rc);
8208         }
8209 
8210         fba = ip->bi_copyfba;
8211         for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8212                 tmp = NULL;
8213                 II_READ_START(ip, bitmap);
8214                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
8215                 II_READ_END(ip, bitmap, rc, 1);
8216                 if (!II_SUCCESS(rc)) {
8217                         if (tmp)
8218                                 (void) nsc_free_buf(tmp);
8219                         _ii_error(ip, DSW_BMPOFFLINE);
8220                         return (rc);
8221                 }
8222                 p = (unsigned char *)tmp->sb_vec->sv_addr;
8223                 for (j = FBA_SIZE(1); j-- > 0; p++)
8224                         *ptr |= *p;
8225                 (void) nsc_free_buf(tmp);
8226         }
8227 
8228         return (0);
8229 }
8230 
8231 /*
8232  * Count bits set in the bit map.
8233  */
8234 static int
8235 _ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8236 int bm_size)
8237 {
8238         nsc_size_t      last_fba;
8239         nsc_buf_t *tmp;
8240         nsc_vec_t *sd_vecp;
8241         nsc_off_t       fba_pos;
8242         int     buf_fba_len;
8243         int     buf_byte_len;
8244         int     co_len;
8245         int     i;
8246         unsigned int j, k;
8247         unsigned char *cp;
8248         int     rc;
8249 
8250         *counter = 0;
8251         if (ip->bi_flags & DSW_BMPOFFLINE)
8252                 return (EIO);
8253 
8254         last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
8255 
8256         for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0;
8257             fba_pos += DSW_CBLK_FBA) {
8258                 tmp = NULL;
8259                 buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
8260                     DSW_CBLK_FBA : last_fba - fba_pos;
8261                 II_READ_START(ip, bitmap);
8262                 rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
8263                     NSC_RDBUF, &tmp);
8264                 II_READ_END(ip, bitmap, rc, 1);
8265                 if (!II_SUCCESS(rc)) {
8266                         if (tmp)
8267                                 (void) nsc_free_buf(tmp);
8268 
8269                         _ii_error(ip, DSW_BMPOFFLINE);
8270                         return (EIO);
8271                 }
8272 
8273                 /* count each sd_vec's worth of data */
8274                 buf_byte_len = FBA_SIZE(buf_fba_len);
8275                 for (sd_vecp = tmp->sb_vec;
8276                     buf_byte_len > 0 && bm_size > 0;
8277                     sd_vecp++) {
8278                         co_len = (bm_size > sd_vecp->sv_len) ?
8279                             sd_vecp->sv_len : bm_size;
8280                         cp = sd_vecp->sv_addr;
8281                         for (i = k = 0; i < co_len; i++)
8282                                 for (j = (unsigned)*cp++; j; j &= j - 1)
8283                                         k++;
8284                         *counter += k;
8285                         bm_size -= co_len;
8286                         buf_byte_len -= co_len;
8287                 }
8288 
8289 
8290                 (void) nsc_free_buf(tmp);
8291         }
8292 
8293         return (0);
8294 }
8295 
8296 /*
8297  * OR the bitmaps as part of a join operation
8298  */
8299 static int
8300 _ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8301 {
8302         int rc;
8303         nsc_size_t len;
8304         nsc_size_t size;
8305         nsc_buf_t *dest_tmp, *src_tmp;
8306         nsc_off_t src_fba_pos;
8307 
8308         if ((src_ip->bi_flags & DSW_BMPOFFLINE) ||
8309             (dest_ip->bi_flags & DSW_BMPOFFLINE))
8310                 return (EIO);
8311 
8312         size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba;
8313         for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size;
8314             src_fba_pos += DSW_CBLK_FBA) {
8315                 src_tmp = NULL;
8316                 len = src_fba_pos + DSW_CBLK_FBA < size ?
8317                     DSW_CBLK_FBA : size - src_fba_pos;
8318                 II_READ_START(src_ip, bitmap);
8319                 rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len,
8320                     NSC_RDWRBUF, &src_tmp);
8321                 II_READ_END(src_ip, bitmap, rc, len);
8322                 if (!II_SUCCESS(rc)) {
8323                         if (src_tmp)
8324                                 (void) nsc_free_buf(src_tmp);
8325 
8326                         _ii_error(src_ip, DSW_BMPOFFLINE);
8327                         return (rc);
8328                 }
8329 
8330                 dest_tmp = NULL;
8331                 II_READ_START(dest_ip, bitmap);
8332                 rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len,
8333                     NSC_RDWRBUF, &dest_tmp);
8334                 II_READ_END(dest_ip, bitmap, rc, len);
8335                 if (!II_SUCCESS(rc)) {
8336                         (void) nsc_free_buf(src_tmp);
8337                         if (dest_tmp)
8338                                 (void) nsc_free_buf(dest_tmp);
8339 
8340                         _ii_error(dest_ip, DSW_BMPOFFLINE);
8341                         return (rc);
8342                 }
8343                 rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos,
8344                     len);
8345                 if (II_SUCCESS(rc)) {
8346                         II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp,
8347                             src_fba_pos, len, 0);
8348                 }
8349 
8350                 (void) nsc_free_buf(src_tmp);
8351                 (void) nsc_free_buf(dest_tmp);
8352                 if (!II_SUCCESS(rc)) {
8353                         _ii_error(dest_ip, DSW_BMPOFFLINE);
8354                         return (rc);
8355                 }
8356         }
8357 
8358         dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8359 
8360         return (0);
8361 
8362 }
8363 
8364 static _ii_bmp_ops_t alloc_buf_bmp = {
8365         _ii_ab_co_bmp,
8366         _ii_ab_ci_bmp,
8367         _ii_ab_zerobm,
8368         _ii_ab_copybm,
8369         _ii_ab_orbm,
8370         _ii_ab_tst_shd_bit,
8371         _ii_ab_set_shd_bit,
8372         _ii_ab_tst_copy_bit,
8373         _ii_ab_set_copy_bit,
8374         _ii_ab_clr_copy_bits,
8375         _ii_ab_next_copy_bit,
8376         _ii_ab_fill_copy_bmp,
8377         _ii_ab_load_bmp,
8378         _ii_ab_save_bmp,
8379         _ii_ab_change_bmp,
8380         _ii_ab_cnt_bits,
8381         _ii_ab_join_bmp
8382 };
8383 
8384 
8385 /*
8386  * Copyout the bit map.
8387  */
8388 static int
8389 _ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8390     int user_bm_size)
8391 {
8392         int     start_offset;
8393         int     bm_size;
8394         size_t  co_len;
8395         nsc_off_t       last_fba;
8396 
8397         /* First calculate the size of the shadow and copy bitmaps */
8398         co_len = DSW_BM_FBA_LEN(ip->bi_size);
8399         ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
8400 
8401         /* Are we in the ranges of the various bitmaps/indexes? */
8402         if (bm_offset < ip->bi_shdfba)
8403                 return (EIO);
8404         else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
8405                 /*EMPTY*/;
8406         else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
8407                 /*EMPTY*/;
8408         else if ((ip->bi_flags & DSW_TREEMAP) &&
8409             (bm_offset < (last_fba = last_fba + (co_len * 32))))
8410                 /*EMPTY*/;
8411         else return (EIO);
8412 
8413         if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
8414                 return (EIO);
8415 
8416         start_offset = FBA_SIZE(bm_offset);
8417         bm_size = FBA_SIZE(last_fba);
8418 
8419         co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8420         if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len))
8421                 return (EFAULT);
8422 
8423         return (0);
8424 }
8425 
8426 /*
8427  * Copyin a bit map and or with differences bitmap.
8428  */
8429 static int
8430 _ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8431     int user_bm_size)
8432 {
8433         unsigned char *tmp_buf;
8434         unsigned char *dest;
8435         unsigned char *p;
8436         size_t  tmp_size;
8437         int     n;
8438         int     start_offset;
8439         int     bm_size;
8440         size_t  ci_len;
8441         int     rc = 0;
8442 
8443         start_offset = FBA_SIZE(bm_offset);
8444         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8445 
8446         tmp_buf = NULL;
8447         tmp_size = FBA_SIZE(1);
8448 
8449         tmp_buf = kmem_alloc(tmp_size, KM_SLEEP);
8450         start_offset = FBA_SIZE(bm_offset);
8451         dest = ip->bi_bitmap + start_offset;
8452         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8453 
8454         ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8455         while (ci_len > 0) {
8456                 n = (tmp_size > ci_len) ? ci_len : tmp_size;
8457                 if (copyin(user_bm, tmp_buf, n)) {
8458                         rc = EFAULT;
8459                         break;
8460                 }
8461                 user_bm += n;
8462                 for (p = tmp_buf; n--> 0; ci_len--)
8463                         *dest++ |= *p++;
8464         }
8465         if (tmp_buf)
8466                 kmem_free(tmp_buf, tmp_size);
8467 
8468         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8469 
8470         return (rc);
8471 }
8472 
8473 /*
8474  * Completely zero the bit map.
8475  */
8476 static int
8477 _ii_km_zerobm(_ii_info_t *ip)
8478 {
8479         int start_offset = FBA_SIZE(ip->bi_shdfba);
8480         int len;
8481 
8482         len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8483         mutex_enter(&ip->bi_bmpmutex);
8484         bzero(ip->bi_bitmap+start_offset, len);
8485         mutex_exit(&ip->bi_bmpmutex);
8486 
8487         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8488 
8489         return (0);
8490 }
8491 
8492 
8493 /*
8494  * Copy shadow bitmap to copy bitmap
8495  */
8496 static int
8497 _ii_km_copybm(_ii_info_t *ip)
8498 {
8499         int copy_offset, shd_offset;
8500         int len;
8501 
8502         len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8503         shd_offset = FBA_SIZE(ip->bi_shdfba);
8504         copy_offset = FBA_SIZE(ip->bi_copyfba);
8505         mutex_enter(&ip->bi_bmpmutex);
8506         bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len);
8507         mutex_exit(&ip->bi_bmpmutex);
8508 
8509         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8510 
8511         return (0);
8512 }
8513 
8514 
8515 /*
8516  * Or the shadow bitmap in to the copy bitmap, clear the
8517  * shadow bitmap.
8518  */
8519 static int
8520 _ii_km_orbm(_ii_info_t *ip)
8521 {
8522         unsigned char *copy, *shd;
8523         int copy_offset, shd_offset;
8524         int len;
8525 
8526         len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8527         shd_offset = FBA_SIZE(ip->bi_shdfba);
8528         copy_offset = FBA_SIZE(ip->bi_copyfba);
8529         shd = ip->bi_bitmap + shd_offset;
8530         copy = ip->bi_bitmap + copy_offset;
8531 
8532         mutex_enter(&ip->bi_bmpmutex);
8533         while (len-- > 0)
8534                 *copy++ |= *shd++;
8535         mutex_exit(&ip->bi_bmpmutex);
8536 
8537         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8538 
8539         return (0);
8540 }
8541 
8542 /*
8543  * _ii_km_tst_shd_bit
8544  *      Determine if a chunk has been copied to the shadow device
8545  *
8546  * Calling/Exit State:
8547  *      Returns 1 if the modified bit has been set for the shadow device,
8548  *      otherwise returns 0.
8549  */
8550 
8551 static int
8552 _ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8553 {
8554         unsigned char *bmp;
8555         int bmp_offset;
8556         int rc;
8557 
8558         bmp_offset = FBA_SIZE(ip->bi_shdfba);
8559         bmp = ip->bi_bitmap + bmp_offset;
8560 
8561         mutex_enter(&ip->bi_bmpmutex);
8562         rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8563         mutex_exit(&ip->bi_bmpmutex);
8564 
8565         return (rc);
8566 }
8567 
8568 
8569 /*
8570  * _ii_km_set_shd_bit
8571  *      Records that a chunk has been copied to the shadow device
8572  */
8573 
8574 static int
8575 _ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8576 {
8577         unsigned char *bmp;
8578         int bmp_offset;
8579 
8580         bmp_offset = FBA_SIZE(ip->bi_shdfba);
8581         bmp = ip->bi_bitmap + bmp_offset;
8582 
8583         mutex_enter(&ip->bi_bmpmutex);
8584         if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8585                 DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8586                 if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
8587                         ip->bi_shdbits++;
8588         }
8589         mutex_exit(&ip->bi_bmpmutex);
8590 
8591         return (0);
8592 }
8593 
8594 /*
8595  * _ii_km_tst_copy_bit
8596  *      Determine if a chunk needs to be copied during updates.
8597  *
8598  * Calling/Exit State:
8599  *      Returns 1 if the copy bit for the chunk is set,
8600  *      otherwise returns 0
8601  */
8602 
8603 static int
8604 _ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8605 {
8606         unsigned char *bmp;
8607         int bmp_offset;
8608         int rc;
8609 
8610         bmp_offset = FBA_SIZE(ip->bi_copyfba);
8611         bmp = ip->bi_bitmap + bmp_offset;
8612 
8613         mutex_enter(&ip->bi_bmpmutex);
8614         rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8615         mutex_exit(&ip->bi_bmpmutex);
8616 
8617         return (rc);
8618 }
8619 
8620 
8621 /*
8622  * _ii_km_set_copy_bit
8623  *      Records that a chunk has been copied to the shadow device
8624  */
8625 
8626 static int
8627 _ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8628 {
8629         unsigned char *bmp;
8630         int bmp_offset;
8631 
8632         bmp_offset = FBA_SIZE(ip->bi_copyfba);
8633         bmp = ip->bi_bitmap + bmp_offset;
8634 
8635         mutex_enter(&ip->bi_bmpmutex);
8636         if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8637                 DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8638                 if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
8639                         ip->bi_copybits++;
8640         }
8641         mutex_exit(&ip->bi_bmpmutex);
8642 
8643         return (0);
8644 }
8645 
8646 
8647 /*
8648  * _ii_km_clr_copy_bits
8649  *      Records that a chunk has been cleared on the shadow device
8650  */
8651 
8652 static int
8653 _ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
8654 {
8655         unsigned char *bmp;
8656         int bmp_offset;
8657 
8658         bmp_offset = FBA_SIZE(ip->bi_copyfba);
8659         bmp = ip->bi_bitmap + bmp_offset;
8660 
8661         mutex_enter(&ip->bi_bmpmutex);
8662         for (; nchunks-- > 0; chunk++) {
8663                 DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8664                 if (ip->bi_copybits > 0)
8665                         ip->bi_copybits--;
8666         }
8667         mutex_exit(&ip->bi_bmpmutex);
8668 
8669         return (0);
8670 }
8671 
8672 /*
8673  * _ii_km_fill_copy_bmp
8674  *      Fills the copy bitmap with 1's.
8675  */
8676 
8677 static int
8678 _ii_km_fill_copy_bmp(_ii_info_t *ip)
8679 {
8680         int len;
8681         unsigned char *bmp;
8682         int bmp_offset;
8683 
8684         bmp_offset = FBA_SIZE(ip->bi_copyfba);
8685         bmp = ip->bi_bitmap + bmp_offset;
8686 
8687         len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8688 
8689         mutex_enter(&ip->bi_bmpmutex);
8690         while (len-- > 0)
8691                 *bmp++ = (unsigned char)0xff;
8692         mutex_exit(&ip->bi_bmpmutex);
8693 
8694         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8695 
8696         return (0);
8697 }
8698 
8699 /*
8700  * _ii_km_load_bmp
8701  *      Load bitmap from persistent storage.
8702  */
8703 
8704 static int
8705 _ii_km_load_bmp(_ii_info_t *ip, int flag)
8706 {
8707         nsc_off_t bmp_offset;
8708         nsc_size_t bitmap_size;
8709         int rc;
8710 
8711         if (ip->bi_flags & DSW_BMPOFFLINE)
8712                 return (EIO);
8713 
8714         if (ip->bi_bitmap == NULL) {
8715                 bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8716                     ip->bi_shdfba);
8717                 ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP,
8718                     _ii_local_mem);
8719         }
8720         if (flag)
8721                 return (0);             /* just create an empty bitmap */
8722         bmp_offset = FBA_SIZE(ip->bi_shdfba);
8723         rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8724             ip->bi_bitmap + bmp_offset,
8725             2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8726         if (!II_SUCCESS(rc))
8727                 _ii_error(ip, DSW_BMPOFFLINE);
8728 
8729         ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8730 
8731         return (rc);
8732 }
8733 
8734 /*
8735  * _ii_km_save_bmp
8736  *      Save bitmap to persistent storage.
8737  */
8738 
8739 static int
8740 _ii_km_save_bmp(_ii_info_t *ip, int flag)
8741 {
8742         int bmp_offset;
8743         int bitmap_size;
8744         int rc;
8745 
8746         bmp_offset = FBA_SIZE(ip->bi_shdfba);
8747         if (ip->bi_flags & DSW_BMPOFFLINE)
8748                 rc = EIO;
8749         else {
8750                 rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF,
8751                     ip->bi_shdfba, ip->bi_bitmap + bmp_offset,
8752                     2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8753                 if (!II_SUCCESS(rc))
8754                         _ii_error(ip, DSW_BMPOFFLINE);
8755         }
8756 
8757         if (flag && ip->bi_bitmap) {         /* dispose of bitmap memory */
8758                 bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8759                     ip->bi_shdfba);
8760                 nsc_kmem_free(ip->bi_bitmap, bitmap_size);
8761                 ip->bi_bitmap = NULL;
8762         }
8763 
8764         return (rc);
8765 }
8766 
8767 /*
8768  * _ii_km_next_copy_bit
8769  *      Find next set copy bit.
8770  *
8771  * Returns the next bits set in the copy bitmap, with the corresponding chunks
8772  * locked. Used to cut down on the number of times the bmpmutex is acquired.
8773  */
8774 
8775 static chunkid_t
8776 _ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk,
8777         int want, int *got)
8778 {
8779         unsigned char *bmp;
8780         int bmp_offset;
8781         int nextchunk;
8782 
8783         *got = 0;
8784         bmp_offset = FBA_SIZE(ip->bi_copyfba);
8785         bmp = ip->bi_bitmap + bmp_offset;
8786 
8787         mutex_enter(&ip->bi_bmpmutex);
8788         for (; chunk < maxchunk; chunk++) {
8789                 if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) {
8790                         /*
8791                          * trylock won't sleep so can use while
8792                          * holding bi_bmpmutex.
8793                          */
8794                         if (!_ii_trylock_chunk(ip, chunk)) {
8795                                 mutex_exit(&ip->bi_bmpmutex);
8796                                 _ii_lock_chunk(ip, chunk);
8797                                 *got = 1;
8798 
8799                                 DTRACE_PROBE(_ii_km_next_copy_bit);
8800 
8801                                 return (chunk);
8802                         }
8803                         *got = 1;
8804                         for (nextchunk = chunk + 1;
8805                             *got < want && nextchunk < maxchunk; nextchunk++) {
8806                                 if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS],
8807                                     nextchunk%DSW_BITS))
8808                                         break;
8809                                 if (_ii_trylock_chunk(ip, nextchunk))
8810                                         (*got)++;
8811                                 else
8812                                         break;
8813                         }
8814                         mutex_exit(&ip->bi_bmpmutex);
8815 
8816                         DTRACE_PROBE(_ii_km_next_copy_bit);
8817                         return (chunk);
8818                 }
8819         }
8820         mutex_exit(&ip->bi_bmpmutex);
8821 
8822         return (maxchunk + 1);
8823 }
8824 
8825 /*
8826  * _ii_km_change_bmp
8827  *      copy change bitmap to memory
8828  */
8829 
8830 static int
8831 _ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8832 /* ARGSUSED */
8833 {
8834         int     start_offset;
8835         int     bm_size;
8836         unsigned char *q;
8837 
8838         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8839 
8840         start_offset = FBA_SIZE(ip->bi_shdfba);
8841         bcopy(ip->bi_bitmap + start_offset, ptr, bm_size);
8842 
8843         start_offset = FBA_SIZE(ip->bi_copyfba);
8844         q = ip->bi_bitmap + start_offset;
8845         while (bm_size-- > 0)
8846                 *ptr |= *q;
8847 
8848         return (0);
8849 }
8850 
8851 /*
8852  * Count bits set in the bit map.
8853  */
8854 static int
8855 _ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8856     int bm_size)
8857 {
8858         int     start_offset;
8859         int     i;
8860         nsc_size_t j, k;
8861         unsigned char *cp;
8862 
8863         start_offset = FBA_SIZE(bm_offset);
8864 
8865         cp = ip->bi_bitmap + start_offset;
8866         for (i = k = 0; i < bm_size; i++)
8867                 for (j = (unsigned)*cp++; j; j &= j - 1)
8868                         k++;
8869         *counter = k;
8870 
8871         return (0);
8872 }
8873 
8874 /*
8875  * Or the shadow bitmap in to the copy bitmap, clear the
8876  * shadow bitmap.
8877  */
8878 static int
8879 _ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8880 {
8881         uchar_t *dest, *src;
8882         nsc_size_t bm_size;
8883 
8884         dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba);
8885         src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba);
8886         bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size));
8887 
8888         while (bm_size-- > 0)
8889                 *dest++ |= *src++;
8890 
8891         dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8892 
8893         return (0);
8894 }
8895 
8896 static _ii_bmp_ops_t kmem_buf_bmp = {
8897         _ii_km_co_bmp,
8898         _ii_km_ci_bmp,
8899         _ii_km_zerobm,
8900         _ii_km_copybm,
8901         _ii_km_orbm,
8902         _ii_km_tst_shd_bit,
8903         _ii_km_set_shd_bit,
8904         _ii_km_tst_copy_bit,
8905         _ii_km_set_copy_bit,
8906         _ii_km_clr_copy_bits,
8907         _ii_km_next_copy_bit,
8908         _ii_km_fill_copy_bmp,
8909         _ii_km_load_bmp,
8910         _ii_km_save_bmp,
8911         _ii_km_change_bmp,
8912         _ii_km_cnt_bits,
8913         _ii_km_join_bmp
8914 };
8915 
8916 
8917 static int
8918 ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf,
8919         nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len)
8920 {
8921         int rc;
8922         nsc_buf_t *tmp;
8923         nsc_off_t mapped_fba;
8924         chunkid_t mapped_chunk;
8925         int overflow;
8926 
8927         if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) {
8928                 /* simple read with optional copy */
8929                 if (mst_src) {
8930                         II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0);
8931                 } else {
8932                         II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0);
8933                 }
8934                 if (dstbuf && II_SUCCESS(rc)) {
8935                         rc = nsc_copy(srcbuf, dstbuf, fba, fba, len);
8936                 }
8937 
8938                 return (rc);
8939         }
8940         /* read from mapped shadow into final buffer */
8941         mapped_chunk = ii_tsearch(ip, chunk_num);
8942         if (mapped_chunk == II_NULLNODE)
8943                 return (EIO);
8944         overflow = II_ISOVERFLOW(mapped_chunk);
8945         if (overflow)
8946                 mapped_chunk = II_2OVERFLOW(mapped_chunk);
8947         /* convert chunk number from tsearch into final fba */
8948         mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE);
8949         tmp = NULL;
8950         if (overflow) {
8951                 (void) nsc_reserve(OVRFD(ip), NSC_MULTI);
8952                 II_READ_START(ip, overflow);
8953                 rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8954                 II_READ_END(ip, overflow, rc, len);
8955         } else {
8956                 II_READ_START(ip, shadow);
8957                 rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8958                 II_READ_END(ip, shadow, rc, len);
8959         }
8960         if (II_SUCCESS(rc)) {
8961                 if (dstbuf == NULL)
8962                         dstbuf = srcbuf;
8963                 rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len);
8964                 (void) nsc_free_buf(tmp);
8965         }
8966         if (overflow)
8967                 nsc_release(OVRFD(ip));
8968 
8969         return (rc);
8970 }
8971 
/*
 * _ii_fill_buf
 *      Read data from the required device
 *
 * Calling/Exit State:
 *      Returns 0 if the data was read successfully, otherwise
 *      error code.  A request carrying NSC_RDAHEAD is not serviced:
 *      NSC_DONE is returned immediately.
 *
 * Description:
 *      Reads the data from fba_pos for length fba_len from the
 *      required device. This data may be a mix of data from the master
 *      device and the shadow device, depending on the state of the
 *      bitmaps.
 *
 *      The request is processed one chunk at a time.  For each chunk the
 *      copy and shadow bits decide whether the data is taken from the
 *      device the caller opened (*handle) or from its pair (*handle2,
 *      or the mapped shadow of a DSW_TREEMAP set).
 *
 *      bfd->ii_shd is non-zero when the caller's descriptor is for the
 *      shadow side (NOTE(review): inferred from how it selects between
 *      DSW_SHDOFFLINE and DSW_MSTOFFLINE below -- confirm).
 */

static int
_ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
    nsc_buf_t **handle, nsc_buf_t **handle2)
{
        _ii_info_t *ip = bfd->ii_info;
        _ii_info_t *xip;
        int second_shd = 0;     /* set once ip is switched to a sibling set */
        nsc_off_t temp_fba;     /* start of the part still to be read */
        nsc_size_t temp_len;    /* FBAs left beyond the current chunk */
        nsc_size_t bmp_len;     /* FBAs handled in the current chunk */
        chunkid_t chunk_num;
        int rc;
        int fill_from_pair;     /* current chunk must come from the pair */
        int rtype = SHDR|BMP;
        nsc_buf_t *second_buf = NULL;   /* buffer on the sibling's shadow */

        if (flag&NSC_RDAHEAD)
                return (NSC_DONE);

        chunk_num = fba_pos / DSW_SIZE;
        temp_fba = fba_pos;
        temp_len = fba_len;

        /*
         * If the master is being updated from a shadow we need to fill from
         * the correct shadow volume.
         *
         * The sibling list is searched for another set flagged
         * DSW_COPYINGS.  When one is found it replaces ip for the rest of
         * this routine, its devices are reserved, and a buffer allocated
         * on its shadow replaces the caller's handle2 (locally only).
         */
        if (NSHADOWS(ip) && bfd->ii_shd == 0) {
                for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
                        if (xip == ip)
                                continue;
                        if (xip->bi_flags &DSW_COPYINGS) {
                                second_shd = 1;
                                ip = xip;
                                /* nothing to undo yet if the reserve fails */
                                if ((rc = _ii_rsrv_devs(ip, rtype,
                                    II_INTERNAL)) != 0)
                                        return (EIO);
                                /*
                                 * NSC_RDAHEAD cannot be set here (handled
                                 * on entry), so the flags reduce to
                                 * NSC_MIXED.
                                 */
                                rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len,
                                    (flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf);
                                if (!II_SUCCESS(rc)) {
                                        rc = EIO;
                                        goto out;
                                }
                                handle2 = &second_buf;
                                break;
                        }
                }
        }

        while (temp_len > 0) {
                /* clip this pass at the end of the current chunk */
                if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) {
                        bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba;
                        temp_len -= bmp_len;
                } else {
                        bmp_len = temp_len;
                        temp_len = 0;
                }

                fill_from_pair = 0;

                /*
                 * Set flagged DSW_COPYINGM: a shadow-side read of a chunk
                 * whose copy bit is set is filled from the pair.
                 */
                if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) {
                        rc = II_TST_COPY_BIT(ip, chunk_num);
                        /* Treat a failed bitmap volume as a clear bit */
                        if (rc > 0) {
                                /* Copy bit set */
                                if (bfd->ii_shd) {
                                        if (*handle2)
                                                fill_from_pair = 1;
                                        else {
                                                rc = EIO;
                                                goto out;
                                        }
                                }
                        }
                }
                /*
                 * Set flagged DSW_COPYINGS: a master-side read of a chunk
                 * whose copy bit is set is filled from the pair; a
                 * DSW_TREEMAP set does not need a pair buffer for that.
                 */
                if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) {
                        rc = II_TST_COPY_BIT(ip, chunk_num);
                        /* Treat a failed bitmap volume as a clear bit */
                        if (rc > 0) {
                                /* Copy bit set */
                                if (bfd->ii_shd == 0) {
                                        if (*handle2 ||
                                            (ip->bi_flags&DSW_TREEMAP))
                                                fill_from_pair = 1;
                                        else {
                                                rc = EIO;
                                                goto out;
                                        }
                                }
                        }
                }
                if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) {
                        /* Dependent shadow read */

                        /* unlike the copy bit, a failed test is fatal here */
                        rc = II_TST_SHD_BIT(ip, chunk_num);
                        if (rc < 0) {
                                rc = EIO;
                                goto out;
                        }
                        if (rc == 0) {
                                /* Shadow bit clear */
                                if (*handle2)
                                        fill_from_pair = 1;
                                else {
                                        rc = EIO;
                                        goto out;
                                }
                        }
                }

                if (fill_from_pair) {
                        /* it matters now */
                        if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) {
                                rc = EIO;
                                goto out;
                        }
                        if (*handle2 == NULL &&
                            (ip->bi_flags&DSW_TREEMAP) == 0) {
                                rc = EIO;
                                goto out;
                        }
                        /* read via the pair buffer, result goes to *handle */
                        rc = ii_read_volume(ip, bfd->ii_shd,
                            *handle2, *handle, chunk_num, temp_fba, bmp_len);
                        if (!II_SUCCESS(rc)) {
                                /* both volumes are flagged on failure */
                                _ii_error(ip, DSW_MSTOFFLINE);
                                _ii_error(ip, DSW_SHDOFFLINE);
                                goto out;
                        }
                } else {
                        /* only the caller's own side has to be online */
                        if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) {
                                rc = EIO;
                                goto out;
                        }
                        if ((bfd->ii_shd == 0) &&
                            (ip->bi_flags & DSW_MSTOFFLINE)) {
                                rc = EIO;
                                goto out;
                        }
                        /* read straight into the caller's buffer */
                        rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL,
                            chunk_num, temp_fba, bmp_len);
                        if (!II_SUCCESS(rc)) {
                                if (bfd->ii_shd)
                                        _ii_error(ip, DSW_SHDOFFLINE);
                                else
                                        _ii_error(ip, DSW_MSTOFFLINE);
                                goto out;
                        }
                }

                temp_fba += bmp_len;
                chunk_num++;
        }

        rc = 0;
out:
        /* undo whatever the sibling switch above acquired */
        if (second_buf)
                (void) nsc_free_buf(second_buf);
        if (second_shd)
                _ii_rlse_devs(ip, rtype);

        return (rc);
}
9149 
9150 
9151 /*
9152  * _ii_shadow_write
9153  *      Perform any copy on write required by a write buffer request
9154  *
9155  * Calling/Exit State:
9156  *      Returns 0 on success, otherwise error code.
9157  *
9158  */
9159 
9160 static int
9161 _ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9162 {
9163         _ii_info_t *ip = bfd->ii_info;
9164         chunkid_t       chunk_num;
9165         int     rc;
9166         int     flag;
9167         int hanging;
9168 
9169         DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len);
9170 
9171         /* fail immediately if config DB is unavailable */
9172         if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) {
9173                 return (EIO);
9174         }
9175 
9176         chunk_num = pos / DSW_SIZE;
9177 
9178         if (bfd->ii_shd)
9179                 flag = 0;               /* To shadow */
9180         else
9181                 flag = CV_SHD2MST;      /* To master */
9182 
9183         mutex_enter(&ip->bi_mutex);
9184         ip->bi_shdref++;
9185         mutex_exit(&ip->bi_mutex);
9186         hanging = (ip->bi_flags&DSW_HANGING) != 0;
9187 
9188         for (; (chunk_num >= 0) &&
9189             DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) {
9190 
9191                 if (!hanging)
9192                         _ii_lock_chunk(ip, chunk_num);
9193                 rc = _ii_copy_on_write(ip, flag, chunk_num, 1);
9194 
9195                 /*
9196                  * Set the shadow bit when a small shadow has overflowed so
9197                  * that ii_read_volume can return an error if an attempt is
9198                  * made to read that chunk.
9199                  */
9200                 if (!hanging) {
9201                         if (rc == 0 ||
9202                             (rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0))
9203                                 (void) II_SET_SHD_BIT(ip, chunk_num);
9204                         _ii_unlock_chunk(ip, chunk_num);
9205                 }
9206         }
9207 
9208         mutex_enter(&ip->bi_mutex);
9209         ip->bi_shdref--;
9210         if (ip->bi_state & DSW_CLOSING) {
9211                 if (total_ref(ip) == 0) {
9212                         cv_signal(&ip->bi_closingcv);
9213                 }
9214         }
9215         mutex_exit(&ip->bi_mutex);
9216 
9217         /* did the bitmap fail during this process? */
9218         return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0);
9219 }
9220 
9221 /*
9222  * _ii_alloc_buf
9223  *      Allocate a buffer of data
9224  *
9225  * Calling/Exit State:
9226  *      Returns 0 for success, < 0 for async I/O, > 0 is an error code.
9227  *
9228  * Description:
9229  *      For a write buffer, calls dsw_shadow_write to perform any necessary
9230  *      copy on write operations, then allocates the real buffers from the
9231  *      underlying devices.
9232  *      For a read buffer, allocates the real buffers from the underlying
9233  *      devices, then calls _ii_fill_buf to fill the required buffer.
9234  *      For a buffer that is neither read nor write, just allocate the
9235  *      buffers so that a _ii_fill_buf can be done later by _ii_read.
9236  */
9237 
9238 static int
9239 _ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag,
9240     ii_buf_t **ptr)
9241 {
9242         _ii_info_t *ip = bfd->ii_info;
9243         ii_buf_t *h;
9244         int     raw = II_RAW(bfd);
9245         int rc = 0;
9246         int ioflag;
9247         int fbuf = 0, fbuf2 = 0, abuf = 0;
9248         int rw_ent = 0;
9249 
9250         if (bfd->ii_bmp) {
9251                 DTRACE_PROBE(_ii_alloc_buf_end);
9252                 /* any I/O to the bitmap device is barred */
9253                 return (EIO);
9254         }
9255 
9256         if (len == 0) {
9257                 DTRACE_PROBE(_ii_alloc_buf_end);
9258                 return (EINVAL);
9259         }
9260 
9261         /* Bounds checking */
9262         if (pos + len > ip->bi_size) {
9263                 if (ii_debug > 1)
9264                         cmn_err(CE_NOTE,
9265                             "!ii: Attempt to access beyond end of ii volume");
9266                 DTRACE_PROBE(_ii_alloc_buf_end);
9267                 return (EIO);
9268         }
9269 
9270         h = *ptr;
9271         if (h == NULL) {
9272                 h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd);
9273                 if (h == NULL) {
9274                         DTRACE_PROBE(_ii_alloc_buf_end);
9275                         return (ENOMEM);
9276                 }
9277         }
9278 
9279         /*
9280          * Temporary nsc_reserve of bitmap and other device.
9281          * This device has already been reserved by the preceding _ii_attach.
9282          * Corresponding nsc_release is in _ii_free_buf.
9283          */
9284 
9285         h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR)
9286             : (bfd->ii_shd ? MST : SHD));
9287 
9288         if (!bfd->ii_shd)
9289                 ip = ip->bi_master;
9290 
9291         rw_enter(&ip->bi_linkrw, RW_READER);
9292         rw_ent = 1;
9293         if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) ==
9294             DSW_SHDEXPORT)
9295                 h->ii_rsrv &= ~(SHD|SHDR);
9296         if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) {
9297                 rw_exit(&ip->bi_linkrw);
9298                 rw_ent = 0;
9299                 h->ii_rsrv = NULL;
9300                 goto error;
9301         }
9302 
9303         if (flag & NSC_WRBUF) {
9304                 rc = _ii_shadow_write(bfd, pos, len);
9305                 if (!II_SUCCESS(rc))
9306                         goto error;
9307         }
9308 
9309         if (!(flag & NSC_RDAHEAD))
9310                 ioflag = flag & ~(NSC_RDBUF);
9311         else
9312                 ioflag = flag;
9313 
9314         if (bfd->ii_shd) {
9315                 /*
9316                  * SHADOW
9317                  */
9318 
9319                 if (ip->bi_flags & DSW_SHDEXPORT) {
9320                         rc = EIO;
9321                         goto error;
9322                 }
9323                 /*
9324                  * The master device buffer has to be allocated first
9325                  * so that deadlocks are avoided.
9326                  */
9327                 DTRACE_PROBE(AllocBufFor_SHADOW);
9328 
9329                 if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) {
9330                         rc = nsc_alloc_buf(MSTFD(ip), pos, len,
9331                             (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
9332                         if (!II_SUCCESS(rc)) {
9333                                 if (ii_debug > 2)
9334                                         cmn_err(CE_WARN, "!ii: "
9335                                             "Join/write-S race detected\n");
9336                                 if (h->ii_bufp2)
9337                                         (void) nsc_free_buf(h->ii_bufp2);
9338                                 h->ii_bufp2 = NULL;
9339                                 /*
9340                                  * Carry on as this will not matter if
9341                                  * _ii_fill_buf is not called, or if
9342                                  * it is called but doesn't need to read this
9343                                  * volume.
9344                                  */
9345                                 rc = 0;
9346                         }
9347                         fbuf2 = 1;
9348                 }
9349 
9350                 if (ip->bi_flags & DSW_SHDOFFLINE) {
9351                         rc = EIO;
9352                         goto error;
9353                 }
9354                 if ((ip->bi_flags)&DSW_TREEMAP) {
9355                         rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp);
9356                         if (!II_SUCCESS(rc)) {
9357                                 _ii_error(ip, DSW_SHDOFFLINE);
9358                                 goto error;
9359                         }
9360                         abuf = 1;
9361                 } else {
9362                         II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len,
9363                             ioflag, &h->ii_bufp);        /* do not read yet */
9364                         if (!II_SUCCESS(rc)) {
9365                                 _ii_error(ip, DSW_SHDOFFLINE);
9366                                 goto error;
9367                         }
9368                         fbuf = 1;
9369                 }
9370         } else {
9371                 /*
9372                  * MASTER
9373                  */
9374 
9375                 /*
9376                  * The master device buffer has to be allocated first
9377                  * so that deadlocks are avoided.
9378                  */
9379 
9380                 if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) {
9381                         rc = EIO;
9382                         goto error;
9383                 }
9384 
9385                 DTRACE_PROBE(AllocBufFor_MASTER);
9386 
9387                 II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag,
9388                     &h->ii_bufp);                /* do not read yet */
9389                 if (!II_SUCCESS(rc)) {
9390                         _ii_error(ip, DSW_MSTOFFLINE);
9391                         goto error;
9392                 }
9393                 fbuf = 1;
9394 
9395                 /*
9396                  * If shadow FD and (dependent set OR copying) and
9397                  * not (compact dependent && shadow offline && shadow exported)
9398                  */
9399                 if ((ip->bi_shdfd) &&
9400                     ((ip->bi_flags & DSW_COPYINGP) ||
9401                     (!(ip->bi_flags & DSW_GOLDEN))) &&
9402                     (!(ip->bi_flags &
9403                     (DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) {
9404                         rc = nsc_alloc_buf(SHDFD(ip), pos, len,
9405                             (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
9406                         if (!II_SUCCESS(rc)) {
9407                                 if (ii_debug > 2)
9408                                         cmn_err(CE_WARN, "!ii: "
9409                                             "Join/write-M race detected\n");
9410                                 if (h->ii_bufp2)
9411                                         (void) nsc_free_buf(h->ii_bufp2);
9412                                 h->ii_bufp2 = NULL;
9413                                 /*
9414                                  * Carry on as this will not matter if
9415                                  * _ii_fill_buf is not called, or if
9416                                  * it is called but doesn't need to read this
9417                                  * volume.
9418                                  */
9419                                 rc = 0;
9420                         }
9421                         fbuf2 = 1;
9422                 }
9423         }
9424 
9425         if (flag & NSC_RDBUF)
9426                 rc = _ii_fill_buf(bfd, pos, len, flag,
9427                     h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
9428 
9429 error:
9430         if (II_SUCCESS(rc)) {
9431                 h->ii_bufh.sb_vec = h->ii_abufp ? h->ii_abufp->sb_vec :
9432                     h->ii_bufp->sb_vec;
9433                 h->ii_bufh.sb_error = 0;
9434                 h->ii_bufh.sb_flag |= flag;
9435                 h->ii_bufh.sb_pos = pos;
9436                 h->ii_bufh.sb_len = len;
9437         } else {
9438                 h->ii_bufh.sb_error = rc;
9439                 if (h->ii_bufp2 && fbuf2) {
9440                         (void) nsc_free_buf(h->ii_bufp2);
9441                         h->ii_bufp2 = NULL;
9442                 }
9443                 if (h->ii_bufp && fbuf) {
9444                         (void) nsc_free_buf(h->ii_bufp);
9445                         h->ii_bufp = NULL;
9446                 }
9447                 if (h->ii_abufp && abuf) {
9448                         (void) nsc_free_buf(h->ii_abufp);
9449                         h->ii_abufp = NULL;
9450                 }
9451 
9452                 if (h->ii_rsrv) {
9453                         /*
9454                          * Release temporary reserve - reserved above.
9455                          */
9456                         _ii_rlse_devs(ip, h->ii_rsrv);
9457                         h->ii_rsrv = NULL;
9458                 }
9459                 if (rw_ent)
9460                         rw_exit(&ip->bi_linkrw);
9461         }
9462 
9463         return (rc);
9464 }
9465 
9466 
9467 /*
9468  * _ii_free_buf
9469  */
9470 
9471 static int
9472 _ii_free_buf(ii_buf_t *h)
9473 {
9474         ii_fd_t *bfd;
9475         int rsrv;
9476         int rc;
9477 
9478         if (h->ii_abufp == NULL) {
9479                 rc = nsc_free_buf(h->ii_bufp);
9480         } else {
9481                 rc = nsc_free_buf(h->ii_abufp);
9482                 h->ii_abufp = NULL;
9483         }
9484         if (!II_SUCCESS(rc))
9485                 return (rc);
9486         if (h->ii_bufp2) {
9487                 rc = nsc_free_buf(h->ii_bufp2);
9488                 h->ii_bufp2 = NULL;
9489                 if (!II_SUCCESS(rc))
9490                         return (rc);
9491         }
9492 
9493         bfd = h->ii_fd;
9494         rsrv = h->ii_rsrv;
9495 
9496         if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) {
9497                 rc = _ii_free_handle(h, h->ii_fd);
9498                 if (!II_SUCCESS(rc))
9499                         return (rc);
9500         } else {
9501                 h->ii_bufh.sb_flag = NSC_HALLOCATED;
9502                 h->ii_bufh.sb_vec = NULL;
9503                 h->ii_bufh.sb_error = 0;
9504                 h->ii_bufh.sb_pos = 0;
9505                 h->ii_bufh.sb_len = 0;
9506                 h->ii_rsrv = NULL;
9507         }
9508 
9509         /*
9510          * Release temporary reserve - reserved in _ii_alloc_buf.
9511          */
9512 
9513         if (rsrv)
9514                 _ii_rlse_devs(bfd->ii_info, rsrv);
9515         rw_exit(&bfd->ii_info->bi_linkrw);
9516 
9517         return (0);
9518 }
9519 
9520 
9521 /*
9522  * _ii_open
9523  *      Open a device
9524  *
9525  * Calling/Exit State:
9526  *      Returns a token to identify the shadow device.
9527  *
9528  * Description:
9529  *      Performs the housekeeping operations associated with an upper layer
9530  *      of the nsc stack opening a shadowed device.
9531  */
9532 
9533 /* ARGSUSED */
9534 
9535 static int
9536 _ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9537 {
9538         _ii_info_t *ip;
9539         _ii_overflow_t *op;
9540         ii_fd_t *bfd;
9541         int is_mst = 0;
9542         int is_shd = 0;
9543         int raw = (flag & NSC_CACHE) == 0;
9544 
9545         bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem);
9546         if (!bfd)
9547                 return (ENOMEM);
9548 
9549         DTRACE_PROBE1(_ii_open_mutex,
9550             ii_fd_t *, bfd);
9551 
9552         mutex_enter(&_ii_info_mutex);
9553 
9554         for (ip = _ii_info_top; ip; ip = ip->bi_next) {
9555                 if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) {
9556                         is_mst = 1;
9557                         break;
9558                 } else if (strcmp(path, ip->bi_keyname) == 0) {
9559                         is_shd = 1;
9560                         break;
9561                 } else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0)
9562                         break;
9563         }
9564 
9565         if (is_mst)
9566                 ip = ip->bi_master;
9567 
9568         if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) {
9569                 DTRACE_PROBE(_ii_open_Disabled);
9570                 mutex_exit(&_ii_info_mutex);
9571                 return (EINTR);
9572         }
9573 
9574         if (!ip) {
9575                 /* maybe it's an overflow */
9576                 mutex_exit(&_ii_info_mutex);
9577                 mutex_enter(&_ii_overflow_mutex);
9578                 for (op = _ii_overflow_top; op; op = op->ii_next) {
9579                         if (strcmp(path, op->ii_volname) == 0)
9580                                 break;
9581                 }
9582                 mutex_exit(&_ii_overflow_mutex);
9583 
9584                 if (!op) {
9585                         nsc_kmem_free(bfd, sizeof (*bfd));
9586                         DTRACE_PROBE(_ii_open_end_EINVAL);
9587                         return (EINVAL);
9588                 }
9589                 bfd->ii_ovr = 1;
9590                 bfd->ii_oflags = flag;
9591                 bfd->ii_optr = op;
9592                 *cdp = (blind_t)bfd;
9593 
9594                 DTRACE_PROBE(_ii_open_end_overflow);
9595                 return (0);
9596         }
9597         mutex_enter(&ip->bi_mutex);
9598         ip->bi_ioctl++;
9599         mutex_exit(&_ii_info_mutex);
9600 
9601         if (is_mst) {
9602                 if (raw) {
9603                         ip->bi_mstr_iodev = NULL;    /* set in attach */
9604                         ip->bi_mstrref++;
9605                 } else {
9606                         ip->bi_mst_iodev = NULL;     /* set in attach */
9607                         ip->bi_mstref++;
9608                 }
9609                 ip->bi_master->bi_iifd = bfd;
9610         } else if (is_shd) {
9611                 if (raw) {
9612                         ip->bi_shdr_iodev = NULL;    /* set in attach */
9613                         ip->bi_shdrref++;
9614                 } else {
9615                         ip->bi_shd_iodev = NULL;     /* set in attach */
9616                         ip->bi_shdref++;
9617                 }
9618                 bfd->ii_shd = 1;
9619         } else {
9620                 ip->bi_bmpref++;
9621                 ip->bi_bmp_iodev = NULL;     /* set in attach */
9622                 bfd->ii_bmp = 1;
9623         }
9624 
9625         _ii_ioctl_done(ip);
9626         mutex_exit(&ip->bi_mutex);
9627 
9628         bfd->ii_info = ip;
9629         bfd->ii_oflags = flag;
9630 
9631         *cdp = (blind_t)bfd;
9632 
9633         return (0);
9634 }
9635 
9636 static int
9637 _ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9638 {
9639         return (_ii_open(path, NSC_CACHE|flag, cdp, iodev));
9640 }
9641 
9642 static int
9643 _ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9644 {
9645         return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev));
9646 }
9647 
9648 
9649 /*
9650  * _ii_close
9651  *      Close a device
9652  *
9653  * Calling/Exit State:
9654  *      Always succeeds - returns 0
9655  *
9656  * Description:
9657  *      Performs the housekeeping operations associated with an upper layer
9658  *      of the nsc stack closing a shadowed device.
9659  */
9660 
9661 static int
9662 _ii_close(bfd)
9663 ii_fd_t *bfd;
9664 {
9665         _ii_info_t *ip = bfd->ii_info;
9666         _ii_info_dev_t *dip;
9667         int raw;
9668 
9669         if (!ip) {
9670                 ASSERT(bfd->ii_ovr);
9671                 return (0);
9672         }
9673 
9674         raw = II_RAW(bfd);
9675 
9676         mutex_enter(&ip->bi_mutex);
9677 
9678         if (bfd->ii_shd && raw) {
9679                 dip = &ip->bi_shdrdev;
9680         } else if (bfd->ii_shd) {
9681                 dip = &ip->bi_shddev;
9682         } else if (bfd->ii_bmp) {
9683                 dip = &ip->bi_bmpdev;
9684         } else if (raw) {
9685                 dip = ip->bi_mstrdev;
9686         } else {
9687                 dip = ip->bi_mstdev;
9688         }
9689 
9690         if (dip) {
9691                 dip->bi_ref--;
9692                 if (dip->bi_ref == 0)
9693                         dip->bi_iodev = NULL;
9694         }
9695 
9696         if (ip->bi_state & DSW_CLOSING) {
9697                 if (total_ref(ip) == 0) {
9698                         cv_signal(&ip->bi_closingcv);
9699                 }
9700         } else if ((ip->bi_flags & DSW_HANGING) &&
9701             (ip->bi_head->bi_state & DSW_CLOSING))
9702                 cv_signal(&ip->bi_head->bi_closingcv);
9703 
9704         if (!(bfd->ii_shd || bfd->ii_bmp))        /* is master device */
9705                 ip->bi_master->bi_iifd = NULL;
9706         mutex_exit(&ip->bi_mutex);
9707 
9708         nsc_kmem_free(bfd, sizeof (*bfd));
9709 
9710         return (0);
9711 }
9712 
9713 /*
9714  * _ii_alloc_handle
9715  *      Allocate a handle
9716  *
9717  */
9718 
9719 static nsc_buf_t *
9720 _ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd)
9721 {
9722         ii_buf_t *h;
9723 
9724         if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9725                 return (NULL);
9726 
9727         h = kmem_alloc(sizeof (*h), KM_SLEEP);
9728         if (!h)
9729                 return (NULL);
9730 
9731         h->ii_abufp = NULL;
9732         h->ii_bufp = nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb);
9733         if (!h->ii_bufp) {
9734                 kmem_free(h, sizeof (*h));
9735                 return (NULL);
9736         }
9737         h->ii_bufp2 = NULL;
9738         h->ii_bufh.sb_flag = NSC_HALLOCATED;
9739         h->ii_fd = bfd;
9740         h->ii_rsrv = NULL;
9741 
9742         return ((nsc_buf_t *)h);
9743 }
9744 
9745 
9746 /*
9747  * _ii_free_handle
9748  *      Free a handle
9749  *
9750  */
9751 
9752 static int       /*ARGSUSED*/
9753 _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd)
9754 {
9755         int rc;
9756 
9757         if (h->ii_abufp)
9758                 (void) nsc_free_buf(h->ii_abufp);
9759         rc = nsc_free_handle(h->ii_bufp);
9760         if (!II_SUCCESS(rc)) {
9761                 return (rc);
9762         }
9763 
9764         kmem_free(h, sizeof (ii_buf_t));
9765 
9766         return (0);
9767 }
9768 
9769 
9770 /*
9771  * _ii_attach
9772  *      Attach
9773  *
9774  * Calling/Exit State:
9775  *      Returns 0 for success, errno on failure.
9776  *
9777  * Description:
9778  */
9779 
9780 static int
9781 _ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev)
9782 {
9783         _ii_info_t *ip;
9784         int dev;
9785         int raw;
9786         int rc;
9787         _ii_info_dev_t *infop;
9788 
9789         raw  = II_RAW(bfd);
9790 
9791         DTRACE_PROBE2(_ii_attach_info,
9792             char *, bfd->ii_shd? "shadow" : "master",
9793             int, raw);
9794 
9795         if (bfd->ii_ovr)
9796                 return (EINVAL);
9797 
9798         ip = bfd->ii_info;
9799         if (ip == NULL)
9800                 return (EINVAL);
9801 
9802         mutex_enter(&ip->bi_mutex);
9803         if (bfd->ii_bmp) {
9804                 infop = &ip->bi_bmpdev;
9805         } else if (bfd->ii_shd) {
9806                 if (raw) {
9807                         infop = &ip->bi_shdrdev;
9808                 } else {
9809                         infop = &ip->bi_shddev;
9810                 }
9811         } else if (!bfd->ii_ovr) {
9812                 if (raw) {
9813                         infop = ip->bi_mstrdev;
9814                 } else {
9815                         infop = ip->bi_mstdev;
9816                 }
9817         }
9818 
9819         if (iodev) {
9820                 infop->bi_iodev = iodev;
9821                 nsc_set_owner(infop->bi_fd, infop->bi_iodev);
9822         }
9823         mutex_exit(&ip->bi_mutex);
9824 
9825         if (bfd->ii_bmp)
9826                 return (EINVAL);
9827 
9828         if (raw)
9829                 dev = bfd->ii_shd ? SHDR : MSTR;
9830         else
9831                 dev = bfd->ii_shd ? SHD : MST;
9832 
9833         rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL);
9834 
9835         return (rc);
9836 }
9837 
9838 
9839 /*
9840  * _ii_detach
9841  *      Detach
9842  *
9843  * Calling/Exit State:
9844  *      Returns 0 for success, always succeeds
9845  *
9846  * Description:
9847  */
9848 
9849 static int
9850 _ii_detach(bfd)
9851 ii_fd_t *bfd;
9852 {
9853         int dev;
9854         int raw;
9855 
9856         raw = II_RAW(bfd);
9857 
9858         DTRACE_PROBE2(_ii_detach_info,
9859             char *, bfd->ii_shd? "shadow" : "master",
9860             int, raw);
9861 
9862         if (bfd->ii_bmp)
9863                 return (0);
9864 
9865         ASSERT(bfd->ii_info);
9866         dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST);
9867         _ii_rlse_devs(bfd->ii_info, dev);
9868 
9869         return (0);
9870 }
9871 
9872 /*
9873  * _ii_get_pinned
9874  *
9875  */
9876 
9877 static int
9878 _ii_get_pinned(ii_fd_t *bfd)
9879 {
9880         int rc;
9881 
9882         if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9883                 return (EIO);
9884 
9885         rc = nsc_get_pinned(II_FD(bfd));
9886 
9887         return (rc);
9888 }
9889 
9890 /*
9891  * _ii_discard_pinned
9892  *
9893  */
9894 
9895 static int
9896 _ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9897 {
9898         int rc;
9899 
9900         if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9901                 return (EIO);
9902         rc = nsc_discard_pinned(II_FD(bfd), pos, len);
9903 
9904         return (rc);
9905 }
9906 
9907 /*
9908  * _ii_partsize
9909  *
9910  */
9911 
9912 static int
9913 _ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr)
9914 {
9915         /* Always return saved size */
9916         *ptr = bfd->ii_info->bi_size;
9917         return (0);
9918 }
9919 
9920 /*
9921  * _ii_maxfbas
9922  *
9923  */
9924 
9925 static int
9926 _ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr)
9927 {
9928         int rc;
9929         int rs;
9930         int dev;
9931         _ii_info_t *ip;
9932 
9933         ip = bfd->ii_info;
9934         if (REMOTE_VOL(bfd->ii_shd, ip))
9935                 return (EIO);
9936 
9937         dev =  ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR;
9938 
9939         DTRACE_PROBE1(_ii_maxfbas_info,
9940             char *, dev == SHDR? "shadow" : "master");
9941 
9942         rs = _ii_rsrv_devs(ip, dev, II_INTERNAL);
9943         rc = nsc_maxfbas((dev == MSTR) ? MSTFD(ip) : SHDFD(ip), flag, ptr);
9944 
9945         if (rs == 0)
9946                 _ii_rlse_devs(ip, dev);
9947 
9948         return (rc);
9949 }
9950 
9951 /*
9952  * ii_get_group_list
9953  */
9954 _ii_info_t **
9955 ii_get_group_list(char *group, int *count)
9956 {
9957         int i;
9958         int nip;
9959         uint64_t   hash;
9960         _ii_info_t **ipa;
9961         _ii_lsthead_t *head;
9962         _ii_lstinfo_t *np;
9963 
9964         hash = nsc_strhash(group);
9965 
9966         for (head = _ii_group_top; head; head = head->lst_next) {
9967                 if (hash == head->lst_hash && strncmp(head->lst_name,
9968                     group, DSW_NAMELEN) == 0)
9969                         break;
9970         }
9971 
9972         if (!head) {
9973                 return (NULL);
9974         }
9975 
9976         /* Count entries */
9977         for (nip = 0, np = head->lst_start; np; np = np->lst_next)
9978                 ++nip;
9979 
9980         ASSERT(nip > 0);
9981 
9982         ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP);
9983 
9984         np = head->lst_start;
9985 
9986         for (i = 0; i < nip; i++) {
9987                 ASSERT(np != 0);
9988 
9989                 ipa[i] = np->lst_ip;
9990                 np = np->lst_next;
9991         }
9992 
9993         *count = nip;
9994         return (ipa);
9995 }
9996 
9997 /*
9998  * _ii_pinned
9999  *
10000  */
10001 
10002 static void
10003 _ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10004 {
10005         DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev,
10006             nsc_off_t, pos, nsc_size_t, len);
10007 
10008         nsc_pinned_data(dip->bi_iodev, pos, len);
10009 
10010 }
10011 
10012 /*
10013  * _ii_unpinned
10014  *
10015  */
10016 
10017 static void
10018 _ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10019 {
10020         nsc_unpinned_data(dip->bi_iodev, pos, len);
10021 
10022 }
10023 
10024 
10025 /*
10026  * _ii_read
10027  */
10028 
10029 static int
10030 _ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10031 {
10032         int rc;
10033         void *sb_vec;
10034         nsc_vec_t **src;
10035 
10036         if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10037                 rc = EIO;
10038         else {
10039                 src =  h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec;
10040                 sb_vec = *src;
10041                 *src = h->ii_bufh.sb_vec;
10042                 rc = _ii_fill_buf(h->ii_fd, pos, len, flag,
10043                     h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
10044                 *src = sb_vec;
10045         }
10046         if (!II_SUCCESS(rc))
10047                 h->ii_bufh.sb_error = rc;
10048 
10049         return (rc);
10050 }
10051 
10052 
10053 /*
10054  * _ii_write
10055  */
10056 
10057 static int
10058 _ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10059 {
10060         int rc;
10061         ii_fd_t *bfd = h->ii_fd;
10062         _ii_info_t *ip = bfd->ii_info;
10063         chunkid_t       chunk_num;
10064         nsc_size_t      copy_len;
10065         nsc_off_t       mapped_fba;
10066         chunkid_t       mapped_chunk;
10067         int     overflow;
10068         nsc_buf_t *tmp;
10069         void    *sb_vec;
10070 
10071         if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10072                 rc = EIO;
10073         else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) {
10074                 sb_vec = h->ii_bufp->sb_vec;
10075                 h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10076                 if (bfd->ii_shd) {
10077                         II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len,
10078                             flag);
10079                 } else {
10080                         II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len,
10081                             flag);
10082                 }
10083                 h->ii_bufp->sb_vec = sb_vec;
10084         } else {
10085                 /* write of mapped shadow buffer */
10086                 rc = 0;
10087                 chunk_num = pos / DSW_SIZE;
10088                 while (len > 0 && II_SUCCESS(rc)) {
10089                         /*
10090                          * don't need to test bitmaps as allocating the
10091                          * write buffer will c-o-write the chunk.
10092                          */
10093                         mapped_chunk = ii_tsearch(ip, chunk_num);
10094                         if (mapped_chunk == II_NULLNODE) {
10095                                 rc = EIO;
10096                                 break;
10097                         }
10098                         overflow = II_ISOVERFLOW(mapped_chunk);
10099                         if (overflow)
10100                                 mapped_chunk = II_2OVERFLOW(mapped_chunk);
10101                         mapped_fba = DSW_CHK2FBA(mapped_chunk) +
10102                             (pos % DSW_SIZE);
10103                         copy_len = DSW_SIZE - (pos % DSW_SIZE);
10104                         if (copy_len > len)
10105                                 copy_len = len;
10106                         tmp = NULL;
10107                         if (overflow) {
10108                                 (void) nsc_reserve(OVRFD(ip), NSC_MULTI);
10109                                 rc = nsc_alloc_buf(OVRFD(ip), mapped_fba,
10110                                     copy_len, NSC_WRBUF, &tmp);
10111                         } else
10112                                 rc = nsc_alloc_buf(SHDFD(ip), mapped_fba,
10113                                     copy_len, NSC_WRBUF, &tmp);
10114                         sb_vec = h->ii_abufp->sb_vec;
10115                         h->ii_abufp->sb_vec = h->ii_bufh.sb_vec;
10116                         if (II_SUCCESS(rc)) {
10117                                 rc = nsc_copy(h->ii_abufp, tmp, pos,
10118                                     mapped_fba, copy_len);
10119                         }
10120                         if (overflow) {
10121                                 II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba,
10122                                     copy_len, flag);
10123                         } else {
10124                                 II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba,
10125                                     copy_len, flag);
10126                         }
10127                         h->ii_abufp->sb_vec = sb_vec;
10128                         (void) nsc_free_buf(tmp);
10129                         if (overflow)
10130                                 nsc_release(OVRFD(ip));
10131                         /* move on to next chunk */
10132                         pos += copy_len;
10133                         len -= copy_len;
10134                         chunk_num++;
10135                 }
10136         }
10137         if (!II_SUCCESS(rc))
10138                 h->ii_bufh.sb_error = rc;
10139 
10140         return (rc);
10141 }
10142 
10143 
10144 /*
10145  * _ii_zero
10146  */
10147 
10148 static int
10149 _ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10150 {
10151         int rc;
10152         void *sb_vec;
10153 
10154         sb_vec = h->ii_bufp->sb_vec;
10155         h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10156         rc = nsc_zero(h->ii_bufp, pos, len, flag);
10157         h->ii_bufp->sb_vec = sb_vec;
10158         if (!II_SUCCESS(rc))
10159                 h->ii_bufh.sb_error = rc;
10160 
10161         return (rc);
10162 }
10163 
10164 
10165 /*
10166  * _ii_uncommit
10167  */
10168 
10169 static int
10170 _ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10171 {
10172         int rc;
10173         void *sb_vec;
10174 
10175         sb_vec = h->ii_bufp->sb_vec;
10176         h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10177         rc = nsc_uncommit(h->ii_bufp, pos, len, flag);
10178         h->ii_bufp->sb_vec = sb_vec;
10179         if (!II_SUCCESS(rc))
10180                 h->ii_bufh.sb_error = rc;
10181 
10182         return (rc);
10183 }
10184 
10185 
10186 /*
10187  * _ii_trksize
10188  */
10189 
10190 static int
10191 _ii_trksize(ii_fd_t *bfd, int trksize)
10192 {
10193         int rc;
10194 
10195         rc = nsc_set_trksize(II_FD(bfd), trksize);
10196 
10197         return (rc);
10198 }
10199 
10200 /*
10201  * _ii_register_path
10202  */
10203 
10204 static nsc_path_t *
10205 _ii_register_path(char *path, int type, nsc_io_t *io)
10206 {
10207         nsc_path_t *tok;
10208 
10209         tok = nsc_register_path(path, type, io);
10210 
10211         return (tok);
10212 }
10213 
10214 /*
10215  * _ii_unregister_path
10216  */
10217 /*ARGSUSED*/
10218 static int
10219 _ii_unregister_path(nsc_path_t *sp, int flag, char *type)
10220 {
10221         int rc;
10222 
10223         rc = nsc_unregister_path(sp, flag);
10224 
10225         return (rc);
10226 }
10227 
10228 int
10229 _ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name,
10230     char **key)
10231 {
10232         _ii_lsthead_t **head;
10233         _ii_lstinfo_t *node;
10234         uint64_t hash;
10235 
10236         ASSERT(key && !*key);
10237         ASSERT(ip && mutex && lst && name);
10238 
10239         node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP);
10240         if (!node) {
10241                 cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10242                 DTRACE_PROBE(_ii_ll_add_end_ENOMEM);
10243                 return (ENOMEM);
10244         }
10245         node->lst_ip = ip;
10246 
10247         /* find out where we should insert it */
10248         hash = nsc_strhash(name);
10249 
10250         mutex_enter(mutex);
10251         for (head = lst; *head; head = &((*head)->lst_next)) {
10252                 if (((*head)->lst_hash == hash) &&
10253                     strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) {
10254                         node->lst_next = (*head)->lst_start;
10255                         (*head)->lst_start = node;
10256                         break;
10257                 }
10258         }
10259 
10260         if (!*head) {
10261                 /* create a new entry */
10262                 *head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP);
10263                 if (!*head) {
10264                         /* bother */
10265                         cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10266                         kmem_free(node, sizeof (_ii_lstinfo_t));
10267                         DTRACE_PROBE(_ii_ll_add_end_2);
10268                         return (ENOMEM);
10269                 }
10270                 (*head)->lst_hash = hash;
10271                 (void) strncpy((*head)->lst_name, name, DSW_NAMELEN);
10272                 (*head)->lst_start = node;
10273         }
10274         mutex_exit(mutex);
10275 
10276         *key = (*head)->lst_name;
10277 
10278         return (0);
10279 }
10280 
10281 int
10282 _ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key)
10283 {
10284         _ii_lsthead_t **head, *oldhead = 0;
10285         _ii_lstinfo_t **node, *oldnode = 0;
10286         uint64_t hash;
10287         int found;
10288 
10289         ASSERT(key && *key);
10290         ASSERT(ip && lst);
10291 
10292         hash = nsc_strhash(*key);
10293 
10294         mutex_enter(mutex);
10295         for (head = lst; *head; head = &((*head)->lst_next)) {
10296                 if (((*head)->lst_hash == hash) &&
10297                     strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0)
10298                         break;
10299         }
10300         if (!*head) {
10301                 /* no such link (!) */
10302                 mutex_exit(mutex);
10303                 return (0);
10304         }
10305 
10306         found = 0;
10307         for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) {
10308                 if (ip == (*node)->lst_ip) {
10309                         oldnode = *node;
10310                         *node = (*node)->lst_next;
10311                         kmem_free(oldnode, sizeof (_ii_lstinfo_t));
10312                         found = 1;
10313                         break;
10314                 }
10315         }
10316 
10317         ASSERT(found);
10318 
10319         if (!found) {
10320                 mutex_exit(mutex);
10321                 return (0);
10322         }
10323 
10324         /* did we just delete the last set in this resource group? */
10325         if (!(*head)->lst_start) {
10326                 oldhead = *head;
10327                 *head = (*head)->lst_next;
10328                 kmem_free(oldhead, sizeof (_ii_lsthead_t));
10329         }
10330         mutex_exit(mutex);
10331 
10332         *key = NULL;
10333 
10334         return (0);
10335 }
10336 
/*
 * _ii_fd_def
 *
 * Name/value table with two entries, "Pinned" and "Unpinned", each pairing
 * the name with the address of the II handler (_ii_pinned, _ii_unpinned)
 * cast to uintptr_t.  The third field is 0 in every row; its meaning is
 * defined by nsc_def_t, which is declared elsewhere.  The all-zero row
 * terminates the table.
 *
 * NOTE(review): presumably handed to nsctl when II opens its file
 * descriptors - confirm at the point of use.
 */
static nsc_def_t _ii_fd_def[] = {
        "Pinned",       (uintptr_t)_ii_pinned,          0,
        "Unpinned",     (uintptr_t)_ii_unpinned,        0,
        0,              0,                              0
};
10342 
10343 
/*
 * _ii_io_def
 *
 * Name/value table mapping I/O operation names to the II functions that
 * implement them, each stored as a uintptr_t.  "Provide" carries the value
 * 0, and the all-zero row terminates the table.  The third field is 0 in
 * every row; its meaning is defined by nsc_def_t, declared elsewhere.
 *
 * The entries are the same as those of _ii_ior_def except for "Open",
 * which is _ii_openc here.
 *
 * NOTE(review): presumably one of the definition tables II registers with
 * nsctl as an I/O provider - confirm at the registration call site.
 */
static nsc_def_t _ii_io_def[] = {
        "Open",         (uintptr_t)_ii_openc,           0,
        "Close",        (uintptr_t)_ii_close,           0,
        "Attach",       (uintptr_t)_ii_attach,          0,
        "Detach",       (uintptr_t)_ii_detach,          0,
        "AllocHandle",  (uintptr_t)_ii_alloc_handle,    0,
        "FreeHandle",   (uintptr_t)_ii_free_handle,     0,
        "AllocBuf",     (uintptr_t)_ii_alloc_buf,       0,
        "FreeBuf",      (uintptr_t)_ii_free_buf,        0,
        "GetPinned",    (uintptr_t)_ii_get_pinned,      0,
        "Discard",      (uintptr_t)_ii_discard_pinned,  0,
        "PartSize",     (uintptr_t)_ii_partsize,        0,
        "MaxFbas",      (uintptr_t)_ii_maxfbas, 0,
        "Read",         (uintptr_t)_ii_read,            0,
        "Write",        (uintptr_t)_ii_write,           0,
        "Zero",         (uintptr_t)_ii_zero,            0,
        "Uncommit",     (uintptr_t)_ii_uncommit,        0,
        "TrackSize",    (uintptr_t)_ii_trksize, 0,
        "Provide",      0,                              0,
        0,              0,                              0
};
10365 
/*
 * _ii_ior_def
 *
 * Name/value table mapping I/O operation names to the II functions that
 * implement them, each stored as a uintptr_t.  "Provide" carries the value
 * 0, and the all-zero row terminates the table.  The third field is 0 in
 * every row; its meaning is defined by nsc_def_t, declared elsewhere.
 *
 * The entries are the same as those of _ii_io_def except for "Open",
 * which is _ii_openr here.
 *
 * NOTE(review): presumably one of the definition tables II registers with
 * nsctl as an I/O provider - confirm at the registration call site.
 */
static nsc_def_t _ii_ior_def[] = {
        "Open",         (uintptr_t)_ii_openr,           0,
        "Close",        (uintptr_t)_ii_close,           0,
        "Attach",       (uintptr_t)_ii_attach,          0,
        "Detach",       (uintptr_t)_ii_detach,          0,
        "AllocHandle",  (uintptr_t)_ii_alloc_handle,    0,
        "FreeHandle",   (uintptr_t)_ii_free_handle,     0,
        "AllocBuf",     (uintptr_t)_ii_alloc_buf,       0,
        "FreeBuf",      (uintptr_t)_ii_free_buf,        0,
        "GetPinned",    (uintptr_t)_ii_get_pinned,      0,
        "Discard",      (uintptr_t)_ii_discard_pinned,  0,
        "PartSize",     (uintptr_t)_ii_partsize,        0,
        "MaxFbas",      (uintptr_t)_ii_maxfbas, 0,
        "Read",         (uintptr_t)_ii_read,            0,
        "Write",        (uintptr_t)_ii_write,           0,
        "Zero",         (uintptr_t)_ii_zero,            0,
        "Uncommit",     (uintptr_t)_ii_uncommit,        0,
        "TrackSize",    (uintptr_t)_ii_trksize, 0,
        "Provide",      0,                              0,
        0,              0,                              0
};