/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/ddi.h>

#include <sys/nsc_thread.h>
#include <sys/nsctl/nsctl.h>

#include <sys/sdt.h>		/* dtrace is S10 or later */

#include "sd_bcache.h"
#include "sd_trace.h"
#include "sd_io.h"
#include "sd_bio.h"
#include "sd_ft.h"
#include "sd_misc.h"
#include "sd_pcu.h"

#include <sys/unistat/spcs_s.h>
#include <sys/unistat/spcs_s_k.h>
#include <sys/unistat/spcs_errors.h>
#include <sys/nsctl/safestore.h>
#ifndef DS_DDICT
#include <sys/ddi_impldefs.h>
#endif


/*
 * kstat interface
 */

static kstat_t *sdbc_global_stats_kstat;
static int sdbc_global_stats_update(kstat_t *ksp, int rw);

typedef struct {
	kstat_named_t	ci_sdbc_count;
	kstat_named_t	ci_sdbc_loc_count;
	kstat_named_t	ci_sdbc_rdhits;
	kstat_named_t	ci_sdbc_rdmiss;
	kstat_named_t	ci_sdbc_wrhits;
	kstat_named_t	ci_sdbc_wrmiss;
	kstat_named_t	ci_sdbc_blksize;
	kstat_named_t	ci_sdbc_lru_blocks;
#ifdef DEBUG
	kstat_named_t	ci_sdbc_lru_noreq;
	kstat_named_t	ci_sdbc_lru_req;
#endif
	kstat_named_t	ci_sdbc_wlru_inq;
	kstat_named_t	ci_sdbc_cachesize;
	kstat_named_t	ci_sdbc_numblocks;
	kstat_named_t	ci_sdbc_num_shared;
	kstat_named_t	ci_sdbc_wrcancelns;
	kstat_named_t	ci_sdbc_destaged;
	kstat_named_t	ci_sdbc_nodehints;
} sdbc_global_stats_t;

static sdbc_global_stats_t sdbc_global_stats = {
	{SDBC_GKSTAT_COUNT,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LOC_COUNT,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_RDHITS,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_RDMISS,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRHITS,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRMISS,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_BLKSIZE,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LRU_BLOCKS,	KSTAT_DATA_ULONG},
#ifdef DEBUG
	{SDBC_GKSTAT_LRU_NOREQ,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_LRU_REQ,		KSTAT_DATA_ULONG},
#endif
	{SDBC_GKSTAT_WLRU_INQ,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_CACHESIZE,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NUMBLOCKS,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NUM_SHARED,	KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_WRCANCELNS,	KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_DESTAGED,		KSTAT_DATA_ULONG},
	{SDBC_GKSTAT_NODEHINTS,		KSTAT_DATA_ULONG},
};
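
/*
 * Illustrative note (not from the original source): assuming
 * SDBC_KSTAT_MODULE expands to "sdbc", the named statistics above can
 * be read from userland with kstat(1M), e.g.
 *
 *	# kstat -m sdbc
 *
 * Each kstat_named_t member of sdbc_global_stats_t appears as one
 * name/value pair in that output.
 */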

static kstat_t **sdbc_cd_kstats;
static kstat_t **sdbc_cd_io_kstats;
static kmutex_t *sdbc_cd_io_kstats_mutexes;
static kstat_t *sdbc_global_io_kstat;
static kmutex_t sdbc_global_io_kstat_mutex;
static int sdbc_cd_stats_update(kstat_t *ksp, int rw);
static int cd_kstat_add(int cd);
static int cd_kstat_remove(int cd);

typedef struct {
	kstat_named_t	ci_sdbc_vol_name;
	kstat_named_t	ci_sdbc_failed;
	kstat_named_t	ci_sdbc_cd;
	kstat_named_t	ci_sdbc_cache_read;
	kstat_named_t	ci_sdbc_cache_write;
	kstat_named_t	ci_sdbc_disk_read;
	kstat_named_t	ci_sdbc_disk_write;
	kstat_named_t	ci_sdbc_filesize;
	kstat_named_t	ci_sdbc_numdirty;
	kstat_named_t	ci_sdbc_numio;
	kstat_named_t	ci_sdbc_numfail;
	kstat_named_t	ci_sdbc_destaged;
	kstat_named_t	ci_sdbc_wrcancelns;
	kstat_named_t	ci_sdbc_cdhints;
} sdbc_cd_stats_t;

static sdbc_cd_stats_t sdbc_cd_stats = {
	{SDBC_CDKSTAT_VOL_NAME,		KSTAT_DATA_CHAR},
	{SDBC_CDKSTAT_FAILED,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CD,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CACHE_READ,	KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CACHE_WRITE,	KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DISK_READ,	KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DISK_WRITE,	KSTAT_DATA_ULONG},
#ifdef NSC_MULTI_TERABYTE
	{SDBC_CDKSTAT_FILESIZE,		KSTAT_DATA_UINT64},
#else
	{SDBC_CDKSTAT_FILESIZE,		KSTAT_DATA_ULONG},
#endif
	{SDBC_CDKSTAT_NUMDIRTY,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_NUMIO,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_NUMFAIL,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_DESTAGED,		KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_WRCANCELNS,	KSTAT_DATA_ULONG},
	{SDBC_CDKSTAT_CDHINTS,		KSTAT_DATA_ULONG},
};
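
/*
 * Illustrative note (not from the original source): one instance of
 * the named statistics above is created per open cache descriptor by
 * cd_kstat_add().  Assuming the cache descriptor is used as the kstat
 * instance number, a single volume's counters could be selected with
 * kstat(1M)'s instance filter, e.g. "kstat -m sdbc -i 3".
 */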

#ifdef DEBUG
/*
 * dynmem kstat interface
 */
static kstat_t *sdbc_dynmem_kstat_dm;
static int simplect_dm;
static int sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw);

typedef struct {
	kstat_named_t	ci_sdbc_monitor_dynmem;
	kstat_named_t	ci_sdbc_max_dyn_list;
	kstat_named_t	ci_sdbc_cache_aging_ct1;
	kstat_named_t	ci_sdbc_cache_aging_ct2;
	kstat_named_t	ci_sdbc_cache_aging_ct3;
	kstat_named_t	ci_sdbc_cache_aging_sec1;
	kstat_named_t	ci_sdbc_cache_aging_sec2;
	kstat_named_t	ci_sdbc_cache_aging_sec3;
	kstat_named_t	ci_sdbc_cache_aging_pcnt1;
	kstat_named_t	ci_sdbc_cache_aging_pcnt2;
	kstat_named_t	ci_sdbc_max_holds_pcnt;

	kstat_named_t	ci_sdbc_alloc_ct;
	kstat_named_t	ci_sdbc_dealloc_ct;
	kstat_named_t	ci_sdbc_history;
	kstat_named_t	ci_sdbc_nodatas;
	kstat_named_t	ci_sdbc_candidates;
	kstat_named_t	ci_sdbc_deallocs;
	kstat_named_t	ci_sdbc_hosts;
	kstat_named_t	ci_sdbc_pests;
	kstat_named_t	ci_sdbc_metas;
	kstat_named_t	ci_sdbc_holds;
	kstat_named_t	ci_sdbc_others;
	kstat_named_t	ci_sdbc_notavail;

	kstat_named_t	ci_sdbc_process_directive;

	kstat_named_t	ci_sdbc_simplect;
} sdbc_dynmem_dm_t;

static sdbc_dynmem_dm_t sdbc_dynmem_dm = {
	{SDBC_DMKSTAT_MONITOR_DYNMEM,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_MAX_DYN_LIST,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT1,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT2,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_CT3,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC1,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC2,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_SEC3,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_PCNT1,	KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CACHE_AGING_PCNT2,	KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_MAX_HOLDS_PCNT,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_ALLOC_CNT,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_DEALLOC_CNT,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HISTORY,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_NODATAS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_CANDIDATES,		KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_DEALLOCS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HOSTS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_PESTS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_METAS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_HOLDS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_OTHERS,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_NOTAVAIL,			KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_PROCESS_DIRECTIVE,	KSTAT_DATA_ULONG},
	{SDBC_DMKSTAT_SIMPLECT,			KSTAT_DATA_ULONG}
};
#endif

/* End of dynmem kstats */

#ifdef DEBUG
int *dmchainpull_table;	/* dmchain wastage stats */
#endif

/*
 * dynmem process vars
 */
extern _dm_process_vars_t dynmem_processing_dm;

/* metadata for volumes */
ss_voldata_t *_sdbc_gl_file_info;

size_t _sdbc_gl_file_info_size;

/* metadata for cache write blocks */
static ss_centry_info_t *_sdbc_gl_centry_info;

/* wblocks * sizeof(ss_centry_info_t) */
static size_t _sdbc_gl_centry_info_size;

static int _SD_DELAY_QUEUE = 1;
static int sdbc_allocb_inuse, sdbc_allocb_lost, sdbc_allocb_hit;
static int sdbc_allocb_pageio1, sdbc_allocb_pageio2;
static int sdbc_centry_hit, sdbc_centry_inuse, sdbc_centry_lost;
static int sdbc_dmchain_not_avail;
static int sdbc_allocb_deallocd;
static int sdbc_centry_deallocd;
static int sdbc_check_cot;
static int sdbc_ra_hash;	/* 1-block read-ahead fails due to hash hit */
static int sdbc_ra_none;	/* 1-block read-ahead fails due to "would block" */


/*
 * Set the following variable to 1 to enable pagelist io mutual
 * exclusion on all _sd_alloc_buf() operations.
 *
 * This is set to ON to prevent front end / back end races between new
 * NSC_WRTHRU io operations coming in through _sd_alloc_buf(), and
 * previously written data being flushed out to disk by the sdbc
 * flusher at the back end.
 * -- see bugtraq 4287564
 * -- Simon Crosland, Mon Nov  8 16:34:09 GMT 1999
 */
static int sdbc_pageio_always = 1;

int sdbc_use_dmchain = 0;	/* start time switch for dm chaining */
int sdbc_prefetch1 = 1;		/* do 1-block read-ahead */
/*
 * If sdbc_static_cache is 1, allocate all cache memory at startup and
 * deallocate it only at shutdown.
 */
int sdbc_static_cache = 1;

#ifdef DEBUG
/*
 * Pagelist io mutual exclusion debug facility.
 */
#define	SDBC_PAGEIO_OFF		0	/* no debug */
#define	SDBC_PAGEIO_RDEV	1	/* force NSC_PAGEIO for specified dev */
#define	SDBC_PAGEIO_RAND	2	/* randomly force NSC_PAGEIO */
#define	SDBC_PAGEIO_ALL		3	/* always force NSC_PAGEIO */
static int sdbc_pageio_debug = SDBC_PAGEIO_OFF;
static dev_t sdbc_pageio_rdev = (dev_t)-1;
#endif
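
/*
 * Illustrative note (not from the original source): the debug knobs
 * above are ordinary kernel variables, so on a DEBUG build they can be
 * tuned at runtime with mdb(1), e.g. to force NSC_PAGEIO on every
 * allocation (SDBC_PAGEIO_ALL):
 *
 *	# echo 'sdbc_pageio_debug/W 3' | mdb -kw
 *
 * or to target a single device by writing its dev_t to
 * sdbc_pageio_rdev and setting sdbc_pageio_debug to SDBC_PAGEIO_RDEV.
 */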

/*
 * INF SD cache global data
 */

_sd_cd_info_t	*_sd_cache_files;
_sd_stats_t	*_sd_cache_stats;
kmutex_t	_sd_cache_lock;

_sd_hash_table_t	*_sd_htable;
_sd_queue_t	_sd_lru_q;

_sd_cctl_t	*_sd_cctl[_SD_CCTL_GROUPS];
int		_sd_cctl_groupsz;

_sd_net_t  _sd_net_config;

extern krwlock_t sdbc_queue_lock;

unsigned int _sd_node_hint;

#define	_SD_LRU_Q	(&_sd_lru_q)
int BLK_FBAS;		/* number of FBA's in a cache block */
int CACHE_BLOCK_SIZE;	/* size in bytes of a cache block */
int CBLOCKS;
_sd_bitmap_t BLK_FBA_BITS;
static int sdbc_prefetch_valid_cnt;
static int sdbc_prefetch_busy_cnt;
static int sdbc_prefetch_trailing;
static int sdbc_prefetch_deallocd;
static int sdbc_prefetch_pageio1;
static int sdbc_prefetch_pageio2;
static int sdbc_prefetch_hit;
static int sdbc_prefetch_lost;
static int _sd_prefetch_opt = 1;	/* 0 to disable & use _prefetch_sb_vec[] */
static nsc_vec_t _prefetch_sb_vec[_SD_MAX_BLKS + 1];

_sd_bitmap_t _fba_bits[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000f, 0x001f, 0x003f, 0x007f,
	0x00ff,
#if defined(_SD_8K_BLKSIZE)
	0x01ff, 0x03ff, 0x07ff,
	0x0fff, 0x1fff, 0x3fff, 0x7fff,
	0xffff,
#endif
};
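
/*
 * Worked example (added for clarity): with 512-byte FBAs and a 4K
 * cache block there are 8 FBAs per block, so BLK_FBAS == 8 and
 * BLK_FBA_BITS == _fba_bits[8] == 0x00ff -- one mask bit per FBA in
 * the block.  The _SD_8K_BLKSIZE entries extend the table to the 16
 * FBAs of an 8K cache block.
 */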

static int _sd_ccsync_cnt = 256;
static _sd_cctl_sync_t *_sd_ccent_sync;

nsc_io_t *sdbc_io;

#ifdef _MULTI_DATAMODEL
_sd_stats32_t *_sd_cache_stats32 = NULL;
#endif


#ifdef DEBUG
int cmn_level = CE_PANIC;
#else
int cmn_level = CE_WARN;
#endif

/*
 * Forward declare all statics that are used before being defined, to
 * enforce parameter checking.
 * Some (if not all) of these could be removed if the code were reordered.
 */

static void _sdbc_stats_deconfigure(void);
static int _sdbc_stats_configure(int cblocks);
static int _sdbc_lruq_configure(_sd_queue_t *);
static void _sdbc_lruq_deconfigure(void);
static int _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus);
static void _sdbc_mem_deconfigure(int cblocks);
static void _sd_ins_queue(_sd_queue_t *, _sd_cctl_t *centry);
static int _sd_flush_cd(int cd);
static int _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
    _sd_buf_handle_t **hp);
static int _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent,
    nsc_off_t fba_pos, nsc_size_t fba_len, int flag);
static void _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos,
    nsc_size_t fba_len, int error);
static void _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
    nsc_size_t fba_len);
static int _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len);
static int _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
    nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len);
static int _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
    nsc_size_t fba_len, int flag);
static int _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
    nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
    nsc_off_t rd_st_pos);
static int sdbc_fd_attach_cd(blind_t xcd);
static int sdbc_fd_detach_cd(blind_t xcd);
static int sdbc_fd_flush_cd(blind_t xcd);
static int _sdbc_gl_centry_configure(spcs_s_info_t);
static int _sdbc_gl_file_configure(spcs_s_info_t);
static void _sdbc_gl_centry_deconfigure(void);
static void _sdbc_gl_file_deconfigure(void);
static int sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
    nsc_size_t fba_len);
static _sd_bitmap_t update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off,
    sdbc_cblk_fba_t st_len);
static int _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
    int flag, _sd_buf_handle_t *handle, int locked);

/* dynmem support */
static int _sd_setup_category_on_type(_sd_cctl_t *header);
static int _sd_setup_mem_chaining(_sd_cctl_t *header, int flag);

static int sdbc_check_cctl_cot(_sd_cctl_t *);

static int sdbc_dmqueues_configure();
static void sdbc_dmqueues_deconfigure();
static _sd_cctl_t *sdbc_get_dmchain(int, int *, int);
static int sdbc_dmchain_avail(_sd_cctl_t *);
void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
static void sdbc_ins_dmqueue_back(_sd_queue_t *, _sd_cctl_t *);
void sdbc_ins_dmqueue_front(_sd_queue_t *, _sd_cctl_t *);
void sdbc_remq_dmchain(_sd_queue_t *, _sd_cctl_t *);
static void sdbc_clear_dmchain(_sd_cctl_t *, _sd_cctl_t *);
void sdbc_requeue_head_dm_try(_sd_cctl_t *);
static _sd_cctl_t *sdbc_alloc_dmc(int, nsc_off_t, nsc_size_t, int *,
    sdbc_allocbuf_t *, int);
static _sd_cctl_t *sdbc_alloc_lru(int, nsc_off_t, int *, int);
static _sd_cctl_t *sdbc_alloc_from_dmchain(int, nsc_off_t, sdbc_allocbuf_t *,
    int);
static void sdbc_centry_init_dm(_sd_cctl_t *);
static int sdbc_centry_memalloc_dm(_sd_cctl_t *, int, int);
static void sdbc_centry_alloc_end(sdbc_allocbuf_t *);


/* _SD_DEBUG */
#if defined(_SD_DEBUG) || defined(DEBUG)
static int _sd_cctl_valid(_sd_cctl_t *);
#endif

static
nsc_def_t _sdbc_fd_def[] = {
	"Attach",	(uintptr_t)sdbc_fd_attach_cd,	0,
	"Detach",	(uintptr_t)sdbc_fd_detach_cd,	0,
	"Flush",	(uintptr_t)sdbc_fd_flush_cd,	0,
	0,		0,				0
};
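
/*
 * Added note: _sdbc_fd_def is passed to nsc_open() in _sd_open_cd()
 * below, registering the attach/detach/flush callbacks that nsctl
 * invokes against an open cache descriptor.
 */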

/*
 * _sdbc_cache_configure - initialize cache blocks, queues etc.
 *
 * ARGUMENTS:
 *	cblocks  - Number of cache blocks
 *
 * RETURNS:
 *	0 on success.
 *	SDBC_EENABLEFAIL or SDBC_EMEMCONFIG on failure.
 *
 */
int
_sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus)
{
	CBLOCKS = cblocks;

	_sd_cache_files = (_sd_cd_info_t *)
	    kmem_zalloc(sdbc_max_devs * sizeof (_sd_cd_info_t),
	    KM_SLEEP);

	if (_sdbc_stats_configure(cblocks))
		return (SDBC_EENABLEFAIL);

	if (sdbc_use_dmchain) {
		if (sdbc_dmqueues_configure())
			return (SDBC_EENABLEFAIL);
	} else {
		if (_sdbc_lruq_configure(_SD_LRU_Q))
			return (SDBC_EENABLEFAIL);
	}

	if (_sdbc_mem_configure(cblocks, kstatus))
		return (SDBC_EMEMCONFIG);

	CACHE_BLOCK_SIZE = BLK_SIZE(1);
	BLK_FBAS = FBA_NUM(CACHE_BLOCK_SIZE);
	BLK_FBA_BITS = _fba_bits[BLK_FBAS];

	sdbc_allocb_pageio1 = 0;
	sdbc_allocb_pageio2 = 0;
	sdbc_allocb_hit = 0;
	sdbc_allocb_inuse = 0;
	sdbc_allocb_lost = 0;
	sdbc_centry_inuse = 0;
	sdbc_centry_lost = 0;
	sdbc_centry_hit = 0;
	sdbc_centry_deallocd = 0;
	sdbc_dmchain_not_avail = 0;
	sdbc_allocb_deallocd = 0;

	sdbc_prefetch_valid_cnt = 0;
	sdbc_prefetch_busy_cnt = 0;
	sdbc_prefetch_trailing = 0;
	sdbc_prefetch_deallocd = 0;
	sdbc_prefetch_pageio1 = 0;
	sdbc_prefetch_pageio2 = 0;
	sdbc_prefetch_hit = 0;
	sdbc_prefetch_lost = 0;

	sdbc_check_cot = 0;
	sdbc_prefetch1 = 1;
	sdbc_ra_hash = 0;
	sdbc_ra_none = 0;

	return (0);
}

/*
 * _sdbc_cache_deconfigure - cache is being deconfigured. Release any
 * memory that we acquired during the configuration process and return
 * to the unconfigured state.
 *
 *  NOTE: all users of the cache should be inactive at this point,
 *  i.e. we are unregistered from sd and all cache daemons/threads are
 *  gone.
 *
 */
void
_sdbc_cache_deconfigure(void)
{
	/* CCIO shutdown must happen before memory is free'd */

	if (_sd_cache_files) {
		kmem_free(_sd_cache_files,
		    sdbc_max_devs * sizeof (_sd_cd_info_t));
		_sd_cache_files = (_sd_cd_info_t *)NULL;
	}

	BLK_FBA_BITS = 0;
	BLK_FBAS = 0;
	CACHE_BLOCK_SIZE = 0;
	_sdbc_mem_deconfigure(CBLOCKS);
	_sdbc_gl_centry_deconfigure();
	_sdbc_gl_file_deconfigure();

	if (sdbc_use_dmchain)
		sdbc_dmqueues_deconfigure();
	else
		_sdbc_lruq_deconfigure();
	_sdbc_stats_deconfigure();

	CBLOCKS = 0;
}

/*
 * _sdbc_stats_deconfigure - cache is being deconfigured; turn off
 * stats. This could seemingly do more, but we leave most of the
 * data intact until cache is configured again.
 */
static void
_sdbc_stats_deconfigure(void)
{
	int i;

#ifdef DEBUG
	if (sdbc_dynmem_kstat_dm) {
		kstat_delete(sdbc_dynmem_kstat_dm);
		sdbc_dynmem_kstat_dm = NULL;
	}
#endif

	if (sdbc_global_stats_kstat) {
		kstat_delete(sdbc_global_stats_kstat);
		sdbc_global_stats_kstat = NULL;
	}

	if (sdbc_cd_kstats) {
		for (i = 0; i < sdbc_max_devs; i++) {
			if (sdbc_cd_kstats[i]) {
				kstat_delete(sdbc_cd_kstats[i]);
				sdbc_cd_kstats[i] = NULL;
			}
		}
		kmem_free(sdbc_cd_kstats, sizeof (kstat_t *) * sdbc_max_devs);
		sdbc_cd_kstats = NULL;
	}

	if (sdbc_global_io_kstat) {
		kstat_delete(sdbc_global_io_kstat);
		mutex_destroy(&sdbc_global_io_kstat_mutex);
		sdbc_global_io_kstat = NULL;
	}

	if (sdbc_cd_io_kstats) {
		for (i = 0; i < sdbc_max_devs; i++) {
			if (sdbc_cd_io_kstats[i]) {
				kstat_delete(sdbc_cd_io_kstats[i]);
				sdbc_cd_io_kstats[i] = NULL;
			}
		}
		kmem_free(sdbc_cd_io_kstats, sizeof (kstat_t *) *
		    sdbc_max_devs);
		sdbc_cd_io_kstats = NULL;
	}

	if (sdbc_cd_io_kstats_mutexes) {
		/* mutexes are already destroyed in cd_kstat_remove() */
		kmem_free(sdbc_cd_io_kstats_mutexes,
		    sizeof (kmutex_t) * sdbc_max_devs);
		sdbc_cd_io_kstats_mutexes = NULL;
	}

	if (_sd_cache_stats) {
		kmem_free(_sd_cache_stats,
		    sizeof (_sd_stats_t) +
		    (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
		_sd_cache_stats = NULL;
	}
#ifdef _MULTI_DATAMODEL
	if (_sd_cache_stats32) {
		kmem_free(_sd_cache_stats32, sizeof (_sd_stats32_t) +
		    (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
		_sd_cache_stats32 = NULL;
	}
#endif
}

static int
_sdbc_stats_configure(int cblocks)
{
	_sd_cache_stats = kmem_zalloc(sizeof (_sd_stats_t) +
	    (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
	_sd_cache_stats->st_blksize = (int)BLK_SIZE(1);
	_sd_cache_stats->st_cachesize = cblocks * BLK_SIZE(1);
	_sd_cache_stats->st_numblocks = cblocks;
	_sd_cache_stats->st_wrcancelns = 0;
	_sd_cache_stats->st_destaged = 0;
#ifdef _MULTI_DATAMODEL
	_sd_cache_stats32 = kmem_zalloc(sizeof (_sd_stats32_t) +
	    (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
#endif

	/* kstat implementation - global stats */
	sdbc_global_stats_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
	    SDBC_KSTAT_GSTATS, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
	    sizeof (sdbc_global_stats)/sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);

	if (sdbc_global_stats_kstat != NULL) {
		sdbc_global_stats_kstat->ks_data = &sdbc_global_stats;
		sdbc_global_stats_kstat->ks_update = sdbc_global_stats_update;
		sdbc_global_stats_kstat->ks_private = _sd_cache_stats;
		kstat_install(sdbc_global_stats_kstat);
	} else {
		cmn_err(CE_WARN, "!sdbc: gstats kstat failed");
	}

	/* global I/O kstats */
	sdbc_global_io_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
	    SDBC_IOKSTAT_GSTATS, "disk", KSTAT_TYPE_IO, 1, 0);

	if (sdbc_global_io_kstat) {
		mutex_init(&sdbc_global_io_kstat_mutex, NULL, MUTEX_DRIVER,
		    NULL);
		sdbc_global_io_kstat->ks_lock =
		    &sdbc_global_io_kstat_mutex;
		kstat_install(sdbc_global_io_kstat);
	}

	/*
	 * kstat implementation - cd stats
	 * NOTE: one kstat instance for each open cache descriptor
	 */
	sdbc_cd_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
	    KM_SLEEP);

	/*
	 * kstat implementation - i/o kstats per cache descriptor
	 * NOTE: one I/O kstat instance for each cd
	 */
	sdbc_cd_io_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
	    KM_SLEEP);

	sdbc_cd_io_kstats_mutexes = kmem_zalloc(sizeof (kmutex_t) *
	    sdbc_max_devs, KM_SLEEP);

#ifdef DEBUG
	/* kstat implementation - dynamic memory stats */
	sdbc_dynmem_kstat_dm = kstat_create(SDBC_KSTAT_MODULE, 0,
	    SDBC_KSTAT_DYNMEM, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
	    sizeof (sdbc_dynmem_dm)/sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);

	if (sdbc_dynmem_kstat_dm != NULL) {
		sdbc_dynmem_kstat_dm->ks_data = &sdbc_dynmem_dm;
		sdbc_dynmem_kstat_dm->ks_update = sdbc_dynmem_kstat_update_dm;
		sdbc_dynmem_kstat_dm->ks_private = &dynmem_processing_dm;
		kstat_install(sdbc_dynmem_kstat_dm);
	} else {
		cmn_err(CE_WARN, "!sdbc: dynmem kstat failed");
	}
#endif

	return (0);
}

/*
 * sdbc_dmqueues_configure()
 * initialize the queues of dynamic memory chains.
 */

_sd_queue_t *sdbc_dm_queues;
static int max_dm_queues;


static int
sdbc_dmqueues_configure()
{
	int i;

	/*
	 * CAUTION! this code depends on max_dyn_list not changing;
	 * if it does change, behavior may be incorrect, as cc_alloc_size_dm
	 * depends on max_dyn_list and indexes to dmqueues are derived from
	 * cc_alloc_size_dm.
	 * see _sd_setup_category_on_type() and _sd_dealloc_dm()
	 * TODO: prevent max_dyn_list from on-the-fly modification (easy) or
	 * allow for on-the-fly changes to number of dm queues (hard).
	 */
	max_dm_queues = dynmem_processing_dm.max_dyn_list;

	++max_dm_queues; /* need a "0" queue for centrys with no memory */

	sdbc_dm_queues = (_sd_queue_t *)
	    kmem_zalloc(max_dm_queues * sizeof (_sd_queue_t), KM_SLEEP);

#ifdef DEBUG
	dmchainpull_table = (int *)kmem_zalloc(max_dm_queues *
	    max_dm_queues * sizeof (int), KM_SLEEP);
#endif

	for (i = 0; i < max_dm_queues; ++i) {
		(void) _sdbc_lruq_configure(&sdbc_dm_queues[i]);
		sdbc_dm_queues[i].sq_dmchain_cblocks = i;
	}

	return (0);
}
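
/*
 * Worked example (added for clarity): with max_dyn_list == 8 the loop
 * above creates 9 queues.  Queue i (recorded in sq_dmchain_cblocks)
 * holds dm chains of i cache blocks, with queue 0 reserved for centrys
 * that currently have no memory attached.
 */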

static void
sdbc_dmqueues_deconfigure()
{
	/* CAUTION! this code depends on max_dyn_list not changing */

	if (sdbc_dm_queues)
		kmem_free(sdbc_dm_queues, max_dm_queues * sizeof (_sd_queue_t));
	sdbc_dm_queues = NULL;
	max_dm_queues = 0;
}

#define	GOOD_LRUSIZE(q) ((q->sq_inq >= 0) && (q->sq_inq <= CBLOCKS))

/*
 * _sdbc_lruq_configure - initialize the lru queue
 *
 * ARGUMENTS: NONE
 * RETURNS:   0
 *
 */
static int
_sdbc_lruq_configure(_sd_queue_t *_sd_lru)
{

	_sd_lru->sq_inq = 0;

	mutex_init(&_sd_lru->sq_qlock, NULL, MUTEX_DRIVER, NULL);

	_sd_lru->sq_qhead.cc_next = _sd_lru->sq_qhead.cc_prev
	    = &(_sd_lru->sq_qhead);
	return (0);
}

/*
 * _sdbc_lruq_deconfigure - deconfigure the lru queue
 *
 * ARGUMENTS: NONE
 *
 */
static void
_sdbc_lruq_deconfigure(void)
{
	_sd_queue_t *_sd_lru;

	_sd_lru = _SD_LRU_Q;

	mutex_destroy(&_sd_lru->sq_qlock);
	bzero(_sd_lru, sizeof (_sd_queue_t));

}

/*
 * _sdbc_mem_configure - initialize the cache memory.
 *		Create and initialize the hash table.
 *		Create cache control blocks and fill them with relevant
 *		information and enqueue onto the lru queue.
 *		Initialize the Write control blocks (blocks that contain
 *		information as to where the data will be mirrored)
 *		Initialize the Fault tolerant blocks (blocks that contain
 *		information about the mirror nodes dirty writes)
 *
 * ARGUMENTS:
 *	cblocks - Number of cache blocks.
 * RETURNS:   0
 *
 */
static int
_sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus)
{
	int num_blks, i, blk;
	_sd_cctl_t *centry;
	_sd_net_t *netc;
	_sd_cctl_t *prev_entry_dm, *first_entry_dm;

	if ((_sd_htable = _sdbc_hash_configure(cblocks)) == NULL) {
		spcs_s_add(kstatus, SDBC_ENOHASH);
		return (-1);
	}

	_sd_cctl_groupsz = (cblocks / _SD_CCTL_GROUPS) +
	    ((cblocks % _SD_CCTL_GROUPS) != 0);

	for (i = 0; i < _SD_CCTL_GROUPS; i++) {
		_sd_cctl[i] = (_sd_cctl_t *)
		    nsc_kmem_zalloc(_sd_cctl_groupsz * sizeof (_sd_cctl_t),
		    KM_SLEEP, sdbc_cache_mem);

		if (_sd_cctl[i] == NULL) {
			spcs_s_add(kstatus, SDBC_ENOCB);
			return (-1);
		}
	}

	_sd_ccent_sync = (_sd_cctl_sync_t *)
	    nsc_kmem_zalloc(_sd_ccsync_cnt * sizeof (_sd_cctl_sync_t),
	    KM_SLEEP, sdbc_local_mem);

	if (_sd_ccent_sync == NULL) {
		spcs_s_add(kstatus, SDBC_ENOCCTL);
		return (-1);
	}

	for (i = 0; i < _sd_ccsync_cnt; i++) {
		mutex_init(&_sd_ccent_sync[i]._cc_lock, NULL, MUTEX_DRIVER,
		    NULL);
		cv_init(&_sd_ccent_sync[i]._cc_blkcv, NULL, CV_DRIVER, NULL);
	}

	blk = 0;

	netc = &_sd_net_config;

	num_blks = (netc->sn_cpages * (int)netc->sn_psize)/BLK_SIZE(1);

	prev_entry_dm = 0;
	first_entry_dm = 0;
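	/*
	 * Added comment: the loop below links every centry into a single
	 * circular list through cc_link_list_dm -- each pass points the
	 * previous entry at the current one and the current entry back at
	 * the first, so the final iteration closes the circle.
	 */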
	for (i = 0; i < num_blks; i++, blk++) {
		centry = _sd_cctl[(blk/_sd_cctl_groupsz)] +
		    (blk%_sd_cctl_groupsz);
		centry->cc_sync = &_sd_ccent_sync[blk % _sd_ccsync_cnt];
		centry->cc_next = centry->cc_prev = NULL;
		centry->cc_dirty_next = centry->cc_dirty_link = NULL;
		centry->cc_await_use = centry->cc_await_page = 0;
		centry->cc_inuse = centry->cc_pageio = 0;
		centry->cc_flag = 0;
		centry->cc_iocount = 0;
		centry->cc_valid = 0;

		if (!first_entry_dm)
			first_entry_dm = centry;
		if (prev_entry_dm)
			prev_entry_dm->cc_link_list_dm = centry;
		prev_entry_dm = centry;
		centry->cc_link_list_dm = first_entry_dm;
		centry->cc_data = 0;
		centry->cc_write = NULL;
		centry->cc_dirty = 0;

		{
			_sd_queue_t *q;

			if (sdbc_use_dmchain) {
				q = &sdbc_dm_queues[0];
				centry->cc_cblocks = 0;
			} else
				q = _SD_LRU_Q;

			_sd_ins_queue(q, centry);
		}
	}

	if (_sdbc_gl_centry_configure(kstatus) != 0)
		return (-1);

	if (_sdbc_gl_file_configure(kstatus) != 0)
		return (-1);

	return (0);
}

/*
 * _sdbc_gl_file_configure()
 *	allocate and initialize space for the global filename data.
 *
 */
static int
_sdbc_gl_file_configure(spcs_s_info_t kstatus)
{
	ss_voldata_t *fileinfo;
	ss_voldata_t tempfinfo;
	ss_vdir_t vdir;
	ss_vdirkey_t key;
	int err = 0;

	_sdbc_gl_file_info_size = safestore_config.ssc_maxfiles *
	    sizeof (ss_voldata_t);

	if ((_sdbc_gl_file_info = kmem_zalloc(_sdbc_gl_file_info_size,
	    KM_NOSLEEP)) == NULL) {
		spcs_s_add(kstatus, SDBC_ENOSFNV);
		return (-1);
	}

	/* setup the key to get a directory stream of all volumes */
	key.vk_type = CDIR_ALL;

	fileinfo = _sdbc_gl_file_info;

	/*
	 * if coming up after a crash, "refresh" the host
	 * memory copy from safestore.
	 */
	if (_sdbc_warm_start()) {

		if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
			cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
			    "cannot read safestore");
			return (-1);
		}

		/*
		 * cycle through the vdir getting volume data
		 * and volume tokens
		 */
		while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir, fileinfo))
		    == SS_OK) {
			++fileinfo;
		}

		if (err != SS_EOF) {
			/*
			 * fail to configure since
			 * recovery is not possible.
			 */
			spcs_s_add(kstatus, SDBC_ENOREFRESH);
			return (-1);
		}

	} else { /* normal initialization, not a warm start */

		/*
		 * if this fails, continue: cache will start
		 * in writethru mode
		 */
		if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
			cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
			    "cannot read safestore");
			return (-1);
		}

		/*
		 * cycle through the vdir getting just the volume tokens
		 * and initializing volume entries
		 */
		while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir,
		    &tempfinfo)) == 0) {
			/*
			 * initialize the host memory copy of the
			 * global file region.  this means setting the
			 * _pinned and _attached fields to _SD_NO_HOST
			 * because the default of zero conflicts with
			 * the min nodeid of zero.
			 */
			fileinfo->sv_vol = tempfinfo.sv_vol;
			fileinfo->sv_pinned = _SD_NO_HOST;
			fileinfo->sv_attached = _SD_NO_HOST;
			fileinfo->sv_cd = _SD_NO_CD;

			/* initialize the directory entry */
			if ((err = SSOP_SETVOL(sdbc_safestore, fileinfo))
			    == SS_ERR) {
				cmn_err(CE_WARN,
				    "!sdbc(_sdbc_gl_file_configure): "
				    "volume entry write failure %p",
				    (void *)fileinfo->sv_vol);
				break;
			}

			++fileinfo;
		}

		/* coming up clean, continue in w-t mode */
		if (err != SS_EOF)
			cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure) "
			    "unable to init safe store volinfo");
	}

	return (0);
}

static void
_sdbc_gl_centry_deconfigure(void)
{
	if (_sdbc_gl_centry_info)
		kmem_free(_sdbc_gl_centry_info, _sdbc_gl_centry_info_size);

	_sdbc_gl_centry_info = NULL;
	_sdbc_gl_centry_info_size = 0;
}

static int
_sdbc_gl_centry_configure(spcs_s_info_t kstatus)
{
	int wblocks;
	ss_centry_info_t *cinfo;
	ss_cdirkey_t key;
	ss_cdir_t cdir;
	int err = 0;

	wblocks = safestore_config.ssc_wsize / BLK_SIZE(1);
	_sdbc_gl_centry_info_size = sizeof (ss_centry_info_t) * wblocks;

	if ((_sdbc_gl_centry_info = kmem_zalloc(_sdbc_gl_centry_info_size,
	    KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure) "
		    "alloc failed for gl_centry_info region");

		_sdbc_gl_centry_deconfigure();
		return (-1);
	}

	/*
	 * synchronize the centry info area with safe store
	 */

	/* setup the key to get a directory stream of all centrys */
	key.ck_type = CDIR_ALL;

	cinfo = _sdbc_gl_centry_info;

	if (_sdbc_warm_start()) {

		if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
			cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
			    "cannot read safestore");
			return (-1);
		}

		/*
		 * cycle through the cdir getting resource
		 * tokens and reading centrys
		 */
		while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
		    == 0) {
			++cinfo;
		}

		if (err != SS_EOF) {
			/*
			 * fail to configure since
			 * recovery is not possible.
			 */
			_sdbc_gl_centry_deconfigure();
			spcs_s_add(kstatus, SDBC_EGLDMAFAIL);
			return (-1);
		}

	} else {

		if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
			cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
			    "cannot read safestore");
			return (-1);
		}

		/*
		 * cycle through the cdir getting resource
		 * tokens and initializing centrys
		 */
		while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
		    == 0) {
			cinfo->sc_cd = -1;
			cinfo->sc_fpos = -1;

			if ((err = SSOP_SETCENTRY(sdbc_safestore, cinfo))
			    == SS_ERR) {
				cmn_err(CE_WARN,
				    "!sdbc(_sdbc_gl_centry_configure): "
				    "cache entry write failure %p",
				    (void *)cinfo->sc_res);
				break;
			}

			++cinfo;
		}

		/* coming up clean, continue in w-t mode */
		if (err != SS_EOF) {
			cmn_err(CE_WARN, "!sdbc(sdbc_gl_centry_configure) "
			    "_sdbc_gl_centry_info initialization failed");
		}
	}

	return (0);
}

static void
_sdbc_gl_file_deconfigure(void)
{
	if (_sdbc_gl_file_info)
		kmem_free(_sdbc_gl_file_info, _sdbc_gl_file_info_size);

	_sdbc_gl_file_info = NULL;

	_sdbc_gl_file_info_size = 0;
}

/*
 * _sdbc_mem_deconfigure - deconfigure the cache memory.
 * Release any memory/locks/sv's acquired during _sdbc_mem_configure.
 *
 * ARGUMENTS:
 *	cblocks - Number of cache blocks.
 *
 */
/* ARGSUSED */
static void
_sdbc_mem_deconfigure(int cblocks)
{
	int i;

	if (_sd_ccent_sync) {
		for (i = 0; i < _sd_ccsync_cnt; i++) {
			mutex_destroy(&_sd_ccent_sync[i]._cc_lock);
			cv_destroy(&_sd_ccent_sync[i]._cc_blkcv);
		}
		nsc_kmem_free(_sd_ccent_sync,
		    _sd_ccsync_cnt * sizeof (_sd_cctl_sync_t));
	}
	_sd_ccent_sync = NULL;

	for (i = 0; i < _SD_CCTL_GROUPS; i++) {
		if (_sd_cctl[i] != NULL) {
			nsc_kmem_free(_sd_cctl[i],
			    _sd_cctl_groupsz * sizeof (_sd_cctl_t));
			_sd_cctl[i] = NULL;
		}
	}
	_sd_cctl_groupsz = 0;

	_sdbc_hash_deconfigure(_sd_htable);
	_sd_htable = NULL;

}


#if defined(_SD_DEBUG) || defined(DEBUG)
static int
_sd_cctl_valid(_sd_cctl_t *addr)
{
	_sd_cctl_t *end;
	int i, valid;

	valid = 0;
	for (i = 0; i < _SD_CCTL_GROUPS; i++) {
		end = _sd_cctl[i] + _sd_cctl_groupsz;
		if (addr >= _sd_cctl[i] && addr < end) {
			valid = 1;
			break;
		}
	}

	return (valid);
}
#endif


/*
 * _sd_ins_queue - insert centry into LRU queue
 * (during initialization, locking not required)
 */
static void
_sd_ins_queue(_sd_queue_t *q, _sd_cctl_t *centry)
{
	_sd_cctl_t *q_head;

	ASSERT(_sd_cctl_valid(centry));

	q_head = &q->sq_qhead;
	centry->cc_prev = q_head;
	centry->cc_next = q_head->cc_next;
	q_head->cc_next->cc_prev = centry;
	q_head->cc_next = centry;
	q->sq_inq++;

	ASSERT(GOOD_LRUSIZE(q));
}
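
/*
 * Illustrative note (added): starting from an empty queue
 * (head.cc_next == head.cc_prev == &head, sq_inq == 0), inserting A
 * and then B with _sd_ins_queue() yields
 *
 *	head -> B -> A -> head		(following cc_next)
 *
 * i.e. each new centry lands at the queue head, and sq_inq counts the
 * entries excluding the queue head itself.
 */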
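/*
 * _sd_requeue - move centry to the tail of the LRU queue.
 * (Added comment: entries are reclaimed from the head, so requeueing
 * to the tail marks the block as most recently used.)
 */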
void
_sd_requeue(_sd_cctl_t *centry)
{
	_sd_queue_t *q = _SD_LRU_Q;

	/* was FAST */
	mutex_enter(&q->sq_qlock);
#if defined(_SD_DEBUG)
	if (1) {
		_sd_cctl_t *cp, *cn, *qp;
		cp = centry->cc_prev;
		cn = centry->cc_next;
		qp = (q->sq_qhead).cc_prev;
		if (!_sd_cctl_valid(centry) ||
		    (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
		    (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
		    !_sd_cctl_valid(qp))
			cmn_err(CE_PANIC,
			    "_sd_requeue %x prev %x next %x qp %x",
			    centry, cp, cn, qp);
	}
#endif
	centry->cc_prev->cc_next = centry->cc_next;
	centry->cc_next->cc_prev = centry->cc_prev;
	centry->cc_next = &(q->sq_qhead);
	centry->cc_prev = q->sq_qhead.cc_prev;
	q->sq_qhead.cc_prev->cc_next = centry;
	q->sq_qhead.cc_prev = centry;
	centry->cc_seq = q->sq_seq++;
	/* was FAST */
	mutex_exit(&q->sq_qlock);
	(q->sq_req_stat)++;
}

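/*
 * _sd_requeue_head - move centry to the head of the LRU queue.
 * (Added comment: the complement of _sd_requeue(); a block requeued to
 * the head becomes the preferred candidate for reuse.)
 */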
void
_sd_requeue_head(_sd_cctl_t *centry)
{
	_sd_queue_t *q = _SD_LRU_Q;

	/* was FAST */
	mutex_enter(&q->sq_qlock);
#if defined(_SD_DEBUG)
	if (1) {
		_sd_cctl_t *cp, *cn, *qn;
		cp = centry->cc_prev;
		cn = centry->cc_next;
		qn = (q->sq_qhead).cc_prev;
		if (!_sd_cctl_valid(centry) ||
		    (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
		    (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
		    !_sd_cctl_valid(qn))
			cmn_err(CE_PANIC,
			    "_sd_requeue_head %x prev %x next %x qn %x",
			    centry, cp, cn, qn);
	}
#endif
	centry->cc_prev->cc_next = centry->cc_next;
	centry->cc_next->cc_prev = centry->cc_prev;
	centry->cc_prev = &(q->sq_qhead);
	centry->cc_next = q->sq_qhead.cc_next;
	q->sq_qhead.cc_next->cc_prev = centry;
	q->sq_qhead.cc_next = centry;
	centry->cc_seq = q->sq_seq++;
	centry->cc_flag &= ~CC_QHEAD;
	/* was FAST */
	mutex_exit(&q->sq_qlock);
}


/*
 * _sd_open - Open a file.
 *
 * ARGUMENTS:
 *	filename - Name of the file to be opened.
 *	flag	 - Flag associated with open.
 *		   (currently used to determine a ckd device)
 * RETURNS:
 *	cd - the cache descriptor.
 */
int
_sd_open(char *filename, int flag)
{
	int cd;

	if (!_sd_cache_initialized) {
		cmn_err(CE_WARN, "!sdbc(_sd_open) cache not initialized");
		return (-EINVAL);
	}
	cd = _sd_open_cd(filename, -1, flag);
	SDTRACE(SDF_OPEN, (cd < 0) ? SDT_INV_CD : cd, 0, SDT_INV_BL, 0, cd);

	return (cd);
}


static int
_sd_open_io(char *filename, int flag, blind_t *cdp, nsc_iodev_t *iodev)
{
	_sd_cd_info_t *cdi;
	int cd;
	int rc = 0;

	if ((cd = _sd_open(filename, flag)) >= 0) {

		cdi = &(_sd_cache_files[cd]);
		cdi->cd_iodev = iodev;
		nsc_set_owner(cdi->cd_rawfd, cdi->cd_iodev);

		*cdp = (blind_t)(unsigned long)cd;
	} else
		rc = -cd;

	return (rc);
}


int
_sd_open_cd(char *filename, const int cd, const int flag)
{
	int new_cd, rc = 0, alloc_cd = -1;
	ss_voldata_t *cdg;
	int preexists = 0;
	_sd_cd_info_t *cdi;
	int failover_open, open_failed;
	major_t devmaj;
	minor_t devmin;

	if (_sdbc_shutdown_in_progress)
		return (-EIO);

	if (strlen(filename) > (NSC_MAXPATH-1))
		return (-ENAMETOOLONG);

	/*
	 * If the cd is >= 0, then this is an open for a specific cd.
	 * This happens when the mirror node crashes, and we attempt to
	 * reopen the files with the same cache descriptors as existed on
	 * the other node.
	 */

retry_open:
	failover_open = 0;
	open_failed = 0;
	if (cd >= 0) {
		failover_open++;
		cdi = &(_sd_cache_files[cd]);
		mutex_enter(&_sd_cache_lock);
		if (cdi->cd_info == NULL)
			cdi->cd_info = &_sd_cache_stats->st_shared[cd];
		else if (cdi->cd_info->sh_alloc &&
		    strcmp(cdi->cd_info->sh_filename, filename)) {
			cmn_err(CE_WARN, "!sdbc(_sd_open_cd) cd %d mismatch",
			    cd);
			mutex_exit(&_sd_cache_lock);
			return (-EEXIST);
		}

		if (cdi->cd_info->sh_failed != 2) {
			if (cdi->cd_info->sh_alloc != 0)
				preexists = 1;
			else {
				cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
				(void) strcpy(cdi->cd_info->sh_filename,
				    filename);
				if (_sd_cache_stats->st_count < sdbc_max_devs)
					_sd_cache_stats->st_count++;
			}
		}

		mutex_exit(&_sd_cache_lock);
		alloc_cd = cd;

		goto known_cd;
	}

	new_cd = 0;
	mutex_enter(&_sd_cache_lock);

	for (cdi = &(_sd_cache_files[new_cd]),
	    cdg = _sdbc_gl_file_info + new_cd;
	    new_cd < (sdbc_max_devs); new_cd++, cdi++, cdg++) {
		if (strlen(cdg->sv_volname) != 0)
			if (strcmp(cdg->sv_volname, filename))
				continue;

		if (cdi->cd_info == NULL)
			cdi->cd_info = &_sd_cache_stats->st_shared[new_cd];

		if (cdi->cd_info->sh_failed != 2) {
			if (cdi->cd_info->sh_alloc != 0)
				preexists = 1;
			else {
				if (cd == -2) {
					mutex_exit(&_sd_cache_lock);
					return (-1);
				}
				cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
				(void) strcpy(cdi->cd_info->sh_filename,
				    filename);
				(void) strcpy(cdg->sv_volname, filename);

				cdg->sv_cd = new_cd;
				/* update safestore */
				SSOP_SETVOL(sdbc_safestore, cdg);
				if (_sd_cache_stats->st_count < sdbc_max_devs)
					_sd_cache_stats->st_count++;
				cdi->cd_flag = 0;
			}
		}
		alloc_cd = new_cd;
		break;
	}

	mutex_exit(&_sd_cache_lock);

	if (alloc_cd == -1)
		return (-ENOSPC);

known_cd:
	/*
	 * If preexists: someone else is attempting to open this file as
	 * well. Do only one open, but block everyone else here till the
	 * open is completed.
	 */
	if (preexists) {
		while (cdi->cd_info->sh_alloc == CD_ALLOC_IN_PROGRESS) {
			delay(drv_usectohz(20000));
		}
		if (cdi->cd_info->sh_alloc != CD_ALLOCATED)
			goto retry_open;
		return (alloc_cd);
	}
1471 
1472         if (!(cdi->cd_rawfd =
1473             nsc_open(filename, NSC_SDBC_ID|NSC_DEVICE, _sdbc_fd_def,
1474             (blind_t)(unsigned long)alloc_cd, &rc)) ||
1475             !nsc_getval(cdi->cd_rawfd, "DevMaj", (int *)&devmaj) ||
1476             !nsc_getval(cdi->cd_rawfd, "DevMin", (int *)&devmin)) {
1477                 if (cdi->cd_rawfd) {
1478                         (void) nsc_close(cdi->cd_rawfd);
1479                         cdi->cd_rawfd = NULL;
1480                 }
1481                 /*
1482                  * take into account that there may be pinned data on a
1483                  * device that can no longer be opened
1484                  */
1485                 open_failed++;
1486                 if (!(cdi->cd_info->sh_failed) && !failover_open) {
1487                         cdi->cd_info->sh_alloc = 0;
1488                         mutex_enter(&_sd_cache_lock);
1489                         _sd_cache_stats->st_count--;
1490                         mutex_exit(&_sd_cache_lock);
1491                         if (!rc)
1492                                 rc = EIO;
1493                         return (-rc);
1494                 }
1495         }
1496 
1497         cdi->cd_strategy = nsc_get_strategy(devmaj);
1498         cdi->cd_crdev        = makedevice(devmaj, devmin);
1499         cdi->cd_desc = alloc_cd;
1500         cdi->cd_dirty_head = cdi->cd_dirty_tail = NULL;
1501         cdi->cd_io_head      = cdi->cd_io_tail = NULL;
1502         cdi->cd_hint = 0;
1503 #ifdef DEBUG
1504         /* put the dev_t in the ioerr_inject_table */
1505         _sdbc_ioj_set_dev(alloc_cd, cdi->cd_crdev);
1506 #endif
1507 
1508         cdi->cd_global = (_sdbc_gl_file_info + alloc_cd);
1509         if (open_failed) {
1510                 cdi->cd_info->sh_failed = 2;
1511         } else if (cdi->cd_info->sh_failed != 2)
1512                 if ((cdi->cd_global->sv_pinned == _SD_SELF_HOST) &&
1513                     !failover_open)
1514                         cdi->cd_info->sh_failed = 1;
1515                 else
1516                         cdi->cd_info->sh_failed = 0;
1517 
1518         cdi->cd_flag |= flag;
1519         mutex_init(&cdi->cd_lock, NULL, MUTEX_DRIVER, NULL);
1520 
1521 #ifndef _SD_NOTRACE
1522         (void) _sdbc_tr_configure(alloc_cd);
1523 #endif
1524         cdi->cd_info->sh_alloc = CD_ALLOCATED;
1525         cdi->cd_info->sh_cd = (unsigned short)alloc_cd;
1527         mutex_enter(&_sd_cache_lock);
1528         _sd_cache_stats->st_loc_count++;
1529         mutex_exit(&_sd_cache_lock);
1530 
1531         if (cd_kstat_add(alloc_cd) < 0) {
1532                 cmn_err(CE_WARN, "!Could not create kstats for cache descriptor"
1533                     " %d", alloc_cd);
1534         }
1535 
1536 
1537         return (open_failed ? -EIO : alloc_cd);
1538 }
1539 
1540 
1541 /*
1542  * _sd_close -   Close a cache descriptor.
1543  *
1544  * ARGUMENTS:
1545  *      cd   -   the cache descriptor to be closed.
1546  * RETURNS:
1547  *      0 on success.
1548  *      Error otherwise.
1549  *
1550  * Note: Under Construction.
1551  */
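     /*
      * Usage sketch (illustrative only; "my_cd" is a hypothetical
      * descriptor returned by a successful open).  NSC_DONE means the
      * descriptor closed cleanly; EAGAIN means dirty blocks are still
      * draining and the close may simply be retried later:
      *
      *      rc = _sd_close(my_cd);
      *      if (rc == EAGAIN)
      *              rc = _sd_close(my_cd);
      */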
1552 
1553 int
1554 _sd_close(int cd)
1555 {
1556         int rc;
1557         _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1558 
1559         if (!FILE_OPENED(cd)) {
1560                 rc = EINVAL;
1561                 goto out;
1562         }
1563 
1564         SDTRACE(ST_ENTER|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, 0);
1565 
1566         mutex_enter(&_sd_cache_lock);
1567         if ((cdi->cd_info->sh_alloc == 0) ||
1568             (cdi->cd_info->sh_alloc & CD_CLOSE_IN_PROGRESS)) {
1569                 mutex_exit(&_sd_cache_lock);
1570                 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, EINVAL);
1571                 rc = EINVAL;
1572                 goto out;
1573         }
1574         cdi->cd_info->sh_alloc |= CD_CLOSE_IN_PROGRESS;
1575         mutex_exit(&_sd_cache_lock);
1576 
1577         /*
1578          * _sd_flush_cd() will return -1 for the case where pinned
1579          * data is present, but has been transferred to the mirror
1580          * node.  In this case it is safe to close the device as
1581          * though _sd_flush_cd() had returned 0.
1582          */
1583 
1584         rc = _sd_flush_cd(cd);
1585         if (rc == -1)
1586                 rc = 0;
1587 
1588         if (rc != 0) {
1589                 mutex_enter(&_sd_cache_lock);
1590                 if ((rc == EAGAIN) &&
1591                     (cdi->cd_global->sv_pinned == _SD_NO_HOST)) {
1592                         cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1593                         SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1594                 }
1595 
1596                 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1597                 mutex_exit(&_sd_cache_lock);
1598                 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL,
1599                     _SD_CD_WBLK_USED(cd), rc);
1600                 goto out;
1601         }
1602 
1603         rc = nsc_close(cdi->cd_rawfd);
1604         if (rc) {
1605                 mutex_enter(&_sd_cache_lock);
1606                 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1607                 mutex_exit(&_sd_cache_lock);
1608                 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, rc);
1609                 goto out;
1610         }
1611         mutex_enter(&_sd_cache_lock);
1612         _sd_cache_stats->st_loc_count--;
1613         mutex_exit(&_sd_cache_lock);
1614 
1615         if (cd_kstat_remove(cd) < 0) {
1616                 cmn_err(CE_WARN, "!Could not remove kstat for cache descriptor "
1617                     "%d", cd);
1618         }
1619 
1620         cdi->cd_info->sh_alloc = 0;
1621         cdi->cd_info->sh_failed = 0;
1622         /* cdi->cd_info = NULL; */
1623         cdi->cd_flag = 0;
1624         SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, NSC_DONE);
1625         rc = NSC_DONE;
1626         goto out;
1627 
1628 out:
1629         return (rc);
1630 }
1631 
1632 
1633 static int
1634 _sd_close_io(blind_t xcd)
1635 {
1636         _sd_cd_info_t *cdi;
1637         int cd = (int)(unsigned long)xcd;
1638         int rc = 0;
1639 
1640         if ((rc = _sd_close((int)cd)) == NSC_DONE) {
1641                 cdi = &(_sd_cache_files[cd]);
1642                 cdi->cd_iodev = NULL;
1643         }
1644 
1645         return (rc);
1646 }
1647 
1648 
1649 /*
1650  * _sdbc_remote_store_pinned - reflect pinned/failed blocks for cd
1651  * to our remote mirror. Returns count of blocks reflected or -1 on error.
1652  *
1653  */
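     /*
      * Note on the fail-list walk below: cd_fail_head is a list of dirty
      * chains.  Entries within one chain are linked by cc_dirty_next;
      * when a chain is exhausted, cc_dirty_link of the chain head leads
      * to the next chain.  A minimal equivalent sketch, assuming cd_lock
      * is held as below (visit() is a hypothetical per-entry action):
      *
      *      for (cc_list = cdi->cd_fail_head; cc_list;
      *          cc_list = cc_list->cc_dirty_link)
      *              for (cc_ent = cc_list; cc_ent;
      *                  cc_ent = cc_ent->cc_dirty_next)
      *                      visit(cc_ent);
      */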
1654 int
1655 _sdbc_remote_store_pinned(int cd)
1656 {
1657         int cnt = 0;
1658         _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1659         _sd_cctl_t *cc_ent, *cc_list;
1660 
1661         ASSERT(cd >= 0);
1662         if (cdi->cd_info->sh_failed) {
1663 
1664                 if (cdi->cd_global->sv_pinned == _SD_NO_HOST) {
1665                         cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1666                         SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1667                 }
1668 
1669                 mutex_enter(&cdi->cd_lock);
1670                 cc_ent = cc_list = cdi->cd_fail_head;
1671                 while (cc_ent) {
1672                         cnt++;
1673 
1674                         /* is this always necessary? jgk */
1675 
1676                         if (SSOP_WRITE_CBLOCK(sdbc_safestore,
1677                             cc_ent->cc_write->sc_res, cc_ent->cc_data,
1678                             CACHE_BLOCK_SIZE, 0)) {
1679                                 mutex_exit(&cdi->cd_lock);
1680                                 return (-1);
1681                         }
1682 
1683                         /* update the cache block metadata */
1684                         CENTRY_SET_FTPOS(cc_ent);
1685                         cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
1686 
1687                         cc_ent->cc_write->sc_dirty = CENTRY_DIRTY(cc_ent);
1688 
1689                         SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
1690 
1691                         cc_ent = cc_ent->cc_dirty_next;
1692                         if (!cc_ent)
1693                                 cc_ent = cc_list = cc_list->cc_dirty_link;
1694                 }
1695                 mutex_exit(&cdi->cd_lock);
1696         }
1697 
1698         return (cnt);
1699 }
1700 
1701 /*
1702  * _sd_flush_cd()
1703  *      reflect pinned blocks to mirrored node
1704  *      wait for dirty blocks to be flushed
1705  * returns:
1706  *      EIO     I/O failure, or pinned blocks and no mirror
1707  *      EAGAIN  Hang: count of outstanding writes isn't decreasing
1708  *      -1      pinned blocks, reflected to mirror
1709  *      0       success
1710  */
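     /*
      * Caller-side sketch for these return codes (this mirrors what
      * _sd_close() above does: -1 means responsibility for the pinned
      * blocks has moved to the mirror, so it may be treated as success):
      *
      *      rc = _sd_flush_cd(cd);
      *      if (rc == -1)
      *              rc = 0;
      */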
1711 static int
1712 _sd_flush_cd(int cd)
1713 {
1714         int rc;
1715 
1716         if ((rc = _sd_wait_for_flush(cd)) == 0)
1717                 return (0);
1718 
1719         /*
1720          * if we timed out simply return otherwise
1721          * it must be an i/o type of error
1722          */
1723         if (rc == EAGAIN)
1724                 return (rc);
1725 
1726         if (_sd_is_mirror_down())
1727                 return (EIO); /* already failed, no mirror */
1728 
1729         /* flush any pinned/failed blocks to mirror */
1730         if (_sdbc_remote_store_pinned(cd) >= 0)
1731                 /*
1732                  * At this point it looks like we have blocks on the
1733                  * failed list and taking up space on this node but
1734                  * no longer have responsibility for the blocks.
1735                  * These blocks will in fact be freed from the cache
1736                  * and the failed list when the mirror picks them up
1737                  * from safe storage and then calls _sd_cd_discard_mirror
1738                  * which will issue an rpc telling us to finish up.
1739                  *
1740                  * Should the other node die before sending the rpc then
1741                  * we are safe with these blocks simply waiting on the
1742                  * failed list.
1743                  */
1744                 return (-1);
1745         else
1746                 return (rc);
1747 }
1748 
1749 /*
1750  * _sdbc_io_attach_cd -- set up for client access to device, reserve raw device
1751  *
1752  * ARGUMENTS:
1753  *      cd   -  the cache descriptor to attach.
1754  *
1755  * RETURNS:
1756  *      0 on success.
1757  *      Error otherwise.
1758  */
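     /*
      * Pairing sketch (illustrative; error handling elided): a successful
      * attach takes an nsc_reserve on the raw device and is expected to
      * be balanced by _sdbc_io_detach_cd(), which does the nsc_release:
      *
      *      if (_sdbc_io_attach_cd(xcd) == 0) {
      *              ...                     do cached i/o on the device
      *              (void) _sdbc_io_detach_cd(xcd);
      *      }
      */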
1759 int
1760 _sdbc_io_attach_cd(blind_t xcd)
1761 {
1762         int rc = 0;
1763         _sd_cd_info_t *cdi;
1764         int cd = (int)(unsigned long)xcd;
1765 
1766         SDTRACE(ST_ENTER|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, 0);
1767         if (!_sd_cache_initialized ||
1768             _sdbc_shutdown_in_progress ||
1769             !FILE_OPENED(cd)) {
1770                 SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1771 
1772                 DTRACE_PROBE(_sdbc_io_attach_cd_end1);
1773 
1774                 return (EINVAL);
1775         }
1776         cdi = &(_sd_cache_files[cd]);
1777 
1778         /*
1779          * check if disk is failed without raw device open.  If it is,
1780          * it has to be recovered using _sd_disk_online
1781          */
1782 
1783         if (cdi->cd_global->sv_pinned == _SD_SELF_HOST) {
1784                 _sd_print(3,
1785                     "_sdbc_io_attach_cd: pinned data. returning EINVAL");
1786 
1787                 DTRACE_PROBE(_sdbc_io_attach_cd_end2);
1788 
1789                 return (EINVAL);
1790         }
1791 
1792         if ((cdi->cd_info == NULL) || (cdi->cd_info->sh_failed)) {
1793                 DTRACE_PROBE1(_sdbc_io_attach_cd_end3,
1794                     struct _sd_shared *, cdi->cd_info);
1795 
1796                 return (EINVAL);
1797         }
1798 
1799 #if defined(_SD_FAULT_RES)
1800         /* wait for node recovery to finish */
1801         if (_sd_node_recovery)
1802                 (void) _sd_recovery_wait();
1803 #endif
1804 
1805         /* this will provoke a sdbc_fd_attach_cd call .. */
1806 
1807         rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI);
1808         SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1809 
1810         return (rc);
1811 }
1812 
1813 /*
1814  * sdbc_fd_attach_cd -- setup cache for access to raw device underlying cd.
1815  * This is provoked by some piece of sdbc doing a reserve on the raw device.
1816  *
1817  * ARGUMENTS:
1818  *      cd   -  the cache descriptor to attach.
1819  *
1820  * RETURNS:
1821  *      0 on success.
1822  *      Error otherwise.
1823  */
1824 static int
1825 sdbc_fd_attach_cd(blind_t xcd)
1826 {
1827         int rc = 0;
1828         int cd = (int)(unsigned long)xcd;
1829         _sd_cd_info_t *cdi;
1830 
1831         if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1832                 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1833 
1834                 DTRACE_PROBE(sdbc_fd_attach_cd_end1);
1835 
1836                 return (EINVAL);
1837         }
1838         cdi = &(_sd_cache_files[cd]);
1839 
1840 #if defined(_SD_FAULT_RES)
1841         /* retrieve pinned/failed data */
1842         if (!_sd_node_recovery) {
1843                 (void) _sd_repin_cd(cd);
1844         }
1845 #endif
1846 
1847         rc = nsc_partsize(cdi->cd_rawfd, &cdi->cd_info->sh_filesize);
1848         if (rc != 0) {
1849                 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1850 
1851                 DTRACE_PROBE(sdbc_fd_attach_cd_end3);
1852 
1853                 return (rc);
1854         }
1855 
1856         cdi->cd_global->sv_attached = _SD_SELF_HOST;
1857 
1858         SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1859 
1860         mutex_enter(&_sd_cache_lock);
1861         cdi->cd_info->sh_flag |= CD_ATTACHED;
1862         mutex_exit(&_sd_cache_lock);
1863 
1864         return (0);
1865 }
1866 
1867 /*
1868  * _sdbc_io_detach_cd -- release raw device
1869  * Called when a cache client is being detached from this cd.
1870  *
1871  * ARGUMENTS:
1872  *      cd   -   the cache descriptor to detach.
1873  * RETURNS:
1874  *      0 on success.
1875  *      Error otherwise.
1876  */
1877 int
1878 _sdbc_io_detach_cd(blind_t xcd)
1879 {
1880         int cd = (int)(unsigned long)xcd;
1881         _sd_cd_info_t *cdi;
1882 
1883 
1884         SDTRACE(ST_ENTER|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1885         if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1886                 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1887 
1888                 DTRACE_PROBE(_sdbc_io_detach_cd_end1);
1889 
1890                 return (EINVAL);
1891         }
1892 
1893 #if defined(_SD_FAULT_RES)
1894         if (_sd_node_recovery)
1895                 (void) _sd_recovery_wait();
1896 #endif
1897         /* relinquish responsibility for device */
1898         cdi = &(_sd_cache_files[cd]);
1899         if (!(cdi->cd_rawfd) || !nsc_held(cdi->cd_rawfd)) {
1900                 cmn_err(CE_WARN, "!sdbc(_sdbc_io_detach_cd)(%d) not attached", cd);
1901                 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1902                 DTRACE_PROBE1(_sdbc_io_detach_cd_end2,
1903                     nsc_fd_t *, cdi->cd_rawfd);
1904 
1905                 return (EPROTO);
1906         }
1907         /* this will provoke/allow a call to sdbc_fd_detach_cd */
1908         nsc_release(cdi->cd_rawfd);
1909 
1910         SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1911 
1912         return (0);
1913 }
1914 
1915 /*
1916  * sdbc_detach_cd -- flush dirty writes to disk, release raw device
1917  * Called when the raw device is being detached from this cd.
1918  *
1919  * ARGUMENTS:
1920  *      xcd   -   the cache descriptor to detach.
1921  *      rd_only  -  non-zero to flush only (no hash invalidate).
1922  * RETURNS:
1923  *      0 on success.
1924  *      Error otherwise.
1925  */
1926 static int
1927 sdbc_detach_cd(blind_t xcd, int rd_only)
1928 {
1929         int rc;
1930         int cd = (int)(unsigned long)xcd;
1931         _sd_cd_info_t *cdi;
1932 
1933         SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1934 
1935         if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1936                 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1937 
1938                 DTRACE_PROBE(sdbc_detach_cd_end1);
1939 
1940                 return (EINVAL);
1941         }
1942 
1943 
1944         rc = _sd_flush_cd(cd);
1945         if (rc > 0) {
1946                 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, rc);
1947 
1948                 DTRACE_PROBE(sdbc_detach_cd_end2);
1949 
1950                 return (rc);
1951         }
1952 
1953         if (!rd_only) {
1954                 _sd_hash_invalidate_cd(cd);
1955                 cdi = &(_sd_cache_files[cd]);
1956 
1957                 if (cdi->cd_global->sv_attached == _SD_SELF_HOST) {
1958                         cdi->cd_global->sv_attached = _SD_NO_HOST;
1959                         SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1960                 } else {
1961                         cmn_err(CE_WARN,
1962                             "!sdbc(_sdbc_detach_cd) (%d) attached by node %d",
1963                             cd, cdi->cd_global->sv_attached);
1964                         SDTRACE(SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1965 
1966                         DTRACE_PROBE1(sdbc_detach_cd_end3,
1967                             int, cdi->cd_global->sv_attached);
1968 
1969                         return (EPROTO);
1970                 }
1971 
1972                 mutex_enter(&_sd_cache_lock);
1973                 cdi->cd_info->sh_flag &= ~CD_ATTACHED;
1974                 mutex_exit(&_sd_cache_lock);
1975         }
1976 
1977         SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1978 
1979         return (0);
1980 }
1981 
1982 /*
1983  * sdbc_fd_detach_cd -- flush dirty writes to disk, release raw device
1984  * Called when raw device is being detached from this cd.
1985  *
1986  * ARGUMENTS:
1987  *      xcd   -   the cache descriptor to detach.
1988  * RETURNS:
1989  *      0 on success.
1990  *      Error otherwise.
1991  */
1992 static int
1993 sdbc_fd_detach_cd(blind_t xcd)
1994 {
1995         return (sdbc_detach_cd(xcd, 0));
1996 }
1997 
1998 /*
1999  * sdbc_fd_flush_cd - raw device "xcd" is being detached and needs
2000  * flushing.  We only need to flush; we don't need to hash invalidate
2001  * this file.
2002  */
2003 static int
2004 sdbc_fd_flush_cd(blind_t xcd)
2005 {
2006         return (sdbc_detach_cd(xcd, 1));
2007 }
2008 
2009 /*
2010  * _sd_get_pinned - re-issue PINNED callbacks for cache device
2011  *
2012  * ARGUMENTS:
2013  *      cd   -   the cache descriptor to reissue pinned callbacks for.
2014  * RETURNS:
2015  *      0 on success.
2016  *      Error otherwise.
2017  */
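     /*
      * Illustrative call (hypothetical client code): after re-registering
      * a pinned-data callback on the iodev, the client asks sdbc to
      * re-deliver notifications for blocks still on the fail list:
      *
      *      (void) _sd_get_pinned((blind_t)(unsigned long)cd);
      */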
2018 int
2019 _sd_get_pinned(blind_t xcd)
2020 {
2021         _sd_cd_info_t *cdi;
2022         _sd_cctl_t *cc_list, *cc_ent;
2023         int cd = (int)(unsigned long)xcd;
2024 
2025         if (cd < 0 || cd >= sdbc_max_devs) {
2026                 DTRACE_PROBE(_sd_get_pinned_end1);
2027                 return (EINVAL);
2028         }
2029 
2030         cdi = &_sd_cache_files[cd];
2031 
2032         if (!FILE_OPENED(cd)) {
2033                 DTRACE_PROBE(_sd_get_pinned_end2);
2034                 return (0);
2035         }
2036 
2037         mutex_enter(&cdi->cd_lock);
2038 
2039         if (!cdi->cd_info->sh_failed) {
2040                 mutex_exit(&cdi->cd_lock);
2041 
2042                 DTRACE_PROBE(_sd_get_pinned_end3);
2043                 return (0);
2044         }
2045 
2046         cc_ent = cc_list = cdi->cd_fail_head;
2047         while (cc_ent) {
2048                 if (CENTRY_PINNED(cc_ent))
2049                         nsc_pinned_data(cdi->cd_iodev,
2050                             BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
2051                 cc_ent = cc_ent->cc_dirty_next;
2052                 if (!cc_ent)
2053                         cc_ent = cc_list = cc_list->cc_dirty_link;
2054         }
2055 
2056         mutex_exit(&cdi->cd_lock);
2057 
2058         return (0);
2059 }
2060 
2061 /*
2062  * _sd_allocate_buf - allocate a vector of buffers for io.
2063  *                      *This call has been replaced by _sd_alloc_buf*
2064  */
2065 
2066 _sd_buf_handle_t *
2067 _sd_allocate_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2068     int *sts)
2069 {
2070         _sd_buf_handle_t *handle = NULL;
2071 
2072         *sts = _sd_alloc_buf((blind_t)(unsigned long)cd, fba_pos, fba_len,
2073             flag, &handle);
2074         if (*sts == NSC_HIT)
2075                 *sts = NSC_DONE;
2076         return (handle);
2077 }
2078 
2079 
2080 /*
2081  * _sd_prefetch_buf - _sd_alloc_buf w/flag = NSC_RDAHEAD|NSC_RDBUF
2082  *      no 'bufvec' (data is not read by caller)
2083  *      skip leading valid or busy entries (data available sooner)
2084  *      truncate on busy block (to avoid deadlock)
2085  *      release trailing valid entries, adjust length before starting I/O.
2086  */
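     /*
      * Illustration of the strategies above on a hypothetical six-block
      * request (V = valid in cache, I = invalid, B = busy):
      *
      *      V V I I V I     leading V's are skipped; the i/o starts at
      *                      the first I and runs through the last I
      *      V V I B . .     the request is truncated at B; only the
      *                      blocks before B are processed
      *      . . I I V V     trailing V's are released and the i/o
      *                      length is shortened before the read starts
      */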
2087 static int
2088 _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2089     _sd_buf_handle_t *handle, int locked)
2090 {
2091         _sd_cd_info_t *cdi;
2092         nsc_off_t cblk;         /* position of temp cache block */
2093         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
2094         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
2095         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
2096         nsc_off_t io_pos;       /* offset in FBA's */
2097         nsc_size_t fba_orig_len;
2098         int sts, stall;
2099         _sd_cctl_t *centry = NULL;
2100         _sd_cctl_t *lentry = NULL;
2101         _sd_cctl_t *ioent = NULL;
2102         _sd_cctl_t *last_ioent = NULL;
2103         sdbc_allocbuf_t alloc_tok = {0};
2104         int this_entry_type = 0;
2105         nsc_size_t request_blocks = 0; /* number of cache blocks required */
2106         int pageio;
2107 
2108         handle->bh_flag |= NSC_HACTIVE;
2109         ASSERT(cd >= 0);
2110         cdi = &_sd_cache_files[cd];
2111 
2112         /* prefetch: truncate if req'd */
2113         if (fba_len > sdbc_max_fbas)
2114                 fba_len = sdbc_max_fbas;
2115         if ((fba_pos + fba_len) > cdi->cd_info->sh_filesize) {
2116                 if (fba_pos >= cdi->cd_info->sh_filesize) {
2117                         sts = EIO;
2118                         goto done;
2119                 }
2120                 fba_len = cdi->cd_info->sh_filesize - fba_pos;
2121         }
2122 
2123         fba_orig_len = fba_len;
2124 
2125         _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2126         handle->bh_centry = NULL;
2127 
2128         cblk = FBA_TO_BLK_NUM(fba_pos);
2129         st_cblk_off = BLK_FBA_OFF(fba_pos);
2130         st_cblk_len = BLK_FBAS - st_cblk_off;
2131 
2132         /*
2133          * count the number of cache blocks required for the chain
2134          */
2135         if ((nsc_size_t)st_cblk_len >= fba_len) {
2136                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2137                 end_cblk_len = 0;
2138         } else {
2139                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2140         }
2141 
2142         request_blocks = 1;  /* at least one */
2143 
2144         /* middle piece */
2145         request_blocks += (fba_len - (st_cblk_len + end_cblk_len)) >>
2146             BLK_FBA_SHFT;
2147 
2148         if (end_cblk_len)
2149                 ++request_blocks;
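             /*
              * Worked example of the count above, assuming BLK_FBAS == 8
              * (eight 512-byte FBAs per cache block): fba_pos = 5 and
              * fba_len = 30 give st_cblk_off = 5, st_cblk_len = 3,
              * end_cblk_len = BLK_FBA_OFF(35) = 3 and a middle piece of
              * (30 - 6) / 8 = 3 blocks, so request_blocks = 1 + 3 + 1 = 5.
              */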
2150 
2151         stall = 0;
2152         do {
2153                 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2154 cget:
2155                 if ((centry = (_sd_cctl_t *)
2156                     _sd_hash_search(cd, cblk, _sd_htable)) != NULL) {
2157 try:
2158                         /* prefetch: skip leading valid blocks */
2159                         if ((ioent == NULL) &&
2160                             SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry)) {
2161 skip:
2162                                 sdbc_prefetch_valid_cnt++;
2163                                 --request_blocks;
2164                                 lentry = centry;
2165                                 centry = NULL;
2166                                 cblk++;
2167                                 fba_len -= st_cblk_len;
2168                                 st_cblk_off = 0;
2169                                 st_cblk_len = (sdbc_cblk_fba_t)
2170                                     ((fba_len > (nsc_size_t)BLK_FBAS) ?
2171                                     BLK_FBAS : fba_len);
2172                                 continue;
2173                         }
2174 
2175                         if (SET_CENTRY_INUSE(centry)) {
2176                                 /*
2177                                  * prefetch: skip leading busy
2178                                  * or truncate at busy block
2179                                  */
2180                                 if (ioent == NULL)
2181                                         goto skip;
2182                                 sdbc_prefetch_busy_cnt++;
2183                                 fba_orig_len -= fba_len;
2184                                 fba_len = 0;
2185                                 centry = lentry; /* backup */
2186                                 break;
2187                         }
2188 
2189                         /*
2190                          * bug 4529671
2191                          * now that we own the centry make sure that
2192                          * it is still good.  it could have been processed
2193                          * by _sd_dealloc_dm() in the window between
2194                          * _sd_hash_search() and SET_CENTRY_INUSE().
2195                          */
2196                         if ((_sd_cctl_t *)
2197                             _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2198                                 sdbc_prefetch_deallocd++;
2199 #ifdef DEBUG
2200                                 cmn_err(CE_WARN,
2201                                     "!prefetch centry %p cd %d cblk %" NSC_SZFMT
2202                                     " fba_len %" NSC_SZFMT " lost to dealloc?! "
2203                                     "cc_data %p",
2204                                     (void *)centry, cd, cblk, fba_orig_len,
2205                                     (void *)centry->cc_data);
2206 #endif
2207 
2208                                 CLEAR_CENTRY_INUSE(centry);
2209                                 continue;
2210                         }
2211 
2212                         if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2213                                 /*
2214                                  * Do pagelist io mutual exclusion
2215                                  * before messing with the centry.
2216                                  */
2217                                 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2218                                         /* flusher not done with pageio */
2219                                         /*
2220                                          * prefetch: skip leading busy
2221                                          * or truncate at busy block
2222                                          */
2223                                         CLEAR_CENTRY_INUSE(centry);
2224                                         if (ioent == NULL)
2225                                                 goto skip;
2226                                         sdbc_prefetch_pageio1++;
2227                                         fba_orig_len -= fba_len;
2228                                         fba_len = 0;
2229                                         centry = lentry; /* backup */
2230                                         break;
2231 
2232                                 }
2233 
2234                                 sdbc_prefetch_hit++;
2235                                 this_entry_type = HASH_ENTRY_DM;
2236                                 pageio = 0;
2237                                 centry->cc_toflush = 0;
2238 
2239                                 centry->cc_hits++;
2240 
2241                                 /* this will reset the age flag */
2242                                 sdbc_centry_init_dm(centry);
2243 
2244                                 DTRACE_PROBE1(_sd_prefetch_buf,
2245                                     _sd_cctl_t *, centry);
2246                         } else {
2247                                 /* block mismatch */
2248                                 sdbc_prefetch_lost++;
2249 
2250                                 CLEAR_CENTRY_INUSE(centry);
2251                                 continue;
2252                         }
2253                 } else {
2254                         centry = sdbc_centry_alloc(cd, cblk, request_blocks,
2255                             &stall, &alloc_tok, ALLOC_NOWAIT);
2256 
2257                         if (centry == NULL) {
2258                                 /*
2259                                  * prefetch: cache is very busy. just do
2260                                  * the i/o for the blocks already acquired,
2261                                  * if any.
2262                                  */
2263                                 fba_orig_len -= fba_len;
2264                                 fba_len = 0;
2265                                 /*
2266                                  * if we have a chain of centries
2267                                  * then back up (set centry to lentry).
2268                                  * if there is no chain (ioent == NULL)
2269                                  * then centry remains NULL.  this can occur
2270                                  * if all previous centries were hash hits
2271                                  * on valid blocks that were processed in
2272                                  * the skip logic above.
2273                                  */
2274                                 if (ioent)
2275                                         centry = lentry; /* backup */
2276                                 break;
2277                         }
2278 
2279                         /*
2280                          * dmchaining adjustment.
2281                          * if centry was obtained from the dmchain
2282                          * then clear local pageio variable because the
2283                          * centry already has cc_pageio set.
2284                          */
2285                         if (CENTRY_PAGEIO(centry))
2286                                 pageio = 0;
2287 
2288                         DTRACE_PROBE1(_sd_alloc_buf, _sd_cctl_t *, centry);
2289 
2290                         this_entry_type = ELIGIBLE_ENTRY_DM;
2291                         if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2292                                 this_entry_type = HASH_ENTRY_DM;
2293                         else {
2294                                 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2295                                         this_entry_type = HOLD_ENTRY_DM;
2296                         }
2297                 }
2298 
2299                 centry->cc_chain = NULL;
2300 
2301                 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2302 
2303                 /*
2304                  * Do pagelist io mutual exclusion now if we did not do
2305                  * it above.
2306                  */
2307 
2308                 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2309                         /* flusher not done with pageio */
2310                         sdbc_prefetch_pageio2++;
2311 
2312                         /*
2313                          * prefetch: skip leading busy
2314                          * or truncate at busy block
2315                          */
2316                         CLEAR_CENTRY_INUSE(centry);
2317                         if (ioent == NULL)
2318                                 goto skip;
2319                         sdbc_prefetch_busy_cnt++;
2320                         fba_orig_len -= fba_len;
2321                         fba_len = 0;
2322                         centry = lentry; /* backup */
2323                         break;
2324                 }
2325 
2326                 pageio = 0;
2327 
2328                 fba_len -= st_cblk_len;
2329 
2330                 if (ioent == NULL)  {
2331                         if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2332                             centry)) {
2333                                 io_pos = BLK_TO_FBA_NUM(cblk) + st_cblk_off;
2334                                 ioent = last_ioent = centry;
2335                         } else {
2336                                 DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2337                                     st_cblk_len);
2338                                 DTRACE_PROBE4(_sd_prefetch_buf_data1,
2339                                     uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2340                                     st_cblk_off), int, st_cblk_len,
2341                                     char *, *(int64_t *)(centry->cc_data +
2342                                     FBA_SIZE(st_cblk_off)), char *,
2343                                     *(int64_t *)(centry->cc_data +
2344                                     FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2345                         }
2346 
2347                         handle->bh_centry = centry;
2348                         st_cblk_off = 0;
2349                         st_cblk_len = (sdbc_cblk_fba_t)
2350                             ((fba_len > (nsc_size_t)BLK_FBAS) ?
2351                             BLK_FBAS : fba_len);
2352                 } else {
2353                         if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry))
2354                                 last_ioent = centry;
2355                         else {
2356                                 DTRACE_PROBE4(_sd_prefetch_buf_data2,
2357                                     uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2358                                     st_cblk_off), int, st_cblk_len,
2359                                     char *, *(int64_t *)(centry->cc_data +
2360                                     FBA_SIZE(st_cblk_off)), char *,
2361                                     *(int64_t *)(centry->cc_data +
2362                                     FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2363                         }
2364 
2365                         lentry->cc_chain = centry;
2366                         if (fba_len < (nsc_size_t)BLK_FBAS)
2367                                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2368                 }
2369                 lentry = centry;
2370                 cblk++;
2371 
2372                 /* if this block has a new identity clear prefetch history */
2373                 if (this_entry_type != HASH_ENTRY_DM)
2374                         centry->cc_aging_dm &=
2375                             ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2376 
2377                 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2378                 centry->cc_aging_dm |= this_entry_type | PREFETCH_BUF_E;
2379                 if (flag & NSC_METADATA)
2380                         centry->cc_aging_dm |= STICKY_METADATA_DM;
2381 
2382                 --request_blocks;
2383         } while (fba_len > 0);
2384 
2385 
2386         if (locked) {
2387                 rw_exit(&sdbc_queue_lock);
2388                 locked = 0;
2389         }
2390 
2391         sdbc_centry_alloc_end(&alloc_tok);
2392 
2393         if (centry) {
2394                 centry->cc_chain = NULL;
2395                 if (sts = _sd_setup_category_on_type(handle->bh_centry)) {
2396                         (void) _sd_free_buf(handle);
2397                         goto done;
2398                 }
2399 
2400                 (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
2401         }
2402 
2403 
2404         if (ioent) {
2405                 /* prefetch: trailing valid can be released, adjust len */
2406                 if (centry != last_ioent) {
2407                         centry = last_ioent->cc_chain;
2408                         last_ioent->cc_chain = NULL;
2409                         while (centry) {
2410                                 lentry = centry->cc_chain;
2411                                 centry->cc_aging_dm &= ~PREFETCH_BUF_E;
2412                                 _sd_centry_release(centry);
2413                                 centry = lentry;
2414                                 sdbc_prefetch_trailing++;
2415                         }
2416                         fba_len = (CENTRY_BLK(last_ioent) -
2417                             CENTRY_BLK(ioent) + 1) *  BLK_FBAS -
2418                             BLK_FBA_OFF(io_pos);
2419                         fba_orig_len = fba_len + (io_pos - fba_pos);
2420                 }
2421 
2422                 _SD_DISCONNECT_CALLBACK(handle);
2423                 sts = _sd_doread(handle,  ioent, io_pos,
2424                     (fba_pos + fba_orig_len - io_pos), flag);
2425                 if (sts > 0)
2426                         (void) _sd_free_buf(handle);
2427         } else {
2428                 CACHE_FBA_READ(cd, fba_orig_len);
2429                 CACHE_READ_HIT;
2430                 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
2431 
2432                 sts = NSC_HIT;
2433         }
2434 done:
2435         if (locked)
2436                 rw_exit(&sdbc_queue_lock);
2437 
2438         return (sts);
2439 }
2440 
2441 
2442 /*
2443  * _sd_cc_wait - wait for inuse cache block to become available
2444  * Usage:
2445  *      if (SET_CENTRY_INUSE(centry)) {
2446  *              _sd_cc_wait(cd, blk, centry, CC_INUSE);
2447  *              goto try_again;
2448  *      }
2449  * -or-
2450  *      if (SET_CENTRY_PAGEIO(centry)) {
2451  *              _sd_cc_wait(cd, blk, centry, CC_PAGEIO);
2452  *              goto try_again;
2453  *      }
2454  */
2455 void
2456 _sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag)
2457 {
2458         volatile ushort_t *waiters;
2459         volatile uchar_t *uflag;
2460 
2461         if (flag == CC_INUSE) {
2462                 waiters = &(centry->cc_await_use);
2463                 uflag = &(CENTRY_INUSE(centry));
2464         } else if (flag == CC_PAGEIO) {
2465                 waiters = &(centry->cc_await_page);
2466                 uflag = &(CENTRY_PAGEIO(centry));
2467         } else {
2468                 /* Oops! */
2469 #ifdef DEBUG
2470                 cmn_err(CE_WARN, "!_sd_cc_wait: unknown flag value (%x)", flag);
2471 #endif
2472                 return;
2473         }
2474 
2475         mutex_enter(&centry->cc_lock);
2476         if (CC_CD_BLK_MATCH(cd, cblk, centry) && (*uflag) != 0) {
2477                 (*waiters)++;
2478                 sd_serialize();
2479                 if ((*uflag) != 0) {
2480                         unsigned stime = nsc_usec();
2481                         cv_wait(&centry->cc_blkcv, &centry->cc_lock);
2482                         (*waiters)--;
2483                         mutex_exit(&centry->cc_lock);
2484                         SDTRACE(ST_INFO|SDF_ENT_GET,
2485                             cd, 0, BLK_TO_FBA_NUM(cblk), (nsc_usec()-stime), 0);
2486                 } else {
2487                         (*waiters)--;
2488                         mutex_exit(&centry->cc_lock);
2489                 }
2490         } else
2491                 mutex_exit(&centry->cc_lock);
2492 
2493 }
2494 
2495 /*
2496  * _sd_alloc_buf  - Allocate a vector of buffers for io.
2497  *
2498  * ARGUMENTS:
2499  *      cd       - Cache descriptor (from a previous open)
2500  *      fba_pos  - disk position (512-byte FBAs)
2501  *      fba_len  - length in disk FBAs.
2502  *      flag     - allocation type. Flag is one or more of
2503  *                 NSC_RDBUF, NSC_WRBUF, NSC_NOBLOCK and hints.
2504  *                 NSC_RDAHEAD - prefetch for future read.
2505  *      handle_p - pointer to a handle pointer.
2506  *                 If the handle pointer is non-null, it's used as a
2507  *                 pre-allocated handle. Else a new handle will be allocated
2508  *                 and stored in *handle_p
2509  *
2510  * RETURNS:
2511  *      errno if return > 0.
2512  *      else NSC_HIT or NSC_DONE on success
2513  *      or   NSC_PENDING on io in progress and NSC_NOBLOCK
2514  *              specified in the flag.
2515  * USAGE:
2516  *      This routine allocates the cache blocks requested and creates a list
2517  *      of entries for this request.
2518  *      If NSC_NOBLOCK was not specified, this call could block on read io.
2519  *      If flag specified NSC_RDBUF and the request is not an entire
2520  *      hit, an io is initiated.
2521  */
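     /*
      * Usage sketch (illustrative; error handling elided; "h" is a
      * hypothetical handle pre-allocated by the caller, as the ASSERT
      * below requires).  On success the bh_centry chain and bh_bufvec
      * are set up; the buffer is returned with _sd_free_buf():
      *
      *      sts = _sd_alloc_buf(xcd, fba_pos, fba_len, NSC_RDBUF, &h);
      *      if (sts > 0)
      *              return (sts);
      *      ...
      *      (void) _sd_free_buf(h);
      */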
2522 int
2523 _sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2524     _sd_buf_handle_t **handle_p)
2525 {
2526         int cd = (int)(unsigned long)xcd;
2527         _sd_cd_info_t *cdi;
2528         _sd_buf_handle_t *handle;
2529         int sts;
2530         nsc_off_t st_cblk, cblk; /* position of start and temp cache block */
2531         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
2532         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
2533         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
2534         nsc_off_t io_pos;       /* offset in FBA's */
2535         _sd_bufvec_t *bufvec;
2536         _sd_cctl_t *centry, *lentry, *ioent = NULL;
2537         nsc_size_t fba_orig_len = fba_len;      /* FBA length of orig request */
2538         int stall, pageio;
2539         unsigned char cc_flag;
2540         int this_entry_type;
2541         int locked = 0;
2542         nsc_size_t dmchain_request_blocks; /* size of dmchain in cache blocks */
2543         sdbc_allocbuf_t alloc_tok = {0};
2544         int min_frag = 0;       /* frag statistics */
2545         int max_frag = 0;       /* frag statistics */
2546         int nfrags = 0;         /* frag statistics */
2547 #ifdef DEBUG
2548         int err = 0;
2549 #endif
2550 
2551 
2552         ASSERT(*handle_p != NULL);
2553         handle = *handle_p;
2554 
2555         if (_sdbc_shutdown_in_progress)
2556                 return (EIO);
2557 
2558         if (xcd == NSC_ANON_CD)
2559                 cd = _CD_NOHASH;
2560 
2561         KSTAT_RUNQ_ENTER(cd);
2562 
2563         /*
2564          * Force large writes on nvram systems to be write-through to
2565          * avoid the (slow) bcopy into nvram.
2566          */
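             /*
              * For example (hypothetical tuning value): with sdbc_wrthru_len
              * set to 1024, a single 2048-FBA (1 MB) NSC_WRBUF allocation
              * picks up NSC_WRTHRU here and bypasses the nvram copy.
              */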
2567 
2568         if (flag & NSC_WRBUF) {
2569                 if (fba_len > (nsc_size_t)sdbc_wrthru_len) {
2570                         flag |= NSC_WRTHRU;
2571                 }
2572         }
2573 
2574 #ifdef DEBUG
2575         if (sdbc_pageio_debug != SDBC_PAGEIO_OFF) {
2576                 switch (sdbc_pageio_debug) {
2577                 case SDBC_PAGEIO_RDEV:
2578                         if (cd != _CD_NOHASH &&
2579                             sdbc_pageio_rdev != (dev_t)-1 &&
2580                             _sd_cache_files[cd].cd_crdev == sdbc_pageio_rdev)
2581                                 flag |= NSC_PAGEIO;
2582                         break;
2583 
2584                 case SDBC_PAGEIO_RAND:
2585                         if ((nsc_lbolt() % 3) == 0)
2586                                 flag |= NSC_PAGEIO;
2587                         break;
2588 
2589                 case SDBC_PAGEIO_ALL:
2590                         flag |= NSC_PAGEIO;
2591                         break;
2592                 }
2593         }
2594 #endif /* DEBUG */
2595 
2596         if (fba_len > (nsc_size_t)BLK_FBAS) {
2597                 rw_enter(&sdbc_queue_lock, RW_WRITER);
2598                 locked = 1;
2599         }
2600 
2601         /*
2602          * _CD_NOHASH: client wants temporary (not hashed) cache memory
2603          * not associated with a local disk.  Skip local disk checks.
2604          */
2605         if (cd == _CD_NOHASH) {
2606                 flag &= ~(NSC_RDBUF | NSC_WRBUF | NSC_RDAHEAD);
2607                 handle = *handle_p;
2608                 handle->bh_flag |= NSC_HACTIVE;
2609                 goto setup;
2610         }
2611 
2612         SDTRACE(ST_ENTER|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, 0);
2613 
2614 
2615         if ((flag & NSC_RDAHEAD) && _sd_prefetch_opt) {
2616                 sts = _sd_prefetch_buf(cd, fba_pos, fba_len, flag, handle,
2617                     locked);
2618                 goto done;
2619         }
2620 
2621 #if !defined(_SD_NOCHECKS)
2622         if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2623                 nsc_size_t file_size;   /* file_size in FBA's */
2624                 /* prefetch: truncate if req'd */
2625                 if (fba_len > sdbc_max_fbas)
2626                         fba_len = sdbc_max_fbas;
2627                 file_size = _sd_cache_files[(cd)].cd_info->sh_filesize;
2628                 if ((fba_pos + fba_len) > file_size) {
2629                         fba_len = file_size - fba_pos;
2630 #ifdef NSC_MULTI_TERABYTE
2631                         if ((int64_t)fba_len <= 0) {
2632 #else
2633                         if ((int32_t)fba_len <= 0) {
2634 #endif
2635                                 sts = EIO;
2636                                 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len,
2637                                     fba_pos, flag, sts);
2638                                 goto done;
2639                         }
2640                 }
2641         } else if ((sts = _sd_check_buffer_alloc(cd, fba_pos, fba_len,
2642             handle_p)) != 0) {
2643                 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, sts);
2644                 goto done;
2645         }
2646 #endif
2647         if (fba_len == 0) {
2648                 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2649                     flag, EINVAL);
2650                 sts = EINVAL;
2651                 goto done;
2652         }
2653 
2654         handle->bh_flag |= NSC_HACTIVE;
2655         cdi = &_sd_cache_files[cd];
2656 
2657         if (cdi->cd_recovering) {
2658                 /*
2659                  * If recovering this device, then block all allocates
2660                  * for reading or writing. If we allow reads then
2661                  * this path could see old data before we recover.
2662                  * If we allow writes then new data could be overwritten
2663                  * by old data.
2664                  * This is clearly still not a complete solution as
2665                  * the thread doing this allocate could conceivably be
2666                  * past this point (and in _sd_write/_sd_read for that matter
2667                  * which don't even have this protection). But this type
2668                  * of path seems to only exist in a failover situation
2669                  * where a device has failed on the other node and works
2670                  * on this node, so the problem is not a huge one, but it
2671                  * exists nevertheless.
2672                  */
2673                 if ((sts = _sd_recovery_wblk_wait(cd)) != 0) {
2674                         handle->bh_flag &= ~NSC_HACTIVE;
2675                         SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2676                             flag, sts);
2677                         goto done;
2678                 }
2679         }
2680 
2681         /* write & disk failed, return error immediately */
2682         if ((flag & NSC_WRBUF) && cdi->cd_info->sh_failed) {
2683                 handle->bh_flag &= ~NSC_HACTIVE;
2684                 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, EIO);
2685                 sts = EIO;
2686                 goto done;
2687         }
2688 
2689 setup:
2690 
2691         _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2692         handle->bh_centry = NULL;
2693         bufvec = handle->bh_bufvec;
2694         if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2695                 /* CKD prefetch: bufvec not req'd, use placeholder */
2696                 bufvec->bufaddr = NULL;
2697                 bufvec->bufvmeaddr = NULL;
2698                 bufvec->buflen  = 0;
2699                 bufvec = _prefetch_sb_vec;
2700         }
2701         st_cblk = FBA_TO_BLK_NUM(fba_pos);
2702         st_cblk_off = BLK_FBA_OFF(fba_pos);
2703         st_cblk_len = BLK_FBAS - st_cblk_off;
2704         if ((nsc_size_t)st_cblk_len >= fba_len) {
2705                 end_cblk_len = 0;
2706                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2707         } else
2708                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2709         cblk = st_cblk;
2710 
2711 
2712         /*
2713          * count the number of cache blocks required for the chain
2714          */
2715 
2716         /* middle piece */
2717         dmchain_request_blocks =
2718             (fba_len - (st_cblk_len + end_cblk_len)) >> BLK_FBA_SHFT;
2719 
2720         /* start piece */
2721         ++dmchain_request_blocks;
2722 
2723         /* end piece */
2724         if (end_cblk_len)
2725                 ++dmchain_request_blocks;
2726 
2727 
2728         cc_flag = 0;
2729         if ((handle->bh_flag & NSC_PINNABLE) && (handle->bh_flag & NSC_WRBUF))
2730                 cc_flag |= CC_PINNABLE;
2731         if (handle->bh_flag & (NSC_NOCACHE|NSC_SEQ_IO))
2732                 cc_flag |= CC_QHEAD;
2733         lentry = NULL;
2734         stall = 0;
2735 
2736         do {
2737                 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2738 cget:
2739                 if ((centry = (_sd_cctl_t *)
2740                     _sd_hash_search(cd, cblk, _sd_htable)) != 0) {
2741 
2742                         if (SET_CENTRY_INUSE(centry)) {
2743                                 /* already inuse: wait for block, retry */
2744                                 sdbc_allocb_inuse++;
2745                                 if (locked)
2746                                         rw_exit(&sdbc_queue_lock);
2747                                 _sd_cc_wait(cd, cblk, centry, CC_INUSE);
2748                                 if (locked)
2749                                         rw_enter(&sdbc_queue_lock, RW_WRITER);
2750                                 goto cget;
2751                         }
2752 
2753                         /*
2754                          * bug 4529671
2755                          * now that we own the centry make sure that
2756                          * it is still good.  it could have been processed
2757                          * by _sd_dealloc_dm() in the window between
2758                          * _sd_hash_search() and SET_CENTRY_INUSE().
2759                          */
2760                         if ((_sd_cctl_t *)
2761                             _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2762                                 sdbc_allocb_deallocd++;
2763 #ifdef DEBUG
2764                                 cmn_err(CE_WARN,
2765                                     "!centry %p cd %d cblk %" NSC_SZFMT
2766                                     " fba_len %" NSC_SZFMT " lost to dealloc?! "
2767                                     "cc_data %p", (void *)centry, cd, cblk,
2768                                     fba_orig_len, (void *)centry->cc_data);
2769 #endif
2770 
2771                                 CLEAR_CENTRY_INUSE(centry);
2772                                 goto cget;
2773                         }
2774 
2775                         if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2776                                 /*
2777                                  * Do pagelist io mutual exclusion
2778                                  * before messing with the centry.
2779                                  */
2780                                 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2781                                         /* wait for flusher to finish pageio */
2782                                         sdbc_allocb_pageio1++;
2783 
2784                                         CLEAR_CENTRY_INUSE(centry);
2785                                         if (locked)
2786                                                 rw_exit(&sdbc_queue_lock);
2787                                         _sd_cc_wait(cd, cblk, centry,
2788                                             CC_PAGEIO);
2789                                         if (locked)
2790                                                 rw_enter(&sdbc_queue_lock,
2791                                                     RW_WRITER);
2792                                         goto cget;
2793                                 }
2794 
2795                                 sdbc_allocb_hit++;
2796                                 this_entry_type = HASH_ENTRY_DM;
2797                                 pageio = 0;
2798                                 centry->cc_toflush = 0;
2799 
2800                                 centry->cc_hits++;
2801 
2802                                 /* this will reset the age flag */
2803                                 sdbc_centry_init_dm(centry);
2804 
2805                                 DTRACE_PROBE1(_sd_alloc_buf1,
2806                                     _sd_cctl_t *, centry);
2807                         } else {
2808                                 /* block mismatch: release, alloc new block */
2809                                 sdbc_allocb_lost++;
2810 
2811                                 CLEAR_CENTRY_INUSE(centry);
2812 
2813                                 goto cget;
2814 
2815                         }
2816                 } else {
2817                         centry = sdbc_centry_alloc(cd, cblk,
2818                             dmchain_request_blocks, &stall,
2819                             &alloc_tok, locked ? ALLOC_LOCKED : 0);
2820 
2821                         /*
2822                          * dmchaining adjustment.
2823                          * if centry was obtained from the dmchain
2824                          * then clear local pageio variable because the
2825                          * centry already has cc_pageio set.
2826                          */
2827                         if (CENTRY_PAGEIO(centry))
2828                                 pageio = 0;
2829 
2830                         DTRACE_PROBE1(_sd_alloc_buf2, _sd_cctl_t *, centry);
2831 
2832                         this_entry_type = ELIGIBLE_ENTRY_DM;
2833                         if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2834                                 this_entry_type = HASH_ENTRY_DM;
2835                         else {
2836                                 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2837                                         this_entry_type = HOLD_ENTRY_DM;
2838                         }
2839                 }
2840 
2841                 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2842 
2843                 /*
2844                  * Do pagelist io mutual exclusion now if we did not do
2845                  * it above.
2846                  */
2847 
2848                 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2849                         /* wait for flusher to finish pageio */
2850                         sdbc_allocb_pageio2++;
2851 
2852 
2853                         CLEAR_CENTRY_INUSE(centry);
2854                         if (locked)
2855                                 rw_exit(&sdbc_queue_lock);
2856                         _sd_cc_wait(cd, cblk, centry, CC_PAGEIO);
2857                         if (locked)
2858                                 rw_enter(&sdbc_queue_lock, RW_WRITER);
2859                         goto cget;
2860                 }
2861 
2862                 pageio = 0;
2863 
2864                 if (CENTRY_DIRTY(centry)) {
2865                         /*
2866                          * end action might set PEND_DIRTY flag
2867                          * must lock if need to change flag bits
2868                          */
2869                         if (centry->cc_flag != (centry->cc_flag | cc_flag)) {
2870                                 /* was FAST */
2871                                 mutex_enter(&centry->cc_lock);
2872                                 centry->cc_flag |= cc_flag;
2873                                 /* was FAST */
2874                                 mutex_exit(&centry->cc_lock);
2875                         }
2876                 } else
2877                         centry->cc_flag |= cc_flag;
2878 
2879                 centry->cc_chain = NULL;
2880 
2881                 /*
2882                  * step 0: check valid bits in each cache element as
2883                  * the chain grows - set ioent/io_pos to the first
2884                  * instance of invalid data
2885                  */
2886                 if (cblk == st_cblk) {
2887                         handle->bh_centry = centry;
2888                         fba_len -= st_cblk_len;
2889                         lentry = centry;
2890                         if (flag & NSC_RDBUF)  {
2891                                 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2892                                     centry)) {
2893                                         io_pos = fba_pos;
2894                                         ioent = centry;
2895                                 } else {
2896                                         DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2897                                             st_cblk_len);
2898 
2899                                         DTRACE_PROBE4(_sd_alloc_data1,
2900                                             uint64_t, (uint64_t)
2901                                             (BLK_TO_FBA_NUM(cblk) +
2902                                             st_cblk_off), int, st_cblk_len,
2903                                             char *, *(int64_t *)
2904                                             (centry->cc_data +
2905                                             FBA_SIZE(st_cblk_off)),
2906                                             char *, *(int64_t *)
2907                                             (centry->cc_data +
2908                                             FBA_SIZE(st_cblk_off + st_cblk_len)
2909                                             - 8));
2910                                 }
2911                         }
2912                         cblk++;
2913                 } else if (fba_len == (nsc_size_t)end_cblk_len) {
2914                         lentry->cc_chain = centry;
2915                         fba_len -= end_cblk_len;
2916                         if (flag & NSC_RDBUF) {
2917                                 if (ioent == NULL) {
2918                                         if (!SDBC_VALID_BITS(0, end_cblk_len,
2919                                             centry)) {
2920                                                 io_pos = BLK_TO_FBA_NUM(cblk);
2921                                                 ioent = centry;
2922                                         } else {
2923                                                 DATA_LOG(SDF_ALLOC, centry, 0,
2924                                                     end_cblk_len);
2925 
2926                                                 DTRACE_PROBE4(_sd_alloc_data2,
2927                                                     uint64_t,
2928                                                     BLK_TO_FBA_NUM(cblk),
2929                                                     int, end_cblk_len,
2930                                                     char *, *(int64_t *)
2931                                                     (centry->cc_data),
2932                                                     char *, *(int64_t *)
2933                                                     (centry->cc_data +
2934                                                     FBA_SIZE(end_cblk_len)
2935                                                     - 8));
2936                                         }
2937                                 }
2938                         }
2939                 } else {
2940                         lentry->cc_chain = centry;
2941                         lentry = centry;
2942                         fba_len -= BLK_FBAS;
2943                         if (flag & NSC_RDBUF) {
2944                                 if (ioent == NULL) {
2945                                         if (!FULLY_VALID(centry)) {
2946                                                 io_pos = BLK_TO_FBA_NUM(cblk);
2947                                                 ioent = centry;
2948                                         } else {
2949                                                 DATA_LOG(SDF_ALLOC, centry, 0,
2950                                                     BLK_FBAS);
2951 
2952                                                 DTRACE_PROBE4(_sd_alloc_data3,
2953                                                     uint64_t, (uint64_t)
2954                                                     BLK_TO_FBA_NUM(cblk),
2955                                                     int, BLK_FBAS,
2956                                                     char *, *(int64_t *)
2957                                                     (centry->cc_data),
2958                                                     char *, *(int64_t *)
2959                                                     (centry->cc_data +
2960                                                     FBA_SIZE(BLK_FBAS) - 8));
2961                                         }
2962                                 }
2963                         }
2964                         cblk++;
2965                 }
2966 
2967                 /* if this block has a new identity, clear prefetch history */
2968                 if (this_entry_type != HASH_ENTRY_DM)
2969                         centry->cc_aging_dm &=
2970                             ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2971 
2972                 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2973                 centry->cc_aging_dm |= this_entry_type;
2974                 if (flag & NSC_METADATA)
2975                         centry->cc_aging_dm |= STICKY_METADATA_DM;
2976 
2977                 --dmchain_request_blocks;
2978         } while (fba_len);
2979 
2980         if (locked) {
2981                 rw_exit(&sdbc_queue_lock);
2982                 locked = 0;
2983         }
2984 
2985         ASSERT(dmchain_request_blocks == 0);
2986 
2987         /*
2988          * do any necessary cleanup now that all the blocks are allocated.
2989          */
2990         sdbc_centry_alloc_end(&alloc_tok);
2991 
2992         /* be sure to null terminate the chain */
2993         centry->cc_chain = NULL;
2994 
2995         /*
2996          * step one: establish HOST/PARASITE/OTHER relationships
2997          * between the centry elements in the list and calculate the alloc
2998          * size (fill in CATEGORY based on TYPE and immediate neighbors)
2999          */
3000         if ((sts = _sd_setup_category_on_type(handle->bh_centry)) != 0) {
3001 #ifdef DEBUG
3002                 err = _sd_free_buf(handle);
3003                 if (err) {
3004                         cmn_err(CE_WARN, "!sdbc(_sd_alloc_buf): _sd_free_buf "
3005                             "failed: err:%d handle:%p", err, (void *)handle);
3006                 }
3007 #else
3008                 (void) _sd_free_buf(handle);
3009 #endif
3010                 goto done;
3011         }
3012 
3013         /*
3014          * step two: allocate the needed memory and fill in the data and
3015          * chaining fields (leave bufvec for step three)
3016          */
3017         (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
3018 
3019         /*
3020          * step three: do the bufvec
3021          */
3022         fba_len = fba_orig_len;
3023         centry = handle->bh_centry;
3024         bufvec = handle->bh_bufvec;
3025 
3026         while (centry) {
3027                 DTRACE_PROBE3(_sd_alloc_buf_centrys, _sd_cctl_t *, centry,
3028                     int, cd, uint64_t,
3029                     (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(centry)));
3030 
3031                 if (fba_len == fba_orig_len) {
3032                         bufvec->bufaddr = (centry->cc_data +
3033                             FBA_SIZE(st_cblk_off));
3034                         bufvec->bufvmeaddr = 0; /* not used */
3035                         bufvec->buflen  = FBA_SIZE(st_cblk_len);
3036                         bufvec++;
3037                         fba_len -= st_cblk_len;
3038                 } else if (fba_len == (nsc_size_t)end_cblk_len) {
3039                         _sd_bufvec_t *pbufvec = bufvec - 1;
3040 
3041                         if ((pbufvec->bufaddr + pbufvec->buflen) ==
3042                             centry->cc_data) {
3043                                 /* contiguous */
3044                                 pbufvec->buflen += FBA_SIZE(end_cblk_len);
3045                         } else {
3046 
3047                                 bufvec->bufaddr = centry->cc_data;
3048                                 bufvec->bufvmeaddr = 0; /* not used */
3049                                 bufvec->buflen = FBA_SIZE(end_cblk_len);
3050                                 bufvec++;
3051                         }
3052 
3053                         fba_len -= end_cblk_len;
3054                 } else {
3055                         _sd_bufvec_t *pbufvec = bufvec - 1;
3056 
3057                         if ((pbufvec->bufaddr + pbufvec->buflen) ==
3058                             centry->cc_data) {
3059                                 /* contiguous */
3060                                 pbufvec->buflen += CACHE_BLOCK_SIZE;
3061                         } else {
3062 
3063                                 bufvec->bufaddr = centry->cc_data;
3064                                 bufvec->bufvmeaddr = 0; /* not used */
3065                                 bufvec->buflen  = CACHE_BLOCK_SIZE;
3066                                 bufvec++;
3067                         }
3068 
3069                         fba_len -= BLK_FBAS;
3070                 }
3071 
3072                 centry = centry->cc_chain;
3073         }
3074 
3075         /* be sure to null terminate the chain */
3076         bufvec->bufaddr = NULL;
3077         bufvec->bufvmeaddr = 0;
3078         bufvec->buflen = 0;
3079 
3080         /* frag statistics */
3081         {
3082                 _sd_bufvec_t *tbufvec;
3083 
3084                 for (tbufvec = handle->bh_bufvec; tbufvec != bufvec;
3085                     ++tbufvec) {
3086                         if ((min_frag > tbufvec->buflen) || (min_frag == 0))
3087                                 min_frag = tbufvec->buflen;
3088 
3089                         if (max_frag < tbufvec->buflen)
3090                                 max_frag = tbufvec->buflen;
3091                 }
3092 
3093                 nfrags = bufvec - handle->bh_bufvec;
3094                 min_frag = FBA_LEN(min_frag);
3095                 max_frag = FBA_LEN(max_frag);
3096         }
3097 
3098         /* buffer memory frag stats */
3099         DTRACE_PROBE4(_sd_alloc_buf_frag, uint64_t, (uint64_t)fba_orig_len,
3100             int, nfrags, int, min_frag, int, max_frag);
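
        /*
         * Worked example of the statistics above (illustrative only,
         * assuming a 512-byte FBA): a buffer that ended up in three
         * fragments of 8K, 32K and 16K yields nfrags == 3 and, after
         * the FBA_LEN() conversions, min_frag == 16 and max_frag == 64.
         */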
3101 
3102 
3103         if (flag & NSC_WRBUF) {
3104                 if (_SD_IS_WRTHRU(handle))
3105                         goto alloc_done;
3106                 if (_sd_alloc_write(handle->bh_centry, &stall)) {
3107                         _sd_unblock(&_sd_flush_cv);
3108                         handle->bh_flag |= NSC_FORCED_WRTHRU;
3109                 } else {
3110                         for (centry = handle->bh_centry;
3111                             centry; centry = centry->cc_chain) {
3112 
3113                                 CENTRY_SET_FTPOS(centry);
3114                                 SSOP_SETCENTRY(sdbc_safestore,
3115                                     centry->cc_write);
3116                         }
3117                 }
3118         }
3119 
3120 alloc_done:
3121         if (locked) {
3122                 rw_exit(&sdbc_queue_lock);
3123                 locked = 0;
3124         }
3125         if (ioent) {
3126                 _SD_DISCONNECT_CALLBACK(handle);
3127                 sts = _sd_doread(handle,  ioent, io_pos,
3128                     (fba_pos + fba_orig_len - io_pos), flag);
3129                 if (sts > 0)
3130                         (void) _sd_free_buf(handle);
3131         } else if (flag & NSC_RDBUF) {
3132                 CACHE_FBA_READ(cd, fba_orig_len);
3133                 CACHE_READ_HIT;
3134                 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
3135 
3136                 sts = NSC_HIT;
3137         } else {
3138                 sts = (stall) ? NSC_DONE : NSC_HIT;
3139         }
3140 
3141         SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_orig_len, fba_pos, flag, sts);
3142 
3143 done:
3144         if (locked)
3145                 rw_exit(&sdbc_queue_lock);
3146 
3147         KSTAT_RUNQ_EXIT(cd);
3148 
3149         return (sts);
3150 }
3151 
3152 /*
3153  * consistency checking for ccents
3154  */
3155 
3156 #define ELIGIBLE(p) ((p) & ELIGIBLE_ENTRY_DM)
3157 #define HOLD(p) ((p) & HOLD_ENTRY_DM)
3158 #define HASHE(p) ((p) & HASH_ENTRY_DM)
3159 
3160 #define HOST(p) ((p) & HOST_ENTRY_DM)
3161 #define PARA(p) ((p) & PARASITIC_ENTRY_DM)
3162 #define OTHER(p) \
3163         (!((p) & (HOST_ENTRY_DM | PARASITIC_ENTRY_DM | ELIGIBLE_ENTRY_DM)))
3164 
3165 #define AVAIL(p) ((p) & AVAIL_ENTRY_DM)
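
/*
 * Minimal decoding sketch for the predicates above (illustrative
 * only; "age" is a local copy of the aging field):
 *
 *      uint_t age = centry->cc_aging_dm;
 *
 *      if (ELIGIBLE(age))      ... no data or size assigned yet
 *      else if (HOST(age))     ... owns the memory, parasites follow
 *      else if (PARA(age))     ... data points into a host's memory
 *      else if (OTHER(age))    ... standalone, sized like a host
 */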
3166 
3167 /*
3168  * sdbc_check_cctl_cot -- consistency check for _sd_setup_category_on_type()
3169  * may only be called on entry to the state machine (when ccent is either
3170  * ELIGIBLE_ENTRY_DM, HOLD_ENTRY_DM or HASH_ENTRY_DM).
3171  *
3172  * print message or panic (DEBUG) if inconsistency detected.
3173  */
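/*
 * Example of an inconsistency this check catches (illustrative
 * only): an ELIGIBLE_ENTRY_DM centry must carry no memory, so a
 * non-NULL cc_data or a non-zero cc_alloc_size_dm trips the
 * "eligible has no data and no size" test below and the ccent is
 * reported via cmn_err() (which may panic, depending on cmn_level).
 */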
3174 static int
3175 sdbc_check_cctl_cot(_sd_cctl_t *centry)
3176 {
3177         uint_t age;
3178         int size;
3179         uchar_t *data;
3180         int host_or_other;
3181         int para;
3182         int ccent_ok = 1;
3183 
3184         age = centry->cc_aging_dm;
3185         size = centry->cc_alloc_size_dm;
3186         data = centry->cc_data;
3187         host_or_other = size && data;
3188         para = !size && data;
3189 
3190         /*
3191          * on entry to _sd_setup_category_on_type(),
3192          * one of three mutually exclusive entry field bits must be set
3193          */
3194 
3195         switch ((age & (ELIGIBLE_ENTRY_DM | HOLD_ENTRY_DM | HASH_ENTRY_DM))) {
3196                 case ELIGIBLE_ENTRY_DM:
3197                 case HOLD_ENTRY_DM:
3198                 case HASH_ENTRY_DM:
3199                         /* ok */
3200                         break;
3201                 default:
3202                         /* zero or multiple flag bits */
3203                         ccent_ok = 0;
3204                         break;
3205         }
3206 
3207         /* categories are mutually exclusive */
3208         if (HOST(age) && PARA(age))
3209                 ccent_ok = 0;
3210 
3211         /* these bits should be cleared out (STICKY_METADATA_DM not used) */
3212         if (age & (AVAIL_ENTRY_DM | FOUND_HOLD_OVER_DM | FOUND_IN_HASH_DM |
3213             STICKY_METADATA_DM))
3214                 ccent_ok = 0;
3215 
3216         /* eligible has no data and no size */
3217         if (ELIGIBLE(age) && (size || data))
3218                 ccent_ok = 0;
3219 
3220         /* parasite has zero size and non-zero data */
3221         if (PARA(age) && !para)
3222                 ccent_ok = 0;
3223 
3224         /* host has non-zero size and non-zero data */
3225         if (HOST(age) && !host_or_other)
3226                 ccent_ok = 0;
3227 
3228         /* "other" is just like a host */
3229         if (OTHER(age) && !host_or_other)
3230                 ccent_ok = 0;
3231 
3232         /* only a HOLD or a HASH may have a size */
3233         if ((size) && !(age & (HASH_ENTRY_DM | HOLD_ENTRY_DM)))
3234                 ccent_ok = 0;
3235 
3236         if (!ccent_ok)
3237                 cmn_err(cmn_level,
3238                     "!sdbc(sdbc_check_cctl_cot): inconsistent ccent %p "
3239                     "age %x size %d data %p", (void *)centry, age, size,
3240                     (void *)data);
3241 
3242         return (ccent_ok);
3243 }
3244 
3245 /*
3246  * sdbc_mark_cctl_cot  -- mark cctls bad and invalidate when
3247  *                        inconsistency found in _sd_setup_category_on_type()
3248  * returns nothing
3249  *
3250  * Note:  this is an error recovery path that is triggered when an
3251  * inconsistency in a cctl is detected.  _sd_centry_release() will take
3252  * these cache entries out of circulation and place them on a separate list
3253  * for debugging purposes.
3254  */
3255 void
3256 sdbc_mark_cctl_cot(_sd_cctl_t *header, _sd_cctl_t *centry)
3257 {
3258         _sd_cctl_t *cur_ent = header;
3259 
3260         /* the entire chain is guilty by association */
3261         while (cur_ent) {
3262 
3263                 (void) _sd_hash_delete((struct _sd_hash_hd *)cur_ent,
3264                     _sd_htable);
3265 
3266                 cur_ent->cc_aging_dm |= BAD_CHAIN_DM;
3267 
3268                 cur_ent = cur_ent->cc_chain;
3269         }
3270 
3271         centry->cc_aging_dm |= BAD_ENTRY_DM; /* this is the problem child */
3272 }
3273 
3274 /*
3275  * _sd_setup_category_on_type(_sd_cctl_t *) - Setup the centry CATEGORY based on
3276  * centry TYPE and immediate neighbors. Identify each eligible (i.e. not HASH)
3277  * centry as a host or parasite. Hosts actually have memory allocated to
3278  * them and parasites are chained to the host and point to page offsets within
3279  * the host's memory.
3280  *
3281  * RETURNS:
3282  *      0 on success, EINTR if inconsistency detected in centry
3283  *
3284  * Note:
3285  *      none
3286  */
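/*
 * Worked example (illustrative only): for a chain of three
 * consecutive ELIGIBLE centrys A -> B -> C, A becomes the HOST with
 * cc_alloc_size_dm = BLK_SIZE(1), while B and C become PARASITEs;
 * each parasite records its future offset into A's memory in its own
 * cc_alloc_size_dm and then grows A's size by BLK_SIZE(1), leaving A
 * sized for all three blocks.  A lone ELIGIBLE centry is instead
 * demoted to the "other" category and sized for a single block.
 */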
3287 static int
3288 _sd_setup_category_on_type(_sd_cctl_t *header)
3289 {
3290         _sd_cctl_t *prev_ent, *next_ent, *centry;
3291         _sd_cctl_t *anchor = NULL;
3292         int      current_pest_count, local_max_dyn_list;
3293         int      cl;
3294         int ret = 0;
3295 
3296         ASSERT(header);
3297 
3298         if (sdbc_use_dmchain)
3299                 local_max_dyn_list = max_dm_queues - 1;
3300         else {
3301                 /* pick up a fresh copy - the world may have changed */
3302                 local_max_dyn_list = dynmem_processing_dm.max_dyn_list;
3303         }
3304 
3305         prev_ent = 0;
3306         centry = header;
3307         next_ent = centry->cc_chain;
3308         current_pest_count = 0;
3309         cl = 2;
3310 
3311         /* try to recover from bad cctl */
3312         if (sdbc_check_cot && !sdbc_check_cctl_cot(centry))
3313                 ret = EINTR;
3314 
3315         while (cl && (ret == 0)) {
3316                 switch (cl) {
3317                         case (1):  /* chain to next/monitor for completion */
3318                                 prev_ent = centry;
3319                                 centry = next_ent;
3320                                 next_ent = 0;
3321                                 cl = 0;
3322                                 if (centry) {
3323 
3324                                         if (sdbc_check_cot &&
3325                                             !sdbc_check_cctl_cot(centry)) {
3326                                                 ret = EINTR;
3327                                                 break;
3328                                         }
3329 
3330                                         next_ent = centry->cc_chain;
3331                                         cl = 2;
3332                                 }
3333                         break;
3334 
3335                         case (2): /* vector to appropriate routine */
3336                                 if (!(centry->cc_aging_dm & ELIGIBLE_ENTRY_DM))
3337                                         cl = 5;
3338                                 else if (prev_ent && (prev_ent->cc_aging_dm &
3339                                     ELIGIBLE_ENTRY_DM))
3340                                         cl = 15;
3341                                 else
3342                                         cl = 10;
3343                         break;
3344 
3345                         case (5): /* process NON-ELIGIBLE entries */
3346                                 if (!(centry->cc_aging_dm &
3347                                     (HASH_ENTRY_DM|HOLD_ENTRY_DM))) {
3348                                         /* no category */
3349 
3350                                         /* consistency check */
3351                                         if (centry->cc_alloc_size_dm ||
3352                                             centry->cc_data) {
3353                                                 cmn_err(cmn_level,
3354                                                     "!sdbc(setup_cot): "
3355                                                     "OTHER with data/size %p",
3356                                                     (void *)centry);
3357 
3358                                                 ret = EINTR;
3359                                                 break;
3360                                         }
3361 
3362                                         centry->cc_aging_dm &=
3363                                             ~CATAGORY_ENTRY_DM;
3364                                         centry->cc_alloc_size_dm = BLK_SIZE(1);
3365                                         DTRACE_PROBE1(_sd_setup_category,
3366                                             _sd_cctl_t *, centry);
3367                                 }
3368                                 cl = 1;
3369                         break;
3370 
3371                         /*
3372                          * no prev entry (i.e. top of list) or no prev
3373                          * ELIGIBLE entry
3374                          */
3375                         case (10):
3376                                 /*
3377                                  * this is an eligible entry, does it start
3378                                  * a list or is it a loner
3379                                  */
3380                                 /* consistency check */
3381                                 if (centry->cc_alloc_size_dm ||
3382                                     centry->cc_data) {
3383                                         cmn_err(cmn_level, "!sdbc(setup_cot): "
3384                                             "HOST with data/size %p",
3385                                             (void *)centry);
3386                                         ret = EINTR;
3387                                         break;
3388                                 }
3389 
3390                                 if (next_ent && (next_ent->cc_aging_dm &
3391                                     ELIGIBLE_ENTRY_DM)) {
3392 
3393 
3394                                         /* it starts a list */
3395                                         /* host category */
3396                                         centry->cc_aging_dm |= HOST_ENTRY_DM;
3397                                         /* start out with one page */
3398                                         centry->cc_alloc_size_dm = BLK_SIZE(1);
3399                                         anchor = centry;
3400                                         DTRACE_PROBE1(_sd_setup_category,
3401                                             _sd_cctl_t *, anchor);
3402                                         cl = 1;
3403                                 } else {
3404                                         /*
3405                                          * it's a loner
3406                                          * drop status to no category and
3407                                          * restart
3408                                          */
3409                                         cl = 2;
3410                                         centry->cc_aging_dm &=
3411                                             ~ELIGIBLE_ENTRY_DM;
3412                                 }
3413                         break;
3414 
3415                         case (15): /* default to parasite category */
3416 
3417                                 /* consistency check */
3418                                 if (centry->cc_alloc_size_dm ||
3419                                     centry->cc_data) {
3420                                         cmn_err(cmn_level, "!sdbc(setup_cot): "
3421                                             "PARA with data/size %p",
3422                                             (void *)centry);
3423 
3424                                         ret = EINTR;
3425                                         break;
3426                                 }
3427 
3428                                 if (current_pest_count < local_max_dyn_list-1) {
3429                                         /* continue to grow the pest list */
3430                                         current_pest_count++;
3431                                         centry->cc_aging_dm |=
3432                                             PARASITIC_ENTRY_DM;
3433 
3434                                         /*
3435                                          * offset into the host entry's
3436                                          * memory that this will point to
3437                                          */
3438                                         centry->cc_alloc_size_dm =
3439                                             anchor->cc_alloc_size_dm;
3440                                         /*
3441                                          * up the host memory requirement by
3442                                          * one block for this parasite
3443                                          */
3444                                         DTRACE_PROBE1(_sd_setup_category,
3445                                             _sd_cctl_t *, centry);
3446 
3447                                         anchor->cc_alloc_size_dm += BLK_SIZE(1);
3448 
3449                                         cl = 1;
3450                                 } else {
3451                                         /*
3452                                          * term this pest list - restart fresh
3453                                          * on this entry
3454                                          */
3455                                         current_pest_count = 0;
3456                                         prev_ent->cc_aging_dm &=
3457                                             ~(HOST_ENTRY_DM|ELIGIBLE_ENTRY_DM);
3458                                         cl = 2;
3459                                 }
3460                         break;
3461                         } /* switch(cl) */
3462         } /* while (cl) */
3463 
3464         if (ret != 0)
3465                 sdbc_mark_cctl_cot(header, centry);
3466 
3467         return (ret);
3468 }
3469 
3470 /*
3471  * _sd_setup_mem_chaining(_sd_cctl_t *) - Allocate memory, set up
3472  * memory pointers and host/parasite chaining. Do the actual allocation as
3473  * described in _sd_setup_category_on_type().
3474  *
3475  * RETURNS:
3476  *      0 on success
3477  *      non-zero on error
3478  *
3479  * Note:
3480  *      if called with ALLOC_NOWAIT, caller must check for non-zero return
3481  */
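/*
 * Resulting layout sketch (illustrative only): once a host A has been
 * given memory, each parasite P chained to it is wired up in case 15
 * below as
 *
 *      P->cc_head_dm = A;
 *      P->cc_data = A->cc_data + P->cc_alloc_size_dm;
 *      P->cc_alloc_size_dm = 0;
 *
 * so parasites own no allocation of their own, only pointers into the
 * host's block.
 */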
3482 static int
3483 _sd_setup_mem_chaining(_sd_cctl_t *header, int flag)
3484 {
3485         _sd_cctl_t *prev_ent, *next_ent, *centry;
3486         _sd_cctl_t *anchor = NULL;
3487         int cl, rc = 0;
3488 
3489         ASSERT(header);
3490 
3491         if (!header)
3492                 return (0);
3493 
3494         prev_ent = 0;
3495         centry = header;
3496         next_ent = centry->cc_chain;
3497         cl = 2;
3498         while (cl) {
3499                 switch (cl) {
3500                         case (1):  /* chain to next/monitor for completion */
3501                                 centry->cc_aging_dm &= ~ELIGIBLE_ENTRY_DM;
3502                                 prev_ent = centry;
3503                                 centry = next_ent;
3504                                 next_ent = 0;
3505                                 cl = 0;
3506                                 if (centry) {
3507                                         next_ent = centry->cc_chain;
3508                                         cl = 2;
3509                                 }
3510                         break;
3511 
3512                         case (2): /* vector to appropriate routine */
3513                                 if (centry->cc_aging_dm & HOST_ENTRY_DM)
3514                                         cl = 10;
3515                                 else if (centry->cc_aging_dm &
3516                                     PARASITIC_ENTRY_DM)
3517                                         cl = 15;
3518                                 else
3519                                         cl = 5;
3520                         break;
3521 
3522                         case (5): /* OTHER processing - alloc mem */
3523                                 if ((rc = sdbc_centry_memalloc_dm(centry,
3524                                     centry->cc_alloc_size_dm, flag)) != 0)
3525                                         /* The allocation failed */
3526                                         cl = 0;
3527                                 else
3528                                         cl = 1;
3529                         break;
3530 
3531                                 /*
3532                                  * HOST entry processing - save the anchor pt,
3533                                  * alloc the memory,
3534                                  */
3535                         case (10): /* setup head and nxt ptrs */
3536                                 anchor = centry;
3537                                 if ((rc = sdbc_centry_memalloc_dm(centry,
3538                                     centry->cc_alloc_size_dm, flag)) != 0)
3539                                         /* The allocation failed */
3540                                         cl = 0;
3541                                 else
3542                                         cl = 1;
3543                         break;
3544 
3545                                 /*
3546                                  * PARASITIC entry processing - setup w/no
3547                                  * memory, setup head/next ptrs,
3548                                  */
3549                         case (15):
3550                                 /*
3551                                  * fudge the data mem ptr to an offset from
3552                                  * the anchor alloc
3553                                  */
3554                                 if (!(centry->cc_aging_dm &
3555                                     (HASH_ENTRY_DM | HOLD_ENTRY_DM))) {
3556                                         centry->cc_head_dm = anchor;
3557 
3558                                         /* chain prev to this */
3559                                         prev_ent->cc_next_dm = centry;
3560 
3561                                         /*
3562                                          * generate the actual data ptr into
3563                                          * host entry memory
3564                                          */
3565                                         centry->cc_data = anchor->cc_data +
3566                                             centry->cc_alloc_size_dm;
3567                                         centry->cc_alloc_size_dm = 0;
3568                                 }
3569                                 cl = 1;
3570                         break;
3571                 } /* switch(cl) */
3572         } /* while (cl) */
3573 
3574         return (rc);
3575 }
3576 
3577 /*
3578  * _sd_check_buffer_alloc - Check if buffer allocation is invalid.
3579  *
3580  * RETURNS:
3581  *      0 if it is OK to continue with the allocation.
3582  *      Else errno to be returned to the user.
3583  *
3584  * Note:
3585  *      This routine could block if the device is not local and
3586  *      recovery is in progress.
3587  */
3588 
3589 /* ARGSUSED */
3590 static int
3591 _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
3592     _sd_buf_handle_t **hp)
3593 {
3594         /*
3595          * This check exists to ensure that someone will not pass in an
3596          * arbitrary pointer and try to pass it off as a handle.
3597          */
3598         if ((*hp)->bh_flag & (~_SD_VALID_FLAGS)) {
3599                 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3600                     "cd %d invalid handle %p flags %x",
3601                     cd, (void *)*hp, (*hp)->bh_flag);
3602                 return (EINVAL);
3603         }
3604 
3605         if ((_sd_cache_initialized == 0) || (FILE_OPENED(cd) == 0)) {
3606                 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3607                     "cd %d not open. Cache init %d",
3608                     cd, _sd_cache_initialized);
3609                 return (EINVAL);
3610         }
3611         ASSERT(cd >= 0);
3612         if (!(_sd_cache_files[cd].cd_rawfd) ||
3613             !nsc_held(_sd_cache_files[cd].cd_rawfd)) {
3614                 cmn_err(CE_WARN,
3615                     "!sdbc(_sd_check_buffer_alloc) cd %d is not attached", cd);
3616                 return (EINVAL);
3617         }
3618 
3619         ASSERT_IO_SIZE(fba_pos, fba_len, cd);
3620         ASSERT_LEN(fba_len);
3621 
3622         return (0);
3623 }
3624 
3625 /*
3626  * sdbc_check_handle -- check that handle is valid
3627  * return 1 if ok, 0 otherwise (if debug then panic).
3628  */
3629 static int
3630 sdbc_check_handle(_sd_buf_handle_t *handle)
3631 {
3632         int ret = 1;
3633 
3634         if (!_SD_HANDLE_ACTIVE(handle)) {
3635 
3636                 cmn_err(cmn_level, "!sdbc(_sd_free_buf): invalid handle %p "
3637                     "cd %d fpos %" NSC_SZFMT " flen %" NSC_SZFMT " flag %x",
3638                     (void *)handle, HANDLE_CD(handle), handle->bh_fba_pos,
3639                     handle->bh_fba_len, handle->bh_flag);
3640 
3641                 ret = 0;
3642         }
3643 
3644         return (ret);
3645 }
3646 
3647 /*
3648  * _sd_free_buf -  Free the buffers allocated in _sd_alloc_buf.
3649  *
3650  * ARGUMENTS:
3651  *      handle  -  The handle allocated in _sd_alloc_buf.
3652  *
3653  * RETURNS:
3654  *      0 on success.
3655  *      Else errno.
3656  *
3657  * NOTE:
3658  *      If handle was allocated through _sd_alloc_buf, the handle allocated
3659  *      flag (NSC_HALLOCATED) will be reset by _sd_alloc_buf. This indicates
3660  *      that _sd_free_buf should free up the handle as well.
3661  *      All other handles directly allocated from _sd_alloc_handle will have
3662  *      that flag set. Any handle with valid blocks will have the handle
3663  *      active flag. It is an error if the active flag is not set.
3664  *      (if free_buf were called without going through alloc_buf)
3665  */
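/*
 * Sketch of the NSC_HALLOCATED convention described above
 * (illustrative only): for a handle obtained directly from
 * _sd_alloc_handle() the flag stays set, so
 *
 *      (void) _sd_free_buf(h);         releases the buffers only
 *
 * leaves h intact for reuse, whereas a handle that _sd_alloc_buf()
 * created internally has the flag clear and is handed on to
 * _sd_free_handle() here as well.
 */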
3666 
3667 int
3668 _sd_free_buf(_sd_buf_handle_t *handle)
3669 {
3670         _sd_cctl_t *centry, *cc_chain;
3671         int cd = HANDLE_CD(handle);
3672         int flen = handle->bh_fba_len;
3673         int fpos = handle->bh_fba_pos;
3674 
3675         SDTRACE(ST_ENTER|SDF_FREEBUF, HANDLE_CD(handle),
3676             handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
3677 
3678         if (sdbc_check_handle(handle) == 0)
3679                 return (EINVAL);
3680 
3681         if (handle->bh_flag & NSC_MIXED) {
3682                 /*
3683                  * Data in this handle will be a mix of data from the
3684                  * source device and data from another device, so
3685                  * invalidate all the blocks.
3686                  */
3687                 handle->bh_flag &= ~NSC_QUEUE;
3688                 centry = handle->bh_centry;
3689                 while (centry) {
3690                         centry->cc_valid = 0;
3691                         centry = centry->cc_chain;
3692                 }
3693         }
3694 
3695         if ((handle->bh_flag & NSC_QUEUE)) {
3696                 handle->bh_flag &= ~NSC_QUEUE;
3697                 _sd_queue_write(handle, handle->bh_fba_pos, handle->bh_fba_len);
3698         }
3699 
3700         handle->bh_flag &= ~NSC_HACTIVE;
3701 
3702         centry = handle->bh_centry;
3703         while (centry) {
3704                 cc_chain = centry->cc_chain;
3705                 _sd_centry_release(centry);
3706                 centry = cc_chain;
3707         }
3708 
3709         /*
3710          * help prevent a duplicate call to _sd_centry_release if this
3711          * handle is erroneously _sd_free_buf'd twice (should not happen).
3712          */
3713         handle->bh_centry = NULL;
3714 
3715         if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
3716                 handle->bh_flag |= NSC_HALLOCATED;
3717                 (void) _sd_free_handle(handle);
3718         } else {
3719                 handle->bh_flag = NSC_HALLOCATED;
3720         }
3721 
3722         SDTRACE(ST_EXIT|SDF_FREEBUF, cd, flen, fpos, 0, 0);
3723 
3724         return (0);
3725 }
3726 
3727 
3728 static int _sd_lruq_srch = 0x2000;
3729 
3730 /*
3731  * sdbc_get_dmchain -- get a candidate centry chain pointing to
3732  *                      contiguous memory
3733  *      ARGUMENTS:
3734  *      cblocks  - number of cache blocks requested
3735  *      stall   - pointer to stall count (no blocks avail)
3736  *      flag    - ALLOC_NOWAIT flag
3737  *
3738  *      RETURNS:
3739  *              a cache entry, or possibly NULL if ALLOC_NOWAIT is set
3740  *      USAGE:
3741  *              attempt to satisfy entire request from queue
3742  *              that has no memory allocated.
3743  *              if this fails then attempt a partial allocation
3744  *              with a preallocated block of requested size up to
3745  *              max_dyn_list.
3746  *              then look for largest chain less than max_dyn_list.
3747  */
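/*
 * Worked example of the search order above (illustrative only), for
 * cblocks == 5:
 *
 *      1. queue 0 (entries with no memory attached): take five
 *         entries outright if enough are present;
 *      2. sdbc_dm_queues[5]: reuse an entire preallocated five-block
 *         dmchain, inuse/pageio/dirty state permitting;
 *      3. nudge the deallocator and retry, also probing progressively
 *         smaller preallocated chains (4, 3, ...), until a chain frees
 *         up - or give up immediately if ALLOC_NOWAIT is set.
 */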
3748 static _sd_cctl_t *
3749 sdbc_get_dmchain(int cblocks, int *stall, int flag)
3750 {
3751         _sd_cctl_t *cc_dmchain = NULL;
3752         _sd_queue_t *q;
3753         _sd_cctl_t *qhead;
3754         int num_tries;
3755         int cblocks_orig = cblocks;
3756         int nowait = flag & ALLOC_NOWAIT;
3757         int i;
3758 
3759         num_tries = _sd_lruq_srch;
3760 
3761         ASSERT(cblocks != 0);
3762 
3763         while (!cc_dmchain) {
3764                 /* get it from the os if possible */
3765                 q = &sdbc_dm_queues[0];
3766                 qhead = &(q->sq_qhead);
3767 
3768                 if (q->sq_inq >= cblocks) {
3769                         mutex_enter(&q->sq_qlock);
3770                         if (q->sq_inq >= cblocks) {
3771                                 _sd_cctl_t *cc_ent;
3772 
3773                                 cc_dmchain = qhead->cc_next;
3774 
3775                                 /*
3776                                  * set the inuse and pageio bits
3777                                  * Note: this code expects the cc_ent to
3778                                  * be available.  no other thread may set the
3779                                  * inuse or pageio bit for an entry on the
3780                                  * 0 queue.
3781                                  */
3782                                 cc_ent = qhead;
3783                                 for (i = 0; i < cblocks; ++i) {
3784                                         cc_ent = cc_ent->cc_next;
3785 
3786                                         if (SET_CENTRY_INUSE(cc_ent)) {
3787                                                 cmn_err(CE_PANIC,
3788                                                     "centry inuse on 0 q! %p",
3789                                                     (void *)cc_ent);
3790                                         }
3791 
3792                                         if (SET_CENTRY_PAGEIO(cc_ent)) {
3793                                                 cmn_err(CE_PANIC,
3794                                                     "centry pageio on 0 q! %p",
3795                                                     (void *)cc_ent);
3796                                         }
3797                                 }
3798                                 /* got a dmchain */
3799 
3800                                 /* remove this chain from the 0 queue */
3801                                 cc_dmchain->cc_prev->cc_next = cc_ent->cc_next;
3802                                 cc_ent->cc_next->cc_prev = cc_dmchain->cc_prev;
3803                                 cc_dmchain->cc_prev = NULL;
3804                                 cc_ent->cc_next = NULL;
3805 
3806                                 q->sq_inq -= cblocks;
3807 
3808                                 ASSERT(GOOD_LRUSIZE(q));
3809 
3810                         }
3811                         mutex_exit(&q->sq_qlock);
3812                         if (cc_dmchain)
3813                                 continue;
3814                 }
3815 
3816                 /* look for a pre-allocated block of the requested size */
3817 
3818 
3819                 if (cblocks > (max_dm_queues - 1))
3820                         cblocks = max_dm_queues - 1;
3821 
3822                 q = &sdbc_dm_queues[cblocks];
3823                 qhead = &(q->sq_qhead);
3824 
3825                 if (q->sq_inq != 0) {
3826                         _sd_cctl_t *tmp_dmchain;
3827 
3828                         mutex_enter(&q->sq_qlock);
3829 
3830                         for (tmp_dmchain = qhead->cc_next; tmp_dmchain != qhead;
3831                             tmp_dmchain = tmp_dmchain->cc_next) {
3832 
3833                                 /*
3834                                  * get a dmchain
3835                                  * set the inuse and pageio bits
3836                                  */
3837                                 if (sdbc_dmchain_avail(tmp_dmchain)) {
3838                                         /* put on MRU end of queue */
3839                                         sdbc_requeue_dmchain(q, tmp_dmchain,
3840                                             1, 0);
3841                                         cc_dmchain = tmp_dmchain;
3842                                         break;
3843                                 }
3844                                 sdbc_dmchain_not_avail++;
3845                         }
3846 
3847                         mutex_exit(&q->sq_qlock);
3848                         if (cc_dmchain)
3849                                 continue;
3850                 }
3851 
3852                 /*
3853                  * spin block:
3854                  * nudge the deallocator, accelerate aging
3855                  */
3856 
3857                 mutex_enter(&dynmem_processing_dm.thread_dm_lock);
3858                 cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
3859                 mutex_exit(&dynmem_processing_dm.thread_dm_lock);
3860 
3861                 if (nowait)
3862                         break;
3863 
3864                 if (!(--num_tries)) {
3865                         delay(drv_usectohz(20000));
3866                         (void) (*stall)++;
3867                         num_tries = _sd_lruq_srch;
3868                         cblocks = cblocks_orig;
3869                 } else { /* see if smaller request size is available */
3870                         if (!(--cblocks))
3871                                 cblocks = cblocks_orig;
3872                 }
3873 
3874         } /* while (!cc_dmchain) */
3875 
3876         return (cc_dmchain);
3877 }
3878 
3879 static int
3880 sdbc_dmchain_avail(_sd_cctl_t *cc_ent)
3881 {
3882         int chain_avail = 1;
3883         _sd_cctl_t *anchor = cc_ent;
3884 
3885         while (cc_ent) {
3886 
3887                 ASSERT(_sd_cctl_valid(cc_ent));
3888 
3889                 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM) {
3890                         chain_avail = 0;
3891                         break;
3892                 }
3893 
3894                 if (CENTRY_DIRTY(cc_ent)) {
3895                         chain_avail = 0;
3896                         break;
3897                 }
3898                 if (SET_CENTRY_INUSE(cc_ent)) {
3899                         chain_avail = 0;
3900                         break;
3901                 }
3902 
3903                 if ((SET_CENTRY_PAGEIO(cc_ent))) {
3904 
3905                         CLEAR_CENTRY_INUSE(cc_ent);
3906                         chain_avail = 0;
3907                         break;
3908                 }
3909 
3910                 if (CENTRY_DIRTY(cc_ent)) {
3911 
3912                         CLEAR_CENTRY_PAGEIO(cc_ent);
3913                         CLEAR_CENTRY_INUSE(cc_ent);
3914                         chain_avail = 0;
3915                         break;
3916                 }
3917 
3918                 cc_ent->cc_flag = 0;
3919                 cc_ent->cc_toflush = 0;
3920 
3921                 cc_ent = cc_ent->cc_next_dm;
3922         }
3923 
3924         if (!chain_avail)
3925                 sdbc_clear_dmchain(anchor, cc_ent);
3926         else {
3927                 cc_ent = anchor;
3928 
3929                 /*
3930                  * prevent possible deadlocks in _sd_cc_wait():
3931                  * remove from hash and wakeup any waiters now that we
3932                  * have acquired the chain.
3933                  */
3934                 while (cc_ent) {
3935                         (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
3936                             _sd_htable);
3937 
3938                         mutex_enter(&cc_ent->cc_lock);
3939                         if (cc_ent->cc_await_use) {
3940                                 cv_broadcast(&cc_ent->cc_blkcv);
3941                         }
3942                         mutex_exit(&cc_ent->cc_lock);
3943 
3944                         cc_ent->cc_creat = nsc_lbolt();
3945                         cc_ent->cc_hits = 0;
3946 
3947                         cc_ent = cc_ent->cc_next_dm;
3948                 }
3949         }
3950 
3951         return (chain_avail);
3952 }
3953 
3954 static void
3955 sdbc_clear_dmchain(_sd_cctl_t *cc_ent_start, _sd_cctl_t *cc_ent_end)
3956 {
3957         _sd_cctl_t *cc_ent = cc_ent_start;
3958         _sd_cctl_t *prev_ent;
3959 
3960         ASSERT(_sd_cctl_valid(cc_ent));
3961 
3962         while (cc_ent != cc_ent_end) {
3963 
3964                 ASSERT(_sd_cctl_valid(cc_ent));
3965 
3966                 prev_ent = cc_ent;
3967                 cc_ent = cc_ent->cc_next_dm;
3968 
3969                 CLEAR_CENTRY_PAGEIO(prev_ent);
3970                 CLEAR_CENTRY_INUSE(prev_ent);
3971         }
3972 
3973 }
3974 
3975 /*
3976  * put a dmchain on the LRU end of a queue
3977  */
3978 void
3979 sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent)
3980 {
3981         _sd_cctl_t *qhead = &(q->sq_qhead);
3982 
3983         ASSERT(_sd_cctl_valid(cc_ent));
3984 
3985         mutex_enter(&q->sq_qlock);
3986         cc_ent->cc_next = qhead->cc_next;
3987         cc_ent->cc_prev = qhead;
3988         qhead->cc_next->cc_prev = cc_ent;
3989         qhead->cc_next = cc_ent;
3990         q->sq_inq++;
3991         cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
3992 
3993         ASSERT(GOOD_LRUSIZE(q));
3994 
3995         mutex_exit(&q->sq_qlock);
3996 
3997 }
3998 
3999 /*
4000  * put a dmchain on the MRU end of a queue
4001  */
4002 static void
4003 sdbc_ins_dmqueue_back(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4004 {
4005         _sd_cctl_t *qhead = &(q->sq_qhead);
4006 
4007         ASSERT(_sd_cctl_valid(cc_ent));
4008 
4009         mutex_enter(&q->sq_qlock);
4010         cc_ent->cc_next = qhead;
4011         cc_ent->cc_prev = qhead->cc_prev;
4012         qhead->cc_prev->cc_next = cc_ent;
4013         qhead->cc_prev = cc_ent;
4014         cc_ent->cc_seq = q->sq_seq++;
4015         q->sq_inq++;
4016         cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
4017 
4018         ASSERT(GOOD_LRUSIZE(q));
4019 
4020         mutex_exit(&q->sq_qlock);
4021 
4022 }
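
/*
 * Queue layout note (illustrative): sq_qhead is the sentinel of a
 * circular doubly-linked list, so with the two insertion routines
 * above
 *
 *      qhead->cc_next is the LRU end (reclaimed first), and
 *      qhead->cc_prev is the MRU end (most recently inserted);
 *
 * sdbc_get_dmchain() accordingly starts its harvest at
 * qhead->cc_next.
 */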
4023 
4024 /*
4025  * remove dmchain from a queue
4026  */
4027 void
4028 sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4029 {
4030 
4031         ASSERT(_sd_cctl_valid(cc_ent));
4032 
4033         mutex_enter(&q->sq_qlock);
4034         cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4035         cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4036         cc_ent->cc_next = cc_ent->cc_prev = NULL; /* defensive programming */
4037         cc_ent->cc_cblocks = -1; /* indicate not on any queue */
4038 
4039         q->sq_inq--;
4040 
4041         ASSERT(GOOD_LRUSIZE(q));
4042 
4043         mutex_exit(&q->sq_qlock);
4044 
4045 }
4046 
4047 /*
4048  * requeue a dmchain to the MRU end of its queue.
4049  * if getlock is 0 on entry the queue lock (sq_qlock) must be held
4050  */
4051 void
4052 sdbc_requeue_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent, int mru,
4053                         int getlock)
4054 {
4055         _sd_cctl_t *qhead = &(q->sq_qhead);
4056 
4057 
4058         ASSERT(_sd_cctl_valid(cc_ent));
4059 
4060         if (getlock)
4061                 mutex_enter(&q->sq_qlock);
4062 
4063         /* inline of sdbc_remq_dmchain() */
4064         cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4065         cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4066 
4067         if (mru) { /* put on MRU end of queue */
4068                 /* inline of sdbc_ins_dmqueue_back */
4069                 cc_ent->cc_next = qhead;
4070                 cc_ent->cc_prev = qhead->cc_prev;
4071                 qhead->cc_prev->cc_next = cc_ent;
4072                 qhead->cc_prev = cc_ent;
4073                 cc_ent->cc_seq = q->sq_seq++;
4074                 (q->sq_req_stat)++;
4075         } else { /* put on LRU end of queue i.e. requeue to head */
4076                 /* inline of sdbc_ins_dmqueue_front */
4077                 cc_ent->cc_next = qhead->cc_next;
4078                 cc_ent->cc_prev = qhead;
4079                 qhead->cc_next->cc_prev = cc_ent;
4080                 qhead->cc_next = cc_ent;
4081                 cc_ent->cc_seq = q->sq_seq++;
4082 
4083                 /*
4084                  * clear the CC_QHEAD bit on all members of the chain
4085                  */
4086                 {
4087                         _sd_cctl_t *tcent;
4088 
4089                         for (tcent = cc_ent;  tcent; tcent = tcent->cc_next_dm)
4090                                 tcent->cc_flag &= ~CC_QHEAD;
4091                 }
4092         }
4093 
4094         if (getlock)
4095                 mutex_exit(&q->sq_qlock);
4096 
4097 }
4098 
4099 /*
4100  * sdbc_dmchain_dirty(cc_ent)
4101  * return first dirty cc_ent in dmchain, NULL if chain is not dirty
4102  */
4103 static _sd_cctl_t *
4104 sdbc_dmchain_dirty(_sd_cctl_t *cc_ent)
4105 {
4106         for (/* CSTYLED */;  cc_ent; cc_ent = cc_ent->cc_next_dm)
4107                 if (CENTRY_DIRTY(cc_ent))
4108                         break;
4109 
4110         return (cc_ent);
4111 }
4112 
4113 /*
4114  * sdbc_requeue_head_dm_try()
4115  * attempt to requeue a dmchain to the head of the queue
4116  */
4117 void
4118 sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent)
4119 {
4120         int qidx;
4121         _sd_queue_t *q;
4122 
4123         if (!sdbc_dmchain_dirty(cc_ent)) {
4124                 qidx = cc_ent->cc_cblocks;
4125                 q = &sdbc_dm_queues[qidx];
4126                 sdbc_requeue_dmchain(q, cc_ent, 0, 1); /* requeue head */
4127         }
4128 }
4129 
4130 /*
4131  * sdbc_centry_alloc_blks -- allocate cache entries with memory
4132  *
4133  * ARGUMENTS:
4134  *      cd      - Cache descriptor (from a previous open)
4135  *      cblk    - cache block number.
4136  *      reqblks - number of cache blocks to be allocated
4137  *      flag    - can be ALLOC_NOWAIT
4138  * RETURNS:
4139  *      A cache block chain, or NULL if ALLOC_NOWAIT and the request fails
4140  *
4141  *      Note: caller must check for null return if called with
4142  *      ALLOC_NOWAIT set.
4143  */
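/*
 * Minimal usage sketch (illustrative only):
 *
 *      _sd_cctl_t *chain;
 *
 *      chain = sdbc_centry_alloc_blks(cd, cblk, 4, ALLOC_NOWAIT);
 *      if (chain == NULL)
 *              ... back off and retry later ...
 *
 * Without ALLOC_NOWAIT the call may block until blocks become
 * available; NULL is then returned only on the consistency-check
 * failure path.
 */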
4144 _sd_cctl_t *
4145 sdbc_centry_alloc_blks(int cd, nsc_off_t cblk, nsc_size_t reqblks, int flag)
4146 {
4147         sdbc_allocbuf_t alloc_tok = {0}; /* must be 0 */
4148         int stall = 0;
4149         _sd_cctl_t *centry = NULL;
4150         _sd_cctl_t *lentry = NULL;
4151         _sd_cctl_t *anchor = NULL;
4152         _sd_cctl_t *next_centry;
4153 
4154         ASSERT(reqblks);
4155 
4156         while (reqblks) {
4157                 centry = sdbc_centry_alloc(cd, cblk, reqblks, &stall,
4158                     &alloc_tok, flag);
4159 
4160                 if (!centry)
4161                         break;
4162 
4163                 centry->cc_chain = NULL;
4164 
4165                 if (lentry == NULL)
4166                         anchor = centry;
4167                 else
4168                         lentry->cc_chain = centry;
4169 
4170                 lentry = centry;
4171 
4172                 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
4173 
4174                 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
4175                         centry->cc_aging_dm |= HASH_ENTRY_DM;
4176                 else
4177                         if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
4178                                 centry->cc_aging_dm |= HOLD_ENTRY_DM;
4179                         else
4180                                 centry->cc_aging_dm |= ELIGIBLE_ENTRY_DM;
4181 
4182                 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
4183                 --reqblks;
4184         }
4185 
4186         sdbc_centry_alloc_end(&alloc_tok);
4187 
4188         if (reqblks || (_sd_setup_category_on_type(anchor))) {
4189                 centry = anchor;
4190                 while (centry) {
4191                         next_centry = centry->cc_chain;
4192                         _sd_centry_release(centry);
4193                         centry = next_centry;
4194                 }
4195                 anchor = NULL;
4196 
4197         } else
4198                 /* This is where the memory is actually allocated */
4199                 if (_sd_setup_mem_chaining(anchor, flag))
4200                         anchor = NULL;
4201 
4202         return (anchor);
4203 }
4204 
4205 
4206 /*
4207  * sdbc_centry_alloc - sdbc internal function to allocate a new cache block.
4208  *
4209  * ARGUMENTS:
4210  *      cd      - Cache descriptor (from a previous open)
4211  *      cblk    - cache block number.
4212  *      stall   - pointer to stall count (no blocks avail)
4213  *      req_blocks - number of cache blocks remaining in caller's i/o request
4214  *      alloc_tok - pointer to token initialized to 0 on first call to function
4215  *      flag    - lock status of sdbc_queue_lock or ALLOC_NOWAIT flag
4216  * RETURNS:
4217  *      A cache block, or possibly NULL if ALLOC_NOWAIT is set.
4218  *
4219  * USAGE:
4220  *      switch to the appropriate allocation function.
4221  *      this function is used when callers need more than one cache block.
4222  *      it is called repeatedly until the entire request is satisfied,
4223  *      at which time the caller will then do the memory allocation.
4224  *      if only one cache block is needed callers may use
4225  *      sdbc_centry_alloc_blks() which also allocates memory.
4226  *
4227  *      Note: caller must check for null return if called with
4228  *      ALLOC_NOWAIT set.
4229  */
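/*
 * Sketch of the repeated-call pattern described above (illustrative
 * only):
 *
 *      sdbc_allocbuf_t tok = {0};      token must start zeroed
 *      int stall = 0;
 *
 *      while (request not yet satisfied)
 *              centry = sdbc_centry_alloc(cd, cblk, remaining,
 *                  &stall, &tok, flag);
 *      sdbc_centry_alloc_end(&tok);    releases allocator state
 *
 * sdbc_centry_alloc_blks() above is the in-file instance of this
 * loop, followed by the actual memory allocation.
 */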
4230 
4231 _sd_cctl_t *
4232 sdbc_centry_alloc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4233                         sdbc_allocbuf_t *alloc_tok, int flag)
4234 {
4235         _sd_cctl_t *centry;
4236 
4237         if (sdbc_use_dmchain)
4238                 centry = sdbc_alloc_dmc(cd, cblk, req_blocks, stall, alloc_tok,
4239                     flag);
4240         else
4241                 centry = sdbc_alloc_lru(cd, cblk, stall, flag);
4242 
4243         return (centry);
4244 }
4245 
4246 /*
4247  * sdbc_alloc_dmc -- allocate a centry from a dmchain
4248  *
4249  * ARGUMENTS:
4250  *      cd      - Cache descriptor (from a previous open)
4251  *      cblk    - cache block number.
4252  *      stall   - pointer to stall count (no blocks avail)
4253  *      req_blocks - number of cache blocks in the client's i/o request
4254  *      alloc_tok - pointer to token initialized to 0 on first call to function
4255  *      flag    - lock status of sdbc_queue_lock, or ALLOC_NOWAIT flag
4256  * RETURNS:
4257  *      A cache block or possibly NULL if ALLOC_NOWAIT set
4258  *
4259  * USAGE:
4260  *      if dmchain is empty, allocate one.
4261  */
4262 static _sd_cctl_t *
4263 sdbc_alloc_dmc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4264                         sdbc_allocbuf_t *alloc_tok, int flag)
4265 {
4266         sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4267         _sd_cctl_t *centry = NULL;
4268 
4269         if (!dmc->sab_dmchain) {
4270                 /*
4271                  * Note - sdbc_get_dmchain() returns
4272                  * with cc_inuse and cc_pageio set
4273                  * for all members of dmchain.
4274                  */
4275                 if (dmc->sab_dmchain =
4276                     sdbc_get_dmchain(req_blocks, stall, flag)) {
4277 
4278                         /* remember q it came from */
4279                         if (dmc->sab_dmchain->cc_alloc_size_dm)
4280                                 dmc->sab_q = dmc->sab_dmchain->cc_cblocks;
4281                 }
4282         }
4283 
4284         /*
4285          * Note: dmchain pointer is advanced in sdbc_alloc_from_dmchain()
4286          */
4287         if (dmc->sab_dmchain) /* could be NULL if ALLOC_NOWAIT set */
4288                 centry = sdbc_alloc_from_dmchain(cd, cblk, alloc_tok, flag);
4289 
4290         return (centry);
4291 }
4292 
4293 /*
4294  * sdbc_alloc_from_dmchain -- allocate centry from a dmchain of centrys
4295  *
4296  * ARGUMENTS:
4297  *      cd      - Cache descriptor (from a previous open)
4298  *      cblk    - cache block number.
4299  *      alloc_tok - pointer to token
4300  *      flag    - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4301  *
4302  * RETURNS:
4303  *      A cache block, or possibly NULL if ALLOC_NOWAIT is set.
4304  *
4305  * USAGE:
4306  *      This routine allocates a new cache block from the supplied dmchain.
4307  *      Assumes that dmchain is non-NULL and that all cache entries in
4308  *      the dmchain have been removed from hash and have their cc_inuse and
4309  *      cc_pageio bits set.
4310  */
4311 static _sd_cctl_t *
4312 sdbc_alloc_from_dmchain(int cd, nsc_off_t cblk, sdbc_allocbuf_t *alloc_tok,
4313     int flag)
4314 {
4315         _sd_cctl_t *cc_ent, *old_ent;
4316         int categorize_centry;
4317         int locked = flag & ALLOC_LOCKED;
4318         int nowait = flag & ALLOC_NOWAIT;
4319         sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4320 
4321         SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4322 
4323         ASSERT(dmc->sab_dmchain);
4324 
4325         cc_ent = dmc->sab_dmchain;
4326 
4327         ASSERT(_sd_cctl_valid(cc_ent));
4328 
4329         cc_ent->cc_valid = 0;
4330         categorize_centry = 0;
4331         if (cc_ent->cc_data)
4332                 categorize_centry = FOUND_HOLD_OVER_DM;
4333 
4334 alloc_try:
4335         if (cd == _CD_NOHASH)
4336                 CENTRY_BLK(cc_ent) = cblk;
4337         else if ((old_ent = (_sd_cctl_t *)
4338             _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4339             _sd_htable)) != cc_ent) {
4340 
4341                 if (SET_CENTRY_INUSE(old_ent)) {
4342                         sdbc_centry_inuse++;
4343 
4344                         if (nowait) {
4345                                 cc_ent = NULL;
4346                                 goto out;
4347                         }
4348 
4349                         if (locked)
4350                                 rw_exit(&sdbc_queue_lock);
4351                         _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4352                         if (locked)
4353                                 rw_enter(&sdbc_queue_lock, RW_WRITER);
4354                         goto alloc_try;
4355                 }
4356 
4357                 /*
4358                  * bug 4529671
4359                  * now that we own the centry make sure that
4360                  * it is still good. it could have been processed
4361                  * by _sd_dealloc_dm() in the window between
4362                  * _sd_hash_insert() and SET_CENTRY_INUSE().
4363                  */
4364                 if ((_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)
4365                     != old_ent) {
4366                         sdbc_centry_deallocd++;
4367 #ifdef DEBUG
4368                         cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %" NSC_SZFMT
4369                             " lost to dealloc?! cc_data %p", (void *)old_ent,
4370                             cd, cblk, (void *)old_ent->cc_data);
4371 #endif
4372 
4373                         CLEAR_CENTRY_INUSE(old_ent);
4374 
4375                         if (nowait) {
4376                                 cc_ent = NULL;
4377                                 goto out;
4378                         }
4379 
4380                         goto alloc_try;
4381                 }
4382 
4383                 if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4384                         sdbc_centry_hit++;
4385                         old_ent->cc_toflush = 0;
4386                         /* _sd_centry_release(cc_ent); */
4387                         cc_ent = old_ent;
4388                         categorize_centry = FOUND_IN_HASH_DM;
4389                 } else {
4390                         sdbc_centry_lost++;
4391 
4392                         CLEAR_CENTRY_INUSE(old_ent);
4393 
4394                         if (nowait) {
4395                                 cc_ent = NULL;
4396                                 goto out;
4397                         }
4398 
4399                         goto alloc_try;
4400                 }
4401         }
4402 
4403         /*
4404          * advance the dmchain pointer, but only if we got the
4405          * cc_ent from the dmchain
4406          */
4407         if (categorize_centry != FOUND_IN_HASH_DM) {
4408                 if (cc_ent->cc_data)
4409                         dmc->sab_dmchain = dmc->sab_dmchain->cc_next_dm;
4410                 else
4411                         dmc->sab_dmchain = dmc->sab_dmchain->cc_next;
4412         }
4413 
4414 
4415         SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4416 
4417         mutex_enter(&cc_ent->cc_lock);
4418         if (cc_ent->cc_await_use) {
4419                 cv_broadcast(&cc_ent->cc_blkcv);
4420         }
4421         mutex_exit(&cc_ent->cc_lock);
4422 
4423         sdbc_centry_init_dm(cc_ent);
4424 
4425         cc_ent->cc_aging_dm |= categorize_centry;
4426 
4427         out:
4428 
4429         SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4430 
4431         return (cc_ent);
4432 }
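
     /*
      * Illustrative sketch, not compiled: the ownership handshake used
      * above (and again in sdbc_alloc_lru()) when _sd_hash_insert()
      * returns an existing entry.  Winning SET_CENTRY_INUSE() alone is
      * not proof of ownership; the hash table is consulted a second
      * time because _sd_dealloc_dm() may recycle the entry in the
      * window between the insert and the inuse bit being set
      * (bug 4529671).
      */
     #if 0
             if (SET_CENTRY_INUSE(old_ent)) {
                     /* busy -- wait (or bail out if ALLOC_NOWAIT), retry */
             } else if (_sd_hash_search(cd, cblk, _sd_htable) != old_ent) {
                     /* lost to dealloc -- drop inuse, retry the insert */
             } else if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
                     /* genuine cache hit -- adopt old_ent */
             } else {
                     /* reused for another block -- drop inuse, retry */
             }
     #endif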
4433 
4434 /*
4435  * sdbc_centry_alloc_end -- tidy up after all cache blocks have been
4436  *      allocated for a request
4437  * ARGUMENTS:
4438  *      alloc_tok  - pointer to allocation token
4439  * RETURNS
4440  *      nothing
4441  * USAGE:
4442  *      at this time only useful when sdbc_use_dmchain is true.
4443  *      if there are cache blocks remaining on the chain then the inuse and
4444  *      pageio bits must be cleared (they were set in sdbc_get_dmchain()).
4445  *
4446  */
4447 static void
4448 sdbc_centry_alloc_end(sdbc_allocbuf_t *alloc_tok)
4449 {
4450         _sd_cctl_t *next_centry;
4451         _sd_cctl_t *prev_centry;
4452         _sd_queue_t *q;
4453         sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4454 #ifdef DEBUG
4455         int chainpull = 0;
4456 #endif
4457 
4458         if (!sdbc_use_dmchain)
4459                 return;
4460 
4461         next_centry = dmc->sab_dmchain;
4462 
4463         while (next_centry != NULL) {
4464                 CLEAR_CENTRY_PAGEIO(next_centry);
4465 
4466                 prev_centry = next_centry;
4467 
4468                 if (next_centry->cc_data) {
4469 #ifdef DEBUG
4470                         ++chainpull;
4471 #endif
4472                         next_centry = next_centry->cc_next_dm;
4473 
4474                         /* clear bit after final reference */
4475 
4476                         CLEAR_CENTRY_INUSE(prev_centry);
4477                 } else {
4478                         next_centry = next_centry->cc_next;
4479 
4480                         /*
4481                          * a floater from the 0 queue, insert on q.
4482                          *
4483                          * since this centry is not on any queue
4484                          * the inuse bit can be cleared before
4485                          * inserting on the q.  this is also required
4486                          * since sdbc_get_dmchain() does not expect
4487                          * inuse bits to be set on 0 queue entries.
4488                          */
4489 
4490                         CLEAR_CENTRY_INUSE(prev_centry);
4491                         q = &sdbc_dm_queues[0];
4492                         sdbc_ins_dmqueue_front(q, prev_centry);
4493                 }
4494         }
4495 
4496 #ifdef DEBUG
4497         /* compute wastage stats */
4498         ASSERT((chainpull >= 0) && (chainpull < max_dm_queues));
4499         if (chainpull)
4500                 (*(dmchainpull_table + (dmc->sab_q *
4501                     max_dm_queues + chainpull)))++;
4502 #endif
4503 
4504 }
4505 
4506 
4507 /*
4508  * sdbc_alloc_lru - allocate a new cache block from the lru queue
4509  *
4510  * ARGUMENTS:
4511  *      cd      - Cache descriptor (from a previous open)
4512  *      cblk    - cache block number.
4513  *      stall   - pointer to stall count (no blocks avail)
4514  *      flag    - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4515  *
4516  * RETURNS:
4517  *      A cache block or NULL if ALLOC_NOWAIT specified
4518  *
4519  * USAGE:
4520  *      This routine allocates a new cache block from the lru.
4521  *      If an allocation cannot be done, we block, unless ALLOC_NOWAIT is set.
4522  */
4523 
4524 static _sd_cctl_t *
4525 sdbc_alloc_lru(int cd, nsc_off_t cblk, int *stall, int flag)
4526 {
4527         _sd_cctl_t *cc_ent, *old_ent, *ccnext;
4528         _sd_queue_t *q = _SD_LRU_Q;
4529         _sd_cctl_t *qhead = &(q->sq_qhead);
4530         int tries = 0, num_tries;
4531         int categorize_centry;
4532         int locked = flag & ALLOC_LOCKED;
4533         int nowait = flag & ALLOC_NOWAIT;
4534 
4535         if (nowait) {
4536                 num_tries = q->sq_inq / 100; /* only search 1% of q */
4537 
4538                 if (num_tries <= 0) /* ensure num_tries is non-zero */
4539                         num_tries = q->sq_inq;
4540         } else
4541                 num_tries = _sd_lruq_srch;
4542 
4543         SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4544 retry_alloc_centry:
4545 
4546         for (cc_ent = (qhead->cc_next); cc_ent != qhead; cc_ent = ccnext) {
4547                 if (--num_tries <= 0)
4548                         if (nowait) {
4549                                 cc_ent = NULL;
4550                                 goto out;
4551                         } else
4552                                 break;
4553 
4554                 ccnext = cc_ent->cc_next;
4555 
4556                 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM)
4557                         continue;
4558 
4559                 if (CENTRY_DIRTY(cc_ent))
4560                         continue;
4561                 if (SET_CENTRY_INUSE(cc_ent))
4562                         continue;
4563 
4564                 if (CENTRY_DIRTY(cc_ent)) {
4565                         sdbc_centry_lost++;
4566 
4567                         CLEAR_CENTRY_INUSE(cc_ent);
4568                         continue;
4569                 }
4570                 cc_ent->cc_flag = 0; /* CC_INUSE */
4571                 cc_ent->cc_toflush = 0;
4572 
4573                 /*
4574                  * Inlined requeue of the LRU. (should match _sd_requeue)
4575                  */
4576                 /* was FAST */
4577                 mutex_enter(&q->sq_qlock);
4578 #if defined(_SD_DEBUG)
4579         if (1) {
4580                 _sd_cctl_t *cp, *cn, *qp;
4581                 cp = cc_ent->cc_prev;
4582                 cn = cc_ent->cc_next;
4583                 qp = (q->sq_qhead).cc_prev;
4584                 if (!_sd_cctl_valid(cc_ent) ||
4585                     (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
4586                     (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
4587                     !_sd_cctl_valid(qp))
4588                         cmn_err(CE_PANIC,
4589                             "_sd_centry_alloc %x prev %x next %x qp %x",
4590                             cc_ent, cp, cn, qp);
4591         }
4592 #endif
4593                 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4594                 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4595                 cc_ent->cc_next = qhead;
4596                 cc_ent->cc_prev = qhead->cc_prev;
4597                 qhead->cc_prev->cc_next = cc_ent;
4598                 qhead->cc_prev = cc_ent;
4599                 cc_ent->cc_seq = q->sq_seq++;
4600                 /* was FAST */
4601                 mutex_exit(&q->sq_qlock);
4602                 /*
4603                  * End inlined requeue.
4604                  */
4605 
4606 #if defined(_SD_STATS)
4607                 if (_sd_hash_delete(cc_ent, _sd_htable) == 0)
4608                         SDTRACE(SDF_REPLACE,
4609                             CENTRY_CD(cc_ent), cc_ent->cc_hits,
4610                             BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4611                             nsc_lbolt(), cc_ent->cc_creat);
4612                 cc_ent->cc_creat = nsc_lbolt();
4613                 cc_ent->cc_hits = 0;
4614 #else
4615 #if defined(_SD_DEBUG)
4616                 if (_sd_hash_delete(cc_ent, _sd_htable) == 0) {
4617                         SDTRACE(SDF_REPLACE|ST_DL,
4618                             CENTRY_CD(cc_ent),
4619                             cc_ent->cc_valid,
4620                             BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4621                             cd, BLK_TO_FBA_NUM(cblk));
4622                         if (cc_ent->cc_await_use ||
4623                             ((cd == CENTRY_CD(cc_ent)) &&
4624                             (cblk == CENTRY_BLK(cc_ent))))
4625                                 DATA_LOG(SDF_REPLACE|ST_DL, cc_ent, 0,
4626                                     BLK_FBAS);
4627                 }
4628 #else
4629                 (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
4630                     _sd_htable);
4631 #endif
4632 #endif
4633                 cc_ent->cc_creat = nsc_lbolt();
4634                 cc_ent->cc_hits = 0;
4635 
4636                 cc_ent->cc_valid = 0;
4637                 categorize_centry = 0;
4638                 if (cc_ent->cc_data)
4639                         categorize_centry = FOUND_HOLD_OVER_DM;
4640 
4641         alloc_try:
4642                 if (cd == _CD_NOHASH)
4643                         CENTRY_BLK(cc_ent) = cblk;
4644                 else if ((old_ent = (_sd_cctl_t *)
4645                     _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4646                     _sd_htable)) != cc_ent) {
4647 
4648                         if (SET_CENTRY_INUSE(old_ent)) {
4649                                 sdbc_centry_inuse++;
4650 
4651                                 if (nowait) {
4652                                         _sd_centry_release(cc_ent);
4653                                         cc_ent = NULL;
4654                                         goto out;
4655                                 }
4656 
4657                                 if (locked)
4658                                         rw_exit(&sdbc_queue_lock);
4659                                 _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4660                                 if (locked)
4661                                         rw_enter(&sdbc_queue_lock, RW_WRITER);
4662                                 goto alloc_try;
4663                         }
4664 
4665                         /*
4666                          * bug 4529671
4667                          * now that we own the centry make sure that
4668                          * it is still good. it could have been processed
4669                          * by _sd_dealloc_dm() in the window between
4670                          * _sd_hash_insert() and SET_CENTRY_INUSE().
4671                          */
4672                         if ((_sd_cctl_t *)
4673                             _sd_hash_search(cd, cblk, _sd_htable) != old_ent) {
4674                                 sdbc_centry_deallocd++;
4675 #ifdef DEBUG
4676                                 cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %"
4677                                     NSC_SZFMT " lost to dealloc?! cc_data %p",
4678                                     (void *)old_ent, cd, cblk,
4679                                     (void *)old_ent->cc_data);
4680 #endif
4681 
4682                                 CLEAR_CENTRY_INUSE(old_ent);
4683 
4684                                 if (nowait) {
4685                                         _sd_centry_release(cc_ent);
4686                                         cc_ent = NULL;
4687                                         goto out;
4688                                 }
4689 
4690                                 goto alloc_try;
4691                         }
4692 
4693                         if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4694                                 sdbc_centry_hit++;
4695                                 old_ent->cc_toflush = 0;
4696                                 _sd_centry_release(cc_ent);
4697                                 cc_ent = old_ent;
4698                                 categorize_centry = FOUND_IN_HASH_DM;
4699                         } else {
4700                                 sdbc_centry_lost++;
4701 
4702                                 CLEAR_CENTRY_INUSE(old_ent);
4703 
4704                                 if (nowait) {
4705                                         _sd_centry_release(cc_ent);
4706                                         cc_ent = NULL;
4707                                         goto out;
4708                                 }
4709 
4710                                 goto alloc_try;
4711                         }
4712                 }
4713 
4714                 SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, tries,
4715                     BLK_TO_FBA_NUM(cblk), 0, 0);
4716 
4717                 if (cc_ent->cc_await_use) {
4718                         mutex_enter(&cc_ent->cc_lock);
4719                         cv_broadcast(&cc_ent->cc_blkcv);
4720                         mutex_exit(&cc_ent->cc_lock);
4721                 }
4722 
4723                 sdbc_centry_init_dm(cc_ent);
4724 
4725                 cc_ent->cc_aging_dm |= categorize_centry;
4726 
4727         out:
4728                 return (cc_ent);
4729         }
4730 
4731         SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, ++tries, BLK_TO_FBA_NUM(cblk), 0, 0);
4732 
4733         delay(drv_usectohz(20000));
4734         (void) (*stall)++;
4735         num_tries = _sd_lruq_srch;
4736         goto retry_alloc_centry;
4737 }
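
     /*
      * Illustrative sketch, not compiled: the inlined requeue above is
      * the classic move-to-tail on a circular doubly-linked list with a
      * sentinel head -- the allocated entry becomes most-recently-used.
      * A minimal standalone rendering, with a hypothetical node type:
      */
     #if 0
     struct node { struct node *next, *prev; };

     static void
     move_to_tail(struct node *head, struct node *n)  /* head is sentinel */
     {
             n->prev->next = n->next;        /* unlink from current spot */
             n->next->prev = n->prev;
             n->next = head;                 /* tail == just before head */
             n->prev = head->prev;
             head->prev->next = n;
             head->prev = n;
     }
     #endif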
4738 
4739 /*
4740  * sdbc_centry_init_dm - setup the cache block for dynamic memory allocation
4741  *
4742  * ARGUMENTS:
4743  *      centry   - Cache block.
4744  *
4745  * RETURNS:
4746  *      NONE
4747  *
4748  * USAGE:
4749  *      This routine is the central point at which cache entry blocks are set up.
4750  */
4751 static void
4752 sdbc_centry_init_dm(_sd_cctl_t *centry)
4753 {
4754 
4755         /* an entry already set up - don't touch, simply refresh the age */
4756         if (centry->cc_data) {
4757                 centry->cc_aging_dm &= ~(FINAL_AGING_DM);
4758 
4759                 DTRACE_PROBE1(sdbc_centry_init_dm_end,
4760                     char *, centry->cc_data);
4761                 return;
4762         }
4763 
4764         centry->cc_aging_dm &= ~(FINAL_AGING_DM | CATAGORY_ENTRY_DM);
4765 
4766         if (centry->cc_head_dm || centry->cc_next_dm)
4767                 cmn_err(cmn_level, "!sdbc(sdbc_centry_init_dm): "
4768                     "non-zero mem chain in ccent %p", (void *)centry);
4769 
4770         centry->cc_head_dm = 0;
4771 
4772         if (!sdbc_use_dmchain)
4773                 centry->cc_next_dm = 0;
4774 
4775         centry->cc_data = 0;
4776 
4777 }
4778 
4779 /*
4780  * sdbc_centry_memalloc_dm
4781  *
4782  * Actually allocate the cache memory, storing it in the cc_data field for
4783  * the cctl
4784  *
4785  * ARGS:
4786  *      centry: cache control block for which to allocate the memory
4787  *      alloc_request: number of bytes to allocate
4788  *      flag: if called with ALLOC_NOWAIT, caller must check for non-zero return
4789  *
4790  * RETURNS:
4791  *      0 on success
4792  *      non-zero on error
4793  */
4794 static int
4795 sdbc_centry_memalloc_dm(_sd_cctl_t *centry, int alloc_request, int flag)
4796 {
4797         int cblocks;
4798         _sd_queue_t *newq;
4799         int sleep;
4800         sleep = (flag & ALLOC_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
4801 
4802         if (!centry->cc_data && (alloc_request > 0)) {
4803                 /* host or other */
4804                 dynmem_processing_dm.alloc_ct++;
4805                 centry->cc_data = (unsigned char *)
4806                     kmem_alloc((size_t)centry->cc_alloc_size_dm, sleep);
4807 
4808 
4809                 if (sdbc_use_dmchain) {
4810                         cblocks = centry->cc_alloc_size_dm >> _sd_cblock_shift;
4811                         newq = &sdbc_dm_queues[cblocks];
4812 
4813                         /* set the dmqueue index */
4814                         centry->cc_cblocks = cblocks;
4815 
4816                         /* put on appropriate queue */
4817                         sdbc_ins_dmqueue_back(newq, centry);
4818                 }
4819 
4820                 /*
4821                  * allocation can fail only for KM_NOSLEEP, never with KM_SLEEP
4822                  */
4823                 if (!centry->cc_data)
4824                         return (LOW_RESOURCES_DM);
4825                 centry->cc_head_dm = centry;
4826                 centry->cc_alloc_ct_dm++;
4827         }
4828 
4829         return (0);
4830 }
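
     /*
      * Illustrative sketch, not compiled: how an allocation size maps to
      * a dm queue above.  The queues are indexed by allocation size in
      * cache blocks, so assuming a hypothetical 8K cache block
      * (_sd_cblock_shift == 13), a 32K host allocation lands on queue 4:
      */
     #if 0
             cblocks = 0x8000 >> 13;                 /* 32K / 8K == 4 */
             newq = &sdbc_dm_queues[cblocks];        /* the 4-block queue */
     #endif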
4831 
4832 /*
4833  * _sd_centry_release - release a cache block
4834  *
4835  * ARGUMENTS:
4836  *      centry   - Cache block.
4837  *
4838  * RETURNS:
4839  *      NONE
4840  *
4841  * USAGE:
4842  *      This routine frees up a cache block. It also frees up the write
4843  *      block if one is allocated and it's valid to release it.
4844  */
4845 
4846 void
4847 _sd_centry_release(_sd_cctl_t *centry)
4848 {
4849         ss_centry_info_t *wctl;
4850 
4851         SDTRACE(ST_ENTER|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4852             BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4853 
4854         CLEAR_CENTRY_PAGEIO(centry);
4855 
4856         if ((wctl = centry->cc_write) != 0) {
4857                 /* was FAST */
4858                 mutex_enter(&centry->cc_lock);
4859                 if (CENTRY_DIRTY(centry))
4860                         wctl = NULL;
4861                 else {
4862                         centry->cc_write = NULL;
4863                         centry->cc_flag &= ~(CC_PINNABLE);
4864                 }
4865                 /* was FAST */
4866                 mutex_exit(&centry->cc_lock);
4867                 if (wctl)  {
4868                         wctl->sc_dirty = 0;
4869                         SSOP_SETCENTRY(sdbc_safestore, wctl);
4870                         SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
4871                 }
4872         }
4873 
4874         if (!(centry->cc_aging_dm & BAD_CHAIN_DM)) {
4875                 if (sdbc_use_dmchain) {
4876                         if (centry->cc_alloc_size_dm) {
4877 
4878                                 /* see if this can be queued to head */
4879                                 if (CENTRY_QHEAD(centry)) {
4880                                         sdbc_requeue_head_dm_try(centry);
4881                                 } else {
4882                                         int qidx;
4883                                         _sd_queue_t *q;
4884 
4885                                         qidx = centry->cc_cblocks;
4886                                         q = &sdbc_dm_queues[qidx];
4887 
4888                                         if (_sd_lru_reinsert(q, centry)) {
4889                                                 sdbc_requeue_dmchain(q,
4890                                                     centry, 1, 1);
4891                                         }
4892                                 }
4893                         } else {
4894                                 /*
4895                                  * Fix for bug 4949134:
4896                                  * If an internal block is marked with CC_QHEAD
4897                                  * but the HOST block is not, the chain will
4898                                  * never age properly, and will never be made
4899                                  * available.  Only the HOST of the dmchain is
4900                                  * checked for CC_QHEAD, so clearing an internal
4901                                  * block indiscriminately (as is being done
4902                                  * here) does no damage.
4903                                  *
4904                                  * The same result could instead be achieved by
4905                                  * not setting the CC_QHEAD flag in the first
4906                                  * place, if the block is an internal dmchain
4907                                  * block, and if it is found in the hash table.
4908                                  * The current solution was chosen since it is
4909                                  * the least intrusive.
4910                                  */
4911                                 centry->cc_flag &= ~CC_QHEAD;
4912                         }
4913                 } else {
4914                         if (CENTRY_QHEAD(centry)) {
4915                                 if (!CENTRY_DIRTY(centry))
4916                                         _sd_requeue_head(centry);
4917                         } else if (_sd_lru_reinsert(_SD_LRU_Q, centry))
4918                                 _sd_requeue(centry);
4919                 }
4920         }
4921 
4922         SDTRACE(ST_EXIT|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4923             BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4924 
4925         /* only clear inuse after final reference to centry */
4926 
4927         CLEAR_CENTRY_INUSE(centry);
4928 }
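
     /*
      * Illustrative sketch, not compiled: the ordering that matters in
      * the release path above.  cc_pageio can be dropped up front, but
      * cc_inuse is the allocation lock on the entry, so it is cleared
      * only after the last touch of *centry -- once it is clear, another
      * thread may reallocate the block immediately.
      */
     #if 0
             CLEAR_CENTRY_PAGEIO(centry);    /* page-level I/O interlock */
             /* ... safestore bookkeeping, queue reinsertion ... */
             CLEAR_CENTRY_INUSE(centry);     /* must be the final touch */
     #endif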
4929 
4930 
4931 /*
4932  * look up the centry info associated with a safestore resource
4933  * return a pointer to the centry info structure
4934  */
4935 ss_centry_info_t *
4936 sdbc_get_cinfo_byres(ss_resource_t *res)
4937 {
4938         ss_centry_info_t *cinfo;
4939         ss_centry_info_t *cend;
4940         int found = 0;
4941 
4942         ASSERT(res != NULL);
4943 
4944         if (res == NULL)
4945                 return (NULL);
4946 
4947         cinfo = _sdbc_gl_centry_info;
4948         cend = _sdbc_gl_centry_info +
4949             (_sdbc_gl_centry_info_size / sizeof (ss_centry_info_t)) - 1;
4950 
4951         for (; cinfo <= cend; ++cinfo)
4952                 if (cinfo->sc_res == res) {
4953                         ++found;
4954                         break;
4955                 }
4956 
4957         if (!found)
4958                 cinfo = NULL; /* bad */
4959 
4960         return (cinfo);
4961 }
4962 
4963 /*
4964  * _sd_alloc_write - Allocate a write block (for remote mirroring)
4965  *                 and set centry->cc_write
4966  *
4967  * ARGUMENTS:
4968  *      centry   - Head of Cache chain
4969  *      stall    - pointer to stall count (no blocks avail)
4970  *
4971  * RETURNS:
4972  *      0 - and sets cc_write for all entries when a write control block is obtained.
4973  *      -1 - if a write control block could not be obtained.
4974  */
4975 
4976 int
4977 _sd_alloc_write(_sd_cctl_t *centry, int *stall)
4978 {
4979 
4980         ss_resourcelist_t *reslist;
4981         ss_resourcelist_t *savereslist;
4982         ss_resource_t *res;
4983         _sd_cctl_t *ce;
4984         int err;
4985         int need;
4986 
4987 
4988         need = 0;
4989 
4990         for (ce = centry; ce; ce = ce->cc_chain) {
4991                 if (!(ce->cc_write))
4992                         need++;
4993         }
4994 
4995         if (!need)
4996                 return (0);
4997 
4998         if ((SSOP_ALLOCRESOURCE(sdbc_safestore, need, stall, &reslist))
4999             == SS_OK) {
5000                 savereslist = reslist;
5001                 for (ce = centry; ce; ce = ce->cc_chain) {
5002                         if (ce->cc_write)
5003                                 continue;
5004                         err = SSOP_GETRESOURCE(sdbc_safestore, &reslist, &res);
5005                         if (err == SS_OK)
5006                                 ce->cc_write = sdbc_get_cinfo_byres(res);
5007 
5008                         ASSERT(err == SS_OK); /* panic if DEBUG on */
5009                         ASSERT(ce->cc_write != NULL);
5010 
5011                         /*
5012                          * this is bad and should not happen.
5013                          * we use the saved reslist to clean up
5014                          * and return.
5015                          */
5016                         if ((err != SS_OK) || !ce->cc_write) {
5017 
5018                                 cmn_err(CE_WARN, "!_sd_alloc_write: "
5019                                     "bad resource list 0x%p "
5020                                     "changing to forced write thru mode",
5021                                     (void *)savereslist);
5022 
5023                                 (void) _sd_set_node_hint(NSC_FORCED_WRTHRU);
5024 
5025                                 while (SSOP_GETRESOURCE(sdbc_safestore,
5026                                     &savereslist, &res) == SS_OK) {
5027 
5028                                         SSOP_DEALLOCRESOURCE(sdbc_safestore,
5029                                             res);
5030                                 }
5031 
5032                                 return (-1);
5033 
5034                         }
5035 
5036                 }
5037                 return (0);
5038         }
5039 
5040         /* no safestore resources available.  do sync write */
5041         _sd_unblock(&_sd_flush_cv);
5042         return (-1);
5043 }
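
     /*
      * Illustrative sketch, not compiled: the two-pass shape of
      * _sd_alloc_write() above -- count what the chain still needs,
      * reserve that many safestore resources in one call, then hand one
      * to each uncovered entry (error handling elided):
      */
     #if 0
             for (ce = centry; ce; ce = ce->cc_chain)        /* pass 1 */
                     if (!ce->cc_write)
                             need++;

             if (SSOP_ALLOCRESOURCE(sdbc_safestore, need, stall,
                 &reslist) == SS_OK)
                     for (ce = centry; ce; ce = ce->cc_chain)  /* pass 2 */
                             if (!ce->cc_write && SSOP_GETRESOURCE(
                                 sdbc_safestore, &reslist, &res) == SS_OK)
                                     ce->cc_write =
                                         sdbc_get_cinfo_byres(res);
     #endif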
5044 
5045 /*
5046  * _sd_read - Interface call to do read.
5047  *
5048  * ARGUMENTS:
5049  *      handle  - handle allocated earlier on.
5050  *      fba_pos - disk block number to read from.
5051  *      fba_len - length in fbas.
5052  *      flag    - flag: (NSC_NOBLOCK for async io)
5053  *
5054  * RETURNS:
5055  *      errno if return > 0
5056  *      NSC_DONE or NSC_PENDING otherwise.
5057  *
5058  * USAGE:
5059  *      This routine checks if the request is valid and calls the underlying
5060  *      doread routine (also called by alloc_buf)
5061  */
5062 
5063 int
5064 _sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5065     int flag)
5066 {
5067         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
5068         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
5069         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
5070         _sd_cctl_t *cc_ent = NULL;
5071         nsc_size_t fba_orig_len = fba_len;
5072         int ret;
5073         int cd = HANDLE_CD(handle);
5074 
5075         if (_sdbc_shutdown_in_progress || (handle->bh_flag & NSC_ABUF)) {
5076                 ret = EIO;
5077                 goto out;
5078         }
5079 
5080 
5081 #if !defined(_SD_NOCHECKS)
5082         if (!_SD_HANDLE_ACTIVE(handle)) {
5083                 cmn_err(CE_WARN, "!sdbc(_sd_read) handle %p not active",
5084                     (void *)handle);
5085                 ret = EINVAL;
5086                 goto out;
5087         }
5088         ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
5089 #endif
5090         if (fba_len == 0) {
5091                 ret = NSC_DONE;
5092                 goto out;
5093         }
5094 
5095         KSTAT_RUNQ_ENTER(cd);
5096 
5097         st_cblk_off = BLK_FBA_OFF(fba_pos);
5098         st_cblk_len = BLK_FBAS - st_cblk_off;
5099         if ((nsc_size_t)st_cblk_len >= fba_len) {
5100                 end_cblk_len = 0;
5101                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5102         } else {
5103                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5104         }
5105 
5106         cc_ent = handle->bh_centry;
5107         while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5108                 cc_ent = cc_ent->cc_chain;
5109 
5110         if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, cc_ent))
5111                 goto need_io;
5112         DATA_LOG(SDF_RD, cc_ent, st_cblk_off, st_cblk_len);
5113 
5114         DTRACE_PROBE4(_sd_read_data1, uint64_t,
5115             (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5116             uint64_t, (uint64_t)st_cblk_len, char *,
5117             *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5118             char *, *(int64_t *)(cc_ent->cc_data +
5119             FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5120 
5121         fba_pos += st_cblk_len;
5122         fba_len -= st_cblk_len;
5123         cc_ent = cc_ent->cc_chain;
5124 
5125         while (fba_len > (nsc_size_t)end_cblk_len) {
5126                 if (!FULLY_VALID(cc_ent))
5127                         goto need_io;
5128                 DATA_LOG(SDF_RD, cc_ent, 0, BLK_FBAS);
5129 
5130                 DTRACE_PROBE4(_sd_read_data2, uint64_t,
5131                     (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5132                     uint64_t, (uint64_t)BLK_FBAS,
5133                     char *, *(int64_t *)(cc_ent->cc_data),
5134                     char *, *(int64_t *)(cc_ent->cc_data +
5135                     FBA_SIZE(BLK_FBAS) - 8));
5136 
5137                 fba_pos += BLK_FBAS;
5138                 fba_len -= BLK_FBAS;
5139                 cc_ent = cc_ent->cc_chain;
5140         }
5141         if (fba_len) {
5142                 if (!SDBC_VALID_BITS(0, end_cblk_len, cc_ent))
5143                         goto need_io;
5144                 DATA_LOG(SDF_RD, cc_ent, 0, end_cblk_len);
5145 
5146                 DTRACE_PROBE4(_sd_read_data3, uint64_t,
5147                     (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5148                     uint64_t, (uint64_t)end_cblk_len,
5149                     char *, *(int64_t *)(cc_ent->cc_data),
5150                     char *, *(int64_t *)(cc_ent->cc_data +
5151                     FBA_SIZE(end_cblk_len) - 8));
5152         }
5153 
5154         CACHE_FBA_READ(handle->bh_cd, fba_orig_len);
5155         CACHE_READ_HIT;
5156 
5157         FBA_READ_IO_KSTATS(handle->bh_cd, FBA_SIZE(fba_orig_len));
5158 
5159         ret = NSC_HIT;
5160         goto stats_exit;
5161 need_io:
5162         _SD_DISCONNECT_CALLBACK(handle);
5163 
5164         ret = _sd_doread(handle, cc_ent, fba_pos, fba_len, flag);
5165 
5166 stats_exit:
5167         KSTAT_RUNQ_EXIT(cd);
5168 out:
5169         return (ret);
5170 }
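
     /*
      * Illustrative sketch, not compiled: the start/middle/end
      * decomposition used above and repeated in _sd_doread(),
      * _sd_read_complete() and _sd_write().  Assuming a hypothetical
      * 16-FBA cache block (BLK_FBAS == 16), a request with
      * fba_pos == 21 and fba_len == 40 splits as 11 + 16 + 13:
      */
     #if 0
             st_cblk_off = BLK_FBA_OFF(21);          /* 21 % 16 == 5 */
             st_cblk_len = BLK_FBAS - st_cblk_off;   /* 16 - 5 == 11 */
             end_cblk_len = BLK_FBA_OFF(21 + 40);    /* 61 % 16 == 13 */
             /* 40 FBAs == 11 (head) + 16 (one full block) + 13 (tail) */
     #endif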
5171 
5172 
5173 /*
5174  * sdbc_doread_prefetch - read ahead one cache block
5175  *
5176  * ARGUMENTS:
5177  *      cc_ent - cache entry
5178  *      fba_pos - disk block number to read from
5179  *      fba_len - length in fbas.
5180  *
5181  * RETURNS:
5182  *      number of fbas, if any, that are to be read beyond (fba_pos + fba_len)
5183  *
5184  * USAGE:
5185  *      if readahead is to be done, allocate a cache block and place
5186  *      it on the cc_chain of cc_ent
5187  */
5188 static int
5189 sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5190 {
5191         nsc_off_t st_cblk = FBA_TO_BLK_NUM(fba_pos);
5192         nsc_off_t next_cblk = FBA_TO_BLK_NUM(fba_pos + BLK_FBAS);
5193         nsc_size_t filesize;
5194         int fba_count = 0; /* number of fbas to prefetch */
5195         _sd_cctl_t *cc_ra; /* the read ahead cache entry */
5196         int cd = CENTRY_CD(cc_ent);
5197         nsc_size_t vol_fill;
5198 
5199         filesize = _sd_cache_files[cd].cd_info->sh_filesize;
5200         vol_fill = filesize - (fba_pos + fba_len);
5201 
5202         /* readahead only for small reads */
5203         if ((fba_len <= FBA_LEN(CACHE_BLOCK_SIZE)) && (fba_pos != 0) &&
5204             (vol_fill > 0)) {
5205 
5206                 /*
5207                  * if prev block is in cache and next block is not,
5208                  * then read ahead one block
5209                  */
5210                 if (_sd_hash_search(cd, st_cblk - 1, _sd_htable)) {
5211                         if (!_sd_hash_search(cd, next_cblk, _sd_htable)) {
5212 
5213                                 cc_ra = sdbc_centry_alloc_blks
5214                                     (cd, next_cblk, 1, ALLOC_NOWAIT);
5215                                 if (cc_ra) {
5216                                         /* if in cache don't readahead */
5217                                         if (cc_ra->cc_aging_dm &
5218                                             HASH_ENTRY_DM) {
5219                                                 ++sdbc_ra_hash;
5220                                                 _sd_centry_release(cc_ra);
5221                                         } else {
5222                                                 cc_ent->cc_chain = cc_ra;
5223                                                 cc_ra->cc_chain = 0;
5224                                                 fba_count =
5225                                                     (vol_fill >
5226                                                     (nsc_size_t)BLK_FBAS) ?
5227                                                     BLK_FBAS : (int)vol_fill;
5228                                                 /*
5229                                                  * indicate implicit prefetch
5230                                                  * and mark for release in
5231                                                  * _sd_read_complete()
5232                                                  */
5233                                                 cc_ra->cc_aging_dm |=
5234                                                     (PREFETCH_BUF_I |
5235                                                     PREFETCH_BUF_IR);
5236                                         }
5237                                 } else {
5238                                         ++sdbc_ra_none;
5239                                 }
5240                         }
5241                 }
5242 
5243         }
5244 
5245         return (fba_count);
5246 }
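
     /*
      * Illustrative sketch, not compiled: the prefetch trigger above
      * reduced to its conditions (do_prefetch is a hypothetical flag).
      * It is a sequential-access guess: read ahead one block only when
      * the request is small, is not at the start or end of the volume,
      * the previous block is already cached and the next block is not:
      */
     #if 0
             do_prefetch = (fba_len <= FBA_LEN(CACHE_BLOCK_SIZE)) &&
                 (fba_pos != 0) && (vol_fill > 0) &&
                 (_sd_hash_search(cd, st_cblk - 1, _sd_htable) != NULL) &&
                 (_sd_hash_search(cd, next_cblk, _sd_htable) == NULL);
     #endif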
5247 
5248 /*
5249  * _sd_doread - Check if the blocks are in cache. If not all valid, do io.
5250  *
5251  * ARGUMENTS:
5252  *      handle  - handle allocated earlier on.
5253  *      fba_pos - disk block number to read from.
5254  *      fba_len - length in fbas.
5255  *      flag    - flag: (NSC_NOBLOCK for async io)
5256  *
5257  * RETURNS:
5258  *      errno if return > 0
5259  *      NSC_DONE(from disk), or NSC_PENDING otherwise.
5260  *
5261  * Comments:
5262  *      It initiates an io and either blocks waiting for the completion
5263  *      or returns NSC_PENDING, depending on whether the flag bit
5264  *      NSC_NOBLOCK is reset or set.
5265  *
5266  */
5267 
5268 
5269 static int
5270 _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent, nsc_off_t fba_pos,
5271     nsc_size_t fba_len, int flag)
5272 {
5273         int cd, err;
5274         nsc_size_t fba_orig_len; /* length in FBA's of the original request */
5275         nsc_size_t file_len;    /* length in bytes of io to be done */
5276         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
5277         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
5278         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
5279         int num_bdl;
5280         _sd_cctl_t *cc_temp;
5281         struct buf *bp;
5282         unsigned int want_bits;
5283         void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
5284         sdbc_cblk_fba_t end_cblk_fill;  /* FBA's to fill to end of last block */
5285         nsc_size_t vol_end_fill; /* # of FBA's to fill to end of the volume */
5286 
5287         cd = HANDLE_CD(handle);
5288         SDTRACE(ST_ENTER|SDF_READ, cd, fba_len, fba_pos, flag, 0);
5289 
5290         ASSERT(cd >= 0);
5291         if (_sd_cache_files[cd].cd_info->sh_failed) {
5292                 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, EIO);
5293                 return (EIO);
5294         }
5295 
5296         /*
5297          * adjust the position and length so that the entire cache
5298          * block is read in
5299          */
5300 
5301         /* first, adjust to beginning of cache block */
5302 
5303         fba_len += BLK_FBA_OFF(fba_pos); /* add start offset to length */
5304         fba_pos &= ~BLK_FBA_MASK; /* move position back to start of block */
5305 
5306         /* compute fill to end of cache block */
5307         end_cblk_fill = (BLK_FBAS - 1) - ((fba_len - 1) % BLK_FBAS);
5308         vol_end_fill = _sd_cache_files[(cd)].cd_info->sh_filesize -
5309             (fba_pos + fba_len);
5310 
5311         /* fill to lesser of cache block or end of volume */
5312         fba_len += ((nsc_size_t)end_cblk_fill < vol_end_fill) ? end_cblk_fill :
5313             vol_end_fill;
5314 
5315         DTRACE_PROBE2(_sd_doread_rfill, nsc_off_t, fba_pos,
5316             nsc_size_t, fba_len);
5317 
5318 
5319         /* for small reads do 1-block readahead if previous block is in cache */
5320         if (sdbc_prefetch1)
5321                 fba_len += sdbc_doread_prefetch(cc_ent, fba_pos, fba_len);
5322 
5323         fba_orig_len = fba_len;
5324         st_cblk_off = BLK_FBA_OFF(fba_pos);
5325         st_cblk_len = BLK_FBAS - st_cblk_off;
5326         if ((nsc_size_t)st_cblk_len >= fba_len) {
5327                 end_cblk_len = 0;
5328                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5329         } else {
5330                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5331         }
5332 
5333         cc_temp = cc_ent;
5334         num_bdl = 0;
5335         while (cc_temp) {
5336                 num_bdl += (SDBC_LOOKUP_IOCOUNT(CENTRY_DIRTY(cc_temp)));
5337                 cc_temp = cc_temp->cc_chain;
5338         }
5339         bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
5340             fba_pos, num_bdl, B_READ);
5341         if (bp == NULL) {
5342                 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, E2BIG);
5343                 return (E2BIG);
5344         }
5345 
5346         want_bits = SDBC_GET_BITS(st_cblk_off, st_cblk_len);
5347         if (want_bits & CENTRY_DIRTY(cc_ent))
5348                 _sd_ccent_rd(cc_ent, want_bits, bp);
5349         else {
5350                 sd_add_fba(bp, &cc_ent->cc_addr, st_cblk_off, st_cblk_len);
5351         }
5352         file_len = FBA_SIZE(st_cblk_len);
5353         cc_ent = cc_ent->cc_chain;
5354         fba_len -= st_cblk_len;
5355 
5356         while (fba_len > (nsc_size_t)end_cblk_len) {
5357                 if (CENTRY_DIRTY(cc_ent))
5358                         _sd_ccent_rd(cc_ent, (uint_t)BLK_FBA_BITS, bp);
5359                 else {
5360                         sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
5361                 }
5362                 file_len += CACHE_BLOCK_SIZE;
5363                 cc_ent = cc_ent->cc_chain;
5364                 fba_len -= BLK_FBAS;
5365         }
5366 
5367         if (fba_len) {
5368                 want_bits = SDBC_GET_BITS(0, end_cblk_len);
5369                 if (want_bits & CENTRY_DIRTY(cc_ent))
5370                         _sd_ccent_rd(cc_ent, want_bits, bp);
5371                 else {
5372                         sd_add_fba(bp, &cc_ent->cc_addr, 0, end_cblk_len);
5373                 }
5374                 file_len += FBA_SIZE(end_cblk_len);
5375         }
5376 
5377         CACHE_READ_MISS;
5378         FBA_READ_IO_KSTATS(cd, file_len);
5379 
5380         DISK_FBA_READ(cd, FBA_NUM(file_len));
5381 
5382         fn = (handle->bh_flag & NSC_NOBLOCK) ? _sd_async_read_ea : NULL;
5383         err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, handle);
5384 
5385         if (err != NSC_PENDING) {
5386                 _sd_read_complete(handle, fba_pos, fba_orig_len, err);
5387         }
5388 
5389         SDTRACE(ST_EXIT|SDF_READ, cd, fba_orig_len, fba_pos, flag, err);
5390 
5391         return (err);
5392 }
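
     /*
      * Illustrative sketch, not compiled: the alignment fix-up at the
      * top of _sd_doread(), again assuming a hypothetical 16-FBA cache
      * block.  A request of fba_pos == 21, fba_len == 30 is widened to
      * whole cache blocks (the volume-end clamp elided):
      */
     #if 0
             fba_len += BLK_FBA_OFF(21);     /* 30 + 5 == 35, cover front */
             fba_pos &= ~BLK_FBA_MASK;       /* 21 -> 16, aligned start */
             end_cblk_fill = (BLK_FBAS - 1) - ((35 - 1) % BLK_FBAS);
             fba_len += end_cblk_fill;       /* 35 + 13 == 48, 3 blocks */
     #endif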
5393 
5394 
5395 
5396 /*
5397  * _sd_read_complete - Do whatever is necessary after a read io is done.
5398  *
5399  * ARGUMENTS:
5400  *      handle  - handle allocated earlier on.
5401  *      fba_pos - disk block number to read from.
5402  *      fba_len - length in fbas.
5403  *      error   - error from io if any.
5404  *
5405  * RETURNS:
5406  *      NONE.
5407  *
5408  * Comments:
5409  *      This routine marks the cache blocks valid if the io completed
5410  *      successfully. Called from the async end action as well as after
5411  *      a synchronous read completes.
5412  */
5413 
5414 void
5415 _sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
5416     nsc_size_t fba_len, int error)
5417 {
5418         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
5419         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
5420         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
5421         nsc_size_t cur_fba_len; /* length in FBA's */
5422         _sd_cctl_t *cc_iocent;
5423         _sd_cctl_t *first_iocent; /* first buffer when processing prefetch */
5424 
5425         cc_iocent = handle->bh_centry;
5426 
5427         if ((handle->bh_error = error) == 0) {
5428                 while (CENTRY_BLK(cc_iocent) != FBA_TO_BLK_NUM(fba_pos))
5429                         cc_iocent = cc_iocent->cc_chain;
5430 
5431                 cur_fba_len = fba_len;
5432                 st_cblk_off = BLK_FBA_OFF(fba_pos);
5433                 st_cblk_len = BLK_FBAS - st_cblk_off;
5434                 if ((nsc_size_t)st_cblk_len >= fba_len) {
5435                         end_cblk_len = 0;
5436                         st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5437                 } else {
5438                         end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5439                 }
5440 
5441                 SDBC_SET_VALID_BITS(st_cblk_off, st_cblk_len, cc_iocent);
5442                 DATA_LOG(SDF_RDIO, cc_iocent, st_cblk_off, st_cblk_len);
5443 
5444                 DTRACE_PROBE4(_sd_read_complete_data1, uint64_t, (uint64_t)
5445                     BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)) + st_cblk_off,
5446                     int, st_cblk_len, char *,
5447                     *(int64_t *)(cc_iocent->cc_data + FBA_SIZE(st_cblk_off)),
5448                     char *, *(int64_t *)(cc_iocent->cc_data +
5449                     FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5450 
5451 
5452                 first_iocent = cc_iocent;
5453                 cc_iocent = cc_iocent->cc_chain;
5454                 cur_fba_len -= st_cblk_len;
5455 
5456                 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5457                         SET_FULLY_VALID(cc_iocent);
5458                         DATA_LOG(SDF_RDIO, cc_iocent, 0, BLK_FBAS);
5459 
5460                         DTRACE_PROBE4(_sd_read_complete_data2, uint64_t,
5461                             (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5462                             int, BLK_FBAS, char *,
5463                             *(int64_t *)(cc_iocent->cc_data), char *,
5464                             *(int64_t *)(cc_iocent->cc_data +
5465                             FBA_SIZE(BLK_FBAS) - 8));
5466 
5467                         /*
5468                          * 4755485 release implicit prefetch buffers
5469                          *
5470                          * the cc_chain of the first buffer must be NULL'd,
5471                          * else _sd_free_buf() will do a double free when
5472                          * it traverses the chain.
5473                          *
5474                          * if a buffer has been marked PREFETCH_BUF_IR then
5475                          * it is guaranteed that
5476                          *    1. it is the second in a chain of two.
5477                          *    2. cur_fba_len is BLK_FBAS.
5478                          *    3. end_cblk_len is zero.
5479                          *
5480                          * because of 1 (and 2) above, we can safely exit the
5481                          * while loop via the break statement without
5482                          * executing the last two statements.  the break
5483                          * statement is necessary because it would be unsafe
5484                          * to access cc_iocent which could be reallocated
5485                          * immediately after the _sd_centry_release().
5486                          */
5487                         if (cc_iocent->cc_aging_dm & PREFETCH_BUF_IR) {
5488                                 cc_iocent->cc_aging_dm &= ~(PREFETCH_BUF_IR);
5489                                 _sd_centry_release(cc_iocent);
5490                                 first_iocent->cc_chain = NULL;
5491                                 break;
5492                         }
5493 
5494                         cc_iocent = cc_iocent->cc_chain;
5495                         cur_fba_len -= BLK_FBAS;
5496                 }
5497                 if (end_cblk_len) {
5498                         SDBC_SET_VALID_BITS(0, end_cblk_len, cc_iocent);
5499                         DATA_LOG(SDF_RDIO, cc_iocent, 0, end_cblk_len);
5500 
5501                         DTRACE_PROBE4(_sd_read_complete_data3, uint64_t,
5502                             (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5503                             int, end_cblk_len, char *,
5504                             *(int64_t *)(cc_iocent->cc_data), char *,
5505                             *(int64_t *)(cc_iocent->cc_data +
5506                             FBA_SIZE(end_cblk_len) - 8));
5507                 }
5508         }
5509 
5510 }
5511 
5512 
5513 /*
5514  * _sd_async_read_ea - End action for async reads.
5515  *
5516  * ARGUMENTS:
5517  *      xhandle  - handle allocated earlier on (cast to blind_t).
5518  *      fba_pos - disk block number read from.
5519  *      fba_len - length in fbas.
5520  *      error   - error from io if any.
5521  *
5522  * RETURNS:
5523  *      NONE.
5524  *
5525  * Comments:
5526  *      This routine is called at interrupt level when the io is done.
5527  *      This is called only when read is asynchronous (NSC_NOBLOCK)
5528  */
5529 
5530 static void
5531 _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5532     int error)
5533 {
5534         _sd_buf_handle_t *handle = xhandle;
5535         int cd;
5536 
5537         if (error) {
5538                 cd = HANDLE_CD(handle);
5539                 ASSERT(cd >= 0);
5540                 _sd_cache_files[cd].cd_info->sh_failed = 1;
5541         }
5542         SDTRACE(ST_ENTER|SDF_READ_EA, HANDLE_CD(handle),
5543             handle->bh_fba_len, handle->bh_fba_pos, 0, error);
5544 
5545         _sd_read_complete(handle, fba_pos, fba_len, error);
5546 
5547 #if defined(_SD_DEBUG_PATTERN)
5548         check_buf_consistency(handle, "rd");
5549 #endif
5550 
5551         SDTRACE(ST_EXIT|SDF_READ_EA, HANDLE_CD(handle),
5552             handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
5553         _SD_READ_CALLBACK(handle);
5554 }
5555 
5556 
5557 /*
5558  * _sd_async_write_ea - End action for async writes.
5559  *
5560  * ARGUMENTS:
5561  *      xhandle  - handle allocated earlier on. (cast to blind_t)
5562  *      fba_pos - disk block number written to.
5563  *      fba_len - length in fbas.
5564  *      error   - error from io if any.
5565  *
5566  * RETURNS:
5567  *      NONE.
5568  *
5569  * Comments:
5570  *      This routine is called at interrupt level when the write io is done.
5571  *      This is called only when we are in write-through mode and the write
5572  *      call indicated asynchronous callback. (NSC_NOBLOCK)
5573  */
5574 
5575 /* ARGSUSED */
5576 
5577 static void
5578 _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5579     int error)
5580 {
5581         _sd_buf_handle_t *handle = xhandle;
5582         handle->bh_error = error;
5583 
5584         if (error)
5585                 _sd_cache_files[HANDLE_CD(handle)].cd_info->sh_failed = 1;
5586 
5587         _SD_WRITE_CALLBACK(handle);
5588 }
5589 
5590 /*
5591  * update_dirty - set dirty bits in cache block which is already dirty
5592  *      cc_inuse is held, need cc_lock to avoid race with _sd_process_pending
5593  *      must check for I/O in-progress and set PEND_DIRTY.
5594  *      return previous dirty bits
5595  *      [if set _sd_process_pending will re-issue]
5596  */
5597 static _sd_bitmap_t
5598 update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off, sdbc_cblk_fba_t st_len)
5599 {
5600         _sd_bitmap_t old;
5601 
5602         /* was FAST */
5603         mutex_enter(&cc_ent->cc_lock);
5604         old = CENTRY_DIRTY(cc_ent);
5605         if (old) {
5606                 /*
5607                  * If we are writing to an FBA that is still marked dirty,
5608                  * record a write cancellation.
5609                  */
5610                 if (old & SDBC_GET_BITS(st_off, st_len)) {
5611                         CACHE_WRITE_CANCELLATION(CENTRY_CD(cc_ent));
5612                 }
5613 
5614                 /* This is a write to a block that was already dirty */
5615                 SDBC_SET_DIRTY(st_off, st_len, cc_ent);
5616                 sd_serialize();
5617                 if (CENTRY_IO_INPROGRESS(cc_ent))
5618                         cc_ent->cc_flag |= CC_PEND_DIRTY;
5619         }
5620         /* was FAST */
5621         mutex_exit(&cc_ent->cc_lock);
5622         return (old);
5623 }
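
     /*
      * Illustrative sketch, not compiled: how _sd_write() (below) uses
      * update_dirty() above.  The unlocked CENTRY_DIRTY() test is only
      * a cheap hint; the authoritative test repeats under cc_lock, the
      * usual check / lock / re-check pattern (off and len stand in for
      * the per-block offset and length):
      */
     #if 0
             if (CENTRY_DIRTY(cc_ent) &&             /* unlocked hint */
                 update_dirty(cc_ent, off, len)) {   /* locked re-check */
                     /* was already dirty: on a dirty queue, skip enqueue */
             } else {
                     /* first dirtying write: SDBC_SET_DIRTY() + enqueue */
             }
     #endif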
5624 
5625 /*
5626  * _sd_write - Interface call to commit part of handle.
5627  *
5628  * ARGUMENTS:
5629  *      handle  - handle allocated earlier on.
5630  *      fba_pos - disk block number to write to.
5631  *      fba_len - length in fbas.
5632  *      flag    - (NSC_NOBLOCK | NSC_WRTHRU)
5633  *
5634  * RETURNS:
5635  *      errno if return > 0
5636  *      NSC_HIT (in cache), NSC_DONE (to disk) or NSC_PENDING otherwise.
5637  *
5638  * Comments:
5639  *      This routine checks validity of the handle and then calls the
5640  *      sync-write function if this write is determined to be write-through.
5641  *      Else, it reflects the data to the write blocks on the mirror node
5642  *      (allocated in alloc_buf). If the cache block is not dirty, it is
5643  *      marked dirty and queued up for io processing later on.
5644  *      If parts are already dirty but io is not in progress yet, it is
5645  *      marked dirty and left alone (it is already in the queue)
5646  *      If parts are already dirty but io is in progress, it is marked
5647  *      dirty and also a flag is set indicating that this buffer should
5648  *      be reprocessed after the io-end-action.
5649  *      Attempt is made to coalesce multiple writes into a single list
5650  *      for io processing later on.
5651  *
5652  *      Issuing of writes may be delayed until the handle is released;
5653  *      _sd_queue_write() sets NSC_QUEUE, indicating that dirty bits
5654  *      and reflection to mirror have already been done, just queue I/O.
5655  */
5656 
5657 
5658 
5659 int
5660 _sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5661     int flag)
5662 {
5663         int cd = HANDLE_CD(handle);
5664         int num_queued, ret, queue_only, store_only;
5665         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
5666         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
5667         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
5668         nsc_size_t cur_fba_len; /* remaining length in disk blocks */
5669         _sd_cctl_t *cc_ent = NULL;
5670         _sd_cctl_t *cur_chain = NULL, *dirty_next = NULL;
5671 
5672 
5673         if (_sdbc_shutdown_in_progress) {
5674                 ret = EIO;
5675                 goto out;
5676         }
5677 
5678 
5679         if (!_SD_HANDLE_ACTIVE(handle)) {
5680                 SDALERT(SDF_WRITE,
5681                     SDT_INV_CD, 0, SDT_INV_BL, handle->bh_flag, 0);
5682                 ret = EINVAL;
5683                 goto out;
5684         }
5685 #if !defined(_SD_NOCHECKS)
5686         ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
5687         if ((handle->bh_flag & NSC_WRBUF) == 0) {
5688                 ret = EINVAL;
5689                 goto out;
5690         }
5691 #endif
5692         if (fba_len == 0) {
5693                 ret = NSC_DONE;
5694                 goto out;
5695         }
5696 
5697         /*
5698          * store_only: don't queue this I/O yet
5699          * queue_only: queue I/O to disk, don't store in mirror node
5700          */
5701         if (flag & NSC_QUEUE)
5702                 queue_only = 1, store_only = 0;
5703         else
5704                 if (_SD_DELAY_QUEUE && (fba_len != handle->bh_fba_len))
5705                         queue_only = 0, store_only = 1;
5706                 else
5707                         queue_only = store_only = 0;
5708 
5709         if (!queue_only && _SD_FORCE_DISCONNECT(fba_len))
5710                 _SD_DISCONNECT_CALLBACK(handle);
5711 
5712         if (_sd_cache_files[cd].cd_info->sh_failed) {
5713                 ret = EIO;
5714                 goto out;
5715         }
5716 
5717         KSTAT_RUNQ_ENTER(cd);
5718 
5719         SDTRACE(ST_ENTER|SDF_WRITE, cd, fba_len, fba_pos, flag, 0);
5720 
5721 #if defined(_SD_DEBUG_PATTERN)
5722         check_buf_consistency(handle, "wr");
5723 #endif
5724 
5725         cc_ent = handle->bh_centry;
5726 
5727         while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5728                 cc_ent = cc_ent->cc_chain;
5729 
5730         if (((handle->bh_flag | flag) & _SD_WRTHRU_MASK) ||
5731             (!queue_only && _sd_remote_store(cc_ent, fba_pos, fba_len))) {
5732                 flag |= NSC_WRTHRU;
5733 
5734                 ret = _sd_sync_write(handle, fba_pos, fba_len, flag);
5735                 goto stats_exit;
5736         }
5737 
5738         if (store_only)         /* enqueue in _sd_free_buf() */
5739                 handle->bh_flag |= NSC_QUEUE;
5740         cur_fba_len = fba_len;
5741         st_cblk_off = BLK_FBA_OFF(fba_pos);
5742         st_cblk_len = BLK_FBAS - st_cblk_off;
5743         if ((nsc_size_t)st_cblk_len >= fba_len) {
5744                 end_cblk_len = 0;
5745                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5746         } else {
5747                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5748         }
5749 
5750         if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, st_cblk_off,
5751             st_cblk_len))
5752                 goto loop1;
5753         if (store_only) {
5754                 SDBC_SET_TOFLUSH(st_cblk_off, st_cblk_len, cc_ent);
5755                 goto loop1;
5756         }
5757         SDBC_SET_DIRTY(st_cblk_off, st_cblk_len, cc_ent);
5758         cur_chain = dirty_next = cc_ent;
5759         num_queued = 1;
5760 
5761 loop1:
5762         DATA_LOG(SDF_WR, cc_ent, st_cblk_off, st_cblk_len);
5763 
5764         DTRACE_PROBE4(_sd_write_data1, uint64_t, (uint64_t)
5765             (BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5766             int, st_cblk_len, char *,
5767             *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5768             char *, *(int64_t *)(cc_ent->cc_data +
            FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5770 
5771         cur_fba_len -= st_cblk_len;
5772         cc_ent = cc_ent->cc_chain;
5773 
5774         while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5775                 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0, BLK_FBAS)) {
5776                         if (cur_chain) {
5777                                 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5778                                     num_queued);
5779                                 cur_chain = dirty_next = NULL;
5780                         }
5781                         goto loop2;
5782                 }
5783                 if (store_only) {
5784                         SDBC_SET_TOFLUSH(0, BLK_FBAS, cc_ent);
5785                         goto loop2;
5786                 }
5787                 SDBC_SET_DIRTY(0, BLK_FBAS, cc_ent);
5788                 if (dirty_next) {
5789                         dirty_next->cc_dirty_next = cc_ent;
5790                         dirty_next = cc_ent;
5791                         num_queued++;
5792                 } else {
5793                         cur_chain = dirty_next = cc_ent;
5794                         num_queued = 1;
5795                 }
5796         loop2:
5797                 DATA_LOG(SDF_WR, cc_ent, 0, BLK_FBAS);
5798 
5799                 DTRACE_PROBE4(_sd_write_data2, uint64_t,
5800                     (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5801                     int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
5802                     char *, *(int64_t *)(cc_ent->cc_data +
5803                     FBA_SIZE(BLK_FBAS) - 8));
5804 
5805                 cc_ent = cc_ent->cc_chain;
5806                 cur_fba_len -= BLK_FBAS;
5807         }
5808 
5809 #if defined(_SD_DEBUG)
5810         if (cur_fba_len != end_cblk_len)
5811                 cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in "
5812                     "_sd_write", cur_fba_len, end_cblk_len);
5813 #endif
5814 
5815         if (cur_fba_len) {
5816                 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0,
5817                     end_cblk_len)) {
5818                         if (cur_chain) {
5819                                 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5820                                     num_queued);
5821                                 cur_chain = dirty_next = NULL;
5822                         }
5823                         goto loop3;
5824                 }
5825                 if (store_only) {
5826                         SDBC_SET_TOFLUSH(0, end_cblk_len, cc_ent);
5827                         goto loop3;
5828                 }
5829                 SDBC_SET_DIRTY(0, end_cblk_len, cc_ent);
5830                 if (dirty_next) {
5831                         dirty_next->cc_dirty_next = cc_ent;
5832                         dirty_next = cc_ent;
5833                         num_queued++;
5834                 } else {
5835                         cur_chain = dirty_next = cc_ent;
5836                         num_queued = 1;
5837                 }
5838         }
5839 loop3:
5840         if (cur_fba_len) {
5841                 DATA_LOG(SDF_WR, cc_ent, 0, end_cblk_len);
5842 
5843                 DTRACE_PROBE4(_sd_write_data3, uint64_t,
5844                     (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5845                     int, end_cblk_len, char *, *(int64_t *)(cc_ent->cc_data),
5846                     char *, *(int64_t *)(cc_ent->cc_data +
5847                     FBA_SIZE(end_cblk_len) - 8));
5848 
5849         }
5850 
5851         if (!store_only && cur_chain) {
5852                 _sd_enqueue_dirty(cd, cur_chain, dirty_next, num_queued);
5853         }
5854 
5855         if (!queue_only) {
                CACHE_FBA_WRITE(cd, fba_len);
5857                 CACHE_WRITE_HIT;
5858 
5859                 FBA_WRITE_IO_KSTATS(cd, FBA_SIZE(fba_len));
5860         }
5861 
5862         ret = NSC_HIT;
5863 
5864 stats_exit:
5865         SDTRACE(ST_EXIT|SDF_WRITE, cd, fba_len, fba_pos, flag, ret);
5866         KSTAT_RUNQ_EXIT(cd);
5867 out:
5868         return (ret);
5869 }
5870 
5871 
5872 /*
5873  * _sd_queue_write(handle, fba_pos, fba_len): Queues delayed writes for
5874  *                                          flushing
5875  *
5876  * ARGUMENTS:  handle  - handle allocated with NSC_WRBUF
5877  *      fba_pos - starting fba pos from _sd_alloc_buf()
5878  *      fba_len - fba len from _sd_alloc_buf()
5879  *
5880  * USAGE    :  Called if _SD_DELAY_QUEUE is set. Finds all blocks in the
5881  *      handle marked for flushing and queues them to be written in
5882  *      optimized (i.e. sequential) order
5883  */
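/*
 * Illustration (hypothetical, assuming an 8-FBA cache block): if a
 * block's cc_toflush bitmap is 00110110 (FBAs 1-2 and 4-5 marked), the
 * partial-block loop below first accumulates FBAs 1-2; on finding the
 * next run starting at FBA 4 (sblk != 0 with a flush pending) it issues
 * _sd_write(..., NSC_QUEUE) for FBAs 1-2, then accumulates FBAs 4-5,
 * which the gap check at the bottom of the outer loop flushes.
 */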
5884 static void
5885 _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len)
5886 {
5887         nsc_off_t fba_end;
5888         sdbc_cblk_fba_t sblk, len, dirty;
5889         _sd_cctl_t *cc_ent;
5890         nsc_off_t flush_pos;
5891         int flush_pos_valid = 0;
5892         nsc_size_t flush_len = 0;
5893 
5894         cc_ent = handle->bh_centry;
5895         fba_end = fba_pos + fba_len;
5896         fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); /* 1st block */
5897         while (fba_pos < fba_end) {
5898                 dirty = cc_ent->cc_toflush;
5899                 cc_ent->cc_toflush = 0;
5900                 /*
5901                  * Full block
5902                  */
5903                 if (_SD_BMAP_ISFULL(dirty)) {
5904                         if (flush_pos_valid == 0) {
5905                                 flush_pos_valid = 1;
5906                                 flush_pos = fba_pos;
5907                         }
5908                         flush_len += BLK_FBAS;
5909                 }
5910                 /*
5911                  * Partial block
5912                  */
5913                 else while (dirty) {
5914                         sblk = SDBC_LOOKUP_STPOS(dirty);
5915                         len  = SDBC_LOOKUP_LEN(dirty);
5916                         SDBC_LOOKUP_MODIFY(dirty);
5917 
5918                         if (sblk && flush_pos_valid) {
5919                                 (void) _sd_write(handle, flush_pos, flush_len,
5920                                     NSC_QUEUE);
5921                                 flush_pos_valid = 0;
5922                                 flush_len = 0;
5923                         }
5924                         if (flush_pos_valid == 0) {
5925                                 flush_pos_valid = 1;
5926                                 flush_pos = fba_pos + sblk;
5927                         }
5928                         flush_len += len;
5929                 }
5930                 fba_pos += BLK_FBAS;
5931                 cc_ent = cc_ent->cc_chain;
5932                 /*
5933                  * If we find a gap, write out what we've got
5934                  */
5935                 if (flush_pos_valid && (flush_pos + flush_len) != fba_pos) {
5936                         (void) _sd_write(handle, flush_pos, flush_len,
5937                             NSC_QUEUE);
5938                         flush_pos_valid = 0;
5939                         flush_len = 0;
5940                 }
5941         }
5942         if (flush_pos_valid)
5943                 (void) _sd_write(handle, flush_pos, flush_len, NSC_QUEUE);
5944 }
5945 
5946 
5947 static int
5948 _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5949 {
5950         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
5951         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
5952         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
5953         ss_resource_t *ss_res;
5954 
5955         if (_sd_nodes_configured <= 2 && _sd_is_mirror_down())
5956                 return (0);
5957         st_cblk_off = BLK_FBA_OFF(fba_pos);
5958         st_cblk_len = BLK_FBAS - st_cblk_off;
5959         if ((nsc_size_t)st_cblk_len >= fba_len) {
5960                 end_cblk_len = 0;
5961                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5962         } else {
5963                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5964         }
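        /*
         * Reflect each cache block in the chain to safestore.  Note the
         * first (possibly partial) block is written at byte offset
         * FBA_SIZE(st_cblk_off) within its cblock; subsequent blocks
         * always start at offset 0.
         */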
5965 
5966         fba_len -= st_cblk_len;
5967 
5968         ss_res = cc_ent->cc_write->sc_res;
5969         if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5970             cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len),
5971             FBA_SIZE(st_cblk_off))) {
5972 
5973                 cmn_err(CE_WARN,
5974                     "!sdbc(_sd_write) safe store failed. Going synchronous");
5975                 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5976                     fba_pos, 0, -1);
5977                 return (-1);
5978         }
5979 
5980         cc_ent = cc_ent->cc_chain;
5981         while (fba_len > (nsc_size_t)end_cblk_len) {
5982                 fba_len -= BLK_FBAS;
5983 
5984                 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, cc_ent->cc_data,
5985                     CACHE_BLOCK_SIZE, 0)) {
5986 
5987                         cmn_err(CE_WARN, "!sdbc(_sd_write) safe store failed. "
5988                             "Going synchronous");
5989                         SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5990                             fba_pos, 0, -1);
5991                         return (-1);
5992                 }
5993 
5994                 cc_ent = cc_ent->cc_chain;
5995         } /* end while */
5996 
5997         if (fba_len) {
5998                 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5999                     cc_ent->cc_data, FBA_SIZE(end_cblk_len), 0)) {
6000 
6001                         cmn_err(CE_WARN, "!sdbc(_sd_write) nvmem dma failed. "
6002                             "Going synchronous");
6003                         SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
6004                             fba_pos, 0, -1);
6005                         return (-1);
6006                 }
6007         }
6008         return (0);
6009 }
6010 
6011 
6012 /*
6013  * _sd_sync_write2 - Write-through function.
6014  *
6015  * ARGUMENTS:
6016  *      wr_handle - handle into which to write the data.
6017  *      wr_st_pos - starting FBA position in wr_handle.
6018  *      fba_len   - length in fbas.
6019  *      flag    - NSC_NOBLOCK for async io.
6020  *      rd_handle - handle from which to read the data, or NULL.
6021  *      rd_st_pos - starting FBA position in rd_handle.
6022  *
6023  * RETURNS:
6024  *      errno if return > 0
6025  *      NSC_DONE or NSC_PENDING otherwise.
6026  *
6027  * Comments:
6028  *      This routine initiates io of the indicated portion. It returns
6029  *      synchronously after io is completed if NSC_NOBLOCK is not set.
 *      Otherwise NSC_PENDING is returned and the write callback is
 *      invoked on io completion.
6032  *
6033  *      See _sd_copy_direct() for usage when
6034  *          (wr_handle != rd_handle && rd_handle != NULL)
6035  */
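/*
 * Sketch of the two calling modes (mirrors the callers in this file):
 *
 *   _sd_sync_write(h, pos, len, flag)
 *       == _sd_sync_write2(h, pos, len, flag, NULL, 0)
 *          write-through of h's own cache blocks.
 *
 *   _sd_copy_direct(h1, h2, pos1, pos2, len)
 *       -> _sd_sync_write2(h2, pos2, len, 0, h1, pos1)
 *          h1's data goes straight to h2's disk and h2's cache
 *          blocks are invalidated.
 */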
6036 
6037 static int
6038 _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
6039     nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
6040     nsc_off_t rd_st_pos)
6041 {
6042         void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
6043         _sd_cctl_t *wr_ent, *rd_ent;
6044         nsc_size_t this_len;
6045         nsc_off_t rd_pos, wr_pos;
6046         nsc_size_t log_bytes;
6047         int cd = HANDLE_CD(wr_handle);
6048         int err;
6049         uint_t dirty;
6050         struct buf *bp;
6051 
6052         LINTUSED(flag);
6053 
6054         _SD_DISCONNECT_CALLBACK(wr_handle);
6055 
6056         if (rd_handle == NULL) {
6057                 rd_handle = wr_handle;
6058                 rd_st_pos = wr_st_pos;
6059         }
6060 
6061         wr_ent = wr_handle->bh_centry;
6062         while (CENTRY_BLK(wr_ent) != FBA_TO_BLK_NUM(wr_st_pos))
6063                 wr_ent = wr_ent->cc_chain;
6064 
6065         rd_ent = rd_handle->bh_centry;
6066         while (CENTRY_BLK(rd_ent) != FBA_TO_BLK_NUM(rd_st_pos))
6067                 rd_ent = rd_ent->cc_chain;
6068 
6069         bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
6070             wr_st_pos, FBA_TO_BLK_LEN(fba_len) + 2, B_WRITE);
6071 
6072         if (bp == NULL)
6073                 return (E2BIG);
6074 
6075         wr_pos = BLK_FBA_OFF(wr_st_pos);
6076         rd_pos = BLK_FBA_OFF(rd_st_pos);
6077         log_bytes = 0;
6078 
6079         do {
6080                 this_len = min((BLK_FBAS - rd_pos), (BLK_FBAS - wr_pos));
6081 
6082                 if (this_len > fba_len)
6083                         this_len = fba_len;
6084 
6085                 /*
6086                  * clear dirty bits in the write handle.
6087                  */
6088 
6089                 if (CENTRY_DIRTY(wr_ent)) {
6090                         mutex_enter(&wr_ent->cc_lock);
6091 
6092                         if (CENTRY_DIRTY(wr_ent)) {
6093                                 if (this_len == (nsc_size_t)BLK_FBAS ||
6094                                     rd_handle != wr_handle) {
6095                                         /*
6096                                          * optimization for when we have a
6097                                          * full cache block, or are doing
6098                                          * copy_direct (see below).
6099                                          */
6100 
6101                                         wr_ent->cc_write->sc_dirty = 0;
6102                                 } else {
6103                                         dirty = wr_ent->cc_write->sc_dirty;
6104                                         dirty &= ~(SDBC_GET_BITS(
6105                                             wr_pos, this_len));
6106                                         wr_ent->cc_write->sc_dirty = dirty;
6107                                 }
6108 
6109                                 SSOP_SETCENTRY(sdbc_safestore,
6110                                     wr_ent->cc_write);
6111                         }
6112 
6113                         mutex_exit(&wr_ent->cc_lock);
6114                 }
6115 
6116                 /*
6117                  * update valid bits in the write handle.
6118                  */
6119 
6120                 if (rd_handle == wr_handle) {
6121                         if (this_len == (nsc_size_t)BLK_FBAS) {
6122                                 SET_FULLY_VALID(wr_ent);
6123                         } else {
6124                                 SDBC_SET_VALID_BITS(wr_pos, this_len, wr_ent);
6125                         }
6126                 } else {
6127                         /*
6128                          * doing copy_direct, so mark the write handle
6129                          * as invalid since the data is on disk, but not
6130                          * in cache.
6131                          */
6132                         wr_ent->cc_valid = 0;
6133                 }
6134 
6135                 DATA_LOG(SDF_WRSYNC, rd_ent, rd_pos, this_len);
6136 
6137                 DTRACE_PROBE4(_sd_sync_write2_data, uint64_t,
6138                     (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(rd_ent)) + rd_pos,
6139                     uint64_t, (uint64_t)this_len, char *,
6140                     *(int64_t *)(rd_ent->cc_data + FBA_SIZE(rd_pos)),
6141                     char *, *(int64_t *)(rd_ent->cc_data +
6142                     FBA_SIZE(rd_pos + this_len) - 8));
6143 
6144                 sd_add_fba(bp, &rd_ent->cc_addr, rd_pos, this_len);
6145 
6146                 log_bytes += FBA_SIZE(this_len);
6147                 fba_len -= this_len;
6148 
6149                 wr_pos += this_len;
6150                 if (wr_pos >= (nsc_size_t)BLK_FBAS) {
6151                         wr_ent = wr_ent->cc_chain;
6152                         wr_pos = 0;
6153                 }
6154 
6155                 rd_pos += this_len;
6156                 if (rd_pos >= (nsc_size_t)BLK_FBAS) {
6157                         rd_ent = rd_ent->cc_chain;
6158                         rd_pos = 0;
6159                 }
6160 
6161         } while (fba_len > 0);
6162 
6163         DISK_FBA_WRITE(cd, FBA_NUM(log_bytes));
6164         CACHE_WRITE_MISS;
6165 
6166         FBA_WRITE_IO_KSTATS(cd, log_bytes);
6167 
6168         fn = (wr_handle->bh_flag & NSC_NOBLOCK) ? _sd_async_write_ea : NULL;
6169 
6170         err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, wr_handle);
6171 
6172         if (err != NSC_PENDING) {
6173                 DATA_LOG_CHAIN(SDF_WRSYEA, wr_handle->bh_centry,
6174                     wr_st_pos, FBA_NUM(log_bytes));
6175         }
6176 
6177         return (err);
6178 }
6179 
6180 
6181 static int
6182 _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6183     int flag)
6184 {
6185         return (_sd_sync_write2(handle, fba_pos, fba_len, flag, NULL, 0));
6186 }
6187 
6188 
6189 /*
6190  * _sd_zero - Interface call to zero out a portion of cache blocks.
6191  *
6192  * ARGUMENTS:
6193  *      handle  - handle allocated earlier on.
6194  *      fba_pos - disk block number to zero from.
6195  *      fba_len - length in fbas.
6196  *      flag    - NSC_NOBLOCK for async io.
6197  *
6198  * RETURNS:
6199  *      errno if return > 0
6200  *      NSC_DONE or NSC_PENDING otherwise.
6201  *
6202  * Comments:
6203  *      This routine zeroes out the indicated portion of the cache blocks
6204  *      and commits the data to disk.
6205  *      (See write for more details on the commit)
6206  */
6207 
6208 
6209 int
6210 _sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6211     int flag)
6212 {
6213         int cd;
6214         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
6215         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
6216         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
        nsc_size_t cur_fba_len; /* remaining FBAs to process */
6218         int ret;
6219         _sd_cctl_t *cc_ent;
6220 
6221         if (_sdbc_shutdown_in_progress) {
6222                 DTRACE_PROBE(shutdown);
6223                 return (EIO);
6224         }
6225 
6226         if (!_SD_HANDLE_ACTIVE(handle)) {
6227                 cmn_err(CE_WARN, "!sdbc(_sd_zero) handle %p not active",
6228                     (void *)handle);
6229 
6230                 DTRACE_PROBE1(handle_active, int, handle->bh_flag);
6231 
6232                 return (EINVAL);
6233         }
6234         ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
6235         if ((handle->bh_flag & NSC_WRBUF) == 0) {
6236                 DTRACE_PROBE1(handle_write, int, handle->bh_flag);
6237                 return (EINVAL);
6238         }
6239 
6240         if (fba_len == 0) {
6241                 DTRACE_PROBE(zero_len);
6242                 return (NSC_DONE);
6243         }
6244 
6245         if (_SD_FORCE_DISCONNECT(fba_len))
6246                 _SD_DISCONNECT_CALLBACK(handle);
6247 
6248         cd = HANDLE_CD(handle);
6249         SDTRACE(ST_ENTER|SDF_ZERO, cd, fba_len, fba_pos, flag, 0);
6250 
6251         cc_ent = handle->bh_centry;
6252         while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
6253                 cc_ent = cc_ent->cc_chain;
6254         cur_fba_len = fba_len;
6255         st_cblk_off = BLK_FBA_OFF(fba_pos);
6256         st_cblk_len = BLK_FBAS - st_cblk_off;
6257         if ((nsc_size_t)st_cblk_len >= fba_len) {
6258                 end_cblk_len = 0;
6259                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6260         } else {
6261                 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
6262         }
6263 
6264         cur_fba_len -= st_cblk_len;
6265         bzero(cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6266 
6267         cc_ent = cc_ent->cc_chain;
6268         while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6269                 cur_fba_len -= BLK_FBAS;
6270                 bzero(cc_ent->cc_data, CACHE_BLOCK_SIZE);
6271                 cc_ent = cc_ent->cc_chain;
6272         }
6273         if (cur_fba_len) {
6274                 bzero(cc_ent->cc_data, FBA_SIZE(cur_fba_len));
6275         }
6276 
6277         ret = _sd_write(handle, fba_pos, fba_len, flag);
6278         SDTRACE(ST_EXIT|SDF_ZERO, cd, fba_len, fba_pos, flag, ret);
6279 
6280         return (ret);
6281 }
6282 
6283 
6284 /*
6285  * _sd_copy - Copies portions of 2 handles.
6286  *
6287  * ARGUMENTS:
6288  *      handle1  - handle allocated earlier on.
6289  *      handle2  - handle allocated earlier on.
6290  *      fba_pos1 - disk block number to read from.
6291  *      fba_pos2 - disk block number to write to.
6292  *      fba_len - length in fbas.
6293  *
6294  * RETURNS:
6295  *      errno if return > 0
6296  *      NSC_DONE otherwise.
6297  *
6298  * Comments:
 *      This routine copies data between the two handles.
6300  *      WARNING: this could put the cache blocks in the destination handle
6301  *      in an inconsistent state. (the blocks could be valid in cache,
6302  *      but the copy makes the cache different from disk)
6303  *
6304  */
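/*
 * Illustration (hypothetical, assuming 8-FBA cache blocks): the
 * block-at-a-time fast path below requires both positions to share the
 * same in-block offset; fba_pos1 = 5 and fba_pos2 = 13 (both offset 5)
 * qualify, while fba_pos1 = 5 and fba_pos2 = 12 fall back to the
 * per-FBA loop.
 */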
6305 
6306 
6307 int
6308 _sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6309     nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6310 {
6311         sdbc_cblk_fba_t st_cblk_len;    /* FBA len of starting cache block */
6312         sdbc_cblk_fba_t end_cblk_len;   /* FBA len of ending cache block */
6313         sdbc_cblk_fba_t st_cblk_off;    /* FBA offset into starting cblock */
6314         nsc_off_t off1, off2;   /* offsets in FBA's into the disk */
        nsc_size_t cur_fba_len; /* remaining FBAs to process */
6316         _sd_cctl_t *cc_ent1, *cc_ent2;
6317 
6318         if (_sdbc_shutdown_in_progress) {
6319                 DTRACE_PROBE(shutdown);
6320                 return (EIO);
6321         }
6322         if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6323                 cmn_err(CE_WARN, "!sdbc(_sd_copy) handle %p or %p not active",
6324                     (void *)handle1, (void *)handle2);
6325 
6326                 DTRACE_PROBE2(handle_active1, int, handle1->bh_flag,
6327                     int, handle2->bh_flag);
6328 
6329                 return (EINVAL);
6330         }
6331         ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6332         ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6333 
6334         cc_ent1 = handle1->bh_centry;
6335         while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos1))
6336                 cc_ent1 = cc_ent1->cc_chain;
6337 
6338         cc_ent2 = handle2->bh_centry;
6339         while (CENTRY_BLK(cc_ent2) != FBA_TO_BLK_NUM(fba_pos2))
6340                 cc_ent2 = cc_ent2->cc_chain;
6341 
6342         if (BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) {
6343                 /* Different offsets, do it slowly (per fba) */
6344 
6345                 while (fba_len) {
6346                         off1 = FBA_SIZE(BLK_FBA_OFF(fba_pos1));
6347                         off2 = FBA_SIZE(BLK_FBA_OFF(fba_pos2));
6348 
6349                         bcopy(cc_ent1->cc_data+off1, cc_ent2->cc_data+off2,
6350                             FBA_SIZE(1));
6351 
6352                         fba_pos1++;
6353                         fba_pos2++;
6354                         fba_len--;
6355 
6356                         if (FBA_TO_BLK_NUM(fba_pos1) != CENTRY_BLK(cc_ent1))
6357                                 cc_ent1 = cc_ent1->cc_chain;
6358                         if (FBA_TO_BLK_NUM(fba_pos2) != CENTRY_BLK(cc_ent2))
6359                                 cc_ent2 = cc_ent2->cc_chain;
6360                 }
6361 
6362                 DTRACE_PROBE(_sd_copy_end);
6363                 return (NSC_DONE);
6364         }
6365         cur_fba_len = fba_len;
6366         st_cblk_off = BLK_FBA_OFF(fba_pos1);
6367         st_cblk_len = BLK_FBAS - st_cblk_off;
6368         if ((nsc_size_t)st_cblk_len >= fba_len) {
6369                 end_cblk_len = 0;
6370                 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6371         } else {
6372                 end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len);
6373         }
6374 
6375         bcopy(cc_ent1->cc_data + FBA_SIZE(st_cblk_off),
6376             cc_ent2->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6377         cur_fba_len -= st_cblk_len;
6378         cc_ent1 = cc_ent1->cc_chain;
6379         cc_ent2 = cc_ent2->cc_chain;
6380 
6381         while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6382                 bcopy(cc_ent1->cc_data, cc_ent2->cc_data, CACHE_BLOCK_SIZE);
6383                 cc_ent1 = cc_ent1->cc_chain;
6384                 cc_ent2 = cc_ent2->cc_chain;
6385                 cur_fba_len -= BLK_FBAS;
6386         }
6387         if (cur_fba_len) {
6388                 bcopy(cc_ent1->cc_data, cc_ent2->cc_data,
6389                     FBA_SIZE(end_cblk_len));
6390         }
6391 
6392         return (NSC_DONE);
6393 }
6394 
6395 
6396 /*
 * _sd_copy_direct - Copies data from one handle directly to another disk.
6398  *
6399  * ARGUMENTS:
6400  *      handle1  - handle to read from
6401  *      handle2  - handle to write to
6402  *      fba_pos1 - disk block number to read from.
6403  *      fba_pos2 - disk block number to write to.
6404  *      fba_len - length in fbas.
6405  *
6406  * RETURNS:
6407  *      errno if return > 0
6408  *      NSC_DONE otherwise.
6409  *
6410  * Comments:
6411  *      This routine copies data from handle1 directly (sync write)
 *      onto the disk pointed to by handle2. handle2's cache blocks are
 *      then invalidated since the data they contain is now stale
 *      compared to the disk.
6415  */
6416 
6417 static int
6418 _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6419     nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6420 {
6421         int rc;
6422 
6423         if (_sdbc_shutdown_in_progress) {
6424                 DTRACE_PROBE(shutdown);
6425                 return (EIO);
6426         }
6427 
6428         if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6429                 cmn_err(CE_WARN,
6430                     "!sdbc(_sd_copy_direct) handle %p or %p not active",
6431                     (void *)handle1, (void *)handle2);
6432 
6433                 DTRACE_PROBE2(handle_active2, int, handle1->bh_flag,
6434                     int, handle2->bh_flag);
6435 
6436                 return (EINVAL);
6437         }
6438 
6439         ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6440         ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6441 
6442         if ((handle2->bh_flag & NSC_WRITE) == 0) {
6443                 cmn_err(CE_WARN,
6444                     "!sdbc(_sd_copy_direct) handle2 %p is not writeable",
6445                     (void *)handle2);
6446                 DTRACE_PROBE1(handle2_write, int, handle2->bh_flag);
6447                 return (EINVAL);
6448         }
6449 
6450         rc = _sd_sync_write2(handle2, fba_pos2, fba_len, 0, handle1, fba_pos1);
6451 
6452         return (rc);
6453 }
6454 
6455 
6456 /*
6457  * _sd_enqueue_dirty - Enqueue a list of dirty buffers.
6458  *
6459  * ARGUMENTS:
6460  *      cd      - cache descriptor.
6461  *      chain   - pointer to list.
6462  *      cc_last - last entry in the chain.
6463  *      numq    - number of entries in the list.
6464  *
6465  * RETURNS:
6466  *      NONE.
6467  *
6468  * Comments:
6469  *      This routine queues up the dirty blocks for io processing.
 *      It uses cc_last to try to coalesce multiple lists into a
 *      single list when consecutive writes are sequential.
6472  */
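/*
 * Coalescing sketch (mirrors the checks below): a new chain is appended
 * to the pending one only if its first block immediately follows the
 * last queued block (CENTRY_BLK(chain) == CENTRY_BLK(last_ent) + 1),
 * the dirty masks abut (SDBC_DIRTY_NEIGHBORS), and the combined entry
 * count stays below SGIO_MAX; otherwise the chain starts a new list and
 * the writer is kicked off.
 */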
6473 
6474 void
6475 _sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last, int numq)
6476 {
6477         _sd_cd_info_t *cdi;
6478         _sd_cctl_t *last_ent;
6479         int start_write = 0, maxq = SGIO_MAX;
6480 
6481         ASSERT(cd >= 0);
6482         cdi = &(_sd_cache_files[cd]);
6483 #if defined(_SD_DEBUG)
6484         if (chain->cc_dirty_link)
6485                 cmn_err(CE_WARN, "!dirty_link set in enq %x fl %x",
6486                     chain->cc_dirty_link, chain->cc_flag);
6487 #endif
6488 
6489         /* was FAST */
6490         mutex_enter(&(cdi->cd_lock));
6491         cdi->cd_info->sh_numdirty += numq;
6492         if (cc_last == NULL)
6493                 numq = 0;
6494 
6495         if (cdi->cd_dirty_head == NULL)  {
6496                 cdi->cd_dirty_head = cdi->cd_dirty_tail = chain;
6497                 cdi->cd_last_ent = cc_last;
6498                 cdi->cd_lastchain_ptr = chain;
6499                 cdi->cd_lastchain = numq;
6500         } else {
6501                 if ((cc_last) && (last_ent = cdi->cd_last_ent) &&
6502                     (CENTRY_BLK(chain) == (CENTRY_BLK(last_ent)+1)) &&
6503                     (SDBC_DIRTY_NEIGHBORS(last_ent, chain)) &&
6504                     (cdi->cd_lastchain + numq < maxq)) {
6505                         cdi->cd_last_ent->cc_dirty_next = chain;
6506                         cdi->cd_last_ent = cc_last;
6507                         cdi->cd_lastchain += numq;
6508                 } else {
6509                         cdi->cd_dirty_tail->cc_dirty_link = chain;
6510                         cdi->cd_dirty_tail = chain;
6511                         cdi->cd_last_ent = cc_last;
6512                         cdi->cd_lastchain_ptr = chain;
6513                         cdi->cd_lastchain = numq;
6514                         start_write = 1;
6515                 }
6516         }
6517         /* was FAST */
6518         mutex_exit(&(cdi->cd_lock));
6519         if (start_write)
6520                 (void) _SD_CD_WRITER(cd);
6521 }
6522 
6523 /*
6524  * _sd_enqueue_dirty_chain  - Enqueue a chain of a list of dirty buffers.
6525  *
6526  * ARGUMENTS:
6527  *      cd      - cache descriptor.
6528  *      chain_first     - first list in  this chain.
6529  *      chain_last      - last list in this chain.
6530  *      numq    - number of entries being queue (total of all lists)
6531  *
6532  * RETURNS:
6533  *      NONE.
6534  *
6535  * Comments:
 *      This routine is called from the processing that follows io completion.
6537  *      If the buffers are still dirty, they are queued up in one shot.
6538  */
6539 
6540 void
_sd_enqueue_dirty_chain(int cd, _sd_cctl_t *chain_first,
    _sd_cctl_t *chain_last, int numq)
6545 {
6546         _sd_cd_info_t *cdi;
6547 
6548         ASSERT(cd >= 0);
6549         cdi = &(_sd_cache_files[cd]);
6550         if (chain_last->cc_dirty_link)
6551                 cmn_err(CE_PANIC,
6552                     "!_sd_enqueue_dirty_chain: chain_last %p dirty_link %p",
6553                     (void *)chain_last, (void *)chain_last->cc_dirty_link);
6554         /* was FAST */
6555         mutex_enter(&(cdi->cd_lock));
6556         cdi->cd_last_ent = NULL;
6557         cdi->cd_lastchain_ptr = NULL;
6558         cdi->cd_lastchain = 0;
6559 
6560         cdi->cd_info->sh_numdirty += numq;
6561         if (cdi->cd_dirty_head == NULL)  {
6562                 cdi->cd_dirty_head = chain_first;
6563                 cdi->cd_dirty_tail = chain_last;
6564         } else {
6565                 cdi->cd_dirty_tail->cc_dirty_link = chain_first;
6566                 cdi->cd_dirty_tail = chain_last;
6567         }
6568         /* was FAST */
6569         mutex_exit(&(cdi->cd_lock));
6570 }
6571 
6572 
6573 #ifndef _MULTI_DATAMODEL
6574 /* ARGSUSED */
6575 #endif
6576 static int
6577 convert_stats(_sd_stats32_t *uptr)
6578 /*
 *      Convert the 64 bit statistics structure to the 32 bit version,
 *      possibly losing information when the cache is > 4GB. Ha!
 *
 *      NOTE: this code isn't really MT ready since the struct being
 *      copied into is static. However the race is pretty benign and
 *      isn't a whole lot worse than the vanilla version, which copies
 *      data to user space from kernel structures that can be changing
 *      under it too. We can't use a local stack structure since the
 *      data size is 70k or so and kernel stacks are tiny (8k).
6588  */
6589 {
6590 #ifndef _MULTI_DATAMODEL
6591         return (SDBC_EMODELCONVERT);
6592 #else
6593         int rc = 0;
6594 
6595         /*
6596          * This could be done in less code with bcopy type operations
6597          * but this is simpler to follow and easier to change if
6598          * the structures change.
6599          */
6600 
6601         _sd_cache_stats32->net_dirty = _sd_cache_stats->net_dirty;
6602         _sd_cache_stats32->net_pending = _sd_cache_stats->net_pending;
6603         _sd_cache_stats32->net_free = _sd_cache_stats->net_free;
6604         _sd_cache_stats32->st_count = _sd_cache_stats->st_count;
6605         _sd_cache_stats32->st_loc_count = _sd_cache_stats->st_loc_count;
6606         _sd_cache_stats32->st_rdhits = _sd_cache_stats->st_rdhits;
6607         _sd_cache_stats32->st_rdmiss = _sd_cache_stats->st_rdmiss;
6608         _sd_cache_stats32->st_wrhits = _sd_cache_stats->st_wrhits;
6609         _sd_cache_stats32->st_wrmiss = _sd_cache_stats->st_wrmiss;
6610         _sd_cache_stats32->st_blksize = _sd_cache_stats->st_blksize;
6611 
6612         _sd_cache_stats32->st_lru_blocks = _sd_cache_stats->st_lru_blocks;
6613         _sd_cache_stats32->st_lru_noreq = _sd_cache_stats->st_lru_noreq;
6614         _sd_cache_stats32->st_lru_req = _sd_cache_stats->st_lru_req;
6615 
6616         _sd_cache_stats32->st_wlru_inq = _sd_cache_stats->st_wlru_inq;
6617 
6618         _sd_cache_stats32->st_cachesize = _sd_cache_stats->st_cachesize;
6619         _sd_cache_stats32->st_numblocks = _sd_cache_stats->st_numblocks;
6620         _sd_cache_stats32->st_wrcancelns = _sd_cache_stats->st_wrcancelns;
6621         _sd_cache_stats32->st_destaged = _sd_cache_stats->st_destaged;
6622 
        /*
         * bcopy the shared stats, which contain nothing that needs
         * conversion
         */
6627 
6628         bcopy(_sd_cache_stats->st_shared, _sd_cache_stats32->st_shared,
6629             sizeof (_sd_shared_t) * sdbc_max_devs);
6630 
6631         if (copyout(_sd_cache_stats32, uptr, sizeof (_sd_stats32_t) +
6632             (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6633                 rc = EFAULT;
6634 
6635         return (rc);
6636 #endif /* _MULTI_DATAMODEL */
6637 }
6638 
6639 
6640 int
6641 _sd_get_stats(_sd_stats_t *uptr, int convert_32)
6642 {
6643         int rc = 0;
6644 
6645         if (_sd_cache_stats == NULL) {
6646                 static _sd_stats_t dummy;
6647 #ifdef _MULTI_DATAMODEL
6648                 static _sd_stats32_t dummy32;
6649 #endif
6650 
6651                 if (convert_32) {
6652 #ifdef _MULTI_DATAMODEL
6653                         if (copyout(&dummy32, uptr, sizeof (_sd_stats32_t)))
6654                                 rc = EFAULT;
6655 #else
6656                         rc = SDBC_EMODELCONVERT;
6657 #endif
6658                 } else if (copyout(&dummy, uptr, sizeof (_sd_stats_t)))
6659                         rc = EFAULT;
6660                 return (rc);
6661         }
6662 
6663         _sd_cache_stats->st_lru_blocks = _sd_lru_q.sq_inq;
6664         _sd_cache_stats->st_lru_noreq  = _sd_lru_q.sq_noreq_stat;
6665         _sd_cache_stats->st_lru_req    = _sd_lru_q.sq_req_stat;
6666 
6667         if (sdbc_safestore) {
6668                 ssioc_stats_t ss_stats;
6669 
6670                 if (SSOP_CTL(sdbc_safestore, SSIOC_STATS,
6671                     (uintptr_t)&ss_stats) == 0)
6672                         _sd_cache_stats->st_wlru_inq = ss_stats.wq_inq;
6673                 else
6674                         _sd_cache_stats->st_wlru_inq = 0;
6675         }
6676 
6677         if (convert_32)
6678                 rc = convert_stats((_sd_stats32_t *)uptr);
6679         else if (copyout(_sd_cache_stats, uptr,
6680             sizeof (_sd_stats_t) + (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6681                 rc = EFAULT;
6682 
6683         return (rc);
6684 }
6685 
6686 
6687 int
6688 _sd_set_hint(int cd, uint_t hint)
6689 {
6690         int ret = 0;
        if (FILE_OPENED(cd)) {
6692                 SDTRACE(ST_ENTER|SDF_HINT, cd, 1, SDT_INV_BL, hint, 0);
6693                 _sd_cache_files[cd].cd_hint |= (hint & _SD_HINT_MASK);
6694                 SDTRACE(ST_EXIT|SDF_HINT, cd, 1, SDT_INV_BL, hint, ret);
6695         } else
6696                 ret = EINVAL;
6697 
6698         return (ret);
6699 }
6700 
6701 
6702 
6703 int
6704 _sd_clear_hint(int cd, uint_t hint)
6705 {
6706         int ret = 0;
6707         if (FILE_OPENED(cd)) {
6708                 SDTRACE(ST_ENTER|SDF_HINT, cd, 2, SDT_INV_BL, hint, 0);
6709                 _sd_cache_files[cd].cd_hint &= ~(hint & _SD_HINT_MASK);
6710                 SDTRACE(ST_EXIT|SDF_HINT, cd, 2, SDT_INV_BL, hint, ret);
6711         } else
6712                 ret = EINVAL;
6713 
6714         return (ret);
6715 }
6716 
6717 
6718 int
6719 _sd_get_cd_hint(int cd, uint_t *hint)
6720 {
6721         *hint = 0;
6722         if (FILE_OPENED(cd)) {
6723                 *hint = _sd_cache_files[cd].cd_hint;
6724                 return (0);
6725         } else
6726                 return (EINVAL);
6727 }
6728 
6729 static int
_sd_node_hint_caller(blind_t hint, int hint_action)
6731 {
6732         int rc;
6733 
        switch (hint_action) {
        case NSC_GET_NODE_HINT:
                rc = _sd_get_node_hint((uint_t *)hint);
                break;
        case NSC_SET_NODE_HINT:
                rc = _sd_set_node_hint((uint_t)(unsigned long)hint);
                break;
        case NSC_CLEAR_NODE_HINT:
                rc = _sd_clear_node_hint((uint_t)(unsigned long)hint);
                break;
        default:
                rc = EINVAL;
                break;
        }
6748 
6749         return (rc);
6750 }
6751 
6752 int
6753 _sd_set_node_hint(uint_t hint)
6754 {
6755         SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6756         if ((_sd_node_hint & NSC_NO_FORCED_WRTHRU) &&
6757             (hint & NSC_FORCED_WRTHRU))
6758                 return (EINVAL);
6759         _sd_node_hint |= (hint & _SD_HINT_MASK);
        SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6761         return (0);
6762 }
6763 
6764 
6765 int
6766 _sd_clear_node_hint(uint_t hint)
6767 {
6768         SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6769         _sd_node_hint &= ~(hint & _SD_HINT_MASK);
6770         SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6771         return (0);
6772 }
6773 
6774 
6775 int
6776 _sd_get_node_hint(uint_t *hint)
6777 {
6778         *hint = _sd_node_hint;
6779         return (0);
6780 }
6781 
6782 
6783 int
6784 _sd_get_partsize(blind_t xcd, nsc_size_t *ptr)
6785 {
6786         int cd = (int)(unsigned long)xcd;
6787 
6788         if (FILE_OPENED(cd)) {
6789                 *ptr = _sd_cache_files[cd].cd_info->sh_filesize;
6790                 return (0);
6791         } else
6792                 return (EINVAL);
6793 }
6794 
6795 
6796 int
6797 _sd_get_maxfbas(blind_t xcd, int flag, nsc_size_t *ptr)
6798 {
6799         int cd = (int)(unsigned long)xcd;
6800 
6801         if (!FILE_OPENED(cd))
6802                 return (EINVAL);
6803 
6804         if (flag & NSC_CACHEBLK)
6805                 *ptr = BLK_FBAS;
6806         else
6807                 *ptr = sdbc_max_fbas;
6808 
6809         return (0);
6810 }
6811 
6812 
6813 int
6814 _sd_control(blind_t xcd, int cmd, void *ptr, int len)
6815 {
6816         _sd_cd_info_t *cdi;
6817         int cd = (int)(unsigned long)xcd;
6818 
6819         cdi = &(_sd_cache_files[cd]);
6820         return (nsc_control(cdi->cd_rawfd, cmd, ptr, len));
6821 }
6822 
6823 
6824 int
6825 _sd_discard_pinned(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len)
6826 {
6827         int cd = (int)(unsigned long)xcd;
6828         _sd_cctl_t *cc_ent, **cc_lst, **cc_tmp, *nxt;
6829         ss_centry_info_t *wctl;
6830         int found = 0;
6831         nsc_off_t cblk;
6832         _sd_cd_info_t *cdi = &_sd_cache_files[cd];
6833         int rc;
6834 
6835         if ((!FILE_OPENED(cd)) || (!cdi->cd_info->sh_failed)) {
6836 
6837                 return (EINVAL);
6838         }
6839 
6840         for (cblk = FBA_TO_BLK_NUM(fba_pos);
6841             cblk < FBA_TO_BLK_LEN(fba_pos + fba_len); cblk++) {
                if ((cc_ent = (_sd_cctl_t *)
                    _sd_hash_search(cd, cblk, _sd_htable)) != NULL) {
6844                         if (!CENTRY_PINNED(cc_ent))
6845                                 continue;
6846 
6847                         /*
6848                          * remove cc_ent from failed links
6849                          * cc_lst - pointer to "cc_dirty_link" pointer
                         *          starts at &cd_fail_head.
6851                          * cc_tmp - pointer to "cc_dirty_next"
6852                          *          except when equal to cc_lst.
6853                          */
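                        /*
                         * Hypothetical picture of the failed list:
                         *
                         *   cd_fail_head -> A1 -cc_dirty_next-> A2
                         *                   |
                         *            cc_dirty_link
                         *                   v
                         *                   B1 -cc_dirty_next-> B2 ...
                         */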
6854                         mutex_enter(&cdi->cd_lock);
6855                         cc_tmp = cc_lst = &(cdi->cd_fail_head);
6856                         while (*cc_tmp != cc_ent) {
6857                                 cc_tmp = &((*cc_tmp)->cc_dirty_next);
6858                                 if (!*cc_tmp)
6859                                         cc_lst = &((*cc_lst)->cc_dirty_link),
6860                                             cc_tmp = cc_lst;
6861                         }
6862                         if (*cc_tmp) {
6863                                 found++;
6864                                 if (cc_lst != cc_tmp) /* break chain */
6865                                         *cc_tmp = NULL;
6866                                 nxt = cc_ent->cc_dirty_next;
6867                                 if (nxt) {
6868                                         nxt->cc_dirty_link =
6869                                             (*cc_lst)->cc_dirty_link;
6870                                         *cc_lst = nxt;
6871                                 } else {
6872                                         *cc_lst = (*cc_lst)->cc_dirty_link;
6873                                 }
6874                                 cdi->cd_info->sh_numfail--;
6875                                 nsc_unpinned_data(cdi->cd_iodev,
6876                                     BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
6877                                     BLK_FBAS);
6878                         }
6879                         mutex_exit(&cdi->cd_lock);
6880 
6881                         /* clear dirty bits */
6882                         /* was FAST */
6883                         mutex_enter(&cc_ent->cc_lock);
6884                         cc_ent->cc_valid = cc_ent->cc_dirty = 0;
6885                         cc_ent->cc_flag &= ~(CC_QHEAD|CC_PEND_DIRTY|CC_PINNED);
6886                         cc_ent->cc_dirty_link = NULL;
6887                         wctl = cc_ent->cc_write;
6888                         cc_ent->cc_write = NULL;
6889                         /* was FAST */
6890                         mutex_exit(&cc_ent->cc_lock);
6891 
6892                         /* release cache block to head of LRU */
6893                         if (wctl) {
6894                                 wctl->sc_flag = 0;
6895                                 wctl->sc_dirty = 0;
6896                                 SSOP_SETCENTRY(sdbc_safestore, wctl);
6897                                 SSOP_DEALLOCRESOURCE(sdbc_safestore,
6898                                     wctl->sc_res);
6899                         }
6900 
6901                         if (!sdbc_use_dmchain)
6902                                 _sd_requeue_head(cc_ent);
6903                 }
6904         }
6905 
6906         rc = found ? NSC_DONE : EINVAL;
6907 
6908         return (rc);
6909 }
6910 
6911 
6912 /*
6913  * Handle allocation
6914  */
6915 
6916 _sd_buf_hlist_t  _sd_handle_list;
6917 
6918 /*
6919  * _sdbc_handles_unload - cache is being unloaded.
6920  */
6921 void
6922 _sdbc_handles_unload(void)
6923 {
6924         mutex_destroy(&_sd_handle_list.hl_lock);
6925 
6926 }
6927 
6928 /*
 * _sdbc_handles_load - cache is being loaded.
6930  */
6931 int
6932 _sdbc_handles_load(void)
6933 {
6934         mutex_init(&_sd_handle_list.hl_lock, NULL, MUTEX_DRIVER, NULL);
6935 
6936         return (0);
6937 }
6938 
6939 int
_sdbc_handles_configure(void)
6941 {
6942         _sd_handle_list.hl_count = 0;
6943 
6944         _sd_handle_list.hl_top.bh_next = &_sd_handle_list.hl_top;
6945         _sd_handle_list.hl_top.bh_prev = &_sd_handle_list.hl_top;
6946 
6947         return (0);
6948 }
6949 
6950 
6951 
6952 /*
6953  * _sdbc_handles_deconfigure - cache is being deconfigured
6954  */
6955 void
6956 _sdbc_handles_deconfigure(void)
6957 {
6958         _sd_handle_list.hl_count = 0;
6959 }
6960 
6961 
6962 _sd_buf_handle_t *
_sd_alloc_handle(sdbc_callback_fn_t d_cb, sdbc_callback_fn_t r_cb,
    sdbc_callback_fn_t w_cb)
6965 {
6966         _sd_buf_handle_t *handle;
6967 
6968         handle = (_sd_buf_handle_t *)kmem_zalloc(sizeof (_sd_buf_handle_t),
6969             KM_SLEEP);
6970         /* maintain list and count for debugging */
6971         mutex_enter(&_sd_handle_list.hl_lock);
6972 
6973         handle->bh_prev = &_sd_handle_list.hl_top;
6974         handle->bh_next = _sd_handle_list.hl_top.bh_next;
6975         _sd_handle_list.hl_top.bh_next->bh_prev = handle;
6976         _sd_handle_list.hl_top.bh_next = handle;
6977 
6978         ++_sd_handle_list.hl_count;
6979         mutex_exit(&_sd_handle_list.hl_lock);
6980 #if !defined(_SD_NOCHECKS)
6981         ASSERT(!(handle->bh_flag & (NSC_HALLOCATED | NSC_HACTIVE)));
6982 #endif
6983         handle->bh_disconnect_cb = d_cb;
6984         handle->bh_read_cb = r_cb;
6985         handle->bh_write_cb = w_cb;
6986         handle->bh_flag |= NSC_HALLOCATED;
6987         handle->bh_alloc_thread = nsc_threadp();
6988 
6989         return (handle);
6990 }
6991 
6992 int
6993 _sd_free_handle(_sd_buf_handle_t *handle)
6994 {
6995 
6996         if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
6997                 cmn_err(CE_WARN, "!sdbc(_sd_free_handle) handle %p not valid",
6998                     (void *)handle);
6999 
7000                 DTRACE_PROBE(_sd_free_handle_end);
7001 
7002                 return (EINVAL);
7003         }
7004         if (_SD_HANDLE_ACTIVE(handle)) {
7005                 cmn_err(CE_WARN,
7006                     "!sdbc(_sd_free_handle) attempt to free active handle %p",
7007                     (void *)handle);
7008 
7009                 DTRACE_PROBE1(free_handle_active, int, handle->bh_flag);
7010 
7011                 return (EINVAL);
7012         }
7013 
7014 
7015         /* remove from queue before free */
7016         mutex_enter(&_sd_handle_list.hl_lock);
7017         handle->bh_prev->bh_next = handle->bh_next;
7018         handle->bh_next->bh_prev = handle->bh_prev;
7019         --_sd_handle_list.hl_count;
7020         mutex_exit(&_sd_handle_list.hl_lock);
7021 
7022         kmem_free(handle, sizeof (_sd_buf_handle_t));
7023 
7024         return (0);
7025 }
7026 
7027 
7028 
7029 
#if !defined(_SD_8K_BLKSIZE)
7031 #define _SD_MAX_MAP 0x100
7032 #else   /* !(_SD_8K_BLKSIZE)    */
7033 #define _SD_MAX_MAP 0x10000
7034 #endif  /* !(_SD_8K_BLKSIZE)    */
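/*
 * Presumably _SD_MAX_MAP is sized to index every possible dirty-bitmap
 * value: 0x100 entries for an 8 bit map, 0x10000 for the 16 bit map
 * used with _SD_8K_BLKSIZE (an inference from the table sizes below).
 */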
7035 
7036 char _sd_contig_bmap[_SD_MAX_MAP];
7037 _sd_map_info_t _sd_lookup_map[_SD_MAX_MAP];
7038 
7039 void
7040 _sd_init_contig_bmap(void)
7041 {
7042         int i, j;
7043 
7044         for (i = 1; i < _SD_MAX_MAP; i = ((i << 1) | 1))
7045                 for (j = i; j < _SD_MAX_MAP; j <<= 1)
7046                         _sd_contig_bmap[j] = 1;
7047 }
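/*
 * Illustration: the nested loops above set _sd_contig_bmap[m] = 1
 * exactly for masks m that are a single contiguous run of set bits,
 * i.e. m == (2^len - 1) << shift.  E.g. 0x06 (binary 0110) is marked
 * contiguous, while 0x05 (binary 0101) is not.
 */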
7048 
7049 
7050 
7051 
7052 void
7053 _sd_init_lookup_map(void)
7054 {
7055         unsigned int i, j, k;
7056         int stpos, len;
7057         _sd_bitmap_t mask;
7058 
7059         for (i = 0; i < _SD_MAX_MAP; i++) {
                for (j = i, k = 0; j && ((j & 1) == 0); j >>= 1, k++)
                        ;
                stpos = k;
                _sd_lookup_map[i].mi_stpos = (unsigned char)k;

                for (k = 0; j & 1; j >>= 1, k++)
                        ;
7067                 len = k;
7068                 _sd_lookup_map[i].mi_len = (unsigned char)k;
7069 
7070                 _sd_lookup_map[i].mi_mask = SDBC_GET_BITS(stpos, len);
7071         }
7072         for (i = 0; i < _SD_MAX_MAP; i++) {
7073                 mask = (_sd_bitmap_t)i;
7074                 for (j = 0; mask; j++)
7075                         SDBC_LOOKUP_MODIFY(mask);
7076 
7077                 _sd_lookup_map[i].mi_dirty_count = (unsigned char)j;
7078         }
7079         for (i = 0; i < _SD_MAX_MAP; i++) {
7080                 _sd_lookup_map[i].mi_io_count = SDBC_LOOKUP_DTCOUNT(i);
7081                 mask = ~i;
7082                 _sd_lookup_map[i].mi_io_count += SDBC_LOOKUP_DTCOUNT(mask);
7083         }
7084 }
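/*
 * Worked example (hypothetical) for mask 0x2C (binary 101100): the
 * first loop pair above yields mi_stpos = 2 and mi_len = 2 (the lowest
 * run, bits 2-3) with mi_mask = SDBC_GET_BITS(2, 2) = 0x0C, and the
 * dirty-count pass finds two runs (bits 2-3 and bit 5), so
 * mi_dirty_count = 2.
 */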
7085 
7086 
7087 nsc_def_t _sd_sdbc_def[] = {
7088         "Open",         (uintptr_t)_sd_open_io,                 0,
7089         "Close",        (uintptr_t)_sd_close_io,                0,
7090         "Attach",       (uintptr_t)_sdbc_io_attach_cd,          0,
7091         "Detach",       (uintptr_t)_sdbc_io_detach_cd,          0,
7092         "AllocBuf",     (uintptr_t)_sd_alloc_buf,               0,
7093         "FreeBuf",      (uintptr_t)_sd_free_buf,                0,
7094         "Read",         (uintptr_t)_sd_read,                    0,
7095         "Write",        (uintptr_t)_sd_write,                   0,
7096         "Zero",         (uintptr_t)_sd_zero,                    0,
7097         "Copy",         (uintptr_t)_sd_copy,                    0,
7098         "CopyDirect",   (uintptr_t)_sd_copy_direct,             0,
7099         "Uncommit",     (uintptr_t)_sd_uncommit,                0,
7100         "AllocHandle",  (uintptr_t)_sd_alloc_handle,            0,
7101         "FreeHandle",   (uintptr_t)_sd_free_handle,             0,
7102         "Discard",      (uintptr_t)_sd_discard_pinned,          0,
7103         "Sizes",        (uintptr_t)_sd_cache_sizes,             0,
7104         "GetPinned",    (uintptr_t)_sd_get_pinned,              0,
7105         "NodeHints",    (uintptr_t)_sd_node_hint_caller,        0,
7106         "PartSize",     (uintptr_t)_sd_get_partsize,            0,
7107         "MaxFbas",      (uintptr_t)_sd_get_maxfbas,             0,
7108         "Control",      (uintptr_t)_sd_control,                 0,
7109         "Provide",      NSC_CACHE,                              0,
7110         0,              0,                                      0
7111 };
7112 
7113 /*
7114  * do the SD_GET_CD_CLUSTER_DATA ioctl (get the global filename data)
7115  */
7116 /* ARGSUSED */
7117 int
7118 sd_get_file_info_data(char *uaddrp)
7119 {
7120         return (ENOTTY);
7121 }
7122 
7123 /*
7124  * do the SD_GET_CD_CLUSTER_SIZE ioctl (get size of global filename area)
7125  */
7126 int
7127 sd_get_file_info_size(void *uaddrp)
7128 {
7129         if (copyout(&_sdbc_gl_file_info_size, uaddrp,
7130             sizeof (_sdbc_gl_file_info_size))) {
7131                 return (EFAULT);
7132         }
7133 
7134         return (0);
7135 }
7136 
7137 
7138 /*
7139  * SD_GET_GLMUL_SIZES ioctl
7140  * get sizes of the global info regions (for this node only)
7141  */
7142 /* ARGSUSED */
7143 int
7144 sd_get_glmul_sizes(int *uaddrp)
7145 {
7146         return (ENOTTY);
7147 }
7148 
7149 /*
7150  * SD_GET_GLMUL_INFO ioctl
7151  * get the global metadata for write blocks (for this node only)
7152  */
7153 /* ARGSUSED */
7154 int
7155 sd_get_glmul_info(char *uaddrp)
7156 {
7157 
7158         return (ENOTTY);
7159 }
7160 
7161 int
7162 sdbc_global_stats_update(kstat_t *ksp, int rw)
7163 {
7164         sdbc_global_stats_t *sdbc_gstats;
7165         _sd_stats_t *gstats_vars;
7166         uint_t hint;
7167 
7168         sdbc_gstats = (sdbc_global_stats_t *)(ksp->ks_data);
7169 
7170         gstats_vars = _sd_cache_stats;
7171 
7172         if (rw == KSTAT_WRITE) {
7173                 return (EACCES);
7174         }
7175 
7176         /* default to READ */
7177         sdbc_gstats->ci_sdbc_count.value.ul = gstats_vars->st_count;
7178         sdbc_gstats->ci_sdbc_loc_count.value.ul = gstats_vars->st_loc_count;
7179         sdbc_gstats->ci_sdbc_rdhits.value.ul = (ulong_t)gstats_vars->st_rdhits;
7180         sdbc_gstats->ci_sdbc_rdmiss.value.ul = (ulong_t)gstats_vars->st_rdmiss;
7181         sdbc_gstats->ci_sdbc_wrhits.value.ul = (ulong_t)gstats_vars->st_wrhits;
7182         sdbc_gstats->ci_sdbc_wrmiss.value.ul = (ulong_t)gstats_vars->st_wrmiss;
7183 
7184         sdbc_gstats->ci_sdbc_blksize.value.ul =
7185             (ulong_t)gstats_vars->st_blksize;
7186         sdbc_gstats->ci_sdbc_lru_blocks.value.ul = (ulong_t)_sd_lru_q.sq_inq;
7187 #ifdef DEBUG
7188         sdbc_gstats->ci_sdbc_lru_noreq.value.ul =
7189             (ulong_t)_sd_lru_q.sq_noreq_stat;
7190         sdbc_gstats->ci_sdbc_lru_req.value.ul = (ulong_t)_sd_lru_q.sq_req_stat;
7191 #endif
7192         sdbc_gstats->ci_sdbc_wlru_inq.value.ul =
7193             (ulong_t)gstats_vars->st_wlru_inq;
7194         sdbc_gstats->ci_sdbc_cachesize.value.ul =
7195             (ulong_t)gstats_vars->st_cachesize;
7196         sdbc_gstats->ci_sdbc_numblocks.value.ul =
7197             (ulong_t)gstats_vars->st_numblocks;
7198         sdbc_gstats->ci_sdbc_wrcancelns.value.ul =
7199             (ulong_t)gstats_vars->st_wrcancelns;
7200         sdbc_gstats->ci_sdbc_destaged.value.ul =
7201             (ulong_t)gstats_vars->st_destaged;
7202         sdbc_gstats->ci_sdbc_num_shared.value.ul = (ulong_t)sdbc_max_devs;
7203         (void) _sd_get_node_hint(&hint);
7204         sdbc_gstats->ci_sdbc_nodehints.value.ul = (ulong_t)hint;
7205 
7206 
7207         return (0);
7208 }
7209 
7210 int
7211 sdbc_cd_stats_update(kstat_t *ksp, int rw)
7212 {
7213         sdbc_cd_stats_t *sdbc_shstats;
7214         _sd_shared_t *shstats_vars;
7215         int name_len;
7216         uint_t hint;
7217 
7218         sdbc_shstats = (sdbc_cd_stats_t *)(ksp->ks_data);
7219 
7220         shstats_vars = (_sd_shared_t *)(ksp->ks_private);
7221 
7222         if (rw == KSTAT_WRITE) {
7223                 return (EACCES);
7224         }
7225 
7226         /* copy tail of filename to kstat. leave 1 byte for null char */
7227         if (shstats_vars->sh_filename != NULL) {
7228                 name_len = (int)strlen(shstats_vars->sh_filename);
7229                 name_len -= (KSTAT_DATA_CHAR_LEN - 1);
7230 
7231                 if (name_len < 0) {
7232                         name_len = 0;
7233                 }
7234 
7235                 (void) strlcpy(sdbc_shstats->ci_sdbc_vol_name.value.c,
7236                     shstats_vars->sh_filename + name_len, KSTAT_DATA_CHAR_LEN);
7237         } else {
7238                 cmn_err(CE_WARN, "!Kstat error: no volume name associated "
7239                     "with cache descriptor");
7240         }
7241 
7242         sdbc_shstats->ci_sdbc_failed.value.ul =
7243             (ulong_t)shstats_vars->sh_failed;
7244         sdbc_shstats->ci_sdbc_cd.value.ul = (ulong_t)shstats_vars->sh_cd;
7245         sdbc_shstats->ci_sdbc_cache_read.value.ul =
7246             (ulong_t)shstats_vars->sh_cache_read;
7247         sdbc_shstats->ci_sdbc_cache_write.value.ul =
7248             (ulong_t)shstats_vars->sh_cache_write;
7249         sdbc_shstats->ci_sdbc_disk_read.value.ul =
7250             (ulong_t)shstats_vars->sh_disk_read;
7251         sdbc_shstats->ci_sdbc_disk_write.value.ul =
7252             (ulong_t)shstats_vars->sh_disk_write;
#ifdef NSC_MULTI_TERABYTE
        sdbc_shstats->ci_sdbc_filesize.value.ui64 =
            (uint64_t)shstats_vars->sh_filesize;
#else
        sdbc_shstats->ci_sdbc_filesize.value.ul =
            (ulong_t)shstats_vars->sh_filesize;
#endif
        sdbc_shstats->ci_sdbc_numdirty.value.ul =
            (ulong_t)shstats_vars->sh_numdirty;
        sdbc_shstats->ci_sdbc_numio.value.ul = (ulong_t)shstats_vars->sh_numio;
        sdbc_shstats->ci_sdbc_numfail.value.ul =
            (ulong_t)shstats_vars->sh_numfail;
        sdbc_shstats->ci_sdbc_destaged.value.ul =
            (ulong_t)shstats_vars->sh_destaged;
        sdbc_shstats->ci_sdbc_wrcancelns.value.ul =
            (ulong_t)shstats_vars->sh_wrcancelns;
        (void) _sd_get_cd_hint(shstats_vars->sh_cd, &hint);
        sdbc_shstats->ci_sdbc_cdhints.value.ul = (ulong_t)hint;

        return (0);
}

/*
 * cd_kstat_add
 *
 * Installs the kstats and associated infrastructure (mutex, data
 * buffer) for a particular cache descriptor.  This function is called
 * when the cache descriptor is opened in _sd_open().
 * "cd" -- cache descriptor number whose kstats we wish to add
 * returns: 0 on success, -1 on failure
 */
static int
cd_kstat_add(int cd)
{
        char name[KSTAT_STRLEN];

        if (cd < 0 || cd >= sdbc_max_devs) {
                cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
                return (-1);
        }

        /* create a regular kstat for this cache descriptor */
        if (!sdbc_cd_kstats) {
                cmn_err(CE_WARN, "!sdbc_cd_kstats not allocated");
                return (-1);
        }

        (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_KSTAT_CDSTATS, cd);

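        /*
         * All per-cd named kstats share the static sdbc_cd_stats buffer
         * (hence KSTAT_FLAG_VIRTUAL); sdbc_cd_stats_update() refills it
         * from the per-descriptor shared area in ks_private on each read.
         */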
        sdbc_cd_kstats[cd] = kstat_create(SDBC_KSTAT_MODULE,
            cd, name, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
            sizeof (sdbc_cd_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

        if (sdbc_cd_kstats[cd] != NULL) {
                sdbc_cd_kstats[cd]->ks_data = &sdbc_cd_stats;
                sdbc_cd_kstats[cd]->ks_update = sdbc_cd_stats_update;
                sdbc_cd_kstats[cd]->ks_private =
                    &_sd_cache_stats->st_shared[cd];
                kstat_install(sdbc_cd_kstats[cd]);
        } else {
                cmn_err(CE_WARN, "!cdstats %d kstat allocation failed", cd);
        }
        /* create an I/O kstat for this cache descriptor */
        if (!sdbc_cd_io_kstats) {
                cmn_err(CE_WARN, "!sdbc_cd_io_kstats not allocated");
                return (-1);
        }

        (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_IOKSTAT_CDSTATS, cd);

        sdbc_cd_io_kstats[cd] = kstat_create(
            SDBC_KSTAT_MODULE, cd, name, "disk", KSTAT_TYPE_IO, 1, 0);

        if (sdbc_cd_io_kstats[cd] != NULL) {
                if (!sdbc_cd_io_kstats_mutexes) {
                        cmn_err(CE_WARN, "!sdbc_cd_io_kstats_mutexes not "
                            "allocated");
                        /* don't leak the kstat we just created */
                        kstat_delete(sdbc_cd_io_kstats[cd]);
                        sdbc_cd_io_kstats[cd] = NULL;
                        return (-1);
                }

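                /*
                 * The kstat framework uses ks_lock, when set, to
                 * serialize updates of this kstat, so give each I/O
                 * kstat its own driver mutex.
                 */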
                mutex_init(&sdbc_cd_io_kstats_mutexes[cd], NULL,
                    MUTEX_DRIVER, NULL);

                sdbc_cd_io_kstats[cd]->ks_lock = &sdbc_cd_io_kstats_mutexes[cd];

                kstat_install(sdbc_cd_io_kstats[cd]);
        } else {
                cmn_err(CE_WARN, "!sdbc cd %d io kstat allocation failed", cd);
        }

        return (0);
}

/*
 * cd_kstat_remove
 *
 * Uninstalls the kstats and associated infrastructure (mutex, data
 * buffer) for a particular cache descriptor.  This function is called
 * when the cache descriptor is closed in _sd_close().
 * "cd" -- cache descriptor number whose kstats we wish to remove
 * returns: 0 on success, -1 on failure
 */
static int
cd_kstat_remove(int cd)
{
        if (cd < 0 || cd >= sdbc_max_devs) {
                cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
                return (-1);
        }

        /* delete the regular kstat corresponding to this cache descriptor */
        if (sdbc_cd_kstats && sdbc_cd_kstats[cd]) {
                kstat_delete(sdbc_cd_kstats[cd]);
                sdbc_cd_kstats[cd] = NULL;
        }

        /* delete the I/O kstat corresponding to this cache descriptor */
        if (sdbc_cd_io_kstats && sdbc_cd_io_kstats[cd]) {
                kstat_delete(sdbc_cd_io_kstats[cd]);
                sdbc_cd_io_kstats[cd] = NULL;

                if (sdbc_cd_io_kstats_mutexes) {
                        /* destroy the mutex associated with this I/O kstat */
                        mutex_destroy(&sdbc_cd_io_kstats_mutexes[cd]);
                }
        }

        return (0);
}
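
/*
 * Illustrative sketch, not part of this module: the per-cd named
 * kstats installed by cd_kstat_add() can be read from userland with
 * libkstat(3LIB).  The instance is the cache descriptor number and
 * the name is SDBC_KSTAT_CDSTATS with the descriptor appended; the
 * strings below ("sdbc", "cdstats0") are hypothetical expansions of
 * SDBC_KSTAT_MODULE and SDBC_KSTAT_CDSTATS for cd 0.
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "sdbc", 0, "cdstats0");
 *
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
 *		int i;
 *
 *		for (i = 0; i < ksp->ks_ndata; i++)
 *			(void) printf("%s\n", kn[i].name);
 *	}
 *	(void) kstat_close(kc);
 */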

#ifdef DEBUG
/*
 * sdbc_dynmem_kstat_update_dm
 *
 * kstat update routine for the dynamic memory kstats.  On KSTAT_WRITE
 * the user-supplied tunables are collected into a local
 * _dm_process_vars_t and edited and transferred to the global settings
 * by sdbc_edit_xfer_process_vars_dm(); a write may also request that
 * the deallocation thread be awakened.  On read, the current settings
 * and statistics are copied out to the kstat data buffer.
 */
int
sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw)
{
        sdbc_dynmem_dm_t *sdbc_dynmem;
        _dm_process_vars_t *process_vars;
        _dm_process_vars_t local_dm_process_vars;

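        /* simple counter of update calls; also settable via KSTAT_WRITE */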
        simplect_dm++;

        sdbc_dynmem = (sdbc_dynmem_dm_t *)(ksp->ks_data);

        /* global dynmem_processing_dm */
        process_vars = (_dm_process_vars_t *)(ksp->ks_private);

        if (rw == KSTAT_WRITE) {
                simplect_dm = sdbc_dynmem->ci_sdbc_simplect.value.ul;
                local_dm_process_vars.monitor_dynmem_process =
                    sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul;
                local_dm_process_vars.max_dyn_list =
                    sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul;
                local_dm_process_vars.cache_aging_ct1 =
                    sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul;
                local_dm_process_vars.cache_aging_ct2 =
                    sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul;
                local_dm_process_vars.cache_aging_ct3 =
                    sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul;
                local_dm_process_vars.cache_aging_sec1 =
                    sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul;
                local_dm_process_vars.cache_aging_sec2 =
                    sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul;
                local_dm_process_vars.cache_aging_sec3 =
                    sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul;
                local_dm_process_vars.cache_aging_pcnt1 =
                    sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul;
                local_dm_process_vars.cache_aging_pcnt2 =
                    sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul;
                local_dm_process_vars.max_holds_pcnt =
                    sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul;
                local_dm_process_vars.process_directive =
                    sdbc_dynmem->ci_sdbc_process_directive.value.ul;
                (void) sdbc_edit_xfer_process_vars_dm(&local_dm_process_vars);

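                /*
                 * If the writer set the wake bit, clear it and signal
                 * the dynamic memory deallocation thread.
                 */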
                if (process_vars->process_directive & WAKE_DEALLOC_THREAD_DM) {
                        process_vars->process_directive &=
                            ~WAKE_DEALLOC_THREAD_DM;
                        mutex_enter(&dynmem_processing_dm.thread_dm_lock);
                        cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
                        mutex_exit(&dynmem_processing_dm.thread_dm_lock);
                }

                return (0);
        }

        /* default to READ */
        sdbc_dynmem->ci_sdbc_simplect.value.ul = simplect_dm;
        sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul =
            process_vars->monitor_dynmem_process;
        sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul =
            process_vars->max_dyn_list;
        sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul =
            process_vars->cache_aging_ct1;
        sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul =
            process_vars->cache_aging_ct2;
        sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul =
            process_vars->cache_aging_ct3;
        sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul =
            process_vars->cache_aging_sec1;
        sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul =
            process_vars->cache_aging_sec2;
        sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul =
            process_vars->cache_aging_sec3;
        sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul =
            process_vars->cache_aging_pcnt1;
        sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul =
            process_vars->cache_aging_pcnt2;
        sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul =
            process_vars->max_holds_pcnt;
        sdbc_dynmem->ci_sdbc_process_directive.value.ul =
            process_vars->process_directive;

        sdbc_dynmem->ci_sdbc_alloc_ct.value.ul = process_vars->alloc_ct;
        sdbc_dynmem->ci_sdbc_dealloc_ct.value.ul = process_vars->dealloc_ct;
        sdbc_dynmem->ci_sdbc_history.value.ul = process_vars->history;
        sdbc_dynmem->ci_sdbc_nodatas.value.ul = process_vars->nodatas;
        sdbc_dynmem->ci_sdbc_candidates.value.ul = process_vars->candidates;
        sdbc_dynmem->ci_sdbc_deallocs.value.ul = process_vars->deallocs;
        sdbc_dynmem->ci_sdbc_hosts.value.ul = process_vars->hosts;
        sdbc_dynmem->ci_sdbc_pests.value.ul = process_vars->pests;
        sdbc_dynmem->ci_sdbc_metas.value.ul = process_vars->metas;
        sdbc_dynmem->ci_sdbc_holds.value.ul = process_vars->holds;
        sdbc_dynmem->ci_sdbc_others.value.ul = process_vars->others;
        sdbc_dynmem->ci_sdbc_notavail.value.ul = process_vars->notavail;

        return (0);
}
#endif