cstyle sort of updates
7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/avs/ns/sdbc/sd_bcache.c
+++ new/usr/src/uts/common/avs/ns/sdbc/sd_bcache.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/ksynch.h>
28 28 #include <sys/cmn_err.h>
29 29 #include <sys/errno.h>
30 30 #include <sys/kmem.h>
31 31 #include <sys/cred.h>
32 32 #include <sys/buf.h>
33 33 #include <sys/ddi.h>
34 34
35 35 #include <sys/nsc_thread.h>
36 36 #include <sys/nsctl/nsctl.h>
37 37
38 38 #include <sys/sdt.h> /* dtrace is S10 or later */
39 39
40 40 #include "sd_bcache.h"
41 41 #include "sd_trace.h"
42 42 #include "sd_io.h"
43 43 #include "sd_bio.h"
44 44 #include "sd_ft.h"
45 45 #include "sd_misc.h"
46 46 #include "sd_pcu.h"
47 47
48 48 #include <sys/unistat/spcs_s.h>
49 49 #include <sys/unistat/spcs_s_k.h>
50 50 #include <sys/unistat/spcs_errors.h>
51 51 #include <sys/nsctl/safestore.h>
52 52 #ifndef DS_DDICT
53 53 #include <sys/ddi_impldefs.h>
54 54 #endif
55 55
56 56
57 57 /*
58 58 * kstat interface
59 59 */
60 60
61 61 static kstat_t *sdbc_global_stats_kstat;
62 62 static int sdbc_global_stats_update(kstat_t *ksp, int rw);
63 63
64 64 typedef struct {
65 65 kstat_named_t ci_sdbc_count;
66 66 kstat_named_t ci_sdbc_loc_count;
67 67 kstat_named_t ci_sdbc_rdhits;
68 68 kstat_named_t ci_sdbc_rdmiss;
69 69 kstat_named_t ci_sdbc_wrhits;
70 70 kstat_named_t ci_sdbc_wrmiss;
71 71 kstat_named_t ci_sdbc_blksize;
72 72 kstat_named_t ci_sdbc_lru_blocks;
73 73 #ifdef DEBUG
74 74 kstat_named_t ci_sdbc_lru_noreq;
75 75 kstat_named_t ci_sdbc_lru_req;
76 76 #endif
77 77 kstat_named_t ci_sdbc_wlru_inq;
78 78 kstat_named_t ci_sdbc_cachesize;
79 79 kstat_named_t ci_sdbc_numblocks;
80 80 kstat_named_t ci_sdbc_num_shared;
81 81 kstat_named_t ci_sdbc_wrcancelns;
82 82 kstat_named_t ci_sdbc_destaged;
83 83 kstat_named_t ci_sdbc_nodehints;
84 84 } sdbc_global_stats_t;
85 85
86 86 static sdbc_global_stats_t sdbc_global_stats = {
87 87 {SDBC_GKSTAT_COUNT, KSTAT_DATA_ULONG},
88 88 {SDBC_GKSTAT_LOC_COUNT, KSTAT_DATA_ULONG},
89 89 {SDBC_GKSTAT_RDHITS, KSTAT_DATA_ULONG},
90 90 {SDBC_GKSTAT_RDMISS, KSTAT_DATA_ULONG},
91 91 {SDBC_GKSTAT_WRHITS, KSTAT_DATA_ULONG},
92 92 {SDBC_GKSTAT_WRMISS, KSTAT_DATA_ULONG},
93 93 {SDBC_GKSTAT_BLKSIZE, KSTAT_DATA_ULONG},
94 94 {SDBC_GKSTAT_LRU_BLOCKS, KSTAT_DATA_ULONG},
95 95 #ifdef DEBUG
96 96 {SDBC_GKSTAT_LRU_NOREQ, KSTAT_DATA_ULONG},
97 97 {SDBC_GKSTAT_LRU_REQ, KSTAT_DATA_ULONG},
98 98 #endif
99 99 {SDBC_GKSTAT_WLRU_INQ, KSTAT_DATA_ULONG},
100 100 {SDBC_GKSTAT_CACHESIZE, KSTAT_DATA_ULONG},
101 101 {SDBC_GKSTAT_NUMBLOCKS, KSTAT_DATA_ULONG},
102 102 {SDBC_GKSTAT_NUM_SHARED, KSTAT_DATA_ULONG},
103 103 {SDBC_GKSTAT_WRCANCELNS, KSTAT_DATA_ULONG},
104 104 {SDBC_GKSTAT_DESTAGED, KSTAT_DATA_ULONG},
105 105 {SDBC_GKSTAT_NODEHINTS, KSTAT_DATA_ULONG},
106 106 };
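
A named-kstat provider in this style pairs the static template above with a ks_update routine that refreshes each kstat_named_t from the driver's private counters whenever the kstat is read. A minimal sketch of the pattern, using only the st_count and st_blksize fields that appear later in this file — this is illustrative, not the real sdbc_global_stats_update(), whose body lies beyond this hunk:

	static int
	example_stats_update(kstat_t *ksp, int rw)
	{
		sdbc_global_stats_t *stats = ksp->ks_data;
		_sd_stats_t *gstats = ksp->ks_private;	/* set at create time */

		if (rw == KSTAT_WRITE)
			return (EACCES);

		/* refresh the named kstats from the live counters */
		stats->ci_sdbc_count.value.ul = gstats->st_count;
		stats->ci_sdbc_blksize.value.ul = gstats->st_blksize;

		return (0);
	}
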
107 107
108 108 static kstat_t **sdbc_cd_kstats;
109 109 static kstat_t **sdbc_cd_io_kstats;
110 110 static kmutex_t *sdbc_cd_io_kstats_mutexes;
111 111 static kstat_t *sdbc_global_io_kstat;
112 112 static kmutex_t sdbc_global_io_kstat_mutex;
113 113 static int sdbc_cd_stats_update(kstat_t *ksp, int rw);
114 114 static int cd_kstat_add(int cd);
115 115 static int cd_kstat_remove(int cd);
116 116
117 117 typedef struct {
118 118 kstat_named_t ci_sdbc_vol_name;
119 119 kstat_named_t ci_sdbc_failed;
120 120 kstat_named_t ci_sdbc_cd;
121 121 kstat_named_t ci_sdbc_cache_read;
122 122 kstat_named_t ci_sdbc_cache_write;
123 123 kstat_named_t ci_sdbc_disk_read;
124 124 kstat_named_t ci_sdbc_disk_write;
125 125 kstat_named_t ci_sdbc_filesize;
126 126 kstat_named_t ci_sdbc_numdirty;
127 127 kstat_named_t ci_sdbc_numio;
128 128 kstat_named_t ci_sdbc_numfail;
129 129 kstat_named_t ci_sdbc_destaged;
130 130 kstat_named_t ci_sdbc_wrcancelns;
131 131 kstat_named_t ci_sdbc_cdhints;
132 132 } sdbc_cd_stats_t;
133 133
134 134 static sdbc_cd_stats_t sdbc_cd_stats = {
135 135 {SDBC_CDKSTAT_VOL_NAME, KSTAT_DATA_CHAR},
136 136 {SDBC_CDKSTAT_FAILED, KSTAT_DATA_ULONG},
137 137 {SDBC_CDKSTAT_CD, KSTAT_DATA_ULONG},
138 138 {SDBC_CDKSTAT_CACHE_READ, KSTAT_DATA_ULONG},
139 139 {SDBC_CDKSTAT_CACHE_WRITE, KSTAT_DATA_ULONG},
140 140 {SDBC_CDKSTAT_DISK_READ, KSTAT_DATA_ULONG},
141 141 {SDBC_CDKSTAT_DISK_WRITE, KSTAT_DATA_ULONG},
142 142 #ifdef NSC_MULTI_TERABYTE
143 143 {SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_UINT64},
144 144 #else
145 145 {SDBC_CDKSTAT_FILESIZE, KSTAT_DATA_ULONG},
146 146 #endif
147 147 {SDBC_CDKSTAT_NUMDIRTY, KSTAT_DATA_ULONG},
148 148 {SDBC_CDKSTAT_NUMIO, KSTAT_DATA_ULONG},
149 149 {SDBC_CDKSTAT_NUMFAIL, KSTAT_DATA_ULONG},
150 150 {SDBC_CDKSTAT_DESTAGED, KSTAT_DATA_ULONG},
151 151 {SDBC_CDKSTAT_WRCANCELNS, KSTAT_DATA_ULONG},
152 152 {SDBC_CDKSTAT_CDHINTS, KSTAT_DATA_ULONG},
153 153 };
154 154
155 155 #ifdef DEBUG
156 156 /*
157 157 * dynmem kstat interface
158 158 */
159 159 static kstat_t *sdbc_dynmem_kstat_dm;
160 160 static int simplect_dm;
161 161 static int sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw);
162 162
163 163 typedef struct {
164 164 kstat_named_t ci_sdbc_monitor_dynmem;
165 165 kstat_named_t ci_sdbc_max_dyn_list;
166 166 kstat_named_t ci_sdbc_cache_aging_ct1;
167 167 kstat_named_t ci_sdbc_cache_aging_ct2;
168 168 kstat_named_t ci_sdbc_cache_aging_ct3;
169 169 kstat_named_t ci_sdbc_cache_aging_sec1;
170 170 kstat_named_t ci_sdbc_cache_aging_sec2;
171 171 kstat_named_t ci_sdbc_cache_aging_sec3;
172 172 kstat_named_t ci_sdbc_cache_aging_pcnt1;
173 173 kstat_named_t ci_sdbc_cache_aging_pcnt2;
174 174 kstat_named_t ci_sdbc_max_holds_pcnt;
175 175
176 176 kstat_named_t ci_sdbc_alloc_ct;
177 177 kstat_named_t ci_sdbc_dealloc_ct;
178 178 kstat_named_t ci_sdbc_history;
179 179 kstat_named_t ci_sdbc_nodatas;
180 180 kstat_named_t ci_sdbc_candidates;
181 181 kstat_named_t ci_sdbc_deallocs;
182 182 kstat_named_t ci_sdbc_hosts;
183 183 kstat_named_t ci_sdbc_pests;
184 184 kstat_named_t ci_sdbc_metas;
185 185 kstat_named_t ci_sdbc_holds;
186 186 kstat_named_t ci_sdbc_others;
187 187 kstat_named_t ci_sdbc_notavail;
188 188
189 189 kstat_named_t ci_sdbc_process_directive;
190 190
191 191 kstat_named_t ci_sdbc_simplect;
192 192 } sdbc_dynmem_dm_t;
193 193
194 194 static sdbc_dynmem_dm_t sdbc_dynmem_dm = {
195 195 {SDBC_DMKSTAT_MONITOR_DYNMEM, KSTAT_DATA_ULONG},
196 196 {SDBC_DMKSTAT_MAX_DYN_LIST, KSTAT_DATA_ULONG},
197 197 {SDBC_DMKSTAT_CACHE_AGING_CT1, KSTAT_DATA_ULONG},
198 198 {SDBC_DMKSTAT_CACHE_AGING_CT2, KSTAT_DATA_ULONG},
199 199 {SDBC_DMKSTAT_CACHE_AGING_CT3, KSTAT_DATA_ULONG},
200 200 {SDBC_DMKSTAT_CACHE_AGING_SEC1, KSTAT_DATA_ULONG},
201 201 {SDBC_DMKSTAT_CACHE_AGING_SEC2, KSTAT_DATA_ULONG},
202 202 {SDBC_DMKSTAT_CACHE_AGING_SEC3, KSTAT_DATA_ULONG},
203 203 {SDBC_DMKSTAT_CACHE_AGING_PCNT1, KSTAT_DATA_ULONG},
204 204 {SDBC_DMKSTAT_CACHE_AGING_PCNT2, KSTAT_DATA_ULONG},
205 205 {SDBC_DMKSTAT_MAX_HOLDS_PCNT, KSTAT_DATA_ULONG},
206 206 {SDBC_DMKSTAT_ALLOC_CNT, KSTAT_DATA_ULONG},
207 207 {SDBC_DMKSTAT_DEALLOC_CNT, KSTAT_DATA_ULONG},
208 208 {SDBC_DMKSTAT_HISTORY, KSTAT_DATA_ULONG},
209 209 {SDBC_DMKSTAT_NODATAS, KSTAT_DATA_ULONG},
210 210 {SDBC_DMKSTAT_CANDIDATES, KSTAT_DATA_ULONG},
211 211 {SDBC_DMKSTAT_DEALLOCS, KSTAT_DATA_ULONG},
212 212 {SDBC_DMKSTAT_HOSTS, KSTAT_DATA_ULONG},
213 213 {SDBC_DMKSTAT_PESTS, KSTAT_DATA_ULONG},
214 214 {SDBC_DMKSTAT_METAS, KSTAT_DATA_ULONG},
215 215 {SDBC_DMKSTAT_HOLDS, KSTAT_DATA_ULONG},
216 216 {SDBC_DMKSTAT_OTHERS, KSTAT_DATA_ULONG},
217 217 {SDBC_DMKSTAT_NOTAVAIL, KSTAT_DATA_ULONG},
218 218 {SDBC_DMKSTAT_PROCESS_DIRECTIVE, KSTAT_DATA_ULONG},
219 219 {SDBC_DMKSTAT_SIMPLECT, KSTAT_DATA_ULONG}
220 220 };
221 221 #endif
222 222
223 223 /* End of dynmem kstats */
224 224
225 225 #ifdef DEBUG
226 226 int *dmchainpull_table; /* dmchain wastage stats */
227 227 #endif
228 228
229 229 /*
230 230 * dynmem process vars
231 231 */
232 232 extern _dm_process_vars_t dynmem_processing_dm;
233 233
234 234 /* metadata for volumes */
235 235 ss_voldata_t *_sdbc_gl_file_info;
236 236
237 237 size_t _sdbc_gl_file_info_size;
238 238
239 239 /* metadata for cache write blocks */
240 240 static ss_centry_info_t *_sdbc_gl_centry_info;
241 241
242 242 /* wblocks * sizeof(ss_centry_info_t) */
243 243 static size_t _sdbc_gl_centry_info_size;
244 244
245 245 static int _SD_DELAY_QUEUE = 1;
246 246 static int sdbc_allocb_inuse, sdbc_allocb_lost, sdbc_allocb_hit;
247 247 static int sdbc_allocb_pageio1, sdbc_allocb_pageio2;
248 248 static int sdbc_centry_hit, sdbc_centry_inuse, sdbc_centry_lost;
249 249 static int sdbc_dmchain_not_avail;
250 250 static int sdbc_allocb_deallocd;
251 251 static int sdbc_centry_deallocd;
252 252 static int sdbc_check_cot;
253 253 static int sdbc_ra_hash; /* 1-block read-ahead fails due to hash hit */
254 254 static int sdbc_ra_none; /* 1-block read-ahead fails due to "would block" */
255 255
256 256
257 257 /*
258 258 * Set the following variable to 1 to enable pagelist io mutual
259 259 * exclusion on all _sd_alloc_buf() operations.
260 260 *
261 261 * This is set to ON to prevent front end / back end races between new
262 262 * NSC_WRTHRU io operations coming in through _sd_alloc_buf(), and
263 263 * previously written data being flushed out to disk by the sdbc
264 264 * flusher at the back end.
265 265 * -- see bugtraq 4287564
266 266 * -- Simon Crosland, Mon Nov 8 16:34:09 GMT 1999
267 267 */
268 268 static int sdbc_pageio_always = 1;
269 269
270 270 int sdbc_use_dmchain = 0; /* start time switch for dm chaining */
271 271 int sdbc_prefetch1 = 1; /* do 1-block read-ahead */
272 272 /*
273 273 * if sdbc_static_cache is 1, allocate all cache memory at startup.
274 274 * deallocate only at shutdown.
275 275 */
276 276 int sdbc_static_cache = 1;
277 277
278 278 #ifdef DEBUG
279 279 /*
280 280 * Pagelist io mutual exclusion debug facility.
281 281 */
282 282 #define SDBC_PAGEIO_OFF 0 /* no debug */
283 283 #define SDBC_PAGEIO_RDEV 1 /* force NSC_PAGEIO for specified dev */
284 284 #define SDBC_PAGEIO_RAND 2 /* randomly force NSC_PAGEIO */
285 285 #define SDBC_PAGEIO_ALL 3 /* always force NSC_PAGEIO */
286 286 static int sdbc_pageio_debug = SDBC_PAGEIO_OFF;
287 287 static dev_t sdbc_pageio_rdev = (dev_t)-1;
288 288 #endif
289 289
290 290 /*
291 291 * INF SD cache global data
292 292 */
293 293
294 294 _sd_cd_info_t *_sd_cache_files;
295 295 _sd_stats_t *_sd_cache_stats;
296 296 kmutex_t _sd_cache_lock;
297 297
298 298 _sd_hash_table_t *_sd_htable;
299 299 _sd_queue_t _sd_lru_q;
300 300
301 301 _sd_cctl_t *_sd_cctl[_SD_CCTL_GROUPS];
302 302 int _sd_cctl_groupsz;
303 303
304 304 _sd_net_t _sd_net_config;
305 305
306 306 extern krwlock_t sdbc_queue_lock;
307 307
308 308 unsigned int _sd_node_hint;
309 309
310 310 #define _SD_LRU_Q (&_sd_lru_q)
311 311 int BLK_FBAS; /* number of FBAs in a cache block */
312 312 int CACHE_BLOCK_SIZE; /* size in bytes of a cache block */
313 313 int CBLOCKS;
314 314 _sd_bitmap_t BLK_FBA_BITS;
315 315 static int sdbc_prefetch_valid_cnt;
316 316 static int sdbc_prefetch_busy_cnt;
317 317 static int sdbc_prefetch_trailing;
318 318 static int sdbc_prefetch_deallocd;
319 319 static int sdbc_prefetch_pageio1;
320 320 static int sdbc_prefetch_pageio2;
321 321 static int sdbc_prefetch_hit;
322 322 static int sdbc_prefetch_lost;
323 323 static int _sd_prefetch_opt = 1; /* 0 to disable & use _prefetch_sb_vec[] */
324 324 static nsc_vec_t _prefetch_sb_vec[_SD_MAX_BLKS + 1];
325 325
326 326 _sd_bitmap_t _fba_bits[] = {
327 327 0x0000, 0x0001, 0x0003, 0x0007,
328 328 0x000f, 0x001f, 0x003f, 0x007f,
329 329 0x00ff,
330 330 #if defined(_SD_8K_BLKSIZE)
331 331 0x01ff, 0x03ff, 0x07ff,
332 332 0x0fff, 0x1fff, 0x3fff, 0x7fff,
333 333 0xffff,
334 334 #endif
335 335 };
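
Each _fba_bits[n] is simply a mask with the low n bits set, i.e. (1 << n) - 1: assuming the usual 512-byte FBA, entry 8 (0x00ff) marks all eight FBAs of a 4 KB cache block valid, and the _SD_8K_BLKSIZE entries extend the table to the 16 FBAs of an 8 KB block. The table is equivalent to the computed form sketched here:

	/* equivalent to _fba_bits[n] for 0 <= n <= 16 */
	static _sd_bitmap_t
	fba_mask(int n)
	{
		return ((_sd_bitmap_t)((1u << n) - 1));
	}
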
336 336
337 337
338 338 static int _sd_ccsync_cnt = 256;
339 339 static _sd_cctl_sync_t *_sd_ccent_sync;
340 340
341 341 nsc_io_t *sdbc_io;
342 342
343 343 #ifdef _MULTI_DATAMODEL
344 344 _sd_stats32_t *_sd_cache_stats32 = NULL;
345 345 #endif
346 346
347 347
348 348 #ifdef DEBUG
349 349 int cmn_level = CE_PANIC;
350 350 #else
351 351 int cmn_level = CE_WARN;
352 352 #endif
353 353
354 354 /*
355 355 * Forward declare all statics that are used before being defined, to
356 356 * enforce parameter checking.
357 357 * Some (if not all) of these could be removed if the code were reordered
358 358 */
359 359
360 360 static void _sdbc_stats_deconfigure(void);
361 361 static int _sdbc_stats_configure(int cblocks);
362 362 static int _sdbc_lruq_configure(_sd_queue_t *);
363 363 static void _sdbc_lruq_deconfigure(void);
364 364 static int _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus);
365 365 static void _sdbc_mem_deconfigure(int cblocks);
366 366 static void _sd_ins_queue(_sd_queue_t *, _sd_cctl_t *centry);
367 367 static int _sd_flush_cd(int cd);
368 368 static int _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
369 369 _sd_buf_handle_t **hp);
370 370 static int _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent,
371 371 nsc_off_t fba_pos, nsc_size_t fba_len, int flag);
372 372 static void _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos,
373 373 nsc_size_t fba_len, int error);
374 374 static void _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos,
375 375 nsc_size_t fba_len, int error);
376 376 static void _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
377 377 nsc_size_t fba_len);
378 378 static int _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
379 379 nsc_size_t fba_len);
380 380 static int _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
381 381 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len);
382 382 static int _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
383 383 nsc_size_t fba_len, int flag);
384 384 static int _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
385 385 nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
386 386 nsc_off_t rd_st_pos);
387 387 static int sdbc_fd_attach_cd(blind_t xcd);
388 388 static int sdbc_fd_detach_cd(blind_t xcd);
389 389 static int sdbc_fd_flush_cd(blind_t xcd);
390 390 static int _sdbc_gl_centry_configure(spcs_s_info_t);
391 391 static int _sdbc_gl_file_configure(spcs_s_info_t);
392 392 static void _sdbc_gl_centry_deconfigure(void);
393 393 static void _sdbc_gl_file_deconfigure(void);
394 394 static int sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos,
395 395 nsc_size_t fba_len);
396 396 static _sd_bitmap_t update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off,
397 397 sdbc_cblk_fba_t st_len);
398 398 static int _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
399 399 int flag, _sd_buf_handle_t *handle, int locked);
400 400
401 401 /* dynmem support */
402 402 static int _sd_setup_category_on_type(_sd_cctl_t *header);
403 403 static int _sd_setup_mem_chaining(_sd_cctl_t *header, int flag);
404 404
405 405 static int sdbc_check_cctl_cot(_sd_cctl_t *);
406 406
407 407 static int sdbc_dmqueues_configure();
408 408 static void sdbc_dmqueues_deconfigure();
409 409 static _sd_cctl_t *sdbc_get_dmchain(int, int *, int);
410 410 static int sdbc_dmchain_avail(_sd_cctl_t *);
411 411 void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
412 412 static void sdbc_ins_dmqueue_back(_sd_queue_t *, _sd_cctl_t *);
413 413 void sdbc_ins_dmqueue_front(_sd_queue_t *, _sd_cctl_t *);
414 414 void sdbc_remq_dmchain(_sd_queue_t *, _sd_cctl_t *);
415 415 static void sdbc_clear_dmchain(_sd_cctl_t *, _sd_cctl_t *);
416 416 void sdbc_requeue_head_dm_try(_sd_cctl_t *);
417 417 static _sd_cctl_t *sdbc_alloc_dmc(int, nsc_off_t, nsc_size_t, int *,
418 418 sdbc_allocbuf_t *, int);
419 419 static _sd_cctl_t *sdbc_alloc_lru(int, nsc_off_t, int *, int);
420 420 static _sd_cctl_t *sdbc_alloc_from_dmchain(int, nsc_off_t, sdbc_allocbuf_t *,
421 421 int);
422 422 static void sdbc_centry_init_dm(_sd_cctl_t *);
423 423 static int sdbc_centry_memalloc_dm(_sd_cctl_t *, int, int);
424 424 static void sdbc_centry_alloc_end(sdbc_allocbuf_t *);
425 425
[ 425 lines elided ]
426 426
427 427
428 428
429 429 /* _SD_DEBUG */
430 430 #if defined(_SD_DEBUG) || defined(DEBUG)
431 431 static int _sd_cctl_valid(_sd_cctl_t *);
432 432 #endif
433 433
434 434 static
435 435 nsc_def_t _sdbc_fd_def[] = {
436 - "Attach", (uintptr_t)sdbc_fd_attach_cd, 0,
437 - "Detach", (uintptr_t)sdbc_fd_detach_cd, 0,
438 - "Flush", (uintptr_t)sdbc_fd_flush_cd, 0,
439 - 0, 0, 0
436 + { "Attach", (uintptr_t)sdbc_fd_attach_cd, 0 },
437 + { "Detach", (uintptr_t)sdbc_fd_detach_cd, 0 },
438 + { "Flush", (uintptr_t)sdbc_fd_flush_cd, 0 },
439 + { NULL, (uintptr_t)NULL, 0 }
440 440 };
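
This hunk is the substance of the fix: each nsc_def_t element gains its own inner braces (and the terminator becomes an explicit { NULL, (uintptr_t)NULL, 0 }), which is what allows dropping -Wno-missing-braces from Makefile.uts — gcc's -Wmissing-braces warns when an array of structs is initialized with one flat list. The same pattern in miniature, with a hypothetical struct:

	struct pair { const char *name; int val; };

	/* flat list: gcc emits -Wmissing-braces here */
	struct pair flat[] = { "a", 1, "b", 2 };

	/* fully braced: warning-clean, identical object code */
	struct pair braced[] = { { "a", 1 }, { "b", 2 } };
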
441 441
442 442
443 443 /*
444 444 * _sdbc_cache_configure - initialize cache blocks, queues etc.
445 445 *
446 446 * ARGUMENTS:
447 447 * cblocks - Number of cache blocks
448 448 *
449 449 * RETURNS:
450 450 * 0 on success.
451 451 * SDBC_EENABLEFAIL or SDBC_EMEMCONFIG on failure.
452 452 *
453 453 */
454 454
455 455
456 456
457 457 int
458 458 _sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus)
459 459 {
460 460 CBLOCKS = cblocks;
461 461
462 462 _sd_cache_files = (_sd_cd_info_t *)
463 463 kmem_zalloc(sdbc_max_devs * sizeof (_sd_cd_info_t),
464 464 KM_SLEEP);
465 465
466 466 if (_sdbc_stats_configure(cblocks))
467 467 return (SDBC_EENABLEFAIL);
468 468
469 469 if (sdbc_use_dmchain) {
470 470 if (sdbc_dmqueues_configure())
471 471 return (SDBC_EENABLEFAIL);
472 472 } else {
473 473 if (_sdbc_lruq_configure(_SD_LRU_Q))
474 474 return (SDBC_EENABLEFAIL);
475 475 }
476 476
477 477
478 478 if (_sdbc_mem_configure(cblocks, kstatus))
479 479 return (SDBC_EMEMCONFIG);
480 480
481 481 CACHE_BLOCK_SIZE = BLK_SIZE(1);
482 482 BLK_FBAS = FBA_NUM(CACHE_BLOCK_SIZE);
483 483 BLK_FBA_BITS = _fba_bits[BLK_FBAS];
484 484
485 485 sdbc_allocb_pageio1 = 0;
486 486 sdbc_allocb_pageio2 = 0;
487 487 sdbc_allocb_hit = 0;
488 488 sdbc_allocb_inuse = 0;
489 489 sdbc_allocb_lost = 0;
490 490 sdbc_centry_inuse = 0;
491 491 sdbc_centry_lost = 0;
492 492 sdbc_centry_hit = 0;
493 493 sdbc_centry_deallocd = 0;
494 494 sdbc_dmchain_not_avail = 0;
495 495 sdbc_allocb_deallocd = 0;
496 496
497 497 sdbc_prefetch_valid_cnt = 0;
498 498 sdbc_prefetch_busy_cnt = 0;
499 499 sdbc_prefetch_trailing = 0;
500 500 sdbc_prefetch_deallocd = 0;
501 501 sdbc_prefetch_pageio1 = 0;
502 502 sdbc_prefetch_pageio2 = 0;
503 503 sdbc_prefetch_hit = 0;
504 504 sdbc_prefetch_lost = 0;
505 505
506 506 sdbc_check_cot = 0;
507 507 sdbc_prefetch1 = 1;
508 508 sdbc_ra_hash = 0;
509 509 sdbc_ra_none = 0;
510 510
511 511 return (0);
512 512 }
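
A worked example of the derived globals, assuming the usual 512-byte FBA and a 4 KB cache block: CACHE_BLOCK_SIZE = BLK_SIZE(1) = 4096, BLK_FBAS = FBA_NUM(4096) = 8, and BLK_FBA_BITS = _fba_bits[8] = 0x00ff, the full-block valid mask from the table above.
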
513 513
514 514 /*
515 515 * _sdbc_cache_deconfigure - cache is being deconfigured. Release any
516 516 * memory that we acquired during the configuration process and return
517 517 * to the unconfigured state.
518 518 *
519 519 * NOTE: all users of the cache should be inactive at this point,
520 520 * i.e. we are unregistered from sd and all cache daemons/threads are
521 521 * gone.
522 522 *
523 523 */
524 524 void
525 525 _sdbc_cache_deconfigure(void)
526 526 {
527 527 /* CCIO shutdown must happen before memory is free'd */
528 528
529 529 if (_sd_cache_files) {
530 530 kmem_free(_sd_cache_files,
531 531 sdbc_max_devs * sizeof (_sd_cd_info_t));
532 532 _sd_cache_files = (_sd_cd_info_t *)NULL;
533 533 }
534 534
535 535
536 536 BLK_FBA_BITS = 0;
537 537 BLK_FBAS = 0;
538 538 CACHE_BLOCK_SIZE = 0;
539 539 _sdbc_mem_deconfigure(CBLOCKS);
540 540 _sdbc_gl_centry_deconfigure();
541 541 _sdbc_gl_file_deconfigure();
542 542
543 543 if (sdbc_use_dmchain)
544 544 sdbc_dmqueues_deconfigure();
545 545 else
546 546 _sdbc_lruq_deconfigure();
547 547 _sdbc_stats_deconfigure();
548 548
549 549 CBLOCKS = 0;
550 550 }
551 551
552 552
553 553 /*
554 554 * _sdbc_stats_deconfigure - cache is being deconfigured; turn off
555 555 * stats. This could seemingly do more but we leave most of the
556 556 * data intact until cache is configured again.
557 557 *
558 558 */
559 559 static void
560 560 _sdbc_stats_deconfigure(void)
561 561 {
562 562 int i;
563 563
564 564 #ifdef DEBUG
565 565 if (sdbc_dynmem_kstat_dm) {
566 566 kstat_delete(sdbc_dynmem_kstat_dm);
567 567 sdbc_dynmem_kstat_dm = NULL;
568 568 }
569 569 #endif
570 570
571 571 if (sdbc_global_stats_kstat) {
572 572 kstat_delete(sdbc_global_stats_kstat);
573 573 sdbc_global_stats_kstat = NULL;
574 574 }
575 575
576 576 if (sdbc_cd_kstats) {
577 577 for (i = 0; i < sdbc_max_devs; i++) {
578 578 if (sdbc_cd_kstats[i]) {
579 579 kstat_delete(sdbc_cd_kstats[i]);
580 580 sdbc_cd_kstats[i] = NULL;
581 581 }
582 582 }
583 583 kmem_free(sdbc_cd_kstats, sizeof (kstat_t *) * sdbc_max_devs);
584 584 sdbc_cd_kstats = NULL;
585 585 }
586 586
587 587 if (sdbc_global_io_kstat) {
588 588 kstat_delete(sdbc_global_io_kstat);
589 589 mutex_destroy(&sdbc_global_io_kstat_mutex);
590 590 sdbc_global_io_kstat = NULL;
591 591 }
592 592
593 593 if (sdbc_cd_io_kstats) {
594 594 for (i = 0; i < sdbc_max_devs; i++) {
595 595 if (sdbc_cd_io_kstats[i]) {
596 596 kstat_delete(sdbc_cd_io_kstats[i]);
597 597 sdbc_cd_io_kstats[i] = NULL;
598 598 }
599 599 }
600 600 kmem_free(sdbc_cd_io_kstats, sizeof (kstat_t *) *
601 601 sdbc_max_devs);
602 602 sdbc_cd_io_kstats = NULL;
603 603 }
604 604
605 605 if (sdbc_cd_io_kstats_mutexes) {
606 606 /* mutexes are already destroyed in cd_kstat_remove() */
607 607 kmem_free(sdbc_cd_io_kstats_mutexes,
608 608 sizeof (kmutex_t) * sdbc_max_devs);
609 609 sdbc_cd_io_kstats_mutexes = NULL;
610 610 }
611 611
612 612
613 613 if (_sd_cache_stats) {
614 614 kmem_free(_sd_cache_stats,
615 615 sizeof (_sd_stats_t) +
616 616 (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
617 617 _sd_cache_stats = NULL;
618 618 }
619 619 #ifdef _MULTI_DATAMODEL
620 620 if (_sd_cache_stats32) {
621 621 kmem_free(_sd_cache_stats32, sizeof (_sd_stats32_t) +
622 622 (sdbc_max_devs - 1) * sizeof (_sd_shared_t));
623 623 _sd_cache_stats32 = NULL;
624 624 }
625 625 #endif
626 626 }
627 627
628 628 static int
629 629 _sdbc_stats_configure(int cblocks)
630 630 {
631 631
632 632 _sd_cache_stats = kmem_zalloc(sizeof (_sd_stats_t) +
633 633 (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
634 634 _sd_cache_stats->st_blksize = (int)BLK_SIZE(1);
635 635 _sd_cache_stats->st_cachesize = cblocks * BLK_SIZE(1);
636 636 _sd_cache_stats->st_numblocks = cblocks;
637 637 _sd_cache_stats->st_wrcancelns = 0;
638 638 _sd_cache_stats->st_destaged = 0;
639 639 #ifdef _MULTI_DATAMODEL
640 640 _sd_cache_stats32 = kmem_zalloc(sizeof (_sd_stats32_t) +
641 641 (sdbc_max_devs - 1) * sizeof (_sd_shared_t), KM_SLEEP);
642 642 #endif
643 643
644 644 /* kstat implementation - global stats */
645 645 sdbc_global_stats_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
646 646 SDBC_KSTAT_GSTATS, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
647 647 sizeof (sdbc_global_stats)/sizeof (kstat_named_t),
648 648 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
649 649
650 650 if (sdbc_global_stats_kstat != NULL) {
651 651 sdbc_global_stats_kstat->ks_data = &sdbc_global_stats;
652 652 sdbc_global_stats_kstat->ks_update = sdbc_global_stats_update;
653 653 sdbc_global_stats_kstat->ks_private = _sd_cache_stats;
654 654 kstat_install(sdbc_global_stats_kstat);
655 655 } else {
656 656 cmn_err(CE_WARN, "!sdbc: gstats kstat failed");
657 657 }
658 658
659 659 /* global I/O kstats */
660 660 sdbc_global_io_kstat = kstat_create(SDBC_KSTAT_MODULE, 0,
661 661 SDBC_IOKSTAT_GSTATS, "disk", KSTAT_TYPE_IO, 1, 0);
662 662
663 663 if (sdbc_global_io_kstat) {
664 664 mutex_init(&sdbc_global_io_kstat_mutex, NULL, MUTEX_DRIVER,
665 665 NULL);
666 666 sdbc_global_io_kstat->ks_lock =
667 667 &sdbc_global_io_kstat_mutex;
668 668 kstat_install(sdbc_global_io_kstat);
669 669 }
670 670
671 671 /*
672 672 * kstat implementation - cd stats
673 673 * NOTE: one kstat instance for each open cache descriptor
674 674 */
675 675 sdbc_cd_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
676 676 KM_SLEEP);
677 677
678 678 /*
679 679 * kstat implementation - i/o kstats per cache descriptor
680 680 * NOTE: one I/O kstat instance for each cd
681 681 */
682 682 sdbc_cd_io_kstats = kmem_zalloc(sizeof (kstat_t *) * sdbc_max_devs,
683 683 KM_SLEEP);
684 684
685 685 sdbc_cd_io_kstats_mutexes = kmem_zalloc(sizeof (kmutex_t) *
686 686 sdbc_max_devs, KM_SLEEP);
687 687
688 688 #ifdef DEBUG
689 689 /* kstat implementation - dynamic memory stats */
690 690 sdbc_dynmem_kstat_dm = kstat_create(SDBC_KSTAT_MODULE, 0,
691 691 SDBC_KSTAT_DYNMEM, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
692 692 sizeof (sdbc_dynmem_dm)/sizeof (kstat_named_t),
693 693 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
694 694
695 695 if (sdbc_dynmem_kstat_dm != NULL) {
696 696 sdbc_dynmem_kstat_dm->ks_data = &sdbc_dynmem_dm;
697 697 sdbc_dynmem_kstat_dm->ks_update = sdbc_dynmem_kstat_update_dm;
698 698 sdbc_dynmem_kstat_dm->ks_private = &dynmem_processing_dm;
699 699 kstat_install(sdbc_dynmem_kstat_dm);
700 700 } else {
701 701 cmn_err(CE_WARN, "!sdbc: dynmem kstat failed");
702 702 }
703 703 #endif
704 704
705 705 return (0);
706 706 }
707 707
708 708 /*
709 709 * sdbc_dmqueues_configure()
710 710 * initialize the queues of dynamic memory chains.
711 711 */
712 712
713 713 _sd_queue_t *sdbc_dm_queues;
714 714 static int max_dm_queues;
715 715
716 716
717 717 static int
718 718 sdbc_dmqueues_configure()
719 719 {
720 720 int i;
721 721
722 722 /*
723 723 * CAUTION! this code depends on max_dyn_list not changing
724 724 * if it does change behavior may be incorrect, as cc_alloc_size_dm
725 725 * depends on max_dyn_list and indexes to dmqueues are derived from
726 726 * cc_alloc_size_dm.
727 727 * see _sd_setup_category_on_type() and _sd_dealloc_dm()
728 728 * TODO: prevent max_dyn_list from on-the-fly modification (easy) or
729 729 * allow for on-the-fly changes to number of dm queues (hard).
730 730 */
731 731 max_dm_queues = dynmem_processing_dm.max_dyn_list;
732 732
733 733 ++max_dm_queues; /* need a "0" queue for centrys with no memory */
734 734
735 735 sdbc_dm_queues = (_sd_queue_t *)
736 736 kmem_zalloc(max_dm_queues * sizeof (_sd_queue_t), KM_SLEEP);
737 737
738 738 #ifdef DEBUG
739 739 dmchainpull_table = (int *)kmem_zalloc(max_dm_queues *
740 740 max_dm_queues * sizeof (int), KM_SLEEP);
741 741 #endif
742 742
743 743 for (i = 0; i < max_dm_queues; ++i) {
744 744 (void) _sdbc_lruq_configure(&sdbc_dm_queues[i]);
745 745 sdbc_dm_queues[i].sq_dmchain_cblocks = i;
746 746 }
747 747
748 748 return (0);
749 749 }
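
As an illustration (the real value comes from dynmem_processing_dm.max_dyn_list): if max_dyn_list were 8, nine queues would be allocated — sdbc_dm_queues[0] for centrys that hold no memory, plus queues 1 through 8 for dm chains of that many cache blocks, each queue recording its chain length in sq_dmchain_cblocks.
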
750 750
751 751 static void
752 752 sdbc_dmqueues_deconfigure()
753 753 {
754 754 /* CAUTION! this code depends on max_dyn_list not changing */
755 755
756 756 if (sdbc_dm_queues)
757 757 kmem_free(sdbc_dm_queues, max_dm_queues * sizeof (_sd_queue_t));
758 758 sdbc_dm_queues = NULL;
759 759 max_dm_queues = 0;
760 760 }
761 761
762 762 #define GOOD_LRUSIZE(q) ((q->sq_inq >= 0) && (q->sq_inq <= CBLOCKS))
763 763
764 764 /*
765 765 * _sdbc_lruq_configure - initialize the lru queue
766 766 *
767 767 * ARGUMENTS: NONE
768 768 * RETURNS: 0
769 769 *
770 770 */
771 771
772 772 static int
773 773 _sdbc_lruq_configure(_sd_queue_t *_sd_lru)
774 774 {
775 775
776 776 _sd_lru->sq_inq = 0;
777 777
778 778 mutex_init(&_sd_lru->sq_qlock, NULL, MUTEX_DRIVER, NULL);
779 779
780 780 _sd_lru->sq_qhead.cc_next = _sd_lru->sq_qhead.cc_prev
781 781 = &(_sd_lru->sq_qhead);
782 782 return (0);
783 783 }
784 784
785 785 /*
786 786 * _sdbc_lruq_deconfigure - deconfigure the lru queue
787 787 *
788 788 * ARGUMENTS: NONE
789 789 *
790 790 */
791 791
792 792 static void
793 793 _sdbc_lruq_deconfigure(void)
794 794 {
795 795 _sd_queue_t *_sd_lru;
796 796
797 797 _sd_lru = _SD_LRU_Q;
798 798
799 799 mutex_destroy(&_sd_lru->sq_qlock);
800 800 bzero(_sd_lru, sizeof (_sd_queue_t));
801 801
802 802 }
803 803
804 804 /*
805 805 * _sdbc_mem_configure - initialize the cache memory.
806 806 * Create and initialize the hash table.
807 807 * Create cache control blocks and fill them with relevant
808 808 * information and enqueue onto the lru queue.
809 809 * Initialize the Write control blocks (blocks that contain
810 810 * information as to where the data will be mirrored)
811 811 * Initialize the Fault tolerant blocks (blocks that contain
812 812 * information about the mirror nodes dirty writes)
813 813 *
814 814 * ARGUMENTS:
815 815 * cblocks - Number of cache blocks.
816 816 * RETURNS: 0
817 817 *
818 818 */
819 819 static int
820 820 _sdbc_mem_configure(int cblocks, spcs_s_info_t kstatus)
821 821 {
822 822 int num_blks, i, blk;
823 823 _sd_cctl_t *centry;
824 824 _sd_net_t *netc;
825 825 _sd_cctl_t *prev_entry_dm, *first_entry_dm;
826 826
827 827 if ((_sd_htable = _sdbc_hash_configure(cblocks)) == NULL) {
828 828 spcs_s_add(kstatus, SDBC_ENOHASH);
829 829 return (-1);
830 830 }
831 831
832 832 _sd_cctl_groupsz = (cblocks / _SD_CCTL_GROUPS) +
833 833 ((cblocks % _SD_CCTL_GROUPS) != 0);
834 834
835 835 for (i = 0; i < _SD_CCTL_GROUPS; i++) {
836 836 _sd_cctl[i] = (_sd_cctl_t *)
837 837 nsc_kmem_zalloc(_sd_cctl_groupsz * sizeof (_sd_cctl_t),
838 838 KM_SLEEP, sdbc_cache_mem);
839 839
840 840 if (_sd_cctl[i] == NULL) {
841 841 spcs_s_add(kstatus, SDBC_ENOCB);
842 842 return (-1);
843 843 }
844 844 }
845 845
846 846 _sd_ccent_sync = (_sd_cctl_sync_t *)
847 847 nsc_kmem_zalloc(_sd_ccsync_cnt * sizeof (_sd_cctl_sync_t),
848 848 KM_SLEEP, sdbc_local_mem);
849 849
850 850 if (_sd_ccent_sync == NULL) {
851 851 spcs_s_add(kstatus, SDBC_ENOCCTL);
852 852 return (-1);
853 853 }
854 854
855 855 for (i = 0; i < _sd_ccsync_cnt; i++) {
856 856 mutex_init(&_sd_ccent_sync[i]._cc_lock, NULL, MUTEX_DRIVER,
857 857 NULL);
858 858 cv_init(&_sd_ccent_sync[i]._cc_blkcv, NULL, CV_DRIVER, NULL);
859 859 }
860 860
861 861 blk = 0;
862 862
863 863 netc = &_sd_net_config;
864 864
865 865 num_blks = (netc->sn_cpages * (int)netc->sn_psize)/BLK_SIZE(1);
866 866
867 867 prev_entry_dm = 0;
868 868 first_entry_dm = 0;
869 869 for (i = 0; i < num_blks; i++, blk++) {
870 870 centry = _sd_cctl[(blk/_sd_cctl_groupsz)] +
871 871 (blk%_sd_cctl_groupsz);
872 872 centry->cc_sync = &_sd_ccent_sync[blk % _sd_ccsync_cnt];
873 873 centry->cc_next = centry->cc_prev = NULL;
874 874 centry->cc_dirty_next = centry->cc_dirty_link = NULL;
875 875 centry->cc_await_use = centry->cc_await_page = 0;
876 876 centry->cc_inuse = centry->cc_pageio = 0;
877 877 centry->cc_flag = 0;
878 878 centry->cc_iocount = 0;
879 879 centry->cc_valid = 0;
880 880
881 881 if (!first_entry_dm)
882 882 first_entry_dm = centry;
883 883 if (prev_entry_dm)
884 884 prev_entry_dm->cc_link_list_dm = centry;
885 885 prev_entry_dm = centry;
886 886 centry->cc_link_list_dm = first_entry_dm;
887 887 centry->cc_data = 0;
888 888 centry->cc_write = NULL;
889 889 centry->cc_dirty = 0;
890 890
891 891 {
892 892 _sd_queue_t *q;
893 893 if (sdbc_use_dmchain) {
894 894 q = &sdbc_dm_queues[0];
895 895 centry->cc_cblocks = 0;
896 896 } else
897 897 q = _SD_LRU_Q;
898 898
899 899 _sd_ins_queue(q, centry);
900 900 }
901 901
902 902 }
903 903
904 904 if (_sdbc_gl_centry_configure(kstatus) != 0)
905 905 return (-1);
906 906
907 907 if (_sdbc_gl_file_configure(kstatus) != 0)
908 908 return (-1);
909 909
910 910 return (0);
911 911 }
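
Note the side effect of the initialization loop above: each centry's cc_link_list_dm is pointed at the next centry, and the final assignment leaves the last entry aimed back at first_entry_dm, so all cache blocks end up on one circular list that the dynamic-memory code can walk from any starting centry.
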
912 912
913 913 /*
914 914 * _sdbc_gl_file_configure()
915 915 * allocate and initialize space for the global filename data.
916 916 *
917 917 */
918 918 static int
919 919 _sdbc_gl_file_configure(spcs_s_info_t kstatus)
920 920 {
921 921 ss_voldata_t *fileinfo;
922 922 ss_voldata_t tempfinfo;
923 923 ss_vdir_t vdir;
924 924 ss_vdirkey_t key;
925 925 int err = 0;
926 926
927 927 _sdbc_gl_file_info_size = safestore_config.ssc_maxfiles *
928 928 sizeof (ss_voldata_t);
929 929
930 930 if ((_sdbc_gl_file_info = kmem_zalloc(_sdbc_gl_file_info_size,
931 931 KM_NOSLEEP)) == NULL) {
932 932 spcs_s_add(kstatus, SDBC_ENOSFNV);
933 933 return (-1);
934 934 }
935 935
936 936 /* setup the key to get a directory stream of all volumes */
937 937 key.vk_type = CDIR_ALL;
938 938
939 939 fileinfo = _sdbc_gl_file_info;
940 940
941 941 /*
942 942 * if coming up after a crash, "refresh" the host
943 943 * memory copy from safestore.
944 944 */
945 945 if (_sdbc_warm_start()) {
946 946
947 947 if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
948 948 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
949 949 "cannot read safestore");
950 950 return (-1);
951 951 }
952 952
953 953
954 954 /*
955 955 * cycle through the vdir getting volume data
956 956 * and volume tokens
957 957 */
958 958
959 959 while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir, fileinfo))
960 960 == SS_OK) {
961 961 ++fileinfo;
962 962 }
963 963
964 964 if (err != SS_EOF) {
965 965 /*
966 966 * fail to configure since
967 967 * recovery is not possible.
968 968 */
969 969 spcs_s_add(kstatus, SDBC_ENOREFRESH);
970 970 return (-1);
971 971 }
972 972
973 973 } else { /* normal initialization, not a warm start */
974 974
975 975 /*
976 976 * if this fails, continue: cache will start
977 977 * in writethru mode
978 978 */
979 979
980 980 if (SSOP_GETVDIR(sdbc_safestore, &key, &vdir)) {
981 981 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure): "
982 982 "cannot read safestore");
983 983 return (-1);
984 984 }
985 985
986 986 /*
987 987 * cycle through the vdir getting just the volume tokens
988 988 * and initializing volume entries
989 989 */
990 990
991 991 while ((err = SSOP_GETVDIRENT(sdbc_safestore, &vdir,
992 992 &tempfinfo)) == 0) {
993 993 /*
994 994 * initialize the host memory copy of the
995 995 * global file region. this means setting the
996 996 * _pinned and _attached fields to _SD_NO_HOST
997 997 * because the default of zero conflicts with
998 998 * the min nodeid of zero.
999 999 */
1000 1000 fileinfo->sv_vol = tempfinfo.sv_vol;
1001 1001 fileinfo->sv_pinned = _SD_NO_HOST;
1002 1002 fileinfo->sv_attached = _SD_NO_HOST;
1003 1003 fileinfo->sv_cd = _SD_NO_CD;
1004 1004
1005 1005 /* initialize the directory entry */
1006 1006 if ((err = SSOP_SETVOL(sdbc_safestore, fileinfo))
1007 1007 == SS_ERR) {
1008 1008 cmn_err(CE_WARN,
1009 1009 "!sdbc(_sdbc_gl_file_configure): "
1010 1010 "volume entry write failure %p",
1011 1011 (void *)fileinfo->sv_vol);
1012 1012 break;
1013 1013 }
1014 1014
1015 1015 ++fileinfo;
1016 1016 }
1017 1017
1018 1018 /* coming up clean, continue in w-t mode */
1019 1019 if (err != SS_EOF)
1020 1020 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_file_configure) "
1021 1021 "unable to init safe store volinfo");
1022 1022 }
1023 1023
1024 1024 return (0);
1025 1025 }
1026 1026
1027 1027 static void
1028 1028 _sdbc_gl_centry_deconfigure(void)
1029 1029 {
1030 1030 if (_sdbc_gl_centry_info)
1031 1031 kmem_free(_sdbc_gl_centry_info, _sdbc_gl_centry_info_size);
1032 1032
1033 1033 _sdbc_gl_centry_info = NULL;
1034 1034 _sdbc_gl_centry_info_size = 0;
1035 1035 }
1036 1036
1037 1037 static int
1038 1038 _sdbc_gl_centry_configure(spcs_s_info_t kstatus)
1039 1039 {
1040 1040
1041 1041 int wblocks;
1042 1042 ss_centry_info_t *cinfo;
1043 1043 ss_cdirkey_t key;
1044 1044 ss_cdir_t cdir;
1045 1045 int err = 0;
1046 1046
1047 1047
1048 1048 wblocks = safestore_config.ssc_wsize / BLK_SIZE(1);
1049 1049 _sdbc_gl_centry_info_size = sizeof (ss_centry_info_t) * wblocks;
1050 1050
1051 1051 if ((_sdbc_gl_centry_info = kmem_zalloc(_sdbc_gl_centry_info_size,
1052 1052 KM_NOSLEEP)) == NULL) {
1053 1053 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure) "
1054 1054 "alloc failed for gl_centry_info region");
1055 1055
1056 1056 _sdbc_gl_centry_deconfigure();
1057 1057 return (-1);
1058 1058 }
1059 1059
1060 1060 /*
1061 1061 * synchronize the centry info area with safe store
1062 1062 */
1063 1063
1064 1064 /* setup the key to get a directory stream of all centrys */
1065 1065 key.ck_type = CDIR_ALL;
1066 1066
1067 1067 cinfo = _sdbc_gl_centry_info;
1068 1068
1069 1069 if (_sdbc_warm_start()) {
1070 1070
1071 1071 if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
1072 1072 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
1073 1073 "cannot read safestore");
1074 1074 return (-1);
1075 1075 }
1076 1076
1077 1077
1078 1078 /*
1079 1079 * cycle through the cdir getting resource
1080 1080 * tokens and reading centrys
1081 1081 */
1082 1082
1083 1083 while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
1084 1084 == 0) {
1085 1085 ++cinfo;
1086 1086 }
1087 1087
1088 1088 if (err != SS_EOF) {
1089 1089 /*
1090 1090 * fail to configure since
1091 1091 * recovery is not possible.
1092 1092 */
1093 1093 _sdbc_gl_centry_deconfigure();
1094 1094 spcs_s_add(kstatus, SDBC_EGLDMAFAIL);
1095 1095 return (-1);
1096 1096 }
1097 1097
1098 1098 } else {
1099 1099
1100 1100 if (SSOP_GETCDIR(sdbc_safestore, &key, &cdir)) {
1101 1101 cmn_err(CE_WARN, "!sdbc(_sdbc_gl_centry_configure): "
1102 1102 "cannot read safestore");
1103 1103 return (-1);
1104 1104 }
1105 1105
1106 1106 /*
1107 1107 * cycle through the cdir getting resource
1108 1108 * tokens and initializing centrys
1109 1109 */
1110 1110
1111 1111 while ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, cinfo))
1112 1112 == 0) {
1113 1113 cinfo->sc_cd = -1;
1114 1114 cinfo->sc_fpos = -1;
1115 1115
1116 1116 if ((err = SSOP_SETCENTRY(sdbc_safestore, cinfo))
1117 1117 == SS_ERR) {
1118 1118 cmn_err(CE_WARN,
1119 1119 "!sdbc(_sdbc_gl_centry_configure): "
1120 1120 "cache entry write failure %p",
1121 1121 (void *)cinfo->sc_res);
1122 1122 break;
1123 1123 }
1124 1124
1125 1125 ++cinfo;
1126 1126 }
1127 1127
1128 1128 /* coming up clean, continue in w-t mode */
1129 1129 if (err != SS_EOF) {
1130 1130 cmn_err(CE_WARN, "!sdbc(sdbc_gl_centry_configure) "
1131 1131 "_sdbc_gl_centry_info initialization failed");
1132 1132 }
1133 1133 }
1134 1134
1135 1135 return (0);
1136 1136 }
1137 1137
1138 1138
1139 1139 static void
1140 1140 _sdbc_gl_file_deconfigure(void)
1141 1141 {
1142 1142
1143 1143 if (_sdbc_gl_file_info)
1144 1144 kmem_free(_sdbc_gl_file_info, _sdbc_gl_file_info_size);
1145 1145
1146 1146 _sdbc_gl_file_info = NULL;
1147 1147
1148 1148 _sdbc_gl_file_info_size = 0;
1149 1149 }
1150 1150
1151 1151
1152 1152 /*
1153 1153 * _sdbc_mem_deconfigure - deconfigure the cache memory.
1154 1154 * Release any memory/locks/sv's acquired during _sdbc_mem_configure.
1155 1155 *
1156 1156 * ARGUMENTS:
1157 1157 * cblocks - Number of cache blocks.
1158 1158 *
1159 1159 */
1160 1160 /* ARGSUSED */
1161 1161 static void
1162 1162 _sdbc_mem_deconfigure(int cblocks)
1163 1163 {
1164 1164 int i;
1165 1165
1166 1166 if (_sd_ccent_sync) {
1167 1167 for (i = 0; i < _sd_ccsync_cnt; i++) {
1168 1168 mutex_destroy(&_sd_ccent_sync[i]._cc_lock);
1169 1169 cv_destroy(&_sd_ccent_sync[i]._cc_blkcv);
1170 1170 }
1171 1171 nsc_kmem_free(_sd_ccent_sync,
1172 1172 _sd_ccsync_cnt * sizeof (_sd_cctl_sync_t));
1173 1173 }
1174 1174 _sd_ccent_sync = NULL;
1175 1175
1176 1176 for (i = 0; i < _SD_CCTL_GROUPS; i++) {
1177 1177 if (_sd_cctl[i] != NULL) {
1178 1178 nsc_kmem_free(_sd_cctl[i],
1179 1179 _sd_cctl_groupsz * sizeof (_sd_cctl_t));
1180 1180 _sd_cctl[i] = NULL;
1181 1181 }
1182 1182 }
1183 1183 _sd_cctl_groupsz = 0;
1184 1184
1185 1185 _sdbc_hash_deconfigure(_sd_htable);
1186 1186 _sd_htable = NULL;
1187 1187
1188 1188 }
1189 1189
1190 1190
1191 1191 #if defined(_SD_DEBUG) || defined(DEBUG)
1192 1192 static int
1193 1193 _sd_cctl_valid(_sd_cctl_t *addr)
1194 1194 {
1195 1195 _sd_cctl_t *end;
1196 1196 int i, valid;
1197 1197
1198 1198 valid = 0;
1199 1199 for (i = 0; i < _SD_CCTL_GROUPS; i++) {
1200 1200 end = _sd_cctl[i] + _sd_cctl_groupsz;
1201 1201 if (addr >= _sd_cctl[i] && addr < end) {
1202 1202 valid = 1;
1203 1203 break;
1204 1204 }
1205 1205 }
1206 1206
1207 1207 return (valid);
1208 1208 }
1209 1209 #endif
1210 1210
1211 1211
1212 1212 /*
1213 1213 * _sd_ins_queue - insert centry into LRU queue
1214 1214 * (during initialization, locking not required)
1215 1215 */
1216 1216 static void
1217 1217 _sd_ins_queue(_sd_queue_t *q, _sd_cctl_t *centry)
1218 1218 {
1219 1219 _sd_cctl_t *q_head;
1220 1220
1221 1221 ASSERT(_sd_cctl_valid(centry));
1222 1222
1223 1223 q_head = &q->sq_qhead;
1224 1224 centry->cc_prev = q_head;
1225 1225 centry->cc_next = q_head->cc_next;
1226 1226 q_head->cc_next->cc_prev = centry;
1227 1227 q_head->cc_next = centry;
1228 1228 q->sq_inq++;
1229 1229
1230 1230 ASSERT(GOOD_LRUSIZE(q));
1231 1231 }
1232 1232
1233 1233
1234 1234
1235 1235 void
1236 1236 _sd_requeue(_sd_cctl_t *centry)
1237 1237 {
1238 1238 _sd_queue_t *q = _SD_LRU_Q;
1239 1239
1240 1240 /* was FAST */
1241 1241 mutex_enter(&q->sq_qlock);
1242 1242 #if defined(_SD_DEBUG)
1243 1243 if (1) {
1244 1244 _sd_cctl_t *cp, *cn, *qp;
1245 1245 cp = centry->cc_prev;
1246 1246 cn = centry->cc_next;
1247 1247 qp = (q->sq_qhead).cc_prev;
1248 1248 if (!_sd_cctl_valid(centry) ||
1249 1249 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
1250 1250 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
1251 1251 !_sd_cctl_valid(qp))
1252 1252 cmn_err(CE_PANIC,
1253 1253 "_sd_requeue %x prev %x next %x qp %x",
1254 1254 centry, cp, cn, qp);
1255 1255 }
1256 1256 #endif
1257 1257 centry->cc_prev->cc_next = centry->cc_next;
1258 1258 centry->cc_next->cc_prev = centry->cc_prev;
1259 1259 centry->cc_next = &(q->sq_qhead);
1260 1260 centry->cc_prev = q->sq_qhead.cc_prev;
1261 1261 q->sq_qhead.cc_prev->cc_next = centry;
1262 1262 q->sq_qhead.cc_prev = centry;
1263 1263 centry->cc_seq = q->sq_seq++;
1264 1264 /* was FAST */
1265 1265 mutex_exit(&q->sq_qlock);
1266 1266 (q->sq_req_stat)++;
1267 1267
1268 1268 }
1269 1269
1270 1270 void
1271 1271 _sd_requeue_head(_sd_cctl_t *centry)
1272 1272 {
1273 1273 _sd_queue_t *q = _SD_LRU_Q;
1274 1274
1275 1275 /* was FAST */
1276 1276 mutex_enter(&q->sq_qlock);
1277 1277 #if defined(_SD_DEBUG)
1278 1278 if (1) {
1279 1279 _sd_cctl_t *cp, *cn, *qn;
1280 1280 cp = centry->cc_prev;
1281 1281 cn = centry->cc_next;
1282 1282 qn = (q->sq_qhead).cc_prev;
1283 1283 if (!_sd_cctl_valid(centry) ||
1284 1284 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
1285 1285 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
1286 1286 !_sd_cctl_valid(qn))
1287 1287 cmn_err(CE_PANIC,
1288 1288 "_sd_requeue_head %x prev %x next %x qn %x",
1289 1289 centry, cp, cn, qn);
1290 1290 }
1291 1291 #endif
1292 1292 centry->cc_prev->cc_next = centry->cc_next;
1293 1293 centry->cc_next->cc_prev = centry->cc_prev;
1294 1294 centry->cc_prev = &(q->sq_qhead);
1295 1295 centry->cc_next = q->sq_qhead.cc_next;
1296 1296 q->sq_qhead.cc_next->cc_prev = centry;
1297 1297 q->sq_qhead.cc_next = centry;
1298 1298 centry->cc_seq = q->sq_seq++;
1299 1299 centry->cc_flag &= ~CC_QHEAD;
1300 1300 /* was FAST */
1301 1301 mutex_exit(&q->sq_qlock);
1302 1302 }
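
Both routines perform the standard sentinel-queue move: unlink the entry, then relink it at the tail of the circular queue (_sd_requeue) or at its head (_sd_requeue_head), stamping cc_seq from the queue's sq_seq counter. Stripped of the debug checks, the tail move reduces to this sketch (node_t and its fields are generic stand-ins for _sd_cctl_t's cc_next/cc_prev):

	typedef struct node node_t;
	struct node { node_t *next, *prev; };

	/* move e to the tail of a circular list with sentinel head */
	static void
	move_to_tail(node_t *head, node_t *e)
	{
		e->prev->next = e->next;	/* unlink */
		e->next->prev = e->prev;
		e->next = head;			/* relink just before the sentinel */
		e->prev = head->prev;
		head->prev->next = e;
		head->prev = e;
	}
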
1303 1303
1304 1304
1305 1305
1306 1306 /*
1307 1307 * _sd_open - Open a file.
1308 1308 *
1309 1309 * ARGUMENTS:
1310 1310 * filename - Name of the file to be opened.
1311 1311 * flag - Flag associated with open.
1312 1312 * (currently used to determine a ckd device)
1313 1313 * RETURNS:
1314 1314 * cd - the cache descriptor.
1315 1315 */
1316 1316
1317 1317 int
1318 1318 _sd_open(char *filename, int flag)
1319 1319 {
1320 1320 int cd;
1321 1321
1322 1322 if (!_sd_cache_initialized) {
1323 1323 cmn_err(CE_WARN, "!sdbc(_sd_open) cache not initialized");
1324 1324 return (-EINVAL);
1325 1325 }
1326 1326 cd = _sd_open_cd(filename, -1, flag);
1327 1327 SDTRACE(SDF_OPEN, (cd < 0) ? SDT_INV_CD : cd, 0, SDT_INV_BL, 0, cd);
1328 1328
1329 1329 return (cd);
1330 1330 }
1331 1331
1332 1332
1333 1333 static int
1334 1334 _sd_open_io(char *filename, int flag, blind_t *cdp, nsc_iodev_t *iodev)
1335 1335 {
1336 1336 _sd_cd_info_t *cdi;
1337 1337 int cd;
1338 1338 int rc = 0;
1339 1339
1340 1340 if ((cd = _sd_open(filename, flag)) >= 0) {
1341 1341
1342 1342 cdi = &(_sd_cache_files[cd]);
1343 1343 cdi->cd_iodev = iodev;
1344 1344 nsc_set_owner(cdi->cd_rawfd, cdi->cd_iodev);
1345 1345
1346 1346 *cdp = (blind_t)(unsigned long)cd;
1347 1347 } else
1348 1348 rc = -cd;
1349 1349
1350 1350 return (rc);
1351 1351 }
1352 1352
1353 1353
1354 1354
1355 1355 int
1356 1356 _sd_open_cd(char *filename, const int cd, const int flag)
1357 1357 {
1358 1358 int new_cd, rc = 0, alloc_cd = -1;
1359 1359 ss_voldata_t *cdg;
1360 1360 int preexists = 0;
1361 1361 _sd_cd_info_t *cdi;
1362 1362 int failover_open, open_failed;
1363 1363 major_t devmaj;
1364 1364 minor_t devmin;
1365 1365
1366 1366 if (_sdbc_shutdown_in_progress)
1367 1367 return (-EIO);
1368 1368
1369 1369 if (strlen(filename) > (NSC_MAXPATH-1))
1370 1370 return (-ENAMETOOLONG);
1371 1371
1372 1372 /*
1373 1373 * If the cd is >= 0, then this is a open for a specific cd.
1374 1374 * This happens when the mirror node crashes, and we attempt to
1375 1375 * reopen the files with the same cache descriptors as existed on
1376 1376 * the other node
1377 1377 */
1378 1378
1379 1379 retry_open:
1380 1380 failover_open = 0;
1381 1381 open_failed = 0;
1382 1382 if (cd >= 0) {
1383 1383 failover_open++;
1384 1384 cdi = &(_sd_cache_files[cd]);
1385 1385 mutex_enter(&_sd_cache_lock);
1386 1386 if (cdi->cd_info == NULL)
1387 1387 cdi->cd_info = &_sd_cache_stats->st_shared[cd];
1388 1388 else if (cdi->cd_info->sh_alloc &&
1389 1389 strcmp(cdi->cd_info->sh_filename, filename)) {
1390 1390 cmn_err(CE_WARN, "!sdbc(_sd_open_cd) cd %d mismatch",
1391 1391 cd);
1392 1392 mutex_exit(&_sd_cache_lock);
1393 1393 return (-EEXIST);
1394 1394 }
1395 1395
1396 1396 if (cdi->cd_info->sh_failed != 2) {
1397 1397 if (cdi->cd_info->sh_alloc != 0)
1398 1398 preexists = 1;
1399 1399 else {
1400 1400 cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
1401 1401 (void) strcpy(cdi->cd_info->sh_filename,
1402 1402 filename);
1403 1403 if (_sd_cache_stats->st_count < sdbc_max_devs)
1404 1404 _sd_cache_stats->st_count++;
1405 1405 }
1406 1406 }
1407 1407
1408 1408 mutex_exit(&_sd_cache_lock);
1409 1409 alloc_cd = cd;
1410 1410
1411 1411 goto known_cd;
1412 1412 }
1413 1413
1414 1414 new_cd = 0;
1415 1415 mutex_enter(&_sd_cache_lock);
1416 1416
1417 1417 for (cdi = &(_sd_cache_files[new_cd]),
1418 1418 cdg = _sdbc_gl_file_info + new_cd;
1419 1419 new_cd < (sdbc_max_devs); new_cd++, cdi++, cdg++) {
1420 1420 if (strlen(cdg->sv_volname) != 0)
1421 1421 if (strcmp(cdg->sv_volname, filename))
1422 1422 continue;
1423 1423
1424 1424 if (cdi->cd_info == NULL)
1425 1425 cdi->cd_info = &_sd_cache_stats->st_shared[new_cd];
1426 1426
1427 1427 if (cdi->cd_info->sh_failed != 2) {
1428 1428 if (cdi->cd_info->sh_alloc != 0)
1429 1429 preexists = 1;
1430 1430 else {
1431 1431 if (cd == -2) {
1432 1432 mutex_exit(&_sd_cache_lock);
1433 1433 return (-1);
1434 1434 }
1435 1435 cdi->cd_info->sh_alloc = CD_ALLOC_IN_PROGRESS;
1436 1436 (void) strcpy(cdi->cd_info->sh_filename,
1437 1437 filename);
1438 1438 (void) strcpy(cdg->sv_volname, filename);
1439 1439
1440 1440 cdg->sv_cd = new_cd;
1441 1441 /* update safestore */
1442 1442 SSOP_SETVOL(sdbc_safestore, cdg);
1443 1443 if (_sd_cache_stats->st_count < sdbc_max_devs)
1444 1444 _sd_cache_stats->st_count++;
1445 1445 cdi->cd_flag = 0;
1446 1446 }
1447 1447 }
1448 1448 alloc_cd = new_cd;
1449 1449 break;
1450 1450 }
1451 1451
1452 1452 mutex_exit(&_sd_cache_lock);
1453 1453
1454 1454 if (alloc_cd == -1)
1455 1455 return (-ENOSPC);
1456 1456
1457 1457 known_cd:
1458 1458 /*
1459 1459 * If preexists: someone else is attempting to open this file as
1460 1460 * well. Do only one open, but block everyone else here till the
1461 1461 * open is completed.
1462 1462 */
1463 1463 if (preexists) {
1464 1464 while (cdi->cd_info->sh_alloc == CD_ALLOC_IN_PROGRESS) {
1465 1465 delay(drv_usectohz(20000));
1466 1466 }
1467 1467 if ((cdi->cd_info->sh_alloc != CD_ALLOCATED))
1468 1468 goto retry_open;
1469 1469 return (alloc_cd);
1470 1470 }
1471 1471
1472 1472 if (!(cdi->cd_rawfd =
1473 1473 nsc_open(filename, NSC_SDBC_ID|NSC_DEVICE, _sdbc_fd_def,
1474 1474 (blind_t)(unsigned long)alloc_cd, &rc)) ||
1475 1475 !nsc_getval(cdi->cd_rawfd, "DevMaj", (int *)&devmaj) ||
1476 1476 !nsc_getval(cdi->cd_rawfd, "DevMin", (int *)&devmin)) {
1477 1477 if (cdi->cd_rawfd) {
1478 1478 (void) nsc_close(cdi->cd_rawfd);
1479 1479 cdi->cd_rawfd = NULL;
1480 1480 }
1481 1481 /*
1482 1482 * take into account that there may be pinned data on a
1483 1483 * device that can no longer be opened
1484 1484 */
1485 1485 open_failed++;
1486 1486 if (!(cdi->cd_info->sh_failed) && !failover_open) {
1487 1487 cdi->cd_info->sh_alloc = 0;
1488 1488 mutex_enter(&_sd_cache_lock);
1489 1489 _sd_cache_stats->st_count--;
1490 1490 mutex_exit(&_sd_cache_lock);
1491 1491 if (!rc)
1492 1492 rc = EIO;
1493 1493 return (-rc);
1494 1494 }
1495 1495 }
1496 1496
1497 1497 cdi->cd_strategy = nsc_get_strategy(devmaj);
1498 1498 cdi->cd_crdev = makedevice(devmaj, devmin);
1499 1499 cdi->cd_desc = alloc_cd;
1500 1500 cdi->cd_dirty_head = cdi->cd_dirty_tail = NULL;
1501 1501 cdi->cd_io_head = cdi->cd_io_tail = NULL;
1502 1502 cdi->cd_hint = 0;
1503 1503 #ifdef DEBUG
1504 1504 /* put the dev_t in the ioerr_inject_table */
1505 1505 _sdbc_ioj_set_dev(alloc_cd, cdi->cd_crdev);
1506 1506 #endif
1507 1507
1508 1508 cdi->cd_global = (_sdbc_gl_file_info + alloc_cd);
1509 1509 if (open_failed) {
1510 1510 cdi->cd_info->sh_failed = 2;
1511 1511 } else if (cdi->cd_info->sh_failed != 2)
1512 1512 if ((cdi->cd_global->sv_pinned == _SD_SELF_HOST) &&
1513 1513 !failover_open)
1514 1514 cdi->cd_info->sh_failed = 1;
1515 1515 else
1516 1516 cdi->cd_info->sh_failed = 0;
1517 1517
1518 1518 cdi->cd_flag |= flag;
1519 1519 mutex_init(&cdi->cd_lock, NULL, MUTEX_DRIVER, NULL);
1520 1520
1521 1521 #ifndef _SD_NOTRACE
1522 1522 (void) _sdbc_tr_configure(alloc_cd);
1523 1523 #endif
1524 1524 cdi->cd_info->sh_alloc = CD_ALLOCATED;
1525 1525 cdi->cd_global = (_sdbc_gl_file_info + alloc_cd);
1526 1526 cdi->cd_info->sh_cd = (unsigned short) alloc_cd;
1527 1527 mutex_enter(&_sd_cache_lock);
1528 1528 _sd_cache_stats->st_loc_count++;
1529 1529 mutex_exit(&_sd_cache_lock);
1530 1530
1531 1531 if (cd_kstat_add(alloc_cd) < 0) {
1532 1532 cmn_err(CE_WARN, "!Could not create kstats for cache descriptor"
1533 1533 " %d", alloc_cd);
1534 1534 }
1535 1535
1536 1536
1537 1537 return (open_failed ? -EIO : alloc_cd);
1538 1538 }
1539 1539
1540 1540
1541 1541 /*
1542 1542 * _sd_close - Close a cache descriptor.
1543 1543 *
1544 1544 * ARGUMENTS:
1545 1545 * cd - the cache descriptor to be closed.
1546 1546 * RETURNS:
1547 1547 * 0 on success.
1548 1548 * Error otherwise.
1549 1549 *
1550 1550 * Note: Under Construction.
1551 1551 */
1552 1552
1553 1553 int
1554 1554 _sd_close(int cd)
1555 1555 {
1556 1556 int rc;
1557 1557 _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1558 1558
1559 1559 if (!FILE_OPENED(cd)) {
1560 1560 rc = EINVAL;
1561 1561 goto out;
1562 1562 }
1563 1563
1564 1564 SDTRACE(ST_ENTER|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, 0);
1565 1565
1566 1566 mutex_enter(&_sd_cache_lock);
1567 1567 if ((cdi->cd_info->sh_alloc == 0) ||
1568 1568 (cdi->cd_info->sh_alloc & CD_CLOSE_IN_PROGRESS)) {
1569 1569 mutex_exit(&_sd_cache_lock);
1570 1570 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, EINVAL);
1571 1571 rc = EINVAL;
1572 1572 goto out;
1573 1573 }
1574 1574 cdi->cd_info->sh_alloc |= CD_CLOSE_IN_PROGRESS;
1575 1575 mutex_exit(&_sd_cache_lock);
1576 1576
1577 1577 /*
1578 1578 * _sd_flush_cd() will return -1 for the case where pinned
1579 1579 * data is present, but has been transfered to the mirror
1580 1580 * node. In this case it is safe to close the device as
1581 1581 * though _sd_flush_cd() had returned 0.
1582 1582 */
1583 1583
1584 1584 rc = _sd_flush_cd(cd);
1585 1585 if (rc == -1)
1586 1586 rc = 0;
1587 1587
1588 1588 if (rc != 0) {
1589 1589 mutex_enter(&_sd_cache_lock);
1590 1590 if ((rc == EAGAIN) &&
1591 1591 (cdi->cd_global->sv_pinned == _SD_NO_HOST)) {
1592 1592 cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1593 1593 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1594 1594 }
1595 1595
1596 1596 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1597 1597 mutex_exit(&_sd_cache_lock);
1598 1598 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL,
1599 1599 _SD_CD_WBLK_USED(cd), rc);
1600 1600 goto out;
1601 1601 }
1602 1602
1603 1603 rc = nsc_close(cdi->cd_rawfd);
1604 1604 if (rc) {
1605 1605 mutex_enter(&_sd_cache_lock);
1606 1606 cdi->cd_info->sh_alloc &= ~CD_CLOSE_IN_PROGRESS;
1607 1607 mutex_exit(&_sd_cache_lock);
1608 1608 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, rc);
1609 1609 goto out;
1610 1610 }
1611 1611 mutex_enter(&_sd_cache_lock);
1612 1612 _sd_cache_stats->st_loc_count--;
1613 1613 mutex_exit(&_sd_cache_lock);
1614 1614
1615 1615 if (cd_kstat_remove(cd) < 0) {
1616 1616 cmn_err(CE_WARN, "!Could not remove kstat for cache descriptor "
1617 1617 "%d", cd);
1618 1618 }
1619 1619
1620 1620 cdi->cd_info->sh_alloc = 0;
1621 1621 cdi->cd_info->sh_failed = 0;
1622 1622 /* cdi->cd_info = NULL; */
1623 1623 cdi->cd_flag = 0;
1624 1624 SDTRACE(ST_EXIT|SDF_CLOSE, cd, 0, SDT_INV_BL, 0, NSC_DONE);
1625 1625 rc = NSC_DONE;
1626 1626 goto out;
1627 1627
1628 1628 out:
1629 1629 return (rc);
1630 1630 }
1631 1631
1632 1632
1633 1633 static int
1634 1634 _sd_close_io(blind_t xcd)
1635 1635 {
1636 1636 _sd_cd_info_t *cdi;
1637 1637 int cd = (int)(unsigned long)xcd;
1638 1638 int rc = 0;
1639 1639
1640 1640 if ((rc = _sd_close((int)cd)) == NSC_DONE) {
1641 1641 cdi = &(_sd_cache_files[cd]);
1642 1642 cdi->cd_iodev = NULL;
1643 1643 }
1644 1644
1645 1645 return (rc);
1646 1646 }
1647 1647
1648 1648
1649 1649 /*
1650 1650 * _sdbc_remote_store_pinned - reflect pinned/failed blocks for cd
1651 1651 * to our remote mirror. Returns count of blocks reflected or -1 on error.
1652 1652 *
1653 1653 */
1654 1654 int
1655 1655 _sdbc_remote_store_pinned(int cd)
1656 1656 {
1657 1657 int cnt = 0;
1658 1658 _sd_cd_info_t *cdi = &(_sd_cache_files[cd]);
1659 1659 _sd_cctl_t *cc_ent, *cc_list;
1660 1660
1661 1661 ASSERT(cd >= 0);
1662 1662 if (cdi->cd_info->sh_failed) {
1663 1663
1664 1664 if (cdi->cd_global->sv_pinned == _SD_NO_HOST) {
1665 1665 cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1666 1666 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1667 1667 }
1668 1668
1669 1669 mutex_enter(&cdi->cd_lock);
1670 1670 cc_ent = cc_list = cdi->cd_fail_head;
1671 1671 while (cc_ent) {
1672 1672 cnt++;
1673 1673
1674 1674 /* is this always necessary? jgk */
1675 1675
1676 1676 if (SSOP_WRITE_CBLOCK(sdbc_safestore,
1677 1677 cc_ent->cc_write->sc_res, cc_ent->cc_data,
1678 1678 CACHE_BLOCK_SIZE, 0)) {
1679 1679 mutex_exit(&cdi->cd_lock);
1680 1680 return (-1);
1681 1681 }
1682 1682
1683 1683 /* update the cache block metadata */
1684 1684 CENTRY_SET_FTPOS(cc_ent);
1685 1685 cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
1686 1686
1687 1687 cc_ent->cc_write->sc_dirty = CENTRY_DIRTY(cc_ent);
1688 1688
1689 1689 SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
1690 1690
1691 1691 cc_ent = cc_ent->cc_dirty_next;
1692 1692 if (!cc_ent)
1693 1693 cc_ent = cc_list = cc_list->cc_dirty_link;
1694 1694 }
1695 1695 mutex_exit(&cdi->cd_lock);
1696 1696 }
1697 1697
1698 1698 return (cnt);
1699 1699 }
1700 1700
1701 1701 /*
1702 1702 * _sd_flush_cd()
1703 1703 * reflect pinned blocks to mirrored node
1704 1704 * wait for dirty blocks to be flushed
1705 1705 * returns:
1706 1706 * EIO I/O failure, or pinned blocks and no mirror
1707 1707 * EAGAIN Hang: count of outstanding writes isn't decreasing
1708 1708 * -1 pinned blocks, reflected to mirror
1709 1709 * 0 success
1710 1710 */
1711 1711 static int
1712 1712 _sd_flush_cd(int cd)
1713 1713 {
1714 1714 int rc;
1715 1715
1716 1716 if ((rc = _sd_wait_for_flush(cd)) == 0)
1717 1717 return (0);
1718 1718
1719 1719 /*
1720 1720 * if we timed out simply return otherwise
1721 1721 * it must be an i/o type of error
1722 1722 */
1723 1723 if (rc == EAGAIN)
1724 1724 return (rc);
1725 1725
1726 1726 if (_sd_is_mirror_down())
1727 1727 return (EIO); /* already failed, no mirror */
1728 1728
1729 1729 /* flush any pinned/failed blocks to mirror */
1730 1730 if (_sdbc_remote_store_pinned(cd) >= 0)
1731 1731 /*
1732 1732 * At this point it looks like we have blocks on the
1733 1733 * failed list and taking up space on this node but
1734 1734 * no longer have responsibility for the blocks.
1735 1735 * These blocks will in fact be freed from the cache
1736 1736 * and the failed list when the mirror picks them up
1737 1737 * from safe storage and then calls _sd_cd_discard_mirror
1738 1738 * which will issue an rpc telling us to finish up.
1739 1739 *
1740 1740 * Should the other node die before sending the rpc then
1741 1741 * we are safe with these blocks simply waiting on the
1742 1742 * failed list.
1743 1743 */
1744 1744 return (-1);
1745 1745 else
1746 1746 return (rc);
1747 1747 }
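
From the caller's side the contract above collapses to a sign test; a hedged sketch of how sdbc_detach_cd() below consumes it (only rc > 0, i.e. EIO or EAGAIN, aborts; 0 and -1 both let the detach proceed):

	/* sketch: consuming the _sd_flush_cd() contract, cf. sdbc_detach_cd() */
	int rc = _sd_flush_cd(cd);

	if (rc > 0)	/* EIO or EAGAIN: flush failed, abort the detach */
		return (rc);
	/* rc == 0 (flushed clean) or rc == -1 (pinned blocks reflected */
	/* to the mirror): either way it is safe to tear down the cd    */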
1748 1748
1749 1749 /*
1750 1750 * _sdbc_io_attach_cd -- set up for client access to device, reserve raw device
1751 1751 *
1752 1752 * ARGUMENTS:
1753 1753 * cd - the cache descriptor to attach.
1754 1754 *
1755 1755 * RETURNS:
1756 1756 * 0 on success.
1757 1757 * Error otherwise.
1758 1758 */
1759 1759 int
1760 1760 _sdbc_io_attach_cd(blind_t xcd)
1761 1761 {
1762 1762 int rc = 0;
1763 1763 _sd_cd_info_t *cdi;
1764 1764 int cd = (int)(unsigned long)xcd;
1765 1765
1766 1766 SDTRACE(ST_ENTER|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, 0);
1767 1767 if (!_sd_cache_initialized ||
1768 1768 _sdbc_shutdown_in_progress ||
1769 1769 !FILE_OPENED(cd)) {
1770 1770 SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1771 1771
1772 1772 DTRACE_PROBE(_sdbc_io_attach_cd_end1);
1773 1773
1774 1774 return (EINVAL);
1775 1775 }
1776 1776 cdi = &(_sd_cache_files[cd]);
1777 1777
1778 1778 /*
1779 1779 * check if disk is failed without raw device open. If it is,
1780 1780 * it has to be recovered using _sd_disk_online
1781 1781 */
1782 1782
1783 1783 if (cdi->cd_global->sv_pinned == _SD_SELF_HOST) {
1784 1784 _sd_print(3,
1785 1785 "_sdbc_io_attach_cd: pinned data. returning EINVAL");
1786 1786
1787 1787 DTRACE_PROBE(_sdbc_io_attach_cd_end2);
1788 1788
1789 1789 return (EINVAL);
1790 1790 }
1791 1791
1792 1792 if ((cdi->cd_info == NULL) || (cdi->cd_info->sh_failed)) {
1793 1793 DTRACE_PROBE1(_sdbc_io_attach_cd_end3,
1794 1794 struct _sd_shared *, cdi->cd_info);
1795 1795
1796 1796 return (EINVAL);
1797 1797 }
1798 1798
1799 1799 #if defined(_SD_FAULT_RES)
1800 1800 /* wait for node recovery to finish */
1801 1801 if (_sd_node_recovery)
1802 1802 (void) _sd_recovery_wait();
1803 1803 #endif
1804 1804
1805 1805 /* this will provoke a sdbc_fd_attach_cd call .. */
1806 1806
1807 1807 rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI);
1808 1808 SDTRACE(ST_EXIT|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1809 1809
1810 1810 return (rc);
1811 1811 }
1812 1812
1813 1813 /*
1814 1814 * sdbc_fd_attach_cd -- setup cache for access to raw device underlying cd.
1815 1815 * This is provoked by some piece of sdbc doing a reserve on the raw device.
1816 1816 *
1817 1817 * ARGUMENTS:
1818 1818 * cd - the cache descriptor to attach.
1819 1819 *
1820 1820 * RETURNS:
1821 1821 * 0 on success.
1822 1822 * Error otherwise.
1823 1823 */
1824 1824 static int
1825 1825 sdbc_fd_attach_cd(blind_t xcd)
1826 1826 {
1827 1827 int rc = 0;
1828 1828 int cd = (int)(unsigned long)xcd;
1829 1829 _sd_cd_info_t *cdi;
1830 1830
1831 1831 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1832 1832 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1833 1833
1834 1834 DTRACE_PROBE(sdbc_fd_attach_cd_end1);
1835 1835
1836 1836 return (EINVAL);
1837 1837 }
1838 1838 cdi = &(_sd_cache_files[cd]);
1839 1839
1840 1840 #if defined(_SD_FAULT_RES)
1841 1841 /* retrieve pinned/failed data */
1842 1842 if (!_sd_node_recovery) {
1843 1843 (void) _sd_repin_cd(cd);
1844 1844 }
1845 1845 #endif
1846 1846
1847 1847 rc = nsc_partsize(cdi->cd_rawfd, &cdi->cd_info->sh_filesize);
1848 1848 if (rc != 0) {
1849 1849 SDTRACE(ST_INFO|SDF_ATTACH, cd, 0, SDT_INV_BL, 0, rc);
1850 1850
1851 1851 DTRACE_PROBE(sdbc_fd_attach_cd_end3);
1852 1852
1853 1853 return (rc);
1854 1854 }
1855 1855
1856 1856 cdi->cd_global->sv_attached = _SD_SELF_HOST;
1857 1857
1858 1858 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1859 1859
1860 1860 mutex_enter(&_sd_cache_lock);
1861 1861 cdi->cd_info->sh_flag |= CD_ATTACHED;
1862 1862 mutex_exit(&_sd_cache_lock);
1863 1863
1864 1864 return (0);
1865 1865 }
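
Note the division of labor between the two attach routines: _sdbc_io_attach_cd() does not touch the cache state directly, it only takes the nsctl reservation, and nsctl calls back into sdbc_fd_attach_cd() above. A sketch of the pairing (the reserve and release calls are the ones quoted from this file; the middle is elided):

	/* attach: taking the reservation provokes sdbc_fd_attach_cd(cd) */
	rc = nsc_reserve(cdi->cd_rawfd, NSC_MULTI);

	/* ... cached i/o on cd ... */

	/* detach: dropping the reservation allows sdbc_fd_detach_cd(cd) */
	nsc_release(cdi->cd_rawfd);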
1866 1866
1867 1867 /*
1868 1868 * _sdbc_io_detach_cd -- release raw device
1869 1869 * Called when a cache client is being detached from this cd.
1870 1870 *
1871 1871 * ARGUMENTS:
1872 1872 * cd - the cache descriptor to detach.
1873 1873 * RETURNS:
1874 1874 * 0 on success.
1875 1875 * Error otherwise.
1876 1876 */
1877 1877 int
1878 1878 _sdbc_io_detach_cd(blind_t xcd)
1879 1879 {
1880 1880 int cd = (int)(unsigned long)xcd;
1881 1881 _sd_cd_info_t *cdi;
1882 1882
1883 1883
1884 1884 SDTRACE(ST_ENTER|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1885 1885 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1886 1886 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1887 1887
1888 1888 DTRACE_PROBE(_sdbc_io_detach_cd_end1);
1889 1889
1890 1890 return (EINVAL);
1891 1891 }
1892 1892
1893 1893 #if defined(_SD_FAULT_RES)
1894 1894 if (_sd_node_recovery)
1895 1895 (void) _sd_recovery_wait();
1896 1896 #endif
1897 1897 /* relinquish responsibility for device */
1898 1898 cdi = &(_sd_cache_files[cd]);
1899 1899 if (!(cdi->cd_rawfd) || !nsc_held(cdi->cd_rawfd)) {
1900 1900 cmn_err(CE_WARN, "!sdbc(_sdbc_detach_cd)(%d) not attached", cd);
1901 1901 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1902 1902 DTRACE_PROBE1(_sdbc_io_detach_cd_end2,
1903 1903 nsc_fd_t *, cdi->cd_rawfd);
1904 1904
1905 1905 return (EPROTO);
1906 1906 }
1907 1907 /* this will provoke/allow a call to sdbc_fd_detach_cd */
1908 1908 nsc_release(cdi->cd_rawfd);
1909 1909
1910 1910 SDTRACE(ST_EXIT|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1911 1911
1912 1912 return (0);
1913 1913 }
1914 1914
1915 1915 /*
1916 1916  * sdbc_detach_cd -- flush dirty writes to disk, release raw device
1917 1917 * Called when raw device is being detached from this cd.
1918 1918 *
1919 1919 * ARGUMENTS:
1920 1920 * cd - the cache descriptor to detach.
1921 1921 * rd_only - non-zero if detach is for read access.
1922 1922 * RETURNS:
1923 1923 * 0 on success.
1924 1924 * Error otherwise.
1925 1925 */
1926 1926 static int
1927 1927 sdbc_detach_cd(blind_t xcd, int rd_only)
1928 1928 {
1929 1929 int rc;
1930 1930 int cd = (int)(unsigned long)xcd;
1931 1931 _sd_cd_info_t *cdi;
1932 1932
1933 1933 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1934 1934
1935 1935 if (!_sd_cache_initialized || !FILE_OPENED(cd)) {
1936 1936 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, EINVAL);
1937 1937
1938 1938 DTRACE_PROBE(sdbc_detach_cd_end1);
1939 1939
1940 1940 return (EINVAL);
1941 1941 }
1942 1942
1943 1943
1944 1944 rc = _sd_flush_cd(cd);
1945 1945 if (rc > 0) {
1946 1946 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, rc);
1947 1947
1948 1948 DTRACE_PROBE(sdbc_detach_cd_end2);
1949 1949
1950 1950 return (rc);
1951 1951 }
1952 1952
1953 1953 if (!rd_only) {
1954 1954 _sd_hash_invalidate_cd(cd);
1955 1955 cdi = &(_sd_cache_files[cd]);
1956 1956
1957 1957 if (cdi->cd_global->sv_attached == _SD_SELF_HOST) {
1958 1958 cdi->cd_global->sv_attached = _SD_NO_HOST;
1959 1959 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1960 1960 } else {
1961 1961 cmn_err(CE_WARN,
1962 1962 "!sdbc(_sdbc_detach_cd) (%d) attached by node %d",
1963 1963 cd, cdi->cd_global->sv_attached);
1964 1964 SDTRACE(SDF_DETACH, cd, 0, SDT_INV_BL, 0, EPROTO);
1965 1965
1966 1966 DTRACE_PROBE1(sdbc_detach_cd_end3,
1967 1967 int, cdi->cd_global->sv_attached);
1968 1968
1969 1969 return (EPROTO);
1970 1970 }
1971 1971
1972 1972 mutex_enter(&_sd_cache_lock);
1973 1973 cdi->cd_info->sh_flag &= ~CD_ATTACHED;
1974 1974 mutex_exit(&_sd_cache_lock);
1975 1975 }
1976 1976
1977 1977 SDTRACE(ST_INFO|SDF_DETACH, cd, 0, SDT_INV_BL, 0, 0);
1978 1978
1979 1979 return (0);
1980 1980 }
1981 1981
1982 1982 /*
1983 1983  * sdbc_fd_detach_cd -- flush dirty writes to disk, release raw device
1984 1984 * Called when raw device is being detached from this cd.
1985 1985 *
1986 1986 * ARGUMENTS:
1987 1987 * xcd - the cache descriptor to detach.
1988 1988 * RETURNS:
1989 1989 * 0 on success.
1990 1990 * Error otherwise.
1991 1991 */
1992 1992 static int
1993 1993 sdbc_fd_detach_cd(blind_t xcd)
1994 1994 {
1995 1995 return (sdbc_detach_cd(xcd, 0));
1996 1996 }
1997 1997
1998 1998 /*
1999 1999 * sdbc_fd_flush_cd - raw device "xcd" is being detached and needs
2000 2000  * flushing. We only need to flush; we don't need to hash invalidate
2001 2001 * this file.
2002 2002 */
2003 2003 static int
2004 2004 sdbc_fd_flush_cd(blind_t xcd)
2005 2005 {
2006 2006 return (sdbc_detach_cd(xcd, 1));
2007 2007 }
2008 2008
2009 2009 /*
2010 2010 * _sd_get_pinned - re-issue PINNED callbacks for cache device
2011 2011 *
2012 2012 * ARGUMENTS:
2013 2013  * cd - the cache descriptor to reissue pinned callbacks from.
2014 2014 * RETURNS:
2015 2015 * 0 on success.
2016 2016 * Error otherwise.
2017 2017 */
2018 2018 int
2019 2019 _sd_get_pinned(blind_t xcd)
2020 2020 {
2021 2021 _sd_cd_info_t *cdi;
2022 2022 _sd_cctl_t *cc_list, *cc_ent;
2023 2023 int cd = (int)(unsigned long)xcd;
2024 2024
2025 2025 cdi = &_sd_cache_files[cd];
2026 2026
2027 2027 if (cd < 0 || cd >= sdbc_max_devs) {
2028 2028 DTRACE_PROBE(_sd_get_pinned_end1);
2029 2029 return (EINVAL);
2030 2030 }
2031 2031
2032 2032 if (!FILE_OPENED(cd)) {
2033 2033 DTRACE_PROBE(_sd_get_pinned_end2);
2034 2034 return (0);
2035 2035 }
2036 2036
2037 2037 mutex_enter(&cdi->cd_lock);
2038 2038
2039 2039 if (!cdi->cd_info->sh_failed) {
2040 2040 mutex_exit(&cdi->cd_lock);
2041 2041
2042 2042 DTRACE_PROBE(_sd_get_pinned_end3);
2043 2043 return (0);
2044 2044 }
2045 2045
2046 2046 cc_ent = cc_list = cdi->cd_fail_head;
2047 2047 while (cc_ent) {
2048 2048 if (CENTRY_PINNED(cc_ent))
2049 2049 nsc_pinned_data(cdi->cd_iodev,
2050 2050 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
2051 2051 cc_ent = cc_ent->cc_dirty_next;
2052 2052 if (!cc_ent)
2053 2053 cc_ent = cc_list = cc_list->cc_dirty_link;
2054 2054 }
2055 2055
2056 2056 mutex_exit(&cdi->cd_lock);
2057 2057
2058 2058 return (0);
2059 2059 }
2060 2060
2061 2061 /*
2062 2062 * _sd_allocate_buf - allocate a vector of buffers for io.
2063 2063 * *This call has been replaced by _sd_alloc_buf*
2064 2064 */
2065 2065
2066 2066 _sd_buf_handle_t *
2067 2067 _sd_allocate_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2068 2068 int *sts)
2069 2069 {
2070 2070 _sd_buf_handle_t *handle = NULL;
2071 2071
2072 2072 *sts = _sd_alloc_buf((blind_t)(unsigned long)cd, fba_pos, fba_len,
2073 2073 flag, &handle);
2074 2074 if (*sts == NSC_HIT)
2075 2075 *sts = NSC_DONE;
2076 2076 return (handle);
2077 2077 }
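
The (blind_t)(unsigned long)cd and (int)(unsigned long)xcd casts used here and throughout this file round-trip a small integer descriptor through nsctl's opaque handle type; going via unsigned long avoids truncation warnings on LP64 kernels. A sketch, assuming blind_t is pointer-sized as in nsctl:

	typedef void *blind_t;	/* assumption: opaque, pointer-sized handle */

	blind_t xcd = (blind_t)(unsigned long)cd;	/* int -> blind_t */
	int cd2 = (int)(unsigned long)xcd;	/* blind_t -> int, cd2 == cd */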
2078 2078
2079 2079
2080 2080 /*
2081 2081 * _sd_prefetch_buf - _sd_alloc_buf w/flag = NSC_RDAHEAD|NSC_RDBUF
2082 2082 * no 'bufvec' (data is not read by caller)
2083 2083 * skip leading valid or busy entries (data available sooner)
2084 2084 * truncate on busy block (to avoid deadlock)
2085 2085 * release trailing valid entries, adjust length before starting I/O.
2086 2086 */
2087 2087 static int
2088 2088 _sd_prefetch_buf(int cd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2089 2089 _sd_buf_handle_t *handle, int locked)
2090 2090 {
2091 2091 _sd_cd_info_t *cdi;
2092 2092 nsc_off_t cblk; /* position of temp cache block */
2093 2093 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
2094 2094 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
2095 2095 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
2096 2096 nsc_off_t io_pos; /* offset in FBA's */
2097 2097 nsc_size_t fba_orig_len;
2098 2098 int sts, stall;
2099 2099 _sd_cctl_t *centry = NULL;
2100 2100 _sd_cctl_t *lentry = NULL;
2101 2101 _sd_cctl_t *ioent = NULL;
2102 2102 _sd_cctl_t *last_ioent = NULL;
2103 - sdbc_allocbuf_t alloc_tok = {0};
2103 + sdbc_allocbuf_t alloc_tok = {{(intptr_t)NULL}};
2104 2104 int this_entry_type = 0;
2105 2105 nsc_size_t request_blocks = 0; /* number of cache blocks required */
2106 2106 int pageio;
2107 2107
2108 2108 handle->bh_flag |= NSC_HACTIVE;
2109 2109 ASSERT(cd >= 0);
2110 2110 cdi = &_sd_cache_files[cd];
2111 2111
2112 2112 /* prefetch: truncate if req'd */
2113 2113 if (fba_len > sdbc_max_fbas)
2114 2114 fba_len = sdbc_max_fbas;
2115 2115 if ((fba_pos + fba_len) > cdi->cd_info->sh_filesize) {
2116 2116 if (fba_pos >= cdi->cd_info->sh_filesize) {
2117 2117 sts = EIO;
2118 2118 goto done;
2119 2119 }
2120 2120 fba_len = cdi->cd_info->sh_filesize - fba_pos;
2121 2121 }
2122 2122
2123 2123 fba_orig_len = fba_len;
2124 2124
2125 2125 _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2126 2126 handle->bh_centry = NULL;
2127 2127
2128 2128 cblk = FBA_TO_BLK_NUM(fba_pos);
2129 2129 st_cblk_off = BLK_FBA_OFF(fba_pos);
2130 2130 st_cblk_len = BLK_FBAS - st_cblk_off;
2131 2131
2132 2132 /*
2133 2133 * count number of blocks on chain that is required
2134 2134 */
2135 2135 if ((nsc_size_t)st_cblk_len >= fba_len) {
2136 2136 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2137 2137 end_cblk_len = 0;
2138 2138 } else {
2139 2139 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2140 2140 }
2141 2141
2142 2142 request_blocks = 1; /* at least one */
2143 2143
2144 2144 /* middle piece */
2145 2145 request_blocks += (fba_len - (st_cblk_len + end_cblk_len)) >>
2146 2146 BLK_FBA_SHFT;
2147 2147
2148 2148 if (end_cblk_len)
2149 2149 ++request_blocks;
2150 2150
2151 2151 stall = 0;
2152 2152 do {
2153 2153 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2154 2154 cget:
2155 2155 if (centry = (_sd_cctl_t *)
2156 2156 _sd_hash_search(cd, cblk, _sd_htable)) {
2157 2157 try:
2158 2158 /* prefetch: skip leading valid blocks */
2159 2159 if ((ioent == NULL) &&
2160 2160 SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry)) {
2161 2161 skip:
2162 2162 sdbc_prefetch_valid_cnt++;
2163 2163 --request_blocks;
2164 2164 lentry = centry;
2165 2165 centry = NULL;
2166 2166 cblk++;
2167 2167 fba_len -= st_cblk_len;
2168 2168 st_cblk_off = 0;
2169 2169 st_cblk_len = (sdbc_cblk_fba_t)
2170 2170 ((fba_len > (nsc_size_t)BLK_FBAS) ?
2171 2171 BLK_FBAS : fba_len);
2172 2172 continue;
2173 2173 }
2174 2174
2175 2175 if (SET_CENTRY_INUSE(centry)) {
2176 2176 /*
2177 2177 * prefetch: skip leading busy
2178 2178 * or truncate at busy block
2179 2179 */
2180 2180 if (ioent == NULL)
2181 2181 goto skip;
2182 2182 sdbc_prefetch_busy_cnt++;
2183 2183 fba_orig_len -= fba_len;
2184 2184 fba_len = 0;
2185 2185 centry = lentry; /* backup */
2186 2186 break;
2187 2187 }
2188 2188
2189 2189 /*
2190 2190 * bug 4529671
2191 2191 * now that we own the centry make sure that
2192 2192 * it is still good. it could have been processed
2193 2193 * by _sd_dealloc_dm() in the window between
2194 2194 * _sd_hash_search() and SET_CENTRY_INUSE().
2195 2195 */
2196 2196 if ((_sd_cctl_t *)
2197 2197 _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2198 2198 sdbc_prefetch_deallocd++;
2199 2199 #ifdef DEBUG
2200 2200 cmn_err(CE_WARN,
2201 2201 "!prefetch centry %p cd %d cblk %" NSC_SZFMT
2202 2202 " fba_len %" NSC_SZFMT " lost to dealloc?! "
2203 2203 "cc_data %p",
2204 2204 (void *)centry, cd, cblk, fba_orig_len,
2205 2205 (void *)centry->cc_data);
2206 2206 #endif
2207 2207
2208 2208 CLEAR_CENTRY_INUSE(centry);
2209 2209 continue;
2210 2210 }
2211 2211
2212 2212 if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2213 2213 /*
2214 2214 * Do pagelist io mutual exclusion
2215 2215 * before messing with the centry.
2216 2216 */
2217 2217 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2218 2218 /* flusher not done with pageio */
2219 2219 /*
2220 2220 * prefetch: skip leading busy
2221 2221 * or truncate at busy block
2222 2222 */
2223 2223 CLEAR_CENTRY_INUSE(centry);
2224 2224 if (ioent == NULL)
2225 2225 goto skip;
2226 2226 sdbc_prefetch_pageio1++;
2227 2227 fba_orig_len -= fba_len;
2228 2228 fba_len = 0;
2229 2229 centry = lentry; /* backup */
2230 2230 break;
2231 2231
2232 2232 }
2233 2233
2234 2234 sdbc_prefetch_hit++;
2235 2235 this_entry_type = HASH_ENTRY_DM;
2236 2236 pageio = 0;
2237 2237 centry->cc_toflush = 0;
2238 2238
2239 2239 centry->cc_hits++;
2240 2240
2241 2241 /* this will reset the age flag */
2242 2242 sdbc_centry_init_dm(centry);
2243 2243
2244 2244 DTRACE_PROBE1(_sd_prefetch_buf,
2245 2245 _sd_cctl_t *, centry);
2246 2246 } else {
2247 2247 /* block mismatch */
2248 2248 sdbc_prefetch_lost++;
2249 2249
2250 2250 CLEAR_CENTRY_INUSE(centry);
2251 2251 continue;
2252 2252 }
2253 2253 } else {
2254 2254 centry = sdbc_centry_alloc(cd, cblk, request_blocks,
2255 2255 &stall, &alloc_tok, ALLOC_NOWAIT);
2256 2256
2257 2257 if (centry == NULL) {
2258 2258 /*
2259 2259 * prefetch: cache is very busy. just do
2260 2260 * the i/o for the blocks already acquired,
2261 2261 * if any.
2262 2262 */
2263 2263 fba_orig_len -= fba_len;
2264 2264 fba_len = 0;
2265 2265 /*
2266 2266 * if we have a chain of centry's
2267 2267 * then back up (set centry to lentry).
2268 2268 * if there is no chain (ioent == NULL)
2269 2269 * then centry remains NULL. this can occur
2270 2270 * if all previous centrys were hash hits
2271 2271 * on valid blocks that were processed in
2272 2272 * the skip logic above.
2273 2273 */
2274 2274 if (ioent)
2275 2275 centry = lentry; /* backup */
2276 2276 break;
2277 2277 }
2278 2278
2279 2279 /*
2280 2280 * dmchaining adjustment.
2281 2281 * if centry was obtained from the dmchain
2282 2282 * then clear local pageio variable because the
2283 2283 * centry already has cc_pageio set.
2284 2284 */
2285 2285 if (CENTRY_PAGEIO(centry))
2286 2286 pageio = 0;
2287 2287
2288 2288 DTRACE_PROBE1(_sd_alloc_buf, _sd_cctl_t *, centry);
2289 2289
2290 2290 this_entry_type = ELIGIBLE_ENTRY_DM;
2291 2291 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2292 2292 this_entry_type = HASH_ENTRY_DM;
2293 2293 else {
2294 2294 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2295 2295 this_entry_type = HOLD_ENTRY_DM;
2296 2296 }
2297 2297 }
2298 2298
2299 2299 centry->cc_chain = NULL;
2300 2300
2301 2301 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2302 2302
2303 2303 /*
2304 2304 * Do pagelist io mutual exclusion now if we did not do
2305 2305 * it above.
2306 2306 */
2307 2307
2308 2308 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2309 2309 /* flusher not done with pageio */
2310 2310 sdbc_prefetch_pageio2++;
2311 2311
2312 2312 /*
2313 2313 * prefetch: skip leading busy
2314 2314 * or truncate at busy block
2315 2315 */
2316 2316 CLEAR_CENTRY_INUSE(centry);
2317 2317 if (ioent == NULL)
2318 2318 goto skip;
2319 2319 sdbc_prefetch_busy_cnt++;
2320 2320 fba_orig_len -= fba_len;
2321 2321 fba_len = 0;
2322 2322 centry = lentry; /* backup */
2323 2323 break;
2324 2324 }
2325 2325
2326 2326 pageio = 0;
2327 2327
2328 2328 fba_len -= st_cblk_len;
2329 2329
2330 2330 if (ioent == NULL) {
2331 2331 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2332 2332 centry)) {
2333 2333 io_pos = BLK_TO_FBA_NUM(cblk) + st_cblk_off;
2334 2334 ioent = last_ioent = centry;
2335 2335 } else {
2336 2336 DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2337 2337 st_cblk_len);
2338 2338 DTRACE_PROBE4(_sd_prefetch_buf_data1,
2339 2339 uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2340 2340 st_cblk_off), int, st_cblk_len,
2341 2341 char *, *(int64_t *)(centry->cc_data +
2342 2342 FBA_SIZE(st_cblk_off)), char *,
2343 2343 *(int64_t *)(centry->cc_data +
2344 2344 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2345 2345 }
2346 2346
2347 2347 handle->bh_centry = centry;
2348 2348 st_cblk_off = 0;
2349 2349 st_cblk_len = (sdbc_cblk_fba_t)
2350 2350 ((fba_len > (nsc_size_t)BLK_FBAS) ?
2351 2351 BLK_FBAS : fba_len);
2352 2352 } else {
2353 2353 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, centry))
2354 2354 last_ioent = centry;
2355 2355 else {
2356 2356 DTRACE_PROBE4(_sd_prefetch_buf_data2,
2357 2357 uint64_t, (uint64_t)(BLK_TO_FBA_NUM(cblk) +
2358 2358 st_cblk_off), int, st_cblk_len,
2359 2359 char *, *(int64_t *)(centry->cc_data +
2360 2360 FBA_SIZE(st_cblk_off)), char *,
2361 2361 *(int64_t *)(centry->cc_data +
2362 2362 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
2363 2363 }
2364 2364
2365 2365 lentry->cc_chain = centry;
2366 2366 if (fba_len < (nsc_size_t)BLK_FBAS)
2367 2367 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2368 2368 }
2369 2369 lentry = centry;
2370 2370 cblk++;
2371 2371
2372 2372 /* if this block has a new identity clear prefetch history */
2373 2373 if (this_entry_type != HASH_ENTRY_DM)
2374 2374 centry->cc_aging_dm &=
2375 2375 ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2376 2376
2377 2377 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2378 2378 centry->cc_aging_dm |= this_entry_type | PREFETCH_BUF_E;
2379 2379 if (flag & NSC_METADATA)
2380 2380 centry->cc_aging_dm |= STICKY_METADATA_DM;
2381 2381
2382 2382 --request_blocks;
2383 2383 } while (fba_len > 0);
2384 2384
2385 2385
2386 2386 if (locked) {
2387 2387 rw_exit(&sdbc_queue_lock);
2388 2388 locked = 0;
2389 2389 }
2390 2390
2391 2391 sdbc_centry_alloc_end(&alloc_tok);
2392 2392
2393 2393 if (centry) {
2394 2394 centry->cc_chain = NULL;
2395 2395 if (sts = _sd_setup_category_on_type(handle->bh_centry)) {
2396 2396 (void) _sd_free_buf(handle);
2397 2397 goto done;
2398 2398 }
2399 2399
2400 2400 (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
2401 2401 }
2402 2402
2403 2403
2404 2404 if (ioent) {
2405 2405 /* prefetch: trailing valid can be released, adjust len */
2406 2406 if ((centry != last_ioent)) {
2407 2407 centry = last_ioent->cc_chain;
2408 2408 last_ioent->cc_chain = NULL;
2409 2409 while (centry) {
2410 2410 lentry = centry->cc_chain;
2411 2411 centry->cc_aging_dm &= ~PREFETCH_BUF_E;
2412 2412 _sd_centry_release(centry);
2413 2413 centry = lentry;
2414 2414 sdbc_prefetch_trailing++;
2415 2415 }
2416 2416 fba_len = (CENTRY_BLK(last_ioent) -
2417 2417 CENTRY_BLK(ioent) + 1) * BLK_FBAS -
2418 2418 BLK_FBA_OFF(io_pos);
2419 2419 fba_orig_len = fba_len + (io_pos - fba_pos);
2420 2420 }
2421 2421
2422 2422 _SD_DISCONNECT_CALLBACK(handle);
2423 2423 sts = _sd_doread(handle, ioent, io_pos,
2424 2424 (fba_pos + fba_orig_len - io_pos), flag);
2425 2425 if (sts > 0)
2426 2426 (void) _sd_free_buf(handle);
2427 2427 } else {
2428 2428 CACHE_FBA_READ(cd, fba_orig_len);
2429 2429 CACHE_READ_HIT;
2430 2430 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
2431 2431
2432 2432 sts = NSC_HIT;
2433 2433 }
2434 2434 done:
2435 2435 if (locked)
2436 2436 rw_exit(&sdbc_queue_lock);
2437 2437
2438 2438 return (sts);
2439 2439 }
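
The start/middle/end split computed at the top of this routine (and again in _sd_alloc_buf() below) is plain FBA arithmetic; a stand-alone sketch, assuming a geometry of eight 512-byte FBAs per cache block:

#include <stdio.h>
#include <stdint.h>

/* assumed geometry: 8 FBAs per cache block */
#define BLK_FBA_SHFT	3
#define BLK_FBAS	(1 << BLK_FBA_SHFT)
#define FBA_TO_BLK_NUM(x)	((x) >> BLK_FBA_SHFT)
#define BLK_FBA_OFF(x)		((x) & (BLK_FBAS - 1))

int
main(void)
{
	uint64_t fba_pos = 13, fba_len = 22;	/* hypothetical request */
	unsigned st_cblk_off, st_cblk_len, end_cblk_len, request_blocks;

	st_cblk_off = BLK_FBA_OFF(fba_pos);	/* 5: offset into first block */
	st_cblk_len = BLK_FBAS - st_cblk_off;	/* 3: FBAs left in first block */
	if (st_cblk_len >= fba_len) {
		st_cblk_len = (unsigned)fba_len;
		end_cblk_len = 0;
	} else {
		end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);	/* 3 */
	}
	request_blocks = 1 +	/* start piece */
	    (unsigned)((fba_len - (st_cblk_len + end_cblk_len)) >>
	    BLK_FBA_SHFT) +	/* middle pieces */
	    (end_cblk_len ? 1 : 0);	/* end piece */

	/* prints: first block 1, off 5 len 3, end len 3, 4 blocks total */
	printf("first block %llu, off %u len %u, end len %u, %u blocks total\n",
	    (unsigned long long)FBA_TO_BLK_NUM(fba_pos),
	    st_cblk_off, st_cblk_len, end_cblk_len, request_blocks);
	return (0);
}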
2440 2440
2441 2441
2442 2442 /*
2443 2443 * _sd_cc_wait - wait for inuse cache block to become available
2444 2444 * Usage:
2445 2445 * if (SET_CENTRY_INUSE(centry)) {
2446 2446 * _sd_cc_wait(cd, blk, centry, CC_INUSE);
2447 2447 * goto try_again;
2448 2448 * }
2449 2449 * -or-
2450 2450 * if (SET_CENTRY_PAGEIO(centry)) {
2451 2451 * _sd_cc_wait(cd, blk, centry, CC_PAGEIO);
2452 2452 * goto try_again;
2453 2453 * }
2454 2454 */
2455 2455 void
2456 2456 _sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag)
2457 2457 {
2458 2458 volatile ushort_t *waiters;
2459 2459 volatile uchar_t *uflag;
2460 2460
2461 2461 if (flag == CC_INUSE) {
2462 2462 waiters = &(centry->cc_await_use);
2463 2463 uflag = &(CENTRY_INUSE(centry));
2464 2464 } else if (flag == CC_PAGEIO) {
2465 2465 waiters = &(centry->cc_await_page);
2466 2466 uflag = &(CENTRY_PAGEIO(centry));
2467 2467 } else {
2468 2468 /* Oops! */
2469 2469 #ifdef DEBUG
2470 2470 cmn_err(CE_WARN, "!_sd_cc_wait: unknown flag value (%x)", flag);
2471 2471 #endif
2472 2472 return;
2473 2473 }
2474 2474
2475 2475 	mutex_enter(&centry->cc_lock);
2476 2476 if (CC_CD_BLK_MATCH(cd, cblk, centry) && (*uflag) != 0) {
2477 2477 (*waiters)++;
2478 2478 sd_serialize();
2479 2479 if ((*uflag) != 0) {
2480 2480 unsigned stime = nsc_usec();
2481 2481 			cv_wait(&centry->cc_blkcv, &centry->cc_lock);
2482 2482 (*waiters)--;
2483 2483 			mutex_exit(&centry->cc_lock);
2484 2484 SDTRACE(ST_INFO|SDF_ENT_GET,
2485 2485 cd, 0, BLK_TO_FBA_NUM(cblk), (nsc_usec()-stime), 0);
2486 2486 } else {
2487 2487 (*waiters)--;
2488 2488 			mutex_exit(&centry->cc_lock);
2489 2489 }
2490 2490 } else
2491 2491 		mutex_exit(&centry->cc_lock);
2492 2492
2493 2493 }
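
The shape of _sd_cc_wait() — bump a waiter count under the lock, re-test the busy flag after a barrier, and only then sleep — is what closes the lost-wakeup window against the CLEAR_CENTRY_INUSE()/cv_broadcast on the release side. A user-space analogue of the same pattern using pthreads (all names here are hypothetical; this is a sketch, not the kernel code):

#include <pthread.h>

typedef struct {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	unsigned short	waiters;	/* analogue of cc_await_use */
	unsigned char	busy;		/* analogue of CENTRY_INUSE() */
} block_t;

/* waiter: re-test 'busy' under the lock so a clear that raced an
 * earlier unlocked test cannot be missed, then sleep until woken */
static void
block_wait(block_t *b)
{
	pthread_mutex_lock(&b->lock);
	if (b->busy) {
		b->waiters++;
		while (b->busy)
			pthread_cond_wait(&b->cv, &b->lock);
		b->waiters--;
	}
	pthread_mutex_unlock(&b->lock);
}

/* owner: clear the flag and wake waiters, mirroring the release side */
static void
block_release(block_t *b)
{
	pthread_mutex_lock(&b->lock);
	b->busy = 0;
	if (b->waiters)
		pthread_cond_broadcast(&b->cv);
	pthread_mutex_unlock(&b->lock);
}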
2494 2494
2495 2495 /*
2496 2496 * _sd_alloc_buf - Allocate a vector of buffers for io.
2497 2497 *
2498 2498 * ARGUMENTS:
2499 2499 * cd - Cache descriptor (from a previous open)
2500 2500 * fba_pos - disk position (512-byte FBAs)
2501 2501 * fba_len - length in disk FBAs.
2502 2502 * flag - allocation type. Flag is one or more of
2503 2503 * NSC_RDBUF, NSC_WRBUF, NSC_NOBLOCK and hints.
2504 2504 * NSC_RDAHEAD - prefetch for future read.
2505 2505 * handle_p - pointer to a handle pointer.
2506 2506  * 		If the handle pointer is non-null, it's used as a
2507 2507 * pre-allocated handle. Else a new handle will be allocated
2508 2508 * and stored in *handle_p
2509 2509 *
2510 2510 * RETURNS:
2511 2511 * errno if return > 0.
2512 2512 * else NSC_HIT or NSC_DONE on success
2513 2513 * or NSC_PENDING on io in progress and NSC_NOBLOCK
2514 2514 * specified in the flag.
2515 2515 * USAGE:
2516 2516 * This routine allocates the cache blocks requested and creates a list
2517 2517 * of entries for this request.
2518 2518 * If NSC_NOBLOCK was not specified, this call could block on read io.
2519 2519 * If flag specified NSC_RDBUF and the request is not an entire
2520 2520 * hit, an io is initiated.
2521 2521 */
2522 2522 int
2523 2523 _sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
2524 2524 _sd_buf_handle_t **handle_p)
2525 2525 {
2526 2526 int cd = (int)(unsigned long)xcd;
2527 2527 _sd_cd_info_t *cdi;
2528 2528 _sd_buf_handle_t *handle;
2529 2529 int sts;
2530 2530 nsc_off_t st_cblk, cblk; /* position of start and temp cache block */
2531 2531 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
2532 2532 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
2533 2533 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
2534 2534 nsc_off_t io_pos; /* offset in FBA's */
2535 2535 _sd_bufvec_t *bufvec;
2536 2536 _sd_cctl_t *centry, *lentry, *ioent = NULL;
2537 2537 nsc_size_t fba_orig_len = fba_len; /* FBA length of orig request */
2538 2538 int stall, pageio;
2539 2539 unsigned char cc_flag;
2540 2540 int this_entry_type;
2541 2541 int locked = 0;
2542 2542 nsc_size_t dmchain_request_blocks; /* size of dmchain in cache blocks */
2543 - sdbc_allocbuf_t alloc_tok = {0};
2543 + sdbc_allocbuf_t alloc_tok = {{(intptr_t)NULL}};
2544 2544 int min_frag = 0; /* frag statistics */
2545 2545 int max_frag = 0; /* frag statistics */
2546 2546 int nfrags = 0; /* frag statistics */
2547 2547 #ifdef DEBUG
2548 2548 int err = 0;
2549 2549 #endif
2550 2550
2551 2551
2552 2552 ASSERT(*handle_p != NULL);
2553 2553 handle = *handle_p;
2554 2554
2555 2555 if (_sdbc_shutdown_in_progress)
2556 2556 return (EIO);
2557 2557
2558 2558 if (xcd == NSC_ANON_CD)
2559 2559 cd = _CD_NOHASH;
2560 2560
2561 2561 KSTAT_RUNQ_ENTER(cd);
2562 2562
2563 2563 /*
2564 2564 * Force large writes on nvram systems to be write-through to
2565 2565 * avoid the (slow) bcopy into nvram.
2566 2566 */
2567 2567
2568 2568 if (flag & NSC_WRBUF) {
2569 2569 if (fba_len > (nsc_size_t)sdbc_wrthru_len) {
2570 2570 flag |= NSC_WRTHRU;
2571 2571 }
2572 2572 }
2573 2573
2574 2574 #ifdef DEBUG
2575 2575 if (sdbc_pageio_debug != SDBC_PAGEIO_OFF) {
2576 2576 switch (sdbc_pageio_debug) {
2577 2577 case SDBC_PAGEIO_RDEV:
2578 2578 if (cd != _CD_NOHASH &&
2579 2579 sdbc_pageio_rdev != (dev_t)-1 &&
2580 2580 _sd_cache_files[cd].cd_crdev == sdbc_pageio_rdev)
2581 2581 flag |= NSC_PAGEIO;
2582 2582 break;
2583 2583
2584 2584 case SDBC_PAGEIO_RAND:
2585 2585 if ((nsc_lbolt() % 3) == 0)
2586 2586 flag |= NSC_PAGEIO;
2587 2587 break;
2588 2588
2589 2589 case SDBC_PAGEIO_ALL:
2590 2590 flag |= NSC_PAGEIO;
2591 2591 break;
2592 2592 }
2593 2593 }
2594 2594 #endif /* DEBUG */
2595 2595
2596 2596 if (fba_len > (nsc_size_t)BLK_FBAS) {
2597 2597 rw_enter(&sdbc_queue_lock, RW_WRITER);
2598 2598 locked = 1;
2599 2599 }
2600 2600
2601 2601 /*
2602 2602 * _CD_NOHASH: client wants temporary (not hashed) cache memory
2603 2603 * not associated with a local disk. Skip local disk checks.
2604 2604 */
2605 2605 if (cd == _CD_NOHASH) {
2606 2606 flag &= ~(NSC_RDBUF | NSC_WRBUF | NSC_RDAHEAD);
2607 2607 handle = *handle_p;
2608 2608 handle->bh_flag |= NSC_HACTIVE;
2609 2609 goto setup;
2610 2610 }
2611 2611
2612 2612 SDTRACE(ST_ENTER|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, 0);
2613 2613
2614 2614
2615 2615 if ((flag & NSC_RDAHEAD) && _sd_prefetch_opt) {
2616 2616 sts = _sd_prefetch_buf(cd, fba_pos, fba_len, flag, handle,
2617 2617 locked);
2618 2618 goto done;
2619 2619 }
2620 2620
2621 2621 #if !defined(_SD_NOCHECKS)
2622 2622 if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2623 2623 nsc_size_t file_size; /* file_size in FBA's */
2624 2624 /* prefetch: truncate if req'd */
2625 2625 if (fba_len > sdbc_max_fbas)
2626 2626 fba_len = sdbc_max_fbas;
2627 2627 file_size = _sd_cache_files[(cd)].cd_info->sh_filesize;
2628 2628 if ((fba_pos + fba_len) > file_size) {
2629 2629 fba_len = file_size - fba_pos;
2630 2630 #ifdef NSC_MULTI_TERABYTE
2631 2631 if ((int64_t)fba_len <= 0) {
2632 2632 #else
2633 2633 if ((int32_t)fba_len <= 0) {
2634 2634 #endif
2635 2635 sts = EIO;
2636 2636 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len,
2637 2637 fba_pos, flag, sts);
2638 2638 goto done;
2639 2639 }
2640 2640 }
2641 2641 } else
2642 2642 if (sts = _sd_check_buffer_alloc(cd, fba_pos, fba_len, handle_p)) {
2643 2643 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, sts);
2644 2644 goto done;
2645 2645 }
2646 2646 #endif
2647 2647 if (fba_len == 0) {
2648 2648 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2649 2649 flag, EINVAL);
2650 2650 sts = EINVAL;
2651 2651 goto done;
2652 2652 }
2653 2653
2654 2654 handle->bh_flag |= NSC_HACTIVE;
2655 2655 cdi = &_sd_cache_files[cd];
2656 2656
2657 2657 if (cdi->cd_recovering) {
2658 2658 /*
2659 2659 * If recovering this device, then block all allocates
2660 2660 * for reading or writing. If we allow reads then
2661 2661 * this path could see old data before we recover.
2662 2662 * If we allow writes then new data could be overwritten
2663 2663 * by old data.
2664 2664 * This is clearly still not a complete solution as
2665 2665 * the thread doing this allocate could conceivably be
2666 2666 * by this point (and in _sd_write/_sd_read for that matter
2667 2667 * which don't even have this protection). But this type
2668 2668 * of path seems to only exist in a failover situation
2669 2669 * where a device has failed on the other node and works
2670 2670 * on this node so the problem is not a huge one but exists
2671 2671 * never the less.
2672 2672 */
2673 2673 if (sts = _sd_recovery_wblk_wait(cd)) {
2674 2674 handle->bh_flag &= ~NSC_HACTIVE;
2675 2675 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos,
2676 2676 flag, sts);
2677 2677 goto done;
2678 2678 }
2679 2679 }
2680 2680
2681 2681 /* write & disk failed, return error immediately */
2682 2682 if ((flag & NSC_WRBUF) && cdi->cd_info->sh_failed) {
2683 2683 handle->bh_flag &= ~NSC_HACTIVE;
2684 2684 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_len, fba_pos, flag, EIO);
2685 2685 sts = EIO;
2686 2686 goto done;
2687 2687 }
2688 2688
2689 2689 setup:
2690 2690
2691 2691 _SD_SETUP_HANDLE(handle, cd, fba_pos, fba_len, flag);
2692 2692 handle->bh_centry = NULL;
2693 2693 bufvec = handle->bh_bufvec;
2694 2694 if (flag & NSC_RDAHEAD) { /* _sd_prefetch_opt == 0 */
2695 2695 /* CKD prefetch: bufvec not req'd, use placeholder */
2696 2696 bufvec->bufaddr = NULL;
2697 2697 bufvec->bufvmeaddr = NULL;
2698 2698 bufvec->buflen = 0;
2699 2699 bufvec = _prefetch_sb_vec;
2700 2700 }
2701 2701 st_cblk = FBA_TO_BLK_NUM(fba_pos);
2702 2702 st_cblk_off = BLK_FBA_OFF(fba_pos);
2703 2703 st_cblk_len = BLK_FBAS - st_cblk_off;
2704 2704 if ((nsc_size_t)st_cblk_len >= fba_len) {
2705 2705 end_cblk_len = 0;
2706 2706 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
2707 2707 } else
2708 2708 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
2709 2709 cblk = st_cblk;
2710 2710
2711 2711
2712 2712 /*
2713 2713 * count number of blocks on chain that is required
2714 2714 */
2715 2715
2716 2716 /* middle piece */
2717 2717 dmchain_request_blocks =
2718 2718 (fba_len - (st_cblk_len + end_cblk_len)) >> BLK_FBA_SHFT;
2719 2719
2720 2720 /* start piece */
2721 2721 ++dmchain_request_blocks;
2722 2722
2723 2723 /* end piece */
2724 2724 if (end_cblk_len)
2725 2725 ++dmchain_request_blocks;
2726 2726
2727 2727
2728 2728 cc_flag = 0;
2729 2729 if ((handle->bh_flag & NSC_PINNABLE) && (handle->bh_flag & NSC_WRBUF))
2730 2730 cc_flag |= CC_PINNABLE;
2731 2731 if (handle->bh_flag & (NSC_NOCACHE|NSC_SEQ_IO))
2732 2732 cc_flag |= CC_QHEAD;
2733 2733 lentry = NULL;
2734 2734 stall = 0;
2735 2735
2736 2736 do {
2737 2737 pageio = ((flag & NSC_PAGEIO) != 0 || sdbc_pageio_always != 0);
2738 2738 cget:
2739 2739 if ((centry = (_sd_cctl_t *)
2740 2740 _sd_hash_search(cd, cblk, _sd_htable)) != 0) {
2741 2741
2742 2742 if (SET_CENTRY_INUSE(centry)) {
2743 2743 /* already inuse: wait for block, retry */
2744 2744 sdbc_allocb_inuse++;
2745 2745 if (locked)
2746 2746 rw_exit(&sdbc_queue_lock);
2747 2747 _sd_cc_wait(cd, cblk, centry, CC_INUSE);
2748 2748 if (locked)
2749 2749 rw_enter(&sdbc_queue_lock, RW_WRITER);
2750 2750 goto cget;
2751 2751 }
2752 2752
2753 2753 /*
2754 2754 * bug 4529671
2755 2755 * now that we own the centry make sure that
2756 2756 * it is still good. it could have been processed
2757 2757 * by _sd_dealloc_dm() in the window between
2758 2758 * _sd_hash_search() and SET_CENTRY_INUSE().
2759 2759 */
2760 2760 if ((_sd_cctl_t *)
2761 2761 _sd_hash_search(cd, cblk, _sd_htable) != centry) {
2762 2762 sdbc_allocb_deallocd++;
2763 2763 #ifdef DEBUG
2764 2764 cmn_err(CE_WARN,
2765 2765 "!centry %p cd %d cblk %" NSC_SZFMT
2766 2766 " fba_len %" NSC_SZFMT " lost to dealloc?! "
2767 2767 "cc_data %p", (void *)centry, cd, cblk,
2768 2768 fba_orig_len, (void *)centry->cc_data);
2769 2769 #endif
2770 2770
2771 2771 CLEAR_CENTRY_INUSE(centry);
2772 2772 goto cget;
2773 2773 }
2774 2774
2775 2775 if (CC_CD_BLK_MATCH(cd, cblk, centry)) {
2776 2776 /*
2777 2777 * Do pagelist io mutual exclusion
2778 2778 * before messing with the centry.
2779 2779 */
2780 2780 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2781 2781 /* wait for flusher to finish pageio */
2782 2782 sdbc_allocb_pageio1++;
2783 2783
2784 2784 CLEAR_CENTRY_INUSE(centry);
2785 2785 if (locked)
2786 2786 rw_exit(&sdbc_queue_lock);
2787 2787 _sd_cc_wait(cd, cblk, centry,
2788 2788 CC_PAGEIO);
2789 2789 if (locked)
2790 2790 rw_enter(&sdbc_queue_lock,
2791 2791 RW_WRITER);
2792 2792 goto cget;
2793 2793 }
2794 2794
2795 2795 sdbc_allocb_hit++;
2796 2796 this_entry_type = HASH_ENTRY_DM;
2797 2797 pageio = 0;
2798 2798 centry->cc_toflush = 0;
2799 2799
2800 2800 centry->cc_hits++;
2801 2801
2802 2802 /* this will reset the age flag */
2803 2803 sdbc_centry_init_dm(centry);
2804 2804
2805 2805 DTRACE_PROBE1(_sd_alloc_buf1,
2806 2806 _sd_cctl_t *, centry);
2807 2807 } else {
2808 2808 /* block mismatch: release, alloc new block */
2809 2809 sdbc_allocb_lost++;
2810 2810
2811 2811 CLEAR_CENTRY_INUSE(centry);
2812 2812
2813 2813 goto cget;
2814 2814
2815 2815 }
2816 2816 } else {
2817 2817 centry = sdbc_centry_alloc(cd, cblk,
2818 2818 dmchain_request_blocks, &stall,
2819 2819 &alloc_tok, locked ? ALLOC_LOCKED : 0);
2820 2820
2821 2821 /*
2822 2822 * dmchaining adjustment.
2823 2823 * if centry was obtained from the dmchain
2824 2824 * then clear local pageio variable because the
2825 2825 * centry already has cc_pageio set.
2826 2826 */
2827 2827 if (CENTRY_PAGEIO(centry))
2828 2828 pageio = 0;
2829 2829
2830 2830 DTRACE_PROBE1(_sd_alloc_buf2, _sd_cctl_t *, centry);
2831 2831
2832 2832 this_entry_type = ELIGIBLE_ENTRY_DM;
2833 2833 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
2834 2834 this_entry_type = HASH_ENTRY_DM;
2835 2835 else {
2836 2836 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
2837 2837 this_entry_type = HOLD_ENTRY_DM;
2838 2838 }
2839 2839 }
2840 2840
2841 2841 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
2842 2842
2843 2843 /*
2844 2844 * Do pagelist io mutual exclusion now if we did not do
2845 2845 * it above.
2846 2846 */
2847 2847
2848 2848 if (pageio && SET_CENTRY_PAGEIO(centry)) {
2849 2849 /* wait for flusher to finish pageio */
2850 2850 sdbc_allocb_pageio2++;
2851 2851
2852 2852
2853 2853 CLEAR_CENTRY_INUSE(centry);
2854 2854 if (locked)
2855 2855 rw_exit(&sdbc_queue_lock);
2856 2856 _sd_cc_wait(cd, cblk, centry, CC_PAGEIO);
2857 2857 if (locked)
2858 2858 rw_enter(&sdbc_queue_lock, RW_WRITER);
2859 2859 goto cget;
2860 2860 }
2861 2861
2862 2862 pageio = 0;
2863 2863
2864 2864 if (CENTRY_DIRTY(centry)) {
2865 2865 /*
2866 2866 * end action might set PEND_DIRTY flag
2867 2867 * must lock if need to change flag bits
2868 2868 */
2869 2869 if (centry->cc_flag != (centry->cc_flag | cc_flag)) {
2870 2870 /* was FAST */
2871 2871 				mutex_enter(&centry->cc_lock);
2872 2872 centry->cc_flag |= cc_flag;
2873 2873 /* was FAST */
2874 2874 				mutex_exit(&centry->cc_lock);
2875 2875 }
2876 2876 } else
2877 2877 centry->cc_flag |= cc_flag;
2878 2878
2879 2879 centry->cc_chain = NULL;
2880 2880
2881 2881 /*
2882 2882 		 * step 0: check valid bits in each cache ele as
2883 2883 * the chain grows - set ioent/io_pos to first
2884 2884 * instance of invalid data
2885 2885 */
2886 2886 if (cblk == st_cblk) {
2887 2887 handle->bh_centry = centry;
2888 2888 fba_len -= st_cblk_len;
2889 2889 lentry = centry;
2890 2890 if (flag & NSC_RDBUF) {
2891 2891 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len,
2892 2892 centry)) {
2893 2893 io_pos = fba_pos;
2894 2894 ioent = centry;
2895 2895 } else {
2896 2896 DATA_LOG(SDF_ALLOC, centry, st_cblk_off,
2897 2897 st_cblk_len);
2898 2898
2899 2899 DTRACE_PROBE4(_sd_alloc_data1,
2900 2900 uint64_t, (uint64_t)
2901 2901 (BLK_TO_FBA_NUM(cblk) +
2902 2902 st_cblk_off), int, st_cblk_len,
2903 2903 char *, *(int64_t *)
2904 2904 (centry->cc_data +
2905 2905 FBA_SIZE(st_cblk_off)),
2906 2906 char *, *(int64_t *)
2907 2907 (centry->cc_data +
2908 2908 FBA_SIZE(st_cblk_off + st_cblk_len)
2909 2909 - 8));
2910 2910 }
2911 2911 }
2912 2912 cblk++;
2913 2913 } else if (fba_len == (nsc_size_t)end_cblk_len) {
2914 2914 lentry->cc_chain = centry;
2915 2915 fba_len -= end_cblk_len;
2916 2916 if (flag & NSC_RDBUF) {
2917 2917 if (ioent == NULL) {
2918 2918 if (!SDBC_VALID_BITS(0, end_cblk_len,
2919 2919 centry)) {
2920 2920 io_pos = BLK_TO_FBA_NUM(cblk);
2921 2921 ioent = centry;
2922 2922 } else {
2923 2923 DATA_LOG(SDF_ALLOC, centry, 0,
2924 2924 end_cblk_len);
2925 2925
2926 2926 DTRACE_PROBE4(_sd_alloc_data2,
2927 2927 uint64_t,
2928 2928 BLK_TO_FBA_NUM(cblk),
2929 2929 int, end_cblk_len,
2930 2930 char *, *(int64_t *)
2931 2931 (centry->cc_data),
2932 2932 char *, *(int64_t *)
2933 2933 (centry->cc_data +
2934 2934 FBA_SIZE(end_cblk_len)
2935 2935 - 8));
2936 2936 }
2937 2937 }
2938 2938 }
2939 2939 } else {
2940 2940 lentry->cc_chain = centry;
2941 2941 lentry = centry;
2942 2942 fba_len -= BLK_FBAS;
2943 2943 if (flag & NSC_RDBUF) {
2944 2944 if (ioent == NULL) {
2945 2945 if (!FULLY_VALID(centry)) {
2946 2946 io_pos = BLK_TO_FBA_NUM(cblk);
2947 2947 ioent = centry;
2948 2948 } else {
2949 2949 DATA_LOG(SDF_ALLOC, centry, 0,
2950 2950 BLK_FBAS);
2951 2951
2952 2952 DTRACE_PROBE4(_sd_alloc_data3,
2953 2953 uint64_t, (uint64_t)
2954 2954 BLK_TO_FBA_NUM(cblk),
2955 2955 int, BLK_FBAS,
2956 2956 char *, *(int64_t *)
2957 2957 (centry->cc_data),
2958 2958 char *, *(int64_t *)
2959 2959 (centry->cc_data +
2960 2960 FBA_SIZE(BLK_FBAS) - 8));
2961 2961 }
2962 2962 }
2963 2963 }
2964 2964 cblk++;
2965 2965 }
2966 2966
2967 2967 /* if this block has a new identity clear prefetch history */
2968 2968 if (this_entry_type != HASH_ENTRY_DM)
2969 2969 centry->cc_aging_dm &=
2970 2970 ~(PREFETCH_BUF_I | PREFETCH_BUF_E);
2971 2971
2972 2972 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
2973 2973 centry->cc_aging_dm |= this_entry_type;
2974 2974 if (flag & NSC_METADATA)
2975 2975 centry->cc_aging_dm |= STICKY_METADATA_DM;
2976 2976
2977 2977 --dmchain_request_blocks;
2978 2978 } while (fba_len);
2979 2979
2980 2980 if (locked) {
2981 2981 rw_exit(&sdbc_queue_lock);
2982 2982 locked = 0;
2983 2983 }
2984 2984
2985 2985 ASSERT(dmchain_request_blocks == 0);
2986 2986
2987 2987 /*
2988 2988 * do any necessary cleanup now that all the blocks are allocated.
2989 2989 */
2990 2990 sdbc_centry_alloc_end(&alloc_tok);
2991 2991
2992 2992 /* be sure you nul term. the chain */
2993 2993 centry->cc_chain = NULL;
2994 2994
2995 2995 /*
2996 2996 * step one: establish HOST/PARASITE/OTHER relationships
2997 2997 * between the centry ele in the list and calc the alloc size
2998 2998 	 * (fill in CATEGORY based on TYPE and immediate neighbors)
2999 2999 */
3000 3000 if (sts = _sd_setup_category_on_type(handle->bh_centry)) {
3001 3001 #ifdef DEBUG
3002 3002 err = _sd_free_buf(handle);
3003 3003 if (err) {
3004 3004 cmn_err(CE_WARN, "!sdbc(_sd_alloc_buf): _sd_free_buf "
3005 3005 "failed: err:%d handle:%p", err, (void *)handle);
3006 3006 }
3007 3007 #else
3008 3008 (void) _sd_free_buf(handle);
3009 3009 #endif
3010 3010 goto done;
3011 3011 }
3012 3012
3013 3013 /*
3014 3014 * step two: alloc the needed mem and fill in the data and chaining
3015 3015 * fields (leave bufvec for step three)
3016 3016 */
3017 3017 (void) _sd_setup_mem_chaining(handle->bh_centry, 0);
3018 3018
3019 3019 /*
3020 3020 * step three: do the bufvec
3021 3021 */
3022 3022 fba_len = fba_orig_len;
3023 3023 centry = handle->bh_centry;
3024 3024 bufvec = handle->bh_bufvec;
3025 3025
3026 3026 while (centry) {
3027 3027 DTRACE_PROBE3(_sd_alloc_buf_centrys, _sd_cctl_t *, centry,
3028 3028 int, cd, uint64_t,
3029 3029 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(centry)));
3030 3030
3031 3031 if (fba_len == fba_orig_len) {
3032 3032 bufvec->bufaddr = (centry->cc_data +
3033 3033 FBA_SIZE(st_cblk_off));
3034 3034 bufvec->bufvmeaddr = 0; /* not used */
3035 3035 bufvec->buflen = FBA_SIZE(st_cblk_len);
3036 3036 bufvec++;
3037 3037 fba_len -= st_cblk_len;
3038 3038 } else if (fba_len == (nsc_size_t)end_cblk_len) {
3039 3039 _sd_bufvec_t *pbufvec = bufvec - 1;
3040 3040
3041 3041 if ((pbufvec->bufaddr + pbufvec->buflen) ==
3042 3042 centry->cc_data) {
3043 3043 /* contiguous */
3044 3044 pbufvec->buflen += FBA_SIZE(end_cblk_len);
3045 3045 } else {
3046 3046
3047 3047 bufvec->bufaddr = centry->cc_data;
3048 3048 bufvec->bufvmeaddr = 0; /* not used */
3049 3049 bufvec->buflen = FBA_SIZE(end_cblk_len);
3050 3050 bufvec++;
3051 3051 }
3052 3052
3053 3053 fba_len -= end_cblk_len;
3054 3054 } else {
3055 3055 _sd_bufvec_t *pbufvec = bufvec - 1;
3056 3056
3057 3057 if ((pbufvec->bufaddr + pbufvec->buflen) ==
3058 3058 centry->cc_data) {
3059 3059 /* contiguous */
3060 3060 pbufvec->buflen += CACHE_BLOCK_SIZE;
3061 3061 } else {
3062 3062
3063 3063 bufvec->bufaddr = centry->cc_data;
3064 3064 bufvec->bufvmeaddr = 0; /* not used */
3065 3065 bufvec->buflen = CACHE_BLOCK_SIZE;
3066 3066 bufvec++;
3067 3067 }
3068 3068
3069 3069 fba_len -= BLK_FBAS;
3070 3070 }
3071 3071
3072 3072 centry = centry->cc_chain;
3073 3073 }
3074 3074
3075 3075 /* be sure you nul term. the chain */
3076 3076 bufvec->bufaddr = NULL;
3077 3077 bufvec->bufvmeaddr = 0;
3078 3078 bufvec->buflen = 0;
3079 3079
3080 3080 /* frag statistics */
3081 3081 {
3082 3082 _sd_bufvec_t *tbufvec;
3083 3083
3084 3084 for (tbufvec = handle->bh_bufvec; tbufvec != bufvec;
3085 3085 ++tbufvec) {
3086 3086 if ((min_frag > tbufvec->buflen) || (min_frag == 0))
3087 3087 min_frag = tbufvec->buflen;
3088 3088
3089 3089 if (max_frag < tbufvec->buflen)
3090 3090 max_frag = tbufvec->buflen;
3091 3091 }
3092 3092
3093 3093 nfrags = bufvec - handle->bh_bufvec;
3094 3094 min_frag = FBA_LEN(min_frag);
3095 3095 max_frag = FBA_LEN(max_frag);
3096 3096 }
3097 3097
3098 3098 /* buffer memory frag stats */
3099 3099 DTRACE_PROBE4(_sd_alloc_buf_frag, uint64_t, (uint64_t)fba_orig_len,
3100 3100 int, nfrags, int, min_frag, int, max_frag);
3101 3101
3102 3102
3103 3103 if (flag & NSC_WRBUF) {
3104 3104 if (_SD_IS_WRTHRU(handle))
3105 3105 goto alloc_done;
3106 3106 if (_sd_alloc_write(handle->bh_centry, &stall)) {
3107 3107 _sd_unblock(&_sd_flush_cv);
3108 3108 handle->bh_flag |= NSC_FORCED_WRTHRU;
3109 3109 } else {
3110 3110 for (centry = handle->bh_centry;
3111 3111 centry; centry = centry->cc_chain) {
3112 3112
3113 3113 CENTRY_SET_FTPOS(centry);
3114 3114 SSOP_SETCENTRY(sdbc_safestore,
3115 3115 centry->cc_write);
3116 3116 }
3117 3117 }
3118 3118 }
3119 3119
3120 3120 alloc_done:
3121 3121 if (locked) {
3122 3122 rw_exit(&sdbc_queue_lock);
3123 3123 locked = 0;
3124 3124 }
3125 3125 if (ioent) {
3126 3126 _SD_DISCONNECT_CALLBACK(handle);
3127 3127 sts = _sd_doread(handle, ioent, io_pos,
3128 3128 (fba_pos + fba_orig_len - io_pos), flag);
3129 3129 if (sts > 0)
3130 3130 (void) _sd_free_buf(handle);
3131 3131 } else
3132 3132 if (flag & NSC_RDBUF) {
3133 3133 CACHE_FBA_READ(cd, fba_orig_len);
3134 3134 CACHE_READ_HIT;
3135 3135 FBA_READ_IO_KSTATS(cd, FBA_SIZE(fba_orig_len));
3136 3136
3137 3137 sts = NSC_HIT;
3138 3138 } else
3139 3139 sts = (stall) ? NSC_DONE : NSC_HIT;
3140 3140
3141 3141 SDTRACE(ST_EXIT|SDF_ALLOCBUF, cd, fba_orig_len, fba_pos, flag, sts);
3142 3142
3143 3143 done:
3144 3144 if (locked)
3145 3145 rw_exit(&sdbc_queue_lock);
3146 3146
3147 3147 KSTAT_RUNQ_EXIT(cd);
3148 3148
3149 3149 return (sts);
3150 3150 }
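
Putting the interface together, a caller's view of the allocator above looks roughly like this (a hedged sketch: the handle must be pre-allocated, since the code above ASSERTs *handle_p != NULL; NSC_NOBLOCK/NSC_PENDING handling is omitted; my_preallocated_handle and consume() are hypothetical):

	_sd_buf_handle_t *h = my_preallocated_handle;	/* hypothetical */
	_sd_bufvec_t *bv;
	int sts;

	sts = _sd_alloc_buf((blind_t)(unsigned long)cd, fba_pos, fba_len,
	    NSC_RDBUF, &h);
	if (sts > 0)
		return (sts);		/* errno-style failure */

	/* NSC_HIT or NSC_DONE: bh_bufvec is a nul-terminated vector of   */
	/* (bufaddr, buflen) fragments covering [fba_pos, fba_pos+fba_len) */
	for (bv = h->bh_bufvec; bv->bufaddr; bv++)
		consume(bv->bufaddr, bv->buflen);	/* hypothetical */

	(void) _sd_free_buf(h);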
3151 3151
3152 3152 /*
3153 3153 * consistency checking for ccents
3154 3154 */
3155 3155
3156 3156 #define ELIGIBLE(p) (p & ELIGIBLE_ENTRY_DM)
3157 3157 #define HOLD(p) (p & HOLD_ENTRY_DM)
3158 3158 #define HASHE(p) (p & HASH_ENTRY_DM)
3159 3159
3160 3160 #define HOST(p) (p & HOST_ENTRY_DM)
3161 3161 #define PARA(p) (p & PARASITIC_ENTRY_DM)
3162 3162 #define OTHER(p) \
3163 3163 (!(p & (HOST_ENTRY_DM | PARASITIC_ENTRY_DM | ELIGIBLE_ENTRY_DM)))
3164 3164
3165 3165 #define AVAIL(p) (p & AVAIL_ENTRY_DM)
3166 3166
3167 3167 /*
3168 3168 * sdbc_check_cctl_cot -- consistency check for _sd_setup_category_on_type()
3169 3169 * may only be called on entry to state machine (when ccent is either
3170 3170 * ELIGIBLE_ENTRY_DM, HOLD_ENTRY_DM or HASH_ENTRY_DM).
3171 3171 *
3172 3172 * print message or panic (DEBUG) if inconsistency detected.
3173 3173 */
3174 3174 static int
3175 3175 sdbc_check_cctl_cot(_sd_cctl_t *centry)
3176 3176 {
3177 3177 uint_t age;
3178 3178 int size;
3179 3179 uchar_t *data;
3180 3180 int host_or_other;
3181 3181 int para;
3182 3182 int ccent_ok = 1;
3183 3183
3184 3184 age = centry->cc_aging_dm;
3185 3185 size = centry->cc_alloc_size_dm;
3186 3186 data = centry->cc_data;
3187 3187 host_or_other = size && data;
3188 3188 para = !size && data;
3189 3189
3190 3190 /*
3191 3191 * on entry to _sd_setup_category_on_type(),
3192 3192 * one of three mutually exclusive entry field bits must be set
3193 3193 */
3194 3194
3195 3195 switch ((age & (ELIGIBLE_ENTRY_DM | HOLD_ENTRY_DM | HASH_ENTRY_DM))) {
3196 3196 case ELIGIBLE_ENTRY_DM:
3197 3197 case HOLD_ENTRY_DM:
3198 3198 case HASH_ENTRY_DM:
3199 3199 /* ok */
3200 3200 break;
3201 3201 default:
3202 3202 /* zero or multiple flag bits */
3203 3203 ccent_ok = 0;
3204 3204 break;
3205 3205 }
3206 3206
3207 3207 /* categories are mutually exclusive */
3208 3208 if (HOST(age) && PARA(age))
3209 3209 ccent_ok = 0;
3210 3210
3211 3211 /* these bits should be cleared out (STICKY_METADATA_DM not used) */
3212 3212 if (age & (AVAIL_ENTRY_DM | FOUND_HOLD_OVER_DM | FOUND_IN_HASH_DM |
3213 3213 STICKY_METADATA_DM))
3214 3214 ccent_ok = 0;
3215 3215
3216 3216 /* eligible has no data and no size */
3217 3217 if (ELIGIBLE(age) && (size || data))
3218 3218 ccent_ok = 0;
3219 3219
3220 3220 /* parasite has zero size and non-zero data */
3221 3221 if (PARA(age) && !para)
3222 3222 ccent_ok = 0;
3223 3223
3224 3224 /* host has non-zero size and non-zero data */
3225 3225 if (HOST(age) && !host_or_other)
3226 3226 ccent_ok = 0;
3227 3227
3228 3228 /* "other" is just like a host */
3229 3229 if (OTHER(age) && !host_or_other)
3230 3230 ccent_ok = 0;
3231 3231
3232 3232 /* a HOLD or a HASH must have a size */
3233 3233 if ((size) && !(age & (HASH_ENTRY_DM | HOLD_ENTRY_DM)))
3234 3234 ccent_ok = 0;
3235 3235
3236 3236 if (!ccent_ok)
3237 3237 cmn_err(cmn_level,
3238 3238 "!sdbc(sdbc_check_cctl_cot): inconsistent ccent %p "
3239 3239 "age %x size %d data %p", (void *)centry, age, size,
3240 3240 (void *)data);
3241 3241
3242 3242 return (ccent_ok);
3243 3243 }
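
The entry-field check at the top of sdbc_check_cctl_cot() relies on the three state bits being one-hot: the switch falls to default both when no bit and when more than one bit is set. The same test in isolation (a sketch; the bit values below are hypothetical):

#include <stdio.h>

#define ELIGIBLE_ENTRY_DM	0x01	/* hypothetical bit values */
#define HOLD_ENTRY_DM		0x02
#define HASH_ENTRY_DM		0x04

/* one-hot test: exactly one of the three entry-field bits set */
static int
entry_field_ok(unsigned age)
{
	unsigned f = age & (ELIGIBLE_ENTRY_DM | HOLD_ENTRY_DM | HASH_ENTRY_DM);

	return (f != 0 && (f & (f - 1)) == 0);	/* power-of-two check */
}

int
main(void)
{
	printf("%d %d %d\n", entry_field_ok(HOLD_ENTRY_DM),	/* 1 */
	    entry_field_ok(0),					/* 0 */
	    entry_field_ok(ELIGIBLE_ENTRY_DM | HASH_ENTRY_DM));	/* 0 */
	return (0);
}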
3244 3244
3245 3245 /*
3246 3246 * sdbc_mark_cctl_cot -- mark cctls bad and invalidate when
3247 3247 * inconsistency found in _sd_setup_category_on_type()
3248 3248 * returns nothing
3249 3249 *
3250 3250 * Note: this is an error recovery path that is triggered when an
3251 3251 * inconsistency in a cctl is detected. _sd_centry_release() will take
3252 3252 * these cache entries out of circulation and place them on a separate list
3253 3253 * for debugging purposes.
3254 3254 */
3255 3255 void
3256 3256 sdbc_mark_cctl_cot(_sd_cctl_t *header, _sd_cctl_t *centry)
3257 3257 {
3258 3258 _sd_cctl_t *cur_ent = header;
3259 3259
3260 3260 /* the entire chain is guilty by association */
3261 3261 while (cur_ent) {
3262 3262
3263 3263 (void) _sd_hash_delete((struct _sd_hash_hd *)cur_ent,
3264 3264 _sd_htable);
3265 3265
3266 3266 cur_ent->cc_aging_dm |= BAD_CHAIN_DM;
3267 3267
3268 3268 cur_ent = cur_ent->cc_chain;
3269 3269 }
3270 3270
3271 3271 centry->cc_aging_dm |= BAD_ENTRY_DM; /* this is the problem child */
3272 3272 }
3273 3273
3274 3274 /*
3275 3275 * _sd_setup_category_on_type(_sd_cctl_t *) - Setup the centry CATEGORY based on
3276 3276 * centry TYPE and immediate neighbors. Identify each eligible (ie not HASH)
3277 3277  * centry as a host/parasite. hosts actually have memory allocated to
3278 3278 * them and parasites are chained to the host and point to page offsets within
3279 3279 * the host's memory.
3280 3280 *
3281 3281 * RETURNS:
3282 3282 * 0 on success, EINTR if inconsistency detected in centry
3283 3283 *
3284 3284 * Note:
3285 3285 * none
3286 3286 */
3287 3287 static int
3288 3288 _sd_setup_category_on_type(_sd_cctl_t *header)
3289 3289 {
3290 3290 _sd_cctl_t *prev_ent, *next_ent, *centry;
3291 3291 _sd_cctl_t *anchor = NULL;
3292 3292 int current_pest_count, local_max_dyn_list;
3293 3293 int cl;
3294 3294 int ret = 0;
3295 3295
3296 3296 ASSERT(header);
3297 3297
3298 3298 if (sdbc_use_dmchain)
3299 3299 local_max_dyn_list = max_dm_queues - 1;
3300 3300 else {
3301 3301 /* pickup a fresh copy - has the world changed */
3302 3302 local_max_dyn_list = dynmem_processing_dm.max_dyn_list;
3303 3303 }
3304 3304
3305 3305 prev_ent = 0;
3306 3306 centry = header;
3307 3307 next_ent = centry->cc_chain;
3308 3308 current_pest_count = 0;
3309 3309 cl = 2;
3310 3310
3311 3311 /* try to recover from bad cctl */
3312 3312 if (sdbc_check_cot && !sdbc_check_cctl_cot(centry))
3313 3313 ret = EINTR;
3314 3314
3315 3315 while (cl && (ret == 0)) {
3316 3316 switch (cl) {
3317 3317 case (1): /* chain to next/monitor for completion */
3318 3318 prev_ent = centry;
3319 3319 centry = next_ent;
3320 3320 next_ent = 0;
3321 3321 cl = 0;
3322 3322 if (centry) {
3323 3323
3324 3324 if (sdbc_check_cot &&
3325 3325 !sdbc_check_cctl_cot(centry)) {
3326 3326 ret = EINTR;
3327 3327 break;
3328 3328 }
3329 3329
3330 3330 next_ent = centry->cc_chain;
3331 3331 cl = 2;
3332 3332 }
3333 3333 break;
3334 3334
3335 3335 case (2): /* vector to appropriate routine */
3336 3336 if (!(centry->cc_aging_dm & ELIGIBLE_ENTRY_DM))
3337 3337 cl = 5;
3338 3338 else if (prev_ent && (prev_ent->cc_aging_dm &
3339 3339 ELIGIBLE_ENTRY_DM))
3340 3340 cl = 15;
3341 3341 else
3342 3342 cl = 10;
3343 3343 break;
3344 3344
3345 3345 case (5): /* process NON-ELIGIBLE entries */
3346 3346 if (!(centry->cc_aging_dm &
3347 3347 (HASH_ENTRY_DM|HOLD_ENTRY_DM))) {
3348 3348 				/* no category */
3349 3349
3350 3350 /* consistency check */
3351 3351 if (centry->cc_alloc_size_dm ||
3352 3352 centry->cc_data) {
3353 3353 cmn_err(cmn_level,
3354 3354 "!sdbc(setup_cot): "
3355 3355 "OTHER with data/size %p",
3356 3356 (void *)centry);
3357 3357
3358 3358 ret = EINTR;
3359 3359 break;
3360 3360 }
3361 3361
3362 3362 centry->cc_aging_dm &=
3363 3363 ~CATAGORY_ENTRY_DM;
3364 3364 centry->cc_alloc_size_dm = BLK_SIZE(1);
3365 3365 DTRACE_PROBE1(_sd_setup_category,
3366 3366 _sd_cctl_t *, centry);
3367 3367 }
3368 3368 cl = 1;
3369 3369 break;
3370 3370
3371 3371 /*
3372 3372 * no prev entry (ie top of list) or no prev
3373 3373 * ELIGIBLE entry
3374 3374 */
3375 3375 case (10):
3376 3376 /*
3377 3377 * this is an eligible entry, does it start
3378 3378 * a list or is it a loner
3379 3379 */
3380 3380 /* consistency check */
3381 3381 if (centry->cc_alloc_size_dm ||
3382 3382 centry->cc_data) {
3383 3383 cmn_err(cmn_level, "!sdbc(setup_cot): "
3384 3384 "HOST with data/size %p",
3385 3385 (void *)centry);
3386 3386 ret = EINTR;
3387 3387 break;
3388 3388 }
3389 3389
3390 3390 if (next_ent && (next_ent->cc_aging_dm &
3391 3391 ELIGIBLE_ENTRY_DM)) {
3392 3392
3393 3393
3394 3394 /* it starts a list */
3395 3395 				/* host category */
3396 3396 centry->cc_aging_dm |= HOST_ENTRY_DM;
3397 3397 /* start out with one page */
3398 3398 centry->cc_alloc_size_dm = BLK_SIZE(1);
3399 3399 anchor = centry;
3400 3400 DTRACE_PROBE1(_sd_setup_category,
3401 3401 _sd_cctl_t *, anchor);
3402 3402 cl = 1;
3403 3403 } else {
3404 3404 /*
3405 3405 * it's a loner
3406 3406 * drop status to no category and
3407 3407 * restart
3408 3408 */
3409 3409 cl = 2;
3410 3410 centry->cc_aging_dm &=
3411 3411 ~ELIGIBLE_ENTRY_DM;
3412 3412 }
3413 3413 break;
3414 3414
3415 3415 		case (15): /* default to parasite category */
3416 3416
3417 3417 /* consistency check */
3418 3418 if (centry->cc_alloc_size_dm ||
3419 3419 centry->cc_data) {
3420 3420 cmn_err(cmn_level, "!sdbc(setup_cot): "
3421 3421 "PARA with data/size %p",
3422 3422 (void *)centry);
3423 3423
3424 3424 ret = EINTR;
3425 3425 break;
3426 3426 }
3427 3427
3428 3428 if (current_pest_count < local_max_dyn_list-1) {
3429 3429 /* continue to grow the pest list */
3430 3430 current_pest_count++;
3431 3431 centry->cc_aging_dm |=
3432 3432 PARASITIC_ENTRY_DM;
3433 3433
3434 3434 /*
3435 3435 * offset of host ent mem this will pt
3436 3436 * to
3437 3437 */
3438 3438 centry->cc_alloc_size_dm =
3439 3439 anchor->cc_alloc_size_dm;
3440 3440 /*
3441 3441 * up the host mem req by one for
3442 3442 * this parasite
3443 3443 */
3444 3444 DTRACE_PROBE1(_sd_setup_category,
3445 3445 _sd_cctl_t *, centry);
3446 3446
3447 3447 anchor->cc_alloc_size_dm += BLK_SIZE(1);
3448 3448
3449 3449 cl = 1;
3450 3450 } else {
3451 3451 /*
3452 3452 * term this pest list - restart fresh
3453 3453 * on this entry
3454 3454 */
3455 3455 current_pest_count = 0;
3456 3456 prev_ent->cc_aging_dm &=
3457 3457 ~(HOST_ENTRY_DM|ELIGIBLE_ENTRY_DM);
3458 3458 cl = 2;
3459 3459 }
3460 3460 break;
3461 3461 } /* switch(cl) */
3462 3462 } /* while (cl) */
3463 3463
3464 3464 if (ret != 0)
3465 3465 sdbc_mark_cctl_cot(header, centry);
3466 3466
3467 3467 return (ret);
3468 3468 }
3469 3469
3470 3470 /*
3471 3471 * _sd_setup_mem_chaining(_sd_cctl_t *) - Allocate memory, setup
3472 3472  * mem ptrs and host/pest chaining. Do the actual allocation as described in
3473 3473  * _sd_setup_category_on_type().
3474 3474 *
3475 3475 * RETURNS:
3476 3476 * 0 on success
3477 3477 * non-zero on error
3478 3478 *
3479 3479 * Note:
3480 3480 * if called with ALLOC_NOWAIT, caller must check for non-zero return
3481 3481 */
3482 3482 static int
3483 3483 _sd_setup_mem_chaining(_sd_cctl_t *header, int flag)
3484 3484 {
3485 3485 _sd_cctl_t *prev_ent, *next_ent, *centry;
3486 3486 _sd_cctl_t *anchor = NULL;
3487 3487 int cl, rc = 0;
3488 3488
3489 3489 ASSERT(header);
3490 3490
3491 3491 if (!header)
3492 3492 return (0);
3493 3493
3494 3494 prev_ent = 0;
3495 3495 centry = header;
3496 3496 next_ent = centry->cc_chain;
3497 3497 cl = 2;
3498 3498 while (cl) {
3499 3499 switch (cl) {
3500 3500 case (1): /* chain to next/monitor for completion */
3501 3501 centry->cc_aging_dm &= ~ELIGIBLE_ENTRY_DM;
3502 3502 prev_ent = centry;
3503 3503 centry = next_ent;
3504 3504 next_ent = 0;
3505 3505 cl = 0;
3506 3506 if (centry) {
3507 3507 next_ent = centry->cc_chain;
3508 3508 cl = 2;
3509 3509 }
3510 3510 break;
3511 3511
3512 3512 case (2): /* vector to appropriate routine */
3513 3513 if (centry->cc_aging_dm & HOST_ENTRY_DM)
3514 3514 cl = 10;
3515 3515 else if (centry->cc_aging_dm &
3516 3516 PARASITIC_ENTRY_DM)
3517 3517 cl = 15;
3518 3518 else
3519 3519 cl = 5;
3520 3520 break;
3521 3521
3522 3522 case (5): /* OTHER processing - alloc mem */
3523 3523 if (rc = sdbc_centry_memalloc_dm(centry,
3524 3524 centry->cc_alloc_size_dm, flag))
3525 3525 /* The allocation failed */
3526 3526 cl = 0;
3527 3527 else
3528 3528 cl = 1;
3529 3529 break;
3530 3530
3531 3531 /*
3532 3532 * HOST entry processing - save the anchor pt,
3533 3533 * alloc the memory,
3534 3534 */
3535 3535 case (10): /* setup head and nxt ptrs */
3536 3536 anchor = centry;
3537 3537 if (rc = sdbc_centry_memalloc_dm(centry,
3538 3538 centry->cc_alloc_size_dm, flag))
3539 3539 /* The allocation failed */
3540 3540 cl = 0;
3541 3541 else
3542 3542 cl = 1;
3543 3543 break;
3544 3544
3545 3545 /*
3546 3546 * PARASITIC entry processing - setup w/no
3547 3547 * memory, setup head/next ptrs,
3548 3548 */
3549 3549 case (15):
3550 3550 /*
3551 3551 * fudge the data mem ptr to an offset from
3552 3552 * the anchor alloc
3553 3553 */
3554 3554 if (!(centry->cc_aging_dm &
3555 3555 			    (HASH_ENTRY_DM | HOLD_ENTRY_DM))) {
3556 3556 centry->cc_head_dm = anchor;
3557 3557
3558 3558 /* chain prev to this */
3559 3559 prev_ent->cc_next_dm = centry;
3560 3560
3561 3561 /*
3562 3562 * generate the actual data ptr into
3563 3563 * host entry memory
3564 3564 */
3565 3565 centry->cc_data = anchor->cc_data +
3566 3566 centry->cc_alloc_size_dm;
3567 3567 centry->cc_alloc_size_dm = 0;
3568 3568 }
3569 3569 cl = 1;
3570 3570 break;
3571 3571 } /* switch(cl) */
3572 3572 } /* while (cl) */
3573 3573
3574 3574 return (rc);
3575 3575 }
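
The host/parasite arrangement built above is worth seeing in isolation: the
host takes a single allocation that covers every parasite on its memory
chain, and each parasite's data pointer is just an offset into the host's
block. A minimal user-space sketch of that layout, with a reduced stand-in
for _sd_cctl_t and malloc in place of kmem_alloc (all names here are
hypothetical, not the driver's API):

	#include <stdlib.h>

	#define	BLK	8192		/* stand-in for BLK_SIZE(1) */

	struct ent {			/* reduced _sd_cctl_t */
		struct ent *head;	/* cc_head_dm: host of this chain */
		struct ent *next;	/* cc_next_dm */
		unsigned char *data;	/* cc_data */
		size_t alloc_size;	/* cc_alloc_size_dm: host only */
	};

	/*
	 * One allocation on the host covers the host plus n parasites;
	 * parasite i points BLK * (i + 1) bytes into the host block.
	 */
	static int
	chain_setup(struct ent *host, struct ent *pests, int n)
	{
		struct ent *prev = host;
		int i;

		host->alloc_size = (size_t)(n + 1) * BLK;
		if ((host->data = malloc(host->alloc_size)) == NULL)
			return (-1);
		host->head = host;
		host->next = NULL;

		for (i = 0; i < n; i++) {
			pests[i].data = host->data + (size_t)(i + 1) * BLK;
			pests[i].head = host;
			pests[i].alloc_size = 0;	/* no own memory */
			pests[i].next = NULL;
			prev->next = &pests[i];
			prev = &pests[i];
		}
		return (0);
	}
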
3576 3576
3577 3577 /*
3578 3578 * _sd_check_buffer_alloc - Check if buffer allocation is invalid.
3579 3579 *
3580 3580 * RETURNS:
3581 3581  *	0 if it's ok to continue with the allocation.
3582 3582 * Else errno to be returned to the user.
3583 3583 *
3584 3584 * Note:
3585 3585 * This routine could block if the device is not local and
3586 3586 * recovery is in progress.
3587 3587 */
3588 3588
3589 3589 /* ARGSUSED */
3590 3590 static int
3591 3591 _sd_check_buffer_alloc(int cd, nsc_off_t fba_pos, nsc_size_t fba_len,
3592 3592 _sd_buf_handle_t **hp)
3593 3593 {
3594 3594 /*
3595 3595 * This check exists to ensure that someone will not pass in an
3596 3596 * arbitrary pointer and try to pass it off as a handle.
3597 3597 */
3598 3598 if ((*hp)->bh_flag & (~_SD_VALID_FLAGS)) {
3599 3599 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3600 3600 "cd %d invalid handle %p flags %x",
3601 3601 cd, (void *)*hp, (*hp)->bh_flag);
3602 3602 return (EINVAL);
3603 3603 }
3604 3604
3605 3605 if ((_sd_cache_initialized == 0) || (FILE_OPENED(cd) == 0)) {
3606 3606 cmn_err(CE_WARN, "!sdbc(_sd_check_buffer_alloc) "
3607 3607 "cd %d not open. Cache init %d",
3608 3608 cd, _sd_cache_initialized);
3609 3609 return (EINVAL);
3610 3610 }
3611 3611 ASSERT(cd >= 0);
3612 3612 if (!(_sd_cache_files[cd].cd_rawfd) ||
3613 3613 !nsc_held(_sd_cache_files[cd].cd_rawfd)) {
3614 3614 cmn_err(CE_WARN,
3615 3615 "!sdbc(_sd_check_buffer_alloc) cd %d is not attached", cd);
3616 3616 return (EINVAL);
3617 3617 }
3618 3618
3619 3619 ASSERT_IO_SIZE(fba_pos, fba_len, cd);
3620 3620 ASSERT_LEN(fba_len);
3621 3621
3622 3622 return (0);
3623 3623 }
3624 3624
3625 3625 /*
3626 3626 * sdbc_check_handle -- check that handle is valid
3627 3627 * return 1 if ok, 0 otherwise (if debug then panic).
3628 3628 */
3629 3629 static int
3630 3630 sdbc_check_handle(_sd_buf_handle_t *handle)
3631 3631 {
3632 3632 int ret = 1;
3633 3633
3634 3634 if (!_SD_HANDLE_ACTIVE(handle)) {
3635 3635
3636 3636 		cmn_err(cmn_level, "!sdbc(_sd_free_buf): invalid handle %p "
3637 3637 "cd %d fpos %" NSC_SZFMT " flen %" NSC_SZFMT " flag %x",
3638 3638 (void *)handle, HANDLE_CD(handle), handle->bh_fba_pos,
3639 3639 handle->bh_fba_len, handle->bh_flag);
3640 3640
3641 3641 ret = 0;
3642 3642 }
3643 3643
3644 3644 return (ret);
3645 3645 }
3646 3646
3647 3647 /*
3648 3648 * _sd_free_buf - Free the buffers allocated in _sd_alloc_buf.
3649 3649 *
3650 3650 * ARGUMENTS:
3651 3651 * handle - The handle allocated in _sd_alloc_buf.
3652 3652 *
3653 3653 * RETURNS:
3654 3654 * 0 on success.
3655 3655 * Else errno.
3656 3656 *
3657 3657 * NOTE:
3658 3658 * If handle was allocated through _sd_alloc_buf, the handle allocated
3659 3659 * flag (NSC_HALLOCATED) will be reset by _sd_alloc_buf. This indicates
3660 3660 * that _sd_free_buf should free up the handle as well.
3661 3661 * All other handles directly allocated from _sd_alloc_handle will have
3662 3662 * that flag set. Any handle with valid blocks will have the handle
3663 3663 * active flag. It is an error if the active flag is not set.
3664 3664 * (if free_buf were called without going through alloc_buf)
3665 3665 */
3666 3666
3667 3667 int
3668 3668 _sd_free_buf(_sd_buf_handle_t *handle)
3669 3669 {
3670 3670 _sd_cctl_t *centry, *cc_chain;
3671 3671 int cd = HANDLE_CD(handle);
3672 3672 int flen = handle->bh_fba_len;
3673 3673 int fpos = handle->bh_fba_pos;
3674 3674
3675 3675 SDTRACE(ST_ENTER|SDF_FREEBUF, HANDLE_CD(handle),
3676 3676 handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
3677 3677
3678 3678 if (sdbc_check_handle(handle) == 0)
3679 3679 return (EINVAL);
3680 3680
3681 3681 if (handle->bh_flag & NSC_MIXED) {
3682 3682 /*
3683 3683 * Data in this handle will be a mix of data from the
3684 3684 * source device and data from another device, so
3685 3685 * invalidate all the blocks.
3686 3686 */
3687 3687 handle->bh_flag &= ~NSC_QUEUE;
3688 3688 centry = handle->bh_centry;
3689 3689 while (centry) {
3690 3690 centry->cc_valid = 0;
3691 3691 centry = centry->cc_chain;
3692 3692 }
3693 3693 }
3694 3694
3695 3695 if ((handle->bh_flag & NSC_QUEUE)) {
3696 3696 handle->bh_flag &= ~NSC_QUEUE;
3697 3697 _sd_queue_write(handle, handle->bh_fba_pos, handle->bh_fba_len);
3698 3698 }
3699 3699
3700 3700 handle->bh_flag &= ~NSC_HACTIVE;
3701 3701
3702 3702 centry = handle->bh_centry;
3703 3703 while (centry) {
3704 3704 cc_chain = centry->cc_chain;
3705 3705 _sd_centry_release(centry);
3706 3706 centry = cc_chain;
3707 3707 }
3708 3708
3709 3709 /*
3710 3710 * help prevent dup call to _sd_centry_release if this handle
3711 3711 * is erroneously _sd_free_buf'd twice. (should not happen).
3712 3712 */
3713 3713 handle->bh_centry = NULL;
3714 3714
3715 3715 if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
3716 3716 handle->bh_flag |= NSC_HALLOCATED;
3717 3717 (void) _sd_free_handle(handle);
3718 3718 } else {
3719 3719 handle->bh_flag = NSC_HALLOCATED;
3720 3720 }
3721 3721
3722 3722 SDTRACE(ST_EXIT|SDF_FREEBUF, cd, flen, fpos, 0, 0);
3723 3723
3724 3724 return (0);
3725 3725 }
3726 3726
3727 3727
3728 3728 static int _sd_lruq_srch = 0x2000;
3729 3729
3730 3730 /*
3731 3731 * sdbc_get_dmchain -- get a candidate centry chain pointing to
3732 3732 * contiguous memory
3733 3733 * ARGUMENTS:
3734 3734 * cblocks - number of cache blocks requested
3735 3735 * stall - pointer to stall count (no blocks avail)
3736 3736 * flag - ALLOC_NOWAIT flag
3737 3737 *
3738 3738 * RETURNS:
3739 3739  *	a cache entry or possibly NULL if ALLOC_NOWAIT set
3740 3740 * USAGE:
3741 3741  *	attempt to satisfy the entire request from the queue of
3742 3742  *	entries that have no memory allocated (queue 0).
3743 3743 * if this fails then attempt a partial allocation
3744 3744 * with a preallocated block of requested size up to
3745 3745 * max_dyn_list.
3746 3746 * then look for largest chain less than max_dyn_list.
3747 3747 */
3748 3748 static _sd_cctl_t *
3749 3749 sdbc_get_dmchain(int cblocks, int *stall, int flag)
3750 3750 {
3751 3751 _sd_cctl_t *cc_dmchain = NULL;
3752 3752 _sd_queue_t *q;
3753 3753 _sd_cctl_t *qhead;
3754 3754 int num_tries;
3755 3755 int cblocks_orig = cblocks;
3756 3756 int nowait = flag & ALLOC_NOWAIT;
3757 3757 int i;
3758 3758
3759 3759 num_tries = _sd_lruq_srch;
3760 3760
3761 3761 ASSERT(cblocks != 0);
3762 3762
3763 3763 while (!cc_dmchain) {
3764 3764 /* get it from the os if possible */
3765 3765 q = &sdbc_dm_queues[0];
3766 3766 qhead = &(q->sq_qhead);
3767 3767
3768 3768 if (q->sq_inq >= cblocks) {
3769 3769 mutex_enter(&q->sq_qlock);
3770 3770 if (q->sq_inq >= cblocks) {
3771 3771 _sd_cctl_t *cc_ent;
3772 3772
3773 3773 cc_dmchain = qhead->cc_next;
3774 3774
3775 3775 /*
3776 3776 * set the inuse and pageio bits
3777 3777 * Note: this code expects the cc_ent to
3778 3778 * be available. no other thread may set the
3779 3779 * inuse or pageio bit for an entry on the
3780 3780 * 0 queue.
3781 3781 */
3782 3782 cc_ent = qhead;
3783 3783 for (i = 0; i < cblocks; ++i) {
3784 3784 cc_ent = cc_ent->cc_next;
3785 3785
3786 3786 if (SET_CENTRY_INUSE(cc_ent)) {
3787 3787 cmn_err(CE_PANIC,
3788 3788 "centry inuse on 0 q! %p",
3789 3789 (void *)cc_ent);
3790 3790 }
3791 3791
3792 3792 if (SET_CENTRY_PAGEIO(cc_ent)) {
3793 3793 cmn_err(CE_PANIC,
3794 3794 "centry pageio on 0 q! %p",
3795 3795 (void *)cc_ent);
3796 3796 }
3797 3797 }
3798 3798 /* got a dmchain */
3799 3799
3800 3800 /* remove this chain from the 0 queue */
3801 3801 cc_dmchain->cc_prev->cc_next = cc_ent->cc_next;
3802 3802 cc_ent->cc_next->cc_prev = cc_dmchain->cc_prev;
3803 3803 cc_dmchain->cc_prev = NULL;
3804 3804 cc_ent->cc_next = NULL;
3805 3805
3806 3806 q->sq_inq -= cblocks;
3807 3807
3808 3808 ASSERT(GOOD_LRUSIZE(q));
3809 3809
3810 3810 }
3811 3811 mutex_exit(&q->sq_qlock);
3812 3812 if (cc_dmchain)
3813 3813 continue;
3814 3814 }
3815 3815
3816 3816 /* look for a pre-allocated block of the requested size */
3817 3817
3818 3818
3819 3819 if (cblocks > (max_dm_queues - 1))
3820 3820 cblocks = max_dm_queues - 1;
3821 3821
3822 3822 q = &sdbc_dm_queues[cblocks];
3823 3823 qhead = &(q->sq_qhead);
3824 3824
3825 3825 if (q->sq_inq != 0) {
3826 3826 _sd_cctl_t *tmp_dmchain;
3827 3827
3828 3828 mutex_enter(&q->sq_qlock);
3829 3829
3830 3830 for (tmp_dmchain = qhead->cc_next; tmp_dmchain != qhead;
3831 3831 tmp_dmchain = tmp_dmchain->cc_next) {
3832 3832
3833 3833 /*
3834 3834 * get a dmchain
3835 3835 * set the inuse and pageio bits
3836 3836 */
3837 3837 if (sdbc_dmchain_avail(tmp_dmchain)) {
3838 3838 /* put on MRU end of queue */
3839 3839 sdbc_requeue_dmchain(q, tmp_dmchain,
3840 3840 1, 0);
3841 3841 cc_dmchain = tmp_dmchain;
3842 3842 break;
3843 3843 }
3844 3844 sdbc_dmchain_not_avail++;
3845 3845 }
3846 3846
3847 3847 mutex_exit(&q->sq_qlock);
3848 3848 if (cc_dmchain)
3849 3849 continue;
3850 3850 }
3851 3851
3852 3852 /*
3853 3853 * spin block
3854 3854 		 * nudge the deallocator, accelerate aging
3855 3855 */
3856 3856
3857 3857 mutex_enter(&dynmem_processing_dm.thread_dm_lock);
3858 3858 cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
3859 3859 mutex_exit(&dynmem_processing_dm.thread_dm_lock);
3860 3860
3861 3861 if (nowait)
3862 3862 break;
3863 3863
3864 3864 if (!(--num_tries)) {
3865 3865 delay(drv_usectohz(20000));
3866 3866 (void) (*stall)++;
3867 3867 num_tries = _sd_lruq_srch;
3868 3868 cblocks = cblocks_orig;
3869 3869 } else { /* see if smaller request size is available */
3870 3870 if (!(--cblocks))
3871 3871 cblocks = cblocks_orig;
3872 3872 }
3873 3873
3874 3874 } /* while (!cc_dmchain) */
3875 3875
3876 3876 return (cc_dmchain);
3877 3877 }
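
Stripped of the locking and the inuse/pageio bookkeeping, the search order
in sdbc_get_dmchain() above is: build the chain from unallocated (queue 0)
entries, then take a whole preallocated chain of the (clamped) requested
size, then retry with progressively smaller sizes, delaying and restarting
at the full size once the sizes are exhausted. A compressed control-flow
sketch under those assumptions; find_on_queue() is a hypothetical stand-in
for both queue scans, not a driver routine:

	#include <stddef.h>
	#include <unistd.h>

	/* hypothetical queue scan; this stub always misses */
	static void *
	find_on_queue(int qidx, int want)
	{
		(void) qidx;
		(void) want;
		return (NULL);
	}

	static void *
	get_chain_sketch(int cblocks, int max_qidx, int nowait)
	{
		int want = cblocks;
		void *chain;

		for (;;) {
			/* 1. carve the chain out of queue 0 */
			if ((chain = find_on_queue(0, want)) != NULL)
				return (chain);

			/* 2. a preallocated chain of the clamped size */
			if (want > max_qidx)
				want = max_qidx;
			if ((chain = find_on_queue(want, 1)) != NULL)
				return (chain);

			if (nowait)
				return (NULL);	/* caller checks for NULL */

			/* 3. shrink; rest and restart when spent */
			if (--want == 0) {
				(void) usleep(20000); /* delay() analogue */
				want = cblocks;
			}
		}
	}
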
3878 3878
3879 3879 static int
3880 3880 sdbc_dmchain_avail(_sd_cctl_t *cc_ent)
3881 3881 {
3882 3882 int chain_avail = 1;
3883 3883 _sd_cctl_t *anchor = cc_ent;
3884 3884
3885 3885 while (cc_ent) {
3886 3886
3887 3887 ASSERT(_sd_cctl_valid(cc_ent));
3888 3888
3889 3889 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM) {
3890 3890 chain_avail = 0;
3891 3891 break;
3892 3892 }
3893 3893
3894 3894 if (CENTRY_DIRTY(cc_ent)) {
3895 3895 chain_avail = 0;
3896 3896 break;
3897 3897 }
3898 3898 if (SET_CENTRY_INUSE(cc_ent)) {
3899 3899 chain_avail = 0;
3900 3900 break;
3901 3901 }
3902 3902
3903 3903 if ((SET_CENTRY_PAGEIO(cc_ent))) {
3904 3904
3905 3905 CLEAR_CENTRY_INUSE(cc_ent);
3906 3906 chain_avail = 0;
3907 3907 break;
3908 3908 }
3909 3909
3910 3910 if (CENTRY_DIRTY(cc_ent)) {
3911 3911
3912 3912 CLEAR_CENTRY_PAGEIO(cc_ent);
3913 3913 CLEAR_CENTRY_INUSE(cc_ent);
3914 3914 chain_avail = 0;
3915 3915 break;
3916 3916 }
3917 3917
3918 3918 cc_ent->cc_flag = 0;
3919 3919 cc_ent->cc_toflush = 0;
3920 3920
3921 3921 cc_ent = cc_ent->cc_next_dm;
3922 3922 }
3923 3923
3924 3924 if (!chain_avail)
3925 3925 sdbc_clear_dmchain(anchor, cc_ent);
3926 3926 else {
3927 3927 cc_ent = anchor;
3928 3928
3929 3929 /*
3930 3930 * prevent possible deadlocks in _sd_cc_wait():
3931 3931 * remove from hash and wakeup any waiters now that we
3932 3932 * have acquired the chain.
3933 3933 */
3934 3934 while (cc_ent) {
3935 3935 (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
3936 3936 _sd_htable);
3937 3937
3938 3938 mutex_enter(&cc_ent->cc_lock);
3939 3939 if (cc_ent->cc_await_use) {
3940 3940 cv_broadcast(&cc_ent->cc_blkcv);
3941 3941 }
3942 3942 mutex_exit(&cc_ent->cc_lock);
3943 3943
3944 3944 cc_ent->cc_creat = nsc_lbolt();
3945 3945 cc_ent->cc_hits = 0;
3946 3946
3947 3947 cc_ent = cc_ent->cc_next_dm;
3948 3948 }
3949 3949 }
3950 3950
3951 3951 return (chain_avail);
3952 3952 }
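
Note the shape of sdbc_dmchain_avail() above: CENTRY_DIRTY() is tested both
before and after SET_CENTRY_INUSE(). The first test skips an obviously
unusable entry without claiming it, the claim freezes ownership, and the
second test catches a writer that dirtied the entry in the window in
between, rolling the claim back. A minimal sketch of that check/claim/
re-check idiom using C11 atomics (hypothetical entry type, not the
driver's bit macros):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct entry {
		atomic_flag inuse;	/* SET_CENTRY_INUSE analogue */
		atomic_bool dirty;	/* CENTRY_DIRTY analogue */
	};

	/* claim the entry only if it is (and stays) clean */
	static bool
	claim_clean(struct entry *e)
	{
		if (atomic_load(&e->dirty))
			return (false);		/* cheap early out */
		if (atomic_flag_test_and_set(&e->inuse))
			return (false);		/* someone else owns it */
		if (atomic_load(&e->dirty)) {
			atomic_flag_clear(&e->inuse);	/* roll back */
			return (false);
		}
		return (true);			/* owned and clean */
	}
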
3953 3953
3954 3954 static void
3955 3955 sdbc_clear_dmchain(_sd_cctl_t *cc_ent_start, _sd_cctl_t *cc_ent_end)
3956 3956 {
3957 3957 _sd_cctl_t *cc_ent = cc_ent_start;
3958 3958 _sd_cctl_t *prev_ent;
3959 3959
3960 3960 ASSERT(_sd_cctl_valid(cc_ent));
3961 3961
3962 3962 while (cc_ent != cc_ent_end) {
3963 3963
3964 3964 ASSERT(_sd_cctl_valid(cc_ent));
3965 3965
3966 3966 prev_ent = cc_ent;
3967 3967 cc_ent = cc_ent->cc_next_dm;
3968 3968
3969 3969 CLEAR_CENTRY_PAGEIO(prev_ent);
3970 3970 CLEAR_CENTRY_INUSE(prev_ent);
3971 3971 }
3972 3972
3973 3973 }
3974 3974
3975 3975 /*
3976 3976 * put a dmchain on the LRU end of a queue
3977 3977 */
3978 3978 void
3979 3979 sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent)
3980 3980 {
3981 3981 _sd_cctl_t *qhead = &(q->sq_qhead);
3982 3982
3983 3983 ASSERT(_sd_cctl_valid(cc_ent));
3984 3984
3985 3985 mutex_enter(&q->sq_qlock);
3986 3986 cc_ent->cc_next = qhead->cc_next;
3987 3987 cc_ent->cc_prev = qhead;
3988 3988 qhead->cc_next->cc_prev = cc_ent;
3989 3989 qhead->cc_next = cc_ent;
3990 3990 q->sq_inq++;
3991 3991 cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
3992 3992
3993 3993 ASSERT(GOOD_LRUSIZE(q));
3994 3994
3995 3995 mutex_exit(&q->sq_qlock);
3996 3996
3997 3997 }
3998 3998
3999 3999 /*
4000 4000 * put a dmchain on the MRU end of a queue
4001 4001 */
4002 4002 static void
4003 4003 sdbc_ins_dmqueue_back(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4004 4004 {
4005 4005 _sd_cctl_t *qhead = &(q->sq_qhead);
4006 4006
4007 4007 ASSERT(_sd_cctl_valid(cc_ent));
4008 4008
4009 4009 mutex_enter(&q->sq_qlock);
4010 4010 cc_ent->cc_next = qhead;
4011 4011 cc_ent->cc_prev = qhead->cc_prev;
4012 4012 qhead->cc_prev->cc_next = cc_ent;
4013 4013 qhead->cc_prev = cc_ent;
4014 4014 cc_ent->cc_seq = q->sq_seq++;
4015 4015 q->sq_inq++;
4016 4016 cc_ent->cc_cblocks = q->sq_dmchain_cblocks;
4017 4017
4018 4018 ASSERT(GOOD_LRUSIZE(q));
4019 4019
4020 4020 mutex_exit(&q->sq_qlock);
4021 4021
4022 4022 }
4023 4023
4024 4024 /*
4025 4025 * remove dmchain from a queue
4026 4026 */
4027 4027 void
4028 4028 sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent)
4029 4029 {
4030 4030
4031 4031 ASSERT(_sd_cctl_valid(cc_ent));
4032 4032
4033 4033 mutex_enter(&q->sq_qlock);
4034 4034 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4035 4035 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4036 4036 cc_ent->cc_next = cc_ent->cc_prev = NULL; /* defensive programming */
4037 4037 cc_ent->cc_cblocks = -1; /* indicate not on any queue */
4038 4038
4039 4039 q->sq_inq--;
4040 4040
4041 4041 ASSERT(GOOD_LRUSIZE(q));
4042 4042
4043 4043 mutex_exit(&q->sq_qlock);
4044 4044
4045 4045 }
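
All three primitives above work on a circular doubly-linked list with a
sentinel head (sq_qhead), which is what keeps each operation down to four
pointer stores with no empty-list or end-of-list special cases: the LRU end
is head->next, the MRU end is head->prev, and an empty queue is the
sentinel pointing at itself. The same shape as a self-contained sketch
(hypothetical node type):

	#include <stddef.h>

	struct node {
		struct node *next, *prev;
	};

	static void
	q_init(struct node *head)	/* empty: sentinel points at itself */
	{
		head->next = head->prev = head;
	}

	static void
	q_ins_front(struct node *head, struct node *n)	/* LRU end */
	{
		n->next = head->next;
		n->prev = head;
		head->next->prev = n;
		head->next = n;
	}

	static void
	q_ins_back(struct node *head, struct node *n)	/* MRU end */
	{
		n->next = head;
		n->prev = head->prev;
		head->prev->next = n;
		head->prev = n;
	}

	static void
	q_remove(struct node *n)
	{
		n->prev->next = n->next;
		n->next->prev = n->prev;
		n->next = n->prev = NULL;	/* defensive, as above */
	}
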
4046 4046
4047 4047 /*
4048 4048 * requeue a dmchain to the MRU end of its queue.
4049 4049 * if getlock is 0 on entry the queue lock (sq_qlock) must be held
4050 4050 */
4051 4051 void
4052 4052 sdbc_requeue_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent, int mru,
4053 4053 int getlock)
4054 4054 {
4055 4055 _sd_cctl_t *qhead = &(q->sq_qhead);
4056 4056
4057 4057
4058 4058 ASSERT(_sd_cctl_valid(cc_ent));
4059 4059
4060 4060 if (getlock)
4061 4061 mutex_enter(&q->sq_qlock);
4062 4062
4063 4063 /* inline of sdbc_remq_dmchain() */
4064 4064 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4065 4065 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4066 4066
4067 4067 if (mru) { /* put on MRU end of queue */
4068 4068 /* inline of sdbc_ins_dmqueue_back */
4069 4069 cc_ent->cc_next = qhead;
4070 4070 cc_ent->cc_prev = qhead->cc_prev;
4071 4071 qhead->cc_prev->cc_next = cc_ent;
4072 4072 qhead->cc_prev = cc_ent;
4073 4073 cc_ent->cc_seq = q->sq_seq++;
4074 4074 (q->sq_req_stat)++;
4075 4075 } else { /* put on LRU end of queue i.e. requeue to head */
4076 4076 /* inline of sdbc_ins_dmqueue_front */
4077 4077 cc_ent->cc_next = qhead->cc_next;
4078 4078 cc_ent->cc_prev = qhead;
4079 4079 qhead->cc_next->cc_prev = cc_ent;
4080 4080 qhead->cc_next = cc_ent;
4081 4081 cc_ent->cc_seq = q->sq_seq++;
4082 4082
4083 4083 /*
4084 4084 * clear the CC_QHEAD bit on all members of the chain
4085 4085 */
4086 4086 {
4087 4087 _sd_cctl_t *tcent;
4088 4088
4089 4089 for (tcent = cc_ent; tcent; tcent = tcent->cc_next_dm)
4090 4090 tcent->cc_flag &= ~CC_QHEAD;
4091 4091 }
4092 4092 }
4093 4093
4094 4094 if (getlock)
4095 4095 mutex_exit(&q->sq_qlock);
4096 4096
4097 4097 }
4098 4098
4099 4099 /*
4100 4100 * sdbc_dmchain_dirty(cc_ent)
4101 4101 * return first dirty cc_ent in dmchain, NULL if chain is not dirty
4102 4102 */
4103 4103 static _sd_cctl_t *
4104 4104 sdbc_dmchain_dirty(_sd_cctl_t *cc_ent)
4105 4105 {
4106 4106 for (/* CSTYLED */; cc_ent; cc_ent = cc_ent->cc_next_dm)
4107 4107 if (CENTRY_DIRTY(cc_ent))
4108 4108 break;
4109 4109
4110 4110 return (cc_ent);
4111 4111 }
4112 4112
4113 4113 /*
4114 4114 * sdbc_requeue_head_dm_try()
4115 4115 * attempt to requeue a dmchain to the head of the queue
4116 4116 */
4117 4117 void
4118 4118 sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent)
4119 4119 {
4120 4120 int qidx;
4121 4121 _sd_queue_t *q;
4122 4122
4123 4123 if (!sdbc_dmchain_dirty(cc_ent)) {
4124 4124 qidx = cc_ent->cc_cblocks;
4125 4125 q = &sdbc_dm_queues[qidx];
4126 4126 sdbc_requeue_dmchain(q, cc_ent, 0, 1); /* requeue head */
4127 4127 }
4128 4128 }
4129 4129
4130 4130 /*
4131 4131 * sdbc_centry_alloc_blks -- allocate cache entries with memory
4132 4132 *
4133 4133 * ARGUMENTS:
4134 4134 * cd - Cache descriptor (from a previous open)
4135 4135 * cblk - cache block number.
4136 4136 * reqblks - number of cache blocks to be allocated
4137 4137 * flag - can be ALLOC_NOWAIT
4138 4138 * RETURNS:
4139 4139 * A cache block chain or NULL if ALLOC_NOWAIT and request fails
4140 4140 *
4141 4141 * Note: caller must check for null return if called with
4142 4142 * ALLOC_NOWAIT set.
4143 4143 */
4144 4144 _sd_cctl_t *
4145 4145 sdbc_centry_alloc_blks(int cd, nsc_off_t cblk, nsc_size_t reqblks, int flag)
4146 4146 {
4147 - sdbc_allocbuf_t alloc_tok = {0}; /* must be 0 */
4147 + sdbc_allocbuf_t alloc_tok = {{(intptr_t)NULL}}; /* must be NULL */
4148 4148 int stall = 0;
4149 4149 _sd_cctl_t *centry = NULL;
4150 4150 _sd_cctl_t *lentry = NULL;
4151 4151 _sd_cctl_t *anchor = NULL;
4152 4152 _sd_cctl_t *next_centry;
4153 4153
4154 4154 ASSERT(reqblks);
4155 4155
4156 4156 while (reqblks) {
4157 4157 centry = sdbc_centry_alloc(cd, cblk, reqblks, &stall,
4158 4158 &alloc_tok, flag);
4159 4159
4160 4160 if (!centry)
4161 4161 break;
4162 4162
4163 4163 centry->cc_chain = NULL;
4164 4164
4165 4165 if (lentry == NULL)
4166 4166 anchor = centry;
4167 4167 else
4168 4168 lentry->cc_chain = centry;
4169 4169
4170 4170 lentry = centry;
4171 4171
4172 4172 centry->cc_aging_dm &= ~(ENTRY_FIELD_DM);
4173 4173
4174 4174 if (centry->cc_aging_dm & FOUND_IN_HASH_DM)
4175 4175 centry->cc_aging_dm |= HASH_ENTRY_DM;
4176 4176 else
4177 4177 if (centry->cc_aging_dm & FOUND_HOLD_OVER_DM)
4178 4178 centry->cc_aging_dm |= HOLD_ENTRY_DM;
4179 4179 else
4180 4180 centry->cc_aging_dm |= ELIGIBLE_ENTRY_DM;
4181 4181
4182 4182 centry->cc_aging_dm &= ~(FOUND_IN_HASH_DM|FOUND_HOLD_OVER_DM);
4183 4183 --reqblks;
4184 4184 }
4185 4185
4186 4186 sdbc_centry_alloc_end(&alloc_tok);
4187 4187
4188 4188 if (reqblks || (_sd_setup_category_on_type(anchor))) {
4189 4189 centry = anchor;
4190 4190 while (centry) {
4191 4191 next_centry = centry->cc_chain;
4192 4192 _sd_centry_release(centry);
4193 4193 centry = next_centry;
4194 4194 }
4195 4195 anchor = NULL;
4196 4196
4197 4197 } else
4198 4198 /* This is where the memory is actually allocated */
4199 4199 if (_sd_setup_mem_chaining(anchor, flag))
4200 4200 anchor = NULL;
4201 4201
4202 4202 return (anchor);
4203 4203 }
4204 4204
4205 4205
4206 4206 /*
4207 4207 * sdbc_centry_alloc - sdbc internal function to allocate a new cache block.
4208 4208 *
4209 4209 * ARGUMENTS:
4210 4210 * cd - Cache descriptor (from a previous open)
4211 4211 * cblk - cache block number.
4212 4212 * stall - pointer to stall count (no blocks avail)
4213 4213 * req_blocks - number of cache blocks remaining in caller's i/o request
4214 4214 * alloc_tok - pointer to token initialized to 0 on first call to function
4215 4215 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT flag
4216 4216 * RETURNS:
4217 4217  *	A cache block, or possibly NULL if ALLOC_NOWAIT set.
4218 4218 *
4219 4219 * USAGE:
4220 4220 * switch to the appropriate allocation function.
4221 4221 * this function is used when callers need more than one cache block.
4222 4222 * it is called repeatedly until the entire request is satisfied,
4223 4223 * at which time the caller will then do the memory allocation.
4224 4224 * if only one cache block is needed callers may use
4225 4225 * sdbc_centry_alloc_blks() which also allocates memory.
4226 4226 *
4227 4227 * Note: caller must check for null return if called with
4228 4228 * ALLOC_NOWAIT set.
4229 4229 */
4230 4230
4231 4231 _sd_cctl_t *
4232 4232 sdbc_centry_alloc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4233 4233 sdbc_allocbuf_t *alloc_tok, int flag)
4234 4234 {
4235 4235 _sd_cctl_t *centry;
4236 4236
4237 4237 if (sdbc_use_dmchain)
4238 4238 centry = sdbc_alloc_dmc(cd, cblk, req_blocks, stall, alloc_tok,
4239 4239 flag);
4240 4240 else
4241 4241 centry = sdbc_alloc_lru(cd, cblk, stall, flag);
4242 4242
4243 4243 return (centry);
4244 4244 }
4245 4245
4246 4246 /*
4247 4247 * sdbc_alloc_dmc -- allocate a centry from a dmchain
4248 4248 *
4249 4249 * ARGUMENTS:
4250 4250 * cd - Cache descriptor (from a previous open)
4251 4251 * cblk - cache block number.
4252 4252 * stall - pointer to stall count (no blocks avail)
4253 4253  *	req_blocks - number of cache blocks in client's i/o request
4254 4254 * alloc_tok - pointer to token initialized to 0 on first call to function
4255 4255 * flag - lock status of sdbc_queue_lock, or ALLOC_NOWAIT flag
4256 4256 * RETURNS:
4257 4257 * A cache block or possibly NULL if ALLOC_NOWAIT set
4258 4258 *
4259 4259 * USAGE:
4260 4260 * if dmchain is empty, allocate one.
4261 4261 */
4262 4262 static _sd_cctl_t *
4263 4263 sdbc_alloc_dmc(int cd, nsc_off_t cblk, nsc_size_t req_blocks, int *stall,
4264 4264 sdbc_allocbuf_t *alloc_tok, int flag)
4265 4265 {
4266 4266 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4267 4267 _sd_cctl_t *centry = NULL;
4268 4268
4269 4269 if (!dmc->sab_dmchain) {
4270 4270 /*
4271 4271 * Note - sdbc_get_dmchain() returns
4272 4272 * with cc_inuse and cc_pageio set
4273 4273 * for all members of dmchain.
4274 4274 */
4275 4275 if (dmc->sab_dmchain =
4276 4276 sdbc_get_dmchain(req_blocks, stall, flag)) {
4277 4277
4278 4278 /* remember q it came from */
4279 4279 if (dmc->sab_dmchain->cc_alloc_size_dm)
4280 4280 dmc->sab_q = dmc->sab_dmchain->cc_cblocks;
4281 4281 }
4282 4282 }
4283 4283
4284 4284 /*
4285 4285 * Note: dmchain pointer is advanced in sdbc_alloc_from_dmchain()
4286 4286 */
4287 4287 if (dmc->sab_dmchain) /* could be NULL if ALLOC_NOWAIT set */
4288 4288 centry = sdbc_alloc_from_dmchain(cd, cblk, alloc_tok, flag);
4289 4289
4290 4290 return (centry);
4291 4291 }
4292 4292
4293 4293 /*
4294 4294 * sdbc_alloc_from_dmchain -- allocate centry from a dmchain of centrys
4295 4295 *
4296 4296 * ARGUMENTS:
4297 4297 * cd - Cache descriptor (from a previous open)
4298 4298 * cblk - cache block number.
4299 4299 * alloc_tok - pointer to token
4300 4300 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4301 4301 *
4302 4302 * RETURNS:
4303 4303 * A cache block or possibly NULL if ALLOC_NOWAIT set.
4304 4304 *
4305 4305 * USAGE:
4306 4306 * This routine allocates a new cache block from the supplied dmchain.
4307 4307 * Assumes that dmchain is non-NULL and that all cache entries in
4308 4308 * the dmchain have been removed from hash and have their cc_inuse and
4309 4309 * cc_pageio bits set.
4310 4310 */
4311 4311 static _sd_cctl_t *
4312 4312 sdbc_alloc_from_dmchain(int cd, nsc_off_t cblk, sdbc_allocbuf_t *alloc_tok,
4313 4313 int flag)
4314 4314 {
4315 4315 _sd_cctl_t *cc_ent, *old_ent;
4316 4316 int categorize_centry;
4317 4317 int locked = flag & ALLOC_LOCKED;
4318 4318 int nowait = flag & ALLOC_NOWAIT;
4319 4319 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4320 4320
4321 4321 SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4322 4322
4323 4323 ASSERT(dmc->sab_dmchain);
4324 4324
4325 4325 cc_ent = dmc->sab_dmchain;
4326 4326
4327 4327 ASSERT(_sd_cctl_valid(cc_ent));
4328 4328
4329 4329 cc_ent->cc_valid = 0;
4330 4330 categorize_centry = 0;
4331 4331 if (cc_ent->cc_data)
4332 4332 categorize_centry = FOUND_HOLD_OVER_DM;
4333 4333
4334 4334 alloc_try:
4335 4335 if (cd == _CD_NOHASH)
4336 4336 CENTRY_BLK(cc_ent) = cblk;
4337 4337 else if ((old_ent = (_sd_cctl_t *)
4338 4338 _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4339 4339 _sd_htable)) != cc_ent) {
4340 4340
4341 4341 if (SET_CENTRY_INUSE(old_ent)) {
4342 4342 sdbc_centry_inuse++;
4343 4343
4344 4344 if (nowait) {
4345 4345 cc_ent = NULL;
4346 4346 goto out;
4347 4347 }
4348 4348
4349 4349 if (locked)
4350 4350 rw_exit(&sdbc_queue_lock);
4351 4351 _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4352 4352 if (locked)
4353 4353 rw_enter(&sdbc_queue_lock, RW_WRITER);
4354 4354 goto alloc_try;
4355 4355 }
4356 4356
4357 4357 /*
4358 4358 * bug 4529671
4359 4359 * now that we own the centry make sure that
4360 4360 * it is still good. it could have been processed
4361 4361 * by _sd_dealloc_dm() in the window between
4362 4362 * _sd_hash_insert() and SET_CENTRY_INUSE().
4363 4363 */
4364 4364 if ((_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)
4365 4365 != old_ent) {
4366 4366 sdbc_centry_deallocd++;
4367 4367 #ifdef DEBUG
4368 4368 cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %" NSC_SZFMT
4369 4369 " lost to dealloc?! cc_data %p", (void *)old_ent,
4370 4370 cd, cblk, (void *)old_ent->cc_data);
4371 4371 #endif
4372 4372
4373 4373 CLEAR_CENTRY_INUSE(old_ent);
4374 4374
4375 4375 if (nowait) {
4376 4376 cc_ent = NULL;
4377 4377 goto out;
4378 4378 }
4379 4379
4380 4380 goto alloc_try;
4381 4381 }
4382 4382
4383 4383 if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4384 4384 sdbc_centry_hit++;
4385 4385 old_ent->cc_toflush = 0;
4386 4386 /* _sd_centry_release(cc_ent); */
4387 4387 cc_ent = old_ent;
4388 4388 categorize_centry = FOUND_IN_HASH_DM;
4389 4389 } else {
4390 4390 sdbc_centry_lost++;
4391 4391
4392 4392 CLEAR_CENTRY_INUSE(old_ent);
4393 4393
4394 4394 if (nowait) {
4395 4395 cc_ent = NULL;
4396 4396 goto out;
4397 4397 }
4398 4398
4399 4399 goto alloc_try;
4400 4400 }
4401 4401 }
4402 4402
4403 4403 /*
4404 4404 * advance the dmchain pointer, but only if we got the
4405 4405 * cc_ent from the dmchain
4406 4406 */
4407 4407 if (categorize_centry != FOUND_IN_HASH_DM) {
4408 4408 if (cc_ent->cc_data)
4409 4409 dmc->sab_dmchain = dmc->sab_dmchain->cc_next_dm;
4410 4410 else
4411 4411 dmc->sab_dmchain = dmc->sab_dmchain->cc_next;
4412 4412 }
4413 4413
4414 4414
4415 4415 SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4416 4416
4417 4417 mutex_enter(&cc_ent->cc_lock);
4418 4418 if (cc_ent->cc_await_use) {
4419 4419 cv_broadcast(&cc_ent->cc_blkcv);
4420 4420 }
4421 4421 mutex_exit(&cc_ent->cc_lock);
4422 4422
4423 4423 sdbc_centry_init_dm(cc_ent);
4424 4424
4425 4425 cc_ent->cc_aging_dm |= categorize_centry;
4426 4426
4427 4427 out:
4428 4428
4429 4429 SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4430 4430
4431 4431 return (cc_ent);
4432 4432 }
4433 4433
4434 4434 /*
4435 4435 * sdbc_centry_alloc_end -- tidy up after all cache blocks have been
4436 4436 * allocated for a request
4437 4437 * ARGUMENTS:
4438 4438 * alloc_tok - pointer to allocation token
4439 4439 * RETURNS
4440 4440 * nothing
4441 4441 * USAGE:
4442 4442 * at this time only useful when sdbc_use_dmchain is true.
4443 4443 * if there are cache blocks remaining on the chain then the inuse and
4444 4444  *	pageio bits must be cleared (they were set in sdbc_get_dmchain()).
4445 4445 *
4446 4446 */
4447 4447 static void
4448 4448 sdbc_centry_alloc_end(sdbc_allocbuf_t *alloc_tok)
4449 4449 {
4450 4450 _sd_cctl_t *next_centry;
4451 4451 _sd_cctl_t *prev_centry;
4452 4452 _sd_queue_t *q;
4453 4453 sdbc_allocbuf_impl_t *dmc = (sdbc_allocbuf_impl_t *)alloc_tok;
4454 4454 #ifdef DEBUG
4455 4455 int chainpull = 0;
4456 4456 #endif
4457 4457
4458 4458 if (!sdbc_use_dmchain)
4459 4459 return;
4460 4460
4461 4461 next_centry = dmc->sab_dmchain;
4462 4462
4463 4463 while (next_centry != NULL) {
4464 4464 CLEAR_CENTRY_PAGEIO(next_centry);
4465 4465
4466 4466 prev_centry = next_centry;
4467 4467
4468 4468 if (next_centry->cc_data) {
4469 4469 #ifdef DEBUG
4470 4470 ++chainpull;
4471 4471 #endif
4472 4472 next_centry = next_centry->cc_next_dm;
4473 4473
4474 4474 /* clear bit after final reference */
4475 4475
4476 4476 CLEAR_CENTRY_INUSE(prev_centry);
4477 4477 } else {
4478 4478 next_centry = next_centry->cc_next;
4479 4479
4480 4480 /*
4481 4481 * a floater from the 0 queue, insert on q.
4482 4482 *
4483 4483 * since this centry is not on any queue
4484 4484 * the inuse bit can be cleared before
4485 4485 * inserting on the q. this is also required
4486 4486 * since sdbc_get_dmchain() does not expect
4487 4487 			 * inuse bits to be set on 0 queue entries.
4488 4488 */
4489 4489
4490 4490 CLEAR_CENTRY_INUSE(prev_centry);
4491 4491 q = &sdbc_dm_queues[0];
4492 4492 sdbc_ins_dmqueue_front(q, prev_centry);
4493 4493 }
4494 4494 }
4495 4495
4496 4496 #ifdef DEBUG
4497 4497 /* compute wastage stats */
4498 4498 ASSERT((chainpull >= 0) && (chainpull < max_dm_queues));
4499 4499 if (chainpull)
4500 4500 (*(dmchainpull_table + (dmc->sab_q *
4501 4501 max_dm_queues + chainpull)))++;
4502 4502 #endif
4503 4503
4504 4504 }
4505 4505
4506 4506
4507 4507 /*
4508 4508 * sdbc_alloc_lru - allocate a new cache block from the lru queue
4509 4509 *
4510 4510 * ARGUMENTS:
4511 4511 * cd - Cache descriptor (from a previous open)
4512 4512 * cblk - cache block number.
4513 4513 * stall - pointer to stall count (no blocks avail)
4514 4514 * flag - lock status of sdbc_queue_lock or ALLOC_NOWAIT
4515 4515 *
4516 4516 * RETURNS:
4517 4517 * A cache block or NULL if ALLOC_NOWAIT specified
4518 4518 *
4519 4519 * USAGE:
4520 4520 * This routine allocates a new cache block from the lru.
4521 4521 * If an allocation cannot be done, we block, unless ALLOC_NOWAIT is set.
4522 4522 */
4523 4523
4524 4524 static _sd_cctl_t *
4525 4525 sdbc_alloc_lru(int cd, nsc_off_t cblk, int *stall, int flag)
4526 4526 {
4527 4527 _sd_cctl_t *cc_ent, *old_ent, *ccnext;
4528 4528 _sd_queue_t *q = _SD_LRU_Q;
4529 4529 _sd_cctl_t *qhead = &(q->sq_qhead);
4530 4530 int tries = 0, num_tries;
4531 4531 int categorize_centry;
4532 4532 int locked = flag & ALLOC_LOCKED;
4533 4533 int nowait = flag & ALLOC_NOWAIT;
4534 4534
4535 4535 if (nowait) {
4536 4536 num_tries = q->sq_inq / 100; /* only search 1% of q */
4537 4537
4538 4538 if (num_tries <= 0) /* ensure num_tries is non-zero */
4539 4539 num_tries = q->sq_inq;
4540 4540 } else
4541 4541 num_tries = _sd_lruq_srch;
4542 4542
4543 4543 SDTRACE(ST_ENTER|SDF_ENT_ALLOC, cd, 0, BLK_TO_FBA_NUM(cblk), 0, 0);
4544 4544 retry_alloc_centry:
4545 4545
4546 4546 for (cc_ent = (qhead->cc_next); cc_ent != qhead; cc_ent = ccnext) {
4547 4547 if (--num_tries <= 0)
4548 4548 if (nowait) {
4549 4549 cc_ent = NULL;
4550 4550 goto out;
4551 4551 } else
4552 4552 break;
4553 4553
4554 4554 ccnext = cc_ent->cc_next;
4555 4555
4556 4556 if (cc_ent->cc_aging_dm & BAD_CHAIN_DM)
4557 4557 continue;
4558 4558
4559 4559 if (CENTRY_DIRTY(cc_ent))
4560 4560 continue;
4561 4561 if (SET_CENTRY_INUSE(cc_ent))
4562 4562 continue;
4563 4563
4564 4564 if (CENTRY_DIRTY(cc_ent)) {
4565 4565 sdbc_centry_lost++;
4566 4566
4567 4567 CLEAR_CENTRY_INUSE(cc_ent);
4568 4568 continue;
4569 4569 }
4570 4570 cc_ent->cc_flag = 0; /* CC_INUSE */
4571 4571 cc_ent->cc_toflush = 0;
4572 4572
4573 4573 /*
4574 4574 * Inlined requeue of the LRU. (should match _sd_requeue)
4575 4575 */
4576 4576 /* was FAST */
4577 4577 mutex_enter(&q->sq_qlock);
4578 4578 #if defined(_SD_DEBUG)
4579 4579 if (1) {
4580 4580 _sd_cctl_t *cp, *cn, *qp;
4581 4581 cp = cc_ent->cc_prev;
4582 4582 cn = cc_ent->cc_next;
4583 4583 qp = (q->sq_qhead).cc_prev;
4584 4584 if (!_sd_cctl_valid(cc_ent) ||
4585 4585 (cp != &(q->sq_qhead) && !_sd_cctl_valid(cp)) ||
4586 4586 (cn != &(q->sq_qhead) && !_sd_cctl_valid(cn)) ||
4587 4587 !_sd_cctl_valid(qp))
4588 4588 cmn_err(CE_PANIC,
4589 4589 "_sd_centry_alloc %x prev %x next %x qp %x",
4590 4590 cc_ent, cp, cn, qp);
4591 4591 }
4592 4592 #endif
4593 4593 cc_ent->cc_prev->cc_next = cc_ent->cc_next;
4594 4594 cc_ent->cc_next->cc_prev = cc_ent->cc_prev;
4595 4595 cc_ent->cc_next = qhead;
4596 4596 cc_ent->cc_prev = qhead->cc_prev;
4597 4597 qhead->cc_prev->cc_next = cc_ent;
4598 4598 qhead->cc_prev = cc_ent;
4599 4599 cc_ent->cc_seq = q->sq_seq++;
4600 4600 /* was FAST */
4601 4601 mutex_exit(&q->sq_qlock);
4602 4602 /*
4603 4603 * End inlined requeue.
4604 4604 */
4605 4605
4606 4606 #if defined(_SD_STATS)
4607 4607 if (_sd_hash_delete(cc_ent, _sd_htable) == 0)
4608 4608 SDTRACE(SDF_REPLACE,
4609 4609 CENTRY_CD(cc_ent), cc_ent->cc_hits,
4610 4610 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4611 4611 nsc_lbolt(), cc_ent->cc_creat);
4612 4612 cc_ent->cc_creat = nsc_lbolt();
4613 4613 cc_ent->cc_hits = 0;
4614 4614 #else
4615 4615 #if defined(_SD_DEBUG)
4616 4616 if (_sd_hash_delete(cc_ent, _sd_htable) == 0) {
4617 4617 SDTRACE(SDF_REPLACE|ST_DL,
4618 4618 CENTRY_CD(cc_ent),
4619 4619 cc_ent->cc_valid,
4620 4620 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
4621 4621 cd, BLK_TO_FBA_NUM(cblk));
4622 4622 if (cc_ent->cc_await_use ||
4623 4623 ((cd == CENTRY_CD(cc_ent)) &&
4624 4624 (cblk == CENTRY_BLK(cc_ent))))
4625 4625 DATA_LOG(SDF_REPLACE|ST_DL, cc_ent, 0,
4626 4626 BLK_FBAS);
4627 4627 }
4628 4628 #else
4629 4629 (void) _sd_hash_delete((struct _sd_hash_hd *)cc_ent,
4630 4630 _sd_htable);
4631 4631 #endif
4632 4632 #endif
4633 4633 cc_ent->cc_creat = nsc_lbolt();
4634 4634 cc_ent->cc_hits = 0;
4635 4635
4636 4636 cc_ent->cc_valid = 0;
4637 4637 categorize_centry = 0;
4638 4638 if (cc_ent->cc_data)
4639 4639 categorize_centry = FOUND_HOLD_OVER_DM;
4640 4640
4641 4641 alloc_try:
4642 4642 if (cd == _CD_NOHASH)
4643 4643 CENTRY_BLK(cc_ent) = cblk;
4644 4644 else if ((old_ent = (_sd_cctl_t *)
4645 4645 _sd_hash_insert(cd, cblk, (struct _sd_hash_hd *)cc_ent,
4646 4646 _sd_htable)) != cc_ent) {
4647 4647
4648 4648 if (SET_CENTRY_INUSE(old_ent)) {
4649 4649 sdbc_centry_inuse++;
4650 4650
4651 4651 if (nowait) {
4652 4652 _sd_centry_release(cc_ent);
4653 4653 cc_ent = NULL;
4654 4654 goto out;
4655 4655 }
4656 4656
4657 4657 if (locked)
4658 4658 rw_exit(&sdbc_queue_lock);
4659 4659 _sd_cc_wait(cd, cblk, old_ent, CC_INUSE);
4660 4660 if (locked)
4661 4661 rw_enter(&sdbc_queue_lock, RW_WRITER);
4662 4662 goto alloc_try;
4663 4663 }
4664 4664
4665 4665 /*
4666 4666 * bug 4529671
4667 4667 * now that we own the centry make sure that
4668 4668 * it is still good. it could have been processed
4669 4669 * by _sd_dealloc_dm() in the window between
4670 4670 * _sd_hash_insert() and SET_CENTRY_INUSE().
4671 4671 */
4672 4672 if ((_sd_cctl_t *)
4673 4673 _sd_hash_search(cd, cblk, _sd_htable) != old_ent) {
4674 4674 sdbc_centry_deallocd++;
4675 4675 #ifdef DEBUG
4676 4676 cmn_err(CE_WARN, "!cc_ent %p cd %d cblk %"
4677 4677 NSC_SZFMT " lost to dealloc?! cc_data %p",
4678 4678 (void *)old_ent, cd, cblk,
4679 4679 (void *)old_ent->cc_data);
4680 4680 #endif
4681 4681
4682 4682 CLEAR_CENTRY_INUSE(old_ent);
4683 4683
4684 4684 if (nowait) {
4685 4685 _sd_centry_release(cc_ent);
4686 4686 cc_ent = NULL;
4687 4687 goto out;
4688 4688 }
4689 4689
4690 4690 goto alloc_try;
4691 4691 }
4692 4692
4693 4693 if (CC_CD_BLK_MATCH(cd, cblk, old_ent)) {
4694 4694 sdbc_centry_hit++;
4695 4695 old_ent->cc_toflush = 0;
4696 4696 _sd_centry_release(cc_ent);
4697 4697 cc_ent = old_ent;
4698 4698 categorize_centry = FOUND_IN_HASH_DM;
4699 4699 } else {
4700 4700 sdbc_centry_lost++;
4701 4701
4702 4702 CLEAR_CENTRY_INUSE(old_ent);
4703 4703
4704 4704 if (nowait) {
4705 4705 _sd_centry_release(cc_ent);
4706 4706 cc_ent = NULL;
4707 4707 goto out;
4708 4708 }
4709 4709
4710 4710 goto alloc_try;
4711 4711 }
4712 4712 }
4713 4713
4714 4714 SDTRACE(ST_EXIT|SDF_ENT_ALLOC, cd, tries,
4715 4715 BLK_TO_FBA_NUM(cblk), 0, 0);
4716 4716
4717 4717 if (cc_ent->cc_await_use) {
4718 4718 mutex_enter(&cc_ent->cc_lock);
4719 4719 cv_broadcast(&cc_ent->cc_blkcv);
4720 4720 mutex_exit(&cc_ent->cc_lock);
4721 4721 }
4722 4722
4723 4723 sdbc_centry_init_dm(cc_ent);
4724 4724
4725 4725 cc_ent->cc_aging_dm |= categorize_centry;
4726 4726
4727 4727 out:
4728 4728 return (cc_ent);
4729 4729 }
4730 4730
4731 4731 SDTRACE(ST_INFO|SDF_ENT_ALLOC, cd, ++tries, BLK_TO_FBA_NUM(cblk), 0, 0);
4732 4732
4733 4733 delay(drv_usectohz(20000));
4734 4734 (void) (*stall)++;
4735 4735 num_tries = _sd_lruq_srch;
4736 4736 goto retry_alloc_centry;
4737 4737 }
4738 4738
4739 4739 /*
4740 4740 * sdbc_centry_init_dm - setup the cache block for dynamic memory allocation
4741 4741 *
4742 4742 * ARGUMENTS:
4743 4743 * centry - Cache block.
4744 4744 *
4745 4745 * RETURNS:
4746 4746 * NONE
4747 4747 *
4748 4748 * USAGE:
4749 4749 * This routine is the central point in which cache entry blocks are setup
4750 4750 */
4751 4751 static void
4752 4752 sdbc_centry_init_dm(_sd_cctl_t *centry)
4753 4753 {
4754 4754
4755 4755 	/* an entry already setup - don't touch, simply refresh age */
4756 4756 if (centry->cc_data) {
4757 4757 centry->cc_aging_dm &= ~(FINAL_AGING_DM);
4758 4758
4759 4759 DTRACE_PROBE1(sdbc_centry_init_dm_end,
4760 4760 char *, centry->cc_data);
4761 4761 return;
4762 4762 }
4763 4763
4764 4764 centry->cc_aging_dm &= ~(FINAL_AGING_DM | CATAGORY_ENTRY_DM);
4765 4765
4766 4766 if (centry->cc_head_dm || centry->cc_next_dm)
4767 4767 cmn_err(cmn_level, "!sdbc(sdbc_centry_init_dm): "
4768 4768 "non-zero mem chain in ccent %p", (void *)centry);
4769 4769
4770 4770 centry->cc_head_dm = 0;
4771 4771
4772 4772 if (!sdbc_use_dmchain)
4773 4773 centry->cc_next_dm = 0;
4774 4774
4775 4775 centry->cc_data = 0;
4776 4776
4777 4777 }
4778 4778
4779 4779 /*
4780 4780 * sdbc_centry_memalloc_dm
4781 4781 *
4782 4782 * Actually allocate the cache memory, storing it in the cc_data field for
4783 4783 * the cctl
4784 4784 *
4785 4785 * ARGS:
4786 4786 * centry: cache control block for which to allocate the memory
4787 4787 * alloc_request: number of bytes to allocate
4788 4788 * flag: if called with ALLOC_NOWAIT, caller must check for non-zero return
4789 4789 *
4790 4790 * RETURNS:
4791 4791 * 0 on success
4792 4792 * non-zero on error
4793 4793 */
4794 4794 static int
4795 4795 sdbc_centry_memalloc_dm(_sd_cctl_t *centry, int alloc_request, int flag)
4796 4796 {
4797 4797 int cblocks;
4798 4798 _sd_queue_t *newq;
4799 4799 int sleep;
4800 4800 sleep = (flag & ALLOC_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
4801 4801
4802 4802 if (!centry->cc_data && (alloc_request > 0)) {
4803 4803 /* host or other */
4804 4804 dynmem_processing_dm.alloc_ct++;
4805 4805 centry->cc_data = (unsigned char *)
4806 4806 kmem_alloc((size_t)centry->cc_alloc_size_dm, sleep);
4807 4807
4808 4808
4809 4809 if (sdbc_use_dmchain) {
4810 4810 cblocks = centry->cc_alloc_size_dm >> _sd_cblock_shift;
4811 4811 newq = &sdbc_dm_queues[cblocks];
4812 4812
4813 4813 /* set the dmqueue index */
4814 4814 centry->cc_cblocks = cblocks;
4815 4815
4816 4816 /* put on appropriate queue */
4817 4817 sdbc_ins_dmqueue_back(newq, centry);
4818 4818 }
4819 4819
4820 4820 /*
4821 4821 * for KM_NOSLEEP (should never happen with KM_SLEEP)
4822 4822 */
4823 4823 if (!centry->cc_data)
4824 4824 return (LOW_RESOURCES_DM);
4825 4825 centry->cc_head_dm = centry;
4826 4826 centry->cc_alloc_ct_dm++;
4827 4827 }
4828 4828
4829 4829 return (0);
4830 4830 }
4831 4831
4832 4832 /*
4833 4833 * _sd_centry_release - release a cache block
4834 4834 *
4835 4835 * ARGUMENTS:
4836 4836 * centry - Cache block.
4837 4837 *
4838 4838 * RETURNS:
4839 4839 * NONE
4840 4840 *
4841 4841 * USAGE:
4842 4842 * This routine frees up a cache block. It also frees up a write
4843 4843 * block if allocated and its valid to release it.
4844 4844 */
4845 4845
4846 4846 void
4847 4847 _sd_centry_release(_sd_cctl_t *centry)
4848 4848 {
4849 4849 ss_centry_info_t *wctl;
4850 4850
4851 4851 SDTRACE(ST_ENTER|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4852 4852 BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4853 4853
4854 4854 CLEAR_CENTRY_PAGEIO(centry);
4855 4855
4856 4856 if ((wctl = centry->cc_write) != 0) {
4857 4857 /* was FAST */
4858 4858 		mutex_enter(&centry->cc_lock);
4859 4859 if (CENTRY_DIRTY(centry))
4860 4860 wctl = NULL;
4861 4861 else {
4862 4862 centry->cc_write = NULL;
4863 4863 centry->cc_flag &= ~(CC_PINNABLE);
4864 4864 }
4865 4865 /* was FAST */
4866 4866 		mutex_exit(&centry->cc_lock);
4867 4867 if (wctl) {
4868 4868 wctl->sc_dirty = 0;
4869 4869 SSOP_SETCENTRY(sdbc_safestore, wctl);
4870 4870 SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
4871 4871 }
4872 4872 }
4873 4873
4874 4874 if (!(centry->cc_aging_dm & BAD_CHAIN_DM)) {
4875 4875 if (sdbc_use_dmchain) {
4876 4876 if (centry->cc_alloc_size_dm) {
4877 4877
4878 4878 /* see if this can be queued to head */
4879 4879 if (CENTRY_QHEAD(centry)) {
4880 4880 sdbc_requeue_head_dm_try(centry);
4881 4881 } else {
4882 4882 int qidx;
4883 4883 _sd_queue_t *q;
4884 4884
4885 4885 qidx = centry->cc_cblocks;
4886 4886 q = &sdbc_dm_queues[qidx];
4887 4887
4888 4888 if (_sd_lru_reinsert(q, centry)) {
4889 4889 sdbc_requeue_dmchain(q,
4890 4890 centry, 1, 1);
4891 4891 }
4892 4892 }
4893 4893 } else {
4894 4894 /*
4895 4895 * Fix for bug 4949134:
4896 4896 * If an internal block is marked with CC_QHEAD
4897 4897 * but the HOST block is not, the chain will
4898 4898 * never age properly, and will never be made
4899 4899 * available. Only the HOST of the dmchain is
4900 4900 * checked for CC_QHEAD, so clearing an internal
4901 4901 * block indiscriminately (as is being done
4902 4902 * here) does no damage.
4903 4903 *
4904 4904 * The same result could instead be achieved by
4905 4905 * not setting the CC_QHEAD flag in the first
4906 4906 * place, if the block is an internal dmchain
4907 4907 * block, and if it is found in the hash table.
4908 4908 * The current solution was chosen since it is
4909 4909 * the least intrusive.
4910 4910 */
4911 4911 centry->cc_flag &= ~CC_QHEAD;
4912 4912 }
4913 4913 } else {
4914 4914 if (CENTRY_QHEAD(centry)) {
4915 4915 if (!CENTRY_DIRTY(centry))
4916 4916 _sd_requeue_head(centry);
4917 4917 } else if (_sd_lru_reinsert(_SD_LRU_Q, centry))
4918 4918 _sd_requeue(centry);
4919 4919 }
4920 4920 }
4921 4921
4922 4922 SDTRACE(ST_EXIT|SDF_ENT_FREE, CENTRY_CD(centry), 0,
4923 4923 BLK_TO_FBA_NUM(CENTRY_BLK(centry)), 0, 0);
4924 4924
4925 4925 /* only clear inuse after final reference to centry */
4926 4926
4927 4927 CLEAR_CENTRY_INUSE(centry);
4928 4928 }
4929 4929
4930 4930
4931 4931 /*
4932 4932  * look up the centry info associated with a safestore resource
4933 4933 * return pointer to the centry info structure
4934 4934 */
4935 4935 ss_centry_info_t *
4936 4936 sdbc_get_cinfo_byres(ss_resource_t *res)
4937 4937 {
4938 4938 ss_centry_info_t *cinfo;
4939 4939 ss_centry_info_t *cend;
4940 4940 int found = 0;
4941 4941
4942 4942 ASSERT(res != NULL);
4943 4943
4944 4944 if (res == NULL)
4945 4945 return (NULL);
4946 4946
4947 4947 cinfo = _sdbc_gl_centry_info;
4948 4948 cend = _sdbc_gl_centry_info +
4949 4949 (_sdbc_gl_centry_info_size / sizeof (ss_centry_info_t)) - 1;
4950 4950
4951 4951 for (; cinfo <= cend; ++cinfo)
4952 4952 if (cinfo->sc_res == res) {
4953 4953 ++found;
4954 4954 break;
4955 4955 }
4956 4956
4957 4957 if (!found)
4958 4958 cinfo = NULL; /* bad */
4959 4959
4960 4960 return (cinfo);
4961 4961 }
4962 4962
4963 4963 /*
4964 4964 * _sd_alloc_write - Allocate a write block (for remote mirroring)
4965 4965 * and set centry->cc_write
4966 4966 *
4967 4967 * ARGUMENTS:
4968 4968 * centry - Head of Cache chain
4969 4969 * stall - pointer to stall count (no blocks avail)
4970 4970 *
4971 4971 * RETURNS:
4972 4972  *	0 - and sets cc_write for all entries when write control block obtained.
4973 4973 * -1 - if a write control block could not be obtained.
4974 4974 */
4975 4975
4976 4976 int
4977 4977 _sd_alloc_write(_sd_cctl_t *centry, int *stall)
4978 4978 {
4979 4979
4980 4980 ss_resourcelist_t *reslist;
4981 4981 ss_resourcelist_t *savereslist;
4982 4982 ss_resource_t *res;
4983 4983 _sd_cctl_t *ce;
4984 4984 int err;
4985 4985 int need;
4986 4986
4987 4987
4988 4988 need = 0;
4989 4989
4990 4990 for (ce = centry; ce; ce = ce->cc_chain) {
4991 4991 if (!(ce->cc_write))
4992 4992 need++;
4993 4993 }
4994 4994
4995 4995 if (!need)
4996 4996 return (0);
4997 4997
4998 4998 if ((SSOP_ALLOCRESOURCE(sdbc_safestore, need, stall, &reslist))
4999 4999 == SS_OK) {
5000 5000 savereslist = reslist;
5001 5001 for (ce = centry; ce; ce = ce->cc_chain) {
5002 5002 if (ce->cc_write)
5003 5003 continue;
5004 5004 err = SSOP_GETRESOURCE(sdbc_safestore, &reslist, &res);
5005 5005 if (err == SS_OK)
5006 5006 ce->cc_write = sdbc_get_cinfo_byres(res);
5007 5007
5008 5008 ASSERT(err == SS_OK); /* panic if DEBUG on */
5009 5009 ASSERT(ce->cc_write != NULL);
5010 5010
5011 5011 /*
5012 5012 * this is bad and should not happen.
5013 5013 * we use the saved reslist to cleanup
5014 5014 * and return.
5015 5015 */
5016 5016 if ((err != SS_OK) || !ce->cc_write) {
5017 5017
5018 5018 cmn_err(CE_WARN, "!_sd_alloc_write: "
5019 5019 				    "bad resource list 0x%p "
5020 5020 "changing to forced write thru mode",
5021 5021 (void *)savereslist);
5022 5022
5023 5023 (void) _sd_set_node_hint(NSC_FORCED_WRTHRU);
5024 5024
5025 5025 while (SSOP_GETRESOURCE(sdbc_safestore,
5026 5026 &savereslist, &res) == SS_OK) {
5027 5027
5028 5028 SSOP_DEALLOCRESOURCE(sdbc_safestore,
5029 5029 res);
5030 5030 }
5031 5031
5032 5032 return (-1);
5033 5033
5034 5034 }
5035 5035
5036 5036 }
5037 5037 return (0);
5038 5038 }
5039 5039
5040 5040 /* no safestore resources available. do sync write */
5041 5041 _sd_unblock(&_sd_flush_cv);
5042 5042 return (-1);
5043 5043 }
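
The shape of _sd_alloc_write() is all-or-nothing: count how many entries on
the chain still need a write resource, request that many in one batch,
distribute them, and on any failure return every resource from the saved
list head before reporting -1 (the caller then falls back to synchronous
writes). A user-space sketch of the same shape, with malloc standing in
for the safestore batch and hypothetical names throughout:

	#include <stdlib.h>

	struct ent {			/* reduced _sd_cctl_t */
		struct ent *chain;
		void *write;		/* cc_write analogue */
	};

	/* every entry on the chain gets a resource, or none do */
	static int
	alloc_write_sketch(struct ent *chain)
	{
		struct ent *e;
		void **res;
		int need = 0, i;

		for (e = chain; e != NULL; e = e->chain)
			if (e->write == NULL)
				need++;
		if (need == 0)
			return (0);

		if ((res = calloc(need, sizeof (void *))) == NULL)
			return (-1);

		for (i = 0; i < need; i++) {
			if ((res[i] = malloc(64)) == NULL) {
				while (i-- > 0)	/* roll back the batch */
					free(res[i]);
				free(res);
				return (-1);
			}
		}

		for (e = chain, i = 0; e != NULL; e = e->chain)
			if (e->write == NULL)
				e->write = res[i++];
		free(res);
		return (0);
	}
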
5044 5044
5045 5045 /*
5046 5046 * _sd_read - Interface call to do read.
5047 5047 *
5048 5048 * ARGUMENTS:
5049 5049 * handle - handle allocated earlier on.
5050 5050 * fba_pos - disk block number to read from.
5051 5051 * fba_len - length in fbas.
5052 5052 * flag - flag: (NSC_NOBLOCK for async io)
5053 5053 *
5054 5054 * RETURNS:
5055 5055 * errno if return > 0
5056 5056 * NSC_DONE or NSC_PENDING otherwise.
5057 5057 *
5058 5058 * USAGE:
5059 5059 * This routine checks if the request is valid and calls the underlying
5060 5060 * doread routine (also called by alloc_buf)
5061 5061 */
5062 5062
5063 5063 int
5064 5064 _sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5065 5065 int flag)
5066 5066 {
5067 5067 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5068 5068 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5069 5069 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5070 5070 _sd_cctl_t *cc_ent = NULL;
5071 5071 nsc_size_t fba_orig_len = fba_len;
5072 5072 int ret;
5073 5073 int cd = HANDLE_CD(handle);
5074 5074
5075 5075 if (_sdbc_shutdown_in_progress || (handle->bh_flag & NSC_ABUF)) {
5076 5076 ret = EIO;
5077 5077 goto out;
5078 5078 }
5079 5079
5080 5080
5081 5081 #if !defined(_SD_NOCHECKS)
5082 5082 if (!_SD_HANDLE_ACTIVE(handle)) {
5083 5083 cmn_err(CE_WARN, "!sdbc(_sd_read) handle %p not active",
5084 5084 (void *)handle);
5085 5085 ret = EINVAL;
5086 5086 goto out;
5087 5087 }
5088 5088 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
5089 5089 #endif
5090 5090 if (fba_len == 0) {
5091 5091 ret = NSC_DONE;
5092 5092 goto out;
5093 5093 }
5094 5094
5095 5095 KSTAT_RUNQ_ENTER(cd);
5096 5096
5097 5097 st_cblk_off = BLK_FBA_OFF(fba_pos);
5098 5098 st_cblk_len = BLK_FBAS - st_cblk_off;
5099 5099 if ((nsc_size_t)st_cblk_len >= fba_len) {
5100 5100 end_cblk_len = 0;
5101 5101 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5102 5102 } else {
5103 5103 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5104 5104 }
5105 5105
5106 5106 cc_ent = handle->bh_centry;
5107 5107 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5108 5108 cc_ent = cc_ent->cc_chain;
5109 5109
5110 5110 if (!SDBC_VALID_BITS(st_cblk_off, st_cblk_len, cc_ent))
5111 5111 goto need_io;
5112 5112 DATA_LOG(SDF_RD, cc_ent, st_cblk_off, st_cblk_len);
5113 5113
5114 5114 DTRACE_PROBE4(_sd_read_data1, uint64_t,
5115 5115 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5116 5116 uint64_t, (uint64_t)st_cblk_len, char *,
5117 5117 *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5118 5118 char *, *(int64_t *)(cc_ent->cc_data +
5119 5119 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5120 5120
5121 5121 fba_pos += st_cblk_len;
5122 5122 fba_len -= st_cblk_len;
5123 5123 cc_ent = cc_ent->cc_chain;
5124 5124
5125 5125 while (fba_len > (nsc_size_t)end_cblk_len) {
5126 5126 if (!FULLY_VALID(cc_ent))
5127 5127 goto need_io;
5128 5128 DATA_LOG(SDF_RD, cc_ent, 0, BLK_FBAS);
5129 5129
5130 5130 DTRACE_PROBE4(_sd_read_data2, uint64_t,
5131 5131 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5132 5132 uint64_t, (uint64_t)BLK_FBAS,
5133 5133 char *, *(int64_t *)(cc_ent->cc_data),
5134 5134 char *, *(int64_t *)(cc_ent->cc_data +
5135 5135 FBA_SIZE(BLK_FBAS) - 8));
5136 5136
5137 5137 fba_pos += BLK_FBAS;
5138 5138 fba_len -= BLK_FBAS;
5139 5139 cc_ent = cc_ent->cc_chain;
5140 5140 }
5141 5141 if (fba_len) {
5142 5142 if (!SDBC_VALID_BITS(0, end_cblk_len, cc_ent))
5143 5143 goto need_io;
5144 5144 DATA_LOG(SDF_RD, cc_ent, 0, end_cblk_len);
5145 5145
5146 5146 DTRACE_PROBE4(_sd_read_data3, uint64_t,
5147 5147 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
5148 5148 uint64_t, (uint64_t)end_cblk_len,
5149 5149 char *, *(int64_t *)(cc_ent->cc_data),
5150 5150 char *, *(int64_t *)(cc_ent->cc_data +
5151 5151 FBA_SIZE(end_cblk_len) - 8));
5152 5152 }
5153 5153
5154 5154 CACHE_FBA_READ(handle->bh_cd, fba_orig_len);
5155 5155 CACHE_READ_HIT;
5156 5156
5157 5157 FBA_READ_IO_KSTATS(handle->bh_cd, FBA_SIZE(fba_orig_len));
5158 5158
5159 5159 ret = NSC_HIT;
5160 5160 goto stats_exit;
5161 5161 need_io:
5162 5162 _SD_DISCONNECT_CALLBACK(handle);
5163 5163
5164 5164 ret = _sd_doread(handle, cc_ent, fba_pos, fba_len, flag);
5165 5165
5166 5166 stats_exit:
5167 5167 KSTAT_RUNQ_EXIT(cd);
5168 5168 out:
5169 5169 return (ret);
5170 5170 }
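
The start/middle/end decomposition used in _sd_read() above (and repeated
in _sd_doread() and _sd_read_complete()) is easiest to verify with numbers.
Assuming BLK_FBAS = 8: a request at fba_pos = 13 for fba_len = 25 gives
st_cblk_off = 13 % 8 = 5, st_cblk_len = 8 - 5 = 3, end_cblk_len =
(13 + 25) % 8 = 6, and two full middle blocks, since 3 + 8 + 8 + 6 = 25.
A checkable sketch of the same arithmetic:

	#include <assert.h>

	#define	BLK_FBAS	8	/* assumed cache block size in FBAs */

	int
	main(void)
	{
		unsigned long fba_pos = 13, fba_len = 25;
		unsigned long st_off, st_len, end_len, middle;

		st_off = fba_pos % BLK_FBAS;	/* BLK_FBA_OFF() */
		st_len = BLK_FBAS - st_off;
		if (st_len >= fba_len) {	/* fits in one block */
			st_len = fba_len;
			end_len = 0;
		} else {
			end_len = (fba_pos + fba_len) % BLK_FBAS;
		}
		middle = (fba_len - st_len - end_len) / BLK_FBAS;

		assert(st_off == 5 && st_len == 3 && end_len == 6);
		assert(st_len + middle * BLK_FBAS + end_len == fba_len);
		return (0);
	}
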
5171 5171
5172 5172
5173 5173 /*
5174 5174 * sdbc_doread_prefetch - read ahead one cache block
5175 5175 *
5176 5176 * ARGUMENTS:
5177 5177 * cc_ent - cache entry
5178 5178 * fba_pos - disk block number to read from
5179 5179 * fba_len - length in fbas.
5180 5180 *
5181 5181 * RETURNS:
5182 5182 * number of fbas, if any, that are to be read beyond (fba_pos + fba_len)
5183 5183 *
5184 5184 * USAGE:
5185 5185 * if readahead is to be done allocate a cache block and place
5186 5186 * on the cc_chain of cc_ent
5187 5187 */
5188 5188 static int
5189 5189 sdbc_doread_prefetch(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5190 5190 {
5191 5191 nsc_off_t st_cblk = FBA_TO_BLK_NUM(fba_pos);
5192 5192 nsc_off_t next_cblk = FBA_TO_BLK_NUM(fba_pos + BLK_FBAS);
5193 5193 nsc_size_t filesize;
5194 5194 int fba_count = 0; /* number of fbas to prefetch */
5195 5195 _sd_cctl_t *cc_ra; /* the read ahead cache entry */
5196 5196 int cd = CENTRY_CD(cc_ent);
5197 5197 nsc_size_t vol_fill;
5198 5198
5199 5199 filesize = _sd_cache_files[cd].cd_info->sh_filesize;
5200 5200 vol_fill = filesize - (fba_pos + fba_len);
5201 5201
5202 5202 /* readahead only for small reads */
5203 5203 if ((fba_len <= FBA_LEN(CACHE_BLOCK_SIZE)) && (fba_pos != 0) &&
5204 5204 (vol_fill > 0)) {
5205 5205
5206 5206 /*
5207 5207 * if prev block is in cache and next block is not,
5208 5208 * then read ahead one block
5209 5209 */
5210 5210 if (_sd_hash_search(cd, st_cblk - 1, _sd_htable)) {
5211 5211 if (!_sd_hash_search(cd, next_cblk, _sd_htable)) {
5212 5212
5213 5213 cc_ra = sdbc_centry_alloc_blks
5214 5214 (cd, next_cblk, 1, ALLOC_NOWAIT);
5215 5215 if (cc_ra) {
5216 5216 /* if in cache don't readahead */
5217 5217 if (cc_ra->cc_aging_dm &
5218 5218 HASH_ENTRY_DM) {
5219 5219 ++sdbc_ra_hash;
5220 5220 _sd_centry_release(cc_ra);
5221 5221 } else {
5222 5222 cc_ent->cc_chain = cc_ra;
5223 5223 cc_ra->cc_chain = 0;
5224 5224 fba_count =
5225 5225 (vol_fill >
5226 5226 (nsc_size_t)BLK_FBAS) ?
5227 5227 BLK_FBAS : (int)vol_fill;
5228 5228 /*
5229 5229 * indicate implicit prefetch
5230 5230 * and mark for release in
5231 5231 * _sd_read_complete()
5232 5232 */
5233 5233 cc_ra->cc_aging_dm |=
5234 5234 (PREFETCH_BUF_I |
5235 5235 PREFETCH_BUF_IR);
5236 5236 }
5237 5237 } else {
5238 5238 ++sdbc_ra_none;
5239 5239 }
5240 5240 }
5241 5241 }
5242 5242
5243 5243 }
5244 5244
5245 5245 return (fba_count);
5246 5246 }
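
Condensed, the trigger above is a cheap sequential-reader detector: a small
read whose immediately preceding cache block is hashed but whose following
block is not looks like the next step of a linear scan, so exactly one
extra block is staged. The decision logic alone, as a sketch; in_cache()
is a hypothetical stand-in for the _sd_hash_search() lookups:

	#include <stdbool.h>

	#define	BLK_FBAS	8	/* assumed cache block size in FBAs */

	/* hypothetical hash lookup; this stub always misses */
	static bool
	in_cache(int cd, unsigned long cblk)
	{
		(void) cd;
		(void) cblk;
		return (false);
	}

	static bool
	should_prefetch(int cd, unsigned long fba_pos,
	    unsigned long fba_len, unsigned long volsize)
	{
		unsigned long cblk = fba_pos / BLK_FBAS;

		return (fba_len <= BLK_FBAS &&	/* small reads only */
		    fba_pos != 0 &&
		    fba_pos + fba_len < volsize && /* room past request */
		    in_cache(cd, cblk - 1) &&	/* prev block cached */
		    !in_cache(cd, cblk + 1));	/* next block absent */
	}
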
5247 5247
5248 5248 /*
5249 5249 * _sd_doread - Check if blocks in cache. If not completely true, do io.
5250 5250 *
5251 5251 * ARGUMENTS:
5252 5252 * handle - handle allocated earlier on.
5253 5253 * fba_pos - disk block number to read from.
5254 5254 * fba_len - length in fbas.
5255 5255 * flag - flag: (NSC_NOBLOCK for async io)
5256 5256 *
5257 5257 * RETURNS:
5258 5258 * errno if return > 0
5259 5259  *	NSC_DONE (from disk), or NSC_PENDING otherwise.
5260 5260 *
5261 5261 * Comments:
5262 5262 * It initiates an io and either blocks waiting for the completion
5263 5263 * or return NSC_PENDING, depending on whether the flag bit
5264 5264  * or returns NSC_PENDING, depending on whether the flag bit
5265 5265 *
5266 5266 */
5267 5267
5268 5268
5269 5269 static int
5270 5270 _sd_doread(_sd_buf_handle_t *handle, _sd_cctl_t *cc_ent, nsc_off_t fba_pos,
5271 5271 nsc_size_t fba_len, int flag)
5272 5272 {
5273 5273 int cd, err;
5274 5274 nsc_size_t fba_orig_len; /* length in FBA's of the original request */
5275 5275 nsc_size_t file_len; /* length in bytes of io to be done */
5276 5276 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5277 5277 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5278 5278 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5279 5279 int num_bdl;
5280 5280 _sd_cctl_t *cc_temp;
5281 5281 struct buf *bp;
5282 5282 unsigned int want_bits;
5283 5283 void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
5284 5284 sdbc_cblk_fba_t end_cblk_fill; /* FBA's to fill to end of last block */
5285 5285 nsc_size_t vol_end_fill; /* # of FBA's to fill to end of the volume */
5286 5286
5287 5287 cd = HANDLE_CD(handle);
5288 5288 SDTRACE(ST_ENTER|SDF_READ, cd, fba_len, fba_pos, flag, 0);
5289 5289
5290 5290 ASSERT(cd >= 0);
5291 5291 if (_sd_cache_files[cd].cd_info->sh_failed) {
5292 5292 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, EIO);
5293 5293 return (EIO);
5294 5294 }
5295 5295
5296 5296 /*
5297 5297 * adjust the position and length so that the entire cache
5298 5298 * block is read in
5299 5299 */
5300 5300
5301 5301 /* first, adjust to beginning of cache block */
5302 5302
5303 5303 fba_len += BLK_FBA_OFF(fba_pos); /* add start offset to length */
5304 5304 fba_pos &= ~BLK_FBA_MASK; /* move position back to start of block */
5305 5305
5306 5306 /* compute fill to end of cache block */
5307 5307 end_cblk_fill = (BLK_FBAS - 1) - ((fba_len - 1) % BLK_FBAS);
5308 5308 vol_end_fill = _sd_cache_files[(cd)].cd_info->sh_filesize -
5309 5309 (fba_pos + fba_len);
5310 5310
5311 5311 /* fill to lesser of cache block or end of volume */
5312 5312 fba_len += ((nsc_size_t)end_cblk_fill < vol_end_fill) ? end_cblk_fill :
5313 5313 vol_end_fill;
5314 5314
5315 5315 DTRACE_PROBE2(_sd_doread_rfill, nsc_off_t, fba_pos,
5316 5316 nsc_size_t, fba_len);
5317 5317
5318 5318
5319 5319 /* for small reads do 1-block readahead if previous block is in cache */
5320 5320 if (sdbc_prefetch1)
5321 5321 fba_len += sdbc_doread_prefetch(cc_ent, fba_pos, fba_len);
5322 5322
5323 5323 fba_orig_len = fba_len;
5324 5324 st_cblk_off = BLK_FBA_OFF(fba_pos);
5325 5325 st_cblk_len = BLK_FBAS - st_cblk_off;
5326 5326 if ((nsc_size_t)st_cblk_len >= fba_len) {
5327 5327 end_cblk_len = 0;
5328 5328 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5329 5329 } else {
5330 5330 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5331 5331 }
5332 5332
5333 5333 cc_temp = cc_ent;
5334 5334 num_bdl = 0;
5335 5335 while (cc_temp) {
5336 5336 num_bdl += (SDBC_LOOKUP_IOCOUNT(CENTRY_DIRTY(cc_temp)));
5337 5337 cc_temp = cc_temp->cc_chain;
5338 5338 }
5339 5339 bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
5340 5340 fba_pos, num_bdl, B_READ);
5341 5341 if (bp == NULL) {
5342 5342 SDTRACE(ST_EXIT|SDF_READ, cd, fba_len, fba_pos, flag, E2BIG);
5343 5343 return (E2BIG);
5344 5344 }
5345 5345
5346 5346 want_bits = SDBC_GET_BITS(st_cblk_off, st_cblk_len);
5347 5347 if (want_bits & CENTRY_DIRTY(cc_ent))
5348 5348 _sd_ccent_rd(cc_ent, want_bits, bp);
5349 5349 else {
5350 5350 sd_add_fba(bp, &cc_ent->cc_addr, st_cblk_off, st_cblk_len);
5351 5351 }
5352 5352 file_len = FBA_SIZE(st_cblk_len);
5353 5353 cc_ent = cc_ent->cc_chain;
5354 5354 fba_len -= st_cblk_len;
5355 5355
5356 5356 while (fba_len > (nsc_size_t)end_cblk_len) {
5357 5357 if (CENTRY_DIRTY(cc_ent))
5358 5358 _sd_ccent_rd(cc_ent, (uint_t)BLK_FBA_BITS, bp);
5359 5359 else {
5360 5360 sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
5361 5361 }
5362 5362 file_len += CACHE_BLOCK_SIZE;
5363 5363 cc_ent = cc_ent->cc_chain;
5364 5364 fba_len -= BLK_FBAS;
5365 5365 }
5366 5366
5367 5367 if (fba_len) {
5368 5368 want_bits = SDBC_GET_BITS(0, end_cblk_len);
5369 5369 if (want_bits & CENTRY_DIRTY(cc_ent))
5370 5370 _sd_ccent_rd(cc_ent, want_bits, bp);
5371 5371 else {
5372 5372 sd_add_fba(bp, &cc_ent->cc_addr, 0, end_cblk_len);
5373 5373 }
5374 5374 file_len += FBA_SIZE(end_cblk_len);
5375 5375 }
5376 5376
5377 5377 CACHE_READ_MISS;
5378 5378 FBA_READ_IO_KSTATS(cd, file_len);
5379 5379
5380 5380 DISK_FBA_READ(cd, FBA_NUM(file_len));
5381 5381
5382 5382 fn = (handle->bh_flag & NSC_NOBLOCK) ? _sd_async_read_ea : NULL;
5383 5383 err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, handle);
5384 5384
5385 5385 if (err != NSC_PENDING) {
5386 5386 _sd_read_complete(handle, fba_pos, fba_orig_len, err);
5387 5387 }
5388 5388
5389 5389 SDTRACE(ST_EXIT|SDF_READ, cd, fba_orig_len, fba_pos, flag, err);
5390 5390
5391 5391 return (err);
5392 5392 }
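
The start/middle/end decomposition above (st_cblk_off, st_cblk_len, end_cblk_len) recurs throughout this file, in _sd_write(), _sd_zero() and _sd_copy() below. A minimal user-space sketch of the same arithmetic, assuming an 8-FBA cache block; FBAS_PER_BLK and the request values are illustrative stand-ins for BLK_FBAS and a real request:

#include <stdio.h>

#define	FBAS_PER_BLK	8	/* assumed stand-in for BLK_FBAS */

int
main(void)
{
	unsigned long long fba_pos = 13;	/* example request */
	unsigned long long fba_len = 21;
	unsigned st_off, st_len, end_len;

	st_off = fba_pos % FBAS_PER_BLK;	/* like BLK_FBA_OFF() */
	st_len = FBAS_PER_BLK - st_off;
	if (st_len >= fba_len) {		/* fits in one block */
		end_len = 0;
		st_len = (unsigned)fba_len;
	} else {
		end_len = (fba_pos + fba_len) % FBAS_PER_BLK;
	}

	printf("start block: off %u, len %u\n", st_off, st_len);
	printf("full middle blocks: %llu\n",
	    (fba_len - st_len - end_len) / FBAS_PER_BLK);
	printf("end block: len %u\n", end_len);
	return (0);
}

With the values above this prints a 3-FBA head, two full blocks and a 2-FBA tail, which is exactly how the loops above carve a request into cache-block pieces.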
5393 5393
5394 5394
5395 5395
5396 5396 /*
5397 5397 * _sd_read_complete - Do whatever is necessary after a read io is done.
5398 5398 *
5399 5399 * ARGUMENTS:
5400 5400 * handle - handle allocated earlier on.
5401 5401 * fba_pos - disk block number to read from.
5402 5402 * fba_len - length in fbas.
5403 5403 * error - error from io if any.
5404 5404 *
5405 5405 * RETURNS:
5406 5406 * NONE.
5407 5407 *
5408 5408 * Comments:
5409 5409  *	This routine marks the cache blocks valid if the io completed
5410 5410  *	successfully. Called from the async end action as well as after
5411 5411  *	a synchronous read completes.
5412 5412 */
5413 5413
5414 5414 void
5415 5415 _sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
5416 5416 nsc_size_t fba_len, int error)
5417 5417 {
5418 5418 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5419 5419 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5420 5420 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5421 5421 nsc_size_t cur_fba_len; /* length in FBA's */
5422 5422 _sd_cctl_t *cc_iocent;
5423 5423 _sd_cctl_t *first_iocent; /* first buffer when processing prefetch */
5424 5424
5425 5425 cc_iocent = handle->bh_centry;
5426 5426
5427 5427 if ((handle->bh_error = error) == 0) {
5428 5428 while (CENTRY_BLK(cc_iocent) != FBA_TO_BLK_NUM(fba_pos))
5429 5429 cc_iocent = cc_iocent->cc_chain;
5430 5430
5431 5431 cur_fba_len = fba_len;
5432 5432 st_cblk_off = BLK_FBA_OFF(fba_pos);
5433 5433 st_cblk_len = BLK_FBAS - st_cblk_off;
5434 5434 if ((nsc_size_t)st_cblk_len >= fba_len) {
5435 5435 end_cblk_len = 0;
5436 5436 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5437 5437 } else {
5438 5438 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5439 5439 }
5440 5440
5441 5441 SDBC_SET_VALID_BITS(st_cblk_off, st_cblk_len, cc_iocent);
5442 5442 DATA_LOG(SDF_RDIO, cc_iocent, st_cblk_off, st_cblk_len);
5443 5443
5444 5444 DTRACE_PROBE4(_sd_read_complete_data1, uint64_t, (uint64_t)
5445 5445 BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)) + st_cblk_off,
5446 5446 int, st_cblk_len, char *,
5447 5447 *(int64_t *)(cc_iocent->cc_data + FBA_SIZE(st_cblk_off)),
5448 5448 char *, *(int64_t *)(cc_iocent->cc_data +
5449 5449 FBA_SIZE(st_cblk_off + st_cblk_len) - 8));
5450 5450
5451 5451
5452 5452 first_iocent = cc_iocent;
5453 5453 cc_iocent = cc_iocent->cc_chain;
5454 5454 cur_fba_len -= st_cblk_len;
5455 5455
5456 5456 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5457 5457 SET_FULLY_VALID(cc_iocent);
5458 5458 DATA_LOG(SDF_RDIO, cc_iocent, 0, BLK_FBAS);
5459 5459
5460 5460 DTRACE_PROBE4(_sd_read_complete_data2, uint64_t,
5461 5461 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5462 5462 int, BLK_FBAS, char *,
5463 5463 *(int64_t *)(cc_iocent->cc_data), char *,
5464 5464 *(int64_t *)(cc_iocent->cc_data +
5465 5465 FBA_SIZE(BLK_FBAS) - 8));
5466 5466
5467 5467 /*
5468 5468 * 4755485 release implicit prefetch buffers
5469 5469 *
5470 5470 			 * the cc_chain of the first buffer must be NULL'd
5471 5471 * else _sd_free_buf() will do a double free when
5472 5472 * it traverses the chain.
5473 5473 *
5474 5474 * if a buffer has been marked PREFETCH_BUF_IR then
5475 5475 * it is guaranteed that
5476 5476 * 1. it is the second in a chain of two.
5477 5477 * 2. cur_fba_len is BLK_FBAS.
5478 5478 * 3. end_cblk_len is zero.
5479 5479 *
5480 5480 * because of 1 (and 2) above, we can safely exit the
5481 5481 * while loop via the break statement without
5482 5482 * executing the last two statements. the break
5483 5483 * statement is necessary because it would be unsafe
5484 5484 * to access cc_iocent which could be reallocated
5485 5485 * immediately after the _sd_centry_release().
5486 5486 */
5487 5487 if (cc_iocent->cc_aging_dm & PREFETCH_BUF_IR) {
5488 5488 cc_iocent->cc_aging_dm &= ~(PREFETCH_BUF_IR);
5489 5489 _sd_centry_release(cc_iocent);
5490 5490 first_iocent->cc_chain = NULL;
5491 5491 break;
5492 5492 }
5493 5493
5494 5494 cc_iocent = cc_iocent->cc_chain;
5495 5495 cur_fba_len -= BLK_FBAS;
5496 5496 }
5497 5497 if (end_cblk_len) {
5498 5498 SDBC_SET_VALID_BITS(0, end_cblk_len, cc_iocent);
5499 5499 DATA_LOG(SDF_RDIO, cc_iocent, 0, end_cblk_len);
5500 5500
5501 5501 DTRACE_PROBE4(_sd_read_complete_data3, uint64_t,
5502 5502 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_iocent)),
5503 5503 int, end_cblk_len, char *,
5504 5504 *(int64_t *)(cc_iocent->cc_data), char *,
5505 5505 *(int64_t *)(cc_iocent->cc_data +
5506 5506 FBA_SIZE(end_cblk_len) - 8));
5507 5507 }
5508 5508 }
5509 5509
5510 5510 }
5511 5511
5512 5512
5513 5513 /*
5514 5514 * _sd_async_read_ea - End action for async reads.
5515 5515 *
5516 5516 * ARGUMENTS:
5517 5517 * xhandle - handle allocated earlier on (cast to blind_t).
5518 5518 * fba_pos - disk block number read from.
5519 5519 * fba_len - length in fbas.
5520 5520 * error - error from io if any.
5521 5521 *
5522 5522 * RETURNS:
5523 5523 * NONE.
5524 5524 *
5525 5525 * Comments:
5526 5526 * This routine is called at interrupt level when the io is done.
5527 5527  *	This is called only when the read is asynchronous (NSC_NOBLOCK).
5528 5528 */
5529 5529
5530 5530 static void
5531 5531 _sd_async_read_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5532 5532 int error)
5533 5533 {
5534 5534 _sd_buf_handle_t *handle = xhandle;
5535 5535 int cd;
5536 5536
5537 5537 if (error) {
5538 5538 cd = HANDLE_CD(handle);
5539 5539 ASSERT(cd >= 0);
5540 5540 _sd_cache_files[cd].cd_info->sh_failed = 1;
5541 5541 }
5542 5542 SDTRACE(ST_ENTER|SDF_READ_EA, HANDLE_CD(handle),
5543 5543 handle->bh_fba_len, handle->bh_fba_pos, 0, error);
5544 5544
5545 5545 _sd_read_complete(handle, fba_pos, fba_len, error);
5546 5546
5547 5547 #if defined(_SD_DEBUG_PATTERN)
5548 5548 check_buf_consistency(handle, "rd");
5549 5549 #endif
5550 5550
5551 5551 SDTRACE(ST_EXIT|SDF_READ_EA, HANDLE_CD(handle),
5552 5552 handle->bh_fba_len, handle->bh_fba_pos, 0, 0);
5553 5553 _SD_READ_CALLBACK(handle);
5554 5554 }
5555 5555
5556 5556
5557 5557 /*
5558 5558 * _sd_async_write_ea - End action for async writes.
5559 5559 *
5560 5560 * ARGUMENTS:
5561 5561 * xhandle - handle allocated earlier on. (cast to blind_t)
5562 5562 * fba_pos - disk block number written to.
5563 5563 * fba_len - length in fbas.
5564 5564 * error - error from io if any.
5565 5565 *
5566 5566 * RETURNS:
5567 5567 * NONE.
5568 5568 *
5569 5569 * Comments:
5570 5570 * This routine is called at interrupt level when the write io is done.
5571 5571 * This is called only when we are in write-through mode and the write
5572 5572  *	call indicated an asynchronous callback (NSC_NOBLOCK).
5573 5573 */
5574 5574
5575 5575 /* ARGSUSED */
5576 5576
5577 5577 static void
5578 5578 _sd_async_write_ea(blind_t xhandle, nsc_off_t fba_pos, nsc_size_t fba_len,
5579 5579 int error)
5580 5580 {
5581 5581 _sd_buf_handle_t *handle = xhandle;
5582 5582 handle->bh_error = error;
5583 5583
5584 5584 if (error)
5585 5585 _sd_cache_files[HANDLE_CD(handle)].cd_info->sh_failed = 1;
5586 5586
5587 5587 _SD_WRITE_CALLBACK(handle);
5588 5588 }
5589 5589
5590 5590 /*
5591 5591 * update_dirty - set dirty bits in cache block which is already dirty
5592 5592 * cc_inuse is held, need cc_lock to avoid race with _sd_process_pending
5593 5593 * must check for I/O in-progress and set PEND_DIRTY.
5594 5594 * return previous dirty bits
5595 5595 * [if set _sd_process_pending will re-issue]
5596 5596 */
5597 5597 static _sd_bitmap_t
5598 5598 update_dirty(_sd_cctl_t *cc_ent, sdbc_cblk_fba_t st_off, sdbc_cblk_fba_t st_len)
5599 5599 {
5600 5600 _sd_bitmap_t old;
5601 5601
5602 5602 /* was FAST */
5603 5603 mutex_enter(&cc_ent->cc_lock);
5604 5604 old = CENTRY_DIRTY(cc_ent);
5605 5605 if (old) {
5606 5606 /*
5607 5607 * If we are writing to an FBA that is still marked dirty,
5608 5608 * record a write cancellation.
5609 5609 */
5610 5610 if (old & SDBC_GET_BITS(st_off, st_len)) {
5611 5611 CACHE_WRITE_CANCELLATION(CENTRY_CD(cc_ent));
5612 5612 }
5613 5613
5614 5614 /* This is a write to a block that was already dirty */
5615 5615 SDBC_SET_DIRTY(st_off, st_len, cc_ent);
5616 5616 sd_serialize();
5617 5617 if (CENTRY_IO_INPROGRESS(cc_ent))
5618 5618 cc_ent->cc_flag |= CC_PEND_DIRTY;
5619 5619 }
5620 5620 /* was FAST */
5621 5621 mutex_exit(&cc_ent->cc_lock);
5622 5622 return (old);
5623 5623 }
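
The overlap test in update_dirty() is a plain bitmask intersection: a write cancellation is recorded only when the new range touches bits that are already dirty. A hedged sketch, with an 8-bit mask and a GET_BITS() macro standing in for _sd_bitmap_t and SDBC_GET_BITS():

#include <stdio.h>

typedef unsigned char bitmap_t;		/* stand-in for _sd_bitmap_t */

#define	GET_BITS(off, len)	((bitmap_t)(((1u << (len)) - 1) << (off)))

int
main(void)
{
	bitmap_t dirty = GET_BITS(0, 3);	/* FBAs 0-2 already dirty */
	bitmap_t new1 = GET_BITS(4, 2);		/* disjoint write */
	bitmap_t new2 = GET_BITS(2, 2);		/* overlaps FBA 2 */

	printf("write 4-5: %s\n",
	    (dirty & new1) ? "cancellation" : "no cancellation");
	printf("write 2-3: %s\n",
	    (dirty & new2) ? "cancellation" : "no cancellation");
	return (0);
}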
5624 5624
5625 5625 /*
5626 5626 * _sd_write - Interface call to commit part of handle.
5627 5627 *
5628 5628 * ARGUMENTS:
5629 5629  *	handle	- handle allocated earlier on.
5630 5630 * fba_pos - disk block number to write to.
5631 5631 * fba_len - length in fbas.
5632 5632 * flag - (NSC_NOBLOCK | NSC_WRTHRU)
5633 5633 *
5634 5634 * RETURNS:
5635 5635 * errno if return > 0
5636 5636 * NSC_HIT (in cache), NSC_DONE (to disk) or NSC_PENDING otherwise.
5637 5637 *
5638 5638 * Comments:
5639 5639 * This routine checks validity of the handle and then calls the
5640 5640 * sync-write function if this write is determined to be write-through.
5641 5641 * Else, it reflects the data to the write blocks on the mirror node,
5642 5642 * (allocated in alloc_buf). If the cache block is not dirty, it is
5643 5643 * marked dirty and queued up for io processing later on.
5644 5644 * If parts are already dirty but io is not in progress yet, it is
5645 5645 * marked dirty and left alone (it is already in the queue)
5646 5646 * If parts are already dirty but io is in progress, it is marked
5647 5647 * dirty and also a flag is set indicating that this buffer should
5648 5648 * be reprocessed after the io-end-action.
5649 5649 * Attempt is made to coalesce multiple writes into a single list
5650 5650 * for io processing later on.
5651 5651 *
5652 5652 * Issuing of writes may be delayed until the handle is released;
5653 5653 * _sd_queue_write() sets NSC_QUEUE, indicating that dirty bits
5654 5654 * and reflection to mirror have already been done, just queue I/O.
5655 5655 */
5656 5656
5657 5657
5658 5658
5659 5659 int
5660 5660 _sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
5661 5661 int flag)
5662 5662 {
5663 5663 int cd = HANDLE_CD(handle);
5664 5664 int num_queued, ret, queue_only, store_only;
5665 5665 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5666 5666 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5667 5667 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5668 5668 nsc_size_t cur_fba_len; /* position in disk blocks */
5669 5669 _sd_cctl_t *cc_ent = NULL;
5670 5670 _sd_cctl_t *cur_chain = NULL, *dirty_next = NULL;
5671 5671
5672 5672
5673 5673 if (_sdbc_shutdown_in_progress) {
5674 5674 ret = EIO;
5675 5675 goto out;
5676 5676 }
5677 5677
5678 5678
5679 5679 if (!_SD_HANDLE_ACTIVE(handle)) {
5680 5680 SDALERT(SDF_WRITE,
5681 5681 SDT_INV_CD, 0, SDT_INV_BL, handle->bh_flag, 0);
5682 5682 ret = EINVAL;
5683 5683 goto out;
5684 5684 }
5685 5685 #if !defined(_SD_NOCHECKS)
5686 5686 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
5687 5687 if ((handle->bh_flag & NSC_WRBUF) == 0) {
5688 5688 ret = EINVAL;
5689 5689 goto out;
5690 5690 }
5691 5691 #endif
5692 5692 if (fba_len == 0) {
5693 5693 ret = NSC_DONE;
5694 5694 goto out;
5695 5695 }
5696 5696
5697 5697 /*
5698 5698 * store_only: don't queue this I/O yet
5699 5699 * queue_only: queue I/O to disk, don't store in mirror node
5700 5700 */
5701 5701 if (flag & NSC_QUEUE)
5702 5702 queue_only = 1, store_only = 0;
5703 5703 else
5704 5704 if (_SD_DELAY_QUEUE && (fba_len != handle->bh_fba_len))
5705 5705 queue_only = 0, store_only = 1;
5706 5706 else
5707 5707 queue_only = store_only = 0;
5708 5708
5709 5709 if (!queue_only && _SD_FORCE_DISCONNECT(fba_len))
5710 5710 _SD_DISCONNECT_CALLBACK(handle);
5711 5711
5712 5712 if (_sd_cache_files[cd].cd_info->sh_failed) {
5713 5713 ret = EIO;
5714 5714 goto out;
5715 5715 }
5716 5716
5717 5717 KSTAT_RUNQ_ENTER(cd);
5718 5718
5719 5719 SDTRACE(ST_ENTER|SDF_WRITE, cd, fba_len, fba_pos, flag, 0);
5720 5720
5721 5721 #if defined(_SD_DEBUG_PATTERN)
5722 5722 check_buf_consistency(handle, "wr");
5723 5723 #endif
5724 5724
5725 5725 cc_ent = handle->bh_centry;
5726 5726
5727 5727 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
5728 5728 cc_ent = cc_ent->cc_chain;
5729 5729
5730 5730 if (((handle->bh_flag | flag) & _SD_WRTHRU_MASK) ||
5731 5731 (!queue_only && _sd_remote_store(cc_ent, fba_pos, fba_len))) {
5732 5732 flag |= NSC_WRTHRU;
5733 5733
5734 5734 ret = _sd_sync_write(handle, fba_pos, fba_len, flag);
5735 5735 goto stats_exit;
5736 5736 }
5737 5737
5738 5738 if (store_only) /* enqueue in _sd_free_buf() */
5739 5739 handle->bh_flag |= NSC_QUEUE;
5740 5740 cur_fba_len = fba_len;
5741 5741 st_cblk_off = BLK_FBA_OFF(fba_pos);
5742 5742 st_cblk_len = BLK_FBAS - st_cblk_off;
5743 5743 if ((nsc_size_t)st_cblk_len >= fba_len) {
5744 5744 end_cblk_len = 0;
5745 5745 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5746 5746 } else {
5747 5747 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5748 5748 }
5749 5749
5750 5750 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, st_cblk_off,
5751 5751 st_cblk_len))
5752 5752 goto loop1;
5753 5753 if (store_only) {
5754 5754 SDBC_SET_TOFLUSH(st_cblk_off, st_cblk_len, cc_ent);
5755 5755 goto loop1;
5756 5756 }
5757 5757 SDBC_SET_DIRTY(st_cblk_off, st_cblk_len, cc_ent);
5758 5758 cur_chain = dirty_next = cc_ent;
5759 5759 num_queued = 1;
5760 5760
5761 5761 loop1:
5762 5762 DATA_LOG(SDF_WR, cc_ent, st_cblk_off, st_cblk_len);
5763 5763
5764 5764 DTRACE_PROBE4(_sd_write_data1, uint64_t, (uint64_t)
5765 5765 (BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + st_cblk_off),
5766 5766 int, st_cblk_len, char *,
5767 5767 *(int64_t *)(cc_ent->cc_data + FBA_SIZE(st_cblk_off)),
5768 5768 char *, *(int64_t *)(cc_ent->cc_data +
5769 5769 FBA_SIZE(st_cblk_off+ st_cblk_len) - 8));
5770 5770
5771 5771 cur_fba_len -= st_cblk_len;
5772 5772 cc_ent = cc_ent->cc_chain;
5773 5773
5774 5774 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
5775 5775 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0, BLK_FBAS)) {
5776 5776 if (cur_chain) {
5777 5777 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5778 5778 num_queued);
5779 5779 cur_chain = dirty_next = NULL;
5780 5780 }
5781 5781 goto loop2;
5782 5782 }
5783 5783 if (store_only) {
5784 5784 SDBC_SET_TOFLUSH(0, BLK_FBAS, cc_ent);
5785 5785 goto loop2;
5786 5786 }
5787 5787 SDBC_SET_DIRTY(0, BLK_FBAS, cc_ent);
5788 5788 if (dirty_next) {
5789 5789 dirty_next->cc_dirty_next = cc_ent;
5790 5790 dirty_next = cc_ent;
5791 5791 num_queued++;
5792 5792 } else {
5793 5793 cur_chain = dirty_next = cc_ent;
5794 5794 num_queued = 1;
5795 5795 }
5796 5796 loop2:
5797 5797 DATA_LOG(SDF_WR, cc_ent, 0, BLK_FBAS);
5798 5798
5799 5799 DTRACE_PROBE4(_sd_write_data2, uint64_t,
5800 5800 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5801 5801 int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
5802 5802 char *, *(int64_t *)(cc_ent->cc_data +
5803 5803 FBA_SIZE(BLK_FBAS) - 8));
5804 5804
5805 5805 cc_ent = cc_ent->cc_chain;
5806 5806 cur_fba_len -= BLK_FBAS;
5807 5807 }
5808 5808
5809 5809 #if defined(_SD_DEBUG)
5810 5810 if (cur_fba_len != end_cblk_len)
5811 5811 cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in "
5812 5812 "_sd_write", cur_fba_len, end_cblk_len);
5813 5813 #endif
5814 5814
5815 5815 if (cur_fba_len) {
5816 5816 if (CENTRY_DIRTY(cc_ent) && update_dirty(cc_ent, 0,
5817 5817 end_cblk_len)) {
5818 5818 if (cur_chain) {
5819 5819 _sd_enqueue_dirty(cd, cur_chain, dirty_next,
5820 5820 num_queued);
5821 5821 cur_chain = dirty_next = NULL;
5822 5822 }
5823 5823 goto loop3;
5824 5824 }
5825 5825 if (store_only) {
5826 5826 SDBC_SET_TOFLUSH(0, end_cblk_len, cc_ent);
5827 5827 goto loop3;
5828 5828 }
5829 5829 SDBC_SET_DIRTY(0, end_cblk_len, cc_ent);
5830 5830 if (dirty_next) {
5831 5831 dirty_next->cc_dirty_next = cc_ent;
5832 5832 dirty_next = cc_ent;
5833 5833 num_queued++;
5834 5834 } else {
5835 5835 cur_chain = dirty_next = cc_ent;
5836 5836 num_queued = 1;
5837 5837 }
5838 5838 }
5839 5839 loop3:
5840 5840 if (cur_fba_len) {
5841 5841 DATA_LOG(SDF_WR, cc_ent, 0, end_cblk_len);
5842 5842
5843 5843 DTRACE_PROBE4(_sd_write_data3, uint64_t,
5844 5844 (uint64_t)(BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent))),
5845 5845 int, end_cblk_len, char *, *(int64_t *)(cc_ent->cc_data),
5846 5846 char *, *(int64_t *)(cc_ent->cc_data +
5847 5847 FBA_SIZE(end_cblk_len) - 8));
5848 5848
5849 5849 }
5850 5850
5851 5851 if (!store_only && cur_chain) {
5852 5852 _sd_enqueue_dirty(cd, cur_chain, dirty_next, num_queued);
5853 5853 }
5854 5854
5855 5855 if (!queue_only) {
5856 5856 CACHE_FBA_WRITE(cd, fba_len);
5857 5857 CACHE_WRITE_HIT;
5858 5858
5859 5859 FBA_WRITE_IO_KSTATS(cd, FBA_SIZE(fba_len));
5860 5860 }
5861 5861
5862 5862 ret = NSC_HIT;
5863 5863
5864 5864 stats_exit:
5865 5865 SDTRACE(ST_EXIT|SDF_WRITE, cd, fba_len, fba_pos, flag, ret);
5866 5866 KSTAT_RUNQ_EXIT(cd);
5867 5867 out:
5868 5868 return (ret);
5869 5869 }
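
The flag logic at the top of _sd_write() picks one of three dispositions for the data; a compact restatement, with illustrative parameter names standing in for the NSC_QUEUE flag and the _SD_DELAY_QUEUE test:

#include <stdio.h>

static const char *
disposition(int nsc_queue, int delay_queue, int partial_handle)
{
	if (nsc_queue)
		return ("queue only: dirty bits and mirror already done");
	if (delay_queue && partial_handle)
		return ("store only: mark to-flush, enqueue in free_buf");
	return ("store and queue now");
}

int
main(void)
{
	printf("%s\n", disposition(1, 0, 0));
	printf("%s\n", disposition(0, 1, 1));
	printf("%s\n", disposition(0, 1, 0));
	return (0);
}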
5870 5870
5871 5871
5872 5872 /*
5873 5873 * _sd_queue_write(handle, fba_pos, fba_len): Queues delayed writes for
5874 5874 * flushing
5875 5875 *
5876 5876 * ARGUMENTS: handle - handle allocated with NSC_WRBUF
5877 5877 * fba_pos - starting fba pos from _sd_alloc_buf()
5878 5878 * fba_len - fba len from _sd_alloc_buf()
5879 5879 *
5880 5880 * USAGE : Called if _SD_DELAY_QUEUE is set. Finds all blocks in the
5881 5881 * handle marked for flushing and queues them to be written in
5882 5882 * optimized (i.e. sequential) order
5883 5883 */
5884 5884 static void
5885 5885 _sd_queue_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len)
5886 5886 {
5887 5887 nsc_off_t fba_end;
5888 5888 sdbc_cblk_fba_t sblk, len, dirty;
5889 5889 _sd_cctl_t *cc_ent;
5890 5890 nsc_off_t flush_pos;
5891 5891 int flush_pos_valid = 0;
5892 5892 nsc_size_t flush_len = 0;
5893 5893
5894 5894 cc_ent = handle->bh_centry;
5895 5895 fba_end = fba_pos + fba_len;
5896 5896 fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)); /* 1st block */
5897 5897 while (fba_pos < fba_end) {
5898 5898 dirty = cc_ent->cc_toflush;
5899 5899 cc_ent->cc_toflush = 0;
5900 5900 /*
5901 5901 * Full block
5902 5902 */
5903 5903 if (_SD_BMAP_ISFULL(dirty)) {
5904 5904 if (flush_pos_valid == 0) {
5905 5905 flush_pos_valid = 1;
5906 5906 flush_pos = fba_pos;
5907 5907 }
5908 5908 flush_len += BLK_FBAS;
5909 5909 }
5910 5910 /*
5911 5911 * Partial block
5912 5912 */
5913 5913 else while (dirty) {
5914 5914 sblk = SDBC_LOOKUP_STPOS(dirty);
5915 5915 len = SDBC_LOOKUP_LEN(dirty);
5916 5916 SDBC_LOOKUP_MODIFY(dirty);
5917 5917
5918 5918 if (sblk && flush_pos_valid) {
5919 5919 (void) _sd_write(handle, flush_pos, flush_len,
5920 5920 NSC_QUEUE);
5921 5921 flush_pos_valid = 0;
5922 5922 flush_len = 0;
5923 5923 }
5924 5924 if (flush_pos_valid == 0) {
5925 5925 flush_pos_valid = 1;
5926 5926 flush_pos = fba_pos + sblk;
5927 5927 }
5928 5928 flush_len += len;
5929 5929 }
5930 5930 fba_pos += BLK_FBAS;
5931 5931 cc_ent = cc_ent->cc_chain;
5932 5932 /*
5933 5933 * If we find a gap, write out what we've got
5934 5934 */
5935 5935 if (flush_pos_valid && (flush_pos + flush_len) != fba_pos) {
5936 5936 (void) _sd_write(handle, flush_pos, flush_len,
5937 5937 NSC_QUEUE);
5938 5938 flush_pos_valid = 0;
5939 5939 flush_len = 0;
5940 5940 }
5941 5941 }
5942 5942 if (flush_pos_valid)
5943 5943 (void) _sd_write(handle, flush_pos, flush_len, NSC_QUEUE);
5944 5944 }
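
The effect of _sd_queue_write() is to merge per-block to-flush masks into maximal contiguous FBA runs, cutting a run whenever a gap appears. A self-contained sketch of that coalescing, assuming 8-FBA blocks and a plain array of masks in place of the cc_toflush chain:

#include <stdio.h>

#define	FBAS_PER_BLK	8	/* assumed block size in FBAs */

static void
flush(unsigned pos, unsigned len)
{
	printf("flush pos %u len %u\n", pos, len);
}

int
main(void)
{
	/* to-flush mask per block; bit i marks FBA i of that block */
	unsigned char toflush[] = { 0xF0, 0xFF, 0x0F, 0x00, 0x3C };
	unsigned nblk = sizeof (toflush) / sizeof (toflush[0]);
	unsigned blk, bit, fba, run_pos = 0, run_len = 0;
	int run_valid = 0;

	for (blk = 0; blk < nblk; blk++) {
		for (bit = 0; bit < FBAS_PER_BLK; bit++) {
			if (!(toflush[blk] & (1u << bit)))
				continue;
			fba = blk * FBAS_PER_BLK + bit;
			if (run_valid && run_pos + run_len != fba) {
				flush(run_pos, run_len);	/* gap */
				run_valid = 0;
			}
			if (!run_valid) {
				run_valid = 1;
				run_pos = fba;
				run_len = 0;
			}
			run_len++;
		}
	}
	if (run_valid)
		flush(run_pos, run_len);
	return (0);
}

This prints two runs (FBAs 4-19, then 34-37), mirroring how the routine above issues one _sd_write(..., NSC_QUEUE) per contiguous stretch.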
5945 5945
5946 5946
5947 5947 static int
5948 5948 _sd_remote_store(_sd_cctl_t *cc_ent, nsc_off_t fba_pos, nsc_size_t fba_len)
5949 5949 {
5950 5950 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
5951 5951 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
5952 5952 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
5953 5953 ss_resource_t *ss_res;
5954 5954
5955 5955 if (_sd_nodes_configured <= 2 && _sd_is_mirror_down())
5956 5956 return (0);
5957 5957 st_cblk_off = BLK_FBA_OFF(fba_pos);
5958 5958 st_cblk_len = BLK_FBAS - st_cblk_off;
5959 5959 if ((nsc_size_t)st_cblk_len >= fba_len) {
5960 5960 end_cblk_len = 0;
5961 5961 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
5962 5962 } else {
5963 5963 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
5964 5964 }
5965 5965
5966 5966 fba_len -= st_cblk_len;
5967 5967
5968 5968 ss_res = cc_ent->cc_write->sc_res;
5969 5969 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5970 5970 cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len),
5971 5971 FBA_SIZE(st_cblk_off))) {
5972 5972
5973 5973 cmn_err(CE_WARN,
5974 5974 "!sdbc(_sd_write) safe store failed. Going synchronous");
5975 5975 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5976 5976 fba_pos, 0, -1);
5977 5977 return (-1);
5978 5978 }
5979 5979
5980 5980 cc_ent = cc_ent->cc_chain;
5981 5981 while (fba_len > (nsc_size_t)end_cblk_len) {
5982 5982 fba_len -= BLK_FBAS;
5983 5983
5984 5984 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res, cc_ent->cc_data,
5985 5985 CACHE_BLOCK_SIZE, 0)) {
5986 5986
5987 5987 cmn_err(CE_WARN, "!sdbc(_sd_write) safe store failed. "
5988 5988 "Going synchronous");
5989 5989 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
5990 5990 fba_pos, 0, -1);
5991 5991 return (-1);
5992 5992 }
5993 5993
5994 5994 cc_ent = cc_ent->cc_chain;
5995 5995 } /* end while */
5996 5996
5997 5997 if (fba_len) {
5998 5998 if (SSOP_WRITE_CBLOCK(sdbc_safestore, ss_res,
5999 5999 cc_ent->cc_data, FBA_SIZE(end_cblk_len), 0)) {
6000 6000
6001 6001 cmn_err(CE_WARN, "!sdbc(_sd_write) nvmem dma failed. "
6002 6002 "Going synchronous");
6003 6003 SDTRACE(SDF_REFLECT, CENTRY_CD(cc_ent), fba_len,
6004 6004 fba_pos, 0, -1);
6005 6005 return (-1);
6006 6006 }
6007 6007 }
6008 6008 return (0);
6009 6009 }
6010 6010
6011 6011
6012 6012 /*
6013 6013 * _sd_sync_write2 - Write-through function.
6014 6014 *
6015 6015 * ARGUMENTS:
6016 6016 * wr_handle - handle into which to write the data.
6017 6017 * wr_st_pos - starting FBA position in wr_handle.
6018 6018 * fba_len - length in fbas.
6019 6019 * flag - NSC_NOBLOCK for async io.
6020 6020 * rd_handle - handle from which to read the data, or NULL.
6021 6021 * rd_st_pos - starting FBA position in rd_handle.
6022 6022 *
6023 6023 * RETURNS:
6024 6024 * errno if return > 0
6025 6025 * NSC_DONE or NSC_PENDING otherwise.
6026 6026 *
6027 6027 * Comments:
6028 6028 * This routine initiates io of the indicated portion. It returns
6029 6029 * synchronously after io is completed if NSC_NOBLOCK is not set.
6030 6030 * Else NSC_PENDING is returned with a subsequent write callback on
6031 6031 * io completion.
6032 6032 *
6033 6033 * See _sd_copy_direct() for usage when
6034 6034 * (wr_handle != rd_handle && rd_handle != NULL)
6035 6035 */
6036 6036
6037 6037 static int
6038 6038 _sd_sync_write2(_sd_buf_handle_t *wr_handle, nsc_off_t wr_st_pos,
6039 6039 nsc_size_t fba_len, int flag, _sd_buf_handle_t *rd_handle,
6040 6040 nsc_off_t rd_st_pos)
6041 6041 {
6042 6042 void (*fn)(blind_t, nsc_off_t, nsc_size_t, int);
6043 6043 _sd_cctl_t *wr_ent, *rd_ent;
6044 6044 nsc_size_t this_len;
6045 6045 nsc_off_t rd_pos, wr_pos;
6046 6046 nsc_size_t log_bytes;
6047 6047 int cd = HANDLE_CD(wr_handle);
6048 6048 int err;
6049 6049 uint_t dirty;
6050 6050 struct buf *bp;
6051 6051
6052 6052 LINTUSED(flag);
6053 6053
6054 6054 _SD_DISCONNECT_CALLBACK(wr_handle);
6055 6055
6056 6056 if (rd_handle == NULL) {
6057 6057 rd_handle = wr_handle;
6058 6058 rd_st_pos = wr_st_pos;
6059 6059 }
6060 6060
6061 6061 wr_ent = wr_handle->bh_centry;
6062 6062 while (CENTRY_BLK(wr_ent) != FBA_TO_BLK_NUM(wr_st_pos))
6063 6063 wr_ent = wr_ent->cc_chain;
6064 6064
6065 6065 rd_ent = rd_handle->bh_centry;
6066 6066 while (CENTRY_BLK(rd_ent) != FBA_TO_BLK_NUM(rd_st_pos))
6067 6067 rd_ent = rd_ent->cc_chain;
6068 6068
6069 6069 bp = sd_alloc_iob(_sd_cache_files[cd].cd_crdev,
6070 6070 wr_st_pos, FBA_TO_BLK_LEN(fba_len) + 2, B_WRITE);
6071 6071
6072 6072 if (bp == NULL)
6073 6073 return (E2BIG);
6074 6074
6075 6075 wr_pos = BLK_FBA_OFF(wr_st_pos);
6076 6076 rd_pos = BLK_FBA_OFF(rd_st_pos);
6077 6077 log_bytes = 0;
6078 6078
6079 6079 do {
6080 6080 this_len = min((BLK_FBAS - rd_pos), (BLK_FBAS - wr_pos));
6081 6081
6082 6082 if (this_len > fba_len)
6083 6083 this_len = fba_len;
6084 6084
6085 6085 /*
6086 6086 * clear dirty bits in the write handle.
6087 6087 */
6088 6088
6089 6089 if (CENTRY_DIRTY(wr_ent)) {
6090 6090 mutex_enter(&wr_ent->cc_lock);
6091 6091
6092 6092 if (CENTRY_DIRTY(wr_ent)) {
6093 6093 if (this_len == (nsc_size_t)BLK_FBAS ||
6094 6094 rd_handle != wr_handle) {
6095 6095 /*
6096 6096 * optimization for when we have a
6097 6097 * full cache block, or are doing
6098 6098 * copy_direct (see below).
6099 6099 */
6100 6100
6101 6101 wr_ent->cc_write->sc_dirty = 0;
6102 6102 } else {
6103 6103 dirty = wr_ent->cc_write->sc_dirty;
6104 6104 dirty &= ~(SDBC_GET_BITS(
6105 6105 wr_pos, this_len));
6106 6106 wr_ent->cc_write->sc_dirty = dirty;
6107 6107 }
6108 6108
6109 6109 SSOP_SETCENTRY(sdbc_safestore,
6110 6110 wr_ent->cc_write);
6111 6111 }
6112 6112
6113 6113 mutex_exit(&wr_ent->cc_lock);
6114 6114 }
6115 6115
6116 6116 /*
6117 6117 * update valid bits in the write handle.
6118 6118 */
6119 6119
6120 6120 if (rd_handle == wr_handle) {
6121 6121 if (this_len == (nsc_size_t)BLK_FBAS) {
6122 6122 SET_FULLY_VALID(wr_ent);
6123 6123 } else {
6124 6124 SDBC_SET_VALID_BITS(wr_pos, this_len, wr_ent);
6125 6125 }
6126 6126 } else {
6127 6127 /*
6128 6128 * doing copy_direct, so mark the write handle
6129 6129 * as invalid since the data is on disk, but not
6130 6130 * in cache.
6131 6131 */
6132 6132 wr_ent->cc_valid = 0;
6133 6133 }
6134 6134
6135 6135 DATA_LOG(SDF_WRSYNC, rd_ent, rd_pos, this_len);
6136 6136
6137 6137 DTRACE_PROBE4(_sd_sync_write2_data, uint64_t,
6138 6138 (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(rd_ent)) + rd_pos,
6139 6139 uint64_t, (uint64_t)this_len, char *,
6140 6140 *(int64_t *)(rd_ent->cc_data + FBA_SIZE(rd_pos)),
6141 6141 char *, *(int64_t *)(rd_ent->cc_data +
6142 6142 FBA_SIZE(rd_pos + this_len) - 8));
6143 6143
6144 6144 sd_add_fba(bp, &rd_ent->cc_addr, rd_pos, this_len);
6145 6145
6146 6146 log_bytes += FBA_SIZE(this_len);
6147 6147 fba_len -= this_len;
6148 6148
6149 6149 wr_pos += this_len;
6150 6150 if (wr_pos >= (nsc_size_t)BLK_FBAS) {
6151 6151 wr_ent = wr_ent->cc_chain;
6152 6152 wr_pos = 0;
6153 6153 }
6154 6154
6155 6155 rd_pos += this_len;
6156 6156 if (rd_pos >= (nsc_size_t)BLK_FBAS) {
6157 6157 rd_ent = rd_ent->cc_chain;
6158 6158 rd_pos = 0;
6159 6159 }
6160 6160
6161 6161 } while (fba_len > 0);
6162 6162
6163 6163 DISK_FBA_WRITE(cd, FBA_NUM(log_bytes));
6164 6164 CACHE_WRITE_MISS;
6165 6165
6166 6166 FBA_WRITE_IO_KSTATS(cd, log_bytes);
6167 6167
6168 6168 fn = (wr_handle->bh_flag & NSC_NOBLOCK) ? _sd_async_write_ea : NULL;
6169 6169
6170 6170 err = sd_start_io(bp, _sd_cache_files[cd].cd_strategy, fn, wr_handle);
6171 6171
6172 6172 if (err != NSC_PENDING) {
6173 6173 DATA_LOG_CHAIN(SDF_WRSYEA, wr_handle->bh_centry,
6174 6174 wr_st_pos, FBA_NUM(log_bytes));
6175 6175 }
6176 6176
6177 6177 return (err);
6178 6178 }
6179 6179
6180 6180
6181 6181 static int
6182 6182 _sd_sync_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6183 6183 int flag)
6184 6184 {
6185 6185 return (_sd_sync_write2(handle, fba_pos, fba_len, flag, NULL, 0));
6186 6186 }
6187 6187
6188 6188
6189 6189 /*
6190 6190 * _sd_zero - Interface call to zero out a portion of cache blocks.
6191 6191 *
6192 6192 * ARGUMENTS:
6193 6193 * handle - handle allocated earlier on.
6194 6194 * fba_pos - disk block number to zero from.
6195 6195 * fba_len - length in fbas.
6196 6196 * flag - NSC_NOBLOCK for async io.
6197 6197 *
6198 6198 * RETURNS:
6199 6199 * errno if return > 0
6200 6200 * NSC_DONE or NSC_PENDING otherwise.
6201 6201 *
6202 6202 * Comments:
6203 6203 * This routine zeroes out the indicated portion of the cache blocks
6204 6204 * and commits the data to disk.
6205 6205 * (See write for more details on the commit)
6206 6206 */
6207 6207
6208 6208
6209 6209 int
6210 6210 _sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
6211 6211 int flag)
6212 6212 {
6213 6213 int cd;
6214 6214 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
6215 6215 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
6216 6216 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
6217 6217 nsc_size_t cur_fba_len; /* position in disk blocks */
6218 6218 int ret;
6219 6219 _sd_cctl_t *cc_ent;
6220 6220
6221 6221 if (_sdbc_shutdown_in_progress) {
6222 6222 DTRACE_PROBE(shutdown);
6223 6223 return (EIO);
6224 6224 }
6225 6225
6226 6226 if (!_SD_HANDLE_ACTIVE(handle)) {
6227 6227 cmn_err(CE_WARN, "!sdbc(_sd_zero) handle %p not active",
6228 6228 (void *)handle);
6229 6229
6230 6230 DTRACE_PROBE1(handle_active, int, handle->bh_flag);
6231 6231
6232 6232 return (EINVAL);
6233 6233 }
6234 6234 ASSERT_HANDLE_LIMITS(handle, fba_pos, fba_len);
6235 6235 if ((handle->bh_flag & NSC_WRBUF) == 0) {
6236 6236 DTRACE_PROBE1(handle_write, int, handle->bh_flag);
6237 6237 return (EINVAL);
6238 6238 }
6239 6239
6240 6240 if (fba_len == 0) {
6241 6241 DTRACE_PROBE(zero_len);
6242 6242 return (NSC_DONE);
6243 6243 }
6244 6244
6245 6245 if (_SD_FORCE_DISCONNECT(fba_len))
6246 6246 _SD_DISCONNECT_CALLBACK(handle);
6247 6247
6248 6248 cd = HANDLE_CD(handle);
6249 6249 SDTRACE(ST_ENTER|SDF_ZERO, cd, fba_len, fba_pos, flag, 0);
6250 6250
6251 6251 cc_ent = handle->bh_centry;
6252 6252 while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
6253 6253 cc_ent = cc_ent->cc_chain;
6254 6254 cur_fba_len = fba_len;
6255 6255 st_cblk_off = BLK_FBA_OFF(fba_pos);
6256 6256 st_cblk_len = BLK_FBAS - st_cblk_off;
6257 6257 if ((nsc_size_t)st_cblk_len >= fba_len) {
6258 6258 end_cblk_len = 0;
6259 6259 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6260 6260 } else {
6261 6261 end_cblk_len = BLK_FBA_OFF(fba_pos + fba_len);
6262 6262 }
6263 6263
6264 6264 cur_fba_len -= st_cblk_len;
6265 6265 bzero(cc_ent->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6266 6266
6267 6267 cc_ent = cc_ent->cc_chain;
6268 6268 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6269 6269 cur_fba_len -= BLK_FBAS;
6270 6270 bzero(cc_ent->cc_data, CACHE_BLOCK_SIZE);
6271 6271 cc_ent = cc_ent->cc_chain;
6272 6272 }
6273 6273 if (cur_fba_len) {
6274 6274 bzero(cc_ent->cc_data, FBA_SIZE(cur_fba_len));
6275 6275 }
6276 6276
6277 6277 ret = _sd_write(handle, fba_pos, fba_len, flag);
6278 6278 SDTRACE(ST_EXIT|SDF_ZERO, cd, fba_len, fba_pos, flag, ret);
6279 6279
6280 6280 return (ret);
6281 6281 }
6282 6282
6283 6283
6284 6284 /*
6285 6285 * _sd_copy - Copies portions of 2 handles.
6286 6286 *
6287 6287 * ARGUMENTS:
6288 6288 * handle1 - handle allocated earlier on.
6289 6289 * handle2 - handle allocated earlier on.
6290 6290 * fba_pos1 - disk block number to read from.
6291 6291 * fba_pos2 - disk block number to write to.
6292 6292 * fba_len - length in fbas.
6293 6293 *
6294 6294 * RETURNS:
6295 6295 * errno if return > 0
6296 6296 * NSC_DONE otherwise.
6297 6297 *
6298 6298 * Comments:
6299 6299  *	This routine copies data between the two handles.
6300 6300 * WARNING: this could put the cache blocks in the destination handle
6301 6301 * in an inconsistent state. (the blocks could be valid in cache,
6302 6302 * but the copy makes the cache different from disk)
6303 6303 *
6304 6304 */
6305 6305
6306 6306
6307 6307 int
6308 6308 _sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6309 6309 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6310 6310 {
6311 6311 sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
6312 6312 sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
6313 6313 sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
6314 6314 nsc_off_t off1, off2; /* offsets in FBA's into the disk */
6315 6315 nsc_size_t cur_fba_len; /* position in disk blocks */
6316 6316 _sd_cctl_t *cc_ent1, *cc_ent2;
6317 6317
6318 6318 if (_sdbc_shutdown_in_progress) {
6319 6319 DTRACE_PROBE(shutdown);
6320 6320 return (EIO);
6321 6321 }
6322 6322 if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6323 6323 cmn_err(CE_WARN, "!sdbc(_sd_copy) handle %p or %p not active",
6324 6324 (void *)handle1, (void *)handle2);
6325 6325
6326 6326 DTRACE_PROBE2(handle_active1, int, handle1->bh_flag,
6327 6327 int, handle2->bh_flag);
6328 6328
6329 6329 return (EINVAL);
6330 6330 }
6331 6331 ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6332 6332 ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6333 6333
6334 6334 cc_ent1 = handle1->bh_centry;
6335 6335 while (CENTRY_BLK(cc_ent1) != FBA_TO_BLK_NUM(fba_pos1))
6336 6336 cc_ent1 = cc_ent1->cc_chain;
6337 6337
6338 6338 cc_ent2 = handle2->bh_centry;
6339 6339 while (CENTRY_BLK(cc_ent2) != FBA_TO_BLK_NUM(fba_pos2))
6340 6340 cc_ent2 = cc_ent2->cc_chain;
6341 6341
6342 6342 if (BLK_FBA_OFF(fba_pos1) != BLK_FBA_OFF(fba_pos2)) {
6343 6343 /* Different offsets, do it slowly (per fba) */
6344 6344
6345 6345 while (fba_len) {
6346 6346 off1 = FBA_SIZE(BLK_FBA_OFF(fba_pos1));
6347 6347 off2 = FBA_SIZE(BLK_FBA_OFF(fba_pos2));
6348 6348
6349 6349 bcopy(cc_ent1->cc_data+off1, cc_ent2->cc_data+off2,
6350 6350 FBA_SIZE(1));
6351 6351
6352 6352 fba_pos1++;
6353 6353 fba_pos2++;
6354 6354 fba_len--;
6355 6355
6356 6356 if (FBA_TO_BLK_NUM(fba_pos1) != CENTRY_BLK(cc_ent1))
6357 6357 cc_ent1 = cc_ent1->cc_chain;
6358 6358 if (FBA_TO_BLK_NUM(fba_pos2) != CENTRY_BLK(cc_ent2))
6359 6359 cc_ent2 = cc_ent2->cc_chain;
6360 6360 }
6361 6361
6362 6362 DTRACE_PROBE(_sd_copy_end);
6363 6363 return (NSC_DONE);
6364 6364 }
6365 6365 cur_fba_len = fba_len;
6366 6366 st_cblk_off = BLK_FBA_OFF(fba_pos1);
6367 6367 st_cblk_len = BLK_FBAS - st_cblk_off;
6368 6368 if ((nsc_size_t)st_cblk_len >= fba_len) {
6369 6369 end_cblk_len = 0;
6370 6370 st_cblk_len = (sdbc_cblk_fba_t)fba_len;
6371 6371 } else {
6372 6372 end_cblk_len = BLK_FBA_OFF(fba_pos1 + fba_len);
6373 6373 }
6374 6374
6375 6375 bcopy(cc_ent1->cc_data + FBA_SIZE(st_cblk_off),
6376 6376 cc_ent2->cc_data + FBA_SIZE(st_cblk_off), FBA_SIZE(st_cblk_len));
6377 6377 cur_fba_len -= st_cblk_len;
6378 6378 cc_ent1 = cc_ent1->cc_chain;
6379 6379 cc_ent2 = cc_ent2->cc_chain;
6380 6380
6381 6381 while (cur_fba_len > (nsc_size_t)end_cblk_len) {
6382 6382 bcopy(cc_ent1->cc_data, cc_ent2->cc_data, CACHE_BLOCK_SIZE);
6383 6383 cc_ent1 = cc_ent1->cc_chain;
6384 6384 cc_ent2 = cc_ent2->cc_chain;
6385 6385 cur_fba_len -= BLK_FBAS;
6386 6386 }
6387 6387 if (cur_fba_len) {
6388 6388 bcopy(cc_ent1->cc_data, cc_ent2->cc_data,
6389 6389 FBA_SIZE(end_cblk_len));
6390 6390 }
6391 6391
6392 6392 return (NSC_DONE);
6393 6393 }
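
The fork in _sd_copy() comes down to whether the two positions share the same in-block offset; only then do the two chains line up so whole blocks can be bcopy'd, otherwise it falls back to FBA-at-a-time copies. A tiny sketch of that test, with an assumed 8-FBA block:

#include <stdio.h>

#define	FBAS_PER_BLK	8
#define	FBA_OFF(pos)	((unsigned)((pos) % FBAS_PER_BLK))

int
main(void)
{
	unsigned long long src = 21, dst = 45;	/* both at offset 5 */

	if (FBA_OFF(src) == FBA_OFF(dst))
		printf("aligned: block-at-a-time copy\n");
	else
		printf("unaligned: FBA-at-a-time copy\n");
	return (0);
}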
6394 6394
6395 6395
6396 6396 /*
6397 6397 * _sd_copy_direct - Copies data from one handle direct to another disk.
6398 6398 *
6399 6399 * ARGUMENTS:
6400 6400 * handle1 - handle to read from
6401 6401 * handle2 - handle to write to
6402 6402 * fba_pos1 - disk block number to read from.
6403 6403 * fba_pos2 - disk block number to write to.
6404 6404 * fba_len - length in fbas.
6405 6405 *
6406 6406 * RETURNS:
6407 6407 * errno if return > 0
6408 6408 * NSC_DONE otherwise.
6409 6409 *
6410 6410 * Comments:
6411 6411 * This routine copies data from handle1 directly (sync write)
6412 6412 * onto the disk pointed to by handle2. The handle2 is then
6413 6413  *	onto the disk pointed to by handle2. Handle2 is then
6414 6414  *	invalidated, since the data it contains is now stale compared to
6415 6415 */
6416 6416
6417 6417 static int
6418 6418 _sd_copy_direct(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
6419 6419 nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len)
6420 6420 {
6421 6421 int rc;
6422 6422
6423 6423 if (_sdbc_shutdown_in_progress) {
6424 6424 DTRACE_PROBE(shutdown);
6425 6425 return (EIO);
6426 6426 }
6427 6427
6428 6428 if (!_SD_HANDLE_ACTIVE(handle1) || !_SD_HANDLE_ACTIVE(handle2)) {
6429 6429 cmn_err(CE_WARN,
6430 6430 "!sdbc(_sd_copy_direct) handle %p or %p not active",
6431 6431 (void *)handle1, (void *)handle2);
6432 6432
6433 6433 DTRACE_PROBE2(handle_active2, int, handle1->bh_flag,
6434 6434 int, handle2->bh_flag);
6435 6435
6436 6436 return (EINVAL);
6437 6437 }
6438 6438
6439 6439 ASSERT_HANDLE_LIMITS(handle1, fba_pos1, fba_len);
6440 6440 ASSERT_HANDLE_LIMITS(handle2, fba_pos2, fba_len);
6441 6441
6442 6442 if ((handle2->bh_flag & NSC_WRITE) == 0) {
6443 6443 cmn_err(CE_WARN,
6444 6444 "!sdbc(_sd_copy_direct) handle2 %p is not writeable",
6445 6445 (void *)handle2);
6446 6446 DTRACE_PROBE1(handle2_write, int, handle2->bh_flag);
6447 6447 return (EINVAL);
6448 6448 }
6449 6449
6450 6450 rc = _sd_sync_write2(handle2, fba_pos2, fba_len, 0, handle1, fba_pos1);
6451 6451
6452 6452 return (rc);
6453 6453 }
6454 6454
6455 6455
6456 6456 /*
6457 6457 * _sd_enqueue_dirty - Enqueue a list of dirty buffers.
6458 6458 *
6459 6459 * ARGUMENTS:
6460 6460 * cd - cache descriptor.
6461 6461 * chain - pointer to list.
6462 6462 * cc_last - last entry in the chain.
6463 6463 * numq - number of entries in the list.
6464 6464 *
6465 6465 * RETURNS:
6466 6466 * NONE.
6467 6467 *
6468 6468 * Comments:
6469 6469 * This routine queues up the dirty blocks for io processing.
6470 6470  *	It uses cc_last to try to coalesce multiple lists into a
6471 6471 * single list, if consecutive writes are sequential in nature.
6472 6472 */
6473 6473
6474 6474 void
6475 6475 _sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last, int numq)
6476 6476 {
6477 6477 _sd_cd_info_t *cdi;
6478 6478 _sd_cctl_t *last_ent;
6479 6479 int start_write = 0, maxq = SGIO_MAX;
6480 6480
6481 6481 ASSERT(cd >= 0);
6482 6482 cdi = &(_sd_cache_files[cd]);
6483 6483 #if defined(_SD_DEBUG)
6484 6484 if (chain->cc_dirty_link)
6485 6485 cmn_err(CE_WARN, "!dirty_link set in enq %x fl %x",
6486 6486 chain->cc_dirty_link, chain->cc_flag);
6487 6487 #endif
6488 6488
6489 6489 /* was FAST */
6490 6490 mutex_enter(&(cdi->cd_lock));
6491 6491 cdi->cd_info->sh_numdirty += numq;
6492 6492 if (cc_last == NULL)
6493 6493 numq = 0;
6494 6494
6495 6495 if (cdi->cd_dirty_head == NULL) {
6496 6496 cdi->cd_dirty_head = cdi->cd_dirty_tail = chain;
6497 6497 cdi->cd_last_ent = cc_last;
6498 6498 cdi->cd_lastchain_ptr = chain;
6499 6499 cdi->cd_lastchain = numq;
6500 6500 } else {
6501 6501 if ((cc_last) && (last_ent = cdi->cd_last_ent) &&
6502 6502 (CENTRY_BLK(chain) == (CENTRY_BLK(last_ent)+1)) &&
6503 6503 (SDBC_DIRTY_NEIGHBORS(last_ent, chain)) &&
6504 6504 (cdi->cd_lastchain + numq < maxq)) {
6505 6505 cdi->cd_last_ent->cc_dirty_next = chain;
6506 6506 cdi->cd_last_ent = cc_last;
6507 6507 cdi->cd_lastchain += numq;
6508 6508 } else {
6509 6509 cdi->cd_dirty_tail->cc_dirty_link = chain;
6510 6510 cdi->cd_dirty_tail = chain;
6511 6511 cdi->cd_last_ent = cc_last;
6512 6512 cdi->cd_lastchain_ptr = chain;
6513 6513 cdi->cd_lastchain = numq;
6514 6514 start_write = 1;
6515 6515 }
6516 6516 }
6517 6517 /* was FAST */
6518 6518 mutex_exit(&(cdi->cd_lock));
6519 6519 if (start_write)
6520 6520 (void) _SD_CD_WRITER(cd);
6521 6521 }
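
The coalescing condition above only glues a new chain onto the pending one when it begins at the very next cache block and the combined count stays under the scatter/gather limit (the SDBC_DIRTY_NEIGHBORS test is elided here). Restated in isolation, with MAX_SG as an assumed stand-in for SGIO_MAX:

#include <stdio.h>

#define	MAX_SG	256	/* assumed scatter/gather limit */

static int
can_append(unsigned last_blk, unsigned new_blk, int pending, int numq)
{
	return (new_blk == last_blk + 1 && pending + numq < MAX_SG);
}

int
main(void)
{
	printf("%d\n", can_append(99, 100, 10, 4));	/* 1: sequential */
	printf("%d\n", can_append(99, 102, 10, 4));	/* 0: gap */
	printf("%d\n", can_append(99, 100, 254, 4));	/* 0: too long */
	return (0);
}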
6522 6522
6523 6523 /*
6524 6524 * _sd_enqueue_dirty_chain - Enqueue a chain of a list of dirty buffers.
6525 6525 *
6526 6526 * ARGUMENTS:
6527 6527 * cd - cache descriptor.
6528 6528 * chain_first - first list in this chain.
6529 6529 * chain_last - last list in this chain.
6530 6530 * numq - number of entries being queue (total of all lists)
6531 6531 *
6532 6532 * RETURNS:
6533 6533 * NONE.
6534 6534 *
6535 6535 * Comments:
6536 6536 * This routine is called from the processing after io completions.
6537 6537 * If the buffers are still dirty, they are queued up in one shot.
6538 6538 */
6539 6539
6540 6540 void
6541 6541 _sd_enqueue_dirty_chain(int cd,
6542 6542 _sd_cctl_t *chain_first,
6543 6543 _sd_cctl_t *chain_last,
6544 6544 int numq)
6545 6545 {
6546 6546 _sd_cd_info_t *cdi;
6547 6547
6548 6548 ASSERT(cd >= 0);
6549 6549 cdi = &(_sd_cache_files[cd]);
6550 6550 if (chain_last->cc_dirty_link)
6551 6551 cmn_err(CE_PANIC,
6552 6552 "!_sd_enqueue_dirty_chain: chain_last %p dirty_link %p",
6553 6553 (void *)chain_last, (void *)chain_last->cc_dirty_link);
6554 6554 /* was FAST */
6555 6555 mutex_enter(&(cdi->cd_lock));
6556 6556 cdi->cd_last_ent = NULL;
6557 6557 cdi->cd_lastchain_ptr = NULL;
6558 6558 cdi->cd_lastchain = 0;
6559 6559
6560 6560 cdi->cd_info->sh_numdirty += numq;
6561 6561 if (cdi->cd_dirty_head == NULL) {
6562 6562 cdi->cd_dirty_head = chain_first;
6563 6563 cdi->cd_dirty_tail = chain_last;
6564 6564 } else {
6565 6565 cdi->cd_dirty_tail->cc_dirty_link = chain_first;
6566 6566 cdi->cd_dirty_tail = chain_last;
6567 6567 }
6568 6568 /* was FAST */
6569 6569 mutex_exit(&(cdi->cd_lock));
6570 6570 }
6571 6571
6572 6572
6573 6573 #ifndef _MULTI_DATAMODEL
6574 6574 /* ARGSUSED */
6575 6575 #endif
6576 6576 static int
6577 6577 convert_stats(_sd_stats32_t *uptr)
6578 6578 /*
6579 6579 * Convert the 64 bit statistic structure to 32bit version.
6580 6580 * Possibly losing information when cache is > 4gb. Ha!
6581 6581 *
6582 6582  * NOTE: this code isn't really MT ready since the copied-to struct
6583 6583 * is static. However the race is pretty benign and isn't a whole
6584 6584 * lot worse than the vanilla version which copies data to user
6585 6585 * space from kernel structures that can be changing under it too.
6586 6586 * We can't use a local stack structure since the data size is
6587 6587 * 70k or so and kernel stacks are tiny (8k).
6588 6588 */
6589 6589 {
6590 6590 #ifndef _MULTI_DATAMODEL
6591 6591 return (SDBC_EMODELCONVERT);
6592 6592 #else
6593 6593 int rc = 0;
6594 6594
6595 6595 /*
6596 6596 * This could be done in less code with bcopy type operations
6597 6597 * but this is simpler to follow and easier to change if
6598 6598 * the structures change.
6599 6599 */
6600 6600
6601 6601 _sd_cache_stats32->net_dirty = _sd_cache_stats->net_dirty;
6602 6602 _sd_cache_stats32->net_pending = _sd_cache_stats->net_pending;
6603 6603 _sd_cache_stats32->net_free = _sd_cache_stats->net_free;
6604 6604 _sd_cache_stats32->st_count = _sd_cache_stats->st_count;
6605 6605 _sd_cache_stats32->st_loc_count = _sd_cache_stats->st_loc_count;
6606 6606 _sd_cache_stats32->st_rdhits = _sd_cache_stats->st_rdhits;
6607 6607 _sd_cache_stats32->st_rdmiss = _sd_cache_stats->st_rdmiss;
6608 6608 _sd_cache_stats32->st_wrhits = _sd_cache_stats->st_wrhits;
6609 6609 _sd_cache_stats32->st_wrmiss = _sd_cache_stats->st_wrmiss;
6610 6610 _sd_cache_stats32->st_blksize = _sd_cache_stats->st_blksize;
6611 6611
6612 6612 _sd_cache_stats32->st_lru_blocks = _sd_cache_stats->st_lru_blocks;
6613 6613 _sd_cache_stats32->st_lru_noreq = _sd_cache_stats->st_lru_noreq;
6614 6614 _sd_cache_stats32->st_lru_req = _sd_cache_stats->st_lru_req;
6615 6615
6616 6616 _sd_cache_stats32->st_wlru_inq = _sd_cache_stats->st_wlru_inq;
6617 6617
6618 6618 _sd_cache_stats32->st_cachesize = _sd_cache_stats->st_cachesize;
6619 6619 _sd_cache_stats32->st_numblocks = _sd_cache_stats->st_numblocks;
6620 6620 _sd_cache_stats32->st_wrcancelns = _sd_cache_stats->st_wrcancelns;
6621 6621 _sd_cache_stats32->st_destaged = _sd_cache_stats->st_destaged;
6622 6622
6623 6623 /*
6624 6624 * bcopy the shared stats which has nothing that needs conversion
6625 6625 * in them
6626 6626 */
6627 6627
6628 6628 bcopy(_sd_cache_stats->st_shared, _sd_cache_stats32->st_shared,
6629 6629 sizeof (_sd_shared_t) * sdbc_max_devs);
6630 6630
6631 6631 if (copyout(_sd_cache_stats32, uptr, sizeof (_sd_stats32_t) +
6632 6632 (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6633 6633 rc = EFAULT;
6634 6634
6635 6635 return (rc);
6636 6636 #endif /* _MULTI_DATAMODEL */
6637 6637 }
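
The conversion is just a field-by-field narrowing copy into a parallel ILP32 layout before copyout. A sketch of the failure mode the "> 4gb" comment jokes about, using assumed two-field structs in place of _sd_stats_t and _sd_stats32_t:

#include <stdio.h>
#include <stdint.h>

struct stats64 { uint64_t cachesize; uint64_t numblocks; };
struct stats32 { uint32_t cachesize; uint32_t numblocks; };

int
main(void)
{
	struct stats64 s = { 5ULL << 30, 1310720 };	/* 5 GB cache */
	struct stats32 s32;

	/* narrowing copy: anything above 4 GB silently wraps */
	s32.cachesize = (uint32_t)s.cachesize;
	s32.numblocks = (uint32_t)s.numblocks;

	printf("64-bit size %llu -> 32-bit size %u\n",
	    (unsigned long long)s.cachesize, s32.cachesize);
	return (0);
}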
6638 6638
6639 6639
6640 6640 int
6641 6641 _sd_get_stats(_sd_stats_t *uptr, int convert_32)
6642 6642 {
6643 6643 int rc = 0;
6644 6644
6645 6645 if (_sd_cache_stats == NULL) {
6646 6646 static _sd_stats_t dummy;
6647 6647 #ifdef _MULTI_DATAMODEL
6648 6648 static _sd_stats32_t dummy32;
6649 6649 #endif
6650 6650
6651 6651 if (convert_32) {
6652 6652 #ifdef _MULTI_DATAMODEL
6653 6653 if (copyout(&dummy32, uptr, sizeof (_sd_stats32_t)))
6654 6654 rc = EFAULT;
6655 6655 #else
6656 6656 rc = SDBC_EMODELCONVERT;
6657 6657 #endif
6658 6658 } else if (copyout(&dummy, uptr, sizeof (_sd_stats_t)))
6659 6659 rc = EFAULT;
6660 6660 return (rc);
6661 6661 }
6662 6662
6663 6663 _sd_cache_stats->st_lru_blocks = _sd_lru_q.sq_inq;
6664 6664 _sd_cache_stats->st_lru_noreq = _sd_lru_q.sq_noreq_stat;
6665 6665 _sd_cache_stats->st_lru_req = _sd_lru_q.sq_req_stat;
6666 6666
6667 6667 if (sdbc_safestore) {
6668 6668 ssioc_stats_t ss_stats;
6669 6669
6670 6670 if (SSOP_CTL(sdbc_safestore, SSIOC_STATS,
6671 6671 (uintptr_t)&ss_stats) == 0)
6672 6672 _sd_cache_stats->st_wlru_inq = ss_stats.wq_inq;
6673 6673 else
6674 6674 _sd_cache_stats->st_wlru_inq = 0;
6675 6675 }
6676 6676
6677 6677 if (convert_32)
6678 6678 rc = convert_stats((_sd_stats32_t *)uptr);
6679 6679 else if (copyout(_sd_cache_stats, uptr,
6680 6680 sizeof (_sd_stats_t) + (sdbc_max_devs - 1) * sizeof (_sd_shared_t)))
6681 6681 rc = EFAULT;
6682 6682
6683 6683 return (rc);
6684 6684 }
6685 6685
6686 6686
6687 6687 int
6688 6688 _sd_set_hint(int cd, uint_t hint)
6689 6689 {
6690 6690 int ret = 0;
6691 6691 if (FILE_OPENED(cd)) {
6692 6692 SDTRACE(ST_ENTER|SDF_HINT, cd, 1, SDT_INV_BL, hint, 0);
6693 6693 _sd_cache_files[cd].cd_hint |= (hint & _SD_HINT_MASK);
6694 6694 SDTRACE(ST_EXIT|SDF_HINT, cd, 1, SDT_INV_BL, hint, ret);
6695 6695 } else
6696 6696 ret = EINVAL;
6697 6697
6698 6698 return (ret);
6699 6699 }
6700 6700
6701 6701
6702 6702
6703 6703 int
6704 6704 _sd_clear_hint(int cd, uint_t hint)
6705 6705 {
6706 6706 int ret = 0;
6707 6707 if (FILE_OPENED(cd)) {
6708 6708 SDTRACE(ST_ENTER|SDF_HINT, cd, 2, SDT_INV_BL, hint, 0);
6709 6709 _sd_cache_files[cd].cd_hint &= ~(hint & _SD_HINT_MASK);
6710 6710 SDTRACE(ST_EXIT|SDF_HINT, cd, 2, SDT_INV_BL, hint, ret);
6711 6711 } else
6712 6712 ret = EINVAL;
6713 6713
6714 6714 return (ret);
6715 6715 }
6716 6716
6717 6717
6718 6718 int
6719 6719 _sd_get_cd_hint(int cd, uint_t *hint)
6720 6720 {
6721 6721 *hint = 0;
6722 6722 if (FILE_OPENED(cd)) {
6723 6723 *hint = _sd_cache_files[cd].cd_hint;
6724 6724 return (0);
6725 6725 } else
6726 6726 return (EINVAL);
6727 6727 }
6728 6728
6729 6729 static int
6730 6730 _sd_node_hint_caller(blind_t hint, int hint_action)
6731 6731 {
6732 6732 int rc;
6733 6733
6734 6734 switch (hint_action) {
6735 6735 case NSC_GET_NODE_HINT:
6736 6736 rc = _sd_get_node_hint((uint_t *)hint);
6737 6737 break;
6738 6738 case NSC_SET_NODE_HINT:
6739 6739 rc = _sd_set_node_hint((uint_t)(unsigned long)hint);
6740 6740 break;
6741 6741 case NSC_CLEAR_NODE_HINT:
6742 6742 rc = _sd_clear_node_hint((uint_t)(unsigned long)hint);
6743 6743 break;
6744 6744 default:
6745 6745 rc = EINVAL;
6746 6746 break;
6747 6747 }
6748 6748
6749 6749 return (rc);
6750 6750 }
6751 6751
6752 6752 int
6753 6753 _sd_set_node_hint(uint_t hint)
6754 6754 {
6755 6755 SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6756 6756 if ((_sd_node_hint & NSC_NO_FORCED_WRTHRU) &&
6757 6757 (hint & NSC_FORCED_WRTHRU))
6758 6758 return (EINVAL);
6759 6759 _sd_node_hint |= (hint & _SD_HINT_MASK);
6760 6760 SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 3, SDT_INV_BL, hint, 0);
6761 6761 return (0);
6762 6762 }
6763 6763
6764 6764
6765 6765 int
6766 6766 _sd_clear_node_hint(uint_t hint)
6767 6767 {
6768 6768 SDTRACE(ST_ENTER|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6769 6769 _sd_node_hint &= ~(hint & _SD_HINT_MASK);
6770 6770 SDTRACE(ST_EXIT|SDF_HINT, SDT_INV_CD, 4, SDT_INV_BL, hint, 0);
6771 6771 return (0);
6772 6772 }
6773 6773
6774 6774
6775 6775 int
6776 6776 _sd_get_node_hint(uint_t *hint)
6777 6777 {
6778 6778 *hint = _sd_node_hint;
6779 6779 return (0);
6780 6780 }
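
The hint routines above are plain bit operations under a mask of supported hints, with one guard: setting NSC_FORCED_WRTHRU is refused while NSC_NO_FORCED_WRTHRU is up. A sketch with assumed mask and flag values (the real constants live in the nsctl headers):

#include <stdio.h>

#define	HINT_MASK		0x00ffffffu	/* assumed _SD_HINT_MASK */
#define	FORCED_WRTHRU		0x0001u		/* assumed flag values */
#define	NO_FORCED_WRTHRU	0x0002u

static unsigned node_hint = NO_FORCED_WRTHRU;

static int
set_hint(unsigned hint)
{
	if ((node_hint & NO_FORCED_WRTHRU) && (hint & FORCED_WRTHRU))
		return (-1);			/* EINVAL in the driver */
	node_hint |= (hint & HINT_MASK);
	return (0);
}

int
main(void)
{
	printf("set FORCED_WRTHRU: %d\n", set_hint(FORCED_WRTHRU));
	node_hint &= ~(NO_FORCED_WRTHRU & HINT_MASK);	/* clear guard */
	printf("set FORCED_WRTHRU: %d\n", set_hint(FORCED_WRTHRU));
	printf("hints now 0x%x\n", node_hint);
	return (0);
}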
6781 6781
6782 6782
6783 6783 int
6784 6784 _sd_get_partsize(blind_t xcd, nsc_size_t *ptr)
6785 6785 {
6786 6786 int cd = (int)(unsigned long)xcd;
6787 6787
6788 6788 if (FILE_OPENED(cd)) {
6789 6789 *ptr = _sd_cache_files[cd].cd_info->sh_filesize;
6790 6790 return (0);
6791 6791 } else
6792 6792 return (EINVAL);
6793 6793 }
6794 6794
6795 6795
6796 6796 int
6797 6797 _sd_get_maxfbas(blind_t xcd, int flag, nsc_size_t *ptr)
6798 6798 {
6799 6799 int cd = (int)(unsigned long)xcd;
6800 6800
6801 6801 if (!FILE_OPENED(cd))
6802 6802 return (EINVAL);
6803 6803
6804 6804 if (flag & NSC_CACHEBLK)
6805 6805 *ptr = BLK_FBAS;
6806 6806 else
6807 6807 *ptr = sdbc_max_fbas;
6808 6808
6809 6809 return (0);
6810 6810 }
6811 6811
6812 6812
6813 6813 int
6814 6814 _sd_control(blind_t xcd, int cmd, void *ptr, int len)
6815 6815 {
6816 6816 _sd_cd_info_t *cdi;
6817 6817 int cd = (int)(unsigned long)xcd;
6818 6818
6819 6819 cdi = &(_sd_cache_files[cd]);
6820 6820 return (nsc_control(cdi->cd_rawfd, cmd, ptr, len));
6821 6821 }
6822 6822
6823 6823
6824 6824 int
6825 6825 _sd_discard_pinned(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len)
6826 6826 {
6827 6827 int cd = (int)(unsigned long)xcd;
6828 6828 _sd_cctl_t *cc_ent, **cc_lst, **cc_tmp, *nxt;
6829 6829 ss_centry_info_t *wctl;
6830 6830 int found = 0;
6831 6831 nsc_off_t cblk;
6832 6832 _sd_cd_info_t *cdi = &_sd_cache_files[cd];
6833 6833 int rc;
6834 6834
6835 6835 if ((!FILE_OPENED(cd)) || (!cdi->cd_info->sh_failed)) {
6836 6836
6837 6837 return (EINVAL);
6838 6838 }
6839 6839
6840 6840 for (cblk = FBA_TO_BLK_NUM(fba_pos);
6841 6841 cblk < FBA_TO_BLK_LEN(fba_pos + fba_len); cblk++) {
6842 6842 if (cc_ent =
6843 6843 (_sd_cctl_t *)_sd_hash_search(cd, cblk, _sd_htable)) {
6844 6844 if (!CENTRY_PINNED(cc_ent))
6845 6845 continue;
6846 6846
6847 6847 /*
6848 6848 * remove cc_ent from failed links
6849 6849 * cc_lst - pointer to "cc_dirty_link" pointer
6850 6850 * starts at &cd_failed_head.
6851 6851 * cc_tmp - pointer to "cc_dirty_next"
6852 6852 * except when equal to cc_lst.
6853 6853 */
6854 6854 mutex_enter(&cdi->cd_lock);
6855 6855 cc_tmp = cc_lst = &(cdi->cd_fail_head);
6856 6856 while (*cc_tmp != cc_ent) {
6857 6857 cc_tmp = &((*cc_tmp)->cc_dirty_next);
6858 6858 if (!*cc_tmp)
6859 6859 cc_lst = &((*cc_lst)->cc_dirty_link),
6860 6860 cc_tmp = cc_lst;
6861 6861 }
6862 6862 if (*cc_tmp) {
6863 6863 found++;
6864 6864 if (cc_lst != cc_tmp) /* break chain */
6865 6865 *cc_tmp = NULL;
6866 6866 nxt = cc_ent->cc_dirty_next;
6867 6867 if (nxt) {
6868 6868 nxt->cc_dirty_link =
6869 6869 (*cc_lst)->cc_dirty_link;
6870 6870 *cc_lst = nxt;
6871 6871 } else {
6872 6872 *cc_lst = (*cc_lst)->cc_dirty_link;
6873 6873 }
6874 6874 cdi->cd_info->sh_numfail--;
6875 6875 nsc_unpinned_data(cdi->cd_iodev,
6876 6876 BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
6877 6877 BLK_FBAS);
6878 6878 }
6879 6879 mutex_exit(&cdi->cd_lock);
6880 6880
6881 6881 /* clear dirty bits */
6882 6882 /* was FAST */
6883 6883 mutex_enter(&cc_ent->cc_lock);
6884 6884 cc_ent->cc_valid = cc_ent->cc_dirty = 0;
6885 6885 cc_ent->cc_flag &= ~(CC_QHEAD|CC_PEND_DIRTY|CC_PINNED);
6886 6886 cc_ent->cc_dirty_link = NULL;
6887 6887 wctl = cc_ent->cc_write;
6888 6888 cc_ent->cc_write = NULL;
6889 6889 /* was FAST */
6890 6890 mutex_exit(&cc_ent->cc_lock);
6891 6891
6892 6892 /* release cache block to head of LRU */
6893 6893 if (wctl) {
6894 6894 wctl->sc_flag = 0;
6895 6895 wctl->sc_dirty = 0;
6896 6896 SSOP_SETCENTRY(sdbc_safestore, wctl);
6897 6897 SSOP_DEALLOCRESOURCE(sdbc_safestore,
6898 6898 wctl->sc_res);
6899 6899 }
6900 6900
6901 6901 if (!sdbc_use_dmchain)
6902 6902 _sd_requeue_head(cc_ent);
6903 6903 }
6904 6904 }
6905 6905
6906 6906 rc = found ? NSC_DONE : EINVAL;
6907 6907
6908 6908 return (rc);
6909 6909 }
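
The removal loop above walks a list of chains: cc_dirty_link strings chain heads together while cc_dirty_next strings entries within one chain, which is why two cursor pointers (cc_lst and cc_tmp) are needed. The shape itself, in a minimal sketch with assumed field names:

#include <stdio.h>

struct ent {
	int blk;
	struct ent *next;	/* cc_dirty_next: within one chain */
	struct ent *link;	/* cc_dirty_link: to the next chain */
};

int
main(void)
{
	struct ent e11 = { 11, NULL, NULL };
	struct ent e20 = { 20, NULL, NULL };	/* head of chain 2 */
	struct ent e10 = { 10, &e11, &e20 };	/* head of chain 1 */
	struct ent *chain, *e;

	for (chain = &e10; chain != NULL; chain = chain->link)
		for (e = chain; e != NULL; e = e->next)
			printf("pinned block %d\n", e->blk);
	return (0);
}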
6910 6910
6911 6911
6912 6912 /*
6913 6913 * Handle allocation
6914 6914 */
6915 6915
6916 6916 _sd_buf_hlist_t _sd_handle_list;
6917 6917
6918 6918 /*
6919 6919 * _sdbc_handles_unload - cache is being unloaded.
6920 6920 */
6921 6921 void
6922 6922 _sdbc_handles_unload(void)
6923 6923 {
6924 6924 mutex_destroy(&_sd_handle_list.hl_lock);
6925 6925
6926 6926 }
6927 6927
6928 6928 /*
6929 6929  * _sdbc_handles_load - cache is being loaded.
6930 6930 */
6931 6931 int
6932 6932 _sdbc_handles_load(void)
6933 6933 {
6934 6934 mutex_init(&_sd_handle_list.hl_lock, NULL, MUTEX_DRIVER, NULL);
6935 6935
6936 6936 return (0);
6937 6937 }
6938 6938
6939 6939 int
6940 6940 _sdbc_handles_configure()
6941 6941 {
6942 6942 _sd_handle_list.hl_count = 0;
6943 6943
6944 6944 _sd_handle_list.hl_top.bh_next = &_sd_handle_list.hl_top;
6945 6945 _sd_handle_list.hl_top.bh_prev = &_sd_handle_list.hl_top;
6946 6946
6947 6947 return (0);
6948 6948 }
6949 6949
6950 6950
6951 6951
6952 6952 /*
6953 6953 * _sdbc_handles_deconfigure - cache is being deconfigured
6954 6954 */
6955 6955 void
6956 6956 _sdbc_handles_deconfigure(void)
6957 6957 {
6958 6958 _sd_handle_list.hl_count = 0;
6959 6959 }
6960 6960
6961 6961
6962 6962 _sd_buf_handle_t *
6963 6963 _sd_alloc_handle(sdbc_callback_fn_t d_cb, sdbc_callback_fn_t r_cb,
6964 6964 sdbc_callback_fn_t w_cb)
6965 6965 {
6966 6966 _sd_buf_handle_t *handle;
6967 6967
6968 6968 handle = (_sd_buf_handle_t *)kmem_zalloc(sizeof (_sd_buf_handle_t),
6969 6969 KM_SLEEP);
6970 6970 /* maintain list and count for debugging */
6971 6971 mutex_enter(&_sd_handle_list.hl_lock);
6972 6972
6973 6973 handle->bh_prev = &_sd_handle_list.hl_top;
6974 6974 handle->bh_next = _sd_handle_list.hl_top.bh_next;
6975 6975 _sd_handle_list.hl_top.bh_next->bh_prev = handle;
6976 6976 _sd_handle_list.hl_top.bh_next = handle;
6977 6977
6978 6978 ++_sd_handle_list.hl_count;
6979 6979 mutex_exit(&_sd_handle_list.hl_lock);
6980 6980 #if !defined(_SD_NOCHECKS)
6981 6981 ASSERT(!(handle->bh_flag & (NSC_HALLOCATED | NSC_HACTIVE)));
6982 6982 #endif
6983 6983 handle->bh_disconnect_cb = d_cb;
6984 6984 handle->bh_read_cb = r_cb;
6985 6985 handle->bh_write_cb = w_cb;
6986 6986 handle->bh_flag |= NSC_HALLOCATED;
6987 6987 handle->bh_alloc_thread = nsc_threadp();
6988 6988
6989 6989 return (handle);
6990 6990 }
6991 6991
6992 6992 int
6993 6993 _sd_free_handle(_sd_buf_handle_t *handle)
6994 6994 {
6995 6995
6996 6996 if ((handle->bh_flag & NSC_HALLOCATED) == 0) {
6997 6997 cmn_err(CE_WARN, "!sdbc(_sd_free_handle) handle %p not valid",
6998 6998 (void *)handle);
6999 6999
7000 7000 DTRACE_PROBE(_sd_free_handle_end);
7001 7001
7002 7002 return (EINVAL);
7003 7003 }
7004 7004 if (_SD_HANDLE_ACTIVE(handle)) {
7005 7005 cmn_err(CE_WARN,
7006 7006 "!sdbc(_sd_free_handle) attempt to free active handle %p",
7007 7007 (void *)handle);
7008 7008
7009 7009 DTRACE_PROBE1(free_handle_active, int, handle->bh_flag);
7010 7010
7011 7011 return (EINVAL);
7012 7012 }
7013 7013
7014 7014
7015 7015 /* remove from queue before free */
7016 7016 mutex_enter(&_sd_handle_list.hl_lock);
7017 7017 handle->bh_prev->bh_next = handle->bh_next;
7018 7018 handle->bh_next->bh_prev = handle->bh_prev;
7019 7019 --_sd_handle_list.hl_count;
7020 7020 mutex_exit(&_sd_handle_list.hl_lock);
7021 7021
7022 7022 kmem_free(handle, sizeof (_sd_buf_handle_t));
7023 7023
7024 7024 return (0);
7025 7025 }
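
The handle list is a doubly-linked ring with a sentinel: hl_top is its own neighbour when the list is empty, so insertion and removal need no NULL checks. A standalone sketch of the same idiom (the names are assumptions, not the driver's types):

#include <stdio.h>

struct node {
	struct node *next, *prev;
	int id;
};

static struct node top = { &top, &top, -1 };	/* empty ring */

static void
insert_head(struct node *n)
{
	n->prev = &top;
	n->next = top.next;
	top.next->prev = n;
	top.next = n;
}

static void
remove_node(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int
main(void)
{
	struct node a = { NULL, NULL, 1 }, b = { NULL, NULL, 2 };
	struct node *p;

	insert_head(&a);
	insert_head(&b);
	remove_node(&a);
	for (p = top.next; p != &top; p = p->next)
		printf("handle %d\n", p->id);
	return (0);
}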
7026 7026
7027 7027
7028 7028
7029 7029
7030 7030 #if !defined (_SD_8K_BLKSIZE)
7031 7031 #define _SD_MAX_MAP 0x100
7032 7032 #else /* !(_SD_8K_BLKSIZE) */
7033 7033 #define _SD_MAX_MAP 0x10000
7034 7034 #endif /* !(_SD_8K_BLKSIZE) */
7035 7035
7036 7036 char _sd_contig_bmap[_SD_MAX_MAP];
7037 7037 _sd_map_info_t _sd_lookup_map[_SD_MAX_MAP];
7038 7038
7039 7039 void
7040 7040 _sd_init_contig_bmap(void)
7041 7041 {
7042 7042 int i, j;
7043 7043
7044 7044 for (i = 1; i < _SD_MAX_MAP; i = ((i << 1) | 1))
7045 7045 for (j = i; j < _SD_MAX_MAP; j <<= 1)
7046 7046 _sd_contig_bmap[j] = 1;
7047 7047 }
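
The nested loop above enumerates exactly the masks whose set bits form one
unbroken run: the outer loop builds the runs 0x1, 0x3, 0x7, ... anchored at
bit 0, and the inner loop slides each run left one bit per iteration.
_sd_contig_bmap[m] is therefore 1 iff mask m describes a contiguous span of
cache blocks, so later code can test contiguity with a single array load.
A user-land sketch that cross-checks the table against a direct bit test
(assertion harness only, not driver code):

	#include <assert.h>

	#define	MAX_MAP	0x100

	static char contig[MAX_MAP];

	int
	main(void)
	{
		int i, j;

		/* Same construction as _sd_init_contig_bmap(). */
		for (i = 1; i < MAX_MAP; i = ((i << 1) | 1))
			for (j = i; j < MAX_MAP; j <<= 1)
				contig[j] = 1;

		for (i = 1; i < MAX_MAP; i++) {
			/*
			 * Strip trailing zeros, then the first run of
			 * ones; a contiguous mask leaves nothing over.
			 */
			int m = i;

			while ((m & 1) == 0)
				m >>= 1;
			while (m & 1)
				m >>= 1;
			assert(contig[i] == (m == 0));
		}
		return (0);
	}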
7048 7048
7049 7049
7050 7050
7051 7051
7052 7052 void
7053 7053 _sd_init_lookup_map(void)
7054 7054 {
7055 7055 unsigned int i, j, k;
7056 7056 int stpos, len;
7057 7057 _sd_bitmap_t mask;
7058 7058
7059 7059 for (i = 0; i < _SD_MAX_MAP; i++) {
7060 7060 for (j = i, k = 0; j && ((j & 1) == 0); j >>= 1, k++)
7061 7061 ;
7062 7062 stpos = k;
7063 7063 _sd_lookup_map[i].mi_stpos = (unsigned char)k;
7064 7064
7065 7065 for (k = 0; j & 1; j >>= 1, k++)
7066 7066 ;
7067 7067 len = k;
7068 7068 _sd_lookup_map[i].mi_len = (unsigned char)k;
7069 7069
7070 7070 _sd_lookup_map[i].mi_mask = SDBC_GET_BITS(stpos, len);
7071 7071 }
7072 7072 for (i = 0; i < _SD_MAX_MAP; i++) {
7073 7073 mask = (_sd_bitmap_t)i;
7074 7074 for (j = 0; mask; j++)
7075 7075 SDBC_LOOKUP_MODIFY(mask);
7076 7076
7077 7077 _sd_lookup_map[i].mi_dirty_count = (unsigned char)j;
7078 7078 }
7079 7079 for (i = 0; i < _SD_MAX_MAP; i++) {
7080 7080 _sd_lookup_map[i].mi_io_count = SDBC_LOOKUP_DTCOUNT(i);
7081 7081 mask = ~i;
7082 7082 _sd_lookup_map[i].mi_io_count += SDBC_LOOKUP_DTCOUNT(mask);
7083 7083 }
7084 7084 }
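
For each possible bitmap value, _sd_init_lookup_map() precomputes mi_stpos
(bit index of the lowest set bit), mi_len (length of the first contiguous
run of set bits), and mi_mask (that run expressed as a mask). The last two
loops fill mi_dirty_count and mi_io_count; assuming SDBC_LOOKUP_MODIFY
clears the first run from the mask on each pass and SDBC_LOOKUP_DTCOUNT
returns that run count, mi_dirty_count is the number of dirty fragments in
the mask and mi_io_count is the fragment count of the mask plus that of its
complement. A sketch of the first-run computation with the macros factored
out (types and names illustrative):

	typedef unsigned short bitmap_t;

	struct map_info {
		unsigned char	stpos;	/* index of lowest set bit */
		unsigned char	len;	/* length of first run of ones */
		bitmap_t	mask;	/* that run, as a mask */
	};

	static struct map_info
	first_run(bitmap_t m)
	{
		struct map_info mi = { 0, 0, 0 };

		while (m && (m & 1) == 0) {	/* skip trailing zeros */
			m >>= 1;
			mi.stpos++;
		}
		while (m & 1) {			/* measure the run */
			m >>= 1;
			mi.len++;
		}
		/* e.g. input 0x1c -> stpos 2, len 3, mask 0x1c */
		mi.mask = (bitmap_t)(((1u << mi.len) - 1) << mi.stpos);
		return (mi);
	}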
7085 7085
7086 7086
7087 7087 nsc_def_t _sd_sdbc_def[] = {
7088 - "Open", (uintptr_t)_sd_open_io, 0,
7089 - "Close", (uintptr_t)_sd_close_io, 0,
7090 - "Attach", (uintptr_t)_sdbc_io_attach_cd, 0,
7091 - "Detach", (uintptr_t)_sdbc_io_detach_cd, 0,
7092 - "AllocBuf", (uintptr_t)_sd_alloc_buf, 0,
7093 - "FreeBuf", (uintptr_t)_sd_free_buf, 0,
7094 - "Read", (uintptr_t)_sd_read, 0,
7095 - "Write", (uintptr_t)_sd_write, 0,
7096 - "Zero", (uintptr_t)_sd_zero, 0,
7097 - "Copy", (uintptr_t)_sd_copy, 0,
7098 - "CopyDirect", (uintptr_t)_sd_copy_direct, 0,
7099 - "Uncommit", (uintptr_t)_sd_uncommit, 0,
7100 - "AllocHandle", (uintptr_t)_sd_alloc_handle, 0,
7101 - "FreeHandle", (uintptr_t)_sd_free_handle, 0,
7102 - "Discard", (uintptr_t)_sd_discard_pinned, 0,
7103 - "Sizes", (uintptr_t)_sd_cache_sizes, 0,
7104 - "GetPinned", (uintptr_t)_sd_get_pinned, 0,
7105 - "NodeHints", (uintptr_t)_sd_node_hint_caller, 0,
7106 - "PartSize", (uintptr_t)_sd_get_partsize, 0,
7107 - "MaxFbas", (uintptr_t)_sd_get_maxfbas, 0,
7108 - "Control", (uintptr_t)_sd_control, 0,
7109 - "Provide", NSC_CACHE, 0,
7110 - 0, 0, 0
7088 + { "Open", (uintptr_t)_sd_open_io, 0 },
7089 + { "Close", (uintptr_t)_sd_close_io, 0 },
7090 + { "Attach", (uintptr_t)_sdbc_io_attach_cd, 0 },
7091 + { "Detach", (uintptr_t)_sdbc_io_detach_cd, 0 },
7092 + { "AllocBuf", (uintptr_t)_sd_alloc_buf, 0 },
7093 + { "FreeBuf", (uintptr_t)_sd_free_buf, 0 },
7094 + { "Read", (uintptr_t)_sd_read, 0 },
7095 + { "Write", (uintptr_t)_sd_write, 0 },
7096 + { "Zero", (uintptr_t)_sd_zero, 0 },
7097 + { "Copy", (uintptr_t)_sd_copy, 0 },
7098 + { "CopyDirect", (uintptr_t)_sd_copy_direct, 0 },
7099 + { "Uncommit", (uintptr_t)_sd_uncommit, 0 },
7100 + { "AllocHandle", (uintptr_t)_sd_alloc_handle, 0 },
7101 + { "FreeHandle", (uintptr_t)_sd_free_handle, 0 },
7102 + { "Discard", (uintptr_t)_sd_discard_pinned, 0 },
7103 + { "Sizes", (uintptr_t)_sd_cache_sizes, 0 },
7104 + { "GetPinned", (uintptr_t)_sd_get_pinned, 0 },
7105 + { "NodeHints", (uintptr_t)_sd_node_hint_caller, 0 },
7106 + { "PartSize", (uintptr_t)_sd_get_partsize, 0 },
7107 + { "MaxFbas", (uintptr_t)_sd_get_maxfbas, 0 },
7108 + { "Control", (uintptr_t)_sd_control, 0 },
7109 + { "Provide", NSC_CACHE, 0 },
7110 + { NULL, (uintptr_t)NULL, 0 }
7111 7111 };
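
The rewrite of _sd_sdbc_def above changes no runtime behavior: each
nsc_def_t element is a struct, and the old flat initializer relied on C's
brace elision, which gcc flags under -Wmissing-braces. One brace pair per
element, plus an explicit terminator entry, silences the warning without
changing any initialized value. A minimal reproduction with a hypothetical
struct:

	struct def {
		const char	*name;
		unsigned long	func;
		int		flags;
	};

	/*
	 * Flat list: legal C via brace elision, but gcc warns under
	 * -Wmissing-braces.
	 */
	struct def bad[] = {
		"Open", 1, 0,
		"Close", 2, 0,
		0, 0, 0
	};

	/* One brace pair per element: warning-clean. */
	struct def good[] = {
		{ "Open", 1, 0 },
		{ "Close", 2, 0 },
		{ 0, 0, 0 }
	};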
7112 7112
7113 7113 /*
7114 7114 * do the SD_GET_CD_CLUSTER_DATA ioctl (get the global filename data)
7115 7115 */
7116 7116 /* ARGSUSED */
7117 7117 int
7118 7118 sd_get_file_info_data(char *uaddrp)
7119 7119 {
7120 7120 return (ENOTTY);
7121 7121 }
7122 7122
7123 7123 /*
7124 7124 * do the SD_GET_CD_CLUSTER_SIZE ioctl (get size of global filename area)
7125 7125 */
7126 7126 int
7127 7127 sd_get_file_info_size(void *uaddrp)
7128 7128 {
7129 7129 if (copyout(&_sdbc_gl_file_info_size, uaddrp,
7130 7130 sizeof (_sdbc_gl_file_info_size))) {
7131 7131 return (EFAULT);
7132 7132 }
7133 7133
7134 7134 return (0);
7135 7135 }
7136 7136
7137 7137
7138 7138 /*
7139 7139 * SD_GET_GLMUL_SIZES ioctl
7140 7140 * get sizes of the global info regions (for this node only)
7141 7141 */
7142 7142 /* ARGSUSED */
7143 7143 int
7144 7144 sd_get_glmul_sizes(int *uaddrp)
7145 7145 {
7146 7146 return (ENOTTY);
7147 7147 }
7148 7148
7149 7149 /*
7150 7150 * SD_GET_GLMUL_INFO ioctl
7151 7151 * get the global metadata for write blocks (for this node only)
7152 7152 */
7153 7153 /* ARGSUSED */
7154 7154 int
7155 7155 sd_get_glmul_info(char *uaddrp)
7156 7156 {
7157 7157
7158 7158 return (ENOTTY);
7159 7159 }
7160 7160
7161 7161 int
7162 7162 sdbc_global_stats_update(kstat_t *ksp, int rw)
7163 7163 {
7164 7164 sdbc_global_stats_t *sdbc_gstats;
7165 7165 _sd_stats_t *gstats_vars;
7166 7166 uint_t hint;
7167 7167
7168 7168 sdbc_gstats = (sdbc_global_stats_t *)(ksp->ks_data);
7169 7169
7170 7170 gstats_vars = _sd_cache_stats;
7171 7171
7172 7172 if (rw == KSTAT_WRITE) {
7173 7173 return (EACCES);
7174 7174 }
7175 7175
7176 7176 /* default to READ */
7177 7177 sdbc_gstats->ci_sdbc_count.value.ul = gstats_vars->st_count;
7178 7178 sdbc_gstats->ci_sdbc_loc_count.value.ul = gstats_vars->st_loc_count;
7179 7179 sdbc_gstats->ci_sdbc_rdhits.value.ul = (ulong_t)gstats_vars->st_rdhits;
7180 7180 sdbc_gstats->ci_sdbc_rdmiss.value.ul = (ulong_t)gstats_vars->st_rdmiss;
7181 7181 sdbc_gstats->ci_sdbc_wrhits.value.ul = (ulong_t)gstats_vars->st_wrhits;
7182 7182 sdbc_gstats->ci_sdbc_wrmiss.value.ul = (ulong_t)gstats_vars->st_wrmiss;
7183 7183
7184 7184 sdbc_gstats->ci_sdbc_blksize.value.ul =
7185 7185 (ulong_t)gstats_vars->st_blksize;
7186 7186 sdbc_gstats->ci_sdbc_lru_blocks.value.ul = (ulong_t)_sd_lru_q.sq_inq;
7187 7187 #ifdef DEBUG
7188 7188 sdbc_gstats->ci_sdbc_lru_noreq.value.ul =
7189 7189 (ulong_t)_sd_lru_q.sq_noreq_stat;
7190 7190 sdbc_gstats->ci_sdbc_lru_req.value.ul = (ulong_t)_sd_lru_q.sq_req_stat;
7191 7191 #endif
7192 7192 sdbc_gstats->ci_sdbc_wlru_inq.value.ul =
7193 7193 (ulong_t)gstats_vars->st_wlru_inq;
7194 7194 sdbc_gstats->ci_sdbc_cachesize.value.ul =
7195 7195 (ulong_t)gstats_vars->st_cachesize;
7196 7196 sdbc_gstats->ci_sdbc_numblocks.value.ul =
7197 7197 (ulong_t)gstats_vars->st_numblocks;
7198 7198 sdbc_gstats->ci_sdbc_wrcancelns.value.ul =
7199 7199 (ulong_t)gstats_vars->st_wrcancelns;
7200 7200 sdbc_gstats->ci_sdbc_destaged.value.ul =
7201 7201 (ulong_t)gstats_vars->st_destaged;
7202 7202 sdbc_gstats->ci_sdbc_num_shared.value.ul = (ulong_t)sdbc_max_devs;
7203 7203 (void) _sd_get_node_hint(&hint);
7204 7204 sdbc_gstats->ci_sdbc_nodehints.value.ul = (ulong_t)hint;
7205 7205
7206 7206
7207 7207 return (0);
7208 7208 }
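
sdbc_global_stats_update() follows the usual ks_update contract for named
kstats: refuse KSTAT_WRITE with EACCES, and on read refresh every
kstat_named_t in ks_data from the live counters just before the framework
snapshots them out to the reader. A minimal sketch of that shape (the
counter and names are hypothetical):

	static uint64_t my_hits;	/* hypothetical live counter */

	static int
	my_stats_update(kstat_t *ksp, int rw)
	{
		kstat_named_t *kn = (kstat_named_t *)ksp->ks_data;

		if (rw == KSTAT_WRITE)
			return (EACCES);	/* read-only statistics */

		/* Refresh ks_data on every read. */
		kn[0].value.ui64 = my_hits;
		return (0);
	}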
7209 7209
7210 7210 int
7211 7211 sdbc_cd_stats_update(kstat_t *ksp, int rw)
7212 7212 {
7213 7213 sdbc_cd_stats_t *sdbc_shstats;
7214 7214 _sd_shared_t *shstats_vars;
7215 7215 int name_len;
7216 7216 uint_t hint;
7217 7217
7218 7218 sdbc_shstats = (sdbc_cd_stats_t *)(ksp->ks_data);
7219 7219
7220 7220 shstats_vars = (_sd_shared_t *)(ksp->ks_private);
7221 7221
7222 7222 if (rw == KSTAT_WRITE) {
7223 7223 return (EACCES);
7224 7224 }
7225 7225
7226 7226 /* copy tail of filename to kstat. leave 1 byte for null char */
7227 7227 if (shstats_vars->sh_filename != NULL) {
7228 7228 name_len = (int)strlen(shstats_vars->sh_filename);
7229 7229 name_len -= (KSTAT_DATA_CHAR_LEN - 1);
7230 7230
7231 7231 if (name_len < 0) {
7232 7232 name_len = 0;
7233 7233 }
7234 7234
7235 7235 (void) strlcpy(sdbc_shstats->ci_sdbc_vol_name.value.c,
7236 7236 shstats_vars->sh_filename + name_len, KSTAT_DATA_CHAR_LEN);
7237 7237 } else {
7238 7238 cmn_err(CE_WARN, "!Kstat error: no volume name associated "
7239 7239 "with cache descriptor");
7240 7240 }
7241 7241
7242 7242 sdbc_shstats->ci_sdbc_failed.value.ul =
7243 7243 (ulong_t)shstats_vars->sh_failed;
7244 7244 sdbc_shstats->ci_sdbc_cd.value.ul = (ulong_t)shstats_vars->sh_cd;
7245 7245 sdbc_shstats->ci_sdbc_cache_read.value.ul =
7246 7246 (ulong_t)shstats_vars->sh_cache_read;
7247 7247 sdbc_shstats->ci_sdbc_cache_write.value.ul =
7248 7248 (ulong_t)shstats_vars->sh_cache_write;
7249 7249 sdbc_shstats->ci_sdbc_disk_read.value.ul =
7250 7250 (ulong_t)shstats_vars->sh_disk_read;
7251 7251 sdbc_shstats->ci_sdbc_disk_write.value.ul =
7252 7252 (ulong_t)shstats_vars->sh_disk_write;
7253 7253 #ifdef NSC_MULTI_TERABYTE
7254 7254 sdbc_shstats->ci_sdbc_filesize.value.ui64 =
7255 7255 (uint64_t)shstats_vars->sh_filesize;
7256 7256 #else
7257 7257 sdbc_shstats->ci_sdbc_filesize.value.ul =
7258 7258 (ulong_t)shstats_vars->sh_filesize;
7259 7259 #endif
7260 7260 sdbc_shstats->ci_sdbc_numdirty.value.ul =
7261 7261 (ulong_t)shstats_vars->sh_numdirty;
7262 7262 sdbc_shstats->ci_sdbc_numio.value.ul = (ulong_t)shstats_vars->sh_numio;
7263 7263 sdbc_shstats->ci_sdbc_numfail.value.ul =
7264 7264 (ulong_t)shstats_vars->sh_numfail;
7265 7265 sdbc_shstats->ci_sdbc_destaged.value.ul =
7266 7266 (ulong_t)shstats_vars->sh_destaged;
7267 7267 sdbc_shstats->ci_sdbc_wrcancelns.value.ul =
7268 7268 (ulong_t)shstats_vars->sh_wrcancelns;
7269 7269 (void) _sd_get_cd_hint(shstats_vars->sh_cd, &hint);
7270 7270 sdbc_shstats->ci_sdbc_cdhints.value.ul = (ulong_t)hint;
7271 7271
7272 7272
7273 7273 return (0);
7274 7274 }
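
The tail copy above fits an arbitrarily long volume path into the fixed
char field of a named kstat by keeping only the final characters that fit,
plus the terminator. Worked example, assuming KSTAT_DATA_CHAR_LEN is the
usual 16 and a typical device path:

	/*
	 * sh_filename = "/dev/rdsk/c1t0d0s0"         (18 chars)
	 * name_len    = 18 - (16 - 1) = 3
	 * strlcpy() copies from offset 3:
	 *   "v/rdsk/c1t0d0s0" + NUL = 16 bytes exactly
	 */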
7275 7275
7276 7276
7277 7277 /*
7278 7278 * cd_kstat_add
7279 7279 *
7280 7280 * Installs all kstats and associated infrastructure (mutex, buffer),
7281 7281 * associated with a particular cache descriptor. This function is called
7282 7282 * when the cache descriptor is opened in _sd_open().
7283 7283 * "cd" -- cache descriptor number whose kstats we wish to add
7284 7284 * returns: 0 on success, -1 on failure
7285 7285 */
7286 7286 static int
7287 7287 cd_kstat_add(int cd)
7288 7288 {
7289 7289 char name[KSTAT_STRLEN];
7290 7290
7291 7291 if (cd < 0 || cd >= sdbc_max_devs) {
7292 7292 cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
7293 7293 return (-1);
7294 7294 }
7295 7295
7296 7296 /* create a regular kstat for this cache descriptor */
7297 7297 if (!sdbc_cd_kstats) {
7298 7298 cmn_err(CE_WARN, "!sdbc_cd_kstats not allocated");
7299 7299 return (-1);
7300 7300 }
7301 7301
7302 7302 (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_KSTAT_CDSTATS, cd);
7303 7303
7304 7304 sdbc_cd_kstats[cd] = kstat_create(SDBC_KSTAT_MODULE,
7305 7305 cd, name, SDBC_KSTAT_CLASS, KSTAT_TYPE_NAMED,
7306 7306 sizeof (sdbc_cd_stats)/sizeof (kstat_named_t),
7307 7307 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
7308 7308
7309 7309 if (sdbc_cd_kstats[cd] != NULL) {
7310 7310 sdbc_cd_kstats[cd]->ks_data = &sdbc_cd_stats;
7311 7311 sdbc_cd_kstats[cd]->ks_update = sdbc_cd_stats_update;
7312 7312 sdbc_cd_kstats[cd]->ks_private =
7313 7313 &_sd_cache_stats->st_shared[cd];
7314 7314 kstat_install(sdbc_cd_kstats[cd]);
7315 7315 } else {
7316 7316 cmn_err(CE_WARN, "!cdstats %d kstat allocation failed", cd);
7317 7317 }
7318 7318
7319 7319 /* create an I/O kstat for this cache descriptor */
7320 7320 if (!sdbc_cd_io_kstats) {
7321 7321 cmn_err(CE_WARN, "!sdbc_cd_io_kstats not allocated");
7322 7322 return (-1);
7323 7323 }
7324 7324
7325 7325 (void) snprintf(name, KSTAT_STRLEN, "%s%d", SDBC_IOKSTAT_CDSTATS, cd);
7326 7326
7327 7327 sdbc_cd_io_kstats[cd] = kstat_create(
7328 7328 SDBC_KSTAT_MODULE, cd, name, "disk", KSTAT_TYPE_IO, 1, 0);
7329 7329
7330 7330 if (sdbc_cd_io_kstats[cd]) {
7331 7331 if (!sdbc_cd_io_kstats_mutexes) {
7332 7332 cmn_err(CE_WARN, "!sdbc_cd_io_kstats_mutexes not "
7333 7333 "allocated");
7334 7334 return (-1);
7335 7335 }
7336 7336
7337 7337 mutex_init(&sdbc_cd_io_kstats_mutexes[cd], NULL,
7338 7338 MUTEX_DRIVER, NULL);
7339 7339
7340 7340 sdbc_cd_io_kstats[cd]->ks_lock = &sdbc_cd_io_kstats_mutexes[cd];
7341 7341
7342 7342 kstat_install(sdbc_cd_io_kstats[cd]);
7343 7343
7344 7344 } else {
7345 7345 cmn_err(CE_WARN, "!sdbc cd %d io kstat allocation failed", cd);
7346 7346 }
7347 7347
7348 7348 return (0);
7349 7349 }
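
Note the ordering in the I/O-kstat branch above: the mutex is initialized
and hung off ks_lock before kstat_install() publishes the kstat, since the
framework synchronizes on ks_lock (when set) once the kstat is visible.
cd_kstat_remove() below tears down in the mirror order: kstat_delete()
first, mutex_destroy() after. Condensed sketch of the create side (module
and instance names abbreviated):

	kstat_t		*io_ksp;
	kmutex_t	io_lock;

	io_ksp = kstat_create("mymod", 0, "myio0", "disk",
	    KSTAT_TYPE_IO, 1, 0);
	if (io_ksp != NULL) {
		mutex_init(&io_lock, NULL, MUTEX_DRIVER, NULL);
		io_ksp->ks_lock = &io_lock;	/* locked on snapshot */
		kstat_install(io_ksp);
	}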
7350 7350
7351 7351 /*
7352 7352 * cd_kstat_remove
7353 7353 *
7354 7354 * Uninstalls all kstats and associated infrastructure (mutex, buffer),
7355 7355 * associated with a particular cache descriptor. This function is called
7356 7356 * when the cache descriptor is closed in _sd_close().
7357 7357 * "cd" -- cache descriptor number whose kstats we wish to remove
7358 7358 * returns: 0 on success, -1 on failure
7359 7359 */
7360 7360 static int
7361 7361 cd_kstat_remove(int cd)
7362 7362 {
7363 7363 if (cd < 0 || cd >= sdbc_max_devs) {
7364 7364 cmn_err(CE_WARN, "!invalid cache descriptor: %d", cd);
7365 7365 return (-1);
7366 7366 }
7367 7367
7368 7368 /* delete the regular kstat corresponding to this cache descriptor */
7369 7369 if (sdbc_cd_kstats && sdbc_cd_kstats[cd]) {
7370 7370 kstat_delete(sdbc_cd_kstats[cd]);
7371 7371 sdbc_cd_kstats[cd] = NULL;
7372 7372 }
7373 7373
7374 7374 /* delete the I/O kstat corresponding to this cache descriptor */
7375 7375 if (sdbc_cd_io_kstats && sdbc_cd_io_kstats[cd]) {
7376 7376 kstat_delete(sdbc_cd_io_kstats[cd]);
7377 7377 sdbc_cd_io_kstats[cd] = NULL;
7378 7378
7379 7379 if (sdbc_cd_io_kstats_mutexes) {
7380 7380 /* destroy the mutex associated with this I/O kstat */
7381 7381 mutex_destroy(&sdbc_cd_io_kstats_mutexes[cd]);
7382 7382 }
7383 7383 }
7384 7384
7385 7385 return (0);
7386 7386 }
7387 7387
7388 7388 #ifdef DEBUG
7389 7389 /*
7390 7390 * kstat update
7391 7391 */
7392 7392 int
7393 7393 sdbc_dynmem_kstat_update_dm(kstat_t *ksp, int rw)
7394 7394 {
7395 7395 sdbc_dynmem_dm_t *sdbc_dynmem;
7396 7396 _dm_process_vars_t *process_vars;
7397 7397 _dm_process_vars_t local_dm_process_vars;
7398 7398
7399 7399 simplect_dm++;
7400 7400
7401 7401 sdbc_dynmem = (sdbc_dynmem_dm_t *)(ksp->ks_data);
7402 7402
7403 7403 /* global dynmem_processing_dm */
7404 7404 process_vars = (_dm_process_vars_t *)(ksp->ks_private);
7405 7405
7406 7406 if (rw == KSTAT_WRITE) {
7407 7407 simplect_dm = sdbc_dynmem->ci_sdbc_simplect.value.ul;
7408 7408 local_dm_process_vars.monitor_dynmem_process =
7409 7409 sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul;
7410 7410 local_dm_process_vars.max_dyn_list =
7411 7411 sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul;
7412 7412 local_dm_process_vars.cache_aging_ct1 =
7413 7413 sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul;
7414 7414 local_dm_process_vars.cache_aging_ct2 =
7415 7415 sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul;
7416 7416 local_dm_process_vars.cache_aging_ct3 =
7417 7417 sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul;
7418 7418 local_dm_process_vars.cache_aging_sec1 =
7419 7419 sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul;
7420 7420 local_dm_process_vars.cache_aging_sec2 =
7421 7421 sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul;
7422 7422 local_dm_process_vars.cache_aging_sec3 =
7423 7423 sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul;
7424 7424 local_dm_process_vars.cache_aging_pcnt1 =
7425 7425 sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul;
7426 7426 local_dm_process_vars.cache_aging_pcnt2 =
7427 7427 sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul;
7428 7428 local_dm_process_vars.max_holds_pcnt =
7429 7429 sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul;
7430 7430 local_dm_process_vars.process_directive =
7431 7431 sdbc_dynmem->ci_sdbc_process_directive.value.ul;
7432 7432 (void) sdbc_edit_xfer_process_vars_dm(&local_dm_process_vars);
7433 7433
7434 7434 if (process_vars->process_directive & WAKE_DEALLOC_THREAD_DM) {
7435 7435 process_vars->process_directive &=
7436 7436 ~WAKE_DEALLOC_THREAD_DM;
7437 7437 mutex_enter(&dynmem_processing_dm.thread_dm_lock);
7438 7438 cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
7439 7439 mutex_exit(&dynmem_processing_dm.thread_dm_lock);
7440 7440 }
7441 7441
7442 7442 return (0);
7443 7443 }
7444 7444
7445 7445 /* default to READ */
7446 7446 sdbc_dynmem->ci_sdbc_simplect.value.ul = simplect_dm;
7447 7447 sdbc_dynmem->ci_sdbc_monitor_dynmem.value.ul =
7448 7448 process_vars->monitor_dynmem_process;
7449 7449 sdbc_dynmem->ci_sdbc_max_dyn_list.value.ul =
7450 7450 process_vars->max_dyn_list;
7451 7451 sdbc_dynmem->ci_sdbc_cache_aging_ct1.value.ul =
7452 7452 process_vars->cache_aging_ct1;
7453 7453 sdbc_dynmem->ci_sdbc_cache_aging_ct2.value.ul =
7454 7454 process_vars->cache_aging_ct2;
7455 7455 sdbc_dynmem->ci_sdbc_cache_aging_ct3.value.ul =
7456 7456 process_vars->cache_aging_ct3;
7457 7457 sdbc_dynmem->ci_sdbc_cache_aging_sec1.value.ul =
7458 7458 process_vars->cache_aging_sec1;
7459 7459 sdbc_dynmem->ci_sdbc_cache_aging_sec2.value.ul =
7460 7460 process_vars->cache_aging_sec2;
7461 7461 sdbc_dynmem->ci_sdbc_cache_aging_sec3.value.ul =
7462 7462 process_vars->cache_aging_sec3;
7463 7463 sdbc_dynmem->ci_sdbc_cache_aging_pcnt1.value.ul =
7464 7464 process_vars->cache_aging_pcnt1;
7465 7465 sdbc_dynmem->ci_sdbc_cache_aging_pcnt2.value.ul =
7466 7466 process_vars->cache_aging_pcnt2;
7467 7467 sdbc_dynmem->ci_sdbc_max_holds_pcnt.value.ul =
7468 7468 process_vars->max_holds_pcnt;
7469 7469 sdbc_dynmem->ci_sdbc_process_directive.value.ul =
7470 7470 process_vars->process_directive;
7471 7471
7472 7472 sdbc_dynmem->ci_sdbc_alloc_ct.value.ul = process_vars->alloc_ct;
7473 7473 sdbc_dynmem->ci_sdbc_dealloc_ct.value.ul = process_vars->dealloc_ct;
7474 7474 sdbc_dynmem->ci_sdbc_history.value.ul = process_vars->history;
7475 7475 sdbc_dynmem->ci_sdbc_nodatas.value.ul = process_vars->nodatas;
7476 7476 sdbc_dynmem->ci_sdbc_candidates.value.ul = process_vars->candidates;
7477 7477 sdbc_dynmem->ci_sdbc_deallocs.value.ul = process_vars->deallocs;
7478 7478 sdbc_dynmem->ci_sdbc_hosts.value.ul = process_vars->hosts;
7479 7479 sdbc_dynmem->ci_sdbc_pests.value.ul = process_vars->pests;
7480 7480 sdbc_dynmem->ci_sdbc_metas.value.ul = process_vars->metas;
7481 7481 sdbc_dynmem->ci_sdbc_holds.value.ul = process_vars->holds;
7482 7482 sdbc_dynmem->ci_sdbc_others.value.ul = process_vars->others;
7483 7483 sdbc_dynmem->ci_sdbc_notavail.value.ul = process_vars->notavail;
7484 7484
7485 7485 return (0);
7486 7486 }
7487 7487 #endif
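
The KSTAT_WRITE path in sdbc_dynmem_kstat_update_dm() doubles as a control
interface: writing a value with WAKE_DEALLOC_THREAD_DM set clears the flag
and nudges the deallocation thread through the standard mutex/condvar
handshake. The waiting side of that handshake would look like the usual
pattern (a sketch only; work_to_do() is a hypothetical predicate, not the
driver's actual thread loop):

	mutex_enter(&dynmem_processing_dm.thread_dm_lock);
	while (!work_to_do())
		cv_wait(&dynmem_processing_dm.thread_dm_cv,
		    &dynmem_processing_dm.thread_dm_lock);
	mutex_exit(&dynmem_processing_dm.thread_dm_lock);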