1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/ksynch.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/kmem.h>
  30 #include <sys/buf.h>
  31 #include <sys/cred.h>
  32 #include <sys/errno.h>
  33 #include <sys/ddi.h>
  34 
  35 #include <sys/nsc_thread.h>
  36 #include <sys/nsctl/nsctl.h>
  37 
  38 #include <sys/sdt.h>              /* dtrace is S10 or later */
  39 
  40 #include "sd_bcache.h"
  41 #include "sd_trace.h"
  42 #include "sd_io.h"
  43 #include "sd_bio.h"
  44 #include "sd_misc.h"
  45 #include "sd_ft.h"
  46 #include "sd_pcu.h"
  47 
  48 /*
  49  * dynamic memory support
  50  */
  51 _dm_process_vars_t dynmem_processing_dm;
  52 static int  sd_dealloc_flag_dm = NO_THREAD_DM;
  53 static void _sd_dealloc_dm(void);
  54 static int  _sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata);
  55 
  56 extern void sdbc_requeue_dmchain(_sd_queue_t *, _sd_cctl_t *, int, int);
  57 extern void sdbc_ins_dmqueue_front(_sd_queue_t *q, _sd_cctl_t *cc_ent);
  58 extern void sdbc_remq_dmchain(_sd_queue_t *q, _sd_cctl_t *cc_ent);
  59 extern void sdbc_requeue_head_dm_try(_sd_cctl_t *);
  60 extern int sdbc_use_dmchain;
  61 extern _sd_queue_t *sdbc_dm_queues;
  62 
  63 kcondvar_t   _sd_flush_cv;
  64 static volatile int _sd_flush_exit;
  65 
  66 /* secret flush toggle flag for testing */
  67 #ifdef DEBUG
  68 int _sdbc_flush_flag = 1; /* 0 ==> noflushing, 1 ==> flush */
  69 #endif
  70 
  71 static int sdbc_flush_pageio;
  72 
  73 
  74 
  75 /*
  76  * Forward declare all statics that are used before defined to enforce
  77  * parameter checking
  78  * Some (if not all) of these could be removed if the code were reordered
  79  */
  80 
  81 static void _sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos,
  82     nsc_size_t fba_len, int error);
  83 static void _sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos,
  84     nsc_size_t fba_len, int error);
  85 static void _sd_process_reflush(_sd_cctl_t *cc_ent);
  86 static void _sd_flush_thread(void);
  87 
  88 int
  89 _sdbc_flush_configure(void)
  90 {
  91         _sd_flush_exit = 1;
  92         sdbc_flush_pageio = 0;
  93         return (nsc_create_process(
  94             (void (*)(void *))_sd_flush_thread, 0, TRUE));
  95 }
  96 
  97 
  98 void
  99 _sdbc_flush_deconfigure(void)
 100 {
 101         _sd_unblock(&_sd_flush_cv);
 102         _sd_flush_exit = 0;
 103 }
 104 
 105 static int
 106 sdbc_alloc_static_cache(int reqblks)
 107 {
 108         _sd_cctl_t *centry;
 109         _sd_cctl_t *next_centry;
 110 
 111         if (centry = sdbc_centry_alloc_blks(_CD_NOHASH, 0, reqblks,
 112             ALLOC_NOWAIT)) {
 113                 /* release the blocks to the queue */
 114                 while (centry) {
 115                         next_centry = centry->cc_chain;
 116                         _sd_centry_release(centry);
 117                         centry = next_centry;
 118                 }
 119                 return (reqblks);
 120         }
 121         return (0);
 122 }
 123 
 124 int
 125 _sdbc_dealloc_configure_dm(void)
 126 {
 127         int rc = 0;
 128         int reqblks = MEGABYTE/BLK_SIZE(1); /* alloc in mb chunks */
 129         int i;
 130         int blk_groups; /* number of ~MB groups */
 131         int blks_remaining;
 132         int blks_allocd = 0;
 133 
 134         dynmem_processing_dm.alloc_ct = 0;
 135         dynmem_processing_dm.dealloc_ct = 0;
 136 
 137         if (sdbc_static_cache) { /* alloc all static cache memory here */
 138                 dynmem_processing_dm.max_dyn_list = reqblks;
 139 
 140                 blk_groups = CBLOCKS / reqblks;
 141                 blks_remaining = CBLOCKS % reqblks;
 142 
 143                 for (i = 0; i < blk_groups; ++i) {
 144                         if (!sdbc_alloc_static_cache(reqblks))
 145                                 break;
 146                         blks_allocd += reqblks;
 147                 }
 148                 DTRACE_PROBE2(_sdbc_dealloc_configure_dm1,
 149                     int, i, int, blks_allocd);
 150 
 151                 /* if successful then allocate any remaining blocks */
 152                 if ((i == blk_groups) && blks_remaining)
 153                         if (sdbc_alloc_static_cache(blks_remaining))
 154                                 blks_allocd += blks_remaining;
 155 
 156                 DTRACE_PROBE2(_sdbc_dealloc_configure_dm2,
 157                     int, i, int, blks_allocd);
 158 
 159                 sd_dealloc_flag_dm = NO_THREAD_DM;
 160 
 161                 if (blks_allocd < CBLOCKS) {
 162                         cmn_err(CE_WARN, "!Failed to allocate sdbc cache "
 163                             "memory.\n requested mem: %d MB; actual mem: %d MB",
 164                             CBLOCKS/reqblks, blks_allocd/reqblks);
 165                         rc = ENOMEM;
 166                 }
 167 
 168 
 169 #ifdef DEBUG
 170                 cmn_err(CE_NOTE, "!sdbc(_sdbc_dealloc_configure_dm) %d bytes "
 171                     "(%d cache blocks) allocated for static cache, "
 172                     "block size %d", blks_allocd * BLK_SIZE(1), blks_allocd,
 173                     BLK_SIZE(1));
 174 #endif /* DEBUG */
 175         } else {
 176                 sd_dealloc_flag_dm = PROCESS_CACHE_DM;
 177                 rc = nsc_create_process((void (*)(void *))_sd_dealloc_dm, 0,
 178                     TRUE);
 179                 if (rc != 0)
 180                         sd_dealloc_flag_dm = NO_THREAD_DM;
 181         }
 182         return (rc);
 183 }
 184 
 185 /*
 186  * sdbc_dealloc_dm_shutdown - deallocate cache memory.
 187  *
 188  * ARGUMENTS: none
 189  *
 190  * RETURNS: nothing
 191  *
 192  * USAGE:
 193  *      this function is intended for use after all i/o has stopped and all
 194  *      other cache threads have terminated.  write cache resources, if any
 195  *      are released, except in the case of pinned data.
 196  */
 197 static void
 198 sdbc_dealloc_dm_shutdown()
 199 {
 200         _sd_cctl_t *cc_ent;
 201         ss_centry_info_t *wctl;
 202 
 203         cc_ent = _sd_cctl[0];
 204 
 205         if (!cc_ent)
 206                 return;
 207 
 208         do {
 209                 if (cc_ent->cc_alloc_size_dm) {
 210                         /* HOST or OTHER */
 211 
 212                         if (cc_ent->cc_data)
 213                                 kmem_free(cc_ent->cc_data,
 214                                     cc_ent->cc_alloc_size_dm);
 215 
 216                         cc_ent->cc_alloc_size_dm = 0;
 217 
 218                         dynmem_processing_dm.dealloc_ct++;
 219 
 220                         DTRACE_PROBE2(sdbc_dealloc_dm_shutdown, char *,
 221                             cc_ent->cc_data, int, cc_ent->cc_alloc_size_dm);
 222                 }
 223 
 224                 /* release safestore resource, if any. preserve pinned data */
 225                 if (!(CENTRY_DIRTY(cc_ent)) && (wctl = cc_ent->cc_write)) {
 226                         wctl->sc_flag = 0;
 227                         wctl->sc_dirty = 0;
 228 
 229                         SSOP_SETCENTRY(sdbc_safestore, wctl);
 230                         SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);
 231                 }
 232                 cc_ent = cc_ent->cc_link_list_dm;
 233         } while (cc_ent != _sd_cctl[0]);
 234 }
 235 
 236 void
 237 _sdbc_dealloc_deconfigure_dm(void)
 238 {
 239         int one_sec;
 240 
 241         if (sdbc_static_cache) {
 242                 sdbc_dealloc_dm_shutdown();
 243                 return;
 244         }
 245 
 246         if (sd_dealloc_flag_dm == NO_THREAD_DM)
 247                 return;                 /* thread never started */
 248         one_sec = HZ; /* drv_usectohz(1000000); */
 249 
 250         mutex_enter(&dynmem_processing_dm.thread_dm_lock);
 251         sd_dealloc_flag_dm = CACHE_SHUTDOWN_DM;
 252         cv_broadcast(&dynmem_processing_dm.thread_dm_cv);
 253         mutex_exit(&dynmem_processing_dm.thread_dm_lock);
 254 
 255         while (sd_dealloc_flag_dm != CACHE_THREAD_TERMINATED_DM)
 256                 delay(one_sec);
 257 
 258         sd_dealloc_flag_dm = NO_THREAD_DM;
 259 }
 260 
/*
 * This complicated - possibly overly complicated - routine works as follows:
 * In general the routine sleeps a specified amount of time then wakes and
 * examines the entire centry list. If an entry is available it ages it by one
 * tick, else it clears the aging flag completely. It then determines if the
 * centry has aged sufficiently to have its memory deallocated and for it to
 * be placed at the top of the lru.
 *
 * There are two deallocation schemes in place depending on whether the
 * centry is a standalone entry or it is a member of a host/parasite chain.
 *
 * The behavior for a standalone entry is as follows:
 * If the given centry is selected it will age normally, however at full
 * aging it will only be placed at the head of the lru. Its memory will
 * not be deallocated until a further aging level has been reached. The
 * entries selected for this behavior are governed by counting the number
 * of these holdovers in existence on each wakeup and comparing it
 * to a specified percentage. This comparison is always one cycle out of
 * date and will float in the relative vicinity of the specified number.
 *
 * The behavior for a host/parasite chain is as follows:
 * The chain is examined. If all entries are fully aged the entire chain
 * is removed - i.e. memory is deallocated from the host entry and all memory
 * references removed from the parasitic entries and each entry requeued on
 * to the lru.
 *
 * There are three delay timeouts and two percentage levels specified. Timeout
 * level 1 is honored between 100% free and pcnt level 1. Timeout level 2 is
 * honored between pcnt level 1 and pcnt level 2. Timeout level 3 is
 * honored between pcnt level 2 and 0% free. In addition there exists an
 * accelerated
 * aging flag which mimics hysteresis behavior. If the available centrys fall
 * between pcnt1 and pcnt2 an 8 bit counter is switched on. The effect is to
 * keep the timer value at timer level 2 for 8 cycles even if the number of
 * available cache entries drifts above pcnt1. If it falls below pcnt2 an
 * additional 8 bit counter is switched on. This causes the sleep timer to
 * remain at timer level 3 for at least 8 cycles even if it floats above
 * pcnt2 or even pcnt1. The effect of all this is to accelerate the release
 * of system resources under a heavy load.
 *
 * All of the footwork can be stubbed out by a judicious selection of values
 * for the times, aging counts and pcnts.
 *
 * All of these behavior parameters are adjustable on the fly via the kstat
 * mechanism. In addition there is a thread wakeup msg available through the
 * same mechanism.
 */
 307 
/*
 * _sd_dealloc_dm - the dynamic memory deallocation thread.
 *
 * Loops until sd_dealloc_flag_dm is set to CACHE_SHUTDOWN_DM (by
 * _sdbc_dealloc_deconfigure_dm), at which point it frees all cache
 * memory and exits after setting CACHE_THREAD_TERMINATED_DM.  See the
 * block comment above for the full aging/deallocation policy.
 */
static void
_sd_dealloc_dm(void)
{
	int one_sec_tics, tic_delay;
	int sleep_tics_lvl1, sleep_tics_lvl2, sleep_tics_lvl3;
	int transition_lvl1, transition_lvl2;
	int host_cache_aging_ct, meta_cache_aging_ct, hold_cache_aging_ct;
	int max_holds_ct;
	int cache_aging_ct, hold_candidate, last_holds_ct;
	_sd_cctl_t *cc_ent, *next_ccentry, *cur_ent, *nxt_ent;
	ss_centry_info_t *wctl;
	int current_breakout_count, number_cache_entries;
	int dealloc;
	_dm_process_vars_t *ppvars;

	int write_dealloc; /* remove after debugging */

	ppvars = &dynmem_processing_dm;

	/* setup a one sec time var */
	one_sec_tics = HZ; /* drv_usectohz(1000000); */

	ppvars->history = 0;

	cc_ent = _sd_cctl[0];

	number_cache_entries = _sd_net_config.sn_cpages;

	last_holds_ct = 0;

	/*CONSTANTCONDITION*/
	while (1) {
		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM) {
			/* finished.  shutdown - get out */
			sdbc_dealloc_dm_shutdown(); /* free all memory */
			sd_dealloc_flag_dm = CACHE_THREAD_TERMINATED_DM;
			return;
		}

		/*
		 * has the world changed?  re-read the (kstat-tunable)
		 * policy parameters every pass.
		 */

		/*
		 * get num cctl entries (%) below which different sleep
		 * rates kick in
		 */
		transition_lvl1 =
		    (ppvars->cache_aging_pcnt1*number_cache_entries) / 100;
		transition_lvl2 =
		    (ppvars->cache_aging_pcnt2*number_cache_entries) / 100;

		/* get sleep rates for each level */
		sleep_tics_lvl1 = ppvars->cache_aging_sec1 * one_sec_tics;
		sleep_tics_lvl2 = ppvars->cache_aging_sec2 * one_sec_tics;
		sleep_tics_lvl3 = ppvars->cache_aging_sec3 * one_sec_tics;

		/* get num of cycles for full normal aging */
		host_cache_aging_ct = ppvars->cache_aging_ct1;

		/* get num of cycles for full meta aging */
		meta_cache_aging_ct = ppvars->cache_aging_ct2;

		/* get num of cycles for full extended holdover aging */
		hold_cache_aging_ct = ppvars->cache_aging_ct3;

		/* get maximum holds count in % */
		max_holds_ct = (ppvars->max_holds_pcnt*number_cache_entries)
		    / 100;

		/* apply the delay chosen by the previous pass's history */
		tic_delay = sleep_tics_lvl1;
		if (sd_dealloc_flag_dm == TIME_DELAY_LVL1)
			tic_delay = sleep_tics_lvl2;
		else
			if (sd_dealloc_flag_dm == TIME_DELAY_LVL2)
				tic_delay = sleep_tics_lvl3;

		/* sleep for the computed interval, or until signalled */
		mutex_enter(&ppvars->thread_dm_lock);
		(void) cv_reltimedwait(&ppvars->thread_dm_cv,
		    &ppvars->thread_dm_lock, tic_delay, TR_CLOCK_TICK);
		mutex_exit(&ppvars->thread_dm_lock);

		/* check for special directives on wakeup */
		if (ppvars->process_directive &
		    MAX_OUT_ACCEL_HIST_FLAG_DM) {
			ppvars->process_directive &=
			    ~MAX_OUT_ACCEL_HIST_FLAG_DM;
			ppvars->history =
			    (HISTORY_LVL1|HISTORY_LVL2);
		}

		/* Start of deallocation loop */
		current_breakout_count = 0;

		/* reset per-pass statistics */
		ppvars->nodatas = 0;
		write_dealloc = 0;
		ppvars->deallocs = 0;
		ppvars->candidates = 0;
		ppvars->hosts = 0;
		ppvars->pests = 0;
		ppvars->metas = 0;
		ppvars->holds = 0;
		ppvars->others = 0;
		ppvars->notavail = 0;

		/* walk every entry on the circular dm link list once */
		while (sd_dealloc_flag_dm != CACHE_SHUTDOWN_DM &&
		    current_breakout_count < number_cache_entries) {

			next_ccentry = cc_ent->cc_link_list_dm;

			/* skip entries we cannot lock or that are busy */
			if (_sd_entry_availability_dm(cc_ent, &ppvars->nodatas)
			    == FALSE) {
				ppvars->notavail++;
				goto next_dealloc_entry;
			}

			/*
			 * classify the entry: metadata entries age on their
			 * own schedule; plain ("other") entries may become
			 * holdover candidates while the holds quota (from
			 * the previous pass) is not exhausted
			 */
			cache_aging_ct = host_cache_aging_ct;
			hold_candidate = FALSE;
			if (cc_ent->cc_aging_dm & HOST_ENTRY_DM)
				ppvars->hosts++;
			else
				if (cc_ent->cc_aging_dm & PARASITIC_ENTRY_DM)
					ppvars->pests++;
			else
				if (cc_ent->cc_aging_dm & STICKY_METADATA_DM) {
					cache_aging_ct = meta_cache_aging_ct;
					ppvars->metas++;
				} else {
					if (last_holds_ct < max_holds_ct)
						hold_candidate = TRUE;
					ppvars->others++;
				}

			ppvars->candidates++;

			/* not fully aged yet: bump the age and move on */
			if ((cc_ent->cc_aging_dm & FINAL_AGING_DM) <
			    cache_aging_ct) {
				cc_ent->cc_aging_dm += FIRST_AGING_DM;
				CLEAR_CENTRY_PAGEIO(cc_ent);
				CLEAR_CENTRY_INUSE(cc_ent);
				goto next_dealloc_entry;
			}

			/* bonafide aged entry - examine its chain */
			dealloc = TRUE;
			cur_ent = cc_ent->cc_head_dm;
			while (cur_ent) {
				if (cur_ent == cc_ent)
					cur_ent->cc_aging_dm |= AVAIL_ENTRY_DM;
				else {
					if (_sd_entry_availability_dm(cur_ent,
					    0) == TRUE) {
						cur_ent->cc_aging_dm |=
						    AVAIL_ENTRY_DM;
						if ((cur_ent->cc_aging_dm &
						    FINAL_AGING_DM) <
						    cache_aging_ct)
							dealloc = FALSE;
					} else
						dealloc = FALSE;
				}

				cur_ent = cur_ent->cc_next_dm;
			}
			cur_ent = cc_ent->cc_head_dm;

			/* chain not fully free - free inuse for all entries */
			if (dealloc == FALSE) {
				while (cur_ent) {
					nxt_ent = cur_ent->cc_next_dm;

					if (cur_ent->cc_aging_dm &
					    AVAIL_ENTRY_DM) {
						cur_ent->cc_aging_dm &=
						    ~AVAIL_ENTRY_DM;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}
					cur_ent = nxt_ent;
				}
			} else { /* OK - free memory */
				/*
				 * holdover case: keep the memory for now,
				 * advance extended aging and release only
				 * the safestore resource
				 */
				if (hold_candidate == TRUE &&
				    (cur_ent->cc_aging_dm & FINAL_AGING_DM) <
				    hold_cache_aging_ct) {
					ppvars->holds++;

					ASSERT(cur_ent == cc_ent);

					cc_ent->cc_aging_dm += FIRST_AGING_DM;

					cur_ent->cc_aging_dm &= ~AVAIL_ENTRY_DM;

					wctl = cur_ent->cc_write;

					CLEAR_CENTRY_PAGEIO(cur_ent);
					CLEAR_CENTRY_INUSE(cur_ent);

					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
					goto next_dealloc_entry;
				} /* if (hold_candidate == TRUE */

				/* deallocate every entry in the chain */
				while (cur_ent) {

					DTRACE_PROBE4(_sd_dealloc_dm,
					    _sd_cctl_t *, cur_ent,
					    int, CENTRY_CD(cur_ent),
					    int, CENTRY_BLK(cur_ent),
					    uint_t, cur_ent->cc_aging_dm);

					/*
					 * bad chain links are only removed
					 * from the hash and unlocked; their
					 * memory is not touched
					 */
					if ((cur_ent->cc_aging_dm
					    & BAD_CHAIN_DM)) {
						(void) _sd_hash_delete(
						    (_sd_hash_hd_t *)cur_ent,
						    _sd_htable);

						nxt_ent = cur_ent->cc_next_dm;
						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
						cur_ent = nxt_ent;
						continue;
					}

					ppvars->deallocs++;

					if (cur_ent->cc_alloc_size_dm) {
						int qidx;
						_sd_queue_t *q;

						/* HOST or OTHER */

						/* debugging */
						ppvars->dealloc_ct++;
						cur_ent->cc_dealloc_ct_dm++;
						kmem_free(cur_ent->cc_data,
						    cur_ent->cc_alloc_size_dm);

						/*
						 * remove from queue
						 * in preparation for putting
						 * on the 0 queue after
						 * memory is freed
						 */
						if (sdbc_use_dmchain) {

							qidx =
							    cur_ent->cc_cblocks;
							q = &sdbc_dm_queues
							    [qidx];

							sdbc_remq_dmchain(q,
							    cur_ent);
						}
					}

					/* scrub the entry's dm state */
					wctl = cur_ent->cc_write;
					cur_ent->cc_write = 0;
					cur_ent->cc_data = 0;
					cur_ent->cc_alloc_size_dm = 0;
					cur_ent->cc_head_dm = NULL;
					cur_ent->cc_aging_dm &=
					    ~(FINAL_AGING_DM | ENTRY_FIELD_DM |
					    CATAGORY_ENTRY_DM | AVAIL_ENTRY_DM |
					    PREFETCH_BUF_I | PREFETCH_BUF_E);

					(void) _sd_hash_delete(
					    (_sd_hash_hd_t *)cur_ent,
					    _sd_htable);
					cur_ent->cc_valid = 0;

					/* requeue as a memoryless entry */
					if (sdbc_use_dmchain) {
						_sd_queue_t *q;

						nxt_ent = cur_ent->cc_next_dm;

						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);

						q = &sdbc_dm_queues[0];
						sdbc_ins_dmqueue_front(q,
						    cur_ent);
					} else {
						_sd_requeue_head(cur_ent);

						nxt_ent = cur_ent->cc_next_dm;
						cur_ent->cc_next_dm = NULL;

						CLEAR_CENTRY_PAGEIO(cur_ent);
						CLEAR_CENTRY_INUSE(cur_ent);
					}

					cur_ent = nxt_ent;

					/* release safestore resource, if any */
					if (wctl) {
						write_dealloc++;
						wctl->sc_flag = 0;
						wctl->sc_dirty = 0;
						SSOP_SETCENTRY(sdbc_safestore,
						    wctl);
						SSOP_DEALLOCRESOURCE(
						    sdbc_safestore,
						    wctl->sc_res);
					}
				} /* while (cur_ent) */
			} /* else OK - free memory */
next_dealloc_entry:
		current_breakout_count++;

		cc_ent = next_ccentry;
		}  /* while (entries) */

		/* optional per-pass statistics reporting */
		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS1_DM) {
			cmn_err(CE_NOTE,
			    "!notavl=%x, nodat=%x, cand=%x, hosts=%x,"
			    " pests=%x, metas=%x, holds=%x, others=%x,"
			    " deallo=%x",
			    ppvars->notavail, ppvars->nodatas,
			    ppvars->candidates, ppvars->hosts, ppvars->pests,
			    ppvars->metas, ppvars->holds, ppvars->others,
			    ppvars->deallocs);
		}

		if (ppvars->monitor_dynmem_process & RPT_DEALLOC_STATS2_DM) {
			cmn_err(CE_NOTE,
			    "!hist=%x, gross a/d=%x %x", ppvars->history,
			    ppvars->alloc_ct, ppvars->dealloc_ct);
		}

		/* shutdown requested mid-pass: handle it at loop top */
		if (sd_dealloc_flag_dm == CACHE_SHUTDOWN_DM)
			continue;

		/* holds quota for the next pass (one cycle out of date) */
		last_holds_ct = ppvars->holds;

		/* set the history flag which will govern the sleep rate */
		if (ppvars->nodatas > transition_lvl1) {
			/* upper - lots of virgin cctls */
			if (ppvars->history)
				ppvars->history >>= 1;
		} else {
			if (ppvars->nodatas > transition_lvl2) {
				/* middle - not so many virgin cctls */
				if (ppvars->history & (HISTORY_LVL1-1))
					ppvars->history >>= 1;
				else
					ppvars->history = HISTORY_LVL1;

			} else {
				/*
				 * appear to be running low - accelerate the
				 * aging to free more
				 */
				if (ppvars->history & HISTORY_LVL2)
					ppvars->history >>= 1;
				else
					ppvars->history =
					    (HISTORY_LVL1|HISTORY_LVL2);
			}
		}

		/* translate history bits into the next pass's sleep level */
		sd_dealloc_flag_dm = TIME_DELAY_LVL0;
		if (ppvars->history & HISTORY_LVL2)
			sd_dealloc_flag_dm = TIME_DELAY_LVL2;
		else
			if (ppvars->history & HISTORY_LVL1)
				sd_dealloc_flag_dm = TIME_DELAY_LVL1;

	} /* while (TRUE) */
}
 685 
/*
 * _sd_entry_availability_dm - test whether cc_ent's memory may be
 * reclaimed by the dynamic-memory deallocation thread.
 *
 * On success (TRUE) the centry is returned with both its INUSE and
 * PAGEIO bits held; the caller is responsible for releasing them.
 * On failure (FALSE) any bits acquired here have been released.
 * If nodata is non-NULL it is incremented whenever the centry is
 * found to have no memory attached (cc_data == 0).
 */
int
_sd_entry_availability_dm(_sd_cctl_t *cc_ent, int *nodata)
{
        /*
         * if using dmchaining return immediately and do not attempt
         * to acquire the cc_ent if there is no memory associated with
         * this cc_ent.
         * this avoids conflicts for centrys on the 0 queue.
         * see sdbc_get_dmchain()
         */

        if ((sdbc_use_dmchain) && (cc_ent->cc_data == 0)) {

                if (nodata)
                        (*nodata)++;

                DTRACE_PROBE(sdbc_availability_dm_end1);
                return (FALSE);
        }

        /* grab the inuse bit; a busy centry is simply skipped */
        if ((SET_CENTRY_INUSE(cc_ent))) {

                DTRACE_PROBE(sdbc_availability_dm_end2);

                return (FALSE);
        }


        /* then the pageio bit; back out the inuse bit on failure */
        if ((SET_CENTRY_PAGEIO(cc_ent))) {

                CLEAR_CENTRY_INUSE(cc_ent);

                DTRACE_PROBE(sdbc_availability_dm_end3);

                return (FALSE);
        }

        /*
         * we allow the QHEAD flag as it does not affect the availability
         * of memory for aging
         */
        if ((CENTRY_DIRTY(cc_ent)) || (CENTRY_IO_INPROGRESS(cc_ent)) ||
            (cc_ent->cc_flag & ~(CC_QHEAD)) ||
            cc_ent->cc_dirty_next || cc_ent->cc_dirty_link ||
            cc_ent->cc_data == 0) {

                /* not reclaimable now - restart its final aging */
                cc_ent->cc_aging_dm &= ~FINAL_AGING_DM;
                if (nodata)
                        if (cc_ent->cc_data == 0) {
                                (*nodata)++;
                }

                /* release bits in reverse order of acquisition */
                CLEAR_CENTRY_PAGEIO(cc_ent);
                CLEAR_CENTRY_INUSE(cc_ent);

                DTRACE_PROBE(sdbc_availability_dm_end4);

                return (FALSE);
        }

        return (TRUE);
}
 748 
 749 /*
 750  * function below to prohibit code movement by compiler
 751  * and avoid using spinlocks for syncronization
 752  */
 753 static void
 754 _sd_cc_iostatus_initiate(_sd_cctl_t *cc_ent)
 755 {
 756         cc_ent->cc_iostatus = _SD_IO_INITIATE;
 757         sd_serialize();
 758 }
 759 
 760 /*
 761  * Yet another switch!
 762  * alloc mem and coalesce if at least this number of frags
 763  */
 764 static int sdbc_coalesce_backend = 1;
 765 
 766 /*
 767  * optimization for _sd_async_flclist()
 768  * called only if not doing pageio and sdbc_coalesce_backend > 0
 769  *
 770  * returns with pagio bit set in the centrys in list
 771  */
 772 static unsigned char *
 773 sdbc_alloc_io_mem(_sd_cctl_t *cc_ent, int first_dirty, int last_dirty)
 774 {
 775         unsigned char *prev_addr = NULL;
 776         _sd_cctl_t *cc_ent_orig = cc_ent;
 777         int fba_len;
 778         int total_len_bytes = 0;
 779         unsigned char *start_addr = NULL; /* function return value */
 780         unsigned char *next_addr;
 781         int num_frags = 0;
 782 
 783         if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
 784                 WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);
 785 
 786                 fba_len = SDBC_LOOKUP_LEN(first_dirty);
 787                 total_len_bytes += FBA_SIZE(fba_len);
 788 
 789                 prev_addr = cc_ent->cc_data;
 790                 cc_ent = cc_ent->cc_dirty_next;
 791         }
 792 
 793         while (cc_ent) {
 794 
 795                 WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);
 796                 /* check for contiguity */
 797                 if (prev_addr &&
 798                     !((prev_addr + CACHE_BLOCK_SIZE) == cc_ent->cc_data))
 799                         ++num_frags;
 800 
 801                 /* compute length */
 802                 if (FULLY_DIRTY(cc_ent)) {
 803                         total_len_bytes += CACHE_BLOCK_SIZE;
 804                 } else {
 805                         fba_len = SDBC_LOOKUP_LEN(last_dirty);
 806                         total_len_bytes += FBA_SIZE(fba_len);
 807                 }
 808 
 809                 prev_addr = cc_ent->cc_data;
 810                 cc_ent = cc_ent->cc_dirty_next;
 811         }
 812 
 813         if (num_frags >= sdbc_coalesce_backend) {
 814                 /*
 815                  * TODO - determine metric for deciding
 816                  * whether to coalesce memory or do separate i/o's
 817                  */
 818 
 819                 DTRACE_PROBE(sdbc_io_mem_kmem_start);
 820 
 821                 if (start_addr = kmem_alloc(total_len_bytes, KM_NOSLEEP)) {
 822                         int sblk, offset;
 823 
 824                         cc_ent = cc_ent_orig;
 825 
 826                         cc_ent->cc_anon_addr.sa_virt = start_addr;
 827                         cc_ent->cc_anon_len = total_len_bytes;
 828 
 829                         next_addr = start_addr;
 830 
 831                         DTRACE_PROBE2(sdbc_io_mem_bcopy_start,
 832                             int, num_frags, int, total_len_bytes);
 833 
 834                         /* copy the first dirty piece */
 835                         if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
 836 
 837                                 fba_len = SDBC_LOOKUP_LEN(first_dirty);
 838                                 sblk = SDBC_LOOKUP_STPOS(first_dirty);
 839                                 offset = FBA_SIZE(sblk);
 840 
 841                                 bcopy(cc_ent->cc_data + offset, next_addr,
 842                                     FBA_SIZE(fba_len));
 843                                 cc_ent = cc_ent->cc_dirty_next;
 844                                 next_addr += FBA_SIZE(fba_len);
 845                         }
 846 
 847                         /* copy the rest of data */
 848                         while (cc_ent) {
 849                                 if (FULLY_DIRTY(cc_ent)) {
 850                                         bcopy(cc_ent->cc_data, next_addr,
 851                                             CACHE_BLOCK_SIZE);
 852                                         next_addr += CACHE_BLOCK_SIZE;
 853                                 } else {
 854                                         fba_len = SDBC_LOOKUP_LEN(last_dirty);
 855                                         bcopy(cc_ent->cc_data, next_addr,
 856                                             FBA_SIZE(fba_len));
 857                                         next_addr += FBA_SIZE(fba_len);
 858                                 }
 859 
 860                                 cc_ent = cc_ent->cc_dirty_next;
 861                         }
 862 
 863                         DTRACE_PROBE(sdbc_io_mem_bcopy_end);
 864                 }
 865 
 866                 DTRACE_PROBE(sdbc_io_mem_kmem_end);
 867         }
 868 
 869         return (start_addr);
 870 }
 871 
/*
 * _sd_async_flclist - issue asynchronous write i/o for a chain of
 * dirty cache entries (linked through cc_dirty_next) on device rdev.
 *
 * A first or last entry whose dirty mask is fragmented is detached
 * and flushed separately through _sd_async_flcent(); the remaining
 * run is issued as one i/o.  When not doing pageio and
 * sdbc_coalesce_backend is enabled, sdbc_alloc_io_mem() may bcopy the
 * chain's dirty data into a single anonymous buffer so the i/o needs
 * only one fba list entry.  Completion is handled by _sd_flclist_ea()
 * and the chain is queued for reaping via _sd_enqueue_io_pending().
 */
void
_sd_async_flclist(_sd_cctl_t *cclist, dev_t rdev)
{
        int flushed, i, cd;
        uint_t first_dirty, last_dirty;
        _sd_cctl_t *cc_ent, *cc_prev = NULL;
        struct buf *bp;
        int dblk, fba_len;
        int len;
        int toflush;
        int coalesce; /* convenience boolean */
        unsigned char *anon_mem = NULL;
        extern int sdbc_do_page;


        SDTRACE(ST_ENTER|SDF_FLCLIST, CENTRY_CD(cclist),
            0, BLK_TO_FBA_NUM(CENTRY_BLK(cclist)), 0, 0);

        coalesce = (!sdbc_do_page && sdbc_coalesce_backend);

        /*
         * a fragmented first entry cannot be part of the contiguous
         * write: detach it and flush it by itself.
         */
        cc_ent = cclist;
        _sd_cc_iostatus_initiate(cc_ent);
        first_dirty = CENTRY_DIRTY(cc_ent);
        if (SDBC_IS_FRAGMENTED(first_dirty)) {
                cclist = cc_ent->cc_dirty_next;
                cc_ent->cc_dirty_next = NULL;
                _sd_async_flcent(cc_ent, rdev);
                cc_ent = cclist;
                first_dirty = 0;
        }

        /* walk to the last entry, counting entries to flush */
        toflush = 0;
        while (cc_ent->cc_dirty_next) {
                if (cc_ent->cc_iocount)
                        SDALERT(SDF_FLCLIST, CENTRY_CD(cc_ent), 0,
                            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                            cc_ent->cc_iocount, 0);
                cc_prev = cc_ent;
                cc_ent = cc_ent->cc_dirty_next;
                toflush++;
        }
        /* likewise detach and separately flush a fragmented last entry */
        _sd_cc_iostatus_initiate(cc_ent);
        last_dirty = CENTRY_DIRTY(cc_ent);
        if (SDBC_IS_FRAGMENTED(last_dirty)) {
                if (cc_prev)
                        cc_prev->cc_dirty_next = NULL;
                _sd_async_flcent(cc_ent, rdev);
                last_dirty = 0;
        }
        else
                toflush++;

        if (toflush == 0)
                return;


        /* starting disk block; adjust for a partially dirty first entry */
        dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cclist));
        if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty)))
                dblk += SDBC_LOOKUP_STPOS(first_dirty);

        cd = CENTRY_CD(cclist);
        bp = sd_alloc_iob(rdev, dblk, toflush, B_WRITE);
        cc_ent = cclist;

        /*
         * try to coalesce the chain into one anonymous buffer; on
         * success the whole i/o is described by a single fba entry.
         */
        if (coalesce && (anon_mem = sdbc_alloc_io_mem(cc_ent, first_dirty,
            last_dirty)))
                sd_add_fba(bp, &cc_ent->cc_anon_addr, 0,
                    FBA_NUM(cc_ent->cc_anon_len));

        if (first_dirty && (!_SD_BMAP_ISFULL(first_dirty))) {
                cc_ent->cc_iocount = flushed = 1;

                /* pageio bit already set in sdbc_alloc_io_mem() above */
                if (!coalesce)
                        WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

                fba_len = SDBC_LOOKUP_LEN(first_dirty);

                /* build buffer only if it was not done above */
                if (!anon_mem) {
                        i = SDBC_LOOKUP_STPOS(first_dirty);
                        sd_add_fba(bp, &cc_ent->cc_addr, i, fba_len);
                        DATA_LOG(SDF_FLSHLIST, cc_ent, i, fba_len);

                        DTRACE_PROBE4(_sd_async_flclist_data1, int,
                            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + i,
                            int, fba_len, char *,
                            *(int64_t *)(cc_ent->cc_data + FBA_SIZE(i)),
                            char *, *(int64_t *)(cc_ent->cc_data +
                            FBA_SIZE(i + fba_len) - 8));
                }

                len = FBA_SIZE(fba_len);
                cc_ent = cc_ent->cc_dirty_next;
        } else {
                len = 0;
                flushed = 0;
        }
        /* middle entries are fully dirty; only the last may be partial */
        while (cc_ent) {
                _sd_cc_iostatus_initiate(cc_ent);

                /* pageio bit already set in sdbc_alloc_io_mem() above */
                if (!coalesce)
                        WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

                if (FULLY_DIRTY(cc_ent)) {
                        flushed++;
                        cc_ent->cc_iocount = 1;

                        /* build buffer only if it was not done above */
                        if (!anon_mem) {
                                sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
                                DATA_LOG(SDF_FLSHLIST, cc_ent, 0, BLK_FBAS);

                                DTRACE_PROBE4(_sd_async_flclist_data2,
                                    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                                    int, BLK_FBAS, char *,
                                    *(int64_t *)(cc_ent->cc_data),
                                    char *, *(int64_t *)(cc_ent->cc_data +
                                    FBA_SIZE(BLK_FBAS) - 8));
                        }

                        len += CACHE_BLOCK_SIZE;
                } else {
#if defined(_SD_DEBUG)
                        /*
                         * consistency check.
                         */
                        if (!last_dirty || cc_ent->cc_dirty_next ||
                            SDBC_IS_FRAGMENTED(last_dirty)) {
                                SDALERT(SDF_FLCLIST, cd, 0,
                                    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                                    cc_ent->cc_dirty_next, last_dirty);
                                cmn_err(CE_WARN,
                                    "!_sd_err: flclist: last_dirty %x next %x",
                                    last_dirty, cc_ent->cc_dirty_next);
                        }
#endif
                        flushed++;
                        cc_ent->cc_iocount = 1;

                        fba_len = SDBC_LOOKUP_LEN(last_dirty);

                        /* build buffer only if it was not done above */
                        if (!anon_mem) {
                                sd_add_fba(bp, &cc_ent->cc_addr, 0, fba_len);
                                DATA_LOG(SDF_FLSHLIST, cc_ent, 0, fba_len);

                                DTRACE_PROBE4(_sd_async_flclist_data3, int,
                                    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                                    int, fba_len, char *,
                                    *(int64_t *)(cc_ent->cc_data), char *,
                                    *(int64_t *)(cc_ent->cc_data +
                                    FBA_SIZE(fba_len) - 8));
                        }

                        len += FBA_SIZE(fba_len);
                }
                cc_ent = cc_ent->cc_dirty_next;
        }

#ifdef DEBUG
        if (anon_mem)
                ASSERT(len == cclist->cc_anon_len);
#endif

        /* SDTRACE(ST_INFO|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, bp); */
        (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
            _sd_flclist_ea, cclist);

        DISK_FBA_WRITE(cd, FBA_NUM(len));
        /* increment number of bytes destaged to disk */
        WRITE_DESTAGED(cd, FBA_NUM(len));

        _sd_enqueue_io_pending(cd, cclist);

        SDTRACE(ST_EXIT|SDF_FLCLIST, cd, FBA_NUM(len), dblk, flushed, 0);
}
1050 
1051 
1052 void
1053 _sd_enqueue_io_pending(int cd, _sd_cctl_t *cclist)
1054 {
1055         _sd_cd_info_t *cdi;
1056 
1057         cdi = &(_sd_cache_files[cd]);
1058         if (cdi->cd_io_head == NULL)
1059                 cdi->cd_io_head = cdi->cd_io_tail = cclist;
1060         else {
1061                 cdi->cd_io_tail->cc_dirty_link = cclist;
1062                 cdi->cd_io_tail = cclist;
1063         }
1064 }
1065 
1066 
1067 
/*
 * _sd_async_flcent - issue asynchronous write i/o for a single dirty
 * cache entry on device rdev.
 *
 * A fully dirty block goes out as one i/o; otherwise each contiguous
 * dirty fragment is issued as its own i/o, with cc_iocount set to the
 * fragment count beforehand so _sd_flcent_ea() can tell when the last
 * one completes.  The entry is then queued for reaping via
 * _sd_enqueue_io_pending().
 */
void
_sd_async_flcent(_sd_cctl_t *cc_ent, dev_t rdev)
{
        int dblk, len, sblk;
        int dirty;
        struct buf *bp;
        int cd;

        cd = CENTRY_CD(cc_ent);

        SDTRACE(ST_ENTER|SDF_FLCENT, cd, 0,
            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, 0);
#if defined(_SD_DEBUG_PATTERN)
        check_write_consistency(cc_ent);
#endif
        if (cc_ent->cc_iocount)
                SDALERT(SDF_FLCENT, cd, 0, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                    cc_ent->cc_iocount, 0);
        _sd_cc_iostatus_initiate(cc_ent);
        WAIT_CENTRY_PAGEIO(cc_ent, sdbc_flush_pageio);

        dirty = CENTRY_DIRTY(cc_ent);

        if (_SD_BMAP_ISFULL(dirty)) {
                /* whole block dirty - one i/o covers it */
                cc_ent->cc_iocount = 1;
                dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
                bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
                sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
                DATA_LOG(SDF_FLSHENT, cc_ent, 0, BLK_FBAS);

                DTRACE_PROBE4(_sd_async_flcent_data1,
                    int, BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                    int, BLK_FBAS, char *, *(int64_t *)(cc_ent->cc_data),
                    char *, *(int64_t *)(cc_ent->cc_data +
                    FBA_SIZE(BLK_FBAS) - 8));
                cc_ent->cc_iocount = 1;
                (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
                    _sd_flcent_ea, cc_ent);
                DISK_FBA_WRITE(cd, BLK_FBAS);
                /* increment number of bytes destaged to disk */
                WRITE_DESTAGED(cd, BLK_FBAS);
        } else {
                /*
                 * partially dirty - set cc_iocount to the number of
                 * dirty fragments BEFORE issuing any i/o, then write
                 * one fragment per pass (SDBC_LOOKUP_MODIFY strips the
                 * fragment just processed from the local dirty mask).
                 */
                cc_ent->cc_iocount = SDBC_LOOKUP_DTCOUNT(dirty);

                while (dirty) {
                        sblk = SDBC_LOOKUP_STPOS(dirty);
                        len = SDBC_LOOKUP_LEN(dirty);
                        SDBC_LOOKUP_MODIFY(dirty);

                        dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk;
                        bp = sd_alloc_iob(rdev, dblk, 1, B_WRITE);
                        sd_add_fba(bp, &cc_ent->cc_addr, sblk, len);
                        DATA_LOG(SDF_FLSHENT, cc_ent, sblk, len);

                        DTRACE_PROBE4(_sd_async_flcent_data2, int,
                            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)) + sblk,
                            int, len, char *,
                            *(int64_t *)(cc_ent->cc_data + FBA_SIZE(sblk)),
                            char *, *(int64_t *)(cc_ent->cc_data +
                            FBA_SIZE(sblk + len) - 8));

                        /* SDTRACE(ST_INFO|SDF_FLCENT, cd, len, dblk, 0, bp); */

                        (void) sd_start_io(bp, _sd_cache_files[cd].cd_strategy,
                            _sd_flcent_ea, cc_ent);
                        DISK_FBA_WRITE(cd, len);
                        /* increment number of bytes destaged to disk */
                        WRITE_DESTAGED(cd, len);
                }
        }
        _sd_enqueue_io_pending(cd, cc_ent);

        SDTRACE(ST_EXIT|SDF_FLCENT, cd, 0, dblk, 0, 0);
}
1142 
/*
 * _sd_process_pending - reap completed (or failed) writes for cache
 * descriptor cd.
 *
 * Chains are taken off the cd_io_head queue in FIFO order; the loop
 * stops when the queue is empty or the head chain's i/o is still in
 * flight (cc_iostatus == _SD_IO_INITIATE, read locklessly - see
 * _sd_cc_iostatus_initiate()).  sh_numio is decremented by the number
 * of centrys processed.  For each completed centry the dirty state is
 * cleared and its safestore resource released; centrys re-dirtied
 * while their write was in flight (DIRTY_PENDING) are collected and
 * re-queued through _sd_enqueue_dirty_chain().  Failed chains are
 * diverted to _sd_process_failure().
 */
static void
_sd_process_pending(int cd)
{
        _sd_cd_info_t *cdi;
        _sd_cctl_t *cc_ent, *cc_next;
        int dirty_enq;
        ss_centry_info_t *wctl;
        _sd_cctl_t *dirty_hd, **dirty_nxt;
        int sts, processed = 0;

        cdi = &(_sd_cache_files[cd]);

        SDTRACE(ST_ENTER|SDF_FLDONE, cd, 0,
            SDT_INV_BL, cdi->cd_info->sh_numio, 0);
process_loop:
        if (cdi->cd_io_head == NULL) {
                /* queue drained - account for the work done and return */
                if (processed) {
                        mutex_enter(&cdi->cd_lock);
                        cdi->cd_info->sh_numio -= processed;
                        mutex_exit(&cdi->cd_lock);
                }
                SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
                    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
                return;
        }
        cc_ent = cdi->cd_io_head;
        /* head chain's i/o still in flight - stop (queue is FIFO) */
        if ((sts = cc_ent->cc_iostatus) == _SD_IO_INITIATE) {
                if (processed)  {
                        mutex_enter(&cdi->cd_lock);
                        cdi->cd_info->sh_numio -= processed;
                        mutex_exit(&cdi->cd_lock);
                }
                SDTRACE(ST_EXIT|SDF_FLDONE, cd, 0,
                    SDT_INV_BL, cdi->cd_info->sh_numio, processed);
                return;
        }
        LINTUSED(sts);
#if defined(_SD_DEBUG)
        if ((sts != _SD_IO_DONE) && (sts != _SD_IO_FAILED))
                SDALERT(SDF_FLDONE, cd, 0,
                    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), 0, sts);
#endif

        /* unlink the chain from the pending queue */
        if ((cdi->cd_io_head = cc_ent->cc_dirty_link) == NULL)
                cdi->cd_io_tail = NULL;

        cc_ent->cc_dirty_link = NULL;
        if (cc_ent->cc_iostatus == _SD_IO_FAILED &&
            _sd_process_failure(cc_ent))
                goto process_loop;

        dirty_enq = 0;
        dirty_nxt = &(dirty_hd);

        DTRACE_PROBE1(_sd_process_pending_cd, int, cd);

        for (; cc_ent; cc_ent = cc_next) {

                DTRACE_PROBE1(_sd_process_pending_cc_ent,
                    _sd_cctl_t *, cc_ent);
                processed++;
                cc_next = cc_ent->cc_dirty_next;
                cc_ent->cc_dirty_next = NULL;

                if (CENTRY_PINNED(cc_ent))
                        _sd_process_reflush(cc_ent);

                /*
                 * Optimize for common case where block not inuse
                 * Grabbing cc_inuse is faster than cc_lock.
                 */
                if (SET_CENTRY_INUSE(cc_ent))
                        goto must_lock;

                cc_ent->cc_iostatus = _SD_IO_NONE;
                if (CENTRY_DIRTY_PENDING(cc_ent)) {
                        /* re-dirtied during the write - collect for requeue */
                        cc_ent->cc_flag &= ~CC_PEND_DIRTY;

                        CLEAR_CENTRY_INUSE(cc_ent);
                        if (dirty_enq)
                                dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
                        (*dirty_nxt) = cc_ent;
                        dirty_enq++;
                        continue;
                }
                /* write complete - release the safestore resource */
                cc_ent->cc_dirty = 0;
                wctl = cc_ent->cc_write;
                cc_ent->cc_write = NULL;
                cc_ent->cc_flag &= ~(CC_PINNABLE);


                wctl->sc_dirty = 0;
                SSOP_SETCENTRY(sdbc_safestore, wctl);
                SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

                /*
                 * if this was a QHEAD cache block, then
                 * _sd_centry_release() did not requeue it as
                 * it was dirty.  Requeue it now.
                 */

                if (CENTRY_QHEAD(cc_ent))
                        if (sdbc_use_dmchain) {

                                /* attempt to que head */
                                if (cc_ent->cc_alloc_size_dm) {

                                        sdbc_requeue_head_dm_try(cc_ent);
                                }
                        } else
                                _sd_requeue_head(cc_ent);

                CLEAR_CENTRY_INUSE(cc_ent);
                continue;

                /*
                 * Block is inuse, must take cc_lock
                 * if DIRTY_PENDING, must re-issue
                 */
        must_lock:
                /* was FAST */
                mutex_enter(&cc_ent->cc_lock);
                cc_ent->cc_iostatus = _SD_IO_NONE;
                if (CENTRY_DIRTY_PENDING(cc_ent)) {
                        cc_ent->cc_flag &= ~CC_PEND_DIRTY;
                        /* was FAST */
                        mutex_exit(&cc_ent->cc_lock);
                        if (dirty_enq)
                                dirty_nxt = &((*dirty_nxt)->cc_dirty_link);
                        (*dirty_nxt) = cc_ent;
                        dirty_enq++;
                        continue;
                }
                /*
                 * clear dirty bits, if block no longer inuse release cc_write
                 */
                cc_ent->cc_dirty = 0;
                if (SET_CENTRY_INUSE(cc_ent) == 0) {

                        wctl = cc_ent->cc_write;
                        cc_ent->cc_write = NULL;
                        cc_ent->cc_flag &= ~(CC_PINNABLE);
                        /* was FAST */
                        mutex_exit(&cc_ent->cc_lock);


                        wctl->sc_dirty = 0;
                        SSOP_SETCENTRY(sdbc_safestore, wctl);
                        SSOP_DEALLOCRESOURCE(sdbc_safestore, wctl->sc_res);

                        /*
                         * if this was a QHEAD cache block, then
                         * _sd_centry_release() did not requeue it as
                         * it was dirty.  Requeue it now.
                         */

                        if (CENTRY_QHEAD(cc_ent))
                                if (sdbc_use_dmchain) {

                                        /* attempt to que head */
                                        if (cc_ent->cc_alloc_size_dm) {
                                                sdbc_requeue_head_dm_try
                                                    (cc_ent);
                                        }
                                } else
                                        _sd_requeue_head(cc_ent);
                        CLEAR_CENTRY_INUSE(cc_ent);
                } else {
                        /* was FAST */
                        mutex_exit(&cc_ent->cc_lock);
                }
        }

        if (dirty_enq)
                _sd_enqueue_dirty_chain(cd, dirty_hd, (*dirty_nxt), dirty_enq);

        goto process_loop;
}
1321 
1322 
/*
 * _sd_flcent_ea - end action (i/o completion callback) for writes
 * issued by _sd_async_flcent().
 *
 * Decrements cc_iocount (one write may be outstanding per dirty
 * fragment); only the final completion clears the pagelist io flag
 * and sets cc_iostatus to _SD_IO_DONE or _SD_IO_FAILED so that
 * _sd_process_pending() can reap the centry.
 */
static void
_sd_flcent_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len, int error)
{
        _sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
        int cd;
        nsc_off_t dblk;

        _sd_cd_info_t *cdi;

        cd = CENTRY_CD(cc_ent);
        dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
        cdi = &(_sd_cache_files[cd]);

        SDTRACE(ST_ENTER|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);

        /* log only the first write failure for this cd */
        if (error) {
                if (cdi->cd_info->sh_failed == 0) {
                        cdi->cd_info->sh_failed = 1;
                        cmn_err(CE_WARN, "!sdbc(_sd_flcent_ea) "
                            "Disk write failed cd %d (%s): err %d",
                            cd, cdi->cd_info->sh_filename, error);
                }
        }

        /* was FAST */
        mutex_enter(&cc_ent->cc_lock);
        if (--(cc_ent->cc_iocount) != 0) {
                /* more io's to complete before the cc_ent is done. */

                if (cc_ent->cc_iocount < 0) {
                        /* underflow - should not happen; alert */
                        /* was FAST */
                        mutex_exit(&cc_ent->cc_lock);
                        SDALERT(SDF_FLCENT_EA, cd, 0,
                            dblk, cc_ent->cc_iocount, 0);
                } else {
                        /* was FAST */
                        mutex_exit(&cc_ent->cc_lock);
                }
                SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2,
                    (unsigned long)cc_ent);

                DTRACE_PROBE(_sd_flcent_ea_end);
                return;
        }
        /* was FAST */
        mutex_exit(&cc_ent->cc_lock);

        DATA_LOG(SDF_FLEA, cc_ent, BLK_FBA_OFF(fba_pos), fba_len);

        DTRACE_PROBE4(_sd_flcent_ea_data, uint64_t, ((uint64_t)
            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent) + BLK_FBA_OFF(fba_pos))),
            uint64_t, (uint64_t)fba_len, char *,
            *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos))),
            char *, *(int64_t *)(cc_ent->cc_data +
            FBA_SIZE(BLK_FBA_OFF(fba_pos) + fba_len) - 8));

        /*
         * All io's are done for this cc_ent.
         * Clear the pagelist io flag.
         */
        CLEAR_CENTRY_PAGEIO(cc_ent);

        if (error)
                cc_ent->cc_iostatus = _SD_IO_FAILED;
        else
                cc_ent->cc_iostatus = _SD_IO_DONE;

        SDTRACE(ST_EXIT|SDF_FLCENT_EA, cd, 0, dblk, 2, (unsigned long)cc_ent);

}
1393 
1394 
1395 
/*
 * _sd_flclist_ea - end action (i/o completion callback) for the
 * chained write issued by _sd_async_flclist().
 *
 * Marks every centry on the dirty chain _SD_IO_DONE or _SD_IO_FAILED
 * and clears its pagelist io flag.  The first centry is deliberately
 * processed last (see comment below).  Any anonymous coalesce buffer
 * set up by sdbc_alloc_io_mem() is freed here.
 */
static void
_sd_flclist_ea(blind_t xcc_ent, nsc_off_t fba_pos, nsc_size_t fba_len,
    int error)
{
        _sd_cctl_t *cc_ent = (_sd_cctl_t *)xcc_ent;
        _sd_cctl_t *first_cc = cc_ent;
        _sd_cd_info_t *cdi;
        int cd;
        nsc_off_t dblk;

        cd = CENTRY_CD(cc_ent);
        dblk = BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent));
        cdi = &(_sd_cache_files[cd]);

        SDTRACE(ST_ENTER|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);

        /* log only the first write failure for this cd */
        if (error) {
                if (cdi->cd_info->sh_failed == 0) {
                        cdi->cd_info->sh_failed = 1;
                        cmn_err(CE_WARN, "!sdbc(_sd_flclist_ea) "
                            "Disk write failed cd %d (%s): err %d",
                            cd, cdi->cd_info->sh_filename, error);
                }
        }
        /*
         * Important: skip the first cc_ent in the list. Marking this will
         * make the writer think the io is done,  though the rest of the
         * chain have not been processed here. so mark the first cc_ent
         * last. Optimization, so as not to use locks
         */

        cc_ent = cc_ent->cc_dirty_next;
        while (cc_ent) {
                DTRACE_PROBE2(_sd_flclist_ea, _sd_cctl_t *, cc_ent,
                    int, CENTRY_CD(cc_ent));

                if (cc_ent->cc_iocount != 1)
                        SDALERT(SDF_FLCLIST_EA, cd, 0,
                            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                            cc_ent->cc_iocount, 0);
                cc_ent->cc_iocount = 0;

                /*
                 * Clear the pagelist io flag.
                 */
                CLEAR_CENTRY_PAGEIO(cc_ent);

                if (error)
                        cc_ent->cc_iostatus = _SD_IO_FAILED;
                else
                        cc_ent->cc_iostatus = _SD_IO_DONE;
                if (cc_ent->cc_dirty_next) {
                        DATA_LOG(SDF_FLSTEA, cc_ent, 0, BLK_FBAS);

                        DTRACE_PROBE4(_sd_flclist_ea_data1, uint64_t,
                            BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                            int, BLK_FBAS, char *,
                            *(int64_t *)(cc_ent->cc_data),
                            char *, *(int64_t *)(cc_ent->cc_data +
                            FBA_SIZE(BLK_FBAS) - 8));
                } else {
                        DATA_LOG(SDF_FLSTEA, cc_ent, 0,
                            BLK_FBA_OFF(fba_pos + fba_len));

                        DTRACE_PROBE4(_sd_flclist_ea_data2, uint64_t,
                            (uint64_t)BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)),
                            uint64_t, (uint64_t)BLK_FBA_OFF(fba_pos + fba_len),
                            char *, *(int64_t *)(cc_ent->cc_data),
                            char *, *(int64_t *)(cc_ent->cc_data +
                            FBA_SIZE(BLK_FBA_OFF(fba_pos + fba_len)) - 8));
                }

                cc_ent = cc_ent->cc_dirty_next;
        }

        /*
         * Now process the first cc_ent in the list.
         */
        cc_ent = first_cc;
        DATA_LOG(SDF_FLSTEA, cc_ent, BLK_FBA_OFF(fba_pos),
            BLK_FBAS - BLK_FBA_OFF(fba_pos));

        DTRACE_PROBE4(_sd_flclist_ea_data3, uint64_t,
            (uint64_t)fba_pos, int, BLK_FBAS - BLK_FBA_OFF(fba_pos),
            char *, *(int64_t *)(cc_ent->cc_data +
            FBA_SIZE(BLK_FBA_OFF(fba_pos))), char *,
            *(int64_t *)(cc_ent->cc_data + FBA_SIZE(BLK_FBA_OFF(fba_pos) +
            BLK_FBAS - BLK_FBA_OFF(fba_pos)) - 8));

        cc_ent->cc_iocount = 0;

        /* free the coalesce buffer allocated by sdbc_alloc_io_mem() */
        if (cc_ent->cc_anon_addr.sa_virt) {
                kmem_free(cc_ent->cc_anon_addr.sa_virt, cc_ent->cc_anon_len);
                cc_ent->cc_anon_addr.sa_virt = NULL;
                cc_ent->cc_anon_len = 0;
        }

        /*
         * Clear the pagelist io flag.
         */
        CLEAR_CENTRY_PAGEIO(cc_ent);

        if (error)
                cc_ent->cc_iostatus = _SD_IO_FAILED;
        else
                cc_ent->cc_iostatus = _SD_IO_DONE;

        SDTRACE(ST_EXIT|SDF_FLCLIST_EA, cd, 0, dblk, 1, (unsigned long)cc_ent);
}
1505 
1506 
1507 static void
1508 _sd_mark_failed(_sd_cctl_t *cclist)
1509 {
1510         _sd_cctl_t *cc_ent;
1511         int cd;
1512 
1513         cd = CENTRY_CD(cclist);
1514         cc_ent = cclist;
1515         while (cc_ent) {
1516                 cc_ent->cc_iostatus = _SD_IO_FAILED;
1517                 cc_ent = cc_ent->cc_dirty_next;
1518         }
1519         _sd_enqueue_io_pending(cd, cclist);
1520 }
1521 
1522 
1523 
1524 /*
1525  * Fail single chain of cache blocks, updating numfail/numio counts.
1526  * For dual-copy, log & clear PINNED, fall thru to regular processing.
1527  */
1528 int
1529 _sd_process_failure(_sd_cctl_t *cc_ent)
1530 {
1531         int cd, num;
1532         _sd_cctl_t *cc_chain;
1533         _sd_cd_info_t *cdi;
1534 
1535         cd = CENTRY_CD(cc_ent);
1536         cdi = &(_sd_cache_files[cd]);
1537 
1538         cc_chain = cc_ent;
1539 
1540         if (!cdi->cd_global->sv_pinned) {
1541                 cdi->cd_global->sv_pinned = _SD_SELF_HOST;
1542                 SSOP_SETVOL(sdbc_safestore, cdi->cd_global);
1543         }
1544 
1545         for (num = 0; cc_ent; cc_ent = cc_ent->cc_dirty_next) {
1546                 num++;
1547                 /* was FAST */
1548                 mutex_enter(&cc_ent->cc_lock);
1549                 cc_ent->cc_flag |= (CC_PEND_DIRTY |
1550                     (CENTRY_PINNABLE(cc_ent) ? CC_PINNED : 0));
1551                 if (cc_ent->cc_write) {
1552                         cc_ent->cc_write->sc_flag = cc_ent->cc_flag;
1553                         SSOP_SETCENTRY(sdbc_safestore, cc_ent->cc_write);
1554                 }
1555                 mutex_exit(&cc_ent->cc_lock);
1556                 if (CENTRY_PINNED(cc_ent))
1557                         nsc_pinned_data(cdi->cd_iodev,
1558                             BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
1559         }
1560 
1561         /*
1562          *  In normal processing we wouldn't need a lock here as all i/o
1563          *  is single threaded by cd. However during failover blocks can
1564          *  be failing from real i/o and as soon as the disk is marked bad
1565          *  the failover code which is furiously cloning safe-store into
1566          *  more blocks will short circuit to here (see _sd_ft_clone)
1567          *  and two threads can be executing in here simultaneously.
1568          */
1569         mutex_enter(&cdi->cd_lock);
1570         cc_chain->cc_dirty_link = cdi->cd_fail_head;
1571         cdi->cd_fail_head = cc_chain;
1572         cdi->cd_info->sh_numfail += num;
1573         cdi->cd_info->sh_numio   -= num;
1574         mutex_exit(&cdi->cd_lock);
1575         return (1);             /* blocks are failed */
1576 }
1577 
1578 
1579 static void
1580 _sd_process_reflush(_sd_cctl_t *cc_ent)
1581 {
1582         int cd;
1583 
1584         if (CENTRY_PINNABLE(cc_ent)) {
1585                 cd = CENTRY_CD(cc_ent);
1586                 nsc_unpinned_data(_sd_cache_files[cd].cd_iodev,
1587                     BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)), BLK_FBAS);
1588         }
1589 
1590         /* was FAST */
1591         mutex_enter(&cc_ent->cc_lock);
1592         cc_ent->cc_flag &= ~CC_PINNED;
1593         /* was FAST */
1594         mutex_exit(&cc_ent->cc_lock);
1595 }
1596 
1597 
1598 
1599 /*
1600  * cd_write_thread -- flush dirty buffers.
1601  *
1602  * ARGUMENTS:
1603  *
1604  *  cd - cache descriptor
1605  *
1606  * USAGE:
1607  *  called by cd's writer thread, returns when no more entries
1608  *
1609  * NOTE: if sdbc is being shutdown (for powerfail) then we will
1610  * process pending i/o's but issue no more new ones.
1611  */
/*
 * SD_LOOP_DELAY: number of flusher passes to hold back the final,
 * still-growing dirty chain so sequential writes can keep coalescing
 * (see cd_write_thread).
 */
static int SD_LOOP_DELAY = 32;
#if !defined(m88k) && !defined(sun)
/* back-off limit on in-flight flush i/o, in cache blocks */
static int SD_WRITE_HIGH = 255; /* cache blocks */
#endif
1616 
static void
cd_write_thread(int cd)
{
        _sd_cctl_t *cc_list, *dirty_head, *last_chain;
        _sd_cd_info_t *cdi;

        cdi = &(_sd_cache_files[cd]);
        if (!FILE_OPENED(cd)) {
                cdi->cd_writer = _SD_WRITER_NONE;
                return;
        }
        cdi->cd_writer = _SD_WRITER_RUNNING;

        /* retire any completed i/o before issuing more */
        _sd_process_pending(cd);

        if (_sdbc_shutdown_in_progress) {
                /* shutdown (powerfail): issue no new i/o */
                cdi->cd_write_inprogress = 0;
                cdi->cd_writer = _SD_WRITER_NONE;
                return;
        }
#if !defined(m88k) && !defined(sun)
        if (cdi->cd_info->sh_numio > SD_WRITE_HIGH) {
                /* let I/Os complete before issuing more */
                cdi->cd_writer = _SD_WRITER_NONE;
                return;
        }
#endif

#ifdef DEBUG
        if (!_sdbc_flush_flag) { /* hang the flusher for testing */
                cdi->cd_write_inprogress = 0;
                cdi->cd_writer = _SD_WRITER_NONE;
                return;
        }
#endif

        /*
         * Detach dirty chains from cd_dirty_head for flushing.  The
         * last (still-growing) chain is normally left behind, and is
         * flushed only after SD_LOOP_DELAY consecutive passes in which
         * it is the only chain present.
         */
        dirty_head = cdi->cd_dirty_head;
        if (dirty_head && (dirty_head != cdi->cd_lastchain_ptr ||
            ++cdi->cd_info->sh_flushloop > SD_LOOP_DELAY)) {
                cdi->cd_info->sh_flushloop = 0;
                /* was FAST */
                mutex_enter(&cdi->cd_lock);
                if (SD_LOOP_DELAY == 0 ||
                    dirty_head == cdi->cd_lastchain_ptr) {
                        /* take everything, including the last chain */
                        last_chain = NULL;
                        cdi->cd_dirty_head = NULL;
                        cdi->cd_dirty_tail = NULL;
                        cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty;
                        cdi->cd_info->sh_numdirty = 0;
                } else
#if !defined(m88k) && !defined(sun)
                if (cdi->cd_info->sh_numdirty > SD_WRITE_HIGH) {
                        /* heavily dirty: take only the first chain */
                        int count = 0;
                        for (last_chain = dirty_head; last_chain;
                            last_chain = last_chain->cc_dirty_next)
                                count++;
                        last_chain = dirty_head->cc_dirty_link;
                        cdi->cd_dirty_head = last_chain;
                        /* cdi->cd_dirty_tail is unchanged */
                        cdi->cd_info->sh_numio += count;
                        cdi->cd_info->sh_numdirty -= count;
                } else
#endif
                {
                        /* take all chains except the still-growing last one */
                        last_chain = cdi->cd_lastchain_ptr;
                        cdi->cd_dirty_head = last_chain;
                        cdi->cd_dirty_tail = last_chain;
                        cdi->cd_info->sh_numio += cdi->cd_info->sh_numdirty -
                            cdi->cd_lastchain;
                        cdi->cd_info->sh_numdirty = cdi->cd_lastchain;
                }
                /* was FAST */
                mutex_exit(&cdi->cd_lock);

                /* issue one async flush per detached chain, up to last_chain */
                while (((cc_list = dirty_head) != NULL) &&
                    cc_list != last_chain) {
                        dirty_head = cc_list->cc_dirty_link;
                        cc_list->cc_dirty_link = NULL;
                        if (cdi->cd_info->sh_failed)
                                _sd_mark_failed(cc_list);
                        else if (cc_list->cc_dirty_next == NULL)
                                _sd_async_flcent(cc_list, cdi->cd_crdev);
                        else
                                _sd_async_flclist(cc_list, cdi->cd_crdev);
                        cdi->cd_write_inprogress++;
                }
        }
        cdi->cd_write_inprogress = 0;
        cdi->cd_writer = _SD_WRITER_NONE;
}
1707 
1708 /*
1709  * cd_writer -- spawn new writer if not running already
1710  *      called after enqueing the dirty blocks
1711  */
1712 int
1713 cd_writer(int cd)
1714 {
1715         _sd_cd_info_t *cdi;
1716         nstset_t *tset = NULL;
1717         nsthread_t *t;
1718 
1719 #if defined(_SD_USE_THREADS)
1720         tset = _sd_ioset;
1721 #endif  /* _SD_USE_THREADS */
1722 
1723         cdi = &(_sd_cache_files[cd]);
1724 
1725         if (cdi->cd_writer)
1726                 return (0);
1727 
1728         if (tset == NULL) {
1729                 _sd_unblock(&_sd_flush_cv);
1730                 return (0);
1731         }
1732 
1733         if (cdi->cd_writer || xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
1734                 return (0);
1735 
1736         t = nst_create(tset, cd_write_thread, (blind_t)(unsigned long)cd, 0);
1737         if (t)
1738                 return (1);
1739 
1740         cmn_err(CE_WARN, "!sdbc(cd_writer) cd %d nst_create error", cd);
1741         cdi->cd_writer = _SD_WRITER_NONE;
1742         return (-1);
1743 }
1744 
1745 /*
1746  * _sd_ccent_rd - add appropriate parts of cc_ent to struct buf.
1747  *      optimized not to read dirty FBAs from disk.
1748  *
1749  * ARGUMENTS:
1750  *
1751  * cc_ent   - single cache block
1752  * wanted   - bitlist of FBAs that need to be read
1753  * bp   - struct buf to extend
1754  *
1755  * USAGE:
1756  *      Called for each dirty in a read I/O.
1757  *      The bp must be sized to allow for one entry per FBA that needs
1758  *      to be read (see _sd_doread()).
1759  */
1760 
1761 void
1762 _sd_ccent_rd(_sd_cctl_t *cc_ent, uint_t wanted, struct buf *bp)
1763 {
1764         int index, offset = 0, size = 0;
1765         int state, state1 = -3; /* state1 is previous state */
1766         sd_addr_t *addr = NULL;
1767         uint_t dirty;
1768 
1769         dirty  = CENTRY_DIRTY(cc_ent);
1770         for (index = 0; index < BLK_FBAS; index++) {
1771                 if (!_SD_BIT_ISSET(wanted, index))
1772                         continue;
1773                 state = _SD_BIT_ISSET(dirty, index);
1774                 if (state == state1) /* same state, expand size */
1775                         size++;
1776                 else {
1777                         if (state1 != -3) /* not first FBA */
1778                                 sd_add_fba(bp, addr, offset, size);
1779                         state1 = state; /* new previous state */
1780                         offset = index;
1781                         size  = 1;
1782                         if (state) {            /* dirty, don't overwrite */
1783                                 addr = NULL;
1784                         } else {
1785                                 addr = &cc_ent->cc_addr;
1786                         }
1787                 }
1788         }
1789         if (state1 != -3)
1790                 sd_add_fba(bp, addr, offset, size);
1791 }
1792 
1793 
1794 
/* switch to short sleeps while the safestore write queue is below this */
int _SD_WR_THRESHOLD = 1000;

/*
 * _sd_flush_thread - central flush daemon.
 *
 * Wakes periodically (.2s normally, .02s when the safestore write
 * queue drops below _SD_WR_THRESHOLD) and, for each attached cache
 * descriptor with dirty blocks and no active writer, runs
 * cd_write_thread() (via the thread set when available, else inline).
 * Once _sd_flush_exit drops to 0 it keeps flushing until pending i/o
 * drains on two successive passes, or gives up after GIVE_UP_WAITING
 * sleeps with no progress.
 */
static void
_sd_flush_thread(void)
{
        int cd;
        _sd_cd_info_t *cdi;
        _sd_shared_t *shi;
        int cnt;
        int short_sleep = 0;
        long tics;
        int waiting_for_idle = 0;
        int check_count = 0;
        int pending, last_pending;
        int SD_LONG_SLEEP_TICS, SD_SHORT_SLEEP_TICS;
        nstset_t *tset = NULL;
        nsthread_t *t;

#if defined(_SD_USE_THREADS)
        tset = _sd_ioset;
#endif  /* _SD_USE_THREADS */

        mutex_enter(&_sd_cache_lock);
        _sd_cache_dem_cnt++;
        mutex_exit(&_sd_cache_lock);

        /* .2 seconds */
        SD_LONG_SLEEP_TICS = drv_usectohz(200000);
        /* .02 seconds */
        SD_SHORT_SLEEP_TICS = drv_usectohz(20000);

        /* CONSTCOND */
        while (1) {
                if (_sd_flush_exit == 0) {
                        /*
                         * wait until no i/o's pending (on two successive
                         * iterations) or we see no progress after
                         * GIVE_UP_WAITING total sleeps.
                         */
/* at most 5*128 ticks about 6 seconds of no progress */
#define GIVE_UP_WAITING 128
                        if (waiting_for_idle) {
                                pending = _sd_pending_iobuf();
                                /*LINTED*/
                                if (pending == last_pending) {
                                        if (pending != 0)
                                                check_count++;
                                } else
                                        check_count = 0;
                                if ((last_pending == 0 && (pending == 0)) ||
                                    (check_count == GIVE_UP_WAITING)) {
                                        mutex_enter(&_sd_cache_lock);
                                        _sd_cache_dem_cnt--;
                                        mutex_exit(&_sd_cache_lock);
                                        if (check_count == GIVE_UP_WAITING)
                                                cmn_err(CE_WARN,
                                                    "!_sd_flush_thread "
                                                    "exiting with %d IOs "
                                                    "pending", pending);
                                        return;
                                }
                                last_pending = pending;
                        } else {
                                /* first pass: just record the baseline */
                                waiting_for_idle = 1;
                                last_pending = _sd_pending_iobuf();
                        }
                }

                /*
                 * Normally wakeup every SD_LONG_SLEEP_TICS to flush.
                 */

                if (!short_sleep) {
                        ssioc_stats_t ss_stats;
                        int rc;

                        /* poll safestore; sleep faster if its queue is low */
                        if ((rc = SSOP_CTL(sdbc_safestore, SSIOC_STATS,
                            (uintptr_t)&ss_stats)) == 0) {

                                if (ss_stats.wq_inq < _SD_WR_THRESHOLD)
                                        short_sleep = 1;
                        } else {
                                if (rc == SS_ERR)
                                        cmn_err(CE_WARN,
                                            "!sdbc(_sd_flush_thread)"
                                            "cannot get safestore inq");
                        }
                }

                if (short_sleep)
                        tics = SD_SHORT_SLEEP_TICS;
                else
                        tics = SD_LONG_SLEEP_TICS;

                _sd_timed_block(tics, &_sd_flush_cv);
                cd = 0;
                cnt = short_sleep = 0;
                /* scan all allocated+attached descriptors for work */
                for (; (cnt < _sd_cache_stats->st_loc_count) &&
                    (cd < sdbc_max_devs); cd++) {
                        cdi = &_sd_cache_files[cd];
                        shi = cdi->cd_info;

                        if (shi == NULL || (shi->sh_failed == 2))
                                continue;

                        if (!(shi->sh_alloc & CD_ALLOCATED) ||
                            !(shi->sh_flag & CD_ATTACHED))
                                continue;
                        cnt++;
                        if (cdi->cd_writer)
                                continue;
                        if (!_SD_CD_WBLK_USED(cd)) {
                                /* idle after failover: release the raw dev */
                                if (cdi->cd_failover == 2) {
                                        nsc_release(cdi->cd_rawfd);
                                        cdi->cd_failover = 0;
                                }
                                continue;
                        }
                        /* atomically claim writer-ship for this cd */
                        if (cdi->cd_writer ||
                            xmem_bu(_SD_WRITER_CREATE, &cdi->cd_writer))
                                continue;

                        t = NULL;
                        if (tset) {
                                t = nst_create(tset,
                                    cd_write_thread, (blind_t)(unsigned long)cd,
                                    0);
                        }
                        if (!t)
                                cd_write_thread(cd);
                }
        }
}
1927 
1928 
1929 #if defined(_SD_DEBUG_PATTERN)
/*
 * check_write_consistency - debug-only (_SD_DEBUG_PATTERN) scan of a
 * dirty chain verifying that each dirty FBA's embedded stamp (the
 * third int of the FBA) matches its disk FBA position.
 */
check_write_consistency(cc_entry)
        _sd_cctl_t *cc_entry;
{
        int *data;
        nsc_off_t fba_pos;
        int i, dirty_bl;

        while (cc_entry) {
                dirty_bl = CENTRY_DIRTY(cc_entry);
                if (dirty_bl == 0) {
                        cmn_err(CE_WARN, "!check: no dirty");
                }
                data = (int *)cc_entry->cc_data;
                fba_pos = BLK_TO_FBA_NUM(CENTRY_BLK(cc_entry));

                /* 8 FBAs per cache block; 128 ints (512 bytes) per FBA */
                for (i = 0; i < 8; i++, data += 128, fba_pos++) {
                        if (dirty_bl & 1) {
                                if (*((int *)(data + 2)) != fba_pos) {
                                        cmn_err(CE_WARN, "!wr exp %" NSC_SZFMT
                                            " got %x", fba_pos, *(data + 2));
                                }
                        }
                        dirty_bl >>= 1;
                }
                cc_entry = cc_entry->cc_dirty_next;
        }
}
1957 
/*
 * check_buf_consistency - debug-only (_SD_DEBUG_PATTERN) scan of a
 * buffer handle's bufvec, verifying each FBA's embedded stamp against
 * its FBA position and the total length against bh_fba_len.
 * rw is a label ("rd"/"wr" presumably) used only in the warning text.
 */
check_buf_consistency(handle, rw)
        _sd_buf_handle_t *handle;
        char *rw;
{
        _sd_bufvec_t *bvec1;
        int *data;
        nsc_off_t fpos;
        nsc_size_t fba_len, i;
        nsc_size_t len = 0;

        bvec1 = handle->bh_bufvec;
        fpos =  handle->bh_fba_pos;

        /* bufvec is NULL-terminated; 128 ints (512 bytes) per FBA */
        while (bvec1->bufaddr) {
                fba_len = FBA_NUM(bvec1->buflen);
                data = (int *)bvec1->bufaddr;
                for (i = 0; i < fba_len; i++, data += 128, fpos++) {
                        len++;
                        if (*(data+2) != fpos) {
                                cmn_err(CE_WARN, "!%s exp%" NSC_SZFMT " got%x",
                                    rw, fpos, *(data + 2));
                        }
                }
                bvec1++;
        }
        if (handle->bh_fba_len != len) {
                cmn_err(CE_WARN, "!len %" NSC_SZFMT " real %" NSC_SZFMT, len,
                    handle->bh_fba_len);
        }
}
1988 #endif
1989 
1990 int
1991 _sdbc_wait_pending(void)
1992 {
1993         int tries, pend, last;
1994 
1995         tries = 0;
1996         last  = _sd_pending_iobuf();
1997         while ((pend = _sd_pending_iobuf()) > 0) {
1998                 if (pend == last) {
1999                         if (++tries > 60) {
2000                                 return (pend);
2001                         }
2002                 } else {
2003                         pend = last;
2004                         tries = 0;
2005                 }
2006                 delay(HZ);
2007         }
2008         return (0);
2009 }