1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/ksynch.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/kmem.h>
  30 #include <sys/ddi.h>
  31 #include <sys/nsc_thread.h>
  32 #include <sys/nsctl/nsctl.h>
  33 
  34 #include <sys/sdt.h>              /* dtrace is S10 or later */
  35 
  36 #include "sd_bcache.h"
  37 #include "sd_hash.h"
  38 
  39 #if defined(_SD_DEBUG)
  40 int _sd_hash_max_inlist = 0;
  41 #endif
  42 
  43 
  44 #define _SD_HB_LOCKS 32
  45 static kmutex_t _sd_hb_locks[_SD_HB_LOCKS];
  46 
  47 
  48 /*
  49  * _sdbc_hash_load - allocate all the locks for buckets.
  50  *
  51  *
  52  */
  53 int
  54 _sdbc_hash_load(void)
  55 {
  56         int i;
  57         for (i = 0; i < _SD_HB_LOCKS; i++) {
  58                 mutex_init(&_sd_hb_locks[i], NULL, MUTEX_DRIVER, NULL);
  59         }
  60         return (0);
  61 }
  62 
  63 /*
  64  * _sdbc_hash_unload - free all the locks for buckets.
  65  *
  66  *
  67  */
  68 void
  69 _sdbc_hash_unload(void)
  70 {
  71         int i;
  72         for (i = 0; i < _SD_HB_LOCKS; i++) {
  73                 mutex_destroy(&_sd_hb_locks[i]);
  74         }
  75 }
  76 
  77 
  78 /*
  79  * _sdbc_hash_configure - create a hash table
  80  *
  81  * ARGUMENTS:
  82  *      num_ents - Number of entries (or hash buckets)
  83  *      htype    - Type of memory to allocate.
  84  *
  85  * RETURNS:
  86  *      The address of the hash table just created
  87  *      or zero in the event of failure.
  88  *
  89  * USAGE:
  90  *      This routine rounds of the number of entries to the next higher
  91  *      power of 2. Allocate the hash buckets and initializes the locks
  92  *      and returns the hash table that is created.
  93  *      It is the caller's responsibility to save the hash_table and pass
  94  *      it as a key for future accesses to the hash.
  95  *      It is also the caller's responsibility to destroy the hash table
  96  *      when appropriate.
  97  */
  98 
  99 
 100 _sd_hash_table_t *
 101 _sdbc_hash_configure(int num_ents)
 102 {
 103         _sd_hash_table_t *hash_table;
 104         _sd_hash_bucket_t *bucket;
 105         int i;
 106         int get_high_bit(int);
 107 
 108         if ((hash_table = (_sd_hash_table_t *)
 109                 nsc_kmem_zalloc(sizeof (_sd_hash_table_t),
 110                                 KM_SLEEP, sdbc_hash_mem)) == NULL)
 111                 return (NULL);
 112 
 113         hash_table->ht_bits = get_high_bit(num_ents);
 114         hash_table->ht_size = (1 << hash_table->ht_bits);
 115 
 116         /*
 117          * this is where we compute the mask used in the hash function
 118          * the ht_nmask is basically an not of ht_mask used in hash
 119          * function.
 120          */
 121         hash_table->ht_mask = (hash_table->ht_size - 1);
 122         hash_table->ht_nmask = (~0 & ~(hash_table->ht_mask));
 123 
 124         if ((hash_table->ht_buckets = (_sd_hash_bucket_t *)
 125                 nsc_kmem_zalloc(hash_table->ht_size *
 126                                 sizeof (_sd_hash_bucket_t), KM_SLEEP,
 127                                 sdbc_hash_mem)) == NULL)
 128                 return (NULL);
 129 
 130         for (i = 0; i < (hash_table->ht_size); i++) {
 131                 bucket = (hash_table->ht_buckets + i);
 132 
 133                 bucket->hb_lock = &_sd_hb_locks[i % _SD_HB_LOCKS];
 134                 bucket->hb_head = bucket->hb_tail = NULL;
 135                 bucket->hb_inlist = 0;
 136         }
 137 
 138         return (hash_table);
 139 }
 140 
 141 
 142 /*
 143  * _sdbc_hash_deconfigure - deconfigure a hash table
 144  *
 145  * ARGUMENTS:
 146  *      hash_table - hash table that was created earlier on.
 147  *
 148  * RETURNS:
 149  *      None.
 150  *
 151  * USAGE:
 152  *      this routine deallocates memory that was allocated during the
 153  *      hash create.
 154  */
 155 
 156 void
 157 _sdbc_hash_deconfigure(_sd_hash_table_t *hash_table)
 158 {
 159         if (!hash_table)
 160                 return;
 161 
 162         nsc_kmem_free(hash_table->ht_buckets,
 163                         hash_table->ht_size * sizeof (_sd_hash_bucket_t));
 164 
 165         nsc_kmem_free(hash_table, sizeof (_sd_hash_table_t));
 166 }
 167 
 168 static int _sd_forced_hash_miss;
 169 static int _sd_hash_collision;
 170 
 171 
 172 /*
 173  * _sd_hash_search - search the hash table for an entry
 174  *
 175  * ARGUMENTS:
 176  *      cd         - device that we are interested in.
 177  *      block_num  - block number we are interested in.
 178  *      hash_table - hash table to search in.
 179  *
 180  * RETURNS:
 181  *      returns a hash header if a match was found in the hash table
 182  *      for the device & block_num.
 183  *      Else returns 0.
 184  *
 185  * USAGE:
 186  *      This routine is called to check if a block already exists for
 187  *      the device, block_num combination. If the block does not exist,
 188  *      then a new block needs to be allocated and inserted into the hash
 189  *      table for future references.
 190  */
 191 
 192 _sd_hash_hd_t *
 193 _sd_hash_search(int cd, nsc_off_t block_num, _sd_hash_table_t *table)
 194 {
 195         int i;
 196         _sd_hash_bucket_t *bucket;
 197         _sd_hash_hd_t *hptr;
 198 #if defined(_SD_HASH_OPTIMIZE)
 199 #define MAX_HSEARCH_RETRIES     30
 200         int tries = 0;
 201         _sd_hash_hd_t *hnext;
 202         unsigned int seq;
 203 
 204         i = HASH(cd, block_num, table);
 205         bucket = (table->ht_buckets + i);
 206 retry_search:
 207         seq = bucket->hb_seq;
 208         for (hptr = bucket->hb_head; hptr; hptr = hnext) {
 209                 /*
 210                  * Save pointer for next before checking the seq counter.
 211                  */
 212                 hnext = hptr->hh_next;
 213                 /*
 214                  * enforce ordering of load of hptr->hh_next
 215                  * above and bucket->hb_seq below
 216                  */
 217                 sd_serialize();
 218                 if (bucket->hb_seq != seq) {
 219                         /*
 220                          * To avoid looping forever, break out if a certain
 221                          * limit is reached. Its okay to return miss
 222                          * since the insert will do a proper search.
 223                          */
 224                         if (++tries < MAX_HSEARCH_RETRIES) goto retry_search;
 225                         else {
 226                                 _sd_forced_hash_miss++;
 227                                 DTRACE_PROBE1(_sd_hash_search_end,
 228                                                 int, _sd_forced_hash_miss);
 229                                 return (NULL);
 230                         }
 231                 }
 232                 if ((hptr->hh_cd == cd) && (hptr->hh_blk_num == block_num))
 233                         break;
 234                 if (hptr->hh_blk_num > block_num) {
 235                         DTRACE_PROBE1(_sd_hash_search_end,
 236                                         _sd_hash_hd_t *, hptr);
 237                         return (NULL);
 238                 }
 239         }
 240 
 241         DTRACE_PROBE1(_sd_hash_search_end,
 242                         _sd_hash_hd_t *, hptr);
 243         return (hptr);
 244 #else
 245 
 246         i = HASH(cd, block_num, table);
 247         bucket = (table->ht_buckets + i);
 248 
 249         mutex_enter(bucket->hb_lock);
 250 
 251         for (hptr = bucket->hb_head; hptr; hptr = hptr->hh_next) {
 252                 if ((hptr->hh_cd == cd) && (hptr->hh_blk_num == block_num))
 253                         break;
 254                 /*
 255                  * the list is ordered. If we go beyond our block, no
 256                  * point searching
 257                  */
 258                 if (hptr->hh_blk_num > block_num) {
 259                         hptr = NULL;
 260                         break;
 261                 }
 262         }
 263         mutex_exit(bucket->hb_lock);
 264 
 265         return (hptr);
 266 #endif
 267 }
 268 
 269 
 270 /*
 271  * _sd_hash_insert - insert an entry into the hash table
 272  *
 273  * ARGUMENTS:
 274  *      cd         - device that we are interested in.
 275  *      block_num  - block number we are interested in.
 276  *      hptr       - pointer to block that we are inserting.
 277  *      table      - hash table to search in.
 278  *
 279  * RETURNS:
 280  *      Pointer to block that was passed in, except if the cd, block_num
 281  *      already exists in the hash.  Caller must check for return
 282  *      not equal hptr.
 283  *
 284  * USAGE:
 285  *      this routine inserts the hptr into the appropriate hash bucket and
 286  *      sets the cd, block_num in the block for future references.
 287  */
 288 
 289 _sd_hash_hd_t *
 290 _sd_hash_insert(int cd,
 291                 nsc_off_t block_num,
 292                 _sd_hash_hd_t *hptr,
 293                 _sd_hash_table_t *table)
 294 {
 295         int i;
 296         _sd_hash_hd_t *p;
 297         _sd_hash_bucket_t *bucket;
 298 
 299         i = HASH(cd, block_num, table);
 300         bucket = (table->ht_buckets + i);
 301 
 302 #if defined(_SD_DEBUG)
 303         if (hptr->hh_hashed) {
 304                 cmn_err(CE_WARN, "_sd_err: hptr %p bucket %p already hashed",
 305                         hptr, bucket);
 306         }
 307 #endif
 308         hptr->hh_cd = (ushort_t)cd;
 309         hptr->hh_blk_num = block_num;
 310 
 311         mutex_enter(bucket->hb_lock);
 312 
 313         for (p = bucket->hb_head; (p && (p->hh_blk_num <= block_num));
 314                                                         p = p->hh_next) {
 315                 if ((p->hh_cd == cd) && (p->hh_blk_num == block_num)) {
 316                         mutex_exit(bucket->hb_lock);
 317                         _sd_hash_collision++;
 318                         DTRACE_PROBE2(_sd_hash_insert_end,
 319                                         _sd_hash_hd_t *, p,
 320                                         int, _sd_hash_collision);
 321 
 322                         return (p);
 323                 }
 324         }
 325         hptr->hh_hashed = 1;
 326         /*
 327          * At this point, (p) points to the next higher block number or is
 328          * NULL. If it is NULL, we are queueing to the tail of list.
 329          * Else, insert just before p
 330          */
 331         if (p) {
 332                 hptr->hh_next = p;
 333                 if ((hptr->hh_prev = p->hh_prev) != NULL)
 334                         p->hh_prev->hh_next = hptr;
 335                 else
 336                         bucket->hb_head = hptr;
 337                 p->hh_prev = hptr;
 338         } else {
 339                 hptr->hh_next = NULL;
 340                 hptr->hh_prev = bucket->hb_tail;
 341                 if (bucket->hb_head)
 342                         bucket->hb_tail->hh_next = hptr;
 343                 else
 344                         bucket->hb_head = hptr;
 345                 bucket->hb_tail = hptr;
 346         }
 347 #if defined(_SD_HASH_OPTIMIZE)
 348         bucket->hb_seq++;
 349 #endif
 350 #if defined(_SD_DEBUG)
 351         if (_sd_hash_max_inlist < (int)++(bucket->hb_inlist))
 352                 _sd_hash_max_inlist = bucket->hb_inlist;
 353 #endif
 354         mutex_exit(bucket->hb_lock);
 355 
 356         return (hptr);
 357 }
 358 
 359 
 360 
 361 /*
 362  * _sd_hash_delete - delete an entry from the hash table
 363  *
 364  * ARGUMENTS:
 365  *      hptr       - pointer to delete from hash table.
 366  *      hash_table - hash table that was created earlier on.
 367  *
 368  * RETURNS:
 369  *      0 on success.  -1 on errors.
 370  *
 371  * USAGE:
 372  *      this routine deletes a hash entry from the hash table.
 373  */
 374 
 375 int
 376 _sd_hash_delete(_sd_hash_hd_t *hptr, _sd_hash_table_t *table)
 377 {
 378         int i;
 379         _sd_hash_bucket_t *bucket;
 380 
 381         if (hptr->hh_hashed == 0) {
 382                 DTRACE_PROBE(_sd_hash_delete_end1);
 383                 return (-1);
 384         }
 385 
 386         i = HASH(hptr->hh_cd, hptr->hh_blk_num, table);
 387         bucket = (table->ht_buckets + i);
 388 
 389         /* was FAST */
 390         mutex_enter(bucket->hb_lock);
 391         if (hptr->hh_hashed == 0) {
 392                 /* was FAST */
 393                 mutex_exit(bucket->hb_lock);
 394                 DTRACE_PROBE(_sd_hash_delete_end2);
 395                 return (-1);
 396         }
 397         hptr->hh_hashed = 0;
 398 #if defined(_SD_HASH_OPTIMIZE)
 399         /*
 400          * Increment sequence counter on bucket. This will signal a lookup
 401          * to redo the lookup since we might have broken the link used
 402          * during the lookup.
 403          */
 404         bucket->hb_seq++;
 405 #endif
 406 
 407         if (hptr->hh_prev)
 408                 hptr->hh_prev->hh_next = hptr->hh_next;
 409         else
 410                 bucket->hb_head = hptr->hh_next;
 411         if (hptr->hh_next)
 412                 hptr->hh_next->hh_prev = hptr->hh_prev;
 413         else
 414                 bucket->hb_tail = hptr->hh_prev;
 415 #if defined(_SD_DEBUG)
 416         bucket->hb_inlist--;
 417 #endif
 418         /* was FAST */
 419         mutex_exit(bucket->hb_lock);
 420 
 421         return (0);
 422 }
 423 
 424 /*
 425  * _sd_hash_replace - replace 'old' with 'new' entry.
 426  *
 427  * ARGUMENTS:
 428  *      old   - pointer to block being deleted (to be anonymous)
 429  *      new   - pointer to block inserting in place.
 430  *      table - hash table to search in.
 431  *
 432  * RETURNS:
 433  *      pointer to inserted block.
 434  *
 435  * USAGE:
 436  *      expects old & new to refer to same block.
 437  *      new must not be already hashed.
 438  */
 439 
 440 _sd_hash_hd_t *
 441 _sd_hash_replace(_sd_hash_hd_t *old, _sd_hash_hd_t *new,
 442                         _sd_hash_table_t *table)
 443 {
 444         int i;
 445         _sd_hash_bucket_t *bucket;
 446 
 447         if ((old->hh_cd != new->hh_cd) || (old->hh_blk_num != new->hh_blk_num))
 448                 cmn_err(CE_PANIC, "_sd_hash_replace: mismatch %p %p",
 449                     (void *)old, (void *)new);
 450         if (new->hh_hashed)
 451                 cmn_err(CE_PANIC, "_sd_hash_replace: new %p already hashed",
 452                     (void *)new);
 453         if (old->hh_hashed == 0) {
 454                 _sd_hash_hd_t *hptr;
 455                 hptr = _sd_hash_insert(new->hh_cd, new->hh_blk_num, new, table);
 456 
 457                 DTRACE_PROBE1(_sd_hash_replace_end,
 458                                 _sd_hash_hd_t *, hptr);
 459 
 460                 return (hptr);
 461         }
 462 
 463         i = HASH(old->hh_cd, old->hh_blk_num, table);
 464         bucket = (table->ht_buckets + i);
 465 
 466         /* was FAST */
 467         mutex_enter(bucket->hb_lock);
 468         if (old->hh_hashed == 0) {
 469                 _sd_hash_hd_t *hptr;
 470                 /* was FAST */
 471                 mutex_exit(bucket->hb_lock);
 472 
 473                 hptr = _sd_hash_insert(new->hh_cd, new->hh_blk_num, new, table);
 474 
 475                 DTRACE_PROBE1(_sd_hash_replace_end,
 476                                 _sd_hash_hd_t *, hptr);
 477                 return (hptr);
 478         }
 479         old->hh_hashed = 0;
 480         new->hh_hashed = 1;
 481         new->hh_prev = old->hh_prev;
 482         new->hh_next = old->hh_next;
 483 
 484         if (new->hh_prev)
 485                 new->hh_prev->hh_next = new;
 486         else
 487                 bucket->hb_head = new;
 488         if (new->hh_next)
 489                 new->hh_next->hh_prev = new;
 490         else
 491                 bucket->hb_tail = new;
 492 #if defined(_SD_HASH_OPTIMIZE)
 493         bucket->hb_seq++;
 494 #endif
 495         /* was FAST */
 496         mutex_exit(bucket->hb_lock);
 497 
 498         return (new);
 499 }