il_caloverflow Wdiff usr/src/lib/libmtmalloc/common/mtmalloc.c

Print this page

XXXX all calloc() implementations should check overflow

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libmtmalloc/common/mtmalloc.c
          +++ new/usr/src/lib/libmtmalloc/common/mtmalloc.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  #include <mtmalloc.h>
  28   28  #include "mtmalloc_impl.h"
  29   29  #include <unistd.h>
  30   30  #include <synch.h>
  31   31  #include <thread.h>
  32   32  #include <pthread.h>
  33   33  #include <stdio.h>
  34   34  #include <limits.h>
  35   35  #include <errno.h>
  36   36  #include <string.h>
  37   37  #include <strings.h>
  38   38  #include <sys/param.h>
  39   39  #include <sys/sysmacros.h>
  40   40  
  41   41  /*
  42   42   * To turn on the asserts just compile -DDEBUG
  43   43   */
  44   44  
  45   45  #ifndef DEBUG
  46   46  #define NDEBUG
  47   47  #endif
  48   48  
  49   49  #include <assert.h>
  50   50  
  51   51  /*
  52   52   * The MT hot malloc implementation contained herein is designed to be
  53   53   * plug-compatible with the libc version of malloc. It is not intended
  54   54   * to replace that implementation until we decide that it is ok to break
  55   55   * customer apps (Solaris 3.0).
  56   56   *
  57   57   * For requests up to 2^^16, the allocator initializes itself into NCPUS
  58   58   * worth of chains of caches. When a memory request is made, the calling thread
  59   59   * is vectored into one of NCPUS worth of caches.  The LWP id gives us a cheap,
  60   60   * contention-reducing index to use, eventually, this should be replaced with
  61   61   * the actual CPU sequence number, when an interface to get it is available.
  62   62   *
  63   63   * Once the thread is vectored into one of the list of caches the real
  64   64   * allocation of the memory begins. The size is determined to figure out which
  65   65   * bucket the allocation should be satisfied from. The management of free
  66   66   * buckets is done via a bitmask. A free bucket is represented by a 1. The
  67   67   * first free bit represents the first free bucket. The position of the bit,
  68   68   * represents the position of the bucket in the arena.
  69   69   *
  70   70   * When the memory from the arena is handed out, the address of the cache
  71   71   * control structure is written in the word preceeding the returned memory.
  72   72   * This cache control address is used during free() to mark the buffer free
  73   73   * in the cache control structure.
  74   74   *
  75   75   * When all available memory in a cache has been depleted, a new chunk of memory
  76   76   * is allocated via sbrk(). The new cache is allocated from this chunk of memory
  77   77   * and initialized in the function create_cache(). New caches are installed at
  78   78   * the front of a singly linked list of the same size memory pools. This helps
  79   79   * to ensure that there will tend to be available memory in the beginning of the
  80   80   * list.
  81   81   *
  82   82   * Long linked lists hurt performance. To decrease this effect, there is a
  83   83   * tunable, requestsize, that bumps up the sbrk allocation size and thus
  84   84   * increases the number of available blocks within an arena.  We also keep
  85   85   * a "hint" for each cache list, which is the last cache in the list allocated
  86   86   * from.  This lowers the cost of searching if there are a lot of fully
  87   87   * allocated blocks at the front of the list.
  88   88   *
  89   89   * For requests greater than 2^^16 (oversize allocations), there are two pieces
  90   90   * of overhead. There is the OVERHEAD used to hold the cache addr
  91   91   * (&oversize_list), plus an oversize_t structure to further describe the block.
  92   92   *
  93   93   * The oversize list is kept as defragmented as possible by coalescing
  94   94   * freed oversized allocations with adjacent neighbors.
  95   95   *
  96   96   * Addresses handed out are stored in a hash table, and are aligned on
  97   97   * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
  98   98   * where necessary in order to achieve this. This eases the implementation of
  99   99   * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
 100  100   *
 101  101   * A memalign allocation takes memalign header overhead.  There's two
 102  102   * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
 103  103   * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the size of memory taken to
 104  104   * get to the aligned address from malloc'ed address is the minimum size
 105  105   * OVERHEAD, we create a header taking only one OVERHEAD space with magic
 106  106   * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
 107  107   * from memaligned address, we can get to the malloc'ed address. Otherwise,
 108  108   * we create a memalign header taking two OVERHEAD space, one stores
 109  109   * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
 110  110   * malloc'ed address.
 111  111   */
 112  112  
 113  113  #if defined(__i386) || defined(__amd64)
 114  114  #include <arpa/inet.h>  /* for htonl() */
 115  115  #endif
 116  116  
 117  117  static void * morecore(size_t);
 118  118  static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
 119  119  static void * malloc_internal(size_t, percpu_t *);
 120  120  static void * oversize(size_t);
 121  121  static oversize_t *find_oversize(size_t);
 122  122  static void add_oversize(oversize_t *);
 123  123  static void copy_pattern(uint32_t, void *, size_t);
 124  124  static void * verify_pattern(uint32_t, void *, size_t);
 125  125  static void reinit_cpu_list(void);
 126  126  static void reinit_cache(cache_t *);
 127  127  static void free_oversize(oversize_t *);
 128  128  static oversize_t *oversize_header_alloc(uintptr_t, size_t);
 129  129  
 130  130  /*
 131  131   * oversize hash table stuff
 132  132   */
 133  133  #define NUM_BUCKETS     67      /* must be prime */
 134  134  #define HASH_OVERSIZE(caddr)    ((uintptr_t)(caddr) % NUM_BUCKETS)
 135  135  oversize_t *ovsz_hashtab[NUM_BUCKETS];
 136  136  
 137  137  #define ALIGN(x, a)     ((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
 138  138                          & ~((uintptr_t)(a) - 1)))
 139  139  
 140  140  /* need this to deal with little endianess of x86 */
 141  141  #if defined(__i386) || defined(__amd64)
 142  142  #define FLIP_EM(x)      htonl((x))
 143  143  #else
 144  144  #define FLIP_EM(x)      (x)
 145  145  #endif
 146  146  
 147  147  #define INSERT_ONLY                     0
 148  148  #define COALESCE_LEFT                   0x00000001
 149  149  #define COALESCE_RIGHT                  0x00000002
 150  150  #define COALESCE_WITH_BOTH_SIDES        (COALESCE_LEFT | COALESCE_RIGHT)
 151  151  
 152  152  #define OVERHEAD        8       /* size needed to write cache addr */
 153  153  #define HUNKSIZE        8192    /* just a multiplier */
 154  154  
 155  155  #define MAX_CACHED_SHIFT        16      /* 64K is the max cached size */
 156  156  #define MAX_CACHED              (1 << MAX_CACHED_SHIFT)
 157  157  #define MIN_CACHED_SHIFT        4       /* smaller requests rounded up */
 158  158  #define MTMALLOC_MIN_ALIGN      8       /* min guaranteed alignment */
 159  159  
 160  160  /* maximum size before overflow */
 161  161  #define MAX_MTMALLOC    (SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
 162  162                          - OVSZ_HEADER_SIZE)
 163  163  
 164  164  #define NUM_CACHES      (MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
 165  165  #define CACHELIST_SIZE  ALIGN(NUM_CACHES * sizeof (cache_head_t), \
 166  166      CACHE_COHERENCY_UNIT)
 167  167  
 168  168  #define MINSIZE         9       /* for requestsize, tunable */
 169  169  #define MAXSIZE         256     /* arbitrary, big enough, for requestsize */
 170  170  
 171  171  #define FREEPATTERN     0xdeadbeef /* debug fill pattern for free buf */
 172  172  #define INITPATTERN     0xbaddcafe /* debug fill pattern for new buf */
 173  173  
 174  174  #define misaligned(p)   ((unsigned)(p) & (sizeof (int) - 1))
 175  175  #define IS_OVERSIZE(x, y)       (((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
 176  176  
 177  177  static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */
 178  178  
 179  179  static uint_t cpu_mask;
 180  180  static curcpu_func curcpu;
 181  181  
 182  182  static int32_t debugopt;
 183  183  static int32_t reinit;
 184  184  
 185  185  static percpu_t *cpu_list;
 186  186  static oversize_t oversize_list;
 187  187  static mutex_t oversize_lock = DEFAULTMUTEX;
 188  188  
 189  189  static int ncpus = 0;
 190  190  
 191  191  #define MTMALLOC_OVERSIZE_MAGIC         ((uintptr_t)&oversize_list)
 192  192  #define MTMALLOC_MEMALIGN_MAGIC         ((uintptr_t)&oversize_list + 1)
 193  193  #define MTMALLOC_MEMALIGN_MIN_MAGIC     ((uintptr_t)&oversize_list + 2)
 194  194  
 195  195  /*
 196  196   * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 197  197   * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 198  198   * this is achieved.
 199  199   */
 200  200  #define OVSZ_SIZE               (ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
 201  201  #define OVSZ_HEADER_SIZE        (OVSZ_SIZE + OVERHEAD)
 202  202  
 203  203  /*
 204  204   * memalign header takes 2 OVERHEAD space.  One for memalign magic, and the
 205  205   * other one points back to the start address of originally allocated space.
 206  206   */
 207  207  #define MEMALIGN_HEADER_SIZE    2 * OVERHEAD
 208  208  #define MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
 209  209          if (shift == OVERHEAD)\
 210  210                  *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
 211  211                          MTMALLOC_MEMALIGN_MIN_MAGIC; \
 212  212          else {\
 213  213                  *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
 214  214                          MTMALLOC_MEMALIGN_MAGIC; \
 215  215                  *((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
 216  216                          (uintptr_t)malloc_addr; \
 217  217          }
 218  218  
 219  219  /*
 220  220   * Add big to the oversize hash table at the head of the relevant bucket.
 221  221   */
 222  222  static void
 223  223  insert_hash(oversize_t *big)
 224  224  {
 225  225          caddr_t ret = big->addr;
 226  226          int bucket = HASH_OVERSIZE(ret);
 227  227  
 228  228          assert(MUTEX_HELD(&oversize_lock));
 229  229          big->hash_next = ovsz_hashtab[bucket];
 230  230          ovsz_hashtab[bucket] = big;
 231  231  }
 232  232  
 233  233  void *
 234  234  malloc(size_t bytes)
 235  235  {
 236  236          percpu_t *list_rotor;
 237  237          uint_t  list_index;
 238  238  
 239  239          if (bytes > MAX_CACHED)
 240  240                  return (oversize(bytes));
 241  241  
 242  242          list_index = (curcpu() & cpu_mask);
 243  243  
 244  244          list_rotor = &cpu_list[list_index];
 245  245  
 246  246          return (malloc_internal(bytes, list_rotor));
 247  247  }
 248  248  
 249  249  void *
 250  250  realloc(void * ptr, size_t bytes)
 251  251  {
 252  252          void *new, *data_ptr;
 253  253          cache_t *cacheptr;
 254  254          caddr_t mem;
 255  255          size_t shift = 0;
 256  256  
 257  257          if (ptr == NULL)
 258  258                  return (malloc(bytes));
 259  259  
 260  260          if (bytes == 0) {
 261  261                  free(ptr);
 262  262                  return (NULL);
 263  263          }
 264  264  
 265  265          data_ptr = ptr;
 266  266          mem = (caddr_t)ptr - OVERHEAD;
 267  267  
 268  268          /*
 269  269           * Optimization possibility :
 270  270           *      p = malloc(64);
 271  271           *      q = realloc(p, 64);
 272  272           * q can be same as p.
 273  273           * Apply this optimization for the normal
 274  274           * sized caches for now.
 275  275           */
 276  276          if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC ||
 277  277              *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) {
 278  278                  cacheptr = (cache_t *)*(uintptr_t *)mem;
 279  279                  if (bytes <= (cacheptr->mt_size - OVERHEAD))
 280  280                          return (ptr);
 281  281          }
 282  282  
 283  283          new = malloc(bytes);
 284  284  
 285  285          if (new == NULL)
 286  286                  return (NULL);
 287  287  
 288  288          /*
 289  289           * If new == ptr, ptr has previously been freed. Passing a freed pointer
 290  290           * to realloc() is not allowed - unless the caller specifically states
 291  291           * otherwise, in which case we must avoid freeing ptr (ie new) before we
 292  292           * return new. There is (obviously) no requirement to memcpy() ptr to
 293  293           * new before we return.
 294  294           */
 295  295          if (new == ptr) {
 296  296                  if (!(debugopt & MTDOUBLEFREE))
 297  297                          abort();
 298  298                  return (new);
 299  299          }
 300  300  
 301  301          if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
 302  302                  mem -= OVERHEAD;
 303  303                  ptr = (void *)*(uintptr_t *)mem;
 304  304                  mem = (caddr_t)ptr - OVERHEAD;
 305  305                  shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
 306  306          } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
 307  307                  ptr = (void *) mem;
 308  308                  mem -= OVERHEAD;
 309  309                  shift = OVERHEAD;
 310  310          }
 311  311  
 312  312          if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
 313  313                  oversize_t *old;
 314  314  
 315  315                  old = (oversize_t *)(mem - OVSZ_SIZE);
 316  316                  (void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
 317  317                  free(ptr);
 318  318                  return (new);
 319  319          }
 320  320  
 321  321          cacheptr = (cache_t *)*(uintptr_t *)mem;
 322  322  
 323  323          (void) memcpy(new, data_ptr,

↓ open down ↓

323 lines elided

↑ open up ↑

 324  324              MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
 325  325          free(ptr);
 326  326  
 327  327          return (new);
 328  328  }
 329  329  
 330  330  void *
 331  331  calloc(size_t nelem, size_t bytes)
 332  332  {
 333  333          void * ptr;
 334      -        size_t size = nelem * bytes;
      334 +        size_t size;
      335 +
      336 +        if (nelem == 0 || bytes == 0) {
      337 +                size = 0;
      338 +        } else {
      339 +                size = nelem * bytes;
      340 +
      341 +                /* check for overflow */
      342 +                if (size / nelem != bytes) {
      343 +                        errno = ENOMEM;
      344 +                        return (NULL);
      345 +                }
      346 +        }
 335  347  
 336  348          ptr = malloc(size);
 337  349          if (ptr == NULL)
 338  350                  return (NULL);
 339  351          (void) memset(ptr, 0, size);
 340  352  
 341  353          return (ptr);
 342  354  }
 343  355  
 344  356  void

 345  357  free(void * ptr)
 346  358  {
 347  359          cache_t *cacheptr;
 348  360          caddr_t mem;
 349  361          int32_t i;
 350  362          caddr_t freeblocks;
 351  363          uintptr_t offset;
 352  364          uchar_t mask;
 353  365          int32_t which_bit, num_bytes;
 354  366  
 355  367          if (ptr == NULL)
 356  368                  return;
 357  369  
 358  370          mem = (caddr_t)ptr - OVERHEAD;
 359  371  
 360  372          if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
 361  373                  mem -= OVERHEAD;
 362  374                  ptr = (void *)*(uintptr_t *)mem;
 363  375                  mem = (caddr_t)ptr - OVERHEAD;
 364  376          } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
 365  377                  ptr = (void *) mem;
 366  378                  mem -= OVERHEAD;
 367  379          }
 368  380  
 369  381          if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
 370  382                  oversize_t *big, **opp;
 371  383                  int bucket;
 372  384  
 373  385                  big = (oversize_t *)(mem - OVSZ_SIZE);
 374  386                  (void) mutex_lock(&oversize_lock);
 375  387  
 376  388                  bucket = HASH_OVERSIZE(big->addr);
 377  389                  for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
 378  390                      opp = &(*opp)->hash_next)
 379  391                          if (*opp == big)
 380  392                                  break;
 381  393  
 382  394                  if (*opp == NULL) {
 383  395                          if (!(debugopt & MTDOUBLEFREE))
 384  396                                  abort();
 385  397                          (void) mutex_unlock(&oversize_lock);
 386  398                          return;
 387  399                  }
 388  400  
 389  401                  *opp = big->hash_next;  /* remove big from the hash table */
 390  402                  big->hash_next = NULL;
 391  403  
 392  404                  if (debugopt & MTDEBUGPATTERN)
 393  405                          copy_pattern(FREEPATTERN, ptr, big->size);
 394  406                  add_oversize(big);
 395  407                  (void) mutex_unlock(&oversize_lock);
 396  408                  return;
 397  409          }
 398  410  
 399  411          cacheptr = (cache_t *)*(uintptr_t *)mem;
 400  412          freeblocks = cacheptr->mt_freelist;
 401  413  
 402  414          /*
 403  415           * This is the distance measured in bits into the arena.
 404  416           * The value of offset is in bytes but there is a 1-1 correlation
 405  417           * between distance into the arena and distance into the
 406  418           * freelist bitmask.
 407  419           */
 408  420          offset = mem - cacheptr->mt_arena;
 409  421  
 410  422          /*
 411  423           * i is total number of bits to offset into freelist bitmask.
 412  424           */
 413  425  
 414  426          i = offset / cacheptr->mt_size;
 415  427  
 416  428          num_bytes = i >> 3;
 417  429  
 418  430          /*
 419  431           * which_bit is the bit offset into the byte in the freelist.
 420  432           * if our freelist bitmask looks like 0xf3 and we are freeing
 421  433           * block 5 (ie: the 6th block) our mask will be 0xf7 after
 422  434           * the free. Things go left to right that's why the mask is 0x80
 423  435           * and not 0x01.
 424  436           */
 425  437          which_bit = i - (num_bytes << 3);
 426  438  
 427  439          mask = 0x80 >> which_bit;
 428  440  
 429  441          freeblocks += num_bytes;
 430  442  
 431  443          if (debugopt & MTDEBUGPATTERN)
 432  444                  copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
 433  445  
 434  446          (void) mutex_lock(&cacheptr->mt_cache_lock);
 435  447  
 436  448          if (*freeblocks & mask) {
 437  449                  if (!(debugopt & MTDOUBLEFREE))
 438  450                          abort();
 439  451          } else {
 440  452                  *freeblocks |= mask;
 441  453                  cacheptr->mt_nfree++;
 442  454          }
 443  455  
 444  456          (void) mutex_unlock(&cacheptr->mt_cache_lock);
 445  457  }
 446  458  
 447  459  void *
 448  460  memalign(size_t alignment, size_t size)
 449  461  {
 450  462          size_t alloc_size;
 451  463          uintptr_t offset;
 452  464          void *alloc_buf;
 453  465          void *ret_buf;
 454  466  
 455  467          if (size == 0 || alignment == 0 || misaligned(alignment) ||
 456  468              (alignment & (alignment - 1)) != 0) {
 457  469                  errno = EINVAL;
 458  470                  return (NULL);
 459  471          }
 460  472  
 461  473          /* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
 462  474          if (alignment <= MTMALLOC_MIN_ALIGN)
 463  475                  return (malloc(size));
 464  476  
 465  477          alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;
 466  478  
 467  479          if (alloc_size < size) { /* overflow */
 468  480                  errno = ENOMEM;
 469  481                  return (NULL);
 470  482          }
 471  483  
 472  484          alloc_buf = malloc(alloc_size);
 473  485  
 474  486          if (alloc_buf == NULL)
 475  487                  /* malloc sets errno */
 476  488                  return (NULL);
 477  489  
 478  490          /*
 479  491           * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
 480  492           * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
 481  493           * we will use alloc_size to return the excess fragments to the free
 482  494           * list, we also round-up alloc_size if necessary.
 483  495           */
 484  496          if ((alloc_size > MAX_CACHED) &&
 485  497              (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
 486  498                  alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);
 487  499  
 488  500          if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
 489  501                  /* aligned correctly */
 490  502  
 491  503                  size_t frag_size = alloc_size -
 492  504                      (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
 493  505  
 494  506                  /*
 495  507                   * If the leftover piece of the memory > MAX_CACHED,
 496  508                   * split off the piece and return it back to the freelist.
 497  509                   */
 498  510                  if (IS_OVERSIZE(frag_size, alloc_size)) {
 499  511                          oversize_t *orig, *tail;
 500  512                          uintptr_t taddr;
 501  513                          size_t data_size;
 502  514                          taddr = ALIGN((uintptr_t)alloc_buf + size,
 503  515                              MTMALLOC_MIN_ALIGN);
 504  516                          data_size = taddr - (uintptr_t)alloc_buf;
 505  517                          orig = (oversize_t *)((uintptr_t)alloc_buf -
 506  518                              OVSZ_HEADER_SIZE);
 507  519                          frag_size = orig->size - data_size -
 508  520                              OVSZ_HEADER_SIZE;
 509  521                          orig->size = data_size;
 510  522                          tail = oversize_header_alloc(taddr, frag_size);
 511  523                          free_oversize(tail);
 512  524                  }
 513  525                  ret_buf = alloc_buf;
 514  526          } else {
 515  527                  uchar_t oversize_bits = 0;
 516  528                  size_t  head_sz, data_sz, tail_sz;
 517  529                  uintptr_t ret_addr, taddr, shift, tshift;
 518  530                  oversize_t *orig, *tail, *big;
 519  531                  size_t tsize;
 520  532  
 521  533                  /* needs to be aligned */
 522  534                  shift = alignment - offset;
 523  535  
 524  536                  assert(shift >= MTMALLOC_MIN_ALIGN);
 525  537  
 526  538                  ret_addr = ((uintptr_t)alloc_buf + shift);
 527  539                  ret_buf = (void *)ret_addr;
 528  540  
 529  541                  if (alloc_size <= MAX_CACHED) {
 530  542                          MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
 531  543                          return (ret_buf);
 532  544                  }
 533  545  
 534  546                  /*
 535  547                   * Only check for the fragments when the memory is allocted
 536  548                   * from oversize_list.  Split off a fragment and return it
 537  549                   * to the oversize freelist when it's > MAX_CACHED.
 538  550                   */
 539  551  
 540  552                  head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);
 541  553  
 542  554                  tail_sz = alloc_size -
 543  555                      (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
 544  556  
 545  557                  oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
 546  558                      IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
 547  559                      IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;
 548  560  
 549  561                  switch (oversize_bits) {
 550  562                          case NONE_OVERSIZE:
 551  563                          case DATA_OVERSIZE:
 552  564                                  MEMALIGN_HEADER_ALLOC(ret_addr, shift,
 553  565                                      alloc_buf);
 554  566                                  break;
 555  567                          case HEAD_OVERSIZE:
 556  568                                  /*
 557  569                                   * If we can extend data > MAX_CACHED and have
 558  570                                   * head still > MAX_CACHED, we split head-end
 559  571                                   * as the case of head-end and data oversized,
 560  572                                   * otherwise just create memalign header.
 561  573                                   */
 562  574                                  tsize = (shift + size) - (MAX_CACHED + 8 +
 563  575                                      MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
 564  576  
 565  577                                  if (!IS_OVERSIZE(tsize, alloc_size)) {
 566  578                                          MEMALIGN_HEADER_ALLOC(ret_addr, shift,
 567  579                                              alloc_buf);
 568  580                                          break;
 569  581                                  } else {
 570  582                                          tsize += OVSZ_HEADER_SIZE;
 571  583                                          taddr = ALIGN((uintptr_t)alloc_buf +
 572  584                                              tsize, MTMALLOC_MIN_ALIGN);
 573  585                                          tshift = ret_addr - taddr;
 574  586                                          MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
 575  587                                              taddr);
 576  588                                          ret_addr = taddr;
 577  589                                          shift = ret_addr - (uintptr_t)alloc_buf;
 578  590                                  }
 579  591                                  /* FALLTHROUGH */
 580  592                          case HEAD_AND_DATA_OVERSIZE:
 581  593                                  /*
 582  594                                   * Split off the head fragment and
 583  595                                   * return it back to oversize freelist.
 584  596                                   * Create oversize header for the piece
 585  597                                   * of (data + tail fragment).
 586  598                                   */
 587  599                                  orig = (oversize_t *)((uintptr_t)alloc_buf -
 588  600                                      OVSZ_HEADER_SIZE);
 589  601                                  big = oversize_header_alloc(ret_addr -
 590  602                                      OVSZ_HEADER_SIZE, (orig->size - shift));
 591  603                                  (void) mutex_lock(&oversize_lock);
 592  604                                  insert_hash(big);
 593  605                                  (void) mutex_unlock(&oversize_lock);
 594  606                                  orig->size = shift - OVSZ_HEADER_SIZE;
 595  607  
 596  608                                  /* free up the head fragment */
 597  609                                  free_oversize(orig);
 598  610                                  break;
 599  611                          case TAIL_OVERSIZE:
 600  612                                  /*
 601  613                                   * If we can extend data > MAX_CACHED and have
 602  614                                   * tail-end still > MAX_CACHED, we split tail
 603  615                                   * end, otherwise just create memalign header.
 604  616                                   */
 605  617                                  orig = (oversize_t *)((uintptr_t)alloc_buf -
 606  618                                      OVSZ_HEADER_SIZE);
 607  619                                  tsize =  orig->size - (MAX_CACHED + 8 +
 608  620                                      shift + OVSZ_HEADER_SIZE +
 609  621                                      MTMALLOC_MIN_ALIGN);
 610  622                                  if (!IS_OVERSIZE(tsize, alloc_size)) {
 611  623                                          MEMALIGN_HEADER_ALLOC(ret_addr, shift,
 612  624                                              alloc_buf);
 613  625                                          break;
 614  626                                  } else {
 615  627                                          size = MAX_CACHED + 8;
 616  628                                  }
 617  629                                  /* FALLTHROUGH */
 618  630                          case DATA_AND_TAIL_OVERSIZE:
 619  631                                  /*
 620  632                                   * Split off the tail fragment and
 621  633                                   * return it back to oversize freelist.
 622  634                                   * Create memalign header and adjust
 623  635                                   * the size for the piece of
 624  636                                   * (head fragment + data).
 625  637                                   */
 626  638                                  taddr = ALIGN(ret_addr + size,
 627  639                                      MTMALLOC_MIN_ALIGN);
 628  640                                  data_sz = (size_t)(taddr -
 629  641                                      (uintptr_t)alloc_buf);
 630  642                                  orig = (oversize_t *)((uintptr_t)alloc_buf -
 631  643                                      OVSZ_HEADER_SIZE);
 632  644                                  tsize = orig->size - data_sz;
 633  645                                  orig->size = data_sz;
 634  646                                  MEMALIGN_HEADER_ALLOC(ret_buf, shift,
 635  647                                      alloc_buf);
 636  648                                  tsize -= OVSZ_HEADER_SIZE;
 637  649                                  tail = oversize_header_alloc(taddr,  tsize);
 638  650                                  free_oversize(tail);
 639  651                                  break;
 640  652                          case HEAD_AND_TAIL_OVERSIZE:
 641  653                                  /*
 642  654                                   * Split off the head fragment.
 643  655                                   * We try to free up tail-end when we can
 644  656                                   * extend data size to (MAX_CACHED + 8)
 645  657                                   * and remain tail-end oversized.
 646  658                                   * The bottom line is all split pieces
 647  659                                   * should be oversize in size.
 648  660                                   */
 649  661                                  orig = (oversize_t *)((uintptr_t)alloc_buf -
 650  662                                      OVSZ_HEADER_SIZE);
 651  663                                  tsize =  orig->size - (MAX_CACHED + 8 +
 652  664                                      OVSZ_HEADER_SIZE + shift +
 653  665                                      MTMALLOC_MIN_ALIGN);
 654  666  
 655  667                                  if (!IS_OVERSIZE(tsize, alloc_size)) {
 656  668                                          /*
 657  669                                           * If the chunk is not big enough
 658  670                                           * to make both data and tail oversize
 659  671                                           * we just keep them as one piece.
 660  672                                           */
 661  673                                          big = oversize_header_alloc(ret_addr -
 662  674                                              OVSZ_HEADER_SIZE,
 663  675                                              orig->size - shift);
 664  676                                          (void) mutex_lock(&oversize_lock);
 665  677                                          insert_hash(big);
 666  678                                          (void) mutex_unlock(&oversize_lock);
 667  679                                          orig->size = shift - OVSZ_HEADER_SIZE;
 668  680                                          free_oversize(orig);
 669  681                                          break;
 670  682                                  } else {
 671  683                                          /*
 672  684                                           * extend data size > MAX_CACHED
 673  685                                           * and handle it as head, data, tail
 674  686                                           * are all oversized.
 675  687                                           */
 676  688                                          size = MAX_CACHED + 8;
 677  689                                  }
 678  690                                  /* FALLTHROUGH */
 679  691                          case ALL_OVERSIZE:
 680  692                                  /*
 681  693                                   * split off the head and tail fragments,
 682  694                                   * return them back to the oversize freelist.
 683  695                                   * Alloc oversize header for data seg.
 684  696                                   */
 685  697                                  orig = (oversize_t *)((uintptr_t)alloc_buf -
 686  698                                      OVSZ_HEADER_SIZE);
 687  699                                  tsize = orig->size;
 688  700                                  orig->size = shift - OVSZ_HEADER_SIZE;
 689  701                                  free_oversize(orig);
 690  702  
 691  703                                  taddr = ALIGN(ret_addr + size,
 692  704                                      MTMALLOC_MIN_ALIGN);
 693  705                                  data_sz = taddr - ret_addr;
 694  706                                  assert(tsize > (shift + data_sz +
 695  707                                      OVSZ_HEADER_SIZE));
 696  708                                  tail_sz = tsize -
 697  709                                      (shift + data_sz + OVSZ_HEADER_SIZE);
 698  710  
 699  711                                  /* create oversize header for data seg */
 700  712                                  big = oversize_header_alloc(ret_addr -
 701  713                                      OVSZ_HEADER_SIZE, data_sz);
 702  714                                  (void) mutex_lock(&oversize_lock);
 703  715                                  insert_hash(big);
 704  716                                  (void) mutex_unlock(&oversize_lock);
 705  717  
 706  718                                  /* create oversize header for tail fragment */
 707  719                                  tail = oversize_header_alloc(taddr, tail_sz);
 708  720                                  free_oversize(tail);
 709  721                                  break;
 710  722                          default:
 711  723                                  /* should not reach here */
 712  724                                  assert(0);
 713  725                  }
 714  726          }
 715  727          return (ret_buf);
 716  728  }
 717  729  
 718  730  
 719  731  void *
 720  732  valloc(size_t size)
 721  733  {
 722  734          static unsigned pagesize;
 723  735  
 724  736          if (size == 0)
 725  737                  return (NULL);
 726  738  
 727  739          if (!pagesize)
 728  740                  pagesize = sysconf(_SC_PAGESIZE);
 729  741  
 730  742          return (memalign(pagesize, size));
 731  743  }
 732  744  
 733  745  void
 734  746  mallocctl(int cmd, long value)
 735  747  {
 736  748          switch (cmd) {
 737  749  
 738  750          case MTDEBUGPATTERN:
 739  751                  /*
 740  752                   * Reinitialize free blocks in case malloc() is called prior
 741  753                   * to mallocctl().
 742  754                   */
 743  755                  if (value && !(debugopt & cmd)) {
 744  756                          reinit++;
 745  757                          debugopt |= cmd;
 746  758                          reinit_cpu_list();
 747  759                  }
 748  760                  /*FALLTHRU*/
 749  761          case MTDOUBLEFREE:
 750  762          case MTINITBUFFER:
 751  763                  if (value)
 752  764                          debugopt |= cmd;
 753  765                  else
 754  766                          debugopt &= ~cmd;
 755  767                  break;
 756  768          case MTCHUNKSIZE:
 757  769                  if (value >= MINSIZE && value <= MAXSIZE)
 758  770                          requestsize = value;
 759  771                  break;
 760  772          default:
 761  773                  break;
 762  774          }
 763  775  }
 764  776  
 765  777  /*
 766  778   * Initialization function, called from the init section of the library.
 767  779   * No locking is required here because we are single-threaded during
 768  780   * library initialization.
 769  781   */
 770  782  static void
 771  783  setup_caches(void)
 772  784  {
 773  785          uintptr_t oldbrk;
 774  786          uintptr_t newbrk;
 775  787  
 776  788          size_t cache_space_needed;
 777  789          size_t padding;
 778  790  
 779  791          curcpu_func new_curcpu;
 780  792          uint_t new_cpu_mask;
 781  793          percpu_t *new_cpu_list;
 782  794  
 783  795          uint_t i, j;
 784  796          uintptr_t list_addr;
 785  797  
 786  798          /*
 787  799           * Get a decent "current cpu identifier", to be used to reduce
 788  800           * contention.  Eventually, this should be replaced by an interface
 789  801           * to get the actual CPU sequence number in libthread/liblwp.
 790  802           */
 791  803          new_curcpu = (curcpu_func)thr_self;
 792  804          if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
 793  805                  ncpus = 4; /* decent default value */
 794  806  
 795  807          /* round ncpus up to a power of 2 */
 796  808          while (ncpus & (ncpus - 1))
 797  809                  ncpus++;
 798  810  
 799  811          new_cpu_mask = ncpus - 1;       /* create the cpu mask */
 800  812  
 801  813          /*
 802  814           * We now do some magic with the brk.  What we want to get in the
 803  815           * end is a bunch of well-aligned stuff in a big initial allocation.
 804  816           * Along the way, we do sanity checks to make sure no one else has
 805  817           * touched the brk (which shouldn't happen, but it's always good to
 806  818           * check)
 807  819           *
 808  820           * First, make sure sbrk is sane, and store the current brk in oldbrk.
 809  821           */
 810  822          oldbrk = (uintptr_t)sbrk(0);
 811  823          if ((void *)oldbrk == (void *)-1)
 812  824                  abort();        /* sbrk is broken -- we're doomed. */
 813  825  
 814  826          /*
 815  827           * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
 816  828           * the percpu structures and cache lists will be properly aligned.
 817  829           *
 818  830           *   2.  All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
 819  831           *      so they can be paged out individually.
 820  832           */
 821  833          newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
 822  834          if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
 823  835                  abort();        /* sbrk is broken -- we're doomed. */
 824  836  
 825  837          /*
 826  838           * For each cpu, there is one percpu_t and a list of caches
 827  839           */
 828  840          cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);
 829  841  
 830  842          new_cpu_list = (percpu_t *)sbrk(cache_space_needed);
 831  843  
 832  844          if (new_cpu_list == (percpu_t *)-1 ||
 833  845              (uintptr_t)new_cpu_list != newbrk)
 834  846                  abort();        /* sbrk is broken -- we're doomed. */
 835  847  
 836  848          /*
 837  849           * Finally, align the brk to HUNKSIZE so that all hunks are
 838  850           * page-aligned, to avoid edge-effects.
 839  851           */
 840  852  
 841  853          newbrk = (uintptr_t)new_cpu_list + cache_space_needed;
 842  854  
 843  855          padding = ALIGN(newbrk, HUNKSIZE) - newbrk;
 844  856  
 845  857          if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
 846  858                  abort();        /* sbrk is broken -- we're doomed. */
 847  859  
 848  860          list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));
 849  861  
 850  862          /* initialize the percpu list */
 851  863          for (i = 0; i < ncpus; i++) {
 852  864                  new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
 853  865                  for (j = 0; j < NUM_CACHES; j++) {
 854  866                          new_cpu_list[i].mt_caches[j].mt_cache = NULL;
 855  867                          new_cpu_list[i].mt_caches[j].mt_hint = NULL;
 856  868                  }
 857  869  
 858  870                  (void) mutex_init(&new_cpu_list[i].mt_parent_lock,
 859  871                      USYNC_THREAD, NULL);
 860  872  
 861  873                  /* get the correct cache list alignment */
 862  874                  list_addr += CACHELIST_SIZE;
 863  875          }
 864  876  
 865  877          /*
 866  878           * Initialize oversize listhead
 867  879           */
 868  880          oversize_list.next_bysize = &oversize_list;
 869  881          oversize_list.prev_bysize = &oversize_list;
 870  882          oversize_list.next_byaddr = &oversize_list;
 871  883          oversize_list.prev_byaddr = &oversize_list;
 872  884          oversize_list.addr = NULL;
 873  885          oversize_list.size = 0;         /* sentinal */
 874  886  
 875  887          /*
 876  888           * Now install the global variables.
 877  889           */
 878  890          curcpu = new_curcpu;
 879  891          cpu_mask = new_cpu_mask;
 880  892          cpu_list = new_cpu_list;
 881  893  }
 882  894  
 883  895  static void
 884  896  create_cache(cache_t *cp, size_t size, uint_t chunksize)
 885  897  {
 886  898          long nblocks;
 887  899  
 888  900          (void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
 889  901          cp->mt_size = size;
 890  902          cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
 891  903          cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
 892  904          cp->mt_hunks = chunksize;
 893  905          /*
 894  906           * rough calculation. We will need to adjust later.
 895  907           */
 896  908          nblocks = cp->mt_span / cp->mt_size;
 897  909          nblocks >>= 3;
 898  910          if (nblocks == 0) { /* less than 8 free blocks in this pool */
 899  911                  int32_t numblocks = 0;
 900  912                  long i = cp->mt_span;
 901  913                  size_t sub = cp->mt_size;
 902  914                  uchar_t mask = 0;
 903  915  
 904  916                  while (i > sub) {
 905  917                          numblocks++;
 906  918                          i -= sub;
 907  919                  }
 908  920                  nblocks = numblocks;
 909  921                  cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
 910  922                  cp->mt_nfree = numblocks;
 911  923                  while (numblocks--) {
 912  924                          mask |= 0x80 >> numblocks;
 913  925                  }
 914  926                  *(cp->mt_freelist) = mask;
 915  927          } else {
 916  928                  cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
 917  929                      nblocks, 32);
 918  930                  /* recompute nblocks */
 919  931                  nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
 920  932                      cp->mt_span - cp->mt_arena) / cp->mt_size;
 921  933                  cp->mt_nfree = ((nblocks >> 3) << 3);
 922  934                  /* Set everything to free */
 923  935                  (void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
 924  936          }
 925  937  
 926  938          if (debugopt & MTDEBUGPATTERN)
 927  939                  copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);
 928  940  
 929  941          cp->mt_next = NULL;
 930  942  }
 931  943  
 932  944  static void
 933  945  reinit_cpu_list(void)
 934  946  {
 935  947          oversize_t *wp = oversize_list.next_bysize;
 936  948          percpu_t *cpuptr;
 937  949          cache_t *thiscache;
 938  950          cache_head_t *cachehead;
 939  951  
 940  952          /* Reinitialize free oversize blocks. */
 941  953          (void) mutex_lock(&oversize_lock);
 942  954          if (debugopt & MTDEBUGPATTERN)
 943  955                  for (; wp != &oversize_list; wp = wp->next_bysize)
 944  956                          copy_pattern(FREEPATTERN, wp->addr, wp->size);
 945  957          (void) mutex_unlock(&oversize_lock);
 946  958  
 947  959          /* Reinitialize free blocks. */
 948  960          for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
 949  961                  (void) mutex_lock(&cpuptr->mt_parent_lock);
 950  962                  for (cachehead = &cpuptr->mt_caches[0]; cachehead <
 951  963                      &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
 952  964                          for (thiscache = cachehead->mt_cache; thiscache != NULL;
 953  965                              thiscache = thiscache->mt_next) {
 954  966                                  (void) mutex_lock(&thiscache->mt_cache_lock);
 955  967                                  if (thiscache->mt_nfree == 0) {
 956  968                                          (void) mutex_unlock(
 957  969                                              &thiscache->mt_cache_lock);
 958  970                                          continue;
 959  971                                  }
 960  972                                  if (thiscache != NULL)
 961  973                                          reinit_cache(thiscache);
 962  974                                  (void) mutex_unlock(&thiscache->mt_cache_lock);
 963  975                          }
 964  976                  }
 965  977                  (void) mutex_unlock(&cpuptr->mt_parent_lock);
 966  978          }
 967  979          reinit = 0;
 968  980  }
 969  981  
 970  982  static void
 971  983  reinit_cache(cache_t *thiscache)
 972  984  {
 973  985          uint32_t *freeblocks; /* not a uintptr_t on purpose */
 974  986          int32_t i, n;
 975  987          caddr_t ret;
 976  988  
 977  989          freeblocks = (uint32_t *)thiscache->mt_freelist;
 978  990          while (freeblocks < (uint32_t *)thiscache->mt_arena) {
 979  991                  if (*freeblocks & 0xffffffff) {
 980  992                          for (i = 0; i < 32; i++) {
 981  993                                  if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
 982  994                                          n = (uintptr_t)(((freeblocks -
 983  995                                              (uint32_t *)thiscache->mt_freelist)
 984  996                                              << 5) + i) * thiscache->mt_size;
 985  997                                          ret = thiscache->mt_arena + n;
 986  998                                          ret += OVERHEAD;
 987  999                                          copy_pattern(FREEPATTERN, ret,
 988 1000                                              thiscache->mt_size);
 989 1001                                  }
 990 1002                          }
 991 1003                  }
 992 1004                  freeblocks++;
 993 1005          }
 994 1006  }
 995 1007  
 996 1008  static void *
 997 1009  malloc_internal(size_t size, percpu_t *cpuptr)
 998 1010  {
 999 1011          cache_head_t *cachehead;
1000 1012          cache_t *thiscache, *hintcache;
1001 1013          int32_t i, n, logsz, bucket;
1002 1014          uint32_t index;
1003 1015          uint32_t *freeblocks; /* not a uintptr_t on purpose */
1004 1016          caddr_t ret;
1005 1017  
1006 1018          logsz = MIN_CACHED_SHIFT;
1007 1019  
1008 1020          while (size > (1 << logsz))
1009 1021                  logsz++;
1010 1022  
1011 1023          bucket = logsz - MIN_CACHED_SHIFT;
1012 1024  
1013 1025          (void) mutex_lock(&cpuptr->mt_parent_lock);
1014 1026  
1015 1027          /*
1016 1028           * Find a cache of the appropriate size with free buffers.
1017 1029           *
1018 1030           * We don't need to lock each cache as we check their mt_nfree count,
1019 1031           * since:
1020 1032           *      1.  We are only looking for caches with mt_nfree > 0.  If a
1021 1033           *         free happens during our search, it will increment mt_nfree,
1022 1034           *         which will not effect the test.
1023 1035           *      2.  Allocations can decrement mt_nfree, but they can't happen
1024 1036           *         as long as we hold mt_parent_lock.
1025 1037           */
1026 1038  
1027 1039          cachehead = &cpuptr->mt_caches[bucket];
1028 1040  
1029 1041          /* Search through the list, starting at the mt_hint */
1030 1042          thiscache = cachehead->mt_hint;
1031 1043  
1032 1044          while (thiscache != NULL && thiscache->mt_nfree == 0)
1033 1045                  thiscache = thiscache->mt_next;
1034 1046  
1035 1047          if (thiscache == NULL) {
1036 1048                  /* wrap around -- search up to the hint */
1037 1049                  thiscache = cachehead->mt_cache;
1038 1050                  hintcache = cachehead->mt_hint;
1039 1051  
1040 1052                  while (thiscache != NULL && thiscache != hintcache &&
1041 1053                      thiscache->mt_nfree == 0)
1042 1054                          thiscache = thiscache->mt_next;
1043 1055  
1044 1056                  if (thiscache == hintcache)
1045 1057                          thiscache = NULL;
1046 1058          }
1047 1059  
1048 1060  
1049 1061          if (thiscache == NULL) { /* there are no free caches */
1050 1062                  int32_t thisrequest = requestsize;
1051 1063                  int32_t buffer_size = (1 << logsz) + OVERHEAD;
1052 1064  
1053 1065                  thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
1054 1066  
1055 1067                  if (thiscache == (cache_t *)-1) {
1056 1068                          (void) mutex_unlock(&cpuptr->mt_parent_lock);
1057 1069                          errno = EAGAIN;
1058 1070                          return (NULL);
1059 1071                  }
1060 1072                  create_cache(thiscache, buffer_size, thisrequest);
1061 1073  
1062 1074                  /* link in the new block at the beginning of the list */
1063 1075                  thiscache->mt_next = cachehead->mt_cache;
1064 1076                  cachehead->mt_cache = thiscache;
1065 1077          }
1066 1078  
1067 1079          /* update the hint to the cache we found or created */
1068 1080          cachehead->mt_hint = thiscache;
1069 1081  
1070 1082          /* thiscache now points to a cache with available space */
1071 1083          (void) mutex_lock(&thiscache->mt_cache_lock);
1072 1084  
1073 1085          freeblocks = (uint32_t *)thiscache->mt_freelist;
1074 1086          while (freeblocks < (uint32_t *)thiscache->mt_arena) {
1075 1087                  if (*freeblocks & 0xffffffff)
1076 1088                          break;
1077 1089                  freeblocks++;
1078 1090                  if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1079 1091                      *freeblocks & 0xffffffff)
1080 1092                          break;
1081 1093                  freeblocks++;
1082 1094                  if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1083 1095                      *freeblocks & 0xffffffff)
1084 1096                          break;
1085 1097                  freeblocks++;
1086 1098                  if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1087 1099                      *freeblocks & 0xffffffff)
1088 1100                          break;
1089 1101                  freeblocks++;
1090 1102          }
1091 1103  
1092 1104          /*
1093 1105           * the offset from mt_freelist to freeblocks is the offset into
1094 1106           * the arena. Be sure to include the offset into freeblocks
1095 1107           * of the bitmask. n is the offset.
1096 1108           */
1097 1109          for (i = 0; i < 32; ) {
1098 1110                  if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1099 1111                          break;
1100 1112                  if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1101 1113                          break;
1102 1114                  if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1103 1115                          break;
1104 1116                  if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1105 1117                          break;
1106 1118          }
1107 1119          index = 0x80000000 >> --i;
1108 1120  
1109 1121  
1110 1122          *freeblocks &= FLIP_EM(~index);
1111 1123  
1112 1124          thiscache->mt_nfree--;
1113 1125  
1114 1126          (void) mutex_unlock(&thiscache->mt_cache_lock);
1115 1127          (void) mutex_unlock(&cpuptr->mt_parent_lock);
1116 1128  
1117 1129          n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
1118 1130              + i) * thiscache->mt_size;
1119 1131          /*
1120 1132           * Now you have the offset in n, you've changed the free mask
1121 1133           * in the freelist. Nothing left to do but find the block
1122 1134           * in the arena and put the value of thiscache in the word
1123 1135           * ahead of the handed out address and return the memory
1124 1136           * back to the user.
1125 1137           */
1126 1138          ret = thiscache->mt_arena + n;
1127 1139  
1128 1140          /* Store the cache addr for this buf. Makes free go fast. */
1129 1141          *(uintptr_t *)ret = (uintptr_t)thiscache;
1130 1142  
1131 1143          /*
1132 1144           * This assert makes sure we don't hand out memory that is not
1133 1145           * owned by this cache.
1134 1146           */
1135 1147          assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
1136 1148              thiscache->mt_span);
1137 1149  
1138 1150          ret += OVERHEAD;
1139 1151  
1140 1152          assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
1141 1153  
1142 1154          if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
1143 1155                  if (verify_pattern(FREEPATTERN, ret, size))
1144 1156                          abort();        /* reference after free */
1145 1157  
1146 1158          if (debugopt & MTINITBUFFER)
1147 1159                  copy_pattern(INITPATTERN, ret, size);
1148 1160          return ((void *)ret);
1149 1161  }
1150 1162  
1151 1163  static void *
1152 1164  morecore(size_t bytes)
1153 1165  {
1154 1166          void * ret;
1155 1167  
1156 1168          if (bytes > LONG_MAX) {
1157 1169                  intptr_t wad;
1158 1170                  /*
1159 1171                   * The request size is too big. We need to do this in
1160 1172                   * chunks. Sbrk only takes an int for an arg.
1161 1173                   */
1162 1174                  if (bytes == ULONG_MAX)
1163 1175                          return ((void *)-1);
1164 1176  
1165 1177                  ret = sbrk(0);
1166 1178                  wad = LONG_MAX;
1167 1179                  while (wad > 0) {
1168 1180                          if (sbrk(wad) == (void *)-1) {
1169 1181                                  if (ret != sbrk(0))
1170 1182                                          (void) sbrk(-LONG_MAX);
1171 1183                                  return ((void *)-1);
1172 1184                          }
1173 1185                          bytes -= LONG_MAX;
1174 1186                          wad = bytes;
1175 1187                  }
1176 1188          } else
1177 1189                  ret = sbrk(bytes);
1178 1190  
1179 1191          return (ret);
1180 1192  }
1181 1193  
1182 1194  
1183 1195  static void *
1184 1196  oversize(size_t size)
1185 1197  {
1186 1198          caddr_t ret;
1187 1199          oversize_t *big;
1188 1200  
1189 1201          /* make sure we will not overflow */
1190 1202          if (size > MAX_MTMALLOC) {
1191 1203                  errno = ENOMEM;
1192 1204                  return (NULL);
1193 1205          }
1194 1206  
1195 1207          /*
1196 1208           * Since we ensure every address we hand back is
1197 1209           * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
1198 1210           * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
1199 1211           * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
1200 1212           * particularly where coalescing occurs.
1201 1213           */
1202 1214          size = ALIGN(size, MTMALLOC_MIN_ALIGN);
1203 1215  
1204 1216          /*
1205 1217           * The idea with the global lock is that we are sure to
1206 1218           * block in the kernel anyway since given an oversize alloc
1207 1219           * we are sure to have to call morecore();
1208 1220           */
1209 1221          (void) mutex_lock(&oversize_lock);
1210 1222  
1211 1223          if ((big = find_oversize(size)) != NULL) {
1212 1224                  if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
1213 1225                          if (verify_pattern(FREEPATTERN, big->addr, size))
1214 1226                                  abort();        /* reference after free */
1215 1227          } else {
1216 1228                  /* Get more 8-byte aligned memory from heap */
1217 1229                  ret = morecore(size + OVSZ_HEADER_SIZE);
1218 1230                  if (ret == (caddr_t)-1) {
1219 1231                          (void) mutex_unlock(&oversize_lock);
1220 1232                          errno = ENOMEM;
1221 1233                          return (NULL);
1222 1234                  }
1223 1235                  big = oversize_header_alloc((uintptr_t)ret, size);
1224 1236          }
1225 1237          ret = big->addr;
1226 1238  
1227 1239          insert_hash(big);
1228 1240  
1229 1241          if (debugopt & MTINITBUFFER)
1230 1242                  copy_pattern(INITPATTERN, ret, size);
1231 1243  
1232 1244          (void) mutex_unlock(&oversize_lock);
1233 1245          assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
1234 1246          return ((void *)ret);
1235 1247  }
1236 1248  
1237 1249  static void
1238 1250  insert_oversize(oversize_t *op, oversize_t *nx)
1239 1251  {
1240 1252          oversize_t *sp;
1241 1253  
1242 1254          /* locate correct insertion point in size-ordered list */
1243 1255          for (sp = oversize_list.next_bysize;
1244 1256              sp != &oversize_list && (op->size > sp->size);
1245 1257              sp = sp->next_bysize)
1246 1258                  ;
1247 1259  
1248 1260          /* link into size-ordered list */
1249 1261          op->next_bysize = sp;
1250 1262          op->prev_bysize = sp->prev_bysize;
1251 1263          op->prev_bysize->next_bysize = op;
1252 1264          op->next_bysize->prev_bysize = op;
1253 1265  
1254 1266          /*
1255 1267           * link item into address-ordered list
1256 1268           * (caller provides insertion point as an optimization)
1257 1269           */
1258 1270          op->next_byaddr = nx;
1259 1271          op->prev_byaddr = nx->prev_byaddr;
1260 1272          op->prev_byaddr->next_byaddr = op;
1261 1273          op->next_byaddr->prev_byaddr = op;
1262 1274  
1263 1275  }
1264 1276  
1265 1277  static void
1266 1278  unlink_oversize(oversize_t *lp)
1267 1279  {
1268 1280          /* unlink from address list */
1269 1281          lp->prev_byaddr->next_byaddr = lp->next_byaddr;
1270 1282          lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
1271 1283  
1272 1284          /* unlink from size list */
1273 1285          lp->prev_bysize->next_bysize = lp->next_bysize;
1274 1286          lp->next_bysize->prev_bysize = lp->prev_bysize;
1275 1287  }
1276 1288  
1277 1289  static void
1278 1290  position_oversize_by_size(oversize_t *op)
1279 1291  {
1280 1292          oversize_t *sp;
1281 1293  
1282 1294          if (op->size > op->next_bysize->size ||
1283 1295              op->size < op->prev_bysize->size) {
1284 1296  
1285 1297                  /* unlink from size list */
1286 1298                  op->prev_bysize->next_bysize = op->next_bysize;
1287 1299                  op->next_bysize->prev_bysize = op->prev_bysize;
1288 1300  
1289 1301                  /* locate correct insertion point in size-ordered list */
1290 1302                  for (sp = oversize_list.next_bysize;
1291 1303                      sp != &oversize_list && (op->size > sp->size);
1292 1304                      sp = sp->next_bysize)
1293 1305                          ;
1294 1306  
1295 1307                  /* link into size-ordered list */
1296 1308                  op->next_bysize = sp;
1297 1309                  op->prev_bysize = sp->prev_bysize;
1298 1310                  op->prev_bysize->next_bysize = op;
1299 1311                  op->next_bysize->prev_bysize = op;
1300 1312          }
1301 1313  }
1302 1314  
1303 1315  static void
1304 1316  add_oversize(oversize_t *lp)
1305 1317  {
1306 1318          int merge_flags = INSERT_ONLY;
1307 1319          oversize_t *nx;         /* ptr to item right of insertion point */
1308 1320          oversize_t *pv;         /* ptr to item left of insertion point */
1309 1321          uint_t size_lp, size_pv, size_nx;
1310 1322          uintptr_t endp_lp, endp_pv, endp_nx;
1311 1323  
1312 1324          /*
1313 1325           * Locate insertion point in address-ordered list
1314 1326           */
1315 1327  
1316 1328          for (nx = oversize_list.next_byaddr;
1317 1329              nx != &oversize_list && (lp->addr > nx->addr);
1318 1330              nx = nx->next_byaddr)
1319 1331                  ;
1320 1332  
1321 1333          /*
1322 1334           * Determine how to add chunk to oversize freelist
1323 1335           */
1324 1336  
1325 1337          size_lp = OVSZ_HEADER_SIZE + lp->size;
1326 1338          endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
1327 1339          size_lp = endp_lp - (uintptr_t)lp;
1328 1340  
1329 1341          pv = nx->prev_byaddr;
1330 1342  
1331 1343          if (pv->size) {
1332 1344  
1333 1345                  size_pv = OVSZ_HEADER_SIZE + pv->size;
1334 1346                  endp_pv = ALIGN((uintptr_t)pv + size_pv,
1335 1347                      MTMALLOC_MIN_ALIGN);
1336 1348                  size_pv = endp_pv - (uintptr_t)pv;
1337 1349  
1338 1350                  /* Check for adjacency with left chunk */
1339 1351                  if ((uintptr_t)lp == endp_pv)
1340 1352                          merge_flags |= COALESCE_LEFT;
1341 1353          }
1342 1354  
1343 1355          if (nx->size) {
1344 1356  
1345 1357                  /* Check for adjacency with right chunk */
1346 1358                  if ((uintptr_t)nx == endp_lp) {
1347 1359                          size_nx = OVSZ_HEADER_SIZE + nx->size;
1348 1360                          endp_nx = ALIGN((uintptr_t)nx + size_nx,
1349 1361                              MTMALLOC_MIN_ALIGN);
1350 1362                          size_nx = endp_nx - (uintptr_t)nx;
1351 1363                          merge_flags |= COALESCE_RIGHT;
1352 1364                  }
1353 1365          }
1354 1366  
1355 1367          /*
1356 1368           * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
1357 1369           * FREEPATTERN for lp->size bytes. If we can merge, the oversize
1358 1370           * header(s) that will also become part of the memory available for
1359 1371           * reallocation (ie lp and/or nx) must also be overwritten with
1360 1372           * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
1361 1373           */
1362 1374          switch (merge_flags) {
1363 1375  
1364 1376          case INSERT_ONLY:               /* Coalescing not possible */
1365 1377                  insert_oversize(lp, nx);
1366 1378                  break;
1367 1379          case COALESCE_LEFT:
1368 1380                  pv->size += size_lp;
1369 1381                  position_oversize_by_size(pv);
1370 1382                  if (debugopt & MTDEBUGPATTERN)
1371 1383                          copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1372 1384                  break;
1373 1385          case COALESCE_RIGHT:
1374 1386                  unlink_oversize(nx);
1375 1387                  lp->size += size_nx;
1376 1388                  insert_oversize(lp, pv->next_byaddr);
1377 1389                  if (debugopt & MTDEBUGPATTERN)
1378 1390                          copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1379 1391                  break;
1380 1392          case COALESCE_WITH_BOTH_SIDES:  /* Merge (with right) to the left */
1381 1393                  pv->size += size_lp + size_nx;
1382 1394                  unlink_oversize(nx);
1383 1395                  position_oversize_by_size(pv);
1384 1396                  if (debugopt & MTDEBUGPATTERN) {
1385 1397                          copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1386 1398                          copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1387 1399                  }
1388 1400                  break;
1389 1401          }
1390 1402  }
1391 1403  
1392 1404  /*
1393 1405   * Find memory on our list that is at least size big. If we find a block that is
1394 1406   * big enough, we break it up and return the associated oversize_t struct back
1395 1407   * to the calling client. Any leftover piece of that block is returned to the
1396 1408   * freelist.
1397 1409   */
1398 1410  static oversize_t *
1399 1411  find_oversize(size_t size)
1400 1412  {
1401 1413          oversize_t *wp = oversize_list.next_bysize;
1402 1414          while (wp != &oversize_list && size > wp->size)
1403 1415                  wp = wp->next_bysize;
1404 1416  
1405 1417          if (wp == &oversize_list) /* empty list or nothing big enough */
1406 1418                  return (NULL);
1407 1419          /* breaking up a chunk of memory */
1408 1420          if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
1409 1421              > MAX_CACHED) {
1410 1422                  caddr_t off;
1411 1423                  oversize_t *np;
1412 1424                  size_t osize;
1413 1425                  off = (caddr_t)ALIGN(wp->addr + size,
1414 1426                      MTMALLOC_MIN_ALIGN);
1415 1427                  osize = wp->size;
1416 1428                  wp->size = (size_t)(off - wp->addr);
1417 1429                  np = oversize_header_alloc((uintptr_t)off,
1418 1430                      osize - (wp->size + OVSZ_HEADER_SIZE));
1419 1431                  if ((long)np->size < 0)
1420 1432                          abort();
1421 1433                  unlink_oversize(wp);
1422 1434                  add_oversize(np);
1423 1435          } else {
1424 1436                  unlink_oversize(wp);
1425 1437          }
1426 1438          return (wp);
1427 1439  }
1428 1440  
1429 1441  static void
1430 1442  copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
1431 1443  {
1432 1444          uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
1433 1445          uint32_t *buf = buf_arg;
1434 1446  
1435 1447          while (buf < bufend - 3) {
1436 1448                  buf[3] = buf[2] = buf[1] = buf[0] = pattern;
1437 1449                  buf += 4;
1438 1450          }
1439 1451          while (buf < bufend)
1440 1452                  *buf++ = pattern;
1441 1453  }
1442 1454  
1443 1455  static void *
1444 1456  verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
1445 1457  {
1446 1458          uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
1447 1459          uint32_t *buf;
1448 1460  
1449 1461          for (buf = buf_arg; buf < bufend; buf++)
1450 1462                  if (*buf != pattern)
1451 1463                          return (buf);
1452 1464          return (NULL);
1453 1465  }
1454 1466  
1455 1467  static void
1456 1468  free_oversize(oversize_t *ovp)
1457 1469  {
1458 1470          assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
1459 1471          assert(ovp->size > MAX_CACHED);
1460 1472  
1461 1473          ovp->next_bysize = ovp->prev_bysize = NULL;
1462 1474          ovp->next_byaddr = ovp->prev_byaddr = NULL;
1463 1475          (void) mutex_lock(&oversize_lock);
1464 1476          add_oversize(ovp);
1465 1477          (void) mutex_unlock(&oversize_lock);
1466 1478  }
1467 1479  
1468 1480  static oversize_t *
1469 1481  oversize_header_alloc(uintptr_t mem, size_t size)
1470 1482  {
1471 1483          oversize_t *ovsz_hdr;
1472 1484  
1473 1485          assert(size > MAX_CACHED);
1474 1486  
1475 1487          ovsz_hdr = (oversize_t *)mem;
1476 1488          ovsz_hdr->prev_bysize = NULL;
1477 1489          ovsz_hdr->next_bysize = NULL;
1478 1490          ovsz_hdr->prev_byaddr = NULL;
1479 1491          ovsz_hdr->next_byaddr = NULL;
1480 1492          ovsz_hdr->hash_next = NULL;
1481 1493          ovsz_hdr->size = size;
1482 1494          mem += OVSZ_SIZE;
1483 1495          *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
1484 1496          mem += OVERHEAD;
1485 1497          assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
1486 1498          ovsz_hdr->addr = (caddr_t)mem;
1487 1499          return (ovsz_hdr);
1488 1500  }
1489 1501  
1490 1502  static void
1491 1503  malloc_prepare()
1492 1504  {
1493 1505          percpu_t *cpuptr;
1494 1506          cache_head_t *cachehead;
1495 1507          cache_t *thiscache;
1496 1508  
1497 1509          (void) mutex_lock(&oversize_lock);
1498 1510          for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
1499 1511                  (void) mutex_lock(&cpuptr->mt_parent_lock);
1500 1512                  for (cachehead = &cpuptr->mt_caches[0];
1501 1513                      cachehead < &cpuptr->mt_caches[NUM_CACHES];
1502 1514                      cachehead++) {
1503 1515                          for (thiscache = cachehead->mt_cache;
1504 1516                              thiscache != NULL;
1505 1517                              thiscache = thiscache->mt_next) {
1506 1518                                  (void) mutex_lock(
1507 1519                                      &thiscache->mt_cache_lock);
1508 1520                          }
1509 1521                  }
1510 1522          }
1511 1523  }
1512 1524  
1513 1525  static void
1514 1526  malloc_release()
1515 1527  {
1516 1528          percpu_t *cpuptr;
1517 1529          cache_head_t *cachehead;
1518 1530          cache_t *thiscache;
1519 1531  
1520 1532          for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
1521 1533                  for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
1522 1534                      cachehead >= &cpuptr->mt_caches[0];
1523 1535                      cachehead--) {
1524 1536                          for (thiscache = cachehead->mt_cache;
1525 1537                              thiscache != NULL;
1526 1538                              thiscache = thiscache->mt_next) {
1527 1539                                  (void) mutex_unlock(
1528 1540                                      &thiscache->mt_cache_lock);
1529 1541                          }
1530 1542                  }
1531 1543                  (void) mutex_unlock(&cpuptr->mt_parent_lock);
1532 1544          }
1533 1545          (void) mutex_unlock(&oversize_lock);
1534 1546  }
1535 1547  
1536 1548  #pragma init(malloc_init)
1537 1549  static void
1538 1550  malloc_init(void)
1539 1551  {
1540 1552          /*
1541 1553           * This works in the init section for this library
1542 1554           * because setup_caches() doesn't call anything in libc
1543 1555           * that calls malloc().  If it did, disaster would ensue.
1544 1556           *
1545 1557           * For this to work properly, this library must be the first
1546 1558           * one to have its init section called (after libc) by the
1547 1559           * dynamic linker.  If some other library's init section
1548 1560           * ran first and called malloc(), disaster would ensue.
1549 1561           * Because this is an interposer library for malloc(), the
1550 1562           * dynamic linker arranges for its init section to run first.
1551 1563           */
1552 1564          (void) setup_caches();
1553 1565  
1554 1566          (void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1555 1567  }

↓ open down ↓

1211 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX