/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <mtmalloc.h>
#include "mtmalloc_impl.h"
#include <unistd.h>
#include <synch.h>
#include <thread.h>
#include <pthread.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>
#include <sys/sysmacros.h>

/*
 * To turn on asserts, compile with -DDEBUG.
 */

#ifndef	DEBUG
#define	NDEBUG
#endif

#include <assert.h>

/*
 * The MT hot malloc implementation contained herein is designed to be
 * plug-compatible with the libc version of malloc. It is not intended
 * to replace that implementation until we decide that it is ok to break
 * customer apps (Solaris 3.0).
 *
 * For requests up to 2^16, the allocator initializes itself into NCPUS
 * worth of chains of caches. When a memory request is made, the calling thread
 * is vectored into one of NCPUS worth of caches.  The LWP id gives us a cheap,
 * contention-reducing index to use; eventually, this should be replaced with
 * the actual CPU sequence number once an interface to get it is available.
 *
 * Once the thread is vectored into one of the lists of caches, the real
 * allocation of the memory begins. The request size determines which
 * bucket the allocation should be satisfied from. The management of free
 * buckets is done via a bitmask. A free bucket is represented by a 1. The
 * first free bit represents the first free bucket. The position of the bit
 * represents the position of the bucket in the arena.
 *
 * When the memory from the arena is handed out, the address of the cache
 * control structure is written in the word preceding the returned memory.
 * This cache control address is used during free() to mark the buffer free
 * in the cache control structure.
 *
 * When all available memory in a cache has been depleted, a new chunk of memory
 * is allocated via sbrk(). The new cache is allocated from this chunk of memory
 * and initialized in the function create_cache(). New caches are installed at
 * the front of a singly linked list of the same size memory pools. This helps
 * to ensure that there will tend to be available memory at the beginning of
 * the list.
 *
 * Long linked lists hurt performance. To decrease this effect, there is a
 * tunable, requestsize, that bumps up the sbrk allocation size and thus
 * increases the number of available blocks within an arena.  We also keep
 * a "hint" for each cache list, which is the last cache in the list allocated
 * from.  This lowers the cost of searching if there are a lot of fully
 * allocated blocks at the front of the list.
 *
 * For requests greater than 2^16 (oversize allocations), there are two pieces
 * of overhead. There is the OVERHEAD used to hold the cache addr
 * (&oversize_list), plus an oversize_t structure to further describe the block.
 *
 * The oversize list is kept as defragmented as possible by coalescing
 * freed oversized allocations with adjacent neighbors.
 *
 * Addresses handed out are stored in a hash table, and are aligned on
 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
 * where necessary in order to achieve this. This eases the implementation of
 * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
 *
 * A memalign allocation incurs memalign header overhead.  There are two
 * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
 * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the distance from the malloc'ed
 * address to the aligned address is exactly the minimum size OVERHEAD,
 * we create a header taking only one OVERHEAD space with magic number
 * MTMALLOC_MEMALIGN_MIN_MAGIC; by subtracting OVERHEAD from the memaligned
 * address we can then get back to the malloc'ed address. Otherwise, we
 * create a memalign header taking two OVERHEAD spaces: one stores the
 * MTMALLOC_MEMALIGN_MAGIC magic number, and the other points back to the
 * malloc'ed address.
 */

#if defined(__i386) || defined(__amd64)
#include <arpa/inet.h>	/* for htonl() */
#endif

static void * morecore(size_t);
static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
static void * malloc_internal(size_t, percpu_t *);
static void * oversize(size_t);
static oversize_t *find_oversize(size_t);
static void add_oversize(oversize_t *);
static void copy_pattern(uint32_t, void *, size_t);
static void * verify_pattern(uint32_t, void *, size_t);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
static oversize_t *oversize_header_alloc(uintptr_t, size_t);

/*
 * oversize hash table stuff
 */
#define	NUM_BUCKETS	67	/* must be prime */
#define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
oversize_t *ovsz_hashtab[NUM_BUCKETS];

#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
			& ~((uintptr_t)(a) - 1)))

/* needed to deal with the little-endianness of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))
#else
#define	FLIP_EM(x)	(x)
#endif

#define	INSERT_ONLY			0
#define	COALESCE_LEFT			0x00000001
#define	COALESCE_RIGHT			0x00000002
#define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)

#define	OVERHEAD	8	/* size needed to write cache addr */
#define	HUNKSIZE	8192	/* just a multiplier */

#define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
#define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
#define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
#define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */

/* maximum size before overflow */
#define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
			- OVSZ_HEADER_SIZE)

#define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
#define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
	CACHE_COHERENCY_UNIT)
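
/*
 * Worked example of the cache geometry above: with MIN_CACHED_SHIFT == 4
 * and MAX_CACHED_SHIFT == 16 there are NUM_CACHES == 13 buckets per CPU,
 * for sizes 2^4, 2^5, ..., 2^16.  A 100-byte request is rounded up to
 * 128 bytes and served from bucket log2(128) - MIN_CACHED_SHIFT == 3 of
 * the calling thread's cache list; anything larger than MAX_CACHED (64K)
 * bypasses the caches entirely and is handled by oversize().
 */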

#define	MINSIZE		9	/* for requestsize, tunable */
#define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */

#define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
#define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */

#define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
#define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))

static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */

static uint_t cpu_mask;
static curcpu_func curcpu;

static int32_t debugopt;
static int32_t reinit;

static percpu_t *cpu_list;
static oversize_t oversize_list;
static mutex_t oversize_lock = DEFAULTMUTEX;

static int ncpus = 0;

#define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
#define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
#define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)

/*
 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 * this is achieved.
 */
#define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
#define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)

/*
 * A memalign header takes two OVERHEAD spaces: one for the memalign magic
 * number, and one to point back to the start address of the originally
 * allocated space.
 */
#define	MEMALIGN_HEADER_SIZE	(2 * OVERHEAD)
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
	if (shift == OVERHEAD)\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MIN_MAGIC; \
	else {\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MAGIC; \
		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
			(uintptr_t)malloc_addr; \
	}
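
/*
 * Resulting layout for an aligned address x produced by memalign():
 *
 *   shift == OVERHEAD:  [MTMALLOC_MEMALIGN_MIN_MAGIC][x ...]
 *   shift >  OVERHEAD:  ... [malloc_addr][MTMALLOC_MEMALIGN_MAGIC][x ...]
 *
 * free() and realloc() recognize the magic word at x - OVERHEAD and use
 * it to recover the address originally returned by malloc().
 */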

/*
 * Add big to the oversize hash table at the head of the relevant bucket.
 */
static void
insert_hash(oversize_t *big)
{
	caddr_t ret = big->addr;
	int bucket = HASH_OVERSIZE(ret);

	assert(MUTEX_HELD(&oversize_lock));
	big->hash_next = ovsz_hashtab[bucket];
	ovsz_hashtab[bucket] = big;
}

void *
malloc(size_t bytes)
{
	percpu_t *list_rotor;
	uint_t	list_index;

	if (bytes > MAX_CACHED)
		return (oversize(bytes));

	list_index = (curcpu() & cpu_mask);

	list_rotor = &cpu_list[list_index];

	return (malloc_internal(bytes, list_rotor));
}

void *
realloc(void * ptr, size_t bytes)
{
	void *new, *data_ptr;
	cache_t *cacheptr;
	caddr_t mem;
	size_t shift = 0;

	if (ptr == NULL)
		return (malloc(bytes));

	if (bytes == 0) {
		free(ptr);
		return (NULL);
	}

	data_ptr = ptr;
	mem = (caddr_t)ptr - OVERHEAD;

	/*
	 * Optimization possibility :
	 *	p = malloc(64);
	 *	q = realloc(p, 64);
	 * q can be same as p.
	 * Apply this optimization for the normal
	 * sized caches for now.
	 */
	if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC ||
	    *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) {
		cacheptr = (cache_t *)*(uintptr_t *)mem;
		if (bytes <= (cacheptr->mt_size - OVERHEAD))
			return (ptr);
	}

	new = malloc(bytes);

	if (new == NULL)
		return (NULL);

	/*
	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
	 * to realloc() is not allowed - unless the caller specifically states
	 * otherwise, in which case we must avoid freeing ptr (ie new) before we
	 * return new. There is (obviously) no requirement to memcpy() ptr to
	 * new before we return.
	 */
	if (new == ptr) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
		return (new);
	}

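	/*
	 * Unwind any memalign header so that ptr and mem refer to the
	 * buffer originally returned by malloc(), remembering in shift
	 * how far the user-visible data_ptr is offset into that buffer
	 * so the copy lengths below can be adjusted.
	 */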
	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
		shift = OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *old;

		old = (oversize_t *)(mem - OVSZ_SIZE);
		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
		free(ptr);
		return (new);
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;

	(void) memcpy(new, data_ptr,
	    MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
	free(ptr);

	return (new);
}

void *
calloc(size_t nelem, size_t bytes)
{
	void * ptr;
	size_t size;

	if (nelem == 0 || bytes == 0) {
		size = 0;
	} else {
		size = nelem * bytes;

		/* check for overflow of nelem * bytes */
		if ((size / nelem) != bytes) {
			errno = ENOMEM;
			return (NULL);
		}
	}

	ptr = malloc(size);
	if (ptr == NULL)
		return (NULL);
	(void) memset(ptr, 0, size);

	return (ptr);
}

void
free(void * ptr)
{
	cache_t *cacheptr;
	caddr_t mem;
	int32_t i;
	caddr_t freeblocks;
	uintptr_t offset;
	uchar_t mask;
	int32_t which_bit, num_bytes;

	if (ptr == NULL)
		return;

	mem = (caddr_t)ptr - OVERHEAD;

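	/*
	 * The word immediately before the buffer holds either one of the
	 * memalign/oversize magic values or the address of the owning
	 * cache_t.  Unwind memalign headers first so that ptr and mem
	 * refer to the buffer originally returned by malloc().
	 */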
	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *big, **opp;
		int bucket;

		big = (oversize_t *)(mem - OVSZ_SIZE);
		(void) mutex_lock(&oversize_lock);

		bucket = HASH_OVERSIZE(big->addr);
		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
		    opp = &(*opp)->hash_next)
			if (*opp == big)
				break;

		if (*opp == NULL) {
			if (!(debugopt & MTDOUBLEFREE))
				abort();
			(void) mutex_unlock(&oversize_lock);
			return;
		}

		*opp = big->hash_next;	/* remove big from the hash table */
		big->hash_next = NULL;

		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, ptr, big->size);
		add_oversize(big);
		(void) mutex_unlock(&oversize_lock);
		return;
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;
	freeblocks = cacheptr->mt_freelist;

	/*
	 * This is the distance measured in bits into the arena.
	 * The value of offset is in bytes but there is a 1-1 correlation
	 * between distance into the arena and distance into the
	 * freelist bitmask.
	 */
	offset = mem - cacheptr->mt_arena;

	/*
	 * i is the total number of bits to offset into the freelist bitmask.
	 */

	i = offset / cacheptr->mt_size;

	num_bytes = i >> 3;

	/*
	 * which_bit is the bit offset into the byte in the freelist.
	 * If our freelist bitmask looks like 0xf3 and we are freeing
	 * block 5 (ie: the 6th block), our mask will be 0xf7 after
	 * the free. Bits go left to right, which is why the mask is 0x80
	 * and not 0x01.
	 */
	which_bit = i - (num_bytes << 3);

	mask = 0x80 >> which_bit;

	freeblocks += num_bytes;

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);

	(void) mutex_lock(&cacheptr->mt_cache_lock);

	if (*freeblocks & mask) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
	} else {
		*freeblocks |= mask;
		cacheptr->mt_nfree++;
	}

	(void) mutex_unlock(&cacheptr->mt_cache_lock);
}

void *
memalign(size_t alignment, size_t size)
{
	size_t alloc_size;
	uintptr_t offset;
	void *alloc_buf;
	void *ret_buf;

	if (size == 0 || alignment == 0 || misaligned(alignment) ||
	    (alignment & (alignment - 1)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
	if (alignment <= MTMALLOC_MIN_ALIGN)
		return (malloc(size));

	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;

	if (alloc_size < size) { /* overflow */
		errno = ENOMEM;
		return (NULL);
	}

	alloc_buf = malloc(alloc_size);

	if (alloc_buf == NULL)
		/* malloc sets errno */
		return (NULL);

	/*
	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
	 * we will use alloc_size to return the excess fragments to the free
	 * list, we also round-up alloc_size if necessary.
	 */
	if ((alloc_size > MAX_CACHED) &&
	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);

	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
		/* aligned correctly */

		size_t frag_size = alloc_size -
		    (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * If the leftover piece of the memory is > MAX_CACHED,
		 * split off the piece and return it back to the freelist.
		 */
		if (IS_OVERSIZE(frag_size, alloc_size)) {
			oversize_t *orig, *tail;
			uintptr_t taddr;
			size_t data_size;
			taddr = ALIGN((uintptr_t)alloc_buf + size,
			    MTMALLOC_MIN_ALIGN);
			data_size = taddr - (uintptr_t)alloc_buf;
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			frag_size = orig->size - data_size -
			    OVSZ_HEADER_SIZE;
			orig->size = data_size;
			tail = oversize_header_alloc(taddr, frag_size);
			free_oversize(tail);
		}
		ret_buf = alloc_buf;
	} else {
		uchar_t	oversize_bits = 0;
		size_t	head_sz, data_sz, tail_sz;
		uintptr_t ret_addr, taddr, shift, tshift;
		oversize_t *orig, *tail, *big;
		size_t tsize;

		/* needs to be aligned */
		shift = alignment - offset;

		assert(shift >= MTMALLOC_MIN_ALIGN);

		ret_addr = ((uintptr_t)alloc_buf + shift);
		ret_buf = (void *)ret_addr;

		if (alloc_size <= MAX_CACHED) {
			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
			return (ret_buf);
		}

		/*
		 * Only check for the fragments when the memory is allocated
		 * from oversize_list.  Split off a fragment and return it
		 * to the oversize freelist when it's > MAX_CACHED.
		 */

		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);

		tail_sz = alloc_size -
		    (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
		    IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
		    IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;

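		/*
		 * oversize_bits now records which of the head fragment, the
		 * data area and the tail fragment are individually large
		 * enough (> MAX_CACHED) to stand on the oversize freelist;
		 * the switch below splits off whichever fragments qualify.
		 */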
		switch (oversize_bits) {
			case NONE_OVERSIZE:
			case DATA_OVERSIZE:
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
				    alloc_buf);
				break;
			case HEAD_OVERSIZE:
				/*
				 * If we can extend the data to > MAX_CACHED
				 * and still have the head > MAX_CACHED, we
				 * split the head end as in the case where
				 * head and data are both oversized; otherwise
				 * we just create the memalign header.
				 */
				tsize = (shift + size) - (MAX_CACHED + 8 +
				    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					tsize += OVSZ_HEADER_SIZE;
					taddr = ALIGN((uintptr_t)alloc_buf +
					    tsize, MTMALLOC_MIN_ALIGN);
					tshift = ret_addr - taddr;
					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
					    taddr);
					ret_addr = taddr;
					shift = ret_addr - (uintptr_t)alloc_buf;
				}
				/* FALLTHROUGH */
			case HEAD_AND_DATA_OVERSIZE:
				/*
				 * Split off the head fragment and
				 * return it back to oversize freelist.
				 * Create oversize header for the piece
				 * of (data + tail fragment).
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				big = oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE, (orig->size - shift));
				(void) mutex_lock(&oversize_lock);
				insert_hash(big);
				(void) mutex_unlock(&oversize_lock);
				orig->size = shift - OVSZ_HEADER_SIZE;

				/* free up the head fragment */
				free_oversize(orig);
				break;
			case TAIL_OVERSIZE:
				/*
				 * If we can extend the data to > MAX_CACHED
				 * and still have the tail end > MAX_CACHED,
				 * we split the tail end; otherwise we just
				 * create the memalign header.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    shift + OVSZ_HEADER_SIZE +
				    MTMALLOC_MIN_ALIGN);
				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case DATA_AND_TAIL_OVERSIZE:
				/*
				 * Split off the tail fragment and
				 * return it back to oversize freelist.
				 * Create memalign header and adjust
				 * the size for the piece of
				 * (head fragment + data).
				 */
				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = (size_t)(taddr -
				    (uintptr_t)alloc_buf);
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - data_sz;
				orig->size = data_sz;
				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
				    alloc_buf);
				tsize -= OVSZ_HEADER_SIZE;
				tail = oversize_header_alloc(taddr, tsize);
				free_oversize(tail);
				break;
			case HEAD_AND_TAIL_OVERSIZE:
				/*
				 * Split off the head fragment.
				 * We also try to free up the tail end when we
				 * can extend the data size to (MAX_CACHED + 8)
				 * and still leave the tail end oversized.
				 * The bottom line is that every split-off
				 * piece must itself be oversize.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    OVSZ_HEADER_SIZE + shift +
				    MTMALLOC_MIN_ALIGN);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					/*
					 * If the chunk is not big enough
					 * to make both data and tail oversize
					 * we just keep them as one piece.
					 */
					big = oversize_header_alloc(ret_addr -
					    OVSZ_HEADER_SIZE,
					    orig->size - shift);
					(void) mutex_lock(&oversize_lock);
					insert_hash(big);
					(void) mutex_unlock(&oversize_lock);
					orig->size = shift - OVSZ_HEADER_SIZE;
					free_oversize(orig);
					break;
				} else {
					/*
					 * Extend the data size to > MAX_CACHED
					 * and handle it as if head, data and
					 * tail are all oversized.
					 */
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case ALL_OVERSIZE:
				/*
				 * Split off the head and tail fragments,
				 * return them back to the oversize freelist.
				 * Alloc oversize header for data seg.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size;
				orig->size = shift - OVSZ_HEADER_SIZE;
				free_oversize(orig);

				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = taddr - ret_addr;
				assert(tsize > (shift + data_sz +
				    OVSZ_HEADER_SIZE));
				tail_sz = tsize -
				    (shift + data_sz + OVSZ_HEADER_SIZE);

				/* create oversize header for data seg */
				big = oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE, data_sz);
				(void) mutex_lock(&oversize_lock);
				insert_hash(big);
				(void) mutex_unlock(&oversize_lock);

				/* create oversize header for tail fragment */
				tail = oversize_header_alloc(taddr, tail_sz);
				free_oversize(tail);
				break;
			default:
				/* should not reach here */
				assert(0);
		}
	}
	return (ret_buf);
}


void *
valloc(size_t size)
{
	static unsigned pagesize;

	if (size == 0)
		return (NULL);

	if (!pagesize)
		pagesize = sysconf(_SC_PAGESIZE);

	return (memalign(pagesize, size));
}

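/*
 * mallocctl() tunables handled below: MTDEBUGPATTERN writes FREEPATTERN
 * over freed buffers (and reinitializes currently free blocks the first
 * time it is enabled), MTINITBUFFER writes INITPATTERN over newly
 * allocated buffers, MTDOUBLEFREE suppresses the abort() on a detected
 * double free, and MTCHUNKSIZE adjusts the sbrk request multiplier,
 * requestsize.
 */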
void
mallocctl(int cmd, long value)
{
	switch (cmd) {

	case MTDEBUGPATTERN:
		/*
		 * Reinitialize free blocks in case malloc() is called prior
		 * to mallocctl().
		 */
		if (value && !(debugopt & cmd)) {
			reinit++;
			debugopt |= cmd;
			reinit_cpu_list();
		}
		/*FALLTHRU*/
	case MTDOUBLEFREE:
	case MTINITBUFFER:
		if (value)
			debugopt |= cmd;
		else
			debugopt &= ~cmd;
		break;
	case MTCHUNKSIZE:
		if (value >= MINSIZE && value <= MAXSIZE)
			requestsize = value;
		break;
	default:
		break;
	}
}

/*
 * Initialization function, called from the init section of the library.
 * No locking is required here because we are single-threaded during
 * library initialization.
 */
static void
setup_caches(void)
{
	uintptr_t oldbrk;
	uintptr_t newbrk;

	size_t cache_space_needed;
	size_t padding;

	curcpu_func new_curcpu;
	uint_t new_cpu_mask;
	percpu_t *new_cpu_list;

	uint_t i, j;
	uintptr_t list_addr;

	/*
	 * Get a decent "current cpu identifier", to be used to reduce
	 * contention.  Eventually, this should be replaced by an interface
	 * to get the actual CPU sequence number in libthread/liblwp.
	 */
	new_curcpu = (curcpu_func)thr_self;
	if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
		ncpus = 4; /* decent default value */

	/* round ncpus up to a power of 2 */
	while (ncpus & (ncpus - 1))
		ncpus++;

	new_cpu_mask = ncpus - 1;	/* create the cpu mask */

	/*
	 * We now do some magic with the brk.  What we want to get in the
	 * end is a bunch of well-aligned stuff in a big initial allocation.
	 * Along the way, we do sanity checks to make sure no one else has
	 * touched the brk (which shouldn't happen, but it's always good to
	 * check).
	 *
	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
	 */
	oldbrk = (uintptr_t)sbrk(0);
	if ((void *)oldbrk == (void *)-1)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
	 * the percpu structures and cache lists will be properly aligned.
	 * (Below, the brk is also aligned to HUNKSIZE so that all hunks
	 * are page-aligned, assuming HUNKSIZE >= PAGESIZE, and can be
	 * paged out individually.)
	 */
	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * For each cpu, there is one percpu_t and a list of caches
	 */
	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);

	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);

	if (new_cpu_list == (percpu_t *)-1 ||
	    (uintptr_t)new_cpu_list != newbrk)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * Finally, align the brk to HUNKSIZE so that all hunks are
	 * page-aligned, to avoid edge-effects.
	 */

	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;

	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;

	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
		abort();	/* sbrk is broken -- we're doomed. */

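	/*
	 * The space just obtained is used as ncpus percpu_t structures
	 * followed by ncpus cache list arrays of CACHELIST_SIZE bytes
	 * each; list_addr below walks through that cache list region.
	 */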
	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));

	/* initialize the percpu list */
	for (i = 0; i < ncpus; i++) {
		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
		for (j = 0; j < NUM_CACHES; j++) {
			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
		}

		(void) mutex_init(&new_cpu_list[i].mt_parent_lock,
		    USYNC_THREAD, NULL);

		/* get the correct cache list alignment */
		list_addr += CACHELIST_SIZE;
	}

	/*
	 * Initialize oversize listhead
	 */
	oversize_list.next_bysize = &oversize_list;
	oversize_list.prev_bysize = &oversize_list;
	oversize_list.next_byaddr = &oversize_list;
	oversize_list.prev_byaddr = &oversize_list;
	oversize_list.addr = NULL;
	oversize_list.size = 0;		/* sentinel */

	/*
	 * Now install the global variables.
	 */
	curcpu = new_curcpu;
	cpu_mask = new_cpu_mask;
	cpu_list = new_cpu_list;
}

static void
create_cache(cache_t *cp, size_t size, uint_t chunksize)
{
	long nblocks;

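	/*
	 * Layout of a cache within its hunk: the cache_t header itself sits
	 * at the start, immediately followed by the freelist bitmask
	 * (mt_freelist), with the arena of fixed-size buffers (mt_arena)
	 * aligned after the bitmask.
	 */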
	(void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
	cp->mt_size = size;
	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
	cp->mt_hunks = chunksize;
	/*
	 * rough calculation. We will need to adjust later.
	 */
	nblocks = cp->mt_span / cp->mt_size;
	nblocks >>= 3;
	if (nblocks == 0) { /* less than 8 free blocks in this pool */
		int32_t numblocks = 0;
		long i = cp->mt_span;
		size_t sub = cp->mt_size;
		uchar_t mask = 0;

		while (i > sub) {
			numblocks++;
			i -= sub;
		}
		nblocks = numblocks;
		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
		cp->mt_nfree = numblocks;
		while (numblocks--) {
			mask |= 0x80 >> numblocks;
		}
		*(cp->mt_freelist) = mask;
	} else {
		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
		    nblocks, 32);
		/* recompute nblocks */
		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
		    cp->mt_span - cp->mt_arena) / cp->mt_size;
		cp->mt_nfree = ((nblocks >> 3) << 3);
		/* Set everything to free */
		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
	}

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);

	cp->mt_next = NULL;
}

static void
reinit_cpu_list(void)
{
	oversize_t *wp = oversize_list.next_bysize;
	percpu_t *cpuptr;
	cache_t *thiscache;
	cache_head_t *cachehead;

	/* Reinitialize free oversize blocks. */
	(void) mutex_lock(&oversize_lock);
	if (debugopt & MTDEBUGPATTERN)
		for (; wp != &oversize_list; wp = wp->next_bysize)
			copy_pattern(FREEPATTERN, wp->addr, wp->size);
	(void) mutex_unlock(&oversize_lock);

	/* Reinitialize free blocks. */
	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
		(void) mutex_lock(&cpuptr->mt_parent_lock);
		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
		    &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
			for (thiscache = cachehead->mt_cache; thiscache != NULL;
			    thiscache = thiscache->mt_next) {
				(void) mutex_lock(&thiscache->mt_cache_lock);
				if (thiscache->mt_nfree == 0) {
					(void) mutex_unlock(
					    &thiscache->mt_cache_lock);
					continue;
				}
				if (thiscache != NULL)
					reinit_cache(thiscache);
				(void) mutex_unlock(&thiscache->mt_cache_lock);
			}
		}
		(void) mutex_unlock(&cpuptr->mt_parent_lock);
	}
	reinit = 0;
}

static void
reinit_cache(cache_t *thiscache)
{
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	int32_t i, n;
	caddr_t ret;

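	/*
	 * Each 32-bit word of the freelist bitmask covers 32 buffers in
	 * the arena; bit i of word w (counting from the most significant
	 * bit of the big-endian view -- hence FLIP_EM) corresponds to
	 * buffer number w * 32 + i.  Rewrite FREEPATTERN over every
	 * buffer whose bit is still set, i.e. every buffer that is free.
	 */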
	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff) {
			for (i = 0; i < 32; i++) {
				if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
					n = (uintptr_t)(((freeblocks -
					    (uint32_t *)thiscache->mt_freelist)
					    << 5) + i) * thiscache->mt_size;
					ret = thiscache->mt_arena + n;
					ret += OVERHEAD;
					copy_pattern(FREEPATTERN, ret,
					    thiscache->mt_size);
				}
			}
		}
		freeblocks++;
	}
}

static void *
malloc_internal(size_t size, percpu_t *cpuptr)
{
	cache_head_t *cachehead;
	cache_t *thiscache, *hintcache;
	int32_t i, n, logsz, bucket;
	uint32_t index;
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	caddr_t ret;

	logsz = MIN_CACHED_SHIFT;

	while (size > (1 << logsz))
		logsz++;

	bucket = logsz - MIN_CACHED_SHIFT;

	(void) mutex_lock(&cpuptr->mt_parent_lock);

	/*
	 * Find a cache of the appropriate size with free buffers.
	 *
	 * We don't need to lock each cache as we check their mt_nfree count,
	 * since:
	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
	 *	    free happens during our search, it will increment mt_nfree,
	 *	    which will not affect the test.
	 *	2.  Allocations can decrement mt_nfree, but they can't happen
	 *	    as long as we hold mt_parent_lock.
	 */

	cachehead = &cpuptr->mt_caches[bucket];

	/* Search through the list, starting at the mt_hint */
	thiscache = cachehead->mt_hint;

	while (thiscache != NULL && thiscache->mt_nfree == 0)
		thiscache = thiscache->mt_next;

	if (thiscache == NULL) {
		/* wrap around -- search up to the hint */
		thiscache = cachehead->mt_cache;
		hintcache = cachehead->mt_hint;

		while (thiscache != NULL && thiscache != hintcache &&
		    thiscache->mt_nfree == 0)
			thiscache = thiscache->mt_next;

		if (thiscache == hintcache)
			thiscache = NULL;
	}


	if (thiscache == NULL) { /* there are no free caches */
		int32_t thisrequest = requestsize;
		int32_t buffer_size = (1 << logsz) + OVERHEAD;

		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);

		if (thiscache == (cache_t *)-1) {
			(void) mutex_unlock(&cpuptr->mt_parent_lock);
			errno = EAGAIN;
			return (NULL);
		}
		create_cache(thiscache, buffer_size, thisrequest);

		/* link in the new block at the beginning of the list */
		thiscache->mt_next = cachehead->mt_cache;
		cachehead->mt_cache = thiscache;
	}

	/* update the hint to the cache we found or created */
	cachehead->mt_hint = thiscache;

	/* thiscache now points to a cache with available space */
	(void) mutex_lock(&thiscache->mt_cache_lock);

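	/*
	 * Scan the freelist bitmask one 32-bit word at a time (the loop is
	 * unrolled four ways) until we find a word with a free bit set.
	 * Since we hold mt_parent_lock no other allocation can race us, so
	 * mt_nfree > 0 guarantees that such a word exists.
	 */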
	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
	}

	/*
	 * The word offset from mt_freelist to freeblocks, times 32, plus
	 * the bit offset i within that word gives the buffer's index into
	 * the arena; n (computed below) is that index times the buffer
	 * size.
	 */
	for (i = 0; i < 32; ) {
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
	}
	index = 0x80000000 >> --i;


	*freeblocks &= FLIP_EM(~index);

	thiscache->mt_nfree--;

	(void) mutex_unlock(&thiscache->mt_cache_lock);
	(void) mutex_unlock(&cpuptr->mt_parent_lock);

	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
	    + i) * thiscache->mt_size;
	/*
	 * Now we have the offset in n and have cleared the free bit in the
	 * freelist.  All that is left is to find the block in the arena,
	 * put the value of thiscache in the word ahead of the handed-out
	 * address, and return the memory to the user.
	 */
	ret = thiscache->mt_arena + n;

	/* Store the cache addr for this buf. Makes free go fast. */
	*(uintptr_t *)ret = (uintptr_t)thiscache;

	/*
	 * This assert makes sure we don't hand out memory that is not
	 * owned by this cache.
	 */
	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
	    thiscache->mt_span);

	ret += OVERHEAD;

	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */

	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
		if (verify_pattern(FREEPATTERN, ret, size))
			abort();	/* reference after free */

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);
	return ((void *)ret);
}

static void *
morecore(size_t bytes)
{
	void * ret;

	if (bytes > LONG_MAX) {
		intptr_t wad;
		/*
		 * The request is too big for a single sbrk() call, whose
		 * increment argument is signed; do it in LONG_MAX-sized
		 * chunks.
		 */
		if (bytes == ULONG_MAX)
			return ((void *)-1);

		ret = sbrk(0);
		wad = LONG_MAX;
		while (wad > 0) {
			if (sbrk(wad) == (void *)-1) {
				if (ret != sbrk(0))
					(void) sbrk(-LONG_MAX);
				return ((void *)-1);
			}
			bytes -= LONG_MAX;
			wad = bytes;
		}
	} else
		ret = sbrk(bytes);

	return (ret);
}


static void *
oversize(size_t size)
{
	caddr_t ret;
	oversize_t *big;

	/* make sure we will not overflow */
	if (size > MAX_MTMALLOC) {
		errno = ENOMEM;
		return (NULL);
	}

	/*
	 * Since we ensure every address we hand back is
	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
	 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
	 * particularly where coalescing occurs.
	 */
	size = ALIGN(size, MTMALLOC_MIN_ALIGN);

	/*
	 * The idea with the global lock is that we are sure to block in the
	 * kernel anyway, since an oversize alloc will almost certainly have
	 * to call morecore().
	 */
	(void) mutex_lock(&oversize_lock);

	if ((big = find_oversize(size)) != NULL) {
		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
			if (verify_pattern(FREEPATTERN, big->addr, size))
				abort();	/* reference after free */
	} else {
		/* Get more 8-byte aligned memory from heap */
		ret = morecore(size + OVSZ_HEADER_SIZE);
		if (ret == (caddr_t)-1) {
			(void) mutex_unlock(&oversize_lock);
			errno = ENOMEM;
			return (NULL);
		}
		big = oversize_header_alloc((uintptr_t)ret, size);
	}
	ret = big->addr;

	insert_hash(big);

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);

	(void) mutex_unlock(&oversize_lock);
	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
	return ((void *)ret);
}

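/*
 * Free oversize chunks are kept on two circular doubly-linked lists rooted
 * at oversize_list: one ordered by size (next_bysize/prev_bysize, searched
 * by find_oversize()) and one ordered by address (next_byaddr/prev_byaddr,
 * used for coalescing in add_oversize()).  The listhead's size of 0 and
 * addr of NULL let it act as a sentinel for both orderings.
 */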
static void
insert_oversize(oversize_t *op, oversize_t *nx)
{
	oversize_t *sp;

	/* locate correct insertion point in size-ordered list */
	for (sp = oversize_list.next_bysize;
	    sp != &oversize_list && (op->size > sp->size);
	    sp = sp->next_bysize)
		;

	/* link into size-ordered list */
	op->next_bysize = sp;
	op->prev_bysize = sp->prev_bysize;
	op->prev_bysize->next_bysize = op;
	op->next_bysize->prev_bysize = op;

	/*
	 * link item into address-ordered list
	 * (caller provides insertion point as an optimization)
	 */
	op->next_byaddr = nx;
	op->prev_byaddr = nx->prev_byaddr;
	op->prev_byaddr->next_byaddr = op;
	op->next_byaddr->prev_byaddr = op;
}

static void
unlink_oversize(oversize_t *lp)
{
	/* unlink from address list */
	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;

	/* unlink from size list */
	lp->prev_bysize->next_bysize = lp->next_bysize;
	lp->next_bysize->prev_bysize = lp->prev_bysize;
}

static void
position_oversize_by_size(oversize_t *op)
{
	oversize_t *sp;

	if (op->size > op->next_bysize->size ||
	    op->size < op->prev_bysize->size) {

		/* unlink from size list */
		op->prev_bysize->next_bysize = op->next_bysize;
		op->next_bysize->prev_bysize = op->prev_bysize;

		/* locate correct insertion point in size-ordered list */
		for (sp = oversize_list.next_bysize;
		    sp != &oversize_list && (op->size > sp->size);
		    sp = sp->next_bysize)
			;

		/* link into size-ordered list */
		op->next_bysize = sp;
		op->prev_bysize = sp->prev_bysize;
		op->prev_bysize->next_bysize = op;
		op->next_bysize->prev_bysize = op;
	}
}

1303 static void
1304 add_oversize(oversize_t *lp)
1305 {
1306         int merge_flags = INSERT_ONLY;
1307         oversize_t *nx;         /* ptr to item right of insertion point */
1308         oversize_t *pv;         /* ptr to item left of insertion point */
1309         uint_t size_lp, size_pv, size_nx;
1310         uintptr_t endp_lp, endp_pv, endp_nx;
1311 
1312         /*
1313          * Locate insertion point in address-ordered list
1314          */
1315 
1316         for (nx = oversize_list.next_byaddr;
1317             nx != &oversize_list && (lp->addr > nx->addr);
1318             nx = nx->next_byaddr)
1319                 ;
1320 
1321         /*
1322          * Determine how to add chunk to oversize freelist
1323          */
1324 
1325         size_lp = OVSZ_HEADER_SIZE + lp->size;
1326         endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
1327         size_lp = endp_lp - (uintptr_t)lp;
1328 
1329         pv = nx->prev_byaddr;
1330 
1331         if (pv->size) {
1332 
1333                 size_pv = OVSZ_HEADER_SIZE + pv->size;
1334                 endp_pv = ALIGN((uintptr_t)pv + size_pv,
1335                     MTMALLOC_MIN_ALIGN);
1336                 size_pv = endp_pv - (uintptr_t)pv;
1337 
1338                 /* Check for adjacency with left chunk */
1339                 if ((uintptr_t)lp == endp_pv)
1340                         merge_flags |= COALESCE_LEFT;
1341         }
1342 
1343         if (nx->size) {
1344 
1345                 /* Check for adjacency with right chunk */
1346                 if ((uintptr_t)nx == endp_lp) {
1347                         size_nx = OVSZ_HEADER_SIZE + nx->size;
1348                         endp_nx = ALIGN((uintptr_t)nx + size_nx,
1349                             MTMALLOC_MIN_ALIGN);
1350                         size_nx = endp_nx - (uintptr_t)nx;
1351                         merge_flags |= COALESCE_RIGHT;
1352                 }
1353         }
1354 
1355         /*
1356          * If MTDEBUGPATTERN is set, lp->addr has already been overwritten
1357          * with FREEPATTERN for lp->size bytes. If we coalesce, the oversize
1358          * header(s) that become part of the reallocatable memory (i.e. lp
1359          * and/or nx) must also be overwritten with FREEPATTERN, or the
1360          * pattern check will abort when this memory is next allocated.
1361          */
1362         switch (merge_flags) {
1363 
1364         case INSERT_ONLY:               /* Coalescing not possible */
1365                 insert_oversize(lp, nx);
1366                 break;
1367         case COALESCE_LEFT:
1368                 pv->size += size_lp;
1369                 position_oversize_by_size(pv);
1370                 if (debugopt & MTDEBUGPATTERN)
1371                         copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1372                 break;
1373         case COALESCE_RIGHT:
1374                 unlink_oversize(nx);
1375                 lp->size += size_nx;
1376                 insert_oversize(lp, pv->next_byaddr);
1377                 if (debugopt & MTDEBUGPATTERN)
1378                         copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1379                 break;
1380         case COALESCE_WITH_BOTH_SIDES:  /* Merge (with right) to the left */
1381                 pv->size += size_lp + size_nx;
1382                 unlink_oversize(nx);
1383                 position_oversize_by_size(pv);
1384                 if (debugopt & MTDEBUGPATTERN) {
1385                         copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1386                         copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1387                 }
1388                 break;
1389         }
1390 }
1391 
1392 /*
1393  * Find a chunk on the oversize freelist that is at least size bytes long. If
1394  * the chunk is big enough that the remainder would itself still be an oversize
1395  * chunk, split it and return the leftover piece to the freelist. The
1396  * oversize_t struct for the (possibly trimmed) chunk is returned to the caller.
1397  */
1398 static oversize_t *
1399 find_oversize(size_t size)
1400 {
1401         oversize_t *wp = oversize_list.next_bysize;
1402         while (wp != &oversize_list && size > wp->size)
1403                 wp = wp->next_bysize;
1404 
1405         if (wp == &oversize_list) /* empty list or nothing big enough */
1406                 return (NULL);
1407         /* split the chunk if the remainder would still be oversize */
1408         if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
1409             > MAX_CACHED) {
1410                 caddr_t off;
1411                 oversize_t *np;
1412                 size_t osize;
1413                 off = (caddr_t)ALIGN(wp->addr + size,
1414                     MTMALLOC_MIN_ALIGN);
1415                 osize = wp->size;
1416                 wp->size = (size_t)(off - wp->addr);
1417                 np = oversize_header_alloc((uintptr_t)off,
1418                     osize - (wp->size + OVSZ_HEADER_SIZE));
1419                 if ((long)np->size < 0)
1420                         abort();
1421                 unlink_oversize(wp);
1422                 add_oversize(np);
1423         } else {
1424                 unlink_oversize(wp);
1425         }
1426         return (wp);
1427 }
1428 
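     /*
      * Fill size bytes of buf_arg with the 32-bit debug pattern, four
      * words at a time.
      */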
1429 static void
1430 copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
1431 {
1432         uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
1433         uint32_t *buf = buf_arg;
1434 
1435         while (buf < bufend - 3) {
1436                 buf[3] = buf[2] = buf[1] = buf[0] = pattern;
1437                 buf += 4;
1438         }
1439         while (buf < bufend)
1440                 *buf++ = pattern;
1441 }
1442 
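     /*
      * Check that a buffer still holds the debug pattern. Returns a pointer
      * to the first word that does not match, or NULL if the buffer is
      * intact.
      */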
1443 static void *
1444 verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
1445 {
1446         uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
1447         uint32_t *buf;
1448 
1449         for (buf = buf_arg; buf < bufend; buf++)
1450                 if (*buf != pattern)
1451                         return (buf);
1452         return (NULL);
1453 }
1454 
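     /*
      * Return an oversize allocation to the oversize freelist, coalescing
      * it with any adjacent free chunks. The global oversize lock protects
      * the freelists.
      */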
1455 static void
1456 free_oversize(oversize_t *ovp)
1457 {
1458         assert(((uintptr_t)ovp->addr & 7) == 0); /* 8-byte aligned? */
1459         assert(ovp->size > MAX_CACHED);
1460 
1461         ovp->next_bysize = ovp->prev_bysize = NULL;
1462         ovp->next_byaddr = ovp->prev_byaddr = NULL;
1463         (void) mutex_lock(&oversize_lock);
1464         add_oversize(ovp);
1465         (void) mutex_unlock(&oversize_lock);
1466 }
1467 
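     /*
      * Initialize an oversize_t header at mem, record the allocation size,
      * and stamp the overhead word with MTMALLOC_OVERSIZE_MAGIC. addr is
      * set to the 8-byte aligned memory that is handed back to the caller.
      */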
1468 static oversize_t *
1469 oversize_header_alloc(uintptr_t mem, size_t size)
1470 {
1471         oversize_t *ovsz_hdr;
1472 
1473         assert(size > MAX_CACHED);
1474 
1475         ovsz_hdr = (oversize_t *)mem;
1476         ovsz_hdr->prev_bysize = NULL;
1477         ovsz_hdr->next_bysize = NULL;
1478         ovsz_hdr->prev_byaddr = NULL;
1479         ovsz_hdr->next_byaddr = NULL;
1480         ovsz_hdr->hash_next = NULL;
1481         ovsz_hdr->size = size;
1482         mem += OVSZ_SIZE;
1483         *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
1484         mem += OVERHEAD;
1485         assert(((uintptr_t)mem & 7) == 0); /* 8-byte aligned? */
1486         ovsz_hdr->addr = (caddr_t)mem;
1487         return (ovsz_hdr);
1488 }
1489 
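     /*
      * pthread_atfork() prepare handler: acquire the oversize lock, every
      * per-CPU parent lock and every cache lock so that a child created by
      * fork() does not inherit a lock held by some other thread.
      */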
1490 static void
1491 malloc_prepare(void)
1492 {
1493         percpu_t *cpuptr;
1494         cache_head_t *cachehead;
1495         cache_t *thiscache;
1496 
1497         (void) mutex_lock(&oversize_lock);
1498         for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
1499                 (void) mutex_lock(&cpuptr->mt_parent_lock);
1500                 for (cachehead = &cpuptr->mt_caches[0];
1501                     cachehead < &cpuptr->mt_caches[NUM_CACHES];
1502                     cachehead++) {
1503                         for (thiscache = cachehead->mt_cache;
1504                             thiscache != NULL;
1505                             thiscache = thiscache->mt_next) {
1506                                 (void) mutex_lock(
1507                                     &thiscache->mt_cache_lock);
1508                         }
1509                 }
1510         }
1511 }
1512 
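     /*
      * pthread_atfork() parent and child handler: release every lock
      * acquired by malloc_prepare(), walking the CPU list in reverse order.
      */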
1513 static void
1514 malloc_release(void)
1515 {
1516         percpu_t *cpuptr;
1517         cache_head_t *cachehead;
1518         cache_t *thiscache;
1519 
1520         for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
1521                 for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
1522                     cachehead >= &cpuptr->mt_caches[0];
1523                     cachehead--) {
1524                         for (thiscache = cachehead->mt_cache;
1525                             thiscache != NULL;
1526                             thiscache = thiscache->mt_next) {
1527                                 (void) mutex_unlock(
1528                                     &thiscache->mt_cache_lock);
1529                         }
1530                 }
1531                 (void) mutex_unlock(&cpuptr->mt_parent_lock);
1532         }
1533         (void) mutex_unlock(&oversize_lock);
1534 }
1535 
1536 #pragma init(malloc_init)
1537 static void
1538 malloc_init(void)
1539 {
1540         /*
1541          * This works in the init section for this library
1542          * because setup_caches() doesn't call anything in libc
1543          * that calls malloc().  If it did, disaster would ensue.
1544          *
1545          * For this to work properly, this library must be the first
1546          * one to have its init section called (after libc) by the
1547          * dynamic linker.  If some other library's init section
1548          * ran first and called malloc(), disaster would ensue.
1549          * Because this is an interposer library for malloc(), the
1550          * dynamic linker arranges for its init section to run first.
1551          */
1552         (void) setup_caches();
1553 
1554         (void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1555 }