compat / nedmalloc / nedmalloc.con commit git-am: add am.threeWay config variable (e97a5e7)
   1/* Alternative malloc implementation for multiple threads without
   2lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas
   3
   4Boost Software License - Version 1.0 - August 17th, 2003
   5
   6Permission is hereby granted, free of charge, to any person or organization
   7obtaining a copy of the software and accompanying documentation covered by
   8this license (the "Software") to use, reproduce, display, distribute,
   9execute, and transmit the Software, and to prepare derivative works of the
  10Software, and to permit third-parties to whom the Software is furnished to
  11do so, all subject to the following:
  12
  13The copyright notices in the Software and this entire statement, including
  14the above license grant, this restriction and the following disclaimer,
  15must be included in all copies of the Software, in whole or in part, and
  16all derivative works of the Software, unless such copies or derivative
  17works are solely in the form of machine-executable object code generated by
  18a source language processor.
  19
  20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
  23SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
  24FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
  25ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  26DEALINGS IN THE SOFTWARE.
  27*/
  28
  29#ifdef _MSC_VER
  30/* Enable full aliasing on MSVC */
  31/*#pragma optimize("a", on)*/
  32#endif
  33
  34/*#define FULLSANITYCHECKS*/
  35
  36#include "nedmalloc.h"
  37#if defined(WIN32)
  38 #include <malloc.h>
  39#endif
  40#define MSPACES 1
  41#define ONLY_MSPACES 1
  42#ifndef USE_LOCKS
  43 #define USE_LOCKS 1
  44#endif
  45#define FOOTERS 1           /* Need to enable footers so frees lock the right mspace */
  46#undef DEBUG                            /* dlmalloc wants DEBUG either 0 or 1 */
  47#ifdef _DEBUG
  48 #define DEBUG 1
  49#else
  50 #define DEBUG 0
  51#endif
  52#ifdef NDEBUG               /* Disable assert checking on release builds */
  53 #undef DEBUG
  54#endif
  55/* The default of 64Kb means we spend too much time kernel-side */
  56#ifndef DEFAULT_GRANULARITY
  57#define DEFAULT_GRANULARITY (1*1024*1024)
  58#endif
  59/*#define USE_SPIN_LOCKS 0*/
  60
  61
  62/*#define FORCEINLINE*/
  63#include "malloc.c.h"
  64#ifdef NDEBUG               /* Disable assert checking on release builds */
  65 #undef DEBUG
  66#endif
  67
  68/* The maximum concurrent threads in a pool possible */
  69#ifndef MAXTHREADSINPOOL
  70#define MAXTHREADSINPOOL 16
  71#endif
  72/* The maximum number of threadcaches which can be allocated */
  73#ifndef THREADCACHEMAXCACHES
  74#define THREADCACHEMAXCACHES 256
  75#endif
  76/* The maximum size to be allocated from the thread cache */
  77#ifndef THREADCACHEMAX
  78#define THREADCACHEMAX 8192
  79#endif
  80#if 0
  81/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
  82#define THREADCACHEMAXBINS ((13-4)*2)
  83#else
  84/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
  85#define THREADCACHEMAXBINS (13-4)
  86#endif
  87/* Point at which the free space in a thread cache is garbage collected */
  88#ifndef THREADCACHEMAXFREESPACE
  89#define THREADCACHEMAXFREESPACE (512*1024)
  90#endif
  91
  92
  93#ifdef WIN32
  94 #define TLSVAR                 DWORD
  95 #define TLSALLOC(k)    (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
  96 #define TLSFREE(k)             (!TlsFree(k))
  97 #define TLSGET(k)              TlsGetValue(k)
  98 #define TLSSET(k, a)   (!TlsSetValue(k, a))
  99 #ifdef DEBUG
 100static LPVOID ChkedTlsGetValue(DWORD idx)
 101{
 102        LPVOID ret=TlsGetValue(idx);
 103        assert(S_OK==GetLastError());
 104        return ret;
 105}
 106  #undef TLSGET
 107  #define TLSGET(k) ChkedTlsGetValue(k)
 108 #endif
 109#else
 110 #define TLSVAR                 pthread_key_t
 111 #define TLSALLOC(k)    pthread_key_create(k, 0)
 112 #define TLSFREE(k)             pthread_key_delete(k)
 113 #define TLSGET(k)              pthread_getspecific(k)
 114 #define TLSSET(k, a)   pthread_setspecific(k, a)
 115#endif
 116
 117#if 0
 118/* Only enable if testing with valgrind. Causes misoperation */
 119#define mspace_malloc(p, s) malloc(s)
 120#define mspace_realloc(p, m, s) realloc(m, s)
 121#define mspace_calloc(p, n, s) calloc(n, s)
 122#define mspace_free(p, m) free(m)
 123#endif
 124
 125
 126#if defined(__cplusplus)
 127#if !defined(NO_NED_NAMESPACE)
 128namespace nedalloc {
 129#else
 130extern "C" {
 131#endif
 132#endif
 133
 134size_t nedblksize(void *mem) THROWSPEC
 135{
 136#if 0
 137        /* Only enable if testing with valgrind. Causes misoperation */
 138        return THREADCACHEMAX;
 139#else
 140        if(mem)
 141        {
 142                mchunkptr p=mem2chunk(mem);
 143                assert(cinuse(p));      /* If this fails, someone tried to free a block twice */
 144                if(cinuse(p))
 145                        return chunksize(p)-overhead_for(p);
 146        }
 147        return 0;
 148#endif
 149}
 150
 151void nedsetvalue(void *v) THROWSPEC                                     { nedpsetvalue(0, v); }
 152void * nedmalloc(size_t size) THROWSPEC                         { return nedpmalloc(0, size); }
 153void * nedcalloc(size_t no, size_t size) THROWSPEC      { return nedpcalloc(0, no, size); }
 154void * nedrealloc(void *mem, size_t size) THROWSPEC     { return nedprealloc(0, mem, size); }
 155void   nedfree(void *mem) THROWSPEC                                     { nedpfree(0, mem); }
 156void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
 157#if !NO_MALLINFO
 158struct mallinfo nedmallinfo(void) THROWSPEC                     { return nedpmallinfo(0); }
 159#endif
 160int    nedmallopt(int parno, int value) THROWSPEC       { return nedpmallopt(0, parno, value); }
 161int    nedmalloc_trim(size_t pad) THROWSPEC                     { return nedpmalloc_trim(0, pad); }
 162void   nedmalloc_stats(void) THROWSPEC                                  { nedpmalloc_stats(0); }
 163size_t nedmalloc_footprint(void) THROWSPEC                              { return nedpmalloc_footprint(0); }
 164void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC  { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
 165void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC    { return nedpindependent_comalloc(0, elems, sizes, chunks); }
 166
 167struct threadcacheblk_t;
 168typedef struct threadcacheblk_t threadcacheblk;
 169struct threadcacheblk_t
 170{       /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
 171#ifdef FULLSANITYCHECKS
 172        unsigned int magic;
 173#endif
 174        unsigned int lastUsed, size;
 175        threadcacheblk *next, *prev;
 176};
 177typedef struct threadcache_t
 178{
 179#ifdef FULLSANITYCHECKS
 180        unsigned int magic1;
 181#endif
 182        int mymspace;                                           /* Last mspace entry this thread used */
 183        long threadid;
 184        unsigned int mallocs, frees, successes;
 185        size_t freeInCache;                                     /* How much free space is stored in this cache */
 186        threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
 187#ifdef FULLSANITYCHECKS
 188        unsigned int magic2;
 189#endif
 190} threadcache;
 191struct nedpool_t
 192{
 193        MLOCK_T mutex;
 194        void *uservalue;
 195        int threads;                                            /* Max entries in m to use */
 196        threadcache *caches[THREADCACHEMAXCACHES];
 197        TLSVAR mycache;                                         /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
 198        mstate m[MAXTHREADSINPOOL+1];           /* mspace entries for this pool */
 199};
 200static nedpool syspool;
 201
 202static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
 203{       /* 8=1000       16=10000        20=10100        24=11000        32=100000       48=110000       4096=1000000000000 */
 204        unsigned int topbit, size=(unsigned int)(_size>>4);
 205        /* 16=1         20=1    24=1    32=10   48=11   64=100  96=110  128=1000        4096=100000000 */
 206
 207#if defined(__GNUC__)
 208        topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
 209#elif defined(_MSC_VER) && _MSC_VER>=1300
 210        {
 211            unsigned long bsrTopBit;
 212
 213            _BitScanReverse(&bsrTopBit, size);
 214
 215            topbit = bsrTopBit;
 216        }
 217#else
 218#if 0
 219        union {
 220                unsigned asInt[2];
 221                double asDouble;
 222        };
 223        int n;
 224
 225        asDouble = (double)size + 0.5;
 226        topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
 227#else
 228        {
 229                unsigned int x=size;
 230                x = x | (x >> 1);
 231                x = x | (x >> 2);
 232                x = x | (x >> 4);
 233                x = x | (x >> 8);
 234                x = x | (x >>16);
 235                x = ~x;
 236                x = x - ((x >> 1) & 0x55555555);
 237                x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
 238                x = (x + (x >> 4)) & 0x0F0F0F0F;
 239                x = x + (x << 8);
 240                x = x + (x << 16);
 241                topbit=31 - (x >> 24);
 242        }
 243#endif
 244#endif
 245        return topbit;
 246}
 247
 248
 249#ifdef FULLSANITYCHECKS
 250static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
 251{
 252        assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
 253        if(ptr[0] && ptr[1])
 254        {
 255                assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
 256                assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
 257                assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
 258                assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
 259                assert(!ptr[0]->prev);
 260                assert(!ptr[1]->next);
 261                if(ptr[0]==ptr[1])
 262                {
 263                        assert(!ptr[0]->next);
 264                        assert(!ptr[1]->prev);
 265                }
 266        }
 267}
 268static void tcfullsanitycheck(threadcache *tc) THROWSPEC
 269{
 270        threadcacheblk **tcbptr=tc->bins;
 271        int n;
 272        for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
 273        {
 274                threadcacheblk *b, *ob=0;
 275                tcsanitycheck(tcbptr);
 276                for(b=tcbptr[0]; b; ob=b, b=b->next)
 277                {
 278                        assert(*(unsigned int *) "NEDN"==b->magic);
 279                        assert(!ob || ob->next==b);
 280                        assert(!ob || b->prev==ob);
 281                }
 282        }
 283}
 284#endif
 285
 286static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
 287{
 288#ifdef FULLSANITYCHECKS
 289        tcfullsanitycheck(tc);
 290#endif
 291        if(tc->freeInCache)
 292        {
 293                threadcacheblk **tcbptr=tc->bins;
 294                int n;
 295                for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
 296                {
 297                        threadcacheblk **tcb=tcbptr+1;          /* come from oldest end of list */
 298                        /*tcsanitycheck(tcbptr);*/
 299                        for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
 300                        {
 301                                threadcacheblk *f=*tcb;
 302                                size_t blksize=f->size; /*nedblksize(f);*/
 303                                assert(blksize<=nedblksize(f));
 304                                assert(blksize);
 305#ifdef FULLSANITYCHECKS
 306                                assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
 307#endif
 308                                *tcb=(*tcb)->prev;
 309                                if(*tcb)
 310                                        (*tcb)->next=0;
 311                                else
 312                                        *tcbptr=0;
 313                                tc->freeInCache-=blksize;
 314                                assert((long) tc->freeInCache>=0);
 315                                mspace_free(0, f);
 316                                /*tcsanitycheck(tcbptr);*/
 317                        }
 318                }
 319        }
 320#ifdef FULLSANITYCHECKS
 321        tcfullsanitycheck(tc);
 322#endif
 323}
 324static void DestroyCaches(nedpool *p) THROWSPEC
 325{
 326        if(p->caches)
 327        {
 328                threadcache *tc;
 329                int n;
 330                for(n=0; n<THREADCACHEMAXCACHES; n++)
 331                {
 332                        if((tc=p->caches[n]))
 333                        {
 334                                tc->frees++;
 335                                RemoveCacheEntries(p, tc, 0);
 336                                assert(!tc->freeInCache);
 337                                tc->mymspace=-1;
 338                                tc->threadid=0;
 339                                mspace_free(0, tc);
 340                                p->caches[n]=0;
 341                        }
 342                }
 343        }
 344}
 345
 346static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
 347{
 348        threadcache *tc=0;
 349        int n, end;
 350        ACQUIRE_LOCK(&p->mutex);
 351        for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
 352        if(THREADCACHEMAXCACHES==n)
 353        {       /* List exhausted, so disable for this thread */
 354                RELEASE_LOCK(&p->mutex);
 355                return 0;
 356        }
 357        tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
 358        if(!tc)
 359        {
 360                RELEASE_LOCK(&p->mutex);
 361                return 0;
 362        }
 363#ifdef FULLSANITYCHECKS
 364        tc->magic1=*(unsigned int *)"NEDMALC1";
 365        tc->magic2=*(unsigned int *)"NEDMALC2";
 366#endif
 367        tc->threadid=(long)(size_t)CURRENT_THREAD;
 368        for(end=0; p->m[end]; end++);
 369        tc->mymspace=tc->threadid % end;
 370        RELEASE_LOCK(&p->mutex);
 371        if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
 372        return tc;
 373}
 374
 375static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
 376{
 377        void *ret=0;
 378        unsigned int bestsize;
 379        unsigned int idx=size2binidx(*size);
 380        size_t blksize=0;
 381        threadcacheblk *blk, **binsptr;
 382#ifdef FULLSANITYCHECKS
 383        tcfullsanitycheck(tc);
 384#endif
 385        /* Calculate best fit bin size */
 386        bestsize=1<<(idx+4);
 387#if 0
 388        /* Finer grained bin fit */
 389        idx<<=1;
 390        if(*size>bestsize)
 391        {
 392                idx++;
 393                bestsize+=bestsize>>1;
 394        }
 395        if(*size>bestsize)
 396        {
 397                idx++;
 398                bestsize=1<<(4+(idx>>1));
 399        }
 400#else
 401        if(*size>bestsize)
 402        {
 403                idx++;
 404                bestsize<<=1;
 405        }
 406#endif
 407        assert(bestsize>=*size);
 408        if(*size<bestsize) *size=bestsize;
 409        assert(*size<=THREADCACHEMAX);
 410        assert(idx<=THREADCACHEMAXBINS);
 411        binsptr=&tc->bins[idx*2];
 412        /* Try to match close, but move up a bin if necessary */
 413        blk=*binsptr;
 414        if(!blk || blk->size<*size)
 415        {       /* Bump it up a bin */
 416                if(idx<THREADCACHEMAXBINS)
 417                {
 418                        idx++;
 419                        binsptr+=2;
 420                        blk=*binsptr;
 421                }
 422        }
 423        if(blk)
 424        {
 425                blksize=blk->size; /*nedblksize(blk);*/
 426                assert(nedblksize(blk)>=blksize);
 427                assert(blksize>=*size);
 428                if(blk->next)
 429                        blk->next->prev=0;
 430                *binsptr=blk->next;
 431                if(!*binsptr)
 432                        binsptr[1]=0;
 433#ifdef FULLSANITYCHECKS
 434                blk->magic=0;
 435#endif
 436                assert(binsptr[0]!=blk && binsptr[1]!=blk);
 437                assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
 438                /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
 439                ret=(void *) blk;
 440        }
 441        ++tc->mallocs;
 442        if(ret)
 443        {
 444                assert(blksize>=*size);
 445                ++tc->successes;
 446                tc->freeInCache-=blksize;
 447                assert((long) tc->freeInCache>=0);
 448        }
 449#if defined(DEBUG) && 0
 450        if(!(tc->mallocs & 0xfff))
 451        {
 452                printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
 453                        (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
 454        }
 455#endif
 456#ifdef FULLSANITYCHECKS
 457        tcfullsanitycheck(tc);
 458#endif
 459        return ret;
 460}
 461static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
 462{
 463        unsigned int age=THREADCACHEMAXFREESPACE/8192;
 464        /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
 465        while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
 466        {
 467                RemoveCacheEntries(p, tc, age);
 468                /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
 469                age>>=1;
 470        }
 471        /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
 472}
 473static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
 474{
 475        unsigned int bestsize;
 476        unsigned int idx=size2binidx(size);
 477        threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
 478        assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
 479#ifdef DEBUG
 480        {       /* Make sure this is a valid memory block */
 481            mchunkptr p  = mem2chunk(mem);
 482            mstate fm = get_mstate_for(p);
 483            if (!ok_magic(fm)) {
 484              USAGE_ERROR_ACTION(fm, p);
 485              return;
 486            }
 487        }
 488#endif
 489#ifdef FULLSANITYCHECKS
 490        tcfullsanitycheck(tc);
 491#endif
 492        /* Calculate best fit bin size */
 493        bestsize=1<<(idx+4);
 494#if 0
 495        /* Finer grained bin fit */
 496        idx<<=1;
 497        if(size>bestsize)
 498        {
 499                unsigned int biggerbestsize=bestsize+bestsize<<1;
 500                if(size>=biggerbestsize)
 501                {
 502                        idx++;
 503                        bestsize=biggerbestsize;
 504                }
 505        }
 506#endif
 507        if(bestsize!=size)      /* dlmalloc can round up, so we round down to preserve indexing */
 508                size=bestsize;
 509        binsptr=&tc->bins[idx*2];
 510        assert(idx<=THREADCACHEMAXBINS);
 511        if(tck==*binsptr)
 512        {
 513                fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
 514                abort();
 515        }
 516#ifdef FULLSANITYCHECKS
 517        tck->magic=*(unsigned int *) "NEDN";
 518#endif
 519        tck->lastUsed=++tc->frees;
 520        tck->size=(unsigned int) size;
 521        tck->next=*binsptr;
 522        tck->prev=0;
 523        if(tck->next)
 524                tck->next->prev=tck;
 525        else
 526                binsptr[1]=tck;
 527        assert(!*binsptr || (*binsptr)->size==tck->size);
 528        *binsptr=tck;
 529        assert(tck==tc->bins[idx*2]);
 530        assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
 531        /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
 532        tc->freeInCache+=size;
 533#ifdef FULLSANITYCHECKS
 534        tcfullsanitycheck(tc);
 535#endif
 536#if 1
 537        if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
 538                ReleaseFreeInCache(p, tc, mymspace);
 539#endif
 540}
 541
 542
 543
 544
 545static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
 546{       /* threads is -1 for system pool */
 547        ensure_initialization();
 548        ACQUIRE_MALLOC_GLOBAL_LOCK();
 549        if(p->threads) goto done;
 550        if(INITIAL_LOCK(&p->mutex)) goto err;
 551        if(TLSALLOC(&p->mycache)) goto err;
 552        if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
 553        p->m[0]->extp=p;
 554        p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
 555done:
 556        RELEASE_MALLOC_GLOBAL_LOCK();
 557        return 1;
 558err:
 559        if(threads<0)
 560                abort();                        /* If you can't allocate for system pool, we're screwed */
 561        DestroyCaches(p);
 562        if(p->m[0])
 563        {
 564                destroy_mspace(p->m[0]);
 565                p->m[0]=0;
 566        }
 567        if(p->mycache)
 568        {
 569                if(TLSFREE(p->mycache)) abort();
 570                p->mycache=0;
 571        }
 572        RELEASE_MALLOC_GLOBAL_LOCK();
 573        return 0;
 574}
 575static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
 576{       /* Gets called when thread's last used mspace is in use. The strategy
 577        is to run through the list of all available mspaces looking for an
 578        unlocked one and if we fail, we create a new one so long as we don't
 579        exceed p->threads */
 580        int n, end;
 581        for(n=end=*lastUsed+1; p->m[n]; end=++n)
 582        {
 583                if(TRY_LOCK(&p->m[n]->mutex)) goto found;
 584        }
 585        for(n=0; n<*lastUsed && p->m[n]; n++)
 586        {
 587                if(TRY_LOCK(&p->m[n]->mutex)) goto found;
 588        }
 589        if(end<p->threads)
 590        {
 591                mstate temp;
 592                if(!(temp=(mstate) create_mspace(size, 1)))
 593                        goto badexit;
 594                /* Now we're ready to modify the lists, we lock */
 595                ACQUIRE_LOCK(&p->mutex);
 596                while(p->m[end] && end<p->threads)
 597                        end++;
 598                if(end>=p->threads)
 599                {       /* Drat, must destroy it now */
 600                        RELEASE_LOCK(&p->mutex);
 601                        destroy_mspace((mspace) temp);
 602                        goto badexit;
 603                }
 604                /* We really want to make sure this goes into memory now but we
 605                have to be careful of breaking aliasing rules, so write it twice */
 606                {
 607                        volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
 608                        *_m=(p->m[end]=temp);
 609                }
 610                ACQUIRE_LOCK(&p->m[end]->mutex);
 611                /*printf("Created mspace idx %d\n", end);*/
 612                RELEASE_LOCK(&p->mutex);
 613                n=end;
 614                goto found;
 615        }
 616        /* Let it lock on the last one it used */
 617badexit:
 618        ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
 619        return p->m[*lastUsed];
 620found:
 621        *lastUsed=n;
 622        if(tc)
 623                tc->mymspace=n;
 624        else
 625        {
 626                if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
 627        }
 628        return p->m[n];
 629}
 630
 631nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
 632{
 633        nedpool *ret;
 634        if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
 635        if(!InitPool(ret, capacity, threads))
 636        {
 637                nedpfree(0, ret);
 638                return 0;
 639        }
 640        return ret;
 641}
 642void neddestroypool(nedpool *p) THROWSPEC
 643{
 644        int n;
 645        ACQUIRE_LOCK(&p->mutex);
 646        DestroyCaches(p);
 647        for(n=0; p->m[n]; n++)
 648        {
 649                destroy_mspace(p->m[n]);
 650                p->m[n]=0;
 651        }
 652        RELEASE_LOCK(&p->mutex);
 653        if(TLSFREE(p->mycache)) abort();
 654        nedpfree(0, p);
 655}
 656
 657void nedpsetvalue(nedpool *p, void *v) THROWSPEC
 658{
 659        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 660        p->uservalue=v;
 661}
 662void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
 663{
 664        nedpool *np=0;
 665        mchunkptr mcp=mem2chunk(mem);
 666        mstate fm;
 667        if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
 668        if(!cinuse(mcp)) return 0;
 669        if(!next_pinuse(mcp)) return 0;
 670        if(!is_mmapped(mcp) && !pinuse(mcp))
 671        {
 672                if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
 673        }
 674        fm=get_mstate_for(mcp);
 675        if(!ok_magic(fm)) return 0;
 676        if(!ok_address(fm, mcp)) return 0;
 677        if(!fm->extp) return 0;
 678        np=(nedpool *) fm->extp;
 679        if(p) *p=np;
 680        return np->uservalue;
 681}
 682
 683void neddisablethreadcache(nedpool *p) THROWSPEC
 684{
 685        int mycache;
 686        if(!p)
 687        {
 688                p=&syspool;
 689                if(!syspool.threads) InitPool(&syspool, 0, -1);
 690        }
 691        mycache=(int)(size_t) TLSGET(p->mycache);
 692        if(!mycache)
 693        {       /* Set to mspace 0 */
 694                if(TLSSET(p->mycache, (void *)-1)) abort();
 695        }
 696        else if(mycache>0)
 697        {       /* Set to last used mspace */
 698                threadcache *tc=p->caches[mycache-1];
 699#if defined(DEBUG)
 700                printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
 701                        100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
 702#endif
 703                if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
 704                tc->frees++;
 705                RemoveCacheEntries(p, tc, 0);
 706                assert(!tc->freeInCache);
 707                tc->mymspace=-1;
 708                tc->threadid=0;
 709                mspace_free(0, p->caches[mycache-1]);
 710                p->caches[mycache-1]=0;
 711        }
 712}
 713
 714#define GETMSPACE(m,p,tc,ms,s,action)           \
 715  do                                            \
 716  {                                             \
 717    mstate m = GetMSpace((p),(tc),(ms),(s));    \
 718    action;                                     \
 719    RELEASE_LOCK(&m->mutex);                    \
 720  } while (0)
 721
 722static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
 723{       /* Returns a locked and ready for use mspace */
 724        mstate m=p->m[mymspace];
 725        assert(m);
 726        if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\
 727        /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
 728        return m;
 729}
 730static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
 731{
 732        int mycache;
 733        if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
 734        if(!*p)
 735        {
 736                *p=&syspool;
 737                if(!syspool.threads) InitPool(&syspool, 0, -1);
 738        }
 739        mycache=(int)(size_t) TLSGET((*p)->mycache);
 740        if(mycache>0)
 741        {
 742                *tc=(*p)->caches[mycache-1];
 743                *mymspace=(*tc)->mymspace;
 744        }
 745        else if(!mycache)
 746        {
 747                *tc=AllocCache(*p);
 748                if(!*tc)
 749                {       /* Disable */
 750                        if(TLSSET((*p)->mycache, (void *)-1)) abort();
 751                        *mymspace=0;
 752                }
 753                else
 754                        *mymspace=(*tc)->mymspace;
 755        }
 756        else
 757        {
 758                *tc=0;
 759                *mymspace=-mycache-1;
 760        }
 761        assert(*mymspace>=0);
 762        assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid);
 763#ifdef FULLSANITYCHECKS
 764        if(*tc)
 765        {
 766                if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
 767                {
 768                        abort();
 769                }
 770        }
 771#endif
 772}
 773
 774void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
 775{
 776        void *ret=0;
 777        threadcache *tc;
 778        int mymspace;
 779        GetThreadCache(&p, &tc, &mymspace, &size);
 780#if THREADCACHEMAX
 781        if(tc && size<=THREADCACHEMAX)
 782        {       /* Use the thread cache */
 783                ret=threadcache_malloc(p, tc, &size);
 784        }
 785#endif
 786        if(!ret)
 787        {       /* Use this thread's mspace */
 788        GETMSPACE(m, p, tc, mymspace, size,
 789                  ret=mspace_malloc(m, size));
 790        }
 791        return ret;
 792}
 793void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
 794{
 795        size_t rsize=size*no;
 796        void *ret=0;
 797        threadcache *tc;
 798        int mymspace;
 799        GetThreadCache(&p, &tc, &mymspace, &rsize);
 800#if THREADCACHEMAX
 801        if(tc && rsize<=THREADCACHEMAX)
 802        {       /* Use the thread cache */
 803                if((ret=threadcache_malloc(p, tc, &rsize)))
 804                        memset(ret, 0, rsize);
 805        }
 806#endif
 807        if(!ret)
 808        {       /* Use this thread's mspace */
 809        GETMSPACE(m, p, tc, mymspace, rsize,
 810                  ret=mspace_calloc(m, 1, rsize));
 811        }
 812        return ret;
 813}
 814void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
 815{
 816        void *ret=0;
 817        threadcache *tc;
 818        int mymspace;
 819        if(!mem) return nedpmalloc(p, size);
 820        GetThreadCache(&p, &tc, &mymspace, &size);
 821#if THREADCACHEMAX
 822        if(tc && size && size<=THREADCACHEMAX)
 823        {       /* Use the thread cache */
 824                size_t memsize=nedblksize(mem);
 825                assert(memsize);
 826                if((ret=threadcache_malloc(p, tc, &size)))
 827                {
 828                        memcpy(ret, mem, memsize<size ? memsize : size);
 829                        if(memsize<=THREADCACHEMAX)
 830                                threadcache_free(p, tc, mymspace, mem, memsize);
 831                        else
 832                                mspace_free(0, mem);
 833                }
 834        }
 835#endif
 836        if(!ret)
 837        {       /* Reallocs always happen in the mspace they happened in, so skip
 838                locking the preferred mspace for this thread */
 839                ret=mspace_realloc(0, mem, size);
 840        }
 841        return ret;
 842}
 843void   nedpfree(nedpool *p, void *mem) THROWSPEC
 844{       /* Frees always happen in the mspace they happened in, so skip
 845        locking the preferred mspace for this thread */
 846        threadcache *tc;
 847        int mymspace;
 848        size_t memsize;
 849        assert(mem);
 850        GetThreadCache(&p, &tc, &mymspace, 0);
 851#if THREADCACHEMAX
 852        memsize=nedblksize(mem);
 853        assert(memsize);
 854        if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
 855                threadcache_free(p, tc, mymspace, mem, memsize);
 856        else
 857#endif
 858                mspace_free(0, mem);
 859}
 860void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
 861{
 862        void *ret;
 863        threadcache *tc;
 864        int mymspace;
 865        GetThreadCache(&p, &tc, &mymspace, &bytes);
 866        {       /* Use this thread's mspace */
 867        GETMSPACE(m, p, tc, mymspace, bytes,
 868                  ret=mspace_memalign(m, alignment, bytes));
 869        }
 870        return ret;
 871}
 872#if !NO_MALLINFO
 873struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
 874{
 875        int n;
 876        struct mallinfo ret={0};
 877        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 878        for(n=0; p->m[n]; n++)
 879        {
 880                struct mallinfo t=mspace_mallinfo(p->m[n]);
 881                ret.arena+=t.arena;
 882                ret.ordblks+=t.ordblks;
 883                ret.hblkhd+=t.hblkhd;
 884                ret.usmblks+=t.usmblks;
 885                ret.uordblks+=t.uordblks;
 886                ret.fordblks+=t.fordblks;
 887                ret.keepcost+=t.keepcost;
 888        }
 889        return ret;
 890}
 891#endif
 892int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
 893{
 894        return mspace_mallopt(parno, value);
 895}
 896int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
 897{
 898        int n, ret=0;
 899        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 900        for(n=0; p->m[n]; n++)
 901        {
 902                ret+=mspace_trim(p->m[n], pad);
 903        }
 904        return ret;
 905}
 906void   nedpmalloc_stats(nedpool *p) THROWSPEC
 907{
 908        int n;
 909        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 910        for(n=0; p->m[n]; n++)
 911        {
 912                mspace_malloc_stats(p->m[n]);
 913        }
 914}
 915size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
 916{
 917        size_t ret=0;
 918        int n;
 919        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 920        for(n=0; p->m[n]; n++)
 921        {
 922                ret+=mspace_footprint(p->m[n]);
 923        }
 924        return ret;
 925}
 926void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
 927{
 928        void **ret;
 929        threadcache *tc;
 930        int mymspace;
 931        GetThreadCache(&p, &tc, &mymspace, &elemsize);
 932    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
 933              ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
 934        return ret;
 935}
 936void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
 937{
 938        void **ret;
 939        threadcache *tc;
 940        int mymspace;
 941    size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
 942    if(!adjustedsizes) return 0;
 943    for(i=0; i<elems; i++)
 944        adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
 945        GetThreadCache(&p, &tc, &mymspace, 0);
 946        GETMSPACE(m, p, tc, mymspace, 0,
 947              ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
 948        return ret;
 949}
 950
 951#ifdef OVERRIDE_STRDUP
 952/*
 953 * This implementation is purely there to override the libc version, to
 954 * avoid a crash due to allocation and free on different 'heaps'.
 955 */
 956char *strdup(const char *s1)
 957{
 958        char *s2 = 0;
 959        if (s1) {
 960                s2 = malloc(strlen(s1) + 1);
 961                strcpy(s2, s1);
 962        }
 963        return s2;
 964}
 965#endif
 966
 967#if defined(__cplusplus)
 968}
 969#endif