compat / nedmalloc / nedmalloc.con commit completion: add --set-upstream-to and --unset-upstream (04308e9)
   1/* Alternative malloc implementation for multiple threads without
   2lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas
   3
   4Boost Software License - Version 1.0 - August 17th, 2003
   5
   6Permission is hereby granted, free of charge, to any person or organization
   7obtaining a copy of the software and accompanying documentation covered by
   8this license (the "Software") to use, reproduce, display, distribute,
   9execute, and transmit the Software, and to prepare derivative works of the
  10Software, and to permit third-parties to whom the Software is furnished to
  11do so, all subject to the following:
  12
  13The copyright notices in the Software and this entire statement, including
  14the above license grant, this restriction and the following disclaimer,
  15must be included in all copies of the Software, in whole or in part, and
  16all derivative works of the Software, unless such copies or derivative
  17works are solely in the form of machine-executable object code generated by
  18a source language processor.
  19
  20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
  23SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
  24FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
  25ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  26DEALINGS IN THE SOFTWARE.
  27*/
  28
  29#ifdef _MSC_VER
  30/* Enable full aliasing on MSVC */
  31/*#pragma optimize("a", on)*/
  32#endif
  33
  34/*#define FULLSANITYCHECKS*/
  35
  36#include "nedmalloc.h"
  37#if defined(WIN32)
  38 #include <malloc.h>
  39#endif
  40#define MSPACES 1
  41#define ONLY_MSPACES 1
  42#ifndef USE_LOCKS
  43 #define USE_LOCKS 1
  44#endif
  45#define FOOTERS 1           /* Need to enable footers so frees lock the right mspace */
  46#undef DEBUG                            /* dlmalloc wants DEBUG either 0 or 1 */
  47#ifdef _DEBUG
  48 #define DEBUG 1
  49#else
  50 #define DEBUG 0
  51#endif
  52#ifdef NDEBUG               /* Disable assert checking on release builds */
  53 #undef DEBUG
  54#endif
  55/* The default of 64Kb means we spend too much time kernel-side */
  56#ifndef DEFAULT_GRANULARITY
  57#define DEFAULT_GRANULARITY (1*1024*1024)
  58#endif
  59/*#define USE_SPIN_LOCKS 0*/
  60
  61
  62/*#define FORCEINLINE*/
  63#include "malloc.c.h"
  64#ifdef NDEBUG               /* Disable assert checking on release builds */
  65 #undef DEBUG
  66#endif
  67
  68/* The maximum concurrent threads in a pool possible */
  69#ifndef MAXTHREADSINPOOL
  70#define MAXTHREADSINPOOL 16
  71#endif
  72/* The maximum number of threadcaches which can be allocated */
  73#ifndef THREADCACHEMAXCACHES
  74#define THREADCACHEMAXCACHES 256
  75#endif
  76/* The maximum size to be allocated from the thread cache */
  77#ifndef THREADCACHEMAX
  78#define THREADCACHEMAX 8192
  79#endif
  80#if 0
  81/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
  82#define THREADCACHEMAXBINS ((13-4)*2)
  83#else
  84/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
  85#define THREADCACHEMAXBINS (13-4)
  86#endif
  87/* Point at which the free space in a thread cache is garbage collected */
  88#ifndef THREADCACHEMAXFREESPACE
  89#define THREADCACHEMAXFREESPACE (512*1024)
  90#endif
  91
  92
  93#ifdef WIN32
  94 #define TLSVAR                 DWORD
  95 #define TLSALLOC(k)    (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
  96 #define TLSFREE(k)             (!TlsFree(k))
  97 #define TLSGET(k)              TlsGetValue(k)
  98 #define TLSSET(k, a)   (!TlsSetValue(k, a))
  99 #ifdef DEBUG
 100static LPVOID ChkedTlsGetValue(DWORD idx)
 101{
 102        LPVOID ret=TlsGetValue(idx);
 103        assert(S_OK==GetLastError());
 104        return ret;
 105}
 106  #undef TLSGET
 107  #define TLSGET(k) ChkedTlsGetValue(k)
 108 #endif
 109#else
 110 #define TLSVAR                 pthread_key_t
 111 #define TLSALLOC(k)    pthread_key_create(k, 0)
 112 #define TLSFREE(k)             pthread_key_delete(k)
 113 #define TLSGET(k)              pthread_getspecific(k)
 114 #define TLSSET(k, a)   pthread_setspecific(k, a)
 115#endif
 116
 117#if 0
 118/* Only enable if testing with valgrind. Causes misoperation */
 119#define mspace_malloc(p, s) malloc(s)
 120#define mspace_realloc(p, m, s) realloc(m, s)
 121#define mspace_calloc(p, n, s) calloc(n, s)
 122#define mspace_free(p, m) free(m)
 123#endif
 124
 125
 126#if defined(__cplusplus)
 127#if !defined(NO_NED_NAMESPACE)
 128namespace nedalloc {
 129#else
 130extern "C" {
 131#endif
 132#endif
 133
 134size_t nedblksize(void *mem) THROWSPEC
 135{
 136#if 0
 137        /* Only enable if testing with valgrind. Causes misoperation */
 138        return THREADCACHEMAX;
 139#else
 140        if(mem)
 141        {
 142                mchunkptr p=mem2chunk(mem);
 143                assert(cinuse(p));      /* If this fails, someone tried to free a block twice */
 144                if(cinuse(p))
 145                        return chunksize(p)-overhead_for(p);
 146        }
 147        return 0;
 148#endif
 149}
 150
 151void nedsetvalue(void *v) THROWSPEC                                     { nedpsetvalue(0, v); }
 152void * nedmalloc(size_t size) THROWSPEC                         { return nedpmalloc(0, size); }
 153void * nedcalloc(size_t no, size_t size) THROWSPEC      { return nedpcalloc(0, no, size); }
 154void * nedrealloc(void *mem, size_t size) THROWSPEC     { return nedprealloc(0, mem, size); }
 155void   nedfree(void *mem) THROWSPEC                                     { nedpfree(0, mem); }
 156void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
 157#if !NO_MALLINFO
 158struct mallinfo nedmallinfo(void) THROWSPEC                     { return nedpmallinfo(0); }
 159#endif
 160int    nedmallopt(int parno, int value) THROWSPEC       { return nedpmallopt(0, parno, value); }
 161int    nedmalloc_trim(size_t pad) THROWSPEC                     { return nedpmalloc_trim(0, pad); }
 162void   nedmalloc_stats() THROWSPEC                                      { nedpmalloc_stats(0); }
 163size_t nedmalloc_footprint() THROWSPEC                          { return nedpmalloc_footprint(0); }
 164void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC  { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
 165void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC    { return nedpindependent_comalloc(0, elems, sizes, chunks); }
 166
 167struct threadcacheblk_t;
 168typedef struct threadcacheblk_t threadcacheblk;
 169struct threadcacheblk_t
 170{       /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
 171#ifdef FULLSANITYCHECKS
 172        unsigned int magic;
 173#endif
 174        unsigned int lastUsed, size;
 175        threadcacheblk *next, *prev;
 176};
 177typedef struct threadcache_t
 178{
 179#ifdef FULLSANITYCHECKS
 180        unsigned int magic1;
 181#endif
 182        int mymspace;                                           /* Last mspace entry this thread used */
 183        long threadid;
 184        unsigned int mallocs, frees, successes;
 185        size_t freeInCache;                                     /* How much free space is stored in this cache */
 186        threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
 187#ifdef FULLSANITYCHECKS
 188        unsigned int magic2;
 189#endif
 190} threadcache;
 191struct nedpool_t
 192{
 193        MLOCK_T mutex;
 194        void *uservalue;
 195        int threads;                                            /* Max entries in m to use */
 196        threadcache *caches[THREADCACHEMAXCACHES];
 197        TLSVAR mycache;                                         /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
 198        mstate m[MAXTHREADSINPOOL+1];           /* mspace entries for this pool */
 199};
 200static nedpool syspool;
 201
 202static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
 203{       /* 8=1000       16=10000        20=10100        24=11000        32=100000       48=110000       4096=1000000000000 */
 204        unsigned int topbit, size=(unsigned int)(_size>>4);
 205        /* 16=1         20=1    24=1    32=10   48=11   64=100  96=110  128=1000        4096=100000000 */
 206
 207#if defined(__GNUC__)
 208        topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
 209#elif defined(_MSC_VER) && _MSC_VER>=1300
 210        {
 211            unsigned long bsrTopBit;
 212
 213            _BitScanReverse(&bsrTopBit, size);
 214
 215            topbit = bsrTopBit;
 216        }
 217#else
 218#if 0
 219        union {
 220                unsigned asInt[2];
 221                double asDouble;
 222        };
 223        int n;
 224
 225        asDouble = (double)size + 0.5;
 226        topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
 227#else
 228        {
 229                unsigned int x=size;
 230                x = x | (x >> 1);
 231                x = x | (x >> 2);
 232                x = x | (x >> 4);
 233                x = x | (x >> 8);
 234                x = x | (x >>16);
 235                x = ~x;
 236                x = x - ((x >> 1) & 0x55555555);
 237                x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
 238                x = (x + (x >> 4)) & 0x0F0F0F0F;
 239                x = x + (x << 8);
 240                x = x + (x << 16);
 241                topbit=31 - (x >> 24);
 242        }
 243#endif
 244#endif
 245        return topbit;
 246}
 247
 248
 249#ifdef FULLSANITYCHECKS
 250static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
 251{
 252        assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
 253        if(ptr[0] && ptr[1])
 254        {
 255                assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
 256                assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
 257                assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
 258                assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
 259                assert(!ptr[0]->prev);
 260                assert(!ptr[1]->next);
 261                if(ptr[0]==ptr[1])
 262                {
 263                        assert(!ptr[0]->next);
 264                        assert(!ptr[1]->prev);
 265                }
 266        }
 267}
 268static void tcfullsanitycheck(threadcache *tc) THROWSPEC
 269{
 270        threadcacheblk **tcbptr=tc->bins;
 271        int n;
 272        for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
 273        {
 274                threadcacheblk *b, *ob=0;
 275                tcsanitycheck(tcbptr);
 276                for(b=tcbptr[0]; b; ob=b, b=b->next)
 277                {
 278                        assert(*(unsigned int *) "NEDN"==b->magic);
 279                        assert(!ob || ob->next==b);
 280                        assert(!ob || b->prev==ob);
 281                }
 282        }
 283}
 284#endif
 285
 286static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
 287{
 288#ifdef FULLSANITYCHECKS
 289        tcfullsanitycheck(tc);
 290#endif
 291        if(tc->freeInCache)
 292        {
 293                threadcacheblk **tcbptr=tc->bins;
 294                int n;
 295                for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
 296                {
 297                        threadcacheblk **tcb=tcbptr+1;          /* come from oldest end of list */
 298                        /*tcsanitycheck(tcbptr);*/
 299                        for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
 300                        {
 301                                threadcacheblk *f=*tcb;
 302                                size_t blksize=f->size; /*nedblksize(f);*/
 303                                assert(blksize<=nedblksize(f));
 304                                assert(blksize);
 305#ifdef FULLSANITYCHECKS
 306                                assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
 307#endif
 308                                *tcb=(*tcb)->prev;
 309                                if(*tcb)
 310                                        (*tcb)->next=0;
 311                                else
 312                                        *tcbptr=0;
 313                                tc->freeInCache-=blksize;
 314                                assert((long) tc->freeInCache>=0);
 315                                mspace_free(0, f);
 316                                /*tcsanitycheck(tcbptr);*/
 317                        }
 318                }
 319        }
 320#ifdef FULLSANITYCHECKS
 321        tcfullsanitycheck(tc);
 322#endif
 323}
 324static void DestroyCaches(nedpool *p) THROWSPEC
 325{
 326        if(p->caches)
 327        {
 328                threadcache *tc;
 329                int n;
 330                for(n=0; n<THREADCACHEMAXCACHES; n++)
 331                {
 332                        if((tc=p->caches[n]))
 333                        {
 334                                tc->frees++;
 335                                RemoveCacheEntries(p, tc, 0);
 336                                assert(!tc->freeInCache);
 337                                tc->mymspace=-1;
 338                                tc->threadid=0;
 339                                mspace_free(0, tc);
 340                                p->caches[n]=0;
 341                        }
 342                }
 343        }
 344}
 345
 346static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
 347{
 348        threadcache *tc=0;
 349        int n, end;
 350        ACQUIRE_LOCK(&p->mutex);
 351        for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
 352        if(THREADCACHEMAXCACHES==n)
 353        {       /* List exhausted, so disable for this thread */
 354                RELEASE_LOCK(&p->mutex);
 355                return 0;
 356        }
 357        tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
 358        if(!tc)
 359        {
 360                RELEASE_LOCK(&p->mutex);
 361                return 0;
 362        }
 363#ifdef FULLSANITYCHECKS
 364        tc->magic1=*(unsigned int *)"NEDMALC1";
 365        tc->magic2=*(unsigned int *)"NEDMALC2";
 366#endif
 367        tc->threadid=(long)(size_t)CURRENT_THREAD;
 368        for(end=0; p->m[end]; end++);
 369        tc->mymspace=tc->threadid % end;
 370        RELEASE_LOCK(&p->mutex);
 371        if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
 372        return tc;
 373}
 374
 375static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
 376{
 377        void *ret=0;
 378        unsigned int bestsize;
 379        unsigned int idx=size2binidx(*size);
 380        size_t blksize=0;
 381        threadcacheblk *blk, **binsptr;
 382#ifdef FULLSANITYCHECKS
 383        tcfullsanitycheck(tc);
 384#endif
 385        /* Calculate best fit bin size */
 386        bestsize=1<<(idx+4);
 387#if 0
 388        /* Finer grained bin fit */
 389        idx<<=1;
 390        if(*size>bestsize)
 391        {
 392                idx++;
 393                bestsize+=bestsize>>1;
 394        }
 395        if(*size>bestsize)
 396        {
 397                idx++;
 398                bestsize=1<<(4+(idx>>1));
 399        }
 400#else
 401        if(*size>bestsize)
 402        {
 403                idx++;
 404                bestsize<<=1;
 405        }
 406#endif
 407        assert(bestsize>=*size);
 408        if(*size<bestsize) *size=bestsize;
 409        assert(*size<=THREADCACHEMAX);
 410        assert(idx<=THREADCACHEMAXBINS);
 411        binsptr=&tc->bins[idx*2];
 412        /* Try to match close, but move up a bin if necessary */
 413        blk=*binsptr;
 414        if(!blk || blk->size<*size)
 415        {       /* Bump it up a bin */
 416                if(idx<THREADCACHEMAXBINS)
 417                {
 418                        idx++;
 419                        binsptr+=2;
 420                        blk=*binsptr;
 421                }
 422        }
 423        if(blk)
 424        {
 425                blksize=blk->size; /*nedblksize(blk);*/
 426                assert(nedblksize(blk)>=blksize);
 427                assert(blksize>=*size);
 428                if(blk->next)
 429                        blk->next->prev=0;
 430                *binsptr=blk->next;
 431                if(!*binsptr)
 432                        binsptr[1]=0;
 433#ifdef FULLSANITYCHECKS
 434                blk->magic=0;
 435#endif
 436                assert(binsptr[0]!=blk && binsptr[1]!=blk);
 437                assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
 438                /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
 439                ret=(void *) blk;
 440        }
 441        ++tc->mallocs;
 442        if(ret)
 443        {
 444                assert(blksize>=*size);
 445                ++tc->successes;
 446                tc->freeInCache-=blksize;
 447                assert((long) tc->freeInCache>=0);
 448        }
 449#if defined(DEBUG) && 0
 450        if(!(tc->mallocs & 0xfff))
 451        {
 452                printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
 453                        (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
 454        }
 455#endif
 456#ifdef FULLSANITYCHECKS
 457        tcfullsanitycheck(tc);
 458#endif
 459        return ret;
 460}
 461static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
 462{
 463        unsigned int age=THREADCACHEMAXFREESPACE/8192;
 464        /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
 465        while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
 466        {
 467                RemoveCacheEntries(p, tc, age);
 468                /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
 469                age>>=1;
 470        }
 471        /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
 472}
 473static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
 474{
 475        unsigned int bestsize;
 476        unsigned int idx=size2binidx(size);
 477        threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
 478        assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
 479#ifdef DEBUG
 480        {       /* Make sure this is a valid memory block */
 481            mchunkptr p  = mem2chunk(mem);
 482            mstate fm = get_mstate_for(p);
 483            if (!ok_magic(fm)) {
 484              USAGE_ERROR_ACTION(fm, p);
 485              return;
 486            }
 487        }
 488#endif
 489#ifdef FULLSANITYCHECKS
 490        tcfullsanitycheck(tc);
 491#endif
 492        /* Calculate best fit bin size */
 493        bestsize=1<<(idx+4);
 494#if 0
 495        /* Finer grained bin fit */
 496        idx<<=1;
 497        if(size>bestsize)
 498        {
 499                unsigned int biggerbestsize=bestsize+bestsize<<1;
 500                if(size>=biggerbestsize)
 501                {
 502                        idx++;
 503                        bestsize=biggerbestsize;
 504                }
 505        }
 506#endif
 507        if(bestsize!=size)      /* dlmalloc can round up, so we round down to preserve indexing */
 508                size=bestsize;
 509        binsptr=&tc->bins[idx*2];
 510        assert(idx<=THREADCACHEMAXBINS);
 511        if(tck==*binsptr)
 512        {
 513                fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
 514                abort();
 515        }
 516#ifdef FULLSANITYCHECKS
 517        tck->magic=*(unsigned int *) "NEDN";
 518#endif
 519        tck->lastUsed=++tc->frees;
 520        tck->size=(unsigned int) size;
 521        tck->next=*binsptr;
 522        tck->prev=0;
 523        if(tck->next)
 524                tck->next->prev=tck;
 525        else
 526                binsptr[1]=tck;
 527        assert(!*binsptr || (*binsptr)->size==tck->size);
 528        *binsptr=tck;
 529        assert(tck==tc->bins[idx*2]);
 530        assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
 531        /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
 532        tc->freeInCache+=size;
 533#ifdef FULLSANITYCHECKS
 534        tcfullsanitycheck(tc);
 535#endif
 536#if 1
 537        if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
 538                ReleaseFreeInCache(p, tc, mymspace);
 539#endif
 540}
 541
 542
 543
 544
 545static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
 546{       /* threads is -1 for system pool */
 547        ensure_initialization();
 548        ACQUIRE_MALLOC_GLOBAL_LOCK();
 549        if(p->threads) goto done;
 550        if(INITIAL_LOCK(&p->mutex)) goto err;
 551        if(TLSALLOC(&p->mycache)) goto err;
 552        if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
 553        p->m[0]->extp=p;
 554        p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
 555done:
 556        RELEASE_MALLOC_GLOBAL_LOCK();
 557        return 1;
 558err:
 559        if(threads<0)
 560                abort();                        /* If you can't allocate for system pool, we're screwed */
 561        DestroyCaches(p);
 562        if(p->m[0])
 563        {
 564                destroy_mspace(p->m[0]);
 565                p->m[0]=0;
 566        }
 567        if(p->mycache)
 568        {
 569                if(TLSFREE(p->mycache)) abort();
 570                p->mycache=0;
 571        }
 572        RELEASE_MALLOC_GLOBAL_LOCK();
 573        return 0;
 574}
 575static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
 576{       /* Gets called when thread's last used mspace is in use. The strategy
 577        is to run through the list of all available mspaces looking for an
 578        unlocked one and if we fail, we create a new one so long as we don't
 579        exceed p->threads */
 580        int n, end;
 581        for(n=end=*lastUsed+1; p->m[n]; end=++n)
 582        {
 583                if(TRY_LOCK(&p->m[n]->mutex)) goto found;
 584        }
 585        for(n=0; n<*lastUsed && p->m[n]; n++)
 586        {
 587                if(TRY_LOCK(&p->m[n]->mutex)) goto found;
 588        }
 589        if(end<p->threads)
 590        {
 591                mstate temp;
 592                if(!(temp=(mstate) create_mspace(size, 1)))
 593                        goto badexit;
 594                /* Now we're ready to modify the lists, we lock */
 595                ACQUIRE_LOCK(&p->mutex);
 596                while(p->m[end] && end<p->threads)
 597                        end++;
 598                if(end>=p->threads)
 599                {       /* Drat, must destroy it now */
 600                        RELEASE_LOCK(&p->mutex);
 601                        destroy_mspace((mspace) temp);
 602                        goto badexit;
 603                }
 604                /* We really want to make sure this goes into memory now but we
 605                have to be careful of breaking aliasing rules, so write it twice */
 606                *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
 607                ACQUIRE_LOCK(&p->m[end]->mutex);
 608                /*printf("Created mspace idx %d\n", end);*/
 609                RELEASE_LOCK(&p->mutex);
 610                n=end;
 611                goto found;
 612        }
 613        /* Let it lock on the last one it used */
 614badexit:
 615        ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
 616        return p->m[*lastUsed];
 617found:
 618        *lastUsed=n;
 619        if(tc)
 620                tc->mymspace=n;
 621        else
 622        {
 623                if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
 624        }
 625        return p->m[n];
 626}
 627
 628nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
 629{
 630        nedpool *ret;
 631        if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
 632        if(!InitPool(ret, capacity, threads))
 633        {
 634                nedpfree(0, ret);
 635                return 0;
 636        }
 637        return ret;
 638}
 639void neddestroypool(nedpool *p) THROWSPEC
 640{
 641        int n;
 642        ACQUIRE_LOCK(&p->mutex);
 643        DestroyCaches(p);
 644        for(n=0; p->m[n]; n++)
 645        {
 646                destroy_mspace(p->m[n]);
 647                p->m[n]=0;
 648        }
 649        RELEASE_LOCK(&p->mutex);
 650        if(TLSFREE(p->mycache)) abort();
 651        nedpfree(0, p);
 652}
 653
 654void nedpsetvalue(nedpool *p, void *v) THROWSPEC
 655{
 656        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 657        p->uservalue=v;
 658}
 659void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
 660{
 661        nedpool *np=0;
 662        mchunkptr mcp=mem2chunk(mem);
 663        mstate fm;
 664        if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
 665        if(!cinuse(mcp)) return 0;
 666        if(!next_pinuse(mcp)) return 0;
 667        if(!is_mmapped(mcp) && !pinuse(mcp))
 668        {
 669                if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
 670        }
 671        fm=get_mstate_for(mcp);
 672        if(!ok_magic(fm)) return 0;
 673        if(!ok_address(fm, mcp)) return 0;
 674        if(!fm->extp) return 0;
 675        np=(nedpool *) fm->extp;
 676        if(p) *p=np;
 677        return np->uservalue;
 678}
 679
 680void neddisablethreadcache(nedpool *p) THROWSPEC
 681{
 682        int mycache;
 683        if(!p)
 684        {
 685                p=&syspool;
 686                if(!syspool.threads) InitPool(&syspool, 0, -1);
 687        }
 688        mycache=(int)(size_t) TLSGET(p->mycache);
 689        if(!mycache)
 690        {       /* Set to mspace 0 */
 691                if(TLSSET(p->mycache, (void *)-1)) abort();
 692        }
 693        else if(mycache>0)
 694        {       /* Set to last used mspace */
 695                threadcache *tc=p->caches[mycache-1];
 696#if defined(DEBUG)
 697                printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
 698                        100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
 699#endif
 700                if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
 701                tc->frees++;
 702                RemoveCacheEntries(p, tc, 0);
 703                assert(!tc->freeInCache);
 704                tc->mymspace=-1;
 705                tc->threadid=0;
 706                mspace_free(0, p->caches[mycache-1]);
 707                p->caches[mycache-1]=0;
 708        }
 709}
 710
 711#define GETMSPACE(m,p,tc,ms,s,action)           \
 712  do                                            \
 713  {                                             \
 714    mstate m = GetMSpace((p),(tc),(ms),(s));    \
 715    action;                                     \
 716    RELEASE_LOCK(&m->mutex);                    \
 717  } while (0)
 718
 719static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
 720{       /* Returns a locked and ready for use mspace */
 721        mstate m=p->m[mymspace];
 722        assert(m);
 723        if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\
 724        /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
 725        return m;
 726}
 727static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
 728{
 729        int mycache;
 730        if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
 731        if(!*p)
 732        {
 733                *p=&syspool;
 734                if(!syspool.threads) InitPool(&syspool, 0, -1);
 735        }
 736        mycache=(int)(size_t) TLSGET((*p)->mycache);
 737        if(mycache>0)
 738        {
 739                *tc=(*p)->caches[mycache-1];
 740                *mymspace=(*tc)->mymspace;
 741        }
 742        else if(!mycache)
 743        {
 744                *tc=AllocCache(*p);
 745                if(!*tc)
 746                {       /* Disable */
 747                        if(TLSSET((*p)->mycache, (void *)-1)) abort();
 748                        *mymspace=0;
 749                }
 750                else
 751                        *mymspace=(*tc)->mymspace;
 752        }
 753        else
 754        {
 755                *tc=0;
 756                *mymspace=-mycache-1;
 757        }
 758        assert(*mymspace>=0);
 759        assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid);
 760#ifdef FULLSANITYCHECKS
 761        if(*tc)
 762        {
 763                if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
 764                {
 765                        abort();
 766                }
 767        }
 768#endif
 769}
 770
 771void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
 772{
 773        void *ret=0;
 774        threadcache *tc;
 775        int mymspace;
 776        GetThreadCache(&p, &tc, &mymspace, &size);
 777#if THREADCACHEMAX
 778        if(tc && size<=THREADCACHEMAX)
 779        {       /* Use the thread cache */
 780                ret=threadcache_malloc(p, tc, &size);
 781        }
 782#endif
 783        if(!ret)
 784        {       /* Use this thread's mspace */
 785        GETMSPACE(m, p, tc, mymspace, size,
 786                  ret=mspace_malloc(m, size));
 787        }
 788        return ret;
 789}
 790void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
 791{
 792        size_t rsize=size*no;
 793        void *ret=0;
 794        threadcache *tc;
 795        int mymspace;
 796        GetThreadCache(&p, &tc, &mymspace, &rsize);
 797#if THREADCACHEMAX
 798        if(tc && rsize<=THREADCACHEMAX)
 799        {       /* Use the thread cache */
 800                if((ret=threadcache_malloc(p, tc, &rsize)))
 801                        memset(ret, 0, rsize);
 802        }
 803#endif
 804        if(!ret)
 805        {       /* Use this thread's mspace */
 806        GETMSPACE(m, p, tc, mymspace, rsize,
 807                  ret=mspace_calloc(m, 1, rsize));
 808        }
 809        return ret;
 810}
 811void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
 812{
 813        void *ret=0;
 814        threadcache *tc;
 815        int mymspace;
 816        if(!mem) return nedpmalloc(p, size);
 817        GetThreadCache(&p, &tc, &mymspace, &size);
 818#if THREADCACHEMAX
 819        if(tc && size && size<=THREADCACHEMAX)
 820        {       /* Use the thread cache */
 821                size_t memsize=nedblksize(mem);
 822                assert(memsize);
 823                if((ret=threadcache_malloc(p, tc, &size)))
 824                {
 825                        memcpy(ret, mem, memsize<size ? memsize : size);
 826                        if(memsize<=THREADCACHEMAX)
 827                                threadcache_free(p, tc, mymspace, mem, memsize);
 828                        else
 829                                mspace_free(0, mem);
 830                }
 831        }
 832#endif
 833        if(!ret)
 834        {       /* Reallocs always happen in the mspace they happened in, so skip
 835                locking the preferred mspace for this thread */
 836                ret=mspace_realloc(0, mem, size);
 837        }
 838        return ret;
 839}
 840void   nedpfree(nedpool *p, void *mem) THROWSPEC
 841{       /* Frees always happen in the mspace they happened in, so skip
 842        locking the preferred mspace for this thread */
 843        threadcache *tc;
 844        int mymspace;
 845        size_t memsize;
 846        assert(mem);
 847        GetThreadCache(&p, &tc, &mymspace, 0);
 848#if THREADCACHEMAX
 849        memsize=nedblksize(mem);
 850        assert(memsize);
 851        if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
 852                threadcache_free(p, tc, mymspace, mem, memsize);
 853        else
 854#endif
 855                mspace_free(0, mem);
 856}
 857void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
 858{
 859        void *ret;
 860        threadcache *tc;
 861        int mymspace;
 862        GetThreadCache(&p, &tc, &mymspace, &bytes);
 863        {       /* Use this thread's mspace */
 864        GETMSPACE(m, p, tc, mymspace, bytes,
 865                  ret=mspace_memalign(m, alignment, bytes));
 866        }
 867        return ret;
 868}
 869#if !NO_MALLINFO
 870struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
 871{
 872        int n;
 873        struct mallinfo ret={0};
 874        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 875        for(n=0; p->m[n]; n++)
 876        {
 877                struct mallinfo t=mspace_mallinfo(p->m[n]);
 878                ret.arena+=t.arena;
 879                ret.ordblks+=t.ordblks;
 880                ret.hblkhd+=t.hblkhd;
 881                ret.usmblks+=t.usmblks;
 882                ret.uordblks+=t.uordblks;
 883                ret.fordblks+=t.fordblks;
 884                ret.keepcost+=t.keepcost;
 885        }
 886        return ret;
 887}
 888#endif
 889int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
 890{
 891        return mspace_mallopt(parno, value);
 892}
 893int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
 894{
 895        int n, ret=0;
 896        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 897        for(n=0; p->m[n]; n++)
 898        {
 899                ret+=mspace_trim(p->m[n], pad);
 900        }
 901        return ret;
 902}
 903void   nedpmalloc_stats(nedpool *p) THROWSPEC
 904{
 905        int n;
 906        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 907        for(n=0; p->m[n]; n++)
 908        {
 909                mspace_malloc_stats(p->m[n]);
 910        }
 911}
 912size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
 913{
 914        size_t ret=0;
 915        int n;
 916        if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
 917        for(n=0; p->m[n]; n++)
 918        {
 919                ret+=mspace_footprint(p->m[n]);
 920        }
 921        return ret;
 922}
 923void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
 924{
 925        void **ret;
 926        threadcache *tc;
 927        int mymspace;
 928        GetThreadCache(&p, &tc, &mymspace, &elemsize);
 929    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
 930              ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
 931        return ret;
 932}
 933void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
 934{
 935        void **ret;
 936        threadcache *tc;
 937        int mymspace;
 938    size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
 939    if(!adjustedsizes) return 0;
 940    for(i=0; i<elems; i++)
 941        adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
 942        GetThreadCache(&p, &tc, &mymspace, 0);
 943        GETMSPACE(m, p, tc, mymspace, 0,
 944              ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
 945        return ret;
 946}
 947
 948#ifdef OVERRIDE_STRDUP
 949/*
 950 * This implementation is purely there to override the libc version, to
 951 * avoid a crash due to allocation and free on different 'heaps'.
 952 */
 953char *strdup(const char *s1)
 954{
 955        char *s2 = 0;
 956        if (s1) {
 957                s2 = malloc(strlen(s1) + 1);
 958                strcpy(s2, s1);
 959        }
 960        return s2;
 961}
 962#endif
 963
 964#if defined(__cplusplus)
 965}
 966#endif